diff options
Diffstat (limited to 'tools/lib/perf')
25 files changed, 928 insertions, 247 deletions
diff --git a/tools/lib/perf/.gitignore b/tools/lib/perf/.gitignore new file mode 100644 index 000000000000..0f5b4af63f62 --- /dev/null +++ b/tools/lib/perf/.gitignore @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +libperf.pc +libperf.so.* +tests-shared +tests-static diff --git a/tools/lib/perf/Documentation/Makefile b/tools/lib/perf/Documentation/Makefile index 972754082a85..573ca5b27556 100644 --- a/tools/lib/perf/Documentation/Makefile +++ b/tools/lib/perf/Documentation/Makefile @@ -121,7 +121,7 @@ install-man: all $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ $(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir); -install-html: +install-html: $(MAN_HTML) $(call QUIET_INSTALL, html) \ $(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \ $(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \ diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c index 8e1a926a9cfe..bc142f0664b5 100644 --- a/tools/lib/perf/Documentation/examples/sampling.c +++ b/tools/lib/perf/Documentation/examples/sampling.c @@ -39,7 +39,7 @@ int main(int argc, char **argv) libperf_init(libperf_print); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { fprintf(stderr, "failed to create cpus\n"); return -1; diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt index d6ca24f6ef78..2378980fab8a 100644 --- a/tools/lib/perf/Documentation/libperf-sampling.txt +++ b/tools/lib/perf/Documentation/libperf-sampling.txt @@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs: [source,c] -- - 42 cpus = perf_cpu_map__new(NULL); + 42 cpus = perf_cpu_map__new_online_cpus(); 43 if (!cpus) { 44 fprintf(stderr, "failed to create cpus\n"); 45 return -1; diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index a8f1a237931b..4072bc9b7670 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ 
b/tools/lib/perf/Documentation/libperf.txt @@ -37,16 +37,15 @@ SYNOPSIS struct perf_cpu_map; - struct perf_cpu_map *perf_cpu_map__dummy_new(void); + struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); - struct perf_cpu_map *perf_cpu_map__read(FILE *file); struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); void perf_cpu_map__put(struct perf_cpu_map *map); int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); int perf_cpu_map__nr(const struct perf_cpu_map *cpus); - bool perf_cpu_map__empty(const struct perf_cpu_map *map); + bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); @@ -211,6 +210,7 @@ SYNOPSIS struct perf_record_time_conv; struct perf_record_header_feature; struct perf_record_compressed; + struct perf_record_compressed2; -- DESCRIPTION diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index d8cad124e4c5..7fbb50b74c00 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -39,29 +39,10 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - TEST_ARGS := $(if $(V),-v) -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - INCLUDES = \ +-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -70,11 +51,12 @@ INCLUDES = \ -I$(srctree)/tools/include/uapi # Append required CFLAGS -override CFLAGS += $(EXTRA_WARNINGS) -override CFLAGS += -Werror 
-Wall +override CFLAGS += -g -Werror -Wall override CFLAGS += -fPIC override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += $(EXTRA_CFLAGS) all: @@ -118,7 +100,16 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBPERF_IN): FORCE +uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm +ifeq ($(SRCARCH),arm64) + syscall-y := $(uapi-asm)/unistd_64.h +endif +uapi-asm-generic: + $(if $(syscall-y),\ + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ + generic=include/uapi/asm-generic $(syscall-y),) + +$(LIBPERF_IN): uapi-asm-generic FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) @@ -139,7 +130,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) + $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) TESTS_IN = tests-in.o @@ -188,7 +179,7 @@ install_lib: libs cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ) HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h -INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h threadmap.h xyarray.h +INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS)) diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 6cd0be7c1bb4..4160e7d2e120 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> #include <perf/cpumap.h> #include <stdlib.h> #include <linux/refcount.h> @@ -9,25 +10,38 @@ #include <unistd.h> #include <ctype.h> #include <limits.h> +#include "internal.h" +#include <api/fs/fs.h> -static struct perf_cpu_map 
*perf_cpu_map__alloc(int nr_cpus) +#define MAX_NR_CPUS 4096 + +void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) { - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); + RC_CHK_ACCESS(map)->nr = nr_cpus; +} - if (cpus != NULL) { +struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) +{ + RC_STRUCT(perf_cpu_map) *cpus; + struct perf_cpu_map *result; + + if (nr_cpus == 0) + return NULL; + + cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); + if (ADD_RC_CHK(result, cpus)) { cpus->nr = nr_cpus; refcount_set(&cpus->refcnt, 1); - } - return cpus; + return result; } -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +struct perf_cpu_map *perf_cpu_map__new_any_cpu(void) { struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); if (cpus) - cpus->map[0].cpu = -1; + RC_CHK_ACCESS(cpus)->map[0].cpu = -1; return cpus; } @@ -35,48 +49,79 @@ struct perf_cpu_map *perf_cpu_map__dummy_new(void) static void cpu_map__delete(struct perf_cpu_map *map) { if (map) { - WARN_ONCE(refcount_read(&map->refcnt) != 0, + WARN_ONCE(refcount_read(perf_cpu_map__refcnt(map)) != 0, "cpu_map refcnt unbalanced\n"); - free(map); + RC_CHK_FREE(map); } } struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map) { - if (map) - refcount_inc(&map->refcnt); - return map; + struct perf_cpu_map *result; + + if (RC_CHK_GET(result, map)) + refcount_inc(perf_cpu_map__refcnt(map)); + + return result; } void perf_cpu_map__put(struct perf_cpu_map *map) { - if (map && refcount_dec_and_test(&map->refcnt)) - cpu_map__delete(map); + if (map) { + if (refcount_dec_and_test(perf_cpu_map__refcnt(map))) + cpu_map__delete(map); + else + RC_CHK_PUT(map); + } } -static struct perf_cpu_map *cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysconf(void) { struct perf_cpu_map *cpus; - int nr_cpus; + int nr_cpus, nr_cpus_conf; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (nr_cpus < 0) return NULL; + nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF); 
+ if (nr_cpus != nr_cpus_conf) { + pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.", + nr_cpus, nr_cpus_conf, nr_cpus); + } + cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { int i; for (i = 0; i < nr_cpus; ++i) - cpus->map[i].cpu = i; + RC_CHK_ACCESS(cpus)->map[i].cpu = i; } return cpus; } -struct perf_cpu_map *perf_cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysfs_online(void) +{ + struct perf_cpu_map *cpus = NULL; + char *buf = NULL; + size_t buf_len; + + if (sysfs__read_str("devices/system/cpu/online", &buf, &buf_len) >= 0) { + cpus = perf_cpu_map__new(buf); + free(buf); + } + return cpus; +} + +struct perf_cpu_map *perf_cpu_map__new_online_cpus(void) { - return cpu_map__default_new(); + struct perf_cpu_map *cpus = cpu_map__new_sysfs_online(); + + if (cpus) + return cpus; + + return cpu_map__new_sysconf(); } @@ -87,6 +132,11 @@ static int cmp_cpu(const void *a, const void *b) return cpu_a->cpu - cpu_b->cpu; } +static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +{ + return RC_CHK_ACCESS(cpus)->map[idx]; +} + static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus) { size_t payload_size = nr_cpus * sizeof(struct perf_cpu); @@ -94,89 +144,21 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu int i, j; if (cpus != NULL) { - memcpy(cpus->map, tmp_cpus, payload_size); - qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); + memcpy(RC_CHK_ACCESS(cpus)->map, tmp_cpus, payload_size); + qsort(RC_CHK_ACCESS(cpus)->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); /* Remove dups */ j = 0; for (i = 0; i < nr_cpus; i++) { - if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu) - cpus->map[j++].cpu = cpus->map[i].cpu; - } - cpus->nr = j; - assert(j <= nr_cpus); - } - return cpus; -} - -struct perf_cpu_map *perf_cpu_map__read(FILE *file) -{ - struct 
perf_cpu_map *cpus = NULL; - int nr_cpus = 0; - struct perf_cpu *tmp_cpus = NULL, *tmp; - int max_entries = 0; - int n, cpu, prev; - char sep; - - sep = 0; - prev = -1; - for (;;) { - n = fscanf(file, "%u%c", &cpu, &sep); - if (n <= 0) - break; - if (prev >= 0) { - int new_max = nr_cpus + cpu - prev - 1; - - WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. " - "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS); - - if (new_max >= max_entries) { - max_entries = new_max + MAX_NR_CPUS / 2; - tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); - if (tmp == NULL) - goto out_free_tmp; - tmp_cpus = tmp; + if (i == 0 || + __perf_cpu_map__cpu(cpus, i).cpu != + __perf_cpu_map__cpu(cpus, i - 1).cpu) { + RC_CHK_ACCESS(cpus)->map[j++].cpu = + __perf_cpu_map__cpu(cpus, i).cpu; } - - while (++prev < cpu) - tmp_cpus[nr_cpus++].cpu = prev; - } - if (nr_cpus == max_entries) { - max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); - if (tmp == NULL) - goto out_free_tmp; - tmp_cpus = tmp; } - - tmp_cpus[nr_cpus++].cpu = cpu; - if (n == 2 && sep == '-') - prev = cpu; - else - prev = -1; - if (n == 1 || sep == '\n') - break; + perf_cpu_map__set_nr(cpus, j); + assert(j <= nr_cpus); } - - if (nr_cpus > 0) - cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else - cpus = cpu_map__default_new(); -out_free_tmp: - free(tmp_cpus); - return cpus; -} - -static struct perf_cpu_map *cpu_map__read_all_cpu_map(void) -{ - struct perf_cpu_map *cpus = NULL; - FILE *onlnf; - - onlnf = fopen("/sys/devices/system/cpu/online", "r"); - if (!onlnf) - return cpu_map__default_new(); - - cpus = perf_cpu_map__read(onlnf); - fclose(onlnf); return cpus; } @@ -190,7 +172,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) int max_entries = 0; if (!cpu_list) - return cpu_map__read_all_cpu_map(); + return perf_cpu_map__new_online_cpus(); /* * must handle the case of empty cpumap to cover @@ -203,8 +185,8 @@ struct perf_cpu_map 
*perf_cpu_map__new(const char *cpu_list) while (isdigit(*cpu_list)) { p = NULL; start_cpu = strtoul(cpu_list, &p, 0); - if (start_cpu >= INT_MAX - || (*p != '\0' && *p != ',' && *p != '-')) + if (start_cpu >= INT16_MAX + || (*p != '\0' && *p != ',' && *p != '-' && *p != '\n')) goto invalid; if (*p == '-') { @@ -212,7 +194,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) p = NULL; end_cpu = strtoul(cpu_list, &p, 0); - if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',')) + if (end_cpu >= INT16_MAX || (*p != '\0' && *p != ',' && *p != '\n')) goto invalid; if (end_cpu < start_cpu) @@ -227,17 +209,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) for (; start_cpu <= end_cpu; start_cpu++) { /* check for duplicates */ for (i = 0; i < nr_cpus; i++) - if (tmp_cpus[i].cpu == (int)start_cpu) + if (tmp_cpus[i].cpu == (int16_t)start_cpu) goto invalid; if (nr_cpus == max_entries) { - max_entries += MAX_NR_CPUS; + max_entries += max(end_cpu - start_cpu + 1, 16UL); tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto invalid; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++].cpu = (int)start_cpu; + tmp_cpus[nr_cpus++].cpu = (int16_t)start_cpu; } if (*p) ++p; @@ -245,38 +227,69 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) cpu_list = p; } - if (nr_cpus > 0) + if (nr_cpus > 0) { cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else if (*cpu_list != '\0') - cpus = cpu_map__default_new(); - else - cpus = perf_cpu_map__dummy_new(); + } else if (*cpu_list != '\0') { + pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.", + cpu_list); + cpus = perf_cpu_map__new_online_cpus(); + } else { + cpus = perf_cpu_map__new_any_cpu(); + } invalid: free(tmp_cpus); out: return cpus; } +struct perf_cpu_map *perf_cpu_map__new_int(int cpu) +{ + struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); + + if (cpus) + RC_CHK_ACCESS(cpus)->map[0].cpu = cpu; + + return cpus; +} + +static int 
__perf_cpu_map__nr(const struct perf_cpu_map *cpus) +{ + return RC_CHK_ACCESS(cpus)->nr; +} + struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { struct perf_cpu result = { .cpu = -1 }; - if (cpus && idx < cpus->nr) - return cpus->map[idx]; + if (cpus && idx < __perf_cpu_map__nr(cpus)) + return __perf_cpu_map__cpu(cpus, idx); return result; } int perf_cpu_map__nr(const struct perf_cpu_map *cpus) { - return cpus ? cpus->nr : 1; + return cpus ? __perf_cpu_map__nr(cpus) : 1; } -bool perf_cpu_map__empty(const struct perf_cpu_map *map) +bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map) { - return map ? map->map[0].cpu == -1 : true; + return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true; +} + +bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map) +{ + if (!map) + return true; + + return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1; +} + +bool perf_cpu_map__is_empty(const struct perf_cpu_map *map) +{ + return map == NULL; } int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) @@ -287,10 +300,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) return -1; low = 0; - high = cpus->nr; + high = __perf_cpu_map__nr(cpus); while (low < high) { int idx = (low + high) / 2; - struct perf_cpu cpu_at_idx = cpus->map[idx]; + struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx); if (cpu_at_idx.cpu == cpu.cpu) return idx; @@ -309,14 +322,58 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) return perf_cpu_map__idx(cpus, cpu) != -1; } +bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs) +{ + int nr; + + if (lhs == rhs) + return true; + + if (!lhs || !rhs) + return false; + + nr = __perf_cpu_map__nr(lhs); + if (nr != __perf_cpu_map__nr(rhs)) + return false; + + for (int idx = 0; idx < nr; idx++) { + if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, 
idx).cpu) + return false; + } + return true; +} + +bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map) +{ + return map && __perf_cpu_map__cpu(map, 0).cpu == -1; +} + +struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map) +{ + struct perf_cpu cpu, result = { + .cpu = -1 + }; + int idx; + + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) { + result = cpu; + break; + } + return result; +} + struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map) { struct perf_cpu result = { .cpu = -1 }; - // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return map->nr > 0 ? map->map[map->nr - 1] : result; + if (!map) + return result; + + // The CPUs are always sorted and nr is always > 0 as 0 length map is + // encoded as NULL. + return __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1); } /** Is 'b' a subset of 'a'. */ @@ -324,15 +381,15 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu { if (a == b || !b) return true; - if (!a || b->nr > a->nr) + if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a)) return false; - for (int i = 0, j = 0; i < a->nr; i++) { - if (a->map[i].cpu > b->map[j].cpu) + for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) { + if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu) return false; - if (a->map[i].cpu == b->map[j].cpu) { + if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) { j++; - if (j == b->nr) + if (j == __perf_cpu_map__nr(b)) return true; } } @@ -340,53 +397,101 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu } /* - * Merge two cpumaps + * Merge two cpumaps. * - * orig either gets freed and replaced with a new map, or reused - * with no reference count change (similar to "realloc") - * other has its reference count increased. + * If 'other' is subset of '*orig', '*orig' keeps itself with no reference count + * change (similar to "realloc"). 
+ * + * If '*orig' is subset of 'other', '*orig' reuses 'other' with its reference + * count increased. + * + * Otherwise, '*orig' gets freed and replaced with a new map. */ - -struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, - struct perf_cpu_map *other) +int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other) { struct perf_cpu *tmp_cpus; int tmp_len; int i, j, k; struct perf_cpu_map *merged; - if (perf_cpu_map__is_subset(orig, other)) - return orig; - if (perf_cpu_map__is_subset(other, orig)) { - perf_cpu_map__put(orig); - return perf_cpu_map__get(other); + if (perf_cpu_map__is_subset(*orig, other)) + return 0; + if (perf_cpu_map__is_subset(other, *orig)) { + perf_cpu_map__put(*orig); + *orig = perf_cpu_map__get(other); + return 0; } - tmp_len = orig->nr + other->nr; + tmp_len = __perf_cpu_map__nr(*orig) + __perf_cpu_map__nr(other); tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); if (!tmp_cpus) - return NULL; + return -ENOMEM; /* Standard merge algorithm from wikipedia */ i = j = k = 0; - while (i < orig->nr && j < other->nr) { - if (orig->map[i].cpu <= other->map[j].cpu) { - if (orig->map[i].cpu == other->map[j].cpu) + while (i < __perf_cpu_map__nr(*orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(*orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) { + if (__perf_cpu_map__cpu(*orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu) j++; - tmp_cpus[k++] = orig->map[i++]; + tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++); } else - tmp_cpus[k++] = other->map[j++]; + tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++); } - while (i < orig->nr) - tmp_cpus[k++] = orig->map[i++]; + while (i < __perf_cpu_map__nr(*orig)) + tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++); - while (j < other->nr) - tmp_cpus[k++] = other->map[j++]; + while (j < __perf_cpu_map__nr(other)) + tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++); assert(k <= tmp_len); merged = cpu_map__trim_new(k, tmp_cpus); free(tmp_cpus); - 
perf_cpu_map__put(orig); + perf_cpu_map__put(*orig); + *orig = merged; + return 0; +} + +struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other) +{ + int i, j, k; + struct perf_cpu_map *merged; + + if (perf_cpu_map__is_subset(other, orig)) + return perf_cpu_map__get(orig); + if (perf_cpu_map__is_subset(orig, other)) + return perf_cpu_map__get(other); + + i = j = k = 0; + while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) + i++; + else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu) + j++; + else { /* CPUs match. */ + i++; + j++; + k++; + } + } + if (k == 0) /* Maps are completely disjoint. */ + return NULL; + + merged = perf_cpu_map__alloc(k); + if (!merged) + return NULL; + /* Entries are added to merged in sorted order, so no need to sort again. */ + i = j = k = 0; + while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) + i++; + else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu) + j++; + else { + j++; + RC_CHK_ACCESS(merged)->map[k++] = __perf_cpu_map__cpu(orig, i++); + } + } return merged; } diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 61b637f29b82..3ed023f4b190 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -36,20 +36,88 @@ void perf_evlist__init(struct perf_evlist *evlist) static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, struct perf_evsel *evsel) { + if (perf_cpu_map__is_empty(evsel->cpus)) { + if (perf_cpu_map__is_empty(evsel->pmu_cpus)) { + /* + * Assume the unset PMU cpus were for a system-wide + * event, like a software or tracepoint. 
+ */ + evsel->pmu_cpus = perf_cpu_map__new_online_cpus(); + } + if (evlist->has_user_cpus && !evsel->system_wide) { + /* + * Use the user CPUs unless the evsel is set to be + * system wide, such as the dummy event. + */ + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); + } else { + /* + * System wide and other modes, assume the cpu map + * should be set to all PMU CPUs. + */ + evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus); + } + } /* - * We already have cpus for evsel (via PMU sysfs) so - * keep it, if there's no target cpu list defined. + * Avoid "any CPU"(-1) for uncore and PMUs that require a CPU, even if + * requested. */ - if (evsel->system_wide) { + if (evsel->requires_cpu && perf_cpu_map__has_any_cpu(evsel->cpus)) { perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__new(NULL); - } else if (!evsel->own_cpus || evlist->has_user_cpus || - (!evsel->requires_cpu && perf_cpu_map__empty(evlist->user_requested_cpus))) { + evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus); + } + + /* + * Globally requested CPUs replace user requested unless the evsel is + * set to be system wide. + */ + if (evlist->has_user_cpus && !evsel->system_wide) { + assert(!perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)); + if (!perf_cpu_map__equal(evsel->cpus, evlist->user_requested_cpus)) { + perf_cpu_map__put(evsel->cpus); + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); + } + } + + /* Ensure cpus only references valid PMU CPUs. */ + if (!perf_cpu_map__has_any_cpu(evsel->cpus) && + !perf_cpu_map__is_subset(evsel->pmu_cpus, evsel->cpus)) { + struct perf_cpu_map *tmp = perf_cpu_map__intersect(evsel->pmu_cpus, evsel->cpus); + perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); - } else if (evsel->cpus != evsel->own_cpus) { + evsel->cpus = tmp; + } + + /* + * Was event requested on all the PMU's CPUs but the user requested is + * any CPU (-1)? 
If so switch to using any CPU (-1) to reduce the number + * of events. + */ + if (!evsel->system_wide && + !evsel->requires_cpu && + perf_cpu_map__equal(evsel->cpus, evsel->pmu_cpus) && + perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)) { perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evsel->own_cpus); + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); + } + + /* Sanity check assert before the evsel is potentially removed. */ + assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus)); + + /* + * Empty cpu lists would eventually get opened as "any" so remove + * genuinely empty ones before they're opened in the wrong place. + */ + if (perf_cpu_map__is_empty(evsel->cpus)) { + struct perf_evsel *next = perf_evlist__next(evlist, evsel); + + perf_evlist__remove(evlist, evsel); + /* Keep idx contiguous */ + if (next) + list_for_each_entry_from(next, &evlist->entries, node) + next->idx--; + + return; } if (evsel->system_wide) { @@ -60,16 +128,20 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, evsel->threads = perf_thread_map__get(evlist->threads); } - evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); + perf_cpu_map__merge(&evlist->all_cpus, evsel->cpus); } static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel, *n; evlist->needs_map_propagation = true; - perf_evlist__for_each_evsel(evlist, evsel) + /* Clear the all_cpus set which will be merged into during propagation. 
*/ + perf_cpu_map__put(evlist->all_cpus); + evlist->all_cpus = NULL; + + list_for_each_entry_safe(evsel, n, &evlist->entries, node) __perf_evlist__propagate_maps(evlist, evsel); } @@ -233,10 +305,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist) static void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { int hash; - struct perf_sample_id *sid = SID(evsel, cpu, thread); + struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread); sid->id = id; sid->evsel = evsel; @@ -254,21 +326,27 @@ void perf_evlist__reset_id_hash(struct perf_evlist *evlist) void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { - perf_evlist__id_hash(evlist, evsel, cpu, thread, id); + if (!SID(evsel, cpu_map_idx, thread)) + return; + + perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id); evsel->id[evsel->ids++] = id; } int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd) + int cpu_map_idx, int thread, int fd) { u64 read_data[4] = { 0, }; int id_idx = 1; /* The first entry is the counter value */ u64 id; int ret; + if (!SID(evsel, cpu_map_idx, thread)) + return -1; + ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); if (!ret) goto add; @@ -297,7 +375,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, id = read_data[id_idx]; add: - perf_evlist__id_add(evlist, evsel, cpu, thread, id); + perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id); return 0; } @@ -604,7 +682,7 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) /* One for each CPU */ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); - if (perf_cpu_map__empty(evlist->all_cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) { /* Plus one for each thread */ nr_mmaps += perf_thread_map__nr(evlist->threads); /* Minus the per-thread CPU 
(-1) */ @@ -638,7 +716,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) return mmap_per_thread(evlist, ops, mp); return mmap_per_cpu(evlist, ops, mp); @@ -687,15 +765,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader) { - struct perf_evsel *first, *last, *evsel; - - first = list_first_entry(list, struct perf_evsel, node); - last = list_last_entry(list, struct perf_evsel, node); - - leader->nr_members = last->idx - first->idx + 1; + struct perf_evsel *evsel; + int n = 0; - __perf_evlist__for_each_entry(list, evsel) + __perf_evlist__for_each_entry(list, evsel) { evsel->leader = leader; + n++; + } + leader->nr_members = n; } void perf_evlist__set_leader(struct perf_evlist *evlist) @@ -704,7 +781,32 @@ void perf_evlist__set_leader(struct perf_evlist *evlist) struct perf_evsel *first = list_entry(evlist->entries.next, struct perf_evsel, node); - evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; __perf_evlist__set_leader(&evlist->entries, first); } } + +int perf_evlist__nr_groups(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + int nr_groups = 0; + + perf_evlist__for_each_evsel(evlist, evsel) { + /* + * evsels by default have a nr_members of 1, and they are their + * own leader. If the nr_members is >1 then this is an + * indication of a group. 
+ */ + if (evsel->leader == evsel && evsel->nr_members > 1) + nr_groups++; + } + return nr_groups; +} + +void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel) +{ + if (!evsel->system_wide) { + evsel->system_wide = true; + if (evlist->needs_map_propagation) + __perf_evlist__propagate_maps(evlist, evsel); + } +} diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8b51b008a81f..13a307fc75ae 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -5,6 +5,7 @@ #include <perf/evsel.h> #include <perf/cpumap.h> #include <perf/threadmap.h> +#include <linux/hash.h> #include <linux/list.h> #include <internal/evsel.h> #include <linux/zalloc.h> @@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->per_stream_periods); evsel->attr = *attr; evsel->idx = idx; evsel->leader = evsel; @@ -38,8 +40,19 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr) return evsel; } +void perf_evsel__exit(struct perf_evsel *evsel) +{ + assert(evsel->fd == NULL); /* If not fds were not closed. */ + assert(evsel->mmap == NULL); /* If not munmap wasn't called. */ + assert(evsel->sample_id == NULL); /* If not free_id wasn't called. 
*/ + perf_cpu_map__put(evsel->cpus); + perf_cpu_map__put(evsel->pmu_cpus); + perf_thread_map__put(evsel->threads); +} + void perf_evsel__delete(struct perf_evsel *evsel) { + perf_evsel__exit(evsel); free(evsel); } @@ -120,7 +133,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, static struct perf_cpu_map *empty_cpu_map; if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } @@ -531,10 +544,56 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) void perf_evsel__free_id(struct perf_evsel *evsel) { + struct perf_sample_id_period *pos, *n; + xyarray__delete(evsel->sample_id); evsel->sample_id = NULL; zfree(&evsel->id); evsel->ids = 0; + + perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) { + list_del_init(&pos->node); + free(pos); + } +} + +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel) +{ + return (evsel->attr.sample_type & PERF_SAMPLE_READ) && + (evsel->attr.sample_type & PERF_SAMPLE_TID) && + evsel->attr.inherit; +} + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread) +{ + struct hlist_head *head; + struct perf_sample_id_period *res; + int hash; + + if (!per_thread) + return &sid->period; + + hash = hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS); + head = &sid->periods[hash]; + + hlist_for_each_entry(res, head, hnode) + if (res->tid == tid) + return &res->period; + + if (sid->evsel == NULL) + return NULL; + + res = zalloc(sizeof(struct perf_sample_id_period)); + if (res == NULL) + return NULL; + + INIT_LIST_HEAD(&res->node); + res->tid = tid; + + list_add_tail(&res->node, &sid->evsel->per_stream_periods); + hlist_add_head(&res->hnode, &sid->periods[hash]); + + return &res->period; } void perf_counts_values__scale(struct perf_counts_values *count, diff --git a/tools/lib/perf/include/internal/cpumap.h 
b/tools/lib/perf/include/internal/cpumap.h index 35dd29642296..e2be2d17c32b 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -4,6 +4,7 @@ #include <linux/refcount.h> #include <perf/cpumap.h> +#include <internal/rc_check.h> /** * A sized, reference counted, sorted array of integers representing CPU @@ -12,7 +13,7 @@ * gaps if CPU numbers were used. For events associated with a pid, rather than * a CPU, a single dummy map with an entry of -1 is used. */ -struct perf_cpu_map { +DECLARE_RC_STRUCT(perf_cpu_map) { refcount_t refcnt; /** Length of the map array. */ int nr; @@ -20,11 +21,14 @@ struct perf_cpu_map { struct perf_cpu map[]; }; -#ifndef MAX_NR_CPUS -#define MAX_NR_CPUS 2048 -#endif - +struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus); int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b); +void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus); + +static inline refcount_t *perf_cpu_map__refcnt(struct perf_cpu_map *map) +{ + return &RC_CHK_ACCESS(map)->refcnt; +} #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 850f07070036..f43bdb9b6227 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -17,7 +17,6 @@ struct perf_mmap_param; struct perf_evlist { struct list_head entries; int nr_entries; - int nr_groups; bool has_user_cpus; bool needs_map_propagation; /** @@ -127,13 +126,15 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist); void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id); + int cpu_map_idx, int thread, u64 id); int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd); + int cpu_map_idx, int thread, int fd); void 
perf_evlist__reset_id_hash(struct perf_evlist *evlist); void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader); + +void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index a99a75d9e78f..fefe64ba5e26 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -11,6 +11,32 @@ struct perf_thread_map; struct xyarray; +/** + * The per-thread accumulated period storage node. + */ +struct perf_sample_id_period { + struct list_head node; + struct hlist_node hnode; + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ + u64 period; + /* The TID that the value belongs to */ + u32 tid; +}; + +/** + * perf_evsel_for_each_per_thread_period_safe - safely iterate through all the + * per_stream_periods + * @evsel: perf_evsel instance to iterate + * @item: struct perf_sample_id_period iterator + * @tmp: struct perf_sample_id_period temp iterator + */ +#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \ + list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node) + + +#define PERF_SAMPLE_ID__HLIST_BITS 4 +#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS) + /* * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are * more than one entry in the evlist. @@ -34,15 +60,46 @@ struct perf_sample_id { pid_t machine_pid; struct perf_cpu vcpu; - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; + /* + * Per-thread, and global event counts are mutually exclusive: + * Whilst it is possible to combine events into a group with differing + * values of PERF_SAMPLE_READ, it is not valid to have inconsistent + * values for `inherit`.
Therefore it is not possible to have a + * situation where a per-thread event is sampled as a global event; + * all !inherit groups are global, and all groups where the sampling + * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event + * that is part of such a group that is inherit but not PERF_SAMPLE_READ + * will be read as per-thread. If such an event can also trigger a + * sample (such as with sample_period > 0) then it will not cause + * `read_format` to be included in its PERF_RECORD_SAMPLE, and + * therefore will not expose the per-thread group members as global. + */ + union { + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is not per-thread). + */ + u64 period; + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is per-thread). + */ + struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE]; + }; }; struct perf_evsel { struct list_head node; struct perf_event_attr attr; + /** The commonly used cpu map of CPUs the event should be opened upon, etc. */ struct perf_cpu_map *cpus; - struct perf_cpu_map *own_cpus; + /** + * The cpu map read from the PMU. For core PMUs this is the list of all + * CPUs the event can be opened upon. For other PMUs this is the default + * cpu map for opening the event on, for example, the first CPU on a + * socket for an uncore event. + */ + struct perf_cpu_map *pmu_cpus; struct perf_thread_map *threads; struct xyarray *fd; struct xyarray *mmap; @@ -51,13 +108,17 @@ struct perf_evsel { u32 ids; struct perf_evsel *leader; + /* For events where the read_format value is per-thread rather than + * global, stores the per-thread cumulative period */ + struct list_head per_stream_periods; + /* parse modifier helper */ int nr_members; /* * system_wide is for events that need to be on every CPU, irrespective - * of user requested CPUs or threads. Map propagation will set cpus to - * this event's own_cpus, whereby they will contribute to evlist - * all_cpus. 
+ * of user requested CPUs or threads. The main example of this is the + * dummy event. Map propagation will set cpus for this event to all CPUs + * as software PMU events, like dummy, have a CPU map that is empty. */ bool system_wide; /* @@ -65,11 +126,14 @@ struct perf_evsel { * i.e. it cannot be the 'any CPU' value of -1. */ bool requires_cpu; + /** Is the PMU for the event a core one? Affects the handling of pmu_cpus. */ + bool is_pmu_core; int idx; }; void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx); +void perf_evsel__exit(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__close_fd(struct perf_evsel *evsel); void perf_evsel__free_fd(struct perf_evsel *evsel); @@ -79,4 +143,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_id(struct perf_evsel *evsel); +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel); + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, + bool per_thread); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5a062af8e9d8..5f08cab61ece 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -33,7 +33,8 @@ struct perf_mmap { bool overwrite; u64 flush; libperf_unmap_cb_t unmap_cb; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + void *event_copy; + size_t event_copy_sz; struct perf_mmap *next; }; diff --git a/tools/lib/perf/include/internal/rc_check.h b/tools/lib/perf/include/internal/rc_check.h new file mode 100644 index 000000000000..f80ddfc80129 --- /dev/null +++ b/tools/lib/perf/include/internal/rc_check.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __LIBPERF_INTERNAL_RC_CHECK_H
+#define __LIBPERF_INTERNAL_RC_CHECK_H + +#include <stdlib.h> +#include <linux/zalloc.h> + +/* + * Enable reference count checking implicitly with leak checking, which is + * integrated into address sanitizer. + */ +#if defined(__SANITIZE_ADDRESS__) || defined(LEAK_SANITIZER) || defined(ADDRESS_SANITIZER) +#define REFCNT_CHECKING 1 +#elif defined(__has_feature) +#if __has_feature(address_sanitizer) || __has_feature(leak_sanitizer) +#define REFCNT_CHECKING 1 +#endif +#endif + +/* + * Shared reference count checking macros. + * + * Reference count checking is an approach to sanitizing the use of reference + * counted structs. It leverages address and leak sanitizers to make sure gets + * are paired with a put. Reference count checking adds a malloc-ed layer of + * indirection on a get, and frees it on a put. A missed put will be reported as + * a memory leak. A double put will be reported as a double free. Accessing + * after a put will cause a use-after-free and/or a segfault. + */ + +#ifndef REFCNT_CHECKING +/* Replaces "struct foo" so that the pointer may be interposed. */ +#define DECLARE_RC_STRUCT(struct_name) \ + struct struct_name + +/* Declare a reference counted struct variable. */ +#define RC_STRUCT(struct_name) struct struct_name + +/* + * Interpose the indirection. Result will hold the indirection and object is the + * reference counted struct. + */ +#define ADD_RC_CHK(result, object) (result = object, object) + +/* Strip the indirection layer. */ +#define RC_CHK_ACCESS(object) object + +/* Frees the object and the indirection layer. */ +#define RC_CHK_FREE(object) free(object) + +/* A get operation adding the indirection layer. */ +#define RC_CHK_GET(result, object) ADD_RC_CHK(result, object) + +/* A put operation removing the indirection layer. */ +#define RC_CHK_PUT(object) {} + +/* Pointer equality when the indirection may or may not be there. 
*/ +#define RC_CHK_EQUAL(object1, object2) (object1 == object2) + +#else + +/* Replaces "struct foo" so that the pointer may be interposed. */ +#define DECLARE_RC_STRUCT(struct_name) \ + struct original_##struct_name; \ + struct struct_name { \ + struct original_##struct_name *orig; \ + }; \ + struct original_##struct_name + +/* Declare a reference counted struct variable. */ +#define RC_STRUCT(struct_name) struct original_##struct_name + +/* + * Interpose the indirection. Result will hold the indirection and object is the + * reference counted struct. + */ +#define ADD_RC_CHK(result, object) \ + ( \ + object ? (result = malloc(sizeof(*result)), \ + result ? (result->orig = object, result) \ + : (result = NULL, NULL)) \ + : (result = NULL, NULL) \ + ) + +/* Strip the indirection layer. */ +#define RC_CHK_ACCESS(object) object->orig + +/* Frees the object and the indirection layer. */ +#define RC_CHK_FREE(object) \ + do { \ + zfree(&object->orig); \ + free(object); \ + } while(0) + +/* A get operation adding the indirection layer. */ +#define RC_CHK_GET(result, object) ADD_RC_CHK(result, (object ? object->orig : NULL)) + +/* A put operation removing the indirection layer. */ +#define RC_CHK_PUT(object) \ + do { \ + if (object) { \ + object->orig = NULL; \ + free(object); \ + } \ + } while(0) + +/* Pointer equality when the indirection may or may not be there. 
*/ +#define RC_CHK_EQUAL(object1, object2) (object1 == object2 || \ + (object1 && object2 && object1->orig == object2->orig)) + +#endif + +#endif /* __LIBPERF_INTERNAL_RC_CHECK_H */ diff --git a/tools/lib/perf/include/perf/core.h b/tools/lib/perf/include/perf/core.h index a3f6d68edad7..06cc132d88cf 100644 --- a/tools/lib/perf/include/perf/core.h +++ b/tools/lib/perf/include/perf/core.h @@ -5,7 +5,7 @@ #include <stdarg.h> #ifndef LIBPERF_API -#define LIBPERF_API __attribute__((visibility("default"))) +#define LIBPERF_API extern __attribute__((visibility("default"))) #endif enum libperf_print_level { diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 3f43f770cdac..58cc5c5fa47c 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,35 +3,101 @@ #define __LIBPERF_CPUMAP_H #include <perf/core.h> -#include <stdio.h> #include <stdbool.h> +#include <stdint.h> /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ struct perf_cpu { - int cpu; + int16_t cpu; +}; + +struct perf_cache { + int cache_lvl; + int cache; }; struct perf_cpu_map; -LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); +/** + * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value. + */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); +/** + * perf_cpu_map__new_online_cpus - a map read from + * /sys/devices/system/cpu/online if + * available. If reading wasn't possible a map + * is created using the online processors + * assuming the first 'n' processors are all + * online. + */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void); +/** + * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If no + * cpu_list argument is provided then + * perf_cpu_map__new_online_cpus is returned. 
+ */ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); +/** perf_cpu_map__new_int - create a map with the one given cpu. */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_int(int cpu); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, - struct perf_cpu_map *other); +LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig, + struct perf_cpu_map *other); +LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, + struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); +/** + * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index + * is invalid. + */ LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +/** + * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a + * cpu of -1 for an invalid index, this makes an empty map + * look like it contains the "any CPU"/dummy value. Otherwise + * the result is the number CPUs in the map plus one if the + * "any CPU"/dummy value is present. + */ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); -LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value. + */ +LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value. + */ +LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__is_empty - does the map contain no values and it doesn't + * contain the special "any CPU"/dummy value. 
+ */ +LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value. + */ +LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map); +/** + * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value. + */ LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); +LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, + const struct perf_cpu_map *rhs); +/** + * perf_cpu_map__any_cpu - Does the map contain the "any CPU"/dummy -1 value? + */ +LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ (idx) < perf_cpu_map__nr(cpus); \ (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) +#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \ + for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \ + (idx) < perf_cpu_map__nr(cpus); \ + (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \ + if ((_cpu).cpu != -1) + #define perf_cpu_map__for_each_idx(idx, cpus) \ for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index ad47d7b31046..43a8cb04994f 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -70,11 +70,19 @@ struct perf_record_lost { __u64 lost; }; +#define PERF_RECORD_MISC_LOST_SAMPLES_BPF (1 << 15) + struct perf_record_lost_samples { struct perf_event_header header; __u64 lost; }; +#define MAX_ID_HDR_ENTRIES 6 +struct perf_record_lost_samples_and_ids { + struct perf_record_lost_samples lost; + __u64 sample_ids[MAX_ID_HDR_ENTRIES]; +}; + /* * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST */ @@ 
-143,12 +151,34 @@ struct perf_record_switch { __u32 next_prev_tid; }; +struct perf_record_callchain_deferred { + struct perf_event_header header; + /* + * This is to match kernel and (deferred) user stacks together. + * The kernel part will be in the sample callchain array after + * the PERF_CONTEXT_USER_DEFERRED entry. + */ + __u64 cookie; + __u64 nr; + __u64 ips[]; +}; + struct perf_record_header_attr { struct perf_event_header header; struct perf_event_attr attr; - __u64 id[]; + /* + * An array of u64 ids follows here, but we cannot use a flexible array + * because the size of attr in the data can be different than the current + * version. Please use perf_record_header_attr_id() below. + * + * __u64 id[]; // do not use this + */ }; +/* Returns the pointer to id array based on the actual attr size. */ +#define perf_record_header_attr_id(evt) \ + ((void *)&(evt)->attr.attr + (evt)->attr.attr.size) + enum { PERF_CPU_MAP__CPUS = 0, PERF_CPU_MAP__MASK = 1, @@ -273,6 +303,7 @@ struct perf_record_header_event_type { struct perf_record_header_tracing_data { struct perf_event_header header; __u32 size; + __u32 pad; }; #define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15) @@ -378,7 +409,8 @@ enum { PERF_STAT_CONFIG_TERM__AGGR_MODE = 0, PERF_STAT_CONFIG_TERM__INTERVAL = 1, PERF_STAT_CONFIG_TERM__SCALE = 2, - PERF_STAT_CONFIG_TERM__MAX = 3, + PERF_STAT_CONFIG_TERM__AGGR_LEVEL = 3, + PERF_STAT_CONFIG_TERM__MAX = 4, }; struct perf_record_stat_config_entry { @@ -438,6 +470,32 @@ struct perf_record_compressed { char data[]; }; +/* + * `header.size` includes the padding we are going to add while writing the record. + * `data_size` only includes the size of `data[]` itself.
+ */ +struct perf_record_compressed2 { + struct perf_event_header header; + __u64 data_size; + char data[]; +}; + +#define BPF_METADATA_KEY_LEN 64 +#define BPF_METADATA_VALUE_LEN 256 +#define BPF_PROG_NAME_LEN KSYM_NAME_LEN + +struct perf_record_bpf_metadata_entry { + char key[BPF_METADATA_KEY_LEN]; + char value[BPF_METADATA_VALUE_LEN]; +}; + +struct perf_record_bpf_metadata { + struct perf_event_header header; + char prog_name[BPF_PROG_NAME_LEN]; + __u64 nr_entries; + struct perf_record_bpf_metadata_entry entries[]; +}; + enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_USER_TYPE_START = 64, PERF_RECORD_HEADER_ATTR = 64, @@ -459,6 +517,8 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_HEADER_FEATURE = 80, PERF_RECORD_COMPRESSED = 81, PERF_RECORD_FINISHED_INIT = 82, + PERF_RECORD_COMPRESSED2 = 83, + PERF_RECORD_BPF_METADATA = 84, PERF_RECORD_HEADER_MAX }; @@ -475,6 +535,7 @@ union perf_event { struct perf_record_read read; struct perf_record_throttle throttle; struct perf_record_sample sample; + struct perf_record_callchain_deferred callchain_deferred; struct perf_record_bpf_event bpf; struct perf_record_ksymbol ksymbol; struct perf_record_text_poke_event text_poke; @@ -499,6 +560,8 @@ union perf_event { struct perf_record_time_conv time_conv; struct perf_record_header_feature feat; struct perf_record_compressed pack; + struct perf_record_compressed2 pack2; + struct perf_record_bpf_metadata bpf_metadata; }; #endif /* __LIBPERF_EVENT_H */ diff --git a/tools/lib/perf/include/perf/evlist.h b/tools/lib/perf/include/perf/evlist.h index 9ca399d49bb4..e894b770779e 100644 --- a/tools/lib/perf/include/perf/evlist.h +++ b/tools/lib/perf/include/perf/evlist.h @@ -47,4 +47,5 @@ LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist, (pos) = perf_evlist__next_mmap((evlist), (pos), overwrite)) LIBPERF_API void perf_evlist__set_leader(struct perf_evlist *evlist); +LIBPERF_API int 
perf_evlist__nr_groups(struct perf_evlist *evlist); #endif /* __LIBPERF_EVLIST_H */ diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h index 8b40e7777cea..44deb815b817 100644 --- a/tools/lib/perf/include/perf/threadmap.h +++ b/tools/lib/perf/include/perf/threadmap.h @@ -14,6 +14,7 @@ LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx); LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads); LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx); +LIBPERF_API int perf_thread_map__idx(struct perf_thread_map *map, pid_t pid); LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map); LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map); diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 190b56ae923a..fdd8304fe9d0 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -1,15 +1,18 @@ LIBPERF_0.0.1 { global: libperf_init; - perf_cpu_map__dummy_new; - perf_cpu_map__default_new; + perf_cpu_map__new_any_cpu; + perf_cpu_map__new_online_cpus; perf_cpu_map__get; perf_cpu_map__put; perf_cpu_map__new; - perf_cpu_map__read; perf_cpu_map__nr; perf_cpu_map__cpu; - perf_cpu_map__empty; + perf_cpu_map__has_any_cpu_or_is_empty; + perf_cpu_map__is_any_cpu_or_is_empty; + perf_cpu_map__is_empty; + perf_cpu_map__has_any_cpu; + perf_cpu_map__min; perf_cpu_map__max; perf_cpu_map__has; perf_thread_map__new_array; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 0d1634cedf44..ec124eb0ec0a 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -19,6 +19,7 @@ void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb) { + /* Assume fields were zero initialized. 
*/ map->fd = -1; map->overwrite = overwrite; map->unmap_cb = unmap_cb; @@ -51,13 +52,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, void perf_mmap__munmap(struct perf_mmap *map) { - if (map && map->base != NULL) { + if (!map) + return; + + zfree(&map->event_copy); + map->event_copy_sz = 0; + if (map->base) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; map->fd = -1; refcount_set(&map->refcnt, 0); } - if (map && map->unmap_cb) + if (map->unmap_cb) map->unmap_cb(map); } @@ -223,9 +229,17 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, */ if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { unsigned int offset = *startp; - unsigned int len = min(sizeof(*event), size), cpy; + unsigned int len = size, cpy; void *dst = map->event_copy; + if (size > map->event_copy_sz) { + dst = realloc(map->event_copy, size); + if (!dst) + return NULL; + map->event_copy = dst; + map->event_copy_sz = size; + } + do { cpy = min(map->mask + 1 - (offset & map->mask), len); memcpy(dst, &data[offset & map->mask], cpy); @@ -265,7 +279,7 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map) if (!refcount_read(&map->refcnt)) return NULL; - /* non-overwirte doesn't pause the ringbuffer */ + /* non-overwrite doesn't pause the ringbuffer */ if (!map->overwrite) map->end = perf_mmap__read_head(map); @@ -392,6 +406,72 @@ static u64 read_perf_counter(unsigned int counter) static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } +/* __riscv_xlen contains the width of the native base integer, here 64-bit */ +#elif defined(__riscv) && __riscv_xlen == 64 + +/* TODO: implement rv32 support */ + +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 + +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, %1" \ + : "=r" (__v) \ + : "i" (csr) : ); \ + __v; \ +}) + +static unsigned long csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ +
case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + default: + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +static u64 read_perf_counter(unsigned int counter) +{ + return csr_read_num(CSR_CYCLE + counter); +} + +static u64 read_timestamp(void) +{ + return csr_read_num(CSR_TIME); +} + #else static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; } @@ -428,7 +508,7 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count idx = READ_ONCE(pc->index); cnt = READ_ONCE(pc->offset); if (pc->cap_user_rdpmc && idx) { - s64 evcnt = read_perf_counter(idx - 1); + u64 evcnt = read_perf_counter(idx - 1); u16 width = READ_ONCE(pc->pmc_width); evcnt <<= 64 - width; diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index 87b0510a556f..c998b1dae863 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -21,7 +21,7 @@ int 
test_cpumap(int argc, char **argv) libperf_init(libperf_print); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); if (!cpus) return -1; @@ -29,7 +29,7 @@ int test_cpumap(int argc, char **argv) perf_cpu_map__put(cpus); perf_cpu_map__put(cpus); - cpus = perf_cpu_map__default_new(); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return -1; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ed616fc19b4f..10f70cb41ff1 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -46,7 +46,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); @@ -261,7 +261,7 @@ static int test_mmap_thread(void) threads = perf_thread_map__new_dummy(); __T("failed to create threads", threads); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); __T("failed to create cpus", cpus); perf_thread_map__set_pid(threads, 0, pid); @@ -350,7 +350,7 @@ static int test_mmap_cpus(void) attr.config = id; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index a11fc51bfb68..545ec3150546 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -27,7 +27,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evsel = perf_evsel__new(&attr); diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c index 07968f3ea093..db431b036f57 100644 --- a/tools/lib/perf/threadmap.c +++ b/tools/lib/perf/threadmap.c @@ -97,5 +97,22 @@ int perf_thread_map__nr(struct perf_thread_map *threads) pid_t perf_thread_map__pid(struct perf_thread_map 
*map, int idx) { + if (!map) { + assert(idx == 0); + return -1; + } + return map->map[idx].pid; } + +int perf_thread_map__idx(struct perf_thread_map *threads, pid_t pid) +{ + if (!threads) + return pid == -1 ? 0 : -1; + + for (int i = 0; i < threads->nr; ++i) { + if (threads->map[i].pid == pid) + return i; + } + return -1; +} |
