summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c1
-rw-r--r--tools/perf/util/env.c8
-rw-r--r--tools/perf/util/evlist.c43
-rw-r--r--tools/perf/util/evlist.h2
-rw-r--r--tools/perf/util/evsel.c84
-rw-r--r--tools/perf/util/header.c22
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c20
-rw-r--r--tools/perf/util/machine.c32
-rw-r--r--tools/perf/util/map.c20
-rw-r--r--tools/perf/util/map.h4
-rw-r--r--tools/perf/util/pmu.c10
11 files changed, 152 insertions, 94 deletions
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index ba4c623cd8de..39fe21e1cf93 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -387,6 +387,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
break;
case OCSD_INSTR_ISB:
case OCSD_INSTR_DSB_DMB:
+ case OCSD_INSTR_WFI_WFE:
case OCSD_INSTR_OTHER:
default:
packet->last_instr_taken_branch = false;
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index c6351b557bb0..9494f9dc61ec 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -57,9 +57,11 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
else if (prog_id > node->info_linear->info.id)
n = n->rb_right;
else
- break;
+ goto out;
}
+ node = NULL;
+out:
up_read(&env->bpf_progs.lock);
return node;
}
@@ -109,10 +111,12 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
else if (btf_id > node->id)
n = n->rb_right;
else
- break;
+ goto out;
}
+ node = NULL;
up_read(&env->bpf_progs.lock);
+out:
return node;
}
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ec78e93085de..51ead577533f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -231,35 +231,6 @@ void perf_evlist__set_leader(struct perf_evlist *evlist)
}
}
-void perf_event_attr__set_max_precise_ip(struct perf_event_attr *pattr)
-{
- struct perf_event_attr attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
- .exclude_kernel = 1,
- .precise_ip = 3,
- };
-
- event_attr_init(&attr);
-
- /*
- * Unnamed union member, not supported as struct member named
- * initializer in older compilers such as gcc 4.4.7
- */
- attr.sample_period = 1;
-
- while (attr.precise_ip != 0) {
- int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
- if (fd != -1) {
- close(fd);
- break;
- }
- --attr.precise_ip;
- }
-
- pattr->precise_ip = attr.precise_ip;
-}
-
int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
{
struct perf_evsel *evsel = perf_evsel__new_cycles(precise);
@@ -1897,12 +1868,12 @@ static void *perf_evlist__poll_thread(void *arg)
{
struct perf_evlist *evlist = arg;
bool draining = false;
- int i;
+ int i, done = 0;
+
+ while (!done) {
+ bool got_data = false;
- while (draining || !(evlist->thread.done)) {
- if (draining)
- draining = false;
- else if (evlist->thread.done)
+ if (evlist->thread.done)
draining = true;
if (!draining)
@@ -1923,9 +1894,13 @@ static void *perf_evlist__poll_thread(void *arg)
pr_warning("cannot locate proper evsel for the side band event\n");
perf_mmap__consume(map);
+ got_data = true;
}
perf_mmap__read_done(map);
}
+
+ if (draining && !got_data)
+ break;
}
return NULL;
}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index dcb68f34d2cd..6a94785b9100 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -315,8 +315,6 @@ void perf_evlist__to_front(struct perf_evlist *evlist,
void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
struct perf_evsel *tracking_evsel);
-void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
-
struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7835e05f0c0a..966360844fff 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -295,7 +295,6 @@ struct perf_evsel *perf_evsel__new_cycles(bool precise)
if (!precise)
goto new_event;
- perf_event_attr__set_max_precise_ip(&attr);
/*
* Now let the usual logic to set up the perf_event_attr defaults
* to kick in when we return and before perf_evsel__open() is called.
@@ -305,6 +304,8 @@ new_event:
if (evsel == NULL)
goto out;
+ evsel->precise_max = true;
+
/* use asprintf() because free(evsel) assumes name is allocated */
if (asprintf(&evsel->name, "cycles%s%s%.*s",
(attr.precise_ip || attr.exclude_kernel) ? ":" : "",
@@ -1083,7 +1084,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
}
if (evsel->precise_max)
- perf_event_attr__set_max_precise_ip(attr);
+ attr->precise_ip = 3;
if (opts->all_user) {
attr->exclude_kernel = 1;
@@ -1749,6 +1750,59 @@ static bool ignore_missing_thread(struct perf_evsel *evsel,
return true;
}
+static void display_attr(struct perf_event_attr *attr)
+{
+ if (verbose >= 2) {
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ fprintf(stderr, "perf_event_attr:\n");
+ perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ }
+}
+
+static int perf_event_open(struct perf_evsel *evsel,
+ pid_t pid, int cpu, int group_fd,
+ unsigned long flags)
+{
+ int precise_ip = evsel->attr.precise_ip;
+ int fd;
+
+ while (1) {
+ pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
+ pid, cpu, group_fd, flags);
+
+ fd = sys_perf_event_open(&evsel->attr, pid, cpu, group_fd, flags);
+ if (fd >= 0)
+ break;
+
+ /*
+ * Do quick precise_ip fallback if:
+ * - there is precise_ip set in perf_event_attr
+ * - maximum precise is requested
+ * - sys_perf_event_open failed with ENOTSUP error,
+ * which is associated with wrong precise_ip
+ */
+ if (!precise_ip || !evsel->precise_max || (errno != ENOTSUP))
+ break;
+
+ /*
+ * We tried all the precise_ip values, and it's
+ * still failing, so leave it to standard fallback.
+ */
+ if (!evsel->attr.precise_ip) {
+ evsel->attr.precise_ip = precise_ip;
+ break;
+ }
+
+ pr_debug2("\nsys_perf_event_open failed, error %d\n", -ENOTSUP);
+ evsel->attr.precise_ip--;
+ pr_debug2("decreasing precise_ip by one (%d)\n", evsel->attr.precise_ip);
+ display_attr(&evsel->attr);
+ }
+
+ return fd;
+}
+
int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
struct thread_map *threads)
{
@@ -1824,12 +1878,7 @@ retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
- if (verbose >= 2) {
- fprintf(stderr, "%.60s\n", graph_dotted_line);
- fprintf(stderr, "perf_event_attr:\n");
- perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
- fprintf(stderr, "%.60s\n", graph_dotted_line);
- }
+ display_attr(&evsel->attr);
for (cpu = 0; cpu < cpus->nr; cpu++) {
@@ -1841,13 +1890,10 @@ retry_sample_id:
group_fd = get_group_fd(evsel, cpu, thread);
retry_open:
- pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
- pid, cpus->map[cpu], group_fd, flags);
-
test_attr__ready();
- fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu],
- group_fd, flags);
+ fd = perf_event_open(evsel, pid, cpus->map[cpu],
+ group_fd, flags);
FD(evsel, cpu, thread) = fd;
@@ -2322,7 +2368,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
if (data->user_regs.abi) {
u64 mask = evsel->attr.sample_regs_user;
- sz = hweight_long(mask) * sizeof(u64);
+ sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
data->user_regs.mask = mask;
data->user_regs.regs = (u64 *)array;
@@ -2378,7 +2424,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
u64 mask = evsel->attr.sample_regs_intr;
- sz = hweight_long(mask) * sizeof(u64);
+ sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
data->intr_regs.mask = mask;
data->intr_regs.regs = (u64 *)array;
@@ -2506,7 +2552,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_REGS_USER) {
if (sample->user_regs.abi) {
result += sizeof(u64);
- sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->user_regs.mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -2534,7 +2580,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_REGS_INTR) {
if (sample->intr_regs.abi) {
result += sizeof(u64);
- sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -2664,7 +2710,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
if (type & PERF_SAMPLE_REGS_USER) {
if (sample->user_regs.abi) {
*array++ = sample->user_regs.abi;
- sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->user_regs.mask) * sizeof(u64);
memcpy(array, sample->user_regs.regs, sz);
array = (void *)array + sz;
} else {
@@ -2700,7 +2746,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
if (type & PERF_SAMPLE_REGS_INTR) {
if (sample->intr_regs.abi) {
*array++ = sample->intr_regs.abi;
- sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
memcpy(array, sample->intr_regs.regs, sz);
array = (void *)array + sz;
} else {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b9e693825873..2d2af2ac2b1e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2606,6 +2606,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused)
perf_env__insert_bpf_prog_info(env, info_node);
}
+ up_write(&env->bpf_progs.lock);
return 0;
out:
free(info_linear);
@@ -2623,7 +2624,9 @@ static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data _
static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
{
struct perf_env *env = &ff->ph->env;
+ struct btf_node *node = NULL;
u32 count, i;
+ int err = -1;
if (ff->ph->needs_swap) {
pr_warning("interpreting btf from systems with endianity is not yet supported\n");
@@ -2636,31 +2639,32 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
down_write(&env->bpf_progs.lock);
for (i = 0; i < count; ++i) {
- struct btf_node *node;
u32 id, data_size;
if (do_read_u32(ff, &id))
- return -1;
+ goto out;
if (do_read_u32(ff, &data_size))
- return -1;
+ goto out;
node = malloc(sizeof(struct btf_node) + data_size);
if (!node)
- return -1;
+ goto out;
node->id = id;
node->data_size = data_size;
- if (__do_read(ff, node->data, data_size)) {
- free(node);
- return -1;
- }
+ if (__do_read(ff, node->data, data_size))
+ goto out;
perf_env__insert_btf(env, node);
+ node = NULL;
}
+ err = 0;
+out:
up_write(&env->bpf_progs.lock);
- return 0;
+ free(node);
+ return err;
}
struct feature_ops {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 6e03db142091..872fab163585 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -251,19 +251,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
decoder->tsc_ctc_ratio_d;
-
- /*
- * Allow for timestamps appearing to backwards because a TSC
- * packet has slipped past a MTC packet, so allow 2 MTC ticks
- * or ...
- */
- decoder->tsc_slip = multdiv(2 << decoder->mtc_shift,
- decoder->tsc_ctc_ratio_n,
- decoder->tsc_ctc_ratio_d);
}
- /* ... or 0x100 paranoia */
- if (decoder->tsc_slip < 0x100)
- decoder->tsc_slip = 0x100;
+
+ /*
+ * A TSC packet can slip past MTC packets so that the timestamp appears
+ * to go backwards. One estimate is that can be up to about 40 CPU
+ * cycles, which is certainly less than 0x1000 TSC ticks, but accept
+ * slippage an order of magnitude more to be on the safe side.
+ */
+ decoder->tsc_slip = 0x10000;
intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 61959aba7e27..3c520baa198c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1421,6 +1421,20 @@ static void machine__set_kernel_mmap(struct machine *machine,
machine->vmlinux_map->end = ~0ULL;
}
+static void machine__update_kernel_mmap(struct machine *machine,
+ u64 start, u64 end)
+{
+ struct map *map = machine__kernel_map(machine);
+
+ map__get(map);
+ map_groups__remove(&machine->kmaps, map);
+
+ machine__set_kernel_mmap(machine, start, end);
+
+ map_groups__insert(&machine->kmaps, map);
+ map__put(map);
+}
+
int machine__create_kernel_maps(struct machine *machine)
{
struct dso *kernel = machine__get_kernel(machine);
@@ -1453,17 +1467,11 @@ int machine__create_kernel_maps(struct machine *machine)
goto out_put;
}
- /* we have a real start address now, so re-order the kmaps */
- map = machine__kernel_map(machine);
-
- map__get(map);
- map_groups__remove(&machine->kmaps, map);
-
- /* assume it's the last in the kmaps */
- machine__set_kernel_mmap(machine, addr, ~0ULL);
-
- map_groups__insert(&machine->kmaps, map);
- map__put(map);
+ /*
+ * we have a real start address now, so re-order the kmaps
+ * assume it's the last in the kmaps
+ */
+ machine__update_kernel_mmap(machine, addr, ~0ULL);
}
if (machine__create_extra_kernel_maps(machine, kernel))
@@ -1599,7 +1607,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (strstr(kernel->long_name, "vmlinux"))
dso__set_short_name(kernel, "[kernel.vmlinux]", false);
- machine__set_kernel_mmap(machine, event->mmap.start,
+ machine__update_kernel_mmap(machine, event->mmap.start,
event->mmap.start + event->mmap.len);
/*
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e32628cd20a7..ee71efb9db62 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -261,6 +261,22 @@ bool __map__is_extra_kernel_map(const struct map *map)
return kmap && kmap->name[0];
}
+bool __map__is_bpf_prog(const struct map *map)
+{
+ const char *name;
+
+ if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return true;
+
+ /*
+ * If PERF_RECORD_BPF_EVENT is not included, the dso will not have
+ * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can
+ * guess the type based on name.
+ */
+ name = map->dso->short_name;
+ return name && (strstr(name, "bpf_prog_") == name);
+}
+
bool map__has_symbols(const struct map *map)
{
return dso__has_symbols(map->dso);
@@ -910,10 +926,8 @@ static void __maps__insert_name(struct maps *maps, struct map *map)
rc = strcmp(m->dso->short_name, map->dso->short_name);
if (rc < 0)
p = &(*p)->rb_left;
- else if (rc > 0)
- p = &(*p)->rb_right;
else
- return;
+ p = &(*p)->rb_right;
}
rb_link_node(&map->rb_node_name, parent, p);
rb_insert_color(&map->rb_node_name, &maps->names);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 0e20749f2c55..dc93787c74f0 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -159,10 +159,12 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
bool __map__is_kernel(const struct map *map);
bool __map__is_extra_kernel_map(const struct map *map);
+bool __map__is_bpf_prog(const struct map *map);
static inline bool __map__is_kmodule(const struct map *map)
{
- return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
+ return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
+ !__map__is_bpf_prog(map);
}
bool map__has_symbols(const struct map *map);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6199a3174ab9..e0429f4ef335 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -732,10 +732,20 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
if (!is_arm_pmu_core(name)) {
pname = pe->pmu ? pe->pmu : "cpu";
+
+ /*
+ * uncore alias may be from different PMU
+ * with common prefix
+ */
+ if (pmu_is_uncore(name) &&
+ !strncmp(pname, name, strlen(pname)))
+ goto new_alias;
+
if (strcmp(pname, name))
continue;
}
+new_alias:
/* need type casts to override 'const' */
__perf_pmu__new_alias(head, NULL, (char *)pe->name,
(char *)pe->desc, (char *)pe->event,