diff options
128 files changed, 5791 insertions, 994 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index c6c34d04ce95..2c9a92979f4f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12773,6 +12773,13 @@ F: arch/*/events/* F: arch/*/events/*/* F: tools/perf/ +PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS +R: John Garry <john.garry@huawei.com> +R: Will Deacon <will@kernel.org> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Supported +F: tools/perf/pmu-events/arch/arm64/ + PERSONALITY HANDLING M: Christoph Hellwig <hch@infradead.org> L: linux-abi-devel@lists.sourceforge.net diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index ca92e01d0bd1..48604625ab31 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -96,7 +96,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { return 0; } -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { } static inline u32 perf_get_misc_flags(struct pt_regs *regs) { return 0; @@ -127,7 +127,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} -static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} +static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } #endif /* CONFIG_PPC32 */ @@ -179,7 +179,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER. */ -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { unsigned long mmcra = regs->dsisr; bool sdar_valid; @@ -204,8 +204,7 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) *addrp = mfspr(SPRN_SDAR); - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && - is_kernel_addr(mfspr(SPRN_SDAR))) + if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0) *addrp = 0; } @@ -444,7 +443,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Processing BHRB entries */ -static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) { u64 val; u64 addr; @@ -472,8 +471,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) * exporting it to userspace (avoid exposure of regions * where we could have speculative execution) */ - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && - is_kernel_addr(addr)) + if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -2087,12 +2085,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) - perf_get_data_addr(regs, &data.addr); + perf_get_data_addr(event, regs, &data.addr); if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; cpuhw = this_cpu_ptr(&cpu_hw_events); - power_pmu_bhrb_read(cpuhw); + power_pmu_bhrb_read(event, cpuhw); data.br_stack = &cpuhw->bhrb_stack; } diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 5ee3fed881d3..38de4a7f6752 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -549,9 +549,11 @@ static int bts_event_init(struct perf_event *event) * Note that the default paranoia setting permits unprivileged * users to profile the kernel. */ - if (event->attr.exclude_kernel && perf_paranoid_kernel() && - !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (event->attr.exclude_kernel) { + ret = perf_allow_kernel(&event->attr); + if (ret) + return ret; + } if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fcef678c3423..bbf6588d47ee 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3315,8 +3315,9 @@ static int intel_pmu_hw_config(struct perf_event *event) if (x86_pmu.version < 3) return -EINVAL; - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + ret = perf_allow_cpu(&event->attr); + if (ret) + return ret; event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index dee579efb2b2..a4cc66005ce8 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event) * the user needs special permissions to be able to use it */ if (p4_ht_active() && p4_event_bind_map[v].shared) { - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + v = perf_allow_cpu(&event->attr); + if (v) + return v; } /* ESCR EventMask bits may be invalid */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index a3763247547c..20d8cf194fb7 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1818,6 +1818,14 @@ union security_list_options { void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); #endif /* CONFIG_BPF_SYSCALL */ int (*locked_down)(enum lockdown_reason what); +#ifdef CONFIG_PERF_EVENTS + int (*perf_event_open)(struct perf_event_attr *attr, int type); + int (*perf_event_alloc)(struct perf_event *event); + void (*perf_event_free)(struct perf_event *event); + int (*perf_event_read)(struct perf_event *event); + int (*perf_event_write)(struct perf_event *event); + +#endif }; struct security_hook_heads { @@ -2060,6 +2068,13 @@ struct security_hook_heads { struct hlist_head bpf_prog_free_security; #endif /* CONFIG_BPF_SYSCALL */ struct hlist_head locked_down; +#ifdef CONFIG_PERF_EVENTS + struct hlist_head perf_event_open; + struct hlist_head perf_event_alloc; + struct hlist_head perf_event_free; + struct hlist_head perf_event_read; + struct hlist_head perf_event_write; +#endif } __randomize_layout; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 68ccc5b1913b..4f77b22d47be 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -56,6 +56,7 @@ struct perf_guest_info_callbacks { #include <linux/perf_regs.h> #include <linux/cgroup.h> #include <linux/refcount.h> +#include <linux/security.h> #include <asm/local.h> struct perf_callchain_entry { @@ -721,6 +722,9 @@ struct perf_event { struct perf_cgroup *cgrp; /* cgroup event is attach to */ #endif +#ifdef CONFIG_SECURITY + void *security; +#endif struct list_head sb_list; #endif /* CONFIG_PERF_EVENTS */ }; @@ -1241,19 +1245,41 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static inline bool perf_paranoid_tracepoint_raw(void) +/* Access to perf_event_open(2) syscall. */ +#define PERF_SECURITY_OPEN 0 + +/* Finer grained perf_event_open(2) access control. */ +#define PERF_SECURITY_CPU 1 +#define PERF_SECURITY_KERNEL 2 +#define PERF_SECURITY_TRACEPOINT 3 + +static inline int perf_is_paranoid(void) { return sysctl_perf_event_paranoid > -1; } -static inline bool perf_paranoid_cpu(void) +static inline int perf_allow_kernel(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 0; + if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_KERNEL); } -static inline bool perf_paranoid_kernel(void) +static inline int perf_allow_cpu(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 1; + if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_CPU); +} + +static inline int perf_allow_tracepoint(struct perf_event_attr *attr) +{ + if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); } extern void perf_event_init(void); diff --git a/include/linux/security.h b/include/linux/security.h index a8d59d612d27..0a86bfea64d0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1894,5 +1894,42 @@ static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ -#endif /* ! __LINUX_SECURITY_H */ +#ifdef CONFIG_PERF_EVENTS +struct perf_event_attr; +struct perf_event; + +#ifdef CONFIG_SECURITY +extern int security_perf_event_open(struct perf_event_attr *attr, int type); +extern int security_perf_event_alloc(struct perf_event *event); +extern void security_perf_event_free(struct perf_event *event); +extern int security_perf_event_read(struct perf_event *event); +extern int security_perf_event_write(struct perf_event *event); +#else +static inline int security_perf_event_open(struct perf_event_attr *attr, + int type) +{ + return 0; +} + +static inline int security_perf_event_alloc(struct perf_event *event) +{ + return 0; +} + +static inline void security_perf_event_free(struct perf_event *event) +{ +} + +static inline int security_perf_event_read(struct perf_event *event) +{ + return 0; +} +static inline int security_perf_event_write(struct perf_event *event) +{ + return 0; +} +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_PERF_EVENTS */ + +#endif /* ! __LINUX_SECURITY_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index aec8dba2bea4..0940c8810be0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4229,8 +4229,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task, if (!task) { /* Must be root to operate on a CPU event: */ - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EACCES); + err = perf_allow_cpu(&event->attr); + if (err) + return ERR_PTR(err); cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; @@ -4539,6 +4540,8 @@ static void _free_event(struct perf_event *event) unaccount_event(event); + security_perf_event_free(event); + if (event->rb) { /* * Can happen when we close an event with re-directed output. @@ -4992,6 +4995,10 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct perf_event_context *ctx; int ret; + ret = security_perf_event_read(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); @@ -5256,6 +5263,11 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct perf_event_context *ctx; long ret; + /* Treat ioctl like writes as it is likely a mutating operation. */ + ret = security_perf_event_write(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = _perf_ioctl(event, cmd, arg); perf_event_ctx_unlock(event, ctx); @@ -5721,6 +5733,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; + ret = security_perf_event_read(event); + if (ret) + return ret; + vma_size = vma->vm_end - vma->vm_start; if (vma->vm_pgoff == 0) { @@ -5846,7 +5862,7 @@ accounting: lock_limit >>= PAGE_SHIFT; locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra; - if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && + if ((locked > lock_limit) && perf_is_paranoid() && !capable(CAP_IPC_LOCK)) { ret = -EPERM; goto unlock; @@ -10580,11 +10596,20 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, } } + err = security_perf_event_alloc(event); + if (err) + goto err_callchain_buffer; + /* symmetric to unaccount_event() in _free_event() */ account_event(event); return event; +err_callchain_buffer: + if (!event->parent) { + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); + } err_addr_filters: kfree(event->addr_filter_ranges); @@ -10673,9 +10698,11 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } /* privileged levels capture (kernel, hv): check permissions */ - if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) - && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) { + ret = perf_allow_kernel(attr); + if (ret) + return ret; + } } if (attr->sample_type & PERF_SAMPLE_REGS_USER) { @@ -10888,13 +10915,19 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + if (err) + return err; + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; if (!attr.exclude_kernel) { - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + err = perf_allow_kernel(&attr); + if (err) + return err; } if (attr.namespaces) { @@ -10911,9 +10944,11 @@ SYSCALL_DEFINE5(perf_event_open, } /* Only privileged users can get physical addresses */ - if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) && - perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) { + err = perf_allow_kernel(&attr); + if (err) + return err; + } err = security_locked_down(LOCKDOWN_PERF); if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR)) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ffb59a4ef4ff..246c83ac5643 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -754,6 +754,14 @@ static void *perf_mmap_alloc_page(int cpu) return page_address(page); } +static void perf_mmap_free_page(void *addr) +{ + struct page *page = virt_to_page(addr); + + page->mapping = NULL; + __free_page(page); +} + struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) { struct ring_buffer *rb; @@ -788,9 +796,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) fail_data_pages: for (i--; i >= 0; i--) - free_page((unsigned long)rb->data_pages[i]); + perf_mmap_free_page(rb->data_pages[i]); - free_page((unsigned long)rb->user_page); + perf_mmap_free_page(rb->user_page); fail_user_page: kfree(rb); @@ -799,21 +807,13 @@ fail: return NULL; } -static void perf_mmap_free_page(unsigned long addr) -{ - struct page *page = virt_to_page((void *)addr); - - page->mapping = NULL; - __free_page(page); -} - void rb_free(struct ring_buffer *rb) { int i; - perf_mmap_free_page((unsigned long)rb->user_page); + perf_mmap_free_page(rb->user_page); for (i = 0; i < rb->nr_pages; i++) - perf_mmap_free_page((unsigned long)rb->data_pages[i]); + perf_mmap_free_page(rb->data_pages[i]); kfree(rb); } diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a9dfa04ffa44..643e0b19920d 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/kprobes.h> +#include <linux/security.h> #include "trace.h" #include "trace_probe.h" @@ -26,8 +27,10 @@ static int total_ref_count; static int perf_trace_event_perm(struct trace_event_call *tp_event, struct perf_event *p_event) { + int ret; + if (tp_event->perf_perm) { - int ret = tp_event->perf_perm(tp_event, p_event); + ret = tp_event->perf_perm(tp_event, p_event); if (ret) return ret; } @@ -46,8 +49,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event)) { - if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + ret = perf_allow_tracepoint(&p_event->attr); + if (ret) + return ret; if (!is_sampling_event(p_event)) return 0; @@ -82,8 +86,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, * ...otherwise raw tracepoint data can be a severe data leak, * only allow root to have these. */ - if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + ret = perf_allow_tracepoint(&p_event->attr); + if (ret) + return ret; return 0; } diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1d9be26b4edd..42b571cde177 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -176,6 +176,7 @@ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf +KBUILD_HOSTCFLAGS += -DHAVE_ATTR_TEST=0 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable diff --git a/security/security.c b/security/security.c index 1bc000f834e2..cd2d18d2d279 100644 --- a/security/security.c +++ b/security/security.c @@ -2404,3 +2404,30 @@ int security_locked_down(enum lockdown_reason what) return call_int_hook(locked_down, 0, what); } EXPORT_SYMBOL(security_locked_down); + +#ifdef CONFIG_PERF_EVENTS +int security_perf_event_open(struct perf_event_attr *attr, int type) +{ + return call_int_hook(perf_event_open, 0, attr, type); +} + +int security_perf_event_alloc(struct perf_event *event) +{ + return call_int_hook(perf_event_alloc, 0, event); +} + +void security_perf_event_free(struct perf_event *event) +{ + call_void_hook(perf_event_free, event); +} + +int security_perf_event_read(struct perf_event *event) +{ + return call_int_hook(perf_event_read, 0, event); +} + +int security_perf_event_write(struct perf_event *event) +{ + return call_int_hook(perf_event_write, 0, event); +} +#endif /* CONFIG_PERF_EVENTS */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9625b99e677f..28eb05490d59 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6795,6 +6795,67 @@ struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = { .lbs_msg_msg = sizeof(struct msg_security_struct), }; +#ifdef CONFIG_PERF_EVENTS +static int selinux_perf_event_open(struct perf_event_attr *attr, int type) +{ + u32 requested, sid = current_sid(); + + if (type == PERF_SECURITY_OPEN) + requested = PERF_EVENT__OPEN; + else if (type == PERF_SECURITY_CPU) + requested = PERF_EVENT__CPU; + else if (type == PERF_SECURITY_KERNEL) + requested = PERF_EVENT__KERNEL; + else if (type == PERF_SECURITY_TRACEPOINT) + requested = PERF_EVENT__TRACEPOINT; + else + return -EINVAL; + + return avc_has_perm(&selinux_state, sid, sid, SECCLASS_PERF_EVENT, + requested, NULL); +} + +static int selinux_perf_event_alloc(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec; + + perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL); + if (!perfsec) + return -ENOMEM; + + perfsec->sid = current_sid(); + event->security = perfsec; + + return 0; +} + +static void selinux_perf_event_free(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + + event->security = NULL; + kfree(perfsec); +} + +static int selinux_perf_event_read(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + u32 sid = current_sid(); + + return avc_has_perm(&selinux_state, sid, perfsec->sid, + SECCLASS_PERF_EVENT, PERF_EVENT__READ, NULL); +} + +static int selinux_perf_event_write(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + u32 sid = current_sid(); + + return avc_has_perm(&selinux_state, sid, perfsec->sid, + SECCLASS_PERF_EVENT, PERF_EVENT__WRITE, NULL); +} +#endif + static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), @@ -7030,6 +7091,14 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free), LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free), #endif + +#ifdef CONFIG_PERF_EVENTS + LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open), + LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc), + LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free), + LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read), + LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write), +#endif }; static __init int selinux_init(void) diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 32e9b03be3dd..7db24855e12d 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -244,6 +244,8 @@ struct security_class_mapping secclass_map[] = { {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, { "xdp_socket", { COMMON_SOCK_PERMS, NULL } }, + { "perf_event", + {"open", "cpu", "kernel", "tracepoint", "read", "write"} }, { NULL } }; diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 586b7abd0aa7..a4a86cbcfb0a 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -141,7 +141,11 @@ struct pkey_security_struct { }; struct bpf_security_struct { - u32 sid; /*SID of bpf obj creater*/ + u32 sid; /* SID of bpf obj creator */ +}; + +struct perf_event_security_struct { + u32 sid; /* SID of perf_event obj creator */ }; extern struct lsm_blob_sizes selinux_blob_sizes; diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h new file mode 100644 index 000000000000..889f8b1b5b7f --- /dev/null +++ b/tools/arch/x86/include/asm/irq_vectors.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IRQ_VECTORS_H +#define _ASM_X86_IRQ_VECTORS_H + +#include <linux/threads.h> +/* + * Linux IRQ vector layout. + * + * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can + * be defined by Linux. They are used as a jump table by the CPU when a + * given vector is triggered - by a CPU-external, CPU-internal or + * software-triggered event. + * + * Linux sets the kernel code address each entry jumps to early during + * bootup, and never changes them. This is the general layout of the + * IDT entries: + * + * Vectors 0 ... 31 : system traps and exceptions - hardcoded events + * Vectors 32 ... 127 : device interrupts + * Vector 128 : legacy int80 syscall interface + * Vectors 129 ... LOCAL_TIMER_VECTOR-1 + * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts + * + * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. + * + * This file enumerates the exact layout of them: + */ + +#define NMI_VECTOR 0x02 +#define MCE_VECTOR 0x12 + +/* + * IDT vectors usable for external interrupt sources start at 0x20. + * (0x80 is the syscall vector, 0x30-0x3f are for ISA) + */ +#define FIRST_EXTERNAL_VECTOR 0x20 + +/* + * Reserve the lowest usable vector (and hence lowest priority) 0x20 for + * triggering cleanup after irq migration. 0x21-0x2f will still be used + * for device interrupts. + */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR + +#define IA32_SYSCALL_VECTOR 0x80 + +/* + * Vectors 0x30-0x3f are used for ISA interrupts. + * round up to the next 16-vector boundary + */ +#define ISA_IRQ_VECTOR(irq) (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + irq) + +/* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff + * + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + */ + +#define SPURIOUS_APIC_VECTOR 0xff +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + +#define ERROR_APIC_VECTOR 0xfe +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define CALL_FUNCTION_SINGLE_VECTOR 0xfb +#define THERMAL_APIC_VECTOR 0xfa +#define THRESHOLD_APIC_VECTOR 0xf9 +#define REBOOT_VECTOR 0xf8 + +/* + * Generic system vector for platform specific use + */ +#define X86_PLATFORM_IPI_VECTOR 0xf7 + +/* + * IRQ work vector: + */ +#define IRQ_WORK_VECTOR 0xf6 + +#define UV_BAU_MESSAGE 0xf5 +#define DEFERRED_ERROR_VECTOR 0xf4 + +/* Vector on which hypervisor callbacks will be delivered */ +#define HYPERVISOR_CALLBACK_VECTOR 0xf3 + +/* Vector for KVM to deliver posted interrupt IPI */ +#ifdef CONFIG_HAVE_KVM +#define POSTED_INTR_VECTOR 0xf2 +#define POSTED_INTR_WAKEUP_VECTOR 0xf1 +#define POSTED_INTR_NESTED_VECTOR 0xf0 +#endif + +#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef + +#if IS_ENABLED(CONFIG_HYPERV) +#define HYPERV_REENLIGHTENMENT_VECTOR 0xee +#define HYPERV_STIMER0_VECTOR 0xed +#endif + +#define LOCAL_TIMER_VECTOR 0xec + +#define NR_VECTORS 256 + +#ifdef CONFIG_X86_LOCAL_APIC +#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR +#else +#define FIRST_SYSTEM_VECTOR NR_VECTORS +#endif + +/* + * Size the maximum number of interrupts. + * + * If the irq_desc[] array has a sparse layout, we can size things + * generously - it scales up linearly with the maximum number of CPUs, + * and the maximum number of IO-APICs, whichever is higher. + * + * In other cases we size more conservatively, to not create too large + * static arrays. + */ + +#define NR_IRQS_LEGACY 16 + +#define CPU_VECTOR_LIMIT (64 * NR_CPUS) +#define IO_APIC_VECTOR_LIMIT (32 * MAX_IO_APICS) + +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_PCI_MSI) +#define NR_IRQS \ + (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) +#elif defined(CONFIG_X86_IO_APIC) +#define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +#elif defined(CONFIG_PCI_MSI) +#define NR_IRQS (NR_VECTORS + CPU_VECTOR_LIMIT) +#else +#define NR_IRQS NR_IRQS_LEGACY +#endif + +#endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h new file mode 100644 index 000000000000..20ce682a2540 --- /dev/null +++ b/tools/arch/x86/include/asm/msr-index.h @@ -0,0 +1,857 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MSR_INDEX_H +#define _ASM_X86_MSR_INDEX_H + +#include <linux/bits.h> + +/* + * CPU model specific register (MSR) numbers. + * + * Do not add new entries to this file unless the definitions are shared + * between multiple compilation units. + */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ + +/* EFER bits: */ +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define _EFER_NX 11 /* No execute enable */ +#define _EFER_SVME 12 /* Enable virtualization */ +#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ +#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ + +#define EFER_SCE (1<<_EFER_SCE) +#define EFER_LME (1<<_EFER_LME) +#define EFER_LMA (1<<_EFER_LMA) +#define EFER_NX (1<<_EFER_NX) +#define EFER_SVME (1<<_EFER_SVME) +#define EFER_LMSLE (1<<_EFER_LMSLE) +#define EFER_FFXSR (1<<_EFER_FFXSR) + +/* Intel MSRs. Some also available on other CPUs */ + +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ + +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ + +#define MSR_PPIN_CTL 0x0000004e +#define MSR_PPIN 0x0000004f + +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd +#define MSR_PLATFORM_INFO 0x000000ce +#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 +#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) + +#define MSR_IA32_UMWAIT_CONTROL 0xe1 +#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) +#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) + +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 +#define NHM_C3_AUTO_DEMOTE (1UL << 25) +#define NHM_C1_AUTO_DEMOTE (1UL << 26) +#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 28) + +#define MSR_MTRRcap 0x000000fe + +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ + +#define MSR_IA32_FLUSH_CMD 0x0000010b +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. + */ + +#define MSR_IA32_BBL_CR_CTL 0x00000119 +#define MSR_IA32_BBL_CR_CTL3 0x0000011e + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_IA32_MCG_EXT_CTL 0x000004d0 + +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_TURBO_RATIO_LIMIT1 0x000001ae +#define MSR_TURBO_RATIO_LIMIT2 0x000001af + +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + +#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */ +#define LBR_INFO_MISPRED BIT_ULL(63) +#define LBR_INFO_IN_TX BIT_ULL(62) +#define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYCLES 0xffff + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_PEBS_DATA_CFG 0x000003f2 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 + +#define MSR_IA32_RTIT_CTL 0x00000570 +#define RTIT_CTL_TRACEEN BIT(0) +#define RTIT_CTL_CYCLEACC BIT(1) +#define RTIT_CTL_OS BIT(2) +#define RTIT_CTL_USR BIT(3) +#define RTIT_CTL_PWR_EVT_EN BIT(4) +#define RTIT_CTL_FUP_ON_PTW BIT(5) +#define RTIT_CTL_FABRIC_EN BIT(6) +#define RTIT_CTL_CR3EN BIT(7) +#define RTIT_CTL_TOPA BIT(8) +#define RTIT_CTL_MTC_EN BIT(9) +#define RTIT_CTL_TSC_EN BIT(10) +#define RTIT_CTL_DISRETC BIT(11) +#define RTIT_CTL_PTW_EN BIT(12) +#define RTIT_CTL_BRANCH_EN BIT(13) +#define RTIT_CTL_MTC_RANGE_OFFSET 14 +#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) +#define RTIT_CTL_CYC_THRESH_OFFSET 19 +#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET) +#define RTIT_CTL_PSB_FREQ_OFFSET 24 +#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET) +#define RTIT_CTL_ADDR0_OFFSET 32 +#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET) +#define RTIT_CTL_ADDR1_OFFSET 36 +#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET) +#define RTIT_CTL_ADDR2_OFFSET 40 +#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET) +#define RTIT_CTL_ADDR3_OFFSET 44 +#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET) +#define MSR_IA32_RTIT_STATUS 0x00000571 +#define RTIT_STATUS_FILTEREN BIT(0) +#define RTIT_STATUS_CONTEXTEN BIT(1) +#define RTIT_STATUS_TRIGGEREN BIT(2) +#define RTIT_STATUS_BUFFOVF BIT(3) +#define RTIT_STATUS_ERROR BIT(4) +#define RTIT_STATUS_STOPPED BIT(5) +#define RTIT_STATUS_BYTECNT_OFFSET 32 +#define RTIT_STATUS_BYTECNT (0x1ffffull << RTIT_STATUS_BYTECNT_OFFSET) +#define MSR_IA32_RTIT_ADDR0_A 0x00000580 +#define MSR_IA32_RTIT_ADDR0_B 0x00000581 +#define MSR_IA32_RTIT_ADDR1_A 0x00000582 +#define MSR_IA32_RTIT_ADDR1_B 0x00000583 +#define MSR_IA32_RTIT_ADDR2_A 0x00000584 +#define MSR_IA32_RTIT_ADDR2_B 0x00000585 +#define MSR_IA32_RTIT_ADDR3_A 0x00000586 +#define MSR_IA32_RTIT_ADDR3_B 0x00000587 +#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 +#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 +#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_CR_PAT 0x00000277 + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +/* DEBUGCTLMSR bits (others vary by model): */ +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF_SHIFT 1 +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12) +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 +#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) + +#define MSR_PEBS_FRONTEND 0x000003f7 + +#define MSR_IA32_POWER_CTL 0x000001fc + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +/* C-state Residency Counters */ +#define MSR_PKG_C3_RESIDENCY 0x000003f8 +#define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa +#define MSR_PKG_C7_RESIDENCY 0x000003fa +#define MSR_CORE_C3_RESIDENCY 0x000003fc +#define MSR_CORE_C6_RESIDENCY 0x000003fd +#define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff +#define MSR_PKG_C2_RESIDENCY 0x0000060d +#define MSR_PKG_C8_RESIDENCY 0x00000630 +#define MSR_PKG_C9_RESIDENCY 0x00000631 +#define MSR_PKG_C10_RESIDENCY 0x00000632 + +/* Interrupt Response Limit */ +#define MSR_PKGC3_IRTL 0x0000060a +#define MSR_PKGC6_IRTL 0x0000060b +#define MSR_PKGC7_IRTL 0x0000060c +#define MSR_PKGC8_IRTL 0x00000633 +#define MSR_PKGC9_IRTL 0x00000634 +#define MSR_PKGC10_IRTL 0x00000635 + +/* Run Time Average Power Limiting (RAPL) Interface */ + +#define MSR_RAPL_POWER_UNIT 0x00000606 + +#define MSR_PKG_POWER_LIMIT 0x00000610 +#define MSR_PKG_ENERGY_STATUS 0x00000611 +#define MSR_PKG_PERF_STATUS 0x00000613 +#define MSR_PKG_POWER_INFO 0x00000614 + +#define MSR_DRAM_POWER_LIMIT 0x00000618 +#define MSR_DRAM_ENERGY_STATUS 0x00000619 +#define MSR_DRAM_PERF_STATUS 0x0000061b +#define MSR_DRAM_POWER_INFO 0x0000061c + +#define MSR_PP0_POWER_LIMIT 0x00000638 +#define MSR_PP0_ENERGY_STATUS 0x00000639 +#define MSR_PP0_POLICY 0x0000063a +#define MSR_PP0_PERF_STATUS 0x0000063b + +#define MSR_PP1_POWER_LIMIT 0x00000640 +#define MSR_PP1_ENERGY_STATUS 0x00000641 +#define MSR_PP1_POLICY 0x00000642 + +/* Config TDP MSRs */ +#define MSR_CONFIG_TDP_NOMINAL 0x00000648 +#define MSR_CONFIG_TDP_LEVEL_1 0x00000649 +#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A +#define MSR_CONFIG_TDP_CONTROL 0x0000064B +#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C + +#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D + +#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 +#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 +#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A +#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + +#define MSR_CORE_C1_RES 0x00000660 +#define MSR_MODULE_C6_RES_MS 0x00000664 + +#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 +#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 + +#define MSR_ATOM_CORE_RATIOS 0x0000066a +#define MSR_ATOM_CORE_VIDS 0x0000066b +#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c +#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d + + +#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 +#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 +#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 + +/* Hardware P state interface */ +#define MSR_PPERF 0x0000064e +#define MSR_PERF_LIMIT_REASONS 0x0000064f +#define MSR_PM_ENABLE 0x00000770 +#define MSR_HWP_CAPABILITIES 0x00000771 +#define MSR_HWP_REQUEST_PKG 0x00000772 +#define MSR_HWP_INTERRUPT 0x00000773 +#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_STATUS 0x00000777 + +/* CPUID.6.EAX */ +#define HWP_BASE_BIT (1<<7) +#define HWP_NOTIFICATIONS_BIT (1<<8) +#define HWP_ACTIVITY_WINDOW_BIT (1<<9) +#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) +#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) + +/* IA32_HWP_CAPABILITIES */ +#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) +#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) +#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) +#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) + +/* IA32_HWP_REQUEST */ +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_BALANCE_PERFORMANCE 0x80 +#define HWP_EPP_BALANCE_POWERSAVE 0xC0 +#define HWP_EPP_POWERSAVE 0xFF +#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) + +/* IA32_HWP_STATUS */ +#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) + +/* IA32_HWP_INTERRUPT */ +#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) + +#define MSR_AMD64_MC0_MASK 0xc0010044 + +#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) +#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) +#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) +#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + +#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) + +/* These are consecutive and not in the normal 4er MCE bank block */ +#define MSR_IA32_MC0_CTL2 0x00000280 +#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + +/* Alternative perfctr range with full access. */ +#define MSR_IA32_PMC0 0x000004c1 + +/* Auto-reload via MSR instead of DS area */ +#define MSR_RELOAD_PMC0 0x000014c1 +#define MSR_RELOAD_FIXED_CTR0 0x00001309 + +/* + * AMD64 MSRs. Not complete. See the architecture manual for a more + * complete list. + */ +#define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 +#define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_CPUID_FN_1 0xc0011004 +#define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD_PERF_CTL 0xc0010062 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 +#define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_LS_CFG 0xc0011020 +#define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_BU_CFG2 0xc001102a +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSFETCH_REG_COUNT 3 +#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1) +#define MSR_AMD64_IBSOPCTL 0xc0011033 +#define MSR_AMD64_IBSOPRIP 0xc0011034 +#define MSR_AMD64_IBSOPDATA 0xc0011035 +#define MSR_AMD64_IBSOPDATA2 0xc0011036 +#define MSR_AMD64_IBSOPDATA3 0xc0011037 +#define MSR_AMD64_IBSDCLINAD 0xc0011038 +#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSOP_REG_COUNT 7 +#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) +#define MSR_AMD64_IBSCTL 0xc001103a +#define MSR_AMD64_IBSBRTARGET 0xc001103b +#define MSR_AMD64_IBSOPDATA4 0xc001103d +#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SEV 0xc0010131 +#define MSR_AMD64_SEV_ENABLED_BIT 0 +#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) + +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + +/* Fam 16h MSRs */ +#define MSR_F16H_L2I_PERF_CTL 0xc0010230 +#define MSR_F16H_L2I_PERF_CTR 0xc0010231 +#define MSR_F16H_DR1_ADDR_MASK 0xc0011019 +#define MSR_F16H_DR2_ADDR_MASK 0xc001101a +#define MSR_F16H_DR3_ADDR_MASK 0xc001101b +#define MSR_F16H_DR0_ADDR_MASK 0xc0011027 + +/* Fam 15h MSRs */ +#define MSR_F15H_PERF_CTL 0xc0010200 +#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL +#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) +#define MSR_F15H_PERF_CTL2 (MSR_F15H_PERF_CTL + 4) +#define MSR_F15H_PERF_CTL3 (MSR_F15H_PERF_CTL + 6) +#define MSR_F15H_PERF_CTL4 (MSR_F15H_PERF_CTL + 8) +#define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10) + +#define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR +#define MSR_F15H_PERF_CTR1 (MSR_F15H_PERF_CTR + 2) +#define MSR_F15H_PERF_CTR2 (MSR_F15H_PERF_CTR + 4) +#define MSR_F15H_PERF_CTR3 (MSR_F15H_PERF_CTR + 6) +#define MSR_F15H_PERF_CTR4 (MSR_F15H_PERF_CTR + 8) +#define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10) + +#define MSR_F15H_NB_PERF_CTL 0xc0010240 +#define MSR_F15H_NB_PERF_CTR 0xc0010241 +#define MSR_F15H_PTSC 0xc0010280 +#define MSR_F15H_IC_CFG 0xc0011021 +#define MSR_F15H_EX_CFG 0xc001102c + +/* Fam 10h MSRs */ +#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 +#define FAM10H_MMIO_CONF_ENABLE (1<<0) +#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL +#define FAM10H_MMIO_CONF_BASE_SHIFT 20 +#define MSR_FAM10H_NODE_ID 0xc001100c +#define MSR_F10H_DECFG 0xc0011029 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) + +/* K8 MSRs */ +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_K8_SYSCFG 0xc0010010 +#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_K8_INT_PENDING_MSG 0xc0010055 +/* C1E active bits in int pending message */ +#define K8_INTP_C1E_ACTIVE_MASK 0x18000000 +#define MSR_K8_TSEG_ADDR 0xc0010112 +#define MSR_K8_TSEG_MASK 0xc0010113 +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + +/* K7 MSRs */ +#define MSR_K7_EVNTSEL0 0xc0010000 +#define MSR_K7_PERFCTR0 0xc0010004 +#define MSR_K7_EVNTSEL1 0xc0010001 +#define MSR_K7_PERFCTR1 0xc0010005 +#define MSR_K7_EVNTSEL2 0xc0010002 +#define MSR_K7_PERFCTR2 0xc0010006 +#define MSR_K7_EVNTSEL3 0xc0010003 +#define MSR_K7_PERFCTR3 0xc0010007 +#define MSR_K7_CLK_CTL 0xc001001b +#define MSR_K7_HWCR 0xc0010015 +#define MSR_K7_HWCR_SMMLOCK_BIT 0 +#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_FID_VID_CTL 0xc0010041 +#define MSR_K7_FID_VID_STATUS 0xc0010042 + +/* K6 MSRs */ +#define MSR_K6_WHCR 0xc0000082 +#define MSR_K6_UWCCR 0xc0000085 +#define MSR_K6_EPMR 0xc0000086 +#define MSR_K6_PSOR 0xc0000087 +#define MSR_K6_PFIR 0xc0000088 + +/* Centaur-Hauls/IDT defined MSRs. */ +#define MSR_IDT_FCR1 0x00000107 +#define MSR_IDT_FCR2 0x00000108 +#define MSR_IDT_FCR3 0x00000109 +#define MSR_IDT_FCR4 0x0000010a + +#define MSR_IDT_MCR0 0x00000110 +#define MSR_IDT_MCR1 0x00000111 +#define MSR_IDT_MCR2 0x00000112 +#define MSR_IDT_MCR3 0x00000113 +#define MSR_IDT_MCR4 0x00000114 +#define MSR_IDT_MCR5 0x00000115 +#define MSR_IDT_MCR6 0x00000116 +#define MSR_IDT_MCR7 0x00000117 +#define MSR_IDT_MCR_CTRL 0x00000120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x00001107 +#define MSR_VIA_LONGHAUL 0x0000110a +#define MSR_VIA_RNG 0x0000110b +#define MSR_VIA_BCR2 0x00001147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0x00000000 +#define MSR_IA32_P5_MC_TYPE 0x00000001 +#define MSR_IA32_TSC 0x00000010 +#define MSR_IA32_PLATFORM_ID 0x00000017 +#define MSR_IA32_EBL_CR_POWERON 0x0000002a +#define MSR_EBC_FREQUENCY_ID 0x0000002c +#define MSR_SMI_COUNT 0x00000034 +#define MSR_IA32_FEATURE_CONTROL 0x0000003a +#define MSR_IA32_TSC_ADJUST 0x0000003b +#define MSR_IA32_BNDCFGS 0x00000d90 + +#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc + +#define MSR_IA32_XSS 0x00000da0 + +#define FEATURE_CONTROL_LOCKED (1<<0) +#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) +#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) +#define FEATURE_CONTROL_LMCE (1<<20) + +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_TSCDEADLINE 0x000006e0 + +#define MSR_IA32_UCODE_WRITE 0x00000079 +#define MSR_IA32_UCODE_REV 0x0000008b + +#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b +#define MSR_IA32_SMBASE 0x0000009e + +#define MSR_IA32_PERF_STATUS 0x00000198 +#define MSR_IA32_PERF_CTL 0x00000199 +#define INTEL_PERF_CTL_MASK 0xffff + +#define MSR_IA32_MPERF 0x000000e7 +#define MSR_IA32_APERF 0x000000e8 + +#define MSR_IA32_THERM_CONTROL 0x0000019a +#define MSR_IA32_THERM_INTERRUPT 0x0000019b + +#define THERM_INT_HIGH_ENABLE (1 << 0) +#define THERM_INT_LOW_ENABLE (1 << 1) +#define THERM_INT_PLN_ENABLE (1 << 24) + +#define MSR_IA32_THERM_STATUS 0x0000019c + +#define THERM_STATUS_PROCHOT (1 << 0) +#define THERM_STATUS_POWER_LIMIT (1 << 10) + +#define MSR_THERM2_CTL 0x0000019d + +#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) + +#define MSR_IA32_MISC_ENABLE 0x000001a0 + +#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 + +#define MSR_MISC_FEATURE_CONTROL 0x000001a4 +#define MSR_MISC_PWR_MGMT 0x000001aa + +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 +#define ENERGY_PERF_BIAS_PERFORMANCE 0 +#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 +#define ENERGY_PERF_BIAS_NORMAL 6 +#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 +#define ENERGY_PERF_BIAS_POWERSAVE 15 + +#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 + +#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) +#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) + +#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 + +#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) +#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) +#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) + +/* Thermal Thresholds Support */ +#define THERM_INT_THRESHOLD0_ENABLE (1 << 15) +#define THERM_SHIFT_THRESHOLD0 8 +#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0) +#define THERM_INT_THRESHOLD1_ENABLE (1 << 23) +#define THERM_SHIFT_THRESHOLD1 16 +#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1) +#define THERM_STATUS_THRESHOLD0 (1 << 6) +#define THERM_LOG_THRESHOLD0 (1 << 7) +#define THERM_STATUS_THRESHOLD1 (1 << 8) +#define THERM_LOG_THRESHOLD1 (1 << 9) + +/* MISC_ENABLE bits: architectural */ +#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0 +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) +#define MSR_IA32_MISC_ENABLE_TCC_BIT 1 +#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT) +#define MSR_IA32_MISC_ENABLE_EMON_BIT 7 +#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT) +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11 +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12 +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16 +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT) +#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 +#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) + +/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ +#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2 +#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT) +#define MSR_IA32_MISC_ENABLE_TM1_BIT 3 +#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT) +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4 +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6 +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8 +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT) +#define MSR_IA32_MISC_ENABLE_TM2_BIT 13 +#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT) +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19 +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20 +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24 +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT) +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37 +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38 +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) + +/* MISC_FEATURES_ENABLES non-architectural features */ +#define MSR_MISC_FEATURES_ENABLES 0x00000140 + +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0 +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT) +#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1 + +#define MSR_IA32_TSC_DEADLINE 0x000006E0 + + +#define MSR_TSX_FORCE_ABORT 0x0000010F + +#define MSR_TFA_RTM_FORCE_ABORT_BIT 0 +#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT) + +/* P4/Xeon+ specific */ +#define MSR_IA32_MCG_EAX 0x00000180 +#define MSR_IA32_MCG_EBX 0x00000181 +#define MSR_IA32_MCG_ECX 0x00000182 +#define MSR_IA32_MCG_EDX 0x00000183 +#define MSR_IA32_MCG_ESI 0x00000184 +#define MSR_IA32_MCG_EDI 0x00000185 +#define MSR_IA32_MCG_EBP 0x00000186 +#define MSR_IA32_MCG_ESP 0x00000187 +#define MSR_IA32_MCG_EFLAGS 0x00000188 +#define MSR_IA32_MCG_EIP 0x00000189 +#define MSR_IA32_MCG_RESERVED 0x0000018a + +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x00000300 +#define MSR_P4_BPU_PERFCTR1 0x00000301 +#define MSR_P4_BPU_PERFCTR2 0x00000302 +#define MSR_P4_BPU_PERFCTR3 0x00000303 +#define MSR_P4_MS_PERFCTR0 0x00000304 +#define MSR_P4_MS_PERFCTR1 0x00000305 +#define MSR_P4_MS_PERFCTR2 0x00000306 +#define MSR_P4_MS_PERFCTR3 0x00000307 +#define MSR_P4_FLAME_PERFCTR0 0x00000308 +#define MSR_P4_FLAME_PERFCTR1 0x00000309 +#define MSR_P4_FLAME_PERFCTR2 0x0000030a +#define MSR_P4_FLAME_PERFCTR3 0x0000030b +#define MSR_P4_IQ_PERFCTR0 0x0000030c +#define MSR_P4_IQ_PERFCTR1 0x0000030d +#define MSR_P4_IQ_PERFCTR2 0x0000030e +#define MSR_P4_IQ_PERFCTR3 0x0000030f +#define MSR_P4_IQ_PERFCTR4 0x00000310 +#define MSR_P4_IQ_PERFCTR5 0x00000311 +#define MSR_P4_BPU_CCCR0 0x00000360 +#define MSR_P4_BPU_CCCR1 0x00000361 +#define MSR_P4_BPU_CCCR2 0x00000362 +#define MSR_P4_BPU_CCCR3 0x00000363 +#define MSR_P4_MS_CCCR0 0x00000364 +#define MSR_P4_MS_CCCR1 0x00000365 +#define MSR_P4_MS_CCCR2 0x00000366 +#define MSR_P4_MS_CCCR3 0x00000367 +#define MSR_P4_FLAME_CCCR0 0x00000368 +#define MSR_P4_FLAME_CCCR1 0x00000369 +#define MSR_P4_FLAME_CCCR2 0x0000036a +#define MSR_P4_FLAME_CCCR3 0x0000036b +#define MSR_P4_IQ_CCCR0 0x0000036c +#define MSR_P4_IQ_CCCR1 0x0000036d +#define MSR_P4_IQ_CCCR2 0x0000036e +#define MSR_P4_IQ_CCCR3 0x0000036f +#define MSR_P4_IQ_CCCR4 0x00000370 +#define MSR_P4_IQ_CCCR5 0x00000371 +#define MSR_P4_ALF_ESCR0 0x000003ca +#define MSR_P4_ALF_ESCR1 0x000003cb +#define MSR_P4_BPU_ESCR0 0x000003b2 +#define MSR_P4_BPU_ESCR1 0x000003b3 +#define MSR_P4_BSU_ESCR0 0x000003a0 +#define MSR_P4_BSU_ESCR1 0x000003a1 +#define MSR_P4_CRU_ESCR0 0x000003b8 +#define MSR_P4_CRU_ESCR1 0x000003b9 +#define MSR_P4_CRU_ESCR2 0x000003cc +#define MSR_P4_CRU_ESCR3 0x000003cd +#define MSR_P4_CRU_ESCR4 0x000003e0 +#define MSR_P4_CRU_ESCR5 0x000003e1 +#define MSR_P4_DAC_ESCR0 0x000003a8 +#define MSR_P4_DAC_ESCR1 0x000003a9 +#define MSR_P4_FIRM_ESCR0 0x000003a4 +#define MSR_P4_FIRM_ESCR1 0x000003a5 +#define MSR_P4_FLAME_ESCR0 0x000003a6 +#define MSR_P4_FLAME_ESCR1 0x000003a7 +#define MSR_P4_FSB_ESCR0 0x000003a2 +#define MSR_P4_FSB_ESCR1 0x000003a3 +#define MSR_P4_IQ_ESCR0 0x000003ba +#define MSR_P4_IQ_ESCR1 0x000003bb +#define MSR_P4_IS_ESCR0 0x000003b4 +#define MSR_P4_IS_ESCR1 0x000003b5 +#define MSR_P4_ITLB_ESCR0 0x000003b6 +#define MSR_P4_ITLB_ESCR1 0x000003b7 +#define MSR_P4_IX_ESCR0 0x000003c8 +#define MSR_P4_IX_ESCR1 0x000003c9 +#define MSR_P4_MOB_ESCR0 0x000003aa +#define MSR_P4_MOB_ESCR1 0x000003ab +#define MSR_P4_MS_ESCR0 0x000003c0 +#define MSR_P4_MS_ESCR1 0x000003c1 +#define MSR_P4_PMH_ESCR0 0x000003ac +#define MSR_P4_PMH_ESCR1 0x000003ad +#define MSR_P4_RAT_ESCR0 0x000003bc +#define MSR_P4_RAT_ESCR1 0x000003bd +#define MSR_P4_SAAT_ESCR0 0x000003ae +#define MSR_P4_SAAT_ESCR1 0x000003af +#define MSR_P4_SSU_ESCR0 0x000003be +#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ + +#define MSR_P4_TBPU_ESCR0 0x000003c2 +#define MSR_P4_TBPU_ESCR1 0x000003c3 +#define MSR_P4_TC_ESCR0 0x000003c4 +#define MSR_P4_TC_ESCR1 0x000003c5 +#define MSR_P4_U2L_ESCR0 0x000003b0 +#define MSR_P4_U2L_ESCR1 0x000003b1 + +#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 + +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 +#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a +#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 + +/* PERF_GLOBAL_OVF_CTL bits */ +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT) +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63 +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT) + +/* Geode defined MSRs */ +#define MSR_GEODE_BUSCONT_CONF0 0x00001900 + +/* Intel VT MSRs */ +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e +#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f +#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 +#define MSR_IA32_VMX_VMFUNC 0x00000491 + +/* VMX_BASIC bits and bitmasks */ +#define VMX_BASIC_VMCS_SIZE_SHIFT 32 +#define VMX_BASIC_TRUE_CTLS (1ULL << 55) +#define VMX_BASIC_64 0x0001000000000000LLU +#define VMX_BASIC_MEM_TYPE_SHIFT 50 +#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU +#define VMX_BASIC_MEM_TYPE_WB 6LLU +#define VMX_BASIC_INOUT 0x0040000000000000LLU + +/* MSR_IA32_VMX_MISC bits */ +#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) +#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F +/* AMD-V MSRs */ + +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_IGNNE 0xc0010115 +#define MSR_VM_HSAVE_PA 0xc0010117 + +#endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index c599623a1f3d..c4dd23c4b478 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -561,6 +561,11 @@ trace.*:: trace.show_zeros:: Do not suppress syscall arguments that are equal to zero. + trace.tracepoint_beautifiers:: + Use "libtraceevent" to use that library to augment the tracepoint arguments, + "libbeauty", the default, to use the same argument beautifiers used in the + strace-like sys_enter+sys_exit lines. + llvm.*:: llvm.clang-path:: Path to clang. If omit, search it from $PATH. diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index d5cc15e651cf..f50ca0fef0a4 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -95,6 +95,11 @@ OPTIONS diff.compute config option. See COMPARISON METHODS section for more info. +--cycles-hist:: + Report a histogram and the standard deviation for cycles data. + It can help us to judge if the reported cycles data is noisy or + not. This option should be used with '-c cycles'. + -p:: --period:: Show period values for both compared hist entries. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 18ed1b0fceb3..6345db33c533 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -36,6 +36,9 @@ Enable debugging output. Print how named events are resolved internally into perf events, and also any extra expressions computed by perf stat. +--deprecated:: +Print deprecated events. By default the deprecated events are hidden. + [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 930c51c01201..a9af4e440e80 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -323,6 +323,12 @@ The output is SMI cycles%, equals to (aperf - unhalted core cycles) / aperf Users who wants to get the actual value can apply --no-metric-only. +--all-kernel:: +Configure all used events to run in kernel space. + +--all-user:: +Configure all used events to run in user space. + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 25b74fdb36fa..abc9b5d83312 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -42,6 +42,11 @@ OPTIONS Prefixing with ! shows all syscalls but the ones specified. You may need to escape it. +--filter=<filter>:: + Event filter. This option should follow an event selector (-e) which + selects tracepoint event(s). + + -D msecs:: --delay msecs:: After starting the program, wait msecs before measuring. This is useful to @@ -141,6 +146,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Show all syscalls followed by a summary by thread with min, max, and average times (in msec) and relative stddev. +--errno-summary:: + To be used with -s or -S, to show stats for the errnos experienced by + syscalls, using only this option will trigger --summary. + --tool_stats:: Show tool stats such as number of times fd->pathname was discovered thru hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. @@ -219,6 +228,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. may happen, for instance, when a thread gets migrated to a different CPU while processing a syscall. +--libtraceevent_print:: + Use libtraceevent to print tracepoint arguments. By default 'perf trace' uses + the same beautifiers used in the strace-like enter+exit lines to augment the + tracepoint arguments. + --map-dump:: Dump BPF maps setup by events passed via -e, for instance the augmented_raw_syscalls living in tools/perf/examples/bpf/augmented_raw_syscalls.c. For now this diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 46f7fba2306c..1783427da9b0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -188,7 +188,7 @@ endif # Treat warnings as errors unless directed not to ifneq ($(WERROR),0) - CFLAGS += -Werror + CORE_CFLAGS += -Werror CXXFLAGS += -Werror endif @@ -198,9 +198,9 @@ endif ifeq ($(DEBUG),0) ifeq ($(CC_NO_CLANG), 0) - CFLAGS += -O3 + CORE_CFLAGS += -O3 else - CFLAGS += -O6 + CORE_CFLAGS += -O6 endif endif @@ -245,12 +245,12 @@ FEATURE_CHECK_LDFLAGS-libaio = -lrt FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl -CFLAGS += -fno-omit-frame-pointer -CFLAGS += -ggdb3 -CFLAGS += -funwind-tables -CFLAGS += -Wall -CFLAGS += -Wextra -CFLAGS += -std=gnu99 +CORE_CFLAGS += -fno-omit-frame-pointer +CORE_CFLAGS += -ggdb3 +CORE_CFLAGS += -funwind-tables +CORE_CFLAGS += -Wall +CORE_CFLAGS += -Wextra +CORE_CFLAGS += -std=gnu99 CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti CXXFLAGS += -Wall @@ -265,6 +265,11 @@ LDFLAGS += -Wl,-z,noexecstack EXTLIBS = -lpthread -lrt -lm -ldl +ifneq ($(TCMALLOC),) + CFLAGS += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free + EXTLIBS += -ltcmalloc +endif + ifeq ($(FEATURES_DUMP),) include $(srctree)/tools/build/Makefile.feature else @@ -272,12 +277,12 @@ include $(FEATURES_DUMP) endif ifeq ($(feature-stackprotector-all), 1) - CFLAGS += -fstack-protector-all + CORE_CFLAGS += -fstack-protector-all endif ifeq ($(DEBUG),0) ifeq ($(feature-fortify-source), 1) - CFLAGS += -D_FORTIFY_SOURCE=2 + CORE_CFLAGS += -D_FORTIFY_SOURCE=2 endif endif @@ -301,10 +306,12 @@ INC_FLAGS += -I$(src-perf)/util INC_FLAGS += -I$(src-perf) INC_FLAGS += -I$(srctree)/tools/lib/ -CFLAGS += $(INC_FLAGS) +CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE + +CFLAGS += $(CORE_CFLAGS) $(INC_FLAGS) CXXFLAGS += $(INC_FLAGS) -CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE +LIBPERF_CFLAGS := $(CORE_CFLAGS) $(EXTRA_CFLAGS) ifeq ($(feature-sync-compare-and-swap), 1) CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 902c792f326a..1cd294468a1f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -114,6 +114,8 @@ include ../scripts/utilities.mak # Define NO_LIBZSTD if you do not want support of Zstandard based runtime # trace compression in record mode. # +# Define TCMALLOC to enable tcmalloc heap profiling. +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -407,6 +409,7 @@ linux_uapi_dir := $(srctree)/tools/include/uapi/linux asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/ x86_arch_asm_uapi_dir := $(srctree)/tools/arch/x86/include/uapi/asm/ +x86_arch_asm_dir := $(srctree)/tools/arch/x86/include/asm/ beauty_outdir := $(OUTPUT)trace/beauty/generated beauty_ioctl_outdir := $(beauty_outdir)/ioctl @@ -543,6 +546,18 @@ x86_arch_prctl_code_tbl := $(srctree)/tools/perf/trace/beauty/x86_arch_prctl.sh $(x86_arch_prctl_code_array): $(x86_arch_asm_uapi_dir)/prctl.h $(x86_arch_prctl_code_tbl) $(Q)$(SHELL) '$(x86_arch_prctl_code_tbl)' $(x86_arch_asm_uapi_dir) > $@ +x86_arch_irq_vectors_array := $(beauty_outdir)/x86_arch_irq_vectors_array.c +x86_arch_irq_vectors_tbl := $(srctree)/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh + +$(x86_arch_irq_vectors_array): $(x86_arch_asm_dir)/irq_vectors.h $(x86_arch_irq_vectors_tbl) + $(Q)$(SHELL) '$(x86_arch_irq_vectors_tbl)' $(x86_arch_asm_dir) > $@ + +x86_arch_MSRs_array := $(beauty_outdir)/x86_arch_MSRs_array.c +x86_arch_MSRs_tbl := $(srctree)/tools/perf/trace/beauty/tracepoints/x86_msr.sh + +$(x86_arch_MSRs_array): $(x86_arch_asm_dir)/msr-index.h $(x86_arch_MSRs_tbl) + $(Q)$(SHELL) '$(x86_arch_MSRs_tbl)' $(x86_arch_asm_dir) > $@ + rename_flags_array := $(beauty_outdir)/rename_flags_array.c rename_flags_tbl := $(srctree)/tools/perf/trace/beauty/rename_flags.sh @@ -677,6 +692,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(perf_ioctl_array) \ $(prctl_option_array) \ $(usbdevfs_ioctl_array) \ + $(x86_arch_irq_vectors_array) \ + $(x86_arch_MSRs_array) \ $(x86_arch_prctl_code_array) \ $(rename_flags_array) \ $(arch_errno_name_array) \ @@ -761,7 +778,7 @@ $(LIBBPF)-clean: $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null $(LIBPERF): FORCE - $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) $(OUTPUT)libperf.a + $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a $(LIBPERF)-clean: $(call QUIET_CLEAN, libperf) @@ -981,6 +998,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(perf_ioctl_array) \ $(OUTPUT)$(prctl_option_array) \ $(OUTPUT)$(usbdevfs_ioctl_array) \ + $(OUTPUT)$(x86_arch_irq_vectors_array) \ + $(OUTPUT)$(x86_arch_MSRs_array) \ $(OUTPUT)$(x86_arch_prctl_code_array) \ $(OUTPUT)$(rename_flags_array) \ $(OUTPUT)$(arch_errno_name_array) \ diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index 296f0eac5e18..37fc63708966 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,3 +1,5 @@ +perf-y += perf_regs.o + perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm/util/perf_regs.c b/tools/perf/arch/arm/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/arm/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 3cde540d2fcf..0a7782c61209 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,5 @@ perf-y += header.o +perf-y += perf_regs.o perf-y += sym-handling.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/csky/util/Build b/tools/perf/arch/csky/util/Build index 1160bb2332ba..7d3050134ae0 100644 --- a/tools/perf/arch/csky/util/Build +++ b/tools/perf/arch/csky/util/Build @@ -1,2 +1,4 @@ +perf-y += perf_regs.o + perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/csky/util/perf_regs.c b/tools/perf/arch/csky/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/csky/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index 1160bb2332ba..7d3050134ae0 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -1,2 +1,4 @@ +perf-y += perf_regs.o + perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/perf_regs.c b/tools/perf/arch/riscv/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/riscv/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 22797f043b84..3d9d0f4f72ca 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,5 +1,6 @@ perf-y += header.o perf-y += kvm-stat.o +perf-y += perf_regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/s390/util/perf_regs.c b/tools/perf/arch/s390/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/s390/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index fa947952c16a..909ead08a6f6 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -9,6 +9,7 @@ #include <sys/prctl.h> #include <perf/cpumap.h> #include <perf/evlist.h> +#include <perf/mmap.h> #include "debug.h" #include "parse-events.h" @@ -117,10 +118,10 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_COMM || @@ -139,9 +140,9 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe comm2_time = sample.time; } next_event: - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (!comm1_time || !comm2_time) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index c37a78677955..5281629c27b1 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -23,6 +23,7 @@ #include "util/time-utils.h" #include "util/annotate.h" #include "util/map.h" +#include "util/spark.h" #include <linux/err.h> #include <linux/zalloc.h> #include <subcmd/pager.h> @@ -53,6 +54,7 @@ enum { PERF_HPP_DIFF__FORMULA, PERF_HPP_DIFF__DELTA_ABS, PERF_HPP_DIFF__CYCLES, + PERF_HPP_DIFF__CYCLES_HIST, PERF_HPP_DIFF__MAX_INDEX }; @@ -87,6 +89,7 @@ static bool force; static bool show_period; static bool show_formula; static bool show_baseline_only; +static bool cycles_hist; static unsigned int sort_compute = 1; static s64 compute_wdiff_w1; @@ -164,6 +167,10 @@ static struct header_column { [PERF_HPP_DIFF__CYCLES] = { .name = "[Program Block Range] Cycles Diff", .width = 70, + }, + [PERF_HPP_DIFF__CYCLES_HIST] = { + .name = "stddev/Hist", + .width = NUM_SPARKS + 9, } }; @@ -610,6 +617,9 @@ static void init_block_info(struct block_info *bi, struct symbol *sym, bi->cycles_aggr = ch->cycles_aggr; bi->num = ch->num; bi->num_aggr = ch->num_aggr; + + memcpy(bi->cycles_spark, ch->cycles_spark, + NUM_SPARKS * sizeof(u64)); } static int process_block_per_sym(struct hist_entry *he) @@ -689,6 +699,21 @@ static struct hist_entry *get_block_pair(struct hist_entry *he, return NULL; } +static void init_spark_values(unsigned long *svals, int num) +{ + for (int i = 0; i < num; i++) + svals[i] = 0; +} + +static void update_spark_value(unsigned long *svals, int num, + struct stats *stats, u64 val) +{ + int n = stats->n; + + if (n < num) + svals[n] = val; +} + static void compute_cycles_diff(struct hist_entry *he, struct hist_entry *pair) { @@ -697,6 +722,26 @@ static void compute_cycles_diff(struct hist_entry *he, pair->diff.cycles = pair->block_info->cycles_aggr / pair->block_info->num_aggr - he->block_info->cycles_aggr / he->block_info->num_aggr; + + if (!cycles_hist) + return; + + init_stats(&pair->diff.stats); + init_spark_values(pair->diff.svals, NUM_SPARKS); + + for (int i = 0; i < pair->block_info->num; i++) { + u64 val; + + if (i >= he->block_info->num || i >= NUM_SPARKS) + break; + + val = labs(pair->block_info->cycles_spark[i] - + he->block_info->cycles_spark[i]); + + update_spark_value(pair->diff.svals, NUM_SPARKS, + &pair->diff.stats, val); + update_stats(&pair->diff.stats, val); + } } } @@ -1255,6 +1300,9 @@ static const struct option options[] = { "Show period values."), OPT_BOOLEAN('F', "formula", &show_formula, "Show formula."), + OPT_BOOLEAN(0, "cycles-hist", &cycles_hist, + "Show cycles histogram and standard deviation " + "- WARNING: use only with -c cycles."), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -1462,6 +1510,90 @@ static int hpp__color_cycles(struct perf_hpp_fmt *fmt, return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES); } +static int all_zero(unsigned long *vals, int len) +{ + int i; + + for (i = 0; i < len; i++) + if (vals[i] != 0) + return 0; + return 1; +} + +static int print_cycles_spark(char *bf, int size, unsigned long *svals, u64 n) +{ + int printed; + + if (n <= 1) + return 0; + + if (n > NUM_SPARKS) + n = NUM_SPARKS; + if (all_zero(svals, n)) + return 0; + + printed = print_spark(bf, size, svals, n); + printed += scnprintf(bf + printed, size - printed, " "); + return printed; +} + +static int hpp__color_cycles_hist(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp, struct hist_entry *he) +{ + struct diff_hpp_fmt *dfmt = + container_of(fmt, struct diff_hpp_fmt, fmt); + struct hist_entry *pair = get_pair_fmt(he, dfmt); + struct block_hist *bh = container_of(he, struct block_hist, he); + struct block_hist *bh_pair; + struct hist_entry *block_he; + char spark[32], buf[128]; + double r; + int ret, pad; + + if (!pair) { + if (bh->block_idx) + hpp->skip = true; + + goto no_print; + } + + bh_pair = container_of(pair, struct block_hist, he); + + block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx); + if (!block_he) { + hpp->skip = true; + goto no_print; + } + + ret = print_cycles_spark(spark, sizeof(spark), block_he->diff.svals, + block_he->diff.stats.n); + + r = rel_stddev_stats(stddev_stats(&block_he->diff.stats), + avg_stats(&block_he->diff.stats)); + + if (ret) { + /* + * Padding spaces if number of sparks less than NUM_SPARKS + * otherwise the output is not aligned. + */ + pad = NUM_SPARKS - ((ret - 1) / 3); + scnprintf(buf, sizeof(buf), "%s%5.1f%% %s", "\u00B1", r, spark); + ret = scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, buf); + + if (pad) { + ret += scnprintf(hpp->buf + ret, hpp->size - ret, + "%-*s", pad, " "); + } + + return ret; + } + +no_print: + return scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, " "); +} + static void hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size) { @@ -1667,6 +1799,10 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->color = hpp__color_cycles; fmt->sort = hist_entry__cmp_nop; break; + case PERF_HPP_DIFF__CYCLES_HIST: + fmt->color = hpp__color_cycles_hist; + fmt->sort = hist_entry__cmp_nop; + break; default: fmt->sort = hist_entry__cmp_nop; break; @@ -1692,10 +1828,14 @@ static int ui_init(void) * PERF_HPP_DIFF__DELTA * PERF_HPP_DIFF__RATIO * PERF_HPP_DIFF__WEIGHTED_DIFF + * PERF_HPP_DIFF__CYCLES */ data__hpp_register(d, i ? compute_2_hpp[compute] : PERF_HPP_DIFF__BASELINE); + if (cycles_hist && i) + data__hpp_register(d, PERF_HPP_DIFF__CYCLES_HIST); + /* * And the rest: * @@ -1850,6 +1990,9 @@ int cmd_diff(int argc, const char **argv) if (quiet) perf_quiet_option(); + if (cycles_hist && (compute != COMPUTE_CYCLES)) + usage_with_options(diff_usage, options); + symbol__annotation_init(); if (symbol__init(NULL) < 0) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 58a9e0989491..858da896b518 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -46,6 +46,7 @@ #include <semaphore.h> #include <signal.h> #include <math.h> +#include <perf/mmap.h> static const char *get_filename_for_perf_kvm(void) { @@ -759,14 +760,14 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, *mmap_time = ULLONG_MAX; md = &evlist->mmap[idx]; - err = perf_mmap__read_init(md); + err = perf_mmap__read_init(&md->core); if (err < 0) return (err == -EAGAIN) ? 0 : -1; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { err = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); if (err) { - perf_mmap__consume(md); + perf_mmap__consume(&md->core); pr_err("Failed to parse sample\n"); return -1; } @@ -776,7 +777,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. */ - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (err) { pr_err("Failed to enqueue sample: %d\n", err); @@ -793,7 +794,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, break; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); return n; } diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 08e62ae9d37e..965ef017496f 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -26,6 +26,7 @@ int cmd_list(int argc, const char **argv) int i; bool raw_dump = false; bool long_desc_flag = false; + bool deprecated = false; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_BOOLEAN('d', "desc", &desc_flag, @@ -34,6 +35,8 @@ int cmd_list(int argc, const char **argv) "Print longer event descriptions."), OPT_BOOLEAN(0, "details", &details_flag, "Print information on the perf event names and expressions used internally by events."), + OPT_BOOLEAN(0, "deprecated", &deprecated, + "Print deprecated events."), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -55,7 +58,7 @@ int cmd_list(int argc, const char **argv) if (argc == 0) { print_events(NULL, raw_dump, !desc_flag, long_desc_flag, - details_flag); + details_flag, deprecated); return 0; } @@ -78,7 +81,8 @@ int cmd_list(int argc, const char **argv) print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump, !desc_flag, - long_desc_flag, details_flag); + long_desc_flag, details_flag, + deprecated); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) @@ -91,7 +95,8 @@ int cmd_list(int argc, const char **argv) if (sep == NULL) { print_events(argv[i], raw_dump, !desc_flag, long_desc_flag, - details_flag); + details_flag, + deprecated); continue; } sep_idx = sep - argv[i]; @@ -117,7 +122,8 @@ int cmd_list(int argc, const char **argv) print_hwcache_events(s, raw_dump); print_pmu_events(s, raw_dump, !desc_flag, long_desc_flag, - details_flag); + details_flag, + deprecated); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); metricgroup__print(true, true, s, raw_dump, details_flag); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 23332861de6e..2fb83aabbef5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -197,7 +197,7 @@ static int record__aio_complete(struct mmap *md, struct aiocb *cblock) * every aio write request started in record__aio_push() so * decrement it because the request is now complete. */ - perf_mmap__put(md); + perf_mmap__put(&md->core); rc = 1; } else { /* @@ -276,7 +276,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size if (record__comp_enabled(aio->rec)) { size = zstd_compress(aio->rec->session, aio->data + aio->size, - perf_mmap__mmap_len(map) - aio->size, + mmap__mmap_len(map) - aio->size, buf, size); } else { memcpy(aio->data + aio->size, buf, size); @@ -293,7 +293,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size * after started aio request completion or at record__aio_push() * if the request failed to start. */ - perf_mmap__get(map); + perf_mmap__get(&map->core); } aio->size += size; @@ -332,7 +332,7 @@ static int record__aio_push(struct record *rec, struct mmap *map, off_t *off) * map->refcount is decremented in record__aio_complete() after * aio write operation finishes successfully. */ - perf_mmap__put(map); + perf_mmap__put(&map->core); } return ret; @@ -488,7 +488,7 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) struct record *rec = to; if (record__comp_enabled(rec)) { - size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size); + size = zstd_compress(rec->session, map->data, mmap__mmap_len(map), bf, size); bf = map->data; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index aae0e57c60fb..7accaf8ef689 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -399,6 +399,13 @@ static int report__setup_sample_type(struct report *rep) PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; +#ifndef HAVE_LIBUNWIND_SUPPORT + if (dwarf_callchain_users) { + ui__warning("Please install libunwind development packages " + "during the perf build.\n"); + } +#endif + return 0; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 67be8d31afab..f86c5cce5b2c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3605,11 +3605,6 @@ int cmd_script(int argc, const char **argv) } } - if (script.time_str && reltime) { - fprintf(stderr, "Don't combine --reltime with --time\n"); - return -1; - } - if (itrace_synth_opts.callchain && itrace_synth_opts.callchain_sz > scripting_max_stack) scripting_max_stack = itrace_synth_opts.callchain_sz; @@ -3869,10 +3864,11 @@ int cmd_script(int argc, const char **argv) goto out_delete; if (script.time_str) { - err = perf_time__parse_for_ranges(script.time_str, session, + err = perf_time__parse_for_ranges_reltime(script.time_str, session, &script.ptime_range, &script.range_size, - &script.range_num); + &script.range_num, + reltime); if (err < 0) goto out_delete; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 468fc49420ce..c88d4e118409 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -803,6 +803,12 @@ static struct option stat_options[] = { OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", "monitor specified metrics or metric groups (separated by ,)", parse_metric_groups), + OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel, + "Configure all used events to run in kernel space.", + PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user, + "Configure all used events to run in user space.", + PARSE_OPT_EXCLUSIVE), OPT_END() }; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1f60124eb19b..d96f24c8770d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -82,6 +82,7 @@ #include <linux/err.h> #include <linux/ctype.h> +#include <perf/mmap.h> static volatile int done; static volatile int resize; @@ -869,10 +870,10 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) union perf_event *event; md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) return; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { int ret; ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp); @@ -883,7 +884,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) if (ret) break; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (top->qe.rotate) { pthread_mutex_lock(&top->qe.mutex); @@ -893,7 +894,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) } } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } static void perf_top__mmap_read(struct perf_top *top) @@ -1560,6 +1561,17 @@ int cmd_top(int argc, const char **argv) status = perf_config(perf_top_config, &top); if (status) return status; + /* + * Since the per arch annotation init routine may need the cpuid, read + * it here, since we are not getting this from the perf.data header. + */ + status = perf_env__read_cpuid(&perf_env); + if (status) { + pr_err("Couldn't read the cpuid for this machine: %s\n", + str_error_r(errno, errbuf, sizeof(errbuf))); + goto out_delete_evlist; + } + top.evlist->env = &perf_env; argc = parse_options(argc, argv, options, top_usage, 0); if (argc) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bb5130d02155..43c05eae1768 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -77,6 +77,7 @@ #include <sys/sysmacros.h> #include <linux/ctype.h> +#include <perf/mmap.h> #ifndef O_CLOEXEC # define O_CLOEXEC 02000000 @@ -86,6 +87,33 @@ # define F_LINUX_SPECIFIC_BASE 1024 #endif +/* + * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100 + */ +struct syscall_arg_fmt { + size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); + bool (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val); + unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); + void *parm; + const char *name; + u16 nr_entries; // for arrays + bool show_zero; +}; + +struct syscall_fmt { + const char *name; + const char *alias; + struct { + const char *sys_enter, + *sys_exit; + } bpf_prog_name; + struct syscall_arg_fmt arg[6]; + u8 nr_args; + bool errpid; + bool timeout; + bool hexret; +}; + struct trace { struct perf_tool tool; struct syscalltbl *sctbl; @@ -147,11 +175,13 @@ struct trace { bool multiple_threads; bool summary; bool summary_only; + bool errno_summary; bool failure_only; bool show_comm; bool print_sample; bool show_tool_stats; bool trace_syscalls; + bool libtraceevent_print; bool kernel_syscallchains; s16 args_alignment; bool show_tstamp; @@ -162,6 +192,7 @@ struct trace { bool force; bool vfs_getname; int trace_pgfaults; + char *perfconfig_events; struct { struct ordered_events data; u64 last; @@ -254,6 +285,87 @@ struct syscall_tp { }; }; +/* + * The evsel->priv as used by 'perf trace' + * sc: for raw_syscalls:sys_{enter,exit} and syscalls:sys_{enter,exit}_SYSCALLNAME + * fmt: for all the other tracepoints + */ +struct evsel_trace { + struct syscall_tp sc; + struct syscall_arg_fmt *fmt; +}; + +static struct evsel_trace *evsel_trace__new(void) +{ + return zalloc(sizeof(struct evsel_trace)); +} + +static void evsel_trace__delete(struct evsel_trace *et) +{ + if (et == NULL) + return; + + zfree(&et->fmt); + free(et); +} + +/* + * Used with raw_syscalls:sys_{enter,exit} and with the + * syscalls:sys_{enter,exit}_SYSCALL tracepoints + */ +static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel) +{ + struct evsel_trace *et = evsel->priv; + + return &et->sc; +} + +static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel) +{ + if (evsel->priv == NULL) { + evsel->priv = evsel_trace__new(); + if (evsel->priv == NULL) + return NULL; + } + + return __evsel__syscall_tp(evsel); +} + +/* + * Used with all the other tracepoints. + */ +static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel) +{ + struct evsel_trace *et = evsel->priv; + + return et->fmt; +} + +static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel) +{ + struct evsel_trace *et = evsel->priv; + + if (evsel->priv == NULL) { + et = evsel->priv = evsel_trace__new(); + + if (et == NULL) + return NULL; + } + + if (et->fmt == NULL) { + et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt)); + if (et->fmt == NULL) + goto out_delete; + } + + return __evsel__syscall_arg_fmt(evsel); + +out_delete: + evsel_trace__delete(evsel->priv); + evsel->priv = NULL; + return NULL; +} + static int perf_evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name) @@ -267,7 +379,7 @@ static int perf_evsel__init_tp_uint_field(struct evsel *evsel, } #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ - ({ struct syscall_tp *sc = evsel->priv;\ + ({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\ perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) static int perf_evsel__init_tp_ptr_field(struct evsel *evsel, @@ -283,7 +395,7 @@ static int perf_evsel__init_tp_ptr_field(struct evsel *evsel, } #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ - ({ struct syscall_tp *sc = evsel->priv;\ + ({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\ perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) static void evsel__delete_priv(struct evsel *evsel) @@ -294,73 +406,61 @@ static void evsel__delete_priv(struct evsel *evsel) static int perf_evsel__init_syscall_tp(struct evsel *evsel) { - struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp)); + struct syscall_tp *sc = evsel__syscall_tp(evsel); - if (evsel->priv != NULL) { + if (sc != NULL) { if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") && perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr")) - goto out_delete; + return -ENOENT; return 0; } return -ENOMEM; -out_delete: - zfree(&evsel->priv); - return -ENOENT; } static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp) { - struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp)); + struct syscall_tp *sc = evsel__syscall_tp(evsel); - if (evsel->priv != NULL) { + if (sc != NULL) { struct tep_format_field *syscall_id = perf_evsel__field(tp, "id"); if (syscall_id == NULL) syscall_id = perf_evsel__field(tp, "__syscall_nr"); - if (syscall_id == NULL) - goto out_delete; - if (__tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap)) - goto out_delete; + if (syscall_id == NULL || + __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap)) + return -EINVAL; return 0; } return -ENOMEM; -out_delete: - zfree(&evsel->priv); - return -EINVAL; } static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel) { - struct syscall_tp *sc = evsel->priv; + struct syscall_tp *sc = __evsel__syscall_tp(evsel); return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)); } static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel) { - struct syscall_tp *sc = evsel->priv; + struct syscall_tp *sc = __evsel__syscall_tp(evsel); return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap); } static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler) { - evsel->priv = malloc(sizeof(struct syscall_tp)); - if (evsel->priv != NULL) { + if (evsel__syscall_tp(evsel) != NULL) { if (perf_evsel__init_sc_tp_uint_field(evsel, id)) - goto out_delete; + return -ENOENT; evsel->handler = handler; return 0; } return -ENOMEM; - -out_delete: - zfree(&evsel->priv); - return -ENOENT; } static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler) @@ -385,13 +485,27 @@ out_delete: } #define perf_evsel__sc_tp_uint(evsel, name, sample) \ - ({ struct syscall_tp *fields = evsel->priv; \ + ({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \ fields->name.integer(&fields->name, sample); }) #define perf_evsel__sc_tp_ptr(evsel, name, sample) \ - ({ struct syscall_tp *fields = evsel->priv; \ + ({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \ fields->name.pointer(&fields->name, sample); }) +size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val) +{ + int idx = val - sa->offset; + + if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) { + size_t printed = scnprintf(bf, size, intfmt, val); + if (show_suffix) + printed += scnprintf(bf + printed, size - printed, " /* %s??? */", sa->prefix); + return printed; + } + + return scnprintf(bf, size, "%s%s", sa->entries[idx], show_suffix ? sa->prefix : ""); +} + size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val) { int idx = val - sa->offset; @@ -421,6 +535,21 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size, #define SCA_STRARRAY syscall_arg__scnprintf_strarray +bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret) +{ + return strarray__strtoul(arg->parm, bf, size, ret); +} + +bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret) +{ + return strarray__strtoul_flags(arg->parm, bf, size, ret); +} + +bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret) +{ + return strarrays__strtoul(arg->parm, bf, size, ret); +} + size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg) { return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val); @@ -448,6 +577,77 @@ size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const return printed; } +bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret) +{ + int i; + + for (i = 0; i < sa->nr_entries; ++i) { + if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') { + *ret = sa->offset + i; + return true; + } + } + + return false; +} + +bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret) +{ + u64 val = 0; + char *tok = bf, *sep, *end; + + *ret = 0; + + while (size != 0) { + int toklen = size; + + sep = memchr(tok, '|', size); + if (sep != NULL) { + size -= sep - tok + 1; + + end = sep - 1; + while (end > tok && isspace(*end)) + --end; + + toklen = end - tok + 1; + } + + while (isspace(*tok)) + ++tok; + + if (isalpha(*tok) || *tok == '_') { + if (!strarray__strtoul(sa, tok, toklen, &val)) + return false; + } else { + bool is_hexa = tok[0] == 0 && (tok[1] = 'x' || tok[1] == 'X'); + + val = strtoul(tok, NULL, is_hexa ? 16 : 0); + } + + *ret |= (1 << (val - 1)); + + if (sep == NULL) + break; + tok = sep + 1; + } + + return true; +} + +bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret) +{ + int i; + + for (i = 0; i < sas->nr_entries; ++i) { + struct strarray *sa = sas->entries[i]; + + if (strarray__strtoul(sa, bf, size, ret)) + return true; + } + + return false; +} + size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg) { @@ -499,6 +699,16 @@ size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *ar return scnprintf(bf, size, "%ld", arg->val); } +static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg) +{ + // XXX Hey, maybe for sched:sched_switch prev/next comm fields we can + // fill missing comms using thread__set_comm()... + // here or in a special syscall_arg__scnprintf_pid_sched_tp... + return scnprintf(bf, size, "\"%-.*s\"", arg->fmt->nr_entries ?: arg->len, arg->val); +} + +#define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array + static const char *bpf_cmd[] = { "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM", "MAP_GET_NEXT_KEY", "PROG_LOAD", @@ -672,10 +882,12 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #define STRARRAY(name, array) \ { .scnprintf = SCA_STRARRAY, \ + .strtoul = STUL_STRARRAY, \ .parm = &strarray__##array, } #define STRARRAY_FLAGS(name, array) \ { .scnprintf = SCA_STRARRAY_FLAGS, \ + .strtoul = STUL_STRARRAY_FLAGS, \ .parm = &strarray__##array, } #include "trace/beauty/arch_errno_names.c" @@ -694,27 +906,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #include "trace/beauty/socket_type.c" #include "trace/beauty/waitid_options.c" -struct syscall_arg_fmt { - size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); - unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); - void *parm; - const char *name; - bool show_zero; -}; - -static struct syscall_fmt { - const char *name; - const char *alias; - struct { - const char *sys_enter, - *sys_exit; - } bpf_prog_name; - struct syscall_arg_fmt arg[6]; - u8 nr_args; - bool errpid; - bool timeout; - bool hexret; -} syscall_fmts[] = { +static struct syscall_fmt syscall_fmts[] = { { .name = "access", .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, { .name = "arch_prctl", @@ -751,7 +943,8 @@ static struct syscall_fmt { { .name = "fchownat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "fcntl", - .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */ + .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */ + .strtoul = STUL_STRARRAYS, .parm = &strarrays__fcntl_cmds_arrays, .show_zero = true, }, [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, @@ -822,7 +1015,9 @@ static struct syscall_fmt { .alias = "old_mmap", #endif .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, - [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, + [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ + .strtoul = STUL_STRARRAY_FLAGS, + .parm = &strarray__mmap_flags, }, [5] = { .scnprintf = SCA_HEX, /* offset */ }, }, }, { .name = "mount", .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ }, @@ -964,24 +1159,35 @@ static int syscall_fmt__cmp(const void *name, const void *fmtp) return strcmp(name, fmt->name); } +static struct syscall_fmt *__syscall_fmt__find(struct syscall_fmt *fmts, const int nmemb, const char *name) +{ + return bsearch(name, fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); +} + static struct syscall_fmt *syscall_fmt__find(const char *name) { const int nmemb = ARRAY_SIZE(syscall_fmts); - return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); + return __syscall_fmt__find(syscall_fmts, nmemb, name); } -static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) +static struct syscall_fmt *__syscall_fmt__find_by_alias(struct syscall_fmt *fmts, const int nmemb, const char *alias) { - int i, nmemb = ARRAY_SIZE(syscall_fmts); + int i; for (i = 0; i < nmemb; ++i) { - if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0) - return &syscall_fmts[i]; + if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0) + return &fmts[i]; } return NULL; } +static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) +{ + const int nmemb = ARRAY_SIZE(syscall_fmts); + return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias); +} + /* * is_exit: is this "exit" or "exit_group"? * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter. @@ -1453,15 +1659,39 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) return 0; } -static int syscall__set_arg_fmts(struct syscall *sc) +static struct syscall_arg_fmt syscall_arg_fmts__by_name[] = { + { .name = "msr", .scnprintf = SCA_X86_MSR, .strtoul = STUL_X86_MSR, }, + { .name = "vector", .scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, }, +}; + +static int syscall_arg_fmt__cmp(const void *name, const void *fmtp) +{ + const struct syscall_arg_fmt *fmt = fmtp; + return strcmp(name, fmt->name); +} + +static struct syscall_arg_fmt * +__syscall_arg_fmt__find_by_name(struct syscall_arg_fmt *fmts, const int nmemb, const char *name) +{ + return bsearch(name, fmts, nmemb, sizeof(struct syscall_arg_fmt), syscall_arg_fmt__cmp); +} + +static struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name) { - struct tep_format_field *field, *last_field = NULL; - int idx = 0, len; + const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name); + return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name); +} - for (field = sc->args; field; field = field->next, ++idx) { +static struct tep_format_field * +syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field) +{ + struct tep_format_field *last_field = NULL; + int len; + + for (; field; field = field->next, ++arg) { last_field = field; - if (sc->fmt && sc->fmt->arg[idx].scnprintf) + if (arg->scnprintf) continue; len = strlen(field->name); @@ -1469,14 +1699,17 @@ static int syscall__set_arg_fmts(struct syscall *sc) if (strcmp(field->type, "const char *") == 0 && ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) || strstr(field->name, "path") != NULL)) - sc->arg_fmt[idx].scnprintf = SCA_FILENAME; + arg->scnprintf = SCA_FILENAME; else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr")) - sc->arg_fmt[idx].scnprintf = SCA_PTR; + arg->scnprintf = SCA_PTR; else if (strcmp(field->type, "pid_t") == 0) - sc->arg_fmt[idx].scnprintf = SCA_PID; + arg->scnprintf = SCA_PID; else if (strcmp(field->type, "umode_t") == 0) - sc->arg_fmt[idx].scnprintf = SCA_MODE_T; - else if ((strcmp(field->type, "int") == 0 || + arg->scnprintf = SCA_MODE_T; + else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char")) { + arg->scnprintf = SCA_CHAR_ARRAY; + arg->nr_entries = field->arraylen; + } else if ((strcmp(field->type, "int") == 0 || strcmp(field->type, "unsigned int") == 0 || strcmp(field->type, "long") == 0) && len >= 2 && strcmp(field->name + len - 2, "fd") == 0) { @@ -1487,10 +1720,24 @@ static int syscall__set_arg_fmts(struct syscall *sc) * 23 unsigned int * 7 unsigned long */ - sc->arg_fmt[idx].scnprintf = SCA_FD; + arg->scnprintf = SCA_FD; + } else { + struct syscall_arg_fmt *fmt = syscall_arg_fmt__find_by_name(field->name); + + if (fmt) { + arg->scnprintf = fmt->scnprintf; + arg->strtoul = fmt->strtoul; + } } } + return last_field; +} + +static int syscall__set_arg_fmts(struct syscall *sc) +{ + struct tep_format_field *last_field = syscall_arg_fmt__init_array(sc->arg_fmt, sc->args); + if (last_field) sc->args_size = last_field->offset + last_field->size; @@ -1552,6 +1799,18 @@ static int trace__read_syscall_info(struct trace *trace, int id) return syscall__set_arg_fmts(sc); } +static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel) +{ + struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel); + + if (fmt != NULL) { + syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields); + return 0; + } + + return -ENOMEM; +} + static int intcmp(const void *a, const void *b) { const int *one = a, *another = b; @@ -1680,22 +1939,22 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, * as mount 'flags' argument that needs ignoring some magic flag, see comment * in tools/perf/trace/beauty/mount_flags.c */ -static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val) +static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val) { - if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val) - return sc->arg_fmt[arg->idx].mask_val(arg, val); + if (fmt && fmt->mask_val) + return fmt->mask_val(arg, val); return val; } -static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, - struct syscall_arg *arg, unsigned long val) +static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size, + struct syscall_arg *arg, unsigned long val) { - if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) { + if (fmt && fmt->scnprintf) { arg->val = val; - if (sc->arg_fmt[arg->idx].parm) - arg->parm = sc->arg_fmt[arg->idx].parm; - return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg); + if (fmt->parm) + arg->parm = fmt->parm; + return fmt->scnprintf(bf, size, arg); } return scnprintf(bf, size, "%ld", val); } @@ -1736,12 +1995,13 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (arg.mask & bit) continue; + arg.fmt = &sc->arg_fmt[arg.idx]; val = syscall_arg__val(&arg, arg.idx); /* * Some syscall args need some mask, most don't and * return val untouched. */ - val = syscall__mask_val(sc, &arg, val); + val = syscall_arg_fmt__mask_val(&sc->arg_fmt[arg.idx], &arg, val); /* * Suppress this argument if its value is zero and @@ -1762,7 +2022,8 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (trace->show_arg_names) printed += scnprintf(bf + printed, size - printed, "%s: ", field->name); - printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); + printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], + bf + printed, size - printed, &arg, val); } } else if (IS_ERR(sc->tp_format)) { /* @@ -1777,7 +2038,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, if (printed) printed += scnprintf(bf + printed, size - printed, ", "); printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg); - printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val); + printed += syscall_arg_fmt__scnprintf_val(&sc->arg_fmt[arg.idx], bf + printed, size - printed, &arg, val); next_arg: ++arg.idx; bit <<= 1; @@ -1844,11 +2105,18 @@ out_cant_read: return NULL; } -static void thread__update_stats(struct thread_trace *ttrace, - int id, struct perf_sample *sample) +struct syscall_stats { + struct stats stats; + u64 nr_failures; + int max_errno; + u32 *errnos; +}; + +static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace, + int id, struct perf_sample *sample, long err, bool errno_summary) { struct int_node *inode; - struct stats *stats; + struct syscall_stats *stats; u64 duration = 0; inode = intlist__findnew(ttrace->syscall_stats, id); @@ -1857,17 +2125,46 @@ static void thread__update_stats(struct thread_trace *ttrace, stats = inode->priv; if (stats == NULL) { - stats = malloc(sizeof(struct stats)); + stats = malloc(sizeof(*stats)); if (stats == NULL) return; - init_stats(stats); + + stats->nr_failures = 0; + stats->max_errno = 0; + stats->errnos = NULL; + init_stats(&stats->stats); inode->priv = stats; } if (ttrace->entry_time && sample->time > ttrace->entry_time) duration = sample->time - ttrace->entry_time; - update_stats(stats, duration); + update_stats(&stats->stats, duration); + + if (err < 0) { + ++stats->nr_failures; + + if (!errno_summary) + return; + + err = -err; + if (err > stats->max_errno) { + u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32)); + + if (new_errnos) { + memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32)); + } else { + pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n", + thread__comm_str(thread), thread->pid_, thread->tid); + return; + } + + stats->errnos = new_errnos; + stats->max_errno = err; + } + + ++stats->errnos[err - 1]; + } } static int trace__printf_interrupted_entry(struct trace *trace) @@ -2112,11 +2409,11 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel, trace__fprintf_sample(trace, evsel, sample, thread); - if (trace->summary) - thread__update_stats(ttrace, id, sample); - ret = perf_evsel__sc_tp_uint(evsel, ret, sample); + if (trace->summary) + thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary); + if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) { trace__set_fd_pathname(thread, ret, ttrace->filename.name); ttrace->filename.pending_open = false; @@ -2346,6 +2643,80 @@ static void bpf_output__fprintf(struct trace *trace, ++trace->nr_events_printed; } +static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample, + struct thread *thread, void *augmented_args, int augmented_args_size) +{ + char bf[2048]; + size_t size = sizeof(bf); + struct tep_format_field *field = evsel->tp_format->format.fields; + struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel); + size_t printed = 0; + unsigned long val; + u8 bit = 1; + struct syscall_arg syscall_arg = { + .augmented = { + .size = augmented_args_size, + .args = augmented_args, + }, + .idx = 0, + .mask = 0, + .trace = trace, + .thread = thread, + .show_string_prefix = trace->show_string_prefix, + }; + + for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) { + if (syscall_arg.mask & bit) + continue; + + syscall_arg.len = 0; + syscall_arg.fmt = arg; + if (field->flags & TEP_FIELD_IS_ARRAY) { + int offset = field->offset; + + if (field->flags & TEP_FIELD_IS_DYNAMIC) { + offset = format_field__intval(field, sample, evsel->needs_swap); + syscall_arg.len = offset >> 16; + offset &= 0xffff; + } + + val = (uintptr_t)(sample->raw_data + offset); + } else + val = format_field__intval(field, sample, evsel->needs_swap); + /* + * Some syscall args need some mask, most don't and + * return val untouched. + */ + val = syscall_arg_fmt__mask_val(arg, &syscall_arg, val); + + /* + * Suppress this argument if its value is zero and + * and we don't have a string associated in an + * strarray for it. + */ + if (val == 0 && + !trace->show_zeros && + !((arg->show_zero || + arg->scnprintf == SCA_STRARRAY || + arg->scnprintf == SCA_STRARRAYS) && + arg->parm)) + continue; + + printed += scnprintf(bf + printed, size - printed, "%s", printed ? ", " : ""); + + /* + * XXX Perhaps we should have a show_tp_arg_names, + * leaving show_arg_names just for syscalls? + */ + if (1 || trace->show_arg_names) + printed += scnprintf(bf + printed, size - printed, "%s: ", field->name); + + printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val); + } + + return printed + fprintf(trace->output, "%s", bf); +} + static int trace__event_handler(struct trace *trace, struct evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2399,32 +2770,37 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, */ } - fprintf(trace->output, "%s:", evsel->name); + fprintf(trace->output, "%s(", evsel->name); if (perf_evsel__is_bpf_output(evsel)) { bpf_output__fprintf(trace, sample); } else if (evsel->tp_format) { if (strncmp(evsel->tp_format->name, "sys_enter_", 10) || trace__fprintf_sys_enter(trace, evsel, sample)) { - event_format__fprintf(evsel->tp_format, sample->cpu, - sample->raw_data, sample->raw_size, - trace->output); - ++trace->nr_events_printed; - - if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) { - evsel__disable(evsel); - evsel__close(evsel); + if (trace->libtraceevent_print) { + event_format__fprintf(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size, + trace->output); + } else { + trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, 0); } } } newline: - fprintf(trace->output, "\n"); + fprintf(trace->output, ")\n"); if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + + ++trace->nr_events_printed; + + if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) { + evsel__disable(evsel); + evsel__close(evsel); + } out: thread__put(thread); return 0; @@ -2576,21 +2952,23 @@ static int trace__record(struct trace *trace, int argc, const char **argv) "-m", "1024", "-c", "1", }; - + pid_t pid = getpid(); + char *filter = asprintf__tp_filter_pids(1, &pid); const char * const sc_args[] = { "-e", }; unsigned int sc_args_nr = ARRAY_SIZE(sc_args); const char * const majpf_args[] = { "-e", "major-faults" }; unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); const char * const minpf_args[] = { "-e", "minor-faults" }; unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); + int err = -1; - /* +1 is for the event string below */ - rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 + + /* +3 is for the event string below and the pid filter */ + rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 + majpf_args_nr + minpf_args_nr + argc; rec_argv = calloc(rec_argc + 1, sizeof(char *)); - if (rec_argv == NULL) - return -ENOMEM; + if (rec_argv == NULL || filter == NULL) + goto out_free; j = 0; for (i = 0; i < ARRAY_SIZE(record_args); i++) @@ -2607,11 +2985,13 @@ static int trace__record(struct trace *trace, int argc, const char **argv) rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; else { pr_err("Neither raw_syscalls nor syscalls events exist.\n"); - free(rec_argv); - return -1; + goto out_free; } } + rec_argv[j++] = "--filter"; + rec_argv[j++] = filter; + if (trace->trace_pgfaults & TRACE_PFMAJ) for (i = 0; i < majpf_args_nr; i++) rec_argv[j++] = majpf_args[i]; @@ -2623,7 +3003,11 @@ static int trace__record(struct trace *trace, int argc, const char **argv) for (i = 0; i < (unsigned int)argc; i++) rec_argv[j++] = argv[i]; - return cmd_record(j, rec_argv); + err = cmd_record(j, rec_argv); +out_free: + free(filter); + free(rec_argv); + return err; } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); @@ -3103,7 +3487,27 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) return err; } -#else + +static void trace__delete_augmented_syscalls(struct trace *trace) +{ + struct evsel *evsel, *tmp; + + evlist__remove(trace->evlist, trace->syscalls.events.augmented); + evsel__delete(trace->syscalls.events.augmented); + trace->syscalls.events.augmented = NULL; + + evlist__for_each_entry_safe(trace->evlist, tmp, evsel) { + if (evsel->bpf_obj == trace->bpf_obj) { + evlist__remove(trace->evlist, evsel); + evsel__delete(evsel); + } + + } + + bpf_object__close(trace->bpf_obj); + trace->bpf_obj = NULL; +} +#else // HAVE_LIBBPF_SUPPORT static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused) { return 0; @@ -3124,8 +3528,27 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_ { return 0; } + +static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused) +{ +} #endif // HAVE_LIBBPF_SUPPORT +static bool trace__only_augmented_syscalls_evsels(struct trace *trace) +{ + struct evsel *evsel; + + evlist__for_each_entry(trace->evlist, evsel) { + if (evsel == trace->syscalls.events.augmented || + evsel->bpf_obj == trace->bpf_obj) + continue; + + return false; + } + + return true; +} + static int trace__set_ev_qualifier_filter(struct trace *trace) { if (trace->syscalls.map) @@ -3175,7 +3598,7 @@ static int trace__set_filter_loop_pids(struct trace *trace) thread = parent; } - err = perf_evlist__set_tp_filter_pids(trace->evlist, nr, pids); + err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids); if (!err && trace->filter_pids.map) err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids); @@ -3192,8 +3615,8 @@ static int trace__set_filter_pids(struct trace *trace) * we fork the workload in perf_evlist__prepare_workload. */ if (trace->filter_pids.nr > 0) { - err = perf_evlist__set_tp_filter_pids(trace->evlist, trace->filter_pids.nr, - trace->filter_pids.entries); + err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr, + trace->filter_pids.entries); if (!err && trace->filter_pids.map) { err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr, trace->filter_pids.entries); @@ -3263,6 +3686,137 @@ static int ordered_events__deliver_event(struct ordered_events *oe, return __trace__deliver_event(trace, event->event); } +static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg) +{ + struct tep_format_field *field; + struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel); + + if (evsel->tp_format == NULL || fmt == NULL) + return NULL; + + for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt) + if (strcmp(field->name, arg) == 0) + return fmt; + + return NULL; +} + +static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel) +{ + char *tok, *left = evsel->filter, *new_filter = evsel->filter; + + while ((tok = strpbrk(left, "=<>!")) != NULL) { + char *right = tok + 1, *right_end; + + if (*right == '=') + ++right; + + while (isspace(*right)) + ++right; + + if (*right == '\0') + break; + + while (!isalpha(*left)) + if (++left == tok) { + /* + * Bail out, can't find the name of the argument that is being + * used in the filter, let it try to set this filter, will fail later. + */ + return 0; + } + + right_end = right + 1; + while (isalnum(*right_end) || *right_end == '_' || *right_end == '|') + ++right_end; + + if (isalpha(*right)) { + struct syscall_arg_fmt *fmt; + int left_size = tok - left, + right_size = right_end - right; + char arg[128]; + + while (isspace(left[left_size - 1])) + --left_size; + + scnprintf(arg, sizeof(arg), "%.*s", left_size, left); + + fmt = perf_evsel__syscall_arg_fmt(evsel, arg); + if (fmt == NULL) { + pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n", + arg, evsel->name, evsel->filter); + return -1; + } + + pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> ", + arg, (int)(right - tok), tok, right_size, right); + + if (fmt->strtoul) { + u64 val; + struct syscall_arg syscall_arg = { + .parm = fmt->parm, + }; + + if (fmt->strtoul(right, right_size, &syscall_arg, &val)) { + char *n, expansion[19]; + int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val); + int expansion_offset = right - new_filter; + + pr_debug("%s", expansion); + + if (asprintf(&n, "%.*s%s%s", expansion_offset, new_filter, expansion, right_end) < 0) { + pr_debug(" out of memory!\n"); + free(new_filter); + return -1; + } + if (new_filter != evsel->filter) + free(new_filter); + left = n + expansion_offset + expansion_lenght; + new_filter = n; + } else { + pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n", + right_size, right, arg, evsel->name, evsel->filter); + return -1; + } + } else { + pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n", + arg, evsel->name, evsel->filter); + return -1; + } + + pr_debug("\n"); + } else { + left = right_end; + } + } + + if (new_filter != evsel->filter) { + pr_debug("New filter for %s: %s\n", evsel->name, new_filter); + perf_evsel__set_filter(evsel, new_filter); + free(new_filter); + } + + return 0; +} + +static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel) +{ + struct evlist *evlist = trace->evlist; + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->filter == NULL) + continue; + + if (trace__expand_filter(trace, evsel)) { + *err_evsel = evsel; + return -1; + } + } + + return 0; +} + static int trace__run(struct trace *trace, int argc, const char **argv) { struct evlist *evlist = trace->evlist; @@ -3302,7 +3856,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; - /* * If a global cgroup was set, apply it to all the events without an * explicit cgroup. I.e.: @@ -3405,6 +3958,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) */ trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close")); + err = trace__expand_filters(trace, &evsel); + if (err) + goto out_delete_evlist; err = perf_evlist__apply_filters(evlist, &evsel); if (err < 0) goto out_error_apply_filters; @@ -3450,17 +4006,17 @@ again: struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { ++trace->nr_events; err = trace__deliver_event(trace, event); if (err) goto out_disable; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (interrupted) goto out_disable; @@ -3470,7 +4026,7 @@ again: draining = true; } } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (trace->nr_events == before) { @@ -3665,17 +4221,17 @@ static size_t trace__fprintf_threads_header(FILE *fp) } DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, - struct stats *stats; - double msecs; - int syscall; + struct syscall_stats *stats; + double msecs; + int syscall; ) { struct int_node *source = rb_entry(nd, struct int_node, rb_node); - struct stats *stats = source->priv; + struct syscall_stats *stats = source->priv; entry->syscall = source->i; entry->stats = stats; - entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; + entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0; } static size_t thread__dump_stats(struct thread_trace *ttrace, @@ -3691,27 +4247,37 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, "\n"); - printed += fprintf(fp, " syscall calls total min avg max stddev\n"); - printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); - printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); resort_rb__for_each_entry(nd, syscall_stats) { - struct stats *stats = syscall_stats_entry->stats; + struct syscall_stats *stats = syscall_stats_entry->stats; if (stats) { - double min = (double)(stats->min) / NSEC_PER_MSEC; - double max = (double)(stats->max) / NSEC_PER_MSEC; - double avg = avg_stats(stats); + double min = (double)(stats->stats.min) / NSEC_PER_MSEC; + double max = (double)(stats->stats.max) / NSEC_PER_MSEC; + double avg = avg_stats(&stats->stats); double pct; - u64 n = (u64) stats->n; + u64 n = (u64)stats->stats.n; - pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0; + pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0; avg /= NSEC_PER_MSEC; sc = &trace->syscalls.table[syscall_stats_entry->syscall]; printed += fprintf(fp, " %-15s", sc->name); - printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", - n, syscall_stats_entry->msecs, min, avg); + printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f", + n, stats->nr_failures, syscall_stats_entry->msecs, min, avg); printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); + + if (trace->errno_summary && stats->nr_failures) { + const char *arch_name = perf_env__arch(trace->host->env); + int e; + + for (e = 0; e < stats->max_errno; ++e) { + if (stats->errnos[e] != 0) + fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]); + } + } } } @@ -3858,12 +4424,33 @@ static int parse_pagefaults(const struct option *opt, const char *str, return 0; } -static void evlist__set_evsel_handler(struct evlist *evlist, void *handler) +static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler) { struct evsel *evsel; - evlist__for_each_entry(evlist, evsel) - evsel->handler = handler; + evlist__for_each_entry(evlist, evsel) { + if (evsel->handler == NULL) + evsel->handler = handler; + } +} + +static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name) +{ + struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel); + + if (fmt) { + struct syscall_fmt *scfmt = syscall_fmt__find(name); + + if (scfmt) { + int skip = 0; + + if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 || + strcmp(evsel->tp_format->format.fields->name, "nr") == 0) + ++skip; + + memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt)); + } + } } static int evlist__set_syscall_tp_fields(struct evlist *evlist) @@ -3874,22 +4461,28 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist) if (evsel->priv || !evsel->tp_format) continue; - if (strcmp(evsel->tp_format->system, "syscalls")) + if (strcmp(evsel->tp_format->system, "syscalls")) { + perf_evsel__init_tp_arg_scnprintf(evsel); continue; + } if (perf_evsel__init_syscall_tp(evsel)) return -1; if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) { - struct syscall_tp *sc = evsel->priv; + struct syscall_tp *sc = __evsel__syscall_tp(evsel); if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64))) return -1; + + evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1); } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) { - struct syscall_tp *sc = evsel->priv; + struct syscall_tp *sc = __evsel__syscall_tp(evsel); if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap)) return -1; + + evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1); } } @@ -4029,15 +4622,11 @@ static int trace__config(const char *var, const char *value, void *arg) int err = 0; if (!strcmp(var, "trace.add_events")) { - struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", - "event selector. use 'perf list' to list available events", - parse_events_option); - /* - * We can't propagate parse_event_option() return, as it is 1 - * for failure while perf_config() expects -1. - */ - if (parse_events_option(&o, value, 0)) - err = -1; + trace->perfconfig_events = strdup(value); + if (trace->perfconfig_events == NULL) { + pr_err("Not enough memory for %s\n", "trace.add_events"); + return -1; + } } else if (!strcmp(var, "trace.show_timestamp")) { trace->show_tstamp = perf_config_bool(var, value); } else if (!strcmp(var, "trace.show_duration")) { @@ -4061,6 +4650,11 @@ static int trace__config(const char *var, const char *value, void *arg) int args_alignment = 0; if (perf_config_int(&args_alignment, var, value) == 0) trace->args_alignment = args_alignment; + } else if (!strcmp(var, "trace.tracepoint_beautifiers")) { + if (strcasecmp(value, "libtraceevent") == 0) + trace->libtraceevent_print = true; + else if (strcasecmp(value, "libbeauty") == 0) + trace->libtraceevent_print = false; } out: return err; @@ -4103,6 +4697,8 @@ int cmd_trace(int argc, const char **argv) OPT_CALLBACK('e', "event", &trace, "event", "event/syscall selector. use 'perf list' to list available events", trace__parse_events_option), + OPT_CALLBACK(0, "filter", &trace.evlist, "filter", + "event filter", parse_filter), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), @@ -4143,6 +4739,8 @@ int cmd_trace(int argc, const char **argv) "Show only syscall summary with statistics"), OPT_BOOLEAN('S', "with-summary", &trace.summary, "Show all syscalls and summary with statistics"), + OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary, + "Show errno stats per syscall, use with -s or -S"), OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", "Trace pagefaults", parse_pagefaults, "maj"), OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), @@ -4150,6 +4748,8 @@ int cmd_trace(int argc, const char **argv) OPT_CALLBACK(0, "call-graph", &trace.opts, "record_mode[,record_size]", record_callchain_help, &record_parse_callchain_opt), + OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print, + "Use libtraceevent to print the tracepoint arguments."), OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, "Show the kernel callchains on the syscall exit path"), OPT_ULONG(0, "max-events", &trace.max_events, @@ -4210,6 +4810,37 @@ int cmd_trace(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); + /* + * Here we already passed thru trace__parse_events_option() and it has + * already figured out if -e syscall_name, if not but if --event + * foo:bar was used, the user is interested _just_ in those, say, + * tracepoint events, not in the strace-like syscall-name-based mode. + * + * This is important because we need to check if strace-like mode is + * needed to decided if we should filter out the eBPF + * __augmented_syscalls__ code, if it is in the mix, say, via + * .perfconfig trace.add_events, and filter those out. + */ + if (!trace.trace_syscalls && !trace.trace_pgfaults && + trace.evlist->core.nr_entries == 0 /* Was --events used? */) { + trace.trace_syscalls = true; + } + /* + * Now that we have --verbose figured out, lets see if we need to parse + * events from .perfconfig, so that if those events fail parsing, say some + * BPF program fails, then we'll be able to use --verbose to see what went + * wrong in more detail. + */ + if (trace.perfconfig_events != NULL) { + struct parse_events_error parse_err = { .idx = 0, }; + + err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err); + if (err) { + parse_events_print_error(&parse_err, trace.perfconfig_events); + goto out; + } + } + if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) { usage_with_options_msg(trace_usage, trace_options, "cgroup monitoring only available in system-wide mode"); @@ -4238,9 +4869,45 @@ int cmd_trace(int argc, const char **argv) trace.bpf_obj = evsel->bpf_obj; - trace__set_bpf_map_filtered_pids(&trace); - trace__set_bpf_map_syscalls(&trace); - trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented"); + /* + * If we have _just_ the augmenter event but don't have a + * explicit --syscalls, then assume we want all strace-like + * syscalls: + */ + if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace)) + trace.trace_syscalls = true; + /* + * So, if we have a syscall augmenter, but trace_syscalls, aka + * strace-like syscall tracing is not set, then we need to trow + * away the augmenter, i.e. all the events that were created + * from that BPF object file. + * + * This is more to fix the current .perfconfig trace.add_events + * style of setting up the strace-like eBPF based syscall point + * payload augmenter. + * + * All this complexity will be avoided by adding an alternative + * to trace.add_events in the form of + * trace.bpf_augmented_syscalls, that will be only parsed if we + * need it. + * + * .perfconfig trace.add_events is still useful if we want, for + * instance, have msr_write.msr in some .perfconfig profile based + * 'perf trace --config determinism.profile' mode, where for some + * particular goal/workload type we want a set of events and + * output mode (with timings, etc) instead of having to add + * all via the command line. + * + * Also --config to specify an alternate .perfconfig file needs + * to be implemented. + */ + if (!trace.trace_syscalls) { + trace__delete_augmented_syscalls(&trace); + } else { + trace__set_bpf_map_filtered_pids(&trace); + trace__set_bpf_map_syscalls(&trace); + trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented"); + } } err = bpf__setup_stdout(trace.evlist); @@ -4287,7 +4954,7 @@ int cmd_trace(int argc, const char **argv) } if (trace.evlist->core.nr_entries > 0) { - evlist__set_evsel_handler(trace.evlist, trace__event_handler); + evlist__set_default_evsel_handler(trace.evlist, trace__event_handler); if (evlist__set_syscall_tp_fields(trace.evlist)) { perror("failed to set syscalls:* tracepoint fields"); goto out; @@ -4348,7 +5015,7 @@ int cmd_trace(int argc, const char **argv) init_augmented_syscall_tp: if (perf_evsel__init_augmented_syscall_tp(evsel, evsel)) goto out; - sc = evsel->priv; + sc = __evsel__syscall_tp(evsel); /* * For now with BPF raw_augmented we hook into * raw_syscalls:sys_enter and there we get all @@ -4379,15 +5046,14 @@ init_augmented_syscall_tp: if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) return trace__record(&trace, argc-1, &argv[1]); + /* Using just --errno-summary will trigger --summary */ + if (trace.errno_summary && !trace.summary && !trace.summary_only) + trace.summary_only = true; + /* summary_only implies summary option, but don't overwrite summary if set */ if (trace.summary_only) trace.summary = trace.summary_only; - if (!trace.trace_syscalls && !trace.trace_pgfaults && - trace.evlist->core.nr_entries == 0 /* Was --events used? */) { - trace.trace_syscalls = true; - } - if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { @@ -4426,5 +5092,6 @@ out_close: if (output_name != NULL) fclose(trace.output); out: + zfree(&trace.perfconfig_events); return err; } diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index cea13cb987d0..48290a0c917c 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -28,6 +28,8 @@ arch/x86/include/asm/disabled-features.h arch/x86/include/asm/required-features.h arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/inat_types.h +arch/x86/include/asm/irq_vectors.h +arch/x86/include/asm/msr-index.h arch/x86/include/uapi/asm/prctl.h arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk diff --git a/tools/perf/lib/Build b/tools/perf/lib/Build index c31f1c111f8f..2ef9a4ec6d99 100644 --- a/tools/perf/lib/Build +++ b/tools/perf/lib/Build @@ -3,6 +3,7 @@ libperf-y += cpumap.o libperf-y += threadmap.o libperf-y += evsel.o libperf-y += evlist.o +libperf-y += mmap.o libperf-y += zalloc.o libperf-y += xyarray.o libperf-y += lib.o diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile index 85ccb8c439a4..0f233638ef1f 100644 --- a/tools/perf/lib/Makefile +++ b/tools/perf/lib/Makefile @@ -107,6 +107,7 @@ else endif LIBAPI = $(API_PATH)libapi.a +export LIBAPI $(LIBAPI): FORCE $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a @@ -172,8 +173,9 @@ install_headers: $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); - $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); + $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ + $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ + $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ diff --git a/tools/perf/lib/core.c b/tools/perf/lib/core.c index d0b9ae422b9f..58fc894b76c5 100644 --- a/tools/perf/lib/core.c +++ b/tools/perf/lib/core.c @@ -5,11 +5,12 @@ #include <stdio.h> #include <stdarg.h> #include <unistd.h> +#include <linux/compiler.h> #include <perf/core.h> #include <internal/lib.h> #include "internal.h" -static int __base_pr(enum libperf_print_level level, const char *format, +static int __base_pr(enum libperf_print_level level __maybe_unused, const char *format, va_list args) { return vfprintf(stderr, format, args); diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c index d1496fee810c..205ddbb80bc1 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/perf/lib/evlist.c @@ -8,13 +8,20 @@ #include <internal/evlist.h> #include <internal/evsel.h> #include <internal/xyarray.h> +#include <internal/mmap.h> +#include <internal/cpumap.h> +#include <internal/threadmap.h> +#include <internal/xyarray.h> +#include <internal/lib.h> #include <linux/zalloc.h> +#include <sys/ioctl.h> #include <stdlib.h> #include <errno.h> #include <unistd.h> #include <fcntl.h> #include <signal.h> #include <poll.h> +#include <sys/mman.h> #include <perf/cpumap.h> #include <perf/threadmap.h> #include <api/fd/array.h> @@ -27,6 +34,7 @@ void perf_evlist__init(struct perf_evlist *evlist) INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); evlist->nr_entries = 0; + fdarray__init(&evlist->pollfd, 64); } static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, @@ -101,8 +109,36 @@ perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev) return next; } +static void perf_evlist__purge(struct perf_evlist *evlist) +{ + struct perf_evsel *pos, *n; + + perf_evlist__for_each_entry_safe(evlist, n, pos) { + list_del_init(&pos->node); + perf_evsel__delete(pos); + } + + evlist->nr_entries = 0; +} + +void perf_evlist__exit(struct perf_evlist *evlist) +{ + perf_cpu_map__put(evlist->cpus); + perf_thread_map__put(evlist->threads); + evlist->cpus = NULL; + evlist->threads = NULL; + fdarray__exit(&evlist->pollfd); +} + void perf_evlist__delete(struct perf_evlist *evlist) { + if (evlist == NULL) + return; + + perf_evlist__munmap(evlist); + perf_evlist__close(evlist); + perf_evlist__purge(evlist); + perf_evlist__exit(evlist); free(evlist); } @@ -277,7 +313,328 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, return pos; } +static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, + void *arg __maybe_unused) +{ + struct perf_mmap *map = fda->priv[fd].ptr; + + if (map) + perf_mmap__put(map); +} + +int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) +{ + return fdarray__filter(&evlist->pollfd, revents_and_mask, + perf_evlist__munmap_filtered, NULL); +} + int perf_evlist__poll(struct perf_evlist *evlist, int timeout) { return fdarray__poll(&evlist->pollfd, timeout); } + +static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite) +{ + int i; + struct perf_mmap *map; + + map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); + if (!map) + return NULL; + + for (i = 0; i < evlist->nr_mmaps; i++) { + struct perf_mmap *prev = i ? &map[i - 1] : NULL; + + /* + * When the perf_mmap() call is made we grab one refcount, plus + * one extra to let perf_mmap__consume() get the last + * events after all real references (perf_mmap__get()) are + * dropped. + * + * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and + * thus does perf_mmap__get() on it. + */ + perf_mmap__init(&map[i], prev, overwrite, NULL); + } + + return map; +} + +static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, + struct perf_evsel *evsel, int idx, int cpu, + int thread) +{ + struct perf_sample_id *sid = SID(evsel, cpu, thread); + + sid->idx = idx; + if (evlist->cpus && cpu >= 0) + sid->cpu = evlist->cpus->map[cpu]; + else + sid->cpu = -1; + if (!evsel->system_wide && evlist->threads && thread >= 0) + sid->tid = perf_thread_map__pid(evlist->threads, thread); + else + sid->tid = -1; +} + +static struct perf_mmap* +perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) +{ + struct perf_mmap *maps; + + maps = overwrite ? evlist->mmap_ovw : evlist->mmap; + + if (!maps) { + maps = perf_evlist__alloc_mmap(evlist, overwrite); + if (!maps) + return NULL; + + if (overwrite) + evlist->mmap_ovw = maps; + else + evlist->mmap = maps; + } + + return &maps[idx]; +} + +#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) + +static int +perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, + int output, int cpu) +{ + return perf_mmap__mmap(map, mp, output, cpu); +} + +static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_mmap *map, + bool overwrite) +{ + if (overwrite) + evlist->mmap_ovw_first = map; + else + evlist->mmap_first = map; +} + +static int +mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + int idx, struct perf_mmap_param *mp, int cpu_idx, + int thread, int *_output, int *_output_overwrite) +{ + int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); + struct perf_evsel *evsel; + int revent; + + perf_evlist__for_each_entry(evlist, evsel) { + bool overwrite = evsel->attr.write_backward; + struct perf_mmap *map; + int *output, fd, cpu; + + if (evsel->system_wide && thread) + continue; + + cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu); + if (cpu == -1) + continue; + + map = ops->get(evlist, overwrite, idx); + if (map == NULL) + return -ENOMEM; + + if (overwrite) { + mp->prot = PROT_READ; + output = _output_overwrite; + } else { + mp->prot = PROT_READ | PROT_WRITE; + output = _output; + } + + fd = FD(evsel, cpu, thread); + + if (*output == -1) { + *output = fd; + + /* + * The last one will be done at perf_mmap__consume(), so that we + * make sure we don't prevent tools from consuming every last event in + * the ring buffer. + * + * I.e. we can get the POLLHUP meaning that the fd doesn't exist + * anymore, but the last events for it are still in the ring buffer, + * waiting to be consumed. + * + * Tools can chose to ignore this at their own discretion, but the + * evlist layer can't just drop it when filtering events in + * perf_evlist__filter_pollfd(). + */ + refcount_set(&map->refcnt, 2); + + if (ops->mmap(map, mp, *output, evlist_cpu) < 0) + return -1; + + if (!idx) + perf_evlist__set_mmap_first(evlist, map, overwrite); + } else { + if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) + return -1; + + perf_mmap__get(map); + } + + revent = !overwrite ? POLLIN : 0; + + if (!evsel->system_wide && + perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) { + perf_mmap__put(map); + return -1; + } + + if (evsel->attr.read_format & PERF_FORMAT_ID) { + if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, + fd) < 0) + return -1; + perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, + thread); + } + } + + return 0; +} + +static int +mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp) +{ + int thread; + int nr_threads = perf_thread_map__nr(evlist->threads); + + for (thread = 0; thread < nr_threads; thread++) { + int output = -1; + int output_overwrite = -1; + + if (ops->idx) + ops->idx(evlist, mp, thread, false); + + if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, + &output, &output_overwrite)) + goto out_unmap; + } + + return 0; + +out_unmap: + perf_evlist__munmap(evlist); + return -1; +} + +static int +mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp) +{ + int nr_threads = perf_thread_map__nr(evlist->threads); + int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int cpu, thread; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + int output = -1; + int output_overwrite = -1; + + if (ops->idx) + ops->idx(evlist, mp, cpu, true); + + for (thread = 0; thread < nr_threads; thread++) { + if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, + thread, &output, &output_overwrite)) + goto out_unmap; + } + } + + return 0; + +out_unmap: + perf_evlist__munmap(evlist); + return -1; +} + +static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) +{ + int nr_mmaps; + + nr_mmaps = perf_cpu_map__nr(evlist->cpus); + if (perf_cpu_map__empty(evlist->cpus)) + nr_mmaps = perf_thread_map__nr(evlist->threads); + + return nr_mmaps; +} + +int perf_evlist__mmap_ops(struct perf_evlist *evlist, + struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp) +{ + struct perf_evsel *evsel; + const struct perf_cpu_map *cpus = evlist->cpus; + const struct perf_thread_map *threads = evlist->threads; + + if (!ops || !ops->get || !ops->mmap) + return -EINVAL; + + mp->mask = evlist->mmap_len - page_size - 1; + + evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist); + + perf_evlist__for_each_entry(evlist, evsel) { + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + evsel->sample_id == NULL && + perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + return -ENOMEM; + } + + if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) + return -ENOMEM; + + if (perf_cpu_map__empty(cpus)) + return mmap_per_thread(evlist, ops, mp); + + return mmap_per_cpu(evlist, ops, mp); +} + +int perf_evlist__mmap(struct perf_evlist *evlist, int pages) +{ + struct perf_mmap_param mp; + struct perf_evlist_mmap_ops ops = { + .get = perf_evlist__mmap_cb_get, + .mmap = perf_evlist__mmap_cb_mmap, + }; + + evlist->mmap_len = (pages + 1) * page_size; + + return perf_evlist__mmap_ops(evlist, &ops, &mp); +} + +void perf_evlist__munmap(struct perf_evlist *evlist) +{ + int i; + + if (evlist->mmap) { + for (i = 0; i < evlist->nr_mmaps; i++) + perf_mmap__munmap(&evlist->mmap[i]); + } + + if (evlist->mmap_ovw) { + for (i = 0; i < evlist->nr_mmaps; i++) + perf_mmap__munmap(&evlist->mmap_ovw[i]); + } + + zfree(&evlist->mmap); + zfree(&evlist->mmap_ovw); +} + +struct perf_mmap* +perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, + bool overwrite) +{ + if (map) + return map->next; + + return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first; +} diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h index 9f440ab12b76..a2fbccf1922f 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/perf/lib/include/internal/evlist.h @@ -11,6 +11,7 @@ struct perf_cpu_map; struct perf_thread_map; +struct perf_mmap_param; struct perf_evlist { struct list_head entries; @@ -22,12 +23,36 @@ struct perf_evlist { size_t mmap_len; struct fdarray pollfd; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; + struct perf_mmap *mmap; + struct perf_mmap *mmap_ovw; + struct perf_mmap *mmap_first; + struct perf_mmap *mmap_ovw_first; +}; + +typedef void +(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); +typedef struct perf_mmap* +(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); +typedef int +(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int); + +struct perf_evlist_mmap_ops { + perf_evlist_mmap__cb_idx_t idx; + perf_evlist_mmap__cb_get_t get; + perf_evlist_mmap__cb_mmap_t mmap; }; int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, void *ptr, short revent); +int perf_evlist__mmap_ops(struct perf_evlist *evlist, + struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp); + +void perf_evlist__init(struct perf_evlist *evlist); +void perf_evlist__exit(struct perf_evlist *evlist); + /** * __perf_evlist__for_each_entry - iterate thru all the evsels * @list: list_head instance to iterate @@ -60,6 +85,24 @@ int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, #define perf_evlist__for_each_entry_reverse(evlist, evsel) \ __perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel) +/** + * __perf_evlist__for_each_entry_safe - safely iterate thru all the evsels + * @list: list_head instance to iterate + * @tmp: struct evsel temp iterator + * @evsel: struct evsel iterator + */ +#define __perf_evlist__for_each_entry_safe(list, tmp, evsel) \ + list_for_each_entry_safe(evsel, tmp, list, node) + +/** + * perf_evlist__for_each_entry_safe - safely iterate thru all the evsels + * @evlist: evlist instance to iterate + * @evsel: struct evsel iterator + * @tmp: struct evsel temp iterator + */ +#define perf_evlist__for_each_entry_safe(evlist, tmp, evsel) \ + __perf_evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel) + static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) { return list_entry(evlist->entries.next, struct perf_evsel, node); diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h index a69b8299c36f..1ffd083b235e 100644 --- a/tools/perf/lib/include/internal/evsel.h +++ b/tools/perf/lib/include/internal/evsel.h @@ -50,6 +50,7 @@ struct perf_evsel { bool system_wide; }; +void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__close_fd(struct perf_evsel *evsel); void perf_evsel__free_fd(struct perf_evsel *evsel); diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h index ba1e519c15b9..be7556e0a2b2 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/perf/lib/include/internal/mmap.h @@ -10,23 +10,46 @@ /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) +struct perf_mmap; + +typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map); + /** * struct perf_mmap - perf's ring buffer mmap details * * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this */ struct perf_mmap { - void *base; - int mask; - int fd; - int cpu; - refcount_t refcnt; - u64 prev; - u64 start; - u64 end; - bool overwrite; - u64 flush; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + void *base; + int mask; + int fd; + int cpu; + refcount_t refcnt; + u64 prev; + u64 start; + u64 end; + bool overwrite; + u64 flush; + libperf_unmap_cb_t unmap_cb; + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + struct perf_mmap *next; +}; + +struct perf_mmap_param { + int prot; + int mask; }; +size_t perf_mmap__mmap_len(struct perf_mmap *map); + +void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, + bool overwrite, libperf_unmap_cb_t unmap_cb); +int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, + int fd, int cpu); +void perf_mmap__munmap(struct perf_mmap *map); +void perf_mmap__get(struct perf_mmap *map); +void perf_mmap__put(struct perf_mmap *map); + +u64 perf_mmap__read_head(struct perf_mmap *map); + #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/include/internal/tests.h b/tools/perf/lib/include/internal/tests.h index b7a20cd24ee1..2093e8868a67 100644 --- a/tools/perf/lib/include/internal/tests.h +++ b/tools/perf/lib/include/internal/tests.h @@ -4,14 +4,28 @@ #include <stdio.h> -#define __T_START fprintf(stdout, "- running %s...", __FILE__) -#define __T_OK fprintf(stdout, "OK\n") -#define __T_FAIL fprintf(stdout, "FAIL\n") +int tests_failed; + +#define __T_START \ +do { \ + fprintf(stdout, "- running %s...", __FILE__); \ + fflush(NULL); \ + tests_failed = 0; \ +} while (0) + +#define __T_END \ +do { \ + if (tests_failed) \ + fprintf(stdout, " FAILED (%d)\n", tests_failed); \ + else \ + fprintf(stdout, "OK\n"); \ +} while (0) #define __T(text, cond) \ do { \ if (!(cond)) { \ fprintf(stderr, "FAILED %s:%d %s\n", __FILE__, __LINE__, text); \ + tests_failed++; \ return -1; \ } \ } while (0) diff --git a/tools/perf/lib/include/perf/core.h b/tools/perf/lib/include/perf/core.h index cfd70e720c1c..a3f6d68edad7 100644 --- a/tools/perf/lib/include/perf/core.h +++ b/tools/perf/lib/include/perf/core.h @@ -9,9 +9,12 @@ #endif enum libperf_print_level { + LIBPERF_ERR, LIBPERF_WARN, LIBPERF_INFO, LIBPERF_DEBUG, + LIBPERF_DEBUG2, + LIBPERF_DEBUG3, }; typedef int (*libperf_print_fn_t)(enum libperf_print_level level, diff --git a/tools/perf/lib/include/perf/evlist.h b/tools/perf/lib/include/perf/evlist.h index 8a2ce0757ab2..0a7479dc13bf 100644 --- a/tools/perf/lib/include/perf/evlist.h +++ b/tools/perf/lib/include/perf/evlist.h @@ -3,13 +3,13 @@ #define __LIBPERF_EVLIST_H #include <perf/core.h> +#include <stdbool.h> struct perf_evlist; struct perf_evsel; struct perf_cpu_map; struct perf_thread_map; -LIBPERF_API void perf_evlist__init(struct perf_evlist *evlist); LIBPERF_API void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *evsel); LIBPERF_API void perf_evlist__remove(struct perf_evlist *evlist, @@ -32,5 +32,18 @@ LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist, struct perf_cpu_map *cpus, struct perf_thread_map *threads); LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout); +LIBPERF_API int perf_evlist__filter_pollfd(struct perf_evlist *evlist, + short revents_and_mask); + +LIBPERF_API int perf_evlist__mmap(struct perf_evlist *evlist, int pages); +LIBPERF_API void perf_evlist__munmap(struct perf_evlist *evlist); + +LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist, + struct perf_mmap *map, + bool overwrite); +#define perf_evlist__for_each_mmap(evlist, pos, overwrite) \ + for ((pos) = perf_evlist__next_mmap((evlist), NULL, overwrite); \ + (pos) != NULL; \ + (pos) = perf_evlist__next_mmap((evlist), (pos), overwrite)) #endif /* __LIBPERF_EVLIST_H */ diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h index 4388667f265c..557f5815a9c9 100644 --- a/tools/perf/lib/include/perf/evsel.h +++ b/tools/perf/lib/include/perf/evsel.h @@ -21,8 +21,6 @@ struct perf_counts_values { }; }; -LIBPERF_API void perf_evsel__init(struct perf_evsel *evsel, - struct perf_event_attr *attr); LIBPERF_API struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr); LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h new file mode 100644 index 000000000000..9508ad90d8b9 --- /dev/null +++ b/tools/perf/lib/include/perf/mmap.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LIBPERF_MMAP_H +#define __LIBPERF_MMAP_H + +#include <perf/core.h> + +struct perf_mmap; +union perf_event; + +LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); +LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map); +LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map); +LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map); + +#endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/internal.h b/tools/perf/lib/internal.h index dc92f241732e..2c27e158de6b 100644 --- a/tools/perf/lib/internal.h +++ b/tools/perf/lib/internal.h @@ -2,6 +2,8 @@ #ifndef __LIBPERF_INTERNAL_H #define __LIBPERF_INTERNAL_H +#include <perf/core.h> + void libperf_print(enum libperf_print_level level, const char *format, ...) __attribute__((format(printf, 2, 3))); @@ -11,8 +13,11 @@ do { \ libperf_print(level, "libperf: " fmt, ##__VA_ARGS__); \ } while (0) +#define pr_err(fmt, ...) __pr(LIBPERF_ERR, fmt, ##__VA_ARGS__) #define pr_warning(fmt, ...) __pr(LIBPERF_WARN, fmt, ##__VA_ARGS__) #define pr_info(fmt, ...) __pr(LIBPERF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBPERF_DEBUG, fmt, ##__VA_ARGS__) +#define pr_debug2(fmt, ...) __pr(LIBPERF_DEBUG2, fmt, ##__VA_ARGS__) +#define pr_debug3(fmt, ...) __pr(LIBPERF_DEBUG3, fmt, ##__VA_ARGS__) #endif /* __LIBPERF_INTERNAL_H */ diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map index ab8dbde1136c..7be1af8a546c 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/perf/lib/libperf.map @@ -21,7 +21,6 @@ LIBPERF_0.0.1 { perf_evsel__delete; perf_evsel__enable; perf_evsel__disable; - perf_evsel__init; perf_evsel__open; perf_evsel__close; perf_evsel__read; @@ -34,12 +33,19 @@ LIBPERF_0.0.1 { perf_evlist__close; perf_evlist__enable; perf_evlist__disable; - perf_evlist__init; perf_evlist__add; perf_evlist__remove; perf_evlist__next; perf_evlist__set_maps; perf_evlist__poll; + perf_evlist__mmap; + perf_evlist__munmap; + perf_evlist__filter_pollfd; + perf_evlist__next_mmap; + perf_mmap__consume; + perf_mmap__read_init; + perf_mmap__read_done; + perf_mmap__read_event; local: *; }; diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c new file mode 100644 index 000000000000..79d5ed6c38cc --- /dev/null +++ b/tools/perf/lib/mmap.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/mman.h> +#include <inttypes.h> +#include <asm/bug.h> +#include <errno.h> +#include <string.h> +#include <linux/ring_buffer.h> +#include <linux/perf_event.h> +#include <perf/mmap.h> +#include <perf/event.h> +#include <internal/mmap.h> +#include <internal/lib.h> +#include <linux/kernel.h> +#include "internal.h" + +void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, + bool overwrite, libperf_unmap_cb_t unmap_cb) +{ + map->fd = -1; + map->overwrite = overwrite; + map->unmap_cb = unmap_cb; + refcount_set(&map->refcnt, 0); + if (prev) + prev->next = map; +} + +size_t perf_mmap__mmap_len(struct perf_mmap *map) +{ + return map->mask + 1 + page_size; +} + +int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, + int fd, int cpu) +{ + map->prev = 0; + map->mask = mp->mask; + map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, + MAP_SHARED, fd, 0); + if (map->base == MAP_FAILED) { + map->base = NULL; + return -1; + } + + map->fd = fd; + map->cpu = cpu; + return 0; +} + +void perf_mmap__munmap(struct perf_mmap *map) +{ + if (map && map->base != NULL) { + munmap(map->base, perf_mmap__mmap_len(map)); + map->base = NULL; + map->fd = -1; + refcount_set(&map->refcnt, 0); + } + if (map && map->unmap_cb) + map->unmap_cb(map); +} + +void perf_mmap__get(struct perf_mmap *map) +{ + refcount_inc(&map->refcnt); +} + +void perf_mmap__put(struct perf_mmap *map) +{ + BUG_ON(map->base && refcount_read(&map->refcnt) == 0); + + if (refcount_dec_and_test(&map->refcnt)) + perf_mmap__munmap(map); +} + +static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) +{ + ring_buffer_write_tail(md->base, tail); +} + +u64 perf_mmap__read_head(struct perf_mmap *map) +{ + return ring_buffer_read_head(map->base); +} + +static bool perf_mmap__empty(struct perf_mmap *map) +{ + struct perf_event_mmap_page *pc = map->base; + + return perf_mmap__read_head(map) == map->prev && !pc->aux_size; +} + +void perf_mmap__consume(struct perf_mmap *map) +{ + if (!map->overwrite) { + u64 old = map->prev; + + perf_mmap__write_tail(map, old); + } + + if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) + perf_mmap__put(map); +} + +static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) +{ + struct perf_event_header *pheader; + u64 evt_head = *start; + int size = mask + 1; + + pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); + pheader = (struct perf_event_header *)(buf + (*start & mask)); + while (true) { + if (evt_head - *start >= (unsigned int)size) { + pr_debug("Finished reading overwrite ring buffer: rewind\n"); + if (evt_head - *start > (unsigned int)size) + evt_head -= pheader->size; + *end = evt_head; + return 0; + } + + pheader = (struct perf_event_header *)(buf + (evt_head & mask)); + + if (pheader->size == 0) { + pr_debug("Finished reading overwrite ring buffer: get start\n"); + *end = evt_head; + return 0; + } + + evt_head += pheader->size; + pr_debug3("move evt_head: %"PRIx64"\n", evt_head); + } + WARN_ONCE(1, "Shouldn't get here\n"); + return -1; +} + +/* + * Report the start and end of the available data in ringbuffer + */ +static int __perf_mmap__read_init(struct perf_mmap *md) +{ + u64 head = perf_mmap__read_head(md); + u64 old = md->prev; + unsigned char *data = md->base + page_size; + unsigned long size; + + md->start = md->overwrite ? head : old; + md->end = md->overwrite ? old : head; + + if ((md->end - md->start) < md->flush) + return -EAGAIN; + + size = md->end - md->start; + if (size > (unsigned long)(md->mask) + 1) { + if (!md->overwrite) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + + md->prev = head; + perf_mmap__consume(md); + return -EAGAIN; + } + + /* + * Backward ring buffer is full. We still have a chance to read + * most of data from it. + */ + if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end)) + return -EINVAL; + } + + return 0; +} + +int perf_mmap__read_init(struct perf_mmap *map) +{ + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return -ENOENT; + + return __perf_mmap__read_init(map); +} + +/* + * Mandatory for overwrite mode + * The direction of overwrite mode is backward. + * The last perf_mmap__read() will set tail to map->core.prev. + * Need to correct the map->core.prev to head which is the end of next read. + */ +void perf_mmap__read_done(struct perf_mmap *map) +{ + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return; + + map->prev = perf_mmap__read_head(map); +} + +/* When check_messup is true, 'end' must points to a good entry */ +static union perf_event *perf_mmap__read(struct perf_mmap *map, + u64 *startp, u64 end) +{ + unsigned char *data = map->base + page_size; + union perf_event *event = NULL; + int diff = end - *startp; + + if (diff >= (int)sizeof(event->header)) { + size_t size; + + event = (union perf_event *)&data[*startp & map->mask]; + size = event->header.size; + + if (size < sizeof(event->header) || diff < (int)size) + return NULL; + + /* + * Event straddles the mmap boundary -- header should always + * be inside due to u64 alignment of output. + */ + if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { + unsigned int offset = *startp; + unsigned int len = min(sizeof(*event), size), cpy; + void *dst = map->event_copy; + + do { + cpy = min(map->mask + 1 - (offset & map->mask), len); + memcpy(dst, &data[offset & map->mask], cpy); + offset += cpy; + dst += cpy; + len -= cpy; + } while (len); + + event = (union perf_event *)map->event_copy; + } + + *startp += size; + } + + return event; +} + +/* + * Read event from ring buffer one by one. + * Return one event for each call. + * + * Usage: + * perf_mmap__read_init() + * while(event = perf_mmap__read_event()) { + * //process the event + * perf_mmap__consume() + * } + * perf_mmap__read_done() + */ +union perf_event *perf_mmap__read_event(struct perf_mmap *map) +{ + union perf_event *event; + + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. + */ + if (!refcount_read(&map->refcnt)) + return NULL; + + /* non-overwirte doesn't pause the ringbuffer */ + if (!map->overwrite) + map->end = perf_mmap__read_head(map); + + event = perf_mmap__read(map, &map->start, map->end); + + if (!map->overwrite) + map->prev = map->start; + + return event; +} diff --git a/tools/perf/lib/tests/Makefile b/tools/perf/lib/tests/Makefile index 1ee4e9ba848b..a43cd08c5c03 100644 --- a/tools/perf/lib/tests/Makefile +++ b/tools/perf/lib/tests/Makefile @@ -16,13 +16,13 @@ all: include $(srctree)/tools/scripts/Makefile.include -INCLUDE = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include +INCLUDE = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/lib $(TESTS_A): FORCE - $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a + $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a $(LIBAPI) $(TESTS_SO): FORCE - $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -L.. -o $@ $(subst -so,.c,$@) -lperf + $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -L.. -o $@ $(subst -so,.c,$@) $(LIBAPI) -lperf all: $(TESTS_A) $(TESTS_SO) diff --git a/tools/perf/lib/tests/test-cpumap.c b/tools/perf/lib/tests/test-cpumap.c index aa34c20df07e..c8d45091e7c2 100644 --- a/tools/perf/lib/tests/test-cpumap.c +++ b/tools/perf/lib/tests/test-cpumap.c @@ -26,6 +26,6 @@ int main(int argc, char **argv) perf_cpu_map__put(cpus); perf_cpu_map__put(cpus); - __T_OK; + __T_END; return 0; } diff --git a/tools/perf/lib/tests/test-evlist.c b/tools/perf/lib/tests/test-evlist.c index e6b2ab2e2bde..6d8ebe0c2504 100644 --- a/tools/perf/lib/tests/test-evlist.c +++ b/tools/perf/lib/tests/test-evlist.c @@ -1,12 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET) +#include <sched.h> #include <stdio.h> #include <stdarg.h> +#include <unistd.h> +#include <stdlib.h> #include <linux/perf_event.h> +#include <linux/limits.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/prctl.h> #include <perf/cpumap.h> #include <perf/threadmap.h> #include <perf/evlist.h> #include <perf/evsel.h> +#include <perf/mmap.h> +#include <perf/event.h> #include <internal/tests.h> +#include <api/fs/fs.h> static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) @@ -181,6 +192,210 @@ static int test_stat_thread_enable(void) return 0; } +static int test_mmap_thread(void) +{ + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_mmap *map; + struct perf_cpu_map *cpus; + struct perf_thread_map *threads; + struct perf_event_attr attr = { + .type = PERF_TYPE_TRACEPOINT, + .sample_period = 1, + .wakeup_watermark = 1, + .disabled = 1, + }; + char path[PATH_MAX]; + int id, err, pid, go_pipe[2]; + union perf_event *event; + char bf; + int count = 0; + + snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id", + sysfs__mountpoint()); + + if (filename__read_int(path, &id)) { + fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); + return -1; + } + + attr.config = id; + + err = pipe(go_pipe); + __T("failed to create pipe", err == 0); + + fflush(NULL); + + pid = fork(); + if (!pid) { + int i; + + read(go_pipe[0], &bf, 1); + + /* Generate 100 prctl calls. */ + for (i = 0; i < 100; i++) + prctl(0, 0, 0, 0, 0); + + exit(0); + } + + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + cpus = perf_cpu_map__dummy_new(); + __T("failed to create cpus", cpus); + + perf_thread_map__set_pid(threads, 0, pid); + + evlist = perf_evlist__new(); + __T("failed to create evlist", evlist); + + evsel = perf_evsel__new(&attr); + __T("failed to create evsel1", evsel); + + perf_evlist__add(evlist, evsel); + + perf_evlist__set_maps(evlist, cpus, threads); + + err = perf_evlist__open(evlist); + __T("failed to open evlist", err == 0); + + err = perf_evlist__mmap(evlist, 4); + __T("failed to mmap evlist", err == 0); + + perf_evlist__enable(evlist); + + /* kick the child and wait for it to finish */ + write(go_pipe[1], &bf, 1); + waitpid(pid, NULL, 0); + + /* + * There's no need to call perf_evlist__disable, + * monitored process is dead now. + */ + + perf_evlist__for_each_mmap(evlist, map, false) { + if (perf_mmap__read_init(map) < 0) + continue; + + while ((event = perf_mmap__read_event(map)) != NULL) { + count++; + perf_mmap__consume(map); + } + + perf_mmap__read_done(map); + } + + /* calls perf_evlist__munmap/perf_evlist__close */ + perf_evlist__delete(evlist); + + perf_thread_map__put(threads); + perf_cpu_map__put(cpus); + + /* + * The generated prctl calls should match the + * number of events in the buffer. + */ + __T("failed count", count == 100); + + return 0; +} + +static int test_mmap_cpus(void) +{ + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_mmap *map; + struct perf_cpu_map *cpus; + struct perf_event_attr attr = { + .type = PERF_TYPE_TRACEPOINT, + .sample_period = 1, + .wakeup_watermark = 1, + .disabled = 1, + }; + cpu_set_t saved_mask; + char path[PATH_MAX]; + int id, err, cpu, tmp; + union perf_event *event; + int count = 0; + + snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id", + sysfs__mountpoint()); + + if (filename__read_int(path, &id)) { + fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); + return -1; + } + + attr.config = id; + + cpus = perf_cpu_map__new(NULL); + __T("failed to create cpus", cpus); + + evlist = perf_evlist__new(); + __T("failed to create evlist", evlist); + + evsel = perf_evsel__new(&attr); + __T("failed to create evsel1", evsel); + + perf_evlist__add(evlist, evsel); + + perf_evlist__set_maps(evlist, cpus, NULL); + + err = perf_evlist__open(evlist); + __T("failed to open evlist", err == 0); + + err = perf_evlist__mmap(evlist, 4); + __T("failed to mmap evlist", err == 0); + + perf_evlist__enable(evlist); + + err = sched_getaffinity(0, sizeof(saved_mask), &saved_mask); + __T("sched_getaffinity failed", err == 0); + + perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + + err = sched_setaffinity(0, sizeof(mask), &mask); + __T("sched_setaffinity failed", err == 0); + + prctl(0, 0, 0, 0, 0); + } + + err = sched_setaffinity(0, sizeof(saved_mask), &saved_mask); + __T("sched_setaffinity failed", err == 0); + + perf_evlist__disable(evlist); + + perf_evlist__for_each_mmap(evlist, map, false) { + if (perf_mmap__read_init(map) < 0) + continue; + + while ((event = perf_mmap__read_event(map)) != NULL) { + count++; + perf_mmap__consume(map); + } + + perf_mmap__read_done(map); + } + + /* calls perf_evlist__munmap/perf_evlist__close */ + perf_evlist__delete(evlist); + + /* + * The generated prctl events should match the + * number of cpus or be bigger (we are system-wide). + */ + __T("failed count", count >= perf_cpu_map__nr(cpus)); + + perf_cpu_map__put(cpus); + + return 0; +} + int main(int argc, char **argv) { __T_START; @@ -190,7 +405,9 @@ int main(int argc, char **argv) test_stat_cpu(); test_stat_thread(); test_stat_thread_enable(); + test_mmap_thread(); + test_mmap_cpus(); - __T_OK; + __T_END; return 0; } diff --git a/tools/perf/lib/tests/test-evsel.c b/tools/perf/lib/tests/test-evsel.c index 1b6c4285ac2b..135722ac965b 100644 --- a/tools/perf/lib/tests/test-evsel.c +++ b/tools/perf/lib/tests/test-evsel.c @@ -130,6 +130,6 @@ int main(int argc, char **argv) test_stat_thread(); test_stat_thread_enable(); - __T_OK; + __T_END; return 0; } diff --git a/tools/perf/lib/tests/test-threadmap.c b/tools/perf/lib/tests/test-threadmap.c index 8c5f47247d9e..7dc4d6fbedde 100644 --- a/tools/perf/lib/tests/test-threadmap.c +++ b/tools/perf/lib/tests/test-threadmap.c @@ -26,6 +26,6 @@ int main(int argc, char **argv) perf_thread_map__put(threads); perf_thread_map__put(threads); - __T_OK; + __T_END; return 0; } diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 63e4349a772a..15e458e150bd 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -15,7 +15,9 @@ void test_attr__init(void); void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, int fd, int group_fd, unsigned long flags); -#define HAVE_ATTR_TEST +#ifndef HAVE_ATTR_TEST +#define HAVE_ATTR_TEST 1 +#endif static inline int sys_perf_event_open(struct perf_event_attr *attr, @@ -27,7 +29,7 @@ sys_perf_event_open(struct perf_event_attr *attr, fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); -#ifdef HAVE_ATTR_TEST +#if HAVE_ATTR_TEST if (unlikely(test_attr__enabled)) test_attr__open(attr, pid, cpu, fd, group_fd, flags); #endif diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json index 0d1556fcdffe..7da86942dae2 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json @@ -1,5 +1,19 @@ [ { + "EventCode": "0x00", + "EventName": "uncore_hisi_ddrc.flux_wr", + "BriefDescription": "DDRC total write operations", + "PublicDescription": "DDRC total write operations", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x01", + "EventName": "uncore_hisi_ddrc.flux_rd", + "BriefDescription": "DDRC total read operations", + "PublicDescription": "DDRC total read operations", + "Unit": "hisi_sccl,ddrc", + }, + { "EventCode": "0x02", "EventName": "uncore_hisi_ddrc.flux_wcmd", "BriefDescription": "DDRC write commands", @@ -15,7 +29,7 @@ }, { "EventCode": "0x04", - "EventName": "uncore_hisi_ddrc.flux_wr", + "EventName": "uncore_hisi_ddrc.pre_cmd", "BriefDescription": "DDRC precharge commands", "PublicDescription": "DDRC precharge commands", "Unit": "hisi_sccl,ddrc", diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json index 447d3064de90..3be418a248ea 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json @@ -21,6 +21,13 @@ "Unit": "hisi_sccl,hha", }, { + "EventCode": "0x03", + "EventName": "uncore_hisi_hha.rx_ccix", + "BriefDescription": "Count of the number of operations that HHA has received from CCIX", + "PublicDescription": "Count of the number of operations that HHA has received from CCIX", + "Unit": "hisi_sccl,hha", + }, + { "EventCode": "0x1c", "EventName": "uncore_hisi_hha.rd_ddr_64b", "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", @@ -29,7 +36,7 @@ }, { "EventCode": "0x1d", - "EventName": "uncore_hisi_hha.wr_dr_64b", + "EventName": "uncore_hisi_hha.wr_ddr_64b", "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", "Unit": "hisi_sccl,hha", @@ -48,4 +55,18 @@ "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", "Unit": "hisi_sccl,hha", }, + { + "EventCode": "0x20", + "EventName": "uncore_hisi_hha.spill_num", + "BriefDescription": "Count of the number of spill operations that the HHA has sent", + "PublicDescription": "Count of the number of spill operations that the HHA has sent", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x21", + "EventName": "uncore_hisi_hha.spill_success", + "BriefDescription": "Count of the number of successful spill operations that the HHA has sent", + "PublicDescription": "Count of the number of successful spill operations that the HHA has sent", + "Unit": "hisi_sccl,hha", + }, ] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json index ca48747642e1..f463d0acfaef 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json @@ -34,4 +34,60 @@ "PublicDescription": "l3c precharge commands", "Unit": "hisi_sccl,l3c", }, + { + "EventCode": "0x20", + "EventName": "uncore_hisi_l3c.rd_spipe", + "BriefDescription": "Count of the number of read lines that come from this cluster of CPU core in spipe", + "PublicDescription": "Count of the number of read lines that come from this cluster of CPU core in spipe", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x21", + "EventName": "uncore_hisi_l3c.wr_spipe", + "BriefDescription": "Count of the number of write lines that come from this cluster of CPU core in spipe", + "PublicDescription": "Count of the number of write lines that come from this cluster of CPU core in spipe", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x22", + "EventName": "uncore_hisi_l3c.rd_hit_spipe", + "BriefDescription": "Count of the number of read lines that hits in spipe of this L3C", + "PublicDescription": "Count of the number of read lines that hits in spipe of this L3C", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x23", + "EventName": "uncore_hisi_l3c.wr_hit_spipe", + "BriefDescription": "Count of the number of write lines that hits in spipe of this L3C", + "PublicDescription": "Count of the number of write lines that hits in spipe of this L3C", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x29", + "EventName": "uncore_hisi_l3c.back_invalid", + "BriefDescription": "Count of the number of L3C back invalid operations", + "PublicDescription": "Count of the number of L3C back invalid operations", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x40", + "EventName": "uncore_hisi_l3c.retry_cpu", + "BriefDescription": "Count of the number of retry that L3C suppresses the CPU operations", + "PublicDescription": "Count of the number of retry that L3C suppresses the CPU operations", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x41", + "EventName": "uncore_hisi_l3c.retry_ring", + "BriefDescription": "Count of the number of retry that L3C suppresses the ring operations", + "PublicDescription": "Count of the number of retry that L3C suppresses the ring operations", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x42", + "EventName": "uncore_hisi_l3c.prefetch_drop", + "BriefDescription": "Count of the number of prefetch drops from this L3C", + "PublicDescription": "Count of the number of prefetch drops from this L3C", + "Unit": "hisi_sccl,l3c", + }, ] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index e2837260ca4d..7d69727f44bd 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -322,7 +322,8 @@ static int print_events_table_entry(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name, char *metric_group) + char *metric_name, char *metric_group, + char *deprecated) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -354,6 +355,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name); if (metric_group) fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group); + if (deprecated) + fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated); fprintf(outfp, "},\n"); return 0; @@ -371,6 +374,7 @@ struct event_struct { char *metric_expr; char *metric_name; char *metric_group; + char *deprecated; }; #define ADD_EVENT_FIELD(field) do { if (field) { \ @@ -398,6 +402,7 @@ struct event_struct { op(metric_expr); \ op(metric_name); \ op(metric_group); \ + op(deprecated); \ } while (0) static LIST_HEAD(arch_std_events); @@ -416,7 +421,8 @@ static void free_arch_std_events(void) static int save_arch_std_events(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name, char *metric_group) + char *metric_name, char *metric_group, + char *deprecated) { struct event_struct *es; @@ -479,7 +485,8 @@ static int try_fixup(const char *fn, char *arch_std, char **event, char **desc, char **name, char **long_desc, char **pmu, char **filter, char **perpkg, char **unit, char **metric_expr, char **metric_name, - char **metric_group, unsigned long long eventcode) + char **metric_group, unsigned long long eventcode, + char **deprecated) { /* try to find matching event from arch standard values */ struct event_struct *es; @@ -507,7 +514,8 @@ int json_events(const char *fn, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name, char *metric_group), + char *metric_name, char *metric_group, + char *deprecated), void *data) { int err; @@ -536,6 +544,7 @@ int json_events(const char *fn, char *metric_expr = NULL; char *metric_name = NULL; char *metric_group = NULL; + char *deprecated = NULL; char *arch_std = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; @@ -614,6 +623,8 @@ int json_events(const char *fn, addfield(map, &unit, "", "", val); } else if (json_streq(map, field, "PerPkg")) { addfield(map, &perpkg, "", "", val); + } else if (json_streq(map, field, "Deprecated")) { + addfield(map, &deprecated, "", "", val); } else if (json_streq(map, field, "MetricName")) { addfield(map, &metric_name, "", "", val); } else if (json_streq(map, field, "MetricGroup")) { @@ -658,12 +669,14 @@ int json_events(const char *fn, err = try_fixup(fn, arch_std, &event, &desc, &name, &long_desc, &pmu, &filter, &perpkg, &unit, &metric_expr, &metric_name, - &metric_group, eventcode); + &metric_group, eventcode, + &deprecated); if (err) goto free_strings; } err = func(data, name, real_event(name, event), desc, long_desc, - pmu, unit, perpkg, metric_expr, metric_name, metric_group); + pmu, unit, perpkg, metric_expr, metric_name, + metric_group, deprecated); free_strings: free(event); free(desc); @@ -673,6 +686,7 @@ free_strings: free(pmu); free(filter); free(perpkg); + free(deprecated); free(unit); free(metric_expr); free(metric_name); diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 4684c673c445..5cda49a42143 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -7,7 +7,8 @@ int json_events(const char *fn, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name, char *metric_group), + char *metric_name, char *metric_group, + char *deprecated), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 92a4d15ee0b9..caeb577d36c9 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -17,6 +17,7 @@ struct pmu_event { const char *metric_expr; const char *metric_name; const char *metric_group; + const char *deprecated; }; /* diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 61b3911d91e6..ebc6a2e5eae9 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -105,6 +105,9 @@ except ImportError: glb_nsz = 16 import re import os +import random +import copy +import math pyside_version_1 = True if not "--pyside-version-1" in sys.argv: @@ -341,6 +344,15 @@ def LookupCreateModel(model_name, create_fn): model_cache_lock.release() return model +def LookupModel(model_name): + model_cache_lock.acquire() + try: + model = model_cache[model_name] + except: + model = None + model_cache_lock.release() + return model + # Find bar class FindBar(): @@ -785,15 +797,16 @@ class CallGraphModel(CallGraphModelBase): class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item): + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item): super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item) self.comm_id = comm_id self.thread_id = thread_id self.calls_id = calls_id + self.call_time = call_time + self.time = time self.insn_cnt = insn_cnt self.cyc_cnt = cyc_cnt self.branch_count = branch_count - self.time = time def Select(self): self.query_done = True @@ -830,17 +843,17 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): - def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item): - super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item) + def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item): + super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, call_time, time, insn_cnt, cyc_cnt, branch_count, parent_item) dso = dsoname(dso) if self.params.have_ipc: insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt) cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt) br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count) ipc = CalcIPC(cyc_cnt, insn_cnt) - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] + self.data = [ name, dso, str(call_time), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ] else: - self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] + self.data = [ name, dso, str(call_time), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] self.dbid = calls_id # Call tree data model level two item @@ -848,7 +861,7 @@ class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase): def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item): - super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, parent_item) + super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, 0, parent_item) if self.params.have_ipc: self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""] else: @@ -971,20 +984,41 @@ class CallTreeModel(CallGraphModelBase): ids.insert(0, query.value(1)) return ids -# Vertical widget layout +# Vertical layout -class VBox(): +class HBoxLayout(QHBoxLayout): - def __init__(self, w1, w2, w3=None): - self.vbox = QWidget() - self.vbox.setLayout(QVBoxLayout()) + def __init__(self, *children): + super(HBoxLayout, self).__init__() + + self.layout().setContentsMargins(0, 0, 0, 0) + for child in children: + if child.isWidgetType(): + self.layout().addWidget(child) + else: + self.layout().addLayout(child) + +# Horizontal layout + +class VBoxLayout(QVBoxLayout): - self.vbox.layout().setContentsMargins(0, 0, 0, 0) + def __init__(self, *children): + super(VBoxLayout, self).__init__() - self.vbox.layout().addWidget(w1) - self.vbox.layout().addWidget(w2) - if w3: - self.vbox.layout().addWidget(w3) + self.layout().setContentsMargins(0, 0, 0, 0) + for child in children: + if child.isWidgetType(): + self.layout().addWidget(child) + else: + self.layout().addLayout(child) + +# Vertical layout widget + +class VBox(): + + def __init__(self, *children): + self.vbox = QWidget() + self.vbox.setLayout(VBoxLayout(*children)) def Widget(self): return self.vbox @@ -1063,7 +1097,7 @@ class CallGraphWindow(TreeWindowBase): class CallTreeWindow(TreeWindowBase): - def __init__(self, glb, parent=None): + def __init__(self, glb, parent=None, thread_at_time=None): super(CallTreeWindow, self).__init__(parent) self.model = LookupCreateModel("Call Tree", lambda x=glb: CallTreeModel(x)) @@ -1081,6 +1115,1343 @@ class CallTreeWindow(TreeWindowBase): AddSubWindow(glb.mainwindow.mdi_area, self, "Call Tree") + if thread_at_time: + self.DisplayThreadAtTime(*thread_at_time) + + def DisplayThreadAtTime(self, comm_id, thread_id, time): + parent = QModelIndex() + for dbid in (comm_id, thread_id): + found = False + n = self.model.rowCount(parent) + for row in xrange(n): + child = self.model.index(row, 0, parent) + if child.internalPointer().dbid == dbid: + found = True + self.view.setCurrentIndex(child) + parent = child + break + if not found: + return + found = False + while True: + n = self.model.rowCount(parent) + if not n: + return + last_child = None + for row in xrange(n): + child = self.model.index(row, 0, parent) + child_call_time = child.internalPointer().call_time + if child_call_time < time: + last_child = child + elif child_call_time == time: + self.view.setCurrentIndex(child) + return + elif child_call_time > time: + break + if not last_child: + if not found: + child = self.model.index(0, 0, parent) + self.view.setCurrentIndex(child) + return + found = True + self.view.setCurrentIndex(last_child) + parent = last_child + +# ExecComm() gets the comm_id of the command string that was set when the process exec'd i.e. the program name + +def ExecComm(db, thread_id, time): + query = QSqlQuery(db) + QueryExec(query, "SELECT comm_threads.comm_id, comms.c_time, comms.exec_flag" + " FROM comm_threads" + " INNER JOIN comms ON comms.id = comm_threads.comm_id" + " WHERE comm_threads.thread_id = " + str(thread_id) + + " ORDER BY comms.c_time, comms.id") + first = None + last = None + while query.next(): + if first is None: + first = query.value(0) + if query.value(2) and Decimal(query.value(1)) <= Decimal(time): + last = query.value(0) + if not(last is None): + return last + return first + +# Container for (x, y) data + +class XY(): + def __init__(self, x=0, y=0): + self.x = x + self.y = y + + def __str__(self): + return "XY({}, {})".format(str(self.x), str(self.y)) + +# Container for sub-range data + +class Subrange(): + def __init__(self, lo=0, hi=0): + self.lo = lo + self.hi = hi + + def __str__(self): + return "Subrange({}, {})".format(str(self.lo), str(self.hi)) + +# Graph data region base class + +class GraphDataRegion(object): + + def __init__(self, key, title = "", ordinal = ""): + self.key = key + self.title = title + self.ordinal = ordinal + +# Function to sort GraphDataRegion + +def GraphDataRegionOrdinal(data_region): + return data_region.ordinal + +# Attributes for a graph region + +class GraphRegionAttribute(): + + def __init__(self, colour): + self.colour = colour + +# Switch graph data region represents a task + +class SwitchGraphDataRegion(GraphDataRegion): + + def __init__(self, key, exec_comm_id, pid, tid, comm, thread_id, comm_id): + super(SwitchGraphDataRegion, self).__init__(key) + + self.title = str(pid) + " / " + str(tid) + " " + comm + # Order graph legend within exec comm by pid / tid / time + self.ordinal = str(pid).rjust(16) + str(exec_comm_id).rjust(8) + str(tid).rjust(16) + self.exec_comm_id = exec_comm_id + self.pid = pid + self.tid = tid + self.comm = comm + self.thread_id = thread_id + self.comm_id = comm_id + +# Graph data point + +class GraphDataPoint(): + + def __init__(self, data, index, x, y, altx=None, alty=None, hregion=None, vregion=None): + self.data = data + self.index = index + self.x = x + self.y = y + self.altx = altx + self.alty = alty + self.hregion = hregion + self.vregion = vregion + +# Graph data (single graph) base class + +class GraphData(object): + + def __init__(self, collection, xbase=Decimal(0), ybase=Decimal(0)): + self.collection = collection + self.points = [] + self.xbase = xbase + self.ybase = ybase + self.title = "" + + def AddPoint(self, x, y, altx=None, alty=None, hregion=None, vregion=None): + index = len(self.points) + + x = float(Decimal(x) - self.xbase) + y = float(Decimal(y) - self.ybase) + + self.points.append(GraphDataPoint(self, index, x, y, altx, alty, hregion, vregion)) + + def XToData(self, x): + return Decimal(x) + self.xbase + + def YToData(self, y): + return Decimal(y) + self.ybase + +# Switch graph data (for one CPU) + +class SwitchGraphData(GraphData): + + def __init__(self, db, collection, cpu, xbase): + super(SwitchGraphData, self).__init__(collection, xbase) + + self.cpu = cpu + self.title = "CPU " + str(cpu) + self.SelectSwitches(db) + + def SelectComms(self, db, thread_id, last_comm_id, start_time, end_time): + query = QSqlQuery(db) + QueryExec(query, "SELECT id, c_time" + " FROM comms" + " WHERE c_thread_id = " + str(thread_id) + + " AND exec_flag = TRUE" + " AND c_time >= " + str(start_time) + + " AND c_time <= " + str(end_time) + + " ORDER BY c_time, id") + while query.next(): + comm_id = query.value(0) + if comm_id == last_comm_id: + continue + time = query.value(1) + hregion = self.HRegion(db, thread_id, comm_id, time) + self.AddPoint(time, 1000, None, None, hregion) + + def SelectSwitches(self, db): + last_time = None + last_comm_id = None + last_thread_id = None + query = QSqlQuery(db) + QueryExec(query, "SELECT time, thread_out_id, thread_in_id, comm_out_id, comm_in_id, flags" + " FROM context_switches" + " WHERE machine_id = " + str(self.collection.machine_id) + + " AND cpu = " + str(self.cpu) + + " ORDER BY time, id") + while query.next(): + flags = int(query.value(5)) + if flags & 1: + # Schedule-out: detect and add exec's + if last_thread_id == query.value(1) and last_comm_id is not None and last_comm_id != query.value(3): + self.SelectComms(db, last_thread_id, last_comm_id, last_time, query.value(0)) + continue + # Schedule-in: add data point + if len(self.points) == 0: + start_time = self.collection.glb.StartTime(self.collection.machine_id) + hregion = self.HRegion(db, query.value(1), query.value(3), start_time) + self.AddPoint(start_time, 1000, None, None, hregion) + time = query.value(0) + comm_id = query.value(4) + thread_id = query.value(2) + hregion = self.HRegion(db, thread_id, comm_id, time) + self.AddPoint(time, 1000, None, None, hregion) + last_time = time + last_comm_id = comm_id + last_thread_id = thread_id + + def NewHRegion(self, db, key, thread_id, comm_id, time): + exec_comm_id = ExecComm(db, thread_id, time) + query = QSqlQuery(db) + QueryExec(query, "SELECT pid, tid FROM threads WHERE id = " + str(thread_id)) + if query.next(): + pid = query.value(0) + tid = query.value(1) + else: + pid = -1 + tid = -1 + query = QSqlQuery(db) + QueryExec(query, "SELECT comm FROM comms WHERE id = " + str(comm_id)) + if query.next(): + comm = query.value(0) + else: + comm = "" + return SwitchGraphDataRegion(key, exec_comm_id, pid, tid, comm, thread_id, comm_id) + + def HRegion(self, db, thread_id, comm_id, time): + key = str(thread_id) + ":" + str(comm_id) + hregion = self.collection.LookupHRegion(key) + if hregion is None: + hregion = self.NewHRegion(db, key, thread_id, comm_id, time) + self.collection.AddHRegion(key, hregion) + return hregion + +# Graph data collection (multiple related graphs) base class + +class GraphDataCollection(object): + + def __init__(self, glb): + self.glb = glb + self.data = [] + self.hregions = {} + self.xrangelo = None + self.xrangehi = None + self.yrangelo = None + self.yrangehi = None + self.dp = XY(0, 0) + + def AddGraphData(self, data): + self.data.append(data) + + def LookupHRegion(self, key): + if key in self.hregions: + return self.hregions[key] + return None + + def AddHRegion(self, key, hregion): + self.hregions[key] = hregion + +# Switch graph data collection (SwitchGraphData for each CPU) + +class SwitchGraphDataCollection(GraphDataCollection): + + def __init__(self, glb, db, machine_id): + super(SwitchGraphDataCollection, self).__init__(glb) + + self.machine_id = machine_id + self.cpus = self.SelectCPUs(db) + + self.xrangelo = glb.StartTime(machine_id) + self.xrangehi = glb.FinishTime(machine_id) + + self.yrangelo = Decimal(0) + self.yrangehi = Decimal(1000) + + for cpu in self.cpus: + self.AddGraphData(SwitchGraphData(db, self, cpu, self.xrangelo)) + + def SelectCPUs(self, db): + cpus = [] + query = QSqlQuery(db) + QueryExec(query, "SELECT DISTINCT cpu" + " FROM context_switches" + " WHERE machine_id = " + str(self.machine_id)) + while query.next(): + cpus.append(int(query.value(0))) + return sorted(cpus) + +# Switch graph data graphics item displays the graphed data + +class SwitchGraphDataGraphicsItem(QGraphicsItem): + + def __init__(self, data, graph_width, graph_height, attrs, event_handler, parent=None): + super(SwitchGraphDataGraphicsItem, self).__init__(parent) + + self.data = data + self.graph_width = graph_width + self.graph_height = graph_height + self.attrs = attrs + self.event_handler = event_handler + self.setAcceptHoverEvents(True) + + def boundingRect(self): + return QRectF(0, 0, self.graph_width, self.graph_height) + + def PaintPoint(self, painter, last, x): + if not(last is None or last.hregion.pid == 0 or x < self.attrs.subrange.x.lo): + if last.x < self.attrs.subrange.x.lo: + x0 = self.attrs.subrange.x.lo + else: + x0 = last.x + if x > self.attrs.subrange.x.hi: + x1 = self.attrs.subrange.x.hi + else: + x1 = x - 1 + x0 = self.attrs.XToPixel(x0) + x1 = self.attrs.XToPixel(x1) + + y0 = self.attrs.YToPixel(last.y) + + colour = self.attrs.region_attributes[last.hregion.key].colour + + width = x1 - x0 + 1 + if width < 2: + painter.setPen(colour) + painter.drawLine(x0, self.graph_height - y0, x0, self.graph_height) + else: + painter.fillRect(x0, self.graph_height - y0, width, self.graph_height - 1, colour) + + def paint(self, painter, option, widget): + last = None + for point in self.data.points: + self.PaintPoint(painter, last, point.x) + if point.x > self.attrs.subrange.x.hi: + break; + last = point + self.PaintPoint(painter, last, self.attrs.subrange.x.hi + 1) + + def BinarySearchPoint(self, target): + lower_pos = 0 + higher_pos = len(self.data.points) + while True: + pos = int((lower_pos + higher_pos) / 2) + val = self.data.points[pos].x + if target >= val: + lower_pos = pos + else: + higher_pos = pos + if higher_pos <= lower_pos + 1: + return lower_pos + + def XPixelToData(self, x): + x = self.attrs.PixelToX(x) + if x < self.data.points[0].x: + x = 0 + pos = 0 + low = True + else: + pos = self.BinarySearchPoint(x) + low = False + return (low, pos, self.data.XToData(x)) + + def EventToData(self, event): + no_data = (None,) * 4 + if len(self.data.points) < 1: + return no_data + x = event.pos().x() + if x < 0: + return no_data + low0, pos0, time_from = self.XPixelToData(x) + low1, pos1, time_to = self.XPixelToData(x + 1) + hregions = set() + hregion_times = [] + if not low1: + for i in xrange(pos0, pos1 + 1): + hregion = self.data.points[i].hregion + hregions.add(hregion) + if i == pos0: + time = time_from + else: + time = self.data.XToData(self.data.points[i].x) + hregion_times.append((hregion, time)) + return (time_from, time_to, hregions, hregion_times) + + def hoverMoveEvent(self, event): + time_from, time_to, hregions, hregion_times = self.EventToData(event) + if time_from is not None: + self.event_handler.PointEvent(self.data.cpu, time_from, time_to, hregions) + + def hoverLeaveEvent(self, event): + self.event_handler.NoPointEvent() + + def mousePressEvent(self, event): + if event.button() != Qt.RightButton: + super(SwitchGraphDataGraphicsItem, self).mousePressEvent(event) + return + time_from, time_to, hregions, hregion_times = self.EventToData(event) + if hregion_times: + self.event_handler.RightClickEvent(self.data.cpu, hregion_times, event.screenPos()) + +# X-axis graphics item + +class XAxisGraphicsItem(QGraphicsItem): + + def __init__(self, width, parent=None): + super(XAxisGraphicsItem, self).__init__(parent) + + self.width = width + self.max_mark_sz = 4 + self.height = self.max_mark_sz + 1 + + def boundingRect(self): + return QRectF(0, 0, self.width, self.height) + + def Step(self): + attrs = self.parentItem().attrs + subrange = attrs.subrange.x + t = subrange.hi - subrange.lo + s = (3.0 * t) / self.width + n = 1.0 + while s > n: + n = n * 10.0 + return n + + def PaintMarks(self, painter, at_y, lo, hi, step, i): + attrs = self.parentItem().attrs + x = lo + while x <= hi: + xp = attrs.XToPixel(x) + if i % 10: + if i % 5: + sz = 1 + else: + sz = 2 + else: + sz = self.max_mark_sz + i = 0 + painter.drawLine(xp, at_y, xp, at_y + sz) + x += step + i += 1 + + def paint(self, painter, option, widget): + # Using QPainter::drawLine(int x1, int y1, int x2, int y2) so x2 = width -1 + painter.drawLine(0, 0, self.width - 1, 0) + n = self.Step() + attrs = self.parentItem().attrs + subrange = attrs.subrange.x + if subrange.lo: + x_offset = n - (subrange.lo % n) + else: + x_offset = 0.0 + x = subrange.lo + x_offset + i = (x / n) % 10 + self.PaintMarks(painter, 0, x, subrange.hi, n, i) + + def ScaleDimensions(self): + n = self.Step() + attrs = self.parentItem().attrs + lo = attrs.subrange.x.lo + hi = (n * 10.0) + lo + width = attrs.XToPixel(hi) + if width > 500: + width = 0 + return (n, lo, hi, width) + + def PaintScale(self, painter, at_x, at_y): + n, lo, hi, width = self.ScaleDimensions() + if not width: + return + painter.drawLine(at_x, at_y, at_x + width, at_y) + self.PaintMarks(painter, at_y, lo, hi, n, 0) + + def ScaleWidth(self): + n, lo, hi, width = self.ScaleDimensions() + return width + + def ScaleHeight(self): + return self.height + + def ScaleUnit(self): + return self.Step() * 10 + +# Scale graphics item base class + +class ScaleGraphicsItem(QGraphicsItem): + + def __init__(self, axis, parent=None): + super(ScaleGraphicsItem, self).__init__(parent) + self.axis = axis + + def boundingRect(self): + scale_width = self.axis.ScaleWidth() + if not scale_width: + return QRectF() + return QRectF(0, 0, self.axis.ScaleWidth() + 100, self.axis.ScaleHeight()) + + def paint(self, painter, option, widget): + scale_width = self.axis.ScaleWidth() + if not scale_width: + return + self.axis.PaintScale(painter, 0, 5) + x = scale_width + 4 + painter.drawText(QPointF(x, 10), self.Text()) + + def Unit(self): + return self.axis.ScaleUnit() + + def Text(self): + return "" + +# Switch graph scale graphics item + +class SwitchScaleGraphicsItem(ScaleGraphicsItem): + + def __init__(self, axis, parent=None): + super(SwitchScaleGraphicsItem, self).__init__(axis, parent) + + def Text(self): + unit = self.Unit() + if unit >= 1000000000: + unit = int(unit / 1000000000) + us = "s" + elif unit >= 1000000: + unit = int(unit / 1000000) + us = "ms" + elif unit >= 1000: + unit = int(unit / 1000) + us = "us" + else: + unit = int(unit) + us = "ns" + return " = " + str(unit) + " " + us + +# Switch graph graphics item contains graph title, scale, x/y-axis, and the graphed data + +class SwitchGraphGraphicsItem(QGraphicsItem): + + def __init__(self, collection, data, attrs, event_handler, first, parent=None): + super(SwitchGraphGraphicsItem, self).__init__(parent) + self.collection = collection + self.data = data + self.attrs = attrs + self.event_handler = event_handler + + margin = 20 + title_width = 50 + + self.title_graphics = QGraphicsSimpleTextItem(data.title, self) + + self.title_graphics.setPos(margin, margin) + graph_width = attrs.XToPixel(attrs.subrange.x.hi) + 1 + graph_height = attrs.YToPixel(attrs.subrange.y.hi) + 1 + + self.graph_origin_x = margin + title_width + margin + self.graph_origin_y = graph_height + margin + + x_axis_size = 1 + y_axis_size = 1 + self.yline = QGraphicsLineItem(0, 0, 0, graph_height, self) + + self.x_axis = XAxisGraphicsItem(graph_width, self) + self.x_axis.setPos(self.graph_origin_x, self.graph_origin_y + 1) + + if first: + self.scale_item = SwitchScaleGraphicsItem(self.x_axis, self) + self.scale_item.setPos(self.graph_origin_x, self.graph_origin_y + 10) + + self.yline.setPos(self.graph_origin_x - y_axis_size, self.graph_origin_y - graph_height) + + self.axis_point = QGraphicsLineItem(0, 0, 0, 0, self) + self.axis_point.setPos(self.graph_origin_x - 1, self.graph_origin_y +1) + + self.width = self.graph_origin_x + graph_width + margin + self.height = self.graph_origin_y + margin + + self.graph = SwitchGraphDataGraphicsItem(data, graph_width, graph_height, attrs, event_handler, self) + self.graph.setPos(self.graph_origin_x, self.graph_origin_y - graph_height) + + if parent and 'EnableRubberBand' in dir(parent): + parent.EnableRubberBand(self.graph_origin_x, self.graph_origin_x + graph_width - 1, self) + + def boundingRect(self): + return QRectF(0, 0, self.width, self.height) + + def paint(self, painter, option, widget): + pass + + def RBXToPixel(self, x): + return self.attrs.PixelToX(x - self.graph_origin_x) + + def RBXRangeToPixel(self, x0, x1): + return (self.RBXToPixel(x0), self.RBXToPixel(x1 + 1)) + + def RBPixelToTime(self, x): + if x < self.data.points[0].x: + return self.data.XToData(0) + return self.data.XToData(x) + + def RBEventTimes(self, x0, x1): + x0, x1 = self.RBXRangeToPixel(x0, x1) + time_from = self.RBPixelToTime(x0) + time_to = self.RBPixelToTime(x1) + return (time_from, time_to) + + def RBEvent(self, x0, x1): + time_from, time_to = self.RBEventTimes(x0, x1) + self.event_handler.RangeEvent(time_from, time_to) + + def RBMoveEvent(self, x0, x1): + if x1 < x0: + x0, x1 = x1, x0 + self.RBEvent(x0, x1) + + def RBReleaseEvent(self, x0, x1, selection_state): + if x1 < x0: + x0, x1 = x1, x0 + x0, x1 = self.RBXRangeToPixel(x0, x1) + self.event_handler.SelectEvent(x0, x1, selection_state) + +# Graphics item to draw a vertical bracket (used to highlight "forward" sub-range) + +class VerticalBracketGraphicsItem(QGraphicsItem): + + def __init__(self, parent=None): + super(VerticalBracketGraphicsItem, self).__init__(parent) + + self.width = 0 + self.height = 0 + self.hide() + + def SetSize(self, width, height): + self.width = width + 1 + self.height = height + 1 + + def boundingRect(self): + return QRectF(0, 0, self.width, self.height) + + def paint(self, painter, option, widget): + colour = QColor(255, 255, 0, 32) + painter.fillRect(0, 0, self.width, self.height, colour) + x1 = self.width - 1 + y1 = self.height - 1 + painter.drawLine(0, 0, x1, 0) + painter.drawLine(0, 0, 0, 3) + painter.drawLine(x1, 0, x1, 3) + painter.drawLine(0, y1, x1, y1) + painter.drawLine(0, y1, 0, y1 - 3) + painter.drawLine(x1, y1, x1, y1 - 3) + +# Graphics item to contain graphs arranged vertically + +class VertcalGraphSetGraphicsItem(QGraphicsItem): + + def __init__(self, collection, attrs, event_handler, child_class, parent=None): + super(VertcalGraphSetGraphicsItem, self).__init__(parent) + + self.collection = collection + + self.top = 10 + + self.width = 0 + self.height = self.top + + self.rubber_band = None + self.rb_enabled = False + + first = True + for data in collection.data: + child = child_class(collection, data, attrs, event_handler, first, self) + child.setPos(0, self.height + 1) + rect = child.boundingRect() + if rect.right() > self.width: + self.width = rect.right() + self.height = self.height + rect.bottom() + 1 + first = False + + self.bracket = VerticalBracketGraphicsItem(self) + + def EnableRubberBand(self, xlo, xhi, rb_event_handler): + if self.rb_enabled: + return + self.rb_enabled = True + self.rb_in_view = False + self.setAcceptedMouseButtons(Qt.LeftButton) + self.rb_xlo = xlo + self.rb_xhi = xhi + self.rb_event_handler = rb_event_handler + self.mousePressEvent = self.MousePressEvent + self.mouseMoveEvent = self.MouseMoveEvent + self.mouseReleaseEvent = self.MouseReleaseEvent + + def boundingRect(self): + return QRectF(0, 0, self.width, self.height) + + def paint(self, painter, option, widget): + pass + + def RubberBandParent(self): + scene = self.scene() + view = scene.views()[0] + viewport = view.viewport() + return viewport + + def RubberBandSetGeometry(self, rect): + scene_rectf = self.mapRectToScene(QRectF(rect)) + scene = self.scene() + view = scene.views()[0] + poly = view.mapFromScene(scene_rectf) + self.rubber_band.setGeometry(poly.boundingRect()) + + def SetSelection(self, selection_state): + if self.rubber_band: + if selection_state: + self.RubberBandSetGeometry(selection_state) + self.rubber_band.show() + else: + self.rubber_band.hide() + + def SetBracket(self, rect): + if rect: + x, y, width, height = rect.x(), rect.y(), rect.width(), rect.height() + self.bracket.setPos(x, y) + self.bracket.SetSize(width, height) + self.bracket.show() + else: + self.bracket.hide() + + def RubberBandX(self, event): + x = event.pos().toPoint().x() + if x < self.rb_xlo: + x = self.rb_xlo + elif x > self.rb_xhi: + x = self.rb_xhi + else: + self.rb_in_view = True + return x + + def RubberBandRect(self, x): + if self.rb_origin.x() <= x: + width = x - self.rb_origin.x() + rect = QRect(self.rb_origin, QSize(width, self.height)) + else: + width = self.rb_origin.x() - x + top_left = QPoint(self.rb_origin.x() - width, self.rb_origin.y()) + rect = QRect(top_left, QSize(width, self.height)) + return rect + + def MousePressEvent(self, event): + self.rb_in_view = False + x = self.RubberBandX(event) + self.rb_origin = QPoint(x, self.top) + if self.rubber_band is None: + self.rubber_band = QRubberBand(QRubberBand.Rectangle, self.RubberBandParent()) + self.RubberBandSetGeometry(QRect(self.rb_origin, QSize(0, self.height))) + if self.rb_in_view: + self.rubber_band.show() + self.rb_event_handler.RBMoveEvent(x, x) + else: + self.rubber_band.hide() + + def MouseMoveEvent(self, event): + x = self.RubberBandX(event) + rect = self.RubberBandRect(x) + self.RubberBandSetGeometry(rect) + if self.rb_in_view: + self.rubber_band.show() + self.rb_event_handler.RBMoveEvent(self.rb_origin.x(), x) + + def MouseReleaseEvent(self, event): + x = self.RubberBandX(event) + if self.rb_in_view: + selection_state = self.RubberBandRect(x) + else: + selection_state = None + self.rb_event_handler.RBReleaseEvent(self.rb_origin.x(), x, selection_state) + +# Switch graph legend data model + +class SwitchGraphLegendModel(QAbstractTableModel): + + def __init__(self, collection, region_attributes, parent=None): + super(SwitchGraphLegendModel, self).__init__(parent) + + self.region_attributes = region_attributes + + self.child_items = sorted(collection.hregions.values(), key=GraphDataRegionOrdinal) + self.child_count = len(self.child_items) + + self.highlight_set = set() + + self.column_headers = ("pid", "tid", "comm") + + def rowCount(self, parent): + return self.child_count + + def headerData(self, section, orientation, role): + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.columnHeader(section) + + def index(self, row, column, parent): + return self.createIndex(row, column, self.child_items[row]) + + def columnCount(self, parent=None): + return len(self.column_headers) + + def columnHeader(self, column): + return self.column_headers[column] + + def data(self, index, role): + if role == Qt.BackgroundRole: + child = self.child_items[index.row()] + if child in self.highlight_set: + return self.region_attributes[child.key].colour + return None + if role == Qt.ForegroundRole: + child = self.child_items[index.row()] + if child in self.highlight_set: + return QColor(255, 255, 255) + return self.region_attributes[child.key].colour + if role != Qt.DisplayRole: + return None + hregion = self.child_items[index.row()] + col = index.column() + if col == 0: + return hregion.pid + if col == 1: + return hregion.tid + if col == 2: + return hregion.comm + return None + + def SetHighlight(self, row, set_highlight): + child = self.child_items[row] + top_left = self.createIndex(row, 0, child) + bottom_right = self.createIndex(row, len(self.column_headers) - 1, child) + self.dataChanged.emit(top_left, bottom_right) + + def Highlight(self, highlight_set): + for row in xrange(self.child_count): + child = self.child_items[row] + if child in self.highlight_set: + if child not in highlight_set: + self.SetHighlight(row, False) + elif child in highlight_set: + self.SetHighlight(row, True) + self.highlight_set = highlight_set + +# Switch graph legend is a table + +class SwitchGraphLegend(QWidget): + + def __init__(self, collection, region_attributes, parent=None): + super(SwitchGraphLegend, self).__init__(parent) + + self.data_model = SwitchGraphLegendModel(collection, region_attributes) + + self.model = QSortFilterProxyModel() + self.model.setSourceModel(self.data_model) + + self.view = QTableView() + self.view.setModel(self.model) + self.view.setEditTriggers(QAbstractItemView.NoEditTriggers) + self.view.verticalHeader().setVisible(False) + self.view.sortByColumn(-1, Qt.AscendingOrder) + self.view.setSortingEnabled(True) + self.view.resizeColumnsToContents() + self.view.resizeRowsToContents() + + self.vbox = VBoxLayout(self.view) + self.setLayout(self.vbox) + + sz1 = self.view.columnWidth(0) + self.view.columnWidth(1) + self.view.columnWidth(2) + 2 + sz1 = sz1 + self.view.verticalScrollBar().sizeHint().width() + self.saved_size = sz1 + + def resizeEvent(self, event): + self.saved_size = self.size().width() + super(SwitchGraphLegend, self).resizeEvent(event) + + def Highlight(self, highlight_set): + self.data_model.Highlight(highlight_set) + self.update() + + def changeEvent(self, event): + if event.type() == QEvent.FontChange: + self.view.resizeRowsToContents() + self.view.resizeColumnsToContents() + # Need to resize rows again after column resize + self.view.resizeRowsToContents() + super(SwitchGraphLegend, self).changeEvent(event) + +# Random colour generation + +def RGBColourTooLight(r, g, b): + if g > 230: + return True + if g <= 160: + return False + if r <= 180 and g <= 180: + return False + if r < 60: + return False + return True + +def GenerateColours(x): + cs = [0] + for i in xrange(1, x): + cs.append(int((255.0 / i) + 0.5)) + colours = [] + for r in cs: + for g in cs: + for b in cs: + # Exclude black and colours that look too light against a white background + if (r, g, b) == (0, 0, 0) or RGBColourTooLight(r, g, b): + continue + colours.append(QColor(r, g, b)) + return colours + +def GenerateNColours(n): + for x in xrange(2, n + 2): + colours = GenerateColours(x) + if len(colours) >= n: + return colours + return [] + +def GenerateNRandomColours(n, seed): + colours = GenerateNColours(n) + random.seed(seed) + random.shuffle(colours) + return colours + +# Graph attributes, in particular the scale and subrange that change when zooming + +class GraphAttributes(): + + def __init__(self, scale, subrange, region_attributes, dp): + self.scale = scale + self.subrange = subrange + self.region_attributes = region_attributes + # Rounding avoids errors due to finite floating point precision + self.dp = dp # data decimal places + self.Update() + + def XToPixel(self, x): + return int(round((x - self.subrange.x.lo) * self.scale.x, self.pdp.x)) + + def YToPixel(self, y): + return int(round((y - self.subrange.y.lo) * self.scale.y, self.pdp.y)) + + def PixelToXRounded(self, px): + return round((round(px, 0) / self.scale.x), self.dp.x) + self.subrange.x.lo + + def PixelToYRounded(self, py): + return round((round(py, 0) / self.scale.y), self.dp.y) + self.subrange.y.lo + + def PixelToX(self, px): + x = self.PixelToXRounded(px) + if self.pdp.x == 0: + rt = self.XToPixel(x) + if rt > px: + return x - 1 + return x + + def PixelToY(self, py): + y = self.PixelToYRounded(py) + if self.pdp.y == 0: + rt = self.YToPixel(y) + if rt > py: + return y - 1 + return y + + def ToPDP(self, dp, scale): + # Calculate pixel decimal places: + # (10 ** dp) is the minimum delta in the data + # scale it to get the minimum delta in pixels + # log10 gives the number of decimals places negatively + # subtrace 1 to divide by 10 + # round to the lower negative number + # change the sign to get the number of decimals positively + x = math.log10((10 ** dp) * scale) + if x < 0: + x -= 1 + x = -int(math.floor(x) - 0.1) + else: + x = 0 + return x + + def Update(self): + x = self.ToPDP(self.dp.x, self.scale.x) + y = self.ToPDP(self.dp.y, self.scale.y) + self.pdp = XY(x, y) # pixel decimal places + +# Switch graph splitter which divides the CPU graphs from the legend + +class SwitchGraphSplitter(QSplitter): + + def __init__(self, parent=None): + super(SwitchGraphSplitter, self).__init__(parent) + + self.first_time = False + + def resizeEvent(self, ev): + if self.first_time: + self.first_time = False + sz1 = self.widget(1).view.columnWidth(0) + self.widget(1).view.columnWidth(1) + self.widget(1).view.columnWidth(2) + 2 + sz1 = sz1 + self.widget(1).view.verticalScrollBar().sizeHint().width() + sz0 = self.size().width() - self.handleWidth() - sz1 + self.setSizes([sz0, sz1]) + elif not(self.widget(1).saved_size is None): + sz1 = self.widget(1).saved_size + sz0 = self.size().width() - self.handleWidth() - sz1 + self.setSizes([sz0, sz1]) + super(SwitchGraphSplitter, self).resizeEvent(ev) + +# Graph widget base class + +class GraphWidget(QWidget): + + graph_title_changed = Signal(object) + + def __init__(self, parent=None): + super(GraphWidget, self).__init__(parent) + + def GraphTitleChanged(self, title): + self.graph_title_changed.emit(title) + + def Title(self): + return "" + +# Display time in s, ms, us or ns + +def ToTimeStr(val): + val = Decimal(val) + if val >= 1000000000: + return "{} s".format((val / 1000000000).quantize(Decimal("0.000000001"))) + if val >= 1000000: + return "{} ms".format((val / 1000000).quantize(Decimal("0.000001"))) + if val >= 1000: + return "{} us".format((val / 1000).quantize(Decimal("0.001"))) + return "{} ns".format(val.quantize(Decimal("1"))) + +# Switch (i.e. context switch i.e. Time Chart by CPU) graph widget which contains the CPU graphs and the legend and control buttons + +class SwitchGraphWidget(GraphWidget): + + def __init__(self, glb, collection, parent=None): + super(SwitchGraphWidget, self).__init__(parent) + + self.glb = glb + self.collection = collection + + self.back_state = [] + self.forward_state = [] + self.selection_state = (None, None) + self.fwd_rect = None + self.start_time = self.glb.StartTime(collection.machine_id) + + i = 0 + hregions = collection.hregions.values() + colours = GenerateNRandomColours(len(hregions), 1013) + region_attributes = {} + for hregion in hregions: + if hregion.pid == 0 and hregion.tid == 0: + region_attributes[hregion.key] = GraphRegionAttribute(QColor(0, 0, 0)) + else: + region_attributes[hregion.key] = GraphRegionAttribute(colours[i]) + i = i + 1 + + # Default to entire range + xsubrange = Subrange(0.0, float(collection.xrangehi - collection.xrangelo) + 1.0) + ysubrange = Subrange(0.0, float(collection.yrangehi - collection.yrangelo) + 1.0) + subrange = XY(xsubrange, ysubrange) + + scale = self.GetScaleForRange(subrange) + + self.attrs = GraphAttributes(scale, subrange, region_attributes, collection.dp) + + self.item = VertcalGraphSetGraphicsItem(collection, self.attrs, self, SwitchGraphGraphicsItem) + + self.scene = QGraphicsScene() + self.scene.addItem(self.item) + + self.view = QGraphicsView(self.scene) + self.view.centerOn(0, 0) + self.view.setAlignment(Qt.AlignLeft | Qt.AlignTop) + + self.legend = SwitchGraphLegend(collection, region_attributes) + + self.splitter = SwitchGraphSplitter() + self.splitter.addWidget(self.view) + self.splitter.addWidget(self.legend) + + self.point_label = QLabel("") + self.point_label.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed) + + self.back_button = QToolButton() + self.back_button.setIcon(self.style().standardIcon(QStyle.SP_ArrowLeft)) + self.back_button.setDisabled(True) + self.back_button.released.connect(lambda: self.Back()) + + self.forward_button = QToolButton() + self.forward_button.setIcon(self.style().standardIcon(QStyle.SP_ArrowRight)) + self.forward_button.setDisabled(True) + self.forward_button.released.connect(lambda: self.Forward()) + + self.zoom_button = QToolButton() + self.zoom_button.setText("Zoom") + self.zoom_button.setDisabled(True) + self.zoom_button.released.connect(lambda: self.Zoom()) + + self.hbox = HBoxLayout(self.back_button, self.forward_button, self.zoom_button, self.point_label) + + self.vbox = VBoxLayout(self.splitter, self.hbox) + + self.setLayout(self.vbox) + + def GetScaleForRangeX(self, xsubrange): + # Default graph 1000 pixels wide + dflt = 1000.0 + r = xsubrange.hi - xsubrange.lo + return dflt / r + + def GetScaleForRangeY(self, ysubrange): + # Default graph 50 pixels high + dflt = 50.0 + r = ysubrange.hi - ysubrange.lo + return dflt / r + + def GetScaleForRange(self, subrange): + # Default graph 1000 pixels wide, 50 pixels high + xscale = self.GetScaleForRangeX(subrange.x) + yscale = self.GetScaleForRangeY(subrange.y) + return XY(xscale, yscale) + + def PointEvent(self, cpu, time_from, time_to, hregions): + text = "CPU: " + str(cpu) + time_from = time_from.quantize(Decimal(1)) + rel_time_from = time_from - self.glb.StartTime(self.collection.machine_id) + text = text + " Time: " + str(time_from) + " (+" + ToTimeStr(rel_time_from) + ")" + self.point_label.setText(text) + self.legend.Highlight(hregions) + + def RightClickEvent(self, cpu, hregion_times, pos): + if not IsSelectable(self.glb.db, "calls", "WHERE parent_id >= 0"): + return + menu = QMenu(self.view) + for hregion, time in hregion_times: + thread_at_time = (hregion.exec_comm_id, hregion.thread_id, time) + menu_text = "Show Call Tree for {} {}:{} at {}".format(hregion.comm, hregion.pid, hregion.tid, time) + menu.addAction(CreateAction(menu_text, "Show Call Tree", lambda a=None, args=thread_at_time: self.RightClickSelect(args), self.view)) + menu.exec_(pos) + + def RightClickSelect(self, args): + CallTreeWindow(self.glb, self.glb.mainwindow, thread_at_time=args) + + def NoPointEvent(self): + self.point_label.setText("") + self.legend.Highlight({}) + + def RangeEvent(self, time_from, time_to): + time_from = time_from.quantize(Decimal(1)) + time_to = time_to.quantize(Decimal(1)) + if time_to <= time_from: + self.point_label.setText("") + return + rel_time_from = time_from - self.start_time + rel_time_to = time_to - self.start_time + text = " Time: " + str(time_from) + " (+" + ToTimeStr(rel_time_from) + ") to: " + str(time_to) + " (+" + ToTimeStr(rel_time_to) + ")" + text = text + " duration: " + ToTimeStr(time_to - time_from) + self.point_label.setText(text) + + def BackState(self): + return (self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect) + + def PushBackState(self): + state = copy.deepcopy(self.BackState()) + self.back_state.append(state) + self.back_button.setEnabled(True) + + def PopBackState(self): + self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect = self.back_state.pop() + self.attrs.Update() + if not self.back_state: + self.back_button.setDisabled(True) + + def PushForwardState(self): + state = copy.deepcopy(self.BackState()) + self.forward_state.append(state) + self.forward_button.setEnabled(True) + + def PopForwardState(self): + self.attrs.subrange, self.attrs.scale, self.selection_state, self.fwd_rect = self.forward_state.pop() + self.attrs.Update() + if not self.forward_state: + self.forward_button.setDisabled(True) + + def Title(self): + time_from = self.collection.xrangelo + Decimal(self.attrs.subrange.x.lo) + time_to = self.collection.xrangelo + Decimal(self.attrs.subrange.x.hi) + rel_time_from = time_from - self.start_time + rel_time_to = time_to - self.start_time + title = "+" + ToTimeStr(rel_time_from) + " to +" + ToTimeStr(rel_time_to) + title = title + " (" + ToTimeStr(time_to - time_from) + ")" + return title + + def Update(self): + selected_subrange, selection_state = self.selection_state + self.item.SetSelection(selection_state) + self.item.SetBracket(self.fwd_rect) + self.zoom_button.setDisabled(selected_subrange is None) + self.GraphTitleChanged(self.Title()) + self.item.update(self.item.boundingRect()) + + def Back(self): + if not self.back_state: + return + self.PushForwardState() + self.PopBackState() + self.Update() + + def Forward(self): + if not self.forward_state: + return + self.PushBackState() + self.PopForwardState() + self.Update() + + def SelectEvent(self, x0, x1, selection_state): + if selection_state is None: + selected_subrange = None + else: + if x1 - x0 < 1.0: + x1 += 1.0 + selected_subrange = Subrange(x0, x1) + self.selection_state = (selected_subrange, selection_state) + self.zoom_button.setDisabled(selected_subrange is None) + + def Zoom(self): + selected_subrange, selection_state = self.selection_state + if selected_subrange is None: + return + self.fwd_rect = selection_state + self.item.SetSelection(None) + self.PushBackState() + self.attrs.subrange.x = selected_subrange + self.forward_state = [] + self.forward_button.setDisabled(True) + self.selection_state = (None, None) + self.fwd_rect = None + self.attrs.scale.x = self.GetScaleForRangeX(self.attrs.subrange.x) + self.attrs.Update() + self.Update() + +# Slow initialization - perform non-GUI initialization in a separate thread and put up a modal message box while waiting + +class SlowInitClass(): + + def __init__(self, glb, title, init_fn): + self.init_fn = init_fn + self.done = False + self.result = None + + self.msg_box = QMessageBox(glb.mainwindow) + self.msg_box.setText("Initializing " + title + ". Please wait.") + self.msg_box.setWindowTitle("Initializing " + title) + self.msg_box.setWindowIcon(glb.mainwindow.style().standardIcon(QStyle.SP_MessageBoxInformation)) + + self.init_thread = Thread(self.ThreadFn, glb) + self.init_thread.done.connect(lambda: self.Done(), Qt.QueuedConnection) + + self.init_thread.start() + + def Done(self): + self.msg_box.done(0) + + def ThreadFn(self, glb): + conn_name = "SlowInitClass" + str(os.getpid()) + db, dbname = glb.dbref.Open(conn_name) + self.result = self.init_fn(db) + self.done = True + return (True, 0) + + def Result(self): + while not self.done: + self.msg_box.exec_() + self.init_thread.wait() + return self.result + +def SlowInit(glb, title, init_fn): + init = SlowInitClass(glb, title, init_fn) + return init.Result() + +# Time chart by CPU window + +class TimeChartByCPUWindow(QMdiSubWindow): + + def __init__(self, glb, parent=None): + super(TimeChartByCPUWindow, self).__init__(parent) + + self.glb = glb + self.machine_id = glb.HostMachineId() + self.collection_name = "SwitchGraphDataCollection " + str(self.machine_id) + + collection = LookupModel(self.collection_name) + if collection is None: + collection = SlowInit(glb, "Time Chart", self.Init) + + self.widget = SwitchGraphWidget(glb, collection, self) + self.view = self.widget + + self.base_title = "Time Chart by CPU" + self.setWindowTitle(self.base_title + self.widget.Title()) + self.widget.graph_title_changed.connect(self.GraphTitleChanged) + + self.setWidget(self.widget) + + AddSubWindow(glb.mainwindow.mdi_area, self, self.windowTitle()) + + def Init(self, db): + return LookupCreateModel(self.collection_name, lambda : SwitchGraphDataCollection(self.glb, db, self.machine_id)) + + def GraphTitleChanged(self, title): + self.setWindowTitle(self.base_title + " : " + title) + # Child data item finder class ChildDataItemFinder(): @@ -2058,10 +3429,8 @@ class SampleTimeRangesDataItem(LineEditDataItem): QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1") if query.next(): self.last_id = int(query.value(0)) - self.last_time = int(query.value(1)) - QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1") - if query.next(): - self.first_time = int(query.value(0)) + self.first_time = int(glb.HostStartTime()) + self.last_time = int(glb.HostFinishTime()) if placeholder_text: placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time) @@ -2954,7 +4323,9 @@ p.c2 { <p class=c2><a href=#allbranches>1.3 All branches</a></p> <p class=c2><a href=#selectedbranches>1.4 Selected branches</a></p> <p class=c2><a href=#topcallsbyelapsedtime>1.5 Top calls by elapsed time</a></p> -<p class=c1><a href=#tables>2. Tables</a></p> +<p class=c1><a href=#charts>2. Charts</a></p> +<p class=c2><a href=#timechartbycpu>2.1 Time chart by CPU</a></p> +<p class=c1><a href=#tables>3. Tables</a></p> <h1 id=reports>1. Reports</h1> <h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2> The result is a GUI window with a tree representing a context-sensitive @@ -3042,7 +4413,29 @@ N.B. Due to the granularity of timestamps, there could be no branches in any giv The Top calls by elapsed time report displays calls in descending order of time elapsed between when the function was called and when it returned. The data is reduced by various selection criteria. A dialog box displays available criteria which are AND'ed together. If not all data is fetched, a Fetch bar is provided. Ctrl-F displays a Find bar. -<h1 id=tables>2. Tables</h1> +<h1 id=charts>2. Charts</h1> +<h2 id=timechartbycpu>2.1 Time chart by CPU</h2> +This chart displays context switch information when that data is available. Refer to context_switches_view on the Tables menu. +<h3>Features</h3> +<ol> +<li>Mouse over to highight the task and show the time</li> +<li>Drag the mouse to select a region and zoom by pushing the Zoom button</li> +<li>Go back and forward by pressing the arrow buttons</li> +<li>If call information is available, right-click to show a call tree opened to that task and time. +Note, the call tree may take some time to appear, and there may not be call information for the task or time selected. +</li> +</ol> +<h3>Important</h3> +The graph can be misleading in the following respects: +<ol> +<li>The graph shows the first task on each CPU as running from the beginning of the time range. +Because tracing might start on different CPUs at different times, that is not necessarily the case. +Refer to context_switches_view on the Tables menu to understand what data the graph is based upon.</li> +<li>Similarly, the last task on each CPU can be showing running longer than it really was. +Again, refer to context_switches_view on the Tables menu to understand what data the graph is based upon.</li> +<li>When the mouse is over a task, the highlighted task might not be visible on the legend without scrolling if the legend does not fit fully in the window</li> +</ol> +<h1 id=tables>3. Tables</h1> The Tables menu shows all tables and views in the database. Most tables have an associated view which displays the information in a more friendly way. Not all data for large tables is fetched immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted, @@ -3238,6 +4631,10 @@ class MainWindow(QMainWindow): if IsSelectable(glb.db, "calls"): reports_menu.addAction(CreateAction("&Top calls by elapsed time", "Create a new window displaying top calls by elapsed time", self.NewTopCalls, self)) + if IsSelectable(glb.db, "context_switches"): + charts_menu = menu.addMenu("&Charts") + charts_menu.addAction(CreateAction("&Time chart by CPU", "Create a new window displaying time charts by CPU", self.TimeChartByCPU, self)) + self.TableMenu(GetTableList(glb), menu) self.window_menu = WindowMenu(self.mdi_area, menu) @@ -3298,6 +4695,9 @@ class MainWindow(QMainWindow): label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewSelectedBranchView(x), self)) + def TimeChartByCPU(self): + TimeChartByCPUWindow(self.glb, self) + def TableMenu(self, tables, menu): table_menu = menu.addMenu("&Tables") for table in tables: @@ -3470,6 +4870,9 @@ class Glb(): self.have_disassembler = True except: self.have_disassembler = False + self.host_machine_id = 0 + self.host_start_time = 0 + self.host_finish_time = 0 def FileFromBuildId(self, build_id): file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf" @@ -3502,6 +4905,110 @@ class Glb(): except: pass + def GetHostMachineId(self): + query = QSqlQuery(self.db) + QueryExec(query, "SELECT id FROM machines WHERE pid = -1") + if query.next(): + self.host_machine_id = query.value(0) + else: + self.host_machine_id = 0 + return self.host_machine_id + + def HostMachineId(self): + if self.host_machine_id: + return self.host_machine_id + return self.GetHostMachineId() + + def SelectValue(self, sql): + query = QSqlQuery(self.db) + try: + QueryExec(query, sql) + except: + return None + if query.next(): + return Decimal(query.value(0)) + return None + + def SwitchesMinTime(self, machine_id): + return self.SelectValue("SELECT time" + " FROM context_switches" + " WHERE time != 0 AND machine_id = " + str(machine_id) + + " ORDER BY id LIMIT 1") + + def SwitchesMaxTime(self, machine_id): + return self.SelectValue("SELECT time" + " FROM context_switches" + " WHERE time != 0 AND machine_id = " + str(machine_id) + + " ORDER BY id DESC LIMIT 1") + + def SamplesMinTime(self, machine_id): + return self.SelectValue("SELECT time" + " FROM samples" + " WHERE time != 0 AND machine_id = " + str(machine_id) + + " ORDER BY id LIMIT 1") + + def SamplesMaxTime(self, machine_id): + return self.SelectValue("SELECT time" + " FROM samples" + " WHERE time != 0 AND machine_id = " + str(machine_id) + + " ORDER BY id DESC LIMIT 1") + + def CallsMinTime(self, machine_id): + return self.SelectValue("SELECT calls.call_time" + " FROM calls" + " INNER JOIN threads ON threads.thread_id = calls.thread_id" + " WHERE calls.call_time != 0 AND threads.machine_id = " + str(machine_id) + + " ORDER BY calls.id LIMIT 1") + + def CallsMaxTime(self, machine_id): + return self.SelectValue("SELECT calls.return_time" + " FROM calls" + " INNER JOIN threads ON threads.thread_id = calls.thread_id" + " WHERE calls.return_time != 0 AND threads.machine_id = " + str(machine_id) + + " ORDER BY calls.return_time DESC LIMIT 1") + + def GetStartTime(self, machine_id): + t0 = self.SwitchesMinTime(machine_id) + t1 = self.SamplesMinTime(machine_id) + t2 = self.CallsMinTime(machine_id) + if t0 is None or (not(t1 is None) and t1 < t0): + t0 = t1 + if t0 is None or (not(t2 is None) and t2 < t0): + t0 = t2 + return t0 + + def GetFinishTime(self, machine_id): + t0 = self.SwitchesMaxTime(machine_id) + t1 = self.SamplesMaxTime(machine_id) + t2 = self.CallsMaxTime(machine_id) + if t0 is None or (not(t1 is None) and t1 > t0): + t0 = t1 + if t0 is None or (not(t2 is None) and t2 > t0): + t0 = t2 + return t0 + + def HostStartTime(self): + if self.host_start_time: + return self.host_start_time + self.host_start_time = self.GetStartTime(self.HostMachineId()) + return self.host_start_time + + def HostFinishTime(self): + if self.host_finish_time: + return self.host_finish_time + self.host_finish_time = self.GetFinishTime(self.HostMachineId()) + return self.host_finish_time + + def StartTime(self, machine_id): + if machine_id == self.HostMachineId(): + return self.HostStartTime() + return self.GetStartTime(machine_id) + + def FinishTime(self, machine_id): + if machine_id == self.HostMachineId(): + return self.HostFinishTime() + return self.GetFinishTime(machine_id) + # Database reference class DBRef(): diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 338cd9faa835..a4cd30c0beb3 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -13,6 +13,7 @@ #include "util/mmap.h" #include <errno.h> #include <linux/string.h> +#include <perf/mmap.h> #define NR_ITERS 111 @@ -37,8 +38,8 @@ static int count_samples(struct evlist *evlist, int *sample_count, struct mmap *map = &evlist->overwrite_mmap[i]; union perf_event *event; - perf_mmap__read_init(map); - while ((event = perf_mmap__read_event(map)) != NULL) { + perf_mmap__read_init(&map->core); + while ((event = perf_mmap__read_event(&map->core)) != NULL) { const u32 type = event->header.type; switch (type) { @@ -53,7 +54,7 @@ static int count_samples(struct evlist *evlist, int *sample_count, return TEST_FAIL; } } - perf_mmap__read_done(map); + perf_mmap__read_done(&map->core); } return TEST_OK; } diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index 016bba2c142d..d0b935356274 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -10,11 +10,7 @@ #include <unistd.h> #include <string.h> #include <sys/ioctl.h> -#include <time.h> #include <fcntl.h> -#include <signal.h> -#include <sys/mman.h> -#include <linux/compiler.h> #include <linux/hw_breakpoint.h> #include "tests.h" @@ -192,3 +188,19 @@ int test__bp_accounting(struct test *test __maybe_unused, int subtest __maybe_un return bp_accounting(wp_cnt, share); } + +bool test__bp_account_is_supported(void) +{ + /* + * PowerPC and S390 do not support creation of instruction + * breakpoints using the perf_event interface. + * + * Just disable the test for these architectures until these + * issues are resolved. + */ +#if defined(__powerpc__) || defined(__s390x__) + return false; +#else + return true; +#endif +} diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index c1c2c13de254..166f411568a5 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -49,14 +49,6 @@ asm ( "__test_function:\n" "incq (%rdi)\n" "ret\n"); -#elif defined (__aarch64__) -extern void __test_function(volatile long *ptr); -asm ( - ".globl __test_function\n" - "__test_function:\n" - "str x30, [x0]\n" - "ret\n"); - #else static void __test_function(volatile long *ptr) { @@ -302,10 +294,15 @@ bool test__bp_signal_is_supported(void) * stepping into the SIGIO handler and getting stuck on the * breakpointed instruction. * + * Since arm64 has the same issue with arm for the single-step + * handling, this case also gets suck on the breakpointed + * instruction. + * * Just disable the test for these architectures until these * issues are resolved. */ -#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) +#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || \ + defined(__aarch64__) return false; #else return true; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 1eb0bffaed6c..5d20bf8397f0 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -15,6 +15,7 @@ #include <linux/string.h> #include <api/fs/fs.h> #include <bpf/bpf.h> +#include <perf/mmap.h> #include "tests.h" #include "llvm.h" #include "debug.h" @@ -184,16 +185,16 @@ static int do_test(struct bpf_object *obj, int (*func)(void), struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { const u32 type = event->header.type; if (type == PERF_RECORD_SAMPLE) count ++; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (count != expect) { diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 55774baffc2a..8b286e9b7549 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -121,7 +121,7 @@ static struct test generic_tests[] = { { .desc = "Breakpoint accounting", .func = test__bp_accounting, - .is_supported = test__bp_signal_is_supported, + .is_supported = test__bp_account_is_supported, }, { .desc = "Watchpoint", diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index f5764a3890b9..1f017e1b2a55 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -10,6 +10,7 @@ #include <sys/param.h> #include <perf/cpumap.h> #include <perf/evlist.h> +#include <perf/mmap.h> #include "debug.h" #include "dso.h" @@ -425,16 +426,16 @@ static int process_events(struct machine *machine, struct evlist *evlist, for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { ret = process_event(machine, evlist, event, state); - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (ret < 0) return ret; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } return 0; } diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 92c7d591bcac..50a0c9fcde7d 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -5,6 +5,7 @@ #include <sys/prctl.h> #include <perf/cpumap.h> #include <perf/evlist.h> +#include <perf/mmap.h> #include "debug.h" #include "parse-events.h" @@ -38,17 +39,17 @@ static int find_comm(struct evlist *evlist, const char *comm) found = 0; for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { if (event->header.type == PERF_RECORD_COMM && (pid_t)event->comm.pid == getpid() && (pid_t)event->comm.tid == getpid() && strcmp(event->comm.comm, comm) == 0) found += 1; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } return found; } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 3a22dce991ba..5f4c0dbb4715 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -16,6 +16,7 @@ #include <linux/kernel.h> #include <linux/string.h> #include <perf/evlist.h> +#include <perf/mmap.h> /* * This test will generate random numbers of calls to some getpid syscalls, @@ -113,10 +114,10 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse } md = &evlist->mmap[0]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto out_init; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) { @@ -139,9 +140,9 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse goto out_delete_evlist; } nr_events[evsel->idx]++; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); out_init: err = 0; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 2b5c46813053..c6b2d7aab608 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -13,6 +13,7 @@ #include "debug.h" #include "util/mmap.h" #include <errno.h> +#include <perf/mmap.h> #ifndef O_DIRECTORY #define O_DIRECTORY 00200000 @@ -92,10 +93,10 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { const u32 type = event->header.type; int tp_flags; struct perf_sample sample; @@ -103,7 +104,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest ++nr_events; if (type != PERF_RECORD_SAMPLE) { - perf_mmap__consume(md); + perf_mmap__consume(&md->core); continue; } @@ -123,7 +124,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out_ok; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } if (nr_events == before) diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 437426be29e9..2195fc205e72 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -6,6 +6,7 @@ #include <pthread.h> #include <sched.h> +#include <perf/mmap.h> #include "evlist.h" #include "evsel.h" #include "debug.h" @@ -170,10 +171,10 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus struct mmap *md; md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { const u32 type = event->header.type; const char *name = perf_event__name(type); @@ -276,9 +277,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus ++errs; } - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } /* diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 84519df87f30..bfb9986093d8 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -15,6 +15,7 @@ #include "util/mmap.h" #include "util/thread_map.h" #include <perf/evlist.h> +#include <perf/mmap.h> #define NR_LOOPS 10000000 @@ -99,10 +100,10 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) evlist__disable(evlist); md = &evlist->mmap[0]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto out_init; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) @@ -117,9 +118,9 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) total_periods += sample.period; nr_samples++; next_event: - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); out_init: if ((u64) nr_samples == total_periods) { diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index ffa592e0020e..fcb0d03dba4e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -8,6 +8,7 @@ #include <linux/zalloc.h> #include <perf/cpumap.h> #include <perf/evlist.h> +#include <perf/mmap.h> #include "debug.h" #include "parse-events.h" @@ -269,17 +270,17 @@ static int process_events(struct evlist *evlist, for (i = 0; i < evlist->core.nr_mmaps; i++) { md = &evlist->mmap[i]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) continue; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { cnt += 1; ret = add_event(evlist, &events, event); - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (ret < 0) goto out_free_nodes; } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); } events_array = calloc(cnt, sizeof(struct event_node)); diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index bce3a4cb4c89..adaff9044331 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -12,6 +12,7 @@ #include <linux/string.h> #include <perf/cpumap.h> #include <perf/evlist.h> +#include <perf/mmap.h> static int exited; static int nr_exit; @@ -53,6 +54,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused struct perf_cpu_map *cpus; struct perf_thread_map *threads; struct mmap *md; + int retry_count = 0; signal(SIGCHLD, sig_handler); @@ -110,6 +112,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused if (evlist__mmap(evlist, 128) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); + err = -1; goto out_delete_evlist; } @@ -117,20 +120,27 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused retry: md = &evlist->mmap[0]; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto out_init; - while ((event = perf_mmap__read_event(md)) != NULL) { + while ((event = perf_mmap__read_event(&md->core)) != NULL) { if (event->header.type == PERF_RECORD_EXIT) nr_exit++; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); } - perf_mmap__read_done(md); + perf_mmap__read_done(&md->core); out_init: if (!exited || !nr_exit) { evlist__poll(evlist, -1); + + if (retry_count++ > 1000) { + pr_debug("Failed after retrying 1000 times\n"); + err = -1; + goto out_free_maps; + } + goto retry; } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 72912eb473cb..9837b6e93023 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -111,6 +111,7 @@ int test__map_groups__merge_in(struct test *t, int subtest); int test__time_utils(struct test *t, int subtest); bool test__bp_signal_is_supported(void); +bool test__bp_account_is_supported(void); bool test__wp_is_supported(void); #if defined(__arm__) || defined(__aarch64__) diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index afa75a76f6b8..433dc39053a7 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -17,3 +17,4 @@ perf-y += sockaddr.o perf-y += socket.o perf-y += statx.o perf-y += sync_file_range.o +perf-y += tracepoints/ diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 7e06605f7c76..5a61043c2ff7 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -5,9 +5,10 @@ #include <linux/kernel.h> #include <linux/types.h> #include <sys/types.h> +#include <stdbool.h> struct strarray { - int offset; + u64 offset; int nr_entries; const char *prefix; const char **entries; @@ -27,8 +28,12 @@ struct strarray { } size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val); +size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val); size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, bool show_prefix, unsigned long flags); +bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret); +bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret); + struct trace; struct thread; @@ -51,6 +56,8 @@ struct strarrays { size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val); +bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret); + size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size); extern struct strarray strarray__socket_families; @@ -78,8 +85,11 @@ struct augmented_arg { u64 value[]; }; +struct syscall_arg_fmt; + /** * @val: value of syscall argument being formatted + * @len: for tracepoint dynamic arrays, if fmt->nr_entries == 0, then its not a fixed array, look at arg->len * @args: All the args, use syscall_args__val(arg, nth) to access one * @augmented_args: Extra data that can be collected, for instance, with eBPF for expanding the pathname for open, etc * @augmented_args_size: augmented_args total payload size @@ -94,6 +104,7 @@ struct augmented_arg { struct syscall_arg { unsigned long val; unsigned char *args; + struct syscall_arg_fmt *fmt; struct { struct augmented_arg *args; int size; @@ -101,6 +112,7 @@ struct syscall_arg { struct thread *thread; struct trace *trace; void *parm; + u16 len; u8 idx; u8 mask; bool show_string_prefix; @@ -111,6 +123,27 @@ unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx); size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STRARRAY_FLAGS syscall_arg__scnprintf_strarray_flags +bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret); +#define STUL_STRARRAY syscall_arg__strtoul_strarray + +bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret); +#define STUL_STRARRAY_FLAGS syscall_arg__strtoul_strarray_flags + +bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret); +#define STUL_STRARRAYS syscall_arg__strtoul_strarrays + +size_t syscall_arg__scnprintf_x86_irq_vectors(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_X86_IRQ_VECTORS syscall_arg__scnprintf_x86_irq_vectors + +bool syscall_arg__strtoul_x86_irq_vectors(char *bf, size_t size, struct syscall_arg *arg, u64 *ret); +#define STUL_X86_IRQ_VECTORS syscall_arg__strtoul_x86_irq_vectors + +size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_X86_MSR syscall_arg__scnprintf_x86_MSR + +bool syscall_arg__strtoul_x86_MSR(char *bf, size_t size, struct syscall_arg *arg, u64 *ret); +#define STUL_X86_MSR syscall_arg__strtoul_x86_MSR + size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STRARRAYS syscall_arg__scnprintf_strarrays diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 859a8a9db2c6..9fa771a90d79 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -33,11 +33,11 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot -static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) -{ #include "trace/beauty/generated/mmap_flags_array.c" static DEFINE_STRARRAY(mmap_flags, "MAP_"); +static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) +{ return strarray__scnprintf_flags(&strarray__mmap_flags, bf, size, show_prefix, flags); } diff --git a/tools/perf/trace/beauty/tracepoints/Build b/tools/perf/trace/beauty/tracepoints/Build new file mode 100644 index 000000000000..e35087fdd108 --- /dev/null +++ b/tools/perf/trace/beauty/tracepoints/Build @@ -0,0 +1,2 @@ +perf-y += x86_irq_vectors.o +perf-y += x86_msr.o diff --git a/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.c b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.c new file mode 100644 index 000000000000..8eb9bc8534ac --- /dev/null +++ b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/x86_irq_vectors.c + * + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ + +#include "trace/beauty/beauty.h" + +#include "trace/beauty/generated/x86_arch_irq_vectors_array.c" + +static DEFINE_STRARRAY(x86_irq_vectors, "_VECTOR"); + +static size_t x86_irq_vectors__scnprintf(unsigned long vector, char *bf, size_t size, bool show_prefix) +{ + return strarray__scnprintf_suffix(&strarray__x86_irq_vectors, bf, size, "%#x", show_prefix, vector); +} + +size_t syscall_arg__scnprintf_x86_irq_vectors(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long vector = arg->val; + + return x86_irq_vectors__scnprintf(vector, bf, size, arg->show_string_prefix); +} + +bool syscall_arg__strtoul_x86_irq_vectors(char *bf, size_t size, struct syscall_arg *arg __maybe_unused, u64 *ret) +{ + return strarray__strtoul(&strarray__x86_irq_vectors, bf, size, ret); +} diff --git a/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh new file mode 100755 index 000000000000..f920003723b3 --- /dev/null +++ b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 +# (C) 2019, Arnaldo Carvalho de Melo <acme@redhat.com> + +if [ $# -ne 1 ] ; then + arch_x86_header_dir=tools/arch/x86/include/asm/ +else + arch_x86_header_dir=$1 +fi + +x86_irq_vectors=${arch_x86_header_dir}/irq_vectors.h + +# FIRST_EXTERNAL_VECTOR is not that useful, find what is its number +# and then replace whatever is using it and that is useful, which at +# the time of writing of this script was: IRQ_MOVE_CLEANUP_VECTOR. + +first_external_regex='^#define[[:space:]]+FIRST_EXTERNAL_VECTOR[[:space:]]+(0x[[:xdigit:]]+)$' +first_external_vector=$(egrep ${first_external_regex} ${x86_irq_vectors} | sed -r "s/${first_external_regex}/\1/g") + +printf "static const char *x86_irq_vectors[] = {\n" +regex='^#define[[:space:]]+([[:alnum:]_]+)_VECTOR[[:space:]]+(0x[[:xdigit:]]+)$' +sed -r "s/FIRST_EXTERNAL_VECTOR/${first_external_vector}/g" ${x86_irq_vectors} | \ +egrep ${regex} | \ + sed -r "s/${regex}/\2 \1/g" | sort -n | \ + xargs printf "\t[%s] = \"%s\",\n" +printf "};\n\n" + diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.c b/tools/perf/trace/beauty/tracepoints/x86_msr.c new file mode 100644 index 000000000000..6b8f129579d6 --- /dev/null +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/x86_msr.c + * + * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ + +#include "trace/beauty/beauty.h" + +#include "trace/beauty/generated/x86_arch_MSRs_array.c" + +static DEFINE_STRARRAY(x86_MSRs, "MSR_"); +static DEFINE_STRARRAY_OFFSET(x86_64_specific_MSRs, "MSR_", x86_64_specific_MSRs_offset); +static DEFINE_STRARRAY_OFFSET(x86_AMD_V_KVM_MSRs, "MSR_", x86_AMD_V_KVM_MSRs_offset); + +static struct strarray *x86_MSRs_tables[] = { + &strarray__x86_MSRs, + &strarray__x86_64_specific_MSRs, + &strarray__x86_AMD_V_KVM_MSRs, +}; + +static DEFINE_STRARRAYS(x86_MSRs_tables); + +static size_t x86_MSR__scnprintf(unsigned long msr, char *bf, size_t size, bool show_prefix) +{ + return strarrays__scnprintf(&strarrays__x86_MSRs_tables, bf, size, "%#x", show_prefix, msr); +} + +size_t syscall_arg__scnprintf_x86_MSR(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return x86_MSR__scnprintf(flags, bf, size, arg->show_string_prefix); +} + +bool syscall_arg__strtoul_x86_MSR(char *bf, size_t size, struct syscall_arg *arg __maybe_unused, u64 *ret) +{ + return strarrays__strtoul(&strarrays__x86_MSRs_tables, bf, size, ret); +} diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh new file mode 100755 index 000000000000..831c02cf0586 --- /dev/null +++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 1 ] ; then + arch_x86_header_dir=tools/arch/x86/include/asm/ +else + arch_x86_header_dir=$1 +fi + +x86_msr_index=${arch_x86_header_dir}/msr-index.h + +# Support all later, with some hash table, for now chop off +# Just the ones starting with 0x00000 so as to have a simple +# array. + +printf "static const char *x86_MSRs[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*' +egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|AMD64|IA32_TSCDEADLINE|IDT_FCR4)' | \ + sed -r "s/$regex/\2 \1/g" | sort -n | \ + xargs printf "\t[%s] = \"%s\",\n" +printf "};\n\n" + +# Remove MSR_K6_WHCR, clashes with MSR_LSTAR +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0000[[:xdigit:]]+)[[:space:]]*.*' +printf "#define x86_64_specific_MSRs_offset " +egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1 +printf "static const char *x86_64_specific_MSRs[] = {\n" +egrep $regex ${x86_msr_index} | \ + sed -r "s/$regex/\2 \1/g" | egrep -vw 'K6_WHCR' | sort -n | \ + xargs printf "\t[%s - x86_64_specific_MSRs_offset] = \"%s\",\n" +printf "};\n\n" + +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0010[[:xdigit:]]+)[[:space:]]*.*' +printf "#define x86_AMD_V_KVM_MSRs_offset " +egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1 +printf "static const char *x86_AMD_V_KVM_MSRs[] = {\n" +egrep $regex ${x86_msr_index} | \ + sed -r "s/$regex/\2 \1/g" | sort -n | \ + xargs printf "\t[%s - x86_AMD_V_KVM_MSRs_offset] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8dcfca1a882f..39814b1806a6 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -95,6 +95,7 @@ perf-y += cloexec.o perf-y += call-path.o perf-y += rwsem.o perf-y += thread-stack.o +perf-y += spark.o perf-$(CONFIG_AUXTRACE) += auxtrace.o perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ perf-$(CONFIG_AUXTRACE) += intel-pt.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e42bf572358c..ef1866a902c4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -43,6 +43,7 @@ #include <linux/string.h> #include <bpf/libbpf.h> #include <subcmd/parse-options.h> +#include <subcmd/run-command.h> /* FIXME: For the HE_COLORSET */ #include "ui/browser.h" @@ -853,6 +854,10 @@ static int __symbol__account_cycles(struct cyc_hist *ch, ch[offset].start < start) return 0; } + + if (ch[offset].num < NUM_SPARKS) + ch[offset].cycles_spark[ch[offset].num] = cycles; + ch[offset].have_start = have_start; ch[offset].start = start; ch[offset].cycles += cycles; @@ -1485,44 +1490,26 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start * means that it's not a disassembly line so should be treated differently. * The ops.raw part will be parsed further according to type of the instruction. */ -static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, +static int symbol__parse_objdump_line(struct symbol *sym, struct annotate_args *args, - int *line_nr) + char *parsed_line, int *line_nr) { struct map *map = args->ms.map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; - char *line = NULL, *parsed_line, *tmp, *tmp2; - size_t line_len; + char *tmp; s64 line_ip, offset = -1; regmatch_t match[2]; - if (getline(&line, &line_len, file) < 0) - return -1; - - if (!line) - return -1; - - line_ip = -1; - parsed_line = strim(line); - /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { *line_nr = atoi(parsed_line + match[1].rm_so); return 0; } - tmp = skip_spaces(parsed_line); - if (*tmp) { - /* - * Parse hexa addresses followed by ':' - */ - line_ip = strtoull(tmp, &tmp2, 16); - if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0') - line_ip = -1; - } - - if (line_ip != -1) { + /* Process hex address followed by ':'. */ + line_ip = strtoull(parsed_line, &tmp, 16); + if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { u64 start = map__rip_2objdump(map, sym->start), end = map__rip_2objdump(map, sym->end); @@ -1530,7 +1517,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, if ((u64)line_ip < start || (u64)line_ip >= end) offset = -1; else - parsed_line = tmp2 + 1; + parsed_line = tmp + 1; } args->offset = offset; @@ -1539,7 +1526,6 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, args->ms.sym = sym; dl = disasm_line__new(args); - free(line); (*line_nr)++; if (dl == NULL) @@ -1857,6 +1843,67 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, } #endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +/* + * Possibly create a new version of line with tabs expanded. Returns the + * existing or new line, storage is updated if a new line is allocated. If + * allocation fails then NULL is returned. + */ +static char *expand_tabs(char *line, char **storage, size_t *storage_len) +{ + size_t i, src, dst, len, new_storage_len, num_tabs; + char *new_line; + size_t line_len = strlen(line); + + for (num_tabs = 0, i = 0; i < line_len; i++) + if (line[i] == '\t') + num_tabs++; + + if (num_tabs == 0) + return line; + + /* + * Space for the line and '\0', less the leading and trailing + * spaces. Each tab may introduce 7 additional spaces. + */ + new_storage_len = line_len + 1 + (num_tabs * 7); + + new_line = malloc(new_storage_len); + if (new_line == NULL) { + pr_err("Failure allocating memory for tab expansion\n"); + return NULL; + } + + /* + * Copy regions starting at src and expand tabs. If there are two + * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces + * are inserted. + */ + for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { + if (line[i] == '\t') { + len = i - src; + memcpy(&new_line[dst], &line[src], len); + dst += len; + new_line[dst++] = ' '; + while (dst % 8 != 0) + new_line[dst++] = ' '; + src = i + 1; + num_tabs--; + } + } + + /* Expand the last region. */ + len = line_len + 1 - src; + memcpy(&new_line[dst], &line[src], len); + dst += len; + new_line[dst] = '\0'; + + free(*storage); + *storage = new_line; + *storage_len = new_storage_len; + return new_line; + +} + static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct annotation_options *opts = args->options; @@ -1868,10 +1915,19 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) struct kcore_extract kce; bool delete_extract = false; bool decomp = false; - int stdout_fd[2]; int lineno = 0; int nline; - pid_t pid; + char *line; + size_t line_len; + const char *objdump_argv[] = { + "/bin/sh", + "-c", + NULL, /* Will be the objdump command to run. */ + "--", + NULL, /* Will be the symfs path. */ + NULL, + }; + struct child_process objdump_process; int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); if (err) @@ -1901,7 +1957,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) - goto out; + return -1; decomp = true; strcpy(symfs_filename, tmp); @@ -1910,13 +1966,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s -C \"$1\" 2>/dev/null|grep -v \"$1:\"|expand", + " -l -d %s %s -C \"$1\"", opts->objdump_path ?: "objdump", opts->disassembler_style ? "-M " : "", opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), - opts->show_asm_raw ? "" : "--no-show-raw", + opts->show_asm_raw ? "" : "--no-show-raw-insn", opts->annotate_src ? "-S" : ""); if (err < 0) { @@ -1926,55 +1982,73 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) pr_debug("Executing: %s\n", command); - err = -1; - if (pipe(stdout_fd) < 0) { - pr_err("Failure creating the pipe to run %s\n", command); - goto out_free_command; - } + objdump_argv[2] = command; + objdump_argv[4] = symfs_filename; - pid = fork(); - if (pid < 0) { - pr_err("Failure forking to run %s\n", command); - goto out_close_stdout; - } - - if (pid == 0) { - close(stdout_fd[0]); - dup2(stdout_fd[1], 1); - close(stdout_fd[1]); - execl("/bin/sh", "sh", "-c", command, "--", symfs_filename, - NULL); - perror(command); - exit(-1); + /* Create a pipe to read from for stdout */ + memset(&objdump_process, 0, sizeof(objdump_process)); + objdump_process.argv = objdump_argv; + objdump_process.out = -1; + if (start_command(&objdump_process)) { + pr_err("Failure starting to run %s\n", command); + err = -1; + goto out_free_command; } - close(stdout_fd[1]); - - file = fdopen(stdout_fd[0], "r"); + file = fdopen(objdump_process.out, "r"); if (!file) { pr_err("Failure creating FILE stream for %s\n", command); /* * If we were using debug info should retry with * original binary. */ - goto out_free_command; + err = -1; + goto out_close_stdout; } + /* Storage for getline. */ + line = NULL; + line_len = 0; + nline = 0; while (!feof(file)) { + const char *match; + char *expanded_line; + + if (getline(&line, &line_len, file) < 0 || !line) + break; + + /* Skip lines containing "filename:" */ + match = strstr(line, symfs_filename); + if (match && match[strlen(symfs_filename)] == ':') + continue; + + expanded_line = strim(line); + expanded_line = expand_tabs(expanded_line, &line, &line_len); + if (!expanded_line) + break; + /* * The source code line number (lineno) needs to be kept in * across calls to symbol__parse_objdump_line(), so that it * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0) + if (symbol__parse_objdump_line(sym, args, expanded_line, + &lineno) < 0) break; nline++; } + free(line); - if (nline == 0) + err = finish_command(&objdump_process); + if (err) + pr_err("Error running %s\n", command); + + if (nline == 0) { + err = -1; pr_err("No output from %s\n", command); + } /* * kallsyms does not have symbol sizes so there may a nop at the end. @@ -1984,23 +2058,21 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) delete_last_nop(sym); fclose(file); - err = 0; + +out_close_stdout: + close(objdump_process.out); + out_free_command: free(command); -out_remove_tmp: - close(stdout_fd[0]); +out_remove_tmp: if (decomp) unlink(symfs_filename); if (delete_extract) kcore_extract__delete(&kce); -out: - return err; -out_close_stdout: - close(stdout_fd[1]); - goto out_free_command; + return err; } static void calc_percent(struct sym_hist *sym_hist, diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index d76fd0e81f46..3528bd4f8f21 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -11,6 +11,7 @@ #include <pthread.h> #include <asm/bug.h> #include "symbol_conf.h" +#include "spark.h" struct hist_browser_timer; struct hist_entry; @@ -235,6 +236,7 @@ struct cyc_hist { u64 cycles_aggr; u64 cycles_max; u64 cycles_min; + s64 cycles_spark[NUM_SPARKS]; u32 num; u32 num_aggr; u8 have_start; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 3baca06786fb..2a91a10ccfcc 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -2,6 +2,7 @@ #include "cpumap.h" #include "debug.h" #include "env.h" +#include "util/header.h" #include <linux/ctype.h> #include <linux/zalloc.h> #include "bpf-event.h" @@ -256,6 +257,21 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +int perf_env__read_cpuid(struct perf_env *env) +{ + char cpuid[128]; + int err = get_cpuid(cpuid, sizeof(cpuid)); + + if (err) + return err; + + free(env->cpuid); + env->cpuid = strdup(cpuid); + if (env->cpuid == NULL) + return ENOMEM; + return 0; +} + static int perf_env__read_arch(struct perf_env *env) { struct utsname uts; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index db40906e2937..a3059dc1abe5 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -104,6 +104,7 @@ void perf_env__exit(struct perf_env *env); int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); +int perf_env__read_cpuid(struct perf_env *env); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index de79c735e441..fdce590d2278 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -21,6 +21,7 @@ #include "../perf.h" #include "asm/bug.h" #include "bpf-event.h" +#include "util/string2.h" #include <signal.h> #include <unistd.h> #include <sched.h> @@ -42,6 +43,7 @@ #include <perf/evlist.h> #include <perf/evsel.h> #include <perf/cpumap.h> +#include <perf/mmap.h> #include <internal/xyarray.h> @@ -57,7 +59,6 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, { perf_evlist__init(&evlist->core); perf_evlist__set_maps(&evlist->core, cpus, threads); - fdarray__init(&evlist->core.pollfd, 64); evlist->workload.pid = -1; evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; } @@ -138,7 +139,7 @@ void evlist__exit(struct evlist *evlist) { zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); - fdarray__exit(&evlist->core.pollfd); + perf_evlist__exit(&evlist->core); } void evlist__delete(struct evlist *evlist) @@ -148,10 +149,6 @@ void evlist__delete(struct evlist *evlist) evlist__munmap(evlist); evlist__close(evlist); - perf_cpu_map__put(evlist->core.cpus); - perf_thread_map__put(evlist->core.threads); - evlist->core.cpus = NULL; - evlist->core.threads = NULL; evlist__purge(evlist); evlist__exit(evlist); free(evlist); @@ -186,6 +183,30 @@ void perf_evlist__splice_list_tail(struct evlist *evlist, } } +int __evlist__set_tracepoints_handlers(struct evlist *evlist, + const struct evsel_str_handler *assocs, size_t nr_assocs) +{ + struct evsel *evsel; + size_t i; + int err; + + for (i = 0; i < nr_assocs; i++) { + // Adding a handler for an event not in this evlist, just ignore it. + evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name); + if (evsel == NULL) + continue; + + err = -EEXIST; + if (evsel->handler != NULL) + goto out; + evsel->handler = assocs[i].handler; + } + + err = 0; +out: + return err; +} + void __perf_evlist__set_leader(struct list_head *list) { struct evsel *evsel, *leader; @@ -403,19 +424,9 @@ int evlist__add_pollfd(struct evlist *evlist, int fd) return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN); } -static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, - void *arg __maybe_unused) -{ - struct mmap *map = fda->priv[fd].ptr; - - if (map) - perf_mmap__put(map); -} - int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) { - return fdarray__filter(&evlist->core.pollfd, revents_and_mask, - perf_evlist__munmap_filtered, NULL); + return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask); } int evlist__poll(struct evlist *evlist, int timeout) @@ -423,22 +434,6 @@ int evlist__poll(struct evlist *evlist, int timeout) return perf_evlist__poll(&evlist->core, timeout); } -static void perf_evlist__set_sid_idx(struct evlist *evlist, - struct evsel *evsel, int idx, int cpu, - int thread) -{ - struct perf_sample_id *sid = SID(evsel, cpu, thread); - sid->idx = idx; - if (evlist->core.cpus && cpu >= 0) - sid->cpu = evlist->core.cpus->map[cpu]; - else - sid->cpu = -1; - if (!evsel->core.system_wide && evlist->core.threads && thread >= 0) - sid->tid = perf_thread_map__pid(evlist->core.threads, thread); - else - sid->tid = -1; -} - struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id) { struct hlist_head *head; @@ -577,11 +572,11 @@ static void evlist__munmap_nofree(struct evlist *evlist) if (evlist->mmap) for (i = 0; i < evlist->core.nr_mmaps; i++) - perf_mmap__munmap(&evlist->mmap[i]); + perf_mmap__munmap(&evlist->mmap[i].core); if (evlist->overwrite_mmap) for (i = 0; i < evlist->core.nr_mmaps; i++) - perf_mmap__munmap(&evlist->overwrite_mmap[i]); + perf_mmap__munmap(&evlist->overwrite_mmap[i].core); } void evlist__munmap(struct evlist *evlist) @@ -591,22 +586,26 @@ void evlist__munmap(struct evlist *evlist) zfree(&evlist->overwrite_mmap); } +static void perf_mmap__unmap_cb(struct perf_mmap *map) +{ + struct mmap *m = container_of(map, struct mmap, core); + + mmap__munmap(m); +} + static struct mmap *evlist__alloc_mmap(struct evlist *evlist, bool overwrite) { int i; struct mmap *map; - evlist->core.nr_mmaps = perf_cpu_map__nr(evlist->core.cpus); - if (perf_cpu_map__empty(evlist->core.cpus)) - evlist->core.nr_mmaps = perf_thread_map__nr(evlist->core.threads); map = zalloc(evlist->core.nr_mmaps * sizeof(struct mmap)); if (!map) return NULL; for (i = 0; i < evlist->core.nr_mmaps; i++) { - map[i].core.fd = -1; - map[i].core.overwrite = overwrite; + struct perf_mmap *prev = i ? &map[i - 1].core : NULL; + /* * When the perf_mmap() call is made we grab one refcount, plus * one extra to let perf_mmap__consume() get the last @@ -616,151 +615,56 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and * thus does perf_mmap__get() on it. */ - refcount_set(&map[i].core.refcnt, 0); + perf_mmap__init(&map[i].core, prev, overwrite, perf_mmap__unmap_cb); } + return map; } -static bool -perf_evlist__should_poll(struct evlist *evlist __maybe_unused, - struct evsel *evsel) +static void +perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, + struct perf_mmap_param *_mp, + int idx, bool per_cpu) { - if (evsel->core.attr.write_backward) - return false; - return true; + struct evlist *evlist = container_of(_evlist, struct evlist, core); + struct mmap_params *mp = container_of(_mp, struct mmap_params, core); + + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu); } -static int evlist__mmap_per_evsel(struct evlist *evlist, int idx, - struct mmap_params *mp, int cpu_idx, - int thread, int *_output, int *_output_overwrite) +static struct perf_mmap* +perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) { - struct evsel *evsel; - int revent; - int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx); - - evlist__for_each_entry(evlist, evsel) { - struct mmap *maps = evlist->mmap; - int *output = _output; - int fd; - int cpu; - - mp->prot = PROT_READ | PROT_WRITE; - if (evsel->core.attr.write_backward) { - output = _output_overwrite; - maps = evlist->overwrite_mmap; - - if (!maps) { - maps = evlist__alloc_mmap(evlist, true); - if (!maps) - return -1; - evlist->overwrite_mmap = maps; - if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) - perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); - } - mp->prot &= ~PROT_WRITE; - } - - if (evsel->core.system_wide && thread) - continue; + struct evlist *evlist = container_of(_evlist, struct evlist, core); + struct mmap *maps; - cpu = perf_cpu_map__idx(evsel->core.cpus, evlist_cpu); - if (cpu == -1) - continue; - - fd = FD(evsel, cpu, thread); + maps = overwrite ? evlist->overwrite_mmap : evlist->mmap; - if (*output == -1) { - *output = fd; + if (!maps) { + maps = evlist__alloc_mmap(evlist, overwrite); + if (!maps) + return NULL; - if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0) - return -1; + if (overwrite) { + evlist->overwrite_mmap = maps; + if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) + perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } else { - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) - return -1; - - perf_mmap__get(&maps[idx]); - } - - revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; - - /* - * The system_wide flag causes a selected event to be opened - * always without a pid. Consequently it will never get a - * POLLHUP, but it is used for tracking in combination with - * other events, so it should not need to be polled anyway. - * Therefore don't add it for polling. - */ - if (!evsel->core.system_wide && - perf_evlist__add_pollfd(&evlist->core, fd, &maps[idx], revent) < 0) { - perf_mmap__put(&maps[idx]); - return -1; - } - - if (evsel->core.attr.read_format & PERF_FORMAT_ID) { - if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, cpu, thread, - fd) < 0) - return -1; - perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, - thread); + evlist->mmap = maps; } } - return 0; + return &maps[idx].core; } -static int evlist__mmap_per_cpu(struct evlist *evlist, - struct mmap_params *mp) +static int +perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp, + int output, int cpu) { - int cpu, thread; - int nr_cpus = perf_cpu_map__nr(evlist->core.cpus); - int nr_threads = perf_thread_map__nr(evlist->core.threads); - - pr_debug2("perf event ring buffer mmapped per cpu\n"); - for (cpu = 0; cpu < nr_cpus; cpu++) { - int output = -1; - int output_overwrite = -1; - - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, - true); + struct mmap *map = container_of(_map, struct mmap, core); + struct mmap_params *mp = container_of(_mp, struct mmap_params, core); - for (thread = 0; thread < nr_threads; thread++) { - if (evlist__mmap_per_evsel(evlist, cpu, mp, cpu, - thread, &output, &output_overwrite)) - goto out_unmap; - } - } - - return 0; - -out_unmap: - evlist__munmap_nofree(evlist); - return -1; -} - -static int evlist__mmap_per_thread(struct evlist *evlist, - struct mmap_params *mp) -{ - int thread; - int nr_threads = perf_thread_map__nr(evlist->core.threads); - - pr_debug2("perf event ring buffer mmapped per thread\n"); - for (thread = 0; thread < nr_threads; thread++) { - int output = -1; - int output_overwrite = -1; - - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, - false); - - if (evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, - &output, &output_overwrite)) - goto out_unmap; - } - - return 0; - -out_unmap: - evlist__munmap_nofree(evlist); - return -1; + return mmap__mmap(map, mp, output, cpu); } unsigned long perf_event_mlock_kb_in_pages(void) @@ -890,43 +794,30 @@ int evlist__mmap_ex(struct evlist *evlist, unsigned int pages, bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, int comp_level) { - struct evsel *evsel; - const struct perf_cpu_map *cpus = evlist->core.cpus; - const struct perf_thread_map *threads = evlist->core.threads; /* * Delay setting mp.prot: set it before calling perf_mmap__mmap. * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. */ - struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush, - .comp_level = comp_level }; - - if (!evlist->mmap) - evlist->mmap = evlist__alloc_mmap(evlist, false); - if (!evlist->mmap) - return -ENOMEM; - - if (evlist->core.pollfd.entries == NULL && perf_evlist__alloc_pollfd(&evlist->core) < 0) - return -ENOMEM; + struct mmap_params mp = { + .nr_cblocks = nr_cblocks, + .affinity = affinity, + .flush = flush, + .comp_level = comp_level + }; + struct perf_evlist_mmap_ops ops = { + .idx = perf_evlist__mmap_cb_idx, + .get = perf_evlist__mmap_cb_get, + .mmap = perf_evlist__mmap_cb_mmap, + }; evlist->core.mmap_len = evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->core.mmap_len); - mp.mask = evlist->core.mmap_len - page_size - 1; auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->core.mmap_len, auxtrace_pages, auxtrace_overwrite); - evlist__for_each_entry(evlist, evsel) { - if ((evsel->core.attr.read_format & PERF_FORMAT_ID) && - evsel->core.sample_id == NULL && - perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr) < 0) - return -ENOMEM; - } - - if (perf_cpu_map__empty(cpus)) - return evlist__mmap_per_thread(evlist, &mp); - - return evlist__mmap_per_cpu(evlist, &mp); + return perf_evlist__mmap_ops(&evlist->core, &ops, &mp.core); } int evlist__mmap(struct evlist *evlist, unsigned int pages) @@ -1029,6 +920,9 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) struct evsel *evsel; int err = 0; + if (filter == NULL) + return -1; + evlist__for_each_entry(evlist, evsel) { if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) continue; @@ -1041,16 +935,35 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) return err; } -int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter) +{ + struct evsel *evsel; + int err = 0; + + if (filter == NULL) + return -1; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) + continue; + + err = perf_evsel__append_tp_filter(evsel, filter); + if (err) + break; + } + + return err; +} + +char *asprintf__tp_filter_pids(size_t npids, pid_t *pids) { char *filter; - int ret = -1; size_t i; for (i = 0; i < npids; ++i) { if (i == 0) { if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) - return -1; + return NULL; } else { char *tmp; @@ -1062,9 +975,18 @@ int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t * } } - ret = perf_evlist__set_tp_filter(evlist, filter); + return filter; out_free: free(filter); + return NULL; +} + +int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +{ + char *filter = asprintf__tp_filter_pids(npids, pids); + int ret = perf_evlist__set_tp_filter(evlist, filter); + + free(filter); return ret; } @@ -1073,6 +995,20 @@ int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) return perf_evlist__set_tp_filter_pids(evlist, 1, &pid); } +int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) +{ + char *filter = asprintf__tp_filter_pids(npids, pids); + int ret = perf_evlist__append_tp_filter(evlist, filter); + + free(filter); + return ret; +} + +int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid) +{ + return perf_evlist__append_tp_filter_pids(evlist, 1, &pid); +} + bool perf_evlist__valid_sample_type(struct evlist *evlist) { struct evsel *pos; @@ -1729,9 +1665,9 @@ static void *perf_evlist__poll_thread(void *arg) struct mmap *map = &evlist->mmap[i]; union perf_event *event; - if (perf_mmap__read_init(map)) + if (perf_mmap__read_init(&map->core)) continue; - while ((event = perf_mmap__read_event(map)) != NULL) { + while ((event = perf_mmap__read_event(&map->core)) != NULL) { struct evsel *evsel = perf_evlist__event2evsel(evlist, event); if (evsel && evsel->side_band.cb) @@ -1739,10 +1675,10 @@ static void *perf_evlist__poll_thread(void *arg) else pr_warning("cannot locate proper evsel for the side band event\n"); - perf_mmap__consume(map); + perf_mmap__consume(&map->core); got_data = true; } - perf_mmap__read_done(map); + perf_mmap__read_done(&map->core); } if (draining && !got_data) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 7cfe75522ba5..13051409fd22 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -118,6 +118,13 @@ void perf_evlist__stop_sb_thread(struct evlist *evlist); int perf_evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler); +int __evlist__set_tracepoints_handlers(struct evlist *evlist, + const struct evsel_str_handler *assocs, + size_t nr_assocs); + +#define evlist__set_tracepoints_handlers(evlist, array) \ + __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) + void __perf_evlist__set_sample_bit(struct evlist *evlist, enum perf_event_sample_format bit); void __perf_evlist__reset_sample_bit(struct evlist *evlist, @@ -133,6 +140,11 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter); int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); +int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter); + +int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); +int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); + struct evsel * perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a35dc57d5995..063d1b93c53d 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -13,6 +13,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> // sysconf() +#include <perf/mmap.h> #ifdef HAVE_LIBNUMA_SUPPORT #include <numaif.h> #endif @@ -23,116 +24,9 @@ #include "../perf.h" #include <internal/lib.h> /* page_size */ -size_t perf_mmap__mmap_len(struct mmap *map) +size_t mmap__mmap_len(struct mmap *map) { - return map->core.mask + 1 + page_size; -} - -/* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct mmap *map, - u64 *startp, u64 end) -{ - unsigned char *data = map->core.base + page_size; - union perf_event *event = NULL; - int diff = end - *startp; - - if (diff >= (int)sizeof(event->header)) { - size_t size; - - event = (union perf_event *)&data[*startp & map->core.mask]; - size = event->header.size; - - if (size < sizeof(event->header) || diff < (int)size) - return NULL; - - /* - * Event straddles the mmap boundary -- header should always - * be inside due to u64 alignment of output. - */ - if ((*startp & map->core.mask) + size != ((*startp + size) & map->core.mask)) { - unsigned int offset = *startp; - unsigned int len = min(sizeof(*event), size), cpy; - void *dst = map->core.event_copy; - - do { - cpy = min(map->core.mask + 1 - (offset & map->core.mask), len); - memcpy(dst, &data[offset & map->core.mask], cpy); - offset += cpy; - dst += cpy; - len -= cpy; - } while (len); - - event = (union perf_event *)map->core.event_copy; - } - - *startp += size; - } - - return event; -} - -/* - * Read event from ring buffer one by one. - * Return one event for each call. - * - * Usage: - * perf_mmap__read_init() - * while(event = perf_mmap__read_event()) { - * //process the event - * perf_mmap__consume() - * } - * perf_mmap__read_done() - */ -union perf_event *perf_mmap__read_event(struct mmap *map) -{ - union perf_event *event; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->core.refcnt)) - return NULL; - - /* non-overwirte doesn't pause the ringbuffer */ - if (!map->core.overwrite) - map->core.end = perf_mmap__read_head(map); - - event = perf_mmap__read(map, &map->core.start, map->core.end); - - if (!map->core.overwrite) - map->core.prev = map->core.start; - - return event; -} - -static bool perf_mmap__empty(struct mmap *map) -{ - return perf_mmap__read_head(map) == map->core.prev && !map->auxtrace_mmap.base; -} - -void perf_mmap__get(struct mmap *map) -{ - refcount_inc(&map->core.refcnt); -} - -void perf_mmap__put(struct mmap *map) -{ - BUG_ON(map->core.base && refcount_read(&map->core.refcnt) == 0); - - if (refcount_dec_and_test(&map->core.refcnt)) - perf_mmap__munmap(map); -} - -void perf_mmap__consume(struct mmap *map) -{ - if (!map->core.overwrite) { - u64 old = map->core.prev; - - perf_mmap__write_tail(map, old); - } - - if (refcount_read(&map->core.refcnt) == 1 && perf_mmap__empty(map)) - perf_mmap__put(map); + return perf_mmap__mmap_len(&map->core); } int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, @@ -170,7 +64,7 @@ static int perf_mmap__aio_enabled(struct mmap *map) #ifdef HAVE_LIBNUMA_SUPPORT static int perf_mmap__aio_alloc(struct mmap *map, int idx) { - map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + map->aio.data[idx] = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->aio.data[idx] == MAP_FAILED) { map->aio.data[idx] = NULL; @@ -183,7 +77,7 @@ static int perf_mmap__aio_alloc(struct mmap *map, int idx) static void perf_mmap__aio_free(struct mmap *map, int idx) { if (map->aio.data[idx]) { - munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); + munmap(map->aio.data[idx], mmap__mmap_len(map)); map->aio.data[idx] = NULL; } } @@ -196,7 +90,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { data = map->aio.data[idx]; - mmap_len = perf_mmap__mmap_len(map); + mmap_len = mmap__mmap_len(map); node_mask = 1UL << cpu__get_node(cpu); if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", @@ -210,7 +104,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) #else /* !HAVE_LIBNUMA_SUPPORT */ static int perf_mmap__aio_alloc(struct mmap *map, int idx) { - map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); + map->aio.data[idx] = malloc(mmap__mmap_len(map)); if (map->aio.data[idx] == NULL) return -1; @@ -311,19 +205,13 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused) } #endif -void perf_mmap__munmap(struct mmap *map) +void mmap__munmap(struct mmap *map) { perf_mmap__aio_munmap(map); if (map->data != NULL) { - munmap(map->data, perf_mmap__mmap_len(map)); + munmap(map->data, mmap__mmap_len(map)); map->data = NULL; } - if (map->core.base != NULL) { - munmap(map->core.base, perf_mmap__mmap_len(map)); - map->core.base = NULL; - map->core.fd = -1; - refcount_set(&map->core.refcnt, 0); - } auxtrace_mmap__munmap(&map->auxtrace_mmap); } @@ -353,34 +241,13 @@ static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params CPU_SET(map->core.cpu, &map->affinity_mask); } -int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) { - /* - * The last one will be done at perf_mmap__consume(), so that we - * make sure we don't prevent tools from consuming every last event in - * the ring buffer. - * - * I.e. we can get the POLLHUP meaning that the fd doesn't exist - * anymore, but the last events for it are still in the ring buffer, - * waiting to be consumed. - * - * Tools can chose to ignore this at their own discretion, but the - * evlist layer can't just drop it when filtering events in - * perf_evlist__filter_pollfd(). - */ - refcount_set(&map->core.refcnt, 2); - map->core.prev = 0; - map->core.mask = mp->mask; - map->core.base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, - MAP_SHARED, fd, 0); - if (map->core.base == MAP_FAILED) { + if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", errno); - map->core.base = NULL; return -1; } - map->core.fd = fd; - map->core.cpu = cpu; perf_mmap__setup_affinity_mask(map, mp); @@ -389,7 +256,7 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) map->comp_level = mp->comp_level; if (map->comp_level && !perf_mmap__aio_enabled(map)) { - map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->data == MAP_FAILED) { pr_debug2("failed to mmap data buffer, error %d\n", @@ -406,96 +273,16 @@ int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) return perf_mmap__aio_mmap(map, mp); } -static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) -{ - struct perf_event_header *pheader; - u64 evt_head = *start; - int size = mask + 1; - - pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); - pheader = (struct perf_event_header *)(buf + (*start & mask)); - while (true) { - if (evt_head - *start >= (unsigned int)size) { - pr_debug("Finished reading overwrite ring buffer: rewind\n"); - if (evt_head - *start > (unsigned int)size) - evt_head -= pheader->size; - *end = evt_head; - return 0; - } - - pheader = (struct perf_event_header *)(buf + (evt_head & mask)); - - if (pheader->size == 0) { - pr_debug("Finished reading overwrite ring buffer: get start\n"); - *end = evt_head; - return 0; - } - - evt_head += pheader->size; - pr_debug3("move evt_head: %"PRIx64"\n", evt_head); - } - WARN_ONCE(1, "Shouldn't get here\n"); - return -1; -} - -/* - * Report the start and end of the available data in ringbuffer - */ -static int __perf_mmap__read_init(struct mmap *md) -{ - u64 head = perf_mmap__read_head(md); - u64 old = md->core.prev; - unsigned char *data = md->core.base + page_size; - unsigned long size; - - md->core.start = md->core.overwrite ? head : old; - md->core.end = md->core.overwrite ? old : head; - - if ((md->core.end - md->core.start) < md->core.flush) - return -EAGAIN; - - size = md->core.end - md->core.start; - if (size > (unsigned long)(md->core.mask) + 1) { - if (!md->core.overwrite) { - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - - md->core.prev = head; - perf_mmap__consume(md); - return -EAGAIN; - } - - /* - * Backward ring buffer is full. We still have a chance to read - * most of data from it. - */ - if (overwrite_rb_find_range(data, md->core.mask, &md->core.start, &md->core.end)) - return -EINVAL; - } - - return 0; -} - -int perf_mmap__read_init(struct mmap *map) -{ - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->core.refcnt)) - return -ENOENT; - - return __perf_mmap__read_init(map); -} - int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)) { - u64 head = perf_mmap__read_head(md); + u64 head = perf_mmap__read_head(&md->core); unsigned char *data = md->core.base + page_size; unsigned long size; void *buf; int rc = 0; - rc = perf_mmap__read_init(md); + rc = perf_mmap__read_init(&md->core); if (rc < 0) return (rc == -EAGAIN) ? 1 : -1; @@ -522,24 +309,7 @@ int perf_mmap__push(struct mmap *md, void *to, } md->core.prev = head; - perf_mmap__consume(md); + perf_mmap__consume(&md->core); out: return rc; } - -/* - * Mandatory for overwrite mode - * The direction of overwrite mode is backward. - * The last perf_mmap__read() will set tail to map->core.prev. - * Need to correct the map->core.prev to head which is the end of next read. - */ -void perf_mmap__read_done(struct mmap *map) -{ - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->core.refcnt)) - return; - - map->core.prev = perf_mmap__read_head(map); -} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index e567c1c875bd..bee4e83f7109 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -37,37 +37,19 @@ struct mmap { }; struct mmap_params { - int prot, mask, nr_cblocks, affinity, flush, comp_level; + struct perf_mmap_param core; + int nr_cblocks, affinity, flush, comp_level; struct auxtrace_mmap_params auxtrace_mp; }; -int perf_mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); -void perf_mmap__munmap(struct mmap *map); - -void perf_mmap__get(struct mmap *map); -void perf_mmap__put(struct mmap *map); - -void perf_mmap__consume(struct mmap *map); - -static inline u64 perf_mmap__read_head(struct mmap *mm) -{ - return ring_buffer_read_head(mm->core.base); -} - -static inline void perf_mmap__write_tail(struct mmap *md, u64 tail) -{ - ring_buffer_write_tail(md->core.base, tail); -} +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); +void mmap__munmap(struct mmap *map); union perf_event *perf_mmap__read_forward(struct mmap *map); -union perf_event *perf_mmap__read_event(struct mmap *map); - int perf_mmap__push(struct mmap *md, void *to, int push(struct mmap *map, void *to, void *buf, size_t size)); -size_t perf_mmap__mmap_len(struct mmap *map); +size_t mmap__mmap_len(struct mmap *map); -int perf_mmap__read_init(struct mmap *md); -void perf_mmap__read_done(struct mmap *map); #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b5e2adef49de..db882f630f7e 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2600,7 +2600,7 @@ out_enomem: * Print the help text for the event symbols: */ void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag) + bool long_desc, bool details_flag, bool deprecated) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -2612,7 +2612,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_hwcache_events(event_glob, name_only); print_pmu_events(event_glob, name_only, quiet_flag, long_desc, - details_flag); + details_flag, deprecated); if (event_glob != NULL) return; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 616ca1eda0eb..769e07cddaa2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -195,7 +195,7 @@ void parse_events_evlist_error(struct parse_events_state *parse_state, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc, bool details_flag); + bool long_desc, bool details_flag, bool deprecated); struct event_symbol { const char *symbol; diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index ef46c2848808..e687497b3aac 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -13,7 +13,7 @@ static int __parse_regs(const struct option *opt, const char *str, int unset, bool intr) { uint64_t *mode = (uint64_t *)opt->value; - const struct sample_reg *r; + const struct sample_reg *r = NULL; char *s, *os = NULL, *p; int ret = -1; uint64_t mask; @@ -46,19 +46,23 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr) if (!strcmp(s, "?")) { fprintf(stderr, "available registers: "); +#ifdef HAVE_PERF_REGS_SUPPORT for (r = sample_reg_masks; r->name; r++) { if (r->mask & mask) fprintf(stderr, "%s ", r->name); } +#endif fputc('\n', stderr); /* just printing available regs */ return -1; } +#ifdef HAVE_PERF_REGS_SUPPORT for (r = sample_reg_masks; r->name; r++) { if ((r->mask & mask) && !strcasecmp(s, r->name)) break; } - if (!r->name) { +#endif + if (!r || !r->name) { ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n", s, intr ? "-I" : "--user-regs="); goto error; diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 2774cec1f15f..5ee47ae1509c 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -3,10 +3,6 @@ #include "perf_regs.h" #include "event.h" -const struct sample_reg __weak sample_reg_masks[] = { - SMPL_REG_END -}; - int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, char **new_op __maybe_unused) { diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 47fe34e5f7d5..e014c2c038f4 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -15,8 +15,6 @@ struct sample_reg { #define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } #define SMPL_REG_END { .name = NULL } -extern const struct sample_reg sample_reg_masks[]; - enum { SDT_ARG_VALID = 0, SDT_ARG_SKIP, @@ -27,6 +25,8 @@ uint64_t arch__intr_reg_mask(void); uint64_t arch__user_reg_mask(void); #ifdef HAVE_PERF_REGS_SUPPORT +extern const struct sample_reg sample_reg_masks[]; + #include <perf_regs.h> #define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 5608da82ad23..adbe97e941dd 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -308,7 +308,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *long_desc, char *topic, char *unit, char *perpkg, char *metric_expr, - char *metric_name) + char *metric_name, + char *deprecated) { struct parse_events_term *term; struct perf_pmu_alias *alias; @@ -325,6 +326,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, alias->unit[0] = '\0'; alias->per_pkg = false; alias->snapshot = false; + alias->deprecated = false; ret = parse_events_terms(&alias->terms, val); if (ret) { @@ -379,6 +381,9 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1; alias->str = strdup(newval); + if (deprecated) + alias->deprecated = true; + if (!perf_pmu_merge_alias(alias, list)) list_add_tail(&alias->list, list); @@ -400,7 +405,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI strim(buf); return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, - NULL, NULL, NULL); + NULL, NULL, NULL, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -787,7 +792,8 @@ new_alias: (char *)pe->long_desc, (char *)pe->topic, (char *)pe->unit, (char *)pe->perpkg, (char *)pe->metric_expr, - (char *)pe->metric_name); + (char *)pe->metric_name, + (char *)pe->deprecated); } } @@ -1383,7 +1389,7 @@ static void wordwrap(char *s, int start, int max, int corr) } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag) + bool long_desc, bool details_flag, bool deprecated) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1414,6 +1420,9 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, format_alias(buf, sizeof(buf), pmu, alias); bool is_cpu = !strcmp(pmu->name, "cpu"); + if (alias->deprecated && !deprecated) + continue; + if (event_glob != NULL && !(strglobmatch_nocase(name, event_glob) || (!is_cpu && strglobmatch_nocase(alias->name, diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index f36ade6df76d..3e8cd31a89cc 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -57,6 +57,7 @@ struct perf_pmu_alias { double scale; bool per_pkg; bool snapshot; + bool deprecated; char *metric_expr; char *metric_name; }; @@ -85,7 +86,8 @@ int perf_pmu__format_parse(char *dir, struct list_head *head); struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc, bool details_flag); + bool long_desc, bool details_flag, + bool deprecated); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 02460362256d..25118605f3f8 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,6 +6,7 @@ #include <linux/err.h> #include <perf/cpumap.h> #include <traceevent/event-parse.h> +#include <perf/mmap.h> #include "evlist.h" #include "callchain.h" #include "evsel.h" @@ -1022,10 +1023,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, if (!md) return NULL; - if (perf_mmap__read_init(md) < 0) + if (perf_mmap__read_init(&md->core) < 0) goto end; - event = perf_mmap__read_event(md); + event = perf_mmap__read_event(&md->core); if (event != NULL) { PyObject *pyevent = pyrf_event__new(event); struct pyrf_event *pevent = (struct pyrf_event *)pyevent; @@ -1045,7 +1046,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, err = perf_evsel__parse_sample(evsel, event, &pevent->sample); /* Consume the even only after we parsed it out. */ - perf_mmap__consume(md); + perf_mmap__consume(&md->core); if (err) return PyErr_Format(PyExc_OSError, diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 15961854ba67..741f040648b5 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -539,10 +539,11 @@ static int perl_stop_script(void) static int perl_generate_script(struct tep_handle *pevent, const char *outfile) { + int i, not_first, count, nr_events; + struct tep_event **all_events; struct tep_event *event = NULL; struct tep_format_field *f; char fname[PATH_MAX]; - int not_first, count; FILE *ofp; sprintf(fname, "%s.pl", outfile); @@ -603,8 +604,11 @@ sub print_backtrace\n\ }\n\n\ "); + nr_events = tep_get_events_count(pevent); + all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID); - while ((event = trace_find_next_event(pevent, event))) { + for (i = 0; all_events && i < nr_events; i++) { + event = all_events[i]; fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); fprintf(ofp, "\tmy ("); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 5d341efc3237..93c03b39cd9c 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1687,10 +1687,11 @@ static int python_stop_script(void) static int python_generate_script(struct tep_handle *pevent, const char *outfile) { + int i, not_first, count, nr_events; + struct tep_event **all_events; struct tep_event *event = NULL; struct tep_format_field *f; char fname[PATH_MAX]; - int not_first, count; FILE *ofp; sprintf(fname, "%s.py", outfile); @@ -1735,7 +1736,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile fprintf(ofp, "def trace_end():\n"); fprintf(ofp, "\tprint(\"in trace_end\")\n\n"); - while ((event = trace_find_next_event(pevent, event))) { + nr_events = tep_get_events_count(pevent); + all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID); + + for (i = 0; all_events && i < nr_events; i++) { + event = all_events[i]; fprintf(ofp, "def %s__%s(", event->system, event->name); fprintf(ofp, "event_name, "); fprintf(ofp, "context, "); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 061bb4d6a3f5..6cc32f5ec043 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2355,35 +2355,6 @@ void perf_session__fprintf_info(struct perf_session *session, FILE *fp, fprintf(fp, "# ========\n#\n"); } - -int __perf_session__set_tracepoints_handlers(struct perf_session *session, - const struct evsel_str_handler *assocs, - size_t nr_assocs) -{ - struct evsel *evsel; - size_t i; - int err; - - for (i = 0; i < nr_assocs; i++) { - /* - * Adding a handler for an event not in the session, - * just ignore it. - */ - evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name); - if (evsel == NULL) - continue; - - err = -EEXIST; - if (evsel->handler != NULL) - goto out; - evsel->handler = assocs[i].handler; - } - - err = 0; -out: - return err; -} - int perf_event__process_id_index(struct perf_session *session, union perf_event *event) { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index b4c9428c18f0..8456e1d868fd 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -120,12 +120,8 @@ void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full); struct evsel_str_handler; -int __perf_session__set_tracepoints_handlers(struct perf_session *session, - const struct evsel_str_handler *assocs, - size_t nr_assocs); - #define perf_session__set_tracepoints_handlers(session, array) \ - __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array)) + __evlist__set_tracepoints_handlers(session->evlist, array, ARRAY_SIZE(array)) extern volatile int session_done; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 7b93f34ac1f4..5aff9542d9b7 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -10,6 +10,8 @@ #include "callchain.h" #include "values.h" #include "hist.h" +#include "stat.h" +#include "spark.h" struct option; struct thread; @@ -71,6 +73,8 @@ struct hist_entry_diff { /* PERF_HPP_DIFF__CYCLES */ s64 cycles; }; + struct stats stats; + unsigned long svals[NUM_SPARKS]; }; struct hist_entry_ops { diff --git a/tools/perf/util/spark.c b/tools/perf/util/spark.c new file mode 100644 index 000000000000..70272a8b81a6 --- /dev/null +++ b/tools/perf/util/spark.c @@ -0,0 +1,34 @@ +#include <stdio.h> +#include <limits.h> +#include <string.h> +#include <stdlib.h> +#include "spark.h" +#include "stat.h" + +#define SPARK_SHIFT 8 + +/* Print spark lines on outf for numval values in val. */ +int print_spark(char *bf, int size, unsigned long *val, int numval) +{ + static const char *ticks[NUM_SPARKS] = { + "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█" + }; + int i, printed = 0; + unsigned long min = ULONG_MAX, max = 0, f; + + for (i = 0; i < numval; i++) { + if (val[i] < min) + min = val[i]; + if (val[i] > max) + max = val[i]; + } + f = ((max - min) << SPARK_SHIFT) / (NUM_SPARKS - 1); + if (f < 1) + f = 1; + for (i = 0; i < numval; i++) { + printed += scnprintf(bf + printed, size - printed, "%s", + ticks[((val[i] - min) << SPARK_SHIFT) / f]); + } + + return printed; +} diff --git a/tools/perf/util/spark.h b/tools/perf/util/spark.h new file mode 100644 index 000000000000..25402d7d7a64 --- /dev/null +++ b/tools/perf/util/spark.h @@ -0,0 +1,8 @@ +#ifndef SPARK_H +#define SPARK_H 1 + +#define NUM_SPARKS 8 + +int print_spark(char *bf, int size, unsigned long *val, int numval); + +#endif diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ebdd130557fb..6822e4ffe224 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -490,6 +490,16 @@ int create_perf_stat_counter(struct evsel *evsel, if (config->identifier) attr->sample_type = PERF_SAMPLE_IDENTIFIER; + if (config->all_user) { + attr->exclude_kernel = 1; + attr->exclude_user = 0; + } + + if (config->all_kernel) { + attr->exclude_kernel = 0; + attr->exclude_user = 1; + } + /* * Disabling all counters initially, they will be enabled * either manually by us or by kernel via enable_on_exec diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index edbeb2f63e8d..081c4a5113c6 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -106,6 +106,8 @@ struct perf_stat_config { bool big_num; bool no_merge; bool walltime_run_table; + bool all_kernel; + bool all_user; FILE *output; unsigned int interval; unsigned int timeout; diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 708805f5573e..73df616ced43 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -4,6 +4,7 @@ #include <linux/string.h> #include <linux/types.h> +#include <sys/types.h> // pid_t #include <stddef.h> #include <string.h> @@ -32,6 +33,8 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int return asprintf_expr_inout_ints(var, false, nints, ints); } +char *asprintf__tp_filter_pids(size_t npids, pid_t *pids); + char *strpbrk_esc(char *str, const char *stopset); char *strdup_esc(const char *str); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0b0c6b5b1899..cc2a89b99d3d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -11,6 +11,7 @@ #include <stdio.h> #include "path.h" #include "symbol_conf.h" +#include "spark.h" #ifdef HAVE_LIBELF_SUPPORT #include <libelf.h> @@ -111,6 +112,7 @@ struct block_info { u64 end; u64 cycles; u64 cycles_aggr; + s64 cycles_spark[NUM_SPARKS]; int num; int num_aggr; refcount_t refcnt; diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 9796a2e43f67..302443921681 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -458,10 +458,11 @@ bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, return true; } -int perf_time__parse_for_ranges(const char *time_str, +int perf_time__parse_for_ranges_reltime(const char *time_str, struct perf_session *session, struct perf_time_interval **ranges, - int *range_size, int *range_num) + int *range_size, int *range_num, + bool reltime) { bool has_percent = strchr(time_str, '%'); struct perf_time_interval *ptime_range; @@ -471,7 +472,7 @@ int perf_time__parse_for_ranges(const char *time_str, if (!ptime_range) return -ENOMEM; - if (has_percent) { + if (has_percent || reltime) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { pr_err("HINT: no first/last sample time found in perf data.\n" @@ -479,7 +480,9 @@ int perf_time__parse_for_ranges(const char *time_str, "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); goto error; } + } + if (has_percent) { num = perf_time__percent_parse_str( ptime_range, size, time_str, @@ -492,6 +495,15 @@ int perf_time__parse_for_ranges(const char *time_str, if (num < 0) goto error_invalid; + if (reltime) { + int i; + + for (i = 0; i < num; i++) { + ptime_range[i].start += session->evlist->first_sample_time; + ptime_range[i].end += session->evlist->first_sample_time; + } + } + *range_size = size; *range_num = num; *ranges = ptime_range; @@ -504,6 +516,15 @@ error: return ret; } +int perf_time__parse_for_ranges(const char *time_str, + struct perf_session *session, + struct perf_time_interval **ranges, + int *range_size, int *range_num) +{ + return perf_time__parse_for_ranges_reltime(time_str, session, ranges, + range_size, range_num, false); +} + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) { u64 sec = timestamp / NSEC_PER_SEC; diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 4f42988eb2f7..1142b0bddd5e 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -26,6 +26,11 @@ bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, struct perf_session; +int perf_time__parse_for_ranges_reltime(const char *str, struct perf_session *session, + struct perf_time_interval **ranges, + int *range_size, int *range_num, + bool reltime); + int perf_time__parse_for_ranges(const char *str, struct perf_session *session, struct perf_time_interval **ranges, int *range_size, int *range_num); diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 5d6bfc70b210..9634f0ae57be 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -173,37 +173,6 @@ int parse_event_file(struct tep_handle *pevent, return tep_parse_event(pevent, buf, size, sys); } -struct tep_event *trace_find_next_event(struct tep_handle *pevent, - struct tep_event *event) -{ - static int idx; - int events_count; - struct tep_event *all_events; - - all_events = tep_get_first_event(pevent); - events_count = tep_get_events_count(pevent); - if (!pevent || !all_events || events_count < 1) - return NULL; - - if (!event) { - idx = 0; - return all_events; - } - - if (idx < events_count && event == (all_events + idx)) { - idx++; - if (idx == events_count) - return NULL; - return (all_events + idx); - } - - for (idx = 1; idx < events_count; idx++) { - if (event == (all_events + (idx - 1))) - return (all_events + idx); - } - return NULL; -} - struct flag { const char *name; unsigned long long value; diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 2e158387b3d7..72fdf2a3577c 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -47,8 +47,6 @@ void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int siz ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe); -struct tep_event *trace_find_next_event(struct tep_handle *pevent, - struct tep_event *event); unsigned long long read_size(struct tep_event *event, void *ptr, int size); unsigned long long eval_flag(const char *flag); |