summaryrefslogtreecommitdiff
path: root/tools/perf/util/machine.h
diff options
context:
space:
mode:
authorKan Liang <kan.liang@intel.com>2017-09-29 07:47:54 -0700
committerArnaldo Carvalho de Melo <acme@redhat.com>2017-10-03 09:27:46 -0300
commit340b47f510bbe55a76b7309107276f02ea11f117 (patch)
tree4dde66840cd57b47c01f7c350ad545347f4e2178 /tools/perf/util/machine.h
parentf988e71bc6220d8b404dbd43c0e0962e30305795 (diff)
perf top: Implement multithreading for perf_event__synthesize_threads
The proc files which is sorted with alphabetical order are evenly assigned to several synthesize threads to be processed in parallel. For 'perf top', the threads number hard code to online CPU number. The following patch will introduce an option to set it. For other perf tools, the thread number is 1. Because the process function is not ready for multithreading, e.g. process_synthesized_event. This patch series only support event synthesize multithreading for 'perf top'. For other tools, it can be done separately later. With multithread applied, the total processing time can get up to 1.56x speedup on Knights Mill for 'perf top'. For specific single event processing, the processing time could increase because of the lock contention. So proc_map_timeout may need to be increased. Otherwise some proc maps will be truncated. Based on my test, increasing the proc_map_timeout has small impact on the total processing time. The total processing time still get 1.49x speedup on Knights Mill after increasing the proc_map_timeout. The patch itself doesn't increase the proc_map_timeout. Doesn't need to implement multithreading for per task monitoring, perf_event__synthesize_thread_map. It doesn't have performance issue. Committer testing: # getconf _NPROCESSORS_ONLN 4 # perf trace --no-inherit -e clone -o /tmp/output perf top # tail -4 /tmp/bla 0.124 ( 0.041 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7fc3eb3a8f30, parent_tidptr: 0x7fc3eb3a99d0, child_tidptr: 0x7fc3eb3a99d0, tls: 0x7fc3eb3a9700) = 9548 (perf) 0.246 ( 0.023 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7fc3eaba7f30, parent_tidptr: 0x7fc3eaba89d0, child_tidptr: 0x7fc3eaba89d0, tls: 0x7fc3eaba8700) = 9549 (perf) 0.286 ( 0.019 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7fc3ea3a6f30, parent_tidptr: 0x7fc3ea3a79d0, child_tidptr: 0x7fc3ea3a79d0, tls: 0x7fc3ea3a7700) = 9550 (perf) 246.540 ( 0.047 ms): clone(flags: VM|FS|FILES|SIGHAND|THREAD|SYSVSEM|SETTLS|PARENT_SETTID|CHILD_CLEARTID, child_stack: 0x7fc3ea3a6f30, parent_tidptr: 0x7fc3ea3a79d0, child_tidptr: 0x7fc3ea3a79d0, tls: 0x7fc3ea3a7700) = 9551 (perf) # Signed-off-by: Kan Liang <kan.liang@intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: He Kuang <hekuang@huawei.com> Cc: Lukasz Odzioba <lukasz.odzioba@intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1506696477-146932-4-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/machine.h')
-rw-r--r--tools/perf/util/machine.h9
1 files changed, 6 insertions, 3 deletions
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index b1cd516f2025..c6a299ea506c 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -257,15 +257,18 @@ int machines__for_each_thread(struct machines *machines,
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
perf_event__handler_t process, bool data_mmap,
- unsigned int proc_map_timeout);
+ unsigned int proc_map_timeout,
+ unsigned int nr_threads_synthesize);
static inline
int machine__synthesize_threads(struct machine *machine, struct target *target,
struct thread_map *threads, bool data_mmap,
- unsigned int proc_map_timeout)
+ unsigned int proc_map_timeout,
+ unsigned int nr_threads_synthesize)
{
return __machine__synthesize_threads(machine, NULL, target, threads,
perf_event__process, data_mmap,
- proc_map_timeout);
+ proc_map_timeout,
+ nr_threads_synthesize);
}
pid_t machine__get_current_tid(struct machine *machine, int cpu);