3 files changed, 412 insertions, 2 deletions
diff --git a/tools/perf/Documentation/perf-kwork.txt b/tools/perf/Documentation/perf-kwork.txt
index 2092ab916ea9..0601fcb0feea 100644
--- a/tools/perf/Documentation/perf-kwork.txt
+++ b/tools/perf/Documentation/perf-kwork.txt
@@ -8,7 +8,7 @@ perf-kwork - Tool to trace/measure kernel work properties (latencies)
 SYNOPSIS
 --------
 [verse]
-'perf kwork' {record|report|latency|timehist}
+'perf kwork' {record|report|latency|timehist|top}
 
 DESCRIPTION
 -----------
@@ -23,6 +23,8 @@ There are several variants of 'perf kwork':
 
   'perf kwork timehist' provides an analysis of kernel work events.
 
+  'perf kwork top' to report the task cpu usage.
+
     Example usage:
         perf kwork record -- sleep 1
         perf kwork report
@@ -30,6 +32,7 @@ There are several variants of 'perf kwork':
         perf kwork latency
         perf kwork latency -b
         perf kwork timehist
+        perf kwork top
 
    By default it shows the individual work events such as irq, workqeueu,
    including the run time and delay (time between raise and actually entry):
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index 7d93951c2ed2..1b213af59135 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -45,6 +45,11 @@
 #define PRINT_BRACKETPAIR_WIDTH 2
 #define PRINT_TIME_UNIT_SEC_WIDTH 2
 #define PRINT_TIME_UNIT_MESC_WIDTH 3
+#define PRINT_PID_WIDTH 7
+#define PRINT_TASK_NAME_WIDTH 16
+#define PRINT_CPU_USAGE_WIDTH 6
+#define PRINT_CPU_USAGE_DECIMAL_WIDTH 2
+#define PRINT_CPU_USAGE_HIST_WIDTH 30
 #define PRINT_RUNTIME_HEADER_WIDTH (PRINT_RUNTIME_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH)
 #define PRINT_LATENCY_HEADER_WIDTH (PRINT_LATENCY_WIDTH + PRINT_TIME_UNIT_MESC_WIDTH)
 #define PRINT_TIMEHIST_CPU_WIDTH (PRINT_CPU_WIDTH + PRINT_BRACKETPAIR_WIDTH)
@@ -131,6 +136,16 @@ static int max_latency_cmp(struct kwork_work *l, struct kwork_work *r)
 	return 0;
 }
 
+static int cpu_usage_cmp(struct kwork_work *l, struct kwork_work *r)
+{
+	if (l->cpu_usage > r->cpu_usage)
+		return 1;
+	if (l->cpu_usage < r->cpu_usage)
+		return -1;
+
+	return 0;
+}
+
 static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
 			       const char *tok, struct list_head *list)
 {
@@ -155,12 +170,17 @@ static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
 		.name = "avg",
 		.cmp  = avg_latency_cmp,
 	};
+	static struct sort_dimension rate_sort_dimension = {
+		.name = "rate",
+		.cmp  = cpu_usage_cmp,
+	};
 	struct sort_dimension *available_sorts[] = {
 		&id_sort_dimension,
 		&max_sort_dimension,
 		&count_sort_dimension,
 		&runtime_sort_dimension,
 		&avg_sort_dimension,
+		&rate_sort_dimension,
 	};
 
 	if (kwork->report == KWORK_REPORT_LATENCY)
@@ -485,6 +505,38 @@ static struct kwork_atom *work_pop_atom(struct perf_kwork *kwork,
 	return NULL;
 }
 
+static struct kwork_work *find_work_by_id(struct rb_root_cached *root,
+					  u64 id, int cpu)
+{
+	struct rb_node *next;
+	struct kwork_work *work;
+
+	next = rb_first_cached(root);
+	while (next) {
+		work = rb_entry(next, struct kwork_work, node);
+		if ((cpu != -1 && work->id == id && work->cpu == cpu) ||
+		    (cpu == -1 && work->id == id))
+			return work;
+
+		next = rb_next(next);
+	}
+
+	return NULL;
+}
+
+static struct kwork_class *get_kwork_class(struct perf_kwork *kwork,
+					   enum kwork_class_type type)
+{
+	struct kwork_class *class;
+
+	list_for_each_entry(class, &kwork->class_list, list) {
+		if (class->type == type)
+			return class;
+	}
+
+	return NULL;
+}
+
 static void report_update_exit_event(struct kwork_work *work,
 				     struct kwork_atom *atom,
 				     struct perf_sample *sample)
@@ -789,6 +841,54 @@ out:
 	return ret;
 }
 
+static void top_update_runtime(struct kwork_work *work,
+			       struct kwork_atom *atom,
+			       struct perf_sample *sample)
+{
+	u64 delta;
+	u64 exit_time = sample->time;
+	u64 entry_time = atom->time;
+
+	if ((entry_time != 0) && (exit_time >= entry_time)) {
+		delta = exit_time - entry_time;
+		work->total_runtime += delta;
+	}
+}
+
+static int top_entry_event(struct perf_kwork *kwork,
+			   struct kwork_class *class,
+			   struct evsel *evsel,
+			   struct perf_sample *sample,
+			   struct machine *machine)
+{
+	return work_push_atom(kwork, class, KWORK_TRACE_ENTRY,
+			      KWORK_TRACE_MAX, evsel, sample,
+			      machine, NULL, true);
+}
+
+static int top_sched_switch_event(struct perf_kwork *kwork,
+				  struct kwork_class *class,
+				  struct evsel *evsel,
+				  struct perf_sample *sample,
+				  struct machine *machine)
+{
+	struct kwork_atom *atom;
+	struct kwork_work *work;
+
+	atom = work_pop_atom(kwork, class, KWORK_TRACE_EXIT,
+			     KWORK_TRACE_ENTRY, evsel, sample,
+			     machine, &work);
+	if (!work)
+		return -1;
+
+	if (atom) {
+		top_update_runtime(work, atom, sample);
+		atom_del(atom);
+	}
+
+	return top_entry_event(kwork, class, evsel, sample, machine);
+}
+
 static struct kwork_class kwork_irq;
 static int process_irq_handler_entry_event(struct perf_tool *tool,
 					   struct evsel *evsel,
@@ -1378,6 +1478,101 @@ static void print_bad_events(struct perf_kwork *kwork)
 	}
 }
 
+const char *graph_load = "||||||||||||||||||||||||||||||||||||||||||||||||";
+const char *graph_idle = "                                                ";
+static void top_print_per_cpu_load(struct perf_kwork *kwork)
+{
+	int i, load_width;
+	u64 total, load, load_ratio;
+	struct kwork_top_stat *stat = &kwork->top_stat;
+
+	for (i = 0; i < MAX_NR_CPUS; i++) {
+		total = stat->cpus_runtime[i].total;
+		load = stat->cpus_runtime[i].load;
+		if (test_bit(i, stat->all_cpus_bitmap) && total) {
+			load_ratio = load * 10000 / total;
+			load_width = PRINT_CPU_USAGE_HIST_WIDTH *
+				load_ratio / 10000;
+
+			printf("%%Cpu%-*d[%.*s%.*s %*.*f%%]\n",
+			       PRINT_CPU_WIDTH, i,
+			       load_width, graph_load,
+			       PRINT_CPU_USAGE_HIST_WIDTH - load_width,
+			       graph_idle,
+			       PRINT_CPU_USAGE_WIDTH,
+			       PRINT_CPU_USAGE_DECIMAL_WIDTH,
+			       (double)load_ratio / 100);
+		}
+	}
+}
+
+static void top_print_cpu_usage(struct perf_kwork *kwork)
+{
+	struct kwork_top_stat *stat = &kwork->top_stat;
+	u64 idle_time = stat->cpus_runtime[MAX_NR_CPUS].idle;
+	int cpus_nr = bitmap_weight(stat->all_cpus_bitmap, MAX_NR_CPUS);
+	u64 cpus_total_time = stat->cpus_runtime[MAX_NR_CPUS].total;
+
+	printf("Total  : %*.*f ms, %d cpus\n",
+	       PRINT_RUNTIME_WIDTH, RPINT_DECIMAL_WIDTH,
+	       (double)cpus_total_time / NSEC_PER_MSEC,
+	       cpus_nr);
+
+	printf("%%Cpu(s): %*.*f%% id\n",
+	       PRINT_CPU_USAGE_WIDTH, PRINT_CPU_USAGE_DECIMAL_WIDTH,
+	       cpus_total_time ? (double)idle_time * 100 / cpus_total_time : 0);
+
+	top_print_per_cpu_load(kwork);
+}
+
+static void top_print_header(struct perf_kwork *kwork __maybe_unused)
+{
+	int ret;
+
+	printf("\n ");
+	ret = printf(" %*s  %*s  %*s  %-*s",
+		     PRINT_PID_WIDTH, "PID",
+		     PRINT_CPU_USAGE_WIDTH, "%CPU",
+		     PRINT_RUNTIME_HEADER_WIDTH + RPINT_DECIMAL_WIDTH, "RUNTIME",
+		     PRINT_TASK_NAME_WIDTH, "COMMMAND");
+	printf("\n ");
+	print_separator(ret);
+}
+
+static int top_print_work(struct perf_kwork *kwork __maybe_unused, struct kwork_work *work)
+{
+	int ret = 0;
+
+	printf(" ");
+
+	/*
+	 * pid
+	 */
+	ret += printf(" %*ld ", PRINT_PID_WIDTH, work->id);
+
+	/*
+	 * cpu usage
+	 */
+	ret += printf(" %*.*f ",
+		      PRINT_CPU_USAGE_WIDTH, PRINT_CPU_USAGE_DECIMAL_WIDTH,
+		      (double)work->cpu_usage / 100);
+
+	/*
+	 * total runtime
+	 */
+	ret += printf(" %*.*f ms ",
+		      PRINT_RUNTIME_WIDTH + RPINT_DECIMAL_WIDTH, RPINT_DECIMAL_WIDTH,
+		      (double)work->total_runtime / NSEC_PER_MSEC);
+
+	/*
+	 * command
+	 */
+	ret += printf(" %-*s", PRINT_TASK_NAME_WIDTH, work->name);
+
+	printf("\n");
+	return ret;
+}
+
 static void work_sort(struct perf_kwork *kwork,
 		      struct kwork_class *class, struct rb_root_cached *root)
 {
@@ -1425,6 +1620,9 @@ static int perf_kwork__check_config(struct perf_kwork *kwork,
 		.entry_event = timehist_entry_event,
 		.exit_event  = timehist_exit_event,
 	};
+	static struct trace_kwork_handler top_ops = {
+		.sched_switch_event = top_sched_switch_event,
+	};
 
 	switch (kwork->report) {
 	case KWORK_REPORT_RUNTIME:
@@ -1436,6 +1634,9 @@ static int perf_kwork__check_config(struct perf_kwork *kwork,
 	case KWORK_REPORT_TIMEHIST:
 		kwork->tp_handler = &timehist_ops;
 		break;
+	case KWORK_REPORT_TOP:
+		kwork->tp_handler = &top_ops;
+		break;
 	default:
 		pr_debug("Invalid report type %d\n", kwork->report);
 		return -1;
@@ -1682,6 +1883,169 @@ static int perf_kwork__timehist(struct perf_kwork *kwork)
 	return perf_kwork__read_events(kwork);
 }
 
+static void top_calc_total_runtime(struct perf_kwork *kwork)
+{
+	struct kwork_class *class;
+	struct kwork_work *work;
+	struct rb_node *next;
+	struct kwork_top_stat *stat = &kwork->top_stat;
+
+	class = get_kwork_class(kwork, KWORK_CLASS_SCHED);
+	if (!class)
+		return;
+
+	next = rb_first_cached(&class->work_root);
+	while (next) {
+		work = rb_entry(next, struct kwork_work, node);
+		BUG_ON(work->cpu >= MAX_NR_CPUS);
+		stat->cpus_runtime[work->cpu].total += work->total_runtime;
+		stat->cpus_runtime[MAX_NR_CPUS].total += work->total_runtime;
+		next = rb_next(next);
+	}
+}
+
+static void top_calc_idle_time(struct perf_kwork *kwork,
+				struct kwork_work *work)
+{
+	struct kwork_top_stat *stat = &kwork->top_stat;
+
+	if (work->id == 0) {
+		stat->cpus_runtime[work->cpu].idle += work->total_runtime;
+		stat->cpus_runtime[MAX_NR_CPUS].idle += work->total_runtime;
+	}
+}
+
+static void top_calc_cpu_usage(struct perf_kwork *kwork)
+{
+	struct kwork_class *class;
+	struct kwork_work *work;
+	struct rb_node *next;
+	struct kwork_top_stat *stat = &kwork->top_stat;
+
+	class = get_kwork_class(kwork, KWORK_CLASS_SCHED);
+	if (!class)
+		return;
+
+	next = rb_first_cached(&class->work_root);
+	while (next) {
+		work = rb_entry(next, struct kwork_work, node);
+
+		if (work->total_runtime == 0)
+			goto next;
+
+		__set_bit(work->cpu, stat->all_cpus_bitmap);
+
+		work->cpu_usage = work->total_runtime * 10000 /
+			stat->cpus_runtime[work->cpu].total;
+
+		top_calc_idle_time(kwork, work);
+next:
+		next = rb_next(next);
+	}
+}
+
+static void top_calc_load_runtime(struct perf_kwork *kwork,
+				  struct kwork_work *work)
+{
+	struct kwork_top_stat *stat = &kwork->top_stat;
+
+	if (work->id != 0) {
+		stat->cpus_runtime[work->cpu].load += work->total_runtime;
+		stat->cpus_runtime[MAX_NR_CPUS].load += work->total_runtime;
+	}
+}
+
+static void top_merge_tasks(struct perf_kwork *kwork)
+{
+	struct kwork_work *merged_work, *data;
+	struct kwork_class *class;
+	struct rb_node *node;
+	int cpu;
+	struct rb_root_cached merged_root = RB_ROOT_CACHED;
+
+	class = get_kwork_class(kwork, KWORK_CLASS_SCHED);
+	if (!class)
+		return;
+
+	for (;;) {
+		node = rb_first_cached(&class->work_root);
+		if (!node)
+			break;
+
+		rb_erase_cached(node, &class->work_root);
+		data = rb_entry(node, struct kwork_work, node);
+
+		cpu = data->cpu;
+		merged_work = find_work_by_id(&merged_root, data->id,
+					      data->id == 0 ? cpu : -1);
+		if (!merged_work) {
+			work_insert(&merged_root, data, &kwork->cmp_id);
+		} else {
+			merged_work->total_runtime += data->total_runtime;
+			merged_work->cpu_usage += data->cpu_usage;
+		}
+
+		top_calc_load_runtime(kwork, data);
+	}
+
+	work_sort(kwork, class, &merged_root);
+}
+
+static void perf_kwork__top_report(struct perf_kwork *kwork)
+{
+	struct kwork_work *work;
+	struct rb_node *next;
+
+	printf("\n");
+
+	top_print_cpu_usage(kwork);
+	top_print_header(kwork);
+	next = rb_first_cached(&kwork->sorted_work_root);
+	while (next) {
+		work = rb_entry(next, struct kwork_work, node);
+		process_skipped_events(kwork, work);
+
+		if (work->total_runtime == 0)
+			goto next;
+
+		top_print_work(kwork, work);
+
+next:
+		next = rb_next(next);
+	}
+
+	printf("\n");
+}
+
+static int perf_kwork__top(struct perf_kwork *kwork)
+{
+	struct __top_cpus_runtime *cpus_runtime;
+	int ret = 0;
+
+	cpus_runtime = zalloc(sizeof(struct __top_cpus_runtime) * (MAX_NR_CPUS + 1));
+	if (!cpus_runtime)
+		return -1;
+
+	kwork->top_stat.cpus_runtime = cpus_runtime;
+	bitmap_zero(kwork->top_stat.all_cpus_bitmap, MAX_NR_CPUS);
+
+	ret = perf_kwork__read_events(kwork);
+	if (ret)
+		goto out;
+
+	top_calc_total_runtime(kwork);
+	top_calc_cpu_usage(kwork);
+	top_merge_tasks(kwork);
+
+	setup_pager();
+
+	perf_kwork__top_report(kwork);
+
+out:
+	free(kwork->top_stat.cpus_runtime);
+	return ret;
+}
+
 static void setup_event_list(struct perf_kwork *kwork,
 			     const struct option *options,
 			     const char * const usage_msg[])
@@ -1801,6 +2165,7 @@ int cmd_kwork(int argc, const char **argv)
 	};
 	static const char default_report_sort_order[] = "runtime, max, count";
 	static const char default_latency_sort_order[] = "avg, max, count";
+	static const char default_top_sort_order[] = "rate, runtime";
 	const struct option kwork_options[] = {
 	OPT_INCR('v', "verbose", &verbose,
 		 "be more verbose (show symbol address, etc)"),
@@ -1868,6 +2233,9 @@ int cmd_kwork(int argc, const char **argv)
 		   "input file name"),
 	OPT_PARENT(kwork_options)
 	};
+	const struct option top_options[] = {
+	OPT_PARENT(kwork_options)
+	};
 	const char *kwork_usage[] = {
 		NULL,
 		NULL
@@ -1884,8 +2252,12 @@ int cmd_kwork(int argc, const char **argv)
 		"perf kwork timehist [<options>]",
 		NULL
 	};
+	const char * const top_usage[] = {
+		"perf kwork top [<options>]",
+		NULL
+	};
 	const char *const kwork_subcommands[] = {
-		"record", "report", "latency", "timehist", NULL
+		"record", "report", "latency", "timehist", "top", NULL
 	};
 
 	argc = parse_options_subcommand(argc, argv, kwork_options,
@@ -1930,6 +2302,19 @@ int cmd_kwork(int argc, const char **argv)
 		kwork.report = KWORK_REPORT_TIMEHIST;
 		setup_event_list(&kwork, kwork_options, kwork_usage);
 		return perf_kwork__timehist(&kwork);
+	} else if (strlen(argv[0]) > 2 && strstarts("top", argv[0])) {
+		kwork.sort_order = default_top_sort_order;
+		if (argc > 1) {
+			argc = parse_options(argc, argv, top_options, top_usage, 0);
+			if (argc)
+				usage_with_options(top_usage, top_options);
+		}
+		kwork.report = KWORK_REPORT_TOP;
+		if (!kwork.event_list_str)
+			kwork.event_list_str = "sched";
+		setup_event_list(&kwork, kwork_options, kwork_usage);
+		setup_sorting(&kwork, top_options, top_usage);
+		return perf_kwork__top(&kwork);
 	} else
 		usage_with_options(kwork_usage, kwork_options);
 
diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h
index f8e9cdd1371a..41ed193d5d8b 100644
--- a/tools/perf/util/kwork.h
+++ b/tools/perf/util/kwork.h
@@ -24,6 +24,7 @@ enum kwork_report_type {
 	KWORK_REPORT_RUNTIME,
 	KWORK_REPORT_LATENCY,
 	KWORK_REPORT_TIMEHIST,
+	KWORK_REPORT_TOP,
 };
 
 enum kwork_trace_type {
@@ -129,6 +130,11 @@ struct kwork_work {
 	u64 max_latency_start;
 	u64 max_latency_end;
 	u64 total_latency;
+
+	/*
+	 * top report
+	 */
+	u32 cpu_usage;
 };
 
 struct kwork_class {
@@ -174,6 +180,17 @@ struct trace_kwork_handler {
 				  struct perf_sample *sample, struct machine *machine);
 };
 
+struct __top_cpus_runtime {
+	u64 load;
+	u64 idle;
+	u64 total;
+};
+
+struct kwork_top_stat {
+	DECLARE_BITMAP(all_cpus_bitmap, MAX_NR_CPUS);
+	struct __top_cpus_runtime *cpus_runtime;
+};
+
 struct perf_kwork {
 	/*
 	 * metadata
@@ -225,6 +242,11 @@ struct perf_kwork {
 	u64 all_runtime;
 	u64 all_count;
 	u64 nr_skipped_events[KWORK_TRACE_MAX + 1];
+
+	/*
+	 * perf kwork top data
+	 */
+	struct kwork_top_stat top_stat;
 };
 
 struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,