From 2e76c24d72372db35f226a49c2b99d0fd8cfd400 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:36:31 +0800
Subject: sched: Split cpuacct code out of core.c

Signed-off-by: Li Zefan
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/5155366F.5060404@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 227 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 kernel/sched/cpuacct.c

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
new file mode 100644
index 000000000000..50ec24b6193d
--- /dev/null
+++ b/kernel/sched/cpuacct.c
@@ -0,0 +1,227 @@
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+#include <linux/seq_file.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel_stat.h>
+
+#include "sched.h"
+
+/*
+ * CPU accounting code for task groups.
+ *
+ * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
+ * (balbir@in.ibm.com).
+ */
+
+struct cpuacct root_cpuacct;
+
+/* create a new cpu accounting group */
+static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
+{
+        struct cpuacct *ca;
+
+        if (!cgrp->parent)
+                return &root_cpuacct.css;
+
+        ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+        if (!ca)
+                goto out;
+
+        ca->cpuusage = alloc_percpu(u64);
+        if (!ca->cpuusage)
+                goto out_free_ca;
+
+        ca->cpustat = alloc_percpu(struct kernel_cpustat);
+        if (!ca->cpustat)
+                goto out_free_cpuusage;
+
+        return &ca->css;
+
+out_free_cpuusage:
+        free_percpu(ca->cpuusage);
+out_free_ca:
+        kfree(ca);
+out:
+        return ERR_PTR(-ENOMEM);
+}
+
+/* destroy an existing cpu accounting group */
+static void cpuacct_css_free(struct cgroup *cgrp)
+{
+        struct cpuacct *ca = cgroup_ca(cgrp);
+
+        free_percpu(ca->cpustat);
+        free_percpu(ca->cpuusage);
+        kfree(ca);
+}
+
+static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+{
+        u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+        u64 data;
+
+#ifndef CONFIG_64BIT
+        /*
+         * Take rq->lock to make 64-bit read safe on 32-bit platforms.
+         */
+        raw_spin_lock_irq(&cpu_rq(cpu)->lock);
+        data = *cpuusage;
+        raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
+#else
+        data = *cpuusage;
+#endif
+
+        return data;
+}
+
+static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
+{
+        u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+
+#ifndef CONFIG_64BIT
+        /*
+         * Take rq->lock to make 64-bit write safe on 32-bit platforms.
+         */
+        raw_spin_lock_irq(&cpu_rq(cpu)->lock);
+        *cpuusage = val;
+        raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
+#else
+        *cpuusage = val;
+#endif
+}
+
+/* return total cpu usage (in nanoseconds) of a group */
+static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
+{
+        struct cpuacct *ca = cgroup_ca(cgrp);
+        u64 totalcpuusage = 0;
+        int i;
+
+        for_each_present_cpu(i)
+                totalcpuusage += cpuacct_cpuusage_read(ca, i);
+
+        return totalcpuusage;
+}
+
+static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
+                          u64 reset)
+{
+        struct cpuacct *ca = cgroup_ca(cgrp);
+        int err = 0;
+        int i;
+
+        if (reset) {
+                err = -EINVAL;
+                goto out;
+        }
+
+        for_each_present_cpu(i)
+                cpuacct_cpuusage_write(ca, i, 0);
+
+out:
+        return err;
+}
+
+static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
+                                   struct seq_file *m)
+{
+        struct cpuacct *ca = cgroup_ca(cgroup);
+        u64 percpu;
+        int i;
+
+        for_each_present_cpu(i) {
+                percpu = cpuacct_cpuusage_read(ca, i);
+                seq_printf(m, "%llu ", (unsigned long long) percpu);
+        }
+        seq_printf(m, "\n");
+        return 0;
+}
+
+static const char * const cpuacct_stat_desc[] = {
+        [CPUACCT_STAT_USER] = "user",
+        [CPUACCT_STAT_SYSTEM] = "system",
+};
+
+static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
+                              struct cgroup_map_cb *cb)
+{
+        struct cpuacct *ca = cgroup_ca(cgrp);
+        int cpu;
+        s64 val = 0;
+
+        for_each_online_cpu(cpu) {
+                struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
+                val += kcpustat->cpustat[CPUTIME_USER];
+                val += kcpustat->cpustat[CPUTIME_NICE];
+        }
+        val = cputime64_to_clock_t(val);
+        cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
+
+        val = 0;
+        for_each_online_cpu(cpu) {
+                struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
+                val += kcpustat->cpustat[CPUTIME_SYSTEM];
+                val += kcpustat->cpustat[CPUTIME_IRQ];
+                val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
+        }
+
+        val = cputime64_to_clock_t(val);
+        cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
+
+        return 0;
+}
+
+static struct cftype files[] = {
+        {
+                .name = "usage",
+                .read_u64 = cpuusage_read,
+                .write_u64 = cpuusage_write,
+        },
+        {
+                .name = "usage_percpu",
+                .read_seq_string = cpuacct_percpu_seq_read,
+        },
+        {
+                .name = "stat",
+                .read_map = cpuacct_stats_show,
+        },
+        { }     /* terminate */
+};
+
+/*
+ * charge this task's execution time to its accounting group.
+ *
+ * called with rq->lock held.
+ */
+void cpuacct_charge(struct task_struct *tsk, u64 cputime)
+{
+        struct cpuacct *ca;
+        int cpu;
+
+        if (unlikely(!cpuacct_subsys.active))
+                return;
+
+        cpu = task_cpu(tsk);
+
+        rcu_read_lock();
+
+        ca = task_ca(tsk);
+
+        for (; ca; ca = parent_ca(ca)) {
+                u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+                *cpuusage += cputime;
+        }
+
+        rcu_read_unlock();
+}
+
+struct cgroup_subsys cpuacct_subsys = {
+        .name = "cpuacct",
+        .css_alloc = cpuacct_css_alloc,
+        .css_free = cpuacct_css_free,
+        .subsys_id = cpuacct_subsys_id,
+        .base_cftypes = files,
+};
--
cgit

From dbe4b41f9800223949ce72e4289814697e0ea91a Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:36:55 +0800
Subject: sched/cpuacct: Add cpuacct_init()

So we don't open-code the initialization of cpuacct in core.c.

Signed-off-by: Li Zefan
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/51553687.1060906@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 50ec24b6193d..48b5e9184dcc 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -218,6 +218,13 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
         rcu_read_unlock();
 }
 
+void __init cpuacct_init(void)
+{
+        root_cpuacct.cpustat = &kernel_cpustat;
+        root_cpuacct.cpuusage = alloc_percpu(u64);
+        BUG_ON(!root_cpuacct.cpuusage); /* Too early, not expected to fail */
+}
+
 struct cgroup_subsys cpuacct_subsys = {
         .name = "cpuacct",
         .css_alloc = cpuacct_css_alloc,
--
cgit

From 1966aaf7d54608e8ddb7ac454b461840e763409a Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:37:06 +0800
Subject: sched/cpuacct: Add cpuacct_account_field()

So we can remove the open-coded cpuacct code in cputime.c.

Signed-off-by: Li Zefan
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/51553692.9060008@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 48b5e9184dcc..72bd971ea377 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -218,6 +218,29 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
         rcu_read_unlock();
 }
 
+/*
+ * Add user/system time to cpuacct.
+ *
+ * Note: it's the caller that updates the account of the root cgroup.
+ */
+void cpuacct_account_field(struct task_struct *p, int index, u64 val)
+{
+        struct kernel_cpustat *kcpustat;
+        struct cpuacct *ca;
+
+        if (unlikely(!cpuacct_subsys.active))
+                return;
+
+        rcu_read_lock();
+        ca = task_ca(p);
+        while (ca && (ca != &root_cpuacct)) {
+                kcpustat = this_cpu_ptr(ca->cpustat);
+                kcpustat->cpustat[index] += val;
+                ca = parent_ca(ca);
+        }
+        rcu_read_unlock();
+}
+
 void __init cpuacct_init(void)
 {
         root_cpuacct.cpustat = &kernel_cpustat;
--
cgit

From 543bc0e76e6bb84300eaf9833edc5a481f788678 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:37:29 +0800
Subject: sched/cpuacct: Remove redundant NULL checks in cpuacct_charge()

This is a micro optimization for the hot path.

- We don't need to check if @ca is NULL in parent_ca().
- We don't need to check if @ca is NULL in the beginning of the for loop.

Signed-off-by: Li Zefan
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/515536A9.5000700@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 72bd971ea377..b2aaaba16d46 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -210,9 +210,13 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 
         ca = task_ca(tsk);
 
-        for (; ca; ca = parent_ca(ca)) {
+        while (true) {
                 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
                 *cpuusage += cputime;
+
+                ca = parent_ca(ca);
+                if (!ca)
+                        break;
         }
 
         rcu_read_unlock();
--
cgit

From 5f40d804325e925409907e29f46ecb012090b6c2 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:37:43 +0800
Subject: sched/cpuacct: Remove redundant NULL checks in cpuacct_account_field()

This is a micro optimization for a hot path.

- We don't need to check if @ca returned from task_ca() is NULL.
- We don't need to check if @ca returned from parent_ca() is NULL.

Signed-off-by: Li Zefan
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/515536B7.6060602@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index b2aaaba16d46..071ae8d08181 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -237,10 +237,10 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val)
 
         rcu_read_lock();
         ca = task_ca(p);
-        while (ca && (ca != &root_cpuacct)) {
+        while (ca != &root_cpuacct) {
                 kcpustat = this_cpu_ptr(ca->cpustat);
                 kcpustat->cpustat[index] += val;
-                ca = parent_ca(ca);
+                ca = __parent_ca(ca);
         }
         rcu_read_unlock();
 }
--
cgit

From d1712796a880bea0a44739941116001923f3275b Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:38:13 +0800
Subject: sched/cpuacct: Clean up cpuacct.h

Now most of the code in cpuacct.h can be moved to cpuacct.c

Signed-off-by: Li Zefan
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/515536D5.2080401@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 071ae8d08181..9305fd2f8cf9 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -16,7 +16,49 @@
  * (balbir@in.ibm.com).
  */
 
-struct cpuacct root_cpuacct;
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+        CPUACCT_STAT_USER,      /* ... user mode */
+        CPUACCT_STAT_SYSTEM,    /* ... kernel mode */
+
+        CPUACCT_STAT_NSTATS,
+};
+
+/* track cpu usage of a group of tasks and its child groups */
+struct cpuacct {
+        struct cgroup_subsys_state css;
+        /* cpuusage holds pointer to a u64-type object on every cpu */
+        u64 __percpu *cpuusage;
+        struct kernel_cpustat __percpu *cpustat;
+};
+
+/* return cpu accounting group corresponding to this container */
+static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
+{
+        return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
+                            struct cpuacct, css);
+}
+
+/* return cpu accounting group to which this task belongs */
+static inline struct cpuacct *task_ca(struct task_struct *tsk)
+{
+        return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
+                            struct cpuacct, css);
+}
+
+static inline struct cpuacct *__parent_ca(struct cpuacct *ca)
+{
+        return cgroup_ca(ca->css.cgroup->parent);
+}
+
+static inline struct cpuacct *parent_ca(struct cpuacct *ca)
+{
+        if (!ca->css.cgroup->parent)
+                return NULL;
+        return cgroup_ca(ca->css.cgroup->parent);
+}
+
+static struct cpuacct root_cpuacct;
 
 /* create a new cpu accounting group */
 static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
--
cgit

From 7943e15a3e91db78a7a3fbc84e45cf9d1c7c7d23 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:43:46 +0800
Subject: sched/cpuacct: Allocate per_cpu cpuusage for root cpuacct statically

This is a preparation, so later we can initialize cpuacct earlier.

Signed-off-by: Li Zefan
Cc: Tejun Heo
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/51553822.5000403@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 9305fd2f8cf9..a691c4dd65be 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -58,6 +58,7 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca)
         return cgroup_ca(ca->css.cgroup->parent);
 }
 
+static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
 static struct cpuacct root_cpuacct;
 
 /* create a new cpu accounting group */
 static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
@@ -290,8 +291,7 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val)
 void __init cpuacct_init(void)
 {
         root_cpuacct.cpustat = &kernel_cpustat;
-        root_cpuacct.cpuusage = alloc_percpu(u64);
-        BUG_ON(!root_cpuacct.cpuusage); /* Too early, not expected to fail */
+        root_cpuacct.cpuusage = &root_cpuacct_cpuusage;
 }
 
 struct cgroup_subsys cpuacct_subsys = {
--
cgit

From 14c6d3c8a47ced185b6375c4940b5b393f1a294e Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:44:04 +0800
Subject: sched/cpuacct: Initialize root cpuacct earlier

Now we don't need cpuacct_init(), and instead we just initialize
root_cpuacct when it's defined.

Signed-off-by: Li Zefan
Cc: Tejun Heo
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/51553834.9090701@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index a691c4dd65be..04255814a0ed 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -59,7 +59,10 @@ static inline struct cpuacct *parent_ca(struct cpuacct *ca)
 }
 
 static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
-static struct cpuacct root_cpuacct;
+static struct cpuacct root_cpuacct = {
+        .cpustat        = &kernel_cpustat,
+        .cpuusage       = &root_cpuacct_cpuusage,
+};
 
 /* create a new cpu accounting group */
 static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
@@ -288,12 +291,6 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val)
         rcu_read_unlock();
 }
 
-void __init cpuacct_init(void)
-{
-        root_cpuacct.cpustat = &kernel_cpustat;
-        root_cpuacct.cpuusage = &root_cpuacct_cpuusage;
-}
-
 struct cgroup_subsys cpuacct_subsys = {
         .name = "cpuacct",
         .css_alloc = cpuacct_css_alloc,
--
cgit

From 621e2de02403a6f776852c564b79c38bf3cc9032 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:44:15 +0800
Subject: sched/cpuacct: Initialize cpuacct subsystem earlier

Initialize cpuacct before the scheduler is functioning, so when
cpuacct_charge() and cpuacct_account_field() are called, task_ca()
won't return NULL.

Signed-off-by: Li Zefan
Cc: Tejun Heo
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/5155383F.8000005@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 04255814a0ed..75e46d291f9c 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -292,9 +292,10 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val)
 }
 
 struct cgroup_subsys cpuacct_subsys = {
-        .name = "cpuacct",
-        .css_alloc = cpuacct_css_alloc,
-        .css_free = cpuacct_css_free,
-        .subsys_id = cpuacct_subsys_id,
-        .base_cftypes = files,
+        .name           = "cpuacct",
+        .css_alloc      = cpuacct_css_alloc,
+        .css_free       = cpuacct_css_free,
+        .subsys_id      = cpuacct_subsys_id,
+        .base_cftypes   = files,
+        .early_init     = 1,
 };
--
cgit

From a2b0ae25fc8bfeeb4022b8e847ab811b3c8368d1 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Fri, 29 Mar 2013 14:44:28 +0800
Subject: sched/cpuacct: No need to check subsys active state

Now we're guaranteed when cpuacct_charge() and cpuacct_account_field()
are called, cpuacct has already been properly initialized, so we no
longer need those checks.

Signed-off-by: Li Zefan
Cc: Tejun Heo
Acked-by: Peter Zijlstra
Link: http://lkml.kernel.org/r/5155384C.7000508@huawei.com
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 75e46d291f9c..ef57ab658722 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -247,9 +247,6 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
         struct cpuacct *ca;
         int cpu;
 
-        if (unlikely(!cpuacct_subsys.active))
-                return;
-
         cpu = task_cpu(tsk);
 
         rcu_read_lock();
@@ -278,9 +275,6 @@ void cpuacct_account_field(struct task_struct *p, int index, u64 val)
         struct kernel_cpustat *kcpustat;
         struct cpuacct *ca;
 
-        if (unlikely(!cpuacct_subsys.active))
-                return;
-
         rcu_read_lock();
         ca = task_ca(p);
         while (ca != &root_cpuacct) {
--
cgit

From b329fd5b018ffd64cfef6a2551bb2ca4bbfbacf2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 10 Apr 2013 15:10:50 +0200
Subject: sched/cpuacct/UML: Fix header file dependency bug on the UML build

The cpuacct split caused this build failure on UML:

  kernel/sched/cpuacct.c:94:2: error: implicit declaration of function 'ERR_PTR'

Cc: Li Zefan
Cc: Peter Zijlstra
Signed-off-by: Ingo Molnar
---
 kernel/sched/cpuacct.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/sched/cpuacct.c')

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index ef57ab658722..dbb7e2cd95eb 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -6,6 +6,7 @@
 #include <linux/seq_file.h>
 #include <linux/rcupdate.h>
 #include <linux/kernel_stat.h>
+#include <linux/err.h>
 
 #include "sched.h"
--
cgit
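
For reference, once the whole series is applied the two hot-path accounting hooks end up looking roughly as follows. This is a sketch assembled from the hunks above (all identifiers come from the patches; it is not a verbatim copy of the file at any single commit):

/* charge this task's execution time to its accounting group; called with rq->lock held */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
        struct cpuacct *ca;
        int cpu;

        cpu = task_cpu(tsk);

        rcu_read_lock();

        ca = task_ca(tsk);

        /* walk from the task's group up to and including the root, charging each level */
        while (true) {
                u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
                *cpuusage += cputime;

                ca = parent_ca(ca);     /* returns NULL above the root group */
                if (!ca)
                        break;
        }

        rcu_read_unlock();
}

/* add user/system time to cpuacct; the caller accounts the root cgroup itself */
void cpuacct_account_field(struct task_struct *p, int index, u64 val)
{
        struct kernel_cpustat *kcpustat;
        struct cpuacct *ca;

        rcu_read_lock();
        ca = task_ca(p);
        /* stop before root_cpuacct, so no NULL check is needed on the way up */
        while (ca != &root_cpuacct) {
                kcpustat = this_cpu_ptr(ca->cpustat);
                kcpustat->cpustat[index] += val;
                ca = __parent_ca(ca);
        }
        rcu_read_unlock();
}

Both hooks can be called as soon as the scheduler starts accounting time, because cpuacct_subsys is registered with .early_init = 1 and root_cpuacct is now initialized statically, so neither function needs the old cpuacct_subsys.active check.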