about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Bharata B Rao <bharata@linux.vnet.ibm.com> 2009-03-31 00:32:22 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-04-01 10:49:38 -0400
commit: ef12fefabf94b6a902ad3abd3eb124b00560c445 (patch)
tree: 07a23161b84b11379691ab36e6c44464d801b9ac
parent: c5f8d99585d7b5b7e857fabf8aefd0174903a98c (diff)
cpuacct: add per-cgroup utime/stime statistics
Add per-cgroup cpuacct controller statistics like the system and user
time consumed by the group of tasks.

Changelog:

v7
- Changed the name of the statistic from utime to user and from stime to
  system so that in future we could easily add other statistics like irq,
  softirq, steal times etc.

v6
- Fixed a bug in the error path of cpuacct_create() (pointed out by Li Zefan).

v5
- In cpuacct_stats_show(), use cputime64_to_clock_t() since we are
  operating on a 64bit variable here.

v4
- Remove comments in cpuacct_update_stats() which explained why
  rcu_read_lock() was needed (as per Peter Zijlstra's review comments).
- Don't say that percpu_counter_read() is broken in
  Documentation/cpuacct.txt as per KAMEZAWA Hiroyuki's review comments.

v3
- Fix a small race in the cpuacct hierarchy walk.

v2
- stime and utime now exported in clock_t units instead of msecs.
- Addressed the code review comments from Balbir and Li Zefan.
- Moved to -tip tree.

v1
- Moved the stime/utime accounting to cpuacct controller.

Earlier versions
- http://lkml.org/lkml/2009/2/25/129

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Tested-by: Balbir Singh <balbir@linux.vnet.ibm.com>
LKML-Reference: <20090331043222.GA4093@in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- Documentation/cgroups/cpuacct.txt 18
-rw-r--r-- kernel/sched.c 87
2 files changed, 99 insertions, 6 deletions
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt
index bb775fbe43d7..8b930946c52a 100644
--- a/Documentation/cgroups/cpuacct.txt
+++ b/Documentation/cgroups/cpuacct.txt
@@ -30,3 +30,21 @@ The above steps create a new group g1 and move the current shell
30process (bash) into it. CPU time consumed by this bash and its children 30process (bash) into it. CPU time consumed by this bash and its children
31can be obtained from g1/cpuacct.usage and the same is accumulated in 31can be obtained from g1/cpuacct.usage and the same is accumulated in
32/cgroups/cpuacct.usage also. 32/cgroups/cpuacct.usage also.
33
34cpuacct.stat file lists a few statistics which further divide the
35CPU time obtained by the cgroup into user and system times. Currently
36the following statistics are supported:
37
38user: Time spent by tasks of the cgroup in user mode.
39system: Time spent by tasks of the cgroup in kernel mode.
40
41user and system are in USER_HZ unit.
42
43cpuacct controller uses percpu_counter interface to collect user and
44system times. This has two side effects:
45
46- It is theoretically possible to see wrong values for user and system times.
47 This is because percpu_counter_read() on 32bit systems isn't safe
48 against concurrent writes.
49- It is possible to see slightly outdated values for user and system times
50 due to the batch processing nature of percpu_counter.
diff --git a/kernel/sched.c b/kernel/sched.c
index c8d7f17bd036..8d1bdbe8aafc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1393,10 +1393,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
1393 struct rq_iterator *iterator); 1393 struct rq_iterator *iterator);
1394#endif 1394#endif
1395 1395
1396/* Time spent by the tasks of the cpu accounting group executing in ... */
1397enum cpuacct_stat_index {
1398 CPUACCT_STAT_USER, /* ... user mode */
1399 CPUACCT_STAT_SYSTEM, /* ... kernel mode */
1400
1401 CPUACCT_STAT_NSTATS,
1402};
1403
1396#ifdef CONFIG_CGROUP_CPUACCT 1404#ifdef CONFIG_CGROUP_CPUACCT
1397static void cpuacct_charge(struct task_struct *tsk, u64 cputime); 1405static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
1406static void cpuacct_update_stats(struct task_struct *tsk,
1407 enum cpuacct_stat_index idx, cputime_t val);
1398#else 1408#else
1399static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} 1409static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
1410static inline void cpuacct_update_stats(struct task_struct *tsk,
1411 enum cpuacct_stat_index idx, cputime_t val) {}
1400#endif 1412#endif
1401 1413
1402static inline void inc_cpu_load(struct rq *rq, unsigned long load) 1414static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -4236,6 +4248,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
4236 cpustat->nice = cputime64_add(cpustat->nice, tmp); 4248 cpustat->nice = cputime64_add(cpustat->nice, tmp);
4237 else 4249 else
4238 cpustat->user = cputime64_add(cpustat->user, tmp); 4250 cpustat->user = cputime64_add(cpustat->user, tmp);
4251
4252 cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
4239 /* Account for user time used */ 4253 /* Account for user time used */
4240 acct_update_integrals(p); 4254 acct_update_integrals(p);
4241} 4255}
@@ -4297,6 +4311,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4297 else 4311 else
4298 cpustat->system = cputime64_add(cpustat->system, tmp); 4312 cpustat->system = cputime64_add(cpustat->system, tmp);
4299 4313
4314 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
4315
4300 /* Account for system time used */ 4316 /* Account for system time used */
4301 acct_update_integrals(p); 4317 acct_update_integrals(p);
4302} 4318}
@@ -9539,6 +9555,7 @@ struct cpuacct {
9539 struct cgroup_subsys_state css; 9555 struct cgroup_subsys_state css;
9540 /* cpuusage holds pointer to a u64-type object on every cpu */ 9556 /* cpuusage holds pointer to a u64-type object on every cpu */
9541 u64 *cpuusage; 9557 u64 *cpuusage;
9558 struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
9542 struct cpuacct *parent; 9559 struct cpuacct *parent;
9543}; 9560};
9544 9561
@@ -9563,20 +9580,32 @@ static struct cgroup_subsys_state *cpuacct_create(
9563 struct cgroup_subsys *ss, struct cgroup *cgrp) 9580 struct cgroup_subsys *ss, struct cgroup *cgrp)
9564{ 9581{
9565 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); 9582 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
9583 int i;
9566 9584
9567 if (!ca) 9585 if (!ca)
9568 return ERR_PTR(-ENOMEM); 9586 goto out;
9569 9587
9570 ca->cpuusage = alloc_percpu(u64); 9588 ca->cpuusage = alloc_percpu(u64);
9571 if (!ca->cpuusage) { 9589 if (!ca->cpuusage)
9572 kfree(ca); 9590 goto out_free_ca;
9573 return ERR_PTR(-ENOMEM); 9591
9574 } 9592 for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
9593 if (percpu_counter_init(&ca->cpustat[i], 0))
9594 goto out_free_counters;
9575 9595
9576 if (cgrp->parent) 9596 if (cgrp->parent)
9577 ca->parent = cgroup_ca(cgrp->parent); 9597 ca->parent = cgroup_ca(cgrp->parent);
9578 9598
9579 return &ca->css; 9599 return &ca->css;
9600
9601out_free_counters:
9602 while (--i >= 0)
9603 percpu_counter_destroy(&ca->cpustat[i]);
9604 free_percpu(ca->cpuusage);
9605out_free_ca:
9606 kfree(ca);
9607out:
9608 return ERR_PTR(-ENOMEM);
9580} 9609}
9581 9610
9582/* destroy an existing cpu accounting group */ 9611/* destroy an existing cpu accounting group */
@@ -9584,7 +9613,10 @@ static void
9584cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) 9613cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
9585{ 9614{
9586 struct cpuacct *ca = cgroup_ca(cgrp); 9615 struct cpuacct *ca = cgroup_ca(cgrp);
9616 int i;
9587 9617
9618 for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
9619 percpu_counter_destroy(&ca->cpustat[i]);
9588 free_percpu(ca->cpuusage); 9620 free_percpu(ca->cpuusage);
9589 kfree(ca); 9621 kfree(ca);
9590} 9622}
@@ -9671,6 +9703,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
9671 return 0; 9703 return 0;
9672} 9704}
9673 9705
9706static const char *cpuacct_stat_desc[] = {
9707 [CPUACCT_STAT_USER] = "user",
9708 [CPUACCT_STAT_SYSTEM] = "system",
9709};
9710
9711static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
9712 struct cgroup_map_cb *cb)
9713{
9714 struct cpuacct *ca = cgroup_ca(cgrp);
9715 int i;
9716
9717 for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
9718 s64 val = percpu_counter_read(&ca->cpustat[i]);
9719 val = cputime64_to_clock_t(val);
9720 cb->fill(cb, cpuacct_stat_desc[i], val);
9721 }
9722 return 0;
9723}
9724
9674static struct cftype files[] = { 9725static struct cftype files[] = {
9675 { 9726 {
9676 .name = "usage", 9727 .name = "usage",
@@ -9681,7 +9732,10 @@ static struct cftype files[] = {
9681 .name = "usage_percpu", 9732 .name = "usage_percpu",
9682 .read_seq_string = cpuacct_percpu_seq_read, 9733 .read_seq_string = cpuacct_percpu_seq_read,
9683 }, 9734 },
9684 9735 {
9736 .name = "stat",
9737 .read_map = cpuacct_stats_show,
9738 },
9685}; 9739};
9686 9740
9687static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) 9741static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -9716,6 +9770,27 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
9716 rcu_read_unlock(); 9770 rcu_read_unlock();
9717} 9771}
9718 9772
9773/*
9774 * Charge the system/user time to the task's accounting group.
9775 */
9776static void cpuacct_update_stats(struct task_struct *tsk,
9777 enum cpuacct_stat_index idx, cputime_t val)
9778{
9779 struct cpuacct *ca;
9780
9781 if (unlikely(!cpuacct_subsys.active))
9782 return;
9783
9784 rcu_read_lock();
9785 ca = task_ca(tsk);
9786
9787 do {
9788 percpu_counter_add(&ca->cpustat[idx], val);
9789 ca = ca->parent;
9790 } while (ca);
9791 rcu_read_unlock();
9792}
9793
9719struct cgroup_subsys cpuacct_subsys = { 9794struct cgroup_subsys cpuacct_subsys = {
9720 .name = "cpuacct", 9795 .name = "cpuacct",
9721 .create = cpuacct_create, 9796 .create = cpuacct_create,