diff options
-rw-r--r-- | Documentation/cgroups/cpuacct.txt | 18 | ||||
-rw-r--r-- | kernel/sched.c | 87 |
2 files changed, 99 insertions, 6 deletions
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt index bb775fbe43d7..8b930946c52a 100644 --- a/Documentation/cgroups/cpuacct.txt +++ b/Documentation/cgroups/cpuacct.txt | |||
@@ -30,3 +30,21 @@ The above steps create a new group g1 and move the current shell | |||
30 | process (bash) into it. CPU time consumed by this bash and its children | 30 | process (bash) into it. CPU time consumed by this bash and its children |
31 | can be obtained from g1/cpuacct.usage and the same is accumulated in | 31 | can be obtained from g1/cpuacct.usage and the same is accumulated in |
32 | /cgroups/cpuacct.usage also. | 32 | /cgroups/cpuacct.usage also. |
33 | |||
34 | The cpuacct.stat file lists a few statistics which further divide the | ||
35 | CPU time obtained by the cgroup into user and system times. Currently | ||
36 | the following statistics are supported: | ||
37 | |||
38 | user: Time spent by tasks of the cgroup in user mode. | ||
39 | system: Time spent by tasks of the cgroup in kernel mode. | ||
40 | |||
41 | Both user and system are reported in USER_HZ units. | ||
42 | |||
43 | The cpuacct controller uses the percpu_counter interface to collect user and | ||
44 | system times. This has two side effects: | ||
45 | |||
46 | - It is theoretically possible to see wrong values for user and system times. | ||
47 | This is because percpu_counter_read() on 32bit systems isn't safe | ||
48 | against concurrent writes. | ||
49 | - It is possible to see slightly outdated values for user and system times | ||
50 | due to the batch processing nature of percpu_counter. | ||
diff --git a/kernel/sched.c b/kernel/sched.c index c8d7f17bd036..8d1bdbe8aafc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1393,10 +1393,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1393 | struct rq_iterator *iterator); | 1393 | struct rq_iterator *iterator); |
1394 | #endif | 1394 | #endif |
1395 | 1395 | ||
/*
 * Index of each CPU time statistic kept per cpu accounting group.
 * Every entry counts time spent by the tasks of the group executing in
 * the mode named on its line.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* user mode */
	CPUACCT_STAT_SYSTEM,	/* kernel mode */

	/* Number of statistics; used as an array size and loop bound. */
	CPUACCT_STAT_NSTATS,
};
1403 | |||
1396 | #ifdef CONFIG_CGROUP_CPUACCT | 1404 | #ifdef CONFIG_CGROUP_CPUACCT |
1397 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); | 1405 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); |
1406 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
1407 | enum cpuacct_stat_index idx, cputime_t val); | ||
1398 | #else | 1408 | #else |
1399 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 1409 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
1410 | static inline void cpuacct_update_stats(struct task_struct *tsk, | ||
1411 | enum cpuacct_stat_index idx, cputime_t val) {} | ||
1400 | #endif | 1412 | #endif |
1401 | 1413 | ||
1402 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) | 1414 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) |
@@ -4236,6 +4248,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime, | |||
4236 | cpustat->nice = cputime64_add(cpustat->nice, tmp); | 4248 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
4237 | else | 4249 | else |
4238 | cpustat->user = cputime64_add(cpustat->user, tmp); | 4250 | cpustat->user = cputime64_add(cpustat->user, tmp); |
4251 | |||
4252 | cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime); | ||
4239 | /* Account for user time used */ | 4253 | /* Account for user time used */ |
4240 | acct_update_integrals(p); | 4254 | acct_update_integrals(p); |
4241 | } | 4255 | } |
@@ -4297,6 +4311,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
4297 | else | 4311 | else |
4298 | cpustat->system = cputime64_add(cpustat->system, tmp); | 4312 | cpustat->system = cputime64_add(cpustat->system, tmp); |
4299 | 4313 | ||
4314 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
4315 | |||
4300 | /* Account for system time used */ | 4316 | /* Account for system time used */ |
4301 | acct_update_integrals(p); | 4317 | acct_update_integrals(p); |
4302 | } | 4318 | } |
@@ -9539,6 +9555,7 @@ struct cpuacct { | |||
9539 | struct cgroup_subsys_state css; | 9555 | struct cgroup_subsys_state css; |
9540 | /* cpuusage holds pointer to a u64-type object on every cpu */ | 9556 | /* cpuusage holds pointer to a u64-type object on every cpu */ |
9541 | u64 *cpuusage; | 9557 | u64 *cpuusage; |
9558 | struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; | ||
9542 | struct cpuacct *parent; | 9559 | struct cpuacct *parent; |
9543 | }; | 9560 | }; |
9544 | 9561 | ||
@@ -9563,20 +9580,32 @@ static struct cgroup_subsys_state *cpuacct_create( | |||
9563 | struct cgroup_subsys *ss, struct cgroup *cgrp) | 9580 | struct cgroup_subsys *ss, struct cgroup *cgrp) |
9564 | { | 9581 | { |
9565 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); | 9582 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
9583 | int i; | ||
9566 | 9584 | ||
9567 | if (!ca) | 9585 | if (!ca) |
9568 | return ERR_PTR(-ENOMEM); | 9586 | goto out; |
9569 | 9587 | ||
9570 | ca->cpuusage = alloc_percpu(u64); | 9588 | ca->cpuusage = alloc_percpu(u64); |
9571 | if (!ca->cpuusage) { | 9589 | if (!ca->cpuusage) |
9572 | kfree(ca); | 9590 | goto out_free_ca; |
9573 | return ERR_PTR(-ENOMEM); | 9591 | |
9574 | } | 9592 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) |
9593 | if (percpu_counter_init(&ca->cpustat[i], 0)) | ||
9594 | goto out_free_counters; | ||
9575 | 9595 | ||
9576 | if (cgrp->parent) | 9596 | if (cgrp->parent) |
9577 | ca->parent = cgroup_ca(cgrp->parent); | 9597 | ca->parent = cgroup_ca(cgrp->parent); |
9578 | 9598 | ||
9579 | return &ca->css; | 9599 | return &ca->css; |
9600 | |||
9601 | out_free_counters: | ||
9602 | while (--i >= 0) | ||
9603 | percpu_counter_destroy(&ca->cpustat[i]); | ||
9604 | free_percpu(ca->cpuusage); | ||
9605 | out_free_ca: | ||
9606 | kfree(ca); | ||
9607 | out: | ||
9608 | return ERR_PTR(-ENOMEM); | ||
9580 | } | 9609 | } |
9581 | 9610 | ||
9582 | /* destroy an existing cpu accounting group */ | 9611 | /* destroy an existing cpu accounting group */ |
@@ -9584,7 +9613,10 @@ static void | |||
9584 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | 9613 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) |
9585 | { | 9614 | { |
9586 | struct cpuacct *ca = cgroup_ca(cgrp); | 9615 | struct cpuacct *ca = cgroup_ca(cgrp); |
9616 | int i; | ||
9587 | 9617 | ||
9618 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) | ||
9619 | percpu_counter_destroy(&ca->cpustat[i]); | ||
9588 | free_percpu(ca->cpuusage); | 9620 | free_percpu(ca->cpuusage); |
9589 | kfree(ca); | 9621 | kfree(ca); |
9590 | } | 9622 | } |
@@ -9671,6 +9703,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | |||
9671 | return 0; | 9703 | return 0; |
9672 | } | 9704 | } |
9673 | 9705 | ||
9706 | static const char *cpuacct_stat_desc[] = { | ||
9707 | [CPUACCT_STAT_USER] = "user", | ||
9708 | [CPUACCT_STAT_SYSTEM] = "system", | ||
9709 | }; | ||
9710 | |||
9711 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
9712 | struct cgroup_map_cb *cb) | ||
9713 | { | ||
9714 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
9715 | int i; | ||
9716 | |||
9717 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) { | ||
9718 | s64 val = percpu_counter_read(&ca->cpustat[i]); | ||
9719 | val = cputime64_to_clock_t(val); | ||
9720 | cb->fill(cb, cpuacct_stat_desc[i], val); | ||
9721 | } | ||
9722 | return 0; | ||
9723 | } | ||
9724 | |||
9674 | static struct cftype files[] = { | 9725 | static struct cftype files[] = { |
9675 | { | 9726 | { |
9676 | .name = "usage", | 9727 | .name = "usage", |
@@ -9681,7 +9732,10 @@ static struct cftype files[] = { | |||
9681 | .name = "usage_percpu", | 9732 | .name = "usage_percpu", |
9682 | .read_seq_string = cpuacct_percpu_seq_read, | 9733 | .read_seq_string = cpuacct_percpu_seq_read, |
9683 | }, | 9734 | }, |
9684 | 9735 | { | |
9736 | .name = "stat", | ||
9737 | .read_map = cpuacct_stats_show, | ||
9738 | }, | ||
9685 | }; | 9739 | }; |
9686 | 9740 | ||
9687 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | 9741 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) |
@@ -9716,6 +9770,27 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |||
9716 | rcu_read_unlock(); | 9770 | rcu_read_unlock(); |
9717 | } | 9771 | } |
9718 | 9772 | ||
9773 | /* | ||
9774 | * Charge the system/user time to the task's accounting group. | ||
9775 | */ | ||
9776 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
9777 | enum cpuacct_stat_index idx, cputime_t val) | ||
9778 | { | ||
9779 | struct cpuacct *ca; | ||
9780 | |||
9781 | if (unlikely(!cpuacct_subsys.active)) | ||
9782 | return; | ||
9783 | |||
9784 | rcu_read_lock(); | ||
9785 | ca = task_ca(tsk); | ||
9786 | |||
9787 | do { | ||
9788 | percpu_counter_add(&ca->cpustat[idx], val); | ||
9789 | ca = ca->parent; | ||
9790 | } while (ca); | ||
9791 | rcu_read_unlock(); | ||
9792 | } | ||
9793 | |||
9719 | struct cgroup_subsys cpuacct_subsys = { | 9794 | struct cgroup_subsys cpuacct_subsys = { |
9720 | .name = "cpuacct", | 9795 | .name = "cpuacct", |
9721 | .create = cpuacct_create, | 9796 | .create = cpuacct_create, |