diff options
| -rw-r--r-- | Documentation/cgroups/cpuacct.txt | 18 | ||||
| -rw-r--r-- | kernel/sched.c | 87 |
2 files changed, 99 insertions, 6 deletions
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt index bb775fbe43d7..8b930946c52a 100644 --- a/Documentation/cgroups/cpuacct.txt +++ b/Documentation/cgroups/cpuacct.txt | |||
| @@ -30,3 +30,21 @@ The above steps create a new group g1 and move the current shell | |||
| 30 | process (bash) into it. CPU time consumed by this bash and its children | 30 | process (bash) into it. CPU time consumed by this bash and its children |
| 31 | can be obtained from g1/cpuacct.usage and the same is accumulated in | 31 | can be obtained from g1/cpuacct.usage and the same is accumulated in |
| 32 | /cgroups/cpuacct.usage also. | 32 | /cgroups/cpuacct.usage also. |
| 33 | |||
| 34 | The cpuacct.stat file lists a few statistics that further divide the | ||
| 35 | CPU time obtained by the cgroup into user and system times. Currently, | ||
| 36 | the following statistics are supported: | ||
| 37 | |||
| 38 | user: Time spent by tasks of the cgroup in user mode. | ||
| 39 | system: Time spent by tasks of the cgroup in kernel mode. | ||
| 40 | |||
| 41 | Both user and system are reported in USER_HZ units. | ||
| 42 | |||
| 43 | The cpuacct controller uses the percpu_counter interface to collect user and | ||
| 44 | system times. This has two side effects: | ||
| 45 | |||
| 46 | - It is theoretically possible to see wrong values for user and system times. | ||
| 47 | This is because percpu_counter_read() on 32-bit systems is not safe | ||
| 48 | against concurrent writes. | ||
| 49 | - It is possible to see slightly outdated values for user and system times | ||
| 50 | due to the batch processing nature of percpu_counter. | ||
diff --git a/kernel/sched.c b/kernel/sched.c index c8d7f17bd036..8d1bdbe8aafc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -1393,10 +1393,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
| 1393 | struct rq_iterator *iterator); | 1393 | struct rq_iterator *iterator); |
| 1394 | #endif | 1394 | #endif |
| 1395 | 1395 | ||
| 1396 | /* Indices into the per-group cpustat[] counters: time spent by the tasks of the cpu accounting group executing in ... */ | ||
| 1397 | enum cpuacct_stat_index { | ||
| 1398 | CPUACCT_STAT_USER, /* ... user mode */ | ||
| 1399 | CPUACCT_STAT_SYSTEM, /* ... kernel mode */ | ||
| 1400 | |||
| 1401 | CPUACCT_STAT_NSTATS, /* number of statistics; sizes cpustat[] and bounds the loops over it */ | ||
| 1402 | }; | ||
| 1403 | |||
| 1396 | #ifdef CONFIG_CGROUP_CPUACCT | 1404 | #ifdef CONFIG_CGROUP_CPUACCT |
| 1397 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); | 1405 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); |
| 1406 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
| 1407 | enum cpuacct_stat_index idx, cputime_t val); | ||
| 1398 | #else | 1408 | #else |
| 1399 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 1409 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
| 1410 | static inline void cpuacct_update_stats(struct task_struct *tsk, | ||
| 1411 | enum cpuacct_stat_index idx, cputime_t val) {} | ||
| 1400 | #endif | 1412 | #endif |
| 1401 | 1413 | ||
| 1402 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) | 1414 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) |
| @@ -4236,6 +4248,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime, | |||
| 4236 | cpustat->nice = cputime64_add(cpustat->nice, tmp); | 4248 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
| 4237 | else | 4249 | else |
| 4238 | cpustat->user = cputime64_add(cpustat->user, tmp); | 4250 | cpustat->user = cputime64_add(cpustat->user, tmp); |
| 4251 | |||
| 4252 | cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime); | ||
| 4239 | /* Account for user time used */ | 4253 | /* Account for user time used */ |
| 4240 | acct_update_integrals(p); | 4254 | acct_update_integrals(p); |
| 4241 | } | 4255 | } |
| @@ -4297,6 +4311,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
| 4297 | else | 4311 | else |
| 4298 | cpustat->system = cputime64_add(cpustat->system, tmp); | 4312 | cpustat->system = cputime64_add(cpustat->system, tmp); |
| 4299 | 4313 | ||
| 4314 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
| 4315 | |||
| 4300 | /* Account for system time used */ | 4316 | /* Account for system time used */ |
| 4301 | acct_update_integrals(p); | 4317 | acct_update_integrals(p); |
| 4302 | } | 4318 | } |
| @@ -9539,6 +9555,7 @@ struct cpuacct { | |||
| 9539 | struct cgroup_subsys_state css; | 9555 | struct cgroup_subsys_state css; |
| 9540 | /* cpuusage holds pointer to a u64-type object on every cpu */ | 9556 | /* cpuusage holds pointer to a u64-type object on every cpu */ |
| 9541 | u64 *cpuusage; | 9557 | u64 *cpuusage; |
| 9558 | struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; | ||
| 9542 | struct cpuacct *parent; | 9559 | struct cpuacct *parent; |
| 9543 | }; | 9560 | }; |
| 9544 | 9561 | ||
| @@ -9563,20 +9580,32 @@ static struct cgroup_subsys_state *cpuacct_create( | |||
| 9563 | struct cgroup_subsys *ss, struct cgroup *cgrp) | 9580 | struct cgroup_subsys *ss, struct cgroup *cgrp) |
| 9564 | { | 9581 | { |
| 9565 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); | 9582 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
| 9583 | int i; | ||
| 9566 | 9584 | ||
| 9567 | if (!ca) | 9585 | if (!ca) |
| 9568 | return ERR_PTR(-ENOMEM); | 9586 | goto out; |
| 9569 | 9587 | ||
| 9570 | ca->cpuusage = alloc_percpu(u64); | 9588 | ca->cpuusage = alloc_percpu(u64); |
| 9571 | if (!ca->cpuusage) { | 9589 | if (!ca->cpuusage) |
| 9572 | kfree(ca); | 9590 | goto out_free_ca; |
| 9573 | return ERR_PTR(-ENOMEM); | 9591 | |
| 9574 | } | 9592 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) |
| 9593 | if (percpu_counter_init(&ca->cpustat[i], 0)) | ||
| 9594 | goto out_free_counters; | ||
| 9575 | 9595 | ||
| 9576 | if (cgrp->parent) | 9596 | if (cgrp->parent) |
| 9577 | ca->parent = cgroup_ca(cgrp->parent); | 9597 | ca->parent = cgroup_ca(cgrp->parent); |
| 9578 | 9598 | ||
| 9579 | return &ca->css; | 9599 | return &ca->css; |
| 9600 | |||
| 9601 | out_free_counters: | ||
| 9602 | while (--i >= 0) | ||
| 9603 | percpu_counter_destroy(&ca->cpustat[i]); | ||
| 9604 | free_percpu(ca->cpuusage); | ||
| 9605 | out_free_ca: | ||
| 9606 | kfree(ca); | ||
| 9607 | out: | ||
| 9608 | return ERR_PTR(-ENOMEM); | ||
| 9580 | } | 9609 | } |
| 9581 | 9610 | ||
| 9582 | /* destroy an existing cpu accounting group */ | 9611 | /* destroy an existing cpu accounting group */ |
| @@ -9584,7 +9613,10 @@ static void | |||
| 9584 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | 9613 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) |
| 9585 | { | 9614 | { |
| 9586 | struct cpuacct *ca = cgroup_ca(cgrp); | 9615 | struct cpuacct *ca = cgroup_ca(cgrp); |
| 9616 | int i; | ||
| 9587 | 9617 | ||
| 9618 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) | ||
| 9619 | percpu_counter_destroy(&ca->cpustat[i]); | ||
| 9588 | free_percpu(ca->cpuusage); | 9620 | free_percpu(ca->cpuusage); |
| 9589 | kfree(ca); | 9621 | kfree(ca); |
| 9590 | } | 9622 | } |
| @@ -9671,6 +9703,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | |||
| 9671 | return 0; | 9703 | return 0; |
| 9672 | } | 9704 | } |
| 9673 | 9705 | ||
| 9706 | static const char *cpuacct_stat_desc[] = { /* key names handed to cb->fill() by cpuacct_stats_show(), indexed by enum cpuacct_stat_index */ | ||
| 9707 | [CPUACCT_STAT_USER] = "user", | ||
| 9708 | [CPUACCT_STAT_SYSTEM] = "system", | ||
| 9709 | }; | ||
| 9710 | |||
| 9711 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
| 9712 | struct cgroup_map_cb *cb) | ||
| 9713 | { /* read_map handler of the stat control file: emits one name/value pair per statistic through cb->fill(); always returns 0 */ | ||
| 9714 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
| 9715 | int i; | ||
| 9716 | |||
| 9717 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) { | ||
| 9718 | s64 val = percpu_counter_read(&ca->cpustat[i]); /* may be slightly outdated: percpu_counter batches updates (see cpuacct.txt) */ | ||
| 9719 | val = cputime64_to_clock_t(val); /* convert to clock ticks; cpuacct.txt documents the output as USER_HZ units */ | ||
| 9720 | cb->fill(cb, cpuacct_stat_desc[i], val); | ||
| 9721 | } | ||
| 9722 | return 0; | ||
| 9723 | } | ||
| 9724 | |||
| 9674 | static struct cftype files[] = { | 9725 | static struct cftype files[] = { |
| 9675 | { | 9726 | { |
| 9676 | .name = "usage", | 9727 | .name = "usage", |
| @@ -9681,7 +9732,10 @@ static struct cftype files[] = { | |||
| 9681 | .name = "usage_percpu", | 9732 | .name = "usage_percpu", |
| 9682 | .read_seq_string = cpuacct_percpu_seq_read, | 9733 | .read_seq_string = cpuacct_percpu_seq_read, |
| 9683 | }, | 9734 | }, |
| 9684 | 9735 | { | |
| 9736 | .name = "stat", | ||
| 9737 | .read_map = cpuacct_stats_show, | ||
| 9738 | }, | ||
| 9685 | }; | 9739 | }; |
| 9686 | 9740 | ||
| 9687 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | 9741 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) |
| @@ -9716,6 +9770,27 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |||
| 9716 | rcu_read_unlock(); | 9770 | rcu_read_unlock(); |
| 9717 | } | 9771 | } |
| 9718 | 9772 | ||
| 9773 | /* | ||
| 9774 | * Charge val of user or system time (selected by idx) to the accounting group of tsk and to every ancestor group reached through ->parent. | ||
| 9775 | */ | ||
| 9776 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
| 9777 | enum cpuacct_stat_index idx, cputime_t val) | ||
| 9778 | { | ||
| 9779 | struct cpuacct *ca; | ||
| 9780 | |||
| 9781 | if (unlikely(!cpuacct_subsys.active)) /* subsystem not active: nothing to charge */ | ||
| 9782 | return; | ||
| 9783 | |||
| 9784 | rcu_read_lock(); /* NOTE(review): presumably protects the task_ca() lookup and the parent walk -- confirm */ | ||
| 9785 | ca = task_ca(tsk); | ||
| 9786 | |||
| 9787 | do { /* walk from the group of tsk upwards until ->parent is NULL */ | ||
| 9788 | percpu_counter_add(&ca->cpustat[idx], val); | ||
| 9789 | ca = ca->parent; | ||
| 9790 | } while (ca); | ||
| 9791 | rcu_read_unlock(); | ||
| 9792 | } | ||
| 9793 | |||
| 9719 | struct cgroup_subsys cpuacct_subsys = { | 9794 | struct cgroup_subsys cpuacct_subsys = { |
| 9720 | .name = "cpuacct", | 9795 | .name = "cpuacct", |
| 9721 | .create = cpuacct_create, | 9796 | .create = cpuacct_create, |
