aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com> 2007-12-02 14:04:49 -0500
committer: Ingo Molnar <mingo@elte.hu> 2007-12-02 14:04:49 -0500
commit: d842de871c8c5e2110c7e4f3f29bbe7b1a519ab8 (patch)
tree: 3a0be7d00de97c561e486242f11eec0e1281074b
parent: 92d499d991ec4f5cbd00d6f33967eab9d3ee8d6c (diff)
sched: cpu accounting controller (V2)
Commit cfb5285660aad4931b2ebbfa902ea48a37dfffa1 removed a useful feature for us, which provided a cpu accounting resource controller. This feature would be useful if someone wants to group tasks only for accounting purposes and doesn't really want to exercise any control over their cpu consumption.

The patch below reintroduces the feature. It is based on Paul Menage's original patch (Commit 62d0df64065e7c135d0002f069444fbdfc64768f), with these differences:

- Removed load average information. I felt it needs more thought (esp. to deal with SMP and virtualized platforms) and can be added for 2.6.25 after more discussions.
- Convert group cpu usage to be nanosecond accurate (as the rest of the cfs stats are) and invoke cpuacct_charge() from the respective scheduler classes.
- Make accounting scalable on SMP systems by splitting the usage counter to be per-cpu.
- Move the code from kernel/cpu_acct.c to kernel/sched.c (since the code is not big enough to warrant a new file and also this rightly needs to live inside the scheduler. Also, things like accessing rq->lock while reading cpu usage become easier if the code lives in kernel/sched.c).

The patch also modifies the cpu controller not to provide the same accounting information.

Tested-by: Balbir Singh <balbir@linux.vnet.ibm.com>

Tested the patches on top of 2.6.24-rc3. The patches work fine. Ran some simple tests like cpuspin (spin on the cpu), ran several tasks in the same group and timed them. Compared their time stamps with cpuacct.usage.

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- include/linux/cgroup_subsys.h 7
-rw-r--r-- init/Kconfig 7
-rw-r--r-- kernel/sched.c 155
-rw-r--r-- kernel/sched_fair.c 6
-rw-r--r-- kernel/sched_rt.c 1
5 files changed, 150 insertions, 26 deletions
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index d62fcee9a08a..9ec43186ba80 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -30,3 +30,10 @@ SUBSYS(cpu_cgroup)
30#endif 30#endif
31 31
32/* */ 32/* */
33
34#ifdef CONFIG_CGROUP_CPUACCT
35SUBSYS(cpuacct)
36#endif
37
38/* */
39
diff --git a/init/Kconfig b/init/Kconfig
index d35e44f4dd6b..404bbf3699be 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -354,6 +354,13 @@ config FAIR_CGROUP_SCHED
354 354
355endchoice 355endchoice
356 356
357config CGROUP_CPUACCT
358 bool "Simple CPU accounting cgroup subsystem"
359 depends on CGROUPS
360 help
361 Provides a simple Resource Controller for monitoring the
362 total CPU consumed by the tasks in a cgroup
363
357config SYSFS_DEPRECATED 364config SYSFS_DEPRECATED
358 bool "Create deprecated sysfs files" 365 bool "Create deprecated sysfs files"
359 default y 366 default y
diff --git a/kernel/sched.c b/kernel/sched.c
index 98dcdf272db3..59ff6b140edb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -854,6 +854,12 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
854 struct rq_iterator *iterator); 854 struct rq_iterator *iterator);
855#endif 855#endif
856 856
857#ifdef CONFIG_CGROUP_CPUACCT
858static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
859#else
860static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
861#endif
862
857#include "sched_stats.h" 863#include "sched_stats.h"
858#include "sched_idletask.c" 864#include "sched_idletask.c"
859#include "sched_fair.c" 865#include "sched_fair.c"
@@ -7221,38 +7227,12 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
7221 return (u64) tg->shares; 7227 return (u64) tg->shares;
7222} 7228}
7223 7229
7224static u64 cpu_usage_read(struct cgroup *cgrp, struct cftype *cft)
7225{
7226 struct task_group *tg = cgroup_tg(cgrp);
7227 unsigned long flags;
7228 u64 res = 0;
7229 int i;
7230
7231 for_each_possible_cpu(i) {
7232 /*
7233 * Lock to prevent races with updating 64-bit counters
7234 * on 32-bit arches.
7235 */
7236 spin_lock_irqsave(&cpu_rq(i)->lock, flags);
7237 res += tg->se[i]->sum_exec_runtime;
7238 spin_unlock_irqrestore(&cpu_rq(i)->lock, flags);
7239 }
7240 /* Convert from ns to ms */
7241 do_div(res, NSEC_PER_MSEC);
7242
7243 return res;
7244}
7245
7246static struct cftype cpu_files[] = { 7230static struct cftype cpu_files[] = {
7247 { 7231 {
7248 .name = "shares", 7232 .name = "shares",
7249 .read_uint = cpu_shares_read_uint, 7233 .read_uint = cpu_shares_read_uint,
7250 .write_uint = cpu_shares_write_uint, 7234 .write_uint = cpu_shares_write_uint,
7251 }, 7235 },
7252 {
7253 .name = "usage",
7254 .read_uint = cpu_usage_read,
7255 },
7256}; 7236};
7257 7237
7258static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) 7238static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
@@ -7272,3 +7252,126 @@ struct cgroup_subsys cpu_cgroup_subsys = {
7272}; 7252};
7273 7253
7274#endif /* CONFIG_FAIR_CGROUP_SCHED */ 7254#endif /* CONFIG_FAIR_CGROUP_SCHED */
7255
7256#ifdef CONFIG_CGROUP_CPUACCT
7257
7258/*
7259 * CPU accounting code for task groups.
7260 *
7261 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
7262 * (balbir@in.ibm.com).
7263 */
7264
7265/* track cpu usage of a group of tasks */
7266struct cpuacct {
7267 struct cgroup_subsys_state css;
7268 /* cpuusage holds pointer to a u64-type object on every cpu */
7269 u64 *cpuusage;
7270};
7271
7272struct cgroup_subsys cpuacct_subsys;
7273
7274/* return cpu accounting group corresponding to this container */
7275static inline struct cpuacct *cgroup_ca(struct cgroup *cont)
7276{
7277 return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id),
7278 struct cpuacct, css);
7279}
7280
7281/* return cpu accounting group to which this task belongs */
7282static inline struct cpuacct *task_ca(struct task_struct *tsk)
7283{
7284 return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
7285 struct cpuacct, css);
7286}
7287
7288/* create a new cpu accounting group */
7289static struct cgroup_subsys_state *cpuacct_create(
7290 struct cgroup_subsys *ss, struct cgroup *cont)
7291{
7292 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
7293
7294 if (!ca)
7295 return ERR_PTR(-ENOMEM);
7296
7297 ca->cpuusage = alloc_percpu(u64);
7298 if (!ca->cpuusage) {
7299 kfree(ca);
7300 return ERR_PTR(-ENOMEM);
7301 }
7302
7303 return &ca->css;
7304}
7305
7306/* destroy an existing cpu accounting group */
7307static void cpuacct_destroy(struct cgroup_subsys *ss,
7308 struct cgroup *cont)
7309{
7310 struct cpuacct *ca = cgroup_ca(cont);
7311
7312 free_percpu(ca->cpuusage);
7313 kfree(ca);
7314}
7315
7316/* return total cpu usage (in nanoseconds) of a group */
7317static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft)
7318{
7319 struct cpuacct *ca = cgroup_ca(cont);
7320 u64 totalcpuusage = 0;
7321 int i;
7322
7323 for_each_possible_cpu(i) {
7324 u64 *cpuusage = percpu_ptr(ca->cpuusage, i);
7325
7326 /*
7327 * Take rq->lock to make 64-bit addition safe on 32-bit
7328 * platforms.
7329 */
7330 spin_lock_irq(&cpu_rq(i)->lock);
7331 totalcpuusage += *cpuusage;
7332 spin_unlock_irq(&cpu_rq(i)->lock);
7333 }
7334
7335 return totalcpuusage;
7336}
7337
7338static struct cftype files[] = {
7339 {
7340 .name = "usage",
7341 .read_uint = cpuusage_read,
7342 },
7343};
7344
7345static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont)
7346{
7347 return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
7348}
7349
7350/*
7351 * charge this task's execution time to its accounting group.
7352 *
7353 * called with rq->lock held.
7354 */
7355static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
7356{
7357 struct cpuacct *ca;
7358
7359 if (!cpuacct_subsys.active)
7360 return;
7361
7362 ca = task_ca(tsk);
7363 if (ca) {
7364 u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
7365
7366 *cpuusage += cputime;
7367 }
7368}
7369
7370struct cgroup_subsys cpuacct_subsys = {
7371 .name = "cpuacct",
7372 .create = cpuacct_create,
7373 .destroy = cpuacct_destroy,
7374 .populate = cpuacct_populate,
7375 .subsys_id = cpuacct_subsys_id,
7376};
7377#endif /* CONFIG_CGROUP_CPUACCT */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 2f16e15c022c..37bb265598db 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -351,6 +351,12 @@ static void update_curr(struct cfs_rq *cfs_rq)
351 351
352 __update_curr(cfs_rq, curr, delta_exec); 352 __update_curr(cfs_rq, curr, delta_exec);
353 curr->exec_start = now; 353 curr->exec_start = now;
354
355 if (entity_is_task(curr)) {
356 struct task_struct *curtask = task_of(curr);
357
358 cpuacct_charge(curtask, delta_exec);
359 }
354} 360}
355 361
356static inline void 362static inline void
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 8abd752a0ebd..ee9c8b6529e9 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -23,6 +23,7 @@ static void update_curr_rt(struct rq *rq)
23 23
24 curr->se.sum_exec_runtime += delta_exec; 24 curr->se.sum_exec_runtime += delta_exec;
25 curr->se.exec_start = rq->clock; 25 curr->se.exec_start = rq->clock;
26 cpuacct_charge(curr, delta_exec);
26} 27}
27 28
28static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) 29static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)