aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPaul Menage <menage@google.com>2007-10-19 02:39:42 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-19 14:53:36 -0400
commit62d0df64065e7c135d0002f069444fbdfc64768f (patch)
tree9087bf336182ab9c619460ba2370a223200179bc /kernel
parent8793d854edbc2774943a4b0de3304dc73991159a (diff)
Task Control Groups: example CPU accounting subsystem
This example demonstrates how to use the generic cgroup subsystem for a simple resource tracker that counts, for the processes in a cgroup, the total CPU time used and the %CPU used in the last complete 10 second interval. Portions contributed by Balbir Singh <balbir@in.ibm.com> Signed-off-by: Paul Menage <menage@google.com> Cc: Serge E. Hallyn <serue@us.ibm.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Paul Jackson <pj@sgi.com> Cc: Kirill Korotaev <dev@openvz.org> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/cpu_acct.c186
-rw-r--r--kernel/sched.c14
3 files changed, 198 insertions, 3 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index ea8c8a12e19a..48a7fae00271 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
38obj-$(CONFIG_COMPAT) += compat.o 38obj-$(CONFIG_COMPAT) += compat.o
39obj-$(CONFIG_CGROUPS) += cgroup.o 39obj-$(CONFIG_CGROUPS) += cgroup.o
40obj-$(CONFIG_CPUSETS) += cpuset.o 40obj-$(CONFIG_CPUSETS) += cpuset.o
41obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o
41obj-$(CONFIG_IKCONFIG) += configs.o 42obj-$(CONFIG_IKCONFIG) += configs.o
42obj-$(CONFIG_STOP_MACHINE) += stop_machine.o 43obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
43obj-$(CONFIG_AUDIT) += audit.o auditfilter.o 44obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c
new file mode 100644
index 000000000000..731e47e7f164
--- /dev/null
+++ b/kernel/cpu_acct.c
@@ -0,0 +1,186 @@
1/*
2 * kernel/cpu_acct.c - CPU accounting cgroup subsystem
3 *
4 * Copyright (C) Google Inc, 2006
5 *
6 * Developed by Paul Menage (menage@google.com) and Balbir Singh
7 * (balbir@in.ibm.com)
8 *
9 */
10
11/*
12 * Example cgroup subsystem for reporting total CPU usage of tasks in a
13 * cgroup, along with percentage load over a time interval
14 */
15
16#include <linux/module.h>
17#include <linux/cgroup.h>
18#include <linux/fs.h>
19#include <linux/rcupdate.h>
20
21#include <asm/div64.h>
22
23struct cpuacct {
24 struct cgroup_subsys_state css;
25 spinlock_t lock;
26 /* total time used by this class */
27 cputime64_t time;
28
29 /* time when next load calculation occurs */
30 u64 next_interval_check;
31
32 /* time used in current period */
33 cputime64_t current_interval_time;
34
35 /* time used in last period */
36 cputime64_t last_interval_time;
37};
38
39struct cgroup_subsys cpuacct_subsys;
40
41static inline struct cpuacct *cgroup_ca(struct cgroup *cont)
42{
43 return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id),
44 struct cpuacct, css);
45}
46
47static inline struct cpuacct *task_ca(struct task_struct *task)
48{
49 return container_of(task_subsys_state(task, cpuacct_subsys_id),
50 struct cpuacct, css);
51}
52
53#define INTERVAL (HZ * 10)
54
55static inline u64 next_interval_boundary(u64 now)
56{
57 /* calculate the next interval boundary beyond the
58 * current time */
59 do_div(now, INTERVAL);
60 return (now + 1) * INTERVAL;
61}
62
63static struct cgroup_subsys_state *cpuacct_create(
64 struct cgroup_subsys *ss, struct cgroup *cont)
65{
66 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
67
68 if (!ca)
69 return ERR_PTR(-ENOMEM);
70 spin_lock_init(&ca->lock);
71 ca->next_interval_check = next_interval_boundary(get_jiffies_64());
72 return &ca->css;
73}
74
75static void cpuacct_destroy(struct cgroup_subsys *ss,
76 struct cgroup *cont)
77{
78 kfree(cgroup_ca(cont));
79}
80
81/* Lazily update the load calculation if necessary. Called with ca locked */
82static void cpuusage_update(struct cpuacct *ca)
83{
84 u64 now = get_jiffies_64();
85
86 /* If we're not due for an update, return */
87 if (ca->next_interval_check > now)
88 return;
89
90 if (ca->next_interval_check <= (now - INTERVAL)) {
91 /* If it's been more than an interval since the last
92 * check, then catch up - the last interval must have
93 * been zero load */
94 ca->last_interval_time = 0;
95 ca->next_interval_check = next_interval_boundary(now);
96 } else {
97 /* If a steal takes the last interval time negative,
98 * then we just ignore it */
99 if ((s64)ca->current_interval_time > 0)
100 ca->last_interval_time = ca->current_interval_time;
101 else
102 ca->last_interval_time = 0;
103 ca->next_interval_check += INTERVAL;
104 }
105 ca->current_interval_time = 0;
106}
107
108static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft)
109{
110 struct cpuacct *ca = cgroup_ca(cont);
111 u64 time;
112
113 spin_lock_irq(&ca->lock);
114 cpuusage_update(ca);
115 time = cputime64_to_jiffies64(ca->time);
116 spin_unlock_irq(&ca->lock);
117
118 /* Convert 64-bit jiffies to seconds */
119 time *= 1000;
120 do_div(time, HZ);
121 return time;
122}
123
124static u64 load_read(struct cgroup *cont, struct cftype *cft)
125{
126 struct cpuacct *ca = cgroup_ca(cont);
127 u64 time;
128
129 /* Find the time used in the previous interval */
130 spin_lock_irq(&ca->lock);
131 cpuusage_update(ca);
132 time = cputime64_to_jiffies64(ca->last_interval_time);
133 spin_unlock_irq(&ca->lock);
134
135 /* Convert time to a percentage, to give the load in the
136 * previous period */
137 time *= 100;
138 do_div(time, INTERVAL);
139
140 return time;
141}
142
143static struct cftype files[] = {
144 {
145 .name = "usage",
146 .read_uint = cpuusage_read,
147 },
148 {
149 .name = "load",
150 .read_uint = load_read,
151 }
152};
153
154static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont)
155{
156 return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
157}
158
159void cpuacct_charge(struct task_struct *task, cputime_t cputime)
160{
161
162 struct cpuacct *ca;
163 unsigned long flags;
164
165 if (!cpuacct_subsys.active)
166 return;
167 rcu_read_lock();
168 ca = task_ca(task);
169 if (ca) {
170 spin_lock_irqsave(&ca->lock, flags);
171 cpuusage_update(ca);
172 ca->time = cputime64_add(ca->time, cputime);
173 ca->current_interval_time =
174 cputime64_add(ca->current_interval_time, cputime);
175 spin_unlock_irqrestore(&ca->lock, flags);
176 }
177 rcu_read_unlock();
178}
179
180struct cgroup_subsys cpuacct_subsys = {
181 .name = "cpuacct",
182 .create = cpuacct_create,
183 .destroy = cpuacct_destroy,
184 .populate = cpuacct_populate,
185 .subsys_id = cpuacct_subsys_id,
186};
diff --git a/kernel/sched.c b/kernel/sched.c
index ed90be46fb31..72a2a16e2214 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -51,6 +51,7 @@
51#include <linux/cpu.h> 51#include <linux/cpu.h>
52#include <linux/cpuset.h> 52#include <linux/cpuset.h>
53#include <linux/percpu.h> 53#include <linux/percpu.h>
54#include <linux/cpu_acct.h>
54#include <linux/kthread.h> 55#include <linux/kthread.h>
55#include <linux/seq_file.h> 56#include <linux/seq_file.h>
56#include <linux/sysctl.h> 57#include <linux/sysctl.h>
@@ -3307,9 +3308,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
3307{ 3308{
3308 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; 3309 struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
3309 cputime64_t tmp; 3310 cputime64_t tmp;
3311 struct rq *rq = this_rq();
3310 3312
3311 p->utime = cputime_add(p->utime, cputime); 3313 p->utime = cputime_add(p->utime, cputime);
3312 3314
3315 if (p != rq->idle)
3316 cpuacct_charge(p, cputime);
3317
3313 /* Add user time to cpustat. */ 3318 /* Add user time to cpustat. */
3314 tmp = cputime_to_cputime64(cputime); 3319 tmp = cputime_to_cputime64(cputime);
3315 if (TASK_NICE(p) > 0) 3320 if (TASK_NICE(p) > 0)
@@ -3374,9 +3379,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
3374 cpustat->irq = cputime64_add(cpustat->irq, tmp); 3379 cpustat->irq = cputime64_add(cpustat->irq, tmp);
3375 else if (softirq_count()) 3380 else if (softirq_count())
3376 cpustat->softirq = cputime64_add(cpustat->softirq, tmp); 3381 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
3377 else if (p != rq->idle) 3382 else if (p != rq->idle) {
3378 cpustat->system = cputime64_add(cpustat->system, tmp); 3383 cpustat->system = cputime64_add(cpustat->system, tmp);
3379 else if (atomic_read(&rq->nr_iowait) > 0) 3384 cpuacct_charge(p, cputime);
3385 } else if (atomic_read(&rq->nr_iowait) > 0)
3380 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 3386 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
3381 else 3387 else
3382 cpustat->idle = cputime64_add(cpustat->idle, tmp); 3388 cpustat->idle = cputime64_add(cpustat->idle, tmp);
@@ -3412,8 +3418,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
3412 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 3418 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
3413 else 3419 else
3414 cpustat->idle = cputime64_add(cpustat->idle, tmp); 3420 cpustat->idle = cputime64_add(cpustat->idle, tmp);
3415 } else 3421 } else {
3416 cpustat->steal = cputime64_add(cpustat->steal, tmp); 3422 cpustat->steal = cputime64_add(cpustat->steal, tmp);
3423 cpuacct_charge(p, -tmp);
3424 }
3417} 3425}
3418 3426
3419/* 3427/*