diff options
author | Paul Menage <menage@google.com> | 2007-10-19 02:39:42 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-19 14:53:36 -0400 |
commit | 62d0df64065e7c135d0002f069444fbdfc64768f (patch) | |
tree | 9087bf336182ab9c619460ba2370a223200179bc /kernel | |
parent | 8793d854edbc2774943a4b0de3304dc73991159a (diff) |
Task Control Groups: example CPU accounting subsystem
This example demonstrates how to use the generic cgroup subsystem for a
simple resource tracker that counts, for the processes in a cgroup, the
total CPU time used and the %CPU used in the last complete 10 second interval.
Portions contributed by Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Paul Menage <menage@google.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/cpu_acct.c | 186 | ||||
-rw-r--r-- | kernel/sched.c | 14 |
3 files changed, 198 insertions, 3 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index ea8c8a12e19a..48a7fae00271 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -38,6 +38,7 @@ obj-$(CONFIG_KEXEC) += kexec.o | |||
38 | obj-$(CONFIG_COMPAT) += compat.o | 38 | obj-$(CONFIG_COMPAT) += compat.o |
39 | obj-$(CONFIG_CGROUPS) += cgroup.o | 39 | obj-$(CONFIG_CGROUPS) += cgroup.o |
40 | obj-$(CONFIG_CPUSETS) += cpuset.o | 40 | obj-$(CONFIG_CPUSETS) += cpuset.o |
41 | obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o | ||
41 | obj-$(CONFIG_IKCONFIG) += configs.o | 42 | obj-$(CONFIG_IKCONFIG) += configs.o |
42 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o | 43 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o |
43 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o | 44 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o |
diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c new file mode 100644 index 000000000000..731e47e7f164 --- /dev/null +++ b/kernel/cpu_acct.c | |||
@@ -0,0 +1,186 @@ | |||
1 | /* | ||
2 | * kernel/cpu_acct.c - CPU accounting cgroup subsystem | ||
3 | * | ||
4 | * Copyright (C) Google Inc, 2006 | ||
5 | * | ||
6 | * Developed by Paul Menage (menage@google.com) and Balbir Singh | ||
7 | * (balbir@in.ibm.com) | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | /* | ||
12 | * Example cgroup subsystem for reporting total CPU usage of tasks in a | ||
13 | * cgroup, along with percentage load over a time interval | ||
14 | */ | ||
15 | |||
16 | #include <linux/module.h> | ||
17 | #include <linux/cgroup.h> | ||
18 | #include <linux/fs.h> | ||
19 | #include <linux/rcupdate.h> | ||
20 | |||
21 | #include <asm/div64.h> | ||
22 | |||
23 | struct cpuacct { /* per-cgroup CPU accounting state */ | |
24 | struct cgroup_subsys_state css; /* embedded; cgroup_ca()/task_ca() use container_of() on it */ | |
25 | spinlock_t lock; /* held by the read handlers and cpuacct_charge() around the fields below */ | |
26 | /* total CPU time charged to this cgroup so far (cputime units) */ | |
27 | cputime64_t time; | |
28 | |
29 | /* time, in 64-bit jiffies, when the next load calculation occurs */ | |
30 | u64 next_interval_check; | |
31 | |
32 | /* CPU time charged so far in the current interval */ | |
33 | cputime64_t current_interval_time; | |
34 | |
35 | /* CPU time charged during the last complete interval */ | |
36 | cputime64_t last_interval_time; | |
37 | }; | |
38 | |||
39 | struct cgroup_subsys cpuacct_subsys; /* forward declaration: cpuacct_charge() tests cpuacct_subsys.active ahead of the initialised definition at the end of this file */ | |
40 | |||
41 | /* Return the cpuacct whose css member is @cont's state for the cpuacct subsystem. */ static inline struct cpuacct *cgroup_ca(struct cgroup *cont) | |
42 | { | |
43 | return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id), | |
44 | struct cpuacct, css); | |
45 | } | |
46 | |||
47 | /* Return the cpuacct whose css member is @task's state for the cpuacct subsystem. */ static inline struct cpuacct *task_ca(struct task_struct *task) | |
48 | { | |
49 | return container_of(task_subsys_state(task, cpuacct_subsys_id), | |
50 | struct cpuacct, css); | |
51 | } | |
52 | |||
53 | #define INTERVAL (HZ * 10) /* length of one load-measurement interval, in jiffies (10 seconds) */ | |
54 | |||
55 | static inline u64 next_interval_boundary(u64 now) | |
56 | { | |
57 | /* calculate the next interval boundary beyond the | |
58 | * current time, i.e. the smallest multiple of INTERVAL greater than now */ | |
59 | do_div(now, INTERVAL); /* do_div() divides in place: now now holds the quotient */ | |
60 | return (now + 1) * INTERVAL; | |
61 | } | |
62 | |||
63 | /* .create callback of cpuacct_subsys: allocate the accounting state for a new cgroup. Returns its css, or ERR_PTR(-ENOMEM) if the allocation fails. @ss and @cont are not used. */ static struct cgroup_subsys_state *cpuacct_create( | |
64 | struct cgroup_subsys *ss, struct cgroup *cont) | |
65 | { | |
66 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); /* zeroed, so all time counters start at 0 */ | |
67 | |
68 | if (!ca) | |
69 | return ERR_PTR(-ENOMEM); | |
70 | spin_lock_init(&ca->lock); | |
71 | ca->next_interval_check = next_interval_boundary(get_jiffies_64()); /* first load calculation is due at the next interval boundary */ | |
72 | return &ca->css; | |
73 | } | |
74 | |||
75 | /* .destroy callback of cpuacct_subsys: free the cpuacct allocated by cpuacct_create() for @cont. @ss is not used. */ static void cpuacct_destroy(struct cgroup_subsys *ss, | |
76 | struct cgroup *cont) | |
77 | { | |
78 | kfree(cgroup_ca(cont)); | |
79 | } | |
80 | |||
81 | /* Lazily update the load calculation if necessary: once the current interval has ended, publish its total as last_interval_time and start a new interval from zero. Called with ca locked */ | |
82 | static void cpuusage_update(struct cpuacct *ca) | |
83 | { | |
84 | u64 now = get_jiffies_64(); | |
85 | |
86 | /* If we're not due for an update, return */ | |
87 | if (ca->next_interval_check > now) | |
88 | return; | |
89 | |
90 | if (ca->next_interval_check <= (now - INTERVAL)) { | |
91 | /* If it's been more than an interval since the last | |
92 | * check, then catch up - the last interval must have | |
93 | * been zero load */ | |
94 | ca->last_interval_time = 0; | |
95 | ca->next_interval_check = next_interval_boundary(now); /* resynchronise to the next boundary after now */ | |
96 | } else { | |
97 | /* If a steal takes the last interval time negative, | |
98 | * then we just ignore it and report zero for that interval */ | |
99 | if ((s64)ca->current_interval_time > 0) | |
100 | ca->last_interval_time = ca->current_interval_time; | |
101 | else | |
102 | ca->last_interval_time = 0; | |
103 | ca->next_interval_check += INTERVAL; | |
104 | } | |
105 | ca->current_interval_time = 0; /* both branches begin a fresh interval */ | |
106 | } | |
107 | |||
108 | /* Read handler for the "usage" file: total CPU time charged to @cont, in milliseconds. @cft is not used. */ static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft) | |
109 | { | |
110 | struct cpuacct *ca = cgroup_ca(cont); | |
111 | u64 time; | |
112 | |
113 | spin_lock_irq(&ca->lock); | |
114 | cpuusage_update(ca); | |
115 | time = cputime64_to_jiffies64(ca->time); | |
116 | spin_unlock_irq(&ca->lock); | |
117 | |
118 | /* Convert 64-bit jiffies to milliseconds (jiffies * 1000 / HZ) */ | |
119 | time *= 1000; | |
120 | do_div(time, HZ); | |
121 | return time; | |
122 | } | |
123 | |||
124 | /* Read handler for the "load" file: CPU time used in the last complete interval, as a percentage of INTERVAL. @cft is not used. */ static u64 load_read(struct cgroup *cont, struct cftype *cft) | |
125 | { | |
126 | struct cpuacct *ca = cgroup_ca(cont); | |
127 | u64 time; | |
128 | |
129 | /* Find the time used in the previous interval, rolling the interval over first if it has ended */ | |
130 | spin_lock_irq(&ca->lock); | |
131 | cpuusage_update(ca); | |
132 | time = cputime64_to_jiffies64(ca->last_interval_time); | |
133 | spin_unlock_irq(&ca->lock); | |
134 | |
135 | /* Convert time to a percentage, to give the load in the | |
136 | * previous period (jiffies used * 100 / INTERVAL) */ | |
137 | time *= 100; | |
138 | do_div(time, INTERVAL); | |
139 | |
140 | return time; | |
141 | } | |
142 | |||
143 | static struct cftype files[] = { /* control files added to each cgroup by cpuacct_populate() */ | |
144 | { | |
145 | .name = "usage", /* total CPU time, see cpuusage_read() */ | |
146 | .read_uint = cpuusage_read, | |
147 | }, | |
148 | { | |
149 | .name = "load", /* percentage load over the last complete interval, see load_read() */ | |
150 | .read_uint = load_read, | |
151 | } | |
152 | }; | |
153 | |||
154 | /* .populate callback of cpuacct_subsys: add the entries of files[] to @cont; returns the result of cgroup_add_files(). */ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont) | |
155 | { | |
156 | return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); | |
157 | } | |
158 | |||
159 | /* Charge @cputime to the cpuacct that @task belongs to, adding it to both the running total and the current interval. Does nothing while the subsystem is not active. */ void cpuacct_charge(struct task_struct *task, cputime_t cputime) | |
160 | { | |
161 | |
162 | struct cpuacct *ca; | |
163 | unsigned long flags; | |
164 | |
165 | if (!cpuacct_subsys.active) | |
166 | return; | |
167 | rcu_read_lock(); /* the task_ca() lookup and the update below run inside this RCU read-side section */ | |
168 | ca = task_ca(task); | |
169 | if (ca) { | |
170 | spin_lock_irqsave(&ca->lock, flags); | |
171 | cpuusage_update(ca); /* roll the interval over first so the charge lands in the right interval */ | |
172 | ca->time = cputime64_add(ca->time, cputime); | |
173 | ca->current_interval_time = | |
174 | cputime64_add(ca->current_interval_time, cputime); | |
175 | spin_unlock_irqrestore(&ca->lock, flags); | |
176 | } | |
177 | rcu_read_unlock(); | |
178 | } | |
179 | |||
180 | struct cgroup_subsys cpuacct_subsys = { /* subsystem descriptor wiring the callbacks above into the cgroup core */ | |
181 | .name = "cpuacct", | |
182 | .create = cpuacct_create, | |
183 | .destroy = cpuacct_destroy, | |
184 | .populate = cpuacct_populate, | |
185 | .subsys_id = cpuacct_subsys_id, | |
186 | }; | |
diff --git a/kernel/sched.c b/kernel/sched.c index ed90be46fb31..72a2a16e2214 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/cpu.h> | 51 | #include <linux/cpu.h> |
52 | #include <linux/cpuset.h> | 52 | #include <linux/cpuset.h> |
53 | #include <linux/percpu.h> | 53 | #include <linux/percpu.h> |
54 | #include <linux/cpu_acct.h> | ||
54 | #include <linux/kthread.h> | 55 | #include <linux/kthread.h> |
55 | #include <linux/seq_file.h> | 56 | #include <linux/seq_file.h> |
56 | #include <linux/sysctl.h> | 57 | #include <linux/sysctl.h> |
@@ -3307,9 +3308,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime) | |||
3307 | { | 3308 | { |
3308 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 3309 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
3309 | cputime64_t tmp; | 3310 | cputime64_t tmp; |
3311 | struct rq *rq = this_rq(); | ||
3310 | 3312 | ||
3311 | p->utime = cputime_add(p->utime, cputime); | 3313 | p->utime = cputime_add(p->utime, cputime); |
3312 | 3314 | ||
3315 | if (p != rq->idle) | ||
3316 | cpuacct_charge(p, cputime); | ||
3317 | |||
3313 | /* Add user time to cpustat. */ | 3318 | /* Add user time to cpustat. */ |
3314 | tmp = cputime_to_cputime64(cputime); | 3319 | tmp = cputime_to_cputime64(cputime); |
3315 | if (TASK_NICE(p) > 0) | 3320 | if (TASK_NICE(p) > 0) |
@@ -3374,9 +3379,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
3374 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3379 | cpustat->irq = cputime64_add(cpustat->irq, tmp); |
3375 | else if (softirq_count()) | 3380 | else if (softirq_count()) |
3376 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | 3381 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); |
3377 | else if (p != rq->idle) | 3382 | else if (p != rq->idle) { |
3378 | cpustat->system = cputime64_add(cpustat->system, tmp); | 3383 | cpustat->system = cputime64_add(cpustat->system, tmp); |
3379 | else if (atomic_read(&rq->nr_iowait) > 0) | 3384 | cpuacct_charge(p, cputime); |
3385 | } else if (atomic_read(&rq->nr_iowait) > 0) | ||
3380 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); | 3386 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); |
3381 | else | 3387 | else |
3382 | cpustat->idle = cputime64_add(cpustat->idle, tmp); | 3388 | cpustat->idle = cputime64_add(cpustat->idle, tmp); |
@@ -3412,8 +3418,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal) | |||
3412 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); | 3418 | cpustat->iowait = cputime64_add(cpustat->iowait, tmp); |
3413 | else | 3419 | else |
3414 | cpustat->idle = cputime64_add(cpustat->idle, tmp); | 3420 | cpustat->idle = cputime64_add(cpustat->idle, tmp); |
3415 | } else | 3421 | } else { |
3416 | cpustat->steal = cputime64_add(cpustat->steal, tmp); | 3422 | cpustat->steal = cputime64_add(cpustat->steal, tmp); |
3423 | cpuacct_charge(p, -tmp); | ||
3424 | } | ||
3417 | } | 3425 | } |
3418 | 3426 | ||
3419 | /* | 3427 | /* |