author     Paul Menage <menage@google.com>    2007-10-19 02:39:42 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>    2007-10-19 14:53:36 -0400
commit     62d0df64065e7c135d0002f069444fbdfc64768f
tree       9087bf336182ab9c619460ba2370a223200179bc /kernel
parent     8793d854edbc2774943a4b0de3304dc73991159a
Task Control Groups: example CPU accounting subsystem
This example demonstrates how to use the generic cgroup subsystem for a
simple resource tracker that counts, for the processes in a cgroup, the
total CPU time used and the %CPU used in the last complete 10-second interval.
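
As a rough sketch (not part of this patch) of how the two control files can
be consumed, a userspace reader might look like the following; the mount
point /dev/cgroup and the group name mygroup are illustrative assumptions:

    /*
     * Hypothetical userspace reader for the cpuacct control files.
     * Assumes the hierarchy was mounted with the cpuacct subsystem, e.g.:
     *   mount -t cgroup -o cpuacct none /dev/cgroup
     * Paths below are illustrative, not mandated by the patch.
     */
    #include <stdio.h>

    static long long read_value(const char *path)
    {
            FILE *f = fopen(path, "r");
            long long val = -1;

            if (f) {
                    if (fscanf(f, "%lld", &val) != 1)
                            val = -1;
                    fclose(f);
            }
            return val;
    }

    int main(void)
    {
            /* total CPU time of the group, in milliseconds */
            long long usage = read_value("/dev/cgroup/mygroup/cpuacct.usage");
            /* %CPU over the last complete 10-second interval */
            long long load  = read_value("/dev/cgroup/mygroup/cpuacct.load");

            printf("usage: %lld ms, load: %lld%%\n", usage, load);
            return 0;
    }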
Portions contributed by Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Paul Menage <menage@google.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
 kernel/Makefile   |   1 +
 kernel/cpu_acct.c | 186 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c    |  14 +++++++---
 3 files changed, 198 insertions(+), 3 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index ea8c8a12e19a..48a7fae00271 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
+obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c
new file mode 100644
index 000000000000..731e47e7f164
--- /dev/null
+++ b/kernel/cpu_acct.c
@@ -0,0 +1,186 @@
+/*
+ * kernel/cpu_acct.c - CPU accounting cgroup subsystem
+ *
+ * Copyright (C) Google Inc, 2006
+ *
+ * Developed by Paul Menage (menage@google.com) and Balbir Singh
+ * (balbir@in.ibm.com)
+ *
+ */
+
+/*
+ * Example cgroup subsystem for reporting total CPU usage of tasks in a
+ * cgroup, along with percentage load over a time interval
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/rcupdate.h>
+
+#include <asm/div64.h>
+
+struct cpuacct {
+	struct cgroup_subsys_state css;
+	spinlock_t lock;
+	/* total time used by this class */
+	cputime64_t time;
+
+	/* time when next load calculation occurs */
+	u64 next_interval_check;
+
+	/* time used in current period */
+	cputime64_t current_interval_time;
+
+	/* time used in last period */
+	cputime64_t last_interval_time;
+};
+
+struct cgroup_subsys cpuacct_subsys;
+
+static inline struct cpuacct *cgroup_ca(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+static inline struct cpuacct *task_ca(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+#define INTERVAL (HZ * 10)
+
+static inline u64 next_interval_boundary(u64 now)
+{
+	/* calculate the next interval boundary beyond the
+	 * current time */
+	do_div(now, INTERVAL);
+	return (now + 1) * INTERVAL;
+}
+
+static struct cgroup_subsys_state *cpuacct_create(
+	struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+
+	if (!ca)
+		return ERR_PTR(-ENOMEM);
+	spin_lock_init(&ca->lock);
+	ca->next_interval_check = next_interval_boundary(get_jiffies_64());
+	return &ca->css;
+}
+
+static void cpuacct_destroy(struct cgroup_subsys *ss,
+			    struct cgroup *cont)
+{
+	kfree(cgroup_ca(cont));
+}
+
+/* Lazily update the load calculation if necessary. Called with ca locked */
+static void cpuusage_update(struct cpuacct *ca)
+{
+	u64 now = get_jiffies_64();
+
+	/* If we're not due for an update, return */
+	if (ca->next_interval_check > now)
+		return;
+
+	if (ca->next_interval_check <= (now - INTERVAL)) {
+		/* If it's been more than an interval since the last
+		 * check, then catch up - the last interval must have
+		 * been zero load */
+		ca->last_interval_time = 0;
+		ca->next_interval_check = next_interval_boundary(now);
+	} else {
+		/* If a steal takes the last interval time negative,
+		 * then we just ignore it */
+		if ((s64)ca->current_interval_time > 0)
+			ca->last_interval_time = ca->current_interval_time;
+		else
+			ca->last_interval_time = 0;
+		ca->next_interval_check += INTERVAL;
+	}
+	ca->current_interval_time = 0;
+}
+
+static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuacct *ca = cgroup_ca(cont);
+	u64 time;
+
+	spin_lock_irq(&ca->lock);
+	cpuusage_update(ca);
+	time = cputime64_to_jiffies64(ca->time);
+	spin_unlock_irq(&ca->lock);
+
+	/* Convert 64-bit jiffies to milliseconds */
+	time *= 1000;
+	do_div(time, HZ);
+	return time;
+}
+
+static u64 load_read(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuacct *ca = cgroup_ca(cont);
+	u64 time;
+
+	/* Find the time used in the previous interval */
+	spin_lock_irq(&ca->lock);
+	cpuusage_update(ca);
+	time = cputime64_to_jiffies64(ca->last_interval_time);
+	spin_unlock_irq(&ca->lock);
+
+	/* Convert time to a percentage, to give the load in the
+	 * previous period */
+	time *= 100;
+	do_div(time, INTERVAL);
+
+	return time;
+}
+
+static struct cftype files[] = {
+	{
+		.name = "usage",
+		.read_uint = cpuusage_read,
+	},
+	{
+		.name = "load",
+		.read_uint = load_read,
+	}
+};
+
+static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+}
+
+void cpuacct_charge(struct task_struct *task, cputime_t cputime)
+{
+
+	struct cpuacct *ca;
+	unsigned long flags;
+
+	if (!cpuacct_subsys.active)
+		return;
+	rcu_read_lock();
+	ca = task_ca(task);
+	if (ca) {
+		spin_lock_irqsave(&ca->lock, flags);
+		cpuusage_update(ca);
+		ca->time = cputime64_add(ca->time, cputime);
+		ca->current_interval_time =
+			cputime64_add(ca->current_interval_time, cputime);
+		spin_unlock_irqrestore(&ca->lock, flags);
+	}
+	rcu_read_unlock();
+}
+
+struct cgroup_subsys cpuacct_subsys = {
+	.name = "cpuacct",
+	.create = cpuacct_create,
+	.destroy = cpuacct_destroy,
+	.populate = cpuacct_populate,
+	.subsys_id = cpuacct_subsys_id,
+};
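
The load figure above depends entirely on the interval bookkeeping in
next_interval_boundary() and cpuusage_update(). A standalone sketch of the
same rounding arithmetic, with HZ = 1000 assumed purely for illustration:

    /* Replay of the interval-boundary arithmetic, outside the kernel. */
    #include <stdio.h>
    #include <stdint.h>

    #define HZ       1000                   /* assumed for illustration */
    #define INTERVAL ((uint64_t)HZ * 10)    /* 10 seconds, in jiffies */

    /* Same rounding as next_interval_boundary(): the first multiple of
     * INTERVAL strictly beyond 'now'. */
    static uint64_t next_boundary(uint64_t now)
    {
            return (now / INTERVAL + 1) * INTERVAL;
    }

    int main(void)
    {
            printf("%llu\n", (unsigned long long)next_boundary(0));     /* 10000 */
            printf("%llu\n", (unsigned long long)next_boundary(9999));  /* 10000 */
            /* a time exactly on a boundary rounds up to the next one */
            printf("%llu\n", (unsigned long long)next_boundary(10000)); /* 20000 */
            return 0;
    }

cpuusage_update() then either resynchronizes to this boundary (recording a
zero-load interval when more than one full INTERVAL has passed unobserved) or
advances next_interval_check by exactly one INTERVAL and publishes the
accumulated current_interval_time as last_interval_time.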
diff --git a/kernel/sched.c b/kernel/sched.c
index ed90be46fb31..72a2a16e2214 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -51,6 +51,7 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/percpu.h>
+#include <linux/cpu_acct.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
 #include <linux/sysctl.h>
@@ -3307,9 +3308,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	cputime64_t tmp;
+	struct rq *rq = this_rq();
 
 	p->utime = cputime_add(p->utime, cputime);
 
+	if (p != rq->idle)
+		cpuacct_charge(p, cputime);
+
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
 	if (TASK_NICE(p) > 0)
@@ -3374,9 +3379,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
 	else if (softirq_count())
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-	else if (p != rq->idle)
+	else if (p != rq->idle) {
 		cpustat->system = cputime64_add(cpustat->system, tmp);
-	else if (atomic_read(&rq->nr_iowait) > 0)
+		cpuacct_charge(p, cputime);
+	} else if (atomic_read(&rq->nr_iowait) > 0)
 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 	else
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
@@ -3412,8 +3418,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
 			cpustat->idle = cputime64_add(cpustat->idle, tmp);
-	} else
+	} else {
 		cpustat->steal = cputime64_add(cpustat->steal, tmp);
+		cpuacct_charge(p, -tmp);
+	}
 }
 
 /*
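
One subtlety in the last hunk: steal time is charged as a negative quantity,
so a group's current_interval_time can wrap below zero on an unsigned type;
the (s64) cast in cpuusage_update() is what catches this. A small standalone
illustration, with the jiffy amounts invented for the example:

    /* Sketch of the signed guard used in cpuusage_update(). */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t interval_time = 0;

            interval_time += 3;              /* charge 3 jiffies of CPU time */
            interval_time += (uint64_t)-5;   /* steal charge wraps below zero */

            /* The (s64) cast detects the wraparound... */
            if ((int64_t)interval_time > 0)
                    printf("last_interval_time = %llu\n",
                           (unsigned long long)interval_time);
            else
                    printf("last_interval_time = 0 (negative interval ignored)\n");
            return 0;
    }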
