diff options
Diffstat (limited to 'kernel/sched/cpuacct.c')
-rw-r--r-- | kernel/sched/cpuacct.c | 296 |
1 file changed, 296 insertions, 0 deletions
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c new file mode 100644 index 000000000000..dbb7e2cd95eb --- /dev/null +++ b/kernel/sched/cpuacct.c | |||
@@ -0,0 +1,296 @@ | |||
1 | #include <linux/cgroup.h> | ||
2 | #include <linux/slab.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/spinlock.h> | ||
5 | #include <linux/cpumask.h> | ||
6 | #include <linux/seq_file.h> | ||
7 | #include <linux/rcupdate.h> | ||
8 | #include <linux/kernel_stat.h> | ||
9 | #include <linux/err.h> | ||
10 | |||
11 | #include "sched.h" | ||
12 | |||
13 | /* | ||
14 | * CPU accounting code for task groups. | ||
15 | * | ||
16 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh | ||
17 | * (balbir@in.ibm.com). | ||
18 | */ | ||
19 | |||
/* Time spent by the tasks of the cpu accounting group executing in ... */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	CPUACCT_STAT_NSTATS,	/* number of stat entries above, not a real index */
};
27 | |||
/* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
	struct cgroup_subsys_state css;
	/* cpuusage holds pointer to a u64-type object on every cpu */
	u64 __percpu *cpuusage;
	/* per-cpu user/system time buckets (indexed by CPUTIME_* below) */
	struct kernel_cpustat __percpu *cpustat;
};
35 | |||
36 | /* return cpu accounting group corresponding to this container */ | ||
37 | static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) | ||
38 | { | ||
39 | return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id), | ||
40 | struct cpuacct, css); | ||
41 | } | ||
42 | |||
43 | /* return cpu accounting group to which this task belongs */ | ||
44 | static inline struct cpuacct *task_ca(struct task_struct *tsk) | ||
45 | { | ||
46 | return container_of(task_subsys_state(tsk, cpuacct_subsys_id), | ||
47 | struct cpuacct, css); | ||
48 | } | ||
49 | |||
50 | static inline struct cpuacct *__parent_ca(struct cpuacct *ca) | ||
51 | { | ||
52 | return cgroup_ca(ca->css.cgroup->parent); | ||
53 | } | ||
54 | |||
55 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) | ||
56 | { | ||
57 | if (!ca->css.cgroup->parent) | ||
58 | return NULL; | ||
59 | return cgroup_ca(ca->css.cgroup->parent); | ||
60 | } | ||
61 | |||
/* Usage counter for the root group; static, not from alloc_percpu(). */
static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
/* Root group: shares the system-wide kernel_cpustat rather than its own copy. */
static struct cpuacct root_cpuacct = {
	.cpustat = &kernel_cpustat,
	.cpuusage = &root_cpuacct_cpuusage,
};
67 | |||
68 | /* create a new cpu accounting group */ | ||
69 | static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp) | ||
70 | { | ||
71 | struct cpuacct *ca; | ||
72 | |||
73 | if (!cgrp->parent) | ||
74 | return &root_cpuacct.css; | ||
75 | |||
76 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | ||
77 | if (!ca) | ||
78 | goto out; | ||
79 | |||
80 | ca->cpuusage = alloc_percpu(u64); | ||
81 | if (!ca->cpuusage) | ||
82 | goto out_free_ca; | ||
83 | |||
84 | ca->cpustat = alloc_percpu(struct kernel_cpustat); | ||
85 | if (!ca->cpustat) | ||
86 | goto out_free_cpuusage; | ||
87 | |||
88 | return &ca->css; | ||
89 | |||
90 | out_free_cpuusage: | ||
91 | free_percpu(ca->cpuusage); | ||
92 | out_free_ca: | ||
93 | kfree(ca); | ||
94 | out: | ||
95 | return ERR_PTR(-ENOMEM); | ||
96 | } | ||
97 | |||
98 | /* destroy an existing cpu accounting group */ | ||
99 | static void cpuacct_css_free(struct cgroup *cgrp) | ||
100 | { | ||
101 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
102 | |||
103 | free_percpu(ca->cpustat); | ||
104 | free_percpu(ca->cpuusage); | ||
105 | kfree(ca); | ||
106 | } | ||
107 | |||
108 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) | ||
109 | { | ||
110 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | ||
111 | u64 data; | ||
112 | |||
113 | #ifndef CONFIG_64BIT | ||
114 | /* | ||
115 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | ||
116 | */ | ||
117 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | ||
118 | data = *cpuusage; | ||
119 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | ||
120 | #else | ||
121 | data = *cpuusage; | ||
122 | #endif | ||
123 | |||
124 | return data; | ||
125 | } | ||
126 | |||
127 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | ||
128 | { | ||
129 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | ||
130 | |||
131 | #ifndef CONFIG_64BIT | ||
132 | /* | ||
133 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | ||
134 | */ | ||
135 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | ||
136 | *cpuusage = val; | ||
137 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | ||
138 | #else | ||
139 | *cpuusage = val; | ||
140 | #endif | ||
141 | } | ||
142 | |||
143 | /* return total cpu usage (in nanoseconds) of a group */ | ||
144 | static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | ||
145 | { | ||
146 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
147 | u64 totalcpuusage = 0; | ||
148 | int i; | ||
149 | |||
150 | for_each_present_cpu(i) | ||
151 | totalcpuusage += cpuacct_cpuusage_read(ca, i); | ||
152 | |||
153 | return totalcpuusage; | ||
154 | } | ||
155 | |||
156 | static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype, | ||
157 | u64 reset) | ||
158 | { | ||
159 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
160 | int err = 0; | ||
161 | int i; | ||
162 | |||
163 | if (reset) { | ||
164 | err = -EINVAL; | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | for_each_present_cpu(i) | ||
169 | cpuacct_cpuusage_write(ca, i, 0); | ||
170 | |||
171 | out: | ||
172 | return err; | ||
173 | } | ||
174 | |||
175 | static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | ||
176 | struct seq_file *m) | ||
177 | { | ||
178 | struct cpuacct *ca = cgroup_ca(cgroup); | ||
179 | u64 percpu; | ||
180 | int i; | ||
181 | |||
182 | for_each_present_cpu(i) { | ||
183 | percpu = cpuacct_cpuusage_read(ca, i); | ||
184 | seq_printf(m, "%llu ", (unsigned long long) percpu); | ||
185 | } | ||
186 | seq_printf(m, "\n"); | ||
187 | return 0; | ||
188 | } | ||
189 | |||
/* Map keys for cpuacct.stat, indexed by enum cpuacct_stat_index. */
static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};
194 | |||
195 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
196 | struct cgroup_map_cb *cb) | ||
197 | { | ||
198 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
199 | int cpu; | ||
200 | s64 val = 0; | ||
201 | |||
202 | for_each_online_cpu(cpu) { | ||
203 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | ||
204 | val += kcpustat->cpustat[CPUTIME_USER]; | ||
205 | val += kcpustat->cpustat[CPUTIME_NICE]; | ||
206 | } | ||
207 | val = cputime64_to_clock_t(val); | ||
208 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val); | ||
209 | |||
210 | val = 0; | ||
211 | for_each_online_cpu(cpu) { | ||
212 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | ||
213 | val += kcpustat->cpustat[CPUTIME_SYSTEM]; | ||
214 | val += kcpustat->cpustat[CPUTIME_IRQ]; | ||
215 | val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; | ||
216 | } | ||
217 | |||
218 | val = cputime64_to_clock_t(val); | ||
219 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
static struct cftype files[] = {
	{
		/* total usage in nanoseconds; writing 0 resets the counters */
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		/* one space-separated usage value per present cpu */
		.name = "usage_percpu",
		.read_seq_string = cpuacct_percpu_seq_read,
	},
	{
		/* user/system breakdown in clock ticks */
		.name = "stat",
		.read_map = cpuacct_stats_show,
	},
	{ }	/* terminate */
};
240 | |||
241 | /* | ||
242 | * charge this task's execution time to its accounting group. | ||
243 | * | ||
244 | * called with rq->lock held. | ||
245 | */ | ||
246 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) | ||
247 | { | ||
248 | struct cpuacct *ca; | ||
249 | int cpu; | ||
250 | |||
251 | cpu = task_cpu(tsk); | ||
252 | |||
253 | rcu_read_lock(); | ||
254 | |||
255 | ca = task_ca(tsk); | ||
256 | |||
257 | while (true) { | ||
258 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | ||
259 | *cpuusage += cputime; | ||
260 | |||
261 | ca = parent_ca(ca); | ||
262 | if (!ca) | ||
263 | break; | ||
264 | } | ||
265 | |||
266 | rcu_read_unlock(); | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Add user/system time to cpuacct. | ||
271 | * | ||
272 | * Note: it's the caller that updates the account of the root cgroup. | ||
273 | */ | ||
274 | void cpuacct_account_field(struct task_struct *p, int index, u64 val) | ||
275 | { | ||
276 | struct kernel_cpustat *kcpustat; | ||
277 | struct cpuacct *ca; | ||
278 | |||
279 | rcu_read_lock(); | ||
280 | ca = task_ca(p); | ||
281 | while (ca != &root_cpuacct) { | ||
282 | kcpustat = this_cpu_ptr(ca->cpustat); | ||
283 | kcpustat->cpustat[index] += val; | ||
284 | ca = __parent_ca(ca); | ||
285 | } | ||
286 | rcu_read_unlock(); | ||
287 | } | ||
288 | |||
/*
 * cpuacct cgroup subsystem registration.
 * NOTE(review): early_init is set — presumably so accounting is usable
 * before normal initcalls run; confirm against cgroup_init_early() users.
 */
struct cgroup_subsys cpuacct_subsys = {
	.name = "cpuacct",
	.css_alloc = cpuacct_css_alloc,
	.css_free = cpuacct_css_free,
	.subsys_id = cpuacct_subsys_id,
	.base_cftypes = files,
	.early_init = 1,
};