author	Linus Torvalds <torvalds@linux-foundation.org>	2017-11-15 17:29:44 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-11-15 17:29:44 -0500
commit	22714a2ba4b55737cd7d5299db7aaf1fa8287354 (patch)
tree	32b25f2e3e40732156a8a8d0dcb2ddf38410776f /kernel/cgroup
parent	766ec76a27aa9dfdfee3a80f29ddc1f7539c71f9 (diff)
parent	5f2e673405b742be64e7c3604ed4ed3ac14f35ce (diff)
Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
 "Cgroup2 cpu controller support is finally merged.

   - Basic cpu statistics support to allow monitoring by default without
     the CPU controller enabled.

   - cgroup2 cpu controller support.

   - /sys/kernel/cgroup files to help dealing with new / optional features"

* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: export list of cgroups v2 features using sysfs
  cgroup: export list of delegatable control files using sysfs
  cgroup: mark @cgrp __maybe_unused in cpu_stat_show()
  MAINTAINERS: relocate cpuset.c
  cgroup, sched: Move basic cpu stats from cgroup.stat to cpu.stat
  sched: Implement interface for cgroup unified hierarchy
  sched: Misc preps for cgroup unified hierarchy interface
  sched/cputime: Add dummy cputime_adjust() implementation for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
  cgroup: statically initialize init_css_set->dfl_cgrp
  cgroup: Implement cgroup2 basic CPU usage accounting
  cpuacct: Introduce cgroup_account_cputime[_field]()
  sched/cputime: Expose cputime_adjust()
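For orientation (not part of the commit): the series adds two read-only files under /sys/kernel/cgroup, "delegate" and "features", plus a per-cgroup "cpu.stat" file on the default hierarchy. The following is a minimal, illustrative userspace sketch that dumps the two sysfs files; the paths follow from the "cgroup" attribute group registered on kernel_kobj in the diff below, and the program itself is an assumption of how the interface might be exercised, not something shipped with the series.

/* Illustrative only -- not part of this commit.  Dumps the new
 * /sys/kernel/cgroup interface files added by this series. */
#include <stdio.h>

static void dump(const char *path)
{
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	printf("== %s ==\n", path);
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
}

int main(void)
{
	dump("/sys/kernel/cgroup/delegate");	/* delegatable control files, one per line */
	dump("/sys/kernel/cgroup/features");	/* currently just "nsdelegate" */
	return 0;
}

On a kernel with this merge applied, cpu.stat in each non-root cgroup additionally reports usage_usec, user_usec and system_usec (see cgroup_stat_show_cputime() in kernel/cgroup/stat.c below).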
Diffstat (limited to 'kernel/cgroup')
-rw-r--r--	kernel/cgroup/Makefile           |   2
-rw-r--r--	kernel/cgroup/cgroup-internal.h  |   9
-rw-r--r--	kernel/cgroup/cgroup.c           | 157
-rw-r--r--	kernel/cgroup/stat.c             | 334
4 files changed, 499 insertions, 3 deletions
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index ae448f7632cc..2be89a003185 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y := cgroup.o namespace.o cgroup-v1.o
+obj-y := cgroup.o stat.o namespace.o cgroup-v1.o
 
 obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
 obj-$(CONFIG_CGROUP_PIDS) += pids.o
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index bf54ade001be..b928b27050c6 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -201,6 +201,15 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 int cgroup_task_count(const struct cgroup *cgrp);
 
 /*
+ * stat.c
+ */
+void cgroup_stat_flush(struct cgroup *cgrp);
+int cgroup_stat_init(struct cgroup *cgrp);
+void cgroup_stat_exit(struct cgroup *cgrp);
+void cgroup_stat_show_cputime(struct seq_file *seq);
+void cgroup_stat_boot(void);
+
+/*
  * namespace.c
  */
 extern const struct proc_ns_operations cgroupns_operations;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 00f5b358aeac..0b1ffe147f24 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -142,12 +142,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
 };
 #undef SUBSYS
 
+static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat);
+
 /*
  * The default hierarchy, reserved for the subsystems that are otherwise
  * unattached - it never has more than a single cgroup, and all tasks are
  * part of that cgroup.
  */
-struct cgroup_root cgrp_dfl_root;
+struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat };
 EXPORT_SYMBOL_GPL(cgrp_dfl_root);
 
 /*
@@ -462,6 +464,28 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
 }
 
 /**
+ * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest
+ *
+ * Find and get @cgrp's css associated with @ss.  If the css doesn't exist
+ * or is offline, %NULL is returned.
+ */
+static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
+						     struct cgroup_subsys *ss)
+{
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+	css = cgroup_css(cgrp, ss);
+	if (!css || !css_tryget_online(css))
+		css = NULL;
+	rcu_read_unlock();
+
+	return css;
+}
+
+/**
  * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
@@ -647,6 +671,14 @@ struct css_set init_css_set = {
 	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
 	.mg_preload_node	= LIST_HEAD_INIT(init_css_set.mg_preload_node),
 	.mg_node		= LIST_HEAD_INIT(init_css_set.mg_node),
+
+	/*
+	 * The following field is re-initialized when this cset gets linked
+	 * in cgroup_init().  However, let's initialize the field
+	 * statically too so that the default cgroup can be accessed safely
+	 * early during boot.
+	 */
+	.dfl_cgrp		= &cgrp_dfl_root.cgrp,
 };
 
 static int css_set_count	= 1;	/* 1 for init_css_set */
@@ -3315,6 +3347,37 @@ static int cgroup_stat_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
+						 struct cgroup *cgrp, int ssid)
+{
+	struct cgroup_subsys *ss = cgroup_subsys[ssid];
+	struct cgroup_subsys_state *css;
+	int ret;
+
+	if (!ss->css_extra_stat_show)
+		return 0;
+
+	css = cgroup_tryget_css(cgrp, ss);
+	if (!css)
+		return 0;
+
+	ret = ss->css_extra_stat_show(seq, css);
+	css_put(css);
+	return ret;
+}
+
+static int cpu_stat_show(struct seq_file *seq, void *v)
+{
+	struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
+	int ret = 0;
+
+	cgroup_stat_show_cputime(seq);
+#ifdef CONFIG_CGROUP_SCHED
+	ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
+#endif
+	return ret;
+}
+
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
 	struct cftype *cft = of->kn->priv;
@@ -4422,6 +4485,11 @@ static struct cftype cgroup_base_files[] = {
4422 .name = "cgroup.stat", 4485 .name = "cgroup.stat",
4423 .seq_show = cgroup_stat_show, 4486 .seq_show = cgroup_stat_show,
4424 }, 4487 },
4488 {
4489 .name = "cpu.stat",
4490 .flags = CFTYPE_NOT_ON_ROOT,
4491 .seq_show = cpu_stat_show,
4492 },
4425 { } /* terminate */ 4493 { } /* terminate */
4426}; 4494};
4427 4495
@@ -4482,6 +4550,8 @@ static void css_free_work_fn(struct work_struct *work)
 		 */
 		cgroup_put(cgroup_parent(cgrp));
 		kernfs_put(cgrp->kn);
+		if (cgroup_on_dfl(cgrp))
+			cgroup_stat_exit(cgrp);
 		kfree(cgrp);
 	} else {
 		/*
@@ -4526,6 +4596,9 @@ static void css_release_work_fn(struct work_struct *work)
 		/* cgroup release path */
 		trace_cgroup_release(cgrp);
 
+		if (cgroup_on_dfl(cgrp))
+			cgroup_stat_flush(cgrp);
+
 		for (tcgrp = cgroup_parent(cgrp); tcgrp;
 		     tcgrp = cgroup_parent(tcgrp))
 			tcgrp->nr_dying_descendants--;
@@ -4709,6 +4782,12 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	if (ret)
 		goto out_free_cgrp;
 
+	if (cgroup_on_dfl(parent)) {
+		ret = cgroup_stat_init(cgrp);
+		if (ret)
+			goto out_cancel_ref;
+	}
+
 	/*
 	 * Temporarily set the pointer to NULL, so idr_find() won't return
 	 * a half-baked cgroup.
@@ -4716,7 +4795,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
 	if (cgrp->id < 0) {
 		ret = -ENOMEM;
-		goto out_cancel_ref;
+		goto out_stat_exit;
 	}
 
 	init_cgroup_housekeeping(cgrp);
@@ -4767,6 +4846,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 
 out_idr_free:
 	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
+out_stat_exit:
+	if (cgroup_on_dfl(parent))
+		cgroup_stat_exit(cgrp);
 out_cancel_ref:
 	percpu_ref_exit(&cgrp->self.refcnt);
 out_free_cgrp:
@@ -5161,6 +5243,8 @@ int __init cgroup_init(void)
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
 
+	cgroup_stat_boot();
+
 	/*
 	 * The latency of the synchronize_sched() is too high for cgroups,
 	 * avoid it at the cost of forcing all readers into the slow path.
@@ -5780,3 +5864,72 @@ int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	return ret;
 }
 #endif /* CONFIG_CGROUP_BPF */
+
+#ifdef CONFIG_SYSFS
+static ssize_t show_delegatable_files(struct cftype *files, char *buf,
+				      ssize_t size, const char *prefix)
+{
+	struct cftype *cft;
+	ssize_t ret = 0;
+
+	for (cft = files; cft && cft->name[0] != '\0'; cft++) {
+		if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
+			continue;
+
+		if (prefix)
+			ret += snprintf(buf + ret, size - ret, "%s.", prefix);
+
+		ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);
+
+		if (unlikely(ret >= size)) {
+			WARN_ON(1);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *buf)
+{
+	struct cgroup_subsys *ss;
+	int ssid;
+	ssize_t ret = 0;
+
+	ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
+				     NULL);
+
+	for_each_subsys(ss, ssid)
+		ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
+					      PAGE_SIZE - ret,
+					      cgroup_subsys_name[ssid]);
+
+	return ret;
+}
+static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
+
+static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "nsdelegate\n");
+}
+static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
+
+static struct attribute *cgroup_sysfs_attrs[] = {
+	&cgroup_delegate_attr.attr,
+	&cgroup_features_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group cgroup_sysfs_attr_group = {
+	.attrs = cgroup_sysfs_attrs,
+	.name = "cgroup",
+};
+
+static int __init cgroup_sysfs_init(void)
+{
+	return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
+}
+subsys_initcall(cgroup_sysfs_init);
+#endif /* CONFIG_SYSFS */
diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c
new file mode 100644
index 000000000000..133b465691d6
--- /dev/null
+++ b/kernel/cgroup/stat.c
@@ -0,0 +1,334 @@
1#include "cgroup-internal.h"
2
3#include <linux/sched/cputime.h>
4
5static DEFINE_MUTEX(cgroup_stat_mutex);
6static DEFINE_PER_CPU(raw_spinlock_t, cgroup_cpu_stat_lock);
7
8static struct cgroup_cpu_stat *cgroup_cpu_stat(struct cgroup *cgrp, int cpu)
9{
10 return per_cpu_ptr(cgrp->cpu_stat, cpu);
11}
12
13/**
14 * cgroup_cpu_stat_updated - keep track of updated cpu_stat
15 * @cgrp: target cgroup
16 * @cpu: cpu on which cpu_stat was updated
17 *
18 * @cgrp's cpu_stat on @cpu was updated. Put it on the parent's matching
19 * cpu_stat->updated_children list. See the comment on top of
20 * cgroup_cpu_stat definition for details.
21 */
22static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
23{
24 raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
25 struct cgroup *parent;
26 unsigned long flags;
27
28 /*
29 * Speculative already-on-list test. This may race leading to
30 * temporary inaccuracies, which is fine.
31 *
32 * Because @parent's updated_children is terminated with @parent
33 * instead of NULL, we can tell whether @cgrp is on the list by
34 * testing the next pointer for NULL.
35 */
36 if (cgroup_cpu_stat(cgrp, cpu)->updated_next)
37 return;
38
39 raw_spin_lock_irqsave(cpu_lock, flags);
40
41 /* put @cgrp and all ancestors on the corresponding updated lists */
42 for (parent = cgroup_parent(cgrp); parent;
43 cgrp = parent, parent = cgroup_parent(cgrp)) {
44 struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
45 struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
46
47 /*
48 * Both additions and removals are bottom-up. If a cgroup
49 * is already in the tree, all ancestors are.
50 */
51 if (cstat->updated_next)
52 break;
53
54 cstat->updated_next = pcstat->updated_children;
55 pcstat->updated_children = cgrp;
56 }
57
58 raw_spin_unlock_irqrestore(cpu_lock, flags);
59}
60
61/**
62 * cgroup_cpu_stat_pop_updated - iterate and dismantle cpu_stat updated tree
63 * @pos: current position
64 * @root: root of the tree to traversal
65 * @cpu: target cpu
66 *
67 * Walks the udpated cpu_stat tree on @cpu from @root. %NULL @pos starts
68 * the traversal and %NULL return indicates the end. During traversal,
69 * each returned cgroup is unlinked from the tree. Must be called with the
70 * matching cgroup_cpu_stat_lock held.
71 *
72 * The only ordering guarantee is that, for a parent and a child pair
73 * covered by a given traversal, if a child is visited, its parent is
74 * guaranteed to be visited afterwards.
75 */
76static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
77 struct cgroup *root, int cpu)
78{
79 struct cgroup_cpu_stat *cstat;
80 struct cgroup *parent;
81
82 if (pos == root)
83 return NULL;
84
85 /*
86 * We're gonna walk down to the first leaf and visit/remove it. We
87 * can pick whatever unvisited node as the starting point.
88 */
89 if (!pos)
90 pos = root;
91 else
92 pos = cgroup_parent(pos);
93
94 /* walk down to the first leaf */
95 while (true) {
96 cstat = cgroup_cpu_stat(pos, cpu);
97 if (cstat->updated_children == pos)
98 break;
99 pos = cstat->updated_children;
100 }
101
102 /*
103 * Unlink @pos from the tree. As the updated_children list is
104 * singly linked, we have to walk it to find the removal point.
105 * However, due to the way we traverse, @pos will be the first
106 * child in most cases. The only exception is @root.
107 */
108 parent = cgroup_parent(pos);
109 if (parent && cstat->updated_next) {
110 struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
111 struct cgroup_cpu_stat *ncstat;
112 struct cgroup **nextp;
113
114 nextp = &pcstat->updated_children;
115 while (true) {
116 ncstat = cgroup_cpu_stat(*nextp, cpu);
117 if (*nextp == pos)
118 break;
119
120 WARN_ON_ONCE(*nextp == parent);
121 nextp = &ncstat->updated_next;
122 }
123
124 *nextp = cstat->updated_next;
125 cstat->updated_next = NULL;
126 }
127
128 return pos;
129}
130
131static void cgroup_stat_accumulate(struct cgroup_stat *dst_stat,
132 struct cgroup_stat *src_stat)
133{
134 dst_stat->cputime.utime += src_stat->cputime.utime;
135 dst_stat->cputime.stime += src_stat->cputime.stime;
136 dst_stat->cputime.sum_exec_runtime += src_stat->cputime.sum_exec_runtime;
137}
138
139static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu)
140{
141 struct cgroup *parent = cgroup_parent(cgrp);
142 struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
143 struct task_cputime *last_cputime = &cstat->last_cputime;
144 struct task_cputime cputime;
145 struct cgroup_stat delta;
146 unsigned seq;
147
148 lockdep_assert_held(&cgroup_stat_mutex);
149
150 /* fetch the current per-cpu values */
151 do {
152 seq = __u64_stats_fetch_begin(&cstat->sync);
153 cputime = cstat->cputime;
154 } while (__u64_stats_fetch_retry(&cstat->sync, seq));
155
156 /* accumulate the deltas to propgate */
157 delta.cputime.utime = cputime.utime - last_cputime->utime;
158 delta.cputime.stime = cputime.stime - last_cputime->stime;
159 delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
160 last_cputime->sum_exec_runtime;
161 *last_cputime = cputime;
162
163 /* transfer the pending stat into delta */
164 cgroup_stat_accumulate(&delta, &cgrp->pending_stat);
165 memset(&cgrp->pending_stat, 0, sizeof(cgrp->pending_stat));
166
167 /* propagate delta into the global stat and the parent's pending */
168 cgroup_stat_accumulate(&cgrp->stat, &delta);
169 if (parent)
170 cgroup_stat_accumulate(&parent->pending_stat, &delta);
171}
172
173/* see cgroup_stat_flush() */
174static void cgroup_stat_flush_locked(struct cgroup *cgrp)
175{
176 int cpu;
177
178 lockdep_assert_held(&cgroup_stat_mutex);
179
180 for_each_possible_cpu(cpu) {
181 raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
182 struct cgroup *pos = NULL;
183
184 raw_spin_lock_irq(cpu_lock);
185 while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu)))
186 cgroup_cpu_stat_flush_one(pos, cpu);
187 raw_spin_unlock_irq(cpu_lock);
188 }
189}
190
191/**
192 * cgroup_stat_flush - flush stats in @cgrp's subtree
193 * @cgrp: target cgroup
194 *
195 * Collect all per-cpu stats in @cgrp's subtree into the global counters
196 * and propagate them upwards. After this function returns, all cgroups in
197 * the subtree have up-to-date ->stat.
198 *
199 * This also gets all cgroups in the subtree including @cgrp off the
200 * ->updated_children lists.
201 */
202void cgroup_stat_flush(struct cgroup *cgrp)
203{
204 mutex_lock(&cgroup_stat_mutex);
205 cgroup_stat_flush_locked(cgrp);
206 mutex_unlock(&cgroup_stat_mutex);
207}
208
209static struct cgroup_cpu_stat *cgroup_cpu_stat_account_begin(struct cgroup *cgrp)
210{
211 struct cgroup_cpu_stat *cstat;
212
213 cstat = get_cpu_ptr(cgrp->cpu_stat);
214 u64_stats_update_begin(&cstat->sync);
215 return cstat;
216}
217
218static void cgroup_cpu_stat_account_end(struct cgroup *cgrp,
219 struct cgroup_cpu_stat *cstat)
220{
221 u64_stats_update_end(&cstat->sync);
222 cgroup_cpu_stat_updated(cgrp, smp_processor_id());
223 put_cpu_ptr(cstat);
224}
225
226void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
227{
228 struct cgroup_cpu_stat *cstat;
229
230 cstat = cgroup_cpu_stat_account_begin(cgrp);
231 cstat->cputime.sum_exec_runtime += delta_exec;
232 cgroup_cpu_stat_account_end(cgrp, cstat);
233}
234
235void __cgroup_account_cputime_field(struct cgroup *cgrp,
236 enum cpu_usage_stat index, u64 delta_exec)
237{
238 struct cgroup_cpu_stat *cstat;
239
240 cstat = cgroup_cpu_stat_account_begin(cgrp);
241
242 switch (index) {
243 case CPUTIME_USER:
244 case CPUTIME_NICE:
245 cstat->cputime.utime += delta_exec;
246 break;
247 case CPUTIME_SYSTEM:
248 case CPUTIME_IRQ:
249 case CPUTIME_SOFTIRQ:
250 cstat->cputime.stime += delta_exec;
251 break;
252 default:
253 break;
254 }
255
256 cgroup_cpu_stat_account_end(cgrp, cstat);
257}
258
259void cgroup_stat_show_cputime(struct seq_file *seq)
260{
261 struct cgroup *cgrp = seq_css(seq)->cgroup;
262 u64 usage, utime, stime;
263
264 if (!cgroup_parent(cgrp))
265 return;
266
267 mutex_lock(&cgroup_stat_mutex);
268
269 cgroup_stat_flush_locked(cgrp);
270
271 usage = cgrp->stat.cputime.sum_exec_runtime;
272 cputime_adjust(&cgrp->stat.cputime, &cgrp->stat.prev_cputime,
273 &utime, &stime);
274
275 mutex_unlock(&cgroup_stat_mutex);
276
277 do_div(usage, NSEC_PER_USEC);
278 do_div(utime, NSEC_PER_USEC);
279 do_div(stime, NSEC_PER_USEC);
280
281 seq_printf(seq, "usage_usec %llu\n"
282 "user_usec %llu\n"
283 "system_usec %llu\n",
284 usage, utime, stime);
285}
286
287int cgroup_stat_init(struct cgroup *cgrp)
288{
289 int cpu;
290
291 /* the root cgrp has cpu_stat preallocated */
292 if (!cgrp->cpu_stat) {
293 cgrp->cpu_stat = alloc_percpu(struct cgroup_cpu_stat);
294 if (!cgrp->cpu_stat)
295 return -ENOMEM;
296 }
297
298 /* ->updated_children list is self terminated */
299 for_each_possible_cpu(cpu)
300 cgroup_cpu_stat(cgrp, cpu)->updated_children = cgrp;
301
302 prev_cputime_init(&cgrp->stat.prev_cputime);
303
304 return 0;
305}
306
307void cgroup_stat_exit(struct cgroup *cgrp)
308{
309 int cpu;
310
311 cgroup_stat_flush(cgrp);
312
313 /* sanity check */
314 for_each_possible_cpu(cpu) {
315 struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
316
317 if (WARN_ON_ONCE(cstat->updated_children != cgrp) ||
318 WARN_ON_ONCE(cstat->updated_next))
319 return;
320 }
321
322 free_percpu(cgrp->cpu_stat);
323 cgrp->cpu_stat = NULL;
324}
325
326void __init cgroup_stat_boot(void)
327{
328 int cpu;
329
330 for_each_possible_cpu(cpu)
331 raw_spin_lock_init(per_cpu_ptr(&cgroup_cpu_stat_lock, cpu));
332
333 BUG_ON(cgroup_stat_init(&cgrp_dfl_root.cgrp));
334}