aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-05 20:08:45 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-05 20:08:45 -0400
commit9f25a8da423226d7797e35a132535186c531228b (patch)
treef35f830ac31fa31f5591eb96eea650ae5d20cc04 /kernel/cgroup
parent0bbddb8cbe7a8765e9c6ef598a33b50461934f88 (diff)
parentd8742e22902186e30c346b1ba881cb52942ae3e4 (diff)
Merge branch 'for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: - For cpustat, cgroup has a percpu hierarchical stat mechanism which propagates up the hierarchy lazily. This contains commits to factor out and generalize the mechanism so that it can be used for other cgroup stats too. The original intention was to update memcg stats to use it but memcg went for a different approach, so still the only user is cpustat. The factoring out and generalization still make sense and it's likely that this can be used for other purposes in the future. - cgroup uses kernfs_notify() (which uses fsnotify()) to inform user space of certain events. A rate limiting mechanism is added. - Other misc changes. * 'for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: css_set_lock should nest inside tasklist_lock rdmacg: Convert to use match_string() helper cgroup: Make cgroup_rstat_updated() ready for root cgroup usage cgroup: Add memory barriers to plug cgroup_rstat_updated() race window cgroup: Add cgroup_subsys->css_rstat_flush() cgroup: Replace cgroup_rstat_mutex with a spinlock cgroup: Factor out and expose cgroup_rstat_*() interface functions cgroup: Reorganize kernel/cgroup/rstat.c cgroup: Distinguish base resource stat implementation from rstat cgroup: Rename stat to rstat cgroup: Rename kernel/cgroup/stat.c to kernel/cgroup/rstat.c cgroup: Limit event generation frequency cgroup: Explicitly remove core interface files
Diffstat (limited to 'kernel/cgroup')
-rw-r--r--kernel/cgroup/Makefile2
-rw-r--r--kernel/cgroup/cgroup-internal.h11
-rw-r--r--kernel/cgroup/cgroup.c105
-rw-r--r--kernel/cgroup/rdma.c35
-rw-r--r--kernel/cgroup/rstat.c416
-rw-r--r--kernel/cgroup/stat.c338
6 files changed, 512 insertions, 395 deletions
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index 2be89a003185..bfcdae896122 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,5 +1,5 @@
1# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
2obj-y := cgroup.o stat.o namespace.o cgroup-v1.o 2obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
3 3
4obj-$(CONFIG_CGROUP_FREEZER) += freezer.o 4obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
5obj-$(CONFIG_CGROUP_PIDS) += pids.o 5obj-$(CONFIG_CGROUP_PIDS) += pids.o
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 0808a33d16d3..77ff1cd6a252 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -201,13 +201,12 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
201int cgroup_task_count(const struct cgroup *cgrp); 201int cgroup_task_count(const struct cgroup *cgrp);
202 202
203/* 203/*
204 * stat.c 204 * rstat.c
205 */ 205 */
206void cgroup_stat_flush(struct cgroup *cgrp); 206int cgroup_rstat_init(struct cgroup *cgrp);
207int cgroup_stat_init(struct cgroup *cgrp); 207void cgroup_rstat_exit(struct cgroup *cgrp);
208void cgroup_stat_exit(struct cgroup *cgrp); 208void cgroup_rstat_boot(void);
209void cgroup_stat_show_cputime(struct seq_file *seq); 209void cgroup_base_stat_cputime_show(struct seq_file *seq);
210void cgroup_stat_boot(void);
211 210
212/* 211/*
213 * namespace.c 212 * namespace.c
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 12883656e63e..acb66713f9b6 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -54,6 +54,7 @@
54#include <linux/proc_ns.h> 54#include <linux/proc_ns.h>
55#include <linux/nsproxy.h> 55#include <linux/nsproxy.h>
56#include <linux/file.h> 56#include <linux/file.h>
57#include <linux/sched/cputime.h>
57#include <net/sock.h> 58#include <net/sock.h>
58 59
59#define CREATE_TRACE_POINTS 60#define CREATE_TRACE_POINTS
@@ -61,6 +62,8 @@
61 62
62#define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \ 63#define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \
63 MAX_CFTYPE_NAME + 2) 64 MAX_CFTYPE_NAME + 2)
65/* let's not notify more than 100 times per second */
66#define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
64 67
65/* 68/*
66 * cgroup_mutex is the master lock. Any modification to cgroup or its 69 * cgroup_mutex is the master lock. Any modification to cgroup or its
@@ -142,14 +145,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
142}; 145};
143#undef SUBSYS 146#undef SUBSYS
144 147
145static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat); 148static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
146 149
147/* 150/*
148 * The default hierarchy, reserved for the subsystems that are otherwise 151 * The default hierarchy, reserved for the subsystems that are otherwise
149 * unattached - it never has more than a single cgroup, and all tasks are 152 * unattached - it never has more than a single cgroup, and all tasks are
150 * part of that cgroup. 153 * part of that cgroup.
151 */ 154 */
152struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat }; 155struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
153EXPORT_SYMBOL_GPL(cgrp_dfl_root); 156EXPORT_SYMBOL_GPL(cgrp_dfl_root);
154 157
155/* 158/*
@@ -1554,6 +1557,8 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
1554 spin_lock_irq(&cgroup_file_kn_lock); 1557 spin_lock_irq(&cgroup_file_kn_lock);
1555 cfile->kn = NULL; 1558 cfile->kn = NULL;
1556 spin_unlock_irq(&cgroup_file_kn_lock); 1559 spin_unlock_irq(&cgroup_file_kn_lock);
1560
1561 del_timer_sync(&cfile->notify_timer);
1557 } 1562 }
1558 1563
1559 kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name)); 1564 kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
@@ -1573,8 +1578,17 @@ static void css_clear_dir(struct cgroup_subsys_state *css)
1573 1578
1574 css->flags &= ~CSS_VISIBLE; 1579 css->flags &= ~CSS_VISIBLE;
1575 1580
1576 list_for_each_entry(cfts, &css->ss->cfts, node) 1581 if (!css->ss) {
1582 if (cgroup_on_dfl(cgrp))
1583 cfts = cgroup_base_files;
1584 else
1585 cfts = cgroup1_base_files;
1586
1577 cgroup_addrm_files(css, cgrp, cfts, false); 1587 cgroup_addrm_files(css, cgrp, cfts, false);
1588 } else {
1589 list_for_each_entry(cfts, &css->ss->cfts, node)
1590 cgroup_addrm_files(css, cgrp, cfts, false);
1591 }
1578} 1592}
1579 1593
1580/** 1594/**
@@ -1598,14 +1612,16 @@ static int css_populate_dir(struct cgroup_subsys_state *css)
1598 else 1612 else
1599 cfts = cgroup1_base_files; 1613 cfts = cgroup1_base_files;
1600 1614
1601 return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true); 1615 ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
1602 } 1616 if (ret < 0)
1603 1617 return ret;
1604 list_for_each_entry(cfts, &css->ss->cfts, node) { 1618 } else {
1605 ret = cgroup_addrm_files(css, cgrp, cfts, true); 1619 list_for_each_entry(cfts, &css->ss->cfts, node) {
1606 if (ret < 0) { 1620 ret = cgroup_addrm_files(css, cgrp, cfts, true);
1607 failed_cfts = cfts; 1621 if (ret < 0) {
1608 goto err; 1622 failed_cfts = cfts;
1623 goto err;
1624 }
1609 } 1625 }
1610 } 1626 }
1611 1627
@@ -1782,13 +1798,6 @@ static void cgroup_enable_task_cg_lists(void)
1782{ 1798{
1783 struct task_struct *p, *g; 1799 struct task_struct *p, *g;
1784 1800
1785 spin_lock_irq(&css_set_lock);
1786
1787 if (use_task_css_set_links)
1788 goto out_unlock;
1789
1790 use_task_css_set_links = true;
1791
1792 /* 1801 /*
1793 * We need tasklist_lock because RCU is not safe against 1802 * We need tasklist_lock because RCU is not safe against
1794 * while_each_thread(). Besides, a forking task that has passed 1803 * while_each_thread(). Besides, a forking task that has passed
@@ -1797,6 +1806,13 @@ static void cgroup_enable_task_cg_lists(void)
1797 * tasklist if we walk through it with RCU. 1806 * tasklist if we walk through it with RCU.
1798 */ 1807 */
1799 read_lock(&tasklist_lock); 1808 read_lock(&tasklist_lock);
1809 spin_lock_irq(&css_set_lock);
1810
1811 if (use_task_css_set_links)
1812 goto out_unlock;
1813
1814 use_task_css_set_links = true;
1815
1800 do_each_thread(g, p) { 1816 do_each_thread(g, p) {
1801 WARN_ON_ONCE(!list_empty(&p->cg_list) || 1817 WARN_ON_ONCE(!list_empty(&p->cg_list) ||
1802 task_css_set(p) != &init_css_set); 1818 task_css_set(p) != &init_css_set);
@@ -1824,9 +1840,9 @@ static void cgroup_enable_task_cg_lists(void)
1824 } 1840 }
1825 spin_unlock(&p->sighand->siglock); 1841 spin_unlock(&p->sighand->siglock);
1826 } while_each_thread(g, p); 1842 } while_each_thread(g, p);
1827 read_unlock(&tasklist_lock);
1828out_unlock: 1843out_unlock:
1829 spin_unlock_irq(&css_set_lock); 1844 spin_unlock_irq(&css_set_lock);
1845 read_unlock(&tasklist_lock);
1830} 1846}
1831 1847
1832static void init_cgroup_housekeeping(struct cgroup *cgrp) 1848static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -1844,6 +1860,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1844 cgrp->dom_cgrp = cgrp; 1860 cgrp->dom_cgrp = cgrp;
1845 cgrp->max_descendants = INT_MAX; 1861 cgrp->max_descendants = INT_MAX;
1846 cgrp->max_depth = INT_MAX; 1862 cgrp->max_depth = INT_MAX;
1863 INIT_LIST_HEAD(&cgrp->rstat_css_list);
1864 prev_cputime_init(&cgrp->prev_cputime);
1847 1865
1848 for_each_subsys(ss, ssid) 1866 for_each_subsys(ss, ssid)
1849 INIT_LIST_HEAD(&cgrp->e_csets[ssid]); 1867 INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -3381,7 +3399,7 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
3381 struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup; 3399 struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
3382 int ret = 0; 3400 int ret = 0;
3383 3401
3384 cgroup_stat_show_cputime(seq); 3402 cgroup_base_stat_cputime_show(seq);
3385#ifdef CONFIG_CGROUP_SCHED 3403#ifdef CONFIG_CGROUP_SCHED
3386 ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id); 3404 ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
3387#endif 3405#endif
@@ -3521,6 +3539,12 @@ static int cgroup_kn_set_ugid(struct kernfs_node *kn)
3521 return kernfs_setattr(kn, &iattr); 3539 return kernfs_setattr(kn, &iattr);
3522} 3540}
3523 3541
3542static void cgroup_file_notify_timer(struct timer_list *timer)
3543{
3544 cgroup_file_notify(container_of(timer, struct cgroup_file,
3545 notify_timer));
3546}
3547
3524static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, 3548static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
3525 struct cftype *cft) 3549 struct cftype *cft)
3526{ 3550{
@@ -3547,6 +3571,8 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
3547 if (cft->file_offset) { 3571 if (cft->file_offset) {
3548 struct cgroup_file *cfile = (void *)css + cft->file_offset; 3572 struct cgroup_file *cfile = (void *)css + cft->file_offset;
3549 3573
3574 timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
3575
3550 spin_lock_irq(&cgroup_file_kn_lock); 3576 spin_lock_irq(&cgroup_file_kn_lock);
3551 cfile->kn = kn; 3577 cfile->kn = kn;
3552 spin_unlock_irq(&cgroup_file_kn_lock); 3578 spin_unlock_irq(&cgroup_file_kn_lock);
@@ -3796,8 +3822,17 @@ void cgroup_file_notify(struct cgroup_file *cfile)
3796 unsigned long flags; 3822 unsigned long flags;
3797 3823
3798 spin_lock_irqsave(&cgroup_file_kn_lock, flags); 3824 spin_lock_irqsave(&cgroup_file_kn_lock, flags);
3799 if (cfile->kn) 3825 if (cfile->kn) {
3800 kernfs_notify(cfile->kn); 3826 unsigned long last = cfile->notified_at;
3827 unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
3828
3829 if (time_in_range(jiffies, last, next)) {
3830 timer_reduce(&cfile->notify_timer, next);
3831 } else {
3832 kernfs_notify(cfile->kn);
3833 cfile->notified_at = jiffies;
3834 }
3835 }
3801 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags); 3836 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
3802} 3837}
3803 3838
@@ -4560,7 +4595,7 @@ static void css_free_rwork_fn(struct work_struct *work)
4560 cgroup_put(cgroup_parent(cgrp)); 4595 cgroup_put(cgroup_parent(cgrp));
4561 kernfs_put(cgrp->kn); 4596 kernfs_put(cgrp->kn);
4562 if (cgroup_on_dfl(cgrp)) 4597 if (cgroup_on_dfl(cgrp))
4563 cgroup_stat_exit(cgrp); 4598 cgroup_rstat_exit(cgrp);
4564 kfree(cgrp); 4599 kfree(cgrp);
4565 } else { 4600 } else {
4566 /* 4601 /*
@@ -4587,6 +4622,11 @@ static void css_release_work_fn(struct work_struct *work)
4587 4622
4588 if (ss) { 4623 if (ss) {
4589 /* css release path */ 4624 /* css release path */
4625 if (!list_empty(&css->rstat_css_node)) {
4626 cgroup_rstat_flush(cgrp);
4627 list_del_rcu(&css->rstat_css_node);
4628 }
4629
4590 cgroup_idr_replace(&ss->css_idr, NULL, css->id); 4630 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
4591 if (ss->css_released) 4631 if (ss->css_released)
4592 ss->css_released(css); 4632 ss->css_released(css);
@@ -4597,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work)
4597 trace_cgroup_release(cgrp); 4637 trace_cgroup_release(cgrp);
4598 4638
4599 if (cgroup_on_dfl(cgrp)) 4639 if (cgroup_on_dfl(cgrp))
4600 cgroup_stat_flush(cgrp); 4640 cgroup_rstat_flush(cgrp);
4601 4641
4602 for (tcgrp = cgroup_parent(cgrp); tcgrp; 4642 for (tcgrp = cgroup_parent(cgrp); tcgrp;
4603 tcgrp = cgroup_parent(tcgrp)) 4643 tcgrp = cgroup_parent(tcgrp))
@@ -4648,6 +4688,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
4648 css->id = -1; 4688 css->id = -1;
4649 INIT_LIST_HEAD(&css->sibling); 4689 INIT_LIST_HEAD(&css->sibling);
4650 INIT_LIST_HEAD(&css->children); 4690 INIT_LIST_HEAD(&css->children);
4691 INIT_LIST_HEAD(&css->rstat_css_node);
4651 css->serial_nr = css_serial_nr_next++; 4692 css->serial_nr = css_serial_nr_next++;
4652 atomic_set(&css->online_cnt, 0); 4693 atomic_set(&css->online_cnt, 0);
4653 4694
@@ -4656,6 +4697,9 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
4656 css_get(css->parent); 4697 css_get(css->parent);
4657 } 4698 }
4658 4699
4700 if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
4701 list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
4702
4659 BUG_ON(cgroup_css(cgrp, ss)); 4703 BUG_ON(cgroup_css(cgrp, ss));
4660} 4704}
4661 4705
@@ -4757,6 +4801,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
4757err_list_del: 4801err_list_del:
4758 list_del_rcu(&css->sibling); 4802 list_del_rcu(&css->sibling);
4759err_free_css: 4803err_free_css:
4804 list_del_rcu(&css->rstat_css_node);
4760 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); 4805 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
4761 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); 4806 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
4762 return ERR_PTR(err); 4807 return ERR_PTR(err);
@@ -4785,7 +4830,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
4785 goto out_free_cgrp; 4830 goto out_free_cgrp;
4786 4831
4787 if (cgroup_on_dfl(parent)) { 4832 if (cgroup_on_dfl(parent)) {
4788 ret = cgroup_stat_init(cgrp); 4833 ret = cgroup_rstat_init(cgrp);
4789 if (ret) 4834 if (ret)
4790 goto out_cancel_ref; 4835 goto out_cancel_ref;
4791 } 4836 }
@@ -4850,7 +4895,7 @@ out_idr_free:
4850 cgroup_idr_remove(&root->cgroup_idr, cgrp->id); 4895 cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
4851out_stat_exit: 4896out_stat_exit:
4852 if (cgroup_on_dfl(parent)) 4897 if (cgroup_on_dfl(parent))
4853 cgroup_stat_exit(cgrp); 4898 cgroup_rstat_exit(cgrp);
4854out_cancel_ref: 4899out_cancel_ref:
4855 percpu_ref_exit(&cgrp->self.refcnt); 4900 percpu_ref_exit(&cgrp->self.refcnt);
4856out_free_cgrp: 4901out_free_cgrp:
@@ -5090,10 +5135,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
5090 for_each_css(css, ssid, cgrp) 5135 for_each_css(css, ssid, cgrp)
5091 kill_css(css); 5136 kill_css(css);
5092 5137
5093 /* 5138 /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */
5094 * Remove @cgrp directory along with the base files. @cgrp has an 5139 css_clear_dir(&cgrp->self);
5095 * extra ref on its kn.
5096 */
5097 kernfs_remove(cgrp->kn); 5140 kernfs_remove(cgrp->kn);
5098 5141
5099 if (parent && cgroup_is_threaded(cgrp)) 5142 if (parent && cgroup_is_threaded(cgrp))
@@ -5245,7 +5288,7 @@ int __init cgroup_init(void)
5245 BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); 5288 BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
5246 BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); 5289 BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
5247 5290
5248 cgroup_stat_boot(); 5291 cgroup_rstat_boot();
5249 5292
5250 /* 5293 /*
5251 * The latency of the synchronize_sched() is too high for cgroups, 5294 * The latency of the synchronize_sched() is too high for cgroups,
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index defad3c5e7dc..d3bbb757ee49 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -362,35 +362,32 @@ EXPORT_SYMBOL(rdmacg_unregister_device);
362static int parse_resource(char *c, int *intval) 362static int parse_resource(char *c, int *intval)
363{ 363{
364 substring_t argstr; 364 substring_t argstr;
365 const char **table = &rdmacg_resource_names[0];
366 char *name, *value = c; 365 char *name, *value = c;
367 size_t len; 366 size_t len;
368 int ret, i = 0; 367 int ret, i;
369 368
370 name = strsep(&value, "="); 369 name = strsep(&value, "=");
371 if (!name || !value) 370 if (!name || !value)
372 return -EINVAL; 371 return -EINVAL;
373 372
374 len = strlen(value); 373 i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name);
374 if (i < 0)
375 return i;
375 376
376 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { 377 len = strlen(value);
377 if (strcmp(table[i], name))
378 continue;
379 378
380 argstr.from = value; 379 argstr.from = value;
381 argstr.to = value + len; 380 argstr.to = value + len;
382 381
383 ret = match_int(&argstr, intval); 382 ret = match_int(&argstr, intval);
384 if (ret >= 0) { 383 if (ret >= 0) {
385 if (*intval < 0) 384 if (*intval < 0)
386 break; 385 return -EINVAL;
387 return i; 386 return i;
388 } 387 }
389 if (strncmp(value, RDMACG_MAX_STR, len) == 0) { 388 if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
390 *intval = S32_MAX; 389 *intval = S32_MAX;
391 return i; 390 return i;
392 }
393 break;
394 } 391 }
395 return -EINVAL; 392 return -EINVAL;
396} 393}
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
new file mode 100644
index 000000000000..d503d1a9007c
--- /dev/null
+++ b/kernel/cgroup/rstat.c
@@ -0,0 +1,416 @@
1#include "cgroup-internal.h"
2
3#include <linux/sched/cputime.h>
4
5static DEFINE_SPINLOCK(cgroup_rstat_lock);
6static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
7
8static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
9
10static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
11{
12 return per_cpu_ptr(cgrp->rstat_cpu, cpu);
13}
14
15/**
16 * cgroup_rstat_updated - keep track of updated rstat_cpu
17 * @cgrp: target cgroup
18 * @cpu: cpu on which rstat_cpu was updated
19 *
20 * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching
21 * rstat_cpu->updated_children list. See the comment on top of
22 * cgroup_rstat_cpu definition for details.
23 */
24void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
25{
26 raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
27 struct cgroup *parent;
28 unsigned long flags;
29
30 /* nothing to do for root */
31 if (!cgroup_parent(cgrp))
32 return;
33
34 /*
 35	 * Paired with the one in cgroup_rstat_cpu_pop_updated(). Either we
36 * see NULL updated_next or they see our updated stat.
37 */
38 smp_mb();
39
40 /*
41 * Because @parent's updated_children is terminated with @parent
42 * instead of NULL, we can tell whether @cgrp is on the list by
43 * testing the next pointer for NULL.
44 */
45 if (cgroup_rstat_cpu(cgrp, cpu)->updated_next)
46 return;
47
48 raw_spin_lock_irqsave(cpu_lock, flags);
49
50 /* put @cgrp and all ancestors on the corresponding updated lists */
51 for (parent = cgroup_parent(cgrp); parent;
52 cgrp = parent, parent = cgroup_parent(cgrp)) {
53 struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
54 struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
55
56 /*
57 * Both additions and removals are bottom-up. If a cgroup
58 * is already in the tree, all ancestors are.
59 */
60 if (rstatc->updated_next)
61 break;
62
63 rstatc->updated_next = prstatc->updated_children;
64 prstatc->updated_children = cgrp;
65 }
66
67 raw_spin_unlock_irqrestore(cpu_lock, flags);
68}
69EXPORT_SYMBOL_GPL(cgroup_rstat_updated);
70
71/**
72 * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
73 * @pos: current position
74 * @root: root of the tree to traversal
75 * @cpu: target cpu
76 *
 77	 * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts
78 * the traversal and %NULL return indicates the end. During traversal,
79 * each returned cgroup is unlinked from the tree. Must be called with the
80 * matching cgroup_rstat_cpu_lock held.
81 *
82 * The only ordering guarantee is that, for a parent and a child pair
83 * covered by a given traversal, if a child is visited, its parent is
84 * guaranteed to be visited afterwards.
85 */
86static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
87 struct cgroup *root, int cpu)
88{
89 struct cgroup_rstat_cpu *rstatc;
90 struct cgroup *parent;
91
92 if (pos == root)
93 return NULL;
94
95 /*
96 * We're gonna walk down to the first leaf and visit/remove it. We
97 * can pick whatever unvisited node as the starting point.
98 */
99 if (!pos)
100 pos = root;
101 else
102 pos = cgroup_parent(pos);
103
104 /* walk down to the first leaf */
105 while (true) {
106 rstatc = cgroup_rstat_cpu(pos, cpu);
107 if (rstatc->updated_children == pos)
108 break;
109 pos = rstatc->updated_children;
110 }
111
112 /*
113 * Unlink @pos from the tree. As the updated_children list is
114 * singly linked, we have to walk it to find the removal point.
115 * However, due to the way we traverse, @pos will be the first
116 * child in most cases. The only exception is @root.
117 */
118 parent = cgroup_parent(pos);
119 if (parent && rstatc->updated_next) {
120 struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
121 struct cgroup_rstat_cpu *nrstatc;
122 struct cgroup **nextp;
123
124 nextp = &prstatc->updated_children;
125 while (true) {
126 nrstatc = cgroup_rstat_cpu(*nextp, cpu);
127 if (*nextp == pos)
128 break;
129
130 WARN_ON_ONCE(*nextp == parent);
131 nextp = &nrstatc->updated_next;
132 }
133
134 *nextp = rstatc->updated_next;
135 rstatc->updated_next = NULL;
136
137 /*
138	 * Paired with the one in cgroup_rstat_updated().
139 * Either they see NULL updated_next or we see their
140 * updated stat.
141 */
142 smp_mb();
143 }
144
145 return pos;
146}
147
148/* see cgroup_rstat_flush() */
149static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
150 __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
151{
152 int cpu;
153
154 lockdep_assert_held(&cgroup_rstat_lock);
155
156 for_each_possible_cpu(cpu) {
157 raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
158 cpu);
159 struct cgroup *pos = NULL;
160
161 raw_spin_lock(cpu_lock);
162 while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
163 struct cgroup_subsys_state *css;
164
165 cgroup_base_stat_flush(pos, cpu);
166
167 rcu_read_lock();
168 list_for_each_entry_rcu(css, &pos->rstat_css_list,
169 rstat_css_node)
170 css->ss->css_rstat_flush(css, cpu);
171 rcu_read_unlock();
172 }
173 raw_spin_unlock(cpu_lock);
174
175 /* if @may_sleep, play nice and yield if necessary */
176 if (may_sleep && (need_resched() ||
177 spin_needbreak(&cgroup_rstat_lock))) {
178 spin_unlock_irq(&cgroup_rstat_lock);
179 if (!cond_resched())
180 cpu_relax();
181 spin_lock_irq(&cgroup_rstat_lock);
182 }
183 }
184}
185
186/**
187 * cgroup_rstat_flush - flush stats in @cgrp's subtree
188 * @cgrp: target cgroup
189 *
190 * Collect all per-cpu stats in @cgrp's subtree into the global counters
191 * and propagate them upwards. After this function returns, all cgroups in
192 * the subtree have up-to-date ->stat.
193 *
194 * This also gets all cgroups in the subtree including @cgrp off the
195 * ->updated_children lists.
196 *
197 * This function may block.
198 */
199void cgroup_rstat_flush(struct cgroup *cgrp)
200{
201 might_sleep();
202
203 spin_lock_irq(&cgroup_rstat_lock);
204 cgroup_rstat_flush_locked(cgrp, true);
205 spin_unlock_irq(&cgroup_rstat_lock);
206}
207
208/**
209 * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush()
210 * @cgrp: target cgroup
211 *
212 * This function can be called from any context.
213 */
214void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
215{
216 unsigned long flags;
217
218 spin_lock_irqsave(&cgroup_rstat_lock, flags);
219 cgroup_rstat_flush_locked(cgrp, false);
220 spin_unlock_irqrestore(&cgroup_rstat_lock, flags);
221}
222
223/**
224	 * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
225 * @cgrp: target cgroup
226 *
227 * Flush stats in @cgrp's subtree and prevent further flushes. Must be
228 * paired with cgroup_rstat_flush_release().
229 *
230 * This function may block.
231 */
232void cgroup_rstat_flush_hold(struct cgroup *cgrp)
233 __acquires(&cgroup_rstat_lock)
234{
235 might_sleep();
236 spin_lock_irq(&cgroup_rstat_lock);
237 cgroup_rstat_flush_locked(cgrp, true);
238}
239
240/**
241 * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
242 */
243void cgroup_rstat_flush_release(void)
244 __releases(&cgroup_rstat_lock)
245{
246 spin_unlock_irq(&cgroup_rstat_lock);
247}
248
249int cgroup_rstat_init(struct cgroup *cgrp)
250{
251 int cpu;
252
253 /* the root cgrp has rstat_cpu preallocated */
254 if (!cgrp->rstat_cpu) {
255 cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
256 if (!cgrp->rstat_cpu)
257 return -ENOMEM;
258 }
259
260 /* ->updated_children list is self terminated */
261 for_each_possible_cpu(cpu) {
262 struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
263
264 rstatc->updated_children = cgrp;
265 u64_stats_init(&rstatc->bsync);
266 }
267
268 return 0;
269}
270
271void cgroup_rstat_exit(struct cgroup *cgrp)
272{
273 int cpu;
274
275 cgroup_rstat_flush(cgrp);
276
277 /* sanity check */
278 for_each_possible_cpu(cpu) {
279 struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
280
281 if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
282 WARN_ON_ONCE(rstatc->updated_next))
283 return;
284 }
285
286 free_percpu(cgrp->rstat_cpu);
287 cgrp->rstat_cpu = NULL;
288}
289
290void __init cgroup_rstat_boot(void)
291{
292 int cpu;
293
294 for_each_possible_cpu(cpu)
295 raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
296
297 BUG_ON(cgroup_rstat_init(&cgrp_dfl_root.cgrp));
298}
299
300/*
301 * Functions for cgroup basic resource statistics implemented on top of
302 * rstat.
303 */
304static void cgroup_base_stat_accumulate(struct cgroup_base_stat *dst_bstat,
305 struct cgroup_base_stat *src_bstat)
306{
307 dst_bstat->cputime.utime += src_bstat->cputime.utime;
308 dst_bstat->cputime.stime += src_bstat->cputime.stime;
309 dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
310}
311
312static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
313{
314 struct cgroup *parent = cgroup_parent(cgrp);
315 struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
316 struct task_cputime *last_cputime = &rstatc->last_bstat.cputime;
317 struct task_cputime cputime;
318 struct cgroup_base_stat delta;
319 unsigned seq;
320
321 /* fetch the current per-cpu values */
322 do {
323 seq = __u64_stats_fetch_begin(&rstatc->bsync);
324 cputime = rstatc->bstat.cputime;
325 } while (__u64_stats_fetch_retry(&rstatc->bsync, seq));
326
327	 /* calculate the delta to propagate */
328 delta.cputime.utime = cputime.utime - last_cputime->utime;
329 delta.cputime.stime = cputime.stime - last_cputime->stime;
330 delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
331 last_cputime->sum_exec_runtime;
332 *last_cputime = cputime;
333
334 /* transfer the pending stat into delta */
335 cgroup_base_stat_accumulate(&delta, &cgrp->pending_bstat);
336 memset(&cgrp->pending_bstat, 0, sizeof(cgrp->pending_bstat));
337
338 /* propagate delta into the global stat and the parent's pending */
339 cgroup_base_stat_accumulate(&cgrp->bstat, &delta);
340 if (parent)
341 cgroup_base_stat_accumulate(&parent->pending_bstat, &delta);
342}
343
344static struct cgroup_rstat_cpu *
345cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp)
346{
347 struct cgroup_rstat_cpu *rstatc;
348
349 rstatc = get_cpu_ptr(cgrp->rstat_cpu);
350 u64_stats_update_begin(&rstatc->bsync);
351 return rstatc;
352}
353
354static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
355 struct cgroup_rstat_cpu *rstatc)
356{
357 u64_stats_update_end(&rstatc->bsync);
358 cgroup_rstat_updated(cgrp, smp_processor_id());
359 put_cpu_ptr(rstatc);
360}
361
362void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
363{
364 struct cgroup_rstat_cpu *rstatc;
365
366 rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
367 rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
368 cgroup_base_stat_cputime_account_end(cgrp, rstatc);
369}
370
371void __cgroup_account_cputime_field(struct cgroup *cgrp,
372 enum cpu_usage_stat index, u64 delta_exec)
373{
374 struct cgroup_rstat_cpu *rstatc;
375
376 rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
377
378 switch (index) {
379 case CPUTIME_USER:
380 case CPUTIME_NICE:
381 rstatc->bstat.cputime.utime += delta_exec;
382 break;
383 case CPUTIME_SYSTEM:
384 case CPUTIME_IRQ:
385 case CPUTIME_SOFTIRQ:
386 rstatc->bstat.cputime.stime += delta_exec;
387 break;
388 default:
389 break;
390 }
391
392 cgroup_base_stat_cputime_account_end(cgrp, rstatc);
393}
394
395void cgroup_base_stat_cputime_show(struct seq_file *seq)
396{
397 struct cgroup *cgrp = seq_css(seq)->cgroup;
398 u64 usage, utime, stime;
399
400 if (!cgroup_parent(cgrp))
401 return;
402
403 cgroup_rstat_flush_hold(cgrp);
404 usage = cgrp->bstat.cputime.sum_exec_runtime;
405 cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime);
406 cgroup_rstat_flush_release();
407
408 do_div(usage, NSEC_PER_USEC);
409 do_div(utime, NSEC_PER_USEC);
410 do_div(stime, NSEC_PER_USEC);
411
412 seq_printf(seq, "usage_usec %llu\n"
413 "user_usec %llu\n"
414 "system_usec %llu\n",
415 usage, utime, stime);
416}
diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c
deleted file mode 100644
index 1e111dd455c4..000000000000
--- a/kernel/cgroup/stat.c
+++ /dev/null
@@ -1,338 +0,0 @@
1#include "cgroup-internal.h"
2
3#include <linux/sched/cputime.h>
4
5static DEFINE_MUTEX(cgroup_stat_mutex);
6static DEFINE_PER_CPU(raw_spinlock_t, cgroup_cpu_stat_lock);
7
/* return @cgrp's stat area for @cpu */
static struct cgroup_cpu_stat *cgroup_cpu_stat(struct cgroup *cgrp, int cpu)
{
	return per_cpu_ptr(cgrp->cpu_stat, cpu);
}
12
/**
 * cgroup_cpu_stat_updated - keep track of updated cpu_stat
 * @cgrp: target cgroup
 * @cpu: cpu on which cpu_stat was updated
 *
 * @cgrp's cpu_stat on @cpu was updated. Put it on the parent's matching
 * cpu_stat->updated_children list. See the comment on top of
 * cgroup_cpu_stat definition for details.
 */
static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
{
	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
	struct cgroup *parent;
	unsigned long flags;

	/*
	 * Speculative already-on-list test. This may race leading to
	 * temporary inaccuracies, which is fine.
	 *
	 * Because @parent's updated_children is terminated with @parent
	 * instead of NULL, we can tell whether @cgrp is on the list by
	 * testing the next pointer for NULL.
	 */
	if (cgroup_cpu_stat(cgrp, cpu)->updated_next)
		return;

	raw_spin_lock_irqsave(cpu_lock, flags);

	/* put @cgrp and all ancestors on the corresponding updated lists */
	for (parent = cgroup_parent(cgrp); parent;
	     cgrp = parent, parent = cgroup_parent(cgrp)) {
		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);

		/*
		 * Both additions and removals are bottom-up. If a cgroup
		 * is already in the tree, all ancestors are.
		 */
		if (cstat->updated_next)
			break;

		/* push @cgrp at the head of @parent's singly linked list */
		cstat->updated_next = pcstat->updated_children;
		pcstat->updated_children = cgrp;
	}

	raw_spin_unlock_irqrestore(cpu_lock, flags);
}
60
/**
 * cgroup_cpu_stat_pop_updated - iterate and dismantle cpu_stat updated tree
 * @pos: current position
 * @root: root of the tree to traverse
 * @cpu: target cpu
 *
 * Walks the updated cpu_stat tree on @cpu from @root. %NULL @pos starts
 * the traversal and %NULL return indicates the end. During traversal,
 * each returned cgroup is unlinked from the tree. Must be called with the
 * matching cgroup_cpu_stat_lock held.
 *
 * The only ordering guarantee is that, for a parent and a child pair
 * covered by a given traversal, if a child is visited, its parent is
 * guaranteed to be visited afterwards.
 */
static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
						  struct cgroup *root, int cpu)
{
	struct cgroup_cpu_stat *cstat;
	struct cgroup *parent;

	if (pos == root)
		return NULL;

	/*
	 * We're gonna walk down to the first leaf and visit/remove it. We
	 * can pick whatever unvisited node as the starting point.
	 */
	if (!pos)
		pos = root;
	else
		pos = cgroup_parent(pos);

	/* walk down to the first leaf */
	while (true) {
		cstat = cgroup_cpu_stat(pos, cpu);
		/* updated_children self-terminates, so this means leaf */
		if (cstat->updated_children == pos)
			break;
		pos = cstat->updated_children;
	}

	/*
	 * Unlink @pos from the tree. As the updated_children list is
	 * singly linked, we have to walk it to find the removal point.
	 * However, due to the way we traverse, @pos will be the first
	 * child in most cases. The only exception is @root.
	 */
	parent = cgroup_parent(pos);
	if (parent && cstat->updated_next) {
		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
		struct cgroup_cpu_stat *ncstat;
		struct cgroup **nextp;

		nextp = &pcstat->updated_children;
		while (true) {
			ncstat = cgroup_cpu_stat(*nextp, cpu);
			if (*nextp == pos)
				break;

			/* hitting @parent means the list is corrupted */
			WARN_ON_ONCE(*nextp == parent);
			nextp = &ncstat->updated_next;
		}

		*nextp = cstat->updated_next;
		cstat->updated_next = NULL;
	}

	return pos;
}
130
131static void cgroup_stat_accumulate(struct cgroup_stat *dst_stat,
132 struct cgroup_stat *src_stat)
133{
134 dst_stat->cputime.utime += src_stat->cputime.utime;
135 dst_stat->cputime.stime += src_stat->cputime.stime;
136 dst_stat->cputime.sum_exec_runtime += src_stat->cputime.sum_exec_runtime;
137}
138
/* collect @cgrp's @cpu deltas and propagate them one level up */
static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
	struct task_cputime *last_cputime = &cstat->last_cputime;
	struct task_cputime cputime;
	struct cgroup_stat delta;
	unsigned seq;

	lockdep_assert_held(&cgroup_stat_mutex);

	/* fetch the current per-cpu values */
	do {
		seq = __u64_stats_fetch_begin(&cstat->sync);
		cputime = cstat->cputime;
	} while (__u64_stats_fetch_retry(&cstat->sync, seq));

	/* accumulate the deltas to propagate */
	delta.cputime.utime = cputime.utime - last_cputime->utime;
	delta.cputime.stime = cputime.stime - last_cputime->stime;
	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
					 last_cputime->sum_exec_runtime;
	*last_cputime = cputime;

	/* transfer the pending stat into delta */
	cgroup_stat_accumulate(&delta, &cgrp->pending_stat);
	memset(&cgrp->pending_stat, 0, sizeof(cgrp->pending_stat));

	/* propagate delta into the global stat and the parent's pending */
	cgroup_stat_accumulate(&cgrp->stat, &delta);
	if (parent)
		cgroup_stat_accumulate(&parent->pending_stat, &delta);
}
172
/* see cgroup_stat_flush() */
static void cgroup_stat_flush_locked(struct cgroup *cgrp)
{
	int cpu;

	lockdep_assert_held(&cgroup_stat_mutex);

	for_each_possible_cpu(cpu) {
		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
		struct cgroup *pos = NULL;

		/* pop and flush this cpu's updated tree under its lock */
		raw_spin_lock_irq(cpu_lock);
		while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu)))
			cgroup_cpu_stat_flush_one(pos, cpu);
		raw_spin_unlock_irq(cpu_lock);
	}
}
190
/**
 * cgroup_stat_flush - flush stats in @cgrp's subtree
 * @cgrp: target cgroup
 *
 * Collect all per-cpu stats in @cgrp's subtree into the global counters
 * and propagate them upwards. After this function returns, all cgroups in
 * the subtree have up-to-date ->stat.
 *
 * This also gets all cgroups in the subtree including @cgrp off the
 * ->updated_children lists.
 */
void cgroup_stat_flush(struct cgroup *cgrp)
{
	mutex_lock(&cgroup_stat_mutex);
	cgroup_stat_flush_locked(cgrp);
	mutex_unlock(&cgroup_stat_mutex);
}
208
/*
 * Pin the local cpu's stat area (get_cpu_ptr() disables preemption) and
 * open a u64_stats write section for it.  Must be paired with
 * cgroup_cpu_stat_account_end().
 */
static struct cgroup_cpu_stat *cgroup_cpu_stat_account_begin(struct cgroup *cgrp)
{
	struct cgroup_cpu_stat *cstat;

	cstat = get_cpu_ptr(cgrp->cpu_stat);
	u64_stats_update_begin(&cstat->sync);
	return cstat;
}
217
/*
 * Pair of cgroup_cpu_stat_account_begin(): close the write section, mark
 * @cgrp updated on this cpu (preemption still disabled, so
 * smp_processor_id() is stable) and drop the per-cpu reference.
 */
static void cgroup_cpu_stat_account_end(struct cgroup *cgrp,
					struct cgroup_cpu_stat *cstat)
{
	u64_stats_update_end(&cstat->sync);
	cgroup_cpu_stat_updated(cgrp, smp_processor_id());
	put_cpu_ptr(cstat);
}
225
/*
 * __cgroup_account_cputime - charge @delta_exec ns of runtime to @cgrp
 * @cgrp: cgroup the time was consumed in
 * @delta_exec: runtime delta in nanoseconds
 */
void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
{
	struct cgroup_cpu_stat *cstat;

	cstat = cgroup_cpu_stat_account_begin(cgrp);
	cstat->cputime.sum_exec_runtime += delta_exec;
	cgroup_cpu_stat_account_end(cgrp, cstat);
}
234
235void __cgroup_account_cputime_field(struct cgroup *cgrp,
236 enum cpu_usage_stat index, u64 delta_exec)
237{
238 struct cgroup_cpu_stat *cstat;
239
240 cstat = cgroup_cpu_stat_account_begin(cgrp);
241
242 switch (index) {
243 case CPUTIME_USER:
244 case CPUTIME_NICE:
245 cstat->cputime.utime += delta_exec;
246 break;
247 case CPUTIME_SYSTEM:
248 case CPUTIME_IRQ:
249 case CPUTIME_SOFTIRQ:
250 cstat->cputime.stime += delta_exec;
251 break;
252 default:
253 break;
254 }
255
256 cgroup_cpu_stat_account_end(cgrp, cstat);
257}
258
259void cgroup_stat_show_cputime(struct seq_file *seq)
260{
261 struct cgroup *cgrp = seq_css(seq)->cgroup;
262 u64 usage, utime, stime;
263
264 if (!cgroup_parent(cgrp))
265 return;
266
267 mutex_lock(&cgroup_stat_mutex);
268
269 cgroup_stat_flush_locked(cgrp);
270
271 usage = cgrp->stat.cputime.sum_exec_runtime;
272 cputime_adjust(&cgrp->stat.cputime, &cgrp->stat.prev_cputime,
273 &utime, &stime);
274
275 mutex_unlock(&cgroup_stat_mutex);
276
277 do_div(usage, NSEC_PER_USEC);
278 do_div(utime, NSEC_PER_USEC);
279 do_div(stime, NSEC_PER_USEC);
280
281 seq_printf(seq, "usage_usec %llu\n"
282 "user_usec %llu\n"
283 "system_usec %llu\n",
284 usage, utime, stime);
285}
286
287int cgroup_stat_init(struct cgroup *cgrp)
288{
289 int cpu;
290
291 /* the root cgrp has cpu_stat preallocated */
292 if (!cgrp->cpu_stat) {
293 cgrp->cpu_stat = alloc_percpu(struct cgroup_cpu_stat);
294 if (!cgrp->cpu_stat)
295 return -ENOMEM;
296 }
297
298 /* ->updated_children list is self terminated */
299 for_each_possible_cpu(cpu) {
300 struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
301
302 cstat->updated_children = cgrp;
303 u64_stats_init(&cstat->sync);
304 }
305
306 prev_cputime_init(&cgrp->stat.prev_cputime);
307
308 return 0;
309}
310
/*
 * cgroup_stat_exit - drain and free @cgrp's stat state
 *
 * Flushes any remaining per-cpu deltas into the ancestors, verifies
 * @cgrp is off every per-cpu updated list, then frees ->cpu_stat.
 */
void cgroup_stat_exit(struct cgroup *cgrp)
{
	int cpu;

	cgroup_stat_flush(cgrp);

	/* sanity check */
	for_each_possible_cpu(cpu) {
		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);

		/*
		 * If @cgrp is somehow still linked, freeing ->cpu_stat
		 * would leave dangling pointers; warn and leak instead.
		 */
		if (WARN_ON_ONCE(cstat->updated_children != cgrp) ||
		    WARN_ON_ONCE(cstat->updated_next))
			return;
	}

	free_percpu(cgrp->cpu_stat);
	cgrp->cpu_stat = NULL;
}
329
/*
 * Boot-time initialization: set up the per-cpu update locks and the
 * default root cgroup's stat state.  Boot cannot proceed without the
 * root's stat area, hence BUG_ON on failure.
 */
void __init cgroup_stat_boot(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		raw_spin_lock_init(per_cpu_ptr(&cgroup_cpu_stat_lock, cpu));

	BUG_ON(cgroup_stat_init(&cgrp_dfl_root.cgrp));
}