| -rw-r--r-- | include/linux/cgroup-defs.h | 52 |
| -rw-r--r-- | include/linux/cgroup.h | 12 |
| -rw-r--r-- | kernel/cgroup/Makefile | 2 |
| -rw-r--r-- | kernel/cgroup/cgroup-internal.h | 11 |
| -rw-r--r-- | kernel/cgroup/cgroup.c | 105 |
| -rw-r--r-- | kernel/cgroup/rdma.c | 35 |
| -rw-r--r-- | kernel/cgroup/rstat.c | 416 |
| -rw-r--r-- | kernel/cgroup/stat.c | 338 |
8 files changed, 554 insertions, 417 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index dc5b70449dc6..c0e68f903011 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h | |||
| @@ -105,6 +105,8 @@ enum { | |||
| 105 | struct cgroup_file { | 105 | struct cgroup_file { |
| 106 | /* do not access any fields from outside cgroup core */ | 106 | /* do not access any fields from outside cgroup core */ |
| 107 | struct kernfs_node *kn; | 107 | struct kernfs_node *kn; |
| 108 | unsigned long notified_at; | ||
| 109 | struct timer_list notify_timer; | ||
| 108 | }; | 110 | }; |
| 109 | 111 | ||
| 110 | /* | 112 | /* |
| @@ -128,6 +130,9 @@ struct cgroup_subsys_state { | |||
| 128 | struct list_head sibling; | 130 | struct list_head sibling; |
| 129 | struct list_head children; | 131 | struct list_head children; |
| 130 | 132 | ||
| 133 | /* flush target list anchored at cgrp->rstat_css_list */ | ||
| 134 | struct list_head rstat_css_node; | ||
| 135 | |||
| 131 | /* | 136 | /* |
| 132 | * PI: Subsys-unique ID. 0 is unused and root is always 1. The | 137 | * PI: Subsys-unique ID. 0 is unused and root is always 1. The |
| 133 | * matching css can be looked up using css_from_id(). | 138 | * matching css can be looked up using css_from_id(). |
| @@ -256,12 +261,16 @@ struct css_set { | |||
| 256 | struct rcu_head rcu_head; | 261 | struct rcu_head rcu_head; |
| 257 | }; | 262 | }; |
| 258 | 263 | ||
| 264 | struct cgroup_base_stat { | ||
| 265 | struct task_cputime cputime; | ||
| 266 | }; | ||
| 267 | |||
| 259 | /* | 268 | /* |
| 260 | * cgroup basic resource usage statistics. Accounting is done per-cpu in | 269 | * rstat - cgroup scalable recursive statistics. Accounting is done |
| 261 | * cgroup_cpu_stat which is then lazily propagated up the hierarchy on | 270 | * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the |
| 262 | * reads. | 271 | * hierarchy on reads. |
| 263 | * | 272 | * |
| 264 | * When a stat gets updated, the cgroup_cpu_stat and its ancestors are | 273 | * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are |
| 265 | * linked into the updated tree. On the following read, propagation only | 274 | * linked into the updated tree. On the following read, propagation only |
| 266 | * considers and consumes the updated tree. This makes reading O(the | 275 | * considers and consumes the updated tree. This makes reading O(the |
| 267 | * number of descendants which have been active since last read) instead of | 276 | * number of descendants which have been active since last read) instead of |
| @@ -271,20 +280,24 @@ struct css_set { | |||
| 271 | * aren't active and stat may be read frequently. The combination can | 280 | * aren't active and stat may be read frequently. The combination can |
| 272 | * become very expensive. By propagating selectively, increasing reading | 281 | * become very expensive. By propagating selectively, increasing reading |
| 273 | * frequency decreases the cost of each read. | 282 | * frequency decreases the cost of each read. |
| 283 | * | ||
| 284 | * This struct hosts both the fields which implement the above - | ||
| 285 | * updated_children and updated_next - and the fields which track basic | ||
| 286 | * resource statistics on top of it - bsync, bstat and last_bstat. | ||
| 274 | */ | 287 | */ |
| 275 | struct cgroup_cpu_stat { | 288 | struct cgroup_rstat_cpu { |
| 276 | /* | 289 | /* |
| 277 | * ->sync protects all the current counters. These are the only | 290 | * ->bsync protects ->bstat. These are the only fields which get |
| 278 | * fields which get updated in the hot path. | 291 | * updated in the hot path. |
| 279 | */ | 292 | */ |
| 280 | struct u64_stats_sync sync; | 293 | struct u64_stats_sync bsync; |
| 281 | struct task_cputime cputime; | 294 | struct cgroup_base_stat bstat; |
| 282 | 295 | ||
| 283 | /* | 296 | /* |
| 284 | * Snapshots at the last reading. These are used to calculate the | 297 | * Snapshots at the last reading. These are used to calculate the |
| 285 | * deltas to propagate to the global counters. | 298 | * deltas to propagate to the global counters. |
| 286 | */ | 299 | */ |
| 287 | struct task_cputime last_cputime; | 300 | struct cgroup_base_stat last_bstat; |
| 288 | 301 | ||
| 289 | /* | 302 | /* |
| 290 | * Child cgroups with stat updates on this cpu since the last read | 303 | * Child cgroups with stat updates on this cpu since the last read |
| @@ -295,18 +308,12 @@ struct cgroup_cpu_stat { | |||
| 295 | * to the cgroup makes it unnecessary for each per-cpu struct to | 308 | * to the cgroup makes it unnecessary for each per-cpu struct to |
| 296 | * point back to the associated cgroup. | 309 | * point back to the associated cgroup. |
| 297 | * | 310 | * |
| 298 | * Protected by per-cpu cgroup_cpu_stat_lock. | 311 | * Protected by per-cpu cgroup_rstat_cpu_lock. |
| 299 | */ | 312 | */ |
| 300 | struct cgroup *updated_children; /* terminated by self cgroup */ | 313 | struct cgroup *updated_children; /* terminated by self cgroup */ |
| 301 | struct cgroup *updated_next; /* NULL iff not on the list */ | 314 | struct cgroup *updated_next; /* NULL iff not on the list */ |
| 302 | }; | 315 | }; |
| 303 | 316 | ||
| 304 | struct cgroup_stat { | ||
| 305 | /* per-cpu statistics are collected into the folowing global counters */ | ||
| 306 | struct task_cputime cputime; | ||
| 307 | struct prev_cputime prev_cputime; | ||
| 308 | }; | ||
| 309 | |||
| 310 | struct cgroup { | 317 | struct cgroup { |
| 311 | /* self css with NULL ->ss, points back to this cgroup */ | 318 | /* self css with NULL ->ss, points back to this cgroup */ |
| 312 | struct cgroup_subsys_state self; | 319 | struct cgroup_subsys_state self; |
| @@ -406,10 +413,14 @@ struct cgroup { | |||
| 406 | */ | 413 | */ |
| 407 | struct cgroup *dom_cgrp; | 414 | struct cgroup *dom_cgrp; |
| 408 | 415 | ||
| 416 | /* per-cpu recursive resource statistics */ | ||
| 417 | struct cgroup_rstat_cpu __percpu *rstat_cpu; | ||
| 418 | struct list_head rstat_css_list; | ||
| 419 | |||
| 409 | /* cgroup basic resource statistics */ | 420 | /* cgroup basic resource statistics */ |
| 410 | struct cgroup_cpu_stat __percpu *cpu_stat; | 421 | struct cgroup_base_stat pending_bstat; /* pending from children */ |
| 411 | struct cgroup_stat pending_stat; /* pending from children */ | 422 | struct cgroup_base_stat bstat; |
| 412 | struct cgroup_stat stat; | 423 | struct prev_cputime prev_cputime; /* for printing out cputime */ |
| 413 | 424 | ||
| 414 | /* | 425 | /* |
| 415 | * list of pidlists, up to two for each namespace (one for procs, one | 426 | * list of pidlists, up to two for each namespace (one for procs, one |
| @@ -570,6 +581,7 @@ struct cgroup_subsys { | |||
| 570 | void (*css_released)(struct cgroup_subsys_state *css); | 581 | void (*css_released)(struct cgroup_subsys_state *css); |
| 571 | void (*css_free)(struct cgroup_subsys_state *css); | 582 | void (*css_free)(struct cgroup_subsys_state *css); |
| 572 | void (*css_reset)(struct cgroup_subsys_state *css); | 583 | void (*css_reset)(struct cgroup_subsys_state *css); |
| 584 | void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); | ||
| 573 | int (*css_extra_stat_show)(struct seq_file *seq, | 585 | int (*css_extra_stat_show)(struct seq_file *seq, |
| 574 | struct cgroup_subsys_state *css); | 586 | struct cgroup_subsys_state *css); |
| 575 | 587 | ||
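The struct and comment changes above define the whole data model: a per-cpu cgroup_rstat_cpu whose updated_children/updated_next pointers form a lazily built "updated" tree, plus the split of basic cputime accounting into cgroup_base_stat. As a minimal userspace sketch (an assumption for illustration, not kernel code), the linking rule looks like the snippet below: a node sits on its parent's updated_children list iff its updated_next is non-NULL, and every list is terminated by its owner rather than by NULL, so membership can be tested before taking the per-cpu lock. struct node, node_updated() and the three-node tree in main() are hypothetical.

#include <stdio.h>

struct node {
	const char *name;
	struct node *parent;
	struct node *updated_children;	/* terminated by self, as in rstat_cpu */
	struct node *updated_next;	/* NULL iff not on the parent's list */
};

/* single-cpu mirror of the linking loop in cgroup_rstat_updated() */
static void node_updated(struct node *n)
{
	struct node *parent;

	for (parent = n->parent; parent; n = parent, parent = n->parent) {
		/* additions are bottom-up: if a node is linked, its ancestors are */
		if (n->updated_next)
			break;
		n->updated_next = parent->updated_children;
		parent->updated_children = n;
	}
}

int main(void)
{
	struct node root = { "root", NULL, &root, NULL };
	struct node a = { "a", &root, &a, NULL };
	struct node b = { "b", &a, &b, NULL };

	node_updated(&b);	/* links b under a, then a under root */
	printf("root's updated child: %s\n", root.updated_children->name); /* a */
	printf("a's updated child:    %s\n", a.updated_children->name);    /* b */
	return 0;
}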
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 473e0c0abb86..c9fdf6f57913 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
| @@ -690,11 +690,19 @@ static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id, | |||
| 690 | char *buf, size_t buflen) {} | 690 | char *buf, size_t buflen) {} |
| 691 | #endif /* !CONFIG_CGROUPS */ | 691 | #endif /* !CONFIG_CGROUPS */ |
| 692 | 692 | ||
| 693 | #ifdef CONFIG_CGROUPS | ||
| 693 | /* | 694 | /* |
| 694 | * Basic resource stats. | 695 | * cgroup scalable recursive statistics. |
| 695 | */ | 696 | */ |
| 696 | #ifdef CONFIG_CGROUPS | 697 | void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); |
| 698 | void cgroup_rstat_flush(struct cgroup *cgrp); | ||
| 699 | void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp); | ||
| 700 | void cgroup_rstat_flush_hold(struct cgroup *cgrp); | ||
| 701 | void cgroup_rstat_flush_release(void); | ||
| 697 | 702 | ||
| 703 | /* | ||
| 704 | * Basic resource stats. | ||
| 705 | */ | ||
| 698 | #ifdef CONFIG_CGROUP_CPUACCT | 706 | #ifdef CONFIG_CGROUP_CPUACCT |
| 699 | void cpuacct_charge(struct task_struct *tsk, u64 cputime); | 707 | void cpuacct_charge(struct task_struct *tsk, u64 cputime); |
| 700 | void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); | 708 | void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); |
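These five declarations are the entire interface a controller needs from rstat: mark a (cgroup, cpu) pair as updated on the hot path, and flush on demand. The sketch below is a hedged guess at how a subsystem might wire that up — the "demo" controller, its structures and helpers are invented for illustration; only cgroup_rstat_updated() and the ->css_rstat_flush() callback added by this patch are real. A complete controller would also need css_alloc()/css_free() and percpu allocation, and the hot-path counter would want u64_stats_sync (or similar) protection like the base stats use.

#include <linux/cgroup.h>
#include <linux/percpu.h>

struct demo_cpu {
	u64 events;		/* bumped on the hot path */
	u64 last_events;	/* snapshot taken at the last flush */
};

struct demo_css {
	struct cgroup_subsys_state css;
	struct demo_cpu __percpu *pcpu;
	u64 events;		/* flushed total for this css */
};

static inline struct demo_css *css_to_demo(struct cgroup_subsys_state *css)
{
	return container_of(css, struct demo_css, css);
}

/* hot path: account one event and mark the cgroup as having updates */
static void demo_account_event(struct demo_css *dcss)
{
	struct demo_cpu *dc = get_cpu_ptr(dcss->pcpu);

	dc->events++;
	cgroup_rstat_updated(dcss->css.cgroup, smp_processor_id());
	put_cpu_ptr(dcss->pcpu);
}

/* flush path: called under cgroup_rstat_flush() for each updated cpu */
static void demo_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
{
	struct demo_css *dcss = css_to_demo(css);
	struct demo_cpu *dc = per_cpu_ptr(dcss->pcpu, cpu);
	u64 delta = dc->events - dc->last_events;

	dc->last_events = dc->events;
	dcss->events += delta;
	/* a hierarchical controller would also push the delta to the parent */
}

/* css_alloc/css_free etc. omitted; only the rstat hook is shown */
struct cgroup_subsys demo_cgrp_subsys = {
	.css_rstat_flush	= demo_css_rstat_flush,
};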
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile index 2be89a003185..bfcdae896122 100644 --- a/kernel/cgroup/Makefile +++ b/kernel/cgroup/Makefile | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | # SPDX-License-Identifier: GPL-2.0 | 1 | # SPDX-License-Identifier: GPL-2.0 |
| 2 | obj-y := cgroup.o stat.o namespace.o cgroup-v1.o | 2 | obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o |
| 3 | 3 | ||
| 4 | obj-$(CONFIG_CGROUP_FREEZER) += freezer.o | 4 | obj-$(CONFIG_CGROUP_FREEZER) += freezer.o |
| 5 | obj-$(CONFIG_CGROUP_PIDS) += pids.o | 5 | obj-$(CONFIG_CGROUP_PIDS) += pids.o |
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 0808a33d16d3..77ff1cd6a252 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h | |||
| @@ -201,13 +201,12 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, | |||
| 201 | int cgroup_task_count(const struct cgroup *cgrp); | 201 | int cgroup_task_count(const struct cgroup *cgrp); |
| 202 | 202 | ||
| 203 | /* | 203 | /* |
| 204 | * stat.c | 204 | * rstat.c |
| 205 | */ | 205 | */ |
| 206 | void cgroup_stat_flush(struct cgroup *cgrp); | 206 | int cgroup_rstat_init(struct cgroup *cgrp); |
| 207 | int cgroup_stat_init(struct cgroup *cgrp); | 207 | void cgroup_rstat_exit(struct cgroup *cgrp); |
| 208 | void cgroup_stat_exit(struct cgroup *cgrp); | 208 | void cgroup_rstat_boot(void); |
| 209 | void cgroup_stat_show_cputime(struct seq_file *seq); | 209 | void cgroup_base_stat_cputime_show(struct seq_file *seq); |
| 210 | void cgroup_stat_boot(void); | ||
| 211 | 210 | ||
| 212 | /* | 211 | /* |
| 213 | * namespace.c | 212 | * namespace.c |
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 12883656e63e..acb66713f9b6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
| @@ -54,6 +54,7 @@ | |||
| 54 | #include <linux/proc_ns.h> | 54 | #include <linux/proc_ns.h> |
| 55 | #include <linux/nsproxy.h> | 55 | #include <linux/nsproxy.h> |
| 56 | #include <linux/file.h> | 56 | #include <linux/file.h> |
| 57 | #include <linux/sched/cputime.h> | ||
| 57 | #include <net/sock.h> | 58 | #include <net/sock.h> |
| 58 | 59 | ||
| 59 | #define CREATE_TRACE_POINTS | 60 | #define CREATE_TRACE_POINTS |
| @@ -61,6 +62,8 @@ | |||
| 61 | 62 | ||
| 62 | #define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \ | 63 | #define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \ |
| 63 | MAX_CFTYPE_NAME + 2) | 64 | MAX_CFTYPE_NAME + 2) |
| 65 | /* let's not notify more than 100 times per second */ | ||
| 66 | #define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100) | ||
| 64 | 67 | ||
| 65 | /* | 68 | /* |
| 66 | * cgroup_mutex is the master lock. Any modification to cgroup or its | 69 | * cgroup_mutex is the master lock. Any modification to cgroup or its |
| @@ -142,14 +145,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = { | |||
| 142 | }; | 145 | }; |
| 143 | #undef SUBSYS | 146 | #undef SUBSYS |
| 144 | 147 | ||
| 145 | static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat); | 148 | static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu); |
| 146 | 149 | ||
| 147 | /* | 150 | /* |
| 148 | * The default hierarchy, reserved for the subsystems that are otherwise | 151 | * The default hierarchy, reserved for the subsystems that are otherwise |
| 149 | * unattached - it never has more than a single cgroup, and all tasks are | 152 | * unattached - it never has more than a single cgroup, and all tasks are |
| 150 | * part of that cgroup. | 153 | * part of that cgroup. |
| 151 | */ | 154 | */ |
| 152 | struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat }; | 155 | struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu }; |
| 153 | EXPORT_SYMBOL_GPL(cgrp_dfl_root); | 156 | EXPORT_SYMBOL_GPL(cgrp_dfl_root); |
| 154 | 157 | ||
| 155 | /* | 158 | /* |
| @@ -1554,6 +1557,8 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | |||
| 1554 | spin_lock_irq(&cgroup_file_kn_lock); | 1557 | spin_lock_irq(&cgroup_file_kn_lock); |
| 1555 | cfile->kn = NULL; | 1558 | cfile->kn = NULL; |
| 1556 | spin_unlock_irq(&cgroup_file_kn_lock); | 1559 | spin_unlock_irq(&cgroup_file_kn_lock); |
| 1560 | |||
| 1561 | del_timer_sync(&cfile->notify_timer); | ||
| 1557 | } | 1562 | } |
| 1558 | 1563 | ||
| 1559 | kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name)); | 1564 | kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name)); |
| @@ -1573,8 +1578,17 @@ static void css_clear_dir(struct cgroup_subsys_state *css) | |||
| 1573 | 1578 | ||
| 1574 | css->flags &= ~CSS_VISIBLE; | 1579 | css->flags &= ~CSS_VISIBLE; |
| 1575 | 1580 | ||
| 1576 | list_for_each_entry(cfts, &css->ss->cfts, node) | 1581 | if (!css->ss) { |
| 1582 | if (cgroup_on_dfl(cgrp)) | ||
| 1583 | cfts = cgroup_base_files; | ||
| 1584 | else | ||
| 1585 | cfts = cgroup1_base_files; | ||
| 1586 | |||
| 1577 | cgroup_addrm_files(css, cgrp, cfts, false); | 1587 | cgroup_addrm_files(css, cgrp, cfts, false); |
| 1588 | } else { | ||
| 1589 | list_for_each_entry(cfts, &css->ss->cfts, node) | ||
| 1590 | cgroup_addrm_files(css, cgrp, cfts, false); | ||
| 1591 | } | ||
| 1578 | } | 1592 | } |
| 1579 | 1593 | ||
| 1580 | /** | 1594 | /** |
| @@ -1598,14 +1612,16 @@ static int css_populate_dir(struct cgroup_subsys_state *css) | |||
| 1598 | else | 1612 | else |
| 1599 | cfts = cgroup1_base_files; | 1613 | cfts = cgroup1_base_files; |
| 1600 | 1614 | ||
| 1601 | return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true); | 1615 | ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true); |
| 1602 | } | 1616 | if (ret < 0) |
| 1603 | 1617 | return ret; | |
| 1604 | list_for_each_entry(cfts, &css->ss->cfts, node) { | 1618 | } else { |
| 1605 | ret = cgroup_addrm_files(css, cgrp, cfts, true); | 1619 | list_for_each_entry(cfts, &css->ss->cfts, node) { |
| 1606 | if (ret < 0) { | 1620 | ret = cgroup_addrm_files(css, cgrp, cfts, true); |
| 1607 | failed_cfts = cfts; | 1621 | if (ret < 0) { |
| 1608 | goto err; | 1622 | failed_cfts = cfts; |
| 1623 | goto err; | ||
| 1624 | } | ||
| 1609 | } | 1625 | } |
| 1610 | } | 1626 | } |
| 1611 | 1627 | ||
| @@ -1782,13 +1798,6 @@ static void cgroup_enable_task_cg_lists(void) | |||
| 1782 | { | 1798 | { |
| 1783 | struct task_struct *p, *g; | 1799 | struct task_struct *p, *g; |
| 1784 | 1800 | ||
| 1785 | spin_lock_irq(&css_set_lock); | ||
| 1786 | |||
| 1787 | if (use_task_css_set_links) | ||
| 1788 | goto out_unlock; | ||
| 1789 | |||
| 1790 | use_task_css_set_links = true; | ||
| 1791 | |||
| 1792 | /* | 1801 | /* |
| 1793 | * We need tasklist_lock because RCU is not safe against | 1802 | * We need tasklist_lock because RCU is not safe against |
| 1794 | * while_each_thread(). Besides, a forking task that has passed | 1803 | * while_each_thread(). Besides, a forking task that has passed |
| @@ -1797,6 +1806,13 @@ static void cgroup_enable_task_cg_lists(void) | |||
| 1797 | * tasklist if we walk through it with RCU. | 1806 | * tasklist if we walk through it with RCU. |
| 1798 | */ | 1807 | */ |
| 1799 | read_lock(&tasklist_lock); | 1808 | read_lock(&tasklist_lock); |
| 1809 | spin_lock_irq(&css_set_lock); | ||
| 1810 | |||
| 1811 | if (use_task_css_set_links) | ||
| 1812 | goto out_unlock; | ||
| 1813 | |||
| 1814 | use_task_css_set_links = true; | ||
| 1815 | |||
| 1800 | do_each_thread(g, p) { | 1816 | do_each_thread(g, p) { |
| 1801 | WARN_ON_ONCE(!list_empty(&p->cg_list) || | 1817 | WARN_ON_ONCE(!list_empty(&p->cg_list) || |
| 1802 | task_css_set(p) != &init_css_set); | 1818 | task_css_set(p) != &init_css_set); |
| @@ -1824,9 +1840,9 @@ static void cgroup_enable_task_cg_lists(void) | |||
| 1824 | } | 1840 | } |
| 1825 | spin_unlock(&p->sighand->siglock); | 1841 | spin_unlock(&p->sighand->siglock); |
| 1826 | } while_each_thread(g, p); | 1842 | } while_each_thread(g, p); |
| 1827 | read_unlock(&tasklist_lock); | ||
| 1828 | out_unlock: | 1843 | out_unlock: |
| 1829 | spin_unlock_irq(&css_set_lock); | 1844 | spin_unlock_irq(&css_set_lock); |
| 1845 | read_unlock(&tasklist_lock); | ||
| 1830 | } | 1846 | } |
| 1831 | 1847 | ||
| 1832 | static void init_cgroup_housekeeping(struct cgroup *cgrp) | 1848 | static void init_cgroup_housekeeping(struct cgroup *cgrp) |
| @@ -1844,6 +1860,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 1844 | cgrp->dom_cgrp = cgrp; | 1860 | cgrp->dom_cgrp = cgrp; |
| 1845 | cgrp->max_descendants = INT_MAX; | 1861 | cgrp->max_descendants = INT_MAX; |
| 1846 | cgrp->max_depth = INT_MAX; | 1862 | cgrp->max_depth = INT_MAX; |
| 1863 | INIT_LIST_HEAD(&cgrp->rstat_css_list); | ||
| 1864 | prev_cputime_init(&cgrp->prev_cputime); | ||
| 1847 | 1865 | ||
| 1848 | for_each_subsys(ss, ssid) | 1866 | for_each_subsys(ss, ssid) |
| 1849 | INIT_LIST_HEAD(&cgrp->e_csets[ssid]); | 1867 | INIT_LIST_HEAD(&cgrp->e_csets[ssid]); |
| @@ -3381,7 +3399,7 @@ static int cpu_stat_show(struct seq_file *seq, void *v) | |||
| 3381 | struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup; | 3399 | struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup; |
| 3382 | int ret = 0; | 3400 | int ret = 0; |
| 3383 | 3401 | ||
| 3384 | cgroup_stat_show_cputime(seq); | 3402 | cgroup_base_stat_cputime_show(seq); |
| 3385 | #ifdef CONFIG_CGROUP_SCHED | 3403 | #ifdef CONFIG_CGROUP_SCHED |
| 3386 | ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id); | 3404 | ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id); |
| 3387 | #endif | 3405 | #endif |
| @@ -3521,6 +3539,12 @@ static int cgroup_kn_set_ugid(struct kernfs_node *kn) | |||
| 3521 | return kernfs_setattr(kn, &iattr); | 3539 | return kernfs_setattr(kn, &iattr); |
| 3522 | } | 3540 | } |
| 3523 | 3541 | ||
| 3542 | static void cgroup_file_notify_timer(struct timer_list *timer) | ||
| 3543 | { | ||
| 3544 | cgroup_file_notify(container_of(timer, struct cgroup_file, | ||
| 3545 | notify_timer)); | ||
| 3546 | } | ||
| 3547 | |||
| 3524 | static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, | 3548 | static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, |
| 3525 | struct cftype *cft) | 3549 | struct cftype *cft) |
| 3526 | { | 3550 | { |
| @@ -3547,6 +3571,8 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, | |||
| 3547 | if (cft->file_offset) { | 3571 | if (cft->file_offset) { |
| 3548 | struct cgroup_file *cfile = (void *)css + cft->file_offset; | 3572 | struct cgroup_file *cfile = (void *)css + cft->file_offset; |
| 3549 | 3573 | ||
| 3574 | timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0); | ||
| 3575 | |||
| 3550 | spin_lock_irq(&cgroup_file_kn_lock); | 3576 | spin_lock_irq(&cgroup_file_kn_lock); |
| 3551 | cfile->kn = kn; | 3577 | cfile->kn = kn; |
| 3552 | spin_unlock_irq(&cgroup_file_kn_lock); | 3578 | spin_unlock_irq(&cgroup_file_kn_lock); |
| @@ -3796,8 +3822,17 @@ void cgroup_file_notify(struct cgroup_file *cfile) | |||
| 3796 | unsigned long flags; | 3822 | unsigned long flags; |
| 3797 | 3823 | ||
| 3798 | spin_lock_irqsave(&cgroup_file_kn_lock, flags); | 3824 | spin_lock_irqsave(&cgroup_file_kn_lock, flags); |
| 3799 | if (cfile->kn) | 3825 | if (cfile->kn) { |
| 3800 | kernfs_notify(cfile->kn); | 3826 | unsigned long last = cfile->notified_at; |
| 3827 | unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV; | ||
| 3828 | |||
| 3829 | if (time_in_range(jiffies, last, next)) { | ||
| 3830 | timer_reduce(&cfile->notify_timer, next); | ||
| 3831 | } else { | ||
| 3832 | kernfs_notify(cfile->kn); | ||
| 3833 | cfile->notified_at = jiffies; | ||
| 3834 | } | ||
| 3835 | } | ||
| 3801 | spin_unlock_irqrestore(&cgroup_file_kn_lock, flags); | 3836 | spin_unlock_irqrestore(&cgroup_file_kn_lock, flags); |
| 3802 | } | 3837 | } |
| 3803 | 3838 | ||
| @@ -4560,7 +4595,7 @@ static void css_free_rwork_fn(struct work_struct *work) | |||
| 4560 | cgroup_put(cgroup_parent(cgrp)); | 4595 | cgroup_put(cgroup_parent(cgrp)); |
| 4561 | kernfs_put(cgrp->kn); | 4596 | kernfs_put(cgrp->kn); |
| 4562 | if (cgroup_on_dfl(cgrp)) | 4597 | if (cgroup_on_dfl(cgrp)) |
| 4563 | cgroup_stat_exit(cgrp); | 4598 | cgroup_rstat_exit(cgrp); |
| 4564 | kfree(cgrp); | 4599 | kfree(cgrp); |
| 4565 | } else { | 4600 | } else { |
| 4566 | /* | 4601 | /* |
| @@ -4587,6 +4622,11 @@ static void css_release_work_fn(struct work_struct *work) | |||
| 4587 | 4622 | ||
| 4588 | if (ss) { | 4623 | if (ss) { |
| 4589 | /* css release path */ | 4624 | /* css release path */ |
| 4625 | if (!list_empty(&css->rstat_css_node)) { | ||
| 4626 | cgroup_rstat_flush(cgrp); | ||
| 4627 | list_del_rcu(&css->rstat_css_node); | ||
| 4628 | } | ||
| 4629 | |||
| 4590 | cgroup_idr_replace(&ss->css_idr, NULL, css->id); | 4630 | cgroup_idr_replace(&ss->css_idr, NULL, css->id); |
| 4591 | if (ss->css_released) | 4631 | if (ss->css_released) |
| 4592 | ss->css_released(css); | 4632 | ss->css_released(css); |
| @@ -4597,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work) | |||
| 4597 | trace_cgroup_release(cgrp); | 4637 | trace_cgroup_release(cgrp); |
| 4598 | 4638 | ||
| 4599 | if (cgroup_on_dfl(cgrp)) | 4639 | if (cgroup_on_dfl(cgrp)) |
| 4600 | cgroup_stat_flush(cgrp); | 4640 | cgroup_rstat_flush(cgrp); |
| 4601 | 4641 | ||
| 4602 | for (tcgrp = cgroup_parent(cgrp); tcgrp; | 4642 | for (tcgrp = cgroup_parent(cgrp); tcgrp; |
| 4603 | tcgrp = cgroup_parent(tcgrp)) | 4643 | tcgrp = cgroup_parent(tcgrp)) |
| @@ -4648,6 +4688,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, | |||
| 4648 | css->id = -1; | 4688 | css->id = -1; |
| 4649 | INIT_LIST_HEAD(&css->sibling); | 4689 | INIT_LIST_HEAD(&css->sibling); |
| 4650 | INIT_LIST_HEAD(&css->children); | 4690 | INIT_LIST_HEAD(&css->children); |
| 4691 | INIT_LIST_HEAD(&css->rstat_css_node); | ||
| 4651 | css->serial_nr = css_serial_nr_next++; | 4692 | css->serial_nr = css_serial_nr_next++; |
| 4652 | atomic_set(&css->online_cnt, 0); | 4693 | atomic_set(&css->online_cnt, 0); |
| 4653 | 4694 | ||
| @@ -4656,6 +4697,9 @@ static void init_and_link_css(struct cgroup_subsys_state *css, | |||
| 4656 | css_get(css->parent); | 4697 | css_get(css->parent); |
| 4657 | } | 4698 | } |
| 4658 | 4699 | ||
| 4700 | if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush) | ||
| 4701 | list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list); | ||
| 4702 | |||
| 4659 | BUG_ON(cgroup_css(cgrp, ss)); | 4703 | BUG_ON(cgroup_css(cgrp, ss)); |
| 4660 | } | 4704 | } |
| 4661 | 4705 | ||
| @@ -4757,6 +4801,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, | |||
| 4757 | err_list_del: | 4801 | err_list_del: |
| 4758 | list_del_rcu(&css->sibling); | 4802 | list_del_rcu(&css->sibling); |
| 4759 | err_free_css: | 4803 | err_free_css: |
| 4804 | list_del_rcu(&css->rstat_css_node); | ||
| 4760 | INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); | 4805 | INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); |
| 4761 | queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); | 4806 | queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); |
| 4762 | return ERR_PTR(err); | 4807 | return ERR_PTR(err); |
| @@ -4785,7 +4830,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent) | |||
| 4785 | goto out_free_cgrp; | 4830 | goto out_free_cgrp; |
| 4786 | 4831 | ||
| 4787 | if (cgroup_on_dfl(parent)) { | 4832 | if (cgroup_on_dfl(parent)) { |
| 4788 | ret = cgroup_stat_init(cgrp); | 4833 | ret = cgroup_rstat_init(cgrp); |
| 4789 | if (ret) | 4834 | if (ret) |
| 4790 | goto out_cancel_ref; | 4835 | goto out_cancel_ref; |
| 4791 | } | 4836 | } |
| @@ -4850,7 +4895,7 @@ out_idr_free: | |||
| 4850 | cgroup_idr_remove(&root->cgroup_idr, cgrp->id); | 4895 | cgroup_idr_remove(&root->cgroup_idr, cgrp->id); |
| 4851 | out_stat_exit: | 4896 | out_stat_exit: |
| 4852 | if (cgroup_on_dfl(parent)) | 4897 | if (cgroup_on_dfl(parent)) |
| 4853 | cgroup_stat_exit(cgrp); | 4898 | cgroup_rstat_exit(cgrp); |
| 4854 | out_cancel_ref: | 4899 | out_cancel_ref: |
| 4855 | percpu_ref_exit(&cgrp->self.refcnt); | 4900 | percpu_ref_exit(&cgrp->self.refcnt); |
| 4856 | out_free_cgrp: | 4901 | out_free_cgrp: |
| @@ -5090,10 +5135,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
| 5090 | for_each_css(css, ssid, cgrp) | 5135 | for_each_css(css, ssid, cgrp) |
| 5091 | kill_css(css); | 5136 | kill_css(css); |
| 5092 | 5137 | ||
| 5093 | /* | 5138 | /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */ |
| 5094 | * Remove @cgrp directory along with the base files. @cgrp has an | 5139 | css_clear_dir(&cgrp->self); |
| 5095 | * extra ref on its kn. | ||
| 5096 | */ | ||
| 5097 | kernfs_remove(cgrp->kn); | 5140 | kernfs_remove(cgrp->kn); |
| 5098 | 5141 | ||
| 5099 | if (parent && cgroup_is_threaded(cgrp)) | 5142 | if (parent && cgroup_is_threaded(cgrp)) |
| @@ -5245,7 +5288,7 @@ int __init cgroup_init(void) | |||
| 5245 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); | 5288 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); |
| 5246 | BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); | 5289 | BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); |
| 5247 | 5290 | ||
| 5248 | cgroup_stat_boot(); | 5291 | cgroup_rstat_boot(); |
| 5249 | 5292 | ||
| 5250 | /* | 5293 | /* |
| 5251 | * The latency of the synchronize_sched() is too high for cgroups, | 5294 | * The latency of the synchronize_sched() is too high for cgroups, |
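Besides the stat-to-rstat renames, the cgroup.c changes above add rate limiting to cgroup_file_notify(): at most roughly one kernfs_notify() per CGROUP_FILE_NOTIFY_MIN_INTV (about 100 per second), with any burst of events coalesced into a single deferred timer shot via timer_reduce(). The userspace model below is an illustrative assumption, not kernel code: integer "ticks" stand in for jiffies, printf for kernfs_notify(), and the in-range test is exclusive at the top where the kernel's time_in_range() is inclusive.

#include <stdio.h>

#define NOTIFY_MIN_INTV	10	/* ticks; the patch uses DIV_ROUND_UP(HZ, 100) jiffies */

struct file_state {
	unsigned long notified_at;	/* tick of the last real notification */
	unsigned long timer_expires;	/* 0 when no deferred shot is armed */
};

static void file_notify(struct file_state *f, unsigned long now)
{
	unsigned long next = f->notified_at + NOTIFY_MIN_INTV;

	if (now >= f->notified_at && now < next) {
		/* too soon: fold this event into one timer shot at "next" */
		if (!f->timer_expires || f->timer_expires > next)
			f->timer_expires = next;	/* ~ timer_reduce() */
	} else {
		printf("notify at tick %lu\n", now);	/* ~ kernfs_notify() */
		f->notified_at = now;
		f->timer_expires = 0;
	}
}

int main(void)
{
	struct file_state f = { 0, 0 };
	unsigned long t;

	file_notify(&f, 100);			/* out of range, fires at once */
	for (t = 101; t <= 104; t++)
		file_notify(&f, t);		/* four events, no notifications */
	if (f.timer_expires)			/* the armed timer re-enters notify */
		file_notify(&f, f.timer_expires);	/* fires once, at tick 110 */
	return 0;
}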
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index defad3c5e7dc..d3bbb757ee49 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c | |||
| @@ -362,35 +362,32 @@ EXPORT_SYMBOL(rdmacg_unregister_device); | |||
| 362 | static int parse_resource(char *c, int *intval) | 362 | static int parse_resource(char *c, int *intval) |
| 363 | { | 363 | { |
| 364 | substring_t argstr; | 364 | substring_t argstr; |
| 365 | const char **table = &rdmacg_resource_names[0]; | ||
| 366 | char *name, *value = c; | 365 | char *name, *value = c; |
| 367 | size_t len; | 366 | size_t len; |
| 368 | int ret, i = 0; | 367 | int ret, i; |
| 369 | 368 | ||
| 370 | name = strsep(&value, "="); | 369 | name = strsep(&value, "="); |
| 371 | if (!name || !value) | 370 | if (!name || !value) |
| 372 | return -EINVAL; | 371 | return -EINVAL; |
| 373 | 372 | ||
| 374 | len = strlen(value); | 373 | i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name); |
| 374 | if (i < 0) | ||
| 375 | return i; | ||
| 375 | 376 | ||
| 376 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { | 377 | len = strlen(value); |
| 377 | if (strcmp(table[i], name)) | ||
| 378 | continue; | ||
| 379 | 378 | ||
| 380 | argstr.from = value; | 379 | argstr.from = value; |
| 381 | argstr.to = value + len; | 380 | argstr.to = value + len; |
| 382 | 381 | ||
| 383 | ret = match_int(&argstr, intval); | 382 | ret = match_int(&argstr, intval); |
| 384 | if (ret >= 0) { | 383 | if (ret >= 0) { |
| 385 | if (*intval < 0) | 384 | if (*intval < 0) |
| 386 | break; | 385 | return -EINVAL; |
| 387 | return i; | 386 | return i; |
| 388 | } | 387 | } |
| 389 | if (strncmp(value, RDMACG_MAX_STR, len) == 0) { | 388 | if (strncmp(value, RDMACG_MAX_STR, len) == 0) { |
| 390 | *intval = S32_MAX; | 389 | *intval = S32_MAX; |
| 391 | return i; | 390 | return i; |
| 392 | } | ||
| 393 | break; | ||
| 394 | } | 391 | } |
| 395 | return -EINVAL; | 392 | return -EINVAL; |
| 396 | } | 393 | } |
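The rdma.c hunk is an independent cleanup: parse_resource() now resolves the resource name with one match_string() call and only then parses the value, instead of open-coding the table walk. The standalone sketch below mirrors that shape; find_name(), the table contents and the atoi()-based value parsing are illustrative stand-ins (the kernel uses lib/string.c's match_string() and match_int()), not the actual implementation.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>

static const char * const resource_names[] = { "hca_handle", "hca_object" };
#define RESOURCE_MAX	2

/* same contract as the kernel's match_string(): index on hit, <0 on miss */
static int find_name(const char * const *table, size_t n, const char *s)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (!strcmp(table[i], s))
			return (int)i;
	return -1;
}

static int parse_resource(char *c, int *intval)
{
	char *name, *value = c;
	int i;

	name = strsep(&value, "=");
	if (!name || !value)
		return -1;

	i = find_name(resource_names, RESOURCE_MAX, name);
	if (i < 0)
		return i;		/* unknown name: fail before touching the value */

	if (!strcmp(value, "max")) {
		*intval = INT_MAX;	/* the kernel caps at S32_MAX */
		return i;
	}
	*intval = atoi(value);		/* the kernel uses match_int() here */
	return *intval < 0 ? -1 : i;
}

int main(void)
{
	char buf[] = "hca_object=10";
	int val = 0;

	printf("index %d, value %d\n", parse_resource(buf, &val), val);
	return 0;
}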
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c new file mode 100644 index 000000000000..d503d1a9007c --- /dev/null +++ b/kernel/cgroup/rstat.c | |||
| @@ -0,0 +1,416 @@ | |||
| 1 | #include "cgroup-internal.h" | ||
| 2 | |||
| 3 | #include <linux/sched/cputime.h> | ||
| 4 | |||
| 5 | static DEFINE_SPINLOCK(cgroup_rstat_lock); | ||
| 6 | static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock); | ||
| 7 | |||
| 8 | static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu); | ||
| 9 | |||
| 10 | static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu) | ||
| 11 | { | ||
| 12 | return per_cpu_ptr(cgrp->rstat_cpu, cpu); | ||
| 13 | } | ||
| 14 | |||
| 15 | /** | ||
| 16 | * cgroup_rstat_updated - keep track of updated rstat_cpu | ||
| 17 | * @cgrp: target cgroup | ||
| 18 | * @cpu: cpu on which rstat_cpu was updated | ||
| 19 | * | ||
| 20 | * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching | ||
| 21 | * rstat_cpu->updated_children list. See the comment on top of | ||
| 22 | * cgroup_rstat_cpu definition for details. | ||
| 23 | */ | ||
| 24 | void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) | ||
| 25 | { | ||
| 26 | raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu); | ||
| 27 | struct cgroup *parent; | ||
| 28 | unsigned long flags; | ||
| 29 | |||
| 30 | /* nothing to do for root */ | ||
| 31 | if (!cgroup_parent(cgrp)) | ||
| 32 | return; | ||
| 33 | |||
| 34 | /* | ||
| 35 | * Paired with the one in cgroup_rstat_cpu_pop_updated(). Either we | ||
| 36 | * see NULL updated_next or they see our updated stat. | ||
| 37 | */ | ||
| 38 | smp_mb(); | ||
| 39 | |||
| 40 | /* | ||
| 41 | * Because @parent's updated_children is terminated with @parent | ||
| 42 | * instead of NULL, we can tell whether @cgrp is on the list by | ||
| 43 | * testing the next pointer for NULL. | ||
| 44 | */ | ||
| 45 | if (cgroup_rstat_cpu(cgrp, cpu)->updated_next) | ||
| 46 | return; | ||
| 47 | |||
| 48 | raw_spin_lock_irqsave(cpu_lock, flags); | ||
| 49 | |||
| 50 | /* put @cgrp and all ancestors on the corresponding updated lists */ | ||
| 51 | for (parent = cgroup_parent(cgrp); parent; | ||
| 52 | cgrp = parent, parent = cgroup_parent(cgrp)) { | ||
| 53 | struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); | ||
| 54 | struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu); | ||
| 55 | |||
| 56 | /* | ||
| 57 | * Both additions and removals are bottom-up. If a cgroup | ||
| 58 | * is already in the tree, all ancestors are. | ||
| 59 | */ | ||
| 60 | if (rstatc->updated_next) | ||
| 61 | break; | ||
| 62 | |||
| 63 | rstatc->updated_next = prstatc->updated_children; | ||
| 64 | prstatc->updated_children = cgrp; | ||
| 65 | } | ||
| 66 | |||
| 67 | raw_spin_unlock_irqrestore(cpu_lock, flags); | ||
| 68 | } | ||
| 69 | EXPORT_SYMBOL_GPL(cgroup_rstat_updated); | ||
| 70 | |||
| 71 | /** | ||
| 72 | * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree | ||
| 73 | * @pos: current position | ||
| 74 | * @root: root of the tree to traverse | ||
| 75 | * @cpu: target cpu | ||
| 76 | * | ||
| 77 | * Walks the updated rstat_cpu tree on @cpu from @root. %NULL @pos starts | ||
| 78 | * the traversal and %NULL return indicates the end. During traversal, | ||
| 79 | * each returned cgroup is unlinked from the tree. Must be called with the | ||
| 80 | * matching cgroup_rstat_cpu_lock held. | ||
| 81 | * | ||
| 82 | * The only ordering guarantee is that, for a parent and a child pair | ||
| 83 | * covered by a given traversal, if a child is visited, its parent is | ||
| 84 | * guaranteed to be visited afterwards. | ||
| 85 | */ | ||
| 86 | static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, | ||
| 87 | struct cgroup *root, int cpu) | ||
| 88 | { | ||
| 89 | struct cgroup_rstat_cpu *rstatc; | ||
| 90 | struct cgroup *parent; | ||
| 91 | |||
| 92 | if (pos == root) | ||
| 93 | return NULL; | ||
| 94 | |||
| 95 | /* | ||
| 96 | * We're gonna walk down to the first leaf and visit/remove it. We | ||
| 97 | * can pick whatever unvisited node as the starting point. | ||
| 98 | */ | ||
| 99 | if (!pos) | ||
| 100 | pos = root; | ||
| 101 | else | ||
| 102 | pos = cgroup_parent(pos); | ||
| 103 | |||
| 104 | /* walk down to the first leaf */ | ||
| 105 | while (true) { | ||
| 106 | rstatc = cgroup_rstat_cpu(pos, cpu); | ||
| 107 | if (rstatc->updated_children == pos) | ||
| 108 | break; | ||
| 109 | pos = rstatc->updated_children; | ||
| 110 | } | ||
| 111 | |||
| 112 | /* | ||
| 113 | * Unlink @pos from the tree. As the updated_children list is | ||
| 114 | * singly linked, we have to walk it to find the removal point. | ||
| 115 | * However, due to the way we traverse, @pos will be the first | ||
| 116 | * child in most cases. The only exception is @root. | ||
| 117 | */ | ||
| 118 | parent = cgroup_parent(pos); | ||
| 119 | if (parent && rstatc->updated_next) { | ||
| 120 | struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu); | ||
| 121 | struct cgroup_rstat_cpu *nrstatc; | ||
| 122 | struct cgroup **nextp; | ||
| 123 | |||
| 124 | nextp = &prstatc->updated_children; | ||
| 125 | while (true) { | ||
| 126 | nrstatc = cgroup_rstat_cpu(*nextp, cpu); | ||
| 127 | if (*nextp == pos) | ||
| 128 | break; | ||
| 129 | |||
| 130 | WARN_ON_ONCE(*nextp == parent); | ||
| 131 | nextp = &nrstatc->updated_next; | ||
| 132 | } | ||
| 133 | |||
| 134 | *nextp = rstatc->updated_next; | ||
| 135 | rstatc->updated_next = NULL; | ||
| 136 | |||
| 137 | /* | ||
| 138 | * Paired with the one in cgroup_rstat_updated(). | ||
| 139 | * Either they see NULL updated_next or we see their | ||
| 140 | * updated stat. | ||
| 141 | */ | ||
| 142 | smp_mb(); | ||
| 143 | } | ||
| 144 | |||
| 145 | return pos; | ||
| 146 | } | ||
| 147 | |||
| 148 | /* see cgroup_rstat_flush() */ | ||
| 149 | static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) | ||
| 150 | __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock) | ||
| 151 | { | ||
| 152 | int cpu; | ||
| 153 | |||
| 154 | lockdep_assert_held(&cgroup_rstat_lock); | ||
| 155 | |||
| 156 | for_each_possible_cpu(cpu) { | ||
| 157 | raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, | ||
| 158 | cpu); | ||
| 159 | struct cgroup *pos = NULL; | ||
| 160 | |||
| 161 | raw_spin_lock(cpu_lock); | ||
| 162 | while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) { | ||
| 163 | struct cgroup_subsys_state *css; | ||
| 164 | |||
| 165 | cgroup_base_stat_flush(pos, cpu); | ||
| 166 | |||
| 167 | rcu_read_lock(); | ||
| 168 | list_for_each_entry_rcu(css, &pos->rstat_css_list, | ||
| 169 | rstat_css_node) | ||
| 170 | css->ss->css_rstat_flush(css, cpu); | ||
| 171 | rcu_read_unlock(); | ||
| 172 | } | ||
| 173 | raw_spin_unlock(cpu_lock); | ||
| 174 | |||
| 175 | /* if @may_sleep, play nice and yield if necessary */ | ||
| 176 | if (may_sleep && (need_resched() || | ||
| 177 | spin_needbreak(&cgroup_rstat_lock))) { | ||
| 178 | spin_unlock_irq(&cgroup_rstat_lock); | ||
| 179 | if (!cond_resched()) | ||
| 180 | cpu_relax(); | ||
| 181 | spin_lock_irq(&cgroup_rstat_lock); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
| 186 | /** | ||
| 187 | * cgroup_rstat_flush - flush stats in @cgrp's subtree | ||
| 188 | * @cgrp: target cgroup | ||
| 189 | * | ||
| 190 | * Collect all per-cpu stats in @cgrp's subtree into the global counters | ||
| 191 | * and propagate them upwards. After this function returns, all cgroups in | ||
| 192 | * the subtree have up-to-date ->stat. | ||
| 193 | * | ||
| 194 | * This also gets all cgroups in the subtree including @cgrp off the | ||
| 195 | * ->updated_children lists. | ||
| 196 | * | ||
| 197 | * This function may block. | ||
| 198 | */ | ||
| 199 | void cgroup_rstat_flush(struct cgroup *cgrp) | ||
| 200 | { | ||
| 201 | might_sleep(); | ||
| 202 | |||
| 203 | spin_lock_irq(&cgroup_rstat_lock); | ||
| 204 | cgroup_rstat_flush_locked(cgrp, true); | ||
| 205 | spin_unlock_irq(&cgroup_rstat_lock); | ||
| 206 | } | ||
| 207 | |||
| 208 | /** | ||
| 209 | * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush() | ||
| 210 | * @cgrp: target cgroup | ||
| 211 | * | ||
| 212 | * This function can be called from any context. | ||
| 213 | */ | ||
| 214 | void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp) | ||
| 215 | { | ||
| 216 | unsigned long flags; | ||
| 217 | |||
| 218 | spin_lock_irqsave(&cgroup_rstat_lock, flags); | ||
| 219 | cgroup_rstat_flush_locked(cgrp, false); | ||
| 220 | spin_unlock_irqrestore(&cgroup_rstat_lock, flags); | ||
| 221 | } | ||
| 222 | |||
| 223 | /** | ||
| 224 | * cgroup_rstat_flush_begin - flush stats in @cgrp's subtree and hold | ||
| 225 | * @cgrp: target cgroup | ||
| 226 | * | ||
| 227 | * Flush stats in @cgrp's subtree and prevent further flushes. Must be | ||
| 228 | * paired with cgroup_rstat_flush_release(). | ||
| 229 | * | ||
| 230 | * This function may block. | ||
| 231 | */ | ||
| 232 | void cgroup_rstat_flush_hold(struct cgroup *cgrp) | ||
| 233 | __acquires(&cgroup_rstat_lock) | ||
| 234 | { | ||
| 235 | might_sleep(); | ||
| 236 | spin_lock_irq(&cgroup_rstat_lock); | ||
| 237 | cgroup_rstat_flush_locked(cgrp, true); | ||
| 238 | } | ||
| 239 | |||
| 240 | /** | ||
| 241 | * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold() | ||
| 242 | */ | ||
| 243 | void cgroup_rstat_flush_release(void) | ||
| 244 | __releases(&cgroup_rstat_lock) | ||
| 245 | { | ||
| 246 | spin_unlock_irq(&cgroup_rstat_lock); | ||
| 247 | } | ||
| 248 | |||
| 249 | int cgroup_rstat_init(struct cgroup *cgrp) | ||
| 250 | { | ||
| 251 | int cpu; | ||
| 252 | |||
| 253 | /* the root cgrp has rstat_cpu preallocated */ | ||
| 254 | if (!cgrp->rstat_cpu) { | ||
| 255 | cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu); | ||
| 256 | if (!cgrp->rstat_cpu) | ||
| 257 | return -ENOMEM; | ||
| 258 | } | ||
| 259 | |||
| 260 | /* ->updated_children list is self terminated */ | ||
| 261 | for_each_possible_cpu(cpu) { | ||
| 262 | struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); | ||
| 263 | |||
| 264 | rstatc->updated_children = cgrp; | ||
| 265 | u64_stats_init(&rstatc->bsync); | ||
| 266 | } | ||
| 267 | |||
| 268 | return 0; | ||
| 269 | } | ||
| 270 | |||
| 271 | void cgroup_rstat_exit(struct cgroup *cgrp) | ||
| 272 | { | ||
| 273 | int cpu; | ||
| 274 | |||
| 275 | cgroup_rstat_flush(cgrp); | ||
| 276 | |||
| 277 | /* sanity check */ | ||
| 278 | for_each_possible_cpu(cpu) { | ||
| 279 | struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); | ||
| 280 | |||
| 281 | if (WARN_ON_ONCE(rstatc->updated_children != cgrp) || | ||
| 282 | WARN_ON_ONCE(rstatc->updated_next)) | ||
| 283 | return; | ||
| 284 | } | ||
| 285 | |||
| 286 | free_percpu(cgrp->rstat_cpu); | ||
| 287 | cgrp->rstat_cpu = NULL; | ||
| 288 | } | ||
| 289 | |||
| 290 | void __init cgroup_rstat_boot(void) | ||
| 291 | { | ||
| 292 | int cpu; | ||
| 293 | |||
| 294 | for_each_possible_cpu(cpu) | ||
| 295 | raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu)); | ||
| 296 | |||
| 297 | BUG_ON(cgroup_rstat_init(&cgrp_dfl_root.cgrp)); | ||
| 298 | } | ||
| 299 | |||
| 300 | /* | ||
| 301 | * Functions for cgroup basic resource statistics implemented on top of | ||
| 302 | * rstat. | ||
| 303 | */ | ||
| 304 | static void cgroup_base_stat_accumulate(struct cgroup_base_stat *dst_bstat, | ||
| 305 | struct cgroup_base_stat *src_bstat) | ||
| 306 | { | ||
| 307 | dst_bstat->cputime.utime += src_bstat->cputime.utime; | ||
| 308 | dst_bstat->cputime.stime += src_bstat->cputime.stime; | ||
| 309 | dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime; | ||
| 310 | } | ||
| 311 | |||
| 312 | static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) | ||
| 313 | { | ||
| 314 | struct cgroup *parent = cgroup_parent(cgrp); | ||
| 315 | struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu); | ||
| 316 | struct task_cputime *last_cputime = &rstatc->last_bstat.cputime; | ||
| 317 | struct task_cputime cputime; | ||
| 318 | struct cgroup_base_stat delta; | ||
| 319 | unsigned seq; | ||
| 320 | |||
| 321 | /* fetch the current per-cpu values */ | ||
| 322 | do { | ||
| 323 | seq = __u64_stats_fetch_begin(&rstatc->bsync); | ||
| 324 | cputime = rstatc->bstat.cputime; | ||
| 325 | } while (__u64_stats_fetch_retry(&rstatc->bsync, seq)); | ||
| 326 | |||
| 327 | /* calculate the delta to propagate */ | ||
| 328 | delta.cputime.utime = cputime.utime - last_cputime->utime; | ||
| 329 | delta.cputime.stime = cputime.stime - last_cputime->stime; | ||
| 330 | delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime - | ||
| 331 | last_cputime->sum_exec_runtime; | ||
| 332 | *last_cputime = cputime; | ||
| 333 | |||
| 334 | /* transfer the pending stat into delta */ | ||
| 335 | cgroup_base_stat_accumulate(&delta, &cgrp->pending_bstat); | ||
| 336 | memset(&cgrp->pending_bstat, 0, sizeof(cgrp->pending_bstat)); | ||
| 337 | |||
| 338 | /* propagate delta into the global stat and the parent's pending */ | ||
| 339 | cgroup_base_stat_accumulate(&cgrp->bstat, &delta); | ||
| 340 | if (parent) | ||
| 341 | cgroup_base_stat_accumulate(&parent->pending_bstat, &delta); | ||
| 342 | } | ||
| 343 | |||
| 344 | static struct cgroup_rstat_cpu * | ||
| 345 | cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp) | ||
| 346 | { | ||
| 347 | struct cgroup_rstat_cpu *rstatc; | ||
| 348 | |||
| 349 | rstatc = get_cpu_ptr(cgrp->rstat_cpu); | ||
| 350 | u64_stats_update_begin(&rstatc->bsync); | ||
| 351 | return rstatc; | ||
| 352 | } | ||
| 353 | |||
| 354 | static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, | ||
| 355 | struct cgroup_rstat_cpu *rstatc) | ||
| 356 | { | ||
| 357 | u64_stats_update_end(&rstatc->bsync); | ||
| 358 | cgroup_rstat_updated(cgrp, smp_processor_id()); | ||
| 359 | put_cpu_ptr(rstatc); | ||
| 360 | } | ||
| 361 | |||
| 362 | void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) | ||
| 363 | { | ||
| 364 | struct cgroup_rstat_cpu *rstatc; | ||
| 365 | |||
| 366 | rstatc = cgroup_base_stat_cputime_account_begin(cgrp); | ||
| 367 | rstatc->bstat.cputime.sum_exec_runtime += delta_exec; | ||
| 368 | cgroup_base_stat_cputime_account_end(cgrp, rstatc); | ||
| 369 | } | ||
| 370 | |||
| 371 | void __cgroup_account_cputime_field(struct cgroup *cgrp, | ||
| 372 | enum cpu_usage_stat index, u64 delta_exec) | ||
| 373 | { | ||
| 374 | struct cgroup_rstat_cpu *rstatc; | ||
| 375 | |||
| 376 | rstatc = cgroup_base_stat_cputime_account_begin(cgrp); | ||
| 377 | |||
| 378 | switch (index) { | ||
| 379 | case CPUTIME_USER: | ||
| 380 | case CPUTIME_NICE: | ||
| 381 | rstatc->bstat.cputime.utime += delta_exec; | ||
| 382 | break; | ||
| 383 | case CPUTIME_SYSTEM: | ||
| 384 | case CPUTIME_IRQ: | ||
| 385 | case CPUTIME_SOFTIRQ: | ||
| 386 | rstatc->bstat.cputime.stime += delta_exec; | ||
| 387 | break; | ||
| 388 | default: | ||
| 389 | break; | ||
| 390 | } | ||
| 391 | |||
| 392 | cgroup_base_stat_cputime_account_end(cgrp, rstatc); | ||
| 393 | } | ||
| 394 | |||
| 395 | void cgroup_base_stat_cputime_show(struct seq_file *seq) | ||
| 396 | { | ||
| 397 | struct cgroup *cgrp = seq_css(seq)->cgroup; | ||
| 398 | u64 usage, utime, stime; | ||
| 399 | |||
| 400 | if (!cgroup_parent(cgrp)) | ||
| 401 | return; | ||
| 402 | |||
| 403 | cgroup_rstat_flush_hold(cgrp); | ||
| 404 | usage = cgrp->bstat.cputime.sum_exec_runtime; | ||
| 405 | cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime); | ||
| 406 | cgroup_rstat_flush_release(); | ||
| 407 | |||
| 408 | do_div(usage, NSEC_PER_USEC); | ||
| 409 | do_div(utime, NSEC_PER_USEC); | ||
| 410 | do_div(stime, NSEC_PER_USEC); | ||
| 411 | |||
| 412 | seq_printf(seq, "usage_usec %llu\n" | ||
| 413 | "user_usec %llu\n" | ||
| 414 | "system_usec %llu\n", | ||
| 415 | usage, utime, stime); | ||
| 416 | } | ||
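The bookkeeping in cgroup_base_stat_flush() above reduces to a few lines of arithmetic per (cgroup, cpu): take a fresh snapshot of the per-cpu counters, turn it into a delta against the previous snapshot, fold in whatever deltas the children left pending, then add the result both to this cgroup's flushed total and to the parent's pending. The model below is an illustrative assumption in plain userspace C, not kernel code; it leans on the same ordering guarantee the real flush has, namely that cgroup_rstat_cpu_pop_updated() visits a child before its parent.

#include <stdio.h>
#include <stdint.h>

struct node {
	struct node *parent;
	uint64_t percpu_raw;	/* stands in for one cpu's rstatc->bstat */
	uint64_t last;		/* stands in for rstatc->last_bstat */
	uint64_t pending;	/* stands in for cgrp->pending_bstat */
	uint64_t total;		/* stands in for cgrp->bstat */
};

/* mirrors the delta handling in cgroup_base_stat_flush() for one cpu */
static void base_stat_flush(struct node *n)
{
	uint64_t delta = n->percpu_raw - n->last;

	n->last = n->percpu_raw;	/* take the new snapshot */
	delta += n->pending;		/* fold in what children left behind */
	n->pending = 0;
	n->total += delta;		/* this cgroup's flushed counter */
	if (n->parent)
		n->parent->pending += delta;	/* settled when the parent flushes */
}

int main(void)
{
	struct node parent = { NULL };
	struct node child = { &parent };

	child.percpu_raw = 100;		/* e.g. 100ns of runtime accrued */
	base_stat_flush(&child);	/* children are always flushed first */
	base_stat_flush(&parent);

	child.percpu_raw = 150;		/* 50 more since the last flush */
	base_stat_flush(&child);
	base_stat_flush(&parent);

	printf("child %llu, parent %llu\n",
	       (unsigned long long)child.total,
	       (unsigned long long)parent.total);	/* 150, 150 */
	return 0;
}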
diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c deleted file mode 100644 index 1e111dd455c4..000000000000 --- a/kernel/cgroup/stat.c +++ /dev/null | |||
| @@ -1,338 +0,0 @@ | |||
| 1 | #include "cgroup-internal.h" | ||
| 2 | |||
| 3 | #include <linux/sched/cputime.h> | ||
| 4 | |||
| 5 | static DEFINE_MUTEX(cgroup_stat_mutex); | ||
| 6 | static DEFINE_PER_CPU(raw_spinlock_t, cgroup_cpu_stat_lock); | ||
| 7 | |||
| 8 | static struct cgroup_cpu_stat *cgroup_cpu_stat(struct cgroup *cgrp, int cpu) | ||
| 9 | { | ||
| 10 | return per_cpu_ptr(cgrp->cpu_stat, cpu); | ||
| 11 | } | ||
| 12 | |||
| 13 | /** | ||
| 14 | * cgroup_cpu_stat_updated - keep track of updated cpu_stat | ||
| 15 | * @cgrp: target cgroup | ||
| 16 | * @cpu: cpu on which cpu_stat was updated | ||
| 17 | * | ||
| 18 | * @cgrp's cpu_stat on @cpu was updated. Put it on the parent's matching | ||
| 19 | * cpu_stat->updated_children list. See the comment on top of | ||
| 20 | * cgroup_cpu_stat definition for details. | ||
| 21 | */ | ||
| 22 | static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu) | ||
| 23 | { | ||
| 24 | raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu); | ||
| 25 | struct cgroup *parent; | ||
| 26 | unsigned long flags; | ||
| 27 | |||
| 28 | /* | ||
| 29 | * Speculative already-on-list test. This may race leading to | ||
| 30 | * temporary inaccuracies, which is fine. | ||
| 31 | * | ||
| 32 | * Because @parent's updated_children is terminated with @parent | ||
| 33 | * instead of NULL, we can tell whether @cgrp is on the list by | ||
| 34 | * testing the next pointer for NULL. | ||
| 35 | */ | ||
| 36 | if (cgroup_cpu_stat(cgrp, cpu)->updated_next) | ||
| 37 | return; | ||
| 38 | |||
| 39 | raw_spin_lock_irqsave(cpu_lock, flags); | ||
| 40 | |||
| 41 | /* put @cgrp and all ancestors on the corresponding updated lists */ | ||
| 42 | for (parent = cgroup_parent(cgrp); parent; | ||
| 43 | cgrp = parent, parent = cgroup_parent(cgrp)) { | ||
| 44 | struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu); | ||
| 45 | struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu); | ||
| 46 | |||
| 47 | /* | ||
| 48 | * Both additions and removals are bottom-up. If a cgroup | ||
| 49 | * is already in the tree, all ancestors are. | ||
| 50 | */ | ||
| 51 | if (cstat->updated_next) | ||
| 52 | break; | ||
| 53 | |||
| 54 | cstat->updated_next = pcstat->updated_children; | ||
| 55 | pcstat->updated_children = cgrp; | ||
| 56 | } | ||
| 57 | |||
| 58 | raw_spin_unlock_irqrestore(cpu_lock, flags); | ||
| 59 | } | ||
| 60 | |||
| 61 | /** | ||
| 62 | * cgroup_cpu_stat_pop_updated - iterate and dismantle cpu_stat updated tree | ||
| 63 | * @pos: current position | ||
| 64 | * @root: root of the tree to traversal | ||
| 65 | * @cpu: target cpu | ||
| 66 | * | ||
| 67 | * Walks the udpated cpu_stat tree on @cpu from @root. %NULL @pos starts | ||
| 68 | * the traversal and %NULL return indicates the end. During traversal, | ||
| 69 | * each returned cgroup is unlinked from the tree. Must be called with the | ||
| 70 | * matching cgroup_cpu_stat_lock held. | ||
| 71 | * | ||
| 72 | * The only ordering guarantee is that, for a parent and a child pair | ||
| 73 | * covered by a given traversal, if a child is visited, its parent is | ||
| 74 | * guaranteed to be visited afterwards. | ||
| 75 | */ | ||
| 76 | static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos, | ||
| 77 | struct cgroup *root, int cpu) | ||
| 78 | { | ||
| 79 | struct cgroup_cpu_stat *cstat; | ||
| 80 | struct cgroup *parent; | ||
| 81 | |||
| 82 | if (pos == root) | ||
| 83 | return NULL; | ||
| 84 | |||
| 85 | /* | ||
| 86 | * We're gonna walk down to the first leaf and visit/remove it. We | ||
| 87 | * can pick whatever unvisited node as the starting point. | ||
| 88 | */ | ||
| 89 | if (!pos) | ||
| 90 | pos = root; | ||
| 91 | else | ||
| 92 | pos = cgroup_parent(pos); | ||
| 93 | |||
| 94 | /* walk down to the first leaf */ | ||
| 95 | while (true) { | ||
| 96 | cstat = cgroup_cpu_stat(pos, cpu); | ||
| 97 | if (cstat->updated_children == pos) | ||
| 98 | break; | ||
| 99 | pos = cstat->updated_children; | ||
| 100 | } | ||
| 101 | |||
| 102 | /* | ||
| 103 | * Unlink @pos from the tree. As the updated_children list is | ||
| 104 | * singly linked, we have to walk it to find the removal point. | ||
| 105 | * However, due to the way we traverse, @pos will be the first | ||
| 106 | * child in most cases. The only exception is @root. | ||
| 107 | */ | ||
| 108 | parent = cgroup_parent(pos); | ||
| 109 | if (parent && cstat->updated_next) { | ||
| 110 | struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu); | ||
| 111 | struct cgroup_cpu_stat *ncstat; | ||
| 112 | struct cgroup **nextp; | ||
| 113 | |||
| 114 | nextp = &pcstat->updated_children; | ||
| 115 | while (true) { | ||
| 116 | ncstat = cgroup_cpu_stat(*nextp, cpu); | ||
| 117 | if (*nextp == pos) | ||
| 118 | break; | ||
| 119 | |||
| 120 | WARN_ON_ONCE(*nextp == parent); | ||
| 121 | nextp = &ncstat->updated_next; | ||
| 122 | } | ||
| 123 | |||
| 124 | *nextp = cstat->updated_next; | ||
| 125 | cstat->updated_next = NULL; | ||
| 126 | } | ||
| 127 | |||
| 128 | return pos; | ||
| 129 | } | ||
| 130 | |||
| 131 | static void cgroup_stat_accumulate(struct cgroup_stat *dst_stat, | ||
| 132 | struct cgroup_stat *src_stat) | ||
| 133 | { | ||
| 134 | dst_stat->cputime.utime += src_stat->cputime.utime; | ||
| 135 | dst_stat->cputime.stime += src_stat->cputime.stime; | ||
| 136 | dst_stat->cputime.sum_exec_runtime += src_stat->cputime.sum_exec_runtime; | ||
| 137 | } | ||
| 138 | |||
| 139 | static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu) | ||
| 140 | { | ||
| 141 | struct cgroup *parent = cgroup_parent(cgrp); | ||
| 142 | struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu); | ||
| 143 | struct task_cputime *last_cputime = &cstat->last_cputime; | ||
| 144 | struct task_cputime cputime; | ||
| 145 | struct cgroup_stat delta; | ||
| 146 | unsigned seq; | ||
| 147 | |||
| 148 | lockdep_assert_held(&cgroup_stat_mutex); | ||
| 149 | |||
| 150 | /* fetch the current per-cpu values */ | ||
| 151 | do { | ||
| 152 | seq = __u64_stats_fetch_begin(&cstat->sync); | ||
| 153 | cputime = cstat->cputime; | ||
| 154 | } while (__u64_stats_fetch_retry(&cstat->sync, seq)); | ||
| 155 | |||
| 156 | /* accumulate the deltas to propgate */ | ||
| 157 | delta.cputime.utime = cputime.utime - last_cputime->utime; | ||
| 158 | delta.cputime.stime = cputime.stime - last_cputime->stime; | ||
| 159 | delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime - | ||
| 160 | last_cputime->sum_exec_runtime; | ||
| 161 | *last_cputime = cputime; | ||
| 162 | |||
| 163 | /* transfer the pending stat into delta */ | ||
| 164 | cgroup_stat_accumulate(&delta, &cgrp->pending_stat); | ||
| 165 | memset(&cgrp->pending_stat, 0, sizeof(cgrp->pending_stat)); | ||
| 166 | |||
| 167 | /* propagate delta into the global stat and the parent's pending */ | ||
| 168 | cgroup_stat_accumulate(&cgrp->stat, &delta); | ||
| 169 | if (parent) | ||
| 170 | cgroup_stat_accumulate(&parent->pending_stat, &delta); | ||
| 171 | } | ||
| 172 | |||
| 173 | /* see cgroup_stat_flush() */ | ||
| 174 | static void cgroup_stat_flush_locked(struct cgroup *cgrp) | ||
| 175 | { | ||
| 176 | int cpu; | ||
| 177 | |||
| 178 | lockdep_assert_held(&cgroup_stat_mutex); | ||
| 179 | |||
| 180 | for_each_possible_cpu(cpu) { | ||
| 181 | raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu); | ||
| 182 | struct cgroup *pos = NULL; | ||
| 183 | |||
| 184 | raw_spin_lock_irq(cpu_lock); | ||
| 185 | while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu))) | ||
| 186 | cgroup_cpu_stat_flush_one(pos, cpu); | ||
| 187 | raw_spin_unlock_irq(cpu_lock); | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | /** | ||
| 192 | * cgroup_stat_flush - flush stats in @cgrp's subtree | ||
| 193 | * @cgrp: target cgroup | ||
| 194 | * | ||
| 195 | * Collect all per-cpu stats in @cgrp's subtree into the global counters | ||
| 196 | * and propagate them upwards. After this function returns, all cgroups in | ||
| 197 | * the subtree have up-to-date ->stat. | ||
| 198 | * | ||
| 199 | * This also gets all cgroups in the subtree including @cgrp off the | ||
| 200 | * ->updated_children lists. | ||
| 201 | */ | ||
| 202 | void cgroup_stat_flush(struct cgroup *cgrp) | ||
| 203 | { | ||
| 204 | mutex_lock(&cgroup_stat_mutex); | ||
| 205 | cgroup_stat_flush_locked(cgrp); | ||
| 206 | mutex_unlock(&cgroup_stat_mutex); | ||
| 207 | } | ||
| 208 | |||
| 209 | static struct cgroup_cpu_stat *cgroup_cpu_stat_account_begin(struct cgroup *cgrp) | ||
| 210 | { | ||
| 211 | struct cgroup_cpu_stat *cstat; | ||
| 212 | |||
| 213 | cstat = get_cpu_ptr(cgrp->cpu_stat); | ||
| 214 | u64_stats_update_begin(&cstat->sync); | ||
| 215 | return cstat; | ||
| 216 | } | ||
| 217 | |||
| 218 | static void cgroup_cpu_stat_account_end(struct cgroup *cgrp, | ||
| 219 | struct cgroup_cpu_stat *cstat) | ||
| 220 | { | ||
| 221 | u64_stats_update_end(&cstat->sync); | ||
| 222 | cgroup_cpu_stat_updated(cgrp, smp_processor_id()); | ||
| 223 | put_cpu_ptr(cstat); | ||
| 224 | } | ||
| 225 | |||
| 226 | void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) | ||
| 227 | { | ||
| 228 | struct cgroup_cpu_stat *cstat; | ||
| 229 | |||
| 230 | cstat = cgroup_cpu_stat_account_begin(cgrp); | ||
| 231 | cstat->cputime.sum_exec_runtime += delta_exec; | ||
| 232 | cgroup_cpu_stat_account_end(cgrp, cstat); | ||
| 233 | } | ||
| 234 | |||
| 235 | void __cgroup_account_cputime_field(struct cgroup *cgrp, | ||
| 236 | enum cpu_usage_stat index, u64 delta_exec) | ||
| 237 | { | ||
| 238 | struct cgroup_cpu_stat *cstat; | ||
| 239 | |||
| 240 | cstat = cgroup_cpu_stat_account_begin(cgrp); | ||
| 241 | |||
| 242 | switch (index) { | ||
| 243 | case CPUTIME_USER: | ||
| 244 | case CPUTIME_NICE: | ||
| 245 | cstat->cputime.utime += delta_exec; | ||
| 246 | break; | ||
| 247 | case CPUTIME_SYSTEM: | ||
| 248 | case CPUTIME_IRQ: | ||
| 249 | case CPUTIME_SOFTIRQ: | ||
| 250 | cstat->cputime.stime += delta_exec; | ||
| 251 | break; | ||
| 252 | default: | ||
| 253 | break; | ||
| 254 | } | ||
| 255 | |||
| 256 | cgroup_cpu_stat_account_end(cgrp, cstat); | ||
| 257 | } | ||
| 258 | |||
| 259 | void cgroup_stat_show_cputime(struct seq_file *seq) | ||
| 260 | { | ||
| 261 | struct cgroup *cgrp = seq_css(seq)->cgroup; | ||
| 262 | u64 usage, utime, stime; | ||
| 263 | |||
| 264 | if (!cgroup_parent(cgrp)) | ||
| 265 | return; | ||
| 266 | |||
| 267 | mutex_lock(&cgroup_stat_mutex); | ||
| 268 | |||
| 269 | cgroup_stat_flush_locked(cgrp); | ||
| 270 | |||
| 271 | usage = cgrp->stat.cputime.sum_exec_runtime; | ||
| 272 | cputime_adjust(&cgrp->stat.cputime, &cgrp->stat.prev_cputime, | ||
| 273 | &utime, &stime); | ||
| 274 | |||
| 275 | mutex_unlock(&cgroup_stat_mutex); | ||
| 276 | |||
| 277 | do_div(usage, NSEC_PER_USEC); | ||
| 278 | do_div(utime, NSEC_PER_USEC); | ||
| 279 | do_div(stime, NSEC_PER_USEC); | ||
| 280 | |||
| 281 | seq_printf(seq, "usage_usec %llu\n" | ||
| 282 | "user_usec %llu\n" | ||
| 283 | "system_usec %llu\n", | ||
| 284 | usage, utime, stime); | ||
| 285 | } | ||
| 286 | |||
| 287 | int cgroup_stat_init(struct cgroup *cgrp) | ||
| 288 | { | ||
| 289 | int cpu; | ||
| 290 | |||
| 291 | /* the root cgrp has cpu_stat preallocated */ | ||
| 292 | if (!cgrp->cpu_stat) { | ||
| 293 | cgrp->cpu_stat = alloc_percpu(struct cgroup_cpu_stat); | ||
| 294 | if (!cgrp->cpu_stat) | ||
| 295 | return -ENOMEM; | ||
| 296 | } | ||
| 297 | |||
| 298 | /* ->updated_children list is self terminated */ | ||
| 299 | for_each_possible_cpu(cpu) { | ||
| 300 | struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu); | ||
| 301 | |||
| 302 | cstat->updated_children = cgrp; | ||
| 303 | u64_stats_init(&cstat->sync); | ||
| 304 | } | ||
| 305 | |||
| 306 | prev_cputime_init(&cgrp->stat.prev_cputime); | ||
| 307 | |||
| 308 | return 0; | ||
| 309 | } | ||
| 310 | |||
| 311 | void cgroup_stat_exit(struct cgroup *cgrp) | ||
| 312 | { | ||
| 313 | int cpu; | ||
| 314 | |||
| 315 | cgroup_stat_flush(cgrp); | ||
| 316 | |||
| 317 | /* sanity check */ | ||
| 318 | for_each_possible_cpu(cpu) { | ||
| 319 | struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu); | ||
| 320 | |||
| 321 | if (WARN_ON_ONCE(cstat->updated_children != cgrp) || | ||
| 322 | WARN_ON_ONCE(cstat->updated_next)) | ||
| 323 | return; | ||
| 324 | } | ||
| 325 | |||
| 326 | free_percpu(cgrp->cpu_stat); | ||
| 327 | cgrp->cpu_stat = NULL; | ||
| 328 | } | ||
| 329 | |||
| 330 | void __init cgroup_stat_boot(void) | ||
| 331 | { | ||
| 332 | int cpu; | ||
| 333 | |||
| 334 | for_each_possible_cpu(cpu) | ||
| 335 | raw_spin_lock_init(per_cpu_ptr(&cgroup_cpu_stat_lock, cpu)); | ||
| 336 | |||
| 337 | BUG_ON(cgroup_stat_init(&cgrp_dfl_root.cgrp)); | ||
| 338 | } | ||
