author     Linus Torvalds <torvalds@linux-foundation.org>  2017-11-15 17:29:44 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-11-15 17:29:44 -0500
commit     22714a2ba4b55737cd7d5299db7aaf1fa8287354 (patch)
tree       32b25f2e3e40732156a8a8d0dcb2ddf38410776f /kernel/cgroup
parent     766ec76a27aa9dfdfee3a80f29ddc1f7539c71f9 (diff)
parent     5f2e673405b742be64e7c3604ed4ed3ac14f35ce (diff)
Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
"Cgroup2 cpu controller support is finally merged.
- Basic cpu statistics support to allow monitoring by default without
the CPU controller enabled.
- cgroup2 cpu controller support.
- /sys/kernel/cgroup files to help dealing with new / optional
features"
* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: export list of cgroups v2 features using sysfs
cgroup: export list of delegatable control files using sysfs
cgroup: mark @cgrp __maybe_unused in cpu_stat_show()
MAINTAINERS: relocate cpuset.c
cgroup, sched: Move basic cpu stats from cgroup.stat to cpu.stat
sched: Implement interface for cgroup unified hierarchy
sched: Misc preps for cgroup unified hierarchy interface
sched/cputime: Add dummy cputime_adjust() implementation for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
cgroup: statically initialize init_css_set->dfl_cgrp
cgroup: Implement cgroup2 basic CPU usage accounting
cpuacct: Introduce cgroup_account_cputime[_field]()
sched/cputime: Expose cputime_adjust()
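
The basic accounting added by this series is reported through a per-cgroup "cpu.stat" file on the unified hierarchy. As a minimal userspace sketch (not part of the series itself), the following reader parses that file; the cgroup2 mount point /sys/fs/cgroup and the group name "test" are assumptions made for the example only.

/* read_cpu_stat.c - hypothetical reader for a cgroup2 cpu.stat file */
#include <stdio.h>

int main(void)
{
	/* Assumed path: cgroup2 mounted at /sys/fs/cgroup, group "test". */
	FILE *f = fopen("/sys/fs/cgroup/test/cpu.stat", "r");
	char key[64];
	unsigned long long val;

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Each line is "<key> <value>", e.g. "usage_usec 1234". */
	while (fscanf(f, "%63s %llu", key, &val) == 2)
		printf("%s = %llu\n", key, val);
	fclose(f);
	return 0;
}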
Diffstat (limited to 'kernel/cgroup')
 -rw-r--r--  kernel/cgroup/Makefile           |   2
 -rw-r--r--  kernel/cgroup/cgroup-internal.h  |   9
 -rw-r--r--  kernel/cgroup/cgroup.c           | 157
 -rw-r--r--  kernel/cgroup/stat.c             | 334
 4 files changed, 499 insertions(+), 3 deletions(-)
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index ae448f7632cc..2be89a003185 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y := cgroup.o namespace.o cgroup-v1.o
+obj-y := cgroup.o stat.o namespace.o cgroup-v1.o
 
 obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
 obj-$(CONFIG_CGROUP_PIDS) += pids.o
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index bf54ade001be..b928b27050c6 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -201,6 +201,15 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 int cgroup_task_count(const struct cgroup *cgrp);
 
 /*
+ * stat.c
+ */
+void cgroup_stat_flush(struct cgroup *cgrp);
+int cgroup_stat_init(struct cgroup *cgrp);
+void cgroup_stat_exit(struct cgroup *cgrp);
+void cgroup_stat_show_cputime(struct seq_file *seq);
+void cgroup_stat_boot(void);
+
+/*
  * namespace.c
  */
 extern const struct proc_ns_operations cgroupns_operations;
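
The entry points declared above are implemented in the new kernel/cgroup/stat.c and consumed by cgroup.c below; the producers are the cgroup_account_cputime[_field]() wrappers added on the scheduler side of this series, which are not part of this diff. The following is only a rough sketch of how such a producer feeds deltas into the per-cpu counters; charge_exec() and charge_user() are made-up names for illustration.

/*
 * Illustrative only: the real callers live behind the scheduler-side
 * cgroup_account_cputime[_field]() wrappers added elsewhere in this
 * series. These helpers are not part of the patch.
 */
static inline void charge_exec(struct cgroup *cgrp, u64 delta_ns)
{
	/* raw execution time, accumulated into sum_exec_runtime */
	__cgroup_account_cputime(cgrp, delta_ns);
}

static inline void charge_user(struct cgroup *cgrp, u64 delta_ns)
{
	/* CPUTIME_USER/CPUTIME_NICE fold into utime, system-type indexes into stime */
	__cgroup_account_cputime_field(cgrp, CPUTIME_USER, delta_ns);
}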
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 00f5b358aeac..0b1ffe147f24 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -142,12 +142,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
 };
 #undef SUBSYS
 
+static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat);
+
 /*
  * The default hierarchy, reserved for the subsystems that are otherwise
  * unattached - it never has more than a single cgroup, and all tasks are
  * part of that cgroup.
  */
-struct cgroup_root cgrp_dfl_root;
+struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat };
 EXPORT_SYMBOL_GPL(cgrp_dfl_root);
 
 /*
@@ -462,6 +464,28 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
 }
 
 /**
+ * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest
+ *
+ * Find and get @cgrp's css associated with @ss. If the css doesn't exist
+ * or is offline, %NULL is returned.
+ */
+static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
+						     struct cgroup_subsys *ss)
+{
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+	css = cgroup_css(cgrp, ss);
+	if (!css || !css_tryget_online(css))
+		css = NULL;
+	rcu_read_unlock();
+
+	return css;
+}
+
+/**
  * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
@@ -647,6 +671,14 @@ struct css_set init_css_set = {
 	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
 	.mg_preload_node	= LIST_HEAD_INIT(init_css_set.mg_preload_node),
 	.mg_node		= LIST_HEAD_INIT(init_css_set.mg_node),
+
+	/*
+	 * The following field is re-initialized when this cset gets linked
+	 * in cgroup_init(). However, let's initialize the field
+	 * statically too so that the default cgroup can be accessed safely
+	 * early during boot.
+	 */
+	.dfl_cgrp		= &cgrp_dfl_root.cgrp,
 };
 
 static int css_set_count = 1;	/* 1 for init_css_set */
@@ -3315,6 +3347,37 @@ static int cgroup_stat_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
+						 struct cgroup *cgrp, int ssid)
+{
+	struct cgroup_subsys *ss = cgroup_subsys[ssid];
+	struct cgroup_subsys_state *css;
+	int ret;
+
+	if (!ss->css_extra_stat_show)
+		return 0;
+
+	css = cgroup_tryget_css(cgrp, ss);
+	if (!css)
+		return 0;
+
+	ret = ss->css_extra_stat_show(seq, css);
+	css_put(css);
+	return ret;
+}
+
+static int cpu_stat_show(struct seq_file *seq, void *v)
+{
+	struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
+	int ret = 0;
+
+	cgroup_stat_show_cputime(seq);
+#ifdef CONFIG_CGROUP_SCHED
+	ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
+#endif
+	return ret;
+}
+
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
 	struct cftype *cft = of->kn->priv;
@@ -4422,6 +4485,11 @@ static struct cftype cgroup_base_files[] = {
 		.name = "cgroup.stat",
 		.seq_show = cgroup_stat_show,
 	},
+	{
+		.name = "cpu.stat",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cpu_stat_show,
+	},
 	{ }	/* terminate */
 };
 
@@ -4482,6 +4550,8 @@ static void css_free_work_fn(struct work_struct *work)
 		 */
 		cgroup_put(cgroup_parent(cgrp));
 		kernfs_put(cgrp->kn);
+		if (cgroup_on_dfl(cgrp))
+			cgroup_stat_exit(cgrp);
 		kfree(cgrp);
 	} else {
 		/*
@@ -4526,6 +4596,9 @@ static void css_release_work_fn(struct work_struct *work)
 		/* cgroup release path */
 		trace_cgroup_release(cgrp);
 
+		if (cgroup_on_dfl(cgrp))
+			cgroup_stat_flush(cgrp);
+
 		for (tcgrp = cgroup_parent(cgrp); tcgrp;
 		     tcgrp = cgroup_parent(tcgrp))
 			tcgrp->nr_dying_descendants--;
@@ -4709,6 +4782,12 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	if (ret)
 		goto out_free_cgrp;
 
+	if (cgroup_on_dfl(parent)) {
+		ret = cgroup_stat_init(cgrp);
+		if (ret)
+			goto out_cancel_ref;
+	}
+
 	/*
 	 * Temporarily set the pointer to NULL, so idr_find() won't return
 	 * a half-baked cgroup.
@@ -4716,7 +4795,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
 	if (cgrp->id < 0) {
 		ret = -ENOMEM;
-		goto out_cancel_ref;
+		goto out_stat_exit;
 	}
 
 	init_cgroup_housekeeping(cgrp);
@@ -4767,6 +4846,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 
 out_idr_free:
 	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
+out_stat_exit:
+	if (cgroup_on_dfl(parent))
+		cgroup_stat_exit(cgrp);
 out_cancel_ref:
 	percpu_ref_exit(&cgrp->self.refcnt);
 out_free_cgrp:
@@ -5161,6 +5243,8 @@ int __init cgroup_init(void)
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
 
+	cgroup_stat_boot();
+
 	/*
 	 * The latency of the synchronize_sched() is too high for cgroups,
 	 * avoid it at the cost of forcing all readers into the slow path.
@@ -5780,3 +5864,72 @@ int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	return ret;
 }
 #endif /* CONFIG_CGROUP_BPF */
+
+#ifdef CONFIG_SYSFS
+static ssize_t show_delegatable_files(struct cftype *files, char *buf,
+				      ssize_t size, const char *prefix)
+{
+	struct cftype *cft;
+	ssize_t ret = 0;
+
+	for (cft = files; cft && cft->name[0] != '\0'; cft++) {
+		if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
+			continue;
+
+		if (prefix)
+			ret += snprintf(buf + ret, size - ret, "%s.", prefix);
+
+		ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);
+
+		if (unlikely(ret >= size)) {
+			WARN_ON(1);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *buf)
+{
+	struct cgroup_subsys *ss;
+	int ssid;
+	ssize_t ret = 0;
+
+	ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
+				     NULL);
+
+	for_each_subsys(ss, ssid)
+		ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
+					      PAGE_SIZE - ret,
+					      cgroup_subsys_name[ssid]);
+
+	return ret;
+}
+static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
+
+static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "nsdelegate\n");
+}
+static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
+
+static struct attribute *cgroup_sysfs_attrs[] = {
+	&cgroup_delegate_attr.attr,
+	&cgroup_features_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group cgroup_sysfs_attr_group = {
+	.attrs = cgroup_sysfs_attrs,
+	.name = "cgroup",
+};
+
+static int __init cgroup_sysfs_init(void)
+{
+	return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
+}
+subsys_initcall(cgroup_sysfs_init);
+#endif /* CONFIG_SYSFS */
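
Once cgroup_sysfs_init() has run, the two attributes registered above appear as /sys/kernel/cgroup/delegate and /sys/kernel/cgroup/features. A small userspace sketch for dumping them follows; the exact contents depend on the compiled-in controllers, so the values shown in the comments are examples only.

/* dump_cgroup_sysfs.c - illustrative reader for the new sysfs files */
#include <stdio.h>

static void dump(const char *path)
{
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f)
		return;
	printf("%s:\n", path);
	while (fgets(line, sizeof(line), f))
		printf("  %s", line);	/* e.g. "cgroup.procs" in delegate */
	fclose(f);
}

int main(void)
{
	dump("/sys/kernel/cgroup/delegate");	/* delegatable control files */
	dump("/sys/kernel/cgroup/features");	/* "nsdelegate" as of this series */
	return 0;
}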
diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c
new file mode 100644
index 000000000000..133b465691d6
--- /dev/null
+++ b/kernel/cgroup/stat.c
@@ -0,0 +1,334 @@
+#include "cgroup-internal.h"
+
+#include <linux/sched/cputime.h>
+
+static DEFINE_MUTEX(cgroup_stat_mutex);
+static DEFINE_PER_CPU(raw_spinlock_t, cgroup_cpu_stat_lock);
+
+static struct cgroup_cpu_stat *cgroup_cpu_stat(struct cgroup *cgrp, int cpu)
+{
+	return per_cpu_ptr(cgrp->cpu_stat, cpu);
+}
+
+/**
+ * cgroup_cpu_stat_updated - keep track of updated cpu_stat
+ * @cgrp: target cgroup
+ * @cpu: cpu on which cpu_stat was updated
+ *
+ * @cgrp's cpu_stat on @cpu was updated. Put it on the parent's matching
+ * cpu_stat->updated_children list. See the comment on top of
+ * cgroup_cpu_stat definition for details.
+ */
+static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
+{
+	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
+	struct cgroup *parent;
+	unsigned long flags;
+
+	/*
+	 * Speculative already-on-list test. This may race leading to
+	 * temporary inaccuracies, which is fine.
+	 *
+	 * Because @parent's updated_children is terminated with @parent
+	 * instead of NULL, we can tell whether @cgrp is on the list by
+	 * testing the next pointer for NULL.
+	 */
+	if (cgroup_cpu_stat(cgrp, cpu)->updated_next)
+		return;
+
+	raw_spin_lock_irqsave(cpu_lock, flags);
+
+	/* put @cgrp and all ancestors on the corresponding updated lists */
+	for (parent = cgroup_parent(cgrp); parent;
+	     cgrp = parent, parent = cgroup_parent(cgrp)) {
+		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
+		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
+
+		/*
+		 * Both additions and removals are bottom-up. If a cgroup
+		 * is already in the tree, all ancestors are.
+		 */
+		if (cstat->updated_next)
+			break;
+
+		cstat->updated_next = pcstat->updated_children;
+		pcstat->updated_children = cgrp;
+	}
+
+	raw_spin_unlock_irqrestore(cpu_lock, flags);
+}
+
+/**
+ * cgroup_cpu_stat_pop_updated - iterate and dismantle cpu_stat updated tree
+ * @pos: current position
+ * @root: root of the tree to traverse
+ * @cpu: target cpu
+ *
+ * Walks the updated cpu_stat tree on @cpu from @root. %NULL @pos starts
+ * the traversal and %NULL return indicates the end. During traversal,
+ * each returned cgroup is unlinked from the tree. Must be called with the
+ * matching cgroup_cpu_stat_lock held.
+ *
+ * The only ordering guarantee is that, for a parent and a child pair
+ * covered by a given traversal, if a child is visited, its parent is
+ * guaranteed to be visited afterwards.
+ */
+static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
+						  struct cgroup *root, int cpu)
+{
+	struct cgroup_cpu_stat *cstat;
+	struct cgroup *parent;
+
+	if (pos == root)
+		return NULL;
+
+	/*
+	 * We're gonna walk down to the first leaf and visit/remove it. We
+	 * can pick any unvisited node as the starting point.
+	 */
+	if (!pos)
+		pos = root;
+	else
+		pos = cgroup_parent(pos);
+
+	/* walk down to the first leaf */
+	while (true) {
+		cstat = cgroup_cpu_stat(pos, cpu);
+		if (cstat->updated_children == pos)
+			break;
+		pos = cstat->updated_children;
+	}
+
+	/*
+	 * Unlink @pos from the tree. As the updated_children list is
+	 * singly linked, we have to walk it to find the removal point.
+	 * However, due to the way we traverse, @pos will be the first
+	 * child in most cases. The only exception is @root.
+	 */
+	parent = cgroup_parent(pos);
+	if (parent && cstat->updated_next) {
+		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
+		struct cgroup_cpu_stat *ncstat;
+		struct cgroup **nextp;
+
+		nextp = &pcstat->updated_children;
+		while (true) {
+			ncstat = cgroup_cpu_stat(*nextp, cpu);
+			if (*nextp == pos)
+				break;
+
+			WARN_ON_ONCE(*nextp == parent);
+			nextp = &ncstat->updated_next;
+		}
+
+		*nextp = cstat->updated_next;
+		cstat->updated_next = NULL;
+	}
+
+	return pos;
+}
+
+static void cgroup_stat_accumulate(struct cgroup_stat *dst_stat,
+				   struct cgroup_stat *src_stat)
+{
+	dst_stat->cputime.utime += src_stat->cputime.utime;
+	dst_stat->cputime.stime += src_stat->cputime.stime;
+	dst_stat->cputime.sum_exec_runtime += src_stat->cputime.sum_exec_runtime;
+}
+
+static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu)
+{
+	struct cgroup *parent = cgroup_parent(cgrp);
+	struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
+	struct task_cputime *last_cputime = &cstat->last_cputime;
+	struct task_cputime cputime;
+	struct cgroup_stat delta;
+	unsigned seq;
+
+	lockdep_assert_held(&cgroup_stat_mutex);
+
+	/* fetch the current per-cpu values */
+	do {
+		seq = __u64_stats_fetch_begin(&cstat->sync);
+		cputime = cstat->cputime;
+	} while (__u64_stats_fetch_retry(&cstat->sync, seq));
+
+	/* accumulate the deltas to propagate */
+	delta.cputime.utime = cputime.utime - last_cputime->utime;
+	delta.cputime.stime = cputime.stime - last_cputime->stime;
+	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
+					 last_cputime->sum_exec_runtime;
+	*last_cputime = cputime;
+
+	/* transfer the pending stat into delta */
+	cgroup_stat_accumulate(&delta, &cgrp->pending_stat);
+	memset(&cgrp->pending_stat, 0, sizeof(cgrp->pending_stat));
+
+	/* propagate delta into the global stat and the parent's pending */
+	cgroup_stat_accumulate(&cgrp->stat, &delta);
+	if (parent)
+		cgroup_stat_accumulate(&parent->pending_stat, &delta);
+}
+
+/* see cgroup_stat_flush() */
+static void cgroup_stat_flush_locked(struct cgroup *cgrp)
+{
+	int cpu;
+
+	lockdep_assert_held(&cgroup_stat_mutex);
+
+	for_each_possible_cpu(cpu) {
+		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
+		struct cgroup *pos = NULL;
+
+		raw_spin_lock_irq(cpu_lock);
+		while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu)))
+			cgroup_cpu_stat_flush_one(pos, cpu);
+		raw_spin_unlock_irq(cpu_lock);
+	}
+}
+
+/**
+ * cgroup_stat_flush - flush stats in @cgrp's subtree
+ * @cgrp: target cgroup
+ *
+ * Collect all per-cpu stats in @cgrp's subtree into the global counters
+ * and propagate them upwards. After this function returns, all cgroups in
+ * the subtree have up-to-date ->stat.
+ *
+ * This also gets all cgroups in the subtree including @cgrp off the
+ * ->updated_children lists.
+ */
+void cgroup_stat_flush(struct cgroup *cgrp)
+{
+	mutex_lock(&cgroup_stat_mutex);
+	cgroup_stat_flush_locked(cgrp);
+	mutex_unlock(&cgroup_stat_mutex);
+}
+
+static struct cgroup_cpu_stat *cgroup_cpu_stat_account_begin(struct cgroup *cgrp)
+{
+	struct cgroup_cpu_stat *cstat;
+
+	cstat = get_cpu_ptr(cgrp->cpu_stat);
+	u64_stats_update_begin(&cstat->sync);
+	return cstat;
+}
+
+static void cgroup_cpu_stat_account_end(struct cgroup *cgrp,
+					struct cgroup_cpu_stat *cstat)
+{
+	u64_stats_update_end(&cstat->sync);
+	cgroup_cpu_stat_updated(cgrp, smp_processor_id());
+	put_cpu_ptr(cstat);
+}
+
+void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
+{
+	struct cgroup_cpu_stat *cstat;
+
+	cstat = cgroup_cpu_stat_account_begin(cgrp);
+	cstat->cputime.sum_exec_runtime += delta_exec;
+	cgroup_cpu_stat_account_end(cgrp, cstat);
+}
+
+void __cgroup_account_cputime_field(struct cgroup *cgrp,
+				    enum cpu_usage_stat index, u64 delta_exec)
+{
+	struct cgroup_cpu_stat *cstat;
+
+	cstat = cgroup_cpu_stat_account_begin(cgrp);
+
+	switch (index) {
+	case CPUTIME_USER:
+	case CPUTIME_NICE:
+		cstat->cputime.utime += delta_exec;
+		break;
+	case CPUTIME_SYSTEM:
+	case CPUTIME_IRQ:
+	case CPUTIME_SOFTIRQ:
+		cstat->cputime.stime += delta_exec;
+		break;
+	default:
+		break;
+	}
+
+	cgroup_cpu_stat_account_end(cgrp, cstat);
+}
+
+void cgroup_stat_show_cputime(struct seq_file *seq)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+	u64 usage, utime, stime;
+
+	if (!cgroup_parent(cgrp))
+		return;
+
+	mutex_lock(&cgroup_stat_mutex);
+
+	cgroup_stat_flush_locked(cgrp);
+
+	usage = cgrp->stat.cputime.sum_exec_runtime;
+	cputime_adjust(&cgrp->stat.cputime, &cgrp->stat.prev_cputime,
+		       &utime, &stime);
+
+	mutex_unlock(&cgroup_stat_mutex);
+
+	do_div(usage, NSEC_PER_USEC);
+	do_div(utime, NSEC_PER_USEC);
+	do_div(stime, NSEC_PER_USEC);
+
+	seq_printf(seq, "usage_usec %llu\n"
+		   "user_usec %llu\n"
+		   "system_usec %llu\n",
+		   usage, utime, stime);
+}
+
+int cgroup_stat_init(struct cgroup *cgrp)
+{
+	int cpu;
+
+	/* the root cgrp has cpu_stat preallocated */
+	if (!cgrp->cpu_stat) {
+		cgrp->cpu_stat = alloc_percpu(struct cgroup_cpu_stat);
+		if (!cgrp->cpu_stat)
+			return -ENOMEM;
+	}
+
+	/* ->updated_children list is self terminated */
+	for_each_possible_cpu(cpu)
+		cgroup_cpu_stat(cgrp, cpu)->updated_children = cgrp;
+
+	prev_cputime_init(&cgrp->stat.prev_cputime);
+
+	return 0;
+}
+
+void cgroup_stat_exit(struct cgroup *cgrp)
+{
+	int cpu;
+
+	cgroup_stat_flush(cgrp);
+
+	/* sanity check */
+	for_each_possible_cpu(cpu) {
+		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
+
+		if (WARN_ON_ONCE(cstat->updated_children != cgrp) ||
+		    WARN_ON_ONCE(cstat->updated_next))
+			return;
+	}
+
+	free_percpu(cgrp->cpu_stat);
+	cgrp->cpu_stat = NULL;
+}
+
+void __init cgroup_stat_boot(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		raw_spin_lock_init(per_cpu_ptr(&cgroup_cpu_stat_lock, cpu));
+
+	BUG_ON(cgroup_stat_init(&cgrp_dfl_root.cgrp));
+}
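
To make the bookkeeping in cgroup_cpu_stat_updated() and cgroup_cpu_stat_pop_updated() easier to follow, here is a stand-alone toy model of the "self-terminated" per-cpu updated list: an empty updated_children list points back at the node itself, and updated_next == NULL means the node is not linked on its parent's list. This is a userspace sketch for illustration only, not kernel code.

/* updated_list_model.c - toy model of the self-terminated updated list */
#include <stdio.h>
#include <stddef.h>

struct node {
	const char *name;
	struct node *parent;
	struct node *updated_children;	/* points to self when empty */
	struct node *updated_next;	/* NULL when not on parent's list */
};

static void mark_updated(struct node *n)
{
	/* bottom-up linking: stop as soon as an ancestor is already linked */
	for (struct node *p = n->parent; p; n = p, p = n->parent) {
		if (n->updated_next)
			break;
		n->updated_next = p->updated_children;
		p->updated_children = n;
	}
}

int main(void)
{
	struct node root = { "root", NULL }, a = { "a", &root }, b = { "b", &a };

	root.updated_children = &root;
	a.updated_children = &a;
	b.updated_children = &b;

	mark_updated(&b);
	/* b is now on a's list and a on root's list, mirroring the kernel code. */
	printf("root's first updated child: %s\n", root.updated_children->name);
	printf("a's first updated child:    %s\n", a.updated_children->name);
	return 0;
}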