aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2017-05-15 09:34:02 -0400
committer: Tejun Heo <tj@kernel.org> 2017-07-21 11:14:51 -0400
commit: 454000adaa2a7420df6e56a42f22726d05872a3f (patch)
tree: f4cd89577cc86f65822adf1fb238ad32c0f9007f
parent: bc2fb7ed089ffd16d26e1d95b898a37d2b37d201 (diff)
cgroup: introduce cgroup->dom_cgrp and threaded css_set handling
cgroup v2 is in the process of growing thread granularity support. A threaded subtree is composed of a thread root and threaded cgroups which are proper members of the subtree.

The root cgroup of the subtree serves as the domain cgroup to which the processes (as opposed to threads / tasks) of the subtree conceptually belong and to which domain-level resource consumptions not tied to any specific task are charged. Inside the subtree, threads won't be subject to process granularity or no-internal-task constraint and can be distributed arbitrarily across the subtree.

This patch introduces cgroup->dom_cgrp along with threaded css_set handling.

* cgroup->dom_cgrp points to self for normal and thread roots. For proper thread subtree members, it points to the dom_cgrp (the thread root).

* css_set->dom_cset points to self for normal and thread roots. If threaded, it points to the css_set which belongs to the cgrp->dom_cgrp. The dom_cgrp serves as the resource domain and keeps the matching csses available. The dom_cset holds those csses and makes them easily accessible.

* All threaded csets are linked on their dom_csets to enable iteration of all threaded tasks.

* cgroup->nr_threaded_children keeps track of the number of threaded children.

This patch adds the above but doesn't actually use them yet. The following patches will build on top.

v4: ->nr_threaded_children added.

v3: ->proc_cgrp/cset renamed to ->dom_cgrp/cset. Updated for the new enable-threaded-per-cgroup behavior.

v2: Added cgroup_is_threaded() helper.

Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- include/linux/cgroup-defs.h 33
-rw-r--r-- include/linux/cgroup.h 3
-rw-r--r-- kernel/cgroup/cgroup.c 69
3 files changed, 97 insertions, 8 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index ae7bc1e70085..651c4363c85e 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -172,6 +172,14 @@ struct css_set {
172 /* reference count */ 172 /* reference count */
173 refcount_t refcount; 173 refcount_t refcount;
174 174
175 /*
176 * For a domain cgroup, the following points to self. If threaded,
177 * to the matching cset of the nearest domain ancestor. The
178 * dom_cset provides access to the domain cgroup and its csses to
179 * which domain level resource consumptions should be charged.
180 */
181 struct css_set *dom_cset;
182
175 /* the default cgroup associated with this css_set */ 183 /* the default cgroup associated with this css_set */
176 struct cgroup *dfl_cgrp; 184 struct cgroup *dfl_cgrp;
177 185
@@ -200,6 +208,10 @@ struct css_set {
200 */ 208 */
201 struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; 209 struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
202 210
211 /* all threaded csets whose ->dom_cset points to this cset */
212 struct list_head threaded_csets;
213 struct list_head threaded_csets_node;
214
203 /* 215 /*
204 * List running through all cgroup groups in the same hash 216 * List running through all cgroup groups in the same hash
205 * slot. Protected by css_set_lock 217 * slot. Protected by css_set_lock
@@ -267,12 +279,16 @@ struct cgroup {
267 * doesn't have any tasks. 279 * doesn't have any tasks.
268 * 280 *
269 * All children which have non-zero nr_populated_csets and/or 281 * All children which have non-zero nr_populated_csets and/or
270 * nr_populated_children of their own contribute one to 282 * nr_populated_children of their own contribute one to either
271 * nr_populated_children. The counter is zero iff this cgroup's 283 * nr_populated_domain_children or nr_populated_threaded_children
272 * subtree proper doesn't have any tasks. 284 * depending on their type. Each counter is zero iff all cgroups
285 * of the type in the subtree proper don't have any tasks.
273 */ 286 */
274 int nr_populated_csets; 287 int nr_populated_csets;
275 int nr_populated_children; 288 int nr_populated_domain_children;
289 int nr_populated_threaded_children;
290
291 int nr_threaded_children; /* # of live threaded child cgroups */
276 292
277 struct kernfs_node *kn; /* cgroup kernfs entry */ 293 struct kernfs_node *kn; /* cgroup kernfs entry */
278 struct cgroup_file procs_file; /* handle for "cgroup.procs" */ 294 struct cgroup_file procs_file; /* handle for "cgroup.procs" */
@@ -311,6 +327,15 @@ struct cgroup {
311 struct list_head e_csets[CGROUP_SUBSYS_COUNT]; 327 struct list_head e_csets[CGROUP_SUBSYS_COUNT];
312 328
313 /* 329 /*
330 * If !threaded, self. If threaded, it points to the nearest
331 * domain ancestor. Inside a threaded subtree, cgroups are exempt
332 * from process granularity and no-internal-task constraint.
333 * Domain level resource consumptions which aren't tied to a
334 * specific task are charged to the dom_cgrp.
335 */
336 struct cgroup *dom_cgrp;
337
338 /*
314 * list of pidlists, up to two for each namespace (one for procs, one 339 * list of pidlists, up to two for each namespace (one for procs, one
315 * for tasks); created on demand. 340 * for tasks); created on demand.
316 */ 341 */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index cae5831ae650..b7dd23040cd5 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -541,7 +541,8 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
541/* no synchronization, the result can only be used as a hint */ 541/* no synchronization, the result can only be used as a hint */
542static inline bool cgroup_is_populated(struct cgroup *cgrp) 542static inline bool cgroup_is_populated(struct cgroup *cgrp)
543{ 543{
544 return cgrp->nr_populated_csets + cgrp->nr_populated_children; 544 return cgrp->nr_populated_csets + cgrp->nr_populated_domain_children +
545 cgrp->nr_populated_threaded_children;
545} 546}
546 547
547/* returns ino associated with a cgroup */ 548/* returns ino associated with a cgroup */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 3c5a37a9a892..c7e1c243b77d 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -330,6 +330,11 @@ static bool cgroup_has_tasks(struct cgroup *cgrp)
330 return cgrp->nr_populated_csets; 330 return cgrp->nr_populated_csets;
331} 331}
332 332
333static bool cgroup_is_threaded(struct cgroup *cgrp)
334{
335 return cgrp->dom_cgrp != cgrp;
336}
337
333/* subsystems visibly enabled on a cgroup */ 338/* subsystems visibly enabled on a cgroup */
334static u16 cgroup_control(struct cgroup *cgrp) 339static u16 cgroup_control(struct cgroup *cgrp)
335{ 340{
@@ -565,9 +570,11 @@ EXPORT_SYMBOL_GPL(of_css);
565 */ 570 */
566struct css_set init_css_set = { 571struct css_set init_css_set = {
567 .refcount = REFCOUNT_INIT(1), 572 .refcount = REFCOUNT_INIT(1),
573 .dom_cset = &init_css_set,
568 .tasks = LIST_HEAD_INIT(init_css_set.tasks), 574 .tasks = LIST_HEAD_INIT(init_css_set.tasks),
569 .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), 575 .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
570 .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), 576 .task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
577 .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
571 .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), 578 .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
572 .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), 579 .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
573 .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), 580 .mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
@@ -575,6 +582,11 @@ struct css_set init_css_set = {
575 582
576static int css_set_count = 1; /* 1 for init_css_set */ 583static int css_set_count = 1; /* 1 for init_css_set */
577 584
585static bool css_set_threaded(struct css_set *cset)
586{
587 return cset->dom_cset != cset;
588}
589
578/** 590/**
579 * css_set_populated - does a css_set contain any tasks? 591 * css_set_populated - does a css_set contain any tasks?
580 * @cset: target css_set 592 * @cset: target css_set
@@ -618,10 +630,14 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
618 do { 630 do {
619 bool was_populated = cgroup_is_populated(cgrp); 631 bool was_populated = cgroup_is_populated(cgrp);
620 632
621 if (!child) 633 if (!child) {
622 cgrp->nr_populated_csets += adj; 634 cgrp->nr_populated_csets += adj;
623 else 635 } else {
624 cgrp->nr_populated_children += adj; 636 if (cgroup_is_threaded(child))
637 cgrp->nr_populated_threaded_children += adj;
638 else
639 cgrp->nr_populated_domain_children += adj;
640 }
625 641
626 if (was_populated == cgroup_is_populated(cgrp)) 642 if (was_populated == cgroup_is_populated(cgrp))
627 break; 643 break;
@@ -747,6 +763,8 @@ void put_css_set_locked(struct css_set *cset)
747 if (!refcount_dec_and_test(&cset->refcount)) 763 if (!refcount_dec_and_test(&cset->refcount))
748 return; 764 return;
749 765
766 WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
767
750 /* This css_set is dead. unlink it and release cgroup and css refs */ 768 /* This css_set is dead. unlink it and release cgroup and css refs */
751 for_each_subsys(ss, ssid) { 769 for_each_subsys(ss, ssid) {
752 list_del(&cset->e_cset_node[ssid]); 770 list_del(&cset->e_cset_node[ssid]);
@@ -763,6 +781,11 @@ void put_css_set_locked(struct css_set *cset)
763 kfree(link); 781 kfree(link);
764 } 782 }
765 783
784 if (css_set_threaded(cset)) {
785 list_del(&cset->threaded_csets_node);
786 put_css_set_locked(cset->dom_cset);
787 }
788
766 kfree_rcu(cset, rcu_head); 789 kfree_rcu(cset, rcu_head);
767} 790}
768 791
@@ -781,6 +804,7 @@ static bool compare_css_sets(struct css_set *cset,
781 struct cgroup *new_cgrp, 804 struct cgroup *new_cgrp,
782 struct cgroup_subsys_state *template[]) 805 struct cgroup_subsys_state *template[])
783{ 806{
807 struct cgroup *new_dfl_cgrp;
784 struct list_head *l1, *l2; 808 struct list_head *l1, *l2;
785 809
786 /* 810 /*
@@ -791,6 +815,16 @@ static bool compare_css_sets(struct css_set *cset,
791 if (memcmp(template, cset->subsys, sizeof(cset->subsys))) 815 if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
792 return false; 816 return false;
793 817
818
819 /* @cset's domain should match the default cgroup's */
820 if (cgroup_on_dfl(new_cgrp))
821 new_dfl_cgrp = new_cgrp;
822 else
823 new_dfl_cgrp = old_cset->dfl_cgrp;
824
825 if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
826 return false;
827
794 /* 828 /*
795 * Compare cgroup pointers in order to distinguish between 829 * Compare cgroup pointers in order to distinguish between
796 * different cgroups in hierarchies. As different cgroups may 830 * different cgroups in hierarchies. As different cgroups may
@@ -998,9 +1032,11 @@ static struct css_set *find_css_set(struct css_set *old_cset,
998 } 1032 }
999 1033
1000 refcount_set(&cset->refcount, 1); 1034 refcount_set(&cset->refcount, 1);
1035 cset->dom_cset = cset;
1001 INIT_LIST_HEAD(&cset->tasks); 1036 INIT_LIST_HEAD(&cset->tasks);
1002 INIT_LIST_HEAD(&cset->mg_tasks); 1037 INIT_LIST_HEAD(&cset->mg_tasks);
1003 INIT_LIST_HEAD(&cset->task_iters); 1038 INIT_LIST_HEAD(&cset->task_iters);
1039 INIT_LIST_HEAD(&cset->threaded_csets);
1004 INIT_HLIST_NODE(&cset->hlist); 1040 INIT_HLIST_NODE(&cset->hlist);
1005 INIT_LIST_HEAD(&cset->cgrp_links); 1041 INIT_LIST_HEAD(&cset->cgrp_links);
1006 INIT_LIST_HEAD(&cset->mg_preload_node); 1042 INIT_LIST_HEAD(&cset->mg_preload_node);
@@ -1038,6 +1074,28 @@ static struct css_set *find_css_set(struct css_set *old_cset,
1038 1074
1039 spin_unlock_irq(&css_set_lock); 1075 spin_unlock_irq(&css_set_lock);
1040 1076
1077 /*
1078 * If @cset should be threaded, look up the matching dom_cset and
1079 * link them up. We first fully initialize @cset then look for the
1080 * dom_cset. It's simpler this way and safe as @cset is guaranteed
1081 * to stay empty until we return.
1082 */
1083 if (cgroup_is_threaded(cset->dfl_cgrp)) {
1084 struct css_set *dcset;
1085
1086 dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
1087 if (!dcset) {
1088 put_css_set(cset);
1089 return NULL;
1090 }
1091
1092 spin_lock_irq(&css_set_lock);
1093 cset->dom_cset = dcset;
1094 list_add_tail(&cset->threaded_csets_node,
1095 &dcset->threaded_csets);
1096 spin_unlock_irq(&css_set_lock);
1097 }
1098
1041 return cset; 1099 return cset;
1042} 1100}
1043 1101
@@ -1680,6 +1738,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1680 mutex_init(&cgrp->pidlist_mutex); 1738 mutex_init(&cgrp->pidlist_mutex);
1681 cgrp->self.cgroup = cgrp; 1739 cgrp->self.cgroup = cgrp;
1682 cgrp->self.flags |= CSS_ONLINE; 1740 cgrp->self.flags |= CSS_ONLINE;
1741 cgrp->dom_cgrp = cgrp;
1683 1742
1684 for_each_subsys(ss, ssid) 1743 for_each_subsys(ss, ssid)
1685 INIT_LIST_HEAD(&cgrp->e_csets[ssid]); 1744 INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -4408,6 +4467,7 @@ static void kill_css(struct cgroup_subsys_state *css)
4408static int cgroup_destroy_locked(struct cgroup *cgrp) 4467static int cgroup_destroy_locked(struct cgroup *cgrp)
4409 __releases(&cgroup_mutex) __acquires(&cgroup_mutex) 4468 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
4410{ 4469{
4470 struct cgroup *parent = cgroup_parent(cgrp);
4411 struct cgroup_subsys_state *css; 4471 struct cgroup_subsys_state *css;
4412 struct cgrp_cset_link *link; 4472 struct cgrp_cset_link *link;
4413 int ssid; 4473 int ssid;
@@ -4452,6 +4512,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
4452 */ 4512 */
4453 kernfs_remove(cgrp->kn); 4513 kernfs_remove(cgrp->kn);
4454 4514
4515 if (parent && cgroup_is_threaded(cgrp))
4516 parent->nr_threaded_children--;
4517
4455 cgroup1_check_for_release(cgroup_parent(cgrp)); 4518 cgroup1_check_for_release(cgroup_parent(cgrp));
4456 4519
4457 /* put the base reference */ 4520 /* put the base reference */