aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-04-25 18:28:02 -0400
committerTejun Heo <tj@kernel.org>2014-04-25 18:28:02 -0400
commit842b597ee0a7e1aa5a3148164ffdba00ec17f614 (patch)
tree545209e6b3830a92bae889d41182d15dcccc01aa /kernel/cgroup.c
parent50bce01b0ee34ab9f18a2d5a7467053dda355d30 (diff)
cgroup: implement cgroup.populated for the default hierarchy
cgroup users often need a way to determine when a cgroup's subhierarchy becomes empty so that it can be cleaned up. cgroup currently provides release_agent for it; unfortunately, this mechanism is riddled with issues. * It delivers events by forking and execing a userland binary specified as the release_agent. This is a long deprecated method of notification delivery. It's extremely heavy, slow and cumbersome to integrate with larger infrastructure. * There is a single monitoring point at the root. There's no way to delegate management of a subtree. * The event isn't recursive. It triggers when a cgroup doesn't have any tasks or child cgroups. Events for internal nodes trigger only after all children are removed. This again makes it impossible to delegate management of a subtree. * Events are filtered from the kernel side. The "notify_on_release" file is used to subscribe to or suppress release events. This is unnecessarily complicated and probably done this way because event delivery itself was expensive. This patch implements interface file "cgroup.populated" which can be used to monitor whether the cgroup's subhierarchy has tasks in it or not. Its value is 0 if there is no task in the cgroup and its descendants; otherwise, 1, and a kernfs_notify() notification is triggered when the value changes, which can be monitored through poll and [di]notify. This is a lot lighter and simpler and trivially allows delegating management of a subhierarchy - subhierarchy monitoring can block further propagation simply by putting itself or another process in the root of the subhierarchy and monitoring events that it's interested in from there without interfering with monitoring higher in the tree. v2: Patch description updated as per Serge. v3: "cgroup.subtree_populated" renamed to "cgroup.populated". 
The subtree_ prefix was a bit confusing because "cgroup.subtree_control" uses it to denote the tree rooted at the cgroup sans the cgroup itself while the populated state includes the cgroup itself. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Serge Hallyn <serge.hallyn@ubuntu.com> Acked-by: Li Zefan <lizefan@huawei.com> Cc: Lennart Poettering <lennart@poettering.net>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c65
1 files changed, 61 insertions, 4 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 809dd903ceb8..0f986f7afee4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -411,6 +411,43 @@ static struct css_set init_css_set = {
411 411
412static int css_set_count = 1; /* 1 for init_css_set */ 412static int css_set_count = 1; /* 1 for init_css_set */
413 413
414/**
415 * cgroup_update_populated - update the populated count of a cgroup
416 * @cgrp: the target cgroup
417 * @populated: true to increment, false to decrement the populated count
418 *
419 * @cgrp is either getting the first task (css_set) or losing the last.
420 * Update @cgrp->populated_cnt accordingly. The count is propagated
421 * towards root so that a given cgroup's populated_cnt is zero iff the
422 * cgroup and all its descendants are empty.
423 *
424 * @cgrp's interface file "cgroup.populated" is zero if
425 * @cgrp->populated_cnt is zero and 1 otherwise. When @cgrp->populated_cnt
426 * changes from or to zero, userland is notified that the content of the
427 * interface file has changed. This can be used to detect when @cgrp and
428 * its descendants become populated or empty.
429 */
430static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
431{
432 lockdep_assert_held(&css_set_rwsem); /* caller must hold css_set_rwsem */
433
434 do {
435 bool trigger;
436
437 if (populated)
438 trigger = !cgrp->populated_cnt++; /* true only when the count was 0 before the increment */
439 else
440 trigger = !--cgrp->populated_cnt; /* true only when the count reaches 0 after the decrement */
441
442 if (!trigger)
443 break; /* zero/non-zero state did not flip; stop walking up */
444
445 if (cgrp->populated_kn) /* skip the notification when no populated_kn is set for this cgroup */
446 kernfs_notify(cgrp->populated_kn);
447 cgrp = cgrp->parent; /* a flip here is applied as one inc/dec on the parent */
448 } while (cgrp);
449}
450
414/* 451/*
415 * hash table for cgroup groups. This improves the performance to find 452 * hash table for cgroup groups. This improves the performance to find
416 * an existing css_set. This hash doesn't (currently) take into 453 * an existing css_set. This hash doesn't (currently) take into
@@ -456,10 +493,13 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
456 list_del(&link->cgrp_link); 493 list_del(&link->cgrp_link);
457 494
458 /* @cgrp can't go away while we're holding css_set_rwsem */ 495 /* @cgrp can't go away while we're holding css_set_rwsem */
459 if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) { 496 if (list_empty(&cgrp->cset_links)) {
460 if (taskexit) 497 cgroup_update_populated(cgrp, false);
461 set_bit(CGRP_RELEASABLE, &cgrp->flags); 498 if (notify_on_release(cgrp)) {
462 check_for_release(cgrp); 499 if (taskexit)
500 set_bit(CGRP_RELEASABLE, &cgrp->flags);
501 check_for_release(cgrp);
502 }
463 } 503 }
464 504
465 kfree(link); 505 kfree(link);
@@ -668,7 +708,11 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
668 link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link); 708 link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
669 link->cset = cset; 709 link->cset = cset;
670 link->cgrp = cgrp; 710 link->cgrp = cgrp;
711
712 if (list_empty(&cgrp->cset_links))
713 cgroup_update_populated(cgrp, true);
671 list_move(&link->cset_link, &cgrp->cset_links); 714 list_move(&link->cset_link, &cgrp->cset_links);
715
672 /* 716 /*
673 * Always add links to the tail of the list so that the list 717 * Always add links to the tail of the list so that the list
674 * is sorted by order of hierarchy creation 718 * is sorted by order of hierarchy creation
@@ -2643,6 +2687,12 @@ err_undo_css:
2643 goto out_unlock; 2687 goto out_unlock;
2644} 2688}
2645 2689
2690static int cgroup_populated_show(struct seq_file *seq, void *v) /* seq_show handler; @v is unused */
2691{
2692 seq_printf(seq, "%d\n", (bool)seq_css(seq)->cgroup->populated_cnt); /* (bool) collapses the count to 0 or 1 */
2693 return 0;
2694}
2695
2646static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, 2696static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
2647 size_t nbytes, loff_t off) 2697 size_t nbytes, loff_t off)
2648{ 2698{
@@ -2809,6 +2859,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
2809 2859
2810 if (cft->seq_show == cgroup_subtree_control_show) 2860 if (cft->seq_show == cgroup_subtree_control_show)
2811 cgrp->control_kn = kn; 2861 cgrp->control_kn = kn;
2862 else if (cft->seq_show == cgroup_populated_show)
2863 cgrp->populated_kn = kn;
2812 return 0; 2864 return 0;
2813} 2865}
2814 2866
@@ -3918,6 +3970,11 @@ static struct cftype cgroup_base_files[] = {
3918 .seq_show = cgroup_subtree_control_show, 3970 .seq_show = cgroup_subtree_control_show,
3919 .write_string = cgroup_subtree_control_write, 3971 .write_string = cgroup_subtree_control_write,
3920 }, 3972 },
3973 {
3974 .name = "cgroup.populated",
3975 .flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
3976 .seq_show = cgroup_populated_show,
3977 },
3921 3978
3922 /* 3979 /*
3923 * Historical crazy stuff. These don't have "cgroup." prefix and 3980 * Historical crazy stuff. These don't have "cgroup." prefix and