diff options
-rw-r--r-- | Documentation/cgroups/cgroups.txt | 6 | ||||
-rw-r--r-- | include/linux/cgroup.h | 6 | ||||
-rw-r--r-- | kernel/cgroup.c | 81 | ||||
-rw-r--r-- | mm/memcontrol.c | 5 |
4 files changed, 79 insertions, 19 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 93feb8444489..cdc46a501b85 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -476,11 +476,13 @@ cgroup->parent is still valid. (Note - can also be called for a | |||
476 | newly-created cgroup if an error occurs after this subsystem's | 476 | newly-created cgroup if an error occurs after this subsystem's |
477 | create() method has been called for the new cgroup). | 477 | create() method has been called for the new cgroup). |
478 | 478 | ||
479 | void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); | 479 | int pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); |
480 | 480 | ||
481 | Called before checking the reference count on each subsystem. This may | 481 | Called before checking the reference count on each subsystem. This may |
482 | be useful for subsystems which have some extra references even if | 482 | be useful for subsystems which have some extra references even if |
483 | there are not tasks in the cgroup. | 483 | there are no tasks in the cgroup. If pre_destroy() returns an error code, |
484 | rmdir() will fail with it. Because of this, pre_destroy() can be | ||
485 | called multiple times against a cgroup. | ||
484 | 486 | ||
485 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 487 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
486 | struct task_struct *task) | 488 | struct task_struct *task) |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9a23bb098205..7d824b80b3d7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -135,6 +135,10 @@ enum { | |||
135 | CGRP_RELEASABLE, | 135 | CGRP_RELEASABLE, |
136 | /* Control Group requires release notifications to userspace */ | 136 | /* Control Group requires release notifications to userspace */ |
137 | CGRP_NOTIFY_ON_RELEASE, | 137 | CGRP_NOTIFY_ON_RELEASE, |
138 | /* | ||
139 | * A thread in rmdir() is waiting for this cgroup. | ||
140 | */ | ||
141 | CGRP_WAIT_ON_RMDIR, | ||
138 | }; | 142 | }; |
139 | 143 | ||
140 | struct cgroup { | 144 | struct cgroup { |
@@ -360,7 +364,7 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); | |||
360 | struct cgroup_subsys { | 364 | struct cgroup_subsys { |
361 | struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss, | 365 | struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss, |
362 | struct cgroup *cgrp); | 366 | struct cgroup *cgrp); |
363 | void (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); | 367 | int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
364 | void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); | 368 | void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
365 | int (*can_attach)(struct cgroup_subsys *ss, | 369 | int (*can_attach)(struct cgroup_subsys *ss, |
366 | struct cgroup *cgrp, struct task_struct *tsk); | 370 | struct cgroup *cgrp, struct task_struct *tsk); |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d3c521137425..fc5e4a48582f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -622,13 +622,18 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) | |||
622 | * Call subsys's pre_destroy handler. | 622 | * Call subsys's pre_destroy handler. |
623 | * This is called before css refcnt check. | 623 | * This is called before css refcnt check. |
624 | */ | 624 | */ |
625 | static void cgroup_call_pre_destroy(struct cgroup *cgrp) | 625 | static int cgroup_call_pre_destroy(struct cgroup *cgrp) |
626 | { | 626 | { |
627 | struct cgroup_subsys *ss; | 627 | struct cgroup_subsys *ss; |
628 | int ret = 0; | ||
629 | |||
628 | for_each_subsys(cgrp->root, ss) | 630 | for_each_subsys(cgrp->root, ss) |
629 | if (ss->pre_destroy) | 631 | if (ss->pre_destroy) { |
630 | ss->pre_destroy(ss, cgrp); | 632 | ret = ss->pre_destroy(ss, cgrp); |
631 | return; | 633 | if (ret) |
634 | break; | ||
635 | } | ||
636 | return ret; | ||
632 | } | 637 | } |
633 | 638 | ||
634 | static void free_cgroup_rcu(struct rcu_head *obj) | 639 | static void free_cgroup_rcu(struct rcu_head *obj) |
@@ -722,6 +727,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry) | |||
722 | remove_dir(dentry); | 727 | remove_dir(dentry); |
723 | } | 728 | } |
724 | 729 | ||
730 | /* | ||
731 | * A queue for waiters to do rmdir() on a cgroup. A task will sleep when | ||
732 | * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some | ||
733 | * reference to css->refcnt. In general, this refcnt is expected to go down | ||
734 | * to zero soon. | ||
735 | * | ||
736 | * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex. | ||
737 | */ | ||
738 | DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); | ||
739 | |||
740 | static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp) | ||
741 | { | ||
742 | if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) | ||
743 | wake_up_all(&cgroup_rmdir_waitq); | ||
744 | } | ||
745 | |||
725 | static int rebind_subsystems(struct cgroupfs_root *root, | 746 | static int rebind_subsystems(struct cgroupfs_root *root, |
726 | unsigned long final_bits) | 747 | unsigned long final_bits) |
727 | { | 748 | { |
@@ -1317,6 +1338,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1317 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 1338 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); |
1318 | synchronize_rcu(); | 1339 | synchronize_rcu(); |
1319 | put_css_set(cg); | 1340 | put_css_set(cg); |
1341 | |||
1342 | /* | ||
1343 | * Wake up rmdir() waiter. The rmdir should fail since the cgroup | ||
1344 | * is no longer empty. | ||
1345 | */ | ||
1346 | cgroup_wakeup_rmdir_waiters(cgrp); | ||
1320 | return 0; | 1347 | return 0; |
1321 | } | 1348 | } |
1322 | 1349 | ||
@@ -2608,9 +2635,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2608 | struct cgroup *cgrp = dentry->d_fsdata; | 2635 | struct cgroup *cgrp = dentry->d_fsdata; |
2609 | struct dentry *d; | 2636 | struct dentry *d; |
2610 | struct cgroup *parent; | 2637 | struct cgroup *parent; |
2638 | DEFINE_WAIT(wait); | ||
2639 | int ret; | ||
2611 | 2640 | ||
2612 | /* the vfs holds both inode->i_mutex already */ | 2641 | /* the vfs holds both inode->i_mutex already */ |
2613 | 2642 | again: | |
2614 | mutex_lock(&cgroup_mutex); | 2643 | mutex_lock(&cgroup_mutex); |
2615 | if (atomic_read(&cgrp->count) != 0) { | 2644 | if (atomic_read(&cgrp->count) != 0) { |
2616 | mutex_unlock(&cgroup_mutex); | 2645 | mutex_unlock(&cgroup_mutex); |
@@ -2626,17 +2655,39 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2626 | * Call pre_destroy handlers of subsys. Notify subsystems | 2655 | * Call pre_destroy handlers of subsys. Notify subsystems |
2627 | * that rmdir() request comes. | 2656 | * that rmdir() request comes. |
2628 | */ | 2657 | */ |
2629 | cgroup_call_pre_destroy(cgrp); | 2658 | ret = cgroup_call_pre_destroy(cgrp); |
2659 | if (ret) | ||
2660 | return ret; | ||
2630 | 2661 | ||
2631 | mutex_lock(&cgroup_mutex); | 2662 | mutex_lock(&cgroup_mutex); |
2632 | parent = cgrp->parent; | 2663 | parent = cgrp->parent; |
2633 | 2664 | if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) { | |
2634 | if (atomic_read(&cgrp->count) | ||
2635 | || !list_empty(&cgrp->children) | ||
2636 | || !cgroup_clear_css_refs(cgrp)) { | ||
2637 | mutex_unlock(&cgroup_mutex); | 2665 | mutex_unlock(&cgroup_mutex); |
2638 | return -EBUSY; | 2666 | return -EBUSY; |
2639 | } | 2667 | } |
2668 | /* | ||
2669 | * css_put/get is provided for subsys to grab refcnt to css. In typical | ||
2670 | * case, subsystem has no reference after pre_destroy(). But, under | ||
2671 | * hierarchy management, some *temporary* refcnt can be held. | ||
2672 | * To avoid returning -EBUSY to a user, waitqueue is used. If subsys | ||
2673 | * is really busy, it should return -EBUSY at pre_destroy(). wake_up | ||
2674 | * is called when css_put() is called and refcnt goes down to 0. | ||
2675 | */ | ||
2676 | set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); | ||
2677 | prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE); | ||
2678 | |||
2679 | if (!cgroup_clear_css_refs(cgrp)) { | ||
2680 | mutex_unlock(&cgroup_mutex); | ||
2681 | schedule(); | ||
2682 | finish_wait(&cgroup_rmdir_waitq, &wait); | ||
2683 | clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); | ||
2684 | if (signal_pending(current)) | ||
2685 | return -EINTR; | ||
2686 | goto again; | ||
2687 | } | ||
2688 | /* NO css_tryget() can succeed after here. */ | ||
2689 | finish_wait(&cgroup_rmdir_waitq, &wait); | ||
2690 | clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); | ||
2640 | 2691 | ||
2641 | spin_lock(&release_list_lock); | 2692 | spin_lock(&release_list_lock); |
2642 | set_bit(CGRP_REMOVED, &cgrp->flags); | 2693 | set_bit(CGRP_REMOVED, &cgrp->flags); |
@@ -3194,10 +3245,12 @@ void __css_put(struct cgroup_subsys_state *css) | |||
3194 | { | 3245 | { |
3195 | struct cgroup *cgrp = css->cgroup; | 3246 | struct cgroup *cgrp = css->cgroup; |
3196 | rcu_read_lock(); | 3247 | rcu_read_lock(); |
3197 | if ((atomic_dec_return(&css->refcnt) == 1) && | 3248 | if (atomic_dec_return(&css->refcnt) == 1) { |
3198 | notify_on_release(cgrp)) { | 3249 | if (notify_on_release(cgrp)) { |
3199 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 3250 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
3200 | check_for_release(cgrp); | 3251 | check_for_release(cgrp); |
3252 | } | ||
3253 | cgroup_wakeup_rmdir_waiters(cgrp); | ||
3201 | } | 3254 | } |
3202 | rcu_read_unlock(); | 3255 | rcu_read_unlock(); |
3203 | } | 3256 | } |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8e4be9cb2a6a..8ffec674c5ac 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -2272,11 +2272,12 @@ free_out: | |||
2272 | return ERR_PTR(-ENOMEM); | 2272 | return ERR_PTR(-ENOMEM); |
2273 | } | 2273 | } |
2274 | 2274 | ||
2275 | static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, | 2275 | static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss, |
2276 | struct cgroup *cont) | 2276 | struct cgroup *cont) |
2277 | { | 2277 | { |
2278 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 2278 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
2279 | mem_cgroup_force_empty(mem, false); | 2279 | |
2280 | return mem_cgroup_force_empty(mem, false); | ||
2280 | } | 2281 | } |
2281 | 2282 | ||
2282 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, | 2283 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, |