aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c81
1 files changed, 67 insertions, 14 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d3c521137425..fc5e4a48582f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -622,13 +622,18 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
622 * Call subsys's pre_destroy handler. 622 * Call subsys's pre_destroy handler.
623 * This is called before css refcnt check. 623 * This is called before css refcnt check.
624 */ 624 */
625static void cgroup_call_pre_destroy(struct cgroup *cgrp) 625static int cgroup_call_pre_destroy(struct cgroup *cgrp)
626{ 626{
627 struct cgroup_subsys *ss; 627 struct cgroup_subsys *ss;
628 int ret = 0;
629
628 for_each_subsys(cgrp->root, ss) 630 for_each_subsys(cgrp->root, ss)
629 if (ss->pre_destroy) 631 if (ss->pre_destroy) {
630 ss->pre_destroy(ss, cgrp); 632 ret = ss->pre_destroy(ss, cgrp);
631 return; 633 if (ret)
634 break;
635 }
636 return ret;
632} 637}
633 638
634static void free_cgroup_rcu(struct rcu_head *obj) 639static void free_cgroup_rcu(struct rcu_head *obj)
@@ -722,6 +727,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
722 remove_dir(dentry); 727 remove_dir(dentry);
723} 728}
724 729
730/*
731 * A queue for tasks waiting to rmdir() a cgroup. A task will sleep when
732 * cgroup->count == 0 && list_empty(&cgroup->children) && a subsys still holds
733 * a reference on css->refcnt. In general, this refcnt is expected to go down
734 * to zero soon.
735 *
736 * The CGRP_WAIT_ON_RMDIR flag is modified under the cgroup's inode->i_mutex.
737 */
738DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
739
740static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
741{
742 if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
743 wake_up_all(&cgroup_rmdir_waitq);
744}
745
725static int rebind_subsystems(struct cgroupfs_root *root, 746static int rebind_subsystems(struct cgroupfs_root *root,
726 unsigned long final_bits) 747 unsigned long final_bits)
727{ 748{
@@ -1317,6 +1338,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1317 set_bit(CGRP_RELEASABLE, &oldcgrp->flags); 1338 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1318 synchronize_rcu(); 1339 synchronize_rcu();
1319 put_css_set(cg); 1340 put_css_set(cg);
1341
1342 /*
1343 * Wake up rmdir() waiters. The rmdir should fail since the cgroup
1344 * is no longer empty.
1345 */
1346 cgroup_wakeup_rmdir_waiters(cgrp);
1320 return 0; 1347 return 0;
1321} 1348}
1322 1349
@@ -2608,9 +2635,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2608 struct cgroup *cgrp = dentry->d_fsdata; 2635 struct cgroup *cgrp = dentry->d_fsdata;
2609 struct dentry *d; 2636 struct dentry *d;
2610 struct cgroup *parent; 2637 struct cgroup *parent;
2638 DEFINE_WAIT(wait);
2639 int ret;
2611 2640
2612 /* the vfs holds both inode->i_mutex already */ 2641 /* the vfs holds both inode->i_mutex already */
2613 2642again:
2614 mutex_lock(&cgroup_mutex); 2643 mutex_lock(&cgroup_mutex);
2615 if (atomic_read(&cgrp->count) != 0) { 2644 if (atomic_read(&cgrp->count) != 0) {
2616 mutex_unlock(&cgroup_mutex); 2645 mutex_unlock(&cgroup_mutex);
@@ -2626,17 +2655,39 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2626 * Call pre_destroy handlers of subsys. Notify subsystems 2655 * Call pre_destroy handlers of subsys. Notify subsystems
2627 * that rmdir() request comes. 2656 * that rmdir() request comes.
2628 */ 2657 */
2629 cgroup_call_pre_destroy(cgrp); 2658 ret = cgroup_call_pre_destroy(cgrp);
2659 if (ret)
2660 return ret;
2630 2661
2631 mutex_lock(&cgroup_mutex); 2662 mutex_lock(&cgroup_mutex);
2632 parent = cgrp->parent; 2663 parent = cgrp->parent;
2633 2664 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
2634 if (atomic_read(&cgrp->count)
2635 || !list_empty(&cgrp->children)
2636 || !cgroup_clear_css_refs(cgrp)) {
2637 mutex_unlock(&cgroup_mutex); 2665 mutex_unlock(&cgroup_mutex);
2638 return -EBUSY; 2666 return -EBUSY;
2639 } 2667 }
2668 /*
2669 * css_put/get are provided for a subsys to take a refcnt on a css. In the
2670 * typical case, a subsystem holds no reference after pre_destroy(). But under
2671 * hierarchy management, some *temporary* refcnt can be held.
2672 * To avoid returning -EBUSY to the user, a waitqueue is used. If a subsys
2673 * is really busy, it should return -EBUSY from pre_destroy(). wake_up
2674 * is called when css_put() is called and the refcnt goes down to 0.
2675 */
2676 set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2677 prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
2678
2679 if (!cgroup_clear_css_refs(cgrp)) {
2680 mutex_unlock(&cgroup_mutex);
2681 schedule();
2682 finish_wait(&cgroup_rmdir_waitq, &wait);
2683 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2684 if (signal_pending(current))
2685 return -EINTR;
2686 goto again;
2687 }
2688 /* No css_tryget() can succeed after this point. */
2689 finish_wait(&cgroup_rmdir_waitq, &wait);
2690 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2640 2691
2641 spin_lock(&release_list_lock); 2692 spin_lock(&release_list_lock);
2642 set_bit(CGRP_REMOVED, &cgrp->flags); 2693 set_bit(CGRP_REMOVED, &cgrp->flags);
@@ -3194,10 +3245,12 @@ void __css_put(struct cgroup_subsys_state *css)
3194{ 3245{
3195 struct cgroup *cgrp = css->cgroup; 3246 struct cgroup *cgrp = css->cgroup;
3196 rcu_read_lock(); 3247 rcu_read_lock();
3197 if ((atomic_dec_return(&css->refcnt) == 1) && 3248 if (atomic_dec_return(&css->refcnt) == 1) {
3198 notify_on_release(cgrp)) { 3249 if (notify_on_release(cgrp)) {
3199 set_bit(CGRP_RELEASABLE, &cgrp->flags); 3250 set_bit(CGRP_RELEASABLE, &cgrp->flags);
3200 check_for_release(cgrp); 3251 check_for_release(cgrp);
3252 }
3253 cgroup_wakeup_rmdir_waiters(cgrp);
3201 } 3254 }
3202 rcu_read_unlock(); 3255 rcu_read_unlock();
3203} 3256}