Diffstat (limited to 'kernel/cgroup.c')
 -rw-r--r--   kernel/cgroup.c   81
 1 file changed, 67 insertions(+), 14 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d3c521137425..fc5e4a48582f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -622,13 +622,18 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
  * Call subsys's pre_destroy handler.
  * This is called before css refcnt check.
  */
-static void cgroup_call_pre_destroy(struct cgroup *cgrp)
+static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 {
 	struct cgroup_subsys *ss;
+	int ret = 0;
+
 	for_each_subsys(cgrp->root, ss)
-		if (ss->pre_destroy)
-			ss->pre_destroy(ss, cgrp);
-	return;
+		if (ss->pre_destroy) {
+			ret = ss->pre_destroy(ss, cgrp);
+			if (ret)
+				break;
+		}
+	return ret;
 }
 
 static void free_cgroup_rcu(struct rcu_head *obj)
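The first hunk makes cgroup_call_pre_destroy() return the first error a subsystem's ->pre_destroy() handler reports instead of discarding it, and stops calling further handlers once one fails. A minimal userspace sketch of that early-exit error propagation, with a hypothetical callback table standing in for the kernel's for_each_subsys() walk:

#include <stdio.h>

/* Hypothetical stand-ins for subsystem pre_destroy handlers. */
static int sub_a_pre_destroy(void) { return 0; }
static int sub_b_pre_destroy(void) { return -16; /* mimic -EBUSY */ }
static int sub_c_pre_destroy(void) { return 0; }

static int (*pre_destroy_cbs[])(void) = {
	sub_a_pre_destroy, sub_b_pre_destroy, sub_c_pre_destroy,
};

/* Same shape as the new cgroup_call_pre_destroy(): stop at the first
 * failure and hand the error back to the caller instead of dropping it. */
static int call_pre_destroy(void)
{
	int ret = 0;
	for (size_t i = 0; i < sizeof(pre_destroy_cbs) / sizeof(pre_destroy_cbs[0]); i++) {
		ret = pre_destroy_cbs[i]();
		if (ret)
			break;
	}
	return ret;
}

int main(void)
{
	printf("call_pre_destroy() -> %d\n", call_pre_destroy());
	return 0;
}

With the middle callback refusing, the sketch prints -16, just as cgroup_rmdir() will now see the subsystem's error instead of silently proceeding.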
@@ -722,6 +727,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 	remove_dir(dentry);
 }
 
+/*
+ * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
+ * reference to css->refcnt. In general, this refcnt is expected to goes down
+ * to zero, soon.
+ *
+ * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex;
+ */
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
+{
+	if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+		wake_up_all(&cgroup_rmdir_waitq);
+}
+
 static int rebind_subsystems(struct cgroupfs_root *root,
 			      unsigned long final_bits)
 {
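This hunk introduces one global wait queue for would-be rmdir() callers plus a wake helper that does nothing unless CGRP_WAIT_ON_RMDIR is set, so hot paths such as css_put() normally pay only a test_bit(). A rough pthread analogue of that flag-gated broadcast (all names here are invented; the kernel uses a waitqueue, not a condition variable):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the patch's global state: the atomic flag plays
 * the role of CGRP_WAIT_ON_RMDIR, the condition variable the role of
 * cgroup_rmdir_waitq. */
static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t rmdir_waitq = PTHREAD_COND_INITIALIZER;
static atomic_bool wait_on_rmdir;

/* Analogue of cgroup_wakeup_rmdir_waiters(): callers pay only an atomic load
 * unless an rmdir() is actually parked on the queue. */
static void wakeup_rmdir_waiters(void)
{
	if (!atomic_load(&wait_on_rmdir))
		return;				/* nobody waiting: skip the broadcast */
	pthread_mutex_lock(&q_lock);
	pthread_cond_broadcast(&rmdir_waitq);	/* wake_up_all() equivalent */
	pthread_mutex_unlock(&q_lock);
}

int main(void)
{
	wakeup_rmdir_waiters();			/* no-op: flag is clear */
	atomic_store(&wait_on_rmdir, true);
	wakeup_rmdir_waiters();			/* would wake any sleeper */
	puts("wake helper exercised");
	return 0;
}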
@@ -1317,6 +1338,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
 	synchronize_rcu();
 	put_css_set(cg);
+
+	/*
+	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
+	 * is no longer empty.
+	 */
+	cgroup_wakeup_rmdir_waiters(cgrp);
 	return 0;
 }
 
@@ -2608,9 +2635,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	struct cgroup *cgrp = dentry->d_fsdata;
 	struct dentry *d;
 	struct cgroup *parent;
+	DEFINE_WAIT(wait);
+	int ret;
 
 	/* the vfs holds both inode->i_mutex already */
-
+again:
 	mutex_lock(&cgroup_mutex);
 	if (atomic_read(&cgrp->count) != 0) {
 		mutex_unlock(&cgroup_mutex);
@@ -2626,17 +2655,39 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	 * Call pre_destroy handlers of subsys. Notify subsystems
 	 * that rmdir() request comes.
 	 */
-	cgroup_call_pre_destroy(cgrp);
+	ret = cgroup_call_pre_destroy(cgrp);
+	if (ret)
+		return ret;
 
 	mutex_lock(&cgroup_mutex);
 	parent = cgrp->parent;
-
-	if (atomic_read(&cgrp->count)
-		|| !list_empty(&cgrp->children)
-		|| !cgroup_clear_css_refs(cgrp)) {
+	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
+	/*
+	 * css_put/get is provided for subsys to grab refcnt to css. In typical
+	 * case, subsystem has no reference after pre_destroy(). But, under
+	 * hierarchy management, some *temporal* refcnt can be hold.
+	 * To avoid returning -EBUSY to a user, waitqueue is used. If subsys
+	 * is really busy, it should return -EBUSY at pre_destroy(). wake_up
+	 * is called when css_put() is called and refcnt goes down to 0.
+	 */
+	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
+
+	if (!cgroup_clear_css_refs(cgrp)) {
+		mutex_unlock(&cgroup_mutex);
+		schedule();
+		finish_wait(&cgroup_rmdir_waitq, &wait);
+		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+		if (signal_pending(current))
+			return -EINTR;
+		goto again;
+	}
+	/* NO css_tryget() can success after here. */
+	finish_wait(&cgroup_rmdir_waitq, &wait);
+	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 
 	spin_lock(&release_list_lock);
 	set_bit(CGRP_REMOVED, &cgrp->flags);
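Taken with the hunk above, this is the heart of the change: rather than failing with -EBUSY while a subsystem still holds a short-lived css reference, rmdir() now registers on cgroup_rmdir_waitq, drops cgroup_mutex, sleeps, and retries the whole check from again: once woken by the attach or css_put paths (or returns -EINTR if a signal arrives first). A compressed userspace analogue of that sleep-and-recheck loop, with invented names and a condition variable standing in for prepare_to_wait()/schedule()/finish_wait():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Shared toy state (hypothetical): outstanding css references and a flag
 * that mirrors CGRP_WAIT_ON_RMDIR. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t rmdir_waitq = PTHREAD_COND_INITIALIZER;
static int css_refs = 1;		/* one temporary subsystem reference */
static bool wait_on_rmdir;

/* Analogue of the new cgroup_rmdir() tail: instead of returning -EBUSY while
 * css_refs is still non-zero, park on the queue and retry after a wake-up. */
static int toy_rmdir(void)
{
	pthread_mutex_lock(&lock);
	while (css_refs != 0) {
		wait_on_rmdir = true;			/* set_bit(CGRP_WAIT_ON_RMDIR) */
		pthread_cond_wait(&rmdir_waitq, &lock);	/* schedule() until woken */
		wait_on_rmdir = false;			/* clear_bit, then "goto again" */
	}
	pthread_mutex_unlock(&lock);
	return 0;					/* refs gone: removal proceeds */
}

/* Analogue of __css_put(): dropping the last reference wakes the waiter. */
static void *drop_ref(void *arg)
{
	(void)arg;
	usleep(100 * 1000);
	pthread_mutex_lock(&lock);
	css_refs--;
	if (wait_on_rmdir)
		pthread_cond_broadcast(&rmdir_waitq);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, drop_ref, NULL);
	printf("toy_rmdir() -> %d\n", toy_rmdir());
	pthread_join(t, NULL);
	return 0;
}

The kernel version differs in two ways the toy omits: the predicate is re-evaluated from again: with cgroup_mutex retaken, and a pending signal breaks the wait with -EINTR instead of sleeping again.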
@@ -3194,10 +3245,12 @@ void __css_put(struct cgroup_subsys_state *css)
 {
 	struct cgroup *cgrp = css->cgroup;
 	rcu_read_lock();
-	if ((atomic_dec_return(&css->refcnt) == 1) &&
-	    notify_on_release(cgrp)) {
-		set_bit(CGRP_RELEASABLE, &cgrp->flags);
-		check_for_release(cgrp);
+	if (atomic_dec_return(&css->refcnt) == 1) {
+		if (notify_on_release(cgrp)) {
+			set_bit(CGRP_RELEASABLE, &cgrp->flags);
+			check_for_release(cgrp);
+		}
+		cgroup_wakeup_rmdir_waiters(cgrp);
 	}
 	rcu_read_unlock();
 }
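The final hunk supplies the other waker: when __css_put() drops the count to 1 (in this code the remaining reference is the cgroup's own long-lived one), any parked rmdir() gets another chance, while release notification is still only considered when notify_on_release() is set. A tiny standalone illustration of that "count of 1 means no external users" convention, using invented names:

#include <stdatomic.h>
#include <stdio.h>

/* Toy refcount with the same convention as css->refcnt here: it starts at 1
 * (the owner's base reference), so a put that leaves the count at 1 means the
 * last external user just went away, which is the moment to wake a waiter. */
static atomic_int refcnt = 1;

static void toy_css_get(void)
{
	atomic_fetch_add(&refcnt, 1);
}

static void toy_css_put(void)
{
	/* atomic_fetch_sub returns the old value; old == 2 means new == 1,
	 * matching the kernel's atomic_dec_return(&css->refcnt) == 1 test. */
	if (atomic_fetch_sub(&refcnt, 1) == 2)
		puts("last external reference dropped: wake rmdir waiters");
}

int main(void)
{
	toy_css_get();		/* a subsystem takes a temporary reference */
	toy_css_put();		/* dropping it triggers the wake-up path */
	return 0;
}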