aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- kernel/cgroup.c 55
1 files changed, 38 insertions, 17 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 250dac05680f..b6eadfe30e7b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -735,16 +735,28 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
735 * reference to css->refcnt. In general, this refcnt is expected to go down 735 * reference to css->refcnt. In general, this refcnt is expected to go down
736 * to zero, soon. 736 * to zero, soon.
737 * 737 *
738 * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex; 738 * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
739 */ 739 */
740DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); 740DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
741 741
742static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp) 742static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
743{ 743{
744 if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) 744 if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
745 wake_up_all(&cgroup_rmdir_waitq); 745 wake_up_all(&cgroup_rmdir_waitq);
746} 746}
747 747
748void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
749{
750 css_get(css);
751}
752
753void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
754{
755 cgroup_wakeup_rmdir_waiter(css->cgroup);
756 css_put(css);
757}
758
759
748static int rebind_subsystems(struct cgroupfs_root *root, 760static int rebind_subsystems(struct cgroupfs_root *root,
749 unsigned long final_bits) 761 unsigned long final_bits)
750{ 762{
@@ -1359,7 +1371,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1359 * Wake up rmdir() waiter. The rmdir should fail since the cgroup 1371 * Wake up rmdir() waiter. The rmdir should fail since the cgroup
1360 * is no longer empty. 1372 * is no longer empty.
1361 */ 1373 */
1362 cgroup_wakeup_rmdir_waiters(cgrp); 1374 cgroup_wakeup_rmdir_waiter(cgrp);
1363 return 0; 1375 return 0;
1364} 1376}
1365 1377
@@ -2744,33 +2756,42 @@ again:
2744 mutex_unlock(&cgroup_mutex); 2756 mutex_unlock(&cgroup_mutex);
2745 2757
2746 /* 2758 /*
2759 * In general, a subsystem holds no css->refcnt after pre_destroy(). But
2760 * in racy cases, a subsystem may have to get css->refcnt after
2761 * pre_destroy(), which makes rmdir return with -EBUSY. This can
2762 * make rmdir return -EBUSY too often. To avoid that, we use a waitqueue
2763 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
2764 * and subsystem's reference count handling. Please see css_get/put
2765 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
2766 */
2767 set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2768
2769 /*
2747 * Call pre_destroy handlers of subsys. Notify subsystems 2770 * Call pre_destroy handlers of subsys. Notify subsystems
2748 * that rmdir() request comes. 2771 * that rmdir() request comes.
2749 */ 2772 */
2750 ret = cgroup_call_pre_destroy(cgrp); 2773 ret = cgroup_call_pre_destroy(cgrp);
2751 if (ret) 2774 if (ret) {
2775 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2752 return ret; 2776 return ret;
2777 }
2753 2778
2754 mutex_lock(&cgroup_mutex); 2779 mutex_lock(&cgroup_mutex);
2755 parent = cgrp->parent; 2780 parent = cgrp->parent;
2756 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) { 2781 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
2782 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2757 mutex_unlock(&cgroup_mutex); 2783 mutex_unlock(&cgroup_mutex);
2758 return -EBUSY; 2784 return -EBUSY;
2759 } 2785 }
2760 /*
2761 * css_put/get is provided for subsys to grab refcnt to css. In typical
2762 * case, subsystem has no reference after pre_destroy(). But, under
2763 * hierarchy management, some *temporary* refcnt can be held.
2764 * To avoid returning -EBUSY to a user, waitqueue is used. If subsys
2765 * is really busy, it should return -EBUSY at pre_destroy(). wake_up
2766 * is called when css_put() is called and refcnt goes down to 0.
2767 */
2768 set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2769 prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE); 2786 prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
2770
2771 if (!cgroup_clear_css_refs(cgrp)) { 2787 if (!cgroup_clear_css_refs(cgrp)) {
2772 mutex_unlock(&cgroup_mutex); 2788 mutex_unlock(&cgroup_mutex);
2773 schedule(); 2789 /*
2790 * Because someone may call cgroup_wakeup_rmdir_waiter() before
2791 * prepare_to_wait(), we need to check this flag.
2792 */
2793 if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
2794 schedule();
2774 finish_wait(&cgroup_rmdir_waitq, &wait); 2795 finish_wait(&cgroup_rmdir_waitq, &wait);
2775 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); 2796 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2776 if (signal_pending(current)) 2797 if (signal_pending(current))
@@ -3342,7 +3363,7 @@ void __css_put(struct cgroup_subsys_state *css)
3342 set_bit(CGRP_RELEASABLE, &cgrp->flags); 3363 set_bit(CGRP_RELEASABLE, &cgrp->flags);
3343 check_for_release(cgrp); 3364 check_for_release(cgrp);
3344 } 3365 }
3345 cgroup_wakeup_rmdir_waiters(cgrp); 3366 cgroup_wakeup_rmdir_waiter(cgrp);
3346 } 3367 }
3347 rcu_read_unlock(); 3368 rcu_read_unlock();
3348} 3369}