aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/cgroup.h 21
-rw-r--r-- kernel/cgroup.c 51
-rw-r--r-- mm/memcontrol.c 24
3 files changed, 1 insertions, 95 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a3098046250b..47868a86ba2b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -145,10 +145,6 @@ enum {
145 /* Control Group requires release notifications to userspace */ 145 /* Control Group requires release notifications to userspace */
146 CGRP_NOTIFY_ON_RELEASE, 146 CGRP_NOTIFY_ON_RELEASE,
147 /* 147 /*
148 * A thread in rmdir() is waiting for this cgroup.
149 */
150 CGRP_WAIT_ON_RMDIR,
151 /*
152 * Clone cgroup values when creating a new child cgroup 148 * Clone cgroup values when creating a new child cgroup
153 */ 149 */
154 CGRP_CLONE_CHILDREN, 150 CGRP_CLONE_CHILDREN,
@@ -412,23 +408,6 @@ int cgroup_task_count(const struct cgroup *cgrp);
412int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); 408int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
413 409
414/* 410/*
415 * When the subsys has to access css and may add permanent refcnt to css,
416 * it should take care of racy conditions with rmdir(). The following set
417 * of functions is for stopping/restarting rmdir if necessary.
418 * Because these will call css_get/put, "css" should be a live css.
419 *
420 * cgroup_exclude_rmdir();
421 * ...do some jobs which may access arbitrary empty cgroup
422 * cgroup_release_and_wakeup_rmdir();
423 *
424 * When someone removes a cgroup while cgroup_exclude_rmdir() holds it,
425 * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up.
426 */
427
428void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
429void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
430
431/*
432 * Control Group taskset, used to pass around set of tasks to cgroup_subsys 411 * Control Group taskset, used to pass around set of tasks to cgroup_subsys
433 * methods. 412 * methods.
434 */ 413 */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 66204a6f68f3..c5f6fb28dd0e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -966,33 +966,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
966} 966}
967 967
968/* 968/*
969 * A queue for waiters to do rmdir() cgroup. A task will sleep when
970 * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
971 * reference to css->refcnt. In general, this refcnt is expected to go down
972 * to zero soon.
973 *
974 * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
975 */
976static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
977
978static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
979{
980 if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
981 wake_up_all(&cgroup_rmdir_waitq);
982}
983
984void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
985{
986 css_get(css);
987}
988
989void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
990{
991 cgroup_wakeup_rmdir_waiter(css->cgroup);
992 css_put(css);
993}
994
995/*
996 * Call with cgroup_mutex held. Drops reference counts on modules, including 969 * Call with cgroup_mutex held. Drops reference counts on modules, including
997 * any duplicate ones that parse_cgroupfs_options took. If this function 970 * any duplicate ones that parse_cgroupfs_options took. If this function
998 * returns an error, no reference counts are touched. 971 * returns an error, no reference counts are touched.
@@ -1963,12 +1936,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1963 } 1936 }
1964 1937
1965 synchronize_rcu(); 1938 synchronize_rcu();
1966
1967 /*
1968 * wake up rmdir() waiter. the rmdir should fail since the cgroup
1969 * is no longer empty.
1970 */
1971 cgroup_wakeup_rmdir_waiter(cgrp);
1972out: 1939out:
1973 if (retval) { 1940 if (retval) {
1974 for_each_subsys(root, ss) { 1941 for_each_subsys(root, ss) {
@@ -2138,7 +2105,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2138 * step 5: success! and cleanup 2105 * step 5: success! and cleanup
2139 */ 2106 */
2140 synchronize_rcu(); 2107 synchronize_rcu();
2141 cgroup_wakeup_rmdir_waiter(cgrp);
2142 retval = 0; 2108 retval = 0;
2143out_put_css_set_refs: 2109out_put_css_set_refs:
2144 if (retval) { 2110 if (retval) {
@@ -4058,26 +4024,13 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
4058 struct cgroup_event *event, *tmp; 4024 struct cgroup_event *event, *tmp;
4059 struct cgroup_subsys *ss; 4025 struct cgroup_subsys *ss;
4060 4026
4061 /*
4062 * In general, subsystem has no css->refcnt after pre_destroy(). But
4063 * in racy cases, subsystem may have to get css->refcnt after
4064 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
4065 * makes rmdir return -EBUSY too often. To avoid that, we use waitqueue
4066 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
4067 * and subsystem's reference count handling. Please see css_get/put
4068 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
4069 */
4070 set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
4071
4072 /* the vfs holds both inode->i_mutex already */ 4027 /* the vfs holds both inode->i_mutex already */
4073 mutex_lock(&cgroup_mutex); 4028 mutex_lock(&cgroup_mutex);
4074 parent = cgrp->parent; 4029 parent = cgrp->parent;
4075 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) { 4030 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
4076 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
4077 mutex_unlock(&cgroup_mutex); 4031 mutex_unlock(&cgroup_mutex);
4078 return -EBUSY; 4032 return -EBUSY;
4079 } 4033 }
4080 prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
4081 4034
4082 /* 4035 /*
4083 * Block new css_tryget() by deactivating refcnt and mark @cgrp 4036 * Block new css_tryget() by deactivating refcnt and mark @cgrp
@@ -4114,9 +4067,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
4114 for_each_subsys(cgrp->root, ss) 4067 for_each_subsys(cgrp->root, ss)
4115 css_put(cgrp->subsys[ss->subsys_id]); 4068 css_put(cgrp->subsys[ss->subsys_id]);
4116 4069
4117 finish_wait(&cgroup_rmdir_waitq, &wait);
4118 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
4119
4120 raw_spin_lock(&release_list_lock); 4070 raw_spin_lock(&release_list_lock);
4121 if (!list_empty(&cgrp->release_list)) 4071 if (!list_empty(&cgrp->release_list))
4122 list_del_init(&cgrp->release_list); 4072 list_del_init(&cgrp->release_list);
@@ -4864,7 +4814,6 @@ void __css_put(struct cgroup_subsys_state *css)
4864 set_bit(CGRP_RELEASABLE, &cgrp->flags); 4814 set_bit(CGRP_RELEASABLE, &cgrp->flags);
4865 check_for_release(cgrp); 4815 check_for_release(cgrp);
4866 } 4816 }
4867 cgroup_wakeup_rmdir_waiter(cgrp);
4868 break; 4817 break;
4869 case 0: 4818 case 0:
4870 schedule_work(&css->dput_work); 4819 schedule_work(&css->dput_work);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 37c356646544..930edfaa5187 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2681,13 +2681,6 @@ static int mem_cgroup_move_account(struct page *page,
2681 /* caller should have done css_get */ 2681 /* caller should have done css_get */
2682 pc->mem_cgroup = to; 2682 pc->mem_cgroup = to;
2683 mem_cgroup_charge_statistics(to, anon, nr_pages); 2683 mem_cgroup_charge_statistics(to, anon, nr_pages);
2684 /*
2685 * We charge against "to" which may not have any tasks. Then, "to"
2686 * can be under rmdir(). But in current implementation, caller of
2687 * this function is just force_empty() and move charge, so it's
2688 * guaranteed that "to" is never removed. So, we don't check rmdir
2689 * status here.
2690 */
2691 move_unlock_mem_cgroup(from, &flags); 2684 move_unlock_mem_cgroup(from, &flags);
2692 ret = 0; 2685 ret = 0;
2693unlock: 2686unlock:
@@ -2893,7 +2886,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
2893 return; 2886 return;
2894 if (!memcg) 2887 if (!memcg)
2895 return; 2888 return;
2896 cgroup_exclude_rmdir(&memcg->css);
2897 2889
2898 __mem_cgroup_commit_charge(memcg, page, 1, ctype, true); 2890 __mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
2899 /* 2891 /*
@@ -2907,12 +2899,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
2907 swp_entry_t ent = {.val = page_private(page)}; 2899 swp_entry_t ent = {.val = page_private(page)};
2908 mem_cgroup_uncharge_swap(ent); 2900 mem_cgroup_uncharge_swap(ent);
2909 } 2901 }
2910 /*
2911 * At swapin, we may charge account against cgroup which has no tasks.
2912 * So, rmdir()->pre_destroy() can be called while we do this charge.
2913 * In that case, we need to call pre_destroy() again. check it here.
2914 */
2915 cgroup_release_and_wakeup_rmdir(&memcg->css);
2916} 2902}
2917 2903
2918void mem_cgroup_commit_charge_swapin(struct page *page, 2904void mem_cgroup_commit_charge_swapin(struct page *page,
@@ -3360,8 +3346,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
3360 3346
3361 if (!memcg) 3347 if (!memcg)
3362 return; 3348 return;
3363 /* blocks rmdir() */ 3349
3364 cgroup_exclude_rmdir(&memcg->css);
3365 if (!migration_ok) { 3350 if (!migration_ok) {
3366 used = oldpage; 3351 used = oldpage;
3367 unused = newpage; 3352 unused = newpage;
@@ -3395,13 +3380,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
3395 */ 3380 */
3396 if (anon) 3381 if (anon)
3397 mem_cgroup_uncharge_page(used); 3382 mem_cgroup_uncharge_page(used);
3398 /*
3399 * At migration, we may charge account against cgroup which has no
3400 * tasks.
3401 * So, rmdir()->pre_destroy() can be called while we do this charge.
3402 * In that case, we need to call pre_destroy() again. check it here.
3403 */
3404 cgroup_release_and_wakeup_rmdir(&memcg->css);
3405} 3383}
3406 3384
3407/* 3385/*