-rw-r--r--  include/linux/cgroup.h |  21
-rw-r--r--  kernel/cgroup.c        |  51
-rw-r--r--  mm/memcontrol.c        |  24
3 files changed, 1 insertion(+), 95 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a3098046250b..47868a86ba2b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -145,10 +145,6 @@ enum {
 	/* Control Group requires release notifications to userspace */
 	CGRP_NOTIFY_ON_RELEASE,
 	/*
-	 * A thread in rmdir() is wating for this cgroup.
-	 */
-	CGRP_WAIT_ON_RMDIR,
-	/*
 	 * Clone cgroup values when creating a new child cgroup
 	 */
 	CGRP_CLONE_CHILDREN,
@@ -412,23 +408,6 @@ int cgroup_task_count(const struct cgroup *cgrp);
 int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
 
 /*
- * When the subsys has to access css and may add permanent refcnt to css,
- * it should take care of racy conditions with rmdir(). Following set of
- * functions, is for stop/restart rmdir if necessary.
- * Because these will call css_get/put, "css" should be alive css.
- *
- * cgroup_exclude_rmdir();
- *    ...do some jobs which may access arbitrary empty cgroup
- * cgroup_release_and_wakeup_rmdir();
- *
- * When someone removes a cgroup while cgroup_exclude_rmdir() holds it,
- * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up.
- */
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css);
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css);
-
-/*
  * Control Group taskset, used to pass around set of tasks to cgroup_subsys
  * methods.
  */
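For reference, the calling convention that the deleted include/linux/cgroup.h comment describes looked roughly like the sketch below. This is illustrative only and not part of the patch: the helper name example_touch_empty_cgroup() is invented, and the work in the middle is a placeholder.

#include <linux/cgroup.h>

/* Hypothetical controller-side helper showing the removed API pair. */
static void example_touch_empty_cgroup(struct cgroup_subsys_state *css)
{
	/* "css" must already be alive; this pins it via css_get() */
	cgroup_exclude_rmdir(css);

	/* ... do work that may touch an arbitrary, possibly empty cgroup ... */

	/* drop the pin and wake any rmdir() that went to sleep meanwhile */
	cgroup_release_and_wakeup_rmdir(css);
}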
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 66204a6f68f3..c5f6fb28dd0e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -966,33 +966,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 }
 
 /*
- * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
- * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
- * reference to css->refcnt. In general, this refcnt is expected to goes down
- * to zero, soon.
- *
- * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
- */
-static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
-
-static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
-{
-	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
-		wake_up_all(&cgroup_rmdir_waitq);
-}
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
-{
-	css_get(css);
-}
-
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
-{
-	cgroup_wakeup_rmdir_waiter(css->cgroup);
-	css_put(css);
-}
-
-/*
  * Call with cgroup_mutex held. Drops reference counts on modules, including
  * any duplicate ones that parse_cgroupfs_options took. If this function
  * returns an error, no reference counts are touched.
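The deleted comment above describes the only state in which an rmdir() caller would actually sleep: the cgroup itself looks empty, yet some subsystem still holds css->refcnt. A hedged sketch of that condition, using only fields that appear elsewhere in this diff (cgrp->count, cgrp->children); the helper name is invented for illustration and is not in the kernel.

#include <linux/cgroup.h>

/* Invented helper: the window in which the old rmdir() path would block. */
static bool example_rmdir_would_wait(struct cgroup *cgrp)
{
	/* not empty at all: the old rmdir() simply returned -EBUSY */
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children))
		return false;
	/*
	 * Empty cgroup, but a subsystem may still hold css->refcnt; this is
	 * when rmdir() slept on cgroup_rmdir_waitq until
	 * cgroup_release_and_wakeup_rmdir() or __css_put() woke it.
	 */
	return true;
}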
@@ -1963,12 +1936,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	}
 
 	synchronize_rcu();
-
-	/*
-	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
-	 * is no longer empty.
-	 */
-	cgroup_wakeup_rmdir_waiter(cgrp);
 out:
 	if (retval) {
 		for_each_subsys(root, ss) {
@@ -2138,7 +2105,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	 * step 5: success! and cleanup
 	 */
 	synchronize_rcu();
-	cgroup_wakeup_rmdir_waiter(cgrp);
 	retval = 0;
 out_put_css_set_refs:
 	if (retval) {
@@ -4058,26 +4024,13 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	struct cgroup_event *event, *tmp;
 	struct cgroup_subsys *ss;
 
-	/*
-	 * In general, subsystem has no css->refcnt after pre_destroy(). But
-	 * in racy cases, subsystem may have to get css->refcnt after
-	 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
-	 * make rmdir return -EBUSY too often. To avoid that, we use waitqueue
-	 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
-	 * and subsystem's reference count handling. Please see css_get/put
-	 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
-	 */
-	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
-
 	/* the vfs holds both inode->i_mutex already */
 	mutex_lock(&cgroup_mutex);
 	parent = cgrp->parent;
 	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
-		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
-	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
 
 	/*
 	 * Block new css_tryget() by deactivating refcnt and mark @cgrp
@@ -4114,9 +4067,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	for_each_subsys(cgrp->root, ss)
 		css_put(cgrp->subsys[ss->subsys_id]);
 
-	finish_wait(&cgroup_rmdir_waitq, &wait);
-	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
-
 	raw_spin_lock(&release_list_lock);
 	if (!list_empty(&cgrp->release_list))
 		list_del_init(&cgrp->release_list);
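The prepare_to_wait()/finish_wait() calls stripped from cgroup_rmdir() are the sleeper half of the standard sleep-on-condition idiom, with CGRP_WAIT_ON_RMDIR as the arming flag and cgroup_wakeup_rmdir_waiter() as the waker half. Below is a generic, self-contained sketch of that idiom with hypothetical names (demo_waitq, demo_wait_for, demo_condition); it shows the shape of the handshake, not the literal old cgroup_rmdir() control flow.

#include <linux/wait.h>
#include <linux/sched.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);

/* Sleep until demo_condition() holds; wakers call wake_up_all(&demo_waitq). */
static void demo_wait_for(bool (*demo_condition)(void))
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&demo_waitq, &wait, TASK_INTERRUPTIBLE);
		if (demo_condition())
			break;
		/* a robust caller would also check signal_pending() here */
		schedule();
	}
	finish_wait(&demo_waitq, &wait);
}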
@@ -4864,7 +4814,6 @@ void __css_put(struct cgroup_subsys_state *css)
 			set_bit(CGRP_RELEASABLE, &cgrp->flags);
 			check_for_release(cgrp);
 		}
-		cgroup_wakeup_rmdir_waiter(cgrp);
 		break;
 	case 0:
 		schedule_work(&css->dput_work);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 37c356646544..930edfaa5187 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2681,13 +2681,6 @@ static int mem_cgroup_move_account(struct page *page,
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
 	mem_cgroup_charge_statistics(to, anon, nr_pages);
-	/*
-	 * We charges against "to" which may not have any tasks. Then, "to"
-	 * can be under rmdir(). But in current implementation, caller of
-	 * this function is just force_empty() and move charge, so it's
-	 * guaranteed that "to" is never removed. So, we don't check rmdir
-	 * status here.
-	 */
 	move_unlock_mem_cgroup(from, &flags);
 	ret = 0;
 unlock:
@@ -2893,7 +2886,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 		return;
 	if (!memcg)
 		return;
-	cgroup_exclude_rmdir(&memcg->css);
 
 	__mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
 	/*
@@ -2907,12 +2899,6 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 		swp_entry_t ent = {.val = page_private(page)};
 		mem_cgroup_uncharge_swap(ent);
 	}
-	/*
-	 * At swapin, we may charge account against cgroup which has no tasks.
-	 * So, rmdir()->pre_destroy() can be called while we do this charge.
-	 * In that case, we need to call pre_destroy() again. check it here.
-	 */
-	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
 void mem_cgroup_commit_charge_swapin(struct page *page,
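Before this patch, the swapin commit path bracketed the charge with the pair being removed, because the target memcg may have no tasks and could be the subject of a concurrent rmdir(). A simplified sketch of that pre-patch shape, heavily trimmed and using an invented function name; only calls that appear in this hunk are shown.

/* Simplified pre-patch sketch of __mem_cgroup_commit_charge_swapin(). */
static void example_commit_charge_swapin(struct page *page,
					 struct mem_cgroup *memcg,
					 enum charge_type ctype)
{
	cgroup_exclude_rmdir(&memcg->css);	/* memcg may have no tasks */

	__mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
	/* ... swap-slot uncharging elided ... */

	/* rmdir()->pre_destroy() may have run meanwhile; wake it to recheck */
	cgroup_release_and_wakeup_rmdir(&memcg->css);
}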
@@ -3360,8 +3346,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 
 	if (!memcg)
 		return;
-	/* blocks rmdir() */
-	cgroup_exclude_rmdir(&memcg->css);
+
 	if (!migration_ok) {
 		used = oldpage;
 		unused = newpage;
@@ -3395,13 +3380,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	 */
 	if (anon)
 		mem_cgroup_uncharge_page(used);
-	/*
-	 * At migration, we may charge account against cgroup which has no
-	 * tasks.
-	 * So, rmdir()->pre_destroy() can be called while we do this charge.
-	 * In that case, we need to call pre_destroy() again. check it here.
-	 */
-	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
 /*