aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c184
1 files changed, 53 insertions, 131 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 753df01a9831..136eceadeed1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -185,7 +185,6 @@ static int need_forkexit_callback __read_mostly;
185static struct cftype cgroup_dfl_base_files[]; 185static struct cftype cgroup_dfl_base_files[];
186static struct cftype cgroup_legacy_base_files[]; 186static struct cftype cgroup_legacy_base_files[];
187 187
188static void cgroup_put(struct cgroup *cgrp);
189static int rebind_subsystems(struct cgroup_root *dst_root, 188static int rebind_subsystems(struct cgroup_root *dst_root,
190 unsigned int ss_mask); 189 unsigned int ss_mask);
191static int cgroup_destroy_locked(struct cgroup *cgrp); 190static int cgroup_destroy_locked(struct cgroup *cgrp);
@@ -195,7 +194,6 @@ static void css_release(struct percpu_ref *ref);
195static void kill_css(struct cgroup_subsys_state *css); 194static void kill_css(struct cgroup_subsys_state *css);
196static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], 195static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
197 bool is_add); 196 bool is_add);
198static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
199 197
200/* IDR wrappers which synchronize using cgroup_idr_lock */ 198/* IDR wrappers which synchronize using cgroup_idr_lock */
201static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end, 199static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
@@ -331,14 +329,6 @@ bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
331 return false; 329 return false;
332} 330}
333 331
334static int cgroup_is_releasable(const struct cgroup *cgrp)
335{
336 const int bits =
337 (1 << CGRP_RELEASABLE) |
338 (1 << CGRP_NOTIFY_ON_RELEASE);
339 return (cgrp->flags & bits) == bits;
340}
341
342static int notify_on_release(const struct cgroup *cgrp) 332static int notify_on_release(const struct cgroup *cgrp)
343{ 333{
344 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 334 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -394,12 +384,7 @@ static int notify_on_release(const struct cgroup *cgrp)
394 ; \ 384 ; \
395 else 385 else
396 386
397/* the list of cgroups eligible for automatic release. Protected by
398 * release_list_lock */
399static LIST_HEAD(release_list);
400static DEFINE_RAW_SPINLOCK(release_list_lock);
401static void cgroup_release_agent(struct work_struct *work); 387static void cgroup_release_agent(struct work_struct *work);
402static DECLARE_WORK(release_agent_work, cgroup_release_agent);
403static void check_for_release(struct cgroup *cgrp); 388static void check_for_release(struct cgroup *cgrp);
404 389
405/* 390/*
@@ -498,7 +483,7 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
498 return key; 483 return key;
499} 484}
500 485
501static void put_css_set_locked(struct css_set *cset, bool taskexit) 486static void put_css_set_locked(struct css_set *cset)
502{ 487{
503 struct cgrp_cset_link *link, *tmp_link; 488 struct cgrp_cset_link *link, *tmp_link;
504 struct cgroup_subsys *ss; 489 struct cgroup_subsys *ss;
@@ -524,11 +509,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
524 /* @cgrp can't go away while we're holding css_set_rwsem */ 509 /* @cgrp can't go away while we're holding css_set_rwsem */
525 if (list_empty(&cgrp->cset_links)) { 510 if (list_empty(&cgrp->cset_links)) {
526 cgroup_update_populated(cgrp, false); 511 cgroup_update_populated(cgrp, false);
527 if (notify_on_release(cgrp)) { 512 check_for_release(cgrp);
528 if (taskexit)
529 set_bit(CGRP_RELEASABLE, &cgrp->flags);
530 check_for_release(cgrp);
531 }
532 } 513 }
533 514
534 kfree(link); 515 kfree(link);
@@ -537,7 +518,7 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
537 kfree_rcu(cset, rcu_head); 518 kfree_rcu(cset, rcu_head);
538} 519}
539 520
540static void put_css_set(struct css_set *cset, bool taskexit) 521static void put_css_set(struct css_set *cset)
541{ 522{
542 /* 523 /*
543 * Ensure that the refcount doesn't hit zero while any readers 524 * Ensure that the refcount doesn't hit zero while any readers
@@ -548,7 +529,7 @@ static void put_css_set(struct css_set *cset, bool taskexit)
548 return; 529 return;
549 530
550 down_write(&css_set_rwsem); 531 down_write(&css_set_rwsem);
551 put_css_set_locked(cset, taskexit); 532 put_css_set_locked(cset);
552 up_write(&css_set_rwsem); 533 up_write(&css_set_rwsem);
553} 534}
554 535
@@ -969,14 +950,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
969 * knows that the cgroup won't be removed, as cgroup_rmdir() 950 * knows that the cgroup won't be removed, as cgroup_rmdir()
970 * needs that mutex. 951 * needs that mutex.
971 * 952 *
972 * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
973 * (usually) take cgroup_mutex. These are the two most performance
974 * critical pieces of code here. The exception occurs on cgroup_exit(),
975 * when a task in a notify_on_release cgroup exits. Then cgroup_mutex
976 * is taken, and if the cgroup count is zero, a usermode call made
977 * to the release agent with the name of the cgroup (path relative to
978 * the root of cgroup file system) as the argument.
979 *
980 * A cgroup can only be deleted if both its 'count' of using tasks 953 * A cgroup can only be deleted if both its 'count' of using tasks
981 * is zero, and its list of 'children' cgroups is empty. Since all 954 * is zero, and its list of 'children' cgroups is empty. Since all
982 * tasks in the system use _some_ cgroup, and since there is always at 955 * tasks in the system use _some_ cgroup, and since there is always at
@@ -1587,7 +1560,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1587 INIT_LIST_HEAD(&cgrp->self.sibling); 1560 INIT_LIST_HEAD(&cgrp->self.sibling);
1588 INIT_LIST_HEAD(&cgrp->self.children); 1561 INIT_LIST_HEAD(&cgrp->self.children);
1589 INIT_LIST_HEAD(&cgrp->cset_links); 1562 INIT_LIST_HEAD(&cgrp->cset_links);
1590 INIT_LIST_HEAD(&cgrp->release_list);
1591 INIT_LIST_HEAD(&cgrp->pidlists); 1563 INIT_LIST_HEAD(&cgrp->pidlists);
1592 mutex_init(&cgrp->pidlist_mutex); 1564 mutex_init(&cgrp->pidlist_mutex);
1593 cgrp->self.cgroup = cgrp; 1565 cgrp->self.cgroup = cgrp;
@@ -1597,6 +1569,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1597 INIT_LIST_HEAD(&cgrp->e_csets[ssid]); 1569 INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
1598 1570
1599 init_waitqueue_head(&cgrp->offline_waitq); 1571 init_waitqueue_head(&cgrp->offline_waitq);
1572 INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent);
1600} 1573}
1601 1574
1602static void init_cgroup_root(struct cgroup_root *root, 1575static void init_cgroup_root(struct cgroup_root *root,
@@ -2053,8 +2026,7 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
2053 * task. As trading it for new_cset is protected by cgroup_mutex, 2026 * task. As trading it for new_cset is protected by cgroup_mutex,
2054 * we're safe to drop it here; it will be freed under RCU. 2027 * we're safe to drop it here; it will be freed under RCU.
2055 */ 2028 */
2056 set_bit(CGRP_RELEASABLE, &old_cgrp->flags); 2029 put_css_set_locked(old_cset);
2057 put_css_set_locked(old_cset, false);
2058} 2030}
2059 2031
2060/** 2032/**
@@ -2075,7 +2047,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
2075 cset->mg_src_cgrp = NULL; 2047 cset->mg_src_cgrp = NULL;
2076 cset->mg_dst_cset = NULL; 2048 cset->mg_dst_cset = NULL;
2077 list_del_init(&cset->mg_preload_node); 2049 list_del_init(&cset->mg_preload_node);
2078 put_css_set_locked(cset, false); 2050 put_css_set_locked(cset);
2079 } 2051 }
2080 up_write(&css_set_rwsem); 2052 up_write(&css_set_rwsem);
2081} 2053}
@@ -2169,8 +2141,8 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
2169 if (src_cset == dst_cset) { 2141 if (src_cset == dst_cset) {
2170 src_cset->mg_src_cgrp = NULL; 2142 src_cset->mg_src_cgrp = NULL;
2171 list_del_init(&src_cset->mg_preload_node); 2143 list_del_init(&src_cset->mg_preload_node);
2172 put_css_set(src_cset, false); 2144 put_css_set(src_cset);
2173 put_css_set(dst_cset, false); 2145 put_css_set(dst_cset);
2174 continue; 2146 continue;
2175 } 2147 }
2176 2148
@@ -2179,7 +2151,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
2179 if (list_empty(&dst_cset->mg_preload_node)) 2151 if (list_empty(&dst_cset->mg_preload_node))
2180 list_add(&dst_cset->mg_preload_node, &csets); 2152 list_add(&dst_cset->mg_preload_node, &csets);
2181 else 2153 else
2182 put_css_set(dst_cset, false); 2154 put_css_set(dst_cset);
2183 } 2155 }
2184 2156
2185 list_splice_tail(&csets, preloaded_csets); 2157 list_splice_tail(&csets, preloaded_csets);
@@ -4174,7 +4146,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
4174static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css, 4146static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
4175 struct cftype *cft, u64 val) 4147 struct cftype *cft, u64 val)
4176{ 4148{
4177 clear_bit(CGRP_RELEASABLE, &css->cgroup->flags);
4178 if (val) 4149 if (val)
4179 set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); 4150 set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
4180 else 4151 else
@@ -4352,6 +4323,7 @@ static void css_free_work_fn(struct work_struct *work)
4352 /* cgroup free path */ 4323 /* cgroup free path */
4353 atomic_dec(&cgrp->root->nr_cgrps); 4324 atomic_dec(&cgrp->root->nr_cgrps);
4354 cgroup_pidlist_destroy_all(cgrp); 4325 cgroup_pidlist_destroy_all(cgrp);
4326 cancel_work_sync(&cgrp->release_agent_work);
4355 4327
4356 if (cgroup_parent(cgrp)) { 4328 if (cgroup_parent(cgrp)) {
4357 /* 4329 /*
@@ -4814,19 +4786,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
4814 for_each_css(css, ssid, cgrp) 4786 for_each_css(css, ssid, cgrp)
4815 kill_css(css); 4787 kill_css(css);
4816 4788
4817 /* CSS_ONLINE is clear, remove from ->release_list for the last time */
4818 raw_spin_lock(&release_list_lock);
4819 if (!list_empty(&cgrp->release_list))
4820 list_del_init(&cgrp->release_list);
4821 raw_spin_unlock(&release_list_lock);
4822
4823 /* 4789 /*
4824 * Remove @cgrp directory along with the base files. @cgrp has an 4790 * Remove @cgrp directory along with the base files. @cgrp has an
4825 * extra ref on its kn. 4791 * extra ref on its kn.
4826 */ 4792 */
4827 kernfs_remove(cgrp->kn); 4793 kernfs_remove(cgrp->kn);
4828 4794
4829 set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags);
4830 check_for_release(cgroup_parent(cgrp)); 4795 check_for_release(cgroup_parent(cgrp));
4831 4796
4832 /* put the base reference */ 4797 /* put the base reference */
@@ -4843,13 +4808,10 @@ static int cgroup_rmdir(struct kernfs_node *kn)
4843 cgrp = cgroup_kn_lock_live(kn); 4808 cgrp = cgroup_kn_lock_live(kn);
4844 if (!cgrp) 4809 if (!cgrp)
4845 return 0; 4810 return 0;
4846 cgroup_get(cgrp); /* for @kn->priv clearing */
4847 4811
4848 ret = cgroup_destroy_locked(cgrp); 4812 ret = cgroup_destroy_locked(cgrp);
4849 4813
4850 cgroup_kn_unlock(kn); 4814 cgroup_kn_unlock(kn);
4851
4852 cgroup_put(cgrp);
4853 return ret; 4815 return ret;
4854} 4816}
4855 4817
@@ -5053,12 +5015,9 @@ core_initcall(cgroup_wq_init);
5053 * - Print task's cgroup paths into seq_file, one line for each hierarchy 5015 * - Print task's cgroup paths into seq_file, one line for each hierarchy
5054 * - Used for /proc/<pid>/cgroup. 5016 * - Used for /proc/<pid>/cgroup.
5055 */ 5017 */
5056 5018int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
5057/* TODO: Use a proper seq_file iterator */ 5019 struct pid *pid, struct task_struct *tsk)
5058int proc_cgroup_show(struct seq_file *m, void *v)
5059{ 5020{
5060 struct pid *pid;
5061 struct task_struct *tsk;
5062 char *buf, *path; 5021 char *buf, *path;
5063 int retval; 5022 int retval;
5064 struct cgroup_root *root; 5023 struct cgroup_root *root;
@@ -5068,14 +5027,6 @@ int proc_cgroup_show(struct seq_file *m, void *v)
5068 if (!buf) 5027 if (!buf)
5069 goto out; 5028 goto out;
5070 5029
5071 retval = -ESRCH;
5072 pid = m->private;
5073 tsk = get_pid_task(pid, PIDTYPE_PID);
5074 if (!tsk)
5075 goto out_free;
5076
5077 retval = 0;
5078
5079 mutex_lock(&cgroup_mutex); 5030 mutex_lock(&cgroup_mutex);
5080 down_read(&css_set_rwsem); 5031 down_read(&css_set_rwsem);
5081 5032
@@ -5105,11 +5056,10 @@ int proc_cgroup_show(struct seq_file *m, void *v)
5105 seq_putc(m, '\n'); 5056 seq_putc(m, '\n');
5106 } 5057 }
5107 5058
5059 retval = 0;
5108out_unlock: 5060out_unlock:
5109 up_read(&css_set_rwsem); 5061 up_read(&css_set_rwsem);
5110 mutex_unlock(&cgroup_mutex); 5062 mutex_unlock(&cgroup_mutex);
5111 put_task_struct(tsk);
5112out_free:
5113 kfree(buf); 5063 kfree(buf);
5114out: 5064out:
5115 return retval; 5065 return retval;
@@ -5180,7 +5130,7 @@ void cgroup_post_fork(struct task_struct *child)
5180 int i; 5130 int i;
5181 5131
5182 /* 5132 /*
5183 * This may race against cgroup_enable_task_cg_links(). As that 5133 * This may race against cgroup_enable_task_cg_lists(). As that
5184 * function sets use_task_css_set_links before grabbing 5134 * function sets use_task_css_set_links before grabbing
5185 * tasklist_lock and we just went through tasklist_lock to add 5135 * tasklist_lock and we just went through tasklist_lock to add
5186 * @child, it's guaranteed that either we see the set 5136 * @child, it's guaranteed that either we see the set
@@ -5195,7 +5145,7 @@ void cgroup_post_fork(struct task_struct *child)
5195 * when implementing operations which need to migrate all tasks of 5145 * when implementing operations which need to migrate all tasks of
5196 * a cgroup to another. 5146 * a cgroup to another.
5197 * 5147 *
5198 * Note that if we lose to cgroup_enable_task_cg_links(), @child 5148 * Note that if we lose to cgroup_enable_task_cg_lists(), @child
5199 * will remain in init_css_set. This is safe because all tasks are 5149 * will remain in init_css_set. This is safe because all tasks are
5200 * in the init_css_set before cg_links is enabled and there's no 5150 * in the init_css_set before cg_links is enabled and there's no
5201 * operation which transfers all tasks out of init_css_set. 5151 * operation which transfers all tasks out of init_css_set.
@@ -5279,30 +5229,14 @@ void cgroup_exit(struct task_struct *tsk)
5279 } 5229 }
5280 5230
5281 if (put_cset) 5231 if (put_cset)
5282 put_css_set(cset, true); 5232 put_css_set(cset);
5283} 5233}
5284 5234
5285static void check_for_release(struct cgroup *cgrp) 5235static void check_for_release(struct cgroup *cgrp)
5286{ 5236{
5287 if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) && 5237 if (notify_on_release(cgrp) && !cgroup_has_tasks(cgrp) &&
5288 !css_has_online_children(&cgrp->self)) { 5238 !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
5289 /* 5239 schedule_work(&cgrp->release_agent_work);
5290 * Control Group is currently removeable. If it's not
5291 * already queued for a userspace notification, queue
5292 * it now
5293 */
5294 int need_schedule_work = 0;
5295
5296 raw_spin_lock(&release_list_lock);
5297 if (!cgroup_is_dead(cgrp) &&
5298 list_empty(&cgrp->release_list)) {
5299 list_add(&cgrp->release_list, &release_list);
5300 need_schedule_work = 1;
5301 }
5302 raw_spin_unlock(&release_list_lock);
5303 if (need_schedule_work)
5304 schedule_work(&release_agent_work);
5305 }
5306} 5240}
5307 5241
5308/* 5242/*
@@ -5330,52 +5264,39 @@ static void check_for_release(struct cgroup *cgrp)
5330 */ 5264 */
5331static void cgroup_release_agent(struct work_struct *work) 5265static void cgroup_release_agent(struct work_struct *work)
5332{ 5266{
5333 BUG_ON(work != &release_agent_work); 5267 struct cgroup *cgrp =
5268 container_of(work, struct cgroup, release_agent_work);
5269 char *pathbuf = NULL, *agentbuf = NULL, *path;
5270 char *argv[3], *envp[3];
5271
5334 mutex_lock(&cgroup_mutex); 5272 mutex_lock(&cgroup_mutex);
5335 raw_spin_lock(&release_list_lock); 5273
5336 while (!list_empty(&release_list)) { 5274 pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
5337 char *argv[3], *envp[3]; 5275 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
5338 int i; 5276 if (!pathbuf || !agentbuf)
5339 char *pathbuf = NULL, *agentbuf = NULL, *path; 5277 goto out;
5340 struct cgroup *cgrp = list_entry(release_list.next, 5278
5341 struct cgroup, 5279 path = cgroup_path(cgrp, pathbuf, PATH_MAX);
5342 release_list); 5280 if (!path)
5343 list_del_init(&cgrp->release_list); 5281 goto out;
5344 raw_spin_unlock(&release_list_lock); 5282
5345 pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); 5283 argv[0] = agentbuf;
5346 if (!pathbuf) 5284 argv[1] = path;
5347 goto continue_free; 5285 argv[2] = NULL;
5348 path = cgroup_path(cgrp, pathbuf, PATH_MAX); 5286
5349 if (!path) 5287 /* minimal command environment */
5350 goto continue_free; 5288 envp[0] = "HOME=/";
5351 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); 5289 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
5352 if (!agentbuf) 5290 envp[2] = NULL;
5353 goto continue_free; 5291
5354 5292 mutex_unlock(&cgroup_mutex);
5355 i = 0; 5293 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
5356 argv[i++] = agentbuf; 5294 goto out_free;
5357 argv[i++] = path; 5295out:
5358 argv[i] = NULL;
5359
5360 i = 0;
5361 /* minimal command environment */
5362 envp[i++] = "HOME=/";
5363 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
5364 envp[i] = NULL;
5365
5366 /* Drop the lock while we invoke the usermode helper,
5367 * since the exec could involve hitting disk and hence
5368 * be a slow process */
5369 mutex_unlock(&cgroup_mutex);
5370 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
5371 mutex_lock(&cgroup_mutex);
5372 continue_free:
5373 kfree(pathbuf);
5374 kfree(agentbuf);
5375 raw_spin_lock(&release_list_lock);
5376 }
5377 raw_spin_unlock(&release_list_lock);
5378 mutex_unlock(&cgroup_mutex); 5296 mutex_unlock(&cgroup_mutex);
5297out_free:
5298 kfree(agentbuf);
5299 kfree(pathbuf);
5379} 5300}
5380 5301
5381static int __init cgroup_disable(char *str) 5302static int __init cgroup_disable(char *str)
@@ -5563,7 +5484,8 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
5563 5484
5564static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft) 5485static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
5565{ 5486{
5566 return test_bit(CGRP_RELEASABLE, &css->cgroup->flags); 5487 return (!cgroup_has_tasks(css->cgroup) &&
5488 !css_has_online_children(&css->cgroup->self));
5567} 5489}
5568 5490
5569static struct cftype debug_files[] = { 5491static struct cftype debug_files[] = {