about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/cgroup.h 34
-rw-r--r-- kernel/cgroup.c 107
2 files changed, 119 insertions, 22 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2357733a0a80..88e863460726 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -141,15 +141,36 @@ enum {
141 CGRP_WAIT_ON_RMDIR, 141 CGRP_WAIT_ON_RMDIR,
142}; 142};
143 143
144/* which pidlist file are we talking about? */
145enum cgroup_filetype {
146 CGROUP_FILE_PROCS,
147 CGROUP_FILE_TASKS,
148};
149
150/*
151 * A pidlist is a list of pids that virtually represents the contents of one
152 * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
153 * a pair (one each for procs, tasks) for each pid namespace that's relevant
154 * to the cgroup.
155 */
144struct cgroup_pidlist { 156struct cgroup_pidlist {
145 /* protects the other fields */ 157 /*
146 struct rw_semaphore mutex; 158 * used to find which pidlist is wanted. doesn't change as long as
159 * this particular list stays in the list.
160 */
161 struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
147 /* array of xids */ 162 /* array of xids */
148 pid_t *list; 163 pid_t *list;
149 /* how many elements the above list has */ 164 /* how many elements the above list has */
150 int length; 165 int length;
151 /* how many files are using the current array */ 166 /* how many files are using the current array */
152 int use_count; 167 int use_count;
168 /* each of these stored in a list by its cgroup */
169 struct list_head links;
170 /* pointer to the cgroup we belong to, for list removal purposes */
171 struct cgroup *owner;
172 /* protects the other fields */
173 struct rw_semaphore mutex;
153}; 174};
154 175
155struct cgroup { 176struct cgroup {
@@ -190,9 +211,12 @@ struct cgroup {
190 */ 211 */
191 struct list_head release_list; 212 struct list_head release_list;
192 213
193 /* we will have two separate pidlists, one for pids (the tasks file) 214 /*
194 * and one for tgids (the procs file). */ 215 * list of pidlists, up to two for each namespace (one for procs, one
195 struct cgroup_pidlist tasks, procs; 216 * for tasks); created on demand.
217 */
218 struct list_head pidlists;
219 struct mutex pidlist_mutex;
196 220
197 /* For RCU-protected deletion */ 221 /* For RCU-protected deletion */
198 struct rcu_head rcu_head; 222 struct rcu_head rcu_head;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a9433f50e53d..97194ba12014 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -776,6 +776,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
776 */ 776 */
777 deactivate_super(cgrp->root->sb); 777 deactivate_super(cgrp->root->sb);
778 778
779 /*
780 * if we're getting rid of the cgroup, refcount should ensure
781 * that there are no pidlists left.
782 */
783 BUG_ON(!list_empty(&cgrp->pidlists));
784
779 call_rcu(&cgrp->rcu_head, free_cgroup_rcu); 785 call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
780 } 786 }
781 iput(inode); 787 iput(inode);
@@ -1121,8 +1127,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1121 INIT_LIST_HEAD(&cgrp->children); 1127 INIT_LIST_HEAD(&cgrp->children);
1122 INIT_LIST_HEAD(&cgrp->css_sets); 1128 INIT_LIST_HEAD(&cgrp->css_sets);
1123 INIT_LIST_HEAD(&cgrp->release_list); 1129 INIT_LIST_HEAD(&cgrp->release_list);
1124 init_rwsem(&(cgrp->tasks.mutex)); 1130 INIT_LIST_HEAD(&cgrp->pidlists);
1125 init_rwsem(&(cgrp->procs.mutex)); 1131 mutex_init(&cgrp->pidlist_mutex);
1126} 1132}
1127 1133
1128static void init_cgroup_root(struct cgroupfs_root *root) 1134static void init_cgroup_root(struct cgroupfs_root *root)
@@ -2396,9 +2402,59 @@ static int cmppid(const void *a, const void *b)
2396} 2402}
2397 2403
2398/* 2404/*
2405 * find the appropriate pidlist for our purpose (given procs vs tasks)
2406 * returns with the lock on that pidlist already held, and takes care
2407 * of the use count, or returns NULL with no locks held if we're out of
2408 * memory.
2409 */
2410static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2411 enum cgroup_filetype type)
2412{
2413 struct cgroup_pidlist *l;
2414 /* don't need task_nsproxy() if we're looking at ourself */
2415 struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns);
2416 /*
2417 * We can't drop the pidlist_mutex before taking the l->mutex in case
2418 * the last ref-holder is trying to remove l from the list at the same
2419 * time. Holding the pidlist_mutex precludes somebody taking whichever
2420 * list we find out from under us - compare cgroup_release_pid_array().
2421 */
2422 mutex_lock(&cgrp->pidlist_mutex);
2423 list_for_each_entry(l, &cgrp->pidlists, links) {
2424 if (l->key.type == type && l->key.ns == ns) {
2425 /* found a matching list - drop the extra refcount */
2426 put_pid_ns(ns);
2427 /* make sure l doesn't vanish out from under us */
2428 down_write(&l->mutex);
2429 mutex_unlock(&cgrp->pidlist_mutex);
2430 l->use_count++;
2431 return l;
2432 }
2433 }
2434 /* entry not found; create a new one */
2435 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
2436 if (!l) {
2437 mutex_unlock(&cgrp->pidlist_mutex);
2438 put_pid_ns(ns);
2439 return l;
2440 }
2441 init_rwsem(&l->mutex);
2442 down_write(&l->mutex);
2443 l->key.type = type;
2444 l->key.ns = ns;
2445 l->use_count = 0; /* don't increment here */
2446 l->list = NULL;
2447 l->owner = cgrp;
2448 list_add(&l->links, &cgrp->pidlists);
2449 mutex_unlock(&cgrp->pidlist_mutex);
2450 return l;
2451}
2452
2453/*
2399 * Load a cgroup's pidarray with either procs' tgids or tasks' pids 2454 * Load a cgroup's pidarray with either procs' tgids or tasks' pids
2400 */ 2455 */
2401static int pidlist_array_load(struct cgroup *cgrp, bool procs) 2456static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
2457 struct cgroup_pidlist **lp)
2402{ 2458{
2403 pid_t *array; 2459 pid_t *array;
2404 int length; 2460 int length;
@@ -2423,7 +2479,10 @@ static int pidlist_array_load(struct cgroup *cgrp, bool procs)
2423 if (unlikely(n == length)) 2479 if (unlikely(n == length))
2424 break; 2480 break;
2425 /* get tgid or pid for procs or tasks file respectively */ 2481 /* get tgid or pid for procs or tasks file respectively */
2426 pid = (procs ? task_tgid_vnr(tsk) : task_pid_vnr(tsk)); 2482 if (type == CGROUP_FILE_PROCS)
2483 pid = task_tgid_vnr(tsk);
2484 else
2485 pid = task_pid_vnr(tsk);
2427 if (pid > 0) /* make sure to only use valid results */ 2486 if (pid > 0) /* make sure to only use valid results */
2428 array[n++] = pid; 2487 array[n++] = pid;
2429 } 2488 }
@@ -2431,19 +2490,20 @@ static int pidlist_array_load(struct cgroup *cgrp, bool procs)
2431 length = n; 2490 length = n;
2432 /* now sort & (if procs) strip out duplicates */ 2491 /* now sort & (if procs) strip out duplicates */
2433 sort(array, length, sizeof(pid_t), cmppid, NULL); 2492 sort(array, length, sizeof(pid_t), cmppid, NULL);
2434 if (procs) { 2493 if (type == CGROUP_FILE_PROCS)
2435 length = pidlist_uniq(&array, length); 2494 length = pidlist_uniq(&array, length);
2436 l = &(cgrp->procs); 2495 l = cgroup_pidlist_find(cgrp, type);
2437 } else { 2496 if (!l) {
2438 l = &(cgrp->tasks); 2497 kfree(array);
2498 return -ENOMEM;
2439 } 2499 }
2440 /* store array in cgroup, freeing old if necessary */ 2500 /* store array, freeing old if necessary - lock already held */
2441 down_write(&l->mutex);
2442 kfree(l->list); 2501 kfree(l->list);
2443 l->list = array; 2502 l->list = array;
2444 l->length = length; 2503 l->length = length;
2445 l->use_count++; 2504 l->use_count++;
2446 up_write(&l->mutex); 2505 up_write(&l->mutex);
2506 *lp = l;
2447 return 0; 2507 return 0;
2448} 2508}
2449 2509
@@ -2586,13 +2646,26 @@ static const struct seq_operations cgroup_pidlist_seq_operations = {
2586 2646
2587static void cgroup_release_pid_array(struct cgroup_pidlist *l) 2647static void cgroup_release_pid_array(struct cgroup_pidlist *l)
2588{ 2648{
2649 /*
2650 * the case where we're the last user of this particular pidlist will
2651 * have us remove it from the cgroup's list, which entails taking the
2652 * mutex. since in cgroup_pidlist_find the pidlist->mutex depends on
2653 * cgroup->pidlist_mutex, we have to take pidlist_mutex first.
2654 */
2655 mutex_lock(&l->owner->pidlist_mutex);
2589 down_write(&l->mutex); 2656 down_write(&l->mutex);
2590 BUG_ON(!l->use_count); 2657 BUG_ON(!l->use_count);
2591 if (!--l->use_count) { 2658 if (!--l->use_count) {
2659 /* we're the last user if refcount is 0; remove and free */
2660 list_del(&l->links);
2661 mutex_unlock(&l->owner->pidlist_mutex);
2592 kfree(l->list); 2662 kfree(l->list);
2593 l->list = NULL; 2663 put_pid_ns(l->key.ns);
2594 l->length = 0; 2664 up_write(&l->mutex);
2665 kfree(l);
2666 return;
2595 } 2667 }
2668 mutex_unlock(&l->owner->pidlist_mutex);
2596 up_write(&l->mutex); 2669 up_write(&l->mutex);
2597} 2670}
2598 2671
@@ -2623,10 +2696,10 @@ static const struct file_operations cgroup_pidlist_operations = {
2623 * in the cgroup. 2696 * in the cgroup.
2624 */ 2697 */
2625/* helper function for the two below it */ 2698/* helper function for the two below it */
2626static int cgroup_pidlist_open(struct file *file, bool procs) 2699static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
2627{ 2700{
2628 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); 2701 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2629 struct cgroup_pidlist *l = (procs ? &cgrp->procs : &cgrp->tasks); 2702 struct cgroup_pidlist *l;
2630 int retval; 2703 int retval;
2631 2704
2632 /* Nothing to do for write-only files */ 2705 /* Nothing to do for write-only files */
@@ -2634,7 +2707,7 @@ static int cgroup_pidlist_open(struct file *file, bool procs)
2634 return 0; 2707 return 0;
2635 2708
2636 /* have the array populated */ 2709 /* have the array populated */
2637 retval = pidlist_array_load(cgrp, procs); 2710 retval = pidlist_array_load(cgrp, type, &l);
2638 if (retval) 2711 if (retval)
2639 return retval; 2712 return retval;
2640 /* configure file information */ 2713 /* configure file information */
@@ -2650,11 +2723,11 @@ static int cgroup_pidlist_open(struct file *file, bool procs)
2650} 2723}
2651static int cgroup_tasks_open(struct inode *unused, struct file *file) 2724static int cgroup_tasks_open(struct inode *unused, struct file *file)
2652{ 2725{
2653 return cgroup_pidlist_open(file, false); 2726 return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
2654} 2727}
2655static int cgroup_procs_open(struct inode *unused, struct file *file) 2728static int cgroup_procs_open(struct inode *unused, struct file *file)
2656{ 2729{
2657 return cgroup_pidlist_open(file, true); 2730 return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
2658} 2731}
2659 2732
2660static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, 2733static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,