diff options
-rw-r--r-- | include/linux/cgroup.h | 34 | ||||
-rw-r--r-- | kernel/cgroup.c | 107 |
2 files changed, 119 insertions, 22 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2357733a0a80..88e863460726 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -141,15 +141,36 @@ enum { | |||
141 | CGRP_WAIT_ON_RMDIR, | 141 | CGRP_WAIT_ON_RMDIR, |
142 | }; | 142 | }; |
143 | 143 | ||
/*
 * Selects which of a cgroup's pid-listing files a pidlist stands for.
 */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,	/* the "procs" file: thread group ids */
	CGROUP_FILE_TASKS,	/* the "tasks" file: per-task pids */
};
149 | |||
150 | /* | ||
151 | * A pidlist is a list of pids that virtually represents the contents of one | ||
152 | * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, | ||
153 | * a pair (one each for procs, tasks) for each pid namespace that's relevant | ||
154 | * to the cgroup. | ||
155 | */ | ||
144 | struct cgroup_pidlist { | 156 | struct cgroup_pidlist { |
145 | /* protects the other fields */ | 157 | /* |
146 | struct rw_semaphore mutex; | 158 | * used to find which pidlist is wanted. doesn't change as long as |
159 | * this particular list stays in the list. | ||
160 | */ | ||
161 | struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; | ||
147 | /* array of xids */ | 162 | /* array of xids */ |
148 | pid_t *list; | 163 | pid_t *list; |
149 | /* how many elements the above list has */ | 164 | /* how many elements the above list has */ |
150 | int length; | 165 | int length; |
151 | /* how many files are using the current array */ | 166 | /* how many files are using the current array */ |
152 | int use_count; | 167 | int use_count; |
168 | /* each of these stored in a list by its cgroup */ | ||
169 | struct list_head links; | ||
170 | /* pointer to the cgroup we belong to, for list removal purposes */ | ||
171 | struct cgroup *owner; | ||
172 | /* protects the other fields */ | ||
173 | struct rw_semaphore mutex; | ||
153 | }; | 174 | }; |
154 | 175 | ||
155 | struct cgroup { | 176 | struct cgroup { |
@@ -190,9 +211,12 @@ struct cgroup { | |||
190 | */ | 211 | */ |
191 | struct list_head release_list; | 212 | struct list_head release_list; |
192 | 213 | ||
193 | /* we will have two separate pidlists, one for pids (the tasks file) | 214 | /* |
194 | * and one for tgids (the procs file). */ | 215 | * list of pidlists, up to two for each namespace (one for procs, one |
195 | struct cgroup_pidlist tasks, procs; | 216 | * for tasks); created on demand. |
217 | */ | ||
218 | struct list_head pidlists; | ||
219 | struct mutex pidlist_mutex; | ||
196 | 220 | ||
197 | /* For RCU-protected deletion */ | 221 | /* For RCU-protected deletion */ |
198 | struct rcu_head rcu_head; | 222 | struct rcu_head rcu_head; |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a9433f50e53d..97194ba12014 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -776,6 +776,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
776 | */ | 776 | */ |
777 | deactivate_super(cgrp->root->sb); | 777 | deactivate_super(cgrp->root->sb); |
778 | 778 | ||
779 | /* | ||
780 | * if we're getting rid of the cgroup, refcount should ensure | ||
781 | * that there are no pidlists left. | ||
782 | */ | ||
783 | BUG_ON(!list_empty(&cgrp->pidlists)); | ||
784 | |||
779 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); | 785 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); |
780 | } | 786 | } |
781 | iput(inode); | 787 | iput(inode); |
@@ -1121,8 +1127,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1121 | INIT_LIST_HEAD(&cgrp->children); | 1127 | INIT_LIST_HEAD(&cgrp->children); |
1122 | INIT_LIST_HEAD(&cgrp->css_sets); | 1128 | INIT_LIST_HEAD(&cgrp->css_sets); |
1123 | INIT_LIST_HEAD(&cgrp->release_list); | 1129 | INIT_LIST_HEAD(&cgrp->release_list); |
1124 | init_rwsem(&(cgrp->tasks.mutex)); | 1130 | INIT_LIST_HEAD(&cgrp->pidlists); |
1125 | init_rwsem(&(cgrp->procs.mutex)); | 1131 | mutex_init(&cgrp->pidlist_mutex); |
1126 | } | 1132 | } |
1127 | 1133 | ||
1128 | static void init_cgroup_root(struct cgroupfs_root *root) | 1134 | static void init_cgroup_root(struct cgroupfs_root *root) |
@@ -2396,9 +2402,59 @@ static int cmppid(const void *a, const void *b) | |||
2396 | } | 2402 | } |
2397 | 2403 | ||
2398 | /* | 2404 | /* |
2405 | * find the appropriate pidlist for our purpose (given procs vs tasks) | ||
2406 | * returns with the lock on that pidlist already held, and takes care | ||
2407 | * of the use count, or returns NULL with no locks held if we're out of | ||
2408 | * memory. | ||
2409 | */ | ||
2410 | static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | ||
2411 | enum cgroup_filetype type) | ||
2412 | { | ||
2413 | struct cgroup_pidlist *l; | ||
2414 | /* don't need task_nsproxy() if we're looking at ourself */ | ||
2415 | struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns); | ||
2416 | /* | ||
2417 | * We can't drop the pidlist_mutex before taking the l->mutex in case | ||
2418 | * the last ref-holder is trying to remove l from the list at the same | ||
2419 | * time. Holding the pidlist_mutex precludes somebody taking whichever | ||
2420 | * list we find out from under us - compare release_pid_array(). | ||
2421 | */ | ||
2422 | mutex_lock(&cgrp->pidlist_mutex); | ||
2423 | list_for_each_entry(l, &cgrp->pidlists, links) { | ||
2424 | if (l->key.type == type && l->key.ns == ns) { | ||
2425 | /* found a matching list - drop the extra refcount */ | ||
2426 | put_pid_ns(ns); | ||
2427 | /* make sure l doesn't vanish out from under us */ | ||
2428 | down_write(&l->mutex); | ||
2429 | mutex_unlock(&cgrp->pidlist_mutex); | ||
2430 | l->use_count++; | ||
2431 | return l; | ||
2432 | } | ||
2433 | } | ||
2434 | /* entry not found; create a new one */ | ||
2435 | l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); | ||
2436 | if (!l) { | ||
2437 | mutex_unlock(&cgrp->pidlist_mutex); | ||
2438 | put_pid_ns(ns); | ||
2439 | return l; | ||
2440 | } | ||
2441 | init_rwsem(&l->mutex); | ||
2442 | down_write(&l->mutex); | ||
2443 | l->key.type = type; | ||
2444 | l->key.ns = ns; | ||
2445 | l->use_count = 0; /* don't increment here */ | ||
2446 | l->list = NULL; | ||
2447 | l->owner = cgrp; | ||
2448 | list_add(&l->links, &cgrp->pidlists); | ||
2449 | mutex_unlock(&cgrp->pidlist_mutex); | ||
2450 | return l; | ||
2451 | } | ||
2452 | |||
2453 | /* | ||
2399 | * Load a cgroup's pidarray with either procs' tgids or tasks' pids | 2454 | * Load a cgroup's pidarray with either procs' tgids or tasks' pids |
2400 | */ | 2455 | */ |
2401 | static int pidlist_array_load(struct cgroup *cgrp, bool procs) | 2456 | static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, |
2457 | struct cgroup_pidlist **lp) | ||
2402 | { | 2458 | { |
2403 | pid_t *array; | 2459 | pid_t *array; |
2404 | int length; | 2460 | int length; |
@@ -2423,7 +2479,10 @@ static int pidlist_array_load(struct cgroup *cgrp, bool procs) | |||
2423 | if (unlikely(n == length)) | 2479 | if (unlikely(n == length)) |
2424 | break; | 2480 | break; |
2425 | /* get tgid or pid for procs or tasks file respectively */ | 2481 | /* get tgid or pid for procs or tasks file respectively */ |
2426 | pid = (procs ? task_tgid_vnr(tsk) : task_pid_vnr(tsk)); | 2482 | if (type == CGROUP_FILE_PROCS) |
2483 | pid = task_tgid_vnr(tsk); | ||
2484 | else | ||
2485 | pid = task_pid_vnr(tsk); | ||
2427 | if (pid > 0) /* make sure to only use valid results */ | 2486 | if (pid > 0) /* make sure to only use valid results */ |
2428 | array[n++] = pid; | 2487 | array[n++] = pid; |
2429 | } | 2488 | } |
@@ -2431,19 +2490,20 @@ static int pidlist_array_load(struct cgroup *cgrp, bool procs) | |||
2431 | length = n; | 2490 | length = n; |
2432 | /* now sort & (if procs) strip out duplicates */ | 2491 | /* now sort & (if procs) strip out duplicates */ |
2433 | sort(array, length, sizeof(pid_t), cmppid, NULL); | 2492 | sort(array, length, sizeof(pid_t), cmppid, NULL); |
2434 | if (procs) { | 2493 | if (type == CGROUP_FILE_PROCS) |
2435 | length = pidlist_uniq(&array, length); | 2494 | length = pidlist_uniq(&array, length); |
2436 | l = &(cgrp->procs); | 2495 | l = cgroup_pidlist_find(cgrp, type); |
2437 | } else { | 2496 | if (!l) { |
2438 | l = &(cgrp->tasks); | 2497 | kfree(array); |
2498 | return -ENOMEM; | ||
2439 | } | 2499 | } |
2440 | /* store array in cgroup, freeing old if necessary */ | 2500 | /* store array, freeing old if necessary - lock already held */ |
2441 | down_write(&l->mutex); | ||
2442 | kfree(l->list); | 2501 | kfree(l->list); |
2443 | l->list = array; | 2502 | l->list = array; |
2444 | l->length = length; | 2503 | l->length = length; |
2445 | l->use_count++; | 2504 | l->use_count++; |
2446 | up_write(&l->mutex); | 2505 | up_write(&l->mutex); |
2506 | *lp = l; | ||
2447 | return 0; | 2507 | return 0; |
2448 | } | 2508 | } |
2449 | 2509 | ||
@@ -2586,13 +2646,26 @@ static const struct seq_operations cgroup_pidlist_seq_operations = { | |||
2586 | 2646 | ||
2587 | static void cgroup_release_pid_array(struct cgroup_pidlist *l) | 2647 | static void cgroup_release_pid_array(struct cgroup_pidlist *l) |
2588 | { | 2648 | { |
2649 | /* | ||
2650 | * the case where we're the last user of this particular pidlist will | ||
2651 | * have us remove it from the cgroup's list, which entails taking the | ||
2652 | * mutex. since in pidlist_find the pidlist->lock depends on cgroup-> | ||
2653 | * pidlist_mutex, we have to take pidlist_mutex first. | ||
2654 | */ | ||
2655 | mutex_lock(&l->owner->pidlist_mutex); | ||
2589 | down_write(&l->mutex); | 2656 | down_write(&l->mutex); |
2590 | BUG_ON(!l->use_count); | 2657 | BUG_ON(!l->use_count); |
2591 | if (!--l->use_count) { | 2658 | if (!--l->use_count) { |
2659 | /* we're the last user if refcount is 0; remove and free */ | ||
2660 | list_del(&l->links); | ||
2661 | mutex_unlock(&l->owner->pidlist_mutex); | ||
2592 | kfree(l->list); | 2662 | kfree(l->list); |
2593 | l->list = NULL; | 2663 | put_pid_ns(l->key.ns); |
2594 | l->length = 0; | 2664 | up_write(&l->mutex); |
2665 | kfree(l); | ||
2666 | return; | ||
2595 | } | 2667 | } |
2668 | mutex_unlock(&l->owner->pidlist_mutex); | ||
2596 | up_write(&l->mutex); | 2669 | up_write(&l->mutex); |
2597 | } | 2670 | } |
2598 | 2671 | ||
@@ -2623,10 +2696,10 @@ static const struct file_operations cgroup_pidlist_operations = { | |||
2623 | * in the cgroup. | 2696 | * in the cgroup. |
2624 | */ | 2697 | */ |
2625 | /* helper function for the two below it */ | 2698 | /* helper function for the two below it */ |
2626 | static int cgroup_pidlist_open(struct file *file, bool procs) | 2699 | static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type) |
2627 | { | 2700 | { |
2628 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2701 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
2629 | struct cgroup_pidlist *l = (procs ? &cgrp->procs : &cgrp->tasks); | 2702 | struct cgroup_pidlist *l; |
2630 | int retval; | 2703 | int retval; |
2631 | 2704 | ||
2632 | /* Nothing to do for write-only files */ | 2705 | /* Nothing to do for write-only files */ |
@@ -2634,7 +2707,7 @@ static int cgroup_pidlist_open(struct file *file, bool procs) | |||
2634 | return 0; | 2707 | return 0; |
2635 | 2708 | ||
2636 | /* have the array populated */ | 2709 | /* have the array populated */ |
2637 | retval = pidlist_array_load(cgrp, procs); | 2710 | retval = pidlist_array_load(cgrp, type, &l); |
2638 | if (retval) | 2711 | if (retval) |
2639 | return retval; | 2712 | return retval; |
2640 | /* configure file information */ | 2713 | /* configure file information */ |
@@ -2650,11 +2723,11 @@ static int cgroup_pidlist_open(struct file *file, bool procs) | |||
2650 | } | 2723 | } |
2651 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | 2724 | static int cgroup_tasks_open(struct inode *unused, struct file *file) |
2652 | { | 2725 | { |
2653 | return cgroup_pidlist_open(file, false); | 2726 | return cgroup_pidlist_open(file, CGROUP_FILE_TASKS); |
2654 | } | 2727 | } |
2655 | static int cgroup_procs_open(struct inode *unused, struct file *file) | 2728 | static int cgroup_procs_open(struct inode *unused, struct file *file) |
2656 | { | 2729 | { |
2657 | return cgroup_pidlist_open(file, true); | 2730 | return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); |
2658 | } | 2731 | } |
2659 | 2732 | ||
2660 | static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, | 2733 | static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, |