diff options
| author | Ben Blum <bblum@andrew.cmu.edu> | 2011-05-26 19:25:21 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 20:12:34 -0400 |
| commit | d846687d7f84e45f23ecf3846dbb43312a1206dd (patch) | |
| tree | 5c19cf2e0e2faf288bb536f6878cddf11498ef3e /kernel | |
| parent | 74a1166dfe1135dcc168d35fa5261aa7e087011b (diff) | |
cgroups: use flex_array in attach_proc
Convert cgroup_attach_proc to use flex_array.
cgroup_attach_proc needs a pre-allocated array of task pointers so that it
can move an entire thread group atomically, but requesting one monolithic
array from kmalloc() may be unreliable for very large groups, since the
whole array must be satisfied by a single contiguous allocation.
Using flex_array provides the same functionality with less risk of
allocation failure.
This is a follow-up patch that applies on top of cgroup-procs-write.patch.
Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cgroup.c | 33 |
1 file changed, 24 insertions, 9 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5e6a9745f0e7..00a884342d3d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -57,6 +57,7 @@ | |||
| 57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | 57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ |
| 58 | #include <linux/eventfd.h> | 58 | #include <linux/eventfd.h> |
| 59 | #include <linux/poll.h> | 59 | #include <linux/poll.h> |
| 60 | #include <linux/flex_array.h> /* used in cgroup_attach_proc */ | ||
| 60 | 61 | ||
| 61 | #include <asm/atomic.h> | 62 | #include <asm/atomic.h> |
| 62 | 63 | ||
| @@ -1995,7 +1996,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 1995 | struct cgroupfs_root *root = cgrp->root; | 1996 | struct cgroupfs_root *root = cgrp->root; |
| 1996 | /* threadgroup list cursor and array */ | 1997 | /* threadgroup list cursor and array */ |
| 1997 | struct task_struct *tsk; | 1998 | struct task_struct *tsk; |
| 1998 | struct task_struct **group; | 1999 | struct flex_array *group; |
| 1999 | /* | 2000 | /* |
| 2000 | * we need to make sure we have css_sets for all the tasks we're | 2001 | * we need to make sure we have css_sets for all the tasks we're |
| 2001 | * going to move -before- we actually start moving them, so that in | 2002 | * going to move -before- we actually start moving them, so that in |
| @@ -2012,9 +2013,15 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 2012 | * and if threads exit, this will just be an over-estimate. | 2013 | * and if threads exit, this will just be an over-estimate. |
| 2013 | */ | 2014 | */ |
| 2014 | group_size = get_nr_threads(leader); | 2015 | group_size = get_nr_threads(leader); |
| 2015 | group = kmalloc(group_size * sizeof(*group), GFP_KERNEL); | 2016 | /* flex_array supports very large thread-groups better than kmalloc. */ |
| 2017 | group = flex_array_alloc(sizeof(struct task_struct *), group_size, | ||
| 2018 | GFP_KERNEL); | ||
| 2016 | if (!group) | 2019 | if (!group) |
| 2017 | return -ENOMEM; | 2020 | return -ENOMEM; |
| 2021 | /* pre-allocate to guarantee space while iterating in rcu read-side. */ | ||
| 2022 | retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL); | ||
| 2023 | if (retval) | ||
| 2024 | goto out_free_group_list; | ||
| 2018 | 2025 | ||
| 2019 | /* prevent changes to the threadgroup list while we take a snapshot. */ | 2026 | /* prevent changes to the threadgroup list while we take a snapshot. */ |
| 2020 | rcu_read_lock(); | 2027 | rcu_read_lock(); |
| @@ -2037,7 +2044,12 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 2037 | /* as per above, nr_threads may decrease, but not increase. */ | 2044 | /* as per above, nr_threads may decrease, but not increase. */ |
| 2038 | BUG_ON(i >= group_size); | 2045 | BUG_ON(i >= group_size); |
| 2039 | get_task_struct(tsk); | 2046 | get_task_struct(tsk); |
| 2040 | group[i] = tsk; | 2047 | /* |
| 2048 | * saying GFP_ATOMIC has no effect here because we did prealloc | ||
| 2049 | * earlier, but it's good form to communicate our expectations. | ||
| 2050 | */ | ||
| 2051 | retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC); | ||
| 2052 | BUG_ON(retval != 0); | ||
| 2041 | i++; | 2053 | i++; |
| 2042 | } while_each_thread(leader, tsk); | 2054 | } while_each_thread(leader, tsk); |
| 2043 | /* remember the number of threads in the array for later. */ | 2055 | /* remember the number of threads in the array for later. */ |
| @@ -2059,7 +2071,8 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 2059 | if (ss->can_attach_task) { | 2071 | if (ss->can_attach_task) { |
| 2060 | /* run on each task in the threadgroup. */ | 2072 | /* run on each task in the threadgroup. */ |
| 2061 | for (i = 0; i < group_size; i++) { | 2073 | for (i = 0; i < group_size; i++) { |
| 2062 | retval = ss->can_attach_task(cgrp, group[i]); | 2074 | tsk = flex_array_get_ptr(group, i); |
| 2075 | retval = ss->can_attach_task(cgrp, tsk); | ||
| 2063 | if (retval) { | 2076 | if (retval) { |
| 2064 | failed_ss = ss; | 2077 | failed_ss = ss; |
| 2065 | cancel_failed_ss = true; | 2078 | cancel_failed_ss = true; |
| @@ -2075,7 +2088,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 2075 | */ | 2088 | */ |
| 2076 | INIT_LIST_HEAD(&newcg_list); | 2089 | INIT_LIST_HEAD(&newcg_list); |
| 2077 | for (i = 0; i < group_size; i++) { | 2090 | for (i = 0; i < group_size; i++) { |
| 2078 | tsk = group[i]; | 2091 | tsk = flex_array_get_ptr(group, i); |
| 2079 | /* nothing to do if this task is already in the cgroup */ | 2092 | /* nothing to do if this task is already in the cgroup */ |
| 2080 | oldcgrp = task_cgroup_from_root(tsk, root); | 2093 | oldcgrp = task_cgroup_from_root(tsk, root); |
| 2081 | if (cgrp == oldcgrp) | 2094 | if (cgrp == oldcgrp) |
| @@ -2114,7 +2127,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 2114 | ss->pre_attach(cgrp); | 2127 | ss->pre_attach(cgrp); |
| 2115 | } | 2128 | } |
| 2116 | for (i = 0; i < group_size; i++) { | 2129 | for (i = 0; i < group_size; i++) { |
| 2117 | tsk = group[i]; | 2130 | tsk = flex_array_get_ptr(group, i); |
| 2118 | /* leave current thread as it is if it's already there */ | 2131 | /* leave current thread as it is if it's already there */ |
| 2119 | oldcgrp = task_cgroup_from_root(tsk, root); | 2132 | oldcgrp = task_cgroup_from_root(tsk, root); |
| 2120 | if (cgrp == oldcgrp) | 2133 | if (cgrp == oldcgrp) |
| @@ -2167,10 +2180,12 @@ out_cancel_attach: | |||
| 2167 | } | 2180 | } |
| 2168 | } | 2181 | } |
| 2169 | /* clean up the array of referenced threads in the group. */ | 2182 | /* clean up the array of referenced threads in the group. */ |
| 2170 | for (i = 0; i < group_size; i++) | 2183 | for (i = 0; i < group_size; i++) { |
| 2171 | put_task_struct(group[i]); | 2184 | tsk = flex_array_get_ptr(group, i); |
| 2185 | put_task_struct(tsk); | ||
| 2186 | } | ||
| 2172 | out_free_group_list: | 2187 | out_free_group_list: |
| 2173 | kfree(group); | 2188 | flex_array_free(group); |
| 2174 | return retval; | 2189 | return retval; |
| 2175 | } | 2190 | } |
| 2176 | 2191 | ||
