diff options
author | Li Zefan <lizf@cn.fujitsu.com> | 2009-07-29 18:04:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-07-29 22:10:35 -0400 |
commit | 096b7fe012d66ed55e98bc8022405ede0cc80e96 (patch) | |
tree | 755709b6d3ff21a9e9640d6c19432b31c863ad34 /kernel | |
parent | b317c833211b7fbf902163de766f09554090e0bf (diff) |
cgroups: fix pid namespace bug
The bug was introduced by commit cc31edceee04a7b87f2be48f9489ebb72d264844
("cgroups: convert tasks file to use a seq_file with shared pid array").
We cache a pid array for all threads that are opening the same "tasks"
file, but the pids in the array are always from the namespace of the
last process that opened the file, so all other threads will read pids
from that namespace instead of their own namespaces.
To fix it, we maintain a list of pid arrays, which is keyed by pid_ns.
The list will be of length 1 most of the time.
Reported-by: Paul Menage <menage@google.com>
Idea-by: Paul Menage <menage@google.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Reviewed-by: Serge Hallyn <serue@us.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 96 |
1 files changed, 72 insertions, 24 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3737a682cdf..250dac05680 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/hash.h> | 47 | #include <linux/hash.h> |
48 | #include <linux/namei.h> | 48 | #include <linux/namei.h> |
49 | #include <linux/smp_lock.h> | 49 | #include <linux/smp_lock.h> |
50 | #include <linux/pid_namespace.h> | ||
50 | 51 | ||
51 | #include <asm/atomic.h> | 52 | #include <asm/atomic.h> |
52 | 53 | ||
@@ -960,6 +961,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
960 | INIT_LIST_HEAD(&cgrp->children); | 961 | INIT_LIST_HEAD(&cgrp->children); |
961 | INIT_LIST_HEAD(&cgrp->css_sets); | 962 | INIT_LIST_HEAD(&cgrp->css_sets); |
962 | INIT_LIST_HEAD(&cgrp->release_list); | 963 | INIT_LIST_HEAD(&cgrp->release_list); |
964 | INIT_LIST_HEAD(&cgrp->pids_list); | ||
963 | init_rwsem(&cgrp->pids_mutex); | 965 | init_rwsem(&cgrp->pids_mutex); |
964 | } | 966 | } |
965 | static void init_cgroup_root(struct cgroupfs_root *root) | 967 | static void init_cgroup_root(struct cgroupfs_root *root) |
@@ -2201,12 +2203,30 @@ err: | |||
2201 | return ret; | 2203 | return ret; |
2202 | } | 2204 | } |
2203 | 2205 | ||
2206 | /* | ||
2207 | * Cache pids for all threads in the same pid namespace that are | ||
2208 | * opening the same "tasks" file. | ||
2209 | */ | ||
2210 | struct cgroup_pids { | ||
2211 | /* The node in cgrp->pids_list */ | ||
2212 | struct list_head list; | ||
2213 | /* The cgroup those pids belong to */ | ||
2214 | struct cgroup *cgrp; | ||
2215 | /* The namespace those pids belong to */ | |
2216 | struct pid_namespace *ns; | ||
2217 | /* Array of process ids in the cgroup */ | ||
2218 | pid_t *tasks_pids; | ||
2219 | /* How many files are using this tasks_pids array */ | |
2220 | int use_count; | ||
2221 | /* Length of the current tasks_pids array */ | ||
2222 | int length; | ||
2223 | }; | ||
2224 | |||
2204 | static int cmppid(const void *a, const void *b) | 2225 | static int cmppid(const void *a, const void *b) |
2205 | { | 2226 | { |
2206 | return *(pid_t *)a - *(pid_t *)b; | 2227 | return *(pid_t *)a - *(pid_t *)b; |
2207 | } | 2228 | } |
2208 | 2229 | ||
2209 | |||
2210 | /* | 2230 | /* |
2211 | * seq_file methods for the "tasks" file. The seq_file position is the | 2231 | * seq_file methods for the "tasks" file. The seq_file position is the |
2212 | * next pid to display; the seq_file iterator is a pointer to the pid | 2232 | * next pid to display; the seq_file iterator is a pointer to the pid |
@@ -2221,45 +2241,47 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) | |||
2221 | * after a seek to the start). Use a binary-search to find the | 2241 | * after a seek to the start). Use a binary-search to find the |
2222 | * next pid to display, if any | 2242 | * next pid to display, if any |
2223 | */ | 2243 | */ |
2224 | struct cgroup *cgrp = s->private; | 2244 | struct cgroup_pids *cp = s->private; |
2245 | struct cgroup *cgrp = cp->cgrp; | ||
2225 | int index = 0, pid = *pos; | 2246 | int index = 0, pid = *pos; |
2226 | int *iter; | 2247 | int *iter; |
2227 | 2248 | ||
2228 | down_read(&cgrp->pids_mutex); | 2249 | down_read(&cgrp->pids_mutex); |
2229 | if (pid) { | 2250 | if (pid) { |
2230 | int end = cgrp->pids_length; | 2251 | int end = cp->length; |
2231 | 2252 | ||
2232 | while (index < end) { | 2253 | while (index < end) { |
2233 | int mid = (index + end) / 2; | 2254 | int mid = (index + end) / 2; |
2234 | if (cgrp->tasks_pids[mid] == pid) { | 2255 | if (cp->tasks_pids[mid] == pid) { |
2235 | index = mid; | 2256 | index = mid; |
2236 | break; | 2257 | break; |
2237 | } else if (cgrp->tasks_pids[mid] <= pid) | 2258 | } else if (cp->tasks_pids[mid] <= pid) |
2238 | index = mid + 1; | 2259 | index = mid + 1; |
2239 | else | 2260 | else |
2240 | end = mid; | 2261 | end = mid; |
2241 | } | 2262 | } |
2242 | } | 2263 | } |
2243 | /* If we're off the end of the array, we're done */ | 2264 | /* If we're off the end of the array, we're done */ |
2244 | if (index >= cgrp->pids_length) | 2265 | if (index >= cp->length) |
2245 | return NULL; | 2266 | return NULL; |
2246 | /* Update the abstract position to be the actual pid that we found */ | 2267 | /* Update the abstract position to be the actual pid that we found */ |
2247 | iter = cgrp->tasks_pids + index; | 2268 | iter = cp->tasks_pids + index; |
2248 | *pos = *iter; | 2269 | *pos = *iter; |
2249 | return iter; | 2270 | return iter; |
2250 | } | 2271 | } |
2251 | 2272 | ||
2252 | static void cgroup_tasks_stop(struct seq_file *s, void *v) | 2273 | static void cgroup_tasks_stop(struct seq_file *s, void *v) |
2253 | { | 2274 | { |
2254 | struct cgroup *cgrp = s->private; | 2275 | struct cgroup_pids *cp = s->private; |
2276 | struct cgroup *cgrp = cp->cgrp; | ||
2255 | up_read(&cgrp->pids_mutex); | 2277 | up_read(&cgrp->pids_mutex); |
2256 | } | 2278 | } |
2257 | 2279 | ||
2258 | static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) | 2280 | static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) |
2259 | { | 2281 | { |
2260 | struct cgroup *cgrp = s->private; | 2282 | struct cgroup_pids *cp = s->private; |
2261 | int *p = v; | 2283 | int *p = v; |
2262 | int *end = cgrp->tasks_pids + cgrp->pids_length; | 2284 | int *end = cp->tasks_pids + cp->length; |
2263 | 2285 | ||
2264 | /* | 2286 | /* |
2265 | * Advance to the next pid in the array. If this goes off the | 2287 | * Advance to the next pid in the array. If this goes off the |
@@ -2286,26 +2308,33 @@ static struct seq_operations cgroup_tasks_seq_operations = { | |||
2286 | .show = cgroup_tasks_show, | 2308 | .show = cgroup_tasks_show, |
2287 | }; | 2309 | }; |
2288 | 2310 | ||
2289 | static void release_cgroup_pid_array(struct cgroup *cgrp) | 2311 | static void release_cgroup_pid_array(struct cgroup_pids *cp) |
2290 | { | 2312 | { |
2313 | struct cgroup *cgrp = cp->cgrp; | ||
2314 | |||
2291 | down_write(&cgrp->pids_mutex); | 2315 | down_write(&cgrp->pids_mutex); |
2292 | BUG_ON(!cgrp->pids_use_count); | 2316 | BUG_ON(!cp->use_count); |
2293 | if (!--cgrp->pids_use_count) { | 2317 | if (!--cp->use_count) { |
2294 | kfree(cgrp->tasks_pids); | 2318 | list_del(&cp->list); |
2295 | cgrp->tasks_pids = NULL; | 2319 | put_pid_ns(cp->ns); |
2296 | cgrp->pids_length = 0; | 2320 | kfree(cp->tasks_pids); |
2321 | kfree(cp); | ||
2297 | } | 2322 | } |
2298 | up_write(&cgrp->pids_mutex); | 2323 | up_write(&cgrp->pids_mutex); |
2299 | } | 2324 | } |
2300 | 2325 | ||
2301 | static int cgroup_tasks_release(struct inode *inode, struct file *file) | 2326 | static int cgroup_tasks_release(struct inode *inode, struct file *file) |
2302 | { | 2327 | { |
2303 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2328 | struct seq_file *seq; |
2329 | struct cgroup_pids *cp; | ||
2304 | 2330 | ||
2305 | if (!(file->f_mode & FMODE_READ)) | 2331 | if (!(file->f_mode & FMODE_READ)) |
2306 | return 0; | 2332 | return 0; |
2307 | 2333 | ||
2308 | release_cgroup_pid_array(cgrp); | 2334 | seq = file->private_data; |
2335 | cp = seq->private; | ||
2336 | |||
2337 | release_cgroup_pid_array(cp); | ||
2309 | return seq_release(inode, file); | 2338 | return seq_release(inode, file); |
2310 | } | 2339 | } |
2311 | 2340 | ||
@@ -2324,6 +2353,8 @@ static struct file_operations cgroup_tasks_operations = { | |||
2324 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | 2353 | static int cgroup_tasks_open(struct inode *unused, struct file *file) |
2325 | { | 2354 | { |
2326 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2355 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
2356 | struct pid_namespace *ns = current->nsproxy->pid_ns; | ||
2357 | struct cgroup_pids *cp; | ||
2327 | pid_t *pidarray; | 2358 | pid_t *pidarray; |
2328 | int npids; | 2359 | int npids; |
2329 | int retval; | 2360 | int retval; |
@@ -2350,20 +2381,37 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file) | |||
2350 | * array if necessary | 2381 | * array if necessary |
2351 | */ | 2382 | */ |
2352 | down_write(&cgrp->pids_mutex); | 2383 | down_write(&cgrp->pids_mutex); |
2353 | kfree(cgrp->tasks_pids); | 2384 | |
2354 | cgrp->tasks_pids = pidarray; | 2385 | list_for_each_entry(cp, &cgrp->pids_list, list) { |
2355 | cgrp->pids_length = npids; | 2386 | if (ns == cp->ns) |
2356 | cgrp->pids_use_count++; | 2387 | goto found; |
2388 | } | ||
2389 | |||
2390 | cp = kzalloc(sizeof(*cp), GFP_KERNEL); | ||
2391 | if (!cp) { | ||
2392 | up_write(&cgrp->pids_mutex); | ||
2393 | kfree(pidarray); | ||
2394 | return -ENOMEM; | ||
2395 | } | ||
2396 | cp->cgrp = cgrp; | ||
2397 | cp->ns = ns; | ||
2398 | get_pid_ns(ns); | ||
2399 | list_add(&cp->list, &cgrp->pids_list); | ||
2400 | found: | ||
2401 | kfree(cp->tasks_pids); | ||
2402 | cp->tasks_pids = pidarray; | ||
2403 | cp->length = npids; | ||
2404 | cp->use_count++; | ||
2357 | up_write(&cgrp->pids_mutex); | 2405 | up_write(&cgrp->pids_mutex); |
2358 | 2406 | ||
2359 | file->f_op = &cgroup_tasks_operations; | 2407 | file->f_op = &cgroup_tasks_operations; |
2360 | 2408 | ||
2361 | retval = seq_open(file, &cgroup_tasks_seq_operations); | 2409 | retval = seq_open(file, &cgroup_tasks_seq_operations); |
2362 | if (retval) { | 2410 | if (retval) { |
2363 | release_cgroup_pid_array(cgrp); | 2411 | release_cgroup_pid_array(cp); |
2364 | return retval; | 2412 | return retval; |
2365 | } | 2413 | } |
2366 | ((struct seq_file *)file->private_data)->private = cgrp; | 2414 | ((struct seq_file *)file->private_data)->private = cp; |
2367 | return 0; | 2415 | return 0; |
2368 | } | 2416 | } |
2369 | 2417 | ||