diff options
| author | Ben Blum <bblum@google.com> | 2009-09-23 18:56:28 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-24 10:20:58 -0400 |
| commit | d1d9fd3308fdef6b4bf564fa3d6cfe35b68b50bc (patch) | |
| tree | 8de392166f2edb696950a90e468ef27f043be509 /kernel | |
| parent | 72a8cb30d10d4041c455a7054607a7d519167c87 (diff) | |
cgroups: use vmalloc for large cgroups pidlist allocations
Route all pidlist allocation requests through a dedicated helper that decides,
based on the requested size, whether the array must be vmalloc()ed or can be
obtained with kmalloc(); freeing likewise chooses between vfree() and kfree().
Signed-off-by: Ben Blum <bblum@google.com>
Signed-off-by: Paul Menage <menage@google.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cgroup.c | 47 |
1 files changed, 42 insertions, 5 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 97194ba12014..3e356b05b2d5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -50,6 +50,7 @@ | |||
| 50 | #include <linux/smp_lock.h> | 50 | #include <linux/smp_lock.h> |
| 51 | #include <linux/pid_namespace.h> | 51 | #include <linux/pid_namespace.h> |
| 52 | #include <linux/idr.h> | 52 | #include <linux/idr.h> |
| 53 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | ||
| 53 | 54 | ||
| 54 | #include <asm/atomic.h> | 55 | #include <asm/atomic.h> |
| 55 | 56 | ||
| @@ -2351,6 +2352,42 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
| 2351 | */ | 2352 | */ |
| 2352 | 2353 | ||
| 2353 | /* | 2354 | /* |
| 2355 | * The following two functions "fix" the issue where there are more pids | ||
| 2356 | * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. | ||
| 2357 | * TODO: replace with a kernel-wide solution to this problem | ||
| 2358 | */ | ||
| 2359 | #define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2)) | ||
| 2360 | static void *pidlist_allocate(int count) | ||
| 2361 | { | ||
| 2362 | if (PIDLIST_TOO_LARGE(count)) | ||
| 2363 | return vmalloc(count * sizeof(pid_t)); | ||
| 2364 | else | ||
| 2365 | return kmalloc(count * sizeof(pid_t), GFP_KERNEL); | ||
| 2366 | } | ||
| 2367 | static void pidlist_free(void *p) | ||
| 2368 | { | ||
| 2369 | if (is_vmalloc_addr(p)) | ||
| 2370 | vfree(p); | ||
| 2371 | else | ||
| 2372 | kfree(p); | ||
| 2373 | } | ||
| 2374 | static void *pidlist_resize(void *p, int newcount) | ||
| 2375 | { | ||
| 2376 | void *newlist; | ||
| 2377 | /* note: if new alloc fails, old p will still be valid either way */ | ||
| 2378 | if (is_vmalloc_addr(p)) { | ||
| 2379 | newlist = vmalloc(newcount * sizeof(pid_t)); | ||
| 2380 | if (!newlist) | ||
| 2381 | return NULL; | ||
| 2382 | memcpy(newlist, p, newcount * sizeof(pid_t)); | ||
| 2383 | vfree(p); | ||
| 2384 | } else { | ||
| 2385 | newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL); | ||
| 2386 | } | ||
| 2387 | return newlist; | ||
| 2388 | } | ||
| 2389 | |||
| 2390 | /* | ||
| 2354 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries | 2391 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries |
| 2355 | * If the new stripped list is sufficiently smaller and there's enough memory | 2392 | * If the new stripped list is sufficiently smaller and there's enough memory |
| 2356 | * to allocate a new buffer, will let go of the unneeded memory. Returns the | 2393 | * to allocate a new buffer, will let go of the unneeded memory. Returns the |
| @@ -2389,7 +2426,7 @@ after: | |||
| 2389 | * we'll just stay with what we've got. | 2426 | * we'll just stay with what we've got. |
| 2390 | */ | 2427 | */ |
| 2391 | if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) { | 2428 | if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) { |
| 2392 | newlist = krealloc(list, dest * sizeof(pid_t), GFP_KERNEL); | 2429 | newlist = pidlist_resize(list, dest); |
| 2393 | if (newlist) | 2430 | if (newlist) |
| 2394 | *p = newlist; | 2431 | *p = newlist; |
| 2395 | } | 2432 | } |
| @@ -2470,7 +2507,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
| 2470 | * show up until sometime later on. | 2507 | * show up until sometime later on. |
| 2471 | */ | 2508 | */ |
| 2472 | length = cgroup_task_count(cgrp); | 2509 | length = cgroup_task_count(cgrp); |
| 2473 | array = kmalloc(length * sizeof(pid_t), GFP_KERNEL); | 2510 | array = pidlist_allocate(length); |
| 2474 | if (!array) | 2511 | if (!array) |
| 2475 | return -ENOMEM; | 2512 | return -ENOMEM; |
| 2476 | /* now, populate the array */ | 2513 | /* now, populate the array */ |
| @@ -2494,11 +2531,11 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
| 2494 | length = pidlist_uniq(&array, length); | 2531 | length = pidlist_uniq(&array, length); |
| 2495 | l = cgroup_pidlist_find(cgrp, type); | 2532 | l = cgroup_pidlist_find(cgrp, type); |
| 2496 | if (!l) { | 2533 | if (!l) { |
| 2497 | kfree(array); | 2534 | pidlist_free(array); |
| 2498 | return -ENOMEM; | 2535 | return -ENOMEM; |
| 2499 | } | 2536 | } |
| 2500 | /* store array, freeing old if necessary - lock already held */ | 2537 | /* store array, freeing old if necessary - lock already held */ |
| 2501 | kfree(l->list); | 2538 | pidlist_free(l->list); |
| 2502 | l->list = array; | 2539 | l->list = array; |
| 2503 | l->length = length; | 2540 | l->length = length; |
| 2504 | l->use_count++; | 2541 | l->use_count++; |
| @@ -2659,7 +2696,7 @@ static void cgroup_release_pid_array(struct cgroup_pidlist *l) | |||
| 2659 | /* we're the last user if refcount is 0; remove and free */ | 2696 | /* we're the last user if refcount is 0; remove and free */ |
| 2660 | list_del(&l->links); | 2697 | list_del(&l->links); |
| 2661 | mutex_unlock(&l->owner->pidlist_mutex); | 2698 | mutex_unlock(&l->owner->pidlist_mutex); |
| 2662 | kfree(l->list); | 2699 | pidlist_free(l->list); |
| 2663 | put_pid_ns(l->key.ns); | 2700 | put_pid_ns(l->key.ns); |
| 2664 | up_write(&l->mutex); | 2701 | up_write(&l->mutex); |
| 2665 | kfree(l); | 2702 | kfree(l); |
