author | Martin Peschke <mp3@de.ibm.com> | 2006-09-26 02:31:21 -0400
---|---|---
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-09-26 11:48:47 -0400
commit | 7ff6f08295d90ab20d25200ef485ebb45b1b8d71 (patch) |
tree | 4c3410dcf5191ab574304f3ffbafd675545c2297 |
parent | 8bc719d3cab8414938f9ea6e33b58d8810d18068 (diff) |
[PATCH] CPU hotplug compatible alloc_percpu()
This patch splits alloc_percpu() up into two phases. Likewise for
free_percpu(). This allows clients to limit initial allocations to online
cpu's, and to populate or depopulate per-cpu data at run time as needed:
struct my_struct *obj;
/* initial allocation for online cpu's */
obj = percpu_alloc(sizeof(struct my_struct), GFP_KERNEL);
...
/* populate per-cpu data for cpu coming online */
ptr = percpu_populate(obj, sizeof(struct my_struct), GFP_KERNEL, cpu);
...
/* access per-cpu object */
ptr = percpu_ptr(obj, smp_processor_id());
...
/* depopulate per-cpu data for cpu going offline */
percpu_depopulate(obj, cpu);
...
/* final removal */
percpu_free(obj);
Signed-off-by: Martin Peschke <mp3@de.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
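
The kernel-doc comments added by this patch note that percpu_populate()/percpu_depopulate() are meant to be driven from a CPU hotplug handler. Below is a minimal sketch of such a handler, written against the notifier interface of that kernel era (register_cpu_notifier() with CPU_UP_PREPARE/CPU_UP_CANCELED/CPU_DEAD actions); it is not part of this patch, and struct my_struct, my_obj and my_cpu_callback are made-up names for illustration.

/*
 * Hedged sketch, not part of this patch: a CPU hotplug handler that
 * populates/depopulates one per-cpu object allocated with percpu_alloc().
 * "struct my_struct", "my_obj" and "my_cpu_callback" are made-up names.
 */
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/percpu.h>

struct my_struct {
	unsigned long counter;
};

static struct my_struct *my_obj;

static int my_cpu_callback(struct notifier_block *nb,
			   unsigned long action, void *hcpu)
{
	int cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
		/* cpu is about to come online: give it a zeroed copy */
		if (!percpu_populate(my_obj, sizeof(struct my_struct),
				     GFP_KERNEL, cpu))
			return NOTIFY_BAD;
		break;
	case CPU_UP_CANCELED:
	case CPU_DEAD:
		/* bring-up failed or cpu went offline: drop its copy */
		percpu_depopulate(my_obj, cpu);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_cpu_notifier = {
	.notifier_call = my_cpu_callback,
};

static int __init my_init(void)
{
	/* initial allocation covers the cpus online right now */
	my_obj = percpu_alloc(sizeof(struct my_struct), GFP_KERNEL);
	if (!my_obj)
		return -ENOMEM;
	register_cpu_notifier(&my_cpu_notifier);
	return 0;
}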
-rw-r--r-- | include/linux/percpu.h | 79
-rw-r--r-- | mm/slab.c | 166
2 files changed, 169 insertions(+), 76 deletions(-)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index f926490a7d8b..3835a9642f13 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,9 +1,12 @@
 #ifndef __LINUX_PERCPU_H
 #define __LINUX_PERCPU_H
+
 #include <linux/spinlock.h> /* For preempt_disable() */
 #include <linux/slab.h> /* For kmalloc() */
 #include <linux/smp.h>
 #include <linux/string.h> /* For memset() */
+#include <linux/cpumask.h>
+
 #include <asm/percpu.h>
 
 /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
@@ -27,39 +30,77 @@ struct percpu_data {
 	void *ptrs[NR_CPUS];
 };
 
+#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
 /*
- * Use this to get to a cpu's version of the per-cpu object allocated using
- * alloc_percpu. Non-atomic access to the current CPU's version should
+ * Use this to get to a cpu's version of the per-cpu object dynamically
+ * allocated. Non-atomic access to the current CPU's version should
  * probably be combined with get_cpu()/put_cpu().
  */
-#define per_cpu_ptr(ptr, cpu) \
+#define percpu_ptr(ptr, cpu) \
 ({ \
-	struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \
+	struct percpu_data *__p = __percpu_disguise(ptr); \
 	(__typeof__(ptr))__p->ptrs[(cpu)]; \
 })
 
-extern void *__alloc_percpu(size_t size);
-extern void free_percpu(const void *);
+extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
+extern void percpu_depopulate(void *__pdata, int cpu);
+extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+				  cpumask_t *mask);
+extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
+extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
+extern void percpu_free(void *__pdata);
 
 #else /* CONFIG_SMP */
 
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+
+static inline void percpu_depopulate(void *__pdata, int cpu)
+{
+}
+
+static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+}
 
-static inline void *__alloc_percpu(size_t size)
+static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
+				    int cpu)
 {
-	void *ret = kmalloc(size, GFP_KERNEL);
-	if (ret)
-		memset(ret, 0, size);
-	return ret;
+	return percpu_ptr(__pdata, cpu);
 }
-static inline void free_percpu(const void *ptr)
-{
-	kfree(ptr);
+
+static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+					 cpumask_t *mask)
+{
+	return 0;
+}
+
+static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+	return kzalloc(size, gfp);
+}
+
+static inline void percpu_free(void *__pdata)
+{
+	kfree(__pdata);
 }
 
 #endif /* CONFIG_SMP */
 
-/* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type))))
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+	__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
+#define percpu_depopulate_mask(__pdata, mask) \
+	__percpu_depopulate_mask((__pdata), &(mask))
+#define percpu_alloc_mask(size, gfp, mask) \
+	__percpu_alloc_mask((size), (gfp), &(mask))
+
+#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
+
+/* (legacy) interface for use without CPU hotplug handling */
+
+#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \
+					       cpu_possible_map)
#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type))
+#define free_percpu(ptr) percpu_free((ptr))
+#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
 
 #endif /* __LINUX_PERCPU_H */
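
With the header above in place, a caller can choose the cpumask that the initial allocation covers rather than always paying for cpu_possible_map; the legacy __alloc_percpu(size) simply expands to percpu_alloc_mask(size, GFP_KERNEL, cpu_possible_map), and percpu_alloc(size, gfp) is the cpu_online_map shorthand. A small usage sketch follows; it is not from the patch, and struct my_stats and the init/exit functions are invented for illustration.

/* Hedged illustration of the new mask-based interface; not from the patch. */
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/smp.h>

struct my_stats {
	unsigned long events;
};

static struct my_stats *stats;

static int __init my_stats_init(void)
{
	int cpu;

	/* populate only the cpus that are online at this point;
	 * percpu_alloc(size, gfp) is shorthand for exactly this call */
	stats = percpu_alloc_mask(sizeof(struct my_stats), GFP_KERNEL,
				  cpu_online_map);
	if (!stats)
		return -ENOMEM;

	/* non-atomic access to the local copy, as the header comment suggests */
	cpu = get_cpu();
	percpu_ptr(stats, cpu)->events = 0;
	put_cpu();
	return 0;
}

static void my_stats_exit(void)
{
	percpu_free(stats);	/* frees every populated copy plus the descriptor */
}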
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3371,52 +3371,127 @@ EXPORT_SYMBOL(__kmalloc_track_caller);
 
 #ifdef CONFIG_SMP
 /**
- * __alloc_percpu - allocate one copy of the object for every present
- * cpu in the system, zeroing them.
- * Objects should be dereferenced using the per_cpu_ptr macro only.
+ * percpu_depopulate - depopulate per-cpu data for given cpu
+ * @__pdata: per-cpu data to depopulate
+ * @cpu: depopulate per-cpu data for this cpu
  *
- * @size: how many bytes of memory are required.
+ * Depopulating per-cpu data for a cpu going offline would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
  */
-void *__alloc_percpu(size_t size)
+void percpu_depopulate(void *__pdata, int cpu)
 {
-	int i;
-	struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
+	struct percpu_data *pdata = __percpu_disguise(__pdata);
+	if (pdata->ptrs[cpu]) {
+		kfree(pdata->ptrs[cpu]);
+		pdata->ptrs[cpu] = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(percpu_depopulate);
 
-	if (!pdata)
-		return NULL;
+/**
+ * percpu_depopulate_mask - depopulate per-cpu data for some cpu's
+ * @__pdata: per-cpu data to depopulate
+ * @mask: depopulate per-cpu data for cpu's selected through mask bits
+ */
+void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+	int cpu;
+	for_each_cpu_mask(cpu, *mask)
+		percpu_depopulate(__pdata, cpu);
+}
+EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
 
-	/*
-	 * Cannot use for_each_online_cpu since a cpu may come online
-	 * and we have no way of figuring out how to fix the array
-	 * that we have allocated then....
-	 */
-	for_each_possible_cpu(i) {
-		int node = cpu_to_node(i);
+/**
+ * percpu_populate - populate per-cpu data for given cpu
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @cpu: populate per-data for this cpu
+ *
+ * Populating per-cpu data for a cpu coming online would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
+ * Per-cpu object is populated with zeroed buffer.
+ */
+void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+{
+	struct percpu_data *pdata = __percpu_disguise(__pdata);
+	int node = cpu_to_node(cpu);
 
-		if (node_online(node))
-			pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
-		else
-			pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
+	BUG_ON(pdata->ptrs[cpu]);
+	if (node_online(node)) {
+		/* FIXME: kzalloc_node(size, gfp, node) */
+		pdata->ptrs[cpu] = kmalloc_node(size, gfp, node);
+		if (pdata->ptrs[cpu])
+			memset(pdata->ptrs[cpu], 0, size);
+	} else
+		pdata->ptrs[cpu] = kzalloc(size, gfp);
+	return pdata->ptrs[cpu];
+}
+EXPORT_SYMBOL_GPL(percpu_populate);
 
-		if (!pdata->ptrs[i])
-			goto unwind_oom;
-		memset(pdata->ptrs[i], 0, size);
-	}
+/**
+ * percpu_populate_mask - populate per-cpu data for more cpu's
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-cpu data for cpu's selected through mask bits
+ *
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+			   cpumask_t *mask)
+{
+	cpumask_t populated = CPU_MASK_NONE;
+	int cpu;
+
+	for_each_cpu_mask(cpu, *mask)
+		if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
+			__percpu_depopulate_mask(__pdata, &populated);
+			return -ENOMEM;
+		} else
+			cpu_set(cpu, populated);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__percpu_populate_mask);
 
-	/* Catch derefs w/o wrappers */
-	return (void *)(~(unsigned long)pdata);
+/**
+ * percpu_alloc_mask - initial setup of per-cpu data
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-data for cpu's selected through mask bits
+ *
+ * Populating per-cpu data for all online cpu's would be a typical use case,
+ * which is simplified by the percpu_alloc() wrapper.
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+	void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
+	void *__pdata = __percpu_disguise(pdata);
 
-unwind_oom:
-	while (--i >= 0) {
-		if (!cpu_possible(i))
-			continue;
-		kfree(pdata->ptrs[i]);
-	}
+	if (unlikely(!pdata))
+		return NULL;
+	if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
+		return __pdata;
 	kfree(pdata);
 	return NULL;
 }
-EXPORT_SYMBOL(__alloc_percpu);
-#endif
+EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
+
+/**
+ * percpu_free - final cleanup of per-cpu data
+ * @__pdata: object to clean up
+ *
+ * We simply clean up any per-cpu object left. No need for the client to
+ * track and specify through a bis mask which per-cpu objects are to free.
+ */
+void percpu_free(void *__pdata)
+{
+	__percpu_depopulate_mask(__pdata, &cpu_possible_map);
+	kfree(__percpu_disguise(__pdata));
+}
+EXPORT_SYMBOL_GPL(percpu_free);
+#endif /* CONFIG_SMP */
 
 /**
  * kmem_cache_free - Deallocate an object
@@ -3463,29 +3538,6 @@ void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
-#ifdef CONFIG_SMP
-/**
- * free_percpu - free previously allocated percpu memory
- * @objp: pointer returned by alloc_percpu.
- *
- * Don't free memory not originally allocated by alloc_percpu()
- * The complemented objp is to check for that.
- */
-void free_percpu(const void *objp)
-{
-	int i;
-	struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
-
-	/*
-	 * We allocate for all cpus so we cannot use for online cpu here.
-	 */
-	for_each_possible_cpu(i)
-		kfree(p->ptrs[i]);
-	kfree(p);
-}
-EXPORT_SYMBOL(free_percpu);
-#endif
-
 unsigned int kmem_cache_size(struct kmem_cache *cachep)
 {
 	return obj_size(cachep);
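
The __percpu_disguise() helper used throughout the slab.c changes above keeps the old trick of handing clients the bitwise complement of the descriptor pointer, so stray dereferences that bypass percpu_ptr()/per_cpu_ptr() fault immediately while the accessors can still undo the disguise. Below is a stand-alone sketch of that round-trip in plain user-space C, not kernel code, assuming the usual Linux case where unsigned long is as wide as a pointer.

#include <assert.h>
#include <stdio.h>

/* Same idea as __percpu_disguise(): complement every bit of the pointer. */
#define disguise(p) ((void *)~(unsigned long)(p))

int main(void)
{
	int object = 42;
	void *cooked = disguise(&object); /* value handed out to clients */
	int *raw = disguise(cooked);      /* what the accessor recovers */

	assert(raw == &object);           /* complementing twice round-trips */
	printf("%d\n", *raw);             /* prints 42 */
	return 0;
}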