aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Martin Peschke <mp3@de.ibm.com> 2006-09-26 02:31:21 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-09-26 11:48:47 -0400
commit: 7ff6f08295d90ab20d25200ef485ebb45b1b8d71 (patch)
tree: 4c3410dcf5191ab574304f3ffbafd675545c2297
parent: 8bc719d3cab8414938f9ea6e33b58d8810d18068 (diff)
[PATCH] CPU hotplug compatible alloc_percpu()
This patch splits alloc_percpu() up into two phases. Likewise for
free_percpu(). This allows clients to limit initial allocations to online
cpu's, and to populate or depopulate per-cpu data at run time as needed:

    struct my_struct *obj;

    /* initial allocation for online cpu's */
    obj = percpu_alloc(sizeof(struct my_struct), GFP_KERNEL);
    ...
    /* populate per-cpu data for cpu coming online */
    ptr = percpu_populate(obj, sizeof(struct my_struct), GFP_KERNEL, cpu);
    ...
    /* access per-cpu object */
    ptr = percpu_ptr(obj, smp_processor_id());
    ...
    /* depopulate per-cpu data for cpu going offline */
    percpu_depopulate(obj, cpu);
    ...
    /* final removal */
    percpu_free(obj);

Signed-off-by: Martin Peschke <mp3@de.ibm.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- include/linux/percpu.h 79
-rw-r--r-- mm/slab.c 166
2 files changed, 169 insertions, 76 deletions
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index f926490a7d8b..3835a9642f13 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,9 +1,12 @@
1#ifndef __LINUX_PERCPU_H 1#ifndef __LINUX_PERCPU_H
2#define __LINUX_PERCPU_H 2#define __LINUX_PERCPU_H
3
3#include <linux/spinlock.h> /* For preempt_disable() */ 4#include <linux/spinlock.h> /* For preempt_disable() */
4#include <linux/slab.h> /* For kmalloc() */ 5#include <linux/slab.h> /* For kmalloc() */
5#include <linux/smp.h> 6#include <linux/smp.h>
6#include <linux/string.h> /* For memset() */ 7#include <linux/string.h> /* For memset() */
8#include <linux/cpumask.h>
9
7#include <asm/percpu.h> 10#include <asm/percpu.h>
8 11
9/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ 12/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
@@ -27,39 +30,77 @@ struct percpu_data {
27 void *ptrs[NR_CPUS]; 30 void *ptrs[NR_CPUS];
28}; 31};
29 32
33#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
30/* 34/*
31 * Use this to get to a cpu's version of the per-cpu object allocated using 35 * Use this to get to a cpu's version of the per-cpu object dynamically
32 * alloc_percpu. Non-atomic access to the current CPU's version should 36 * allocated. Non-atomic access to the current CPU's version should
33 * probably be combined with get_cpu()/put_cpu(). 37 * probably be combined with get_cpu()/put_cpu().
34 */ 38 */
35#define per_cpu_ptr(ptr, cpu) \ 39#define percpu_ptr(ptr, cpu) \
36({ \ 40({ \
37 struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \ 41 struct percpu_data *__p = __percpu_disguise(ptr); \
38 (__typeof__(ptr))__p->ptrs[(cpu)]; \ 42 (__typeof__(ptr))__p->ptrs[(cpu)]; \
39}) 43})
40 44
41extern void *__alloc_percpu(size_t size); 45extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
42extern void free_percpu(const void *); 46extern void percpu_depopulate(void *__pdata, int cpu);
47extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
48 cpumask_t *mask);
49extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
50extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
51extern void percpu_free(void *__pdata);
43 52
44#else /* CONFIG_SMP */ 53#else /* CONFIG_SMP */
45 54
46#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) 55#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
56
57static inline void percpu_depopulate(void *__pdata, int cpu)
58{
59}
60
61static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
62{
63}
47 64
48static inline void *__alloc_percpu(size_t size) 65static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
66 int cpu)
49{ 67{
50 void *ret = kmalloc(size, GFP_KERNEL); 68 return percpu_ptr(__pdata, cpu);
51 if (ret)
52 memset(ret, 0, size);
53 return ret;
54} 69}
55static inline void free_percpu(const void *ptr) 70
56{ 71static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
57 kfree(ptr); 72 cpumask_t *mask)
73{
74 return 0;
75}
76
77static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
78{
79 return kzalloc(size, gfp);
80}
81
82static inline void percpu_free(void *__pdata)
83{
84 kfree(__pdata);
58} 85}
59 86
60#endif /* CONFIG_SMP */ 87#endif /* CONFIG_SMP */
61 88
62/* Simple wrapper for the common case: zeros memory. */ 89#define percpu_populate_mask(__pdata, size, gfp, mask) \
63#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type)))) 90 __percpu_populate_mask((__pdata), (size), (gfp), &(mask))
91#define percpu_depopulate_mask(__pdata, mask) \
92 __percpu_depopulate_mask((__pdata), &(mask))
93#define percpu_alloc_mask(size, gfp, mask) \
94 __percpu_alloc_mask((size), (gfp), &(mask))
95
96#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
97
98/* (legacy) interface for use without CPU hotplug handling */
99
100#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \
101 cpu_possible_map)
102#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type))
103#define free_percpu(ptr) percpu_free((ptr))
104#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
64 105
65#endif /* __LINUX_PERCPU_H */ 106#endif /* __LINUX_PERCPU_H */
diff --git a/mm/slab.c b/mm/slab.c
index 5870bcbd33cf..00584dbbec03 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3371,52 +3371,127 @@ EXPORT_SYMBOL(__kmalloc_track_caller);
3371 3371
3372#ifdef CONFIG_SMP 3372#ifdef CONFIG_SMP
3373/** 3373/**
3374 * __alloc_percpu - allocate one copy of the object for every present 3374 * percpu_depopulate - depopulate per-cpu data for given cpu
3375 * cpu in the system, zeroing them. 3375 * @__pdata: per-cpu data to depopulate
3376 * Objects should be dereferenced using the per_cpu_ptr macro only. 3376 * @cpu: depopulate per-cpu data for this cpu
3377 * 3377 *
3378 * @size: how many bytes of memory are required. 3378 * Depopulating per-cpu data for a cpu going offline would be a typical
3379 * use case. You need to register a cpu hotplug handler for that purpose.
3379 */ 3380 */
3380void *__alloc_percpu(size_t size) 3381void percpu_depopulate(void *__pdata, int cpu)
3381{ 3382{
3382 int i; 3383 struct percpu_data *pdata = __percpu_disguise(__pdata);
3383 struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); 3384 if (pdata->ptrs[cpu]) {
3385 kfree(pdata->ptrs[cpu]);
3386 pdata->ptrs[cpu] = NULL;
3387 }
3388}
3389EXPORT_SYMBOL_GPL(percpu_depopulate);
3384 3390
3385 if (!pdata) 3391/**
3386 return NULL; 3392 * percpu_depopulate_mask - depopulate per-cpu data for some cpu's
3393 * @__pdata: per-cpu data to depopulate
3394 * @mask: depopulate per-cpu data for cpu's selected through mask bits
3395 */
3396void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
3397{
3398 int cpu;
3399 for_each_cpu_mask(cpu, *mask)
3400 percpu_depopulate(__pdata, cpu);
3401}
3402EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
3387 3403
3388 /* 3404/**
3389 * Cannot use for_each_online_cpu since a cpu may come online 3405 * percpu_populate - populate per-cpu data for given cpu
3390 * and we have no way of figuring out how to fix the array 3406 * @__pdata: per-cpu data to populate further
3391 * that we have allocated then.... 3407 * @size: size of per-cpu object
3392 */ 3408 * @gfp: may sleep or not etc.
3393 for_each_possible_cpu(i) { 3409 * @cpu: populate per-cpu data for this cpu
3394 int node = cpu_to_node(i); 3410 *
3411 * Populating per-cpu data for a cpu coming online would be a typical
3412 * use case. You need to register a cpu hotplug handler for that purpose.
3413 * Per-cpu object is populated with zeroed buffer.
3414 */
3415void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
3416{
3417 struct percpu_data *pdata = __percpu_disguise(__pdata);
3418 int node = cpu_to_node(cpu);
3395 3419
3396 if (node_online(node)) 3420 BUG_ON(pdata->ptrs[cpu]);
3397 pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node); 3421 if (node_online(node)) {
3398 else 3422 /* FIXME: kzalloc_node(size, gfp, node) */
3399 pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); 3423 pdata->ptrs[cpu] = kmalloc_node(size, gfp, node);
3424 if (pdata->ptrs[cpu])
3425 memset(pdata->ptrs[cpu], 0, size);
3426 } else
3427 pdata->ptrs[cpu] = kzalloc(size, gfp);
3428 return pdata->ptrs[cpu];
3429}
3430EXPORT_SYMBOL_GPL(percpu_populate);
3400 3431
3401 if (!pdata->ptrs[i]) 3432/**
3402 goto unwind_oom; 3433 * percpu_populate_mask - populate per-cpu data for more cpu's
3403 memset(pdata->ptrs[i], 0, size); 3434 * @__pdata: per-cpu data to populate further
3404 } 3435 * @size: size of per-cpu object
3436 * @gfp: may sleep or not etc.
3437 * @mask: populate per-cpu data for cpu's selected through mask bits
3438 *
3439 * Per-cpu objects are populated with zeroed buffers.
3440 */
3441int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
3442 cpumask_t *mask)
3443{
3444 cpumask_t populated = CPU_MASK_NONE;
3445 int cpu;
3446
3447 for_each_cpu_mask(cpu, *mask)
3448 if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
3449 __percpu_depopulate_mask(__pdata, &populated);
3450 return -ENOMEM;
3451 } else
3452 cpu_set(cpu, populated);
3453 return 0;
3454}
3455EXPORT_SYMBOL_GPL(__percpu_populate_mask);
3405 3456
3406 /* Catch derefs w/o wrappers */ 3457/**
3407 return (void *)(~(unsigned long)pdata); 3458 * percpu_alloc_mask - initial setup of per-cpu data
3459 * @size: size of per-cpu object
3460 * @gfp: may sleep or not etc.
3461 * @mask: populate per-cpu data for cpu's selected through mask bits
3462 *
3463 * Populating per-cpu data for all online cpu's would be a typical use case,
3464 * which is simplified by the percpu_alloc() wrapper.
3465 * Per-cpu objects are populated with zeroed buffers.
3466 */
3467void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
3468{
3469 void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
3470 void *__pdata = __percpu_disguise(pdata);
3408 3471
3409unwind_oom: 3472 if (unlikely(!pdata))
3410 while (--i >= 0) { 3473 return NULL;
3411 if (!cpu_possible(i)) 3474 if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
3412 continue; 3475 return __pdata;
3413 kfree(pdata->ptrs[i]);
3414 }
3415 kfree(pdata); 3476 kfree(pdata);
3416 return NULL; 3477 return NULL;
3417} 3478}
3418EXPORT_SYMBOL(__alloc_percpu); 3479EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
3419#endif 3480
3481/**
3482 * percpu_free - final cleanup of per-cpu data
3483 * @__pdata: object to clean up
3484 *
3485 * We simply clean up any per-cpu object left. No need for the client to
3486 * track and specify through a bit mask which per-cpu objects are to free.
3487 */
3488void percpu_free(void *__pdata)
3489{
3490 __percpu_depopulate_mask(__pdata, &cpu_possible_map);
3491 kfree(__percpu_disguise(__pdata));
3492}
3493EXPORT_SYMBOL_GPL(percpu_free);
3494#endif /* CONFIG_SMP */
3420 3495
3421/** 3496/**
3422 * kmem_cache_free - Deallocate an object 3497 * kmem_cache_free - Deallocate an object
@@ -3463,29 +3538,6 @@ void kfree(const void *objp)
3463} 3538}
3464EXPORT_SYMBOL(kfree); 3539EXPORT_SYMBOL(kfree);
3465 3540
3466#ifdef CONFIG_SMP
3467/**
3468 * free_percpu - free previously allocated percpu memory
3469 * @objp: pointer returned by alloc_percpu.
3470 *
3471 * Don't free memory not originally allocated by alloc_percpu()
3472 * The complemented objp is to check for that.
3473 */
3474void free_percpu(const void *objp)
3475{
3476 int i;
3477 struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
3478
3479 /*
3480 * We allocate for all cpus so we cannot use for online cpu here.
3481 */
3482 for_each_possible_cpu(i)
3483 kfree(p->ptrs[i]);
3484 kfree(p);
3485}
3486EXPORT_SYMBOL(free_percpu);
3487#endif
3488
3489unsigned int kmem_cache_size(struct kmem_cache *cachep) 3541unsigned int kmem_cache_size(struct kmem_cache *cachep)
3490{ 3542{
3491 return obj_size(cachep); 3543 return obj_size(cachep);