diff options
author | Tejun Heo <tj@kernel.org> | 2009-03-30 06:07:44 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2009-06-24 02:13:35 -0400 |
commit | e74e396204bfcb67570ba4517b08f5918e69afea (patch) | |
tree | df57c859e10f7fcbe5790e9b51a106d5bccfe8dc | |
parent | 0017c869ddcb73069905d09f9e98e68627466237 (diff) |
percpu: use dynamic percpu allocator as the default percpu allocator
This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use
dynamic percpu allocator. The first chunk is allocated using
embedding helper and 8k is reserved for modules. This ensures that
the new allocator behaves almost identically to the original allocator
as far as static percpu variables are concerned, so it shouldn't
introduce much breakage.
s390 and alpha use custom SHIFT_PERCPU_PTR() to work around addressing
range limit the addressing model imposes. Unfortunately, this breaks
if the address is specified using a variable, so for now, the two
archs aren't converted.
The following architectures are affected by this change.
* sh
* arm
* cris
* mips
* sparc(32)
* blackfin
* avr32
* parisc (broken, under investigation)
* m32r
* powerpc(32)
As this change makes the dynamic allocator the default one,
CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its inverse -
CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted
archs. These archs implement their own setup_per_cpu_areas() and the
conversion is not trivial.
* powerpc(64)
* sparc(64)
* ia64
* alpha
* s390
Boot and batch alloc/free tests passed on x86_32 with debug code (x86_32
doesn't use default first chunk initialization). Compile tested on
sparc(32), powerpc(32), arm and alpha.
Kyle McMartin reported that this change breaks parisc. The problem is
still under investigation and he is okay with pushing this patch
forward and fixing parisc later.
[ Impact: use dynamic allocator for most archs w/o custom percpu setup ]
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Bryan Wu <cooloney@kernel.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Grant Grundler <grundler@parisc-linux.org>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/alpha/Kconfig | 3 | ||||
-rw-r--r-- | arch/ia64/Kconfig | 3 | ||||
-rw-r--r-- | arch/powerpc/Kconfig | 3 | ||||
-rw-r--r-- | arch/s390/Kconfig | 3 | ||||
-rw-r--r-- | arch/sparc/Kconfig | 3 | ||||
-rw-r--r-- | arch/x86/Kconfig | 3 | ||||
-rw-r--r-- | include/linux/percpu.h | 12 | ||||
-rw-r--r-- | init/main.c | 24 | ||||
-rw-r--r-- | kernel/module.c | 6 | ||||
-rw-r--r-- | mm/Makefile | 2 | ||||
-rw-r--r-- | mm/allocpercpu.c | 28 | ||||
-rw-r--r-- | mm/percpu.c | 40 |
12 files changed, 95 insertions, 35 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 9fb8aae5c391..05d86407188c 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig | |||
@@ -70,6 +70,9 @@ config AUTO_IRQ_AFFINITY | |||
70 | depends on SMP | 70 | depends on SMP |
71 | default y | 71 | default y |
72 | 72 | ||
73 | config HAVE_LEGACY_PER_CPU_AREA | ||
74 | def_bool y | ||
75 | |||
73 | source "init/Kconfig" | 76 | source "init/Kconfig" |
74 | source "kernel/Kconfig.freezer" | 77 | source "kernel/Kconfig.freezer" |
75 | 78 | ||
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..328d2f8b8c3f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig | |||
@@ -89,6 +89,9 @@ config GENERIC_TIME_VSYSCALL | |||
89 | bool | 89 | bool |
90 | default y | 90 | default y |
91 | 91 | ||
92 | config HAVE_LEGACY_PER_CPU_AREA | ||
93 | def_bool y | ||
94 | |||
92 | config HAVE_SETUP_PER_CPU_AREA | 95 | config HAVE_SETUP_PER_CPU_AREA |
93 | def_bool y | 96 | def_bool y |
94 | 97 | ||
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bf6cedfa05db..a774c2acbe69 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
@@ -46,6 +46,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ | |||
46 | bool | 46 | bool |
47 | default y | 47 | default y |
48 | 48 | ||
49 | config HAVE_LEGACY_PER_CPU_AREA | ||
50 | def_bool PPC64 | ||
51 | |||
49 | config HAVE_SETUP_PER_CPU_AREA | 52 | config HAVE_SETUP_PER_CPU_AREA |
50 | def_bool PPC64 | 53 | def_bool PPC64 |
51 | 54 | ||
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a14dba0e4d67..f4a3cc62d28f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -75,6 +75,9 @@ config VIRT_CPU_ACCOUNTING | |||
75 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC | 75 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC |
76 | def_bool y | 76 | def_bool y |
77 | 77 | ||
78 | config HAVE_LEGACY_PER_CPU_AREA | ||
79 | def_bool y | ||
80 | |||
78 | mainmenu "Linux Kernel Configuration" | 81 | mainmenu "Linux Kernel Configuration" |
79 | 82 | ||
80 | config S390 | 83 | config S390 |
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 3f8b6a92eabd..7a8698b913fe 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig | |||
@@ -92,6 +92,9 @@ config AUDIT_ARCH | |||
92 | bool | 92 | bool |
93 | default y | 93 | default y |
94 | 94 | ||
95 | config HAVE_LEGACY_PER_CPU_AREA | ||
96 | def_bool y if SPARC64 | ||
97 | |||
95 | config HAVE_SETUP_PER_CPU_AREA | 98 | config HAVE_SETUP_PER_CPU_AREA |
96 | def_bool y if SPARC64 | 99 | def_bool y if SPARC64 |
97 | 100 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1430ef6b4f9..a48a90076d83 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -149,9 +149,6 @@ config ARCH_HAS_CACHE_LINE_SIZE | |||
149 | config HAVE_SETUP_PER_CPU_AREA | 149 | config HAVE_SETUP_PER_CPU_AREA |
150 | def_bool y | 150 | def_bool y |
151 | 151 | ||
152 | config HAVE_DYNAMIC_PER_CPU_AREA | ||
153 | def_bool y | ||
154 | |||
155 | config HAVE_CPUMASK_OF_CPU_MAP | 152 | config HAVE_CPUMASK_OF_CPU_MAP |
156 | def_bool X86_64_SMP | 153 | def_bool X86_64_SMP |
157 | 154 | ||
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 26fd9d12f050..e5000343dd61 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h | |||
@@ -34,7 +34,7 @@ | |||
34 | 34 | ||
35 | #ifdef CONFIG_SMP | 35 | #ifdef CONFIG_SMP |
36 | 36 | ||
37 | #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA | 37 | #ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA |
38 | 38 | ||
39 | /* minimum unit size, also is the maximum supported allocation size */ | 39 | /* minimum unit size, also is the maximum supported allocation size */ |
40 | #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) | 40 | #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) |
@@ -80,7 +80,7 @@ extern ssize_t __init pcpu_embed_first_chunk( | |||
80 | 80 | ||
81 | extern void *__alloc_reserved_percpu(size_t size, size_t align); | 81 | extern void *__alloc_reserved_percpu(size_t size, size_t align); |
82 | 82 | ||
83 | #else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ | 83 | #else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ |
84 | 84 | ||
85 | struct percpu_data { | 85 | struct percpu_data { |
86 | void *ptrs[1]; | 86 | void *ptrs[1]; |
@@ -99,11 +99,15 @@ struct percpu_data { | |||
99 | (__typeof__(ptr))__p->ptrs[(cpu)]; \ | 99 | (__typeof__(ptr))__p->ptrs[(cpu)]; \ |
100 | }) | 100 | }) |
101 | 101 | ||
102 | #endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ | 102 | #endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ |
103 | 103 | ||
104 | extern void *__alloc_percpu(size_t size, size_t align); | 104 | extern void *__alloc_percpu(size_t size, size_t align); |
105 | extern void free_percpu(void *__pdata); | 105 | extern void free_percpu(void *__pdata); |
106 | 106 | ||
107 | #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | ||
108 | extern void __init setup_per_cpu_areas(void); | ||
109 | #endif | ||
110 | |||
107 | #else /* CONFIG_SMP */ | 111 | #else /* CONFIG_SMP */ |
108 | 112 | ||
109 | #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) | 113 | #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) |
@@ -124,6 +128,8 @@ static inline void free_percpu(void *p) | |||
124 | kfree(p); | 128 | kfree(p); |
125 | } | 129 | } |
126 | 130 | ||
131 | static inline void __init setup_per_cpu_areas(void) { } | ||
132 | |||
127 | #endif /* CONFIG_SMP */ | 133 | #endif /* CONFIG_SMP */ |
128 | 134 | ||
129 | #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ | 135 | #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ |
diff --git a/init/main.c b/init/main.c index 09131ec090c1..602d724afa5c 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -357,7 +357,6 @@ static void __init smp_init(void) | |||
357 | #define smp_init() do { } while (0) | 357 | #define smp_init() do { } while (0) |
358 | #endif | 358 | #endif |
359 | 359 | ||
360 | static inline void setup_per_cpu_areas(void) { } | ||
361 | static inline void setup_nr_cpu_ids(void) { } | 360 | static inline void setup_nr_cpu_ids(void) { } |
362 | static inline void smp_prepare_cpus(unsigned int maxcpus) { } | 361 | static inline void smp_prepare_cpus(unsigned int maxcpus) { } |
363 | 362 | ||
@@ -378,29 +377,6 @@ static void __init setup_nr_cpu_ids(void) | |||
378 | nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; | 377 | nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; |
379 | } | 378 | } |
380 | 379 | ||
381 | #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | ||
382 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; | ||
383 | |||
384 | EXPORT_SYMBOL(__per_cpu_offset); | ||
385 | |||
386 | static void __init setup_per_cpu_areas(void) | ||
387 | { | ||
388 | unsigned long size, i; | ||
389 | char *ptr; | ||
390 | unsigned long nr_possible_cpus = num_possible_cpus(); | ||
391 | |||
392 | /* Copy section for each CPU (we discard the original) */ | ||
393 | size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); | ||
394 | ptr = alloc_bootmem_pages(size * nr_possible_cpus); | ||
395 | |||
396 | for_each_possible_cpu(i) { | ||
397 | __per_cpu_offset[i] = ptr - __per_cpu_start; | ||
398 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | ||
399 | ptr += size; | ||
400 | } | ||
401 | } | ||
402 | #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ | ||
403 | |||
404 | /* Called by boot processor to activate the rest. */ | 380 | /* Called by boot processor to activate the rest. */ |
405 | static void __init smp_init(void) | 381 | static void __init smp_init(void) |
406 | { | 382 | { |
diff --git a/kernel/module.c b/kernel/module.c index 38928fcaff2b..f5934954fa99 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module); | |||
364 | 364 | ||
365 | #ifdef CONFIG_SMP | 365 | #ifdef CONFIG_SMP |
366 | 366 | ||
367 | #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA | 367 | #ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA |
368 | 368 | ||
369 | static void *percpu_modalloc(unsigned long size, unsigned long align, | 369 | static void *percpu_modalloc(unsigned long size, unsigned long align, |
370 | const char *name) | 370 | const char *name) |
@@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme) | |||
389 | free_percpu(freeme); | 389 | free_percpu(freeme); |
390 | } | 390 | } |
391 | 391 | ||
392 | #else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ | 392 | #else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */ |
393 | 393 | ||
394 | /* Number of blocks used and allocated. */ | 394 | /* Number of blocks used and allocated. */ |
395 | static unsigned int pcpu_num_used, pcpu_num_allocated; | 395 | static unsigned int pcpu_num_used, pcpu_num_allocated; |
@@ -535,7 +535,7 @@ static int percpu_modinit(void) | |||
535 | } | 535 | } |
536 | __initcall(percpu_modinit); | 536 | __initcall(percpu_modinit); |
537 | 537 | ||
538 | #endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ | 538 | #endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ |
539 | 539 | ||
540 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, | 540 | static unsigned int find_pcpusec(Elf_Ehdr *hdr, |
541 | Elf_Shdr *sechdrs, | 541 | Elf_Shdr *sechdrs, |
diff --git a/mm/Makefile b/mm/Makefile index 5e0bd6426693..c77c6487552f 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -33,7 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o | |||
33 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 33 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
34 | obj-$(CONFIG_FS_XIP) += filemap_xip.o | 34 | obj-$(CONFIG_FS_XIP) += filemap_xip.o |
35 | obj-$(CONFIG_MIGRATION) += migrate.o | 35 | obj-$(CONFIG_MIGRATION) += migrate.o |
36 | ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA | 36 | ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA |
37 | obj-$(CONFIG_SMP) += percpu.o | 37 | obj-$(CONFIG_SMP) += percpu.o |
38 | else | 38 | else |
39 | obj-$(CONFIG_SMP) += allocpercpu.o | 39 | obj-$(CONFIG_SMP) += allocpercpu.o |
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index dfdee6a47359..df34ceae0c67 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c | |||
@@ -5,6 +5,8 @@ | |||
5 | */ | 5 | */ |
6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/bootmem.h> | ||
9 | #include <asm/sections.h> | ||
8 | 10 | ||
9 | #ifndef cache_line_size | 11 | #ifndef cache_line_size |
10 | #define cache_line_size() L1_CACHE_BYTES | 12 | #define cache_line_size() L1_CACHE_BYTES |
@@ -147,3 +149,29 @@ void free_percpu(void *__pdata) | |||
147 | kfree(__percpu_disguise(__pdata)); | 149 | kfree(__percpu_disguise(__pdata)); |
148 | } | 150 | } |
149 | EXPORT_SYMBOL_GPL(free_percpu); | 151 | EXPORT_SYMBOL_GPL(free_percpu); |
152 | |||
153 | /* | ||
154 | * Generic percpu area setup. | ||
155 | */ | ||
156 | #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | ||
157 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; | ||
158 | |||
159 | EXPORT_SYMBOL(__per_cpu_offset); | ||
160 | |||
161 | void __init setup_per_cpu_areas(void) | ||
162 | { | ||
163 | unsigned long size, i; | ||
164 | char *ptr; | ||
165 | unsigned long nr_possible_cpus = num_possible_cpus(); | ||
166 | |||
167 | /* Copy section for each CPU (we discard the original) */ | ||
168 | size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); | ||
169 | ptr = alloc_bootmem_pages(size * nr_possible_cpus); | ||
170 | |||
171 | for_each_possible_cpu(i) { | ||
172 | __per_cpu_offset[i] = ptr - __per_cpu_start; | ||
173 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | ||
174 | ptr += size; | ||
175 | } | ||
176 | } | ||
177 | #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ | ||
diff --git a/mm/percpu.c b/mm/percpu.c index b70f2acd8853..b14984566f5a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -43,7 +43,7 @@ | |||
43 | * | 43 | * |
44 | * To use this allocator, arch code should do the followings. | 44 | * To use this allocator, arch code should do the followings. |
45 | * | 45 | * |
46 | * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA | 46 | * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA |
47 | * | 47 | * |
48 | * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate | 48 | * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate |
49 | * regular address to percpu pointer and back if they need to be | 49 | * regular address to percpu pointer and back if they need to be |
@@ -1275,3 +1275,41 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | |||
1275 | reserved_size, dyn_size, | 1275 | reserved_size, dyn_size, |
1276 | pcpue_unit_size, pcpue_ptr, NULL); | 1276 | pcpue_unit_size, pcpue_ptr, NULL); |
1277 | } | 1277 | } |
1278 | |||
1279 | /* | ||
1280 | * Generic percpu area setup. | ||
1281 | * | ||
1282 | * The embedding helper is used because its behavior closely resembles | ||
1283 | * the original non-dynamic generic percpu area setup. This is | ||
1284 | * important because many archs have addressing restrictions and might | ||
1285 | * fail if the percpu area is located far away from the previous | ||
1286 | * location. As an added bonus, in non-NUMA cases, embedding is | ||
1287 | * generally a good idea TLB-wise because percpu area can piggy back | ||
1288 | * on the physical linear memory mapping which uses large page | ||
1289 | * mappings on applicable archs. | ||
1290 | */ | ||
1291 | #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | ||
1292 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; | ||
1293 | EXPORT_SYMBOL(__per_cpu_offset); | ||
1294 | |||
1295 | void __init setup_per_cpu_areas(void) | ||
1296 | { | ||
1297 | size_t static_size = __per_cpu_end - __per_cpu_start; | ||
1298 | ssize_t unit_size; | ||
1299 | unsigned long delta; | ||
1300 | unsigned int cpu; | ||
1301 | |||
1302 | /* | ||
1303 | * Always reserve area for module percpu variables. That's | ||
1304 | * what the legacy allocator did. | ||
1305 | */ | ||
1306 | unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, | ||
1307 | PERCPU_DYNAMIC_RESERVE, -1); | ||
1308 | if (unit_size < 0) | ||
1309 | panic("Failed to initialized percpu areas."); | ||
1310 | |||
1311 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; | ||
1312 | for_each_possible_cpu(cpu) | ||
1313 | __per_cpu_offset[cpu] = delta + cpu * unit_size; | ||
1314 | } | ||
1315 | #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ | ||