author		Tejun Heo <tj@kernel.org>	2009-03-30 06:07:44 -0400
committer	Tejun Heo <tj@kernel.org>	2009-06-24 02:13:35 -0400
commit		e74e396204bfcb67570ba4517b08f5918e69afea (patch)
tree		df57c859e10f7fcbe5790e9b51a106d5bccfe8dc
parent		0017c869ddcb73069905d09f9e98e68627466237 (diff)
percpu: use dynamic percpu allocator as the default percpu allocator
This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use the
dynamic percpu allocator.  The first chunk is allocated using the
embedding helper and 8k is reserved for modules.  This ensures that
the new allocator behaves almost identically to the original
allocator as far as static percpu variables are concerned, so it
shouldn't introduce much breakage.

s390 and alpha use a custom SHIFT_PERCPU_PTR() to work around the
addressing range limit their addressing models impose.
Unfortunately, this breaks if the address is specified using a
variable, so for now the two archs aren't converted.

The following architectures are affected by this change.

* sh
* arm
* cris
* mips
* sparc(32)
* blackfin
* avr32
* parisc (broken, under investigation)
* m32r
* powerpc(32)

As this change makes the dynamic allocator the default one,
CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its inverse,
CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to the yet-to-be
converted archs.  These archs implement their own
setup_per_cpu_areas() and the conversion is not trivial.

* powerpc(64)
* sparc(64)
* ia64
* alpha
* s390

Boot and batch alloc/free tested on x86_32 with debug code (x86_32
doesn't use the default first chunk initialization).  Compile tested
on sparc(32), powerpc(32), arm and alpha.

Kyle McMartin reported that this change breaks parisc.  The problem
is still under investigation, and he is okay with pushing this patch
forward and fixing parisc later.

[ Impact: use dynamic allocator for most archs w/o custom percpu setup ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Bryan Wu <cooloney@kernel.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Grant Grundler <grundler@parisc-linux.org>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
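As background for the diffs below, a minimal userspace sketch of the
offset scheme the generic setup relies on; all fake_* names are
illustrative stand-ins, not kernel API.  Each CPU's copy of the static
percpu section sits at a fixed offset from the section itself, so
dereferencing a percpu pointer is plain pointer addition.  That is
also why s390 and alpha, whose addressing models cannot add an
arbitrary variable offset, stay on the legacy path for now.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_FAKE_CPUS	4
static char fake_percpu_section[4096];	/* models __per_cpu_start..__per_cpu_end */
static unsigned long fake_per_cpu_offset[NR_FAKE_CPUS];

/* Simplified model of the generic setup_per_cpu_areas() this patch adds. */
static void fake_setup_per_cpu_areas(size_t unit_size)
{
	char *base = malloc(unit_size * NR_FAKE_CPUS);
	unsigned long delta = (unsigned long)base -
			      (unsigned long)fake_percpu_section;
	int cpu;

	for (cpu = 0; cpu < NR_FAKE_CPUS; cpu++) {
		/* each unit starts with a copy of the static section */
		memcpy(base + cpu * unit_size, fake_percpu_section,
		       sizeof(fake_percpu_section));
		fake_per_cpu_offset[cpu] = delta + cpu * unit_size;
	}
}

int main(void)
{
	char *var = fake_percpu_section + 16;	/* some static percpu variable */
	int cpu;

	fake_setup_per_cpu_areas(8192);
	/* accessing CPU n's copy is then just "address + offset[n]" */
	for (cpu = 0; cpu < NR_FAKE_CPUS; cpu++)
		printf("cpu%d copy at %p\n", cpu,
		       (void *)(var + fake_per_cpu_offset[cpu]));
	return 0;
}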
-rw-r--r--	arch/alpha/Kconfig	  3
-rw-r--r--	arch/ia64/Kconfig	  3
-rw-r--r--	arch/powerpc/Kconfig	  3
-rw-r--r--	arch/s390/Kconfig	  3
-rw-r--r--	arch/sparc/Kconfig	  3
-rw-r--r--	arch/x86/Kconfig	  3
-rw-r--r--	include/linux/percpu.h	 12
-rw-r--r--	init/main.c		 24
-rw-r--r--	kernel/module.c		  6
-rw-r--r--	mm/Makefile		  2
-rw-r--r--	mm/allocpercpu.c	 28
-rw-r--r--	mm/percpu.c		 40
12 files changed, 95 insertions, 35 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9fb8aae5c391..05d86407188c 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -70,6 +70,9 @@ config AUTO_IRQ_AFFINITY
 	depends on SMP
 	default y
 
+config HAVE_LEGACY_PER_CPU_AREA
+	def_bool y
+
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
 
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 170042b420d4..328d2f8b8c3f 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -89,6 +89,9 @@ config GENERIC_TIME_VSYSCALL
 	bool
 	default y
 
+config HAVE_LEGACY_PER_CPU_AREA
+	def_bool y
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index bf6cedfa05db..a774c2acbe69 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -46,6 +46,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
 	bool
 	default y
 
+config HAVE_LEGACY_PER_CPU_AREA
+	def_bool PPC64
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool PPC64
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a14dba0e4d67..f4a3cc62d28f 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -75,6 +75,9 @@ config VIRT_CPU_ACCOUNTING
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
+config HAVE_LEGACY_PER_CPU_AREA
+	def_bool y
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 3f8b6a92eabd..7a8698b913fe 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -92,6 +92,9 @@ config AUDIT_ARCH
 	bool
 	default y
 
+config HAVE_LEGACY_PER_CPU_AREA
+	def_bool y if SPARC64
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y if SPARC64
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d1430ef6b4f9..a48a90076d83 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -149,9 +149,6 @@ config ARCH_HAS_CACHE_LINE_SIZE
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
-config HAVE_DYNAMIC_PER_CPU_AREA
-	def_bool y
-
 config HAVE_CPUMASK_OF_CPU_MAP
 	def_bool X86_64_SMP
 
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 26fd9d12f050..e5000343dd61 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -34,7 +34,7 @@
 
 #ifdef CONFIG_SMP
 
-#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
 
 /* minimum unit size, also is the maximum supported allocation size */
 #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(64 << 10)
@@ -80,7 +80,7 @@ extern ssize_t __init pcpu_embed_first_chunk(
 
 extern void *__alloc_reserved_percpu(size_t size, size_t align);
 
-#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
 
 struct percpu_data {
 	void *ptrs[1];
@@ -99,11 +99,15 @@ struct percpu_data {
 	(__typeof__(ptr))__p->ptrs[(cpu)];	\
 })
 
-#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
 
 extern void *__alloc_percpu(size_t size, size_t align);
 extern void free_percpu(void *__pdata);
 
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+extern void __init setup_per_cpu_areas(void);
+#endif
+
 #else /* CONFIG_SMP */
 
 #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
@@ -124,6 +128,8 @@ static inline void free_percpu(void *p)
 	kfree(p);
 }
 
+static inline void __init setup_per_cpu_areas(void) { }
+
 #endif /* CONFIG_SMP */
 
 #define alloc_percpu(type)	(type *)__alloc_percpu(sizeof(type), \
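
For context, a minimal usage sketch of the API this header keeps
stable (the hitcount structure and functions are hypothetical, not
part of this patch): alloc_percpu() and per_cpu_ptr() behave the same
whichever allocator backs them, which is what lets the default flip
without touching percpu users.

#include <linux/percpu.h>
#include <linux/errno.h>

/* hypothetical example type, not from this patch */
struct hitcount {
	unsigned long hits;
};

static struct hitcount *counters;

static int __init hitcount_init(void)
{
	int cpu;

	/* one struct hitcount per possible CPU */
	counters = alloc_percpu(struct hitcount);
	if (!counters)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		per_cpu_ptr(counters, cpu)->hits = 0;
	return 0;
}

static void hitcount_exit(void)
{
	free_percpu(counters);
}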
diff --git a/init/main.c b/init/main.c
index 09131ec090c1..602d724afa5c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -357,7 +357,6 @@ static void __init smp_init(void)
 #define smp_init()	do { } while (0)
 #endif
 
-static inline void setup_per_cpu_areas(void) { }
 static inline void setup_nr_cpu_ids(void) { }
 static inline void smp_prepare_cpus(unsigned int maxcpus) { }
 
@@ -378,29 +377,6 @@ static void __init setup_nr_cpu_ids(void)
 	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
 }
 
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static void __init setup_per_cpu_areas(void)
-{
-	unsigned long size, i;
-	char *ptr;
-	unsigned long nr_possible_cpus = num_possible_cpus();
-
-	/* Copy section for each CPU (we discard the original) */
-	size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
-	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
-
-	for_each_possible_cpu(i) {
-		__per_cpu_offset[i] = ptr - __per_cpu_start;
-		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-		ptr += size;
-	}
-}
-#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
-
 /* Called by boot processor to activate the rest. */
 static void __init smp_init(void)
 {
diff --git a/kernel/module.c b/kernel/module.c
index 38928fcaff2b..f5934954fa99 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module);
 
 #ifdef CONFIG_SMP
 
-#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
 
 static void *percpu_modalloc(unsigned long size, unsigned long align,
 			     const char *name)
@@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme)
 	free_percpu(freeme);
 }
 
-#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
 
 /* Number of blocks used and allocated. */
 static unsigned int pcpu_num_used, pcpu_num_allocated;
@@ -535,7 +535,7 @@ static int percpu_modinit(void)
 }
 __initcall(percpu_modinit);
 
-#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
+#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
 
 static unsigned int find_pcpusec(Elf_Ehdr *hdr,
 				 Elf_Shdr *sechdrs,
diff --git a/mm/Makefile b/mm/Makefile
index 5e0bd6426693..c77c6487552f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -33,7 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
 obj-$(CONFIG_SMP) += percpu.o
 else
 obj-$(CONFIG_SMP) += allocpercpu.o
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index dfdee6a47359..df34ceae0c67 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -5,6 +5,8 @@
  */
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/bootmem.h>
+#include <asm/sections.h>
 
 #ifndef cache_line_size
 #define cache_line_size()	L1_CACHE_BYTES
@@ -147,3 +149,29 @@ void free_percpu(void *__pdata)
 	kfree(__percpu_disguise(__pdata));
 }
 EXPORT_SYMBOL_GPL(free_percpu);
+
+/*
+ * Generic percpu area setup.
+ */
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+
+EXPORT_SYMBOL(__per_cpu_offset);
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long size, i;
+	char *ptr;
+	unsigned long nr_possible_cpus = num_possible_cpus();
+
+	/* Copy section for each CPU (we discard the original) */
+	size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
+	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
+
+	for_each_possible_cpu(i) {
+		__per_cpu_offset[i] = ptr - __per_cpu_start;
+		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+		ptr += size;
+	}
+}
+#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
diff --git a/mm/percpu.c b/mm/percpu.c
index b70f2acd8853..b14984566f5a 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -43,7 +43,7 @@
  *
  * To use this allocator, arch code should do the followings.
  *
- * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
+ * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA
  *
  * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
  *   regular address to percpu pointer and back if they need to be
@@ -1275,3 +1275,41 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
 				      reserved_size, dyn_size,
 				      pcpue_unit_size, pcpue_ptr, NULL);
 }
+
+/*
+ * Generic percpu area setup.
+ *
+ * The embedding helper is used because its behavior closely resembles
+ * the original non-dynamic generic percpu area setup.  This is
+ * important because many archs have addressing restrictions and might
+ * fail if the percpu area is located far away from the previous
+ * location.  As an added bonus, in non-NUMA cases, embedding is
+ * generally a good idea TLB-wise because percpu area can piggy back
+ * on the physical linear memory mapping which uses large page
+ * mappings on applicable archs.
+ */
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+void __init setup_per_cpu_areas(void)
+{
+	size_t static_size = __per_cpu_end - __per_cpu_start;
+	ssize_t unit_size;
+	unsigned long delta;
+	unsigned int cpu;
+
+	/*
+	 * Always reserve area for module percpu variables.  That's
+	 * what the legacy allocator did.
+	 */
+	unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE,
+					   PERCPU_DYNAMIC_RESERVE, -1);
+	if (unit_size < 0)
+		panic("Failed to initialize percpu areas.");
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset[cpu] = delta + cpu * unit_size;
+}
+#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
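
For contrast, a compact userspace model of the two access schemes now
living behind one API (names are illustrative only, not kernel code):
the legacy allocpercpu path pays a pointer-table lookup on every
access, while the dynamic path reduces a percpu dereference to a
single addition, the same arithmetic used for static percpu variables.

#include <stdio.h>

#define NR_CPUS	4			/* illustrative */

/* Legacy scheme: a disguised array of per-CPU pointers (cf. struct
 * percpu_data in the include/linux/percpu.h hunk above). */
struct legacy_percpu_data {
	void *ptrs[NR_CPUS];
};

static void *legacy_per_cpu_ptr(struct legacy_percpu_data *p, int cpu)
{
	return p->ptrs[cpu];		/* extra dereference per access */
}

/* Dynamic scheme: one base address plus a per-CPU offset. */
static unsigned long my_per_cpu_offset[NR_CPUS];

static void *dynamic_per_cpu_ptr(void *ptr, int cpu)
{
	return (char *)ptr + my_per_cpu_offset[cpu];
}

int main(void)
{
	static long dummy;		/* stands in for a percpu variable */
	struct legacy_percpu_data legacy = {
		{ &dummy, &dummy, &dummy, &dummy }
	};

	printf("legacy:  %p\n", legacy_per_cpu_ptr(&legacy, 1));
	printf("dynamic: %p\n", dynamic_per_cpu_ptr(&dummy, 1));
	return 0;
}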