aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2009-06-21 22:56:24 -0400
committer Tejun Heo <tj@kernel.org> 2009-06-21 22:56:24 -0400
commit fa8a7094ba1679b4b9b443e0ac9f5e046c79ee8d (patch)
tree cda9df47b1a84581685d8f4e0cd8ce66cac1d234
parent e59a1bb2fdfb745c685f5b40ffbed126331d3223 (diff)
x86: implement percpu_alloc kernel parameter
According to Andi, it isn't clear whether the lpage allocator is worth the trouble, as there are many processors where PMD TLB is far scarcer than PTE TLB. The advantage or disadvantage probably depends on the actual size of the percpu area and the specific processor. As performance degradation due to TLB pressure tends to be highly workload-specific and subtle, it is difficult to decide which way to go without more data.

This patch implements the percpu_alloc kernel parameter to allow selecting which first chunk allocator to use, to ease debugging and testing.

While at it, make sure all the failure paths report why something failed, to help determine why a certain allocator isn't working. Also, kill the "Great future plan" comment, which had already been realized quite some time ago.

[ Impact: allow explicit percpu first chunk allocator selection ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jan Beulich <JBeulich@novell.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- Documentation/kernel-parameters.txt 6
-rw-r--r-- arch/x86/kernel/setup_percpu.c 69
-rw-r--r-- mm/percpu.c 13
3 files changed, 65 insertions, 23 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 08def8deb5f5..ecad946920d1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1882,6 +1882,12 @@ and is between 256 and 4096 characters. It is defined in the file
1882 Format: { 0 | 1 } 1882 Format: { 0 | 1 }
1883 See arch/parisc/kernel/pdc_chassis.c 1883 See arch/parisc/kernel/pdc_chassis.c
1884 1884
1885 percpu_alloc= [X86] Select which percpu first chunk allocator to use.
1886 Allowed values are one of "lpage", "embed" and "4k".
1887 See comments in arch/x86/kernel/setup_percpu.c for
1888 details on each allocator. This parameter is primarily
1889 for debugging and performance comparison.
1890
1885 pf. [PARIDE] 1891 pf. [PARIDE]
1886 See Documentation/blockdev/paride.txt. 1892 See Documentation/blockdev/paride.txt.
1887 1893
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bad2fd223114..165ebd5ba83b 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -156,20 +156,23 @@ static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
156 return virt_to_page(pcpul_map[cpu].ptr + off); 156 return virt_to_page(pcpul_map[cpu].ptr + off);
157} 157}
158 158
159static ssize_t __init setup_pcpu_lpage(size_t static_size) 159static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
160{ 160{
161 size_t map_size, dyn_size; 161 size_t map_size, dyn_size;
162 unsigned int cpu; 162 unsigned int cpu;
163 int i, j; 163 int i, j;
164 ssize_t ret; 164 ssize_t ret;
165 165
166 /* 166 /* on non-NUMA, embedding is better */
167 * If large page isn't supported, there's no benefit in doing 167 if (!chosen && !pcpu_need_numa())
168 * this. Also, on non-NUMA, embedding is better.
169 */
170 if (!cpu_has_pse || !pcpu_need_numa())
171 return -EINVAL; 168 return -EINVAL;
172 169
170 /* need PSE */
171 if (!cpu_has_pse) {
172 pr_warning("PERCPU: lpage allocator requires PSE\n");
173 return -EINVAL;
174 }
175
173 /* 176 /*
174 * Currently supports only single page. Supporting multiple 177 * Currently supports only single page. Supporting multiple
175 * pages won't be too difficult if it ever becomes necessary. 178 * pages won't be too difficult if it ever becomes necessary.
@@ -191,8 +194,11 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size)
191 pcpul_map[cpu].cpu = cpu; 194 pcpul_map[cpu].cpu = cpu;
192 pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, 195 pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
193 PMD_SIZE); 196 PMD_SIZE);
194 if (!pcpul_map[cpu].ptr) 197 if (!pcpul_map[cpu].ptr) {
198 pr_warning("PERCPU: failed to allocate large page "
199 "for cpu%u\n", cpu);
195 goto enomem; 200 goto enomem;
201 }
196 202
197 /* 203 /*
198 * Only use pcpul_size bytes and give back the rest. 204 * Only use pcpul_size bytes and give back the rest.
@@ -297,7 +303,7 @@ void *pcpu_lpage_remapped(void *kaddr)
297 return NULL; 303 return NULL;
298} 304}
299#else 305#else
300static ssize_t __init setup_pcpu_lpage(size_t static_size) 306static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
301{ 307{
302 return -EINVAL; 308 return -EINVAL;
303} 309}
@@ -311,7 +317,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size)
311 * mapping so that it can use PMD mapping without additional TLB 317 * mapping so that it can use PMD mapping without additional TLB
312 * pressure. 318 * pressure.
313 */ 319 */
314static ssize_t __init setup_pcpu_embed(size_t static_size) 320static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
315{ 321{
316 size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; 322 size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
317 323
@@ -320,7 +326,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
320 * this. Also, embedding allocation doesn't play well with 326 * this. Also, embedding allocation doesn't play well with
321 * NUMA. 327 * NUMA.
322 */ 328 */
323 if (!cpu_has_pse || pcpu_need_numa()) 329 if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
324 return -EINVAL; 330 return -EINVAL;
325 331
326 return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, 332 return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
@@ -370,8 +376,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
370 void *ptr; 376 void *ptr;
371 377
372 ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); 378 ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
373 if (!ptr) 379 if (!ptr) {
380 pr_warning("PERCPU: failed to allocate "
381 "4k page for cpu%u\n", cpu);
374 goto enomem; 382 goto enomem;
383 }
375 384
376 memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); 385 memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
377 pcpu4k_pages[j++] = virt_to_page(ptr); 386 pcpu4k_pages[j++] = virt_to_page(ptr);
@@ -395,6 +404,16 @@ out_free_ar:
395 return ret; 404 return ret;
396} 405}
397 406
407/* for explicit first chunk allocator selection */
408static char pcpu_chosen_alloc[16] __initdata;
409
410static int __init percpu_alloc_setup(char *str)
411{
412 strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
413 return 0;
414}
415early_param("percpu_alloc", percpu_alloc_setup);
416
398static inline void setup_percpu_segment(int cpu) 417static inline void setup_percpu_segment(int cpu)
399{ 418{
400#ifdef CONFIG_X86_32 419#ifdef CONFIG_X86_32
@@ -408,11 +427,6 @@ static inline void setup_percpu_segment(int cpu)
408#endif 427#endif
409} 428}
410 429
411/*
412 * Great future plan:
413 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
414 * Always point %gs to its beginning
415 */
416void __init setup_per_cpu_areas(void) 430void __init setup_per_cpu_areas(void)
417{ 431{
418 size_t static_size = __per_cpu_end - __per_cpu_start; 432 size_t static_size = __per_cpu_end - __per_cpu_start;
@@ -429,9 +443,26 @@ void __init setup_per_cpu_areas(void)
429 * of large page mappings. Please read comments on top of 443 * of large page mappings. Please read comments on top of
430 * each allocator for details. 444 * each allocator for details.
431 */ 445 */
432 ret = setup_pcpu_lpage(static_size); 446 ret = -EINVAL;
433 if (ret < 0) 447 if (strlen(pcpu_chosen_alloc)) {
434 ret = setup_pcpu_embed(static_size); 448 if (strcmp(pcpu_chosen_alloc, "4k")) {
449 if (!strcmp(pcpu_chosen_alloc, "lpage"))
450 ret = setup_pcpu_lpage(static_size, true);
451 else if (!strcmp(pcpu_chosen_alloc, "embed"))
452 ret = setup_pcpu_embed(static_size, true);
453 else
454 pr_warning("PERCPU: unknown allocator %s "
455 "specified\n", pcpu_chosen_alloc);
456 if (ret < 0)
457 pr_warning("PERCPU: %s allocator failed (%zd), "
458 "falling back to 4k\n",
459 pcpu_chosen_alloc, ret);
460 }
461 } else {
462 ret = setup_pcpu_lpage(static_size, false);
463 if (ret < 0)
464 ret = setup_pcpu_embed(static_size, false);
465 }
435 if (ret < 0) 466 if (ret < 0)
436 ret = setup_pcpu_4k(static_size); 467 ret = setup_pcpu_4k(static_size);
437 if (ret < 0) 468 if (ret < 0)
diff --git a/mm/percpu.c b/mm/percpu.c
index d06f4748271e..b70f2acd8853 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1233,6 +1233,7 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
1233ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, 1233ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
1234 ssize_t dyn_size, ssize_t unit_size) 1234 ssize_t dyn_size, ssize_t unit_size)
1235{ 1235{
1236 size_t chunk_size;
1236 unsigned int cpu; 1237 unsigned int cpu;
1237 1238
1238 /* determine parameters and allocate */ 1239 /* determine parameters and allocate */
@@ -1247,11 +1248,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size,
1247 } else 1248 } else
1248 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); 1249 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
1249 1250
1250 pcpue_ptr = __alloc_bootmem_nopanic( 1251 chunk_size = pcpue_unit_size * num_possible_cpus();
1251 num_possible_cpus() * pcpue_unit_size, 1252
1252 PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); 1253 pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE,
1253 if (!pcpue_ptr) 1254 __pa(MAX_DMA_ADDRESS));
1255 if (!pcpue_ptr) {
1256 pr_warning("PERCPU: failed to allocate %zu bytes for "
1257 "embedding\n", chunk_size);
1254 return -ENOMEM; 1258 return -ENOMEM;
1259 }
1255 1260
1256 /* return the leftover and copy */ 1261 /* return the leftover and copy */
1257 for_each_possible_cpu(cpu) { 1262 for_each_possible_cpu(cpu) {