diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 6 | ||||
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 69 | ||||
-rw-r--r-- | mm/percpu.c | 13 |
3 files changed, 65 insertions, 23 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 08def8deb5f5..ecad946920d1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1882,6 +1882,12 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1882 | Format: { 0 | 1 } | 1882 | Format: { 0 | 1 } |
1883 | See arch/parisc/kernel/pdc_chassis.c | 1883 | See arch/parisc/kernel/pdc_chassis.c |
1884 | 1884 | ||
1885 | percpu_alloc= [X86] Select which percpu first chunk allocator to use. | ||
1886 | Allowed values are one of "lpage", "embed" and "4k". | ||
1887 | See comments in arch/x86/kernel/setup_percpu.c for | ||
1888 | details on each allocator. This parameter is primarily | ||
1889 | for debugging and performance comparison. | ||
1890 | |||
1885 | pf. [PARIDE] | 1891 | pf. [PARIDE] |
1886 | See Documentation/blockdev/paride.txt. | 1892 | See Documentation/blockdev/paride.txt. |
1887 | 1893 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index bad2fd223114..165ebd5ba83b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -156,20 +156,23 @@ static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) | |||
156 | return virt_to_page(pcpul_map[cpu].ptr + off); | 156 | return virt_to_page(pcpul_map[cpu].ptr + off); |
157 | } | 157 | } |
158 | 158 | ||
159 | static ssize_t __init setup_pcpu_lpage(size_t static_size) | 159 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) |
160 | { | 160 | { |
161 | size_t map_size, dyn_size; | 161 | size_t map_size, dyn_size; |
162 | unsigned int cpu; | 162 | unsigned int cpu; |
163 | int i, j; | 163 | int i, j; |
164 | ssize_t ret; | 164 | ssize_t ret; |
165 | 165 | ||
166 | /* | 166 | /* on non-NUMA, embedding is better */ |
167 | * If large page isn't supported, there's no benefit in doing | 167 | if (!chosen && !pcpu_need_numa()) |
168 | * this. Also, on non-NUMA, embedding is better. | ||
169 | */ | ||
170 | if (!cpu_has_pse || !pcpu_need_numa()) | ||
171 | return -EINVAL; | 168 | return -EINVAL; |
172 | 169 | ||
170 | /* need PSE */ | ||
171 | if (!cpu_has_pse) { | ||
172 | pr_warning("PERCPU: lpage allocator requires PSE\n"); | ||
173 | return -EINVAL; | ||
174 | } | ||
175 | |||
173 | /* | 176 | /* |
174 | * Currently supports only single page. Supporting multiple | 177 | * Currently supports only single page. Supporting multiple |
175 | * pages won't be too difficult if it ever becomes necessary. | 178 | * pages won't be too difficult if it ever becomes necessary. |
@@ -191,8 +194,11 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size) | |||
191 | pcpul_map[cpu].cpu = cpu; | 194 | pcpul_map[cpu].cpu = cpu; |
192 | pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, | 195 | pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, |
193 | PMD_SIZE); | 196 | PMD_SIZE); |
194 | if (!pcpul_map[cpu].ptr) | 197 | if (!pcpul_map[cpu].ptr) { |
198 | pr_warning("PERCPU: failed to allocate large page " | ||
199 | "for cpu%u\n", cpu); | ||
195 | goto enomem; | 200 | goto enomem; |
201 | } | ||
196 | 202 | ||
197 | /* | 203 | /* |
198 | * Only use pcpul_size bytes and give back the rest. | 204 | * Only use pcpul_size bytes and give back the rest. |
@@ -297,7 +303,7 @@ void *pcpu_lpage_remapped(void *kaddr) | |||
297 | return NULL; | 303 | return NULL; |
298 | } | 304 | } |
299 | #else | 305 | #else |
300 | static ssize_t __init setup_pcpu_lpage(size_t static_size) | 306 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) |
301 | { | 307 | { |
302 | return -EINVAL; | 308 | return -EINVAL; |
303 | } | 309 | } |
@@ -311,7 +317,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size) | |||
311 | * mapping so that it can use PMD mapping without additional TLB | 317 | * mapping so that it can use PMD mapping without additional TLB |
312 | * pressure. | 318 | * pressure. |
313 | */ | 319 | */ |
314 | static ssize_t __init setup_pcpu_embed(size_t static_size) | 320 | static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) |
315 | { | 321 | { |
316 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; | 322 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; |
317 | 323 | ||
@@ -320,7 +326,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) | |||
320 | * this. Also, embedding allocation doesn't play well with | 326 | * this. Also, embedding allocation doesn't play well with |
321 | * NUMA. | 327 | * NUMA. |
322 | */ | 328 | */ |
323 | if (!cpu_has_pse || pcpu_need_numa()) | 329 | if (!chosen && (!cpu_has_pse || pcpu_need_numa())) |
324 | return -EINVAL; | 330 | return -EINVAL; |
325 | 331 | ||
326 | return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, | 332 | return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, |
@@ -370,8 +376,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) | |||
370 | void *ptr; | 376 | void *ptr; |
371 | 377 | ||
372 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); | 378 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); |
373 | if (!ptr) | 379 | if (!ptr) { |
380 | pr_warning("PERCPU: failed to allocate " | ||
381 | "4k page for cpu%u\n", cpu); | ||
374 | goto enomem; | 382 | goto enomem; |
383 | } | ||
375 | 384 | ||
376 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); | 385 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); |
377 | pcpu4k_pages[j++] = virt_to_page(ptr); | 386 | pcpu4k_pages[j++] = virt_to_page(ptr); |
@@ -395,6 +404,16 @@ out_free_ar: | |||
395 | return ret; | 404 | return ret; |
396 | } | 405 | } |
397 | 406 | ||
407 | /* for explicit first chunk allocator selection */ | ||
408 | static char pcpu_chosen_alloc[16] __initdata; | ||
409 | |||
410 | static int __init percpu_alloc_setup(char *str) | ||
411 | { | ||
412 | strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); | ||
413 | return 0; | ||
414 | } | ||
415 | early_param("percpu_alloc", percpu_alloc_setup); | ||
416 | |||
398 | static inline void setup_percpu_segment(int cpu) | 417 | static inline void setup_percpu_segment(int cpu) |
399 | { | 418 | { |
400 | #ifdef CONFIG_X86_32 | 419 | #ifdef CONFIG_X86_32 |
@@ -408,11 +427,6 @@ static inline void setup_percpu_segment(int cpu) | |||
408 | #endif | 427 | #endif |
409 | } | 428 | } |
410 | 429 | ||
411 | /* | ||
412 | * Great future plan: | ||
413 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | ||
414 | * Always point %gs to its beginning | ||
415 | */ | ||
416 | void __init setup_per_cpu_areas(void) | 430 | void __init setup_per_cpu_areas(void) |
417 | { | 431 | { |
418 | size_t static_size = __per_cpu_end - __per_cpu_start; | 432 | size_t static_size = __per_cpu_end - __per_cpu_start; |
@@ -429,9 +443,26 @@ void __init setup_per_cpu_areas(void) | |||
429 | * of large page mappings. Please read comments on top of | 443 | * of large page mappings. Please read comments on top of |
430 | * each allocator for details. | 444 | * each allocator for details. |
431 | */ | 445 | */ |
432 | ret = setup_pcpu_lpage(static_size); | 446 | ret = -EINVAL; |
433 | if (ret < 0) | 447 | if (strlen(pcpu_chosen_alloc)) { |
434 | ret = setup_pcpu_embed(static_size); | 448 | if (strcmp(pcpu_chosen_alloc, "4k")) { |
449 | if (!strcmp(pcpu_chosen_alloc, "lpage")) | ||
450 | ret = setup_pcpu_lpage(static_size, true); | ||
451 | else if (!strcmp(pcpu_chosen_alloc, "embed")) | ||
452 | ret = setup_pcpu_embed(static_size, true); | ||
453 | else | ||
454 | pr_warning("PERCPU: unknown allocator %s " | ||
455 | "specified\n", pcpu_chosen_alloc); | ||
456 | if (ret < 0) | ||
457 | pr_warning("PERCPU: %s allocator failed (%zd), " | ||
458 | "falling back to 4k\n", | ||
459 | pcpu_chosen_alloc, ret); | ||
460 | } | ||
461 | } else { | ||
462 | ret = setup_pcpu_lpage(static_size, false); | ||
463 | if (ret < 0) | ||
464 | ret = setup_pcpu_embed(static_size, false); | ||
465 | } | ||
435 | if (ret < 0) | 466 | if (ret < 0) |
436 | ret = setup_pcpu_4k(static_size); | 467 | ret = setup_pcpu_4k(static_size); |
437 | if (ret < 0) | 468 | if (ret < 0) |
diff --git a/mm/percpu.c b/mm/percpu.c index d06f4748271e..b70f2acd8853 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -1233,6 +1233,7 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) | |||
1233 | ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | 1233 | ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, |
1234 | ssize_t dyn_size, ssize_t unit_size) | 1234 | ssize_t dyn_size, ssize_t unit_size) |
1235 | { | 1235 | { |
1236 | size_t chunk_size; | ||
1236 | unsigned int cpu; | 1237 | unsigned int cpu; |
1237 | 1238 | ||
1238 | /* determine parameters and allocate */ | 1239 | /* determine parameters and allocate */ |
@@ -1247,11 +1248,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | |||
1247 | } else | 1248 | } else |
1248 | pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); | 1249 | pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); |
1249 | 1250 | ||
1250 | pcpue_ptr = __alloc_bootmem_nopanic( | 1251 | chunk_size = pcpue_unit_size * num_possible_cpus(); |
1251 | num_possible_cpus() * pcpue_unit_size, | 1252 | |
1252 | PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); | 1253 | pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, |
1253 | if (!pcpue_ptr) | 1254 | __pa(MAX_DMA_ADDRESS)); |
1255 | if (!pcpue_ptr) { | ||
1256 | pr_warning("PERCPU: failed to allocate %zu bytes for " | ||
1257 | "embedding\n", chunk_size); | ||
1254 | return -ENOMEM; | 1258 | return -ENOMEM; |
1259 | } | ||
1255 | 1260 | ||
1256 | /* return the leftover and copy */ | 1261 | /* return the leftover and copy */ |
1257 | for_each_possible_cpu(cpu) { | 1262 | for_each_possible_cpu(cpu) { |