diff options
author | Tejun Heo <tj@kernel.org> | 2009-07-21 04:11:50 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2009-08-14 00:20:53 -0400 |
commit | 74d46d6b2d23d44d72c37df4c6a5d2e782f7b088 (patch) | |
tree | bcc468968e738394b9c4a44b8936a0e6037be163 | |
parent | d6647bdf98a0de19963de8d5d9698d469ed72097 (diff) |
percpu, sparc64: fix sparse possible cpu map handling
percpu code has been assuming num_possible_cpus() == nr_cpu_ids which
is incorrect if cpu_possible_map contains holes. This causes percpu
code to access beyond allocated memories and vmalloc areas. On a
sparc64 machine with cpus 0 and 2 (u60), this triggers the following
warning or fails boot.
WARNING: at /devel/tj/os/work/mm/vmalloc.c:106 vmap_page_range_noflush+0x1f0/0x240()
Modules linked in:
Call Trace:
[00000000004b17d0] vmap_page_range_noflush+0x1f0/0x240
[00000000004b1840] map_vm_area+0x20/0x60
[00000000004b1950] __vmalloc_area_node+0xd0/0x160
[0000000000593434] deflate_init+0x14/0xe0
[0000000000583b94] __crypto_alloc_tfm+0xd4/0x1e0
[00000000005844f0] crypto_alloc_base+0x50/0xa0
[000000000058b898] alg_test_comp+0x18/0x80
[000000000058dad4] alg_test+0x54/0x180
[000000000058af00] cryptomgr_test+0x40/0x60
[0000000000473098] kthread+0x58/0x80
[000000000042b590] kernel_thread+0x30/0x60
[0000000000472fd0] kthreadd+0xf0/0x160
---[ end trace 429b268a213317ba ]---
This patch fixes generic percpu functions and sparc64
setup_per_cpu_areas() so that they handle sparse cpu_possible_map
properly.
Please note that on x86, cpu_possible_map() doesn't contain holes and
thus num_possible_cpus() == nr_cpu_ids and this patch doesn't cause
any behavior difference.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/sparc/kernel/smp_64.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 14 | ||||
-rw-r--r-- | mm/percpu.c | 33 |
3 files changed, 27 insertions, 24 deletions
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fa44eaf8d897..3691907a43b4 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c | |||
@@ -1499,7 +1499,7 @@ void __init setup_per_cpu_areas(void) | |||
1499 | dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; | 1499 | dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; |
1500 | 1500 | ||
1501 | 1501 | ||
1502 | ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); | 1502 | ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpur_ptrs[0])); |
1503 | pcpur_ptrs = alloc_bootmem(ptrs_size); | 1503 | pcpur_ptrs = alloc_bootmem(ptrs_size); |
1504 | 1504 | ||
1505 | for_each_possible_cpu(cpu) { | 1505 | for_each_possible_cpu(cpu) { |
@@ -1514,7 +1514,7 @@ void __init setup_per_cpu_areas(void) | |||
1514 | 1514 | ||
1515 | /* allocate address and map */ | 1515 | /* allocate address and map */ |
1516 | vm.flags = VM_ALLOC; | 1516 | vm.flags = VM_ALLOC; |
1517 | vm.size = num_possible_cpus() * PCPU_CHUNK_SIZE; | 1517 | vm.size = nr_cpu_ids * PCPU_CHUNK_SIZE; |
1518 | vm_area_register_early(&vm, PCPU_CHUNK_SIZE); | 1518 | vm_area_register_early(&vm, PCPU_CHUNK_SIZE); |
1519 | 1519 | ||
1520 | for_each_possible_cpu(cpu) { | 1520 | for_each_possible_cpu(cpu) { |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 29a3eef7cf4a..07d81916f212 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -165,7 +165,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | |||
165 | 165 | ||
166 | if (!chosen) { | 166 | if (!chosen) { |
167 | size_t vm_size = VMALLOC_END - VMALLOC_START; | 167 | size_t vm_size = VMALLOC_END - VMALLOC_START; |
168 | size_t tot_size = num_possible_cpus() * PMD_SIZE; | 168 | size_t tot_size = nr_cpu_ids * PMD_SIZE; |
169 | 169 | ||
170 | /* on non-NUMA, embedding is better */ | 170 | /* on non-NUMA, embedding is better */ |
171 | if (!pcpu_need_numa()) | 171 | if (!pcpu_need_numa()) |
@@ -199,7 +199,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | |||
199 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | 199 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; |
200 | 200 | ||
201 | /* allocate pointer array and alloc large pages */ | 201 | /* allocate pointer array and alloc large pages */ |
202 | map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); | 202 | map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0])); |
203 | pcpul_map = alloc_bootmem(map_size); | 203 | pcpul_map = alloc_bootmem(map_size); |
204 | 204 | ||
205 | for_each_possible_cpu(cpu) { | 205 | for_each_possible_cpu(cpu) { |
@@ -228,7 +228,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | |||
228 | 228 | ||
229 | /* allocate address and map */ | 229 | /* allocate address and map */ |
230 | pcpul_vm.flags = VM_ALLOC; | 230 | pcpul_vm.flags = VM_ALLOC; |
231 | pcpul_vm.size = num_possible_cpus() * PMD_SIZE; | 231 | pcpul_vm.size = nr_cpu_ids * PMD_SIZE; |
232 | vm_area_register_early(&pcpul_vm, PMD_SIZE); | 232 | vm_area_register_early(&pcpul_vm, PMD_SIZE); |
233 | 233 | ||
234 | for_each_possible_cpu(cpu) { | 234 | for_each_possible_cpu(cpu) { |
@@ -250,8 +250,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | |||
250 | PMD_SIZE, pcpul_vm.addr, NULL); | 250 | PMD_SIZE, pcpul_vm.addr, NULL); |
251 | 251 | ||
252 | /* sort pcpul_map array for pcpu_lpage_remapped() */ | 252 | /* sort pcpul_map array for pcpu_lpage_remapped() */ |
253 | for (i = 0; i < num_possible_cpus() - 1; i++) | 253 | for (i = 0; i < nr_cpu_ids - 1; i++) |
254 | for (j = i + 1; j < num_possible_cpus(); j++) | 254 | for (j = i + 1; j < nr_cpu_ids; j++) |
255 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { | 255 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { |
256 | struct pcpul_ent tmp = pcpul_map[i]; | 256 | struct pcpul_ent tmp = pcpul_map[i]; |
257 | pcpul_map[i] = pcpul_map[j]; | 257 | pcpul_map[i] = pcpul_map[j]; |
@@ -288,7 +288,7 @@ void *pcpu_lpage_remapped(void *kaddr) | |||
288 | { | 288 | { |
289 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); | 289 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); |
290 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; | 290 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; |
291 | int left = 0, right = num_possible_cpus() - 1; | 291 | int left = 0, right = nr_cpu_ids - 1; |
292 | int pos; | 292 | int pos; |
293 | 293 | ||
294 | /* pcpul in use at all? */ | 294 | /* pcpul in use at all? */ |
@@ -377,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) | |||
377 | pcpu4k_nr_static_pages = PFN_UP(static_size); | 377 | pcpu4k_nr_static_pages = PFN_UP(static_size); |
378 | 378 | ||
379 | /* unaligned allocations can't be freed, round up to page size */ | 379 | /* unaligned allocations can't be freed, round up to page size */ |
380 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() | 380 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids |
381 | * sizeof(pcpu4k_pages[0])); | 381 | * sizeof(pcpu4k_pages[0])); |
382 | pcpu4k_pages = alloc_bootmem(pages_size); | 382 | pcpu4k_pages = alloc_bootmem(pages_size); |
383 | 383 | ||
diff --git a/mm/percpu.c b/mm/percpu.c index b70f2acd8853..e0be1146f617 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -8,12 +8,12 @@ | |||
8 | * | 8 | * |
9 | * This is percpu allocator which can handle both static and dynamic | 9 | * This is percpu allocator which can handle both static and dynamic |
10 | * areas. Percpu areas are allocated in chunks in vmalloc area. Each | 10 | * areas. Percpu areas are allocated in chunks in vmalloc area. Each |
11 | * chunk is consisted of num_possible_cpus() units and the first chunk | 11 | * chunk is consisted of nr_cpu_ids units and the first chunk is used |
12 | * is used for static percpu variables in the kernel image (special | 12 | * for static percpu variables in the kernel image (special boot time |
13 | * boot time alloc/init handling necessary as these areas need to be | 13 | * alloc/init handling necessary as these areas need to be brought up |
14 | * brought up before allocation services are running). Unit grows as | 14 | * before allocation services are running). Unit grows as necessary |
15 | * necessary and all units grow or shrink in unison. When a chunk is | 15 | * and all units grow or shrink in unison. When a chunk is filled up, |
16 | * filled up, another chunk is allocated. ie. in vmalloc area | 16 | * another chunk is allocated. ie. in vmalloc area |
17 | * | 17 | * |
18 | * c0 c1 c2 | 18 | * c0 c1 c2 |
19 | * ------------------- ------------------- ------------ | 19 | * ------------------- ------------------- ------------ |
@@ -558,7 +558,7 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) | |||
558 | static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, | 558 | static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, |
559 | bool flush_tlb) | 559 | bool flush_tlb) |
560 | { | 560 | { |
561 | unsigned int last = num_possible_cpus() - 1; | 561 | unsigned int last = nr_cpu_ids - 1; |
562 | unsigned int cpu; | 562 | unsigned int cpu; |
563 | 563 | ||
564 | /* unmap must not be done on immutable chunk */ | 564 | /* unmap must not be done on immutable chunk */ |
@@ -643,7 +643,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, | |||
643 | */ | 643 | */ |
644 | static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) | 644 | static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) |
645 | { | 645 | { |
646 | unsigned int last = num_possible_cpus() - 1; | 646 | unsigned int last = nr_cpu_ids - 1; |
647 | unsigned int cpu; | 647 | unsigned int cpu; |
648 | int err; | 648 | int err; |
649 | 649 | ||
@@ -1067,9 +1067,9 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
1067 | PFN_UP(size_sum)); | 1067 | PFN_UP(size_sum)); |
1068 | 1068 | ||
1069 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; | 1069 | pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; |
1070 | pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; | 1070 | pcpu_chunk_size = nr_cpu_ids * pcpu_unit_size; |
1071 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) | 1071 | pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) |
1072 | + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); | 1072 | + nr_cpu_ids * pcpu_unit_pages * sizeof(struct page *); |
1073 | 1073 | ||
1074 | if (dyn_size < 0) | 1074 | if (dyn_size < 0) |
1075 | dyn_size = pcpu_unit_size - static_size - reserved_size; | 1075 | dyn_size = pcpu_unit_size - static_size - reserved_size; |
@@ -1248,7 +1248,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | |||
1248 | } else | 1248 | } else |
1249 | pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); | 1249 | pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); |
1250 | 1250 | ||
1251 | chunk_size = pcpue_unit_size * num_possible_cpus(); | 1251 | chunk_size = pcpue_unit_size * nr_cpu_ids; |
1252 | 1252 | ||
1253 | pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, | 1253 | pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, |
1254 | __pa(MAX_DMA_ADDRESS)); | 1254 | __pa(MAX_DMA_ADDRESS)); |
@@ -1259,12 +1259,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, | |||
1259 | } | 1259 | } |
1260 | 1260 | ||
1261 | /* return the leftover and copy */ | 1261 | /* return the leftover and copy */ |
1262 | for_each_possible_cpu(cpu) { | 1262 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) { |
1263 | void *ptr = pcpue_ptr + cpu * pcpue_unit_size; | 1263 | void *ptr = pcpue_ptr + cpu * pcpue_unit_size; |
1264 | 1264 | ||
1265 | free_bootmem(__pa(ptr + pcpue_size), | 1265 | if (cpu_possible(cpu)) { |
1266 | pcpue_unit_size - pcpue_size); | 1266 | free_bootmem(__pa(ptr + pcpue_size), |
1267 | memcpy(ptr, __per_cpu_load, static_size); | 1267 | pcpue_unit_size - pcpue_size); |
1268 | memcpy(ptr, __per_cpu_load, static_size); | ||
1269 | } else | ||
1270 | free_bootmem(__pa(ptr), pcpue_unit_size); | ||
1268 | } | 1271 | } |
1269 | 1272 | ||
1270 | /* we're ready, commit */ | 1273 | /* we're ready, commit */ |