diff options
author | Yinghai Lu <yinghai@kernel.org> | 2010-02-10 04:20:22 -0500 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2010-02-12 12:42:38 -0500 |
commit | 9bdac914240759457175ac0d6529a37d2820bc4d (patch) | |
tree | 8fb7d26a351d2cd526835f1494ebeb818e988abb /mm | |
parent | a4322e1bad91fbca27056fc38d2cbca3f1eae0cf (diff) |
sparsemem: Put mem map for one node together.
Add vmemmap_alloc_block_buf for mem map only.
It will fallback to the old way if it cannot get a block that big.
Before this patch, when a node have 128g ram installed, memmap are
split into two parts or more.
[ 0.000000] [ffffea0000000000-ffffea003fffffff] PMD -> [ffff880100600000-ffff88013e9fffff] on node 1
[ 0.000000] [ffffea0040000000-ffffea006fffffff] PMD -> [ffff88013ec00000-ffff88016ebfffff] on node 1
[ 0.000000] [ffffea0070000000-ffffea007fffffff] PMD -> [ffff882000600000-ffff8820105fffff] on node 0
[ 0.000000] [ffffea0080000000-ffffea00bfffffff] PMD -> [ffff882010800000-ffff8820507fffff] on node 0
[ 0.000000] [ffffea00c0000000-ffffea00dfffffff] PMD -> [ffff882050a00000-ffff8820709fffff] on node 0
[ 0.000000] [ffffea00e0000000-ffffea00ffffffff] PMD -> [ffff884000600000-ffff8840205fffff] on node 2
[ 0.000000] [ffffea0100000000-ffffea013fffffff] PMD -> [ffff884020800000-ffff8840607fffff] on node 2
[ 0.000000] [ffffea0140000000-ffffea014fffffff] PMD -> [ffff884060a00000-ffff8840709fffff] on node 2
[ 0.000000] [ffffea0150000000-ffffea017fffffff] PMD -> [ffff886000600000-ffff8860305fffff] on node 3
[ 0.000000] [ffffea0180000000-ffffea01bfffffff] PMD -> [ffff886030800000-ffff8860707fffff] on node 3
[ 0.000000] [ffffea01c0000000-ffffea01ffffffff] PMD -> [ffff888000600000-ffff8880405fffff] on node 4
[ 0.000000] [ffffea0200000000-ffffea022fffffff] PMD -> [ffff888040800000-ffff8880707fffff] on node 4
[ 0.000000] [ffffea0230000000-ffffea023fffffff] PMD -> [ffff88a000600000-ffff88a0105fffff] on node 5
[ 0.000000] [ffffea0240000000-ffffea027fffffff] PMD -> [ffff88a010800000-ffff88a0507fffff] on node 5
[ 0.000000] [ffffea0280000000-ffffea029fffffff] PMD -> [ffff88a050a00000-ffff88a0709fffff] on node 5
[ 0.000000] [ffffea02a0000000-ffffea02bfffffff] PMD -> [ffff88c000600000-ffff88c0205fffff] on node 6
[ 0.000000] [ffffea02c0000000-ffffea02ffffffff] PMD -> [ffff88c020800000-ffff88c0607fffff] on node 6
[ 0.000000] [ffffea0300000000-ffffea030fffffff] PMD -> [ffff88c060a00000-ffff88c0709fffff] on node 6
[ 0.000000] [ffffea0310000000-ffffea033fffffff] PMD -> [ffff88e000600000-ffff88e0305fffff] on node 7
[ 0.000000] [ffffea0340000000-ffffea037fffffff] PMD -> [ffff88e030800000-ffff88e0707fffff] on node 7
after patch will get
[ 0.000000] [ffffea0000000000-ffffea006fffffff] PMD -> [ffff880100200000-ffff88016e5fffff] on node 0
[ 0.000000] [ffffea0070000000-ffffea00dfffffff] PMD -> [ffff882000200000-ffff8820701fffff] on node 1
[ 0.000000] [ffffea00e0000000-ffffea014fffffff] PMD -> [ffff884000200000-ffff8840701fffff] on node 2
[ 0.000000] [ffffea0150000000-ffffea01bfffffff] PMD -> [ffff886000200000-ffff8860701fffff] on node 3
[ 0.000000] [ffffea01c0000000-ffffea022fffffff] PMD -> [ffff888000200000-ffff8880701fffff] on node 4
[ 0.000000] [ffffea0230000000-ffffea029fffffff] PMD -> [ffff88a000200000-ffff88a0701fffff] on node 5
[ 0.000000] [ffffea02a0000000-ffffea030fffffff] PMD -> [ffff88c000200000-ffff88c0701fffff] on node 6
[ 0.000000] [ffffea0310000000-ffffea037fffffff] PMD -> [ffff88e000200000-ffff88e0701fffff] on node 7
-v2: change buf to vmemmap_buf instead according to Ingo
also add CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER according to Ingo
-v3: according to Andrew, use sizeof(name) instead of hard coded 15
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-19-git-send-email-yinghai@kernel.org>
Cc: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 4 | ||||
-rw-r--r-- | mm/sparse-vmemmap.c | 74 | ||||
-rw-r--r-- | mm/sparse.c | 111 |
3 files changed, 187 insertions, 2 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 17b8947aa7da..e4a33b9479b2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -115,6 +115,10 @@ config SPARSEMEM_EXTREME | |||
115 | config SPARSEMEM_VMEMMAP_ENABLE | 115 | config SPARSEMEM_VMEMMAP_ENABLE |
116 | bool | 116 | bool |
117 | 117 | ||
118 | config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
119 | def_bool y | ||
120 | depends on SPARSEMEM && X86_64 | ||
121 | |||
118 | config SPARSEMEM_VMEMMAP | 122 | config SPARSEMEM_VMEMMAP |
119 | bool "Sparse Memory virtual memmap" | 123 | bool "Sparse Memory virtual memmap" |
120 | depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE | 124 | depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE |
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 9506c39942f6..392b9bb5bc01 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c | |||
@@ -43,6 +43,8 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node, | |||
43 | return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal); | 43 | return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal); |
44 | } | 44 | } |
45 | 45 | ||
46 | static void *vmemmap_buf; | ||
47 | static void *vmemmap_buf_end; | ||
46 | 48 | ||
47 | void * __meminit vmemmap_alloc_block(unsigned long size, int node) | 49 | void * __meminit vmemmap_alloc_block(unsigned long size, int node) |
48 | { | 50 | { |
@@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) | |||
64 | __pa(MAX_DMA_ADDRESS)); | 66 | __pa(MAX_DMA_ADDRESS)); |
65 | } | 67 | } |
66 | 68 | ||
69 | /* need to make sure size is all the same during early stage */ | ||
70 | void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) | ||
71 | { | ||
72 | void *ptr; | ||
73 | |||
74 | if (!vmemmap_buf) | ||
75 | return vmemmap_alloc_block(size, node); | ||
76 | |||
77 | /* take the from buf */ | ||
78 | ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size); | ||
79 | if (ptr + size > vmemmap_buf_end) | ||
80 | return vmemmap_alloc_block(size, node); | ||
81 | |||
82 | vmemmap_buf = ptr + size; | ||
83 | |||
84 | return ptr; | ||
85 | } | ||
86 | |||
67 | void __meminit vmemmap_verify(pte_t *pte, int node, | 87 | void __meminit vmemmap_verify(pte_t *pte, int node, |
68 | unsigned long start, unsigned long end) | 88 | unsigned long start, unsigned long end) |
69 | { | 89 | { |
@@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) | |||
80 | pte_t *pte = pte_offset_kernel(pmd, addr); | 100 | pte_t *pte = pte_offset_kernel(pmd, addr); |
81 | if (pte_none(*pte)) { | 101 | if (pte_none(*pte)) { |
82 | pte_t entry; | 102 | pte_t entry; |
83 | void *p = vmemmap_alloc_block(PAGE_SIZE, node); | 103 | void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); |
84 | if (!p) | 104 | if (!p) |
85 | return NULL; | 105 | return NULL; |
86 | entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); | 106 | entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); |
@@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) | |||
163 | 183 | ||
164 | return map; | 184 | return map; |
165 | } | 185 | } |
186 | |||
187 | void __init sparse_mem_maps_populate_node(struct page **map_map, | ||
188 | unsigned long pnum_begin, | ||
189 | unsigned long pnum_end, | ||
190 | unsigned long map_count, int nodeid) | ||
191 | { | ||
192 | unsigned long pnum; | ||
193 | unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; | ||
194 | void *vmemmap_buf_start; | ||
195 | |||
196 | size = ALIGN(size, PMD_SIZE); | ||
197 | vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count, | ||
198 | PMD_SIZE, __pa(MAX_DMA_ADDRESS)); | ||
199 | |||
200 | if (vmemmap_buf_start) { | ||
201 | vmemmap_buf = vmemmap_buf_start; | ||
202 | vmemmap_buf_end = vmemmap_buf_start + size * map_count; | ||
203 | } | ||
204 | |||
205 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
206 | struct mem_section *ms; | ||
207 | |||
208 | if (!present_section_nr(pnum)) | ||
209 | continue; | ||
210 | |||
211 | map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); | ||
212 | if (map_map[pnum]) | ||
213 | continue; | ||
214 | ms = __nr_to_section(pnum); | ||
215 | printk(KERN_ERR "%s: sparsemem memory map backing failed " | ||
216 | "some memory will not be available.\n", __func__); | ||
217 | ms->section_mem_map = 0; | ||
218 | } | ||
219 | |||
220 | if (vmemmap_buf_start) { | ||
221 | /* need to free left buf */ | ||
222 | #ifdef CONFIG_NO_BOOTMEM | ||
223 | free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end)); | ||
224 | if (vmemmap_buf_start < vmemmap_buf) { | ||
225 | char name[15]; | ||
226 | |||
227 | snprintf(name, sizeof(name), "MEMMAP %d", nodeid); | ||
228 | reserve_early_without_check(__pa(vmemmap_buf_start), | ||
229 | __pa(vmemmap_buf), name); | ||
230 | } | ||
231 | #else | ||
232 | free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf); | ||
233 | #endif | ||
234 | vmemmap_buf = NULL; | ||
235 | vmemmap_buf_end = NULL; | ||
236 | } | ||
237 | } | ||
diff --git a/mm/sparse.c b/mm/sparse.c index 0cdaf0b58457..9b6b93a4d78d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -390,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) | |||
390 | PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); | 390 | PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); |
391 | return map; | 391 | return map; |
392 | } | 392 | } |
393 | void __init sparse_mem_maps_populate_node(struct page **map_map, | ||
394 | unsigned long pnum_begin, | ||
395 | unsigned long pnum_end, | ||
396 | unsigned long map_count, int nodeid) | ||
397 | { | ||
398 | void *map; | ||
399 | unsigned long pnum; | ||
400 | unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; | ||
401 | |||
402 | map = alloc_remap(nodeid, size * map_count); | ||
403 | if (map) { | ||
404 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
405 | if (!present_section_nr(pnum)) | ||
406 | continue; | ||
407 | map_map[pnum] = map; | ||
408 | map += size; | ||
409 | } | ||
410 | return; | ||
411 | } | ||
412 | |||
413 | size = PAGE_ALIGN(size); | ||
414 | map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count); | ||
415 | if (map) { | ||
416 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
417 | if (!present_section_nr(pnum)) | ||
418 | continue; | ||
419 | map_map[pnum] = map; | ||
420 | map += size; | ||
421 | } | ||
422 | return; | ||
423 | } | ||
424 | |||
425 | /* fallback */ | ||
426 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
427 | struct mem_section *ms; | ||
428 | |||
429 | if (!present_section_nr(pnum)) | ||
430 | continue; | ||
431 | map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); | ||
432 | if (map_map[pnum]) | ||
433 | continue; | ||
434 | ms = __nr_to_section(pnum); | ||
435 | printk(KERN_ERR "%s: sparsemem memory map backing failed " | ||
436 | "some memory will not be available.\n", __func__); | ||
437 | ms->section_mem_map = 0; | ||
438 | } | ||
439 | } | ||
393 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 440 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ |
394 | 441 | ||
442 | static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, | ||
443 | unsigned long pnum_begin, | ||
444 | unsigned long pnum_end, | ||
445 | unsigned long map_count, int nodeid) | ||
446 | { | ||
447 | sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, | ||
448 | map_count, nodeid); | ||
449 | } | ||
450 | |||
451 | #ifndef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
395 | static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | 452 | static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) |
396 | { | 453 | { |
397 | struct page *map; | 454 | struct page *map; |
@@ -407,6 +464,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | |||
407 | ms->section_mem_map = 0; | 464 | ms->section_mem_map = 0; |
408 | return NULL; | 465 | return NULL; |
409 | } | 466 | } |
467 | #endif | ||
410 | 468 | ||
411 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) | 469 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) |
412 | { | 470 | { |
@@ -420,12 +478,14 @@ void __init sparse_init(void) | |||
420 | { | 478 | { |
421 | unsigned long pnum; | 479 | unsigned long pnum; |
422 | struct page *map; | 480 | struct page *map; |
481 | struct page **map_map; | ||
423 | unsigned long *usemap; | 482 | unsigned long *usemap; |
424 | unsigned long **usemap_map; | 483 | unsigned long **usemap_map; |
425 | int size; | 484 | int size, size2; |
426 | int nodeid_begin = 0; | 485 | int nodeid_begin = 0; |
427 | unsigned long pnum_begin = 0; | 486 | unsigned long pnum_begin = 0; |
428 | unsigned long usemap_count; | 487 | unsigned long usemap_count; |
488 | unsigned long map_count; | ||
429 | 489 | ||
430 | /* | 490 | /* |
431 | * map is using big page (aka 2M in x86 64 bit) | 491 | * map is using big page (aka 2M in x86 64 bit) |
@@ -478,6 +538,48 @@ void __init sparse_init(void) | |||
478 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, | 538 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, |
479 | usemap_count, nodeid_begin); | 539 | usemap_count, nodeid_begin); |
480 | 540 | ||
541 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
542 | size2 = sizeof(struct page *) * NR_MEM_SECTIONS; | ||
543 | map_map = alloc_bootmem(size2); | ||
544 | if (!map_map) | ||
545 | panic("can not allocate map_map\n"); | ||
546 | |||
547 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | ||
548 | struct mem_section *ms; | ||
549 | |||
550 | if (!present_section_nr(pnum)) | ||
551 | continue; | ||
552 | ms = __nr_to_section(pnum); | ||
553 | nodeid_begin = sparse_early_nid(ms); | ||
554 | pnum_begin = pnum; | ||
555 | break; | ||
556 | } | ||
557 | map_count = 1; | ||
558 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
559 | struct mem_section *ms; | ||
560 | int nodeid; | ||
561 | |||
562 | if (!present_section_nr(pnum)) | ||
563 | continue; | ||
564 | ms = __nr_to_section(pnum); | ||
565 | nodeid = sparse_early_nid(ms); | ||
566 | if (nodeid == nodeid_begin) { | ||
567 | map_count++; | ||
568 | continue; | ||
569 | } | ||
570 | /* ok, we need to take cake of from pnum_begin to pnum - 1*/ | ||
571 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum, | ||
572 | map_count, nodeid_begin); | ||
573 | /* new start, update count etc*/ | ||
574 | nodeid_begin = nodeid; | ||
575 | pnum_begin = pnum; | ||
576 | map_count = 1; | ||
577 | } | ||
578 | /* ok, last chunk */ | ||
579 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS, | ||
580 | map_count, nodeid_begin); | ||
581 | #endif | ||
582 | |||
481 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 583 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
482 | if (!present_section_nr(pnum)) | 584 | if (!present_section_nr(pnum)) |
483 | continue; | 585 | continue; |
@@ -486,7 +588,11 @@ void __init sparse_init(void) | |||
486 | if (!usemap) | 588 | if (!usemap) |
487 | continue; | 589 | continue; |
488 | 590 | ||
591 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
592 | map = map_map[pnum]; | ||
593 | #else | ||
489 | map = sparse_early_mem_map_alloc(pnum); | 594 | map = sparse_early_mem_map_alloc(pnum); |
595 | #endif | ||
490 | if (!map) | 596 | if (!map) |
491 | continue; | 597 | continue; |
492 | 598 | ||
@@ -496,6 +602,9 @@ void __init sparse_init(void) | |||
496 | 602 | ||
497 | vmemmap_populate_print_last(); | 603 | vmemmap_populate_print_last(); |
498 | 604 | ||
605 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
606 | free_bootmem(__pa(map_map), size2); | ||
607 | #endif | ||
499 | free_bootmem(__pa(usemap_map), size); | 608 | free_bootmem(__pa(usemap_map), size); |
500 | } | 609 | } |
501 | 610 | ||