From a4322e1bad91fbca27056fc38d2cbca3f1eae0cf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:21 -0800 Subject: sparsemem: Put usemap for one node together Could save some buffer space instead of applying one by one. Could help that system that is going to use early_res instead of bootmem less entries in early_res make search more faster on system with more memory. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-18-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- mm/sparse.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 18 deletions(-) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index 6ce4aab69e99..0cdaf0b58457 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -271,7 +271,8 @@ static unsigned long *__kmalloc_section_usemap(void) #ifdef CONFIG_MEMORY_HOTREMOVE static unsigned long * __init -sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) +sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, + unsigned long count) { unsigned long section_nr; @@ -286,7 +287,7 @@ sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) * this problem. */ section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); - return alloc_bootmem_section(usemap_size(), section_nr); + return alloc_bootmem_section(usemap_size() * count, section_nr); } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) @@ -329,7 +330,8 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) } #else static unsigned long * __init -sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) +sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, + unsigned long count) { return NULL; } @@ -339,27 +341,40 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) } #endif /* CONFIG_MEMORY_HOTREMOVE */ -static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum) +static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map, + unsigned long pnum_begin, + unsigned long pnum_end, + unsigned long usemap_count, int nodeid) { - unsigned long *usemap; - struct mem_section *ms = __nr_to_section(pnum); - int nid = sparse_early_nid(ms); - - usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid)); - if (usemap) - return usemap; + void *usemap; + unsigned long pnum; + int size = usemap_size(); - usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size()); + usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), + usemap_count); if (usemap) { - check_usemap_section_nr(nid, usemap); - return usemap; + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + if (!present_section_nr(pnum)) + continue; + usemap_map[pnum] = usemap; + usemap += size; + } + return; } - /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */ - nid = 0; + usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); + if (usemap) { + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + if (!present_section_nr(pnum)) + continue; + usemap_map[pnum] = usemap; + usemap += size; + check_usemap_section_nr(nodeid, usemap_map[pnum]); + } + return; + } printk(KERN_WARNING "%s: allocation failed\n", __func__); - return NULL; } #ifndef CONFIG_SPARSEMEM_VMEMMAP @@ -396,6 +411,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) { } + /* * Allocate the accumulated non-linear sections, allocate a mem_map * for each and record the physical to section mapping. @@ -407,6 +423,9 @@ void __init sparse_init(void) unsigned long *usemap; unsigned long **usemap_map; int size; + int nodeid_begin = 0; + unsigned long pnum_begin = 0; + unsigned long usemap_count; /* * map is using big page (aka 2M in x86 64 bit) @@ -425,10 +444,39 @@ void __init sparse_init(void) panic("can not allocate usemap_map\n"); for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { + struct mem_section *ms; + if (!present_section_nr(pnum)) continue; - usemap_map[pnum] = sparse_early_usemap_alloc(pnum); + ms = __nr_to_section(pnum); + nodeid_begin = sparse_early_nid(ms); + pnum_begin = pnum; + break; + } + usemap_count = 1; + for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { + struct mem_section *ms; + int nodeid; + + if (!present_section_nr(pnum)) + continue; + ms = __nr_to_section(pnum); + nodeid = sparse_early_nid(ms); + if (nodeid == nodeid_begin) { + usemap_count++; + continue; + } + /* ok, we need to take cake of from pnum_begin to pnum - 1*/ + sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum, + usemap_count, nodeid_begin); + /* new start, update count etc*/ + nodeid_begin = nodeid; + pnum_begin = pnum; + usemap_count = 1; } + /* ok, last chunk */ + sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, + usemap_count, nodeid_begin); for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { if (!present_section_nr(pnum)) -- cgit v1.2.2 From 9bdac914240759457175ac0d6529a37d2820bc4d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:22 -0800 Subject: sparsemem: Put mem map for one node together. Add vmemmap_alloc_block_buf for mem map only. It will fallback to the old way if it cannot get a block that big. Before this patch, when a node have 128g ram installed, memmap are split into two parts or more. [ 0.000000] [ffffea0000000000-ffffea003fffffff] PMD -> [ffff880100600000-ffff88013e9fffff] on node 1 [ 0.000000] [ffffea0040000000-ffffea006fffffff] PMD -> [ffff88013ec00000-ffff88016ebfffff] on node 1 [ 0.000000] [ffffea0070000000-ffffea007fffffff] PMD -> [ffff882000600000-ffff8820105fffff] on node 0 [ 0.000000] [ffffea0080000000-ffffea00bfffffff] PMD -> [ffff882010800000-ffff8820507fffff] on node 0 [ 0.000000] [ffffea00c0000000-ffffea00dfffffff] PMD -> [ffff882050a00000-ffff8820709fffff] on node 0 [ 0.000000] [ffffea00e0000000-ffffea00ffffffff] PMD -> [ffff884000600000-ffff8840205fffff] on node 2 [ 0.000000] [ffffea0100000000-ffffea013fffffff] PMD -> [ffff884020800000-ffff8840607fffff] on node 2 [ 0.000000] [ffffea0140000000-ffffea014fffffff] PMD -> [ffff884060a00000-ffff8840709fffff] on node 2 [ 0.000000] [ffffea0150000000-ffffea017fffffff] PMD -> [ffff886000600000-ffff8860305fffff] on node 3 [ 0.000000] [ffffea0180000000-ffffea01bfffffff] PMD -> [ffff886030800000-ffff8860707fffff] on node 3 [ 0.000000] [ffffea01c0000000-ffffea01ffffffff] PMD -> [ffff888000600000-ffff8880405fffff] on node 4 [ 0.000000] [ffffea0200000000-ffffea022fffffff] PMD -> [ffff888040800000-ffff8880707fffff] on node 4 [ 0.000000] [ffffea0230000000-ffffea023fffffff] PMD -> [ffff88a000600000-ffff88a0105fffff] on node 5 [ 0.000000] [ffffea0240000000-ffffea027fffffff] PMD -> [ffff88a010800000-ffff88a0507fffff] on node 5 [ 0.000000] [ffffea0280000000-ffffea029fffffff] PMD -> [ffff88a050a00000-ffff88a0709fffff] on node 5 [ 0.000000] [ffffea02a0000000-ffffea02bfffffff] PMD -> [ffff88c000600000-ffff88c0205fffff] on node 6 [ 0.000000] [ffffea02c0000000-ffffea02ffffffff] PMD -> [ffff88c020800000-ffff88c0607fffff] on node 6 [ 0.000000] [ffffea0300000000-ffffea030fffffff] PMD -> [ffff88c060a00000-ffff88c0709fffff] on node 6 [ 0.000000] [ffffea0310000000-ffffea033fffffff] PMD -> [ffff88e000600000-ffff88e0305fffff] on node 7 [ 0.000000] [ffffea0340000000-ffffea037fffffff] PMD -> [ffff88e030800000-ffff88e0707fffff] on node 7 after patch will get [ 0.000000] [ffffea0000000000-ffffea006fffffff] PMD -> [ffff880100200000-ffff88016e5fffff] on node 0 [ 0.000000] [ffffea0070000000-ffffea00dfffffff] PMD -> [ffff882000200000-ffff8820701fffff] on node 1 [ 0.000000] [ffffea00e0000000-ffffea014fffffff] PMD -> [ffff884000200000-ffff8840701fffff] on node 2 [ 0.000000] [ffffea0150000000-ffffea01bfffffff] PMD -> [ffff886000200000-ffff8860701fffff] on node 3 [ 0.000000] [ffffea01c0000000-ffffea022fffffff] PMD -> [ffff888000200000-ffff8880701fffff] on node 4 [ 0.000000] [ffffea0230000000-ffffea029fffffff] PMD -> [ffff88a000200000-ffff88a0701fffff] on node 5 [ 0.000000] [ffffea02a0000000-ffffea030fffffff] PMD -> [ffff88c000200000-ffff88c0701fffff] on node 6 [ 0.000000] [ffffea0310000000-ffffea037fffffff] PMD -> [ffff88e000200000-ffff88e0701fffff] on node 7 -v2: change buf to vmemmap_buf instead according to Ingo also add CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER according to Ingo -v3: according to Andrew, use sizeof(name) instead of hard coded 15 Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-19-git-send-email-yinghai@kernel.org> Cc: Christoph Lameter Acked-by: Christoph Lameter Signed-off-by: H. Peter Anvin --- mm/sparse.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index 0cdaf0b58457..9b6b93a4d78d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -390,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); return map; } +void __init sparse_mem_maps_populate_node(struct page **map_map, + unsigned long pnum_begin, + unsigned long pnum_end, + unsigned long map_count, int nodeid) +{ + void *map; + unsigned long pnum; + unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; + + map = alloc_remap(nodeid, size * map_count); + if (map) { + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + if (!present_section_nr(pnum)) + continue; + map_map[pnum] = map; + map += size; + } + return; + } + + size = PAGE_ALIGN(size); + map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count); + if (map) { + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + if (!present_section_nr(pnum)) + continue; + map_map[pnum] = map; + map += size; + } + return; + } + + /* fallback */ + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + struct mem_section *ms; + + if (!present_section_nr(pnum)) + continue; + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + if (map_map[pnum]) + continue; + ms = __nr_to_section(pnum); + printk(KERN_ERR "%s: sparsemem memory map backing failed " + "some memory will not be available.\n", __func__); + ms->section_mem_map = 0; + } +} #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ +static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, + unsigned long pnum_begin, + unsigned long pnum_end, + unsigned long map_count, int nodeid) +{ + sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, + map_count, nodeid); +} + +#ifndef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) { struct page *map; @@ -407,6 +464,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) ms->section_mem_map = 0; return NULL; } +#endif void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) { @@ -420,12 +478,14 @@ void __init sparse_init(void) { unsigned long pnum; struct page *map; + struct page **map_map; unsigned long *usemap; unsigned long **usemap_map; - int size; + int size, size2; int nodeid_begin = 0; unsigned long pnum_begin = 0; unsigned long usemap_count; + unsigned long map_count; /* * map is using big page (aka 2M in x86 64 bit) @@ -478,6 +538,48 @@ void __init sparse_init(void) sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, usemap_count, nodeid_begin); +#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER + size2 = sizeof(struct page *) * NR_MEM_SECTIONS; + map_map = alloc_bootmem(size2); + if (!map_map) + panic("can not allocate map_map\n"); + + for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { + struct mem_section *ms; + + if (!present_section_nr(pnum)) + continue; + ms = __nr_to_section(pnum); + nodeid_begin = sparse_early_nid(ms); + pnum_begin = pnum; + break; + } + map_count = 1; + for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { + struct mem_section *ms; + int nodeid; + + if (!present_section_nr(pnum)) + continue; + ms = __nr_to_section(pnum); + nodeid = sparse_early_nid(ms); + if (nodeid == nodeid_begin) { + map_count++; + continue; + } + /* ok, we need to take cake of from pnum_begin to pnum - 1*/ + sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum, + map_count, nodeid_begin); + /* new start, update count etc*/ + nodeid_begin = nodeid; + pnum_begin = pnum; + map_count = 1; + } + /* ok, last chunk */ + sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS, + map_count, nodeid_begin); +#endif + for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { if (!present_section_nr(pnum)) continue; @@ -486,7 +588,11 @@ void __init sparse_init(void) if (!usemap) continue; +#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER + map = map_map[pnum]; +#else map = sparse_early_mem_map_alloc(pnum); +#endif if (!map) continue; @@ -496,6 +602,9 @@ void __init sparse_init(void) vmemmap_populate_print_last(); +#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER + free_bootmem(__pa(map_map), size2); +#endif free_bootmem(__pa(usemap_map), size); } -- cgit v1.2.2 From 81d0d950e5037a26b71e568ff235ff9e998f4ab3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Feb 2010 09:29:38 -0800 Subject: sparsemem: Fix compilation on PowerPC Stephen reported: build (powerpc ppc64_defconfig) produced these warnings: mm/sparse.c: In function 'sparse_init': mm/sparse.c:488: warning: unused variable 'map_count' mm/sparse.c:484: warning: unused variable 'size2' mm/sparse.c:481: warning: unused variable 'map_map' mm/sparse.c: At top level: mm/sparse.c:442: warning: 'sparse_early_mem_maps_alloc_node' defined but not used Introduced by commit 9bdac914240759457175ac0d6529a37d2820bc4d ("sparsemem: Put mem map for one node together"). Conditionalize the bits appropriately based on the setting of CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER. Reported-by: Stephen Rothwell Tested-by: Stephen Rothwell Signed-off-by: Yinghai Lu LKML-Reference: <4B895682.1080706@kernel.org> Signed-off-by: H. Peter Anvin --- mm/sparse.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index 9b6b93a4d78d..22896d589133 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -439,6 +439,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, } #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ +#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, unsigned long pnum_begin, unsigned long pnum_end, @@ -447,8 +448,7 @@ static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, map_count, nodeid); } - -#ifndef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER +#else static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) { struct page *map; @@ -478,14 +478,17 @@ void __init sparse_init(void) { unsigned long pnum; struct page *map; - struct page **map_map; unsigned long *usemap; unsigned long **usemap_map; - int size, size2; + int size; int nodeid_begin = 0; unsigned long pnum_begin = 0; unsigned long usemap_count; +#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER unsigned long map_count; + int size2; + struct page **map_map; +#endif /* * map is using big page (aka 2M in x86 64 bit) -- cgit v1.2.2