diff options
author | Yinghai Lu <yhlu.kernel@gmail.com> | 2008-03-18 15:52:37 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-26 16:51:08 -0400 |
commit | 1a27fc0a42162964d758e9d36d2d1b49c082a67c (patch) | |
tree | b94c4864edd0869d8ab7b25b9c3942a14bb6f9db /arch | |
parent | 8b3cd09ed23049fcb02479c6286744b36324ac9d (diff) |
x86_64: fix setup_node_bootmem to support big mem excluding with memmap
Typical case: a four-socket system where every node has 4g of RAM, booted with:
memmap=10g$4g
to mask out memory on node1 and node2
When NUMA is enabled, early_node_mem is used to get node_data and node_bootmap.
If it cannot get memory from the same node with find_e820_area(), it will
use alloc_bootmem to get a buffer from previous nodes.
So check for that case and print out some info about it.
We need to move the early_res_to_bootmem call into every setup_node_bootmem,
and make it take the range that the node has; otherwise alloc_bootmem could
return an address that was reserved early.
depends on "mm: make reserve_bootmem can crossed the nodes".
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kernel/e820_64.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/setup_64.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 42 |
3 files changed, 46 insertions, 12 deletions
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 79f0d52fa99a..645ee5e32a27 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c | |||
@@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end) | |||
106 | early_res[j - 1].end = 0; | 106 | early_res[j - 1].end = 0; |
107 | } | 107 | } |
108 | 108 | ||
109 | void __init early_res_to_bootmem(void) | 109 | void __init early_res_to_bootmem(unsigned long start, unsigned long end) |
110 | { | 110 | { |
111 | int i; | 111 | int i; |
112 | unsigned long final_start, final_end; | ||
112 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | 113 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { |
113 | struct early_res *r = &early_res[i]; | 114 | struct early_res *r = &early_res[i]; |
114 | printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, | 115 | final_start = max(start, r->start); |
115 | r->start, r->end - 1, r->name); | 116 | final_end = min(end, r->end); |
116 | reserve_bootmem_generic(r->start, r->end - r->start); | 117 | if (final_start >= final_end) |
118 | continue; | ||
119 | printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, | ||
120 | final_start, final_end - 1, r->name); | ||
121 | reserve_bootmem_generic(final_start, final_end - final_start); | ||
117 | } | 122 | } |
118 | } | 123 | } |
119 | 124 | ||
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b04e2c011e1a..60e64c8eee92 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
190 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); | 190 | bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); |
191 | e820_register_active_regions(0, start_pfn, end_pfn); | 191 | e820_register_active_regions(0, start_pfn, end_pfn); |
192 | free_bootmem_with_active_regions(0, end_pfn); | 192 | free_bootmem_with_active_regions(0, end_pfn); |
193 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | ||
193 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 194 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); |
194 | } | 195 | } |
195 | #endif | 196 | #endif |
@@ -421,8 +422,6 @@ void __init setup_arch(char **cmdline_p) | |||
421 | contig_initmem_init(0, end_pfn); | 422 | contig_initmem_init(0, end_pfn); |
422 | #endif | 423 | #endif |
423 | 424 | ||
424 | early_res_to_bootmem(); | ||
425 | |||
426 | dma32_reserve_bootmem(); | 425 | dma32_reserve_bootmem(); |
427 | 426 | ||
428 | #ifdef CONFIG_ACPI_SLEEP | 427 | #ifdef CONFIG_ACPI_SLEEP |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 9a6892200b27..c5066d519e5d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
196 | unsigned long bootmap_start, nodedata_phys; | 196 | unsigned long bootmap_start, nodedata_phys; |
197 | void *bootmap; | 197 | void *bootmap; |
198 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); | 198 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); |
199 | int nid; | ||
199 | 200 | ||
200 | start = round_up(start, ZONE_ALIGN); | 201 | start = round_up(start, ZONE_ALIGN); |
201 | 202 | ||
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
218 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 219 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
219 | NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; | 220 | NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; |
220 | 221 | ||
221 | /* Find a place for the bootmem map */ | 222 | /* |
223 | * Find a place for the bootmem map | ||
224 | * nodedata_phys could be on other nodes by alloc_bootmem, | ||
225 | * so need to sure bootmap_start not to be small, otherwise | ||
226 | * early_node_mem will get that with find_e820_area instead | ||
227 | * of alloc_bootmem, that could clash with reserved range | ||
228 | */ | ||
222 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); | 229 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); |
223 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); | 230 | nid = phys_to_nid(nodedata_phys); |
231 | if (nid == nodeid) | ||
232 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); | ||
233 | else | ||
234 | bootmap_start = round_up(start, PAGE_SIZE); | ||
224 | /* | 235 | /* |
225 | * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like | 236 | * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like |
226 | * to use that to align to PAGE_SIZE | 237 | * to use that to align to PAGE_SIZE |
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
245 | 256 | ||
246 | free_bootmem_with_active_regions(nodeid, end); | 257 | free_bootmem_with_active_regions(nodeid, end); |
247 | 258 | ||
248 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, | 259 | /* |
249 | BOOTMEM_DEFAULT); | 260 | * convert early reserve to bootmem reserve earlier |
250 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 261 | * otherwise early_node_mem could use early reserved mem |
251 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 262 | * on previous node |
263 | */ | ||
264 | early_res_to_bootmem(start, end); | ||
265 | |||
266 | /* | ||
267 | * in some case early_node_mem could use alloc_bootmem | ||
268 | * to get range on other node, don't reserve that again | ||
269 | */ | ||
270 | if (nid != nodeid) | ||
271 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
272 | else | ||
273 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, | ||
274 | pgdat_size, BOOTMEM_DEFAULT); | ||
275 | nid = phys_to_nid(bootmap_start); | ||
276 | if (nid != nodeid) | ||
277 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | ||
278 | else | ||
279 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | ||
280 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | ||
281 | |||
252 | #ifdef CONFIG_ACPI_NUMA | 282 | #ifdef CONFIG_ACPI_NUMA |
253 | srat_reserve_add_area(nodeid); | 283 | srat_reserve_add_area(nodeid); |
254 | #endif | 284 | #endif |