diff options
author | Yinghai Lu <yhlu.kernel@gmail.com> | 2008-03-18 15:52:37 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-26 16:51:08 -0400 |
commit | 1a27fc0a42162964d758e9d36d2d1b49c082a67c (patch) | |
tree | b94c4864edd0869d8ab7b25b9c3942a14bb6f9db /arch/x86/mm/numa_64.c | |
parent | 8b3cd09ed23049fcb02479c6286744b36324ac9d (diff) |
x86_64: fix setup_node_bootmem to support big mem excluding with memmap
Typical case: a four-socket system where every node has 4 GB of RAM, booted with:
memmap=10g$4g
to mask out the memory on node 1 and node 2.
When NUMA is enabled, early_node_mem() is used to get memory for node_data and node_bootmap.
If it cannot get memory from the same node with find_e820_area(), it will
use alloc_bootmem() to get a buffer from one of the previous nodes.
So check for that case and print out some information about it.
early_res_to_bootmem() needs to be moved into setup_node_bootmem() so that it runs for every node,
and it now takes the range that the node covers. Otherwise alloc_bootmem() could return an address
that was already reserved early.
depends on "mm: make reserve_bootmem can crossed the nodes".
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm/numa_64.c')
-rw-r--r-- | arch/x86/mm/numa_64.c | 42 |
1 files changed, 36 insertions, 6 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 9a6892200b27..c5066d519e5d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
196 | unsigned long bootmap_start, nodedata_phys; | 196 | unsigned long bootmap_start, nodedata_phys; |
197 | void *bootmap; | 197 | void *bootmap; |
198 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); | 198 | const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); |
199 | int nid; | ||
199 | 200 | ||
200 | start = round_up(start, ZONE_ALIGN); | 201 | start = round_up(start, ZONE_ALIGN); |
201 | 202 | ||
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
218 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 219 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
219 | NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; | 220 | NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; |
220 | 221 | ||
221 | /* Find a place for the bootmem map */ | 222 | /* |
223 | * Find a place for the bootmem map | ||
224 | * nodedata_phys could be on other nodes by alloc_bootmem, | ||
225 | * so need to sure bootmap_start not to be small, otherwise | ||
226 | * early_node_mem will get that with find_e820_area instead | ||
227 | * of alloc_bootmem, that could clash with reserved range | ||
228 | */ | ||
222 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); | 229 | bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); |
223 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); | 230 | nid = phys_to_nid(nodedata_phys); |
231 | if (nid == nodeid) | ||
232 | bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); | ||
233 | else | ||
234 | bootmap_start = round_up(start, PAGE_SIZE); | ||
224 | /* | 235 | /* |
225 | * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like | 236 | * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like |
226 | * to use that to align to PAGE_SIZE | 237 | * to use that to align to PAGE_SIZE |
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
245 | 256 | ||
246 | free_bootmem_with_active_regions(nodeid, end); | 257 | free_bootmem_with_active_regions(nodeid, end); |
247 | 258 | ||
248 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, | 259 | /* |
249 | BOOTMEM_DEFAULT); | 260 | * convert early reserve to bootmem reserve earlier |
250 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 261 | * otherwise early_node_mem could use early reserved mem |
251 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 262 | * on previous node |
263 | */ | ||
264 | early_res_to_bootmem(start, end); | ||
265 | |||
266 | /* | ||
267 | * in some case early_node_mem could use alloc_bootmem | ||
268 | * to get range on other node, don't reserve that again | ||
269 | */ | ||
270 | if (nid != nodeid) | ||
271 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
272 | else | ||
273 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, | ||
274 | pgdat_size, BOOTMEM_DEFAULT); | ||
275 | nid = phys_to_nid(bootmap_start); | ||
276 | if (nid != nodeid) | ||
277 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | ||
278 | else | ||
279 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | ||
280 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | ||
281 | |||
252 | #ifdef CONFIG_ACPI_NUMA | 282 | #ifdef CONFIG_ACPI_NUMA |
253 | srat_reserve_add_area(nodeid); | 283 | srat_reserve_add_area(nodeid); |
254 | #endif | 284 | #endif |