aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYinghai Lu <yhlu.kernel@gmail.com>2008-03-18 15:52:37 -0400
committerIngo Molnar <mingo@elte.hu>2008-04-26 16:51:08 -0400
commit1a27fc0a42162964d758e9d36d2d1b49c082a67c (patch)
treeb94c4864edd0869d8ab7b25b9c3942a14bb6f9db
parent8b3cd09ed23049fcb02479c6286744b36324ac9d (diff)
x86_64: fix setup_node_bootmem to support big mem excluding with memmap
Typical case: a four-socket system where every node has 4G of RAM, and we are using memmap=10g$4g to mask out the memory on node1 and node2. When NUMA is enabled, early_node_mem is used to get node_data and node_bootmap. If it cannot get memory from the same node with find_e820_area(), it will use alloc_bootmem to get a buffer from previous nodes, so check for that case and print some info about it. early_res_to_bootmem needs to be moved into every setup_node_bootmem call, and it now takes the range that the node covers; otherwise alloc_bootmem could return an address that was reserved early. Depends on "mm: make reserve_bootmem can crossed the nodes". Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/kernel/e820_64.c13
-rw-r--r--arch/x86/kernel/setup_64.c3
-rw-r--r--arch/x86/mm/numa_64.c42
-rw-r--r--include/asm-x86/e820_64.h2
4 files changed, 47 insertions, 13 deletions
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 79f0d52fa99a..645ee5e32a27 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end)
106 early_res[j - 1].end = 0; 106 early_res[j - 1].end = 0;
107} 107}
108 108
109void __init early_res_to_bootmem(void) 109void __init early_res_to_bootmem(unsigned long start, unsigned long end)
110{ 110{
111 int i; 111 int i;
112 unsigned long final_start, final_end;
112 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { 113 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
113 struct early_res *r = &early_res[i]; 114 struct early_res *r = &early_res[i];
114 printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, 115 final_start = max(start, r->start);
115 r->start, r->end - 1, r->name); 116 final_end = min(end, r->end);
116 reserve_bootmem_generic(r->start, r->end - r->start); 117 if (final_start >= final_end)
118 continue;
119 printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
120 final_start, final_end - 1, r->name);
121 reserve_bootmem_generic(final_start, final_end - final_start);
117 } 122 }
118} 123}
119 124
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index b04e2c011e1a..60e64c8eee92 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
190 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); 190 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
191 e820_register_active_regions(0, start_pfn, end_pfn); 191 e820_register_active_regions(0, start_pfn, end_pfn);
192 free_bootmem_with_active_regions(0, end_pfn); 192 free_bootmem_with_active_regions(0, end_pfn);
193 early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
193 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); 194 reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
194} 195}
195#endif 196#endif
@@ -421,8 +422,6 @@ void __init setup_arch(char **cmdline_p)
421 contig_initmem_init(0, end_pfn); 422 contig_initmem_init(0, end_pfn);
422#endif 423#endif
423 424
424 early_res_to_bootmem();
425
426 dma32_reserve_bootmem(); 425 dma32_reserve_bootmem();
427 426
428#ifdef CONFIG_ACPI_SLEEP 427#ifdef CONFIG_ACPI_SLEEP
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9a6892200b27..c5066d519e5d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
196 unsigned long bootmap_start, nodedata_phys; 196 unsigned long bootmap_start, nodedata_phys;
197 void *bootmap; 197 void *bootmap;
198 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); 198 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
199 int nid;
199 200
200 start = round_up(start, ZONE_ALIGN); 201 start = round_up(start, ZONE_ALIGN);
201 202
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
218 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 219 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
219 NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; 220 NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
220 221
221 /* Find a place for the bootmem map */ 222 /*
223 * Find a place for the bootmem map
224 * nodedata_phys could be on other nodes by alloc_bootmem,
225 * so need to sure bootmap_start not to be small, otherwise
226 * early_node_mem will get that with find_e820_area instead
227 * of alloc_bootmem, that could clash with reserved range
228 */
222 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); 229 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
223 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); 230 nid = phys_to_nid(nodedata_phys);
231 if (nid == nodeid)
232 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
233 else
234 bootmap_start = round_up(start, PAGE_SIZE);
224 /* 235 /*
225 * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like 236 * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
226 * to use that to align to PAGE_SIZE 237 * to use that to align to PAGE_SIZE
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
245 256
246 free_bootmem_with_active_regions(nodeid, end); 257 free_bootmem_with_active_regions(nodeid, end);
247 258
248 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, 259 /*
249 BOOTMEM_DEFAULT); 260 * convert early reserve to bootmem reserve earlier
250 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, 261 * otherwise early_node_mem could use early reserved mem
251 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); 262 * on previous node
263 */
264 early_res_to_bootmem(start, end);
265
266 /*
267 * in some case early_node_mem could use alloc_bootmem
268 * to get range on other node, don't reserve that again
269 */
270 if (nid != nodeid)
271 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
272 else
273 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
274 pgdat_size, BOOTMEM_DEFAULT);
275 nid = phys_to_nid(bootmap_start);
276 if (nid != nodeid)
277 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
278 else
279 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
280 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
281
252#ifdef CONFIG_ACPI_NUMA 282#ifdef CONFIG_ACPI_NUMA
253 srat_reserve_add_area(nodeid); 283 srat_reserve_add_area(nodeid);
254#endif 284#endif
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index b5e02e379af3..71c4d685d30d 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -49,7 +49,7 @@ extern void update_e820(void);
49 49
50extern void reserve_early(unsigned long start, unsigned long end, char *name); 50extern void reserve_early(unsigned long start, unsigned long end, char *name);
51extern void free_early(unsigned long start, unsigned long end); 51extern void free_early(unsigned long start, unsigned long end);
52extern void early_res_to_bootmem(void); 52extern void early_res_to_bootmem(unsigned long start, unsigned long end);
53 53
54#endif/*!__ASSEMBLY__*/ 54#endif/*!__ASSEMBLY__*/
55 55