author		Tejun Heo <tj@kernel.org>	2011-02-16 11:11:08 -0500
committer	Tejun Heo <tj@kernel.org>	2011-02-16 11:11:08 -0500
commit		fd0435d8fb1d4e5771f9ae3af71f2a77c1f4bd09 (patch)
tree		633e662f21d47ab162d3651d7b1c527ae0f458fe /arch/x86
parent		43a662f04f731c331706456c9852ef7146ba5d85 (diff)
x86-64, NUMA: Unify the rest of memblk registration
Move the remaining memblk registration logic from acpi_scan_nodes() to
numa_register_memblks() and initmem_init().

This applies the nodes_cover_memory() sanity check, memory node sorting
and node_online() checking, which were previously applied only to the
ACPI path, to all init methods.

As all memblk registration is moved to common code, active range
clearing is moved to initmem_init() too and removed from bad_srat().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
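For orientation, the unified boot-time flow that results from this change looks
roughly like the sketch below. This is a condensed reconstruction from the diff
that follows, not verbatim kernel code: declarations and unrelated details are
elided, and the numa_init[]/scan_nodes[] arrays stand for the per-method hooks
(AMD, ACPI, dummy).

	/* Condensed sketch of initmem_init() in arch/x86/mm/numa_64.c */
	void __init initmem_init(void)
	{
		int i, j;

		for (i = 0; i < ARRAY_SIZE(numa_init); i++) {
			/* reset parsed state; active range clearing now
			   happens here instead of in bad_srat() */
			remove_all_active_ranges();

			if (numa_init[i]() < 0)
				continue;

			/* common path: sort memblks, run the
			   nodes_cover_memory() sanity check, then map and
			   register every possible node */
			if (numa_register_memblks() < 0)
				continue;

			if (scan_nodes[i]() < 0)
				continue;

			/* unmap CPUs whose node never came online */
			for (j = 0; j < nr_cpu_ids; j++) {
				int nid = early_cpu_to_node(j);

				if (nid != NUMA_NO_NODE && !node_online(nid))
					numa_clear_node(j);
			}
			numa_init_array();
			return;
		}
		BUG();
	}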
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/mm/amdtopology_64.c	7
-rw-r--r--	arch/x86/mm/numa_64.c		74
-rw-r--r--	arch/x86/mm/srat_64.c		61
3 files changed, 68 insertions(+), 74 deletions(-)
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index d6d7aa4b98c6..9c9f46adf414 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -262,12 +262,5 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
 
 int __init amd_scan_nodes(void)
 {
-	int i;
-
-	init_memory_mapping_high();
-	for_each_node_mask(i, node_possible_map)
-		setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
-
-	numa_init_array();
 	return 0;
 }
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 552080e8472b..748c6b5bff6d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -287,6 +287,37 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	node_set_online(nodeid);
 }
 
+/*
+ * Sanity check to catch more bad NUMA configurations (they are amazingly
+ * common).  Make sure the nodes cover all memory.
+ */
+static int __init nodes_cover_memory(const struct bootnode *nodes)
+{
+	unsigned long numaram, e820ram;
+	int i;
+
+	numaram = 0;
+	for_each_node_mask(i, mem_nodes_parsed) {
+		unsigned long s = nodes[i].start >> PAGE_SHIFT;
+		unsigned long e = nodes[i].end >> PAGE_SHIFT;
+		numaram += e - s;
+		numaram -= __absent_pages_in_range(i, s, e);
+		if ((long)numaram < 0)
+			numaram = 0;
+	}
+
+	e820ram = max_pfn -
+		(memblock_x86_hole_size(0, max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
+	/* We seem to lose 3 pages somewhere.  Allow 1M of slack. */
+	if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
+		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
+		       (numaram << PAGE_SHIFT) >> 20,
+		       (e820ram << PAGE_SHIFT) >> 20);
+		return 0;
+	}
+	return 1;
+}
+
 static int __init numa_register_memblks(void)
 {
 	int i;
@@ -349,6 +380,27 @@ static int __init numa_register_memblks(void)
 		memblock_x86_register_active_regions(memblk_nodeid[i],
 				node_memblk_range[i].start >> PAGE_SHIFT,
 				node_memblk_range[i].end >> PAGE_SHIFT);
+
+	/* for out of order entries */
+	sort_node_map();
+	if (!nodes_cover_memory(numa_nodes))
+		return -EINVAL;
+
+	init_memory_mapping_high();
+
+	/* Finally register nodes. */
+	for_each_node_mask(i, node_possible_map)
+		setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
+
+	/*
+	 * Try again in case setup_node_bootmem() missed one due to missing
+	 * bootmem.
+	 */
+	for_each_node_mask(i, node_possible_map)
+		if (!node_online(i))
+			setup_node_bootmem(i, numa_nodes[i].start,
+					   numa_nodes[i].end);
+
 	return 0;
 }
 
@@ -714,16 +766,14 @@ static int dummy_numa_init(void)
 	node_set(0, cpu_nodes_parsed);
 	node_set(0, mem_nodes_parsed);
 	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
+	numa_nodes[0].start = 0;
+	numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;
 
 	return 0;
 }
 
 static int dummy_scan_nodes(void)
 {
-	init_memory_mapping_high();
-	setup_node_bootmem(0, 0, max_pfn << PAGE_SHIFT);
-	numa_init_array();
-
 	return 0;
 }
 
@@ -759,6 +809,7 @@ void __init initmem_init(void)
 		memset(node_memblk_range, 0, sizeof(node_memblk_range));
 		memset(memblk_nodeid, 0, sizeof(memblk_nodeid));
 		memset(numa_nodes, 0, sizeof(numa_nodes));
+		remove_all_active_ranges();
 
 		if (numa_init[i]() < 0)
 			continue;
@@ -783,8 +834,19 @@ void __init initmem_init(void)
 		if (numa_register_memblks() < 0)
 			continue;
 
-		if (!scan_nodes[i]())
-			return;
+		if (scan_nodes[i]() < 0)
+			continue;
+
+		for (j = 0; j < nr_cpu_ids; j++) {
+			int nid = early_cpu_to_node(j);
+
+			if (nid == NUMA_NO_NODE)
+				continue;
+			if (!node_online(nid))
+				numa_clear_node(j);
+		}
+		numa_init_array();
+		return;
 	}
 	BUG();
 }
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 69f147116da7..4a2c33b0a48c 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -44,7 +44,6 @@ static __init void bad_srat(void)
 		numa_nodes[i].start = numa_nodes[i].end = 0;
 		nodes_add[i].start = nodes_add[i].end = 0;
 	}
-	remove_all_active_ranges();
 }
 
 static __init inline int srat_disabled(void)
@@ -259,35 +258,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	update_nodes_add(node, start, end);
 }
 
-/* Sanity check to catch more bad SRATs (they are amazingly common).
-   Make sure the PXMs cover all memory. */
-static int __init nodes_cover_memory(const struct bootnode *nodes)
-{
-	int i;
-	unsigned long pxmram, e820ram;
-
-	pxmram = 0;
-	for_each_node_mask(i, mem_nodes_parsed) {
-		unsigned long s = nodes[i].start >> PAGE_SHIFT;
-		unsigned long e = nodes[i].end >> PAGE_SHIFT;
-		pxmram += e - s;
-		pxmram -= __absent_pages_in_range(i, s, e);
-		if ((long)pxmram < 0)
-			pxmram = 0;
-	}
-
-	e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
-	/* We seem to lose 3 pages somewhere.  Allow 1M of slack. */
-	if ((long)(e820ram - pxmram) >= (1 << (20 - PAGE_SHIFT))) {
-		printk(KERN_ERR
-		       "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
-		       (pxmram << PAGE_SHIFT) >> 20,
-		       (e820ram << PAGE_SHIFT) >> 20);
-		return 0;
-	}
-	return 1;
-}
-
 void __init acpi_numa_arch_fixup(void) {}
 
 int __init x86_acpi_numa_init(void)
@@ -303,39 +273,8 @@ int __init x86_acpi_numa_init(void)
 /* Use the information discovered above to actually set up the nodes. */
 int __init acpi_scan_nodes(void)
 {
-	int i;
-
 	if (acpi_numa <= 0)
 		return -1;
-
-	/* for out of order entries in SRAT */
-	sort_node_map();
-	if (!nodes_cover_memory(numa_nodes)) {
-		bad_srat();
-		return -1;
-	}
-
-	init_memory_mapping_high();
-
-	/* Finally register nodes */
-	for_each_node_mask(i, node_possible_map)
-		setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
-	/* Try again in case setup_node_bootmem missed one due
-	   to missing bootmem */
-	for_each_node_mask(i, node_possible_map)
-		if (!node_online(i))
-			setup_node_bootmem(i, numa_nodes[i].start,
-					   numa_nodes[i].end);
-
-	for (i = 0; i < nr_cpu_ids; i++) {
-		int node = early_cpu_to_node(i);
-
-		if (node == NUMA_NO_NODE)
-			continue;
-		if (!node_online(node))
-			numa_clear_node(i);
-	}
-	numa_init_array();
 	return 0;
 }
 
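A note on the nodes_cover_memory() check consolidated above: the comparison is
done in page frames, so the "1M of slack" in the comment is 1 << (20 - PAGE_SHIFT)
frames, i.e. 256 frames with x86-64's 4K pages. Below is a minimal standalone
sketch of the same arithmetic, using hypothetical numbers in place of real
e820 and NUMA data; it is an illustration, not kernel code.

	#include <stdio.h>

	#define PAGE_SHIFT 12	/* 4K pages, as on x86-64 */

	int main(void)
	{
		/* hypothetical machine: 4GB of e820 RAM, NUMA nodes
		   covering all but 512K of it */
		unsigned long e820ram = (4UL << 30) >> PAGE_SHIFT;
		unsigned long numaram = e820ram - ((512UL << 10) >> PAGE_SHIFT);
		long slack = 1L << (20 - PAGE_SHIFT);	/* 1MB in frames */

		if ((long)(e820ram - numaram) >= slack)
			printf("rejected: nodes only cover %luMB of %luMB e820 RAM\n",
			       (numaram << PAGE_SHIFT) >> 20,
			       (e820ram << PAGE_SHIFT) >> 20);
		else
			printf("accepted: %lu uncovered frames, within %ld frame slack\n",
			       e820ram - numaram, slack);
		return 0;
	}

Here the 128 uncovered frames (512K) fall inside the 256-frame slack, so the
configuration would be accepted; an uncovered span of 1MB or more would trip
the same rejection path as the kernel check.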