diff options
author | Tejun Heo <tj@kernel.org> | 2011-02-16 11:11:08 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-02-16 11:11:08 -0500 |
commit | fd0435d8fb1d4e5771f9ae3af71f2a77c1f4bd09 (patch) | |
tree | 633e662f21d47ab162d3651d7b1c527ae0f458fe /arch/x86 | |
parent | 43a662f04f731c331706456c9852ef7146ba5d85 (diff) |
x86-64, NUMA: Unify the rest of memblk registration
Move the remaining memblk registration logic from acpi_scan_nodes() to
numa_register_memblks() and initmem_init().
This applies the nodes_cover_memory() sanity check, memory node sorting
and node_online() checking, which were previously applied only to acpi,
to all init methods.
As all memblk registration is moved to common code, active range
clearing is moved to initmem_init() too and removed from bad_srat().
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/mm/amdtopology_64.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 74 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 61 |
3 files changed, 68 insertions, 74 deletions
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index d6d7aa4b98c6..9c9f46adf414 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c | |||
@@ -262,12 +262,5 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) | |||
262 | 262 | ||
263 | int __init amd_scan_nodes(void) | 263 | int __init amd_scan_nodes(void) |
264 | { | 264 | { |
265 | int i; | ||
266 | |||
267 | init_memory_mapping_high(); | ||
268 | for_each_node_mask(i, node_possible_map) | ||
269 | setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); | ||
270 | |||
271 | numa_init_array(); | ||
272 | return 0; | 265 | return 0; |
273 | } | 266 | } |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 552080e8472b..748c6b5bff6d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -287,6 +287,37 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
287 | node_set_online(nodeid); | 287 | node_set_online(nodeid); |
288 | } | 288 | } |
289 | 289 | ||
290 | /* | ||
291 | * Sanity check to catch more bad NUMA configurations (they are amazingly | ||
292 | * common). Make sure the nodes cover all memory. | ||
293 | */ | ||
294 | static int __init nodes_cover_memory(const struct bootnode *nodes) | ||
295 | { | ||
296 | unsigned long numaram, e820ram; | ||
297 | int i; | ||
298 | |||
299 | numaram = 0; | ||
300 | for_each_node_mask(i, mem_nodes_parsed) { | ||
301 | unsigned long s = nodes[i].start >> PAGE_SHIFT; | ||
302 | unsigned long e = nodes[i].end >> PAGE_SHIFT; | ||
303 | numaram += e - s; | ||
304 | numaram -= __absent_pages_in_range(i, s, e); | ||
305 | if ((long)numaram < 0) | ||
306 | numaram = 0; | ||
307 | } | ||
308 | |||
309 | e820ram = max_pfn - | ||
310 | (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT) >> PAGE_SHIFT); | ||
311 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | ||
312 | if ((long)(e820ram - numaram) >= (1<<(20 - PAGE_SHIFT))) { | ||
313 | printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n", | ||
314 | (numaram << PAGE_SHIFT) >> 20, | ||
315 | (e820ram << PAGE_SHIFT) >> 20); | ||
316 | return 0; | ||
317 | } | ||
318 | return 1; | ||
319 | } | ||
320 | |||
290 | static int __init numa_register_memblks(void) | 321 | static int __init numa_register_memblks(void) |
291 | { | 322 | { |
292 | int i; | 323 | int i; |
@@ -349,6 +380,27 @@ static int __init numa_register_memblks(void) | |||
349 | memblock_x86_register_active_regions(memblk_nodeid[i], | 380 | memblock_x86_register_active_regions(memblk_nodeid[i], |
350 | node_memblk_range[i].start >> PAGE_SHIFT, | 381 | node_memblk_range[i].start >> PAGE_SHIFT, |
351 | node_memblk_range[i].end >> PAGE_SHIFT); | 382 | node_memblk_range[i].end >> PAGE_SHIFT); |
383 | |||
384 | /* for out of order entries */ | ||
385 | sort_node_map(); | ||
386 | if (!nodes_cover_memory(numa_nodes)) | ||
387 | return -EINVAL; | ||
388 | |||
389 | init_memory_mapping_high(); | ||
390 | |||
391 | /* Finally register nodes. */ | ||
392 | for_each_node_mask(i, node_possible_map) | ||
393 | setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); | ||
394 | |||
395 | /* | ||
396 | * Try again in case setup_node_bootmem missed one due to missing | ||
397 | * bootmem. | ||
398 | */ | ||
399 | for_each_node_mask(i, node_possible_map) | ||
400 | if (!node_online(i)) | ||
401 | setup_node_bootmem(i, numa_nodes[i].start, | ||
402 | numa_nodes[i].end); | ||
403 | |||
352 | return 0; | 404 | return 0; |
353 | } | 405 | } |
354 | 406 | ||
@@ -714,16 +766,14 @@ static int dummy_numa_init(void) | |||
714 | node_set(0, cpu_nodes_parsed); | 766 | node_set(0, cpu_nodes_parsed); |
715 | node_set(0, mem_nodes_parsed); | 767 | node_set(0, mem_nodes_parsed); |
716 | numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); | 768 | numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); |
769 | numa_nodes[0].start = 0; | ||
770 | numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT; | ||
717 | 771 | ||
718 | return 0; | 772 | return 0; |
719 | } | 773 | } |
720 | 774 | ||
721 | static int dummy_scan_nodes(void) | 775 | static int dummy_scan_nodes(void) |
722 | { | 776 | { |
723 | init_memory_mapping_high(); | ||
724 | setup_node_bootmem(0, 0, max_pfn << PAGE_SHIFT); | ||
725 | numa_init_array(); | ||
726 | |||
727 | return 0; | 777 | return 0; |
728 | } | 778 | } |
729 | 779 | ||
@@ -759,6 +809,7 @@ void __init initmem_init(void) | |||
759 | memset(node_memblk_range, 0, sizeof(node_memblk_range)); | 809 | memset(node_memblk_range, 0, sizeof(node_memblk_range)); |
760 | memset(memblk_nodeid, 0, sizeof(memblk_nodeid)); | 810 | memset(memblk_nodeid, 0, sizeof(memblk_nodeid)); |
761 | memset(numa_nodes, 0, sizeof(numa_nodes)); | 811 | memset(numa_nodes, 0, sizeof(numa_nodes)); |
812 | remove_all_active_ranges(); | ||
762 | 813 | ||
763 | if (numa_init[i]() < 0) | 814 | if (numa_init[i]() < 0) |
764 | continue; | 815 | continue; |
@@ -783,8 +834,19 @@ void __init initmem_init(void) | |||
783 | if (numa_register_memblks() < 0) | 834 | if (numa_register_memblks() < 0) |
784 | continue; | 835 | continue; |
785 | 836 | ||
786 | if (!scan_nodes[i]()) | 837 | if (scan_nodes[i]() < 0) |
787 | return; | 838 | continue; |
839 | |||
840 | for (j = 0; j < nr_cpu_ids; j++) { | ||
841 | int nid = early_cpu_to_node(j); | ||
842 | |||
843 | if (nid == NUMA_NO_NODE) | ||
844 | continue; | ||
845 | if (!node_online(nid)) | ||
846 | numa_clear_node(j); | ||
847 | } | ||
848 | numa_init_array(); | ||
849 | return; | ||
788 | } | 850 | } |
789 | BUG(); | 851 | BUG(); |
790 | } | 852 | } |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 69f147116da7..4a2c33b0a48c 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -44,7 +44,6 @@ static __init void bad_srat(void) | |||
44 | numa_nodes[i].start = numa_nodes[i].end = 0; | 44 | numa_nodes[i].start = numa_nodes[i].end = 0; |
45 | nodes_add[i].start = nodes_add[i].end = 0; | 45 | nodes_add[i].start = nodes_add[i].end = 0; |
46 | } | 46 | } |
47 | remove_all_active_ranges(); | ||
48 | } | 47 | } |
49 | 48 | ||
50 | static __init inline int srat_disabled(void) | 49 | static __init inline int srat_disabled(void) |
@@ -259,35 +258,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
259 | update_nodes_add(node, start, end); | 258 | update_nodes_add(node, start, end); |
260 | } | 259 | } |
261 | 260 | ||
262 | /* Sanity check to catch more bad SRATs (they are amazingly common). | ||
263 | Make sure the PXMs cover all memory. */ | ||
264 | static int __init nodes_cover_memory(const struct bootnode *nodes) | ||
265 | { | ||
266 | int i; | ||
267 | unsigned long pxmram, e820ram; | ||
268 | |||
269 | pxmram = 0; | ||
270 | for_each_node_mask(i, mem_nodes_parsed) { | ||
271 | unsigned long s = nodes[i].start >> PAGE_SHIFT; | ||
272 | unsigned long e = nodes[i].end >> PAGE_SHIFT; | ||
273 | pxmram += e - s; | ||
274 | pxmram -= __absent_pages_in_range(i, s, e); | ||
275 | if ((long)pxmram < 0) | ||
276 | pxmram = 0; | ||
277 | } | ||
278 | |||
279 | e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); | ||
280 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | ||
281 | if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { | ||
282 | printk(KERN_ERR | ||
283 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", | ||
284 | (pxmram << PAGE_SHIFT) >> 20, | ||
285 | (e820ram << PAGE_SHIFT) >> 20); | ||
286 | return 0; | ||
287 | } | ||
288 | return 1; | ||
289 | } | ||
290 | |||
291 | void __init acpi_numa_arch_fixup(void) {} | 261 | void __init acpi_numa_arch_fixup(void) {} |
292 | 262 | ||
293 | int __init x86_acpi_numa_init(void) | 263 | int __init x86_acpi_numa_init(void) |
@@ -303,39 +273,8 @@ int __init x86_acpi_numa_init(void) | |||
303 | /* Use the information discovered above to actually set up the nodes. */ | 273 | /* Use the information discovered above to actually set up the nodes. */ |
304 | int __init acpi_scan_nodes(void) | 274 | int __init acpi_scan_nodes(void) |
305 | { | 275 | { |
306 | int i; | ||
307 | |||
308 | if (acpi_numa <= 0) | 276 | if (acpi_numa <= 0) |
309 | return -1; | 277 | return -1; |
310 | |||
311 | /* for out of order entries in SRAT */ | ||
312 | sort_node_map(); | ||
313 | if (!nodes_cover_memory(numa_nodes)) { | ||
314 | bad_srat(); | ||
315 | return -1; | ||
316 | } | ||
317 | |||
318 | init_memory_mapping_high(); | ||
319 | |||
320 | /* Finally register nodes */ | ||
321 | for_each_node_mask(i, node_possible_map) | ||
322 | setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end); | ||
323 | /* Try again in case setup_node_bootmem missed one due | ||
324 | to missing bootmem */ | ||
325 | for_each_node_mask(i, node_possible_map) | ||
326 | if (!node_online(i)) | ||
327 | setup_node_bootmem(i, numa_nodes[i].start, | ||
328 | numa_nodes[i].end); | ||
329 | |||
330 | for (i = 0; i < nr_cpu_ids; i++) { | ||
331 | int node = early_cpu_to_node(i); | ||
332 | |||
333 | if (node == NUMA_NO_NODE) | ||
334 | continue; | ||
335 | if (!node_online(node)) | ||
336 | numa_clear_node(i); | ||
337 | } | ||
338 | numa_init_array(); | ||
339 | return 0; | 278 | return 0; |
340 | } | 279 | } |
341 | 280 | ||