diff options
author | Yinghai Lu <yinghai@kernel.org> | 2010-12-27 19:48:17 -0500 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2010-12-29 18:48:08 -0500 |
commit | 1411e0ec3123ae4c4ead6bfc9fe3ee5a3ae5c327 (patch) | |
tree | ef7db8944eb21abbfbd5d7197b18860db95b2656 /arch/x86/mm | |
parent | dbef7b56d2fc5115f26f72a0b080283bbf972cab (diff) |
x86-64, numa: Put pgtable to local node memory
Introduce init_memory_mapping_high() and use it on 64-bit.
It walks every memory segment above 4 GB and creates the page tables for
each memory range inside that range itself.
Before this patch, all page tables were on one node.
With this patch, one RED-PEN comment is resolved and removed.
Debug output for an 8-socket system after the patch:
[ 0.000000] initial memory mapped : 0 - 20000000
[ 0.000000] init_memory_mapping: [0x00000000000000-0x0000007f74ffff]
[ 0.000000] 0000000000 - 007f600000 page 2M
[ 0.000000] 007f600000 - 007f750000 page 4k
[ 0.000000] kernel direct mapping tables up to 7f750000 @ [0x7f74c000-0x7f74ffff]
[ 0.000000] RAMDISK: 7bc84000 - 7f745000
....
[ 0.000000] Adding active range (0, 0x10, 0x95) 0 entries of 3200 used
[ 0.000000] Adding active range (0, 0x100, 0x7f750) 1 entries of 3200 used
[ 0.000000] Adding active range (0, 0x100000, 0x1080000) 2 entries of 3200 used
[ 0.000000] Adding active range (1, 0x1080000, 0x2080000) 3 entries of 3200 used
[ 0.000000] Adding active range (2, 0x2080000, 0x3080000) 4 entries of 3200 used
[ 0.000000] Adding active range (3, 0x3080000, 0x4080000) 5 entries of 3200 used
[ 0.000000] Adding active range (4, 0x4080000, 0x5080000) 6 entries of 3200 used
[ 0.000000] Adding active range (5, 0x5080000, 0x6080000) 7 entries of 3200 used
[ 0.000000] Adding active range (6, 0x6080000, 0x7080000) 8 entries of 3200 used
[ 0.000000] Adding active range (7, 0x7080000, 0x8080000) 9 entries of 3200 used
[ 0.000000] init_memory_mapping: [0x00000100000000-0x0000107fffffff]
[ 0.000000] 0100000000 - 1080000000 page 2M
[ 0.000000] kernel direct mapping tables up to 1080000000 @ [0x107ffbd000-0x107fffffff]
[ 0.000000] memblock_x86_reserve_range: [0x107ffc2000-0x107fffffff] PGTABLE
[ 0.000000] init_memory_mapping: [0x00001080000000-0x0000207fffffff]
[ 0.000000] 1080000000 - 2080000000 page 2M
[ 0.000000] kernel direct mapping tables up to 2080000000 @ [0x207ff7d000-0x207fffffff]
[ 0.000000] memblock_x86_reserve_range: [0x207ffc0000-0x207fffffff] PGTABLE
[ 0.000000] init_memory_mapping: [0x00002080000000-0x0000307fffffff]
[ 0.000000] 2080000000 - 3080000000 page 2M
[ 0.000000] kernel direct mapping tables up to 3080000000 @ [0x307ff3d000-0x307fffffff]
[ 0.000000] memblock_x86_reserve_range: [0x307ffc0000-0x307fffffff] PGTABLE
[ 0.000000] init_memory_mapping: [0x00003080000000-0x0000407fffffff]
[ 0.000000] 3080000000 - 4080000000 page 2M
[ 0.000000] kernel direct mapping tables up to 4080000000 @ [0x407fefd000-0x407fffffff]
[ 0.000000] memblock_x86_reserve_range: [0x407ffc0000-0x407fffffff] PGTABLE
[ 0.000000] init_memory_mapping: [0x00004080000000-0x0000507fffffff]
[ 0.000000] 4080000000 - 5080000000 page 2M
[ 0.000000] kernel direct mapping tables up to 5080000000 @ [0x507febd000-0x507fffffff]
[ 0.000000] memblock_x86_reserve_range: [0x507ffc0000-0x507fffffff] PGTABLE
[ 0.000000] init_memory_mapping: [0x00005080000000-0x0000607fffffff]
[ 0.000000] 5080000000 - 6080000000 page 2M
[ 0.000000] kernel direct mapping tables up to 6080000000 @ [0x607fe7d000-0x607fffffff]
[ 0.000000] memblock_x86_reserve_range: [0x607ffc0000-0x607fffffff] PGTABLE
[ 0.000000] init_memory_mapping: [0x00006080000000-0x0000707fffffff]
[ 0.000000] 6080000000 - 7080000000 page 2M
[ 0.000000] kernel direct mapping tables up to 7080000000 @ [0x707fe3d000-0x707fffffff]
[ 0.000000] memblock_x86_reserve_range: [0x707ffc0000-0x707fffffff] PGTABLE
[ 0.000000] init_memory_mapping: [0x00007080000000-0x0000807fffffff]
[ 0.000000] 7080000000 - 8080000000 page 2M
[ 0.000000] kernel direct mapping tables up to 8080000000 @ [0x807fdfc000-0x807fffffff]
[ 0.000000] memblock_x86_reserve_range: [0x807ffbf000-0x807fffffff] PGTABLE
[ 0.000000] Initmem setup node 0 [0000000000000000-000000107fffffff]
[ 0.000000] NODE_DATA [0x0000107ffbd000-0x0000107ffc1fff]
[ 0.000000] Initmem setup node 1 [0000001080000000-000000207fffffff]
[ 0.000000] NODE_DATA [0x0000207ffbb000-0x0000207ffbffff]
[ 0.000000] Initmem setup node 2 [0000002080000000-000000307fffffff]
[ 0.000000] NODE_DATA [0x0000307ffbb000-0x0000307ffbffff]
[ 0.000000] Initmem setup node 3 [0000003080000000-000000407fffffff]
[ 0.000000] NODE_DATA [0x0000407ffbb000-0x0000407ffbffff]
[ 0.000000] Initmem setup node 4 [0000004080000000-000000507fffffff]
[ 0.000000] NODE_DATA [0x0000507ffbb000-0x0000507ffbffff]
[ 0.000000] Initmem setup node 5 [0000005080000000-000000607fffffff]
[ 0.000000] NODE_DATA [0x0000607ffbb000-0x0000607ffbffff]
[ 0.000000] Initmem setup node 6 [0000006080000000-000000707fffffff]
[ 0.000000] NODE_DATA [0x0000707ffbb000-0x0000707ffbffff]
[ 0.000000] Initmem setup node 7 [0000007080000000-000000807fffffff]
[ 0.000000] NODE_DATA [0x0000807ffba000-0x0000807ffbefff]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4D1933D1.9020609@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/amdtopology_64.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 54 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 2 |
5 files changed, 66 insertions, 12 deletions
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index 51fae9cfdecb..ae6ad691a14a 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c | |||
@@ -221,12 +221,14 @@ int __init amd_scan_nodes(void) | |||
221 | apicid_base = boot_cpu_physical_apicid; | 221 | apicid_base = boot_cpu_physical_apicid; |
222 | } | 222 | } |
223 | 223 | ||
224 | for_each_node_mask(i, node_possible_map) { | 224 | for_each_node_mask(i, node_possible_map) |
225 | int j; | ||
226 | |||
227 | memblock_x86_register_active_regions(i, | 225 | memblock_x86_register_active_regions(i, |
228 | nodes[i].start >> PAGE_SHIFT, | 226 | nodes[i].start >> PAGE_SHIFT, |
229 | nodes[i].end >> PAGE_SHIFT); | 227 | nodes[i].end >> PAGE_SHIFT); |
228 | init_memory_mapping_high(); | ||
229 | for_each_node_mask(i, node_possible_map) { | ||
230 | int j; | ||
231 | |||
230 | for (j = apicid_base; j < cores + apicid_base; j++) | 232 | for (j = apicid_base; j < cores + apicid_base; j++) |
231 | apicid_to_node[(i << bits) + j] = i; | 233 | apicid_to_node[(i << bits) + j] = i; |
232 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 234 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 5863950ebe0c..fa6fe756d912 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -65,16 +65,10 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
65 | #ifdef CONFIG_X86_32 | 65 | #ifdef CONFIG_X86_32 |
66 | /* for fixmap */ | 66 | /* for fixmap */ |
67 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); | 67 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); |
68 | #endif | ||
69 | 68 | ||
70 | /* | ||
71 | * RED-PEN putting page tables only on node 0 could | ||
72 | * cause a hotspot and fill up ZONE_DMA. The page tables | ||
73 | * need roughly 0.5KB per GB. | ||
74 | */ | ||
75 | #ifdef CONFIG_X86_32 | ||
76 | good_end = max_pfn_mapped << PAGE_SHIFT; | 69 | good_end = max_pfn_mapped << PAGE_SHIFT; |
77 | #endif | 70 | #endif |
71 | |||
78 | base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); | 72 | base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); |
79 | if (base == MEMBLOCK_ERROR) | 73 | if (base == MEMBLOCK_ERROR) |
80 | panic("Cannot find space for the kernel page tables"); | 74 | panic("Cannot find space for the kernel page tables"); |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 024847dc81ab..194f2732ab77 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -607,9 +607,63 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
607 | int acpi, int k8) | 607 | int acpi, int k8) |
608 | { | 608 | { |
609 | memblock_x86_register_active_regions(0, start_pfn, end_pfn); | 609 | memblock_x86_register_active_regions(0, start_pfn, end_pfn); |
610 | init_memory_mapping_high(); | ||
610 | } | 611 | } |
611 | #endif | 612 | #endif |
612 | 613 | ||
614 | struct mapping_work_data { | ||
615 | unsigned long start; | ||
616 | unsigned long end; | ||
617 | unsigned long pfn_mapped; | ||
618 | }; | ||
619 | |||
620 | static int __init_refok | ||
621 | mapping_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax) | ||
622 | { | ||
623 | struct mapping_work_data *data = datax; | ||
624 | unsigned long pfn_mapped; | ||
625 | unsigned long final_start, final_end; | ||
626 | |||
627 | final_start = max_t(unsigned long, start_pfn<<PAGE_SHIFT, data->start); | ||
628 | final_end = min_t(unsigned long, end_pfn<<PAGE_SHIFT, data->end); | ||
629 | |||
630 | if (final_end <= final_start) | ||
631 | return 0; | ||
632 | |||
633 | pfn_mapped = init_memory_mapping(final_start, final_end); | ||
634 | |||
635 | if (pfn_mapped > data->pfn_mapped) | ||
636 | data->pfn_mapped = pfn_mapped; | ||
637 | |||
638 | return 0; | ||
639 | } | ||
640 | |||
641 | static unsigned long __init_refok | ||
642 | init_memory_mapping_active_regions(unsigned long start, unsigned long end) | ||
643 | { | ||
644 | struct mapping_work_data data; | ||
645 | |||
646 | data.start = start; | ||
647 | data.end = end; | ||
648 | data.pfn_mapped = 0; | ||
649 | |||
650 | work_with_active_regions(MAX_NUMNODES, mapping_work_fn, &data); | ||
651 | |||
652 | return data.pfn_mapped; | ||
653 | } | ||
654 | |||
655 | void __init_refok init_memory_mapping_high(void) | ||
656 | { | ||
657 | if (max_pfn > max_low_pfn) { | ||
658 | max_pfn_mapped = init_memory_mapping_active_regions(1UL<<32, | ||
659 | max_pfn<<PAGE_SHIFT); | ||
660 | /* can we preserve max_low_pfn ? */ | ||
661 | max_low_pfn = max_pfn; | ||
662 | |||
663 | memblock.current_limit = get_max_mapped(); | ||
664 | } | ||
665 | } | ||
666 | |||
613 | void __init paging_init(void) | 667 | void __init paging_init(void) |
614 | { | 668 | { |
615 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | 669 | unsigned long max_zone_pfns[MAX_NR_ZONES]; |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 02d36ff85ebd..7cc26ae0a15d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -590,11 +590,12 @@ static int __init numa_emulation(unsigned long start_pfn, | |||
590 | * the e820 memory map. | 590 | * the e820 memory map. |
591 | */ | 591 | */ |
592 | remove_all_active_ranges(); | 592 | remove_all_active_ranges(); |
593 | for_each_node_mask(i, node_possible_map) { | 593 | for_each_node_mask(i, node_possible_map) |
594 | memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | 594 | memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, |
595 | nodes[i].end >> PAGE_SHIFT); | 595 | nodes[i].end >> PAGE_SHIFT); |
596 | init_memory_mapping_high(); | ||
597 | for_each_node_mask(i, node_possible_map) | ||
596 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 598 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
597 | } | ||
598 | acpi_fake_nodes(nodes, num_nodes); | 599 | acpi_fake_nodes(nodes, num_nodes); |
599 | numa_init_array(); | 600 | numa_init_array(); |
600 | return 0; | 601 | return 0; |
@@ -645,6 +646,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, | |||
645 | for (i = 0; i < nr_cpu_ids; i++) | 646 | for (i = 0; i < nr_cpu_ids; i++) |
646 | numa_set_node(i, 0); | 647 | numa_set_node(i, 0); |
647 | memblock_x86_register_active_regions(0, start_pfn, last_pfn); | 648 | memblock_x86_register_active_regions(0, start_pfn, last_pfn); |
649 | init_memory_mapping_high(); | ||
648 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); | 650 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); |
649 | } | 651 | } |
650 | 652 | ||
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a35cb9d8b060..0b961c8bffb4 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -433,6 +433,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
433 | return -1; | 433 | return -1; |
434 | } | 434 | } |
435 | 435 | ||
436 | init_memory_mapping_high(); | ||
437 | |||
436 | /* Account for nodes with cpus and no memory */ | 438 | /* Account for nodes with cpus and no memory */ |
437 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); | 439 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); |
438 | 440 | ||