aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorYinghai Lu <yinghai@kernel.org>2010-12-27 19:48:17 -0500
committerH. Peter Anvin <hpa@linux.intel.com>2010-12-29 18:48:08 -0500
commit1411e0ec3123ae4c4ead6bfc9fe3ee5a3ae5c327 (patch)
treeef7db8944eb21abbfbd5d7197b18860db95b2656 /arch/x86/mm
parentdbef7b56d2fc5115f26f72a0b080283bbf972cab (diff)
x86-64, numa: Put pgtable to local node memory
Introduce init_memory_mapping_high() and use it on 64-bit. It walks every memory segment above 4G and creates the page tables for that range inside the range itself. Before this patch, all page tables were on one node. With this patch, one RED-PEN is killed. Debug output for an 8-socket system after the patch: [ 0.000000] initial memory mapped : 0 - 20000000 [ 0.000000] init_memory_mapping: [0x00000000000000-0x0000007f74ffff] [ 0.000000] 0000000000 - 007f600000 page 2M [ 0.000000] 007f600000 - 007f750000 page 4k [ 0.000000] kernel direct mapping tables up to 7f750000 @ [0x7f74c000-0x7f74ffff] [ 0.000000] RAMDISK: 7bc84000 - 7f745000 .... [ 0.000000] Adding active range (0, 0x10, 0x95) 0 entries of 3200 used [ 0.000000] Adding active range (0, 0x100, 0x7f750) 1 entries of 3200 used [ 0.000000] Adding active range (0, 0x100000, 0x1080000) 2 entries of 3200 used [ 0.000000] Adding active range (1, 0x1080000, 0x2080000) 3 entries of 3200 used [ 0.000000] Adding active range (2, 0x2080000, 0x3080000) 4 entries of 3200 used [ 0.000000] Adding active range (3, 0x3080000, 0x4080000) 5 entries of 3200 used [ 0.000000] Adding active range (4, 0x4080000, 0x5080000) 6 entries of 3200 used [ 0.000000] Adding active range (5, 0x5080000, 0x6080000) 7 entries of 3200 used [ 0.000000] Adding active range (6, 0x6080000, 0x7080000) 8 entries of 3200 used [ 0.000000] Adding active range (7, 0x7080000, 0x8080000) 9 entries of 3200 used [ 0.000000] init_memory_mapping: [0x00000100000000-0x0000107fffffff] [ 0.000000] 0100000000 - 1080000000 page 2M [ 0.000000] kernel direct mapping tables up to 1080000000 @ [0x107ffbd000-0x107fffffff] [ 0.000000] memblock_x86_reserve_range: [0x107ffc2000-0x107fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00001080000000-0x0000207fffffff] [ 0.000000] 1080000000 - 2080000000 page 2M [ 0.000000] kernel direct mapping tables up to 2080000000 @ [0x207ff7d000-0x207fffffff] [ 0.000000] memblock_x86_reserve_range: [0x207ffc0000-0x207fffffff] PGTABLE [ 0.000000] 
init_memory_mapping: [0x00002080000000-0x0000307fffffff] [ 0.000000] 2080000000 - 3080000000 page 2M [ 0.000000] kernel direct mapping tables up to 3080000000 @ [0x307ff3d000-0x307fffffff] [ 0.000000] memblock_x86_reserve_range: [0x307ffc0000-0x307fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00003080000000-0x0000407fffffff] [ 0.000000] 3080000000 - 4080000000 page 2M [ 0.000000] kernel direct mapping tables up to 4080000000 @ [0x407fefd000-0x407fffffff] [ 0.000000] memblock_x86_reserve_range: [0x407ffc0000-0x407fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00004080000000-0x0000507fffffff] [ 0.000000] 4080000000 - 5080000000 page 2M [ 0.000000] kernel direct mapping tables up to 5080000000 @ [0x507febd000-0x507fffffff] [ 0.000000] memblock_x86_reserve_range: [0x507ffc0000-0x507fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00005080000000-0x0000607fffffff] [ 0.000000] 5080000000 - 6080000000 page 2M [ 0.000000] kernel direct mapping tables up to 6080000000 @ [0x607fe7d000-0x607fffffff] [ 0.000000] memblock_x86_reserve_range: [0x607ffc0000-0x607fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00006080000000-0x0000707fffffff] [ 0.000000] 6080000000 - 7080000000 page 2M [ 0.000000] kernel direct mapping tables up to 7080000000 @ [0x707fe3d000-0x707fffffff] [ 0.000000] memblock_x86_reserve_range: [0x707ffc0000-0x707fffffff] PGTABLE [ 0.000000] init_memory_mapping: [0x00007080000000-0x0000807fffffff] [ 0.000000] 7080000000 - 8080000000 page 2M [ 0.000000] kernel direct mapping tables up to 8080000000 @ [0x807fdfc000-0x807fffffff] [ 0.000000] memblock_x86_reserve_range: [0x807ffbf000-0x807fffffff] PGTABLE [ 0.000000] Initmem setup node 0 [0000000000000000-000000107fffffff] [ 0.000000] NODE_DATA [0x0000107ffbd000-0x0000107ffc1fff] [ 0.000000] Initmem setup node 1 [0000001080000000-000000207fffffff] [ 0.000000] NODE_DATA [0x0000207ffbb000-0x0000207ffbffff] [ 0.000000] Initmem setup node 2 [0000002080000000-000000307fffffff] [ 0.000000] NODE_DATA 
[0x0000307ffbb000-0x0000307ffbffff] [ 0.000000] Initmem setup node 3 [0000003080000000-000000407fffffff] [ 0.000000] NODE_DATA [0x0000407ffbb000-0x0000407ffbffff] [ 0.000000] Initmem setup node 4 [0000004080000000-000000507fffffff] [ 0.000000] NODE_DATA [0x0000507ffbb000-0x0000507ffbffff] [ 0.000000] Initmem setup node 5 [0000005080000000-000000607fffffff] [ 0.000000] NODE_DATA [0x0000607ffbb000-0x0000607ffbffff] [ 0.000000] Initmem setup node 6 [0000006080000000-000000707fffffff] [ 0.000000] NODE_DATA [0x0000707ffbb000-0x0000707ffbffff] [ 0.000000] Initmem setup node 7 [0000007080000000-000000807fffffff] [ 0.000000] NODE_DATA [0x0000807ffba000-0x0000807ffbefff] Signed-off-by: Yinghai Lu <yinghai@kernel.org> LKML-Reference: <4D1933D1.9020609@kernel.org> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/amdtopology_64.c8
-rw-r--r--arch/x86/mm/init.c8
-rw-r--r--arch/x86/mm/init_64.c54
-rw-r--r--arch/x86/mm/numa_64.c6
-rw-r--r--arch/x86/mm/srat_64.c2
5 files changed, 66 insertions, 12 deletions
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index 51fae9cfdecb..ae6ad691a14a 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -221,12 +221,14 @@ int __init amd_scan_nodes(void)
221 apicid_base = boot_cpu_physical_apicid; 221 apicid_base = boot_cpu_physical_apicid;
222 } 222 }
223 223
224 for_each_node_mask(i, node_possible_map) { 224 for_each_node_mask(i, node_possible_map)
225 int j;
226
227 memblock_x86_register_active_regions(i, 225 memblock_x86_register_active_regions(i,
228 nodes[i].start >> PAGE_SHIFT, 226 nodes[i].start >> PAGE_SHIFT,
229 nodes[i].end >> PAGE_SHIFT); 227 nodes[i].end >> PAGE_SHIFT);
228 init_memory_mapping_high();
229 for_each_node_mask(i, node_possible_map) {
230 int j;
231
230 for (j = apicid_base; j < cores + apicid_base; j++) 232 for (j = apicid_base; j < cores + apicid_base; j++)
231 apicid_to_node[(i << bits) + j] = i; 233 apicid_to_node[(i << bits) + j] = i;
232 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 234 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 5863950ebe0c..fa6fe756d912 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -65,16 +65,10 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
65#ifdef CONFIG_X86_32 65#ifdef CONFIG_X86_32
66 /* for fixmap */ 66 /* for fixmap */
67 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); 67 tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
68#endif
69 68
70 /*
71 * RED-PEN putting page tables only on node 0 could
72 * cause a hotspot and fill up ZONE_DMA. The page tables
73 * need roughly 0.5KB per GB.
74 */
75#ifdef CONFIG_X86_32
76 good_end = max_pfn_mapped << PAGE_SHIFT; 69 good_end = max_pfn_mapped << PAGE_SHIFT;
77#endif 70#endif
71
78 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); 72 base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
79 if (base == MEMBLOCK_ERROR) 73 if (base == MEMBLOCK_ERROR)
80 panic("Cannot find space for the kernel page tables"); 74 panic("Cannot find space for the kernel page tables");
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 024847dc81ab..194f2732ab77 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -607,9 +607,63 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
607 int acpi, int k8) 607 int acpi, int k8)
608{ 608{
609 memblock_x86_register_active_regions(0, start_pfn, end_pfn); 609 memblock_x86_register_active_regions(0, start_pfn, end_pfn);
610 init_memory_mapping_high();
610} 611}
611#endif 612#endif
612 613
/*
 * State shared between init_memory_mapping_active_regions() and the
 * per-region callback mapping_work_fn().
 */
struct mapping_work_data {
	unsigned long start;		/* lower bound of the window to map, as a byte address */
	unsigned long end;		/* upper bound of the window to map, as a byte address */
	unsigned long pfn_mapped;	/* largest init_memory_mapping() return value seen so far */
};
619
620static int __init_refok
621mapping_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax)
622{
623 struct mapping_work_data *data = datax;
624 unsigned long pfn_mapped;
625 unsigned long final_start, final_end;
626
627 final_start = max_t(unsigned long, start_pfn<<PAGE_SHIFT, data->start);
628 final_end = min_t(unsigned long, end_pfn<<PAGE_SHIFT, data->end);
629
630 if (final_end <= final_start)
631 return 0;
632
633 pfn_mapped = init_memory_mapping(final_start, final_end);
634
635 if (pfn_mapped > data->pfn_mapped)
636 data->pfn_mapped = pfn_mapped;
637
638 return 0;
639}
640
641static unsigned long __init_refok
642init_memory_mapping_active_regions(unsigned long start, unsigned long end)
643{
644 struct mapping_work_data data;
645
646 data.start = start;
647 data.end = end;
648 data.pfn_mapped = 0;
649
650 work_with_active_regions(MAX_NUMNODES, mapping_work_fn, &data);
651
652 return data.pfn_mapped;
653}
654
655void __init_refok init_memory_mapping_high(void)
656{
657 if (max_pfn > max_low_pfn) {
658 max_pfn_mapped = init_memory_mapping_active_regions(1UL<<32,
659 max_pfn<<PAGE_SHIFT);
660 /* can we preserve max_low_pfn ? */
661 max_low_pfn = max_pfn;
662
663 memblock.current_limit = get_max_mapped();
664 }
665}
666
613void __init paging_init(void) 667void __init paging_init(void)
614{ 668{
615 unsigned long max_zone_pfns[MAX_NR_ZONES]; 669 unsigned long max_zone_pfns[MAX_NR_ZONES];
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 02d36ff85ebd..7cc26ae0a15d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -590,11 +590,12 @@ static int __init numa_emulation(unsigned long start_pfn,
590 * the e820 memory map. 590 * the e820 memory map.
591 */ 591 */
592 remove_all_active_ranges(); 592 remove_all_active_ranges();
593 for_each_node_mask(i, node_possible_map) { 593 for_each_node_mask(i, node_possible_map)
594 memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 594 memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
595 nodes[i].end >> PAGE_SHIFT); 595 nodes[i].end >> PAGE_SHIFT);
596 init_memory_mapping_high();
597 for_each_node_mask(i, node_possible_map)
596 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 598 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
597 }
598 acpi_fake_nodes(nodes, num_nodes); 599 acpi_fake_nodes(nodes, num_nodes);
599 numa_init_array(); 600 numa_init_array();
600 return 0; 601 return 0;
@@ -645,6 +646,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
645 for (i = 0; i < nr_cpu_ids; i++) 646 for (i = 0; i < nr_cpu_ids; i++)
646 numa_set_node(i, 0); 647 numa_set_node(i, 0);
647 memblock_x86_register_active_regions(0, start_pfn, last_pfn); 648 memblock_x86_register_active_regions(0, start_pfn, last_pfn);
649 init_memory_mapping_high();
648 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); 650 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
649} 651}
650 652
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a35cb9d8b060..0b961c8bffb4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -433,6 +433,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
433 return -1; 433 return -1;
434 } 434 }
435 435
436 init_memory_mapping_high();
437
436 /* Account for nodes with cpus and no memory */ 438 /* Account for nodes with cpus and no memory */
437 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); 439 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
438 440