author		Tejun Heo <tj@kernel.org>	2011-03-04 04:26:36 -0500
committer	Tejun Heo <tj@kernel.org>	2011-03-04 04:26:36 -0500
commit		f89112502805c1f6a6955f90ad158e538edb319d (patch)
tree		c8fff8bdf2a2297e92e78b8661c9e8b405a7b304 /arch
parent		eb8c1e2c830fc25c93bc94e215ed387fe142a98d (diff)
x86-64, NUMA: Revert NUMA affine page table allocation
This reverts the NUMA-affine page table allocation added by commit 1411e0ec31
(x86-64, numa: Put pgtable to local node memory).  That commit made an
undocumented change whereby the kernel linear mapping strictly follows the
intersection of the e820 memory map and the NUMA configuration.  If the
physical memory configuration has holes, or the NUMA nodes are not properly
aligned, this forces the use of unnecessarily small mapping sizes, which
increases TLB pressure.  For details:

  http://thread.gmane.org/gmane.linux.kernel/1104672

Patches to fix the problem have been proposed, but the underlying code needs
more cleanup and the approach itself seems a bit heavy handed, so it has been
decided to revert the feature for now and revisit it in the next development
cycle:

  http://thread.gmane.org/gmane.linux.kernel/1105959

As the init_memory_mapping_high() call sites have been consolidated since the
original commit, the revert is done manually.  Also, the RED-PEN comment in
arch/x86/mm/init.c is not restored, as the problem it described no longer
exists with memblock-based top-down early memory allocation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
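[Editorial illustration, not part of the patch: the following minimal userspace
sketch shows one aspect of the TLB-pressure argument above.  The node layout,
addresses, and helper names are hypothetical; it simply counts how many 2MB
versus 4KB entries are needed when a physical range is mapped per NUMA region
(as the reverted code did) compared with mapping the whole range in one pass
(as the restored init_memory_mapping() call does).]

#include <stdio.h>

#define PMD_SIZE  (2UL << 20)	/* 2MB large page on x86-64 */
#define PAGE_SIZE (4UL << 10)	/* 4KB base page */

/* Count 2MB and 4KB pages needed to map [start, end); both 4KB aligned. */
static void count_pages(unsigned long start, unsigned long end,
			unsigned long *large, unsigned long *small)
{
	unsigned long big_start = (start + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
	unsigned long big_end = end & ~(PMD_SIZE - 1);

	if (big_start < big_end) {
		*large += (big_end - big_start) / PMD_SIZE;
		*small += (big_start - start) / PAGE_SIZE;	/* unaligned head */
		*small += (end - big_end) / PAGE_SIZE;		/* unaligned tail */
	} else {
		*small += (end - start) / PAGE_SIZE;
	}
}

int main(void)
{
	/* Two hypothetical NUMA regions whose boundary is not 2MB aligned. */
	unsigned long regions[2][2] = {
		{ 0x100000000UL, 0x140100000UL },	/* node 0, ends at 5GB+1MB */
		{ 0x140100000UL, 0x180000000UL },	/* node 1, starts at 5GB+1MB */
	};
	unsigned long large = 0, small = 0;

	/* Per-region mapping: each node's piece is mapped on its own. */
	for (int i = 0; i < 2; i++)
		count_pages(regions[i][0], regions[i][1], &large, &small);
	printf("per-region: %lu 2MB pages, %lu 4KB pages\n", large, small);

	/* Single-range mapping: one call covers both nodes. */
	large = small = 0;
	count_pages(regions[0][0], regions[1][1], &large, &small);
	printf("one range:  %lu 2MB pages, %lu 4KB pages\n", large, small);
	return 0;
}

[With this layout the per-region walk needs 512 extra 4KB entries around the
node boundary (1023 2MB + 512 4KB pages), while the single-range mapping uses
1024 2MB pages throughout.]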
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/include/asm/page_types.h	 2
-rw-r--r--	arch/x86/kernel/setup.c			 8
-rw-r--r--	arch/x86/mm/init_64.c			54
-rw-r--r--	arch/x86/mm/numa_64.c			 2
4 files changed, 8 insertions, 58 deletions
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 97e6007e4edd..bce688d54c12 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -54,8 +54,6 @@ static inline phys_addr_t get_max_mapped(void)
 extern unsigned long init_memory_mapping(unsigned long start,
 					  unsigned long end);
 
-void init_memory_mapping_high(void);
-
 extern void initmem_init(void);
 extern void free_initmem(void);
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 46e684f85b36..c3a606c41ce0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -963,6 +963,14 @@ void __init setup_arch(char **cmdline_p)
 	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
 
+#ifdef CONFIG_X86_64
+	if (max_pfn > max_low_pfn) {
+		max_pfn_mapped = init_memory_mapping(1UL<<32,
+						     max_pfn<<PAGE_SHIFT);
+		/* can we preseve max_low_pfn ?*/
+		max_low_pfn = max_pfn;
+	}
+#endif
 	memblock.current_limit = get_max_mapped();
 
 	/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 470cc4704a9a..c8813aa39740 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -606,63 +606,9 @@ kernel_physical_mapping_init(unsigned long start,
 void __init initmem_init(void)
 {
 	memblock_x86_register_active_regions(0, 0, max_pfn);
-	init_memory_mapping_high();
 }
 #endif
 
-struct mapping_work_data {
-	unsigned long start;
-	unsigned long end;
-	unsigned long pfn_mapped;
-};
-
-static int __init_refok
-mapping_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax)
-{
-	struct mapping_work_data *data = datax;
-	unsigned long pfn_mapped;
-	unsigned long final_start, final_end;
-
-	final_start = max_t(unsigned long, start_pfn<<PAGE_SHIFT, data->start);
-	final_end = min_t(unsigned long, end_pfn<<PAGE_SHIFT, data->end);
-
-	if (final_end <= final_start)
-		return 0;
-
-	pfn_mapped = init_memory_mapping(final_start, final_end);
-
-	if (pfn_mapped > data->pfn_mapped)
-		data->pfn_mapped = pfn_mapped;
-
-	return 0;
-}
-
-static unsigned long __init_refok
-init_memory_mapping_active_regions(unsigned long start, unsigned long end)
-{
-	struct mapping_work_data data;
-
-	data.start = start;
-	data.end = end;
-	data.pfn_mapped = 0;
-
-	work_with_active_regions(MAX_NUMNODES, mapping_work_fn, &data);
-
-	return data.pfn_mapped;
-}
-
-void __init_refok init_memory_mapping_high(void)
-{
-	if (max_pfn > max_low_pfn) {
-		max_pfn_mapped = init_memory_mapping_active_regions(1UL<<32,
-							 max_pfn<<PAGE_SHIFT);
-		/* can we preserve max_low_pfn ? */
-		max_low_pfn = max_pfn;
-
-		memblock.current_limit = get_max_mapped();
-	}
-}
-
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 74064e8ae79f..86491ba568d9 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -543,8 +543,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 	if (!numa_meminfo_cover_memory(mi))
 		return -EINVAL;
 
-	init_memory_mapping_high();
-
 	/* Finally register nodes. */
 	for_each_node_mask(nid, node_possible_map) {
 		u64 start = (u64)max_pfn << PAGE_SHIFT;