aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Christoph Lameter <clameter@sgi.com> 2008-01-30 07:30:47 -0500
committer Ingo Molnar <mingo@elte.hu> 2008-01-30 07:30:47 -0500
commit b263295dbffd33b0fbff670720fa178c30e3392a (patch)
tree 014b402cb65b17e1521ed356e5c225083ea578dd
parent 4ebd1290ba12121d66285cc06987ca97bcdfc55b (diff)
x86: 64-bit, make sparsemem vmemmap the only memory model
Use sparsemem as the only memory model for UP, SMP and NUMA. Measurements indicate that DISCONTIGMEM has a higher overhead than sparsemem. And FLATMEM's benefits are minimal. So I think it's best to simply standardize on sparsemem.

Results of page allocator tests (test can be had via git from slab git tree branch tests)

Measurements in cycle counts. 1000 allocations were performed and then the average cycle count was calculated.

Order   FlatMem   Discontig   SparseMem
0           639         665         641
1           567         647         593
2           679         774         692
3           763         967         781
4           961        1501         962
5          1356        2344        1392
6          2224        3982        2336
7          4869        7225        5074
8         12500       14048       12732
9         27926       28223       28165
10        58578       58714       58682

(Note that FlatMem is an SMP config and the rest NUMA configurations)

Memory use:

SMP Sparsemem
-------------

Kernel size:

   text    data     bss     dec     hex filename
3849268  397739 1264856 5511863  541ab7 vmlinux

             total       used       free     shared    buffers     cached
Mem:       8242252      41164    8201088          0        352      11512
-/+ buffers/cache:      29300    8212952
Swap:      9775512          0    9775512

SMP Flatmem
-----------

Kernel size:

   text    data     bss     dec     hex filename
3844612  397739 1264536 5506887  540747 vmlinux

So 4.5k growth in text size vs. FLATMEM.

             total       used       free     shared    buffers     cached
Mem:       8244052      40544    8203508          0        352      11484
-/+ buffers/cache:      28708    8215344

2k growth in overall memory use after boot.

NUMA discontig:

   text    data     bss     dec     hex filename
3888124  470659 1276504 5635287  55fcd7 vmlinux

             total       used       free     shared    buffers     cached
Mem:       8256256      56908    8199348          0        352      11496
-/+ buffers/cache:      45060    8211196
Swap:      9775512          0    9775512

NUMA sparse:

   text    data     bss     dec     hex filename
3896428  470659 1276824 5643911  561e87 vmlinux

8k text growth. Given that we fully inline virt_to_page and friends now, that is rather good.

             total       used       free     shared    buffers     cached
Mem:       8264720      57240    8207480          0        352      11516
-/+ buffers/cache:      45372    8219348
Swap:      9775512          0    9775512

The total available memory is increased by 8k.

This patch makes sparsemem the default and removes discontig and flatmem support from x86.
[ akpm@linux-foundation.org: allnoconfig build fix ]

Acked-by: Andi Kleen <ak@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- arch/x86/Kconfig 22
-rw-r--r-- arch/x86/configs/x86_64_defconfig 9
-rw-r--r-- arch/x86/kernel/machine_kexec_64.c 5
-rw-r--r-- arch/x86/mm/init_64.c 28
-rw-r--r-- arch/x86/mm/ioremap_64.c 17
-rw-r--r-- arch/x86/mm/numa_64.c 21
-rw-r--r-- arch/x86/mm/srat_64.c 57
-rw-r--r-- include/asm-x86/mmzone_64.h 6
-rw-r--r-- include/asm-x86/page_64.h 3
9 files changed, 9 insertions, 159 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2f4d88babd36..da98368f66af 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -891,25 +891,29 @@ config HAVE_ARCH_ALLOC_REMAP
891 891
892config ARCH_FLATMEM_ENABLE 892config ARCH_FLATMEM_ENABLE
893 def_bool y 893 def_bool y
894 depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA) 894 depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC
895 895
896config ARCH_DISCONTIGMEM_ENABLE 896config ARCH_DISCONTIGMEM_ENABLE
897 def_bool y 897 def_bool y
898 depends on NUMA 898 depends on NUMA && X86_32
899 899
900config ARCH_DISCONTIGMEM_DEFAULT 900config ARCH_DISCONTIGMEM_DEFAULT
901 def_bool y 901 def_bool y
902 depends on NUMA 902 depends on NUMA && X86_32
903
904config ARCH_SPARSEMEM_DEFAULT
905 def_bool y
906 depends on X86_64
903 907
904config ARCH_SPARSEMEM_ENABLE 908config ARCH_SPARSEMEM_ENABLE
905 def_bool y 909 def_bool y
906 depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64)) 910 depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC)
907 select SPARSEMEM_STATIC if X86_32 911 select SPARSEMEM_STATIC if X86_32
908 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 912 select SPARSEMEM_VMEMMAP_ENABLE if X86_64
909 913
910config ARCH_SELECT_MEMORY_MODEL 914config ARCH_SELECT_MEMORY_MODEL
911 def_bool y 915 def_bool y
912 depends on X86_32 && ARCH_SPARSEMEM_ENABLE 916 depends on ARCH_SPARSEMEM_ENABLE
913 917
914config ARCH_MEMORY_PROBE 918config ARCH_MEMORY_PROBE
915 def_bool X86_64 919 def_bool X86_64
@@ -1207,18 +1211,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
1207 def_bool y 1211 def_bool y
1208 depends on X86_64 || (X86_32 && HIGHMEM) 1212 depends on X86_64 || (X86_32 && HIGHMEM)
1209 1213
1210config MEMORY_HOTPLUG_RESERVE
1211 def_bool X86_64
1212 depends on (MEMORY_HOTPLUG && DISCONTIGMEM)
1213
1214config HAVE_ARCH_EARLY_PFN_TO_NID 1214config HAVE_ARCH_EARLY_PFN_TO_NID
1215 def_bool X86_64 1215 def_bool X86_64
1216 depends on NUMA 1216 depends on NUMA
1217 1217
1218config OUT_OF_LINE_PFN_TO_PAGE
1219 def_bool X86_64
1220 depends on DISCONTIGMEM
1221
1222menu "Power management options" 1218menu "Power management options"
1223 depends on !X86_VOYAGER 1219 depends on !X86_VOYAGER
1224 1220
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 38a83f9c966f..9e2b0ef851de 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -145,15 +145,6 @@ CONFIG_K8_NUMA=y
145CONFIG_NODES_SHIFT=6 145CONFIG_NODES_SHIFT=6
146CONFIG_X86_64_ACPI_NUMA=y 146CONFIG_X86_64_ACPI_NUMA=y
147CONFIG_NUMA_EMU=y 147CONFIG_NUMA_EMU=y
148CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
149CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
150CONFIG_ARCH_SPARSEMEM_ENABLE=y
151CONFIG_SELECT_MEMORY_MODEL=y
152# CONFIG_FLATMEM_MANUAL is not set
153CONFIG_DISCONTIGMEM_MANUAL=y
154# CONFIG_SPARSEMEM_MANUAL is not set
155CONFIG_DISCONTIGMEM=y
156CONFIG_FLAT_NODE_MEM_MAP=y
157CONFIG_NEED_MULTIPLE_NODES=y 148CONFIG_NEED_MULTIPLE_NODES=y
158# CONFIG_SPARSEMEM_STATIC is not set 149# CONFIG_SPARSEMEM_STATIC is not set
159CONFIG_SPLIT_PTLOCK_CPUS=4 150CONFIG_SPLIT_PTLOCK_CPUS=4
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index aa3d2c8f7737..a1fef42f8cdb 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -234,10 +234,5 @@ NORET_TYPE void machine_kexec(struct kimage *image)
234void arch_crash_save_vmcoreinfo(void) 234void arch_crash_save_vmcoreinfo(void)
235{ 235{
236 VMCOREINFO_SYMBOL(init_level4_pgt); 236 VMCOREINFO_SYMBOL(init_level4_pgt);
237
238#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
239 VMCOREINFO_SYMBOL(node_data);
240 VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
241#endif
242} 237}
243 238
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0fbb657a8b19..251eeb325ae3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -486,34 +486,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
486 486
487#endif /* CONFIG_MEMORY_HOTPLUG */ 487#endif /* CONFIG_MEMORY_HOTPLUG */
488 488
489#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
490/*
491 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
492 * just online the pages.
493 */
494int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
495{
496 int err = -EIO;
497 unsigned long pfn;
498 unsigned long total = 0, mem = 0;
499 for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
500 if (pfn_valid(pfn)) {
501 online_page(pfn_to_page(pfn));
502 err = 0;
503 mem++;
504 }
505 total++;
506 }
507 if (!err) {
508 z->spanned_pages += total;
509 z->present_pages += mem;
510 z->zone_pgdat->node_spanned_pages += total;
511 z->zone_pgdat->node_present_pages += mem;
512 }
513 return err;
514}
515#endif
516
517static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, 489static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
518 kcore_vsyscall; 490 kcore_vsyscall;
519 491
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c
index 6cac90aa5032..b03db4ca9cad 100644
--- a/arch/x86/mm/ioremap_64.c
+++ b/arch/x86/mm/ioremap_64.c
@@ -86,23 +86,6 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
86 if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) 86 if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
87 return (__force void __iomem *)phys_to_virt(phys_addr); 87 return (__force void __iomem *)phys_to_virt(phys_addr);
88 88
89#ifdef CONFIG_FLATMEM
90 /*
91 * Don't allow anybody to remap normal RAM that we're using..
92 */
93 if (last_addr < virt_to_phys(high_memory)) {
94 char *t_addr, *t_end;
95 struct page *page;
96
97 t_addr = __va(phys_addr);
98 t_end = t_addr + (size - 1);
99
100 for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
101 if(!PageReserved(page))
102 return NULL;
103 }
104#endif
105
106 pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL 89 pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL
107 | _PAGE_DIRTY | _PAGE_ACCESSED | flags); 90 | _PAGE_DIRTY | _PAGE_ACCESSED | flags);
108 /* 91 /*
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 29b69300aee2..46b4b5e1a02a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -153,12 +153,10 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
153 return shift; 153 return shift;
154} 154}
155 155
156#ifdef CONFIG_SPARSEMEM
157int early_pfn_to_nid(unsigned long pfn) 156int early_pfn_to_nid(unsigned long pfn)
158{ 157{
159 return phys_to_nid(pfn << PAGE_SHIFT); 158 return phys_to_nid(pfn << PAGE_SHIFT);
160} 159}
161#endif
162 160
163static void * __init early_node_mem(int nodeid, unsigned long start, 161static void * __init early_node_mem(int nodeid, unsigned long start,
164 unsigned long end, unsigned long size) 162 unsigned long end, unsigned long size)
@@ -635,23 +633,4 @@ void __init init_cpu_to_node(void)
635 } 633 }
636} 634}
637 635
638#ifdef CONFIG_DISCONTIGMEM
639/*
640 * Functions to convert PFNs from/to per node page addresses.
641 * These are out of line because they are quite big.
642 * They could be all tuned by pre caching more state.
643 * Should do that.
644 */
645 636
646int pfn_valid(unsigned long pfn)
647{
648 unsigned nid;
649 if (pfn >= num_physpages)
650 return 0;
651 nid = pfn_to_nid(pfn);
652 if (nid == 0xff)
653 return 0;
654 return pfn >= node_start_pfn(nid) && (pfn) < node_end_pfn(nid);
655}
656EXPORT_SYMBOL(pfn_valid);
657#endif
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index ea85172fc0cc..9be14171144b 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -151,62 +151,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
151 pxm, pa->apic_id, node); 151 pxm, pa->apic_id, node);
152} 152}
153 153
154#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
155/*
156 * Protect against too large hotadd areas that would fill up memory.
157 */
158static int hotadd_enough_memory(struct bootnode *nd)
159{
160 static unsigned long allocated;
161 static unsigned long last_area_end;
162 unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT;
163 long mem = pages * sizeof(struct page);
164 unsigned long addr;
165 unsigned long allowed;
166 unsigned long oldpages = pages;
167
168 if (mem < 0)
169 return 0;
170 allowed = (end_pfn - absent_pages_in_range(0, end_pfn)) * PAGE_SIZE;
171 allowed = (allowed / 100) * hotadd_percent;
172 if (allocated + mem > allowed) {
173 unsigned long range;
174 /* Give them at least part of their hotadd memory upto hotadd_percent
175 It would be better to spread the limit out
176 over multiple hotplug areas, but that is too complicated
177 right now */
178 if (allocated >= allowed)
179 return 0;
180 range = allowed - allocated;
181 pages = (range / PAGE_SIZE);
182 mem = pages * sizeof(struct page);
183 nd->end = nd->start + range;
184 }
185 /* Not completely fool proof, but a good sanity check */
186 addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
187 if (addr == -1UL)
188 return 0;
189 if (pages != oldpages)
190 printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n",
191 pages << PAGE_SHIFT);
192 last_area_end = addr + mem;
193 allocated += mem;
194 return 1;
195}
196
197static int update_end_of_memory(unsigned long end)
198{
199 found_add_area = 1;
200 if ((end >> PAGE_SHIFT) > end_pfn)
201 end_pfn = end >> PAGE_SHIFT;
202 return 1;
203}
204
205static inline int save_add_info(void)
206{
207 return hotadd_percent > 0;
208}
209#else
210int update_end_of_memory(unsigned long end) {return -1;} 154int update_end_of_memory(unsigned long end) {return -1;}
211static int hotadd_enough_memory(struct bootnode *nd) {return 1;} 155static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
212#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 156#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
@@ -214,7 +158,6 @@ static inline int save_add_info(void) {return 1;}
214#else 158#else
215static inline int save_add_info(void) {return 0;} 159static inline int save_add_info(void) {return 0;}
216#endif 160#endif
217#endif
218/* 161/*
219 * Update nodes_add and decide if to include add are in the zone. 162 * Update nodes_add and decide if to include add are in the zone.
220 * Both SPARSE and RESERVE need nodes_add infomation. 163 * Both SPARSE and RESERVE need nodes_add infomation.
diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h
index 1e0ed34a6adc..b0c25ae111d9 100644
--- a/include/asm-x86/mmzone_64.h
+++ b/include/asm-x86/mmzone_64.h
@@ -43,12 +43,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
43 43
44extern int early_pfn_to_nid(unsigned long pfn); 44extern int early_pfn_to_nid(unsigned long pfn);
45 45
46#ifdef CONFIG_DISCONTIGMEM
47#define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
48
49extern int pfn_valid(unsigned long pfn);
50#endif
51
52#ifdef CONFIG_NUMA_EMU 46#ifdef CONFIG_NUMA_EMU
53#define FAKE_NODE_MIN_SIZE (64*1024*1024) 47#define FAKE_NODE_MIN_SIZE (64*1024*1024)
54#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1uL)) 48#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1uL))
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 6fdc904a5fa5..d400167c5509 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -122,9 +122,6 @@ extern unsigned long __phys_addr(unsigned long);
122#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) 122#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
123#define __boot_va(x) __va(x) 123#define __boot_va(x) __va(x)
124#define __boot_pa(x) __pa(x) 124#define __boot_pa(x) __pa(x)
125#ifdef CONFIG_FLATMEM
126#define pfn_valid(pfn) ((pfn) < end_pfn)
127#endif
128 125
129#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) 126#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
130#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) 127#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)