diff options
author | Christoph Lameter <clameter@sgi.com> | 2008-01-30 07:30:47 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-30 07:30:47 -0500 |
commit | b263295dbffd33b0fbff670720fa178c30e3392a (patch) | |
tree | 014b402cb65b17e1521ed356e5c225083ea578dd /arch | |
parent | 4ebd1290ba12121d66285cc06987ca97bcdfc55b (diff) |
x86: 64-bit, make sparsemem vmemmap the only memory model
Use sparsemem as the only memory model for UP, SMP and NUMA. Measurements
indicate that DISCONTIGMEM has a higher overhead than sparsemem. And
FLATMEM's benefits are minimal. So I think it's best to simply standardize
on sparsemem.
Results of page allocator tests (the tests are available via git from the
"tests" branch of the slab git tree):
Measurements in cycle counts. 1000 allocations were performed and then the
average cycle count was calculated.
Order FlatMem Discontig SparseMem
0 639 665 641
1 567 647 593
2 679 774 692
3 763 967 781
4 961 1501 962
5 1356 2344 1392
6 2224 3982 2336
7 4869 7225 5074
8 12500 14048 12732
9 27926 28223 28165
10 58578 58714 58682
(Note that FlatMem is an SMP config and the rest are NUMA configurations.)
Memory use:
SMP Sparsemem
-------------
Kernel size:
text data bss dec hex filename
3849268 397739 1264856 5511863 541ab7 vmlinux
total used free shared buffers cached
Mem: 8242252 41164 8201088 0 352 11512
-/+ buffers/cache: 29300 8212952
Swap: 9775512 0 9775512
SMP Flatmem
-----------
Kernel size:
text data bss dec hex filename
3844612 397739 1264536 5506887 540747 vmlinux
So 4.5k growth in text size vs. FLATMEM.
total used free shared buffers cached
Mem: 8244052 40544 8203508 0 352 11484
-/+ buffers/cache: 28708 8215344
2k growth in overall memory use after boot.
NUMA discontig:
text data bss dec hex filename
3888124 470659 1276504 5635287 55fcd7 vmlinux
total used free shared buffers cached
Mem: 8256256 56908 8199348 0 352 11496
-/+ buffers/cache: 45060 8211196
Swap: 9775512 0 9775512
NUMA sparse:
text data bss dec hex filename
3896428 470659 1276824 5643911 561e87 vmlinux
8k text growth. Given that we fully inline virt_to_page and friends now,
that is rather good.
total used free shared buffers cached
Mem: 8264720 57240 8207480 0 352 11516
-/+ buffers/cache: 45372 8219348
Swap: 9775512 0 9775512
The total available memory is increased by 8k.
This patch makes sparsemem the default and removes discontig and
flatmem support from 64-bit x86 (32-bit x86 keeps both models).
[ akpm@linux-foundation.org: allnoconfig build fix ]
Acked-by: Andi Kleen <ak@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/Kconfig | 22 | ||||
-rw-r--r-- | arch/x86/configs/x86_64_defconfig | 9 | ||||
-rw-r--r-- | arch/x86/kernel/machine_kexec_64.c | 5 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 28 | ||||
-rw-r--r-- | arch/x86/mm/ioremap_64.c | 17 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 21 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 57 |
7 files changed, 9 insertions, 150 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2f4d88babd36..da98368f66af 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -891,25 +891,29 @@ config HAVE_ARCH_ALLOC_REMAP | |||
891 | 891 | ||
892 | config ARCH_FLATMEM_ENABLE | 892 | config ARCH_FLATMEM_ENABLE |
893 | def_bool y | 893 | def_bool y |
894 | depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA) | 894 | depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC |
895 | 895 | ||
896 | config ARCH_DISCONTIGMEM_ENABLE | 896 | config ARCH_DISCONTIGMEM_ENABLE |
897 | def_bool y | 897 | def_bool y |
898 | depends on NUMA | 898 | depends on NUMA && X86_32 |
899 | 899 | ||
900 | config ARCH_DISCONTIGMEM_DEFAULT | 900 | config ARCH_DISCONTIGMEM_DEFAULT |
901 | def_bool y | 901 | def_bool y |
902 | depends on NUMA | 902 | depends on NUMA && X86_32 |
903 | |||
904 | config ARCH_SPARSEMEM_DEFAULT | ||
905 | def_bool y | ||
906 | depends on X86_64 | ||
903 | 907 | ||
904 | config ARCH_SPARSEMEM_ENABLE | 908 | config ARCH_SPARSEMEM_ENABLE |
905 | def_bool y | 909 | def_bool y |
906 | depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64)) | 910 | depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) |
907 | select SPARSEMEM_STATIC if X86_32 | 911 | select SPARSEMEM_STATIC if X86_32 |
908 | select SPARSEMEM_VMEMMAP_ENABLE if X86_64 | 912 | select SPARSEMEM_VMEMMAP_ENABLE if X86_64 |
909 | 913 | ||
910 | config ARCH_SELECT_MEMORY_MODEL | 914 | config ARCH_SELECT_MEMORY_MODEL |
911 | def_bool y | 915 | def_bool y |
912 | depends on X86_32 && ARCH_SPARSEMEM_ENABLE | 916 | depends on ARCH_SPARSEMEM_ENABLE |
913 | 917 | ||
914 | config ARCH_MEMORY_PROBE | 918 | config ARCH_MEMORY_PROBE |
915 | def_bool X86_64 | 919 | def_bool X86_64 |
@@ -1207,18 +1211,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG | |||
1207 | def_bool y | 1211 | def_bool y |
1208 | depends on X86_64 || (X86_32 && HIGHMEM) | 1212 | depends on X86_64 || (X86_32 && HIGHMEM) |
1209 | 1213 | ||
1210 | config MEMORY_HOTPLUG_RESERVE | ||
1211 | def_bool X86_64 | ||
1212 | depends on (MEMORY_HOTPLUG && DISCONTIGMEM) | ||
1213 | |||
1214 | config HAVE_ARCH_EARLY_PFN_TO_NID | 1214 | config HAVE_ARCH_EARLY_PFN_TO_NID |
1215 | def_bool X86_64 | 1215 | def_bool X86_64 |
1216 | depends on NUMA | 1216 | depends on NUMA |
1217 | 1217 | ||
1218 | config OUT_OF_LINE_PFN_TO_PAGE | ||
1219 | def_bool X86_64 | ||
1220 | depends on DISCONTIGMEM | ||
1221 | |||
1222 | menu "Power management options" | 1218 | menu "Power management options" |
1223 | depends on !X86_VOYAGER | 1219 | depends on !X86_VOYAGER |
1224 | 1220 | ||
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 38a83f9c966f..9e2b0ef851de 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -145,15 +145,6 @@ CONFIG_K8_NUMA=y | |||
145 | CONFIG_NODES_SHIFT=6 | 145 | CONFIG_NODES_SHIFT=6 |
146 | CONFIG_X86_64_ACPI_NUMA=y | 146 | CONFIG_X86_64_ACPI_NUMA=y |
147 | CONFIG_NUMA_EMU=y | 147 | CONFIG_NUMA_EMU=y |
148 | CONFIG_ARCH_DISCONTIGMEM_ENABLE=y | ||
149 | CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y | ||
150 | CONFIG_ARCH_SPARSEMEM_ENABLE=y | ||
151 | CONFIG_SELECT_MEMORY_MODEL=y | ||
152 | # CONFIG_FLATMEM_MANUAL is not set | ||
153 | CONFIG_DISCONTIGMEM_MANUAL=y | ||
154 | # CONFIG_SPARSEMEM_MANUAL is not set | ||
155 | CONFIG_DISCONTIGMEM=y | ||
156 | CONFIG_FLAT_NODE_MEM_MAP=y | ||
157 | CONFIG_NEED_MULTIPLE_NODES=y | 148 | CONFIG_NEED_MULTIPLE_NODES=y |
158 | # CONFIG_SPARSEMEM_STATIC is not set | 149 | # CONFIG_SPARSEMEM_STATIC is not set |
159 | CONFIG_SPLIT_PTLOCK_CPUS=4 | 150 | CONFIG_SPLIT_PTLOCK_CPUS=4 |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index aa3d2c8f7737..a1fef42f8cdb 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -234,10 +234,5 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
234 | void arch_crash_save_vmcoreinfo(void) | 234 | void arch_crash_save_vmcoreinfo(void) |
235 | { | 235 | { |
236 | VMCOREINFO_SYMBOL(init_level4_pgt); | 236 | VMCOREINFO_SYMBOL(init_level4_pgt); |
237 | |||
238 | #ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE | ||
239 | VMCOREINFO_SYMBOL(node_data); | ||
240 | VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); | ||
241 | #endif | ||
242 | } | 237 | } |
243 | 238 | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0fbb657a8b19..251eeb325ae3 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -486,34 +486,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | |||
486 | 486 | ||
487 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 487 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
488 | 488 | ||
489 | #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE | ||
490 | /* | ||
491 | * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance, | ||
492 | * just online the pages. | ||
493 | */ | ||
494 | int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages) | ||
495 | { | ||
496 | int err = -EIO; | ||
497 | unsigned long pfn; | ||
498 | unsigned long total = 0, mem = 0; | ||
499 | for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { | ||
500 | if (pfn_valid(pfn)) { | ||
501 | online_page(pfn_to_page(pfn)); | ||
502 | err = 0; | ||
503 | mem++; | ||
504 | } | ||
505 | total++; | ||
506 | } | ||
507 | if (!err) { | ||
508 | z->spanned_pages += total; | ||
509 | z->present_pages += mem; | ||
510 | z->zone_pgdat->node_spanned_pages += total; | ||
511 | z->zone_pgdat->node_present_pages += mem; | ||
512 | } | ||
513 | return err; | ||
514 | } | ||
515 | #endif | ||
516 | |||
517 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, | 489 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, |
518 | kcore_vsyscall; | 490 | kcore_vsyscall; |
519 | 491 | ||
diff --git a/arch/x86/mm/ioremap_64.c b/arch/x86/mm/ioremap_64.c index 6cac90aa5032..b03db4ca9cad 100644 --- a/arch/x86/mm/ioremap_64.c +++ b/arch/x86/mm/ioremap_64.c | |||
@@ -86,23 +86,6 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l | |||
86 | if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) | 86 | if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) |
87 | return (__force void __iomem *)phys_to_virt(phys_addr); | 87 | return (__force void __iomem *)phys_to_virt(phys_addr); |
88 | 88 | ||
89 | #ifdef CONFIG_FLATMEM | ||
90 | /* | ||
91 | * Don't allow anybody to remap normal RAM that we're using.. | ||
92 | */ | ||
93 | if (last_addr < virt_to_phys(high_memory)) { | ||
94 | char *t_addr, *t_end; | ||
95 | struct page *page; | ||
96 | |||
97 | t_addr = __va(phys_addr); | ||
98 | t_end = t_addr + (size - 1); | ||
99 | |||
100 | for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) | ||
101 | if(!PageReserved(page)) | ||
102 | return NULL; | ||
103 | } | ||
104 | #endif | ||
105 | |||
106 | pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL | 89 | pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL |
107 | | _PAGE_DIRTY | _PAGE_ACCESSED | flags); | 90 | | _PAGE_DIRTY | _PAGE_ACCESSED | flags); |
108 | /* | 91 | /* |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 29b69300aee2..46b4b5e1a02a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -153,12 +153,10 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | |||
153 | return shift; | 153 | return shift; |
154 | } | 154 | } |
155 | 155 | ||
156 | #ifdef CONFIG_SPARSEMEM | ||
157 | int early_pfn_to_nid(unsigned long pfn) | 156 | int early_pfn_to_nid(unsigned long pfn) |
158 | { | 157 | { |
159 | return phys_to_nid(pfn << PAGE_SHIFT); | 158 | return phys_to_nid(pfn << PAGE_SHIFT); |
160 | } | 159 | } |
161 | #endif | ||
162 | 160 | ||
163 | static void * __init early_node_mem(int nodeid, unsigned long start, | 161 | static void * __init early_node_mem(int nodeid, unsigned long start, |
164 | unsigned long end, unsigned long size) | 162 | unsigned long end, unsigned long size) |
@@ -635,23 +633,4 @@ void __init init_cpu_to_node(void) | |||
635 | } | 633 | } |
636 | } | 634 | } |
637 | 635 | ||
638 | #ifdef CONFIG_DISCONTIGMEM | ||
639 | /* | ||
640 | * Functions to convert PFNs from/to per node page addresses. | ||
641 | * These are out of line because they are quite big. | ||
642 | * They could be all tuned by pre caching more state. | ||
643 | * Should do that. | ||
644 | */ | ||
645 | 636 | ||
646 | int pfn_valid(unsigned long pfn) | ||
647 | { | ||
648 | unsigned nid; | ||
649 | if (pfn >= num_physpages) | ||
650 | return 0; | ||
651 | nid = pfn_to_nid(pfn); | ||
652 | if (nid == 0xff) | ||
653 | return 0; | ||
654 | return pfn >= node_start_pfn(nid) && (pfn) < node_end_pfn(nid); | ||
655 | } | ||
656 | EXPORT_SYMBOL(pfn_valid); | ||
657 | #endif | ||
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index ea85172fc0cc..9be14171144b 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -151,62 +151,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
151 | pxm, pa->apic_id, node); | 151 | pxm, pa->apic_id, node); |
152 | } | 152 | } |
153 | 153 | ||
154 | #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE | ||
155 | /* | ||
156 | * Protect against too large hotadd areas that would fill up memory. | ||
157 | */ | ||
158 | static int hotadd_enough_memory(struct bootnode *nd) | ||
159 | { | ||
160 | static unsigned long allocated; | ||
161 | static unsigned long last_area_end; | ||
162 | unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT; | ||
163 | long mem = pages * sizeof(struct page); | ||
164 | unsigned long addr; | ||
165 | unsigned long allowed; | ||
166 | unsigned long oldpages = pages; | ||
167 | |||
168 | if (mem < 0) | ||
169 | return 0; | ||
170 | allowed = (end_pfn - absent_pages_in_range(0, end_pfn)) * PAGE_SIZE; | ||
171 | allowed = (allowed / 100) * hotadd_percent; | ||
172 | if (allocated + mem > allowed) { | ||
173 | unsigned long range; | ||
174 | /* Give them at least part of their hotadd memory upto hotadd_percent | ||
175 | It would be better to spread the limit out | ||
176 | over multiple hotplug areas, but that is too complicated | ||
177 | right now */ | ||
178 | if (allocated >= allowed) | ||
179 | return 0; | ||
180 | range = allowed - allocated; | ||
181 | pages = (range / PAGE_SIZE); | ||
182 | mem = pages * sizeof(struct page); | ||
183 | nd->end = nd->start + range; | ||
184 | } | ||
185 | /* Not completely fool proof, but a good sanity check */ | ||
186 | addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem); | ||
187 | if (addr == -1UL) | ||
188 | return 0; | ||
189 | if (pages != oldpages) | ||
190 | printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n", | ||
191 | pages << PAGE_SHIFT); | ||
192 | last_area_end = addr + mem; | ||
193 | allocated += mem; | ||
194 | return 1; | ||
195 | } | ||
196 | |||
197 | static int update_end_of_memory(unsigned long end) | ||
198 | { | ||
199 | found_add_area = 1; | ||
200 | if ((end >> PAGE_SHIFT) > end_pfn) | ||
201 | end_pfn = end >> PAGE_SHIFT; | ||
202 | return 1; | ||
203 | } | ||
204 | |||
205 | static inline int save_add_info(void) | ||
206 | { | ||
207 | return hotadd_percent > 0; | ||
208 | } | ||
209 | #else | ||
210 | int update_end_of_memory(unsigned long end) {return -1;} | 154 | int update_end_of_memory(unsigned long end) {return -1;} |
211 | static int hotadd_enough_memory(struct bootnode *nd) {return 1;} | 155 | static int hotadd_enough_memory(struct bootnode *nd) {return 1;} |
212 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 156 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
@@ -214,7 +158,6 @@ static inline int save_add_info(void) {return 1;} | |||
214 | #else | 158 | #else |
215 | static inline int save_add_info(void) {return 0;} | 159 | static inline int save_add_info(void) {return 0;} |
216 | #endif | 160 | #endif |
217 | #endif | ||
218 | /* | 161 | /* |
219 | * Update nodes_add and decide if to include add are in the zone. | 162 | * Update nodes_add and decide if to include add are in the zone. |
220 | * Both SPARSE and RESERVE need nodes_add infomation. | 163 | * Both SPARSE and RESERVE need nodes_add infomation. |