aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMel Gorman <mel@csn.ul.ie>2008-01-30 07:32:54 -0500
committerIngo Molnar <mingo@elte.hu>2008-01-30 07:32:54 -0500
commitbac4894dfa9c75c297b905e2ea88caaa5768f1e2 (patch)
tree09310d1824cf279e4322cf95e4ae407402d47efb
parent87e8407f9ad2a2df901c4b690ab0a2bf0fb168c5 (diff)
x86: make NUMA work on 32-bit again
On 32-bit NUMA, the memmap representing struct pages on each node is allocated from node-local memory if possible. As only node-0 has memory from ZONE_NORMAL, the memmap must be mapped into low memory. This is done by reserving space in the Kernel Virtual Area (KVA) for the memmap belonging to other nodes by taking pages from the end of ZONE_NORMAL and remapping the other nodes' memmap into those virtual addresses. The node boundaries are then adjusted so that the region of pages is not used and it is marked as reserved in the bootmem allocator. This reserved portion of the KVA is PMD aligned, although strictly speaking that requirement could be lifted (see thread at http://lkml.org/lkml/2007/8/24/220). The problem is that when aligned, there may be a portion of ZONE_NORMAL at the end that is not used for memmap, does not have an initialised memmap, and is not marked reserved in the bootmem allocator. Later in the boot process, these pages are freed and a storm of "Bad page state" messages results. This patch marks the pages that are wasted due to alignment as reserved in the bootmem allocator so they are not accidentally freed. It is worth noting that memory from node-0 is wasted where it could have been put into ZONE_HIGHMEM on NUMA machines. Worse, the KVA is always reserved from the location of real memory even when there is plenty of spare virtual address space. This patch also makes sure that reserve_bootmem() is not called with a 0-length size in numa_kva_reserve(). When this happens, it usually means that a kernel built for Summit is being booted on a normal machine. The resulting BUG_ON() is misleading so it is caught here. Signed-off-by: Mel Gorman <mel@csn.ul.ie> Signed-off-by: Andy Whitcroft <apw@shadowen.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/mm/discontig_32.c13
1 files changed, 11 insertions, 2 deletions
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 88a7499e8e48..9f1d02cfde37 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -268,6 +268,7 @@ unsigned long __init setup_memory(void)
268{ 268{
269 int nid; 269 int nid;
270 unsigned long system_start_pfn, system_max_low_pfn; 270 unsigned long system_start_pfn, system_max_low_pfn;
271 unsigned long wasted_pages;
271 272
272 /* 273 /*
273 * When mapping a NUMA machine we allocate the node_mem_map arrays 274 * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -292,7 +293,14 @@ unsigned long __init setup_memory(void)
292 kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET) 293 kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET)
293 - kva_pages; 294 - kva_pages;
294#endif 295#endif
295 kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1); 296
297 /*
298 * We waste pages past at the end of the KVA for no good reason other
299 * than how it is located. This is bad.
300 */
301 wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1);
302 kva_start_pfn -= wasted_pages;
303 kva_pages += wasted_pages;
296 304
297 system_max_low_pfn = max_low_pfn = find_max_low_pfn(); 305 system_max_low_pfn = max_low_pfn = find_max_low_pfn();
298 printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", 306 printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
@@ -345,7 +353,8 @@ unsigned long __init setup_memory(void)
345 353
346void __init numa_kva_reserve(void) 354void __init numa_kva_reserve(void)
347{ 355{
348 reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages)); 356 if (kva_pages)
357 reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages));
349} 358}
350 359
351void __init zone_sizes_init(void) 360void __init zone_sizes_init(void)