aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: keith mannthey <kmannth@us.ibm.com> 2006-09-26 02:31:03 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-09-26 11:48:45 -0400
commit: 91023300057e96de7f46e95166a3e02394ae72f9 (patch)
tree: b28306089d7f5631bb023c7657808380359df316
parent: b221385bc41d6789edde3d2fa0cb20d5045730eb (diff)
[PATCH] convert i386 NUMA KVA space to bootmem
Address a long-standing issue of booting with an initrd on an i386 NUMA system. Currently (and always) the NUMA KVA area is mapped into low memory by finding the end of low memory and moving that mark down (thus creating space for the KVA). The issue with this is that GRUB loads initrds into this same region, so when the kernel checks the initrd it finds it outside max_low_pfn and disables it (it thinks the initrd is not mapped into usable memory). Thus initrd-enabled kernels can't boot on i386 NUMA :( My solution to the problem just converts the NUMA KVA area to use the bootmem allocator to reserve its area (instead of moving the end of low memory). Using bootmem allows the KVA area to be mapped into more diverse addresses (not just the end of low memory) and enables the KVA area to be mapped below the initrd if present. I have tested this patch on numaq (no initrd) and summit (initrd) i386 NUMA-based systems. [akpm@osdl.org: cleanups] Signed-off-by: Keith Mannthey <kmannth@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- arch/i386/kernel/setup.c 3
-rw-r--r-- arch/i386/mm/discontig.c 29
-rw-r--r-- include/asm-i386/mmzone.h 6
3 files changed, 29 insertions, 9 deletions
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index f1682206d304..27d4dc0d3ef1 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -53,6 +53,7 @@
53#include <asm/apic.h> 53#include <asm/apic.h>
54#include <asm/e820.h> 54#include <asm/e820.h>
55#include <asm/mpspec.h> 55#include <asm/mpspec.h>
56#include <asm/mmzone.h>
56#include <asm/setup.h> 57#include <asm/setup.h>
57#include <asm/arch_hooks.h> 58#include <asm/arch_hooks.h>
58#include <asm/sections.h> 59#include <asm/sections.h>
@@ -1258,7 +1259,7 @@ void __init setup_bootmem_allocator(void)
1258 */ 1259 */
1259 find_smp_config(); 1260 find_smp_config();
1260#endif 1261#endif
1261 1262 numa_kva_reserve();
1262#ifdef CONFIG_BLK_DEV_INITRD 1263#ifdef CONFIG_BLK_DEV_INITRD
1263 if (LOADER_TYPE && INITRD_START) { 1264 if (LOADER_TYPE && INITRD_START) {
1264 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { 1265 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 7c392dc553b8..2e36eff8aff9 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
117 117
118void *node_remap_end_vaddr[MAX_NUMNODES]; 118void *node_remap_end_vaddr[MAX_NUMNODES];
119void *node_remap_alloc_vaddr[MAX_NUMNODES]; 119void *node_remap_alloc_vaddr[MAX_NUMNODES];
120 120static unsigned long kva_start_pfn;
121static unsigned long kva_pages;
121/* 122/*
122 * FLAT - support for basic PC memory model with discontig enabled, essentially 123 * FLAT - support for basic PC memory model with discontig enabled, essentially
123 * a single node with all available processors in it with a flat 124 * a single node with all available processors in it with a flat
@@ -286,7 +287,6 @@ unsigned long __init setup_memory(void)
286{ 287{
287 int nid; 288 int nid;
288 unsigned long system_start_pfn, system_max_low_pfn; 289 unsigned long system_start_pfn, system_max_low_pfn;
289 unsigned long reserve_pages;
290 290
291 /* 291 /*
292 * When mapping a NUMA machine we allocate the node_mem_map arrays 292 * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -298,14 +298,23 @@ unsigned long __init setup_memory(void)
298 find_max_pfn(); 298 find_max_pfn();
299 get_memcfg_numa(); 299 get_memcfg_numa();
300 300
301 reserve_pages = calculate_numa_remap_pages(); 301 kva_pages = calculate_numa_remap_pages();
302 302
303 /* partially used pages are not usable - thus round upwards */ 303 /* partially used pages are not usable - thus round upwards */
304 system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); 304 system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
305 305
306 system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; 306 kva_start_pfn = find_max_low_pfn() - kva_pages;
307 printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", 307
308 reserve_pages, max_low_pfn + reserve_pages); 308#ifdef CONFIG_BLK_DEV_INITRD
309 /* Numa kva area is below the initrd */
310 if (LOADER_TYPE && INITRD_START)
311 kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages;
312#endif
313 kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1);
314
315 system_max_low_pfn = max_low_pfn = find_max_low_pfn();
316 printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
317 kva_start_pfn, max_low_pfn);
309 printk("max_pfn = %ld\n", max_pfn); 318 printk("max_pfn = %ld\n", max_pfn);
310#ifdef CONFIG_HIGHMEM 319#ifdef CONFIG_HIGHMEM
311 highstart_pfn = highend_pfn = max_pfn; 320 highstart_pfn = highend_pfn = max_pfn;
@@ -323,7 +332,7 @@ unsigned long __init setup_memory(void)
323 (ulong) pfn_to_kaddr(max_low_pfn)); 332 (ulong) pfn_to_kaddr(max_low_pfn));
324 for_each_online_node(nid) { 333 for_each_online_node(nid) {
325 node_remap_start_vaddr[nid] = pfn_to_kaddr( 334 node_remap_start_vaddr[nid] = pfn_to_kaddr(
326 highstart_pfn + node_remap_offset[nid]); 335 kva_start_pfn + node_remap_offset[nid]);
327 /* Init the node remap allocator */ 336 /* Init the node remap allocator */
328 node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + 337 node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
329 (node_remap_size[nid] * PAGE_SIZE); 338 (node_remap_size[nid] * PAGE_SIZE);
@@ -338,7 +347,6 @@ unsigned long __init setup_memory(void)
338 } 347 }
339 printk("High memory starts at vaddr %08lx\n", 348 printk("High memory starts at vaddr %08lx\n",
340 (ulong) pfn_to_kaddr(highstart_pfn)); 349 (ulong) pfn_to_kaddr(highstart_pfn));
341 vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
342 for_each_online_node(nid) 350 for_each_online_node(nid)
343 find_max_pfn_node(nid); 351 find_max_pfn_node(nid);
344 352
@@ -348,6 +356,11 @@ unsigned long __init setup_memory(void)
348 return max_low_pfn; 356 return max_low_pfn;
349} 357}
350 358
359void __init numa_kva_reserve(void)
360{
361 reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages));
362}
363
351void __init zone_sizes_init(void) 364void __init zone_sizes_init(void)
352{ 365{
353 int nid; 366 int nid;
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index 22cb07cc8f32..61b073322006 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -38,10 +38,16 @@ static inline void get_memcfg_numa(void)
38} 38}
39 39
40extern int early_pfn_to_nid(unsigned long pfn); 40extern int early_pfn_to_nid(unsigned long pfn);
41extern void numa_kva_reserve(void);
41 42
42#else /* !CONFIG_NUMA */ 43#else /* !CONFIG_NUMA */
44
43#define get_memcfg_numa get_memcfg_numa_flat 45#define get_memcfg_numa get_memcfg_numa_flat
44#define get_zholes_size(n) (0) 46#define get_zholes_size(n) (0)
47
48static inline void numa_kva_reserve(void)
49{
50}
45#endif /* CONFIG_NUMA */ 51#endif /* CONFIG_NUMA */
46 52
47#ifdef CONFIG_DISCONTIGMEM 53#ifdef CONFIG_DISCONTIGMEM