diff options
author | keith mannthey <kmannth@us.ibm.com> | 2006-09-26 02:31:03 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-09-26 11:48:45 -0400 |
commit | 91023300057e96de7f46e95166a3e02394ae72f9 (patch) | |
tree | b28306089d7f5631bb023c7657808380359df316 | |
parent | b221385bc41d6789edde3d2fa0cb20d5045730eb (diff) |
[PATCH] convert i386 NUMA KVA space to bootmem
Address a long-standing issue of booting with an initrd on an i386 NUMA
system. Currently (and always) the NUMA KVA area is mapped into low memory
by finding the end of low memory and moving that mark down (thus creating
space for the KVA). The issue with this is that GRUB loads initrds into
this same area, so when the kernel checks the initrd it finds it outside
max_low_pfn and disables it (it thinks the initrd is not mapped into usable
memory); thus initrd-enabled kernels can't boot on i386 NUMA :(
My solution to the problem just converts the NUMA KVA area to use the
bootmem allocator to reserve its area (instead of moving the end of low
memory). Using bootmem allows the KVA area to be mapped into more diverse
addresses (not just the end of low memory) and enables the KVA area to be
mapped below the initrd if present.
I have tested this patch on NUMA-Q (no initrd) and Summit (initrd) i386
NUMA-based systems.
[akpm@osdl.org: cleanups]
Signed-off-by: Keith Mannthey <kmannth@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/i386/kernel/setup.c | 3 | ||||
-rw-r--r-- | arch/i386/mm/discontig.c | 29 | ||||
-rw-r--r-- | include/asm-i386/mmzone.h | 6 |
3 files changed, 29 insertions, 9 deletions
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index f1682206d304..27d4dc0d3ef1 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <asm/apic.h> | 53 | #include <asm/apic.h> |
54 | #include <asm/e820.h> | 54 | #include <asm/e820.h> |
55 | #include <asm/mpspec.h> | 55 | #include <asm/mpspec.h> |
56 | #include <asm/mmzone.h> | ||
56 | #include <asm/setup.h> | 57 | #include <asm/setup.h> |
57 | #include <asm/arch_hooks.h> | 58 | #include <asm/arch_hooks.h> |
58 | #include <asm/sections.h> | 59 | #include <asm/sections.h> |
@@ -1258,7 +1259,7 @@ void __init setup_bootmem_allocator(void) | |||
1258 | */ | 1259 | */ |
1259 | find_smp_config(); | 1260 | find_smp_config(); |
1260 | #endif | 1261 | #endif |
1261 | 1262 | numa_kva_reserve(); | |
1262 | #ifdef CONFIG_BLK_DEV_INITRD | 1263 | #ifdef CONFIG_BLK_DEV_INITRD |
1263 | if (LOADER_TYPE && INITRD_START) { | 1264 | if (LOADER_TYPE && INITRD_START) { |
1264 | if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { | 1265 | if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { |
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 7c392dc553b8..2e36eff8aff9 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c | |||
@@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | |||
117 | 117 | ||
118 | void *node_remap_end_vaddr[MAX_NUMNODES]; | 118 | void *node_remap_end_vaddr[MAX_NUMNODES]; |
119 | void *node_remap_alloc_vaddr[MAX_NUMNODES]; | 119 | void *node_remap_alloc_vaddr[MAX_NUMNODES]; |
120 | 120 | static unsigned long kva_start_pfn; | |
121 | static unsigned long kva_pages; | ||
121 | /* | 122 | /* |
122 | * FLAT - support for basic PC memory model with discontig enabled, essentially | 123 | * FLAT - support for basic PC memory model with discontig enabled, essentially |
123 | * a single node with all available processors in it with a flat | 124 | * a single node with all available processors in it with a flat |
@@ -286,7 +287,6 @@ unsigned long __init setup_memory(void) | |||
286 | { | 287 | { |
287 | int nid; | 288 | int nid; |
288 | unsigned long system_start_pfn, system_max_low_pfn; | 289 | unsigned long system_start_pfn, system_max_low_pfn; |
289 | unsigned long reserve_pages; | ||
290 | 290 | ||
291 | /* | 291 | /* |
292 | * When mapping a NUMA machine we allocate the node_mem_map arrays | 292 | * When mapping a NUMA machine we allocate the node_mem_map arrays |
@@ -298,14 +298,23 @@ unsigned long __init setup_memory(void) | |||
298 | find_max_pfn(); | 298 | find_max_pfn(); |
299 | get_memcfg_numa(); | 299 | get_memcfg_numa(); |
300 | 300 | ||
301 | reserve_pages = calculate_numa_remap_pages(); | 301 | kva_pages = calculate_numa_remap_pages(); |
302 | 302 | ||
303 | /* partially used pages are not usable - thus round upwards */ | 303 | /* partially used pages are not usable - thus round upwards */ |
304 | system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); | 304 | system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); |
305 | 305 | ||
306 | system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; | 306 | kva_start_pfn = find_max_low_pfn() - kva_pages; |
307 | printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", | 307 | |
308 | reserve_pages, max_low_pfn + reserve_pages); | 308 | #ifdef CONFIG_BLK_DEV_INITRD |
309 | /* Numa kva area is below the initrd */ | ||
310 | if (LOADER_TYPE && INITRD_START) | ||
311 | kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages; | ||
312 | #endif | ||
313 | kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1); | ||
314 | |||
315 | system_max_low_pfn = max_low_pfn = find_max_low_pfn(); | ||
316 | printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", | ||
317 | kva_start_pfn, max_low_pfn); | ||
309 | printk("max_pfn = %ld\n", max_pfn); | 318 | printk("max_pfn = %ld\n", max_pfn); |
310 | #ifdef CONFIG_HIGHMEM | 319 | #ifdef CONFIG_HIGHMEM |
311 | highstart_pfn = highend_pfn = max_pfn; | 320 | highstart_pfn = highend_pfn = max_pfn; |
@@ -323,7 +332,7 @@ unsigned long __init setup_memory(void) | |||
323 | (ulong) pfn_to_kaddr(max_low_pfn)); | 332 | (ulong) pfn_to_kaddr(max_low_pfn)); |
324 | for_each_online_node(nid) { | 333 | for_each_online_node(nid) { |
325 | node_remap_start_vaddr[nid] = pfn_to_kaddr( | 334 | node_remap_start_vaddr[nid] = pfn_to_kaddr( |
326 | highstart_pfn + node_remap_offset[nid]); | 335 | kva_start_pfn + node_remap_offset[nid]); |
327 | /* Init the node remap allocator */ | 336 | /* Init the node remap allocator */ |
328 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + | 337 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + |
329 | (node_remap_size[nid] * PAGE_SIZE); | 338 | (node_remap_size[nid] * PAGE_SIZE); |
@@ -338,7 +347,6 @@ unsigned long __init setup_memory(void) | |||
338 | } | 347 | } |
339 | printk("High memory starts at vaddr %08lx\n", | 348 | printk("High memory starts at vaddr %08lx\n", |
340 | (ulong) pfn_to_kaddr(highstart_pfn)); | 349 | (ulong) pfn_to_kaddr(highstart_pfn)); |
341 | vmalloc_earlyreserve = reserve_pages * PAGE_SIZE; | ||
342 | for_each_online_node(nid) | 350 | for_each_online_node(nid) |
343 | find_max_pfn_node(nid); | 351 | find_max_pfn_node(nid); |
344 | 352 | ||
@@ -348,6 +356,11 @@ unsigned long __init setup_memory(void) | |||
348 | return max_low_pfn; | 356 | return max_low_pfn; |
349 | } | 357 | } |
350 | 358 | ||
359 | void __init numa_kva_reserve(void) | ||
360 | { | ||
361 | reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages)); | ||
362 | } | ||
363 | |||
351 | void __init zone_sizes_init(void) | 364 | void __init zone_sizes_init(void) |
352 | { | 365 | { |
353 | int nid; | 366 | int nid; |
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 22cb07cc8f32..61b073322006 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h | |||
@@ -38,10 +38,16 @@ static inline void get_memcfg_numa(void) | |||
38 | } | 38 | } |
39 | 39 | ||
40 | extern int early_pfn_to_nid(unsigned long pfn); | 40 | extern int early_pfn_to_nid(unsigned long pfn); |
41 | extern void numa_kva_reserve(void); | ||
41 | 42 | ||
42 | #else /* !CONFIG_NUMA */ | 43 | #else /* !CONFIG_NUMA */ |
44 | |||
43 | #define get_memcfg_numa get_memcfg_numa_flat | 45 | #define get_memcfg_numa get_memcfg_numa_flat |
44 | #define get_zholes_size(n) (0) | 46 | #define get_zholes_size(n) (0) |
47 | |||
48 | static inline void numa_kva_reserve(void) | ||
49 | { | ||
50 | } | ||
45 | #endif /* CONFIG_NUMA */ | 51 | #endif /* CONFIG_NUMA */ |
46 | 52 | ||
47 | #ifdef CONFIG_DISCONTIGMEM | 53 | #ifdef CONFIG_DISCONTIGMEM |