diff options
author | Mel Gorman <mel@csn.ul.ie> | 2008-01-30 07:33:25 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-30 07:33:25 -0500 |
commit | 1b000a5dbeb2f34bc03d45ebdf3f6d24a60c3aed (patch) | |
tree | b5641d5d0548110b3bb33ba4abf3b37f2b34c0d5 | |
parent | a5ff677c2fb10567d1e750fb9e4417d95081071b (diff) |
x86: make NUMA work on 32-bit
The DISCONTIG memory model on x86 32 bit uses a remap allocator early
in boot. The objective is that portions of every node are mapped in to
the kernel virtual area (KVA) in place of ZONE_NORMAL so that node-local
allocations can be made for pgdat and mem_map structures.
With SPARSEMEM, the amount that is set aside is insufficient for all the
mem_maps to be allocated. During the boot process, it falls back to using
the bootmem allocator. This breaks assumptions that SPARSEMEM makes about
the layout of the mem_map in memory and results in a VM_BUG_ON triggering
due to pfn_to_page() returning garbage values.
This patch enables the remap allocator only when DISCONTIG is in use.
Without SRAT support, a compile error occurs because ACPI table parsing
functions are only available on x86-64. This patch also adds no-op stubs
and prints a warning message. What likely needs to be done is sharing
the table parsing functions between 32 and 64 bit if they are
compatible.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | arch/x86/mm/discontig_32.c | 93 |
1 file changed, 78 insertions, 15 deletions
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 9f1d02cfde37..04b1d20e2613 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/kexec.h> | 32 | #include <linux/kexec.h> |
33 | #include <linux/pfn.h> | 33 | #include <linux/pfn.h> |
34 | #include <linux/swap.h> | 34 | #include <linux/swap.h> |
35 | #include <linux/acpi.h> | ||
35 | 36 | ||
36 | #include <asm/e820.h> | 37 | #include <asm/e820.h> |
37 | #include <asm/setup.h> | 38 | #include <asm/setup.h> |
@@ -103,14 +104,10 @@ extern unsigned long highend_pfn, highstart_pfn; | |||
103 | 104 | ||
104 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) | 105 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) |
105 | 106 | ||
106 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; | ||
107 | unsigned long node_remap_size[MAX_NUMNODES]; | 107 | unsigned long node_remap_size[MAX_NUMNODES]; |
108 | static unsigned long node_remap_offset[MAX_NUMNODES]; | ||
109 | static void *node_remap_start_vaddr[MAX_NUMNODES]; | 108 | static void *node_remap_start_vaddr[MAX_NUMNODES]; |
110 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 109 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
111 | 110 | ||
112 | static void *node_remap_end_vaddr[MAX_NUMNODES]; | ||
113 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; | ||
114 | static unsigned long kva_start_pfn; | 111 | static unsigned long kva_start_pfn; |
115 | static unsigned long kva_pages; | 112 | static unsigned long kva_pages; |
116 | /* | 113 | /* |
@@ -167,6 +164,22 @@ static void __init allocate_pgdat(int nid) | |||
167 | } | 164 | } |
168 | } | 165 | } |
169 | 166 | ||
167 | #ifdef CONFIG_DISCONTIGMEM | ||
168 | /* | ||
169 | * In the discontig memory model, a portion of the kernel virtual area (KVA) | ||
170 | * is reserved and portions of nodes are mapped using it. This is to allow | ||
171 | * node-local memory to be allocated for structures that would normally require | ||
172 | * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers | ||
173 | * should be prepared to allocate from the bootmem allocator instead. This KVA | ||
174 | * mechanism is incompatible with SPARSEMEM as it makes assumptions about the | ||
175 | * layout of memory that are broken if alloc_remap() succeeds for some of the | ||
176 | * map and fails for others | ||
177 | */ | ||
178 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; | ||
179 | static void *node_remap_end_vaddr[MAX_NUMNODES]; | ||
180 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; | ||
181 | static unsigned long node_remap_offset[MAX_NUMNODES]; | ||
182 | |||
170 | void *alloc_remap(int nid, unsigned long size) | 183 | void *alloc_remap(int nid, unsigned long size) |
171 | { | 184 | { |
172 | void *allocation = node_remap_alloc_vaddr[nid]; | 185 | void *allocation = node_remap_alloc_vaddr[nid]; |
@@ -263,6 +276,40 @@ static unsigned long calculate_numa_remap_pages(void) | |||
263 | return reserve_pages; | 276 | return reserve_pages; |
264 | } | 277 | } |
265 | 278 | ||
279 | static void init_remap_allocator(int nid) | ||
280 | { | ||
281 | node_remap_start_vaddr[nid] = pfn_to_kaddr( | ||
282 | kva_start_pfn + node_remap_offset[nid]); | ||
283 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + | ||
284 | (node_remap_size[nid] * PAGE_SIZE); | ||
285 | node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + | ||
286 | ALIGN(sizeof(pg_data_t), PAGE_SIZE); | ||
287 | |||
288 | printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, | ||
289 | (ulong) node_remap_start_vaddr[nid], | ||
290 | (ulong) pfn_to_kaddr(highstart_pfn | ||
291 | + node_remap_offset[nid] + node_remap_size[nid])); | ||
292 | } | ||
293 | #else | ||
294 | void *alloc_remap(int nid, unsigned long size) | ||
295 | { | ||
296 | return NULL; | ||
297 | } | ||
298 | |||
299 | static unsigned long calculate_numa_remap_pages(void) | ||
300 | { | ||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | static void init_remap_allocator(int nid) | ||
305 | { | ||
306 | } | ||
307 | |||
308 | void __init remap_numa_kva(void) | ||
309 | { | ||
310 | } | ||
311 | #endif /* CONFIG_DISCONTIGMEM */ | ||
312 | |||
266 | extern void setup_bootmem_allocator(void); | 313 | extern void setup_bootmem_allocator(void); |
267 | unsigned long __init setup_memory(void) | 314 | unsigned long __init setup_memory(void) |
268 | { | 315 | { |
@@ -326,19 +373,9 @@ unsigned long __init setup_memory(void) | |||
326 | printk("Low memory ends at vaddr %08lx\n", | 373 | printk("Low memory ends at vaddr %08lx\n", |
327 | (ulong) pfn_to_kaddr(max_low_pfn)); | 374 | (ulong) pfn_to_kaddr(max_low_pfn)); |
328 | for_each_online_node(nid) { | 375 | for_each_online_node(nid) { |
329 | node_remap_start_vaddr[nid] = pfn_to_kaddr( | 376 | init_remap_allocator(nid); |
330 | kva_start_pfn + node_remap_offset[nid]); | ||
331 | /* Init the node remap allocator */ | ||
332 | node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + | ||
333 | (node_remap_size[nid] * PAGE_SIZE); | ||
334 | node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + | ||
335 | ALIGN(sizeof(pg_data_t), PAGE_SIZE); | ||
336 | 377 | ||
337 | allocate_pgdat(nid); | 378 | allocate_pgdat(nid); |
338 | printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, | ||
339 | (ulong) node_remap_start_vaddr[nid], | ||
340 | (ulong) pfn_to_kaddr(highstart_pfn | ||
341 | + node_remap_offset[nid] + node_remap_size[nid])); | ||
342 | } | 379 | } |
343 | printk("High memory starts at vaddr %08lx\n", | 380 | printk("High memory starts at vaddr %08lx\n", |
344 | (ulong) pfn_to_kaddr(highstart_pfn)); | 381 | (ulong) pfn_to_kaddr(highstart_pfn)); |
@@ -439,3 +476,29 @@ int memory_add_physaddr_to_nid(u64 addr) | |||
439 | 476 | ||
440 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | 477 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); |
441 | #endif | 478 | #endif |
479 | |||
480 | #ifndef CONFIG_HAVE_ARCH_PARSE_SRAT | ||
481 | /* | ||
482 | * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA | ||
483 | * | ||
484 | * These stub functions are needed to compile 32-bit NUMA when SRAT is | ||
485 | * not set. There are functions in srat_64.c for parsing this table | ||
486 | * and it may be possible to make them common functions. | ||
487 | */ | ||
488 | void acpi_numa_slit_init (struct acpi_table_slit *slit) | ||
489 | { | ||
490 | printk(KERN_INFO "ACPI: No support for parsing SLIT table\n"); | ||
491 | } | ||
492 | |||
493 | void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa) | ||
494 | { | ||
495 | } | ||
496 | |||
497 | void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma) | ||
498 | { | ||
499 | } | ||
500 | |||
501 | void acpi_numa_arch_fixup(void) | ||
502 | { | ||
503 | } | ||
504 | #endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */ | ||