author		Tejun Heo <tj@kernel.org>	2011-05-02 11:24:48 -0400
committer	Tejun Heo <tj@kernel.org>	2011-05-02 11:24:48 -0400
commit		bd6709a91a593d8fe35d08da542e9f93bb74a304 (patch)
tree		a7e24844eaee2113bfb61e6fa6d990c01ed5ec64 /arch/x86/mm/numa_32.c
parent		7888e96b264fad27f97f58c0f3a4d20326eaf181 (diff)
x86, NUMA: Make 32bit use common NUMA init path
With both *_numa_init() methods converted and the rest of the init code
adjusted, numa_32.c can now switch from the 32bit-only init code to the
common one in numa.c.
* The get_memcfg_*() shims are dropped and initmem_init() now calls
x86_numa_init(), which is updated to handle NUMAQ (a rough sketch of
this path follows the lists below).
* All boilerplate operations, including node range limiting and pgdat
alloc/init, are handled by numa_init(); the 32bit-only implementation
is removed.
* The 32bit numa_add_memblk(), numa_set_distance() and
memory_add_physaddr_to_nid() are removed and the common versions in
numa.c are enabled for 32bit.
This change causes the following behavior changes.
* NODE_DATA()->node_start_pfn/node_spanned_pages properly initialized
for 32bit too.
* Many more sanity checks and configuration cleanups.
* Proper handling of node distances.
* The same NUMA init messages as 64bit.
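
For orientation, here is a rough sketch of the common init path that 32bit
now enters through initmem_init(). It is not the exact arch/x86/mm/numa.c
code (other parsers and details are omitted, and the dummy_numa_init() flat
fallback is assumed rather than shown in this patch), but it mirrors the
fallback order of the removed get_memcfg_numa(): NUMAQ, then ACPI/SRAT,
then a flat single node.

/* Sketch only: the real x86_numa_init() lives in arch/x86/mm/numa.c. */
void __init x86_numa_init(void)
{
	if (!numa_off) {
#ifdef CONFIG_X86_NUMAQ
		if (!numa_init(numaq_numa_init))	/* NUMAQ first */
			return;
#endif
#ifdef CONFIG_ACPI_NUMA
		if (!numa_init(x86_acpi_numa_init))	/* then ACPI SRAT */
			return;
#endif
	}
	numa_init(dummy_numa_init);	/* assumed flat, single-node fallback */
}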
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Diffstat (limited to 'arch/x86/mm/numa_32.c')
-rw-r--r--	arch/x86/mm/numa_32.c	231
1 file changed, 2 insertions(+), 229 deletions(-)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index fbd558fe10bc..849a975d3fa0 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -22,36 +22,11 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/mmzone.h>
-#include <linux/highmem.h>
-#include <linux/initrd.h>
-#include <linux/nodemask.h>
 #include <linux/module.h>
-#include <linux/kexec.h>
-#include <linux/pfn.h>
-#include <linux/swap.h>
-#include <linux/acpi.h>
-
-#include <asm/e820.h>
-#include <asm/setup.h>
-#include <asm/mmzone.h>
-#include <asm/bios_ebda.h>
-#include <asm/proto.h>
-
-/*
- * numa interface - we expect the numa architecture specific code to have
- * populated the following initialisation.
- *
- * 1) node_online_map  - the map of all nodes configured (online) in the system
- * 2) node_start_pfn   - the starting page frame number for a node
- * 3) node_end_pfn     - the ending page fram number for a node
- */
-unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly;
-unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
 
+#include "numa_internal.h"
 
 #ifdef CONFIG_DISCONTIGMEM
 /*
@@ -96,7 +71,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 }
 #endif
 
-extern unsigned long find_max_low_pfn(void);
 extern unsigned long highend_pfn, highstart_pfn;
 
 #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE)
@@ -105,68 +79,6 @@ static void *node_remap_start_vaddr[MAX_NUMNODES];
 void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
 /*
- * FLAT - support for basic PC memory model with discontig enabled, essentially
- *	  a single node with all available processors in it with a flat
- *	  memory map.
- */
-static int __init get_memcfg_numa_flat(void)
-{
-	printk(KERN_DEBUG "NUMA - single node, flat memory mode\n");
-
-	node_start_pfn[0] = 0;
-	node_end_pfn[0] = max_pfn;
-	memblock_x86_register_active_regions(0, 0, max_pfn);
-
-	/* Indicate there is one node available. */
-	nodes_clear(node_online_map);
-	node_set_online(0);
-	return 1;
-}
-
-/*
- * Find the highest page frame number we have available for the node
- */
-static void __init propagate_e820_map_node(int nid)
-{
-	if (node_end_pfn[nid] > max_pfn)
-		node_end_pfn[nid] = max_pfn;
-	/*
-	 * if a user has given mem=XXXX, then we need to make sure
-	 * that the node _starts_ before that, too, not just ends
-	 */
-	if (node_start_pfn[nid] > max_pfn)
-		node_start_pfn[nid] = max_pfn;
-	BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]);
-}
-
-/*
- * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
- * method.  For node zero take this from the bottom of memory, for
- * subsequent nodes place them at node_remap_start_vaddr which contains
- * node local data in physically node local memory.  See setup_memory()
- * for details.
- */
-static void __init allocate_pgdat(int nid)
-{
-	char buf[16];
-
-	NODE_DATA(nid) = alloc_remap(nid, ALIGN(sizeof(pg_data_t), PAGE_SIZE));
-	if (!NODE_DATA(nid)) {
-		unsigned long pgdat_phys;
-		pgdat_phys = memblock_find_in_range(min_low_pfn<<PAGE_SHIFT,
-				 max_pfn_mapped<<PAGE_SHIFT,
-				 sizeof(pg_data_t),
-				 PAGE_SIZE);
-		NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
-		memset(buf, 0, sizeof(buf));
-		sprintf(buf, "NODE_DATA %d", nid);
-		memblock_x86_reserve_range(pgdat_phys, pgdat_phys + sizeof(pg_data_t), buf);
-	}
-	printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
-		nid, (unsigned long)NODE_DATA(nid));
-}
-
-/*
  * Remap memory allocator
  */
 static unsigned long node_remap_start_pfn[MAX_NUMNODES];
@@ -322,76 +234,9 @@ void __init init_alloc_remap(int nid, u64 start, u64 end)
 	       nid, node_pa, node_pa + size, remap_va, remap_va + size);
 }
 
-static int get_memcfg_numaq(void)
-{
-#ifdef CONFIG_X86_NUMAQ
-	int nid;
-
-	if (numa_off)
-		return 0;
-
-	if (numaq_numa_init() < 0) {
-		nodes_clear(numa_nodes_parsed);
-		remove_all_active_ranges();
-		return 0;
-	}
-
-	for_each_node_mask(nid, numa_nodes_parsed)
-		node_set_online(nid);
-	sort_node_map();
-	return 1;
-#else
-	return 0;
-#endif
-}
-
-static int get_memcfg_from_srat(void)
-{
-#ifdef CONFIG_ACPI_NUMA
-	int nid;
-
-	if (numa_off)
-		return 0;
-
-	if (x86_acpi_numa_init() < 0) {
-		nodes_clear(numa_nodes_parsed);
-		remove_all_active_ranges();
-		return 0;
-	}
-
-	for_each_node_mask(nid, numa_nodes_parsed)
-		node_set_online(nid);
-	sort_node_map();
-	return 1;
-#else
-	return 0;
-#endif
-}
-
-static void get_memcfg_numa(void)
-{
-	if (get_memcfg_numaq())
-		return;
-	if (get_memcfg_from_srat())
-		return;
-	get_memcfg_numa_flat();
-}
-
 void __init initmem_init(void)
 {
-	int nid;
-
-	get_memcfg_numa();
-	numa_init_array();
-
-	for_each_online_node(nid) {
-		u64 start = (u64)node_start_pfn[nid] << PAGE_SHIFT;
-		u64 end = min((u64)node_end_pfn[nid] << PAGE_SHIFT,
-			      (u64)max_pfn << PAGE_SHIFT);
-
-		if (start < end)
-			init_alloc_remap(nid, start, end);
-	}
+	x86_numa_init();
 
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
@@ -412,81 +257,9 @@ void __init initmem_init(void)
 
 	printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(max_low_pfn));
-	for_each_online_node(nid)
-		allocate_pgdat(nid);
 
 	printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
-	for_each_online_node(nid)
-		propagate_e820_map_node(nid);
-
-	for_each_online_node(nid) {
-		memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
-		NODE_DATA(nid)->node_id = nid;
-	}
 
 	setup_bootmem_allocator();
 }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int paddr_to_nid(u64 addr)
-{
-	int nid;
-	unsigned long pfn = PFN_DOWN(addr);
-
-	for_each_node(nid)
-		if (node_start_pfn[nid] <= pfn &&
-		    pfn < node_end_pfn[nid])
-			return nid;
-
-	return -1;
-}
-
-/*
- * This function is used to ask node id BEFORE memmap and mem_section's
- * initialization (pfn_to_nid() can't be used yet).
- * If _PXM is not defined on ACPI's DSDT, node id must be found by this.
- */
-int memory_add_physaddr_to_nid(u64 addr)
-{
-	int nid = paddr_to_nid(addr);
-	return (nid >= 0) ? nid : 0;
-}
-
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
-/* temporary shim, will go away soon */
-int __init numa_add_memblk(int nid, u64 start, u64 end)
-{
-	unsigned long start_pfn = start >> PAGE_SHIFT;
-	unsigned long end_pfn = end >> PAGE_SHIFT;
-
-	printk(KERN_DEBUG "nid %d start_pfn %08lx end_pfn %08lx\n",
-	       nid, start_pfn, end_pfn);
-
-	if (start >= (u64)max_pfn << PAGE_SHIFT) {
-		printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
-			start_pfn, end_pfn);
-		return 0;
-	}
-
-	node_set_online(nid);
-	memblock_x86_register_active_regions(nid, start_pfn,
-					     min(end_pfn, max_pfn));
-
-	if (!node_has_online_mem(nid)) {
-		node_start_pfn[nid] = start_pfn;
-		node_end_pfn[nid] = end_pfn;
-	} else {
-		node_start_pfn[nid] = min(node_start_pfn[nid], start_pfn);
-		node_end_pfn[nid] = max(node_end_pfn[nid], end_pfn);
-	}
-	return 0;
-}
-
-/* temporary shim, will go away soon */
-void __init numa_set_distance(int from, int to, int distance)
-{
-	/* nada */
-}