diff options
author | Yinghai Lu <yinghai@kernel.org> | 2009-05-15 16:59:37 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-05-18 03:21:04 -0400 |
commit | 7c43769a9776141ec23ca81a1bdd5a9c0512f165 (patch) | |
tree | e24d99a84d76386b0bf910152d1841853857e63f /arch | |
parent | 888a589f6be07d624e21e2174d98375e9f95911b (diff) |
x86, mm: Fix node_possible_map logic
Recently there were some changes to the meaning of node_possible_map,
and it is quite strange:
- a node without memory would be set in node_possible_map
- but a node with less than NODE_MIN_SIZE of memory would be kicked out of node_possible_map.
fix it by adding strict_setup_node_bootmem().
Also, remove unparse_node().
So the result will be:
1. cpu_to_node() will return only an online node (the nearest one)
2. apicid_to_node() still returns a node that may not be online but is set
in node_possible_map.
3. node_possible_map will include nodes whose memory is less than NODE_MIN_SIZE
v2: after move_cpus_to_node change.
[ Impact: get node_possible_map right ]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Tested-by: Jack Steiner <steiner@sgi.com>
LKML-Reference: <4A0C49BE.6080800@kernel.org>
[ v3: various small cleanups and comment clarifications ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/numa_64.h | 7 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 13 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 29 |
3 files changed, 19 insertions, 30 deletions
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 7feff0648d74..c4ae822e415f 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h | |||
@@ -24,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, | |||
24 | unsigned long end); | 24 | unsigned long end); |
25 | 25 | ||
26 | #ifdef CONFIG_NUMA | 26 | #ifdef CONFIG_NUMA |
27 | /* | ||
28 | * Too small node sizes may confuse the VM badly. Usually they | ||
29 | * result from BIOS bugs. So dont recognize nodes as standalone | ||
30 | * NUMA entities that have less than this amount of RAM listed: | ||
31 | */ | ||
32 | #define NODE_MIN_SIZE (4*1024*1024) | ||
33 | |||
27 | extern void __init init_cpu_to_node(void); | 34 | extern void __init init_cpu_to_node(void); |
28 | extern void __cpuinit numa_set_node(int cpu, int node); | 35 | extern void __cpuinit numa_set_node(int cpu, int node); |
29 | extern void __cpuinit numa_clear_node(int cpu); | 36 | extern void __cpuinit numa_clear_node(int cpu); |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index a6a93c395231..459913beac71 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
179 | } | 179 | } |
180 | 180 | ||
181 | /* Initialize bootmem allocator for a node */ | 181 | /* Initialize bootmem allocator for a node */ |
182 | void __init setup_node_bootmem(int nodeid, unsigned long start, | 182 | void __init |
183 | unsigned long end) | 183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
184 | { | 184 | { |
185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; |
186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
186 | unsigned long bootmap_start, nodedata_phys; | 187 | unsigned long bootmap_start, nodedata_phys; |
187 | void *bootmap; | 188 | void *bootmap; |
188 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
189 | int nid; | 189 | int nid; |
190 | 190 | ||
191 | if (!end) | 191 | if (!end) |
192 | return; | 192 | return; |
193 | 193 | ||
194 | /* | ||
195 | * Don't confuse VM with a node that doesn't have the | ||
196 | * minimum amount of memory: | ||
197 | */ | ||
198 | if (end && (end - start) < NODE_MIN_SIZE) | ||
199 | return; | ||
200 | |||
194 | start = roundup(start, ZONE_ALIGN); | 201 | start = roundup(start, ZONE_ALIGN); |
195 | 202 | ||
196 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index b0dbbd48e58a..2dfcbf9df2ae 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -36,10 +36,6 @@ static int num_node_memblks __initdata; | |||
36 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; | 36 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; |
37 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; | 37 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; |
38 | 38 | ||
39 | /* Too small nodes confuse the VM badly. Usually they result | ||
40 | from BIOS bugs. */ | ||
41 | #define NODE_MIN_SIZE (4*1024*1024) | ||
42 | |||
43 | static __init int setup_node(int pxm) | 39 | static __init int setup_node(int pxm) |
44 | { | 40 | { |
45 | return acpi_map_pxm_to_node(pxm); | 41 | return acpi_map_pxm_to_node(pxm); |
@@ -338,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
338 | return 1; | 334 | return 1; |
339 | } | 335 | } |
340 | 336 | ||
341 | static void __init unparse_node(int node) | ||
342 | { | ||
343 | int i; | ||
344 | node_clear(node, nodes_parsed); | ||
345 | node_clear(node, cpu_nodes_parsed); | ||
346 | for (i = 0; i < MAX_LOCAL_APIC; i++) { | ||
347 | if (apicid_to_node[i] == node) | ||
348 | apicid_to_node[i] = NUMA_NO_NODE; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | void __init acpi_numa_arch_fixup(void) {} | 337 | void __init acpi_numa_arch_fixup(void) {} |
353 | 338 | ||
354 | /* Use the information discovered above to actually set up the nodes. */ | 339 | /* Use the information discovered above to actually set up the nodes. */ |
@@ -360,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
360 | return -1; | 345 | return -1; |
361 | 346 | ||
362 | /* First clean up the node list */ | 347 | /* First clean up the node list */ |
363 | for (i = 0; i < MAX_NUMNODES; i++) { | 348 | for (i = 0; i < MAX_NUMNODES; i++) |
364 | cutoff_node(i, start, end); | 349 | cutoff_node(i, start, end); |
365 | /* | ||
366 | * don't confuse VM with a node that doesn't have the | ||
367 | * minimum memory. | ||
368 | */ | ||
369 | if (nodes[i].end && | ||
370 | (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) { | ||
371 | unparse_node(i); | ||
372 | node_set_offline(i); | ||
373 | } | ||
374 | } | ||
375 | 350 | ||
376 | if (!nodes_cover_memory(nodes)) { | 351 | if (!nodes_cover_memory(nodes)) { |
377 | bad_srat(); | 352 | bad_srat(); |
@@ -404,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
404 | 379 | ||
405 | if (node == NUMA_NO_NODE) | 380 | if (node == NUMA_NO_NODE) |
406 | continue; | 381 | continue; |
407 | if (!node_isset(node, node_possible_map)) | 382 | if (!node_online(node)) |
408 | numa_clear_node(i); | 383 | numa_clear_node(i); |
409 | } | 384 | } |
410 | numa_init_array(); | 385 | numa_init_array(); |