aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author Yinghai Lu <yinghai@kernel.org> 2009-05-15 16:59:37 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-05-18 03:21:04 -0400
commit 7c43769a9776141ec23ca81a1bdd5a9c0512f165 (patch)
tree e24d99a84d76386b0bf910152d1841853857e63f /arch
parent 888a589f6be07d624e21e2174d98375e9f95911b (diff)
x86, mm: Fix node_possible_map logic
Recently there were some changes to the meaning of node_possible_map, and the result is quite strange:

 - a node without memory would be set in node_possible_map
 - but a node with less than NODE_MIN_SIZE of memory would be kicked out of node_possible_map

Fix it by adding strict_setup_node_bootmem(). Also, remove unparse_node().

So the result will be:

 1. cpu_to_node() will return online nodes only (the nearest one)
 2. apicid_to_node() still returns a node that may not be online but is set in node_possible_map
 3. node_possible_map will include nodes whose memory is less than NODE_MIN_SIZE

v2: after move_cpus_to_node change.

[ Impact: get node_possible_map right ]

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Tested-by: Jack Steiner <steiner@sgi.com>
LKML-Reference: <4A0C49BE.6080800@kernel.org>
[ v3: various small cleanups and comment clarifications ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- arch/x86/include/asm/numa_64.h | 7
-rw-r--r-- arch/x86/mm/numa_64.c | 13
-rw-r--r-- arch/x86/mm/srat_64.c | 29
3 files changed, 19 insertions, 30 deletions
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 7feff0648d74..c4ae822e415f 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -24,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
24 unsigned long end); 24 unsigned long end);
25 25
26#ifdef CONFIG_NUMA 26#ifdef CONFIG_NUMA
27/*
28 * Too small node sizes may confuse the VM badly. Usually they
29 * result from BIOS bugs. So dont recognize nodes as standalone
30 * NUMA entities that have less than this amount of RAM listed:
31 */
32#define NODE_MIN_SIZE (4*1024*1024)
33
27extern void __init init_cpu_to_node(void); 34extern void __init init_cpu_to_node(void);
28extern void __cpuinit numa_set_node(int cpu, int node); 35extern void __cpuinit numa_set_node(int cpu, int node);
29extern void __cpuinit numa_clear_node(int cpu); 36extern void __cpuinit numa_clear_node(int cpu);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a6a93c395231..459913beac71 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
179} 179}
180 180
181/* Initialize bootmem allocator for a node */ 181/* Initialize bootmem allocator for a node */
182void __init setup_node_bootmem(int nodeid, unsigned long start, 182void __init
183 unsigned long end) 183setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
184{ 184{
185 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; 185 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
186 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
186 unsigned long bootmap_start, nodedata_phys; 187 unsigned long bootmap_start, nodedata_phys;
187 void *bootmap; 188 void *bootmap;
188 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
189 int nid; 189 int nid;
190 190
191 if (!end) 191 if (!end)
192 return; 192 return;
193 193
194 /*
195 * Don't confuse VM with a node that doesn't have the
196 * minimum amount of memory:
197 */
198 if (end && (end - start) < NODE_MIN_SIZE)
199 return;
200
194 start = roundup(start, ZONE_ALIGN); 201 start = roundup(start, ZONE_ALIGN);
195 202
196 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 203 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index b0dbbd48e58a..2dfcbf9df2ae 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -36,10 +36,6 @@ static int num_node_memblks __initdata;
36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; 36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; 37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
38 38
39/* Too small nodes confuse the VM badly. Usually they result
40 from BIOS bugs. */
41#define NODE_MIN_SIZE (4*1024*1024)
42
43static __init int setup_node(int pxm) 39static __init int setup_node(int pxm)
44{ 40{
45 return acpi_map_pxm_to_node(pxm); 41 return acpi_map_pxm_to_node(pxm);
@@ -338,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
338 return 1; 334 return 1;
339} 335}
340 336
341static void __init unparse_node(int node)
342{
343 int i;
344 node_clear(node, nodes_parsed);
345 node_clear(node, cpu_nodes_parsed);
346 for (i = 0; i < MAX_LOCAL_APIC; i++) {
347 if (apicid_to_node[i] == node)
348 apicid_to_node[i] = NUMA_NO_NODE;
349 }
350}
351
352void __init acpi_numa_arch_fixup(void) {} 337void __init acpi_numa_arch_fixup(void) {}
353 338
354/* Use the information discovered above to actually set up the nodes. */ 339/* Use the information discovered above to actually set up the nodes. */
@@ -360,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
360 return -1; 345 return -1;
361 346
362 /* First clean up the node list */ 347 /* First clean up the node list */
363 for (i = 0; i < MAX_NUMNODES; i++) { 348 for (i = 0; i < MAX_NUMNODES; i++)
364 cutoff_node(i, start, end); 349 cutoff_node(i, start, end);
365 /*
366 * don't confuse VM with a node that doesn't have the
367 * minimum memory.
368 */
369 if (nodes[i].end &&
370 (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
371 unparse_node(i);
372 node_set_offline(i);
373 }
374 }
375 350
376 if (!nodes_cover_memory(nodes)) { 351 if (!nodes_cover_memory(nodes)) {
377 bad_srat(); 352 bad_srat();
@@ -404,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
404 379
405 if (node == NUMA_NO_NODE) 380 if (node == NUMA_NO_NODE)
406 continue; 381 continue;
407 if (!node_isset(node, node_possible_map)) 382 if (!node_online(node))
408 numa_clear_node(i); 383 numa_clear_node(i);
409 } 384 }
410 numa_init_array(); 385 numa_init_array();