aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-02-16 11:11:09 -0500
committerTejun Heo <tj@kernel.org>2011-02-16 11:11:09 -0500
commit91556237ec872e1029e3036174bae3b1a8df65eb (patch)
tree967d5fe1fedfd372f4af7e7cf79e54d074b1ee0f /arch/x86
parenta844ef46fa3055165c28feede6114a711b8375ad (diff)
x86-64, NUMA: Kill numa_nodes[]
numa_nodes[] doesn't carry any information which isn't present in numa_meminfo. Each entry is simply min/max range of all the memblks for the node. This is not only redundant but also inaccurate when memblks for different nodes interleave - for example, find_node_by_addr() can return the wrong nodeid. Kill numa_nodes[] and always use numa_meminfo instead. * nodes_cover_memory() is renamed to numa_meminfo_cover_memory() and now operates on numa_meminfo and returns bool. * setup_node_bootmem() needs min/max range. Compute the range on the fly. setup_node_bootmem() invocation is restructured to use an outer loop instead of hardcoding the double invocations. * find_node_by_addr() now operates on numa_meminfo. * setup_physnodes() builds physnodes[] from memblks. This will go away when emulation code is updated to use struct numa_meminfo. This patch also makes the following misc changes. * Clearing of nodes_add[] is converted to memset(). * numa_add_memblk() in amd_numa_init() is moved down a bit for consistency. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Brian Gerst <brgerst@gmail.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Shaohui Zheng <shaohui.zheng@intel.com> Cc: David Rientjes <rientjes@google.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/numa_64.h1
-rw-r--r--arch/x86/mm/amdtopology_64.c6
-rw-r--r--arch/x86/mm/numa_64.c82
-rw-r--r--arch/x86/mm/srat_64.c22
4 files changed, 53 insertions, 58 deletions
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 925ade9d67e4..20b69a98f37d 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -26,7 +26,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
26 26
27extern nodemask_t cpu_nodes_parsed __initdata; 27extern nodemask_t cpu_nodes_parsed __initdata;
28extern nodemask_t mem_nodes_parsed __initdata; 28extern nodemask_t mem_nodes_parsed __initdata;
29extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
30 29
31extern int __cpuinit numa_cpu_node(int cpu); 30extern int __cpuinit numa_cpu_node(int cpu);
32extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); 31extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index 8f7a5eb4bd3c..0cb59e582007 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -165,12 +165,8 @@ int __init amd_numa_init(void)
165 pr_info("Node %d MemBase %016lx Limit %016lx\n", 165 pr_info("Node %d MemBase %016lx Limit %016lx\n",
166 nodeid, base, limit); 166 nodeid, base, limit);
167 167
168 numa_nodes[nodeid].start = base;
169 numa_nodes[nodeid].end = limit;
170 numa_add_memblk(nodeid, base, limit);
171
172 prevbase = base; 168 prevbase = base;
173 169 numa_add_memblk(nodeid, base, limit);
174 node_set(nodeid, mem_nodes_parsed); 170 node_set(nodeid, mem_nodes_parsed);
175 node_set(nodeid, cpu_nodes_parsed); 171 node_set(nodeid, cpu_nodes_parsed);
176 } 172 }
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 681bc0d59db5..c490448d716a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size;
46 46
47static struct numa_meminfo numa_meminfo __initdata; 47static struct numa_meminfo numa_meminfo __initdata;
48 48
49struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
50
51/* 49/*
52 * Given a shift value, try to populate memnodemap[] 50 * Given a shift value, try to populate memnodemap[]
53 * Returns : 51 * Returns :
@@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
349 * Sanity check to catch more bad NUMA configurations (they are amazingly 347 * Sanity check to catch more bad NUMA configurations (they are amazingly
350 * common). Make sure the nodes cover all memory. 348 * common). Make sure the nodes cover all memory.
351 */ 349 */
352static int __init nodes_cover_memory(const struct bootnode *nodes) 350static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
353{ 351{
354 unsigned long numaram, e820ram; 352 unsigned long numaram, e820ram;
355 int i; 353 int i;
356 354
357 numaram = 0; 355 numaram = 0;
358 for_each_node_mask(i, mem_nodes_parsed) { 356 for (i = 0; i < mi->nr_blks; i++) {
359 unsigned long s = nodes[i].start >> PAGE_SHIFT; 357 unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
360 unsigned long e = nodes[i].end >> PAGE_SHIFT; 358 unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
361 numaram += e - s; 359 numaram += e - s;
362 numaram -= __absent_pages_in_range(i, s, e); 360 numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
363 if ((long)numaram < 0) 361 if ((long)numaram < 0)
364 numaram = 0; 362 numaram = 0;
365 } 363 }
@@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
371 printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n", 369 printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
372 (numaram << PAGE_SHIFT) >> 20, 370 (numaram << PAGE_SHIFT) >> 20,
373 (e820ram << PAGE_SHIFT) >> 20); 371 (e820ram << PAGE_SHIFT) >> 20);
374 return 0; 372 return false;
375 } 373 }
376 return 1; 374 return true;
377} 375}
378 376
379static int __init numa_register_memblks(struct numa_meminfo *mi) 377static int __init numa_register_memblks(struct numa_meminfo *mi)
380{ 378{
381 int i; 379 int i, j, nid;
382 380
383 /* Account for nodes with cpus and no memory */ 381 /* Account for nodes with cpus and no memory */
384 nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed); 382 nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed);
@@ -398,23 +396,34 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
398 396
399 /* for out of order entries */ 397 /* for out of order entries */
400 sort_node_map(); 398 sort_node_map();
401 if (!nodes_cover_memory(numa_nodes)) 399 if (!numa_meminfo_cover_memory(mi))
402 return -EINVAL; 400 return -EINVAL;
403 401
404 init_memory_mapping_high(); 402 init_memory_mapping_high();
405 403
406 /* Finally register nodes. */
407 for_each_node_mask(i, node_possible_map)
408 setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
409
410 /* 404 /*
411 * Try again in case setup_node_bootmem missed one due to missing 405 * Finally register nodes. Do it twice in case setup_node_bootmem
412 * bootmem. 406 * missed one due to missing bootmem.
413 */ 407 */
414 for_each_node_mask(i, node_possible_map) 408 for (i = 0; i < 2; i++) {
415 if (!node_online(i)) 409 for_each_node_mask(nid, node_possible_map) {
416 setup_node_bootmem(i, numa_nodes[i].start, 410 u64 start = (u64)max_pfn << PAGE_SHIFT;
417 numa_nodes[i].end); 411 u64 end = 0;
412
413 if (node_online(nid))
414 continue;
415
416 for (j = 0; j < mi->nr_blks; j++) {
417 if (nid != mi->blk[j].nid)
418 continue;
419 start = min(mi->blk[j].start, start);
420 end = max(mi->blk[j].end, end);
421 }
422
423 if (start < end)
424 setup_node_bootmem(nid, start, end);
425 }
426 }
418 427
419 return 0; 428 return 0;
420} 429}
@@ -432,33 +441,41 @@ void __init numa_emu_cmdline(char *str)
432 441
433int __init find_node_by_addr(unsigned long addr) 442int __init find_node_by_addr(unsigned long addr)
434{ 443{
435 int ret = NUMA_NO_NODE; 444 const struct numa_meminfo *mi = &numa_meminfo;
436 int i; 445 int i;
437 446
438 for_each_node_mask(i, mem_nodes_parsed) { 447 for (i = 0; i < mi->nr_blks; i++) {
439 /* 448 /*
440 * Find the real node that this emulated node appears on. For 449 * Find the real node that this emulated node appears on. For
441 * the sake of simplicity, we only use a real node's starting 450 * the sake of simplicity, we only use a real node's starting
442 * address to determine which emulated node it appears on. 451 * address to determine which emulated node it appears on.
443 */ 452 */
444 if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) { 453 if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
445 ret = i; 454 return mi->blk[i].nid;
446 break;
447 }
448 } 455 }
449 return ret; 456 return NUMA_NO_NODE;
450} 457}
451 458
452static int __init setup_physnodes(unsigned long start, unsigned long end) 459static int __init setup_physnodes(unsigned long start, unsigned long end)
453{ 460{
461 const struct numa_meminfo *mi = &numa_meminfo;
454 int ret = 0; 462 int ret = 0;
455 int i; 463 int i;
456 464
457 memset(physnodes, 0, sizeof(physnodes)); 465 memset(physnodes, 0, sizeof(physnodes));
458 466
459 for_each_node_mask(i, mem_nodes_parsed) { 467 for (i = 0; i < mi->nr_blks; i++) {
460 physnodes[i].start = numa_nodes[i].start; 468 int nid = mi->blk[i].nid;
461 physnodes[i].end = numa_nodes[i].end; 469
470 if (physnodes[nid].start == physnodes[nid].end) {
471 physnodes[nid].start = mi->blk[i].start;
472 physnodes[nid].end = mi->blk[i].end;
473 } else {
474 physnodes[nid].start = min(physnodes[nid].start,
475 mi->blk[i].start);
476 physnodes[nid].end = max(physnodes[nid].end,
477 mi->blk[i].end);
478 }
462 } 479 }
463 480
464 /* 481 /*
@@ -809,8 +826,6 @@ static int dummy_numa_init(void)
809 node_set(0, cpu_nodes_parsed); 826 node_set(0, cpu_nodes_parsed);
810 node_set(0, mem_nodes_parsed); 827 node_set(0, mem_nodes_parsed);
811 numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); 828 numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
812 numa_nodes[0].start = 0;
813 numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;
814 829
815 return 0; 830 return 0;
816} 831}
@@ -841,7 +856,6 @@ void __init initmem_init(void)
841 nodes_clear(node_possible_map); 856 nodes_clear(node_possible_map);
842 nodes_clear(node_online_map); 857 nodes_clear(node_online_map);
843 memset(&numa_meminfo, 0, sizeof(numa_meminfo)); 858 memset(&numa_meminfo, 0, sizeof(numa_meminfo));
844 memset(numa_nodes, 0, sizeof(numa_nodes));
845 remove_all_active_ranges(); 859 remove_all_active_ranges();
846 860
847 if (numa_init[i]() < 0) 861 if (numa_init[i]() < 0)
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51d07338d2e4..e8b3b3cb2c2b 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -37,13 +37,9 @@ static __init int setup_node(int pxm)
37 37
38static __init void bad_srat(void) 38static __init void bad_srat(void)
39{ 39{
40 int i;
41 printk(KERN_ERR "SRAT: SRAT not used.\n"); 40 printk(KERN_ERR "SRAT: SRAT not used.\n");
42 acpi_numa = -1; 41 acpi_numa = -1;
43 for (i = 0; i < MAX_NUMNODES; i++) { 42 memset(nodes_add, 0, sizeof(nodes_add));
44 numa_nodes[i].start = numa_nodes[i].end = 0;
45 nodes_add[i].start = nodes_add[i].end = 0;
46 }
47} 43}
48 44
49static __init inline int srat_disabled(void) 45static __init inline int srat_disabled(void)
@@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
210void __init 206void __init
211acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) 207acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
212{ 208{
213 struct bootnode *nd;
214 unsigned long start, end; 209 unsigned long start, end;
215 int node, pxm; 210 int node, pxm;
216 211
@@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
243 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, 238 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
244 start, end); 239 start, end);
245 240
246 if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) { 241 if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE))
247 nd = &numa_nodes[node]; 242 node_set(node, mem_nodes_parsed);
248 if (!node_test_and_set(node, mem_nodes_parsed)) { 243 else
249 nd->start = start;
250 nd->end = end;
251 } else {
252 if (start < nd->start)
253 nd->start = start;
254 if (nd->end < end)
255 nd->end = end;
256 }
257 } else
258 update_nodes_add(node, start, end); 244 update_nodes_add(node, start, end);
259} 245}
260 246