author		Tejun Heo <tj@kernel.org>	2011-02-16 11:11:09 -0500
committer	Tejun Heo <tj@kernel.org>	2011-02-16 11:11:09 -0500
commit		91556237ec872e1029e3036174bae3b1a8df65eb (patch)
tree		967d5fe1fedfd372f4af7e7cf79e54d074b1ee0f /arch/x86
parent		a844ef46fa3055165c28feede6114a711b8375ad (diff)
x86-64, NUMA: Kill numa_nodes[]
numa_nodes[] doesn't carry any information which isn't present in
numa_meminfo. Each entry is simply the min/max range of all the memblks
for the node. This is not only redundant but also inaccurate when
memblks for different nodes interleave - for example,
find_node_by_addr() can return the wrong nodeid.
Kill numa_nodes[] and always use numa_meminfo instead.
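
For illustration, consider memblks that interleave: node 0 owns [0G,1G)
and [2G,3G) while node 1 owns [1G,2G). numa_nodes[0] then collapses to
the range [0G,3G), and a range-based lookup claims node 1's memory for
node 0. A minimal user-space sketch of the two lookup strategies (the
struct layout and addresses are simplified stand-ins, not the kernel's
definitions):

	/*
	 * Illustrative only: simplified stand-ins for the kernel's
	 * struct bootnode / struct numa_meminfo.
	 */
	#include <stdio.h>

	struct memblk { unsigned long start, end; int nid; };

	/* node 0 and node 1 memblks interleave */
	static const struct memblk blk[] = {
		{ 0x00000000UL, 0x40000000UL, 0 },	/* node 0: [0G, 1G) */
		{ 0x40000000UL, 0x80000000UL, 1 },	/* node 1: [1G, 2G) */
		{ 0x80000000UL, 0xc0000000UL, 0 },	/* node 0: [2G, 3G) */
	};

	/* numa_nodes[]-style lookup: node 0's min/max range swallows node 1 */
	static int node_by_range(unsigned long addr)
	{
		return addr < 0xc0000000UL ? 0 : -1;	/* [0G, 3G) -> node 0 */
	}

	/* memblk-style lookup, as the patched find_node_by_addr() does */
	static int node_by_memblk(unsigned long addr)
	{
		for (int i = 0; i < 3; i++)
			if (addr >= blk[i].start && addr < blk[i].end)
				return blk[i].nid;
		return -1;
	}

	int main(void)
	{
		unsigned long addr = 0x50000000UL;	/* inside node 1's memblk */

		/* prints "range: 0, memblk: 1" - the range lookup is wrong */
		printf("range: %d, memblk: %d\n",
		       node_by_range(addr), node_by_memblk(addr));
		return 0;
	}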
* nodes_cover_memory() is renamed to numa_meminfo_cover_memory() and
now operates on numa_meminfo and returns bool.
* setup_node_bootmem() needs min/max range. Compute the range on the
fly; see the sketch after this list. setup_node_bootmem() invocation
is restructured to use an outer loop instead of hardcoding the double
invocations.
* find_node_by_addr() now operates on numa_meminfo.
* setup_physnodes() builds physnodes[] from memblks. This will go
away when emulation code is updated to use struct numa_meminfo.
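
The on-the-fly range computation referenced above is a plain min/max
fold over a node's memblks. Roughly (the helper below is a hypothetical
refactoring for illustration; the patch open-codes this loop inside
numa_register_memblks()):

	static bool __init node_memblk_range(const struct numa_meminfo *mi,
					     int nid, u64 *startp, u64 *endp)
	{
		/* start above all RAM so any matching memblk pulls it down */
		u64 start = (u64)max_pfn << PAGE_SHIFT;
		u64 end = 0;
		int j;

		for (j = 0; j < mi->nr_blks; j++) {
			if (mi->blk[j].nid != nid)
				continue;
			start = min(mi->blk[j].start, start);
			end = max(mi->blk[j].end, end);
		}

		if (start >= end)
			return false;	/* no memblks: skip setup_node_bootmem() */
		*startp = start;
		*endp = end;
		return true;
	}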
This patch also makes the following misc changes.
* Clearing of nodes_add[] is converted to memset().
* numa_add_memblk() in amd_numa_init() is moved down a bit for
consistency.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/numa_64.h	1
-rw-r--r--	arch/x86/mm/amdtopology_64.c	6
-rw-r--r--	arch/x86/mm/numa_64.c	82
-rw-r--r--	arch/x86/mm/srat_64.c	22
4 files changed, 53 insertions, 58 deletions
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 925ade9d67e4..20b69a98f37d 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -26,7 +26,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
 
 extern nodemask_t cpu_nodes_parsed __initdata;
 extern nodemask_t mem_nodes_parsed __initdata;
-extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
 
 extern int __cpuinit numa_cpu_node(int cpu);
 extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index 8f7a5eb4bd3c..0cb59e582007 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -165,12 +165,8 @@ int __init amd_numa_init(void)
 		pr_info("Node %d MemBase %016lx Limit %016lx\n",
 			nodeid, base, limit);
 
-		numa_nodes[nodeid].start = base;
-		numa_nodes[nodeid].end = limit;
-		numa_add_memblk(nodeid, base, limit);
-
 		prevbase = base;
-
+		numa_add_memblk(nodeid, base, limit);
 		node_set(nodeid, mem_nodes_parsed);
 		node_set(nodeid, cpu_nodes_parsed);
 	}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 681bc0d59db5..c490448d716a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -46,8 +46,6 @@ static unsigned long __initdata nodemap_size;
 
 static struct numa_meminfo numa_meminfo __initdata;
 
-struct bootnode numa_nodes[MAX_NUMNODES] __initdata;
-
 /*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
@@ -349,17 +347,17 @@ static int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
  * Sanity check to catch more bad NUMA configurations (they are amazingly
  * common).  Make sure the nodes cover all memory.
  */
-static int __init nodes_cover_memory(const struct bootnode *nodes)
+static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
 {
 	unsigned long numaram, e820ram;
 	int i;
 
 	numaram = 0;
-	for_each_node_mask(i, mem_nodes_parsed) {
-		unsigned long s = nodes[i].start >> PAGE_SHIFT;
-		unsigned long e = nodes[i].end >> PAGE_SHIFT;
+	for (i = 0; i < mi->nr_blks; i++) {
+		unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
+		unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
 		numaram += e - s;
-		numaram -= __absent_pages_in_range(i, s, e);
+		numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
 		if ((long)numaram < 0)
 			numaram = 0;
 	}
@@ -371,14 +369,14 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
 		printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
 		       (numaram << PAGE_SHIFT) >> 20,
 		       (e820ram << PAGE_SHIFT) >> 20);
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
 static int __init numa_register_memblks(struct numa_meminfo *mi)
 {
-	int i;
+	int i, j, nid;
 
 	/* Account for nodes with cpus and no memory */
 	nodes_or(node_possible_map, mem_nodes_parsed, cpu_nodes_parsed);
@@ -398,23 +396,34 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
 
 	/* for out of order entries */
 	sort_node_map();
-	if (!nodes_cover_memory(numa_nodes))
+	if (!numa_meminfo_cover_memory(mi))
 		return -EINVAL;
 
 	init_memory_mapping_high();
 
-	/* Finally register nodes. */
-	for_each_node_mask(i, node_possible_map)
-		setup_node_bootmem(i, numa_nodes[i].start, numa_nodes[i].end);
-
 	/*
-	 * Try again in case setup_node_bootmem missed one due to missing
-	 * bootmem.
+	 * Finally register nodes.  Do it twice in case setup_node_bootmem
+	 * missed one due to missing bootmem.
 	 */
-	for_each_node_mask(i, node_possible_map)
-		if (!node_online(i))
-			setup_node_bootmem(i, numa_nodes[i].start,
-					   numa_nodes[i].end);
+	for (i = 0; i < 2; i++) {
+		for_each_node_mask(nid, node_possible_map) {
+			u64 start = (u64)max_pfn << PAGE_SHIFT;
+			u64 end = 0;
+
+			if (node_online(nid))
+				continue;
+
+			for (j = 0; j < mi->nr_blks; j++) {
+				if (nid != mi->blk[j].nid)
+					continue;
+				start = min(mi->blk[j].start, start);
+				end = max(mi->blk[j].end, end);
+			}
+
+			if (start < end)
+				setup_node_bootmem(nid, start, end);
+		}
+	}
 
 	return 0;
 }
@@ -432,33 +441,41 @@ void __init numa_emu_cmdline(char *str)
 
 int __init find_node_by_addr(unsigned long addr)
 {
-	int ret = NUMA_NO_NODE;
+	const struct numa_meminfo *mi = &numa_meminfo;
 	int i;
 
-	for_each_node_mask(i, mem_nodes_parsed) {
+	for (i = 0; i < mi->nr_blks; i++) {
 		/*
 		 * Find the real node that this emulated node appears on.  For
 		 * the sake of simplicity, we only use a real node's starting
 		 * address to determine which emulated node it appears on.
 		 */
-		if (addr >= numa_nodes[i].start && addr < numa_nodes[i].end) {
-			ret = i;
-			break;
-		}
+		if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
+			return mi->blk[i].nid;
 	}
-	return ret;
+	return NUMA_NO_NODE;
 }
 
 static int __init setup_physnodes(unsigned long start, unsigned long end)
 {
+	const struct numa_meminfo *mi = &numa_meminfo;
 	int ret = 0;
 	int i;
 
 	memset(physnodes, 0, sizeof(physnodes));
 
-	for_each_node_mask(i, mem_nodes_parsed) {
-		physnodes[i].start = numa_nodes[i].start;
-		physnodes[i].end = numa_nodes[i].end;
+	for (i = 0; i < mi->nr_blks; i++) {
+		int nid = mi->blk[i].nid;
+
+		if (physnodes[nid].start == physnodes[nid].end) {
+			physnodes[nid].start = mi->blk[i].start;
+			physnodes[nid].end = mi->blk[i].end;
+		} else {
+			physnodes[nid].start = min(physnodes[nid].start,
+						   mi->blk[i].start);
+			physnodes[nid].end = max(physnodes[nid].end,
+						 mi->blk[i].end);
+		}
 	}
 
 	/*
@@ -809,8 +826,6 @@ static int dummy_numa_init(void)
 	node_set(0, cpu_nodes_parsed);
 	node_set(0, mem_nodes_parsed);
 	numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
-	numa_nodes[0].start = 0;
-	numa_nodes[0].end = (u64)max_pfn << PAGE_SHIFT;
 
 	return 0;
 }
@@ -841,7 +856,6 @@ void __init initmem_init(void)
 		nodes_clear(node_possible_map);
 		nodes_clear(node_online_map);
 		memset(&numa_meminfo, 0, sizeof(numa_meminfo));
-		memset(numa_nodes, 0, sizeof(numa_nodes));
 		remove_all_active_ranges();
 
 		if (numa_init[i]() < 0)
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51d07338d2e4..e8b3b3cb2c2b 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -37,13 +37,9 @@ static __init int setup_node(int pxm)
 
 static __init void bad_srat(void)
 {
-	int i;
 	printk(KERN_ERR "SRAT: SRAT not used.\n");
 	acpi_numa = -1;
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		numa_nodes[i].start = numa_nodes[i].end = 0;
-		nodes_add[i].start = nodes_add[i].end = 0;
-	}
+	memset(nodes_add, 0, sizeof(nodes_add));
 }
 
 static __init inline int srat_disabled(void)
@@ -210,7 +206,6 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
 void __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
-	struct bootnode *nd;
 	unsigned long start, end;
 	int node, pxm;
 
@@ -243,18 +238,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
 	       start, end);
 
-	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
-		nd = &numa_nodes[node];
-		if (!node_test_and_set(node, mem_nodes_parsed)) {
-			nd->start = start;
-			nd->end = end;
-		} else {
-			if (start < nd->start)
-				nd->start = start;
-			if (nd->end < end)
-				nd->end = end;
-		}
-	} else
+	if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE))
+		node_set(node, mem_nodes_parsed);
+	else
 		update_nodes_add(node, start, end);
 }
 