aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2007-07-21 11:10:32 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-21 21:37:10 -0400
commit3484d79813707bb6045773953a809abba443dc20 (patch)
treea986c674698face8fc51132a1feeac53fa7946a2
parent3af044e0f832cfa3fcdce14dc30678b79dd36995 (diff)
x86_64: fake pxm-to-node mapping for fake numa
For NUMA emulation, our SLIT should represent the true NUMA topology of the system but our proximity domain to node ID mapping needs to reflect the emulated state. When NUMA emulation has successfully set up fake nodes on the system, a new function, acpi_fake_nodes(), is called. This function determines the proximity domain (_PXM) for each true node found on the system. It then finds which emulated nodes have been allocated on this true node as determined by its starting address. The node ID to PXM mapping is changed so that each fake node ID points to the PXM of the true node that it is located on. If the machine failed to register a SLIT, then we assume there is no special requirement for emulated node affinity so we use the default LOCAL_DISTANCE, which is newly exported to this code, as our measurement if the emulated nodes appear in the same PXM. Otherwise, we use REMOTE_DISTANCE. PXM_INVAL and NID_INVAL are also exported to the ACPI header file so that we can compare node_to_pxm() results in generic code (in this case, the SRAT code). Cc: Len Brown <lenb@kernel.org> Signed-off-by: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/x86_64/mm/numa.c1
-rw-r--r--arch/x86_64/mm/srat.c76
-rw-r--r--drivers/acpi/numa.c11
-rw-r--r--include/acpi/acpi_numa.h1
-rw-r--r--include/asm-x86_64/acpi.h11
-rw-r--r--include/linux/acpi.h3
6 files changed, 96 insertions, 7 deletions
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 51548947ad3b..30bf8043984d 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -484,6 +484,7 @@ out:
484 nodes[i].end >> PAGE_SHIFT); 484 nodes[i].end >> PAGE_SHIFT);
485 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 485 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
486 } 486 }
487 acpi_fake_nodes(nodes, num_nodes);
487 numa_init_array(); 488 numa_init_array();
488 return 0; 489 return 0;
489} 490}
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 0e0725db20b7..7ac8ff333e84 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -350,7 +350,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
350 350
351/* Sanity check to catch more bad SRATs (they are amazingly common). 351/* Sanity check to catch more bad SRATs (they are amazingly common).
352 Make sure the PXMs cover all memory. */ 352 Make sure the PXMs cover all memory. */
353static int nodes_cover_memory(void) 353static int __init nodes_cover_memory(const struct bootnode *nodes)
354{ 354{
355 int i; 355 int i;
356 unsigned long pxmram, e820ram; 356 unsigned long pxmram, e820ram;
@@ -406,7 +406,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
406 } 406 }
407 } 407 }
408 408
409 if (!nodes_cover_memory()) { 409 if (!nodes_cover_memory(nodes)) {
410 bad_srat(); 410 bad_srat();
411 return -1; 411 return -1;
412 } 412 }
@@ -440,6 +440,75 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
440 return 0; 440 return 0;
441} 441}
442 442
443#ifdef CONFIG_NUMA_EMU
444static int __init find_node_by_addr(unsigned long addr)
445{
446 int ret = NUMA_NO_NODE;
447 int i;
448
449 for_each_node_mask(i, nodes_parsed) {
450 /*
451 * Find the real node that this emulated node appears on. For
452 * the sake of simplicity, we only use a real node's starting
453 * address to determine which emulated node it appears on.
454 */
455 if (addr >= nodes[i].start && addr < nodes[i].end) {
456 ret = i;
457 break;
458 }
459 }
460 return i;
461}
462
463/*
464 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
465 * mappings that respect the real ACPI topology but reflect our emulated
466 * environment. For each emulated node, we find which real node it appears on
467 * and create PXM to NID mappings for those fake nodes which mirror that
468 * locality. SLIT will now represent the correct distances between emulated
469 * nodes as a result of the real topology.
470 */
471void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
472{
473 int i;
474 int fake_node_to_pxm_map[MAX_NUMNODES] = {
475 [0 ... MAX_NUMNODES-1] = PXM_INVAL
476 };
477
478 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
479 "topology.\n");
480 for (i = 0; i < num_nodes; i++) {
481 int nid, pxm;
482
483 nid = find_node_by_addr(fake_nodes[i].start);
484 if (nid == NUMA_NO_NODE)
485 continue;
486 pxm = node_to_pxm(nid);
487 if (pxm == PXM_INVAL)
488 continue;
489 fake_node_to_pxm_map[i] = pxm;
490 }
491 for (i = 0; i < num_nodes; i++)
492 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
493
494 nodes_clear(nodes_parsed);
495 for (i = 0; i < num_nodes; i++)
496 if (fake_nodes[i].start != fake_nodes[i].end)
497 node_set(i, nodes_parsed);
498 WARN_ON(!nodes_cover_memory(fake_nodes));
499}
500
/*
 * Node comparison used by __node_distance() when there is no SLIT, in the
 * CONFIG_NUMA_EMU build: two nodes match when node_to_pxm() reports the
 * same proximity domain for both.
 *
 * Returns non-zero when the proximity domains are equal, 0 otherwise.
 */
static int null_slit_node_compare(int a, int b)
{
	const int pxm_a = node_to_pxm(a);
	const int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
505#else
/*
 * Node comparison used by __node_distance() when there is no SLIT, in the
 * build without CONFIG_NUMA_EMU: a node only matches itself.
 *
 * Returns 1 when @a and @b are the same node ID, 0 otherwise.
 */
static int null_slit_node_compare(int a, int b)
{
	if (a != b)
		return 0;
	return 1;
}
510#endif /* CONFIG_NUMA_EMU */
511
443void __init srat_reserve_add_area(int nodeid) 512void __init srat_reserve_add_area(int nodeid)
444{ 513{
445 if (found_add_area && nodes_add[nodeid].end) { 514 if (found_add_area && nodes_add[nodeid].end) {
@@ -464,7 +533,8 @@ int __node_distance(int a, int b)
464 int index; 533 int index;
465 534
466 if (!acpi_slit) 535 if (!acpi_slit)
467 return a == b ? LOCAL_DISTANCE : REMOTE_DISTANCE; 536 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
537 REMOTE_DISTANCE;
468 index = acpi_slit->locality_count * node_to_pxm(a); 538 index = acpi_slit->locality_count * node_to_pxm(a);
469 return acpi_slit->entry[index + node_to_pxm(b)]; 539 return acpi_slit->entry[index + node_to_pxm(b)];
470} 540}
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 6c44b522f4d3..ab04d848b19d 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -36,8 +36,6 @@
36ACPI_MODULE_NAME("numa"); 36ACPI_MODULE_NAME("numa");
37 37
38static nodemask_t nodes_found_map = NODE_MASK_NONE; 38static nodemask_t nodes_found_map = NODE_MASK_NONE;
39#define PXM_INVAL -1
40#define NID_INVAL -1
41 39
42/* maps to convert between proximity domain and logical node ID */ 40/* maps to convert between proximity domain and logical node ID */
43static int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS] 41static int __cpuinitdata pxm_to_node_map[MAX_PXM_DOMAINS]
@@ -59,6 +57,12 @@ int node_to_pxm(int node)
59 return node_to_pxm_map[node]; 57 return node_to_pxm_map[node];
60} 58}
61 59
60void __acpi_map_pxm_to_node(int pxm, int node)
61{
62 pxm_to_node_map[pxm] = node;
63 node_to_pxm_map[node] = pxm;
64}
65
62int acpi_map_pxm_to_node(int pxm) 66int acpi_map_pxm_to_node(int pxm)
63{ 67{
64 int node = pxm_to_node_map[pxm]; 68 int node = pxm_to_node_map[pxm];
@@ -67,8 +71,7 @@ int acpi_map_pxm_to_node(int pxm)
67 if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) 71 if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
68 return NID_INVAL; 72 return NID_INVAL;
69 node = first_unset_node(nodes_found_map); 73 node = first_unset_node(nodes_found_map);
70 pxm_to_node_map[pxm] = node; 74 __acpi_map_pxm_to_node(pxm, node);
71 node_to_pxm_map[node] = pxm;
72 node_set(node, nodes_found_map); 75 node_set(node, nodes_found_map);
73 } 76 }
74 77
diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
index e2fcee2b340d..62c5ee4311da 100644
--- a/include/acpi/acpi_numa.h
+++ b/include/acpi/acpi_numa.h
@@ -13,6 +13,7 @@
13 13
14extern int pxm_to_node(int); 14extern int pxm_to_node(int);
15extern int node_to_pxm(int); 15extern int node_to_pxm(int);
16extern void __acpi_map_pxm_to_node(int, int);
16extern int acpi_map_pxm_to_node(int); 17extern int acpi_map_pxm_to_node(int);
17extern void __cpuinit acpi_unmap_pxm_to_node(int); 18extern void __cpuinit acpi_unmap_pxm_to_node(int);
18 19
diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h
index a29f05087a31..1da8f49c0fe2 100644
--- a/include/asm-x86_64/acpi.h
+++ b/include/asm-x86_64/acpi.h
@@ -29,6 +29,7 @@
29#ifdef __KERNEL__ 29#ifdef __KERNEL__
30 30
31#include <acpi/pdc_intel.h> 31#include <acpi/pdc_intel.h>
32#include <asm/numa.h>
32 33
33#define COMPILER_DEPENDENT_INT64 long long 34#define COMPILER_DEPENDENT_INT64 long long
34#define COMPILER_DEPENDENT_UINT64 unsigned long long 35#define COMPILER_DEPENDENT_UINT64 unsigned long long
@@ -141,6 +142,16 @@ extern int acpi_pci_disabled;
141extern int acpi_skip_timer_override; 142extern int acpi_skip_timer_override;
142extern int acpi_use_timer_override; 143extern int acpi_use_timer_override;
143 144
/*
 * acpi_fake_nodes() takes the emulated node ranges and their count.
 * Without CONFIG_ACPI_NUMA it is an empty inline stub, so callers do not
 * need an #ifdef of their own.
 */
#ifndef CONFIG_ACPI_NUMA
static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
				   int num_nodes)
{
}
#else
extern void __init acpi_fake_nodes(const struct bootnode *fake_nodes,
				   int num_nodes);
#endif
154
144#endif /*__KERNEL__*/ 155#endif /*__KERNEL__*/
145 156
146#endif /*_ASM_ACPI_H*/ 157#endif /*_ASM_ACPI_H*/
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e88b62e6b3aa..d5680cd7746a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -231,6 +231,9 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size);
231 231
232extern int pnpacpi_disabled; 232extern int pnpacpi_disabled;
233 233
234#define PXM_INVAL (-1)
235#define NID_INVAL (-1)
236
234#else /* CONFIG_ACPI */ 237#else /* CONFIG_ACPI */
235 238
236static inline int acpi_boot_init(void) 239static inline int acpi_boot_init(void)