aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Rientjes <rientjes@google.com> 2010-12-22 20:23:51 -0500
committer: H. Peter Anvin <hpa@linux.intel.com> 2010-12-23 18:27:14 -0500
commit: f51bf3073a145a5b3263fd882c52d6ec04b687da (patch)
tree: c9e07ebc957cbb768c4e750b1a67640e0ad81118
parent: 4e76f4e67a106ed827ca721b4c8b622047cd2f6d (diff)
x86, numa: Fake apicid and pxm mappings for NUMA emulation
This patch adds the equivalent of acpi_fake_nodes() for AMD Northbridge platforms. The goal is to fake the apicid-to-node mappings for NUMA emulation so the physical topology of the machine is correctly maintained within the kernel.

This change also fakes proximity domains for both ACPI and k8 code so the physical distance between emulated nodes is maintained via node_distance(). This exports the correct distances via /sys/devices/system/node/.../distance based on the underlying topology.

A new helper function, fake_physnodes(), is introduced to invoke the appropriate NUMA code to fake these two mappings based on the system type. If there is no underlying NUMA configuration, all cpus are mapped to node 0 for local distance.

Since acpi_fake_nodes() is now only called when CONFIG_ACPI_NUMA is enabled, its empty stub for configurations without CONFIG_ACPI_NUMA can be removed from the header file.

Signed-off-by: David Rientjes <rientjes@google.com>
LKML-Reference: <alpine.DEB.2.00.1012221701360.3701@chino.kir.corp.google.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r--  arch/x86/include/asm/acpi.h     5
-rw-r--r--  arch/x86/include/asm/amd_nb.h   1
-rw-r--r--  arch/x86/mm/amdtopology_64.c   91
-rw-r--r--  arch/x86/mm/numa_64.c          20
-rw-r--r--  arch/x86/mm/srat_64.c           2
5 files changed, 95 insertions, 24 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index b326fa99db57..8288daf72dc9 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -193,11 +193,6 @@ extern int acpi_scan_nodes(unsigned long start, unsigned long end);
193extern void acpi_fake_nodes(const struct bootnode *fake_nodes, 193extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
194 int num_nodes); 194 int num_nodes);
195#endif 195#endif
196#else
197static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
198 int num_nodes)
199{
200}
201#endif /* CONFIG_ACPI_NUMA */ 196#endif /* CONFIG_ACPI_NUMA */
202 197
203#define acpi_unlazy_tlb(x) leave_mm(x) 198#define acpi_unlazy_tlb(x) leave_mm(x)
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 9c16cde63f04..8f6192c1592c 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -13,6 +13,7 @@ extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
13extern int amd_scan_nodes(void); 13extern int amd_scan_nodes(void);
14 14
15#ifdef CONFIG_NUMA_EMU 15#ifdef CONFIG_NUMA_EMU
16extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
16extern int amd_get_nodes(struct bootnode *nodes); 17extern int amd_get_nodes(struct bootnode *nodes);
17#endif 18#endif
18 19
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index fe050af614e2..eb5cbb97b68d 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -27,6 +27,7 @@
27#include <asm/amd_nb.h> 27#include <asm/amd_nb.h>
28 28
29static struct bootnode __initdata nodes[8]; 29static struct bootnode __initdata nodes[8];
30static unsigned char __initdata nodeids[8];
30static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; 31static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
31 32
32static __init int find_northbridge(void) 33static __init int find_northbridge(void)
@@ -69,21 +70,6 @@ static __init void early_get_boot_cpu_id(void)
69 early_init_lapic_mapping(); 70 early_init_lapic_mapping();
70} 71}
71 72
72#ifdef CONFIG_NUMA_EMU
73int __init amd_get_nodes(struct bootnode *physnodes)
74{
75 int i;
76 int ret = 0;
77
78 for_each_node_mask(i, nodes_parsed) {
79 physnodes[ret].start = nodes[i].start;
80 physnodes[ret].end = nodes[i].end;
81 ret++;
82 }
83 return ret;
84}
85#endif /* CONFIG_NUMA_EMU */
86
87int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) 73int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
88{ 74{
89 unsigned long start = PFN_PHYS(start_pfn); 75 unsigned long start = PFN_PHYS(start_pfn);
@@ -116,7 +102,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
116 base = read_pci_config(0, nb, 1, 0x40 + i*8); 102 base = read_pci_config(0, nb, 1, 0x40 + i*8);
117 limit = read_pci_config(0, nb, 1, 0x44 + i*8); 103 limit = read_pci_config(0, nb, 1, 0x44 + i*8);
118 104
119 nodeid = limit & 7; 105 nodeids[i] = nodeid = limit & 7;
120 if ((base & 3) == 0) { 106 if ((base & 3) == 0) {
121 if (i < numnodes) 107 if (i < numnodes)
122 pr_info("Skipping disabled node %d\n", i); 108 pr_info("Skipping disabled node %d\n", i);
@@ -196,6 +182,79 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
196 return 0; 182 return 0;
197} 183}
198 184
185#ifdef CONFIG_NUMA_EMU
186static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
187 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
188};
189
190int __init amd_get_nodes(struct bootnode *physnodes)
191{
192 int i;
193 int ret = 0;
194
195 for_each_node_mask(i, nodes_parsed) {
196 physnodes[ret].start = nodes[i].start;
197 physnodes[ret].end = nodes[i].end;
198 ret++;
199 }
200 return ret;
201}
202
203static int __init find_node_by_addr(unsigned long addr)
204{
205 int ret = NUMA_NO_NODE;
206 int i;
207
208 for (i = 0; i < 8; i++)
209 if (addr >= nodes[i].start && addr < nodes[i].end) {
210 ret = i;
211 break;
212 }
213 return ret;
214}
215
216/*
217 * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
218 * setup to represent the physical topology but reflect the emulated
219 * environment. For each emulated node, the real node which it appears on is
220 * found and a fake pxm to nid mapping is created which mirrors the actual
221 * locality. node_distance() then represents the correct distances between
222 * emulated nodes by using the fake acpi mappings to pxms.
223 */
224void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
225{
226 unsigned int bits;
227 unsigned int cores;
228 unsigned int apicid_base = 0;
229 int i;
230
231 bits = boot_cpu_data.x86_coreid_bits;
232 cores = 1 << bits;
233 early_get_boot_cpu_id();
234 if (boot_cpu_physical_apicid > 0)
235 apicid_base = boot_cpu_physical_apicid;
236
237 for (i = 0; i < nr_nodes; i++) {
238 int index;
239 int nid;
240 int j;
241
242 nid = find_node_by_addr(nodes[i].start);
243 if (nid == NUMA_NO_NODE)
244 continue;
245
246 index = nodeids[nid] << bits;
247 if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
248 for (j = apicid_base; j < cores + apicid_base; j++)
249 fake_apicid_to_node[index + j] = i;
250#ifdef CONFIG_ACPI_NUMA
251 __acpi_map_pxm_to_node(nid, i);
252#endif
253 }
254 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
255}
256#endif /* CONFIG_NUMA_EMU */
257
199int __init amd_scan_nodes(void) 258int __init amd_scan_nodes(void)
200{ 259{
201 unsigned int bits; 260 unsigned int bits;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7762a517d69d..cc390f3a1bde 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -324,6 +324,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
324 return ret; 324 return ret;
325} 325}
326 326
327static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
328{
329 int i;
330
331 BUG_ON(acpi && amd);
332#ifdef CONFIG_ACPI_NUMA
333 if (acpi)
334 acpi_fake_nodes(nodes, nr_nodes);
335#endif
336#ifdef CONFIG_AMD_NUMA
337 if (amd)
338 amd_fake_nodes(nodes, nr_nodes);
339#endif
340 if (!acpi && !amd)
341 for (i = 0; i < nr_cpu_ids; i++)
342 numa_set_node(i, 0);
343}
344
327/* 345/*
328 * Setups up nid to range from addr to addr + size. If the end 346 * Setups up nid to range from addr to addr + size. If the end
329 * boundary is greater than max_addr, then max_addr is used instead. 347 * boundary is greater than max_addr, then max_addr is used instead.
@@ -595,7 +613,7 @@ static int __init numa_emulation(unsigned long start_pfn,
595 nodes[i].end >> PAGE_SHIFT); 613 nodes[i].end >> PAGE_SHIFT);
596 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 614 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
597 } 615 }
598 acpi_fake_nodes(nodes, num_nodes); 616 fake_physnodes(acpi, amd, num_nodes);
599 numa_init_array(); 617 numa_init_array();
600 return 0; 618 return 0;
601} 619}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 8241bf0f6eb2..c48b443706c5 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -497,8 +497,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
497{ 497{
498 int i, j; 498 int i, j;
499 499
500 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
501 "topology.\n");
502 for (i = 0; i < num_nodes; i++) { 500 for (i = 0; i < num_nodes; i++) {
503 int nid, pxm; 501 int nid, pxm;
504 502