about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/include/asm/acpi.h | 6
-rw-r--r-- arch/x86/include/asm/amd_nb.h | 4
-rw-r--r-- arch/x86/include/asm/numa_64.h | 1
-rw-r--r-- arch/x86/mm/amdtopology_64.c | 38
-rw-r--r-- arch/x86/mm/numa_64.c | 102
-rw-r--r-- arch/x86/mm/srat_64.c | 65
6 files changed, 40 insertions, 176 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 9c9fe1b0bc4e..a37da6df07f9 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -186,12 +186,6 @@ struct bootnode;
186#ifdef CONFIG_ACPI_NUMA 186#ifdef CONFIG_ACPI_NUMA
187extern int acpi_numa; 187extern int acpi_numa;
188extern int x86_acpi_numa_init(void); 188extern int x86_acpi_numa_init(void);
189
190#ifdef CONFIG_NUMA_EMU
191extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
192 int num_nodes);
193extern int acpi_emu_node_distance(int a, int b);
194#endif
195#endif /* CONFIG_ACPI_NUMA */ 189#endif /* CONFIG_ACPI_NUMA */
196 190
197#define acpi_unlazy_tlb(x) leave_mm(x) 191#define acpi_unlazy_tlb(x) leave_mm(x)
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 384d1188e787..e264ae5a1443 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -20,10 +20,6 @@ extern int amd_numa_init(void);
20extern int amd_get_subcaches(int); 20extern int amd_get_subcaches(int);
21extern int amd_set_subcaches(int, int); 21extern int amd_set_subcaches(int, int);
22 22
23#ifdef CONFIG_NUMA_EMU
24extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
25#endif
26
27struct amd_northbridge { 23struct amd_northbridge {
28 struct pci_dev *misc; 24 struct pci_dev *misc;
29 struct pci_dev *link; 25 struct pci_dev *link;
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 5361c5947986..344eb1790b46 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -34,7 +34,6 @@ extern void __init numa_set_distance(int from, int to, int distance);
34#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) 34#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
35#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) 35#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
36void numa_emu_cmdline(char *); 36void numa_emu_cmdline(char *);
37int __init find_node_by_addr(unsigned long addr);
38#endif /* CONFIG_NUMA_EMU */ 37#endif /* CONFIG_NUMA_EMU */
39#else 38#else
40static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } 39static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f37ea2fe85e6..0919c26820d4 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -194,41 +194,3 @@ int __init amd_numa_init(void)
194 194
195 return 0; 195 return 0;
196} 196}
197
198#ifdef CONFIG_NUMA_EMU
199/*
200 * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
201 * setup to represent the physical topology but reflect the emulated
202 * environment. For each emulated node, the real node which it appears on is
203 * found and a fake pxm to nid mapping is created which mirrors the actual
204 * locality. node_distance() then represents the correct distances between
205 * emulated nodes by using the fake acpi mappings to pxms.
206 */
207void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
208{
209 unsigned int bits;
210 unsigned int cores;
211 unsigned int apicid_base = 0;
212 int i;
213
214 bits = boot_cpu_data.x86_coreid_bits;
215 cores = 1 << bits;
216 early_get_boot_cpu_id();
217 if (boot_cpu_physical_apicid > 0)
218 apicid_base = boot_cpu_physical_apicid;
219
220 for (i = 0; i < nr_nodes; i++) {
221 int index;
222 int nid;
223
224 nid = find_node_by_addr(nodes[i].start);
225 if (nid == NUMA_NO_NODE)
226 continue;
227
228 index = nodeids[nid] << bits;
229#ifdef CONFIG_ACPI_NUMA
230 __acpi_map_pxm_to_node(nid, i);
231#endif
232 }
233}
234#endif /* CONFIG_NUMA_EMU */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 722039e0948f..8ce617735900 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -48,10 +48,6 @@ static struct numa_meminfo numa_meminfo __initdata;
48static int numa_distance_cnt; 48static int numa_distance_cnt;
49static u8 *numa_distance; 49static u8 *numa_distance;
50 50
51#ifdef CONFIG_NUMA_EMU
52static bool numa_emu_dist;
53#endif
54
55/* 51/*
56 * Given a shift value, try to populate memnodemap[] 52 * Given a shift value, try to populate memnodemap[]
57 * Returns : 53 * Returns :
@@ -443,10 +439,6 @@ void __init numa_set_distance(int from, int to, int distance)
443 439
444int __node_distance(int from, int to) 440int __node_distance(int from, int to)
445{ 441{
446#if defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA_EMU)
447 if (numa_emu_dist)
448 return acpi_emu_node_distance(from, to);
449#endif
450 if (from >= numa_distance_cnt || to >= numa_distance_cnt) 442 if (from >= numa_distance_cnt || to >= numa_distance_cnt)
451 return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; 443 return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
452 return numa_distance[from * numa_distance_cnt + to]; 444 return numa_distance[from * numa_distance_cnt + to];
@@ -559,56 +551,6 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
559 return -ENOENT; 551 return -ENOENT;
560} 552}
561 553
562int __init find_node_by_addr(unsigned long addr)
563{
564 const struct numa_meminfo *mi = &numa_meminfo;
565 int i;
566
567 for (i = 0; i < mi->nr_blks; i++) {
568 /*
569 * Find the real node that this emulated node appears on. For
570 * the sake of simplicity, we only use a real node's starting
571 * address to determine which emulated node it appears on.
572 */
573 if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
574 return mi->blk[i].nid;
575 }
576 return NUMA_NO_NODE;
577}
578
579static void __init fake_physnodes(int acpi, int amd,
580 const struct numa_meminfo *ei)
581{
582 static struct bootnode nodes[MAX_NUMNODES] __initdata;
583 int i, nr_nodes = 0;
584
585 for (i = 0; i < ei->nr_blks; i++) {
586 int nid = ei->blk[i].nid;
587
588 if (nodes[nid].start == nodes[nid].end) {
589 nodes[nid].start = ei->blk[i].start;
590 nodes[nid].end = ei->blk[i].end;
591 nr_nodes++;
592 } else {
593 nodes[nid].start = min(ei->blk[i].start, nodes[nid].start);
594 nodes[nid].end = max(ei->blk[i].end, nodes[nid].end);
595 }
596 }
597
598 BUG_ON(acpi && amd);
599#ifdef CONFIG_ACPI_NUMA
600 if (acpi)
601 acpi_fake_nodes(nodes, nr_nodes);
602#endif
603#ifdef CONFIG_AMD_NUMA
604 if (amd)
605 amd_fake_nodes(nodes, nr_nodes);
606#endif
607 if (!acpi && !amd)
608 for (i = 0; i < nr_cpu_ids; i++)
609 numa_set_node(i, 0);
610}
611
612/* 554/*
613 * Sets up nid to range from @start to @end. The return value is -errno if 555 * Sets up nid to range from @start to @end. The return value is -errno if
614 * something went wrong, 0 otherwise. 556 * something went wrong, 0 otherwise.
@@ -853,11 +795,13 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
853 * Sets up the system RAM area from start_pfn to last_pfn according to the 795 * Sets up the system RAM area from start_pfn to last_pfn according to the
854 * numa=fake command-line option. 796 * numa=fake command-line option.
855 */ 797 */
856static bool __init numa_emulation(int acpi, int amd) 798static bool __init numa_emulation(void)
857{ 799{
858 static struct numa_meminfo ei __initdata; 800 static struct numa_meminfo ei __initdata;
859 static struct numa_meminfo pi __initdata; 801 static struct numa_meminfo pi __initdata;
860 const u64 max_addr = max_pfn << PAGE_SHIFT; 802 const u64 max_addr = max_pfn << PAGE_SHIFT;
803 int phys_dist_cnt = numa_distance_cnt;
804 u8 *phys_dist = NULL;
861 int i, j, ret; 805 int i, j, ret;
862 806
863 memset(&ei, 0, sizeof(ei)); 807 memset(&ei, 0, sizeof(ei));
@@ -891,6 +835,25 @@ static bool __init numa_emulation(int acpi, int amd)
891 return false; 835 return false;
892 } 836 }
893 837
838 /*
839 * Copy the original distance table. It's temporary so no need to
840 * reserve it.
841 */
842 if (phys_dist_cnt) {
843 size_t size = phys_dist_cnt * sizeof(numa_distance[0]);
844 u64 phys;
845
846 phys = memblock_find_in_range(0,
847 (u64)max_pfn_mapped << PAGE_SHIFT,
848 size, PAGE_SIZE);
849 if (phys == MEMBLOCK_ERROR) {
850 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
851 return false;
852 }
853 phys_dist = __va(phys);
854 memcpy(phys_dist, numa_distance, size);
855 }
856
894 /* commit */ 857 /* commit */
895 numa_meminfo = ei; 858 numa_meminfo = ei;
896 859
@@ -913,8 +876,23 @@ static bool __init numa_emulation(int acpi, int amd)
913 if (emu_nid_to_phys[i] == NUMA_NO_NODE) 876 if (emu_nid_to_phys[i] == NUMA_NO_NODE)
914 emu_nid_to_phys[i] = 0; 877 emu_nid_to_phys[i] = 0;
915 878
916 fake_physnodes(acpi, amd, &ei); 879 /* transform distance table */
917 numa_emu_dist = true; 880 numa_reset_distance();
881 for (i = 0; i < MAX_NUMNODES; i++) {
882 for (j = 0; j < MAX_NUMNODES; j++) {
883 int physi = emu_nid_to_phys[i];
884 int physj = emu_nid_to_phys[j];
885 int dist;
886
887 if (physi >= phys_dist_cnt || physj >= phys_dist_cnt)
888 dist = physi == physj ?
889 LOCAL_DISTANCE : REMOTE_DISTANCE;
890 else
891 dist = phys_dist[physi * phys_dist_cnt + physj];
892
893 numa_set_distance(i, j, dist);
894 }
895 }
918 return true; 896 return true;
919} 897}
920#endif /* CONFIG_NUMA_EMU */ 898#endif /* CONFIG_NUMA_EMU */
@@ -970,7 +948,7 @@ void __init initmem_init(void)
970 * If requested, try emulation. If emulation is not used, 948 * If requested, try emulation. If emulation is not used,
971 * build identity emu_nid_to_phys[] for numa_add_cpu() 949 * build identity emu_nid_to_phys[] for numa_add_cpu()
972 */ 950 */
973 if (!emu_cmdline || !numa_emulation(i == 0, i == 1)) 951 if (!emu_cmdline || !numa_emulation())
974 for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++) 952 for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
975 emu_nid_to_phys[j] = j; 953 emu_nid_to_phys[j] = j;
976#endif 954#endif
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index d4fbfea53543..8e9d3394f6d4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -26,8 +26,6 @@
26 26
27int acpi_numa __initdata; 27int acpi_numa __initdata;
28 28
29static struct acpi_table_slit *acpi_slit;
30
31static struct bootnode nodes_add[MAX_NUMNODES]; 29static struct bootnode nodes_add[MAX_NUMNODES];
32 30
33static __init int setup_node(int pxm) 31static __init int setup_node(int pxm)
@@ -51,25 +49,11 @@ static __init inline int srat_disabled(void)
51void __init acpi_numa_slit_init(struct acpi_table_slit *slit) 49void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
52{ 50{
53 int i, j; 51 int i, j;
54 unsigned length;
55 unsigned long phys;
56 52
57 for (i = 0; i < slit->locality_count; i++) 53 for (i = 0; i < slit->locality_count; i++)
58 for (j = 0; j < slit->locality_count; j++) 54 for (j = 0; j < slit->locality_count; j++)
59 numa_set_distance(pxm_to_node(i), pxm_to_node(j), 55 numa_set_distance(pxm_to_node(i), pxm_to_node(j),
60 slit->entry[slit->locality_count * i + j]); 56 slit->entry[slit->locality_count * i + j]);
61
62 /* acpi_slit is used only by emulation */
63 length = slit->header.length;
64 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
65 PAGE_SIZE);
66
67 if (phys == MEMBLOCK_ERROR)
68 panic(" Can not save slit!\n");
69
70 acpi_slit = __va(phys);
71 memcpy(acpi_slit, slit, length);
72 memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
73} 57}
74 58
75/* Callback for Proximity Domain -> x2APIC mapping */ 59/* Callback for Proximity Domain -> x2APIC mapping */
@@ -261,55 +245,6 @@ int __init x86_acpi_numa_init(void)
261 return srat_disabled() ? -EINVAL : 0; 245 return srat_disabled() ? -EINVAL : 0;
262} 246}
263 247
264#ifdef CONFIG_NUMA_EMU
265static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
266 [0 ... MAX_NUMNODES-1] = PXM_INVAL
267};
268
269/*
270 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
271 * mappings that respect the real ACPI topology but reflect our emulated
272 * environment. For each emulated node, we find which real node it appears on
273 * and create PXM to NID mappings for those fake nodes which mirror that
274 * locality. SLIT will now represent the correct distances between emulated
275 * nodes as a result of the real topology.
276 */
277void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
278{
279 int i;
280
281 for (i = 0; i < num_nodes; i++) {
282 int nid, pxm;
283
284 nid = find_node_by_addr(fake_nodes[i].start);
285 if (nid == NUMA_NO_NODE)
286 continue;
287 pxm = node_to_pxm(nid);
288 if (pxm == PXM_INVAL)
289 continue;
290 fake_node_to_pxm_map[i] = pxm;
291 }
292
293 for (i = 0; i < num_nodes; i++)
294 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
295
296 for (i = 0; i < num_nodes; i++)
297 if (fake_nodes[i].start != fake_nodes[i].end)
298 node_set(i, numa_nodes_parsed);
299}
300
301int acpi_emu_node_distance(int a, int b)
302{
303 int index;
304
305 if (!acpi_slit)
306 return node_to_pxm(a) == node_to_pxm(b) ?
307 LOCAL_DISTANCE : REMOTE_DISTANCE;
308 index = acpi_slit->locality_count * node_to_pxm(a);
309 return acpi_slit->entry[index + node_to_pxm(b)];
310}
311#endif /* CONFIG_NUMA_EMU */
312
313#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) 248#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
314int memory_add_physaddr_to_nid(u64 start) 249int memory_add_physaddr_to_nid(u64 start)
315{ 250{