aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2011-02-16 11:11:10 -0500
committer Tejun Heo <tj@kernel.org> 2011-02-16 11:11:10 -0500
commit e23bba604433a202cd301a976454a90ea6b783ef (patch)
tree 012bc42b25abd68d77e9c6946a6b0d38b9b71bdc /arch/x86/mm
parent 6b78cb549b4105cbf7c6f7461f27a21f00c44997 (diff)
x86-64, NUMA: Unify emulated distance mapping
NUMA emulation needs to update node distance information. It did so by remapping the apicid to PXM mapping, even when amdtopology is being used. There is no reason to go through such a convoluted path. The generic code has all the information necessary to transform the distance table to the emulated nid space.

Implement generic distance table transformation in numa_emulation() and drop the private implementations in srat_64 and amdtopology_64.

This makes find_node_by_addr(), fake_physnodes() and related functions unnecessary, so drop them.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- arch/x86/mm/amdtopology_64.c 38
-rw-r--r-- arch/x86/mm/numa_64.c 102
-rw-r--r-- arch/x86/mm/srat_64.c 65
3 files changed, 40 insertions, 165 deletions
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f37ea2fe85e6..0919c26820d4 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -194,41 +194,3 @@ int __init amd_numa_init(void)
194 194
195 return 0; 195 return 0;
196} 196}
197
198#ifdef CONFIG_NUMA_EMU
199/*
200 * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
201 * setup to represent the physical topology but reflect the emulated
202 * environment. For each emulated node, the real node which it appears on is
203 * found and a fake pxm to nid mapping is created which mirrors the actual
204 * locality. node_distance() then represents the correct distances between
205 * emulated nodes by using the fake acpi mappings to pxms.
206 */
207void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
208{
209 unsigned int bits;
210 unsigned int cores;
211 unsigned int apicid_base = 0;
212 int i;
213
214 bits = boot_cpu_data.x86_coreid_bits;
215 cores = 1 << bits;
216 early_get_boot_cpu_id();
217 if (boot_cpu_physical_apicid > 0)
218 apicid_base = boot_cpu_physical_apicid;
219
220 for (i = 0; i < nr_nodes; i++) {
221 int index;
222 int nid;
223
224 nid = find_node_by_addr(nodes[i].start);
225 if (nid == NUMA_NO_NODE)
226 continue;
227
228 index = nodeids[nid] << bits;
229#ifdef CONFIG_ACPI_NUMA
230 __acpi_map_pxm_to_node(nid, i);
231#endif
232 }
233}
234#endif /* CONFIG_NUMA_EMU */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 722039e0948f..8ce617735900 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -48,10 +48,6 @@ static struct numa_meminfo numa_meminfo __initdata;
48static int numa_distance_cnt; 48static int numa_distance_cnt;
49static u8 *numa_distance; 49static u8 *numa_distance;
50 50
51#ifdef CONFIG_NUMA_EMU
52static bool numa_emu_dist;
53#endif
54
55/* 51/*
56 * Given a shift value, try to populate memnodemap[] 52 * Given a shift value, try to populate memnodemap[]
57 * Returns : 53 * Returns :
@@ -443,10 +439,6 @@ void __init numa_set_distance(int from, int to, int distance)
443 439
444int __node_distance(int from, int to) 440int __node_distance(int from, int to)
445{ 441{
446#if defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA_EMU)
447 if (numa_emu_dist)
448 return acpi_emu_node_distance(from, to);
449#endif
450 if (from >= numa_distance_cnt || to >= numa_distance_cnt) 442 if (from >= numa_distance_cnt || to >= numa_distance_cnt)
451 return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; 443 return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
452 return numa_distance[from * numa_distance_cnt + to]; 444 return numa_distance[from * numa_distance_cnt + to];
@@ -559,56 +551,6 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
559 return -ENOENT; 551 return -ENOENT;
560} 552}
561 553
562int __init find_node_by_addr(unsigned long addr)
563{
564 const struct numa_meminfo *mi = &numa_meminfo;
565 int i;
566
567 for (i = 0; i < mi->nr_blks; i++) {
568 /*
569 * Find the real node that this emulated node appears on. For
570 * the sake of simplicity, we only use a real node's starting
571 * address to determine which emulated node it appears on.
572 */
573 if (addr >= mi->blk[i].start && addr < mi->blk[i].end)
574 return mi->blk[i].nid;
575 }
576 return NUMA_NO_NODE;
577}
578
579static void __init fake_physnodes(int acpi, int amd,
580 const struct numa_meminfo *ei)
581{
582 static struct bootnode nodes[MAX_NUMNODES] __initdata;
583 int i, nr_nodes = 0;
584
585 for (i = 0; i < ei->nr_blks; i++) {
586 int nid = ei->blk[i].nid;
587
588 if (nodes[nid].start == nodes[nid].end) {
589 nodes[nid].start = ei->blk[i].start;
590 nodes[nid].end = ei->blk[i].end;
591 nr_nodes++;
592 } else {
593 nodes[nid].start = min(ei->blk[i].start, nodes[nid].start);
594 nodes[nid].end = max(ei->blk[i].end, nodes[nid].end);
595 }
596 }
597
598 BUG_ON(acpi && amd);
599#ifdef CONFIG_ACPI_NUMA
600 if (acpi)
601 acpi_fake_nodes(nodes, nr_nodes);
602#endif
603#ifdef CONFIG_AMD_NUMA
604 if (amd)
605 amd_fake_nodes(nodes, nr_nodes);
606#endif
607 if (!acpi && !amd)
608 for (i = 0; i < nr_cpu_ids; i++)
609 numa_set_node(i, 0);
610}
611
612/* 554/*
613 * Sets up nid to range from @start to @end. The return value is -errno if 555 * Sets up nid to range from @start to @end. The return value is -errno if
614 * something went wrong, 0 otherwise. 556 * something went wrong, 0 otherwise.
@@ -853,11 +795,13 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
853 * Sets up the system RAM area from start_pfn to last_pfn according to the 795 * Sets up the system RAM area from start_pfn to last_pfn according to the
854 * numa=fake command-line option. 796 * numa=fake command-line option.
855 */ 797 */
856static bool __init numa_emulation(int acpi, int amd) 798static bool __init numa_emulation(void)
857{ 799{
858 static struct numa_meminfo ei __initdata; 800 static struct numa_meminfo ei __initdata;
859 static struct numa_meminfo pi __initdata; 801 static struct numa_meminfo pi __initdata;
860 const u64 max_addr = max_pfn << PAGE_SHIFT; 802 const u64 max_addr = max_pfn << PAGE_SHIFT;
803 int phys_dist_cnt = numa_distance_cnt;
804 u8 *phys_dist = NULL;
861 int i, j, ret; 805 int i, j, ret;
862 806
863 memset(&ei, 0, sizeof(ei)); 807 memset(&ei, 0, sizeof(ei));
@@ -891,6 +835,25 @@ static bool __init numa_emulation(int acpi, int amd)
891 return false; 835 return false;
892 } 836 }
893 837
838 /*
839 * Copy the original distance table. It's temporary so no need to
840 * reserve it.
841 */
842 if (phys_dist_cnt) {
843 size_t size = phys_dist_cnt * sizeof(numa_distance[0]);
844 u64 phys;
845
846 phys = memblock_find_in_range(0,
847 (u64)max_pfn_mapped << PAGE_SHIFT,
848 size, PAGE_SIZE);
849 if (phys == MEMBLOCK_ERROR) {
850 pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
851 return false;
852 }
853 phys_dist = __va(phys);
854 memcpy(phys_dist, numa_distance, size);
855 }
856
894 /* commit */ 857 /* commit */
895 numa_meminfo = ei; 858 numa_meminfo = ei;
896 859
@@ -913,8 +876,23 @@ static bool __init numa_emulation(int acpi, int amd)
913 if (emu_nid_to_phys[i] == NUMA_NO_NODE) 876 if (emu_nid_to_phys[i] == NUMA_NO_NODE)
914 emu_nid_to_phys[i] = 0; 877 emu_nid_to_phys[i] = 0;
915 878
916 fake_physnodes(acpi, amd, &ei); 879 /* transform distance table */
917 numa_emu_dist = true; 880 numa_reset_distance();
881 for (i = 0; i < MAX_NUMNODES; i++) {
882 for (j = 0; j < MAX_NUMNODES; j++) {
883 int physi = emu_nid_to_phys[i];
884 int physj = emu_nid_to_phys[j];
885 int dist;
886
887 if (physi >= phys_dist_cnt || physj >= phys_dist_cnt)
888 dist = physi == physj ?
889 LOCAL_DISTANCE : REMOTE_DISTANCE;
890 else
891 dist = phys_dist[physi * phys_dist_cnt + physj];
892
893 numa_set_distance(i, j, dist);
894 }
895 }
918 return true; 896 return true;
919} 897}
920#endif /* CONFIG_NUMA_EMU */ 898#endif /* CONFIG_NUMA_EMU */
@@ -970,7 +948,7 @@ void __init initmem_init(void)
970 * If requested, try emulation. If emulation is not used, 948 * If requested, try emulation. If emulation is not used,
971 * build identity emu_nid_to_phys[] for numa_add_cpu() 949 * build identity emu_nid_to_phys[] for numa_add_cpu()
972 */ 950 */
973 if (!emu_cmdline || !numa_emulation(i == 0, i == 1)) 951 if (!emu_cmdline || !numa_emulation())
974 for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++) 952 for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
975 emu_nid_to_phys[j] = j; 953 emu_nid_to_phys[j] = j;
976#endif 954#endif
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index d4fbfea53543..8e9d3394f6d4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -26,8 +26,6 @@
26 26
27int acpi_numa __initdata; 27int acpi_numa __initdata;
28 28
29static struct acpi_table_slit *acpi_slit;
30
31static struct bootnode nodes_add[MAX_NUMNODES]; 29static struct bootnode nodes_add[MAX_NUMNODES];
32 30
33static __init int setup_node(int pxm) 31static __init int setup_node(int pxm)
@@ -51,25 +49,11 @@ static __init inline int srat_disabled(void)
51void __init acpi_numa_slit_init(struct acpi_table_slit *slit) 49void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
52{ 50{
53 int i, j; 51 int i, j;
54 unsigned length;
55 unsigned long phys;
56 52
57 for (i = 0; i < slit->locality_count; i++) 53 for (i = 0; i < slit->locality_count; i++)
58 for (j = 0; j < slit->locality_count; j++) 54 for (j = 0; j < slit->locality_count; j++)
59 numa_set_distance(pxm_to_node(i), pxm_to_node(j), 55 numa_set_distance(pxm_to_node(i), pxm_to_node(j),
60 slit->entry[slit->locality_count * i + j]); 56 slit->entry[slit->locality_count * i + j]);
61
62 /* acpi_slit is used only by emulation */
63 length = slit->header.length;
64 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
65 PAGE_SIZE);
66
67 if (phys == MEMBLOCK_ERROR)
68 panic(" Can not save slit!\n");
69
70 acpi_slit = __va(phys);
71 memcpy(acpi_slit, slit, length);
72 memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
73} 57}
74 58
75/* Callback for Proximity Domain -> x2APIC mapping */ 59/* Callback for Proximity Domain -> x2APIC mapping */
@@ -261,55 +245,6 @@ int __init x86_acpi_numa_init(void)
261 return srat_disabled() ? -EINVAL : 0; 245 return srat_disabled() ? -EINVAL : 0;
262} 246}
263 247
264#ifdef CONFIG_NUMA_EMU
265static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
266 [0 ... MAX_NUMNODES-1] = PXM_INVAL
267};
268
269/*
270 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
271 * mappings that respect the real ACPI topology but reflect our emulated
272 * environment. For each emulated node, we find which real node it appears on
273 * and create PXM to NID mappings for those fake nodes which mirror that
274 * locality. SLIT will now represent the correct distances between emulated
275 * nodes as a result of the real topology.
276 */
277void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
278{
279 int i;
280
281 for (i = 0; i < num_nodes; i++) {
282 int nid, pxm;
283
284 nid = find_node_by_addr(fake_nodes[i].start);
285 if (nid == NUMA_NO_NODE)
286 continue;
287 pxm = node_to_pxm(nid);
288 if (pxm == PXM_INVAL)
289 continue;
290 fake_node_to_pxm_map[i] = pxm;
291 }
292
293 for (i = 0; i < num_nodes; i++)
294 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
295
296 for (i = 0; i < num_nodes; i++)
297 if (fake_nodes[i].start != fake_nodes[i].end)
298 node_set(i, numa_nodes_parsed);
299}
300
301int acpi_emu_node_distance(int a, int b)
302{
303 int index;
304
305 if (!acpi_slit)
306 return node_to_pxm(a) == node_to_pxm(b) ?
307 LOCAL_DISTANCE : REMOTE_DISTANCE;
308 index = acpi_slit->locality_count * node_to_pxm(a);
309 return acpi_slit->entry[index + node_to_pxm(b)];
310}
311#endif /* CONFIG_NUMA_EMU */
312
313#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) 248#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
314int memory_add_physaddr_to_nid(u64 start) 249int memory_add_physaddr_to_nid(u64 start)
315{ 250{