aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/numa_64.c
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2011-02-16 11:11:10 -0500
committer: Tejun Heo <tj@kernel.org> 2011-02-16 11:11:10 -0500
commit: 9d073caeb372940af02a768d2b7e845ac732bda0 (patch)
tree: 7d46006e3f0fc1fb0c13924735d9afea6ac9c3eb /arch/x86/mm/numa_64.c
parent: d9c515eacb3bde73f7a5ecb7e35ea6e660ad421d (diff)
x86-64, NUMA: Build and use direct emulated nid -> phys nid mapping
NUMA emulation copied the physical NUMA configuration into physnodes[] and used it to reverse-map emulated nodes to physical nodes, which is unnecessarily convoluted.

Build the emu_nid_to_phys[] array to map emulated nids directly to the matching physical nids and use it in numa_add_cpu(). physnodes[] will be removed with further patches.

- v2: Fixed a build failure when CONFIG_DEBUG_PER_CPU_MAPS is enabled, caused by a missing local variable definition. Reported by Ingo.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/mm/numa_64.c')
-rw-r--r-- arch/x86/mm/numa_64.c 64
1 files changed, 35 insertions, 29 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 20e2cfe5ab82..e9919c4d1573 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -542,7 +542,9 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
542#ifdef CONFIG_NUMA_EMU 542#ifdef CONFIG_NUMA_EMU
543/* Numa emulation */ 543/* Numa emulation */
544static struct bootnode nodes[MAX_NUMNODES] __initdata; 544static struct bootnode nodes[MAX_NUMNODES] __initdata;
545static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata; 545static struct bootnode physnodes[MAX_NUMNODES] __initdata;
546
547static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
546static char *emu_cmdline __initdata; 548static char *emu_cmdline __initdata;
547 549
548void __init numa_emu_cmdline(char *str) 550void __init numa_emu_cmdline(char *str)
@@ -649,7 +651,8 @@ static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
649 * allocation past addr and -1 otherwise. addr is adjusted to be at 651 * allocation past addr and -1 otherwise. addr is adjusted to be at
650 * the end of the node. 652 * the end of the node.
651 */ 653 */
652static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) 654static int __init setup_node_range(int nid, int physnid,
655 u64 *addr, u64 size, u64 max_addr)
653{ 656{
654 int ret = 0; 657 int ret = 0;
655 nodes[nid].start = *addr; 658 nodes[nid].start = *addr;
@@ -660,6 +663,10 @@ static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
660 } 663 }
661 nodes[nid].end = *addr; 664 nodes[nid].end = *addr;
662 node_set(nid, node_possible_map); 665 node_set(nid, node_possible_map);
666
667 if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
668 emu_nid_to_phys[nid] = physnid;
669
663 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, 670 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
664 nodes[nid].start, nodes[nid].end, 671 nodes[nid].start, nodes[nid].end,
665 (nodes[nid].end - nodes[nid].start) >> 20); 672 (nodes[nid].end - nodes[nid].start) >> 20);
@@ -756,7 +763,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
756 if (nodes_weight(physnode_mask) + ret >= nr_nodes) 763 if (nodes_weight(physnode_mask) + ret >= nr_nodes)
757 end = physnodes[i].end; 764 end = physnodes[i].end;
758 765
759 if (setup_node_range(ret++, &physnodes[i].start, 766 if (setup_node_range(ret++, i, &physnodes[i].start,
760 end - physnodes[i].start, 767 end - physnodes[i].start,
761 physnodes[i].end) < 0) 768 physnodes[i].end) < 0)
762 node_clear(i, physnode_mask); 769 node_clear(i, physnode_mask);
@@ -852,7 +859,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
852 * later. If setup_node_range() returns non-zero, there 859 * later. If setup_node_range() returns non-zero, there
853 * is no more memory available on this physical node. 860 * is no more memory available on this physical node.
854 */ 861 */
855 if (setup_node_range(ret++, &physnodes[i].start, 862 if (setup_node_range(ret++, i, &physnodes[i].start,
856 end - physnodes[i].start, 863 end - physnodes[i].start,
857 physnodes[i].end) < 0) 864 physnodes[i].end) < 0)
858 node_clear(i, physnode_mask); 865 node_clear(i, physnode_mask);
@@ -872,6 +879,9 @@ static int __init numa_emulation(int acpi, int amd)
872 int num_nodes; 879 int num_nodes;
873 int i; 880 int i;
874 881
882 for (i = 0; i < MAX_NUMNODES; i++)
883 emu_nid_to_phys[i] = NUMA_NO_NODE;
884
875 /* 885 /*
876 * If the numa=fake command-line contains a 'M' or 'G', it represents 886 * If the numa=fake command-line contains a 'M' or 'G', it represents
877 * the fixed node size. Otherwise, if it is just a single number N, 887 * the fixed node size. Otherwise, if it is just a single number N,
@@ -892,6 +902,11 @@ static int __init numa_emulation(int acpi, int amd)
892 if (num_nodes < 0) 902 if (num_nodes < 0)
893 return num_nodes; 903 return num_nodes;
894 904
905 /* make sure all emulated nodes are mapped to a physical node */
906 for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
907 if (emu_nid_to_phys[i] == NUMA_NO_NODE)
908 emu_nid_to_phys[i] = 0;
909
895 ei.nr_blks = num_nodes; 910 ei.nr_blks = num_nodes;
896 for (i = 0; i < ei.nr_blks; i++) { 911 for (i = 0; i < ei.nr_blks; i++) {
897 ei.blk[i].start = nodes[i].start; 912 ei.blk[i].start = nodes[i].start;
@@ -918,7 +933,6 @@ static int __init numa_emulation(int acpi, int amd)
918 init_memory_mapping_high(); 933 init_memory_mapping_high();
919 for_each_node_mask(i, node_possible_map) 934 for_each_node_mask(i, node_possible_map)
920 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 935 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
921 setup_physnodes(0, max_addr);
922 fake_physnodes(acpi, amd, num_nodes); 936 fake_physnodes(acpi, amd, num_nodes);
923 numa_init_array(); 937 numa_init_array();
924 numa_emu_dist = true; 938 numa_emu_dist = true;
@@ -976,7 +990,11 @@ void __init initmem_init(void)
976 setup_physnodes(0, max_pfn << PAGE_SHIFT); 990 setup_physnodes(0, max_pfn << PAGE_SHIFT);
977 if (emu_cmdline && !numa_emulation(i == 0, i == 1)) 991 if (emu_cmdline && !numa_emulation(i == 0, i == 1))
978 return; 992 return;
979 setup_physnodes(0, max_pfn << PAGE_SHIFT); 993
994 /* not emulating, build identity mapping for numa_add_cpu() */
995 for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
996 emu_nid_to_phys[j] = j;
997
980 nodes_clear(node_possible_map); 998 nodes_clear(node_possible_map);
981 nodes_clear(node_online_map); 999 nodes_clear(node_online_map);
982#endif 1000#endif
@@ -1033,7 +1051,6 @@ int __cpuinit numa_cpu_node(int cpu)
1033# ifndef CONFIG_DEBUG_PER_CPU_MAPS 1051# ifndef CONFIG_DEBUG_PER_CPU_MAPS
1034void __cpuinit numa_add_cpu(int cpu) 1052void __cpuinit numa_add_cpu(int cpu)
1035{ 1053{
1036 unsigned long addr;
1037 int physnid, nid; 1054 int physnid, nid;
1038 1055
1039 nid = numa_cpu_node(cpu); 1056 nid = numa_cpu_node(cpu);
@@ -1041,26 +1058,15 @@ void __cpuinit numa_add_cpu(int cpu)
1041 nid = early_cpu_to_node(cpu); 1058 nid = early_cpu_to_node(cpu);
1042 BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); 1059 BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
1043 1060
1044 /* 1061 physnid = emu_nid_to_phys[nid];
1045 * Use the starting address of the emulated node to find which physical
1046 * node it is allocated on.
1047 */
1048 addr = node_start_pfn(nid) << PAGE_SHIFT;
1049 for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
1050 if (addr >= physnodes[physnid].start &&
1051 addr < physnodes[physnid].end)
1052 break;
1053 1062
1054 /* 1063 /*
1055 * Map the cpu to each emulated node that is allocated on the physical 1064 * Map the cpu to each emulated node that is allocated on the physical
1056 * node of the cpu's apic id. 1065 * node of the cpu's apic id.
1057 */ 1066 */
1058 for_each_online_node(nid) { 1067 for_each_online_node(nid)
1059 addr = node_start_pfn(nid) << PAGE_SHIFT; 1068 if (emu_nid_to_phys[nid] == physnid)
1060 if (addr >= physnodes[physnid].start &&
1061 addr < physnodes[physnid].end)
1062 cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); 1069 cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
1063 }
1064} 1070}
1065 1071
1066void __cpuinit numa_remove_cpu(int cpu) 1072void __cpuinit numa_remove_cpu(int cpu)
@@ -1073,21 +1079,21 @@ void __cpuinit numa_remove_cpu(int cpu)
1073# else /* !CONFIG_DEBUG_PER_CPU_MAPS */ 1079# else /* !CONFIG_DEBUG_PER_CPU_MAPS */
1074static void __cpuinit numa_set_cpumask(int cpu, int enable) 1080static void __cpuinit numa_set_cpumask(int cpu, int enable)
1075{ 1081{
1076 int node = early_cpu_to_node(cpu);
1077 struct cpumask *mask; 1082 struct cpumask *mask;
1078 int i; 1083 int nid, physnid, i;
1079 1084
1080 if (node == NUMA_NO_NODE) { 1085 nid = early_cpu_to_node(cpu);
1086 if (nid == NUMA_NO_NODE) {
1081 /* early_cpu_to_node() already emits a warning and trace */ 1087 /* early_cpu_to_node() already emits a warning and trace */
1082 return; 1088 return;
1083 } 1089 }
1084 for_each_online_node(i) {
1085 unsigned long addr;
1086 1090
1087 addr = node_start_pfn(i) << PAGE_SHIFT; 1091 physnid = emu_nid_to_phys[nid];
1088 if (addr < physnodes[node].start || 1092
1089 addr >= physnodes[node].end) 1093 for_each_online_node(i) {
1094 if (emu_nid_to_phys[nid] != physnid)
1090 continue; 1095 continue;
1096
1091 mask = debug_cpumask_set_cpu(cpu, enable); 1097 mask = debug_cpumask_set_cpu(cpu, enable);
1092 if (!mask) 1098 if (!mask)
1093 return; 1099 return;