diff options
author | Tejun Heo <tj@kernel.org> | 2011-02-16 11:11:10 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-02-16 11:11:10 -0500 |
commit | 9d073caeb372940af02a768d2b7e845ac732bda0 (patch) | |
tree | 7d46006e3f0fc1fb0c13924735d9afea6ac9c3eb /arch/x86/mm/numa_64.c | |
parent | d9c515eacb3bde73f7a5ecb7e35ea6e660ad421d (diff) |
x86-64, NUMA: Build and use direct emulated nid -> phys nid mapping
NUMA emulation copied physical NUMA configuration into physnodes[] and
used it to reverse-map emulated nodes to physical nodes, which is
unnecessarily convoluted. Build emu_nid_to_phys[] array to map
emulated nids directly to the matching physical nids and use it in
numa_add_cpu().
physnodes[] will be removed with further patches.
- v2: Fixed a build failure when CONFIG_DEBUG_PER_CPU_MAPS is enabled,
caused by a missing local variable definition. Reported by Ingo.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/mm/numa_64.c')
-rw-r--r-- | arch/x86/mm/numa_64.c | 64 |
1 files changed, 35 insertions, 29 deletions
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 20e2cfe5ab82..e9919c4d1573 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -542,7 +542,9 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
542 | #ifdef CONFIG_NUMA_EMU | 542 | #ifdef CONFIG_NUMA_EMU |
543 | /* Numa emulation */ | 543 | /* Numa emulation */ |
544 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 544 | static struct bootnode nodes[MAX_NUMNODES] __initdata; |
545 | static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata; | 545 | static struct bootnode physnodes[MAX_NUMNODES] __initdata; |
546 | |||
547 | static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata; | ||
546 | static char *emu_cmdline __initdata; | 548 | static char *emu_cmdline __initdata; |
547 | 549 | ||
548 | void __init numa_emu_cmdline(char *str) | 550 | void __init numa_emu_cmdline(char *str) |
@@ -649,7 +651,8 @@ static void __init fake_physnodes(int acpi, int amd, int nr_nodes) | |||
649 | * allocation past addr and -1 otherwise. addr is adjusted to be at | 651 | * allocation past addr and -1 otherwise. addr is adjusted to be at |
650 | * the end of the node. | 652 | * the end of the node. |
651 | */ | 653 | */ |
652 | static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) | 654 | static int __init setup_node_range(int nid, int physnid, |
655 | u64 *addr, u64 size, u64 max_addr) | ||
653 | { | 656 | { |
654 | int ret = 0; | 657 | int ret = 0; |
655 | nodes[nid].start = *addr; | 658 | nodes[nid].start = *addr; |
@@ -660,6 +663,10 @@ static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) | |||
660 | } | 663 | } |
661 | nodes[nid].end = *addr; | 664 | nodes[nid].end = *addr; |
662 | node_set(nid, node_possible_map); | 665 | node_set(nid, node_possible_map); |
666 | |||
667 | if (emu_nid_to_phys[nid] == NUMA_NO_NODE) | ||
668 | emu_nid_to_phys[nid] = physnid; | ||
669 | |||
663 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, | 670 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, |
664 | nodes[nid].start, nodes[nid].end, | 671 | nodes[nid].start, nodes[nid].end, |
665 | (nodes[nid].end - nodes[nid].start) >> 20); | 672 | (nodes[nid].end - nodes[nid].start) >> 20); |
@@ -756,7 +763,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes) | |||
756 | if (nodes_weight(physnode_mask) + ret >= nr_nodes) | 763 | if (nodes_weight(physnode_mask) + ret >= nr_nodes) |
757 | end = physnodes[i].end; | 764 | end = physnodes[i].end; |
758 | 765 | ||
759 | if (setup_node_range(ret++, &physnodes[i].start, | 766 | if (setup_node_range(ret++, i, &physnodes[i].start, |
760 | end - physnodes[i].start, | 767 | end - physnodes[i].start, |
761 | physnodes[i].end) < 0) | 768 | physnodes[i].end) < 0) |
762 | node_clear(i, physnode_mask); | 769 | node_clear(i, physnode_mask); |
@@ -852,7 +859,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) | |||
852 | * later. If setup_node_range() returns non-zero, there | 859 | * later. If setup_node_range() returns non-zero, there |
853 | * is no more memory available on this physical node. | 860 | * is no more memory available on this physical node. |
854 | */ | 861 | */ |
855 | if (setup_node_range(ret++, &physnodes[i].start, | 862 | if (setup_node_range(ret++, i, &physnodes[i].start, |
856 | end - physnodes[i].start, | 863 | end - physnodes[i].start, |
857 | physnodes[i].end) < 0) | 864 | physnodes[i].end) < 0) |
858 | node_clear(i, physnode_mask); | 865 | node_clear(i, physnode_mask); |
@@ -872,6 +879,9 @@ static int __init numa_emulation(int acpi, int amd) | |||
872 | int num_nodes; | 879 | int num_nodes; |
873 | int i; | 880 | int i; |
874 | 881 | ||
882 | for (i = 0; i < MAX_NUMNODES; i++) | ||
883 | emu_nid_to_phys[i] = NUMA_NO_NODE; | ||
884 | |||
875 | /* | 885 | /* |
876 | * If the numa=fake command-line contains a 'M' or 'G', it represents | 886 | * If the numa=fake command-line contains a 'M' or 'G', it represents |
877 | * the fixed node size. Otherwise, if it is just a single number N, | 887 | * the fixed node size. Otherwise, if it is just a single number N, |
@@ -892,6 +902,11 @@ static int __init numa_emulation(int acpi, int amd) | |||
892 | if (num_nodes < 0) | 902 | if (num_nodes < 0) |
893 | return num_nodes; | 903 | return num_nodes; |
894 | 904 | ||
905 | /* make sure all emulated nodes are mapped to a physical node */ | ||
906 | for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) | ||
907 | if (emu_nid_to_phys[i] == NUMA_NO_NODE) | ||
908 | emu_nid_to_phys[i] = 0; | ||
909 | |||
895 | ei.nr_blks = num_nodes; | 910 | ei.nr_blks = num_nodes; |
896 | for (i = 0; i < ei.nr_blks; i++) { | 911 | for (i = 0; i < ei.nr_blks; i++) { |
897 | ei.blk[i].start = nodes[i].start; | 912 | ei.blk[i].start = nodes[i].start; |
@@ -918,7 +933,6 @@ static int __init numa_emulation(int acpi, int amd) | |||
918 | init_memory_mapping_high(); | 933 | init_memory_mapping_high(); |
919 | for_each_node_mask(i, node_possible_map) | 934 | for_each_node_mask(i, node_possible_map) |
920 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 935 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
921 | setup_physnodes(0, max_addr); | ||
922 | fake_physnodes(acpi, amd, num_nodes); | 936 | fake_physnodes(acpi, amd, num_nodes); |
923 | numa_init_array(); | 937 | numa_init_array(); |
924 | numa_emu_dist = true; | 938 | numa_emu_dist = true; |
@@ -976,7 +990,11 @@ void __init initmem_init(void) | |||
976 | setup_physnodes(0, max_pfn << PAGE_SHIFT); | 990 | setup_physnodes(0, max_pfn << PAGE_SHIFT); |
977 | if (emu_cmdline && !numa_emulation(i == 0, i == 1)) | 991 | if (emu_cmdline && !numa_emulation(i == 0, i == 1)) |
978 | return; | 992 | return; |
979 | setup_physnodes(0, max_pfn << PAGE_SHIFT); | 993 | |
994 | /* not emulating, build identity mapping for numa_add_cpu() */ | ||
995 | for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++) | ||
996 | emu_nid_to_phys[j] = j; | ||
997 | |||
980 | nodes_clear(node_possible_map); | 998 | nodes_clear(node_possible_map); |
981 | nodes_clear(node_online_map); | 999 | nodes_clear(node_online_map); |
982 | #endif | 1000 | #endif |
@@ -1033,7 +1051,6 @@ int __cpuinit numa_cpu_node(int cpu) | |||
1033 | # ifndef CONFIG_DEBUG_PER_CPU_MAPS | 1051 | # ifndef CONFIG_DEBUG_PER_CPU_MAPS |
1034 | void __cpuinit numa_add_cpu(int cpu) | 1052 | void __cpuinit numa_add_cpu(int cpu) |
1035 | { | 1053 | { |
1036 | unsigned long addr; | ||
1037 | int physnid, nid; | 1054 | int physnid, nid; |
1038 | 1055 | ||
1039 | nid = numa_cpu_node(cpu); | 1056 | nid = numa_cpu_node(cpu); |
@@ -1041,26 +1058,15 @@ void __cpuinit numa_add_cpu(int cpu) | |||
1041 | nid = early_cpu_to_node(cpu); | 1058 | nid = early_cpu_to_node(cpu); |
1042 | BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); | 1059 | BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); |
1043 | 1060 | ||
1044 | /* | 1061 | physnid = emu_nid_to_phys[nid]; |
1045 | * Use the starting address of the emulated node to find which physical | ||
1046 | * node it is allocated on. | ||
1047 | */ | ||
1048 | addr = node_start_pfn(nid) << PAGE_SHIFT; | ||
1049 | for (physnid = 0; physnid < MAX_NUMNODES; physnid++) | ||
1050 | if (addr >= physnodes[physnid].start && | ||
1051 | addr < physnodes[physnid].end) | ||
1052 | break; | ||
1053 | 1062 | ||
1054 | /* | 1063 | /* |
1055 | * Map the cpu to each emulated node that is allocated on the physical | 1064 | * Map the cpu to each emulated node that is allocated on the physical |
1056 | * node of the cpu's apic id. | 1065 | * node of the cpu's apic id. |
1057 | */ | 1066 | */ |
1058 | for_each_online_node(nid) { | 1067 | for_each_online_node(nid) |
1059 | addr = node_start_pfn(nid) << PAGE_SHIFT; | 1068 | if (emu_nid_to_phys[nid] == physnid) |
1060 | if (addr >= physnodes[physnid].start && | ||
1061 | addr < physnodes[physnid].end) | ||
1062 | cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); | 1069 | cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); |
1063 | } | ||
1064 | } | 1070 | } |
1065 | 1071 | ||
1066 | void __cpuinit numa_remove_cpu(int cpu) | 1072 | void __cpuinit numa_remove_cpu(int cpu) |
@@ -1073,21 +1079,21 @@ void __cpuinit numa_remove_cpu(int cpu) | |||
1073 | # else /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 1079 | # else /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
1074 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | 1080 | static void __cpuinit numa_set_cpumask(int cpu, int enable) |
1075 | { | 1081 | { |
1076 | int node = early_cpu_to_node(cpu); | ||
1077 | struct cpumask *mask; | 1082 | struct cpumask *mask; |
1078 | int i; | 1083 | int nid, physnid, i; |
1079 | 1084 | ||
1080 | if (node == NUMA_NO_NODE) { | 1085 | nid = early_cpu_to_node(cpu); |
1086 | if (nid == NUMA_NO_NODE) { | ||
1081 | /* early_cpu_to_node() already emits a warning and trace */ | 1087 | /* early_cpu_to_node() already emits a warning and trace */ |
1082 | return; | 1088 | return; |
1083 | } | 1089 | } |
1084 | for_each_online_node(i) { | ||
1085 | unsigned long addr; | ||
1086 | 1090 | ||
1087 | addr = node_start_pfn(i) << PAGE_SHIFT; | 1091 | physnid = emu_nid_to_phys[nid]; |
1088 | if (addr < physnodes[node].start || | 1092 | |
1089 | addr >= physnodes[node].end) | 1093 | for_each_online_node(i) { |
1094 | if (emu_nid_to_phys[nid] != physnid) | ||
1090 | continue; | 1095 | continue; |
1096 | |||
1091 | mask = debug_cpumask_set_cpu(cpu, enable); | 1097 | mask = debug_cpumask_set_cpu(cpu, enable); |
1092 | if (!mask) | 1098 | if (!mask) |
1093 | return; | 1099 | return; |