author     Tejun Heo <tj@kernel.org>       2011-01-23 08:37:42 -0500
committer  Ingo Molnar <mingo@elte.hu>     2011-01-28 08:54:10 -0500
commit     8db78cc4b4048e3add40bca1bc3e55057c319256 (patch)
tree       16779c36de4baba8add96402a6331820df735f2c /arch/x86/mm
parent     de2d9445f1627830ed2ebd00ee9d851986c940b5 (diff)
x86: Unify NUMA initialization between 32 and 64bit
Now that everything else is unified, NUMA initialization can be
unified too.
* numa_init_array() and init_cpu_to_node() are moved from
numa_64 to numa.
* numa_32::initmem_init() is updated to call numa_init_array()
and setup_arch() to call init_cpu_to_node() on 32bit too.
* x86_cpu_to_node_map is now initialized to NUMA_NO_NODE on
32bit too. This is safe now as numa_init_array() will initialize
it early during boot (see the sketch after this list).
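
To illustrate why the NUMA_NO_NODE sentinel is safe, here is a minimal
user-space sketch of the scheme. NR_CPUS_DEMO, demo_cpu_to_node[] and
demo_online_nodes[] are made-up stand-ins for NR_CPUS, the early
x86_cpu_to_node_map and node_online_map; the loop mirrors the
round-robin fill that numa_init_array() performs in the diff below.

/*
 * Minimal user-space sketch (not kernel code): demo_cpu_to_node[],
 * demo_online_nodes[] and NR_CPUS_DEMO are hypothetical stand-ins for
 * the early x86_cpu_to_node_map, node_online_map and NR_CPUS.
 */
#include <stdio.h>

#define NUMA_NO_NODE    (-1)
#define NR_CPUS_DEMO    8

/* Every entry starts as the sentinel, as on 32bit after this change. */
static int demo_cpu_to_node[NR_CPUS_DEMO] = {
        [0 ... NR_CPUS_DEMO - 1] = NUMA_NO_NODE
};

/* Pretend nodes 0 and 2 came online with memory attached. */
static const int demo_online_nodes[] = { 0, 2 };

int main(void)
{
        int nr_online = sizeof(demo_online_nodes) / sizeof(demo_online_nodes[0]);
        int i, rr = 0;

        /* Mirror of numa_init_array(): only sentinel entries are filled,
         * cycling round robin over the online nodes. */
        for (i = 0; i < NR_CPUS_DEMO; i++) {
                if (demo_cpu_to_node[i] != NUMA_NO_NODE)
                        continue;
                demo_cpu_to_node[i] = demo_online_nodes[rr];
                rr = (rr + 1) % nr_online;
        }

        for (i = 0; i < NR_CPUS_DEMO; i++)
                printf("cpu %d -> node %d\n", i, demo_cpu_to_node[i]);
        return 0;
}

Any entry still holding the sentinel when numa_init_array() runs gets a
real online node, so nothing downstream ever sees a bogus node 0 mapping
on 32bit.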
This makes NUMA mapping fully initialized before
setup_per_cpu_areas() on 32bit too and thus makes the first
percpu chunk which contains all the static variables and some of
dynamic area allocated with NUMA affinity correctly considered.
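
A purely illustrative sketch of that ordering follows; the bodies are
printf stubs standing in for the real setup_arch()/start_kernel() code
paths, and only the call order is the point.

#include <stdio.h>

/* Stub: stands in for init_cpu_to_node(), called from setup_arch(). */
static void init_cpu_to_node_stub(void)
{
        /* By this point numa_init_array() has replaced every remaining
         * NUMA_NO_NODE entry, so the cpu->node map is complete. */
        printf("init_cpu_to_node: cpu->node map complete\n");
}

/* Stub: stands in for setup_per_cpu_areas(), called from start_kernel(). */
static void setup_per_cpu_areas_stub(void)
{
        /* The first percpu chunk (static percpu variables plus part of
         * the dynamic area) can now be placed with correct node affinity. */
        printf("setup_per_cpu_areas: allocating the first chunk per node\n");
}

int main(void)
{
        /* The order is the whole point: the map is finished before the
         * percpu allocator runs, on 32bit as well as 64bit. */
        init_cpu_to_node_stub();
        setup_per_cpu_areas_stub();
        return 0;
}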
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: yinghai@kernel.org
Cc: brgerst@gmail.com
Cc: gorcunov@gmail.com
Cc: shaohui.zheng@intel.com
Cc: rientjes@google.com
LKML-Reference: <1295789862-25482-17-git-send-email-tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/numa.c     | 76
-rw-r--r--  arch/x86/mm/numa_32.c  |  1
-rw-r--r--  arch/x86/mm/numa_64.c  | 75
3 files changed, 73 insertions, 79 deletions
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 75abecb614c..bf60715bd1b 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -38,11 +38,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 /*
  * Map cpu index to node index
  */
-#ifdef CONFIG_X86_32
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0);
-#else
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-#endif
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 void __cpuinit numa_set_node(int cpu, int node)
@@ -99,6 +95,78 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+	int rr, i;
+
+	rr = first_node(node_online_map);
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
+			continue;
+		numa_set_node(i, rr);
+		rr = next_node(rr, node_online_map);
+		if (rr == MAX_NUMNODES)
+			rr = first_node(node_online_map);
+	}
+}
+
+static __init int find_near_online_node(int node)
+{
+	int n, val;
+	int min_val = INT_MAX;
+	int best_node = -1;
+
+	for_each_online_node(n) {
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
+		int node = numa_cpu_node(cpu);
+
+		if (node == NUMA_NO_NODE)
+			continue;
+		if (!node_online(node))
+			node = find_near_online_node(node);
+		numa_set_node(cpu, node);
+	}
+}
+
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
 # ifndef CONFIG_NUMA_EMU
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 8d91d227be0..505bb04654b 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -367,6 +367,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
 	 */
 
 	get_memcfg_numa();
+	numa_init_array();
 
 	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 14664f58a75..f548fbf75f4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -224,28 +224,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	node_set_online(nodeid);
 }
 
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
- */
-void __init numa_init_array(void)
-{
-	int rr, i;
-
-	rr = first_node(node_online_map);
-	for (i = 0; i < nr_cpu_ids; i++) {
-		if (early_cpu_to_node(i) != NUMA_NO_NODE)
-			continue;
-		numa_set_node(i, rr);
-		rr = next_node(rr, node_online_map);
-		if (rr == MAX_NUMNODES)
-			rr = first_node(node_online_map);
-	}
-}
-
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
@@ -664,59 +642,6 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
-{
-	int n, val;
-	int min_val = INT_MAX;
-	int best_node = -1;
-
-	for_each_online_node(n) {
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	return best_node;
-}
-
-/*
- * Setup early cpu_to_node.
- *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
- *
- * Called before the per_cpu areas are setup.
- */
-void __init init_cpu_to_node(void)
-{
-	int cpu;
-	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
-	BUG_ON(cpu_to_apicid == NULL);
-
-	for_each_possible_cpu(cpu) {
-		int node = numa_cpu_node(cpu);
-
-		if (node == NUMA_NO_NODE)
-			continue;
-		if (!node_online(node))
-			node = find_near_online_node(node);
-		numa_set_node(cpu, node);
-	}
-}
-#endif
-
 int __cpuinit numa_cpu_node(int cpu)
 {
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);