author	Tejun Heo <tj@kernel.org>	2011-01-23 08:37:42 -0500
committer	Ingo Molnar <mingo@elte.hu>	2011-01-28 08:54:10 -0500
commit	8db78cc4b4048e3add40bca1bc3e55057c319256 (patch)
tree	16779c36de4baba8add96402a6331820df735f2c /arch/x86
parent	de2d9445f1627830ed2ebd00ee9d851986c940b5 (diff)
x86: Unify NUMA initialization between 32 and 64bit
Now that everything else is unified, NUMA initialization can be
unified too.

* numa_init_array() and init_cpu_to_node() are moved from numa_64 to
  numa.

* numa_32::initmem_init() is updated to call numa_init_array() and
  setup_arch() to call init_cpu_to_node() on 32bit too.

* x86_cpu_to_node_map is now initialized to NUMA_NO_NODE on 32bit
  too. This is safe now as numa_init_array() will initialize it early
  during boot.

This makes NUMA mapping fully initialized before
setup_per_cpu_areas() on 32bit too and thus makes the first percpu
chunk which contains all the static variables and some of dynamic
area allocated with NUMA affinity correctly considered.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: yinghai@kernel.org
Cc: brgerst@gmail.com
Cc: gorcunov@gmail.com
Cc: shaohui.zheng@intel.com
Cc: rientjes@google.com
LKML-Reference: <1295789862-25482-17-git-send-email-tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
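[Editor's note] The heart of the patch is the round-robin fill that numa_init_array() performs for CPUs the firmware did not map to a node (see the arch/x86/mm/numa.c hunk below). The following standalone C sketch models only that logic; the tables, sizes, and the next_online() helper are made-up stand-ins for the kernel's nodemask API, not real kernel interfaces:

/* Hypothetical userspace model of the round-robin fill in
 * numa_init_array(); all names below are stand-ins, not kernel APIs. */
#include <stdio.h>

#define NUMA_NO_NODE	(-1)
#define MAX_NUMNODES	4
#define NR_CPUS		8

static int node_online[MAX_NUMNODES] = { 1, 0, 1, 0 };	/* nodes 0, 2 online */
static int cpu_to_node[NR_CPUS] = {
	0, NUMA_NO_NODE, 2, NUMA_NO_NODE,
	NUMA_NO_NODE, NUMA_NO_NODE, 0, NUMA_NO_NODE,
};

/* first online node at or after 'n', or MAX_NUMNODES if none */
static int next_online(int n)
{
	while (n < MAX_NUMNODES && !node_online[n])
		n++;
	return n;
}

int main(void)
{
	int rr = next_online(0), i;

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_to_node[i] != NUMA_NO_NODE)
			continue;		/* firmware already mapped it */
		cpu_to_node[i] = rr;		/* round-robin the rest */
		rr = next_online(rr + 1);
		if (rr == MAX_NUMNODES)
			rr = next_online(0);	/* wrap around */
	}
	for (i = 0; i < NR_CPUS; i++)
		printf("cpu %d -> node %d\n", i, cpu_to_node[i]);
	return 0;
}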
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/numa.h	4
-rw-r--r--	arch/x86/include/asm/numa_64.h	3
-rw-r--r--	arch/x86/kernel/setup.c	2
-rw-r--r--	arch/x86/mm/numa.c	76
-rw-r--r--	arch/x86/mm/numa_32.c	1
-rw-r--r--	arch/x86/mm/numa_64.c	75
6 files changed, 77 insertions, 84 deletions
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index d3964b28b12..26fc6e2dd0f 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -34,11 +34,15 @@ static inline void set_apicid_to_node(int apicid, s16 node)
 #ifdef CONFIG_NUMA
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
+extern void __init numa_init_array(void);
+extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_add_cpu(int cpu);
 extern void __cpuinit numa_remove_cpu(int cpu);
 #else /* CONFIG_NUMA */
 static inline void numa_set_node(int cpu, int node) { }
 static inline void numa_clear_node(int cpu) { }
+static inline void numa_init_array(void) { }
+static inline void init_cpu_to_node(void) { }
 static inline void numa_add_cpu(int cpu) { }
 static inline void numa_remove_cpu(int cpu) { }
 #endif /* CONFIG_NUMA */
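[Editor's note] The new declarations come paired with empty static-inline stubs in the #else branch; this is what lets setup_arch() below call init_cpu_to_node() unconditionally, with the call compiling away on !CONFIG_NUMA builds. A minimal sketch of that stub pattern, using a made-up CONFIG_FOO symbol rather than a real Kconfig option:

/* Sketch of the config-stub pattern; CONFIG_FOO is a made-up symbol. */
#ifdef CONFIG_FOO
extern void foo_init(void);		/* real version lives elsewhere */
#else
static inline void foo_init(void) { }	/* stub: compiles away entirely */
#endif

void setup(void)
{
	foo_init();	/* call site needs no #ifdef */
}

int main(void)
{
	setup();
	return 0;
}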
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 123f1856101..2819afa3363 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -13,7 +13,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
-extern void numa_init_array(void);
 extern int numa_off;
 
 extern unsigned long numa_free_all_bootmem(void);
@@ -28,7 +27,6 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
  */
 #define NODE_MIN_SIZE (4*1024*1024)
 
-extern void __init init_cpu_to_node(void);
 extern int __cpuinit numa_cpu_node(int cpu);
 
 #ifdef CONFIG_NUMA_EMU
@@ -37,7 +35,6 @@ extern int __cpuinit numa_cpu_node(int cpu);
 void numa_emu_cmdline(char *);
 #endif /* CONFIG_NUMA_EMU */
 #else
-static inline void init_cpu_to_node(void) { }
 static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
 #endif
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d3cfe26c025..12023412fdf 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1040,9 +1040,7 @@ void __init setup_arch(char **cmdline_p)
 
 	prefill_possible_map();
 
-#ifdef CONFIG_X86_64
 	init_cpu_to_node();
-#endif
 
 	init_apic_mappings();
 	ioapic_and_gsi_init();
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 75abecb614c..bf60715bd1b 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -38,11 +38,7 @@ EXPORT_SYMBOL(node_to_cpumask_map);
 /*
  * Map cpu index to node index
  */
-#ifdef CONFIG_X86_32
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, 0);
-#else
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-#endif
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 void __cpuinit numa_set_node(int cpu, int node)
@@ -99,6 +95,78 @@ void __init setup_node_to_cpumask_map(void)
 	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+	int rr, i;
+
+	rr = first_node(node_online_map);
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (early_cpu_to_node(i) != NUMA_NO_NODE)
+			continue;
+		numa_set_node(i, rr);
+		rr = next_node(rr, node_online_map);
+		if (rr == MAX_NUMNODES)
+			rr = first_node(node_online_map);
+	}
+}
+
+static __init int find_near_online_node(int node)
+{
+	int n, val;
+	int min_val = INT_MAX;
+	int best_node = -1;
+
+	for_each_online_node(n) {
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
+		int node = numa_cpu_node(cpu);
+
+		if (node == NUMA_NO_NODE)
+			continue;
+		if (!node_online(node))
+			node = find_near_online_node(node);
+		numa_set_node(cpu, node);
+	}
+}
+
 #ifndef CONFIG_DEBUG_PER_CPU_MAPS
 
 # ifndef CONFIG_NUMA_EMU
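[Editor's note] find_near_online_node() above is a plain minimum search over node_distance(). A hypothetical standalone model of it, using a made-up SLIT-style distance table (10 = local, larger = farther) in place of the kernel's node_distance() and node-online machinery:

/* Standalone model of find_near_online_node(): pick the online node
 * with the smallest distance. All tables and values are made up. */
#include <limits.h>
#include <stdio.h>

#define MAX_NUMNODES 4

static int online[MAX_NUMNODES] = { 1, 0, 1, 1 };	/* node 1 offline */
static int dist[MAX_NUMNODES][MAX_NUMNODES] = {
	{ 10, 20, 20, 30 },
	{ 20, 10, 30, 20 },
	{ 20, 30, 10, 20 },
	{ 30, 20, 20, 10 },
};

static int find_near_online_node(int node)
{
	int n, best_node = -1, min_val = INT_MAX;

	for (n = 0; n < MAX_NUMNODES; n++) {
		if (!online[n])
			continue;
		if (dist[node][n] < min_val) {
			min_val = dist[node][n];
			best_node = n;
		}
	}
	return best_node;
}

int main(void)
{
	/* CPUs whose firmware node is the offline node 1 fall back here */
	printf("nearest online node to 1: %d\n", find_near_online_node(1));
	return 0;
}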
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 8d91d227be0..505bb04654b 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -367,6 +367,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
  */
 
 	get_memcfg_numa();
+	numa_init_array();
 
 	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 14664f58a75..f548fbf75f4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -224,28 +224,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 	node_set_online(nodeid);
 }
 
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
- */
-void __init numa_init_array(void)
-{
-	int rr, i;
-
-	rr = first_node(node_online_map);
-	for (i = 0; i < nr_cpu_ids; i++) {
-		if (early_cpu_to_node(i) != NUMA_NO_NODE)
-			continue;
-		numa_set_node(i, rr);
-		rr = next_node(rr, node_online_map);
-		if (rr == MAX_NUMNODES)
-			rr = first_node(node_online_map);
-	}
-}
-
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
@@ -664,59 +642,6 @@ unsigned long __init numa_free_all_bootmem(void)
 	return pages;
 }
 
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
-{
-	int n, val;
-	int min_val = INT_MAX;
-	int best_node = -1;
-
-	for_each_online_node(n) {
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	return best_node;
-}
-
-/*
- * Setup early cpu_to_node.
- *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
- *
- * Called before the per_cpu areas are setup.
- */
-void __init init_cpu_to_node(void)
-{
-	int cpu;
-	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
-	BUG_ON(cpu_to_apicid == NULL);
-
-	for_each_possible_cpu(cpu) {
-		int node = numa_cpu_node(cpu);
-
-		if (node == NUMA_NO_NODE)
-			continue;
-		if (!node_online(node))
-			node = find_near_online_node(node);
-		numa_set_node(cpu, node);
-	}
-}
-#endif
-
 int __cpuinit numa_cpu_node(int cpu)
 {
 	int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
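[Editor's note] The shared numa_cpu_node() (its body is truncated above) resolves cpu -> apicid -> node via the early per-cpu tables. A rough standalone model of that two-step lookup, with made-up tables and values; none of these names are the real kernel arrays:

/* Hypothetical model of the cpu -> apicid -> node lookup that
 * numa_cpu_node() performs; tables and values here are made up. */
#include <stdio.h>

#define NUMA_NO_NODE	(-1)
#define BAD_APICID	0xFFFF
#define NR_CPUS		4
#define MAX_APICID	16

static unsigned short cpu_to_apicid[NR_CPUS] = { 0, 2, 4, BAD_APICID };
static short apicid_to_node[MAX_APICID];	/* filled in main() */

static int numa_cpu_node(int cpu)
{
	unsigned short apicid = cpu_to_apicid[cpu];

	if (apicid == BAD_APICID)
		return NUMA_NO_NODE;	/* CPU not enumerated by firmware */
	return apicid_to_node[apicid];
}

int main(void)
{
	for (int i = 0; i < MAX_APICID; i++)
		apicid_to_node[i] = NUMA_NO_NODE;
	/* pretend firmware (SRAT) reported these mappings */
	apicid_to_node[0] = 0;
	apicid_to_node[2] = 0;
	apicid_to_node[4] = 1;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %d -> node %d\n", cpu, numa_cpu_node(cpu));
	return 0;
}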