aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Travis <travis@sgi.com>2008-05-12 15:21:12 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-08 05:31:20 -0400
commit23ca4bba3e20c6c3cb11c1bb0ab4770b724d39ac (patch)
tree39ba5f7705e48717d7a6f2621b8ca7e7015c9802
parent1184dc2ffe2c8fb9afb766d870850f2c3165ef25 (diff)
x86: cleanup early per cpu variables/accesses v4
* Introduce a new PER_CPU macro called "EARLY_PER_CPU". This is used by some per_cpu variables that are initialized and accessed before there are per_cpu areas allocated.

  ["Early" with respect to per_cpu variables means "before the per_cpu areas have been set up".]

  This patchset adds these new macros:

    DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)
    EXPORT_EARLY_PER_CPU_SYMBOL(_name)
    DECLARE_EARLY_PER_CPU(_type, _name)

    early_per_cpu_ptr(_name)
    early_per_cpu_map(_name, _idx)
    early_per_cpu(_name, _cpu)

  The DEFINE macro defines the per_cpu variable as well as the early map and pointer. It also initializes the per_cpu variable and map elements to "_initvalue". The early_* macros provide access to the initial map (usually set up during system init) and the early pointer. This pointer is initialized to point to the early map but is then NULL'ed when the actual per_cpu areas are set up. After that, the per_cpu variable is the correct way to access the data.

  The early_per_cpu() macro is not very efficient but does show how to access the variable if you have a function that can be called both "early" and "late". It tests whether the early pointer is NULL; if it is not, the early map is still valid and is used. Otherwise, the per_cpu variable is used instead:

    #define early_per_cpu(_name, _cpu)                  \
            (early_per_cpu_ptr(_name) ?                 \
                    early_per_cpu_ptr(_name)[_cpu] :    \
                    per_cpu(_name, _cpu))

  A better method is to actually check the pointer manually. In the case below, numa_set_node can be called both "early" and "late":

    void __cpuinit numa_set_node(int cpu, int node)
    {
            int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

            if (cpu_to_node_map)
                    cpu_to_node_map[cpu] = node;
            else
                    per_cpu(x86_cpu_to_node_map, cpu) = node;
    }

* Add a flag "arch_provides_topology_pointers" that indicates pointers to topology cpumask_t maps are available. Otherwise, use the function returning the cpumask_t value. This is useful when the cpumask_t size is very large, because it avoids copying the mask on to and off of the stack.
* The coverage of CONFIG_DEBUG_PER_CPU_MAPS has been increased while the non-debug case has been optimized a bit.

* Remove a compiler warning about an unreferenced function in drivers/base/topology.c

* Clean up #ifdef in setup.c

For inclusion into sched-devel/latest tree.

Based on:
    git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
  + sched-devel/latest .../mingo/linux-2.6-sched-devel.git

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.debug2
-rw-r--r--arch/x86/kernel/apic_32.c9
-rw-r--r--arch/x86/kernel/apic_64.c11
-rw-r--r--arch/x86/kernel/setup.c96
-rw-r--r--arch/x86/kernel/setup_32.c24
-rw-r--r--arch/x86/kernel/setup_64.c9
-rw-r--r--arch/x86/kernel/smpboot.c20
-rw-r--r--arch/x86/mm/numa_64.c43
-rw-r--r--arch/x86/mm/srat_64.c2
-rw-r--r--drivers/base/topology.c25
-rw-r--r--include/asm-x86/numa_64.h19
-rw-r--r--include/asm-x86/percpu.h46
-rw-r--r--include/asm-x86/smp.h15
-rw-r--r--include/asm-x86/topology.h143
15 files changed, 270 insertions, 196 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2e325521e5e9..4469a0db1ae1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
121 def_bool y 121 def_bool y
122 122
123config HAVE_SETUP_PER_CPU_AREA 123config HAVE_SETUP_PER_CPU_AREA
124 def_bool X86_64 || (X86_SMP && !X86_VOYAGER) 124 def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)
125 125
126config HAVE_CPUMASK_OF_CPU_MAP 126config HAVE_CPUMASK_OF_CPU_MAP
127 def_bool X86_64_SMP 127 def_bool X86_64_SMP
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 18363374d51a..24ca95a0ba54 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -60,7 +60,7 @@ config DEBUG_PAGEALLOC
60config DEBUG_PER_CPU_MAPS 60config DEBUG_PER_CPU_MAPS
61 bool "Debug access to per_cpu maps" 61 bool "Debug access to per_cpu maps"
62 depends on DEBUG_KERNEL 62 depends on DEBUG_KERNEL
63 depends on X86_64_SMP 63 depends on X86_SMP
64 default n 64 default n
65 help 65 help
66 Say Y to verify that the per_cpu map being accessed has 66 Say Y to verify that the per_cpu map being accessed has
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6c..f17c1c1bc384 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -52,9 +52,6 @@
52 52
53unsigned long mp_lapic_addr; 53unsigned long mp_lapic_addr;
54 54
55DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
56EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
57
58/* 55/*
59 * Knob to control our willingness to enable the local APIC. 56 * Knob to control our willingness to enable the local APIC.
60 * 57 *
@@ -1534,9 +1531,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1534 } 1531 }
1535#ifdef CONFIG_SMP 1532#ifdef CONFIG_SMP
1536 /* are we being called early in kernel startup? */ 1533 /* are we being called early in kernel startup? */
1537 if (x86_cpu_to_apicid_early_ptr) { 1534 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1538 u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; 1535 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1539 u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; 1536 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1540 1537
1541 cpu_to_apicid[cpu] = apicid; 1538 cpu_to_apicid[cpu] = apicid;
1542 bios_cpu_apicid[cpu] = apicid; 1539 bios_cpu_apicid[cpu] = apicid;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 0633cfd0dc29..4fd21f7d698c 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -87,9 +87,6 @@ static unsigned long apic_phys;
87 87
88unsigned long mp_lapic_addr; 88unsigned long mp_lapic_addr;
89 89
90DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
91EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
92
93unsigned int __cpuinitdata maxcpus = NR_CPUS; 90unsigned int __cpuinitdata maxcpus = NR_CPUS;
94/* 91/*
95 * Get the LAPIC version 92 * Get the LAPIC version
@@ -1091,9 +1088,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1091 cpu = 0; 1088 cpu = 0;
1092 } 1089 }
1093 /* are we being called early in kernel startup? */ 1090 /* are we being called early in kernel startup? */
1094 if (x86_cpu_to_apicid_early_ptr) { 1091 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1095 u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr; 1092 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1096 u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; 1093 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1097 1094
1098 cpu_to_apicid[cpu] = apicid; 1095 cpu_to_apicid[cpu] = apicid;
1099 bios_cpu_apicid[cpu] = apicid; 1096 bios_cpu_apicid[cpu] = apicid;
@@ -1269,7 +1266,7 @@ __cpuinit int apic_is_clustered_box(void)
1269 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) 1266 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
1270 return 0; 1267 return 0;
1271 1268
1272 bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr; 1269 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1273 bitmap_zero(clustermap, NUM_APIC_CLUSTERS); 1270 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
1274 1271
1275 for (i = 0; i < NR_CPUS; i++) { 1272 for (i = 0; i < NR_CPUS; i++) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6f80b852a196..03caa8e4351f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -19,13 +19,23 @@ unsigned disabled_cpus __cpuinitdata;
19unsigned int boot_cpu_physical_apicid = -1U; 19unsigned int boot_cpu_physical_apicid = -1U;
20EXPORT_SYMBOL(boot_cpu_physical_apicid); 20EXPORT_SYMBOL(boot_cpu_physical_apicid);
21 21
22DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
23EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
24
25/* Bitmask of physically existing CPUs */ 22/* Bitmask of physically existing CPUs */
26physid_mask_t phys_cpu_present_map; 23physid_mask_t phys_cpu_present_map;
27#endif 24#endif
28 25
26/* map cpu index to physical APIC ID */
27DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
28DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
29EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
30EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
31
32#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
33#define X86_64_NUMA 1
34
35DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
36EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
37#endif
38
29#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 39#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
30/* 40/*
31 * Copy data used in early init routines from the initial arrays to the 41 * Copy data used in early init routines from the initial arrays to the
@@ -37,20 +47,21 @@ static void __init setup_per_cpu_maps(void)
37 int cpu; 47 int cpu;
38 48
39 for_each_possible_cpu(cpu) { 49 for_each_possible_cpu(cpu) {
40 per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu]; 50 per_cpu(x86_cpu_to_apicid, cpu) =
51 early_per_cpu_map(x86_cpu_to_apicid, cpu);
41 per_cpu(x86_bios_cpu_apicid, cpu) = 52 per_cpu(x86_bios_cpu_apicid, cpu) =
42 x86_bios_cpu_apicid_init[cpu]; 53 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
43#ifdef CONFIG_NUMA 54#ifdef X86_64_NUMA
44 per_cpu(x86_cpu_to_node_map, cpu) = 55 per_cpu(x86_cpu_to_node_map, cpu) =
45 x86_cpu_to_node_map_init[cpu]; 56 early_per_cpu_map(x86_cpu_to_node_map, cpu);
46#endif 57#endif
47 } 58 }
48 59
49 /* indicate the early static arrays will soon be gone */ 60 /* indicate the early static arrays will soon be gone */
50 x86_cpu_to_apicid_early_ptr = NULL; 61 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
51 x86_bios_cpu_apicid_early_ptr = NULL; 62 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
52#ifdef CONFIG_NUMA 63#ifdef X86_64_NUMA
53 x86_cpu_to_node_map_early_ptr = NULL; 64 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
54#endif 65#endif
55} 66}
56 67
@@ -109,7 +120,8 @@ void __init setup_per_cpu_areas(void)
109 if (!node_online(node) || !NODE_DATA(node)) { 120 if (!node_online(node) || !NODE_DATA(node)) {
110 ptr = alloc_bootmem_pages(size); 121 ptr = alloc_bootmem_pages(size);
111 printk(KERN_INFO 122 printk(KERN_INFO
112 "cpu %d has no node or node-local memory\n", i); 123 "cpu %d has no node %d or node-local memory\n",
124 i, node);
113 } 125 }
114 else 126 else
115 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); 127 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
@@ -137,3 +149,63 @@ void __init setup_per_cpu_areas(void)
137} 149}
138 150
139#endif 151#endif
152
153#ifdef X86_64_NUMA
154void __cpuinit numa_set_node(int cpu, int node)
155{
156 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
157
158 if (cpu_to_node_map)
159 cpu_to_node_map[cpu] = node;
160
161 else if (per_cpu_offset(cpu))
162 per_cpu(x86_cpu_to_node_map, cpu) = node;
163
164 else
165 Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
166}
167
168void __cpuinit numa_clear_node(int cpu)
169{
170 numa_set_node(cpu, NUMA_NO_NODE);
171}
172
173void __cpuinit numa_add_cpu(int cpu)
174{
175 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
176}
177
178void __cpuinit numa_remove_cpu(int cpu)
179{
180 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
181}
182#endif /* X86_64_NUMA */
183
184#if defined(CONFIG_DEBUG_PER_CPU_MAPS) && defined(CONFIG_X86_64)
185
186int cpu_to_node(int cpu)
187{
188 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
189 printk(KERN_WARNING
190 "cpu_to_node(%d): usage too early!\n", cpu);
191 dump_stack();
192 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
193 }
194 return per_cpu(x86_cpu_to_node_map, cpu);
195}
196EXPORT_SYMBOL(cpu_to_node);
197
198int early_cpu_to_node(int cpu)
199{
200 if (early_per_cpu_ptr(x86_cpu_to_node_map))
201 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
202
203 if (!per_cpu_offset(cpu)) {
204 printk(KERN_WARNING
205 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
206 dump_stack();
207 return NUMA_NO_NODE;
208 }
209 return per_cpu(x86_cpu_to_node_map, cpu);
210}
211#endif
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 5a2f8e063887..ccd5f5cdbbe6 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -737,18 +737,6 @@ char * __init __attribute__((weak)) memory_setup(void)
737 return machine_specific_memory_setup(); 737 return machine_specific_memory_setup();
738} 738}
739 739
740#ifdef CONFIG_NUMA
741/*
742 * In the golden day, when everything among i386 and x86_64 will be
743 * integrated, this will not live here
744 */
745void *x86_cpu_to_node_map_early_ptr;
746int x86_cpu_to_node_map_init[NR_CPUS] = {
747 [0 ... NR_CPUS-1] = NUMA_NO_NODE
748};
749DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
750#endif
751
752/* 740/*
753 * Determine if we were loaded by an EFI loader. If so, then we have also been 741 * Determine if we were loaded by an EFI loader. If so, then we have also been
754 * passed the efi memmap, systab, etc., so we should use these data structures 742 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -887,18 +875,6 @@ void __init setup_arch(char **cmdline_p)
887 875
888 io_delay_init(); 876 io_delay_init();
889 877
890#ifdef CONFIG_X86_SMP
891 /*
892 * setup to use the early static init tables during kernel startup
893 * X86_SMP will exclude sub-arches that don't deal well with it.
894 */
895 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
896 x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
897#ifdef CONFIG_NUMA
898 x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
899#endif
900#endif
901
902#ifdef CONFIG_X86_GENERICARCH 878#ifdef CONFIG_X86_GENERICARCH
903 generic_apic_probe(); 879 generic_apic_probe();
904#endif 880#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 6dff1286ad8a..e8df64fad540 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -406,15 +406,6 @@ void __init setup_arch(char **cmdline_p)
406 kvmclock_init(); 406 kvmclock_init();
407#endif 407#endif
408 408
409#ifdef CONFIG_SMP
410 /* setup to use the early static init tables during kernel startup */
411 x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
412 x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
413#ifdef CONFIG_NUMA
414 x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
415#endif
416#endif
417
418#ifdef CONFIG_ACPI 409#ifdef CONFIG_ACPI
419 /* 410 /*
420 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). 411 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3e1cecedde42..036604d3daed 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -68,22 +68,6 @@
68#include <mach_wakecpu.h> 68#include <mach_wakecpu.h>
69#include <smpboot_hooks.h> 69#include <smpboot_hooks.h>
70 70
71/*
72 * FIXME: For x86_64, those are defined in other files. But moving them here,
73 * would make the setup areas dependent on smp, which is a loss. When we
74 * integrate apic between arches, we can probably do a better job, but
75 * right now, they'll stay here -- glommer
76 */
77
78/* which logical CPU number maps to which CPU (physical APIC ID) */
79u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
80 { [0 ... NR_CPUS-1] = BAD_APICID };
81void *x86_cpu_to_apicid_early_ptr;
82
83u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
84 = { [0 ... NR_CPUS-1] = BAD_APICID };
85void *x86_bios_cpu_apicid_early_ptr;
86
87#ifdef CONFIG_X86_32 71#ifdef CONFIG_X86_32
88u8 apicid_2_node[MAX_APICID]; 72u8 apicid_2_node[MAX_APICID];
89static int low_mappings; 73static int low_mappings;
@@ -992,7 +976,7 @@ do_rest:
992 /* Try to put things back the way they were before ... */ 976 /* Try to put things back the way they were before ... */
993 unmap_cpu_to_logical_apicid(cpu); 977 unmap_cpu_to_logical_apicid(cpu);
994#ifdef CONFIG_X86_64 978#ifdef CONFIG_X86_64
995 clear_node_cpumask(cpu); /* was set by numa_add_cpu */ 979 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
996#endif 980#endif
997 cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ 981 cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
998 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ 982 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
@@ -1373,7 +1357,7 @@ static void __ref remove_cpu_from_maps(int cpu)
1373 cpu_clear(cpu, cpu_callin_map); 1357 cpu_clear(cpu, cpu_callin_map);
1374 /* was set by cpu_init() */ 1358 /* was set by cpu_init() */
1375 clear_bit(cpu, (unsigned long *)&cpu_initialized); 1359 clear_bit(cpu, (unsigned long *)&cpu_initialized);
1376 clear_node_cpumask(cpu); 1360 numa_remove_cpu(cpu);
1377#endif 1361#endif
1378} 1362}
1379 1363
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index c5066d519e5d..970f86775c41 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,16 +31,6 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
31 31
32struct memnode memnode; 32struct memnode memnode;
33 33
34#ifdef CONFIG_SMP
35int x86_cpu_to_node_map_init[NR_CPUS] = {
36 [0 ... NR_CPUS-1] = NUMA_NO_NODE
37};
38void *x86_cpu_to_node_map_early_ptr;
39EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
40#endif
41DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
42EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
43
44s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { 34s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
45 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE 35 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
46}; 36};
@@ -577,24 +567,6 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
577 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); 567 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
578} 568}
579 569
580__cpuinit void numa_add_cpu(int cpu)
581{
582 set_bit(cpu,
583 (unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
584}
585
586void __cpuinit numa_set_node(int cpu, int node)
587{
588 int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
589
590 if(cpu_to_node_map)
591 cpu_to_node_map[cpu] = node;
592 else if(per_cpu_offset(cpu))
593 per_cpu(x86_cpu_to_node_map, cpu) = node;
594 else
595 Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
596}
597
598unsigned long __init numa_free_all_bootmem(void) 570unsigned long __init numa_free_all_bootmem(void)
599{ 571{
600 unsigned long pages = 0; 572 unsigned long pages = 0;
@@ -641,6 +613,7 @@ static __init int numa_setup(char *opt)
641} 613}
642early_param("numa", numa_setup); 614early_param("numa", numa_setup);
643 615
616#ifdef CONFIG_NUMA
644/* 617/*
645 * Setup early cpu_to_node. 618 * Setup early cpu_to_node.
646 * 619 *
@@ -652,14 +625,19 @@ early_param("numa", numa_setup);
652 * is already initialized in a round robin manner at numa_init_array, 625 * is already initialized in a round robin manner at numa_init_array,
653 * prior to this call, and this initialization is good enough 626 * prior to this call, and this initialization is good enough
654 * for the fake NUMA cases. 627 * for the fake NUMA cases.
628 *
629 * Called before the per_cpu areas are setup.
655 */ 630 */
656void __init init_cpu_to_node(void) 631void __init init_cpu_to_node(void)
657{ 632{
658 int i; 633 int cpu;
634 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
659 635
660 for (i = 0; i < NR_CPUS; i++) { 636 BUG_ON(cpu_to_apicid == NULL);
637
638 for_each_possible_cpu(cpu) {
661 int node; 639 int node;
662 u16 apicid = x86_cpu_to_apicid_init[i]; 640 u16 apicid = cpu_to_apicid[cpu];
663 641
664 if (apicid == BAD_APICID) 642 if (apicid == BAD_APICID)
665 continue; 643 continue;
@@ -668,8 +646,9 @@ void __init init_cpu_to_node(void)
668 continue; 646 continue;
669 if (!node_online(node)) 647 if (!node_online(node))
670 continue; 648 continue;
671 numa_set_node(i, node); 649 numa_set_node(cpu, node);
672 } 650 }
673} 651}
652#endif
674 653
675 654
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 99649dccad28..012220e31c99 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -376,7 +376,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
376 if (node == NUMA_NO_NODE) 376 if (node == NUMA_NO_NODE)
377 continue; 377 continue;
378 if (!node_isset(node, node_possible_map)) 378 if (!node_isset(node, node_possible_map))
379 numa_set_node(i, NUMA_NO_NODE); 379 numa_clear_node(i);
380 } 380 }
381 numa_init_array(); 381 numa_init_array();
382 return 0; 382 return 0;
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index fdf4044d2e74..1efe162e16d7 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -40,6 +40,7 @@ static ssize_t show_##name(struct sys_device *dev, char *buf) \
40 return sprintf(buf, "%d\n", topology_##name(cpu)); \ 40 return sprintf(buf, "%d\n", topology_##name(cpu)); \
41} 41}
42 42
43#if defined(topology_thread_siblings) || defined(topology_core_siblings)
43static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) 44static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
44{ 45{
45 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; 46 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
@@ -54,21 +55,41 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
54 } 55 }
55 return n; 56 return n;
56} 57}
58#endif
57 59
60#ifdef arch_provides_topology_pointers
58#define define_siblings_show_map(name) \ 61#define define_siblings_show_map(name) \
59static inline ssize_t show_##name(struct sys_device *dev, char *buf) \ 62static ssize_t show_##name(struct sys_device *dev, char *buf) \
60{ \ 63{ \
61 unsigned int cpu = dev->id; \ 64 unsigned int cpu = dev->id; \
62 return show_cpumap(0, &(topology_##name(cpu)), buf); \ 65 return show_cpumap(0, &(topology_##name(cpu)), buf); \
63} 66}
64 67
65#define define_siblings_show_list(name) \ 68#define define_siblings_show_list(name) \
66static inline ssize_t show_##name##_list(struct sys_device *dev, char *buf) \ 69static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
67{ \ 70{ \
68 unsigned int cpu = dev->id; \ 71 unsigned int cpu = dev->id; \
69 return show_cpumap(1, &(topology_##name(cpu)), buf); \ 72 return show_cpumap(1, &(topology_##name(cpu)), buf); \
70} 73}
71 74
75#else
76#define define_siblings_show_map(name) \
77static ssize_t show_##name(struct sys_device *dev, char *buf) \
78{ \
79 unsigned int cpu = dev->id; \
80 cpumask_t mask = topology_##name(cpu); \
81 return show_cpumap(0, &mask, buf); \
82}
83
84#define define_siblings_show_list(name) \
85static ssize_t show_##name##_list(struct sys_device *dev, char *buf) \
86{ \
87 unsigned int cpu = dev->id; \
88 cpumask_t mask = topology_##name(cpu); \
89 return show_cpumap(1, &mask, buf); \
90}
91#endif
92
72#define define_siblings_show_func(name) \ 93#define define_siblings_show_func(name) \
73 define_siblings_show_map(name); define_siblings_show_list(name) 94 define_siblings_show_map(name); define_siblings_show_list(name)
74 95
diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h
index 22e87c9f6a80..b510daf4f4d8 100644
--- a/include/asm-x86/numa_64.h
+++ b/include/asm-x86/numa_64.h
@@ -14,11 +14,9 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
14 14
15#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) 15#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
16 16
17extern void numa_add_cpu(int cpu);
18extern void numa_init_array(void); 17extern void numa_init_array(void);
19extern int numa_off; 18extern int numa_off;
20 19
21extern void numa_set_node(int cpu, int node);
22extern void srat_reserve_add_area(int nodeid); 20extern void srat_reserve_add_area(int nodeid);
23extern int hotadd_percent; 21extern int hotadd_percent;
24 22
@@ -31,15 +29,16 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
31 29
32#ifdef CONFIG_NUMA 30#ifdef CONFIG_NUMA
33extern void __init init_cpu_to_node(void); 31extern void __init init_cpu_to_node(void);
34 32extern void __cpuinit numa_set_node(int cpu, int node);
35static inline void clear_node_cpumask(int cpu) 33extern void __cpuinit numa_clear_node(int cpu);
36{ 34extern void __cpuinit numa_add_cpu(int cpu);
37 clear_bit(cpu, (unsigned long *)&node_to_cpumask_map[cpu_to_node(cpu)]); 35extern void __cpuinit numa_remove_cpu(int cpu);
38}
39
40#else 36#else
41#define init_cpu_to_node() do {} while (0) 37static inline void init_cpu_to_node(void) { }
42#define clear_node_cpumask(cpu) do {} while (0) 38static inline void numa_set_node(int cpu, int node) { }
39static inline void numa_clear_node(int cpu) { }
40static inline void numa_add_cpu(int cpu, int node) { }
41static inline void numa_remove_cpu(int cpu) { }
43#endif 42#endif
44 43
45#endif 44#endif
diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h
index 736fc3bb8e1e..912a3a17b9db 100644
--- a/include/asm-x86/percpu.h
+++ b/include/asm-x86/percpu.h
@@ -143,4 +143,50 @@ do { \
143#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) 143#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
144#endif /* !__ASSEMBLY__ */ 144#endif /* !__ASSEMBLY__ */
145#endif /* !CONFIG_X86_64 */ 145#endif /* !CONFIG_X86_64 */
146
147#ifdef CONFIG_SMP
148
149/*
150 * Define the "EARLY_PER_CPU" macros. These are used for some per_cpu
151 * variables that are initialized and accessed before there are per_cpu
152 * areas allocated.
153 */
154
155#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
156 DEFINE_PER_CPU(_type, _name) = _initvalue; \
157 __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \
158 { [0 ... NR_CPUS-1] = _initvalue }; \
159 __typeof__(_type) *_name##_early_ptr = _name##_early_map
160
161#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
162 EXPORT_PER_CPU_SYMBOL(_name)
163
164#define DECLARE_EARLY_PER_CPU(_type, _name) \
165 DECLARE_PER_CPU(_type, _name); \
166 extern __typeof__(_type) *_name##_early_ptr; \
167 extern __typeof__(_type) _name##_early_map[]
168
169#define early_per_cpu_ptr(_name) (_name##_early_ptr)
170#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
171#define early_per_cpu(_name, _cpu) \
172 (early_per_cpu_ptr(_name) ? \
173 early_per_cpu_ptr(_name)[_cpu] : \
174 per_cpu(_name, _cpu))
175
176#else /* !CONFIG_SMP */
177#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
178 DEFINE_PER_CPU(_type, _name) = _initvalue
179
180#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
181 EXPORT_PER_CPU_SYMBOL(_name)
182
183#define DECLARE_EARLY_PER_CPU(_type, _name) \
184 DECLARE_PER_CPU(_type, _name)
185
186#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
187#define early_per_cpu_ptr(_name) NULL
188/* no early_per_cpu_map() */
189
190#endif /* !CONFIG_SMP */
191
146#endif /* _ASM_X86_PERCPU_H_ */ 192#endif /* _ASM_X86_PERCPU_H_ */
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 1ebaa5cd3112..ec841639fb44 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -29,21 +29,12 @@ extern int smp_num_siblings;
29extern unsigned int num_processors; 29extern unsigned int num_processors;
30extern cpumask_t cpu_initialized; 30extern cpumask_t cpu_initialized;
31 31
32#ifdef CONFIG_SMP
33extern u16 x86_cpu_to_apicid_init[];
34extern u16 x86_bios_cpu_apicid_init[];
35extern void *x86_cpu_to_apicid_early_ptr;
36extern void *x86_bios_cpu_apicid_early_ptr;
37#else
38#define x86_cpu_to_apicid_early_ptr NULL
39#define x86_bios_cpu_apicid_early_ptr NULL
40#endif
41
42DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); 32DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
43DECLARE_PER_CPU(cpumask_t, cpu_core_map); 33DECLARE_PER_CPU(cpumask_t, cpu_core_map);
44DECLARE_PER_CPU(u16, cpu_llc_id); 34DECLARE_PER_CPU(u16, cpu_llc_id);
45DECLARE_PER_CPU(u16, x86_cpu_to_apicid); 35
46DECLARE_PER_CPU(u16, x86_bios_cpu_apicid); 36DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
37DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
47 38
48/* Static state in head.S used to set up a CPU */ 39/* Static state in head.S used to set up a CPU */
49extern struct { 40extern struct {
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index dcf3f8131d6b..dac09cb66dca 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -35,87 +35,67 @@
35# endif 35# endif
36#endif 36#endif
37 37
38/* Node not present */
39#define NUMA_NO_NODE (-1)
40
38#ifdef CONFIG_NUMA 41#ifdef CONFIG_NUMA
39#include <linux/cpumask.h> 42#include <linux/cpumask.h>
40#include <asm/mpspec.h> 43#include <asm/mpspec.h>
41 44
42/* Mappings between logical cpu number and node number */
43#ifdef CONFIG_X86_32 45#ifdef CONFIG_X86_32
44extern int cpu_to_node_map[];
45#else
46/* Returns the number of the current Node. */
47#define numa_node_id() (early_cpu_to_node(raw_smp_processor_id()))
48#endif
49
50DECLARE_PER_CPU(int, x86_cpu_to_node_map);
51
52#ifdef CONFIG_SMP
53extern int x86_cpu_to_node_map_init[];
54extern void *x86_cpu_to_node_map_early_ptr;
55#else
56#define x86_cpu_to_node_map_early_ptr NULL
57#endif
58 46
47/* Mappings between node number and cpus on that node. */
59extern cpumask_t node_to_cpumask_map[]; 48extern cpumask_t node_to_cpumask_map[];
60 49
61#define NUMA_NO_NODE (-1) 50/* Mappings between logical cpu number and node number */
51extern int cpu_to_node_map[];
62 52
63/* Returns the number of the node containing CPU 'cpu' */ 53/* Returns the number of the node containing CPU 'cpu' */
64#ifdef CONFIG_X86_32
65#define early_cpu_to_node(cpu) cpu_to_node(cpu)
66static inline int cpu_to_node(int cpu) 54static inline int cpu_to_node(int cpu)
67{ 55{
68 return cpu_to_node_map[cpu]; 56 return cpu_to_node_map[cpu];
69} 57}
58#define early_cpu_to_node(cpu) cpu_to_node(cpu)
70 59
71#else /* CONFIG_X86_64 */ 60#else /* CONFIG_X86_64 */
72 61
73#ifdef CONFIG_SMP 62/* Mappings between node number and cpus on that node. */
74static inline int early_cpu_to_node(int cpu) 63extern cpumask_t node_to_cpumask_map[];
75{ 64
76 int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr; 65/* Mappings between logical cpu number and node number */
77 66DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
78 if (cpu_to_node_map) 67
79 return cpu_to_node_map[cpu]; 68/* Returns the number of the current Node. */
80 else if (per_cpu_offset(cpu)) 69#define numa_node_id() (per_cpu(x86_cpu_to_node_map, raw_smp_processor_id()))
81 return per_cpu(x86_cpu_to_node_map, cpu); 70
82 else 71#ifdef CONFIG_DEBUG_PER_CPU_MAPS
83 return NUMA_NO_NODE; 72extern int cpu_to_node(int cpu);
84} 73extern int early_cpu_to_node(int cpu);
85#else 74extern cpumask_t *_node_to_cpumask_ptr(int node);
86#define early_cpu_to_node(cpu) cpu_to_node(cpu) 75extern cpumask_t node_to_cpumask(int node);
87#endif
88 76
77#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
78
79/* Returns the number of the node containing CPU 'cpu' */
89static inline int cpu_to_node(int cpu) 80static inline int cpu_to_node(int cpu)
90{ 81{
91#ifdef CONFIG_DEBUG_PER_CPU_MAPS
92 if (x86_cpu_to_node_map_early_ptr) {
93 printk("KERN_NOTICE cpu_to_node(%d): usage too early!\n",
94 (int)cpu);
95 dump_stack();
96 return ((int *)x86_cpu_to_node_map_early_ptr)[cpu];
97 }
98#endif
99 return per_cpu(x86_cpu_to_node_map, cpu); 82 return per_cpu(x86_cpu_to_node_map, cpu);
100} 83}
101 84
102#ifdef CONFIG_NUMA 85/* Same function but used if called before per_cpu areas are setup */
103 86static inline int early_cpu_to_node(int cpu)
104/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ 87{
105#define node_to_cpumask_ptr(v, node) \ 88 if (early_per_cpu_ptr(x86_cpu_to_node_map))
106 cpumask_t *v = &(node_to_cpumask_map[node]) 89 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
107
108#define node_to_cpumask_ptr_next(v, node) \
109 v = &(node_to_cpumask_map[node])
110#endif
111 90
112#endif /* CONFIG_X86_64 */ 91 return per_cpu(x86_cpu_to_node_map, cpu);
92}
113 93
114/* 94/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
115 * Returns the number of the node containing Node 'node'. This 95static inline cpumask_t *_node_to_cpumask_ptr(int node)
116 * architecture is flat, so it is a pretty simple function! 96{
117 */ 97 return &node_to_cpumask_map[node];
118#define parent_node(node) (node) 98}
119 99
120/* Returns a bitmask of CPUs on Node 'node'. */ 100/* Returns a bitmask of CPUs on Node 'node'. */
121static inline cpumask_t node_to_cpumask(int node) 101static inline cpumask_t node_to_cpumask(int node)
@@ -123,14 +103,29 @@ static inline cpumask_t node_to_cpumask(int node)
123 return node_to_cpumask_map[node]; 103 return node_to_cpumask_map[node];
124} 104}
125 105
106#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
107#endif /* CONFIG_X86_64 */
108
109/* Replace default node_to_cpumask_ptr with optimized version */
110#define node_to_cpumask_ptr(v, node) \
111 cpumask_t *v = _node_to_cpumask_ptr(node)
112
113#define node_to_cpumask_ptr_next(v, node) \
114 v = _node_to_cpumask_ptr(node)
115
126/* Returns the number of the first CPU on Node 'node'. */ 116/* Returns the number of the first CPU on Node 'node'. */
127static inline int node_to_first_cpu(int node) 117static inline int node_to_first_cpu(int node)
128{ 118{
129 cpumask_t mask = node_to_cpumask(node); 119 node_to_cpumask_ptr(mask, node);
130 120 return first_cpu(*mask);
131 return first_cpu(mask);
132} 121}
133 122
123/*
124 * Returns the number of the node containing Node 'node'. This
125 * architecture is flat, so it is a pretty simple function!
126 */
127#define parent_node(node) (node)
128
134#define pcibus_to_node(bus) __pcibus_to_node(bus) 129#define pcibus_to_node(bus) __pcibus_to_node(bus)
135#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus) 130#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
136 131
@@ -180,8 +175,31 @@ extern int __node_distance(int, int);
180#define node_distance(a, b) __node_distance(a, b) 175#define node_distance(a, b) __node_distance(a, b)
181#endif 176#endif
182 177
183#else /* CONFIG_NUMA */ 178#else /* !CONFIG_NUMA */
179
180#define numa_node_id() 0
181#define cpu_to_node(cpu) 0
182#define early_cpu_to_node(cpu) 0
183
184static inline cpumask_t *_node_to_cpumask_ptr(int node)
185{
186 return &cpu_online_map;
187}
188static inline cpumask_t node_to_cpumask(int node)
189{
190 return cpu_online_map;
191}
192static inline int node_to_first_cpu(int node)
193{
194 return first_cpu(cpu_online_map);
195}
196
197/* Replace default node_to_cpumask_ptr with optimized version */
198#define node_to_cpumask_ptr(v, node) \
199 cpumask_t *v = _node_to_cpumask_ptr(node)
184 200
201#define node_to_cpumask_ptr_next(v, node) \
202 v = _node_to_cpumask_ptr(node)
185#endif 203#endif
186 204
187#include <asm-generic/topology.h> 205#include <asm-generic/topology.h>
@@ -193,6 +211,9 @@ extern cpumask_t cpu_coregroup_map(int cpu);
193#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) 211#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
194#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) 212#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
195#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) 213#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
214
215/* indicates that pointers to the topology cpumask_t maps are valid */
216#define arch_provides_topology_pointers yes
196#endif 217#endif
197 218
198static inline void arch_fix_phys_package_id(int num, u32 slot) 219static inline void arch_fix_phys_package_id(int num, u32 slot)
@@ -220,4 +241,4 @@ static inline void set_mp_bus_to_node(int busnum, int node)
220} 241}
221#endif 242#endif
222 243
223#endif 244#endif /* _ASM_X86_TOPOLOGY_H */