author     Ingo Molnar <mingo@elte.hu>  2008-07-08 05:59:23 -0400
committer  Ingo Molnar <mingo@elte.hu>  2008-07-08 05:59:23 -0400
commit     2b4fa851b2f06fdb04cac808b57324f5e51e1578 (patch)
tree       97db3ad5adda7683923630982f68b8b52c86e790 /arch/x86/kernel
parent     3de352bbd86f890dd0c5e1c09a6a1b0b29e0f8ce (diff)
parent     46f68e1c6b04a04772e828ff3bcd07ed708805c2 (diff)
Merge branch 'x86/numa' into x86/devel
Conflicts:

	arch/x86/Kconfig
	arch/x86/kernel/e820.c
	arch/x86/kernel/efi_64.c
	arch/x86/kernel/mpparse.c
	arch/x86/kernel/setup.c
	arch/x86/kernel/setup_32.c
	arch/x86/mm/init_64.c
	include/asm-x86/proto.h
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
 arch/x86/kernel/apic_32.c  |   9
 arch/x86/kernel/apic_64.c  |  11
 arch/x86/kernel/head64.c   |  22
 arch/x86/kernel/nmi_64.c   |   4
 arch/x86/kernel/setup.c    | 297
 arch/x86/kernel/setup64.c  |   8
 arch/x86/kernel/setup_32.c |  24
 arch/x86/kernel/setup_64.c |   9
 arch/x86/kernel/smpboot.c  |  81
 9 files changed, 352 insertions(+), 113 deletions(-)
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 570c362eca8c..84ce106b33c8 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -52,9 +52,6 @@
 
 unsigned long mp_lapic_addr;
 
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
 /*
  * Knob to control our willingness to enable the local APIC.
  *
@@ -1546,9 +1543,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	}
 #ifdef CONFIG_SMP
 	/* are we being called early in kernel startup? */
-	if (x86_cpu_to_apicid_early_ptr) {
-		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
-		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 
 		cpu_to_apicid[cpu] = apicid;
 		bios_cpu_apicid[cpu] = apicid;
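
For reference, the early_per_cpu accessors used in these hunks come from the x86/numa branch being merged and fall outside this diffstat-limited view; they are assumed to live in include/asm-x86/percpu.h. The following is a reconstructed sketch of the machinery, not a verbatim quote from the tree:

/*
 * Sketch of the assumed early_per_cpu machinery: each variable gets a
 * real percpu copy plus a static __initdata array used before the
 * percpu areas exist, reachable through an "early pointer".
 */
#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)			\
	DEFINE_PER_CPU(_type, _name) = _initvalue;			\
	__typeof__(_type) _name##_early_map[NR_CPUS] __initdata =	\
				{ [0 ... NR_CPUS-1] = _initvalue };	\
	__typeof__(_type) *_name##_early_ptr = _name##_early_map

/* non-NULL only until setup_per_cpu_maps() retires the early array */
#define early_per_cpu_ptr(_name)	(_name##_early_ptr)
#define early_per_cpu_map(_name, _idx)	(_name##_early_map[_idx])

/* read through the early array if it still exists, else the percpu area */
#define early_per_cpu(_name, _cpu)					\
	(early_per_cpu_ptr(_name) ?					\
		early_per_cpu_ptr(_name)[_cpu] :			\
		per_cpu(_name, _cpu))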
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index d7406aa1c985..e494809fc508 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -90,9 +90,6 @@ static unsigned long apic_phys;
 
 unsigned long mp_lapic_addr;
 
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
 unsigned int __cpuinitdata maxcpus = NR_CPUS;
 /*
  * Get the LAPIC version
@@ -1075,9 +1072,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
 		max_physical_apicid = apicid;
 
 	/* are we being called early in kernel startup? */
-	if (x86_cpu_to_apicid_early_ptr) {
-		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
-		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 
 		cpu_to_apicid[cpu] = apicid;
 		bios_cpu_apicid[cpu] = apicid;
@@ -1253,7 +1250,7 @@ __cpuinit int apic_is_clustered_box(void)
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
 		return 0;
 
-	bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
 	for (i = 0; i < NR_CPUS; i++) {
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5fbed459ff3b..c970929bb15d 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,6 +25,20 @@
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
 
+/* boot cpu pda */
+static struct x8664_pda _boot_cpu_pda __read_mostly;
+
+#ifdef CONFIG_SMP
+/*
+ * We install an empty cpu_pda pointer table to indicate to early users
+ * (numa_set_node) that the cpu_pda pointer table for cpus other than
+ * the boot cpu is not yet setup.
+ */
+static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
+#else
+static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
+#endif
+
 static void __init zap_identity_mappings(void)
 {
 	pgd_t *pgd = pgd_offset_k(0UL);
@@ -88,10 +102,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
 
 	early_printk("Kernel alive\n");
 
-	for (i = 0; i < NR_CPUS; i++)
-		cpu_pda(i) = &boot_cpu_pda[i];
-
+	_cpu_pda = __cpu_pda;
+	cpu_pda(0) = &_boot_cpu_pda;
 	pda_init(0);
+
+	early_printk("Kernel really alive\n");
+
 	copy_bootdata(__va(real_mode_data));
 
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
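
For context: cpu_pda() above is the usual pda.h accessor. With _cpu_pda demoted from a fixed NR_CPUS array to a plain pointer (see the setup64.c hunk further down), the declaration pair is assumed to look roughly like this sketch:

/* assumed shape after this merge (include/asm-x86/pda.h) */
extern struct x8664_pda **_cpu_pda;
#define cpu_pda(i)	(_cpu_pda[i])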
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 0060e44e8989..d62f3b66b529 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -90,7 +90,7 @@ int __init check_nmi_watchdog(void)
 	if (!atomic_read(&nmi_active))
 		return 0;
 
-	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
 	if (!prev_nmi_count)
 		goto error;
 
@@ -101,7 +101,7 @@ int __init check_nmi_watchdog(void)
 	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
 #endif
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
 	local_irq_enable();
 	mdelay((20*1000)/nmi_hz); // wait 20 ticks
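
The two hunks above are instances of a conversion this merge applies throughout: size allocations and loops by nr_cpu_ids, which prefill_possible_map() now pins to the number of possible cpus (see the smpboot.c hunk near the end), rather than by the compile-time NR_CPUS ceiling. A minimal sketch of the idiom, with a hypothetical buffer name:

/* hypothetical example of the nr_cpu_ids sizing idiom */
int *counts = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
if (!counts)
	return -ENOMEM;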
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 5b0de38cde48..ebb0a2bcdc08 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -20,13 +20,34 @@ unsigned int boot_cpu_physical_apicid = -1U;
 unsigned int max_physical_apicid;
 EXPORT_SYMBOL(boot_cpu_physical_apicid);
 
-DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
 #endif
 
+/* map cpu index to physical APIC ID */
+DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#define X86_64_NUMA	1
+
+/* map cpu index to node index */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+/* which logical CPUs are on which nodes */
+cpumask_t *node_to_cpumask_map;
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+/* setup node_to_cpumask_map */
+static void __init setup_node_to_cpumask_map(void);
+
+#else
+static inline void setup_node_to_cpumask_map(void) { }
+#endif
+
 #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
 /*
  * Copy data used in early init routines from the initial arrays to the
@@ -38,20 +59,21 @@ static void __init setup_per_cpu_maps(void)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
+		per_cpu(x86_cpu_to_apicid, cpu) =
+				early_per_cpu_map(x86_cpu_to_apicid, cpu);
 		per_cpu(x86_bios_cpu_apicid, cpu) =
-					x86_bios_cpu_apicid_init[cpu];
-#ifdef CONFIG_NUMA
+				early_per_cpu_map(x86_bios_cpu_apicid, cpu);
+#ifdef X86_64_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
-			x86_cpu_to_node_map_init[cpu];
+				early_per_cpu_map(x86_cpu_to_node_map, cpu);
 #endif
 	}
 
 	/* indicate the early static arrays will soon be gone */
-	x86_cpu_to_apicid_early_ptr = NULL;
-	x86_bios_cpu_apicid_early_ptr = NULL;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = NULL;
+	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
+	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
+#ifdef X86_64_NUMA
+	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 }
 
@@ -80,6 +102,50 @@ static inline void setup_cpumask_of_cpu(void) { }
  */
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
+static inline void setup_cpu_pda_map(void) { }
+
+#elif !defined(CONFIG_SMP)
+static inline void setup_cpu_pda_map(void) { }
+
+#else /* CONFIG_SMP && CONFIG_X86_64 */
+
+/*
+ * Allocate cpu_pda pointer table and array via alloc_bootmem.
+ */
+static void __init setup_cpu_pda_map(void)
+{
+	char *pda;
+	struct x8664_pda **new_cpu_pda;
+	unsigned long size;
+	int cpu;
+
+	size = roundup(sizeof(struct x8664_pda), cache_line_size());
+
+	/* allocate cpu_pda array and pointer table */
+	{
+		unsigned long tsize = nr_cpu_ids * sizeof(void *);
+		unsigned long asize = size * (nr_cpu_ids - 1);
+
+		tsize = roundup(tsize, cache_line_size());
+		new_cpu_pda = alloc_bootmem(tsize + asize);
+		pda = (char *)new_cpu_pda + tsize;
+	}
+
+	/* initialize pointer table to static pda's */
+	for_each_possible_cpu(cpu) {
+		if (cpu == 0) {
+			/* leave boot cpu pda in place */
+			new_cpu_pda[0] = cpu_pda(0);
+			continue;
+		}
+		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
+		new_cpu_pda[cpu]->in_bootmem = 1;
+		pda += size;
+	}
+
+	/* point to new pointer table */
+	_cpu_pda = new_cpu_pda;
+}
 #endif
 
 /*
@@ -89,50 +155,52 @@ EXPORT_SYMBOL(__per_cpu_offset);
  */
 void __init setup_per_cpu_areas(void)
 {
-	int i, highest_cpu = 0;
-	unsigned long size;
+	ssize_t size = PERCPU_ENOUGH_ROOM;
+	char *ptr;
+	int cpu;
 
 #ifdef CONFIG_HOTPLUG_CPU
 	prefill_possible_map();
+#else
+	nr_cpu_ids = num_processors;
 #endif
 
+	/* Setup cpu_pda map */
+	setup_cpu_pda_map();
+
 	/* Copy section for each CPU (we discard the original) */
 	size = PERCPU_ENOUGH_ROOM;
-	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
+	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 			  size);
 
-	for_each_possible_cpu(i) {
-		char *ptr;
+	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 		ptr = alloc_bootmem_pages(size);
 #else
-		int node = early_cpu_to_node(i);
+		int node = early_cpu_to_node(cpu);
 		if (!node_online(node) || !NODE_DATA(node)) {
 			ptr = alloc_bootmem_pages(size);
 			printk(KERN_INFO
-			       "cpu %d has no node or node-local memory\n", i);
+			       "cpu %d has no node %d or node-local memory\n",
+				cpu, node);
 		}
 		else
 			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
 #endif
-		if (!ptr)
-			panic("Cannot allocate cpu data for CPU %d\n", i);
-#ifdef CONFIG_X86_64
-		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
-#else
-		__per_cpu_offset[i] = ptr - __per_cpu_start;
-#endif
+		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 
-		highest_cpu = i;
 	}
 
-	nr_cpu_ids = highest_cpu + 1;
-	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
+	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
+		NR_CPUS, nr_cpu_ids, nr_node_ids);
 
 	/* Setup percpu data maps */
 	setup_per_cpu_maps();
 
+	/* Setup node to cpumask map */
+	setup_node_to_cpumask_map();
+
 	/* Setup cpumask_of_cpu map */
 	setup_cpumask_of_cpu();
 }
@@ -163,3 +231,176 @@ void __init parse_setup_data(void)
 		early_iounmap(data, PAGE_SIZE);
 	}
 }
+
+#ifdef X86_64_NUMA
+
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: node_to_cpumask() is not valid until after this is done.
+ */
+static void __init setup_node_to_cpumask_map(void)
+{
+	unsigned int node, num = 0;
+	cpumask_t *map;
+
+	/* setup nr_node_ids if not done yet */
+	if (nr_node_ids == MAX_NUMNODES) {
+		for_each_node_mask(node, node_possible_map)
+			num = node;
+		nr_node_ids = num + 1;
+	}
+
+	/* allocate the map */
+	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
+
+	Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
+		map, nr_node_ids);
+
+	/* node_to_cpumask() will now work */
+	node_to_cpumask_map = map;
+}
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
+		cpu_pda(cpu)->nodenumber = node;
+
+	if (cpu_to_node_map)
+		cpu_to_node_map[cpu] = node;
+
+	else if (per_cpu_offset(cpu))
+		per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+	else
+		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+	numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
+}
+
+#else /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+/*
+ * --------- debug versions of the numa functions ---------
+ */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+	int node = cpu_to_node(cpu);
+	cpumask_t *mask;
+	char buf[64];
+
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_ERR "node_to_cpumask_map NULL\n");
+		dump_stack();
+		return;
+	}
+
+	mask = &node_to_cpumask_map[node];
+	if (enable)
+		cpu_set(cpu, *mask);
+	else
+		cpu_clear(cpu, *mask);
+
+	cpulist_scnprintf(buf, sizeof(buf), *mask);
+	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+		enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 0);
+}
+
+int cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+		printk(KERN_WARNING
+			"cpu_to_node(%d): usage too early!\n", cpu);
+		dump_stack();
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map))
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+	if (!per_cpu_offset(cpu)) {
+		printk(KERN_WARNING
+			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+		dump_stack();
+		return NUMA_NO_NODE;
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+cpumask_t *_node_to_cpumask_ptr(int node)
+{
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_WARNING
+			"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
+			node);
+		dump_stack();
+		return &cpu_online_map;
+	}
+	BUG_ON(node >= nr_node_ids);
+	return &node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(_node_to_cpumask_ptr);
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ */
+cpumask_t node_to_cpumask(int node)
+{
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_WARNING
+			"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
+		dump_stack();
+		return cpu_online_map;
+	}
+	BUG_ON(node >= nr_node_ids);
+	return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(node_to_cpumask);
+
+/*
+ * --------- end of debug versions of the numa functions ---------
+ */
+
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+#endif /* X86_64_NUMA */
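
The cpu_to_node() and early_cpu_to_node() definitions above are the out-of-line CONFIG_DEBUG_PER_CPU_MAPS variants. The ordinary fast paths are assumed to be inline wrappers in include/asm-x86/topology.h; reconstructed here as a sketch rather than quoted from the tree:

/* assumed non-debug fast paths (include/asm-x86/topology.h) */
static inline int cpu_to_node(int cpu)
{
	return per_cpu(x86_cpu_to_node_map, cpu);
}

static inline int early_cpu_to_node(int cpu)
{
	return early_per_cpu(x86_cpu_to_node_map, cpu);
}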
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index aee0e8200777..631ea6cc01d8 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -12,6 +12,7 @@
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/kgdb.h>
+#include <linux/topology.h>
 #include <asm/pda.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -34,9 +35,8 @@ struct boot_params boot_params;
 
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+struct x8664_pda **_cpu_pda __read_mostly;
 EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
 
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
@@ -114,8 +114,10 @@ void pda_init(int cpu)
 			__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
 		if (!pda->irqstackptr)
 			panic("cannot allocate irqstack for cpu %d", cpu);
-	}
 
+		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
+			pda->nodenumber = cpu_to_node(cpu);
+	}
 
 	pda->irqstackptr += IRQSTACKSIZE-64;
 }
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 7e06ecd83174..a9b19ad24edb 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -659,18 +659,6 @@ static void set_mca_bus(int x)
 static void set_mca_bus(int x) { }
 #endif
 
-#ifdef CONFIG_NUMA
-/*
- * In the golden day, when everything among i386 and x86_64 will be
- * integrated, this will not live here
- */
-void *x86_cpu_to_node_map_early_ptr;
-int x86_cpu_to_node_map_init[NR_CPUS] = {
-	[0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
-DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
-#endif
-
 static void probe_roms(void);
 
 /*
@@ -866,18 +854,6 @@ void __init setup_arch(char **cmdline_p)
 
 	paravirt_post_allocator_init();
 
-#ifdef CONFIG_X86_SMP
-	/*
-	 * setup to use the early static init tables during kernel startup
-	 * X86_SMP will exclude sub-arches that don't deal well with it.
-	 */
-	x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
-	x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-#endif
-#endif
-
 #ifdef CONFIG_X86_GENERICARCH
 	generic_apic_probe();
 #endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 9a87113ba996..16ef53ab538a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -376,15 +376,6 @@ void __init setup_arch(char **cmdline_p)
 	kvmclock_init();
 #endif
 
-#ifdef CONFIG_SMP
-	/* setup to use the early static init tables during kernel startup */
-	x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
-	x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-#endif
-#endif
-
 #ifdef CONFIG_ACPI
 	/*
 	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6be701f3027f..ae0a7a200421 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -67,22 +67,6 @@
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
 
-/*
- * FIXME: For x86_64, those are defined in other files. But moving them here,
- * would make the setup areas dependent on smp, which is a loss. When we
- * integrate apic between arches, we can probably do a better job, but
- * right now, they'll stay here -- glommer
- */
-
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
-			{ [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_early_ptr;
-
-u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
-				= { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_bios_cpu_apicid_early_ptr;
-
 #ifdef CONFIG_X86_32
 u8 apicid_2_node[MAX_APICID];
 static int low_mappings;
@@ -814,6 +798,45 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
 	complete(&c_idle->done);
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Allocate node local memory for the AP pda.
+ *
+ * Must be called after the _cpu_pda pointer table is initialized.
+ */
+static int __cpuinit get_local_pda(int cpu)
+{
+	struct x8664_pda *oldpda, *newpda;
+	unsigned long size = sizeof(struct x8664_pda);
+	int node = cpu_to_node(cpu);
+
+	if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
+		return 0;
+
+	oldpda = cpu_pda(cpu);
+	newpda = kmalloc_node(size, GFP_ATOMIC, node);
+	if (!newpda) {
+		printk(KERN_ERR "Could not allocate node local PDA "
+			"for CPU %d on node %d\n", cpu, node);
+
+		if (oldpda)
+			return 0;	/* have a usable pda */
+		else
+			return -1;
+	}
+
+	if (oldpda) {
+		memcpy(newpda, oldpda, size);
+		if (!after_bootmem)
+			free_bootmem((unsigned long)oldpda, size);
+	}
+
+	newpda->in_bootmem = 0;
+	cpu_pda(cpu) = newpda;
+	return 0;
+}
+#endif /* CONFIG_X86_64 */
+
 static int __cpuinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -839,19 +862,11 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 	}
 
 	/* Allocate node local memory for AP pdas */
-	if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
-		struct x8664_pda *newpda, *pda;
-		int node = cpu_to_node(cpu);
-		pda = cpu_pda(cpu);
-		newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
-				      node);
-		if (newpda) {
-			memcpy(newpda, pda, sizeof(struct x8664_pda));
-			cpu_pda(cpu) = newpda;
-		} else
-			printk(KERN_ERR
-		"Could not allocate node local PDA for CPU %d on node %d\n",
-				cpu, node);
+	if (cpu > 0) {
+		boot_error = get_local_pda(cpu);
+		if (boot_error)
+			goto restore_state;
+			/* if can't get pda memory, can't start cpu */
 	}
 #endif
 
@@ -970,11 +985,13 @@ do_rest:
 		}
 	}
 
+restore_state:
+
 	if (boot_error) {
 		/* Try to put things back the way they were before ... */
 		unmap_cpu_to_logical_apicid(cpu);
 #ifdef CONFIG_X86_64
-		clear_node_cpumask(cpu); /* was set by numa_add_cpu */
+		numa_remove_cpu(cpu);	/* was set by numa_add_cpu */
 #endif
 		cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
 		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
@@ -1347,6 +1364,8 @@ __init void prefill_possible_map(void)
 
 	for (i = 0; i < possible; i++)
 		cpu_set(i, cpu_possible_map);
+
+	nr_cpu_ids = possible;
 }
 
 static void __ref remove_cpu_from_maps(int cpu)
@@ -1357,7 +1376,7 @@ static void __ref remove_cpu_from_maps(int cpu)
 	cpu_clear(cpu, cpu_callin_map);
 	/* was set by cpu_init() */
 	clear_bit(cpu, (unsigned long *)&cpu_initialized);
-	clear_node_cpumask(cpu);
+	numa_remove_cpu(cpu);
 #endif
 }
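
Taken together, the hunks above give the x86_64 pda a three-stage lifecycle; the following is a summary sketch in comment form, not code from the tree:

/*
 * pda lifecycle after this merge (summary of the hunks above):
 *
 * 1. head64.c: x86_64_start_kernel() installs the static __cpu_pda
 *    pointer table plus the boot cpu's _boot_cpu_pda, then pda_init(0).
 * 2. setup.c: setup_per_cpu_areas() -> setup_cpu_pda_map() moves the
 *    pointer table and nr_cpu_ids - 1 AP pdas into bootmem, marking
 *    the AP pdas in_bootmem = 1; the boot cpu pda stays where it is.
 * 3. smpboot.c: do_boot_cpu() -> get_local_pda() replaces each AP's
 *    bootmem pda with a node-local kmalloc_node() copy and frees the
 *    bootmem original; allocation failure now aborts cpu bring-up via
 *    the new restore_state label instead of booting with a shared pda.
 */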