Diffstat (limited to 'arch')

 arch/x86/Kconfig           |  35
 arch/x86/Kconfig.debug     |   2
 arch/x86/kernel/apic_32.c  |   9
 arch/x86/kernel/apic_64.c  |  11
 arch/x86/kernel/head64.c   |  22
 arch/x86/kernel/nmi_64.c   |   4
 arch/x86/kernel/setup.c    | 297
 arch/x86/kernel/setup64.c  |   8
 arch/x86/kernel/setup_32.c |  24
 arch/x86/kernel/setup_64.c |   9
 arch/x86/kernel/smpboot.c  |  81
 arch/x86/mm/numa_64.c      |  87
 arch/x86/mm/srat_64.c      |   2
 arch/x86/pci/acpi.c        |  17
 14 files changed, 426 insertions(+), 182 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 640dc62a7fa0..112afd368c77 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -121,7 +121,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
 config HAVE_SETUP_PER_CPU_AREA
-	def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
+	def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER)
 
 config HAVE_CPUMASK_OF_CPU_MAP
 	def_bool X86_64_SMP
@@ -579,7 +579,21 @@ config SWIOTLB
 
 config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
+config MAXSMP
+	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
+	depends on X86_64 && SMP
+	default n
+	help
+	  Configure maximum number of CPUS and NUMA Nodes for this architecture.
+	  If unsure, say N.
 
+if MAXSMP
+config NR_CPUS
+	int
+	default "4096"
+endif
+
+if !MAXSMP
 config NR_CPUS
 	int "Maximum number of CPUs (2-4096)"
 	range 2 4096
@@ -592,7 +606,8 @@ config NR_CPUS
 	  minimum value which makes sense is 2.
 
 	  This is purely to save memory - each supported CPU adds
-	  approximately one kilobyte to the kernel image.
+	  approximately eight kilobytes to the kernel image.
+endif
 
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
@@ -983,13 +998,25 @@ config NUMA_EMU
 	  into virtual nodes when booted with "numa=fake=N", where N is the
 	  number of nodes. This is only useful for debugging.
 
+if MAXSMP
+
+config NODES_SHIFT
+	int
+	default "9"
+endif
+
+if !MAXSMP
 config NODES_SHIFT
-	int "Max num nodes shift(1-9)"
+	int "Maximum NUMA Nodes (as a power of 2)"
 	range 1 9 if X86_64
 	default "6" if X86_64
 	default "4" if X86_NUMAQ
 	default "3"
 	depends on NEED_MULTIPLE_NODES
+	help
+	  Specify the maximum number of NUMA Nodes available on the target
+	  system. Increases memory reserved to accommodate various tables.
+endif
 
 config HAVE_ARCH_BOOTMEM_NODE
 	def_bool y
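
For reference, with MAXSMP=y the two overrides above pin the derived limits. A quick sketch of the resulting constants, assuming the usual relation between NODES_SHIFT and MAX_NUMNODES in the generic headers:

/* Derived limits when MAXSMP=y, per the Kconfig defaults above */
#define NR_CPUS		4096
#define NODES_SHIFT	9
#define MAX_NUMNODES	(1 << NODES_SHIFT)	/* 512 nodes */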
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index f0684bb74faf..acc0271920f2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -68,7 +68,7 @@ config DEBUG_PAGEALLOC
 config DEBUG_PER_CPU_MAPS
 	bool "Debug access to per_cpu maps"
 	depends on DEBUG_KERNEL
-	depends on X86_64_SMP
+	depends on X86_SMP
 	default n
 	help
 	  Say Y to verify that the per_cpu map being accessed has
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 570c362eca8c..84ce106b33c8 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -52,9 +52,6 @@
 
 unsigned long mp_lapic_addr;
 
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
 /*
  * Knob to control our willingness to enable the local APIC.
  *
@@ -1546,9 +1543,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	}
 #ifdef CONFIG_SMP
 	/* are we being called early in kernel startup? */
-	if (x86_cpu_to_apicid_early_ptr) {
-		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
-		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 
 		cpu_to_apicid[cpu] = apicid;
 		bios_cpu_apicid[cpu] = apicid;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index d7406aa1c985..e494809fc508 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -90,9 +90,6 @@ static unsigned long apic_phys;
 
 unsigned long mp_lapic_addr;
 
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
 unsigned int __cpuinitdata maxcpus = NR_CPUS;
 /*
  * Get the LAPIC version
@@ -1075,9 +1072,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
 		max_physical_apicid = apicid;
 
 	/* are we being called early in kernel startup? */
-	if (x86_cpu_to_apicid_early_ptr) {
-		u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
-		u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
+		u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+		u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 
 		cpu_to_apicid[cpu] = apicid;
 		bios_cpu_apicid[cpu] = apicid;
@@ -1253,7 +1250,7 @@ __cpuinit int apic_is_clustered_box(void)
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
 		return 0;
 
-	bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
 	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
 	for (i = 0; i < NR_CPUS; i++) {
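
The early_per_cpu accessors used above replace the open-coded *_early_ptr globals. A minimal sketch of the assumed shape of these macros (the real definitions belong to the percpu header side of this series, which is not part of this diff): each early variable pairs a regular per-cpu variable with an NR_CPUS-sized bootstrap array plus a pointer that is cleared once the per-cpu areas are live.

/* Assumed shape of the early-per-cpu pattern used above */
#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)			\
	DEFINE_PER_CPU(_type, _name) = _initvalue;			\
	__typeof__(_type) _name##_early_map[NR_CPUS] __initdata =	\
				{ [0 ... NR_CPUS-1] = _initvalue };	\
	__typeof__(_type) *_name##_early_ptr = _name##_early_map

/* non-NULL only while the bootstrap array is still in use */
#define early_per_cpu_ptr(_name)	(_name##_early_ptr)
#define early_per_cpu_map(_name, _idx)	(_name##_early_map[_idx])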
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5fbed459ff3b..c970929bb15d 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,6 +25,20 @@
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
 
+/* boot cpu pda */
+static struct x8664_pda _boot_cpu_pda __read_mostly;
+
+#ifdef CONFIG_SMP
+/*
+ * We install an empty cpu_pda pointer table to indicate to early users
+ * (numa_set_node) that the cpu_pda pointer table for cpus other than
+ * the boot cpu is not yet setup.
+ */
+static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
+#else
+static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
+#endif
+
 static void __init zap_identity_mappings(void)
 {
 	pgd_t *pgd = pgd_offset_k(0UL);
@@ -88,10 +102,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
 
 	early_printk("Kernel alive\n");
 
-	for (i = 0; i < NR_CPUS; i++)
-		cpu_pda(i) = &boot_cpu_pda[i];
-
+	_cpu_pda = __cpu_pda;
+	cpu_pda(0) = &_boot_cpu_pda;
 	pda_init(0);
+
+	early_printk("Kernel really alive\n");
+
 	copy_bootdata(__va(real_mode_data));
 
 	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
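
The head64.c change switches _cpu_pda from a static NR_CPUS-sized pda array to a pointer table, of which only entry 0 is populated this early; the rest are filled in later by setup_cpu_pda_map(). A minimal sketch of the indirection this code assumes (declarations as in the pda header for this series):

/* cpu_pda() is one level of indirection through the pointer table */
extern struct x8664_pda **_cpu_pda;
#define cpu_pda(i)	(_cpu_pda[i])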
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 0060e44e8989..d62f3b66b529 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -90,7 +90,7 @@ int __init check_nmi_watchdog(void)
 	if (!atomic_read(&nmi_active))
 		return 0;
 
-	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
 	if (!prev_nmi_count)
 		goto error;
 
@@ -101,7 +101,7 @@ int __init check_nmi_watchdog(void)
 	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
 #endif
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		prev_nmi_count[cpu] = cpu_pda(cpu)->__nmi_count;
 	local_irq_enable();
 	mdelay((20*1000)/nmi_hz); // wait 20 ticks
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 5b0de38cde48..ebb0a2bcdc08 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -20,13 +20,34 @@ unsigned int boot_cpu_physical_apicid = -1U;
 unsigned int max_physical_apicid;
 EXPORT_SYMBOL(boot_cpu_physical_apicid);
 
-DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
 #endif
 
+/* map cpu index to physical APIC ID */
+DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#define X86_64_NUMA 1
+
+/* map cpu index to node index */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+/* which logical CPUs are on which nodes */
+cpumask_t *node_to_cpumask_map;
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+/* setup node_to_cpumask_map */
+static void __init setup_node_to_cpumask_map(void);
+
+#else
+static inline void setup_node_to_cpumask_map(void) { }
+#endif
+
 #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
 /*
  * Copy data used in early init routines from the initial arrays to the
@@ -38,20 +59,21 @@ static void __init setup_per_cpu_maps(void)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
+		per_cpu(x86_cpu_to_apicid, cpu) =
+				early_per_cpu_map(x86_cpu_to_apicid, cpu);
 		per_cpu(x86_bios_cpu_apicid, cpu) =
-						x86_bios_cpu_apicid_init[cpu];
-#ifdef CONFIG_NUMA
+				early_per_cpu_map(x86_bios_cpu_apicid, cpu);
+#ifdef X86_64_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
-					x86_cpu_to_node_map_init[cpu];
+				early_per_cpu_map(x86_cpu_to_node_map, cpu);
 #endif
 	}
 
 	/* indicate the early static arrays will soon be gone */
-	x86_cpu_to_apicid_early_ptr = NULL;
-	x86_bios_cpu_apicid_early_ptr = NULL;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = NULL;
+	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
+	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
+#ifdef X86_64_NUMA
+	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 }
 
@@ -80,6 +102,50 @@ static inline void setup_cpumask_of_cpu(void) { }
 */
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
+static inline void setup_cpu_pda_map(void) { }
+
+#elif !defined(CONFIG_SMP)
+static inline void setup_cpu_pda_map(void) { }
+
+#else /* CONFIG_SMP && CONFIG_X86_64 */
+
+/*
+ * Allocate cpu_pda pointer table and array via alloc_bootmem.
+ */
+static void __init setup_cpu_pda_map(void)
+{
+	char *pda;
+	struct x8664_pda **new_cpu_pda;
+	unsigned long size;
+	int cpu;
+
+	size = roundup(sizeof(struct x8664_pda), cache_line_size());
+
+	/* allocate cpu_pda array and pointer table */
+	{
+		unsigned long tsize = nr_cpu_ids * sizeof(void *);
+		unsigned long asize = size * (nr_cpu_ids - 1);
+
+		tsize = roundup(tsize, cache_line_size());
+		new_cpu_pda = alloc_bootmem(tsize + asize);
+		pda = (char *)new_cpu_pda + tsize;
+	}
+
+	/* initialize pointer table to static pda's */
+	for_each_possible_cpu(cpu) {
+		if (cpu == 0) {
+			/* leave boot cpu pda in place */
+			new_cpu_pda[0] = cpu_pda(0);
+			continue;
+		}
+		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
+		new_cpu_pda[cpu]->in_bootmem = 1;
+		pda += size;
+	}
+
+	/* point to new pointer table */
+	_cpu_pda = new_cpu_pda;
+}
 #endif
 
 /*
@@ -89,50 +155,52 @@ EXPORT_SYMBOL(__per_cpu_offset);
 */
 void __init setup_per_cpu_areas(void)
 {
-	int i, highest_cpu = 0;
-	unsigned long size;
+	ssize_t size = PERCPU_ENOUGH_ROOM;
+	char *ptr;
+	int cpu;
 
 #ifdef CONFIG_HOTPLUG_CPU
 	prefill_possible_map();
+#else
+	nr_cpu_ids = num_processors;
 #endif
 
+	/* Setup cpu_pda map */
+	setup_cpu_pda_map();
+
 	/* Copy section for each CPU (we discard the original) */
 	size = PERCPU_ENOUGH_ROOM;
-	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
+	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
 	       size);
 
-	for_each_possible_cpu(i) {
-		char *ptr;
+	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 		ptr = alloc_bootmem_pages(size);
 #else
-		int node = early_cpu_to_node(i);
+		int node = early_cpu_to_node(cpu);
 		if (!node_online(node) || !NODE_DATA(node)) {
 			ptr = alloc_bootmem_pages(size);
 			printk(KERN_INFO
-			"cpu %d has no node or node-local memory\n", i);
+			"cpu %d has no node %d or node-local memory\n",
+				cpu, node);
 		}
 		else
 			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
 #endif
-		if (!ptr)
-			panic("Cannot allocate cpu data for CPU %d\n", i);
-#ifdef CONFIG_X86_64
-		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
-#else
-		__per_cpu_offset[i] = ptr - __per_cpu_start;
-#endif
+		per_cpu_offset(cpu) = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 
-		highest_cpu = i;
 	}
 
-	nr_cpu_ids = highest_cpu + 1;
-	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
+	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
+		NR_CPUS, nr_cpu_ids, nr_node_ids);
 
 	/* Setup percpu data maps */
 	setup_per_cpu_maps();
 
+	/* Setup node to cpumask map */
+	setup_node_to_cpumask_map();
+
 	/* Setup cpumask_of_cpu map */
 	setup_cpumask_of_cpu();
 }
@@ -163,3 +231,176 @@ void __init parse_setup_data(void)
 		early_iounmap(data, PAGE_SIZE);
 	}
 }
+
+#ifdef X86_64_NUMA
+
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: node_to_cpumask() is not valid until after this is done.
+ */
+static void __init setup_node_to_cpumask_map(void)
+{
+	unsigned int node, num = 0;
+	cpumask_t *map;
+
+	/* setup nr_node_ids if not done yet */
+	if (nr_node_ids == MAX_NUMNODES) {
+		for_each_node_mask(node, node_possible_map)
+			num = node;
+		nr_node_ids = num + 1;
+	}
+
+	/* allocate the map */
+	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
+
+	Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
+		map, nr_node_ids);
+
+	/* node_to_cpumask() will now work */
+	node_to_cpumask_map = map;
+}
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
+		cpu_pda(cpu)->nodenumber = node;
+
+	if (cpu_to_node_map)
+		cpu_to_node_map[cpu] = node;
+
+	else if (per_cpu_offset(cpu))
+		per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+	else
+		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+	numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
+}
+
+#else /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+/*
+ * --------- debug versions of the numa functions ---------
+ */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+	int node = cpu_to_node(cpu);
+	cpumask_t *mask;
+	char buf[64];
+
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_ERR "node_to_cpumask_map NULL\n");
+		dump_stack();
+		return;
+	}
+
+	mask = &node_to_cpumask_map[node];
+	if (enable)
+		cpu_set(cpu, *mask);
+	else
+		cpu_clear(cpu, *mask);
+
+	cpulist_scnprintf(buf, sizeof(buf), *mask);
+	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+		enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+	numa_set_cpumask(cpu, 0);
+}
+
+int cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+		printk(KERN_WARNING
+			"cpu_to_node(%d): usage too early!\n", cpu);
+		dump_stack();
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+	if (early_per_cpu_ptr(x86_cpu_to_node_map))
+		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+	if (!per_cpu_offset(cpu)) {
+		printk(KERN_WARNING
+			"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+		dump_stack();
+		return NUMA_NO_NODE;
+	}
+	return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+cpumask_t *_node_to_cpumask_ptr(int node)
+{
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_WARNING
+			"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
+			node);
+		dump_stack();
+		return &cpu_online_map;
+	}
+	BUG_ON(node >= nr_node_ids);
+	return &node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(_node_to_cpumask_ptr);
+
+/*
+ * Returns a bitmask of CPUs on Node 'node'.
+ */
+cpumask_t node_to_cpumask(int node)
+{
+	if (node_to_cpumask_map == NULL) {
+		printk(KERN_WARNING
+			"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
+		dump_stack();
+		return cpu_online_map;
+	}
+	BUG_ON(node >= nr_node_ids);
+	return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(node_to_cpumask);
+
+/*
+ * --------- end of debug versions of the numa functions ---------
+ */
+
+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+
+#endif /* X86_64_NUMA */
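
setup_cpu_pda_map() above packs one cache-line-aligned pointer table plus nr_cpu_ids - 1 pda slots into a single bootmem allocation; the boot cpu keeps its static pda, so only cpus 1..nr_cpu_ids-1 get slots. A small standalone C sketch of the same layout arithmetic, with made-up sizes (CACHE_LINE and PDA_SIZE here are illustrative, not the kernel's values):

/* Standalone illustration of the setup_cpu_pda_map() layout:
 * one block holds the pointer table followed by the pda array
 * for cpus 1..nr_cpu_ids-1. */
#include <stdio.h>
#include <stddef.h>

#define CACHE_LINE	64	/* assumed cache_line_size() */
#define PDA_SIZE	128	/* assumed sizeof(struct x8664_pda) */

static unsigned long roundup_to(unsigned long x, unsigned long a)
{
	return (x + a - 1) / a * a;
}

int main(void)
{
	unsigned long nr_cpu_ids = 8;
	unsigned long size  = roundup_to(PDA_SIZE, CACHE_LINE);
	unsigned long tsize = roundup_to(nr_cpu_ids * sizeof(void *),
					 CACHE_LINE);
	unsigned long asize = size * (nr_cpu_ids - 1);

	/* cpu N's pda (N > 0) lives at table + tsize + (N - 1) * size */
	printf("alloc %lu bytes: table %lu + pdas %lu\n",
	       tsize + asize, tsize, asize);
	return 0;
}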
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index aee0e8200777..631ea6cc01d8 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -12,6 +12,7 @@
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/kgdb.h>
+#include <linux/topology.h>
 #include <asm/pda.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -34,9 +35,8 @@ struct boot_params boot_params;
 
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+struct x8664_pda **_cpu_pda __read_mostly;
 EXPORT_SYMBOL(_cpu_pda);
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
 
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
@@ -114,8 +114,10 @@ void pda_init(int cpu)
 			__get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
 		if (!pda->irqstackptr)
 			panic("cannot allocate irqstack for cpu %d", cpu);
-	}
 
+		if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
+			pda->nodenumber = cpu_to_node(cpu);
+	}
 
 	pda->irqstackptr += IRQSTACKSIZE-64;
 }
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 7e06ecd83174..a9b19ad24edb 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -659,18 +659,6 @@ static void set_mca_bus(int x)
 static void set_mca_bus(int x) { }
 #endif
 
-#ifdef CONFIG_NUMA
-/*
- * In the golden day, when everything among i386 and x86_64 will be
- * integrated, this will not live here
- */
-void *x86_cpu_to_node_map_early_ptr;
-int x86_cpu_to_node_map_init[NR_CPUS] = {
-	[0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
-DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
-#endif
-
 static void probe_roms(void);
 
 /*
@@ -866,18 +854,6 @@ void __init setup_arch(char **cmdline_p)
 
 	paravirt_post_allocator_init();
 
-#ifdef CONFIG_X86_SMP
-	/*
-	 * setup to use the early static init tables during kernel startup
-	 * X86_SMP will exclude sub-arches that don't deal well with it.
-	 */
-	x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
-	x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-#endif
-#endif
-
 #ifdef CONFIG_X86_GENERICARCH
 	generic_apic_probe();
 #endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 9a87113ba996..16ef53ab538a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -376,15 +376,6 @@ void __init setup_arch(char **cmdline_p)
 	kvmclock_init();
 #endif
 
-#ifdef CONFIG_SMP
-	/* setup to use the early static init tables during kernel startup */
-	x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
-	x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-#ifdef CONFIG_NUMA
-	x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-#endif
-#endif
-
 #ifdef CONFIG_ACPI
 	/*
 	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6be701f3027f..ae0a7a200421 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -67,22 +67,6 @@
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
 
-/*
- * FIXME: For x86_64, those are defined in other files. But moving them here,
- * would make the setup areas dependent on smp, which is a loss. When we
- * integrate apic between arches, we can probably do a better job, but
- * right now, they'll stay here -- glommer
- */
-
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
-			{ [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_early_ptr;
-
-u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
-			= { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_bios_cpu_apicid_early_ptr;
-
 #ifdef CONFIG_X86_32
 u8 apicid_2_node[MAX_APICID];
 static int low_mappings;
@@ -814,6 +798,45 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
 	complete(&c_idle->done);
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Allocate node local memory for the AP pda.
+ *
+ * Must be called after the _cpu_pda pointer table is initialized.
+ */
+static int __cpuinit get_local_pda(int cpu)
+{
+	struct x8664_pda *oldpda, *newpda;
+	unsigned long size = sizeof(struct x8664_pda);
+	int node = cpu_to_node(cpu);
+
+	if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
+		return 0;
+
+	oldpda = cpu_pda(cpu);
+	newpda = kmalloc_node(size, GFP_ATOMIC, node);
+	if (!newpda) {
+		printk(KERN_ERR "Could not allocate node local PDA "
+			"for CPU %d on node %d\n", cpu, node);
+
+		if (oldpda)
+			return 0;	/* have a usable pda */
+		else
+			return -1;
+	}
+
+	if (oldpda) {
+		memcpy(newpda, oldpda, size);
+		if (!after_bootmem)
+			free_bootmem((unsigned long)oldpda, size);
+	}
+
+	newpda->in_bootmem = 0;
+	cpu_pda(cpu) = newpda;
+	return 0;
+}
+#endif /* CONFIG_X86_64 */
+
 static int __cpuinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -839,19 +862,11 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 	}
 
 	/* Allocate node local memory for AP pdas */
-	if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
-		struct x8664_pda *newpda, *pda;
-		int node = cpu_to_node(cpu);
-		pda = cpu_pda(cpu);
-		newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
-				      node);
-		if (newpda) {
-			memcpy(newpda, pda, sizeof(struct x8664_pda));
-			cpu_pda(cpu) = newpda;
-		} else
-			printk(KERN_ERR
-		"Could not allocate node local PDA for CPU %d on node %d\n",
-				cpu, node);
+	if (cpu > 0) {
+		boot_error = get_local_pda(cpu);
+		if (boot_error)
+			goto restore_state;
+			/* if can't get pda memory, can't start cpu */
 	}
 #endif
 
@@ -970,11 +985,13 @@ do_rest:
 		}
 	}
 
+restore_state:
+
 	if (boot_error) {
 		/* Try to put things back the way they were before ... */
 		unmap_cpu_to_logical_apicid(cpu);
 #ifdef CONFIG_X86_64
-		clear_node_cpumask(cpu); /* was set by numa_add_cpu */
+		numa_remove_cpu(cpu); /* was set by numa_add_cpu */
 #endif
 		cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
 		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
@@ -1347,6 +1364,8 @@ __init void prefill_possible_map(void)
 
 	for (i = 0; i < possible; i++)
 		cpu_set(i, cpu_possible_map);
+
+	nr_cpu_ids = possible;
 }
 
 static void __ref remove_cpu_from_maps(int cpu)
@@ -1357,7 +1376,7 @@ static void __ref remove_cpu_from_maps(int cpu)
 	cpu_clear(cpu, cpu_callin_map);
 	/* was set by cpu_init() */
 	clear_bit(cpu, (unsigned long *)&cpu_initialized);
-	clear_node_cpumask(cpu);
+	numa_remove_cpu(cpu);
 #endif
 }
 
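
The get_local_pda() error handling above deliberately degrades rather than fails: if node-local allocation fails but a (bootmem) pda already exists, the CPU still boots with the old pda; only a CPU with no pda at all aborts and unwinds through restore_state. A hypothetical condensed caller, just to make the control flow explicit (boot_secondary and the body comments are illustrative, not code from this patch):

/* Hypothetical sketch of the boot path shaped by the hunks above */
static int boot_secondary(int apicid, int cpu)
{
	int boot_error = 0;

	if (cpu > 0)
		boot_error = get_local_pda(cpu);  /* -1 only if no usable pda */
	if (boot_error)
		goto restore_state;

	/* ... send INIT/SIPI, wait for the AP to call in ... */

restore_state:
	if (boot_error)
		numa_remove_cpu(cpu);	/* undo numa_add_cpu() */
	return boot_error;
}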
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index afb07ffb931d..c4557e25f60c 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -27,30 +27,17 @@
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-bootmem_data_t plat_node_bdata[MAX_NUMNODES];
+static bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
 struct memnode memnode;
 
-#ifdef CONFIG_SMP
-int x86_cpu_to_node_map_init[NR_CPUS] = {
-	[0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
-void *x86_cpu_to_node_map_early_ptr;
-EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
-#endif
-DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
 s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
 
-cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_to_cpumask_map);
-
 int numa_off __initdata;
-unsigned long __initdata nodemap_addr;
-unsigned long __initdata nodemap_size;
+static unsigned long __initdata nodemap_addr;
+static unsigned long __initdata nodemap_size;
 
 /*
  * Given a shift value, try to populate memnodemap[]
@@ -192,7 +179,7 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 void __init setup_node_bootmem(int nodeid, unsigned long start,
 			       unsigned long end)
 {
-	unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size;
+	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
 	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
@@ -204,7 +191,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	       start, end);
 
 	start_pfn = start >> PAGE_SHIFT;
-	end_pfn = end >> PAGE_SHIFT;
+	last_pfn = end >> PAGE_SHIFT;
 
 	node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
 					   SMP_CACHE_BYTES);
@@ -217,7 +204,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
 	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
 	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
-	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
+	NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
 
 	/*
 	 * Find a place for the bootmem map
@@ -226,7 +213,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	 * early_node_mem will get that with find_e820_area instead
 	 * of alloc_bootmem, that could clash with reserved range
 	 */
-	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
+	bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
 	nid = phys_to_nid(nodedata_phys);
 	if (nid == nodeid)
 		bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
@@ -248,7 +235,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 
 	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
 					 bootmap_start >> PAGE_SHIFT,
-					 start_pfn, end_pfn);
+					 start_pfn, last_pfn);
 
 	printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
 		bootmap_start, bootmap_start + bootmap_size - 1,
@@ -309,7 +296,7 @@ void __init numa_init_array(void)
 
 #ifdef CONFIG_NUMA_EMU
 /* Numa emulation */
-char *cmdline __initdata;
+static char *cmdline __initdata;
 
 /*
  * Setups up nid to range from addr to addr + size.  If the end
@@ -413,15 +400,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr,
 }
 
 /*
- * Sets up the system RAM area from start_pfn to end_pfn according to the
+ * Sets up the system RAM area from start_pfn to last_pfn according to the
 * numa=fake command-line option.
 */
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 
-static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
+static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn)
 {
 	u64 size, addr = start_pfn << PAGE_SHIFT;
-	u64 max_addr = end_pfn << PAGE_SHIFT;
+	u64 max_addr = last_pfn << PAGE_SHIFT;
 	int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
 
 	memset(&nodes, 0, sizeof(nodes));
@@ -527,7 +514,7 @@ out:
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+void __init numa_initmem_init(unsigned long start_pfn, unsigned long last_pfn)
 {
 	int i;
 
@@ -535,7 +522,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	nodes_clear(node_online_map);
 
 #ifdef CONFIG_NUMA_EMU
-	if (cmdline && !numa_emulation(start_pfn, end_pfn))
+	if (cmdline && !numa_emulation(start_pfn, last_pfn))
 		return;
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
@@ -543,7 +530,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 
 #ifdef CONFIG_ACPI_NUMA
 	if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
-					  end_pfn << PAGE_SHIFT))
+					  last_pfn << PAGE_SHIFT))
 		return;
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
@@ -551,7 +538,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 
 #ifdef CONFIG_K8_NUMA
 	if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT,
-					end_pfn<<PAGE_SHIFT))
+					last_pfn<<PAGE_SHIFT))
 		return;
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
@@ -561,7 +548,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 
 	printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
 	       start_pfn << PAGE_SHIFT,
-	       end_pfn << PAGE_SHIFT);
+	       last_pfn << PAGE_SHIFT);
 	/* setup dummy node covering all memory */
 	memnode_shift = 63;
 	memnodemap = memnode.embedded_map;
@@ -570,29 +557,8 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	node_set(0, node_possible_map);
 	for (i = 0; i < NR_CPUS; i++)
 		numa_set_node(i, 0);
-	/* cpumask_of_cpu() may not be available during early startup */
-	memset(&node_to_cpumask_map[0], 0, sizeof(node_to_cpumask_map[0]));
-	cpu_set(0, node_to_cpumask_map[0]);
-	e820_register_active_regions(0, start_pfn, end_pfn);
-	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
-}
-
-__cpuinit void numa_add_cpu(int cpu)
-{
-	set_bit(cpu,
-		(unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_set_node(int cpu, int node)
-{
-	int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
-
-	if(cpu_to_node_map)
-		cpu_to_node_map[cpu] = node;
-	else if(per_cpu_offset(cpu))
-		per_cpu(x86_cpu_to_node_map, cpu) = node;
-	else
-		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
+	e820_register_active_regions(0, start_pfn, last_pfn);
+	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
 }
 
 unsigned long __init numa_free_all_bootmem(void)
@@ -641,6 +607,7 @@ static __init int numa_setup(char *opt)
 }
 early_param("numa", numa_setup);
 
+#ifdef CONFIG_NUMA
 /*
 * Setup early cpu_to_node.
 *
@@ -652,14 +619,19 @@ early_param("numa", numa_setup);
 * is already initialized in a round robin manner at numa_init_array,
 * prior to this call, and this initialization is good enough
 * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
 */
 void __init init_cpu_to_node(void)
 {
-	int i;
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
 		int node;
-		u16 apicid = x86_cpu_to_apicid_init[i];
+		u16 apicid = cpu_to_apicid[cpu];
 
 		if (apicid == BAD_APICID)
 			continue;
@@ -668,8 +640,9 @@ void __init init_cpu_to_node(void)
 			continue;
 		if (!node_online(node))
 			continue;
-		numa_set_node(i, node);
+		numa_set_node(cpu, node);
 	}
 }
+#endif
 
 
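
With node_to_cpumask_map now allocated at nr_node_ids entries instead of a fixed MAX_NUMNODES array, callers are expected to go through the accessors consolidated into setup.c rather than index a static array. A usage sketch (assumes a kernel context where _node_to_cpumask_ptr() and for_each_cpu_mask() from this series are available; print_cpus_on_node is a hypothetical caller):

/* Walk the CPUs of one node via the pointer accessor above */
static void print_cpus_on_node(int node)
{
	cpumask_t *mask = _node_to_cpumask_ptr(node);
	int cpu;

	for_each_cpu_mask(cpu, *mask)
		printk(KERN_DEBUG "node %d has cpu %d\n", node, cpu);
}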
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 391d51035871..b67f5a16755f 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -376,7 +376,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 		if (node == NUMA_NO_NODE)
 			continue;
 		if (!node_isset(node, node_possible_map))
-			numa_set_node(i, NUMA_NO_NODE);
+			numa_clear_node(i);
 	}
 	numa_init_array();
 	return 0;
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 464279da49c4..4fa52d3dc848 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -171,8 +171,11 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
 	if (node != -1)
 		set_mp_bus_to_node(busnum, node);
 	else
-		node = get_mp_bus_to_node(busnum);
 #endif
+		node = get_mp_bus_to_node(busnum);
+
+	if (node != -1 && !node_online(node))
+		node = -1;
 
 	/* Allocate per-root-bus (not per bus) arch-specific data.
 	 * TODO: leak; this memory is never freed.
@@ -204,14 +207,16 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
 	if (!bus)
 		kfree(sd);
 
+	if (bus && node != -1) {
 #ifdef CONFIG_ACPI_NUMA
-	if (bus) {
-		if (pxm >= 0) {
+		if (pxm >= 0)
 			printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n",
-				busnum, pxm, pxm_to_node(pxm));
-		}
-	}
+				busnum, pxm, node);
+#else
+		printk(KERN_DEBUG "bus %02x -> node %d\n",
+			busnum, node);
 #endif
+	}
 
 	if (bus && (pci_probe & PCI_USE__CRS))
 		get_current_resources(device, busnum, domain, bus);