diff options
author | Tejun Heo <tj@kernel.org> | 2009-01-13 06:41:35 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-16 08:19:46 -0500 |
commit | 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 (patch) | |
tree | 2d930218ef1072a59f7dac0f97bb03aa02796c8c /arch/x86/kernel | |
parent | c8f3329a0ddd751241e96b4100df7eda14b2cbc6 (diff) |
x86: fold pda into percpu area on SMP
[ Based on original patch from Christoph Lameter and Mike Travis. ]
Currently pdas and percpu areas are allocated separately. %gs points
to the local pda and the percpu area can be reached using pda->data_offset.
This patch folds the pda into the percpu area.
Due to a strange gcc requirement, pda needs to be at the beginning of
the percpu area so that pda->stack_canary is at %gs:40. To achieve
this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is
added and used to reserve pda sized chunk at the start of the percpu
area.
After this change, for the boot cpu, %gs first points to the pda in the
data.init area and later, during setup_per_cpu_areas(), gets updated to
point to the actual pda. This means that setup_per_cpu_areas() needs
to reload %gs for CPU0 while clearing the pda area for other cpus, as cpu0
has already modified it when control reaches setup_per_cpu_areas().
This patch also removes the now-unnecessary get_local_pda() and its call
sites.
A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into
per cpu area" patch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/asm-offsets_64.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/head64.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/head_64.S | 15 | ||||
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 107 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 60 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux_64.lds.S | 6 |
7 files changed, 72 insertions, 131 deletions
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 1d41d3f1edbc..f8d1b047ef4f 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -56,6 +56,7 @@ int main(void) | |||
56 | ENTRY(cpunumber); | 56 | ENTRY(cpunumber); |
57 | ENTRY(irqstackptr); | 57 | ENTRY(irqstackptr); |
58 | ENTRY(data_offset); | 58 | ENTRY(data_offset); |
59 | DEFINE(pda_size, sizeof(struct x8664_pda)); | ||
59 | BLANK(); | 60 | BLANK(); |
60 | #undef ENTRY | 61 | #undef ENTRY |
61 | #ifdef CONFIG_PARAVIRT | 62 | #ifdef CONFIG_PARAVIRT |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c116c599326e..7041acdf5579 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -893,10 +893,8 @@ void __cpuinit pda_init(int cpu) | |||
893 | /* Setup up data that may be needed in __get_free_pages early */ | 893 | /* Setup up data that may be needed in __get_free_pages early */ |
894 | loadsegment(fs, 0); | 894 | loadsegment(fs, 0); |
895 | loadsegment(gs, 0); | 895 | loadsegment(gs, 0); |
896 | /* Memory clobbers used to order PDA accessed */ | 896 | |
897 | mb(); | 897 | load_pda_offset(cpu); |
898 | wrmsrl(MSR_GS_BASE, pda); | ||
899 | mb(); | ||
900 | 898 | ||
901 | pda->cpunumber = cpu; | 899 | pda->cpunumber = cpu; |
902 | pda->irqcount = -1; | 900 | pda->irqcount = -1; |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 462d0beccb6b..1a311293f733 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -26,12 +26,18 @@ | |||
26 | #include <asm/bios_ebda.h> | 26 | #include <asm/bios_ebda.h> |
27 | #include <asm/trampoline.h> | 27 | #include <asm/trampoline.h> |
28 | 28 | ||
29 | /* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ | 29 | #ifndef CONFIG_SMP |
30 | /* boot cpu pda, referenced by head_64.S to initialize %gs on UP */ | ||
30 | struct x8664_pda _boot_cpu_pda; | 31 | struct x8664_pda _boot_cpu_pda; |
32 | #endif | ||
31 | 33 | ||
32 | void __init x86_64_init_pda(void) | 34 | void __init x86_64_init_pda(void) |
33 | { | 35 | { |
36 | #ifdef CONFIG_SMP | ||
37 | cpu_pda(0) = (void *)__per_cpu_load; | ||
38 | #else | ||
34 | cpu_pda(0) = &_boot_cpu_pda; | 39 | cpu_pda(0) = &_boot_cpu_pda; |
40 | #endif | ||
35 | cpu_pda(0)->data_offset = | 41 | cpu_pda(0)->data_offset = |
36 | (unsigned long)(__per_cpu_load - __per_cpu_start); | 42 | (unsigned long)(__per_cpu_load - __per_cpu_start); |
37 | pda_init(0); | 43 | pda_init(0); |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2f0ab0089883..7a995d0e9f78 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -245,10 +245,13 @@ ENTRY(secondary_startup_64) | |||
245 | 245 | ||
246 | /* Set up %gs. | 246 | /* Set up %gs. |
247 | * | 247 | * |
248 | * %gs should point to the pda. For initial boot, make %gs point | 248 | * On SMP, %gs should point to the per-cpu area. For initial |
249 | * to the _boot_cpu_pda in data section. For a secondary CPU, | 249 | * boot, make %gs point to the init data section. For a |
250 | * initial_gs should be set to its pda address before the CPU runs | 250 | * secondary CPU,initial_gs should be set to its pda address |
251 | * this code. | 251 | * before the CPU runs this code. |
252 | * | ||
253 | * On UP, initial_gs points to _boot_cpu_pda and doesn't | ||
254 | * change. | ||
252 | */ | 255 | */ |
253 | movl $MSR_GS_BASE,%ecx | 256 | movl $MSR_GS_BASE,%ecx |
254 | movq initial_gs(%rip),%rax | 257 | movq initial_gs(%rip),%rax |
@@ -278,7 +281,11 @@ ENTRY(secondary_startup_64) | |||
278 | ENTRY(initial_code) | 281 | ENTRY(initial_code) |
279 | .quad x86_64_start_kernel | 282 | .quad x86_64_start_kernel |
280 | ENTRY(initial_gs) | 283 | ENTRY(initial_gs) |
284 | #ifdef CONFIG_SMP | ||
285 | .quad __per_cpu_load | ||
286 | #else | ||
281 | .quad _boot_cpu_pda | 287 | .quad _boot_cpu_pda |
288 | #endif | ||
282 | __FINITDATA | 289 | __FINITDATA |
283 | 290 | ||
284 | ENTRY(stack_start) | 291 | ENTRY(stack_start) |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 73ab01b297c5..63d462802272 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/mpspec.h> | 13 | #include <asm/mpspec.h> |
14 | #include <asm/apicdef.h> | 14 | #include <asm/apicdef.h> |
15 | #include <asm/highmem.h> | 15 | #include <asm/highmem.h> |
16 | #include <asm/proto.h> | ||
16 | #include <asm/cpumask.h> | 17 | #include <asm/cpumask.h> |
17 | 18 | ||
18 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 19 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
@@ -65,6 +66,36 @@ static void __init setup_node_to_cpumask_map(void); | |||
65 | static inline void setup_node_to_cpumask_map(void) { } | 66 | static inline void setup_node_to_cpumask_map(void) { } |
66 | #endif | 67 | #endif |
67 | 68 | ||
69 | #ifdef CONFIG_X86_64 | ||
70 | void __cpuinit load_pda_offset(int cpu) | ||
71 | { | ||
72 | /* Memory clobbers used to order pda/percpu accesses */ | ||
73 | mb(); | ||
74 | wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); | ||
75 | mb(); | ||
76 | } | ||
77 | |||
78 | #endif /* CONFIG_SMP && CONFIG_X86_64 */ | ||
79 | |||
80 | #ifdef CONFIG_X86_64 | ||
81 | |||
82 | /* correctly size the local cpu masks */ | ||
83 | static void setup_cpu_local_masks(void) | ||
84 | { | ||
85 | alloc_bootmem_cpumask_var(&cpu_initialized_mask); | ||
86 | alloc_bootmem_cpumask_var(&cpu_callin_mask); | ||
87 | alloc_bootmem_cpumask_var(&cpu_callout_mask); | ||
88 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); | ||
89 | } | ||
90 | |||
91 | #else /* CONFIG_X86_32 */ | ||
92 | |||
93 | static inline void setup_cpu_local_masks(void) | ||
94 | { | ||
95 | } | ||
96 | |||
97 | #endif /* CONFIG_X86_32 */ | ||
98 | |||
68 | #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA | 99 | #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA |
69 | /* | 100 | /* |
70 | * Copy data used in early init routines from the initial arrays to the | 101 | * Copy data used in early init routines from the initial arrays to the |
@@ -101,63 +132,7 @@ static void __init setup_per_cpu_maps(void) | |||
101 | */ | 132 | */ |
102 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; | 133 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; |
103 | EXPORT_SYMBOL(__per_cpu_offset); | 134 | EXPORT_SYMBOL(__per_cpu_offset); |
104 | static inline void setup_cpu_pda_map(void) { } | 135 | #endif |
105 | |||
106 | #elif !defined(CONFIG_SMP) | ||
107 | static inline void setup_cpu_pda_map(void) { } | ||
108 | |||
109 | #else /* CONFIG_SMP && CONFIG_X86_64 */ | ||
110 | |||
111 | /* | ||
112 | * Allocate cpu_pda pointer table and array via alloc_bootmem. | ||
113 | */ | ||
114 | static void __init setup_cpu_pda_map(void) | ||
115 | { | ||
116 | char *pda; | ||
117 | unsigned long size; | ||
118 | int cpu; | ||
119 | |||
120 | size = roundup(sizeof(struct x8664_pda), cache_line_size()); | ||
121 | |||
122 | /* allocate cpu_pda array and pointer table */ | ||
123 | { | ||
124 | unsigned long asize = size * (nr_cpu_ids - 1); | ||
125 | |||
126 | pda = alloc_bootmem(asize); | ||
127 | } | ||
128 | |||
129 | /* initialize pointer table to static pda's */ | ||
130 | for_each_possible_cpu(cpu) { | ||
131 | if (cpu == 0) { | ||
132 | /* leave boot cpu pda in place */ | ||
133 | continue; | ||
134 | } | ||
135 | cpu_pda(cpu) = (struct x8664_pda *)pda; | ||
136 | cpu_pda(cpu)->in_bootmem = 1; | ||
137 | pda += size; | ||
138 | } | ||
139 | } | ||
140 | |||
141 | #endif /* CONFIG_SMP && CONFIG_X86_64 */ | ||
142 | |||
143 | #ifdef CONFIG_X86_64 | ||
144 | |||
145 | /* correctly size the local cpu masks */ | ||
146 | static void setup_cpu_local_masks(void) | ||
147 | { | ||
148 | alloc_bootmem_cpumask_var(&cpu_initialized_mask); | ||
149 | alloc_bootmem_cpumask_var(&cpu_callin_mask); | ||
150 | alloc_bootmem_cpumask_var(&cpu_callout_mask); | ||
151 | alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); | ||
152 | } | ||
153 | |||
154 | #else /* CONFIG_X86_32 */ | ||
155 | |||
156 | static inline void setup_cpu_local_masks(void) | ||
157 | { | ||
158 | } | ||
159 | |||
160 | #endif /* CONFIG_X86_32 */ | ||
161 | 136 | ||
162 | /* | 137 | /* |
163 | * Great future plan: | 138 | * Great future plan: |
@@ -171,9 +146,6 @@ void __init setup_per_cpu_areas(void) | |||
171 | int cpu; | 146 | int cpu; |
172 | unsigned long align = 1; | 147 | unsigned long align = 1; |
173 | 148 | ||
174 | /* Setup cpu_pda map */ | ||
175 | setup_cpu_pda_map(); | ||
176 | |||
177 | /* Copy section for each CPU (we discard the original) */ | 149 | /* Copy section for each CPU (we discard the original) */ |
178 | old_size = PERCPU_ENOUGH_ROOM; | 150 | old_size = PERCPU_ENOUGH_ROOM; |
179 | align = max_t(unsigned long, PAGE_SIZE, align); | 151 | align = max_t(unsigned long, PAGE_SIZE, align); |
@@ -204,8 +176,21 @@ void __init setup_per_cpu_areas(void) | |||
204 | cpu, node, __pa(ptr)); | 176 | cpu, node, __pa(ptr)); |
205 | } | 177 | } |
206 | #endif | 178 | #endif |
207 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | 179 | |
208 | memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); | 180 | memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); |
181 | #ifdef CONFIG_X86_64 | ||
182 | cpu_pda(cpu) = (void *)ptr; | ||
183 | |||
184 | /* | ||
185 | * CPU0 modified pda in the init data area, reload pda | ||
186 | * offset for CPU0 and clear the area for others. | ||
187 | */ | ||
188 | if (cpu == 0) | ||
189 | load_pda_offset(0); | ||
190 | else | ||
191 | memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); | ||
192 | #endif | ||
193 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | ||
209 | 194 | ||
210 | DBG("PERCPU: cpu %4d %p\n", cpu, ptr); | 195 | DBG("PERCPU: cpu %4d %p\n", cpu, ptr); |
211 | } | 196 | } |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 70d846628bbf..f2f77ca494d4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -744,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
744 | complete(&c_idle->done); | 744 | complete(&c_idle->done); |
745 | } | 745 | } |
746 | 746 | ||
747 | #ifdef CONFIG_X86_64 | ||
748 | |||
749 | /* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ | ||
750 | static void __ref free_bootmem_pda(struct x8664_pda *oldpda) | ||
751 | { | ||
752 | if (!after_bootmem) | ||
753 | free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * Allocate node local memory for the AP pda. | ||
758 | * | ||
759 | * Must be called after the _cpu_pda pointer table is initialized. | ||
760 | */ | ||
761 | int __cpuinit get_local_pda(int cpu) | ||
762 | { | ||
763 | struct x8664_pda *oldpda, *newpda; | ||
764 | unsigned long size = sizeof(struct x8664_pda); | ||
765 | int node = cpu_to_node(cpu); | ||
766 | |||
767 | if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) | ||
768 | return 0; | ||
769 | |||
770 | oldpda = cpu_pda(cpu); | ||
771 | newpda = kmalloc_node(size, GFP_ATOMIC, node); | ||
772 | if (!newpda) { | ||
773 | printk(KERN_ERR "Could not allocate node local PDA " | ||
774 | "for CPU %d on node %d\n", cpu, node); | ||
775 | |||
776 | if (oldpda) | ||
777 | return 0; /* have a usable pda */ | ||
778 | else | ||
779 | return -1; | ||
780 | } | ||
781 | |||
782 | if (oldpda) { | ||
783 | memcpy(newpda, oldpda, size); | ||
784 | free_bootmem_pda(oldpda); | ||
785 | } | ||
786 | |||
787 | newpda->in_bootmem = 0; | ||
788 | cpu_pda(cpu) = newpda; | ||
789 | return 0; | ||
790 | } | ||
791 | #endif /* CONFIG_X86_64 */ | ||
792 | |||
793 | static int __cpuinit do_boot_cpu(int apicid, int cpu) | 747 | static int __cpuinit do_boot_cpu(int apicid, int cpu) |
794 | /* | 748 | /* |
795 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 749 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
@@ -807,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
807 | }; | 761 | }; |
808 | INIT_WORK(&c_idle.work, do_fork_idle); | 762 | INIT_WORK(&c_idle.work, do_fork_idle); |
809 | 763 | ||
810 | #ifdef CONFIG_X86_64 | ||
811 | /* Allocate node local memory for AP pdas */ | ||
812 | if (cpu > 0) { | ||
813 | boot_error = get_local_pda(cpu); | ||
814 | if (boot_error) | ||
815 | goto restore_state; | ||
816 | /* if can't get pda memory, can't start cpu */ | ||
817 | } | ||
818 | #endif | ||
819 | |||
820 | alternatives_smp_switch(1); | 764 | alternatives_smp_switch(1); |
821 | 765 | ||
822 | c_idle.idle = get_idle_for_cpu(cpu); | 766 | c_idle.idle = get_idle_for_cpu(cpu); |
@@ -931,9 +875,7 @@ do_rest: | |||
931 | inquire_remote_apic(apicid); | 875 | inquire_remote_apic(apicid); |
932 | } | 876 | } |
933 | } | 877 | } |
934 | #ifdef CONFIG_X86_64 | 878 | |
935 | restore_state: | ||
936 | #endif | ||
937 | if (boot_error) { | 879 | if (boot_error) { |
938 | /* Try to put things back the way they were before ... */ | 880 | /* Try to put things back the way they were before ... */ |
939 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ | 881 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ |
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index f50280db0dfe..962f21f1d4d7 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -5,6 +5,7 @@ | |||
5 | #define LOAD_OFFSET __START_KERNEL_map | 5 | #define LOAD_OFFSET __START_KERNEL_map |
6 | 6 | ||
7 | #include <asm-generic/vmlinux.lds.h> | 7 | #include <asm-generic/vmlinux.lds.h> |
8 | #include <asm/asm-offsets.h> | ||
8 | #include <asm/page.h> | 9 | #include <asm/page.h> |
9 | 10 | ||
10 | #undef i386 /* in case the preprocessor is a 32bit one */ | 11 | #undef i386 /* in case the preprocessor is a 32bit one */ |
@@ -215,10 +216,11 @@ SECTIONS | |||
215 | /* | 216 | /* |
216 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the | 217 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the |
217 | * output PHDR, so the next output section - __data_nosave - should | 218 | * output PHDR, so the next output section - __data_nosave - should |
218 | * switch it back to data.init. | 219 | * switch it back to data.init. Also, pda should be at the head of |
220 | * percpu area. Preallocate it. | ||
219 | */ | 221 | */ |
220 | . = ALIGN(PAGE_SIZE); | 222 | . = ALIGN(PAGE_SIZE); |
221 | PERCPU_VADDR(0, :percpu) | 223 | PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) |
222 | #else | 224 | #else |
223 | PERCPU(PAGE_SIZE) | 225 | PERCPU(PAGE_SIZE) |
224 | #endif | 226 | #endif |