aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2009-01-13 06:41:35 -0500
committer: Ingo Molnar <mingo@elte.hu> 2009-01-16 08:19:46 -0500
commit: 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 (patch)
tree: 2d930218ef1072a59f7dac0f97bb03aa02796c8c /arch/x86/kernel
parent: c8f3329a0ddd751241e96b4100df7eda14b2cbc6 (diff)
x86: fold pda into percpu area on SMP
[ Based on original patch from Christoph Lameter and Mike Travis. ] Currently, pdas and percpu areas are allocated separately. %gs points to the local pda, and the percpu area can be reached using pda->data_offset. This patch folds the pda into the percpu area. Due to a strange gcc requirement, the pda needs to be at the beginning of the percpu area so that pda->stack_canary is at %gs:40. To achieve this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is added and used to reserve a pda-sized chunk at the start of the percpu area. After this change, for the boot cpu, %gs first points to the pda in the data.init area and later, during setup_per_cpu_areas(), gets updated to point to the actual pda. This means that setup_per_cpu_areas() needs to reload %gs for CPU0 while clearing the pda area for the other cpus, as cpu0 has already modified it when control reaches setup_per_cpu_areas(). This patch also removes the now unnecessary get_local_pda() and its call sites. A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- arch/x86/kernel/asm-offsets_64.c 1
-rw-r--r-- arch/x86/kernel/cpu/common.c 6
-rw-r--r-- arch/x86/kernel/head64.c 8
-rw-r--r-- arch/x86/kernel/head_64.S 15
-rw-r--r-- arch/x86/kernel/setup_percpu.c 107
-rw-r--r-- arch/x86/kernel/smpboot.c 60
-rw-r--r-- arch/x86/kernel/vmlinux_64.lds.S 6
7 files changed, 72 insertions, 131 deletions
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f1edbc..f8d1b047ef4f 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,7 @@ int main(void)
56 ENTRY(cpunumber); 56 ENTRY(cpunumber);
57 ENTRY(irqstackptr); 57 ENTRY(irqstackptr);
58 ENTRY(data_offset); 58 ENTRY(data_offset);
59 DEFINE(pda_size, sizeof(struct x8664_pda));
59 BLANK(); 60 BLANK();
60#undef ENTRY 61#undef ENTRY
61#ifdef CONFIG_PARAVIRT 62#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c116c599326e..7041acdf5579 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -893,10 +893,8 @@ void __cpuinit pda_init(int cpu)
893 /* Set up data that may be needed in __get_free_pages early */ 893 /* Set up data that may be needed in __get_free_pages early */
894 loadsegment(fs, 0); 894 loadsegment(fs, 0);
895 loadsegment(gs, 0); 895 loadsegment(gs, 0);
896 /* Memory clobbers used to order PDA accessed */ 896
897 mb(); 897 load_pda_offset(cpu);
898 wrmsrl(MSR_GS_BASE, pda);
899 mb();
900 898
901 pda->cpunumber = cpu; 899 pda->cpunumber = cpu;
902 pda->irqcount = -1; 900 pda->irqcount = -1;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 462d0beccb6b..1a311293f733 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,12 +26,18 @@
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ 29#ifndef CONFIG_SMP
30/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */
30struct x8664_pda _boot_cpu_pda; 31struct x8664_pda _boot_cpu_pda;
32#endif
31 33
32void __init x86_64_init_pda(void) 34void __init x86_64_init_pda(void)
33{ 35{
36#ifdef CONFIG_SMP
37 cpu_pda(0) = (void *)__per_cpu_load;
38#else
34 cpu_pda(0) = &_boot_cpu_pda; 39 cpu_pda(0) = &_boot_cpu_pda;
40#endif
35 cpu_pda(0)->data_offset = 41 cpu_pda(0)->data_offset =
36 (unsigned long)(__per_cpu_load - __per_cpu_start); 42 (unsigned long)(__per_cpu_load - __per_cpu_start);
37 pda_init(0); 43 pda_init(0);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2f0ab0089883..7a995d0e9f78 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -245,10 +245,13 @@ ENTRY(secondary_startup_64)
245 245
246 /* Set up %gs. 246 /* Set up %gs.
247 * 247 *
248 * %gs should point to the pda. For initial boot, make %gs point 248 * On SMP, %gs should point to the per-cpu area. For initial
249 * to the _boot_cpu_pda in data section. For a secondary CPU, 249 * boot, make %gs point to the init data section. For a
250 * initial_gs should be set to its pda address before the CPU runs 250 * secondary CPU, initial_gs should be set to its pda address
251 * this code. 251 * before the CPU runs this code.
252 *
253 * On UP, initial_gs points to _boot_cpu_pda and doesn't
254 * change.
252 */ 255 */
253 movl $MSR_GS_BASE,%ecx 256 movl $MSR_GS_BASE,%ecx
254 movq initial_gs(%rip),%rax 257 movq initial_gs(%rip),%rax
@@ -278,7 +281,11 @@ ENTRY(secondary_startup_64)
278 ENTRY(initial_code) 281 ENTRY(initial_code)
279 .quad x86_64_start_kernel 282 .quad x86_64_start_kernel
280 ENTRY(initial_gs) 283 ENTRY(initial_gs)
284#ifdef CONFIG_SMP
285 .quad __per_cpu_load
286#else
281 .quad _boot_cpu_pda 287 .quad _boot_cpu_pda
288#endif
282 __FINITDATA 289 __FINITDATA
283 290
284 ENTRY(stack_start) 291 ENTRY(stack_start)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 73ab01b297c5..63d462802272 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,6 +13,7 @@
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/apicdef.h> 14#include <asm/apicdef.h>
15#include <asm/highmem.h> 15#include <asm/highmem.h>
16#include <asm/proto.h>
16#include <asm/cpumask.h> 17#include <asm/cpumask.h>
17 18
18#ifdef CONFIG_DEBUG_PER_CPU_MAPS 19#ifdef CONFIG_DEBUG_PER_CPU_MAPS
@@ -65,6 +66,36 @@ static void __init setup_node_to_cpumask_map(void);
65static inline void setup_node_to_cpumask_map(void) { } 66static inline void setup_node_to_cpumask_map(void) { }
66#endif 67#endif
67 68
69#ifdef CONFIG_X86_64
70void __cpuinit load_pda_offset(int cpu)
71{
72 /* Memory clobbers used to order pda/percpu accesses */
73 mb();
74 wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
75 mb();
76}
77
78 #endif /* CONFIG_X86_64 */
79
80#ifdef CONFIG_X86_64
81
82/* correctly size the local cpu masks */
83static void setup_cpu_local_masks(void)
84{
85 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
86 alloc_bootmem_cpumask_var(&cpu_callin_mask);
87 alloc_bootmem_cpumask_var(&cpu_callout_mask);
88 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
89}
90
91#else /* CONFIG_X86_32 */
92
93static inline void setup_cpu_local_masks(void)
94{
95}
96
97#endif /* CONFIG_X86_32 */
98
68#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA 99#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
69/* 100/*
70 * Copy data used in early init routines from the initial arrays to the 101 * Copy data used in early init routines from the initial arrays to the
@@ -101,63 +132,7 @@ static void __init setup_per_cpu_maps(void)
101 */ 132 */
102unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 133unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
103EXPORT_SYMBOL(__per_cpu_offset); 134EXPORT_SYMBOL(__per_cpu_offset);
104static inline void setup_cpu_pda_map(void) { } 135#endif
105
106#elif !defined(CONFIG_SMP)
107static inline void setup_cpu_pda_map(void) { }
108
109#else /* CONFIG_SMP && CONFIG_X86_64 */
110
111/*
112 * Allocate cpu_pda pointer table and array via alloc_bootmem.
113 */
114static void __init setup_cpu_pda_map(void)
115{
116 char *pda;
117 unsigned long size;
118 int cpu;
119
120 size = roundup(sizeof(struct x8664_pda), cache_line_size());
121
122 /* allocate cpu_pda array and pointer table */
123 {
124 unsigned long asize = size * (nr_cpu_ids - 1);
125
126 pda = alloc_bootmem(asize);
127 }
128
129 /* initialize pointer table to static pda's */
130 for_each_possible_cpu(cpu) {
131 if (cpu == 0) {
132 /* leave boot cpu pda in place */
133 continue;
134 }
135 cpu_pda(cpu) = (struct x8664_pda *)pda;
136 cpu_pda(cpu)->in_bootmem = 1;
137 pda += size;
138 }
139}
140
141#endif /* CONFIG_SMP && CONFIG_X86_64 */
142
143#ifdef CONFIG_X86_64
144
145/* correctly size the local cpu masks */
146static void setup_cpu_local_masks(void)
147{
148 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
149 alloc_bootmem_cpumask_var(&cpu_callin_mask);
150 alloc_bootmem_cpumask_var(&cpu_callout_mask);
151 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
152}
153
154#else /* CONFIG_X86_32 */
155
156static inline void setup_cpu_local_masks(void)
157{
158}
159
160#endif /* CONFIG_X86_32 */
161 136
162/* 137/*
163 * Great future plan: 138 * Great future plan:
@@ -171,9 +146,6 @@ void __init setup_per_cpu_areas(void)
171 int cpu; 146 int cpu;
172 unsigned long align = 1; 147 unsigned long align = 1;
173 148
174 /* Setup cpu_pda map */
175 setup_cpu_pda_map();
176
177 /* Copy section for each CPU (we discard the original) */ 149 /* Copy section for each CPU (we discard the original) */
178 old_size = PERCPU_ENOUGH_ROOM; 150 old_size = PERCPU_ENOUGH_ROOM;
179 align = max_t(unsigned long, PAGE_SIZE, align); 151 align = max_t(unsigned long, PAGE_SIZE, align);
@@ -204,8 +176,21 @@ void __init setup_per_cpu_areas(void)
204 cpu, node, __pa(ptr)); 176 cpu, node, __pa(ptr));
205 } 177 }
206#endif 178#endif
207 per_cpu_offset(cpu) = ptr - __per_cpu_start; 179
208 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); 180 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
181#ifdef CONFIG_X86_64
182 cpu_pda(cpu) = (void *)ptr;
183
184 /*
185 * CPU0 modified pda in the init data area, reload pda
186 * offset for CPU0 and clear the area for others.
187 */
188 if (cpu == 0)
189 load_pda_offset(0);
190 else
191 memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
192#endif
193 per_cpu_offset(cpu) = ptr - __per_cpu_start;
209 194
210 DBG("PERCPU: cpu %4d %p\n", cpu, ptr); 195 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
211 } 196 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 70d846628bbf..f2f77ca494d4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -744,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
744 complete(&c_idle->done); 744 complete(&c_idle->done);
745} 745}
746 746
747#ifdef CONFIG_X86_64
748
749/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
750static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
751{
752 if (!after_bootmem)
753 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
754}
755
756/*
757 * Allocate node local memory for the AP pda.
758 *
759 * Must be called after the _cpu_pda pointer table is initialized.
760 */
761int __cpuinit get_local_pda(int cpu)
762{
763 struct x8664_pda *oldpda, *newpda;
764 unsigned long size = sizeof(struct x8664_pda);
765 int node = cpu_to_node(cpu);
766
767 if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
768 return 0;
769
770 oldpda = cpu_pda(cpu);
771 newpda = kmalloc_node(size, GFP_ATOMIC, node);
772 if (!newpda) {
773 printk(KERN_ERR "Could not allocate node local PDA "
774 "for CPU %d on node %d\n", cpu, node);
775
776 if (oldpda)
777 return 0; /* have a usable pda */
778 else
779 return -1;
780 }
781
782 if (oldpda) {
783 memcpy(newpda, oldpda, size);
784 free_bootmem_pda(oldpda);
785 }
786
787 newpda->in_bootmem = 0;
788 cpu_pda(cpu) = newpda;
789 return 0;
790}
791#endif /* CONFIG_X86_64 */
792
793static int __cpuinit do_boot_cpu(int apicid, int cpu) 747static int __cpuinit do_boot_cpu(int apicid, int cpu)
794/* 748/*
795 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 749 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -807,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
807 }; 761 };
808 INIT_WORK(&c_idle.work, do_fork_idle); 762 INIT_WORK(&c_idle.work, do_fork_idle);
809 763
810#ifdef CONFIG_X86_64
811 /* Allocate node local memory for AP pdas */
812 if (cpu > 0) {
813 boot_error = get_local_pda(cpu);
814 if (boot_error)
815 goto restore_state;
816 /* if can't get pda memory, can't start cpu */
817 }
818#endif
819
820 alternatives_smp_switch(1); 764 alternatives_smp_switch(1);
821 765
822 c_idle.idle = get_idle_for_cpu(cpu); 766 c_idle.idle = get_idle_for_cpu(cpu);
@@ -931,9 +875,7 @@ do_rest:
931 inquire_remote_apic(apicid); 875 inquire_remote_apic(apicid);
932 } 876 }
933 } 877 }
934#ifdef CONFIG_X86_64 878
935restore_state:
936#endif
937 if (boot_error) { 879 if (boot_error) {
938 /* Try to put things back the way they were before ... */ 880 /* Try to put things back the way they were before ... */
939 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 881 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index f50280db0dfe..962f21f1d4d7 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,6 +5,7 @@
5#define LOAD_OFFSET __START_KERNEL_map 5#define LOAD_OFFSET __START_KERNEL_map
6 6
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/asm-offsets.h>
8#include <asm/page.h> 9#include <asm/page.h>
9 10
10#undef i386 /* in case the preprocessor is a 32bit one */ 11#undef i386 /* in case the preprocessor is a 32bit one */
@@ -215,10 +216,11 @@ SECTIONS
215 /* 216 /*
216 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the 217 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
217 * output PHDR, so the next output section - __data_nosave - should 218 * output PHDR, so the next output section - __data_nosave - should
218 * switch it back to data.init. 219 * switch it back to data.init. Also, pda should be at the head of
220 * percpu area. Preallocate it.
219 */ 221 */
220 . = ALIGN(PAGE_SIZE); 222 . = ALIGN(PAGE_SIZE);
221 PERCPU_VADDR(0, :percpu) 223 PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
222#else 224#else
223 PERCPU(PAGE_SIZE) 225 PERCPU(PAGE_SIZE)
224#endif 226#endif