aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2009-10-02 00:28:56 -0400
committer: Tejun Heo <tj@kernel.org> 2009-10-02 00:28:56 -0400
commit: 36886478f59ec0fdc24a8877c572b92f8d416aba (patch)
tree: eda76efcb0f05011531e3d83a524cf80d0c67cef
parent: 12cda817779ce5381a9a4ba8d464abe17c50a9e2 (diff)
ia64: allocate percpu area for cpu0 like percpu areas for other cpus
cpu0 used a special percpu area reserved by the linker, __cpu0_per_cpu, which is set up early in boot by head.S. However, this doesn't guarantee that the area will be on the same node as cpu0, and the percpu area for cpu0 ends up very far away from the percpu areas for other cpus, which causes problems for the congruent percpu allocator. This patch makes percpu area initialization allocate the percpu area for cpu0 in the same way as for any other cpu and copy its contents from __cpu0_per_cpu, which now resides in the __init area. This means that for cpu0, the percpu area is first set up at __cpu0_per_cpu early by head.S and then moved to an area in the linear mapping during memory initialization; consequently, it is not allowed to take a pointer to percpu variables between head.S and memory initialization. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Tony Luck <tony.luck@intel.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: linux-ia64 <linux-ia64@vger.kernel.org>
-rw-r--r-- arch/ia64/kernel/vmlinux.lds.S | 11
-rw-r--r-- arch/ia64/mm/contig.c | 41
-rw-r--r-- arch/ia64/mm/discontig.c | 35
3 files changed, 57 insertions, 30 deletions
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 0a0c77b2c988..1295ba327f6f 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -166,6 +166,12 @@ SECTIONS
166 } 166 }
167#endif 167#endif
168 168
169#ifdef CONFIG_SMP
170 . = ALIGN(PERCPU_PAGE_SIZE);
171 __cpu0_per_cpu = .;
172 . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */
173#endif
174
169 . = ALIGN(PAGE_SIZE); 175 . = ALIGN(PAGE_SIZE);
170 __init_end = .; 176 __init_end = .;
171 177
@@ -198,11 +204,6 @@ SECTIONS
198 data : { } :data 204 data : { } :data
199 .data : AT(ADDR(.data) - LOAD_OFFSET) 205 .data : AT(ADDR(.data) - LOAD_OFFSET)
200 { 206 {
201#ifdef CONFIG_SMP
202 . = ALIGN(PERCPU_PAGE_SIZE);
203 __cpu0_per_cpu = .;
204 . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */
205#endif
206 INIT_TASK_DATA(PAGE_SIZE) 207 INIT_TASK_DATA(PAGE_SIZE)
207 CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES) 208 CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
208 READ_MOSTLY_DATA(SMP_CACHE_BYTES) 209 READ_MOSTLY_DATA(SMP_CACHE_BYTES)
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 1341437c1b26..351da0a06cd0 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -154,36 +154,49 @@ static void *cpu_data;
154void * __cpuinit 154void * __cpuinit
155per_cpu_init (void) 155per_cpu_init (void)
156{ 156{
157 int cpu; 157 static bool first_time = true;
158 static int first_time=1; 158 void *cpu0_data = __cpu0_per_cpu;
159 unsigned int cpu;
160
161 if (!first_time)
162 goto skip;
163 first_time = false;
159 164
160 /* 165 /*
161 * get_free_pages() cannot be used before cpu_init() done. BSP 166 * get_free_pages() cannot be used before cpu_init() done. BSP
162 * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls 167 * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
163 * get_zeroed_page(). 168 * get_zeroed_page().
164 */ 169 */
165 if (first_time) { 170 for (cpu = 0; cpu < NR_CPUS; cpu++) {
166 void *cpu0_data = __cpu0_per_cpu; 171 void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start;
167 172
168 first_time=0; 173 memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start);
174 __per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start;
175 per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
169 176
170 __per_cpu_offset[0] = (char *) cpu0_data - __per_cpu_start; 177 /*
171 per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0]; 178 * percpu area for cpu0 is moved from the __init area
179 * which is setup by head.S and used till this point.
180 * Update ar.k3. This move ensures that percpu
181 * area for cpu0 is on the correct node and its
182 * virtual address isn't insanely far from other
183 * percpu areas which is important for congruent
184 * percpu allocator.
185 */
186 if (cpu == 0)
187 ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) -
188 (unsigned long)__per_cpu_start);
172 189
173 for (cpu = 1; cpu < NR_CPUS; cpu++) { 190 cpu_data += PERCPU_PAGE_SIZE;
174 memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
175 __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
176 cpu_data += PERCPU_PAGE_SIZE;
177 per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
178 }
179 } 191 }
192skip:
180 return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; 193 return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
181} 194}
182 195
183static inline void 196static inline void
184alloc_per_cpu_data(void) 197alloc_per_cpu_data(void)
185{ 198{
186 cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS-1, 199 cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
187 PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); 200 PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
188} 201}
189#else 202#else
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 9f24b3c6dc71..200282b92981 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -143,17 +143,30 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
143 int cpu; 143 int cpu;
144 144
145 for_each_possible_early_cpu(cpu) { 145 for_each_possible_early_cpu(cpu) {
146 if (cpu == 0) { 146 void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;
147 void *cpu0_data = __cpu0_per_cpu; 147
148 __per_cpu_offset[cpu] = (char*)cpu0_data - 148 if (node != node_cpuid[cpu].nid)
149 __per_cpu_start; 149 continue;
150 } else if (node == node_cpuid[cpu].nid) { 150
151 memcpy(__va(cpu_data), __phys_per_cpu_start, 151 memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
152 __per_cpu_end - __per_cpu_start); 152 __per_cpu_offset[cpu] = (char *)__va(cpu_data) -
153 __per_cpu_offset[cpu] = (char*)__va(cpu_data) - 153 __per_cpu_start;
154 __per_cpu_start; 154
155 cpu_data += PERCPU_PAGE_SIZE; 155 /*
156 } 156 * percpu area for cpu0 is moved from the __init area
157 * which is setup by head.S and used till this point.
158 * Update ar.k3. This move ensures that percpu
159 * area for cpu0 is on the correct node and its
160 * virtual address isn't insanely far from other
161 * percpu areas which is important for congruent
162 * percpu allocator.
163 */
164 if (cpu == 0)
165 ia64_set_kr(IA64_KR_PER_CPU_DATA,
166 (unsigned long)cpu_data -
167 (unsigned long)__per_cpu_start);
168
169 cpu_data += PERCPU_PAGE_SIZE;
157 } 170 }
158#endif 171#endif
159 return cpu_data; 172 return cpu_data;