ia64: allocate percpu area for cpu0 like percpu areas for other cpus

cpu0 used a special percpu area reserved by the linker, __cpu0_per_cpu, which is set up early in boot by head.S. However, this doesn't guarantee that the area will be on the same node as cpu0, and the percpu area for cpu0 ends up very far away from the percpu areas for other cpus, which causes problems for the congruent percpu allocator. This patch makes percpu area initialization allocate the percpu area for cpu0 like that of any other cpu and copy it from __cpu0_per_cpu, which now resides in the __init area. This means that for cpu0, the percpu area is first set up at __cpu0_per_cpu early by head.S and then moved to an area in the linear mapping during memory initialization; consequently, taking a pointer to percpu variables between head.S and memory initialization is not allowed. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Tony Luck <tony.luck@intel.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: linux-ia64 <linux-ia64@vger.kernel.org>
author: Tejun Heo <tj@kernel.org> 2009-10-02 00:28:56 -0400
committer: Tejun Heo <tj@kernel.org> 2009-10-02 00:28:56 -0400
commit: 36886478f59ec0fdc24a8877c572b92f8d416aba (patch)
tree: eda76efcb0f05011531e3d83a524cf80d0c67cef /arch/ia64
parent: 12cda817779ce5381a9a4ba8d464abe17c50a9e2 (diff)
3 files changed, 57 insertions, 30 deletions
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 0a0c77b2c988..1295ba327f6f 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -166,6 +166,12 @@ SECTIONS
        }
 #endif
+#ifdef  CONFIG_SMP
+  . = ALIGN(PERCPU_PAGE_SIZE);
+  __cpu0_per_cpu = .;
+  . = . + PERCPU_PAGE_SIZE;     /* cpu0 per-cpu space */
+#endif
  . = ALIGN(PAGE_SIZE);
  __init_end = .;
@@ -198,11 +204,6 @@ SECTIONS
  data : { } :data
  .data : AT(ADDR(.data) - LOAD_OFFSET)
        {
-#ifdef  CONFIG_SMP
-  . = ALIGN(PERCPU_PAGE_SIZE);
-                __cpu0_per_cpu = .;
-  . = . + PERCPU_PAGE_SIZE;     /* cpu0 per-cpu space */
-#endif
                INIT_TASK_DATA(PAGE_SIZE)
                CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
                READ_MOSTLY_DATA(SMP_CACHE_BYTES)
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 1341437c1b26..351da0a06cd0 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -154,36 +154,49 @@ static void *cpu_data;
 void * __cpuinit
 per_cpu_init (void)
 {
-        int cpu;
+        static bool first_time = true;
-        static int first_time=1;
+        void *cpu0_data = __cpu0_per_cpu;
+        unsigned int cpu;
+        if (!first_time)
+                goto skip;
+        first_time = false;
        /*
         * get_free_pages() cannot be used before cpu_init() done.  BSP
         * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
         * get_zeroed_page().
         */
-        if (first_time) {
+        for (cpu = 0; cpu < NR_CPUS; cpu++) {
-                void *cpu0_data = __cpu0_per_cpu;
+                void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start;
-                first_time=0;
+                memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start);
+                __per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start;
+                per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
-                __per_cpu_offset[0] = (char *) cpu0_data - __per_cpu_start;
+                /*
-                per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0];
+                 * percpu area for cpu0 is moved from the __init area
+                 * which is setup by head.S and used till this point.
+                 * Update ar.k3.  This move is ensures that percpu
+                 * area for cpu0 is on the correct node and its
+                 * virtual address isn't insanely far from other
+                 * percpu areas which is important for congruent
+                 * percpu allocator.
+                 */
+                if (cpu == 0)
+                        ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) -
+                                    (unsigned long)__per_cpu_start);
-                for (cpu = 1; cpu < NR_CPUS; cpu++) {
+                cpu_data += PERCPU_PAGE_SIZE;
-                        memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
-                        __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
-                        cpu_data += PERCPU_PAGE_SIZE;
-                        per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
-                }
        }
+skip:
        return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
 }
 static inline void
 alloc_per_cpu_data(void)
 {
-        cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS-1,
+        cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
                                   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 }
 #else
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 9f24b3c6dc71..200282b92981 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -143,17 +143,30 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
        int cpu;
        for_each_possible_early_cpu(cpu) {
-                if (cpu == 0) {
+                void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;
-                        void *cpu0_data = __cpu0_per_cpu;
-                        __per_cpu_offset[cpu] = (char*)cpu0_data -
+                if (node != node_cpuid[cpu].nid)
-                                __per_cpu_start;
+                        continue;
-                } else if (node == node_cpuid[cpu].nid) {
-                        memcpy(__va(cpu_data), __phys_per_cpu_start,
+                memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
-                               __per_cpu_end - __per_cpu_start);
+                __per_cpu_offset[cpu] = (char *)__va(cpu_data) -
-                        __per_cpu_offset[cpu] = (char*)__va(cpu_data) -
+                        __per_cpu_start;
-                                __per_cpu_start;
-                        cpu_data += PERCPU_PAGE_SIZE;
+                /*
-                }
+                 * percpu area for cpu0 is moved from the __init area
+                 * which is setup by head.S and used till this point.
+                 * Update ar.k3.  This move is ensures that percpu
+                 * area for cpu0 is on the correct node and its
+                 * virtual address isn't insanely far from other
+                 * percpu areas which is important for congruent
+                 * percpu allocator.
+                 */
+                if (cpu == 0)
+                        ia64_set_kr(IA64_KR_PER_CPU_DATA,
+                                    (unsigned long)cpu_data -
+                                    (unsigned long)__per_cpu_start);
+                cpu_data += PERCPU_PAGE_SIZE;
        }
 #endif
        return cpu_data;
author	Tejun Heo <tj@kernel.org>	2009-10-02 00:28:56 -0400
committer	Tejun Heo <tj@kernel.org>	2009-10-02 00:28:56 -0400
commit	36886478f59ec0fdc24a8877c572b92f8d416aba (patch)
tree	eda76efcb0f05011531e3d83a524cf80d0c67cef /arch/ia64
parent	12cda817779ce5381a9a4ba8d464abe17c50a9e2 (diff)

diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 0a0c77b2c988..1295ba327f6f 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -166,6 +166,12 @@ SECTIONS
166	}	166	}
167	#endif	167	#endif
168		168
		169	#ifdef CONFIG_SMP
		170	. = ALIGN(PERCPU_PAGE_SIZE);
		171	__cpu0_per_cpu = .;
		172	. = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */
		173	#endif
		174
169	. = ALIGN(PAGE_SIZE);	175	. = ALIGN(PAGE_SIZE);
170	__init_end = .;	176	__init_end = .;
171		177
@@ -198,11 +204,6 @@ SECTIONS
198	data : { } :data	204	data : { } :data
199	.data : AT(ADDR(.data) - LOAD_OFFSET)	205	.data : AT(ADDR(.data) - LOAD_OFFSET)
200	{	206	{
201	#ifdef CONFIG_SMP
202	. = ALIGN(PERCPU_PAGE_SIZE);
203	__cpu0_per_cpu = .;
204	. = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */
205	#endif
206	INIT_TASK_DATA(PAGE_SIZE)	207	INIT_TASK_DATA(PAGE_SIZE)
207	CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)	208	CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
208	READ_MOSTLY_DATA(SMP_CACHE_BYTES)	209	READ_MOSTLY_DATA(SMP_CACHE_BYTES)


diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 1341437c1b26..351da0a06cd0 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c
@@ -154,36 +154,49 @@ static void *cpu_data;
154	void * __cpuinit	154	void * __cpuinit
155	per_cpu_init (void)	155	per_cpu_init (void)
156	{	156	{
157	int cpu;	157	static bool first_time = true;
158	static int first_time=1;	158	void *cpu0_data = __cpu0_per_cpu;
		159	unsigned int cpu;
		160
		161	if (!first_time)
		162	goto skip;
		163	first_time = false;
159		164
160	/*	165	/*
161	* get_free_pages() cannot be used before cpu_init() done. BSP	166	* get_free_pages() cannot be used before cpu_init() done. BSP
162	* allocates "NR_CPUS" pages for all CPUs to avoid that AP calls	167	* allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
163	* get_zeroed_page().	168	* get_zeroed_page().
164	*/	169	*/
165	if (first_time) {	170	for (cpu = 0; cpu < NR_CPUS; cpu++) {
166	void *cpu0_data = __cpu0_per_cpu;	171	void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start;
167		172
168	first_time=0;	173	memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start);
		174	__per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start;
		175	per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
169		176
170	__per_cpu_offset[0] = (char *) cpu0_data - __per_cpu_start;	177	/*
171	per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0];	178	* percpu area for cpu0 is moved from the __init area
		179	* which is setup by head.S and used till this point.
		180	* Update ar.k3. This move is ensures that percpu
		181	* area for cpu0 is on the correct node and its
		182	* virtual address isn't insanely far from other
		183	* percpu areas which is important for congruent
		184	* percpu allocator.
		185	*/
		186	if (cpu == 0)
		187	ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) -
		188	(unsigned long)__per_cpu_start);
172		189
173	for (cpu = 1; cpu < NR_CPUS; cpu++) {	190	cpu_data += PERCPU_PAGE_SIZE;
174	memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
175	__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
176	cpu_data += PERCPU_PAGE_SIZE;
177	per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
178	}
179	}	191	}
		192	skip:
180	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];	193	return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
181	}	194	}
182		195
183	static inline void	196	static inline void
184	alloc_per_cpu_data(void)	197	alloc_per_cpu_data(void)
185	{	198	{
186	cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS-1,	199	cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
187	PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));	200	PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
188	}	201	}
189	#else	202	#else


diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 9f24b3c6dc71..200282b92981 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c
@@ -143,17 +143,30 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
143	int cpu;	143	int cpu;
144		144
145	for_each_possible_early_cpu(cpu) {	145	for_each_possible_early_cpu(cpu) {
146	if (cpu == 0) {	146	void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;
147	void *cpu0_data = __cpu0_per_cpu;	147
148	__per_cpu_offset[cpu] = (char*)cpu0_data -	148	if (node != node_cpuid[cpu].nid)
149	__per_cpu_start;	149	continue;
150	} else if (node == node_cpuid[cpu].nid) {	150
151	memcpy(__va(cpu_data), __phys_per_cpu_start,	151	memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
152	__per_cpu_end - __per_cpu_start);	152	__per_cpu_offset[cpu] = (char *)__va(cpu_data) -
153	__per_cpu_offset[cpu] = (char*)__va(cpu_data) -	153	__per_cpu_start;
154	__per_cpu_start;	154
155	cpu_data += PERCPU_PAGE_SIZE;	155	/*
156	}	156	* percpu area for cpu0 is moved from the __init area
		157	* which is setup by head.S and used till this point.
		158	* Update ar.k3. This move is ensures that percpu
		159	* area for cpu0 is on the correct node and its
		160	* virtual address isn't insanely far from other
		161	* percpu areas which is important for congruent
		162	* percpu allocator.
		163	*/
		164	if (cpu == 0)
		165	ia64_set_kr(IA64_KR_PER_CPU_DATA,
		166	(unsigned long)cpu_data -
		167	(unsigned long)__per_cpu_start);
		168
		169	cpu_data += PERCPU_PAGE_SIZE;
157	}	170	}
158	#endif	171	#endif
159	return cpu_data;	172	return cpu_data;