path: root/arch/x86/kernel/setup_percpu.c
author		Tejun Heo <tj@kernel.org>	2009-06-21 22:56:24 -0400
committer	Tejun Heo <tj@kernel.org>	2009-06-21 22:56:24 -0400
commit		e59a1bb2fdfb745c685f5b40ffbed126331d3223 (patch)
tree		f2916a898b56f70554160f33be2252d15347fe8b /arch/x86/kernel/setup_percpu.c
parent		992f4c1c2c1583cef3296ec4bf5205843a9a5f3d (diff)
x86: fix pageattr handling for lpage percpu allocator and re-enable it
The lpage allocator aliases a PMD page for each cpu and returns whatever is unused to the page allocator.  When the pageattr of the recycled pages is changed, the two aliases end up pointing at overlapping regions with different attributes, which isn't allowed and is known to cause subtle data corruption in certain cases.

This can be handled in a similar manner to the x86_64 highmap alias: the pageattr code should detect whether the target pages have a PMD alias and, if so, split the PMD alias and synchronize the attributes.

The pcpul allocator is updated to keep pcpul_map, the array of allocated PMD pages, sorted in ascending address order, and to provide pcpu_lpage_remapped(), which binary searches that array to determine whether a given address is aliased and, if so, to which address.  pageattr is updated to call pcpu_lpage_remapped() from cpa_process_alias() to detect the PMD alias and split it up as necessary.

Jan Beulich spotted the original problem and the incorrect use of vaddr instead of laddr for the lookup.

With this, the lpage percpu allocator should work correctly.  Re-enable it.

[ Impact: fix subtle lpage pageattr bug and re-enable lpage ]

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jan Beulich <JBeulich@novell.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@elte.hu>
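The pageattr side of the change is not visible below, since this view is limited to setup_percpu.c.  As a rough sketch only of how cpa_process_alias() in arch/x86/mm/pageattr.c is expected to consult the new hook: the helper name pcpu_lpage_fixup_alias() and the exact field handling are illustrative assumptions here, not the actual hunk from this commit.

	/*
	 * Illustrative sketch only -- not the actual pageattr.c hunk.  The
	 * idea: if the linear address being changed falls inside the
	 * recycled tail of a pcpu PMD page, redo the attribute change on
	 * the aliasing pcpu mapping so the two mappings never disagree,
	 * mirroring how the x86_64 highmap alias is handled.
	 */
	static int pcpu_lpage_fixup_alias(struct cpa_data *cpa, unsigned long laddr)
	{
		struct cpa_data alias_cpa;
		unsigned long lpaddr;

		/* lookup must use laddr, not vaddr -- Jan Beulich's catch */
		lpaddr = (unsigned long)pcpu_lpage_remapped((void *)laddr);
		if (!lpaddr)
			return 0;		/* no pcpu PMD alias, nothing to do */

		/* apply the same set/clear to the aliased pcpu address */
		alias_cpa = *cpa;
		alias_cpa.vaddr = &lpaddr;
		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);

		return __change_page_attr_set_clr(&alias_cpa, 0);
	}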
Diffstat (limited to 'arch/x86/kernel/setup_percpu.c')
-rw-r--r--	arch/x86/kernel/setup_percpu.c	72
1 file changed, 63 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7d38941e2b8c..bad2fd223114 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -142,8 +142,8 @@ struct pcpul_ent {
 	void		*ptr;
 };
 
-static size_t pcpul_size __initdata;
-static struct pcpul_ent *pcpul_map __initdata;
+static size_t pcpul_size;
+static struct pcpul_ent *pcpul_map;
 static struct vm_struct pcpul_vm;
 
 static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
@@ -160,15 +160,14 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size)
 {
 	size_t map_size, dyn_size;
 	unsigned int cpu;
+	int i, j;
 	ssize_t ret;
 
 	/*
 	 * If large page isn't supported, there's no benefit in doing
 	 * this.  Also, on non-NUMA, embedding is better.
-	 *
-	 * NOTE: disabled for now.
 	 */
-	if (true || !cpu_has_pse || !pcpu_need_numa())
+	if (!cpu_has_pse || !pcpu_need_numa())
 		return -EINVAL;
 
 	/*
@@ -231,16 +230,71 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size)
 	ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
 				     PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
 				     PMD_SIZE, pcpul_vm.addr, NULL);
-	goto out_free_map;
+
+	/* sort pcpul_map array for pcpu_lpage_remapped() */
+	for (i = 0; i < num_possible_cpus() - 1; i++)
+		for (j = i + 1; j < num_possible_cpus(); j++)
+			if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
+				struct pcpul_ent tmp = pcpul_map[i];
+				pcpul_map[i] = pcpul_map[j];
+				pcpul_map[j] = tmp;
+			}
+
+	return ret;
 
 enomem:
 	for_each_possible_cpu(cpu)
 		if (pcpul_map[cpu].ptr)
 			free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
-	ret = -ENOMEM;
-out_free_map:
 	free_bootmem(__pa(pcpul_map), map_size);
-	return ret;
+	return -ENOMEM;
+}
+
+/**
+ * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
+ * @kaddr: the kernel address in question
+ *
+ * Determine whether @kaddr falls in the pcpul recycled area.  This is
+ * used by pageattr to detect VM aliases and break up the pcpu PMD
+ * mapping such that the same physical page is not mapped under
+ * different attributes.
+ *
+ * The recycled area is always at the tail of a partially used PMD
+ * page.
+ *
+ * RETURNS:
+ * Address of corresponding remapped pcpu address if match is found;
+ * otherwise, NULL.
+ */
+void *pcpu_lpage_remapped(void *kaddr)
+{
+	void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
+	unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
+	int left = 0, right = num_possible_cpus() - 1;
+	int pos;
+
+	/* pcpul in use at all? */
+	if (!pcpul_map)
+		return NULL;
+
+	/* okay, perform binary search */
+	while (left <= right) {
+		pos = (left + right) / 2;
+
+		if (pcpul_map[pos].ptr < pmd_addr)
+			left = pos + 1;
+		else if (pcpul_map[pos].ptr > pmd_addr)
+			right = pos - 1;
+		else {
+			/* it shouldn't be in the area for the first chunk */
+			WARN_ON(offset < pcpul_size);
+
+			return pcpul_vm.addr +
+			       pcpul_map[pos].cpu * PMD_SIZE + offset;
+		}
+	}
+
+	return NULL;
 }
 #else
 static ssize_t __init setup_pcpu_lpage(size_t static_size)