diff options
author | Tejun Heo <tj@kernel.org> | 2009-07-03 19:10:59 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2009-07-03 19:10:59 -0400 |
commit | 8c4bfc6e8801616ab2e01c38140b2159b388d2ff (patch) | |
tree | e29e8bbfae362362554b870371a6187b41f92d82 /arch/x86/kernel/setup_percpu.c | |
parent | 8f05a6a65d944f2fed4eb384fb58aa8c8e5a9bab (diff) |
x86,percpu: generalize lpage first chunk allocator
Generalize and move x86 setup_pcpu_lpage() into
pcpu_lpage_first_chunk(). setup_pcpu_lpage() is now a simple wrapper
around the generalized version. Other than taking size parameters and
using arch-supplied callbacks to allocate/free/map memory,
pcpu_lpage_first_chunk() is identical to the original implementation.
This simplifies arch code and will help convert more archs to the
dynamic percpu allocator.
While at it, factor out pcpu_calc_fc_sizes() which is common to
pcpu_embed_first_chunk() and pcpu_lpage_first_chunk().
[ Impact: code reorganization and generalization ]
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/setup_percpu.c')
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 169 |
1 files changed, 11 insertions, 158 deletions
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ab896b31e80b..4f2e0ac9130b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -137,44 +137,21 @@ static void __init pcpu_fc_free(void *ptr, size_t size) | |||
137 | } | 137 | } |
138 | 138 | ||
139 | /* | 139 | /* |
140 | * Large page remap allocator | 140 | * Large page remapping allocator |
141 | * | ||
142 | * This allocator uses PMD page as unit. A PMD page is allocated for | ||
143 | * each cpu and each is remapped into vmalloc area using PMD mapping. | ||
144 | * As PMD page is quite large, only part of it is used for the first | ||
145 | * chunk. Unused part is returned to the bootmem allocator. | ||
146 | * | ||
147 | * So, the PMD pages are mapped twice - once to the physical mapping | ||
148 | * and to the vmalloc area for the first percpu chunk. The double | ||
149 | * mapping does add one more PMD TLB entry pressure but still is much | ||
150 | * better than only using 4k mappings while still being NUMA friendly. | ||
151 | */ | 141 | */ |
152 | #ifdef CONFIG_NEED_MULTIPLE_NODES | 142 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
153 | struct pcpul_ent { | 143 | static void __init pcpul_map(void *ptr, size_t size, void *addr) |
154 | unsigned int cpu; | ||
155 | void *ptr; | ||
156 | }; | ||
157 | |||
158 | static size_t pcpul_size; | ||
159 | static struct pcpul_ent *pcpul_map; | ||
160 | static struct vm_struct pcpul_vm; | ||
161 | |||
162 | static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) | ||
163 | { | 144 | { |
164 | size_t off = (size_t)pageno << PAGE_SHIFT; | 145 | pmd_t *pmd, pmd_v; |
165 | 146 | ||
166 | if (off >= pcpul_size) | 147 | pmd = populate_extra_pmd((unsigned long)addr); |
167 | return NULL; | 148 | pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE); |
168 | 149 | set_pmd(pmd, pmd_v); | |
169 | return virt_to_page(pcpul_map[cpu].ptr + off); | ||
170 | } | 150 | } |
171 | 151 | ||
172 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | 152 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) |
173 | { | 153 | { |
174 | size_t map_size, dyn_size; | 154 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; |
175 | unsigned int cpu; | ||
176 | int i, j; | ||
177 | ssize_t ret; | ||
178 | 155 | ||
179 | if (!chosen) { | 156 | if (!chosen) { |
180 | size_t vm_size = VMALLOC_END - VMALLOC_START; | 157 | size_t vm_size = VMALLOC_END - VMALLOC_START; |
@@ -198,134 +175,10 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | |||
198 | return -EINVAL; | 175 | return -EINVAL; |
199 | } | 176 | } |
200 | 177 | ||
201 | /* | 178 | return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, |
202 | * Currently supports only single page. Supporting multiple | 179 | reserve - PERCPU_FIRST_CHUNK_RESERVE, |
203 | * pages won't be too difficult if it ever becomes necessary. | 180 | PMD_SIZE, |
204 | */ | 181 | pcpu_fc_alloc, pcpu_fc_free, pcpul_map); |
205 | pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | ||
206 | PERCPU_DYNAMIC_RESERVE); | ||
207 | if (pcpul_size > PMD_SIZE) { | ||
208 | pr_warning("PERCPU: static data is larger than large page, " | ||
209 | "can't use large page\n"); | ||
210 | return -EINVAL; | ||
211 | } | ||
212 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | ||
213 | |||
214 | /* allocate pointer array and alloc large pages */ | ||
215 | map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); | ||
216 | pcpul_map = alloc_bootmem(map_size); | ||
217 | |||
218 | for_each_possible_cpu(cpu) { | ||
219 | pcpul_map[cpu].cpu = cpu; | ||
220 | pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, | ||
221 | PMD_SIZE); | ||
222 | if (!pcpul_map[cpu].ptr) { | ||
223 | pr_warning("PERCPU: failed to allocate large page " | ||
224 | "for cpu%u\n", cpu); | ||
225 | goto enomem; | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * Only use pcpul_size bytes and give back the rest. | ||
230 | * | ||
231 | * Ingo: The 2MB up-rounding bootmem is needed to make | ||
232 | * sure the partial 2MB page is still fully RAM - it's | ||
233 | * not well-specified to have a PAT-incompatible area | ||
234 | * (unmapped RAM, device memory, etc.) in that hole. | ||
235 | */ | ||
236 | free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), | ||
237 | PMD_SIZE - pcpul_size); | ||
238 | |||
239 | memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); | ||
240 | } | ||
241 | |||
242 | /* allocate address and map */ | ||
243 | pcpul_vm.flags = VM_ALLOC; | ||
244 | pcpul_vm.size = num_possible_cpus() * PMD_SIZE; | ||
245 | vm_area_register_early(&pcpul_vm, PMD_SIZE); | ||
246 | |||
247 | for_each_possible_cpu(cpu) { | ||
248 | pmd_t *pmd, pmd_v; | ||
249 | |||
250 | pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + | ||
251 | cpu * PMD_SIZE); | ||
252 | pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), | ||
253 | PAGE_KERNEL_LARGE); | ||
254 | set_pmd(pmd, pmd_v); | ||
255 | } | ||
256 | |||
257 | /* we're ready, commit */ | ||
258 | pr_info("PERCPU: Remapped at %p with large pages, static data " | ||
259 | "%zu bytes\n", pcpul_vm.addr, static_size); | ||
260 | |||
261 | ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, | ||
262 | PERCPU_FIRST_CHUNK_RESERVE, dyn_size, | ||
263 | PMD_SIZE, pcpul_vm.addr, NULL); | ||
264 | |||
265 | /* sort pcpul_map array for pcpu_lpage_remapped() */ | ||
266 | for (i = 0; i < num_possible_cpus() - 1; i++) | ||
267 | for (j = i + 1; j < num_possible_cpus(); j++) | ||
268 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { | ||
269 | struct pcpul_ent tmp = pcpul_map[i]; | ||
270 | pcpul_map[i] = pcpul_map[j]; | ||
271 | pcpul_map[j] = tmp; | ||
272 | } | ||
273 | |||
274 | return ret; | ||
275 | |||
276 | enomem: | ||
277 | for_each_possible_cpu(cpu) | ||
278 | if (pcpul_map[cpu].ptr) | ||
279 | free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); | ||
280 | free_bootmem(__pa(pcpul_map), map_size); | ||
281 | return -ENOMEM; | ||
282 | } | ||
283 | |||
284 | /** | ||
285 | * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area | ||
286 | * @kaddr: the kernel address in question | ||
287 | * | ||
288 | * Determine whether @kaddr falls in the pcpul recycled area. This is | ||
289 | * used by pageattr to detect VM aliases and break up the pcpu PMD | ||
290 | * mapping such that the same physical page is not mapped under | ||
291 | * different attributes. | ||
292 | * | ||
293 | * The recycled area is always at the tail of a partially used PMD | ||
294 | * page. | ||
295 | * | ||
296 | * RETURNS: | ||
297 | * Address of corresponding remapped pcpu address if match is found; | ||
298 | * otherwise, NULL. | ||
299 | */ | ||
300 | void *pcpu_lpage_remapped(void *kaddr) | ||
301 | { | ||
302 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); | ||
303 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; | ||
304 | int left = 0, right = num_possible_cpus() - 1; | ||
305 | int pos; | ||
306 | |||
307 | /* pcpul in use at all? */ | ||
308 | if (!pcpul_map) | ||
309 | return NULL; | ||
310 | |||
311 | /* okay, perform binary search */ | ||
312 | while (left <= right) { | ||
313 | pos = (left + right) / 2; | ||
314 | |||
315 | if (pcpul_map[pos].ptr < pmd_addr) | ||
316 | left = pos + 1; | ||
317 | else if (pcpul_map[pos].ptr > pmd_addr) | ||
318 | right = pos - 1; | ||
319 | else { | ||
320 | /* it shouldn't be in the area for the first chunk */ | ||
321 | WARN_ON(offset < pcpul_size); | ||
322 | |||
323 | return pcpul_vm.addr + | ||
324 | pcpul_map[pos].cpu * PMD_SIZE + offset; | ||
325 | } | ||
326 | } | ||
327 | |||
328 | return NULL; | ||
329 | } | 182 | } |
330 | #else | 183 | #else |
331 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | 184 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) |