diff options
author | Tejun Heo <tj@kernel.org> | 2010-04-09 05:57:01 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2010-05-01 02:30:50 -0400 |
commit | 9f6455325618821dcf6775d7972881fde32e77c5 (patch) | |
tree | 6031e6f28aaaa3bf8d8e08dd59031d94c19fa89e | |
parent | 88999a898b565960690f18e4a13a1e8a9fa4dfef (diff) |
percpu: move vmalloc based chunk management into percpu-vm.c
Separate out and move chunk management (creation/destruction and
[de]population) code into percpu-vm.c which is included by percpu.c
and compiled together. The interface for chunk management is defined
as follows.
* pcpu_populate_chunk - populate the specified range of a chunk
* pcpu_depopulate_chunk - depopulate the specified range of a chunk
* pcpu_create_chunk - create a new chunk
* pcpu_destroy_chunk - destroy a chunk, always preceded by full depop
* pcpu_addr_to_page - translate address to the corresponding struct page
* pcpu_verify_alloc_info - check alloc_info is acceptable during init
Other than wrapping vmalloc_to_page() inside pcpu_addr_to_page() and
adding a dummy pcpu_verify_alloc_info() implementation, this patch only moves
code around. This separation is to allow alternate chunk management
implementation.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Cc: Graff Yang <graff.yang@gmail.com>
Cc: Sonic Zhang <sonic.adi@gmail.com>
-rw-r--r-- | mm/percpu-vm.c | 451 | ||||
-rw-r--r-- | mm/percpu.c | 452 |
2 files changed, 475 insertions, 428 deletions
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c new file mode 100644 index 000000000000..7d9c1d0ebd3f --- /dev/null +++ b/mm/percpu-vm.c | |||
@@ -0,0 +1,451 @@ | |||
1 | /* | ||
2 | * mm/percpu-vm.c - vmalloc area based chunk allocation | ||
3 | * | ||
4 | * Copyright (C) 2010 SUSE Linux Products GmbH | ||
5 | * Copyright (C) 2010 Tejun Heo <tj@kernel.org> | ||
6 | * | ||
7 | * This file is released under the GPLv2. | ||
8 | * | ||
9 | * Chunks are mapped into vmalloc areas and populated page by page. | ||
10 | * This is the default chunk allocator. | ||
11 | */ | ||
12 | |||
13 | static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, | ||
14 | unsigned int cpu, int page_idx) | ||
15 | { | ||
16 | /* must not be used on pre-mapped chunk */ | ||
17 | WARN_ON(chunk->immutable); | ||
18 | |||
19 | return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); | ||
20 | } | ||
21 | |||
22 | /** | ||
23 | * pcpu_get_pages_and_bitmap - get temp pages array and bitmap | ||
24 | * @chunk: chunk of interest | ||
25 | * @bitmapp: output parameter for bitmap | ||
26 | * @may_alloc: may allocate the array | ||
27 | * | ||
28 | * Returns pointer to array of pointers to struct page and bitmap, | ||
29 | * both of which can be indexed with pcpu_page_idx(). The returned | ||
30 | * array is cleared to zero and *@bitmapp is copied from | ||
31 | * @chunk->populated. Note that there is only one array and bitmap | ||
32 | * and access exclusion is the caller's responsibility. | ||
33 | * | ||
34 | * CONTEXT: | ||
35 | * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. | ||
36 | * Otherwise, don't care. | ||
37 | * | ||
38 | * RETURNS: | ||
39 | * Pointer to temp pages array on success, NULL on failure. | ||
40 | */ | ||
41 | static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, | ||
42 | unsigned long **bitmapp, | ||
43 | bool may_alloc) | ||
44 | { | ||
45 | static struct page **pages; | ||
46 | static unsigned long *bitmap; | ||
47 | size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); | ||
48 | size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * | ||
49 | sizeof(unsigned long); | ||
50 | |||
51 | if (!pages || !bitmap) { | ||
52 | if (may_alloc && !pages) | ||
53 | pages = pcpu_mem_alloc(pages_size); | ||
54 | if (may_alloc && !bitmap) | ||
55 | bitmap = pcpu_mem_alloc(bitmap_size); | ||
56 | if (!pages || !bitmap) | ||
57 | return NULL; | ||
58 | } | ||
59 | |||
60 | memset(pages, 0, pages_size); | ||
61 | bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); | ||
62 | |||
63 | *bitmapp = bitmap; | ||
64 | return pages; | ||
65 | } | ||
66 | |||
67 | /** | ||
68 | * pcpu_free_pages - free pages which were allocated for @chunk | ||
69 | * @chunk: chunk pages were allocated for | ||
70 | * @pages: array of pages to be freed, indexed by pcpu_page_idx() | ||
71 | * @populated: populated bitmap | ||
72 | * @page_start: page index of the first page to be freed | ||
73 | * @page_end: page index of the last page to be freed + 1 | ||
74 | * | ||
75 | * Free pages [@page_start,@page_end) in @pages for all units. | |
76 | * The pages were allocated for @chunk. | ||
77 | */ | ||
78 | static void pcpu_free_pages(struct pcpu_chunk *chunk, | ||
79 | struct page **pages, unsigned long *populated, | ||
80 | int page_start, int page_end) | ||
81 | { | ||
82 | unsigned int cpu; | ||
83 | int i; | ||
84 | |||
85 | for_each_possible_cpu(cpu) { | ||
86 | for (i = page_start; i < page_end; i++) { | ||
87 | struct page *page = pages[pcpu_page_idx(cpu, i)]; | ||
88 | |||
89 | if (page) | ||
90 | __free_page(page); | ||
91 | } | ||
92 | } | ||
93 | } | ||
94 | |||
95 | /** | ||
96 | * pcpu_alloc_pages - allocates pages for @chunk | ||
97 | * @chunk: target chunk | ||
98 | * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() | ||
99 | * @populated: populated bitmap | ||
100 | * @page_start: page index of the first page to be allocated | ||
101 | * @page_end: page index of the last page to be allocated + 1 | ||
102 | * | ||
103 | * Allocate pages [@page_start,@page_end) into @pages for all units. | ||
104 | * The allocation is for @chunk. Percpu core doesn't care about the | ||
105 | * content of @pages and will pass it verbatim to pcpu_map_pages(). | ||
106 | */ | ||
107 | static int pcpu_alloc_pages(struct pcpu_chunk *chunk, | ||
108 | struct page **pages, unsigned long *populated, | ||
109 | int page_start, int page_end) | ||
110 | { | ||
111 | const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; | ||
112 | unsigned int cpu; | ||
113 | int i; | ||
114 | |||
115 | for_each_possible_cpu(cpu) { | ||
116 | for (i = page_start; i < page_end; i++) { | ||
117 | struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; | ||
118 | |||
119 | *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); | ||
120 | if (!*pagep) { | ||
121 | pcpu_free_pages(chunk, pages, populated, | ||
122 | page_start, page_end); | ||
123 | return -ENOMEM; | ||
124 | } | ||
125 | } | ||
126 | } | ||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | /** | ||
131 | * pcpu_pre_unmap_flush - flush cache prior to unmapping | ||
132 | * @chunk: chunk the regions to be flushed belong to | |
133 | * @page_start: page index of the first page to be flushed | ||
134 | * @page_end: page index of the last page to be flushed + 1 | ||
135 | * | ||
136 | * Pages in [@page_start,@page_end) of @chunk are about to be | ||
137 | * unmapped. Flush cache. As each flush operation can be very | |
138 | * expensive, issue flush on the whole region at once rather than | ||
139 | * doing it for each cpu. This could be an overkill but is more | ||
140 | * scalable. | ||
141 | */ | ||
142 | static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, | ||
143 | int page_start, int page_end) | ||
144 | { | ||
145 | flush_cache_vunmap( | ||
146 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | ||
147 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | ||
148 | } | ||
149 | |||
150 | static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) | ||
151 | { | ||
152 | unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); | ||
153 | } | ||
154 | |||
155 | /** | ||
156 | * pcpu_unmap_pages - unmap pages out of a pcpu_chunk | ||
157 | * @chunk: chunk of interest | ||
158 | * @pages: pages array which can be used to pass information to free | ||
159 | * @populated: populated bitmap | ||
160 | * @page_start: page index of the first page to unmap | ||
161 | * @page_end: page index of the last page to unmap + 1 | ||
162 | * | ||
163 | * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. | ||
164 | * Corresponding elements in @pages were cleared by the caller and can | ||
165 | * be used to carry information to pcpu_free_pages() which will be | ||
166 | * called after all unmaps are finished. The caller should call | ||
167 | * proper pre/post flush functions. | ||
168 | */ | ||
169 | static void pcpu_unmap_pages(struct pcpu_chunk *chunk, | ||
170 | struct page **pages, unsigned long *populated, | ||
171 | int page_start, int page_end) | ||
172 | { | ||
173 | unsigned int cpu; | ||
174 | int i; | ||
175 | |||
176 | for_each_possible_cpu(cpu) { | ||
177 | for (i = page_start; i < page_end; i++) { | ||
178 | struct page *page; | ||
179 | |||
180 | page = pcpu_chunk_page(chunk, cpu, i); | ||
181 | WARN_ON(!page); | ||
182 | pages[pcpu_page_idx(cpu, i)] = page; | ||
183 | } | ||
184 | __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), | ||
185 | page_end - page_start); | ||
186 | } | ||
187 | |||
188 | for (i = page_start; i < page_end; i++) | ||
189 | __clear_bit(i, populated); | ||
190 | } | ||
191 | |||
192 | /** | ||
193 | * pcpu_post_unmap_tlb_flush - flush TLB after unmapping | ||
194 | * @chunk: pcpu_chunk the regions to be flushed belong to | ||
195 | * @page_start: page index of the first page to be flushed | ||
196 | * @page_end: page index of the last page to be flushed + 1 | ||
197 | * | ||
198 | * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush | ||
199 | * TLB for the regions. This can be skipped if the area is to be | ||
200 | * returned to vmalloc as vmalloc will handle TLB flushing lazily. | ||
201 | * | ||
202 | * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once | ||
203 | * for the whole region. | ||
204 | */ | ||
205 | static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, | ||
206 | int page_start, int page_end) | ||
207 | { | ||
208 | flush_tlb_kernel_range( | ||
209 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | ||
210 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | ||
211 | } | ||
212 | |||
213 | static int __pcpu_map_pages(unsigned long addr, struct page **pages, | ||
214 | int nr_pages) | ||
215 | { | ||
216 | return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT, | ||
217 | PAGE_KERNEL, pages); | ||
218 | } | ||
219 | |||
220 | /** | ||
221 | * pcpu_map_pages - map pages into a pcpu_chunk | ||
222 | * @chunk: chunk of interest | ||
223 | * @pages: pages array containing pages to be mapped | ||
224 | * @populated: populated bitmap | ||
225 | * @page_start: page index of the first page to map | ||
226 | * @page_end: page index of the last page to map + 1 | ||
227 | * | ||
228 | * For each cpu, map pages [@page_start,@page_end) into @chunk. The | ||
229 | * caller is responsible for calling pcpu_post_map_flush() after all | ||
230 | * mappings are complete. | ||
231 | * | ||
232 | * This function is responsible for setting corresponding bits in | |
233 | * @populated bitmap and whatever is necessary for reverse | |
234 | * lookup (addr -> chunk). | |
235 | */ | ||
236 | static int pcpu_map_pages(struct pcpu_chunk *chunk, | ||
237 | struct page **pages, unsigned long *populated, | ||
238 | int page_start, int page_end) | ||
239 | { | ||
240 | unsigned int cpu, tcpu; | ||
241 | int i, err; | ||
242 | |||
243 | for_each_possible_cpu(cpu) { | ||
244 | err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), | ||
245 | &pages[pcpu_page_idx(cpu, page_start)], | ||
246 | page_end - page_start); | ||
247 | if (err < 0) | ||
248 | goto err; | ||
249 | } | ||
250 | |||
251 | /* mapping successful, link chunk and mark populated */ | ||
252 | for (i = page_start; i < page_end; i++) { | ||
253 | for_each_possible_cpu(cpu) | ||
254 | pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], | ||
255 | chunk); | ||
256 | __set_bit(i, populated); | ||
257 | } | ||
258 | |||
259 | return 0; | ||
260 | |||
261 | err: | ||
262 | for_each_possible_cpu(tcpu) { | ||
263 | if (tcpu == cpu) | ||
264 | break; | ||
265 | __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), | ||
266 | page_end - page_start); | ||
267 | } | ||
268 | return err; | ||
269 | } | ||
270 | |||
271 | /** | ||
272 | * pcpu_post_map_flush - flush cache after mapping | ||
273 | * @chunk: pcpu_chunk the regions to be flushed belong to | ||
274 | * @page_start: page index of the first page to be flushed | ||
275 | * @page_end: page index of the last page to be flushed + 1 | ||
276 | * | ||
277 | * Pages [@page_start,@page_end) of @chunk have been mapped. Flush | ||
278 | * cache. | ||
279 | * | ||
280 | * As with pcpu_pre_unmap_flush(), cache flushing is done at once | |
281 | * for the whole region. | |
282 | */ | ||
283 | static void pcpu_post_map_flush(struct pcpu_chunk *chunk, | ||
284 | int page_start, int page_end) | ||
285 | { | ||
286 | flush_cache_vmap( | ||
287 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | ||
288 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | ||
289 | } | ||
290 | |||
291 | /** | ||
292 | * pcpu_populate_chunk - populate and map an area of a pcpu_chunk | ||
293 | * @chunk: chunk of interest | ||
294 | * @off: offset to the area to populate | ||
295 | * @size: size of the area to populate in bytes | ||
296 | * | ||
297 | * For each cpu, populate and map the pages covering the @size bytes | |
298 | * at @off into @chunk. The area is cleared on return. | |
299 | * | ||
300 | * CONTEXT: | ||
301 | * pcpu_alloc_mutex, does GFP_KERNEL allocation. | ||
302 | */ | ||
303 | static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) | ||
304 | { | ||
305 | int page_start = PFN_DOWN(off); | ||
306 | int page_end = PFN_UP(off + size); | ||
307 | int free_end = page_start, unmap_end = page_start; | ||
308 | struct page **pages; | ||
309 | unsigned long *populated; | ||
310 | unsigned int cpu; | ||
311 | int rs, re, rc; | ||
312 | |||
313 | /* quick path, check whether all pages are already there */ | ||
314 | rs = page_start; | ||
315 | pcpu_next_pop(chunk, &rs, &re, page_end); | ||
316 | if (rs == page_start && re == page_end) | ||
317 | goto clear; | ||
318 | |||
319 | /* need to allocate and map pages, this chunk can't be immutable */ | ||
320 | WARN_ON(chunk->immutable); | ||
321 | |||
322 | pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); | ||
323 | if (!pages) | ||
324 | return -ENOMEM; | ||
325 | |||
326 | /* alloc and map */ | ||
327 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { | ||
328 | rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); | ||
329 | if (rc) | ||
330 | goto err_free; | ||
331 | free_end = re; | ||
332 | } | ||
333 | |||
334 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { | ||
335 | rc = pcpu_map_pages(chunk, pages, populated, rs, re); | ||
336 | if (rc) | ||
337 | goto err_unmap; | ||
338 | unmap_end = re; | ||
339 | } | ||
340 | pcpu_post_map_flush(chunk, page_start, page_end); | ||
341 | |||
342 | /* commit new bitmap */ | ||
343 | bitmap_copy(chunk->populated, populated, pcpu_unit_pages); | ||
344 | clear: | ||
345 | for_each_possible_cpu(cpu) | ||
346 | memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); | ||
347 | return 0; | ||
348 | |||
349 | err_unmap: | ||
350 | pcpu_pre_unmap_flush(chunk, page_start, unmap_end); | ||
351 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) | ||
352 | pcpu_unmap_pages(chunk, pages, populated, rs, re); | ||
353 | pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); | ||
354 | err_free: | ||
355 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) | ||
356 | pcpu_free_pages(chunk, pages, populated, rs, re); | ||
357 | return rc; | ||
358 | } | ||
359 | |||
360 | /** | ||
361 | * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk | ||
362 | * @chunk: chunk to depopulate | ||
363 | * @off: offset to the area to depopulate | ||
364 | * @size: size of the area to depopulate in bytes | ||
365 | * | |
366 | * For each cpu, depopulate and unmap the pages covering the @size | |
367 | * bytes at @off from @chunk. The cache is flushed before unmapping; | |
368 | * the TLB is not flushed here because vmalloc will handle that | |
369 | * lazily. | |
370 | * | ||
371 | * CONTEXT: | ||
372 | * pcpu_alloc_mutex. | ||
373 | */ | ||
374 | static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) | ||
375 | { | ||
376 | int page_start = PFN_DOWN(off); | ||
377 | int page_end = PFN_UP(off + size); | ||
378 | struct page **pages; | ||
379 | unsigned long *populated; | ||
380 | int rs, re; | ||
381 | |||
382 | /* quick path, check whether it's empty already */ | ||
383 | rs = page_start; | ||
384 | pcpu_next_unpop(chunk, &rs, &re, page_end); | ||
385 | if (rs == page_start && re == page_end) | ||
386 | return; | ||
387 | |||
388 | /* immutable chunks can't be depopulated */ | ||
389 | WARN_ON(chunk->immutable); | ||
390 | |||
391 | /* | ||
392 | * If control reaches here, there must have been at least one | ||
393 | * successful population attempt so the temp pages array must | ||
394 | * be available now. | ||
395 | */ | ||
396 | pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); | ||
397 | BUG_ON(!pages); | ||
398 | |||
399 | /* unmap and free */ | ||
400 | pcpu_pre_unmap_flush(chunk, page_start, page_end); | ||
401 | |||
402 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) | ||
403 | pcpu_unmap_pages(chunk, pages, populated, rs, re); | ||
404 | |||
405 | /* no need to flush tlb, vmalloc will handle it lazily */ | ||
406 | |||
407 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) | ||
408 | pcpu_free_pages(chunk, pages, populated, rs, re); | ||
409 | |||
410 | /* commit new bitmap */ | ||
411 | bitmap_copy(chunk->populated, populated, pcpu_unit_pages); | ||
412 | } | ||
413 | |||
414 | static struct pcpu_chunk *pcpu_create_chunk(void) | ||
415 | { | ||
416 | struct pcpu_chunk *chunk; | ||
417 | struct vm_struct **vms; | ||
418 | |||
419 | chunk = pcpu_alloc_chunk(); | ||
420 | if (!chunk) | ||
421 | return NULL; | ||
422 | |||
423 | vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes, | ||
424 | pcpu_nr_groups, pcpu_atom_size, GFP_KERNEL); | ||
425 | if (!vms) { | ||
426 | pcpu_free_chunk(chunk); | ||
427 | return NULL; | ||
428 | } | ||
429 | |||
430 | chunk->data = vms; | ||
431 | chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; | ||
432 | return chunk; | ||
433 | } | ||
434 | |||
435 | static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) | ||
436 | { | ||
437 | if (chunk && chunk->data) | ||
438 | pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); | ||
439 | pcpu_free_chunk(chunk); | ||
440 | } | ||
441 | |||
442 | static struct page *pcpu_addr_to_page(void *addr) | ||
443 | { | ||
444 | return vmalloc_to_page(addr); | ||
445 | } | ||
446 | |||
447 | static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai) | ||
448 | { | ||
449 | /* no extra restriction */ | ||
450 | return 0; | ||
451 | } | ||
diff --git a/mm/percpu.c b/mm/percpu.c index b403d7c02c67..15f680430671 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -236,15 +236,6 @@ static unsigned long __maybe_unused pcpu_chunk_addr(struct pcpu_chunk *chunk, | |||
236 | (page_idx << PAGE_SHIFT); | 236 | (page_idx << PAGE_SHIFT); |
237 | } | 237 | } |
238 | 238 | ||
239 | static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, | ||
240 | unsigned int cpu, int page_idx) | ||
241 | { | ||
242 | /* must not be used on pre-mapped chunk */ | ||
243 | WARN_ON(chunk->immutable); | ||
244 | |||
245 | return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); | ||
246 | } | ||
247 | |||
248 | static void __maybe_unused pcpu_next_unpop(struct pcpu_chunk *chunk, | 239 | static void __maybe_unused pcpu_next_unpop(struct pcpu_chunk *chunk, |
249 | int *rs, int *re, int end) | 240 | int *rs, int *re, int end) |
250 | { | 241 | { |
@@ -641,425 +632,29 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) | |||
641 | kfree(chunk); | 632 | kfree(chunk); |
642 | } | 633 | } |
643 | 634 | ||
644 | /** | 635 | /* |
645 | * pcpu_get_pages_and_bitmap - get temp pages array and bitmap | 636 | * Chunk management implementation. |
646 | * @chunk: chunk of interest | 637 | * |
647 | * @bitmapp: output parameter for bitmap | 638 | * To allow different implementations, chunk alloc/free and |
648 | * @may_alloc: may allocate the array | 639 | * [de]population are implemented in a separate file which is pulled |
649 | * | 640 | * into this file and compiled together. The following functions |
650 | * Returns pointer to array of pointers to struct page and bitmap, | 641 | * should be implemented. |
651 | * both of which can be indexed with pcpu_page_idx(). The returned | 642 | * |
652 | * array is cleared to zero and *@bitmapp is copied from | 643 | * pcpu_populate_chunk - populate the specified range of a chunk |
653 | * @chunk->populated. Note that there is only one array and bitmap | 644 | * pcpu_depopulate_chunk - depopulate the specified range of a chunk |
654 | * and access exclusion is the caller's responsibility. | 645 | * pcpu_create_chunk - create a new chunk |
655 | * | 646 | * pcpu_destroy_chunk - destroy a chunk, always preceded by full depop |
656 | * CONTEXT: | 647 | * pcpu_addr_to_page - translate address to the corresponding struct page |
657 | * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. | 648 | * pcpu_verify_alloc_info - check alloc_info is acceptable during init |
658 | * Otherwise, don't care. | ||
659 | * | ||
660 | * RETURNS: | ||
661 | * Pointer to temp pages array on success, NULL on failure. | ||
662 | */ | ||
663 | static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, | ||
664 | unsigned long **bitmapp, | ||
665 | bool may_alloc) | ||
666 | { | ||
667 | static struct page **pages; | ||
668 | static unsigned long *bitmap; | ||
669 | size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); | ||
670 | size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * | ||
671 | sizeof(unsigned long); | ||
672 | |||
673 | if (!pages || !bitmap) { | ||
674 | if (may_alloc && !pages) | ||
675 | pages = pcpu_mem_alloc(pages_size); | ||
676 | if (may_alloc && !bitmap) | ||
677 | bitmap = pcpu_mem_alloc(bitmap_size); | ||
678 | if (!pages || !bitmap) | ||
679 | return NULL; | ||
680 | } | ||
681 | |||
682 | memset(pages, 0, pages_size); | ||
683 | bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); | ||
684 | |||
685 | *bitmapp = bitmap; | ||
686 | return pages; | ||
687 | } | ||
688 | |||
689 | /** | ||
690 | * pcpu_free_pages - free pages which were allocated for @chunk | ||
691 | * @chunk: chunk pages were allocated for | ||
692 | * @pages: array of pages to be freed, indexed by pcpu_page_idx() | ||
693 | * @populated: populated bitmap | ||
694 | * @page_start: page index of the first page to be freed | ||
695 | * @page_end: page index of the last page to be freed + 1 | ||
696 | * | ||
697 | * Free pages [@page_start and @page_end) in @pages for all units. | ||
698 | * The pages were allocated for @chunk. | ||
699 | */ | ||
700 | static void pcpu_free_pages(struct pcpu_chunk *chunk, | ||
701 | struct page **pages, unsigned long *populated, | ||
702 | int page_start, int page_end) | ||
703 | { | ||
704 | unsigned int cpu; | ||
705 | int i; | ||
706 | |||
707 | for_each_possible_cpu(cpu) { | ||
708 | for (i = page_start; i < page_end; i++) { | ||
709 | struct page *page = pages[pcpu_page_idx(cpu, i)]; | ||
710 | |||
711 | if (page) | ||
712 | __free_page(page); | ||
713 | } | ||
714 | } | ||
715 | } | ||
716 | |||
717 | /** | ||
718 | * pcpu_alloc_pages - allocates pages for @chunk | ||
719 | * @chunk: target chunk | ||
720 | * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() | ||
721 | * @populated: populated bitmap | ||
722 | * @page_start: page index of the first page to be allocated | ||
723 | * @page_end: page index of the last page to be allocated + 1 | ||
724 | * | ||
725 | * Allocate pages [@page_start,@page_end) into @pages for all units. | ||
726 | * The allocation is for @chunk. Percpu core doesn't care about the | ||
727 | * content of @pages and will pass it verbatim to pcpu_map_pages(). | ||
728 | */ | ||
729 | static int pcpu_alloc_pages(struct pcpu_chunk *chunk, | ||
730 | struct page **pages, unsigned long *populated, | ||
731 | int page_start, int page_end) | ||
732 | { | ||
733 | const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; | ||
734 | unsigned int cpu; | ||
735 | int i; | ||
736 | |||
737 | for_each_possible_cpu(cpu) { | ||
738 | for (i = page_start; i < page_end; i++) { | ||
739 | struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; | ||
740 | |||
741 | *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); | ||
742 | if (!*pagep) { | ||
743 | pcpu_free_pages(chunk, pages, populated, | ||
744 | page_start, page_end); | ||
745 | return -ENOMEM; | ||
746 | } | ||
747 | } | ||
748 | } | ||
749 | return 0; | ||
750 | } | ||
751 | |||
752 | /** | ||
753 | * pcpu_pre_unmap_flush - flush cache prior to unmapping | ||
754 | * @chunk: chunk the regions to be flushed belongs to | ||
755 | * @page_start: page index of the first page to be flushed | ||
756 | * @page_end: page index of the last page to be flushed + 1 | ||
757 | * | ||
758 | * Pages in [@page_start,@page_end) of @chunk are about to be | ||
759 | * unmapped. Flush cache. As each flushing trial can be very | ||
760 | * expensive, issue flush on the whole region at once rather than | ||
761 | * doing it for each cpu. This could be an overkill but is more | ||
762 | * scalable. | ||
763 | */ | ||
764 | static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, | ||
765 | int page_start, int page_end) | ||
766 | { | ||
767 | flush_cache_vunmap( | ||
768 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | ||
769 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | ||
770 | } | ||
771 | |||
772 | static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) | ||
773 | { | ||
774 | unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); | ||
775 | } | ||
776 | |||
777 | /** | ||
778 | * pcpu_unmap_pages - unmap pages out of a pcpu_chunk | ||
779 | * @chunk: chunk of interest | ||
780 | * @pages: pages array which can be used to pass information to free | ||
781 | * @populated: populated bitmap | ||
782 | * @page_start: page index of the first page to unmap | ||
783 | * @page_end: page index of the last page to unmap + 1 | ||
784 | * | ||
785 | * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. | ||
786 | * Corresponding elements in @pages were cleared by the caller and can | ||
787 | * be used to carry information to pcpu_free_pages() which will be | ||
788 | * called after all unmaps are finished. The caller should call | ||
789 | * proper pre/post flush functions. | ||
790 | */ | ||
791 | static void pcpu_unmap_pages(struct pcpu_chunk *chunk, | ||
792 | struct page **pages, unsigned long *populated, | ||
793 | int page_start, int page_end) | ||
794 | { | ||
795 | unsigned int cpu; | ||
796 | int i; | ||
797 | |||
798 | for_each_possible_cpu(cpu) { | ||
799 | for (i = page_start; i < page_end; i++) { | ||
800 | struct page *page; | ||
801 | |||
802 | page = pcpu_chunk_page(chunk, cpu, i); | ||
803 | WARN_ON(!page); | ||
804 | pages[pcpu_page_idx(cpu, i)] = page; | ||
805 | } | ||
806 | __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), | ||
807 | page_end - page_start); | ||
808 | } | ||
809 | |||
810 | for (i = page_start; i < page_end; i++) | ||
811 | __clear_bit(i, populated); | ||
812 | } | ||
813 | |||
814 | /** | ||
815 | * pcpu_post_unmap_tlb_flush - flush TLB after unmapping | ||
816 | * @chunk: pcpu_chunk the regions to be flushed belong to | ||
817 | * @page_start: page index of the first page to be flushed | ||
818 | * @page_end: page index of the last page to be flushed + 1 | ||
819 | * | ||
820 | * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush | ||
821 | * TLB for the regions. This can be skipped if the area is to be | ||
822 | * returned to vmalloc as vmalloc will handle TLB flushing lazily. | ||
823 | * | ||
824 | * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once | ||
825 | * for the whole region. | ||
826 | */ | ||
827 | static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, | ||
828 | int page_start, int page_end) | ||
829 | { | ||
830 | flush_tlb_kernel_range( | ||
831 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | ||
832 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | ||
833 | } | ||
834 | |||
835 | static int __pcpu_map_pages(unsigned long addr, struct page **pages, | ||
836 | int nr_pages) | ||
837 | { | ||
838 | return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT, | ||
839 | PAGE_KERNEL, pages); | ||
840 | } | ||
841 | |||
842 | /** | ||
843 | * pcpu_map_pages - map pages into a pcpu_chunk | ||
844 | * @chunk: chunk of interest | ||
845 | * @pages: pages array containing pages to be mapped | ||
846 | * @populated: populated bitmap | ||
847 | * @page_start: page index of the first page to map | ||
848 | * @page_end: page index of the last page to map + 1 | ||
849 | * | ||
850 | * For each cpu, map pages [@page_start,@page_end) into @chunk. The | ||
851 | * caller is responsible for calling pcpu_post_map_flush() after all | ||
852 | * mappings are complete. | ||
853 | * | ||
854 | * This function is responsible for setting corresponding bits in | ||
855 | * @chunk->populated bitmap and whatever is necessary for reverse | ||
856 | * lookup (addr -> chunk). | ||
857 | */ | ||
858 | static int pcpu_map_pages(struct pcpu_chunk *chunk, | ||
859 | struct page **pages, unsigned long *populated, | ||
860 | int page_start, int page_end) | ||
861 | { | ||
862 | unsigned int cpu, tcpu; | ||
863 | int i, err; | ||
864 | |||
865 | for_each_possible_cpu(cpu) { | ||
866 | err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), | ||
867 | &pages[pcpu_page_idx(cpu, page_start)], | ||
868 | page_end - page_start); | ||
869 | if (err < 0) | ||
870 | goto err; | ||
871 | } | ||
872 | |||
873 | /* mapping successful, link chunk and mark populated */ | ||
874 | for (i = page_start; i < page_end; i++) { | ||
875 | for_each_possible_cpu(cpu) | ||
876 | pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], | ||
877 | chunk); | ||
878 | __set_bit(i, populated); | ||
879 | } | ||
880 | |||
881 | return 0; | ||
882 | |||
883 | err: | ||
884 | for_each_possible_cpu(tcpu) { | ||
885 | if (tcpu == cpu) | ||
886 | break; | ||
887 | __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), | ||
888 | page_end - page_start); | ||
889 | } | ||
890 | return err; | ||
891 | } | ||
892 | |||
893 | /** | ||
894 | * pcpu_post_map_flush - flush cache after mapping | ||
895 | * @chunk: pcpu_chunk the regions to be flushed belong to | ||
896 | * @page_start: page index of the first page to be flushed | ||
897 | * @page_end: page index of the last page to be flushed + 1 | ||
898 | * | ||
899 | * Pages [@page_start,@page_end) of @chunk have been mapped. Flush | ||
900 | * cache. | ||
901 | * | ||
902 | * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once | ||
903 | * for the whole region. | ||
904 | */ | ||
905 | static void pcpu_post_map_flush(struct pcpu_chunk *chunk, | ||
906 | int page_start, int page_end) | ||
907 | { | ||
908 | flush_cache_vmap( | ||
909 | pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), | ||
910 | pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); | ||
911 | } | ||
912 | |||
913 | /** | ||
914 | * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk | ||
915 | * @chunk: chunk to depopulate | ||
916 | * @off: offset to the area to depopulate | ||
917 | * @size: size of the area to depopulate in bytes | ||
918 | * | ||
919 | * For each cpu, depopulate and unmap the pages covering | ||
920 | * [@off,@off+@size) from @chunk. Cache is flushed before unmapping; | ||
921 | * no TLB flush is issued here as vmalloc handles that lazily on its | ||
922 | * own. | ||
923 | * | ||
924 | * CONTEXT: | ||
925 | * pcpu_alloc_mutex. | ||
926 | */ | ||
927 | static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) | ||
928 | { | ||
929 | int page_start = PFN_DOWN(off); | ||
930 | int page_end = PFN_UP(off + size); | ||
931 | struct page **pages; | ||
932 | unsigned long *populated; | ||
933 | int rs, re; | ||
934 | |||
935 | /* quick path, check whether it's empty already */ | ||
936 | rs = page_start; | ||
937 | pcpu_next_unpop(chunk, &rs, &re, page_end); | ||
938 | if (rs == page_start && re == page_end) | ||
939 | return; | ||
940 | |||
941 | /* immutable chunks can't be depopulated */ | ||
942 | WARN_ON(chunk->immutable); | ||
943 | |||
944 | /* | ||
945 | * If control reaches here, there must have been at least one | ||
946 | * successful population attempt so the temp pages array must | ||
947 | * be available now. | ||
948 | */ | ||
949 | pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); | ||
950 | BUG_ON(!pages); | ||
951 | |||
952 | /* unmap and free */ | ||
953 | pcpu_pre_unmap_flush(chunk, page_start, page_end); | ||
954 | |||
955 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) | ||
956 | pcpu_unmap_pages(chunk, pages, populated, rs, re); | ||
957 | |||
958 | /* no need to flush tlb, vmalloc will handle it lazily */ | ||
959 | |||
960 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) | ||
961 | pcpu_free_pages(chunk, pages, populated, rs, re); | ||
962 | |||
963 | /* commit new bitmap */ | ||
964 | bitmap_copy(chunk->populated, populated, pcpu_unit_pages); | ||
965 | } | ||
966 | |||
967 | /** | ||
968 | * pcpu_populate_chunk - populate and map an area of a pcpu_chunk | ||
969 | * @chunk: chunk of interest | ||
970 | * @off: offset to the area to populate | ||
971 | * @size: size of the area to populate in bytes | ||
972 | * | ||
973 | * For each cpu, populate and map the pages covering [@off,@off+@size) | ||
974 | * into @chunk. The area is cleared on return. | ||
975 | * | ||
976 | * CONTEXT: | ||
977 | * pcpu_alloc_mutex, does GFP_KERNEL allocation. | ||
978 | */ | 649 | */ |
979 | static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) | 650 | static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size); |
980 | { | 651 | static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size); |
981 | int page_start = PFN_DOWN(off); | 652 | static struct pcpu_chunk *pcpu_create_chunk(void); |
982 | int page_end = PFN_UP(off + size); | 653 | static void pcpu_destroy_chunk(struct pcpu_chunk *chunk); |
983 | int free_end = page_start, unmap_end = page_start; | 654 | static struct page *pcpu_addr_to_page(void *addr); |
984 | struct page **pages; | 655 | static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai); |
985 | unsigned long *populated; | ||
986 | unsigned int cpu; | ||
987 | int rs, re, rc; | ||
988 | |||
989 | /* quick path, check whether all pages are already there */ | ||
990 | rs = page_start; | ||
991 | pcpu_next_pop(chunk, &rs, &re, page_end); | ||
992 | if (rs == page_start && re == page_end) | ||
993 | goto clear; | ||
994 | |||
995 | /* need to allocate and map pages, this chunk can't be immutable */ | ||
996 | WARN_ON(chunk->immutable); | ||
997 | |||
998 | pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); | ||
999 | if (!pages) | ||
1000 | return -ENOMEM; | ||
1001 | |||
1002 | /* alloc and map */ | ||
1003 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { | ||
1004 | rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); | ||
1005 | if (rc) | ||
1006 | goto err_free; | ||
1007 | free_end = re; | ||
1008 | } | ||
1009 | |||
1010 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { | ||
1011 | rc = pcpu_map_pages(chunk, pages, populated, rs, re); | ||
1012 | if (rc) | ||
1013 | goto err_unmap; | ||
1014 | unmap_end = re; | ||
1015 | } | ||
1016 | pcpu_post_map_flush(chunk, page_start, page_end); | ||
1017 | |||
1018 | /* commit new bitmap */ | ||
1019 | bitmap_copy(chunk->populated, populated, pcpu_unit_pages); | ||
1020 | clear: | ||
1021 | for_each_possible_cpu(cpu) | ||
1022 | memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); | ||
1023 | return 0; | ||
1024 | |||
1025 | err_unmap: | ||
1026 | pcpu_pre_unmap_flush(chunk, page_start, unmap_end); | ||
1027 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) | ||
1028 | pcpu_unmap_pages(chunk, pages, populated, rs, re); | ||
1029 | pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); | ||
1030 | err_free: | ||
1031 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) | ||
1032 | pcpu_free_pages(chunk, pages, populated, rs, re); | ||
1033 | return rc; | ||
1034 | } | ||
1035 | 656 | ||
1036 | static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) | 657 | #include "percpu-vm.c" |
1037 | { | ||
1038 | if (chunk && chunk->data) | ||
1039 | pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); | ||
1040 | pcpu_free_chunk(chunk); | ||
1041 | } | ||
1042 | |||
1043 | static struct pcpu_chunk *pcpu_create_chunk(void) | ||
1044 | { | ||
1045 | struct pcpu_chunk *chunk; | ||
1046 | struct vm_struct **vms; | ||
1047 | |||
1048 | chunk = pcpu_alloc_chunk(); | ||
1049 | if (!chunk) | ||
1050 | return NULL; | ||
1051 | |||
1052 | vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes, | ||
1053 | pcpu_nr_groups, pcpu_atom_size, GFP_KERNEL); | ||
1054 | if (!vms) { | ||
1055 | pcpu_free_chunk(chunk); | ||
1056 | return NULL; | ||
1057 | } | ||
1058 | |||
1059 | chunk->data = vms; | ||
1060 | chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; | ||
1061 | return chunk; | ||
1062 | } | ||
1063 | 658 | ||
1064 | /** | 659 | /** |
1065 | * pcpu_chunk_addr_search - determine chunk containing specified address | 660 | * pcpu_chunk_addr_search - determine chunk containing specified address |
@@ -1086,7 +681,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) | |||
1086 | * there's no need to worry about preemption or cpu hotplug. | 681 | * there's no need to worry about preemption or cpu hotplug. |
1087 | */ | 682 | */ |
1088 | addr += pcpu_unit_offsets[raw_smp_processor_id()]; | 683 | addr += pcpu_unit_offsets[raw_smp_processor_id()]; |
1089 | return pcpu_get_page_chunk(vmalloc_to_page(addr)); | 684 | return pcpu_get_page_chunk(pcpu_addr_to_page(addr)); |
1090 | } | 685 | } |
1091 | 686 | ||
1092 | /** | 687 | /** |
@@ -1386,7 +981,7 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) | |||
1386 | else | 981 | else |
1387 | return page_to_phys(vmalloc_to_page(addr)); | 982 | return page_to_phys(vmalloc_to_page(addr)); |
1388 | } else | 983 | } else |
1389 | return page_to_phys(vmalloc_to_page(addr)); | 984 | return page_to_phys(pcpu_addr_to_page(addr)); |
1390 | } | 985 | } |
1391 | 986 | ||
1392 | static inline size_t pcpu_calc_fc_sizes(size_t static_size, | 987 | static inline size_t pcpu_calc_fc_sizes(size_t static_size, |
@@ -1758,6 +1353,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1758 | PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); | 1353 | PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); |
1759 | PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); | 1354 | PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); |
1760 | PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); | 1355 | PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); |
1356 | PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); | ||
1761 | 1357 | ||
1762 | /* process group information and build config tables accordingly */ | 1358 | /* process group information and build config tables accordingly */ |
1763 | group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0])); | 1359 | group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0])); |