Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- | mm/vmalloc.c | 588
1 file changed, 588 insertions, 0 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
new file mode 100644
index 000000000000..c6182f6f1305
--- /dev/null
+++ b/mm/vmalloc.c
@@ -0,0 +1,588 @@
1 | /* | ||
2 | * linux/mm/vmalloc.c | ||
3 | * | ||
4 | * Copyright (C) 1993 Linus Torvalds | ||
5 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 | ||
6 | * SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000 | ||
7 | * Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002 | ||
8 | */ | ||
9 | |||
10 | #include <linux/mm.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/highmem.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/interrupt.h> | ||
16 | |||
17 | #include <linux/vmalloc.h> | ||
18 | |||
19 | #include <asm/uaccess.h> | ||
20 | #include <asm/tlbflush.h> | ||
21 | |||
22 | |||
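/*
 * vmlist is the address-ordered, singly linked list of all allocated
 * vm_struct areas.  vmlist_lock protects it: readers such as vread() and
 * vwrite() take the read side, area insertion and removal take the write
 * side.
 */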
23 | DEFINE_RWLOCK(vmlist_lock); | ||
24 | struct vm_struct *vmlist; | ||
25 | |||
26 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) | ||
27 | { | ||
28 | pte_t *pte; | ||
29 | |||
30 | pte = pte_offset_kernel(pmd, addr); | ||
31 | do { | ||
32 | pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte); | ||
33 | WARN_ON(!pte_none(ptent) && !pte_present(ptent)); | ||
34 | } while (pte++, addr += PAGE_SIZE, addr != end); | ||
35 | } | ||
36 | |||
37 | static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr, | ||
38 | unsigned long end) | ||
39 | { | ||
40 | pmd_t *pmd; | ||
41 | unsigned long next; | ||
42 | |||
43 | pmd = pmd_offset(pud, addr); | ||
44 | do { | ||
45 | next = pmd_addr_end(addr, end); | ||
46 | if (pmd_none_or_clear_bad(pmd)) | ||
47 | continue; | ||
48 | vunmap_pte_range(pmd, addr, next); | ||
49 | } while (pmd++, addr = next, addr != end); | ||
50 | } | ||
51 | |||
52 | static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr, | ||
53 | unsigned long end) | ||
54 | { | ||
55 | pud_t *pud; | ||
56 | unsigned long next; | ||
57 | |||
58 | pud = pud_offset(pgd, addr); | ||
59 | do { | ||
60 | next = pud_addr_end(addr, end); | ||
61 | if (pud_none_or_clear_bad(pud)) | ||
62 | continue; | ||
63 | vunmap_pmd_range(pud, addr, next); | ||
64 | } while (pud++, addr = next, addr != end); | ||
65 | } | ||
66 | |||
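/*
 * unmap_vm_area - clear the kernel page table entries backing @area
 *
 * Walks the kernel page tables over [area->addr, area->addr + area->size),
 * clears every PTE found there and flushes the TLB for the range.  The
 * backing pages themselves are not freed here.
 */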
67 | void unmap_vm_area(struct vm_struct *area) | ||
68 | { | ||
69 | pgd_t *pgd; | ||
70 | unsigned long next; | ||
71 | unsigned long addr = (unsigned long) area->addr; | ||
72 | unsigned long end = addr + area->size; | ||
73 | |||
74 | BUG_ON(addr >= end); | ||
75 | pgd = pgd_offset_k(addr); | ||
76 | flush_cache_vunmap(addr, end); | ||
77 | do { | ||
78 | next = pgd_addr_end(addr, end); | ||
79 | if (pgd_none_or_clear_bad(pgd)) | ||
80 | continue; | ||
81 | vunmap_pud_range(pgd, addr, next); | ||
82 | } while (pgd++, addr = next, addr != end); | ||
83 | flush_tlb_kernel_range((unsigned long) area->addr, end); | ||
84 | } | ||
85 | |||
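/*
 * The vmap_*_range() helpers below mirror the vunmap_*_range() walkers
 * above: they allocate any missing page-table levels and install one PTE
 * per page, consuming entries from the caller's page array as they go.
 */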
86 | static int vmap_pte_range(pmd_t *pmd, unsigned long addr, | ||
87 | unsigned long end, pgprot_t prot, struct page ***pages) | ||
88 | { | ||
89 | pte_t *pte; | ||
90 | |||
91 | pte = pte_alloc_kernel(&init_mm, pmd, addr); | ||
92 | if (!pte) | ||
93 | return -ENOMEM; | ||
94 | do { | ||
95 | struct page *page = **pages; | ||
96 | WARN_ON(!pte_none(*pte)); | ||
97 | if (!page) | ||
98 | return -ENOMEM; | ||
99 | set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); | ||
100 | (*pages)++; | ||
101 | } while (pte++, addr += PAGE_SIZE, addr != end); | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | static inline int vmap_pmd_range(pud_t *pud, unsigned long addr, | ||
106 | unsigned long end, pgprot_t prot, struct page ***pages) | ||
107 | { | ||
108 | pmd_t *pmd; | ||
109 | unsigned long next; | ||
110 | |||
111 | pmd = pmd_alloc(&init_mm, pud, addr); | ||
112 | if (!pmd) | ||
113 | return -ENOMEM; | ||
114 | do { | ||
115 | next = pmd_addr_end(addr, end); | ||
116 | if (vmap_pte_range(pmd, addr, next, prot, pages)) | ||
117 | return -ENOMEM; | ||
118 | } while (pmd++, addr = next, addr != end); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr, | ||
123 | unsigned long end, pgprot_t prot, struct page ***pages) | ||
124 | { | ||
125 | pud_t *pud; | ||
126 | unsigned long next; | ||
127 | |||
128 | pud = pud_alloc(&init_mm, pgd, addr); | ||
129 | if (!pud) | ||
130 | return -ENOMEM; | ||
131 | do { | ||
132 | next = pud_addr_end(addr, end); | ||
133 | if (vmap_pmd_range(pud, addr, next, prot, pages)) | ||
134 | return -ENOMEM; | ||
135 | } while (pud++, addr = next, addr != end); | ||
136 | return 0; | ||
137 | } | ||
138 | |||
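/*
 * map_vm_area - map the given pages into the virtual range of @area
 *
 * Populates the kernel page tables for @area (excluding the guard page)
 * with protection @prot, advancing *@pages by one entry per mapped page.
 * Returns 0 on success, or -ENOMEM if a page-table allocation fails or
 * the page array runs out early.
 */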
139 | int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) | ||
140 | { | ||
141 | pgd_t *pgd; | ||
142 | unsigned long next; | ||
143 | unsigned long addr = (unsigned long) area->addr; | ||
144 | unsigned long end = addr + area->size - PAGE_SIZE; | ||
145 | int err; | ||
146 | |||
147 | BUG_ON(addr >= end); | ||
148 | pgd = pgd_offset_k(addr); | ||
149 | spin_lock(&init_mm.page_table_lock); | ||
150 | do { | ||
151 | next = pgd_addr_end(addr, end); | ||
152 | err = vmap_pud_range(pgd, addr, next, prot, pages); | ||
153 | if (err) | ||
154 | break; | ||
155 | } while (pgd++, addr = next, addr != end); | ||
156 | spin_unlock(&init_mm.page_table_lock); | ||
157 | flush_cache_vmap((unsigned long) area->addr, end); | ||
158 | return err; | ||
159 | } | ||
160 | |||
161 | #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ | ||
162 | |||
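/*
 * __get_vm_area - reserve a range of kernel virtual address space
 *
 * Searches [start, end) for a gap large enough for @size plus one guard
 * page (VM_IOREMAP requests get extra alignment), links a new vm_struct
 * into the address-ordered vmlist and returns it, or NULL on failure.
 */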
163 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, | ||
164 | unsigned long start, unsigned long end) | ||
165 | { | ||
166 | struct vm_struct **p, *tmp, *area; | ||
167 | unsigned long align = 1; | ||
168 | unsigned long addr; | ||
169 | |||
170 | if (flags & VM_IOREMAP) { | ||
171 | int bit = fls(size); | ||
172 | |||
173 | if (bit > IOREMAP_MAX_ORDER) | ||
174 | bit = IOREMAP_MAX_ORDER; | ||
175 | else if (bit < PAGE_SHIFT) | ||
176 | bit = PAGE_SHIFT; | ||
177 | |||
178 | align = 1ul << bit; | ||
179 | } | ||
180 | addr = ALIGN(start, align); | ||
181 | size = PAGE_ALIGN(size); | ||
182 | |||
183 | area = kmalloc(sizeof(*area), GFP_KERNEL); | ||
184 | if (unlikely(!area)) | ||
185 | return NULL; | ||
186 | |||
187 | if (unlikely(!size)) { | ||
188 | kfree (area); | ||
189 | return NULL; | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * We always allocate a guard page. | ||
194 | */ | ||
195 | size += PAGE_SIZE; | ||
196 | |||
197 | write_lock(&vmlist_lock); | ||
198 | for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) { | ||
199 | if ((unsigned long)tmp->addr < addr) { | ||
200 | if((unsigned long)tmp->addr + tmp->size >= addr) | ||
201 | addr = ALIGN(tmp->size + | ||
202 | (unsigned long)tmp->addr, align); | ||
203 | continue; | ||
204 | } | ||
205 | if ((size + addr) < addr) | ||
206 | goto out; | ||
207 | if (size + addr <= (unsigned long)tmp->addr) | ||
208 | goto found; | ||
209 | addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align); | ||
210 | if (addr > end - size) | ||
211 | goto out; | ||
212 | } | ||
213 | |||
214 | found: | ||
215 | area->next = *p; | ||
216 | *p = area; | ||
217 | |||
218 | area->flags = flags; | ||
219 | area->addr = (void *)addr; | ||
220 | area->size = size; | ||
221 | area->pages = NULL; | ||
222 | area->nr_pages = 0; | ||
223 | area->phys_addr = 0; | ||
224 | write_unlock(&vmlist_lock); | ||
225 | |||
226 | return area; | ||
227 | |||
228 | out: | ||
229 | write_unlock(&vmlist_lock); | ||
230 | kfree(area); | ||
231 | if (printk_ratelimit()) | ||
232 | printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n"); | ||
233 | return NULL; | ||
234 | } | ||
235 | |||
236 | /** | ||
237 | * get_vm_area - reserve a contiguous kernel virtual area | ||
238 | * | ||
239 | * @size: size of the area | ||
240 | * @flags: %VM_IOREMAP for I/O mappings or %VM_ALLOC | ||
241 | * | ||
242 | * Search for an area of @size in the kernel virtual mapping area, | ||
243 | * and reserve it for our purposes. Returns the area descriptor | ||
244 | * on success or %NULL on failure. | ||
245 | */ | ||
246 | struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) | ||
247 | { | ||
248 | return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END); | ||
249 | } | ||
250 | |||
251 | /** | ||
252 | * remove_vm_area - find and remove a contiguous kernel virtual area | ||
253 | * | ||
254 | * @addr: base address | ||
255 | * | ||
256 | * Search for the kernel VM area starting at @addr, and remove it. | ||
257 | * This function returns the found VM area, but using it is NOT safe | ||
258 | * on SMP machines. | ||
259 | */ | ||
260 | struct vm_struct *remove_vm_area(void *addr) | ||
261 | { | ||
262 | struct vm_struct **p, *tmp; | ||
263 | |||
264 | write_lock(&vmlist_lock); | ||
265 | for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) { | ||
266 | if (tmp->addr == addr) | ||
267 | goto found; | ||
268 | } | ||
269 | write_unlock(&vmlist_lock); | ||
270 | return NULL; | ||
271 | |||
272 | found: | ||
273 | unmap_vm_area(tmp); | ||
274 | *p = tmp->next; | ||
275 | write_unlock(&vmlist_lock); | ||
276 | |||
277 | /* | ||
278 | * Remove the guard page. | ||
279 | */ | ||
280 | tmp->size -= PAGE_SIZE; | ||
281 | return tmp; | ||
282 | } | ||
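/*
 * A minimal usage sketch (not part of the original file; names are
 * hypothetical): reserve a bare chunk of kernel virtual address space and
 * release it again.  Real users such as ioremap() map physical memory into
 * the reserved range before touching it.
 */
static void *example_reserve_region(unsigned long bytes)
{
	struct vm_struct *vm = get_vm_area(bytes, VM_IOREMAP);

	return vm ? vm->addr : NULL;
}

static void example_release_region(void *addr)
{
	/* remove_vm_area() unmaps and unlinks; freeing the descriptor is ours */
	kfree(remove_vm_area(addr));
}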
283 | |||
284 | void __vunmap(void *addr, int deallocate_pages) | ||
285 | { | ||
286 | struct vm_struct *area; | ||
287 | |||
288 | if (!addr) | ||
289 | return; | ||
290 | |||
291 | if ((PAGE_SIZE-1) & (unsigned long)addr) { | ||
292 | printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr); | ||
293 | WARN_ON(1); | ||
294 | return; | ||
295 | } | ||
296 | |||
297 | area = remove_vm_area(addr); | ||
298 | if (unlikely(!area)) { | ||
299 | printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", | ||
300 | addr); | ||
301 | WARN_ON(1); | ||
302 | return; | ||
303 | } | ||
304 | |||
305 | if (deallocate_pages) { | ||
306 | int i; | ||
307 | |||
308 | for (i = 0; i < area->nr_pages; i++) { | ||
309 | if (unlikely(!area->pages[i])) | ||
310 | BUG(); | ||
311 | __free_page(area->pages[i]); | ||
312 | } | ||
313 | |||
314 | if (area->nr_pages > PAGE_SIZE/sizeof(struct page *)) | ||
315 | vfree(area->pages); | ||
316 | else | ||
317 | kfree(area->pages); | ||
318 | } | ||
319 | |||
320 | kfree(area); | ||
321 | return; | ||
322 | } | ||
323 | |||
324 | /** | ||
325 | * vfree - release memory allocated by vmalloc() | ||
326 | * | ||
327 | * @addr: memory base address | ||
328 | * | ||
329 | * Free the virtually contiguous memory area starting at @addr, as | ||
330 | * obtained from vmalloc(), vmalloc_32() or __vmalloc(). | ||
331 | * | ||
332 | * May not be called in interrupt context. | ||
333 | */ | ||
334 | void vfree(void *addr) | ||
335 | { | ||
336 | BUG_ON(in_interrupt()); | ||
337 | __vunmap(addr, 1); | ||
338 | } | ||
339 | |||
340 | EXPORT_SYMBOL(vfree); | ||
341 | |||
342 | /** | ||
343 | * vunmap - release virtual mapping obtained by vmap() | ||
344 | * | ||
345 | * @addr: memory base address | ||
346 | * | ||
347 | * Free the virtually contiguous memory area starting at @addr, | ||
348 | * which was created from the page array passed to vmap(). | ||
349 | * | ||
350 | * May not be called in interrupt context. | ||
351 | */ | ||
352 | void vunmap(void *addr) | ||
353 | { | ||
354 | BUG_ON(in_interrupt()); | ||
355 | __vunmap(addr, 0); | ||
356 | } | ||
357 | |||
358 | EXPORT_SYMBOL(vunmap); | ||
359 | |||
360 | /** | ||
361 | * vmap - map an array of pages into virtually contiguous space | ||
362 | * | ||
363 | * @pages: array of page pointers | ||
364 | * @count: number of pages to map | ||
365 | * @flags: vm_area->flags | ||
366 | * @prot: page protection for the mapping | ||
367 | * | ||
368 | * Maps @count pages from @pages into contiguous kernel virtual | ||
369 | * space. | ||
370 | */ | ||
371 | void *vmap(struct page **pages, unsigned int count, | ||
372 | unsigned long flags, pgprot_t prot) | ||
373 | { | ||
374 | struct vm_struct *area; | ||
375 | |||
376 | if (count > num_physpages) | ||
377 | return NULL; | ||
378 | |||
379 | area = get_vm_area((count << PAGE_SHIFT), flags); | ||
380 | if (!area) | ||
381 | return NULL; | ||
382 | if (map_vm_area(area, prot, &pages)) { | ||
383 | vunmap(area->addr); | ||
384 | return NULL; | ||
385 | } | ||
386 | |||
387 | return area->addr; | ||
388 | } | ||
389 | |||
390 | EXPORT_SYMBOL(vmap); | ||
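/*
 * A minimal usage sketch (not part of the original file; the function name
 * is hypothetical): map two separately allocated pages into one virtually
 * contiguous window.  The caller keeps ownership of the pages; vunmap()
 * only tears down the mapping.
 */
static void *example_map_two_pages(struct page *pages[2])
{
	pages[0] = alloc_page(GFP_KERNEL);
	pages[1] = alloc_page(GFP_KERNEL);
	if (!pages[0] || !pages[1]) {
		if (pages[0])
			__free_page(pages[0]);
		if (pages[1])
			__free_page(pages[1]);
		return NULL;
	}
	/* VM_MAP marks the area as a vmap()ed region */
	return vmap(pages, 2, VM_MAP, PAGE_KERNEL);
}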
391 | |||
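/*
 * __vmalloc_area - back @area with individually allocated pages and map them
 *
 * The page-pointer array is kmalloc()ed, or allocated with __vmalloc()
 * itself once it exceeds a page (the recursion terminates because the
 * nested array is much smaller than the original request).  On any failure
 * the area and whatever pages were already allocated are released.
 */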
392 | void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot) | ||
393 | { | ||
394 | struct page **pages; | ||
395 | unsigned int nr_pages, array_size, i; | ||
396 | |||
397 | nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT; | ||
398 | array_size = (nr_pages * sizeof(struct page *)); | ||
399 | |||
400 | area->nr_pages = nr_pages; | ||
401 | /* Please note that the recursion is strictly bounded. */ | ||
402 | if (array_size > PAGE_SIZE) | ||
403 | pages = __vmalloc(array_size, gfp_mask, PAGE_KERNEL); | ||
404 | else | ||
405 | pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM)); | ||
406 | area->pages = pages; | ||
407 | if (!area->pages) { | ||
408 | remove_vm_area(area->addr); | ||
409 | kfree(area); | ||
410 | return NULL; | ||
411 | } | ||
412 | memset(area->pages, 0, array_size); | ||
413 | |||
414 | for (i = 0; i < area->nr_pages; i++) { | ||
415 | area->pages[i] = alloc_page(gfp_mask); | ||
416 | if (unlikely(!area->pages[i])) { | ||
417 | /* Successfully allocated i pages, free them in __vunmap() */ | ||
418 | area->nr_pages = i; | ||
419 | goto fail; | ||
420 | } | ||
421 | } | ||
422 | |||
423 | if (map_vm_area(area, prot, &pages)) | ||
424 | goto fail; | ||
425 | return area->addr; | ||
426 | |||
427 | fail: | ||
428 | vfree(area->addr); | ||
429 | return NULL; | ||
430 | } | ||
431 | |||
432 | /** | ||
433 | * __vmalloc - allocate virtually contiguous memory | ||
434 | * | ||
435 | * @size: allocation size | ||
436 | * @gfp_mask: flags for the page level allocator | ||
437 | * @prot: protection mask for the allocated pages | ||
438 | * | ||
439 | * Allocate enough pages to cover @size from the page level | ||
440 | * allocator with @gfp_mask flags. Map them into contiguous | ||
441 | * kernel virtual space, using a pagetable protection of @prot. | ||
442 | */ | ||
443 | void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot) | ||
444 | { | ||
445 | struct vm_struct *area; | ||
446 | |||
447 | size = PAGE_ALIGN(size); | ||
448 | if (!size || (size >> PAGE_SHIFT) > num_physpages) | ||
449 | return NULL; | ||
450 | |||
451 | area = get_vm_area(size, VM_ALLOC); | ||
452 | if (!area) | ||
453 | return NULL; | ||
454 | |||
455 | return __vmalloc_area(area, gfp_mask, prot); | ||
456 | } | ||
457 | |||
458 | EXPORT_SYMBOL(__vmalloc); | ||
459 | |||
460 | /** | ||
461 | * vmalloc - allocate virtually contiguous memory | ||
462 | * | ||
463 | * @size: allocation size | ||
464 | * | ||
465 | * Allocate enough pages to cover @size from the page level | ||
466 | * allocator and map them into contiguous kernel virtual space. | ||
467 | * | ||
468 | * For tight control over the page level allocator and protection flags | ||
469 | * use __vmalloc() instead. | ||
470 | */ | ||
471 | void *vmalloc(unsigned long size) | ||
472 | { | ||
473 | return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL); | ||
474 | } | ||
475 | |||
476 | EXPORT_SYMBOL(vmalloc); | ||
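/*
 * A minimal usage sketch (not part of the original file; names are
 * hypothetical): the common pattern for a large, virtually contiguous
 * buffer.  vmalloc() may sleep, so this must run in process context, and
 * vfree() must not be called from interrupt context.
 */
static unsigned long *example_alloc_counters(unsigned long nr)
{
	return vmalloc(nr * sizeof(unsigned long));
}

static void example_free_counters(unsigned long *counters)
{
	vfree(counters);	/* __vunmap() silently ignores NULL */
}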
477 | |||
478 | /** | ||
479 | * vmalloc_exec - allocate virtually contiguous, executable memory | ||
480 | * | ||
481 | * @size: allocation size | ||
482 | * | ||
483 | * Kernel-internal function to allocate enough pages to cover @size from | ||
484 | * the page level allocator and map them into contiguous and | ||
485 | * executable kernel virtual space. | ||
486 | * | ||
487 | * For tight control over the page level allocator and protection flags | ||
488 | * use __vmalloc() instead. | ||
489 | */ | ||
490 | |||
491 | #ifndef PAGE_KERNEL_EXEC | ||
492 | # define PAGE_KERNEL_EXEC PAGE_KERNEL | ||
493 | #endif | ||
494 | |||
495 | void *vmalloc_exec(unsigned long size) | ||
496 | { | ||
497 | return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC); | ||
498 | } | ||
499 | |||
500 | /** | ||
501 | * vmalloc_32 - allocate virtually contiguous memory (32-bit addressable) | ||
502 | * | ||
503 | * @size: allocation size | ||
504 | * | ||
505 | * Allocate enough 32-bit physically addressable pages to cover @size from the | ||
506 | * page level allocator and map them into contiguous kernel virtual space. | ||
507 | */ | ||
508 | void *vmalloc_32(unsigned long size) | ||
509 | { | ||
510 | return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL); | ||
511 | } | ||
512 | |||
513 | EXPORT_SYMBOL(vmalloc_32); | ||
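/*
 * Note that vmalloc_32() differs from vmalloc() only in the gfp mask: it
 * omits __GFP_HIGHMEM, so all backing pages come from lowmem.
 */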
514 | |||
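/*
 * vread()/vwrite() copy between a kernel buffer and the vmalloc address
 * range under the vmlist read lock.  Holes that fall outside any mapped
 * area read back as zero bytes and are skipped on writes; both return how
 * far @buf was advanced.
 */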
515 | long vread(char *buf, char *addr, unsigned long count) | ||
516 | { | ||
517 | struct vm_struct *tmp; | ||
518 | char *vaddr, *buf_start = buf; | ||
519 | unsigned long n; | ||
520 | |||
521 | /* Don't allow overflow */ | ||
522 | if ((unsigned long) addr + count < count) | ||
523 | count = -(unsigned long) addr; | ||
524 | |||
525 | read_lock(&vmlist_lock); | ||
526 | for (tmp = vmlist; tmp; tmp = tmp->next) { | ||
527 | vaddr = (char *) tmp->addr; | ||
528 | if (addr >= vaddr + tmp->size - PAGE_SIZE) | ||
529 | continue; | ||
530 | while (addr < vaddr) { | ||
531 | if (count == 0) | ||
532 | goto finished; | ||
533 | *buf = '\0'; | ||
534 | buf++; | ||
535 | addr++; | ||
536 | count--; | ||
537 | } | ||
538 | n = vaddr + tmp->size - PAGE_SIZE - addr; | ||
539 | do { | ||
540 | if (count == 0) | ||
541 | goto finished; | ||
542 | *buf = *addr; | ||
543 | buf++; | ||
544 | addr++; | ||
545 | count--; | ||
546 | } while (--n > 0); | ||
547 | } | ||
548 | finished: | ||
549 | read_unlock(&vmlist_lock); | ||
550 | return buf - buf_start; | ||
551 | } | ||
552 | |||
553 | long vwrite(char *buf, char *addr, unsigned long count) | ||
554 | { | ||
555 | struct vm_struct *tmp; | ||
556 | char *vaddr, *buf_start = buf; | ||
557 | unsigned long n; | ||
558 | |||
559 | /* Don't allow overflow */ | ||
560 | if ((unsigned long) addr + count < count) | ||
561 | count = -(unsigned long) addr; | ||
562 | |||
563 | read_lock(&vmlist_lock); | ||
564 | for (tmp = vmlist; tmp; tmp = tmp->next) { | ||
565 | vaddr = (char *) tmp->addr; | ||
566 | if (addr >= vaddr + tmp->size - PAGE_SIZE) | ||
567 | continue; | ||
568 | while (addr < vaddr) { | ||
569 | if (count == 0) | ||
570 | goto finished; | ||
571 | buf++; | ||
572 | addr++; | ||
573 | count--; | ||
574 | } | ||
575 | n = vaddr + tmp->size - PAGE_SIZE - addr; | ||
576 | do { | ||
577 | if (count == 0) | ||
578 | goto finished; | ||
579 | *addr = *buf; | ||
580 | buf++; | ||
581 | addr++; | ||
582 | count--; | ||
583 | } while (--n > 0); | ||
584 | } | ||
585 | finished: | ||
586 | read_unlock(&vmlist_lock); | ||
587 | return buf - buf_start; | ||
588 | } | ||