Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- | mm/vmalloc.c | 356
1 file changed, 231 insertions(+), 125 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6b8889da69a6..1d34d75366a7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,8 +31,6 @@ | |||
31 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
32 | #include <asm/shmparam.h> | 32 | #include <asm/shmparam.h> |
33 | 33 | ||
34 | bool vmap_lazy_unmap __read_mostly = true; | ||
35 | |||
36 | /*** Page table manipulation functions ***/ | 34 | /*** Page table manipulation functions ***/ |
37 | 35 | ||
38 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) | 36 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) |
@@ -263,8 +261,15 @@ struct vmap_area { | |||
263 | }; | 261 | }; |
264 | 262 | ||
265 | static DEFINE_SPINLOCK(vmap_area_lock); | 263 | static DEFINE_SPINLOCK(vmap_area_lock); |
266 | static struct rb_root vmap_area_root = RB_ROOT; | ||
267 | static LIST_HEAD(vmap_area_list); | 264 | static LIST_HEAD(vmap_area_list); |
265 | static struct rb_root vmap_area_root = RB_ROOT; | ||
266 | |||
267 | /* The vmap cache globals are protected by vmap_area_lock */ | ||
268 | static struct rb_node *free_vmap_cache; | ||
269 | static unsigned long cached_hole_size; | ||
270 | static unsigned long cached_vstart; | ||
271 | static unsigned long cached_align; | ||
272 | |||
268 | static unsigned long vmap_area_pcpu_hole; | 273 | static unsigned long vmap_area_pcpu_hole; |
269 | 274 | ||
270 | static struct vmap_area *__find_vmap_area(unsigned long addr) | 275 | static struct vmap_area *__find_vmap_area(unsigned long addr) |
@@ -293,13 +298,13 @@ static void __insert_vmap_area(struct vmap_area *va) | |||
293 | struct rb_node *tmp; | 298 | struct rb_node *tmp; |
294 | 299 | ||
295 | while (*p) { | 300 | while (*p) { |
296 | struct vmap_area *tmp; | 301 | struct vmap_area *tmp_va; |
297 | 302 | ||
298 | parent = *p; | 303 | parent = *p; |
299 | tmp = rb_entry(parent, struct vmap_area, rb_node); | 304 | tmp_va = rb_entry(parent, struct vmap_area, rb_node); |
300 | if (va->va_start < tmp->va_end) | 305 | if (va->va_start < tmp_va->va_end) |
301 | p = &(*p)->rb_left; | 306 | p = &(*p)->rb_left; |
302 | else if (va->va_end > tmp->va_start) | 307 | else if (va->va_end > tmp_va->va_start) |
303 | p = &(*p)->rb_right; | 308 | p = &(*p)->rb_right; |
304 | else | 309 | else |
305 | BUG(); | 310 | BUG(); |
@@ -333,9 +338,11 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, | |||
333 | struct rb_node *n; | 338 | struct rb_node *n; |
334 | unsigned long addr; | 339 | unsigned long addr; |
335 | int purged = 0; | 340 | int purged = 0; |
341 | struct vmap_area *first; | ||
336 | 342 | ||
337 | BUG_ON(!size); | 343 | BUG_ON(!size); |
338 | BUG_ON(size & ~PAGE_MASK); | 344 | BUG_ON(size & ~PAGE_MASK); |
345 | BUG_ON(!is_power_of_2(align)); | ||
339 | 346 | ||
340 | va = kmalloc_node(sizeof(struct vmap_area), | 347 | va = kmalloc_node(sizeof(struct vmap_area), |
341 | gfp_mask & GFP_RECLAIM_MASK, node); | 348 | gfp_mask & GFP_RECLAIM_MASK, node); |
@@ -343,79 +350,106 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, | |||
343 | return ERR_PTR(-ENOMEM); | 350 | return ERR_PTR(-ENOMEM); |
344 | 351 | ||
345 | retry: | 352 | retry: |
346 | addr = ALIGN(vstart, align); | ||
347 | |||
348 | spin_lock(&vmap_area_lock); | 353 | spin_lock(&vmap_area_lock); |
349 | if (addr + size - 1 < addr) | 354 | /* |
350 | goto overflow; | 355 | * Invalidate cache if we have more permissive parameters. |
356 | * cached_hole_size notes the largest hole noticed _below_ | ||
357 | * the vmap_area cached in free_vmap_cache: if size fits | ||
358 | * into that hole, we want to scan from vstart to reuse | ||
359 | * the hole instead of allocating above free_vmap_cache. | ||
360 | * Note that __free_vmap_area may update free_vmap_cache | ||
361 | * without updating cached_hole_size or cached_align. | ||
362 | */ | ||
363 | if (!free_vmap_cache || | ||
364 | size < cached_hole_size || | ||
365 | vstart < cached_vstart || | ||
366 | align < cached_align) { | ||
367 | nocache: | ||
368 | cached_hole_size = 0; | ||
369 | free_vmap_cache = NULL; | ||
370 | } | ||
371 | /* record if we encounter less permissive parameters */ | ||
372 | cached_vstart = vstart; | ||
373 | cached_align = align; | ||
374 | |||
375 | /* find starting point for our search */ | ||
376 | if (free_vmap_cache) { | ||
377 | first = rb_entry(free_vmap_cache, struct vmap_area, rb_node); | ||
378 | addr = ALIGN(first->va_end, align); | ||
379 | if (addr < vstart) | ||
380 | goto nocache; | ||
381 | if (addr + size - 1 < addr) | ||
382 | goto overflow; | ||
383 | |||
384 | } else { | ||
385 | addr = ALIGN(vstart, align); | ||
386 | if (addr + size - 1 < addr) | ||
387 | goto overflow; | ||
351 | 388 | ||
352 | /* XXX: could have a last_hole cache */ | 389 | n = vmap_area_root.rb_node; |
353 | n = vmap_area_root.rb_node; | 390 | first = NULL; |
354 | if (n) { | ||
355 | struct vmap_area *first = NULL; | ||
356 | 391 | ||
357 | do { | 392 | while (n) { |
358 | struct vmap_area *tmp; | 393 | struct vmap_area *tmp; |
359 | tmp = rb_entry(n, struct vmap_area, rb_node); | 394 | tmp = rb_entry(n, struct vmap_area, rb_node); |
360 | if (tmp->va_end >= addr) { | 395 | if (tmp->va_end >= addr) { |
361 | if (!first && tmp->va_start < addr + size) | ||
362 | first = tmp; | ||
363 | n = n->rb_left; | ||
364 | } else { | ||
365 | first = tmp; | 396 | first = tmp; |
397 | if (tmp->va_start <= addr) | ||
398 | break; | ||
399 | n = n->rb_left; | ||
400 | } else | ||
366 | n = n->rb_right; | 401 | n = n->rb_right; |
367 | } | 402 | } |
368 | } while (n); | ||
369 | 403 | ||
370 | if (!first) | 404 | if (!first) |
371 | goto found; | 405 | goto found; |
372 | |||
373 | if (first->va_end < addr) { | ||
374 | n = rb_next(&first->rb_node); | ||
375 | if (n) | ||
376 | first = rb_entry(n, struct vmap_area, rb_node); | ||
377 | else | ||
378 | goto found; | ||
379 | } | ||
380 | |||
381 | while (addr + size > first->va_start && addr + size <= vend) { | ||
382 | addr = ALIGN(first->va_end + PAGE_SIZE, align); | ||
383 | if (addr + size - 1 < addr) | ||
384 | goto overflow; | ||
385 | |||
386 | n = rb_next(&first->rb_node); | ||
387 | if (n) | ||
388 | first = rb_entry(n, struct vmap_area, rb_node); | ||
389 | else | ||
390 | goto found; | ||
391 | } | ||
392 | } | 406 | } |
393 | found: | 407 | |
394 | if (addr + size > vend) { | 408 | /* from the starting point, walk areas until a suitable hole is found */ |
395 | overflow: | 409 | while (addr + size > first->va_start && addr + size <= vend) { |
396 | spin_unlock(&vmap_area_lock); | 410 | if (addr + cached_hole_size < first->va_start) |
397 | if (!purged) { | 411 | cached_hole_size = first->va_start - addr; |
398 | purge_vmap_area_lazy(); | 412 | addr = ALIGN(first->va_end, align); |
399 | purged = 1; | 413 | if (addr + size - 1 < addr) |
400 | goto retry; | 414 | goto overflow; |
401 | } | 415 | |
402 | if (printk_ratelimit()) | 416 | n = rb_next(&first->rb_node); |
403 | printk(KERN_WARNING | 417 | if (n) |
404 | "vmap allocation for size %lu failed: " | 418 | first = rb_entry(n, struct vmap_area, rb_node); |
405 | "use vmalloc=<size> to increase size.\n", size); | 419 | else |
406 | kfree(va); | 420 | goto found; |
407 | return ERR_PTR(-EBUSY); | ||
408 | } | 421 | } |
409 | 422 | ||
410 | BUG_ON(addr & (align-1)); | 423 | found: |
424 | if (addr + size > vend) | ||
425 | goto overflow; | ||
411 | 426 | ||
412 | va->va_start = addr; | 427 | va->va_start = addr; |
413 | va->va_end = addr + size; | 428 | va->va_end = addr + size; |
414 | va->flags = 0; | 429 | va->flags = 0; |
415 | __insert_vmap_area(va); | 430 | __insert_vmap_area(va); |
431 | free_vmap_cache = &va->rb_node; | ||
416 | spin_unlock(&vmap_area_lock); | 432 | spin_unlock(&vmap_area_lock); |
417 | 433 | ||
434 | BUG_ON(va->va_start & (align-1)); | ||
435 | BUG_ON(va->va_start < vstart); | ||
436 | BUG_ON(va->va_end > vend); | ||
437 | |||
418 | return va; | 438 | return va; |
439 | |||
440 | overflow: | ||
441 | spin_unlock(&vmap_area_lock); | ||
442 | if (!purged) { | ||
443 | purge_vmap_area_lazy(); | ||
444 | purged = 1; | ||
445 | goto retry; | ||
446 | } | ||
447 | if (printk_ratelimit()) | ||
448 | printk(KERN_WARNING | ||
449 | "vmap allocation for size %lu failed: " | ||
450 | "use vmalloc=<size> to increase size.\n", size); | ||
451 | kfree(va); | ||
452 | return ERR_PTR(-EBUSY); | ||
419 | } | 453 | } |
420 | 454 | ||
421 | static void rcu_free_va(struct rcu_head *head) | 455 | static void rcu_free_va(struct rcu_head *head) |
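The comment block added at the top of alloc_vmap_area() describes the free_vmap_cache scheme: remember where the last search ended, and only fall back to a full scan when the new request is more permissive than the cached one. Below is a compilable userspace sketch of that idea — illustration only, not the kernel code: every name is made up, a sorted array stands in for the rbtree of vmap_areas, and freeing is left out.

#include <stdio.h>
#include <string.h>

#define MAX_AREAS	64
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

struct area { unsigned long start, end; };

static struct area areas[MAX_AREAS];	/* sorted, non-overlapping ranges */
static int nr_areas;
static int cache = -1;			/* index of the last allocated area */
static unsigned long cached_hole_size;	/* largest hole skipped below it */
static unsigned long cached_vstart, cached_align;

static unsigned long alloc_range(unsigned long size, unsigned long align,
				 unsigned long vstart, unsigned long vend)
{
	unsigned long addr;
	int i;

	if (nr_areas == MAX_AREAS)
		return 0;
	/* invalidate the cache when the request is more permissive */
	if (cache < 0 || size < cached_hole_size ||
	    vstart < cached_vstart || align < cached_align) {
nocache:
		cached_hole_size = 0;
		cache = -1;
	}
	cached_vstart = vstart;
	cached_align = align;

	if (cache >= 0) {
		/* resume just above the cached area */
		addr = ALIGN_UP(areas[cache].end, align);
		if (addr < vstart)
			goto nocache;
		i = cache + 1;
	} else {
		/* full scan: start at vstart, skip areas ending below it */
		addr = ALIGN_UP(vstart, align);
		for (i = 0; i < nr_areas && areas[i].end <= addr; i++)
			;
	}

	/* walk the allocated ranges, remembering the largest hole skipped */
	while (i < nr_areas && addr + size > areas[i].start) {
		if (addr + cached_hole_size < areas[i].start)
			cached_hole_size = areas[i].start - addr;
		addr = ALIGN_UP(areas[i].end, align);
		i++;
	}
	if (addr + size > vend)
		return 0;

	/* insert at position i to keep the array sorted; cache the result */
	memmove(&areas[i + 1], &areas[i], (nr_areas - i) * sizeof(areas[0]));
	areas[i].start = addr;
	areas[i].end = addr + size;
	nr_areas++;
	cache = i;
	return addr;
}

int main(void)
{
	/* back-to-back allocations reuse the cache instead of rescanning */
	printf("a = %#lx\n", alloc_range(0x2000, 0x1000, 0x10000, 0x100000));
	printf("b = %#lx\n", alloc_range(0x1000, 0x1000, 0x10000, 0x100000));
	printf("c = %#lx\n", alloc_range(0x8000, 0x4000, 0x10000, 0x100000));
	return 0;
}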
@@ -428,6 +462,22 @@ static void rcu_free_va(struct rcu_head *head) | |||
428 | static void __free_vmap_area(struct vmap_area *va) | 462 | static void __free_vmap_area(struct vmap_area *va) |
429 | { | 463 | { |
430 | BUG_ON(RB_EMPTY_NODE(&va->rb_node)); | 464 | BUG_ON(RB_EMPTY_NODE(&va->rb_node)); |
465 | |||
466 | if (free_vmap_cache) { | ||
467 | if (va->va_end < cached_vstart) { | ||
468 | free_vmap_cache = NULL; | ||
469 | } else { | ||
470 | struct vmap_area *cache; | ||
471 | cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node); | ||
472 | if (va->va_start <= cache->va_start) { | ||
473 | free_vmap_cache = rb_prev(&va->rb_node); | ||
474 | /* | ||
475 | * We don't try to update cached_hole_size or | ||
476 | * cached_align, but it won't go very wrong. | ||
477 | */ | ||
478 | } | ||
479 | } | ||
480 | } | ||
431 | rb_erase(&va->rb_node, &vmap_area_root); | 481 | rb_erase(&va->rb_node, &vmap_area_root); |
432 | RB_CLEAR_NODE(&va->rb_node); | 482 | RB_CLEAR_NODE(&va->rb_node); |
433 | list_del_rcu(&va->list); | 483 | list_del_rcu(&va->list); |
@@ -503,9 +553,6 @@ static unsigned long lazy_max_pages(void) | |||
503 | { | 553 | { |
504 | unsigned int log; | 554 | unsigned int log; |
505 | 555 | ||
506 | if (!vmap_lazy_unmap) | ||
507 | return 0; | ||
508 | |||
509 | log = fls(num_online_cpus()); | 556 | log = fls(num_online_cpus()); |
510 | 557 | ||
511 | return log * (32UL * 1024 * 1024 / PAGE_SIZE); | 558 | return log * (32UL * 1024 * 1024 / PAGE_SIZE); |
@@ -517,6 +564,15 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); | |||
517 | static void purge_fragmented_blocks_allcpus(void); | 564 | static void purge_fragmented_blocks_allcpus(void); |
518 | 565 | ||
519 | /* | 566 | /* |
567 | * called before a call to iounmap() if the caller wants vm_area_struct's | ||
568 | * immediately freed. | ||
569 | */ | ||
570 | void set_iounmap_nonlazy(void) | ||
571 | { | ||
572 | atomic_set(&vmap_lazy_nr, lazy_max_pages()+1); | ||
573 | } | ||
574 | |||
575 | /* | ||
520 | * Purges all lazily-freed vmap areas. | 576 | * Purges all lazily-freed vmap areas. |
521 | * | 577 | * |
522 | * If sync is 0 then don't purge if there is already a purge in progress. | 578 | * If sync is 0 then don't purge if there is already a purge in progress. |
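set_iounmap_nonlazy() pushes vmap_lazy_nr past the purge threshold, so the next lazy free (typically from iounmap()) purges immediately instead of batching. It is aimed at callers such as the kdump /proc/vmcore reader that repeatedly map and unmap old-kernel pages. A minimal sketch of the calling pattern — illustrative only; the helper's declaration lives in an arch header outside this file, and the function here is made up:

#include <linux/io.h>

/* Copy one page of old-kernel memory and drop the mapping at once. */
static void copy_page_now(phys_addr_t src, void *dst)
{
	void __iomem *vaddr = ioremap_cache(src, PAGE_SIZE);

	if (!vaddr)
		return;
	memcpy_fromio(dst, vaddr, PAGE_SIZE);
	set_iounmap_nonlazy();	/* make the iounmap() below purge immediately */
	iounmap(vaddr);
}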
@@ -557,7 +613,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
557 | if (va->va_end > *end) | 613 | if (va->va_end > *end) |
558 | *end = va->va_end; | 614 | *end = va->va_end; |
559 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; | 615 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; |
560 | unmap_vmap_area(va); | ||
561 | list_add_tail(&va->purge_list, &valist); | 616 | list_add_tail(&va->purge_list, &valist); |
562 | va->flags |= VM_LAZY_FREEING; | 617 | va->flags |= VM_LAZY_FREEING; |
563 | va->flags &= ~VM_LAZY_FREE; | 618 | va->flags &= ~VM_LAZY_FREE; |
@@ -602,10 +657,11 @@ static void purge_vmap_area_lazy(void) | |||
602 | } | 657 | } |
603 | 658 | ||
604 | /* | 659 | /* |
605 | * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been | 660 | * Free a vmap area, caller ensuring that the area has been unmapped |
606 | * called for the correct range previously. | 661 | * and flush_cache_vunmap had been called for the correct range |
662 | * previously. | ||
607 | */ | 663 | */ |
608 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) | 664 | static void free_vmap_area_noflush(struct vmap_area *va) |
609 | { | 665 | { |
610 | va->flags |= VM_LAZY_FREE; | 666 | va->flags |= VM_LAZY_FREE; |
611 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); | 667 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); |
@@ -614,6 +670,16 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va) | |||
614 | } | 670 | } |
615 | 671 | ||
616 | /* | 672 | /* |
673 | * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been | ||
674 | * called for the correct range previously. | ||
675 | */ | ||
676 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) | ||
677 | { | ||
678 | unmap_vmap_area(va); | ||
679 | free_vmap_area_noflush(va); | ||
680 | } | ||
681 | |||
682 | /* | ||
617 | * Free and unmap a vmap area | 683 | * Free and unmap a vmap area |
618 | */ | 684 | */ |
619 | static void free_unmap_vmap_area(struct vmap_area *va) | 685 | static void free_unmap_vmap_area(struct vmap_area *va) |
@@ -734,7 +800,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) | |||
734 | va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, | 800 | va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, |
735 | VMALLOC_START, VMALLOC_END, | 801 | VMALLOC_START, VMALLOC_END, |
736 | node, gfp_mask); | 802 | node, gfp_mask); |
737 | if (unlikely(IS_ERR(va))) { | 803 | if (IS_ERR(va)) { |
738 | kfree(vb); | 804 | kfree(vb); |
739 | return ERR_CAST(va); | 805 | return ERR_CAST(va); |
740 | } | 806 | } |
@@ -789,7 +855,7 @@ static void free_vmap_block(struct vmap_block *vb) | |||
789 | spin_unlock(&vmap_block_tree_lock); | 855 | spin_unlock(&vmap_block_tree_lock); |
790 | BUG_ON(tmp != vb); | 856 | BUG_ON(tmp != vb); |
791 | 857 | ||
792 | free_unmap_vmap_area_noflush(vb->va); | 858 | free_vmap_area_noflush(vb->va); |
793 | call_rcu(&vb->rcu_head, rcu_free_vb); | 859 | call_rcu(&vb->rcu_head, rcu_free_vb); |
794 | } | 860 | } |
795 | 861 | ||
@@ -927,6 +993,8 @@ static void vb_free(const void *addr, unsigned long size) | |||
927 | rcu_read_unlock(); | 993 | rcu_read_unlock(); |
928 | BUG_ON(!vb); | 994 | BUG_ON(!vb); |
929 | 995 | ||
996 | vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); | ||
997 | |||
930 | spin_lock(&vb->lock); | 998 | spin_lock(&vb->lock); |
931 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); | 999 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); |
932 | 1000 | ||
@@ -979,7 +1047,6 @@ void vm_unmap_aliases(void) | |||
979 | 1047 | ||
980 | s = vb->va->va_start + (i << PAGE_SHIFT); | 1048 | s = vb->va->va_start + (i << PAGE_SHIFT); |
981 | e = vb->va->va_start + (j << PAGE_SHIFT); | 1049 | e = vb->va->va_start + (j << PAGE_SHIFT); |
982 | vunmap_page_range(s, e); | ||
983 | flush = 1; | 1050 | flush = 1; |
984 | 1051 | ||
985 | if (s < start) | 1052 | if (s < start) |
@@ -1160,6 +1227,7 @@ void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) | |||
1160 | { | 1227 | { |
1161 | vunmap_page_range(addr, addr + size); | 1228 | vunmap_page_range(addr, addr + size); |
1162 | } | 1229 | } |
1230 | EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush); | ||
1163 | 1231 | ||
1164 | /** | 1232 | /** |
1165 | * unmap_kernel_range - unmap kernel VM area and flush cache and TLB | 1233 | * unmap_kernel_range - unmap kernel VM area and flush cache and TLB |
@@ -1300,13 +1368,6 @@ struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, | |||
1300 | -1, GFP_KERNEL, caller); | 1368 | -1, GFP_KERNEL, caller); |
1301 | } | 1369 | } |
1302 | 1370 | ||
1303 | struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, | ||
1304 | int node, gfp_t gfp_mask) | ||
1305 | { | ||
1306 | return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, | ||
1307 | node, gfp_mask, __builtin_return_address(0)); | ||
1308 | } | ||
1309 | |||
1310 | static struct vm_struct *find_vm_area(const void *addr) | 1371 | static struct vm_struct *find_vm_area(const void *addr) |
1311 | { | 1372 | { |
1312 | struct vmap_area *va; | 1373 | struct vmap_area *va; |
@@ -1473,6 +1534,7 @@ static void *__vmalloc_node(unsigned long size, unsigned long align, | |||
1473 | static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | 1534 | static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, |
1474 | pgprot_t prot, int node, void *caller) | 1535 | pgprot_t prot, int node, void *caller) |
1475 | { | 1536 | { |
1537 | const int order = 0; | ||
1476 | struct page **pages; | 1538 | struct page **pages; |
1477 | unsigned int nr_pages, array_size, i; | 1539 | unsigned int nr_pages, array_size, i; |
1478 | gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; | 1540 | gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; |
@@ -1499,11 +1561,12 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | |||
1499 | 1561 | ||
1500 | for (i = 0; i < area->nr_pages; i++) { | 1562 | for (i = 0; i < area->nr_pages; i++) { |
1501 | struct page *page; | 1563 | struct page *page; |
1564 | gfp_t tmp_mask = gfp_mask | __GFP_NOWARN; | ||
1502 | 1565 | ||
1503 | if (node < 0) | 1566 | if (node < 0) |
1504 | page = alloc_page(gfp_mask); | 1567 | page = alloc_page(tmp_mask); |
1505 | else | 1568 | else |
1506 | page = alloc_pages_node(node, gfp_mask, 0); | 1569 | page = alloc_pages_node(node, tmp_mask, order); |
1507 | 1570 | ||
1508 | if (unlikely(!page)) { | 1571 | if (unlikely(!page)) { |
1509 | /* Successfully allocated i pages, free them in __vunmap() */ | 1572 | /* Successfully allocated i pages, free them in __vunmap() */ |
@@ -1518,29 +1581,19 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | |||
1518 | return area->addr; | 1581 | return area->addr; |
1519 | 1582 | ||
1520 | fail: | 1583 | fail: |
1584 | warn_alloc_failed(gfp_mask, order, "vmalloc: allocation failure, " | ||
1585 | "allocated %ld of %ld bytes\n", | ||
1586 | (area->nr_pages*PAGE_SIZE), area->size); | ||
1521 | vfree(area->addr); | 1587 | vfree(area->addr); |
1522 | return NULL; | 1588 | return NULL; |
1523 | } | 1589 | } |
1524 | 1590 | ||
1525 | void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) | ||
1526 | { | ||
1527 | void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1, | ||
1528 | __builtin_return_address(0)); | ||
1529 | |||
1530 | /* | ||
1531 | * A ref_count = 3 is needed because the vm_struct and vmap_area | ||
1532 | * structures allocated in the __get_vm_area_node() function contain | ||
1533 | * references to the virtual address of the vmalloc'ed block. | ||
1534 | */ | ||
1535 | kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask); | ||
1536 | |||
1537 | return addr; | ||
1538 | } | ||
1539 | |||
1540 | /** | 1591 | /** |
1541 | * __vmalloc_node - allocate virtually contiguous memory | 1592 | * __vmalloc_node_range - allocate virtually contiguous memory |
1542 | * @size: allocation size | 1593 | * @size: allocation size |
1543 | * @align: desired alignment | 1594 | * @align: desired alignment |
1595 | * @start: vm area range start | ||
1596 | * @end: vm area range end | ||
1544 | * @gfp_mask: flags for the page level allocator | 1597 | * @gfp_mask: flags for the page level allocator |
1545 | * @prot: protection mask for the allocated pages | 1598 | * @prot: protection mask for the allocated pages |
1546 | * @node: node to use for allocation or -1 | 1599 | * @node: node to use for allocation or -1 |
@@ -1550,9 +1603,9 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) | |||
1550 | * allocator with @gfp_mask flags. Map them into contiguous | 1603 | * allocator with @gfp_mask flags. Map them into contiguous |
1551 | * kernel virtual space, using a pagetable protection of @prot. | 1604 | * kernel virtual space, using a pagetable protection of @prot. |
1552 | */ | 1605 | */ |
1553 | static void *__vmalloc_node(unsigned long size, unsigned long align, | 1606 | void *__vmalloc_node_range(unsigned long size, unsigned long align, |
1554 | gfp_t gfp_mask, pgprot_t prot, | 1607 | unsigned long start, unsigned long end, gfp_t gfp_mask, |
1555 | int node, void *caller) | 1608 | pgprot_t prot, int node, void *caller) |
1556 | { | 1609 | { |
1557 | struct vm_struct *area; | 1610 | struct vm_struct *area; |
1558 | void *addr; | 1611 | void *addr; |
@@ -1562,8 +1615,8 @@ static void *__vmalloc_node(unsigned long size, unsigned long align, | |||
1562 | if (!size || (size >> PAGE_SHIFT) > totalram_pages) | 1615 | if (!size || (size >> PAGE_SHIFT) > totalram_pages) |
1563 | return NULL; | 1616 | return NULL; |
1564 | 1617 | ||
1565 | area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START, | 1618 | area = __get_vm_area_node(size, align, VM_ALLOC, start, end, node, |
1566 | VMALLOC_END, node, gfp_mask, caller); | 1619 | gfp_mask, caller); |
1567 | 1620 | ||
1568 | if (!area) | 1621 | if (!area) |
1569 | return NULL; | 1622 | return NULL; |
@@ -1580,6 +1633,27 @@ static void *__vmalloc_node(unsigned long size, unsigned long align, | |||
1580 | return addr; | 1633 | return addr; |
1581 | } | 1634 | } |
1582 | 1635 | ||
1636 | /** | ||
1637 | * __vmalloc_node - allocate virtually contiguous memory | ||
1638 | * @size: allocation size | ||
1639 | * @align: desired alignment | ||
1640 | * @gfp_mask: flags for the page level allocator | ||
1641 | * @prot: protection mask for the allocated pages | ||
1642 | * @node: node to use for allocation or -1 | ||
1643 | * @caller: caller's return address | ||
1644 | * | ||
1645 | * Allocate enough pages to cover @size from the page level | ||
1646 | * allocator with @gfp_mask flags. Map them into contiguous | ||
1647 | * kernel virtual space, using a pagetable protection of @prot. | ||
1648 | */ | ||
1649 | static void *__vmalloc_node(unsigned long size, unsigned long align, | ||
1650 | gfp_t gfp_mask, pgprot_t prot, | ||
1651 | int node, void *caller) | ||
1652 | { | ||
1653 | return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, | ||
1654 | gfp_mask, prot, node, caller); | ||
1655 | } | ||
1656 | |||
1583 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) | 1657 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) |
1584 | { | 1658 | { |
1585 | return __vmalloc_node(size, 1, gfp_mask, prot, -1, | 1659 | return __vmalloc_node(size, 1, gfp_mask, prot, -1, |
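The new __vmalloc_node_range() lets a caller pick the virtual range instead of always using VMALLOC_START..VMALLOC_END. A hedged sketch of the kind of caller this enables — an x86-style module_alloc() placing module memory in the arch's module window — assuming MODULES_VADDR/MODULES_END/MODULES_LEN from the architecture headers and the __vmalloc_node_range() prototype exported by the rest of this series:

#include <linux/vmalloc.h>
#include <linux/moduleloader.h>

/* Sketch only: allocate module memory from the module VA window so that
 * relative branches to the core kernel stay in range. */
void *module_alloc(unsigned long size)
{
	if (!size || PAGE_ALIGN(size) > MODULES_LEN)
		return NULL;
	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
				    GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
				    -1, __builtin_return_address(0));
}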
@@ -1587,6 +1661,13 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) | |||
1587 | } | 1661 | } |
1588 | EXPORT_SYMBOL(__vmalloc); | 1662 | EXPORT_SYMBOL(__vmalloc); |
1589 | 1663 | ||
1664 | static inline void *__vmalloc_node_flags(unsigned long size, | ||
1665 | int node, gfp_t flags) | ||
1666 | { | ||
1667 | return __vmalloc_node(size, 1, flags, PAGE_KERNEL, | ||
1668 | node, __builtin_return_address(0)); | ||
1669 | } | ||
1670 | |||
1590 | /** | 1671 | /** |
1591 | * vmalloc - allocate virtually contiguous memory | 1672 | * vmalloc - allocate virtually contiguous memory |
1592 | * @size: allocation size | 1673 | * @size: allocation size |
@@ -1598,12 +1679,28 @@ EXPORT_SYMBOL(__vmalloc); | |||
1598 | */ | 1679 | */ |
1599 | void *vmalloc(unsigned long size) | 1680 | void *vmalloc(unsigned long size) |
1600 | { | 1681 | { |
1601 | return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, | 1682 | return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM); |
1602 | -1, __builtin_return_address(0)); | ||
1603 | } | 1683 | } |
1604 | EXPORT_SYMBOL(vmalloc); | 1684 | EXPORT_SYMBOL(vmalloc); |
1605 | 1685 | ||
1606 | /** | 1686 | /** |
1687 | * vzalloc - allocate virtually contiguous memory with zero fill | ||
1688 | * @size: allocation size | ||
1689 | * Allocate enough pages to cover @size from the page level | ||
1690 | * allocator and map them into contiguous kernel virtual space. | ||
1691 | * The memory allocated is set to zero. | ||
1692 | * | ||
1693 | * For tight control over page level allocator and protection flags | ||
1694 | * use __vmalloc() instead. | ||
1695 | */ | ||
1696 | void *vzalloc(unsigned long size) | ||
1697 | { | ||
1698 | return __vmalloc_node_flags(size, -1, | ||
1699 | GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); | ||
1700 | } | ||
1701 | EXPORT_SYMBOL(vzalloc); | ||
1702 | |||
1703 | /** | ||
1607 | * vmalloc_user - allocate zeroed virtually contiguous memory for userspace | 1704 | * vmalloc_user - allocate zeroed virtually contiguous memory for userspace |
1608 | * @size: allocation size | 1705 | * @size: allocation size |
1609 | * | 1706 | * |
@@ -1644,6 +1741,25 @@ void *vmalloc_node(unsigned long size, int node) | |||
1644 | } | 1741 | } |
1645 | EXPORT_SYMBOL(vmalloc_node); | 1742 | EXPORT_SYMBOL(vmalloc_node); |
1646 | 1743 | ||
1744 | /** | ||
1745 | * vzalloc_node - allocate memory on a specific node with zero fill | ||
1746 | * @size: allocation size | ||
1747 | * @node: numa node | ||
1748 | * | ||
1749 | * Allocate enough pages to cover @size from the page level | ||
1750 | * allocator and map them into contiguous kernel virtual space. | ||
1751 | * The memory allocated is set to zero. | ||
1752 | * | ||
1753 | * For tight control over page level allocator and protection flags | ||
1754 | * use __vmalloc_node() instead. | ||
1755 | */ | ||
1756 | void *vzalloc_node(unsigned long size, int node) | ||
1757 | { | ||
1758 | return __vmalloc_node_flags(size, node, | ||
1759 | GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); | ||
1760 | } | ||
1761 | EXPORT_SYMBOL(vzalloc_node); | ||
1762 | |||
1647 | #ifndef PAGE_KERNEL_EXEC | 1763 | #ifndef PAGE_KERNEL_EXEC |
1648 | # define PAGE_KERNEL_EXEC PAGE_KERNEL | 1764 | # define PAGE_KERNEL_EXEC PAGE_KERNEL |
1649 | #endif | 1765 | #endif |
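vzalloc() and vzalloc_node() above fold the common vmalloc-then-memset(0) pattern into a single call via __vmalloc_node_flags() with __GFP_ZERO. A typical conversion might look like the following hedged example (the hash-table caller is hypothetical):

#include <linux/list.h>
#include <linux/vmalloc.h>

/* Hypothetical caller: a large, zero-initialized bucket array.
 * Replaces the old open-coded pattern:
 *	tbl = vmalloc(nr * sizeof(*tbl));
 *	if (tbl)
 *		memset(tbl, 0, nr * sizeof(*tbl));
 */
static struct hlist_head *alloc_buckets(unsigned int nr)
{
	return vzalloc(nr * sizeof(struct hlist_head));
}

A NUMA-aware caller would use vzalloc_node(nr * sizeof(struct hlist_head), node) in the same way; both are freed with vfree().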
@@ -1892,8 +2008,6 @@ finished: | |||
1892 | * should know vmalloc() area is valid and can use memcpy(). | 2008 | * should know vmalloc() area is valid and can use memcpy(). |
1893 | * This is for routines which have to access vmalloc area without | 2009 | * This is for routines which have to access vmalloc area without |
1894 | * any informaion, as /dev/kmem. | 2010 | * any informaion, as /dev/kmem. |
1895 | * | ||
1896 | * The caller should guarantee KM_USER1 is not used. | ||
1897 | */ | 2011 | */ |
1898 | 2012 | ||
1899 | long vwrite(char *buf, char *addr, unsigned long count) | 2013 | long vwrite(char *buf, char *addr, unsigned long count) |
@@ -2039,10 +2153,6 @@ struct vm_struct *alloc_vm_area(size_t size) | |||
2039 | return NULL; | 2153 | return NULL; |
2040 | } | 2154 | } |
2041 | 2155 | ||
2042 | /* Make sure the pagetables are constructed in process kernel | ||
2043 | mappings */ | ||
2044 | vmalloc_sync_all(); | ||
2045 | |||
2046 | return area; | 2156 | return area; |
2047 | } | 2157 | } |
2048 | EXPORT_SYMBOL_GPL(alloc_vm_area); | 2158 | EXPORT_SYMBOL_GPL(alloc_vm_area); |
@@ -2056,6 +2166,7 @@ void free_vm_area(struct vm_struct *area) | |||
2056 | } | 2166 | } |
2057 | EXPORT_SYMBOL_GPL(free_vm_area); | 2167 | EXPORT_SYMBOL_GPL(free_vm_area); |
2058 | 2168 | ||
2169 | #ifdef CONFIG_SMP | ||
2059 | static struct vmap_area *node_to_va(struct rb_node *n) | 2170 | static struct vmap_area *node_to_va(struct rb_node *n) |
2060 | { | 2171 | { |
2061 | return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; | 2172 | return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; |
@@ -2145,17 +2256,16 @@ static unsigned long pvm_determine_end(struct vmap_area **pnext, | |||
2145 | * @sizes: array containing size of each area | 2256 | * @sizes: array containing size of each area |
2146 | * @nr_vms: the number of areas to allocate | 2257 | * @nr_vms: the number of areas to allocate |
2147 | * @align: alignment, all entries in @offsets and @sizes must be aligned to this | 2258 | * @align: alignment, all entries in @offsets and @sizes must be aligned to this |
2148 | * @gfp_mask: allocation mask | ||
2149 | * | 2259 | * |
2150 | * Returns: kmalloc'd vm_struct pointer array pointing to allocated | 2260 | * Returns: kmalloc'd vm_struct pointer array pointing to allocated |
2151 | * vm_structs on success, %NULL on failure | 2261 | * vm_structs on success, %NULL on failure |
2152 | * | 2262 | * |
2153 | * Percpu allocator wants to use congruent vm areas so that it can | 2263 | * Percpu allocator wants to use congruent vm areas so that it can |
2154 | * maintain the offsets among percpu areas. This function allocates | 2264 | * maintain the offsets among percpu areas. This function allocates |
2155 | * congruent vmalloc areas for it. These areas tend to be scattered | 2265 | * congruent vmalloc areas for it with GFP_KERNEL. These areas tend to |
2156 | * pretty far, distance between two areas easily going up to | 2266 | * be scattered pretty far, distance between two areas easily going up |
2157 | * gigabytes. To avoid interacting with regular vmallocs, these areas | 2267 | * to gigabytes. To avoid interacting with regular vmallocs, these |
2158 | * are allocated from top. | 2268 | * areas are allocated from top. |
2159 | * | 2269 | * |
2160 | * Despite its complicated look, this allocator is rather simple. It | 2270 | * Despite its complicated look, this allocator is rather simple. It |
2161 | * does everything top-down and scans areas from the end looking for | 2271 | * does everything top-down and scans areas from the end looking for |
@@ -2166,7 +2276,7 @@ static unsigned long pvm_determine_end(struct vmap_area **pnext, | |||
2166 | */ | 2276 | */ |
2167 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, | 2277 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, |
2168 | const size_t *sizes, int nr_vms, | 2278 | const size_t *sizes, int nr_vms, |
2169 | size_t align, gfp_t gfp_mask) | 2279 | size_t align) |
2170 | { | 2280 | { |
2171 | const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); | 2281 | const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); |
2172 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); | 2282 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); |
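With the gfp_mask parameter dropped, pcpu_get_vm_areas() always allocates its bookkeeping with GFP_KERNEL. A sketch of the call shape under the new signature — the layout values are purely illustrative, since the only real caller is the percpu first-chunk setup code:

#include <linux/vmalloc.h>

/* Two congruent 2 MB areas, offsets aligned to the 2 MB unit (made up). */
static const unsigned long pcpu_offs[] = { 0, 4UL << 20 };
static const size_t pcpu_sizes[]       = { 2UL << 20, 2UL << 20 };

static struct vm_struct **reserve_pcpu_areas(void)
{
	/* gfp_mask argument is gone; GFP_KERNEL is implied */
	return pcpu_get_vm_areas(pcpu_offs, pcpu_sizes, 2, 2UL << 20);
}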
@@ -2176,8 +2286,6 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, | |||
2176 | unsigned long base, start, end, last_end; | 2286 | unsigned long base, start, end, last_end; |
2177 | bool purged = false; | 2287 | bool purged = false; |
2178 | 2288 | ||
2179 | gfp_mask &= GFP_RECLAIM_MASK; | ||
2180 | |||
2181 | /* verify parameters and allocate data structures */ | 2289 | /* verify parameters and allocate data structures */ |
2182 | BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); | 2290 | BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); |
2183 | for (last_area = 0, area = 0; area < nr_vms; area++) { | 2291 | for (last_area = 0, area = 0; area < nr_vms; area++) { |
@@ -2210,14 +2318,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, | |||
2210 | return NULL; | 2318 | return NULL; |
2211 | } | 2319 | } |
2212 | 2320 | ||
2213 | vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask); | 2321 | vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL); |
2214 | vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask); | 2322 | vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL); |
2215 | if (!vas || !vms) | 2323 | if (!vas || !vms) |
2216 | goto err_free; | 2324 | goto err_free; |
2217 | 2325 | ||
2218 | for (area = 0; area < nr_vms; area++) { | 2326 | for (area = 0; area < nr_vms; area++) { |
2219 | vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask); | 2327 | vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL); |
2220 | vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask); | 2328 | vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL); |
2221 | if (!vas[area] || !vms[area]) | 2329 | if (!vas[area] || !vms[area]) |
2222 | goto err_free; | 2330 | goto err_free; |
2223 | } | 2331 | } |
@@ -2336,9 +2444,11 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) | |||
2336 | free_vm_area(vms[i]); | 2444 | free_vm_area(vms[i]); |
2337 | kfree(vms); | 2445 | kfree(vms); |
2338 | } | 2446 | } |
2447 | #endif /* CONFIG_SMP */ | ||
2339 | 2448 | ||
2340 | #ifdef CONFIG_PROC_FS | 2449 | #ifdef CONFIG_PROC_FS |
2341 | static void *s_start(struct seq_file *m, loff_t *pos) | 2450 | static void *s_start(struct seq_file *m, loff_t *pos) |
2451 | __acquires(&vmlist_lock) | ||
2342 | { | 2452 | { |
2343 | loff_t n = *pos; | 2453 | loff_t n = *pos; |
2344 | struct vm_struct *v; | 2454 | struct vm_struct *v; |
@@ -2365,6 +2475,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) | |||
2365 | } | 2475 | } |
2366 | 2476 | ||
2367 | static void s_stop(struct seq_file *m, void *p) | 2477 | static void s_stop(struct seq_file *m, void *p) |
2478 | __releases(&vmlist_lock) | ||
2368 | { | 2479 | { |
2369 | read_unlock(&vmlist_lock); | 2480 | read_unlock(&vmlist_lock); |
2370 | } | 2481 | } |
@@ -2395,13 +2506,8 @@ static int s_show(struct seq_file *m, void *p) | |||
2395 | seq_printf(m, "0x%p-0x%p %7ld", | 2506 | seq_printf(m, "0x%p-0x%p %7ld", |
2396 | v->addr, v->addr + v->size, v->size); | 2507 | v->addr, v->addr + v->size, v->size); |
2397 | 2508 | ||
2398 | if (v->caller) { | 2509 | if (v->caller) |
2399 | char buff[KSYM_SYMBOL_LEN]; | 2510 | seq_printf(m, " %pS", v->caller); |
2400 | |||
2401 | seq_putc(m, ' '); | ||
2402 | sprint_symbol(buff, (unsigned long)v->caller); | ||
2403 | seq_puts(m, buff); | ||
2404 | } | ||
2405 | 2511 | ||
2406 | if (v->nr_pages) | 2512 | if (v->nr_pages) |
2407 | seq_printf(m, " pages=%d", v->nr_pages); | 2513 | seq_printf(m, " pages=%d", v->nr_pages); |
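The open-coded sprint_symbol() into a KSYM_SYMBOL_LEN stack buffer is replaced by vsprintf's %pS extension, which prints a code address as symbol+offset directly. The same idiom works anywhere a caller address needs to be shown, e.g. in this hypothetical helper:

#include <linux/kernel.h>

static void show_caller(const void *caller)
{
	/* prints e.g. "allocated by __vmalloc_node+0x99/0xb0" */
	pr_info("allocated by %pS\n", caller);
}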