path: root/mm/vmalloc.c
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r--	mm/vmalloc.c	356
1 file changed, 231 insertions(+), 125 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6b8889da69a6..1d34d75366a7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,8 +31,6 @@
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
-bool vmap_lazy_unmap __read_mostly = true;
-
 /*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -263,8 +261,15 @@ struct vmap_area {
 };
 
 static DEFINE_SPINLOCK(vmap_area_lock);
-static struct rb_root vmap_area_root = RB_ROOT;
 static LIST_HEAD(vmap_area_list);
+static struct rb_root vmap_area_root = RB_ROOT;
+
+/* The vmap cache globals are protected by vmap_area_lock */
+static struct rb_node *free_vmap_cache;
+static unsigned long cached_hole_size;
+static unsigned long cached_vstart;
+static unsigned long cached_align;
+
 static unsigned long vmap_area_pcpu_hole;
 
 static struct vmap_area *__find_vmap_area(unsigned long addr)
@@ -293,13 +298,13 @@ static void __insert_vmap_area(struct vmap_area *va)
 	struct rb_node *tmp;
 
 	while (*p) {
-		struct vmap_area *tmp;
+		struct vmap_area *tmp_va;
 
 		parent = *p;
-		tmp = rb_entry(parent, struct vmap_area, rb_node);
-		if (va->va_start < tmp->va_end)
+		tmp_va = rb_entry(parent, struct vmap_area, rb_node);
+		if (va->va_start < tmp_va->va_end)
 			p = &(*p)->rb_left;
-		else if (va->va_end > tmp->va_start)
+		else if (va->va_end > tmp_va->va_start)
 			p = &(*p)->rb_right;
 		else
 			BUG();
@@ -333,9 +338,11 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	struct rb_node *n;
 	unsigned long addr;
 	int purged = 0;
+	struct vmap_area *first;
 
 	BUG_ON(!size);
 	BUG_ON(size & ~PAGE_MASK);
+	BUG_ON(!is_power_of_2(align));
 
 	va = kmalloc_node(sizeof(struct vmap_area),
 			gfp_mask & GFP_RECLAIM_MASK, node);
@@ -343,79 +350,106 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 		return ERR_PTR(-ENOMEM);
 
 retry:
-	addr = ALIGN(vstart, align);
-
 	spin_lock(&vmap_area_lock);
-	if (addr + size - 1 < addr)
-		goto overflow;
+	/*
+	 * Invalidate cache if we have more permissive parameters.
+	 * cached_hole_size notes the largest hole noticed _below_
+	 * the vmap_area cached in free_vmap_cache: if size fits
+	 * into that hole, we want to scan from vstart to reuse
+	 * the hole instead of allocating above free_vmap_cache.
+	 * Note that __free_vmap_area may update free_vmap_cache
+	 * without updating cached_hole_size or cached_align.
+	 */
+	if (!free_vmap_cache ||
+			size < cached_hole_size ||
+			vstart < cached_vstart ||
+			align < cached_align) {
+nocache:
+		cached_hole_size = 0;
+		free_vmap_cache = NULL;
+	}
+	/* record if we encounter less permissive parameters */
+	cached_vstart = vstart;
+	cached_align = align;
+
+	/* find starting point for our search */
+	if (free_vmap_cache) {
+		first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
+		addr = ALIGN(first->va_end, align);
+		if (addr < vstart)
+			goto nocache;
+		if (addr + size - 1 < addr)
+			goto overflow;
+
+	} else {
+		addr = ALIGN(vstart, align);
+		if (addr + size - 1 < addr)
+			goto overflow;
 
-	/* XXX: could have a last_hole cache */
-	n = vmap_area_root.rb_node;
-	if (n) {
-		struct vmap_area *first = NULL;
+		n = vmap_area_root.rb_node;
+		first = NULL;
 
-		do {
+		while (n) {
 			struct vmap_area *tmp;
 			tmp = rb_entry(n, struct vmap_area, rb_node);
 			if (tmp->va_end >= addr) {
-				if (!first && tmp->va_start < addr + size)
-					first = tmp;
-				n = n->rb_left;
-			} else {
 				first = tmp;
+				if (tmp->va_start <= addr)
+					break;
+				n = n->rb_left;
+			} else
 				n = n->rb_right;
-			}
-		} while (n);
+		}
 
 		if (!first)
 			goto found;
-
-		if (first->va_end < addr) {
-			n = rb_next(&first->rb_node);
-			if (n)
-				first = rb_entry(n, struct vmap_area, rb_node);
-			else
-				goto found;
-		}
-
-		while (addr + size > first->va_start && addr + size <= vend) {
-			addr = ALIGN(first->va_end + PAGE_SIZE, align);
-			if (addr + size - 1 < addr)
-				goto overflow;
-
-			n = rb_next(&first->rb_node);
-			if (n)
-				first = rb_entry(n, struct vmap_area, rb_node);
-			else
-				goto found;
-		}
 	}
-found:
-	if (addr + size > vend) {
-overflow:
-		spin_unlock(&vmap_area_lock);
-		if (!purged) {
-			purge_vmap_area_lazy();
-			purged = 1;
-			goto retry;
-		}
-		if (printk_ratelimit())
-			printk(KERN_WARNING
-				"vmap allocation for size %lu failed: "
-				"use vmalloc=<size> to increase size.\n", size);
-		kfree(va);
-		return ERR_PTR(-EBUSY);
+
+	/* from the starting point, walk areas until a suitable hole is found */
+	while (addr + size > first->va_start && addr + size <= vend) {
+		if (addr + cached_hole_size < first->va_start)
+			cached_hole_size = first->va_start - addr;
+		addr = ALIGN(first->va_end, align);
+		if (addr + size - 1 < addr)
+			goto overflow;
+
+		n = rb_next(&first->rb_node);
+		if (n)
+			first = rb_entry(n, struct vmap_area, rb_node);
+		else
+			goto found;
 	}
 
-	BUG_ON(addr & (align-1));
+found:
+	if (addr + size > vend)
+		goto overflow;
 
 	va->va_start = addr;
 	va->va_end = addr + size;
 	va->flags = 0;
 	__insert_vmap_area(va);
+	free_vmap_cache = &va->rb_node;
 	spin_unlock(&vmap_area_lock);
 
+	BUG_ON(va->va_start & (align-1));
+	BUG_ON(va->va_start < vstart);
+	BUG_ON(va->va_end > vend);
+
 	return va;
+
+overflow:
+	spin_unlock(&vmap_area_lock);
+	if (!purged) {
+		purge_vmap_area_lazy();
+		purged = 1;
+		goto retry;
+	}
+	if (printk_ratelimit())
+		printk(KERN_WARNING
+			"vmap allocation for size %lu failed: "
+			"use vmalloc=<size> to increase size.\n", size);
+	kfree(va);
+	return ERR_PTR(-EBUSY);
 }
 
 static void rcu_free_va(struct rcu_head *head)
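Usage note (not part of the patch): the cache-validity test in the hunk above can be restated as a standalone predicate. The helper name example_can_use_vmap_cache() is hypothetical and only illustrates when free_vmap_cache may be used as the search starting point; the real code open-codes this check under vmap_area_lock.

static bool example_can_use_vmap_cache(unsigned long size, unsigned long align,
				       unsigned long vstart)
{
	if (!free_vmap_cache)
		return false;		/* nothing cached yet */
	if (size < cached_hole_size)
		return false;		/* a known hole below the cache would fit */
	if (vstart < cached_vstart)
		return false;		/* caller allows lower addresses than before */
	if (align < cached_align)
		return false;		/* smaller alignment opens earlier slots */
	return true;			/* resume scanning above the cached area */
}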
@@ -428,6 +462,22 @@ static void rcu_free_va(struct rcu_head *head)
 static void __free_vmap_area(struct vmap_area *va)
 {
 	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
+
+	if (free_vmap_cache) {
+		if (va->va_end < cached_vstart) {
+			free_vmap_cache = NULL;
+		} else {
+			struct vmap_area *cache;
+			cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
+			if (va->va_start <= cache->va_start) {
+				free_vmap_cache = rb_prev(&va->rb_node);
+				/*
+				 * We don't try to update cached_hole_size or
+				 * cached_align, but it won't go very wrong.
+				 */
+			}
+		}
+	}
 	rb_erase(&va->rb_node, &vmap_area_root);
 	RB_CLEAR_NODE(&va->rb_node);
 	list_del_rcu(&va->list);
@@ -503,9 +553,6 @@ static unsigned long lazy_max_pages(void)
 {
 	unsigned int log;
 
-	if (!vmap_lazy_unmap)
-		return 0;
-
 	log = fls(num_online_cpus());
 
 	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
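Worked example (not part of the patch): on a machine with 4 online CPUs and 4 KiB pages, fls(4) = 3 and 32 MB / 4 KiB = 8192, so lazy_max_pages() returns 3 * 8192 = 24576 pages. Roughly 96 MB of lazily freed vmap address space can therefore accumulate before a purge is forced.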
@@ -517,6 +564,15 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
 static void purge_fragmented_blocks_allcpus(void);
 
 /*
+ * called before a call to iounmap() if the caller wants vm_area_struct's
+ * immediately freed.
+ */
+void set_iounmap_nonlazy(void)
+{
+	atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
+}
+
+/*
  * Purges all lazily-freed vmap areas.
  *
  * If sync is 0 then don't purge if there is already a purge in progress.
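Usage note (not part of the patch): a minimal sketch of the intended calling pattern, assuming an ioremap-based reader in the style of the crash-dump path. The helper example_read_oldmem() and its parameters are hypothetical, and ioremap_cache() is architecture dependent; the point is only that set_iounmap_nonlazy() is called immediately before the iounmap() whose vm_area_struct should be freed right away.

static int example_read_oldmem(unsigned long pfn, void *buf, size_t count)
{
	void __iomem *vaddr;

	vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!vaddr)
		return -ENOMEM;

	memcpy_fromio(buf, vaddr, count);

	set_iounmap_nonlazy();	/* make the iounmap() below free the area now */
	iounmap(vaddr);
	return 0;
}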
@@ -557,7 +613,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 			if (va->va_end > *end)
 				*end = va->va_end;
 			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
-			unmap_vmap_area(va);
 			list_add_tail(&va->purge_list, &valist);
 			va->flags |= VM_LAZY_FREEING;
 			va->flags &= ~VM_LAZY_FREE;
@@ -602,10 +657,11 @@ static void purge_vmap_area_lazy(void)
 }
 
 /*
- * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
- * called for the correct range previously.
+ * Free a vmap area, caller ensuring that the area has been unmapped
+ * and flush_cache_vunmap had been called for the correct range
+ * previously.
  */
-static void free_unmap_vmap_area_noflush(struct vmap_area *va)
+static void free_vmap_area_noflush(struct vmap_area *va)
 {
 	va->flags |= VM_LAZY_FREE;
 	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
@@ -614,6 +670,16 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va)
 }
 
 /*
+ * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
+ * called for the correct range previously.
+ */
+static void free_unmap_vmap_area_noflush(struct vmap_area *va)
+{
+	unmap_vmap_area(va);
+	free_vmap_area_noflush(va);
+}
+
+/*
  * Free and unmap a vmap area
  */
 static void free_unmap_vmap_area(struct vmap_area *va)
@@ -734,7 +800,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
 	va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
 					VMALLOC_START, VMALLOC_END,
 					node, gfp_mask);
-	if (unlikely(IS_ERR(va))) {
+	if (IS_ERR(va)) {
 		kfree(vb);
 		return ERR_CAST(va);
 	}
@@ -789,7 +855,7 @@ static void free_vmap_block(struct vmap_block *vb)
 	spin_unlock(&vmap_block_tree_lock);
 	BUG_ON(tmp != vb);
 
-	free_unmap_vmap_area_noflush(vb->va);
+	free_vmap_area_noflush(vb->va);
 	call_rcu(&vb->rcu_head, rcu_free_vb);
 }
 
@@ -927,6 +993,8 @@ static void vb_free(const void *addr, unsigned long size)
 	rcu_read_unlock();
 	BUG_ON(!vb);
 
+	vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
+
 	spin_lock(&vb->lock);
 	BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
 
@@ -979,7 +1047,6 @@ void vm_unmap_aliases(void)
 
 			s = vb->va->va_start + (i << PAGE_SHIFT);
 			e = vb->va->va_start + (j << PAGE_SHIFT);
-			vunmap_page_range(s, e);
 			flush = 1;
 
 			if (s < start)
@@ -1160,6 +1227,7 @@ void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
 {
 	vunmap_page_range(addr, addr + size);
 }
+EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
 
 /**
  * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
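Usage note (not part of the patch): unmap_kernel_range_noflush() leaves cache and TLB maintenance to the caller, which is the reason a module-visible export is useful. The helper below is a hypothetical sketch of that documented contract, assuming addr/size came from an earlier map_kernel_range_noflush() by the same caller.

static void example_teardown_mapping(unsigned long addr, unsigned long size)
{
	flush_cache_vunmap(addr, addr + size);		/* virtually-indexed caches */
	unmap_kernel_range_noflush(addr, size);		/* clear the page tables */
	flush_tlb_kernel_range(addr, addr + size);	/* then invalidate the TLB */
}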
@@ -1300,13 +1368,6 @@ struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
 				-1, GFP_KERNEL, caller);
 }
 
-struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
-				int node, gfp_t gfp_mask)
-{
-	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
-				node, gfp_mask, __builtin_return_address(0));
-}
-
 static struct vm_struct *find_vm_area(const void *addr)
 {
 	struct vmap_area *va;
@@ -1473,6 +1534,7 @@ static void *__vmalloc_node(unsigned long size, unsigned long align,
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node, void *caller)
 {
+	const int order = 0;
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
 	gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
@@ -1499,11 +1561,12 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 	for (i = 0; i < area->nr_pages; i++) {
 		struct page *page;
+		gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
 
 		if (node < 0)
-			page = alloc_page(gfp_mask);
+			page = alloc_page(tmp_mask);
 		else
-			page = alloc_pages_node(node, gfp_mask, 0);
+			page = alloc_pages_node(node, tmp_mask, order);
 
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vunmap() */
@@ -1518,29 +1581,19 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	return area->addr;
 
 fail:
+	warn_alloc_failed(gfp_mask, order, "vmalloc: allocation failure, "
+			  "allocated %ld of %ld bytes\n",
+			  (area->nr_pages*PAGE_SIZE), area->size);
 	vfree(area->addr);
 	return NULL;
 }
 
-void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
-{
-	void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1,
-					 __builtin_return_address(0));
-
-	/*
-	 * A ref_count = 3 is needed because the vm_struct and vmap_area
-	 * structures allocated in the __get_vm_area_node() function contain
-	 * references to the virtual address of the vmalloc'ed block.
-	 */
-	kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask);
-
-	return addr;
-}
-
 /**
- * __vmalloc_node - allocate virtually contiguous memory
+ * __vmalloc_node_range - allocate virtually contiguous memory
  * @size: allocation size
  * @align: desired alignment
+ * @start: vm area range start
+ * @end: vm area range end
  * @gfp_mask: flags for the page level allocator
  * @prot: protection mask for the allocated pages
  * @node: node to use for allocation or -1
@@ -1550,9 +1603,9 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
  * allocator with @gfp_mask flags. Map them into contiguous
  * kernel virtual space, using a pagetable protection of @prot.
  */
-static void *__vmalloc_node(unsigned long size, unsigned long align,
-			    gfp_t gfp_mask, pgprot_t prot,
-			    int node, void *caller)
+void *__vmalloc_node_range(unsigned long size, unsigned long align,
+			unsigned long start, unsigned long end, gfp_t gfp_mask,
+			pgprot_t prot, int node, void *caller)
 {
 	struct vm_struct *area;
 	void *addr;
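Usage note (not part of the patch): __vmalloc_node_range() lets a caller confine an allocation to an arbitrary VA window instead of VMALLOC_START..VMALLOC_END. A hypothetical sketch of an architecture-style module allocator is shown below; MODULES_VADDR, MODULES_END and example_module_alloc() are assumptions for illustration only.

void *example_module_alloc(unsigned long size)
{
	if (PAGE_ALIGN(size) > MODULES_END - MODULES_VADDR)
		return NULL;
	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
				    GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
				    -1, __builtin_return_address(0));
}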
@@ -1562,8 +1615,8 @@ static void *__vmalloc_node(unsigned long size, unsigned long align,
 	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
 		return NULL;
 
-	area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
-				  VMALLOC_END, node, gfp_mask, caller);
+	area = __get_vm_area_node(size, align, VM_ALLOC, start, end, node,
+				  gfp_mask, caller);
 
 	if (!area)
 		return NULL;
@@ -1580,6 +1633,27 @@ static void *__vmalloc_node(unsigned long size, unsigned long align,
 	return addr;
 }
 
+/**
+ * __vmalloc_node - allocate virtually contiguous memory
+ * @size: allocation size
+ * @align: desired alignment
+ * @gfp_mask: flags for the page level allocator
+ * @prot: protection mask for the allocated pages
+ * @node: node to use for allocation or -1
+ * @caller: caller's return address
+ *
+ * Allocate enough pages to cover @size from the page level
+ * allocator with @gfp_mask flags. Map them into contiguous
+ * kernel virtual space, using a pagetable protection of @prot.
+ */
+static void *__vmalloc_node(unsigned long size, unsigned long align,
+			    gfp_t gfp_mask, pgprot_t prot,
+			    int node, void *caller)
+{
+	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+				gfp_mask, prot, node, caller);
+}
+
 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 {
 	return __vmalloc_node(size, 1, gfp_mask, prot, -1,
@@ -1587,6 +1661,13 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 }
 EXPORT_SYMBOL(__vmalloc);
 
+static inline void *__vmalloc_node_flags(unsigned long size,
+					int node, gfp_t flags)
+{
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
+				node, __builtin_return_address(0));
+}
+
 /**
  * vmalloc - allocate virtually contiguous memory
  * @size: allocation size
@@ -1598,12 +1679,28 @@ EXPORT_SYMBOL(__vmalloc);
  */
 void *vmalloc(unsigned long size)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
-				-1, __builtin_return_address(0));
+	return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
 }
 EXPORT_SYMBOL(vmalloc);
 
 /**
+ * vzalloc - allocate virtually contiguous memory with zero fill
+ * @size: allocation size
+ * Allocate enough pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ * The memory allocated is set to zero.
+ *
+ * For tight control over page level allocator and protection flags
+ * use __vmalloc() instead.
+ */
+void *vzalloc(unsigned long size)
+{
+	return __vmalloc_node_flags(size, -1,
+				GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+}
+EXPORT_SYMBOL(vzalloc);
+
+/**
  * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
  * @size: allocation size
  *
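Usage note (not part of the patch): vzalloc() replaces the common vmalloc()+memset(0) pattern. The structure and helper below are hypothetical and only show the typical vzalloc()/vfree() pairing for a large, zero-initialized table.

struct example_table {
	unsigned long nr_entries;
	u64 entries[];			/* flexible array, zeroed by vzalloc() */
};

static struct example_table *example_table_alloc(unsigned long nr)
{
	struct example_table *t;

	t = vzalloc(sizeof(*t) + nr * sizeof(t->entries[0]));
	if (!t)
		return NULL;		/* caller handles the failure */
	t->nr_entries = nr;		/* everything else is already zero */
	return t;			/* release later with vfree(t) */
}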
@@ -1644,6 +1741,25 @@ void *vmalloc_node(unsigned long size, int node)
 }
 EXPORT_SYMBOL(vmalloc_node);
 
+/**
+ * vzalloc_node - allocate memory on a specific node with zero fill
+ * @size: allocation size
+ * @node: numa node
+ *
+ * Allocate enough pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ * The memory allocated is set to zero.
+ *
+ * For tight control over page level allocator and protection flags
+ * use __vmalloc_node() instead.
+ */
+void *vzalloc_node(unsigned long size, int node)
+{
+	return __vmalloc_node_flags(size, node,
+			 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+}
+EXPORT_SYMBOL(vzalloc_node);
+
 #ifndef PAGE_KERNEL_EXEC
 # define PAGE_KERNEL_EXEC PAGE_KERNEL
 #endif
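Usage note (not part of the patch): a hypothetical sketch of node-local, zeroed buffers allocated with vzalloc_node(), one per online NUMA node. example_stats and example_alloc_stats() are illustration-only names.

static void *example_stats[MAX_NUMNODES];

static int example_alloc_stats(size_t bytes)
{
	int node;

	for_each_online_node(node) {
		example_stats[node] = vzalloc_node(bytes, node);
		if (!example_stats[node])
			return -ENOMEM;	/* caller unwinds with vfree() */
	}
	return 0;
}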
@@ -1892,8 +2008,6 @@ finished:
  * should know vmalloc() area is valid and can use memcpy().
  * This is for routines which have to access vmalloc area without
  * any informaion, as /dev/kmem.
- *
- * The caller should guarantee KM_USER1 is not used.
  */
 
 long vwrite(char *buf, char *addr, unsigned long count)
@@ -2039,10 +2153,6 @@ struct vm_struct *alloc_vm_area(size_t size)
 		return NULL;
 	}
 
-	/* Make sure the pagetables are constructed in process kernel
-	   mappings */
-	vmalloc_sync_all();
-
 	return area;
 }
 EXPORT_SYMBOL_GPL(alloc_vm_area);
@@ -2056,6 +2166,7 @@ void free_vm_area(struct vm_struct *area)
 }
 EXPORT_SYMBOL_GPL(free_vm_area);
 
+#ifdef CONFIG_SMP
 static struct vmap_area *node_to_va(struct rb_node *n)
 {
 	return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
@@ -2145,17 +2256,16 @@ static unsigned long pvm_determine_end(struct vmap_area **pnext,
  * @sizes: array containing size of each area
  * @nr_vms: the number of areas to allocate
  * @align: alignment, all entries in @offsets and @sizes must be aligned to this
- * @gfp_mask: allocation mask
  *
  * Returns: kmalloc'd vm_struct pointer array pointing to allocated
  * vm_structs on success, %NULL on failure
  *
  * Percpu allocator wants to use congruent vm areas so that it can
  * maintain the offsets among percpu areas. This function allocates
- * congruent vmalloc areas for it. These areas tend to be scattered
- * pretty far, distance between two areas easily going up to
- * gigabytes. To avoid interacting with regular vmallocs, these areas
- * are allocated from top.
+ * congruent vmalloc areas for it with GFP_KERNEL. These areas tend to
+ * be scattered pretty far, distance between two areas easily going up
+ * to gigabytes. To avoid interacting with regular vmallocs, these
+ * areas are allocated from top.
  *
  * Despite its complicated look, this allocator is rather simple. It
  * does everything top-down and scans areas from the end looking for
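Usage note (not part of the patch): "congruent" means every area keeps the same offset from a common base, which is what the percpu first-chunk code relies on. The hypothetical sketch below requests two such areas with the new gfp_mask-less signature; free them later with pcpu_free_vm_areas().

static struct vm_struct **example_grab_congruent_areas(void)
{
	const unsigned long offsets[] = { 0, 4 * PAGE_SIZE };
	const size_t sizes[] = { 2 * PAGE_SIZE, 2 * PAGE_SIZE };

	/* GFP_KERNEL is now implied; the gfp_mask argument is gone. */
	return pcpu_get_vm_areas(offsets, sizes, ARRAY_SIZE(sizes), PAGE_SIZE);
}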
@@ -2166,7 +2276,7 @@ static unsigned long pvm_determine_end(struct vmap_area **pnext,
  */
 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 				     const size_t *sizes, int nr_vms,
-				     size_t align, gfp_t gfp_mask)
+				     size_t align)
 {
 	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
 	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
@@ -2176,8 +2286,6 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	unsigned long base, start, end, last_end;
 	bool purged = false;
 
-	gfp_mask &= GFP_RECLAIM_MASK;
-
 	/* verify parameters and allocate data structures */
 	BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
 	for (last_area = 0, area = 0; area < nr_vms; area++) {
@@ -2210,14 +2318,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 		return NULL;
 	}
 
-	vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
-	vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
+	vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL);
+	vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL);
 	if (!vas || !vms)
 		goto err_free;
 
 	for (area = 0; area < nr_vms; area++) {
-		vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
-		vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
+		vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
+		vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
 		if (!vas[area] || !vms[area])
 			goto err_free;
 	}
@@ -2336,9 +2444,11 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 		free_vm_area(vms[i]);
 	kfree(vms);
 }
+#endif	/* CONFIG_SMP */
 
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
+	__acquires(&vmlist_lock)
 {
 	loff_t n = *pos;
 	struct vm_struct *v;
@@ -2365,6 +2475,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
 }
 
 static void s_stop(struct seq_file *m, void *p)
+	__releases(&vmlist_lock)
 {
 	read_unlock(&vmlist_lock);
 }
@@ -2395,13 +2506,8 @@ static int s_show(struct seq_file *m, void *p)
 	seq_printf(m, "0x%p-0x%p %7ld",
 		v->addr, v->addr + v->size, v->size);
 
-	if (v->caller) {
-		char buff[KSYM_SYMBOL_LEN];
-
-		seq_putc(m, ' ');
-		sprint_symbol(buff, (unsigned long)v->caller);
-		seq_puts(m, buff);
-	}
+	if (v->caller)
+		seq_printf(m, " %pS", v->caller);
 
 	if (v->nr_pages)
 		seq_printf(m, " pages=%d", v->nr_pages);