path: root/mm/vmalloc.c
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r--  mm/vmalloc.c  561
1 file changed, 506 insertions(+), 55 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f8189a4b3e13..69511e663234 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -25,7 +25,7 @@
 #include <linux/rcupdate.h>
 #include <linux/pfn.h>
 #include <linux/kmemleak.h>
-
+#include <linux/highmem.h>
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
@@ -168,11 +168,9 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
 		next = pgd_addr_end(addr, end);
 		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
 		if (err)
-			break;
+			return err;
 	} while (pgd++, addr = next, addr != end);
 
-	if (unlikely(err))
-		return err;
 	return nr;
 }
 
@@ -186,7 +184,7 @@ static int vmap_page_range(unsigned long start, unsigned long end,
 	return ret;
 }
 
-static inline int is_vmalloc_or_module_addr(const void *x)
+int is_vmalloc_or_module_addr(const void *x)
 {
 	/*
 	 * ARM, x86-64 and sparc64 put modules in a special place,
@@ -265,6 +263,7 @@ struct vmap_area {
 static DEFINE_SPINLOCK(vmap_area_lock);
 static struct rb_root vmap_area_root = RB_ROOT;
 static LIST_HEAD(vmap_area_list);
+static unsigned long vmap_area_pcpu_hole;
 
 static struct vmap_area *__find_vmap_area(unsigned long addr)
 {
@@ -431,6 +430,15 @@ static void __free_vmap_area(struct vmap_area *va)
 	RB_CLEAR_NODE(&va->rb_node);
 	list_del_rcu(&va->list);
 
+	/*
+	 * Track the highest possible candidate for pcpu area
+	 * allocation.  Areas outside of vmalloc area can be returned
+	 * here too, consider only end addresses which fall inside
+	 * vmalloc area proper.
+	 */
+	if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
+		vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
+
 	call_rcu(&va->rcu_head, rcu_free_va);
 }
 
@@ -1038,6 +1046,9 @@ void __init vmalloc_init(void)
 		va->va_end = va->va_start + tmp->size;
 		__insert_vmap_area(va);
 	}
+
+	vmap_area_pcpu_hole = VMALLOC_END;
+
 	vmap_initialized = true;
 }
 
@@ -1122,13 +1133,34 @@ EXPORT_SYMBOL_GPL(map_vm_area);
 DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
 
+static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
+			      unsigned long flags, void *caller)
+{
+	struct vm_struct *tmp, **p;
+
+	vm->flags = flags;
+	vm->addr = (void *)va->va_start;
+	vm->size = va->va_end - va->va_start;
+	vm->caller = caller;
+	va->private = vm;
+	va->flags |= VM_VM_AREA;
+
+	write_lock(&vmlist_lock);
+	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
+		if (tmp->addr >= vm->addr)
+			break;
+	}
+	vm->next = *p;
+	*p = vm;
+	write_unlock(&vmlist_lock);
+}
+
 static struct vm_struct *__get_vm_area_node(unsigned long size,
 		unsigned long flags, unsigned long start, unsigned long end,
 		int node, gfp_t gfp_mask, void *caller)
 {
 	static struct vmap_area *va;
 	struct vm_struct *area;
-	struct vm_struct *tmp, **p;
 	unsigned long align = 1;
 
 	BUG_ON(in_interrupt());
@@ -1147,7 +1179,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
 	if (unlikely(!size))
 		return NULL;
 
-	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
+	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
 	if (unlikely(!area))
 		return NULL;
 
@@ -1162,25 +1194,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
 		return NULL;
 	}
 
-	area->flags = flags;
-	area->addr = (void *)va->va_start;
-	area->size = size;
-	area->pages = NULL;
-	area->nr_pages = 0;
-	area->phys_addr = 0;
-	area->caller = caller;
-	va->private = area;
-	va->flags |= VM_VM_AREA;
-
-	write_lock(&vmlist_lock);
-	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
-		if (tmp->addr >= area->addr)
-			break;
-	}
-	area->next = *p;
-	*p = area;
-	write_unlock(&vmlist_lock);
-
+	insert_vmalloc_vm(area, va, flags, caller);
 	return area;
 }
 
@@ -1256,17 +1270,21 @@ struct vm_struct *remove_vm_area(const void *addr)
 	if (va && va->flags & VM_VM_AREA) {
 		struct vm_struct *vm = va->private;
 		struct vm_struct *tmp, **p;
-
-		vmap_debug_free_range(va->va_start, va->va_end);
-		free_unmap_vmap_area(va);
-		vm->size -= PAGE_SIZE;
-
+		/*
+		 * Remove from the list and disallow access to this vm_struct
+		 * before unmapping.  (Address range conflicts are handled by
+		 * vmap.)
+		 */
 		write_lock(&vmlist_lock);
 		for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
 			;
 		*p = tmp->next;
 		write_unlock(&vmlist_lock);
 
+		vmap_debug_free_range(va->va_start, va->va_end);
+		free_unmap_vmap_area(va);
+		vm->size -= PAGE_SIZE;
+
 		return vm;
 	}
 	return NULL;
@@ -1368,7 +1386,7 @@ void *vmap(struct page **pages, unsigned int count,
 
 	might_sleep();
 
-	if (count > num_physpages)
+	if (count > totalram_pages)
 		return NULL;
 
 	area = get_vm_area_caller((count << PAGE_SHIFT), flags,
@@ -1475,7 +1493,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 	unsigned long real_size = size;
 
 	size = PAGE_ALIGN(size);
-	if (!size || (size >> PAGE_SHIFT) > num_physpages)
+	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
 		return NULL;
 
 	area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
@@ -1625,10 +1643,120 @@ void *vmalloc_32_user(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc_32_user);
 
+/*
+ * Small helper routine: copy contents to buf from addr.
+ * If the page is not present, fill zero.
+ */
+
+static int aligned_vread(char *buf, char *addr, unsigned long count)
+{
+	struct page *p;
+	int copied = 0;
+
+	while (count) {
+		unsigned long offset, length;
+
+		offset = (unsigned long)addr & ~PAGE_MASK;
+		length = PAGE_SIZE - offset;
+		if (length > count)
+			length = count;
+		p = vmalloc_to_page(addr);
+		/*
+		 * To do safe access to this _mapped_ area, we need
+		 * lock. But adding lock here means that we need to add
+		 * overhead of vmalloc()/vfree() calls for this _debug_
+		 * interface, rarely used. Instead of that, we'll use
+		 * kmap() and get small overhead in this access function.
+		 */
+		if (p) {
+			/*
+			 * we can expect USER0 is not used (see vread/vwrite's
+			 * function description)
+			 */
+			void *map = kmap_atomic(p, KM_USER0);
+			memcpy(buf, map + offset, length);
+			kunmap_atomic(map, KM_USER0);
+		} else
+			memset(buf, 0, length);
+
+		addr += length;
+		buf += length;
+		copied += length;
+		count -= length;
+	}
+	return copied;
+}
+
+static int aligned_vwrite(char *buf, char *addr, unsigned long count)
+{
+	struct page *p;
+	int copied = 0;
+
+	while (count) {
+		unsigned long offset, length;
+
+		offset = (unsigned long)addr & ~PAGE_MASK;
+		length = PAGE_SIZE - offset;
+		if (length > count)
+			length = count;
+		p = vmalloc_to_page(addr);
+		/*
+		 * To do safe access to this _mapped_ area, we need
+		 * lock. But adding lock here means that we need to add
+		 * overhead of vmalloc()/vfree() calls for this _debug_
+		 * interface, rarely used. Instead of that, we'll use
+		 * kmap() and get small overhead in this access function.
+		 */
+		if (p) {
+			/*
+			 * we can expect USER0 is not used (see vread/vwrite's
+			 * function description)
+			 */
+			void *map = kmap_atomic(p, KM_USER0);
+			memcpy(map + offset, buf, length);
+			kunmap_atomic(map, KM_USER0);
+		}
+		addr += length;
+		buf += length;
+		copied += length;
+		count -= length;
+	}
+	return copied;
+}
+
+/**
+ * vread() - read vmalloc area in a safe way.
+ * @buf:	buffer for reading data
+ * @addr:	vm address.
+ * @count:	number of bytes to be read.
+ *
+ * Returns # of bytes which addr and buf should be increased
+ * (same number as @count).  Returns 0 if [addr...addr+count)
+ * doesn't include any intersection with an alive vmalloc area.
+ *
+ * This function checks that addr is a valid vmalloc'ed area, and
+ * copies data from that area to a given buffer. If the given memory range
+ * of [addr...addr+count) includes some valid address, data is copied to
+ * the proper area of @buf. If there are memory holes, they'll be zero-filled.
+ * IOREMAP areas are treated as memory holes and no copy is done.
+ *
+ * If [addr...addr+count) doesn't include any intersection with an alive
+ * vm_struct area, this returns 0.
+ * @buf should be a kernel buffer.  Because this function uses KM_USER0,
+ * the caller should guarantee KM_USER0 is not used.
+ *
+ * Note: In usual ops, vread() is never necessary because the caller
+ * should know the vmalloc() area is valid and can use memcpy().
+ * This is for routines which have to access the vmalloc area without
+ * any information, such as /dev/kmem.
+ *
+ */
+
 long vread(char *buf, char *addr, unsigned long count)
 {
 	struct vm_struct *tmp;
 	char *vaddr, *buf_start = buf;
+	unsigned long buflen = count;
 	unsigned long n;
 
 	/* Don't allow overflow */
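
The aligned_vread()/vread() pair above exists for debug-style readers such as /dev/kmem that must walk vmalloc space without holding vmalloc locks across the whole copy. A minimal sketch of such a caller, assuming a hypothetical kmem_read_vmalloc() helper and a bounce buffer small enough for a single kmalloc():

#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>

/* Hypothetical /dev/kmem-style read path built on vread(). */
static ssize_t kmem_read_vmalloc(char __user *ubuf, unsigned long kaddr,
				 size_t len)
{
	char *kbuf;
	long copied;

	kbuf = kmalloc(len, GFP_KERNEL);	/* bounce buffer for one chunk */
	if (!kbuf)
		return -ENOMEM;

	/* vread() zero-fills holes; it returns 0 only if no live area is hit */
	copied = vread(kbuf, (char *)kaddr, len);
	if (copied > 0 && copy_to_user(ubuf, kbuf, copied))
		copied = -EFAULT;

	kfree(kbuf);
	return copied;
}

Because vread() zero-fills the holes it crosses, the caller only has to special-case the all-miss return value of 0.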
@@ -1636,7 +1764,7 @@ long vread(char *buf, char *addr, unsigned long count)
 		count = -(unsigned long) addr;
 
 	read_lock(&vmlist_lock);
-	for (tmp = vmlist; tmp; tmp = tmp->next) {
+	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
 		vaddr = (char *) tmp->addr;
 		if (addr >= vaddr + tmp->size - PAGE_SIZE)
 			continue;
@@ -1649,32 +1777,72 @@ long vread(char *buf, char *addr, unsigned long count)
 			count--;
 		}
 		n = vaddr + tmp->size - PAGE_SIZE - addr;
-		do {
-			if (count == 0)
-				goto finished;
-			*buf = *addr;
-			buf++;
-			addr++;
-			count--;
-		} while (--n > 0);
+		if (n > count)
+			n = count;
+		if (!(tmp->flags & VM_IOREMAP))
+			aligned_vread(buf, addr, n);
+		else /* IOREMAP area is treated as memory hole */
+			memset(buf, 0, n);
+		buf += n;
+		addr += n;
+		count -= n;
 	}
 finished:
 	read_unlock(&vmlist_lock);
-	return buf - buf_start;
+
+	if (buf == buf_start)
+		return 0;
+	/* zero-fill memory holes */
+	if (buf != buf_start + buflen)
+		memset(buf, 0, buflen - (buf - buf_start));
+
+	return buflen;
 }
 
+/**
+ * vwrite() - write vmalloc area in a safe way.
+ * @buf:	buffer for source data
+ * @addr:	vm address.
+ * @count:	number of bytes to be written.
+ *
+ * Returns # of bytes which addr and buf should be increased
+ * (same number as @count).
+ * If [addr...addr+count) doesn't include any intersection with a valid
+ * vmalloc area, returns 0.
+ *
+ * This function checks that addr is a valid vmalloc'ed area, and
+ * copies data from a buffer to the given addr. If the specified range of
+ * [addr...addr+count) includes some valid address, data is copied from
+ * the proper area of @buf. If there are memory holes, nothing is copied
+ * into them.  IOREMAP areas are treated as memory holes and no copy is done.
+ *
+ * If [addr...addr+count) doesn't include any intersection with an alive
+ * vm_struct area, returns 0.
+ * @buf should be a kernel buffer.  Because this function uses KM_USER0,
+ * the caller should guarantee KM_USER0 is not used.
+ *
+ * Note: In usual ops, vwrite() is never necessary because the caller
+ * should know the vmalloc() area is valid and can use memcpy().
+ * This is for routines which have to access the vmalloc area without
+ * any information, such as /dev/kmem.
+ *
+ * The caller should guarantee KM_USER1 is not used.
+ */
+
 long vwrite(char *buf, char *addr, unsigned long count)
 {
 	struct vm_struct *tmp;
-	char *vaddr, *buf_start = buf;
-	unsigned long n;
+	char *vaddr;
+	unsigned long n, buflen;
+	int copied = 0;
 
 	/* Don't allow overflow */
 	if ((unsigned long) addr + count < count)
 		count = -(unsigned long) addr;
+	buflen = count;
 
 	read_lock(&vmlist_lock);
-	for (tmp = vmlist; tmp; tmp = tmp->next) {
+	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
 		vaddr = (char *) tmp->addr;
 		if (addr >= vaddr + tmp->size - PAGE_SIZE)
 			continue;
@@ -1686,18 +1854,21 @@ long vwrite(char *buf, char *addr, unsigned long count)
 			count--;
 		}
 		n = vaddr + tmp->size - PAGE_SIZE - addr;
-		do {
-			if (count == 0)
-				goto finished;
-			*addr = *buf;
-			buf++;
-			addr++;
-			count--;
-		} while (--n > 0);
+		if (n > count)
+			n = count;
+		if (!(tmp->flags & VM_IOREMAP)) {
+			aligned_vwrite(buf, addr, n);
+			copied++;
+		}
+		buf += n;
+		addr += n;
+		count -= n;
 	}
 finished:
 	read_unlock(&vmlist_lock);
-	return buf - buf_start;
+	if (!copied)
+		return 0;
+	return buflen;
 }
 
 /**
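
vwrite() has the complementary semantics: holes and VM_IOREMAP ranges are silently skipped rather than zero-filled, and the function returns 0 only when nothing in the range overlapped a live vm_struct. A minimal sketch, assuming a hypothetical kmem_poke_vmalloc() helper:

#include <linux/vmalloc.h>

/* Hypothetical helper: patch bytes inside a live vmalloc mapping. */
static int kmem_poke_vmalloc(unsigned long kaddr, void *data, size_t len)
{
	/*
	 * vwrite() copies only into pages that are actually mapped and
	 * skips VM_IOREMAP ranges; a return of 0 means nothing in the
	 * range was a live vmalloc area.
	 */
	if (!vwrite(data, (char *)kaddr, len))
		return -EINVAL;
	return 0;
}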
@@ -1818,6 +1989,286 @@ void free_vm_area(struct vm_struct *area)
 }
 EXPORT_SYMBOL_GPL(free_vm_area);
 
+static struct vmap_area *node_to_va(struct rb_node *n)
+{
+	return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
+}
+
+/**
+ * pvm_find_next_prev - find the next and prev vmap_area surrounding @end
+ * @end: target address
+ * @pnext: out arg for the next vmap_area
+ * @pprev: out arg for the previous vmap_area
+ *
+ * Returns: %true if either or both of next and prev are found,
+ *	    %false if no vmap_area exists
+ *
+ * Find the vmap_areas whose end addresses enclose @end, i.e. if not
+ * NULL, *pnext->va_end > @end and *pprev->va_end <= @end.
+ */
+static bool pvm_find_next_prev(unsigned long end,
+			       struct vmap_area **pnext,
+			       struct vmap_area **pprev)
+{
+	struct rb_node *n = vmap_area_root.rb_node;
+	struct vmap_area *va = NULL;
+
+	while (n) {
+		va = rb_entry(n, struct vmap_area, rb_node);
+		if (end < va->va_end)
+			n = n->rb_left;
+		else if (end > va->va_end)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	if (!va)
+		return false;
+
+	if (va->va_end > end) {
+		*pnext = va;
+		*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
+	} else {
+		*pprev = va;
+		*pnext = node_to_va(rb_next(&(*pprev)->rb_node));
+	}
+	return true;
+}
+
+/**
+ * pvm_determine_end - find the highest aligned address between two vmap_areas
+ * @pnext: in/out arg for the next vmap_area
+ * @pprev: in/out arg for the previous vmap_area
+ * @align: alignment
+ *
+ * Returns: determined end address
+ *
+ * Find the highest aligned address between *@pnext and *@pprev below
+ * VMALLOC_END.  *@pnext and *@pprev are adjusted so that the aligned
+ * down address is between the end addresses of the two vmap_areas.
+ *
+ * Please note that the address returned by this function may fall
+ * inside the *@pnext vmap_area.  The caller is responsible for checking
+ * that.
+ */
+static unsigned long pvm_determine_end(struct vmap_area **pnext,
+				       struct vmap_area **pprev,
+				       unsigned long align)
+{
+	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
+	unsigned long addr;
+
+	if (*pnext)
+		addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
+	else
+		addr = vmalloc_end;
+
+	while (*pprev && (*pprev)->va_end > addr) {
+		*pnext = *pprev;
+		*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
+	}
+
+	return addr;
+}
+
+/**
+ * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
+ * @offsets: array containing offset of each area
+ * @sizes: array containing size of each area
+ * @nr_vms: the number of areas to allocate
+ * @align: alignment, all entries in @offsets and @sizes must be aligned to this
+ * @gfp_mask: allocation mask
+ *
+ * Returns: kmalloc'd vm_struct pointer array pointing to allocated
+ *	    vm_structs on success, %NULL on failure
+ *
+ * Percpu allocator wants to use congruent vm areas so that it can
+ * maintain the offsets among percpu areas.  This function allocates
+ * congruent vmalloc areas for it.  These areas tend to be scattered
+ * pretty far, distance between two areas easily going up to
+ * gigabytes.  To avoid interacting with regular vmallocs, these areas
+ * are allocated from the top.
+ *
+ * Despite its complicated look, this allocator is rather simple.  It
+ * does everything top-down and scans areas from the end looking for a
+ * matching slot.  While scanning, if any of the areas overlaps with an
+ * existing vmap_area, the base address is pulled down to fit the
+ * area.  Scanning is repeated till all the areas fit and then all
+ * necessary data structures are inserted and the result is returned.
+ */
+struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
+				     const size_t *sizes, int nr_vms,
+				     size_t align, gfp_t gfp_mask)
+{
+	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
+	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
+	struct vmap_area **vas, *prev, *next;
+	struct vm_struct **vms;
+	int area, area2, last_area, term_area;
+	unsigned long base, start, end, last_end;
+	bool purged = false;
+
+	gfp_mask &= GFP_RECLAIM_MASK;
+
+	/* verify parameters and allocate data structures */
+	BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
+	for (last_area = 0, area = 0; area < nr_vms; area++) {
+		start = offsets[area];
+		end = start + sizes[area];
+
+		/* is everything aligned properly? */
+		BUG_ON(!IS_ALIGNED(offsets[area], align));
+		BUG_ON(!IS_ALIGNED(sizes[area], align));
+
+		/* detect the area with the highest address */
+		if (start > offsets[last_area])
+			last_area = area;
+
+		for (area2 = 0; area2 < nr_vms; area2++) {
+			unsigned long start2 = offsets[area2];
+			unsigned long end2 = start2 + sizes[area2];
+
+			if (area2 == area)
+				continue;
+
+			BUG_ON(start2 >= start && start2 < end);
+			BUG_ON(end2 <= end && end2 > start);
+		}
+	}
+	last_end = offsets[last_area] + sizes[last_area];
+
+	if (vmalloc_end - vmalloc_start < last_end) {
+		WARN_ON(true);
+		return NULL;
+	}
+
+	vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
+	vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
+	if (!vas || !vms)
+		goto err_free;
+
+	for (area = 0; area < nr_vms; area++) {
+		vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
+		vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
+		if (!vas[area] || !vms[area])
+			goto err_free;
+	}
+retry:
+	spin_lock(&vmap_area_lock);
+
+	/* start scanning - we scan from the top, begin with the last area */
+	area = term_area = last_area;
+	start = offsets[area];
+	end = start + sizes[area];
+
+	if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
+		base = vmalloc_end - last_end;
+		goto found;
+	}
+	base = pvm_determine_end(&next, &prev, align) - end;
+
+	while (true) {
+		BUG_ON(next && next->va_end <= base + end);
+		BUG_ON(prev && prev->va_end > base + end);
+
+		/*
+		 * base might have underflowed, add last_end before
+		 * comparing.
+		 */
+		if (base + last_end < vmalloc_start + last_end) {
+			spin_unlock(&vmap_area_lock);
+			if (!purged) {
+				purge_vmap_area_lazy();
+				purged = true;
+				goto retry;
+			}
+			goto err_free;
+		}
+
+		/*
+		 * If next overlaps, move base downwards so that it's
+		 * right below next and then recheck.
+		 */
+		if (next && next->va_start < base + end) {
+			base = pvm_determine_end(&next, &prev, align) - end;
+			term_area = area;
+			continue;
+		}
+
+		/*
+		 * If prev overlaps, shift down next and prev and move
+		 * base so that it's right below new next and then
+		 * recheck.
+		 */
+		if (prev && prev->va_end > base + start) {
+			next = prev;
+			prev = node_to_va(rb_prev(&next->rb_node));
+			base = pvm_determine_end(&next, &prev, align) - end;
+			term_area = area;
+			continue;
+		}
+
+		/*
+		 * This area fits, move on to the previous one.  If
+		 * the previous one is the terminal one, we're done.
+		 */
+		area = (area + nr_vms - 1) % nr_vms;
+		if (area == term_area)
+			break;
+		start = offsets[area];
+		end = start + sizes[area];
+		pvm_find_next_prev(base + end, &next, &prev);
+	}
+found:
+	/* we've found a fitting base, insert all va's */
+	for (area = 0; area < nr_vms; area++) {
+		struct vmap_area *va = vas[area];
+
+		va->va_start = base + offsets[area];
+		va->va_end = va->va_start + sizes[area];
+		__insert_vmap_area(va);
+	}
+
+	vmap_area_pcpu_hole = base + offsets[last_area];
+
+	spin_unlock(&vmap_area_lock);
+
+	/* insert all vm's */
+	for (area = 0; area < nr_vms; area++)
+		insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
+				  pcpu_get_vm_areas);
+
+	kfree(vas);
+	return vms;
+
+err_free:
+	for (area = 0; area < nr_vms; area++) {
+		if (vas)
+			kfree(vas[area]);
+		if (vms)
+			kfree(vms[area]);
+	}
+	kfree(vas);
+	kfree(vms);
+	return NULL;
+}
+
+/**
+ * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
+ * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
+ * @nr_vms: the number of allocated areas
+ *
+ * Free vm_structs and the array allocated by pcpu_get_vm_areas().
+ */
+void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
+{
+	int i;
+
+	for (i = 0; i < nr_vms; i++)
+		free_vm_area(vms[i]);
+	kfree(vms);
+}
 
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
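
pcpu_get_vm_areas()/pcpu_free_vm_areas() are intended for the percpu allocator, which needs several vmalloc areas at fixed offsets from a common base so that all groups stay congruent. A minimal sketch of the calling convention, with made-up offsets, sizes and alignment (the real caller derives these from the first percpu chunk's layout):

#include <linux/gfp.h>
#include <linux/vmalloc.h>

/* Hypothetical caller: request two congruent 2MB areas, 4MB apart. */
static struct vm_struct **alloc_two_congruent_areas(void)
{
	const unsigned long offsets[] = { 0, 4UL << 20 };
	const size_t sizes[] = { 2UL << 20, 2UL << 20 };

	/* every offset and size must be a multiple of the alignment */
	return pcpu_get_vm_areas(offsets, sizes, 2, 2UL << 20, GFP_KERNEL);
}

The returned array and both areas would later be released together with pcpu_free_vm_areas(vms, 2).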