Diffstat (limited to 'mm/vmalloc.c')
| -rw-r--r-- | mm/vmalloc.c | 561 |
1 file changed, 506 insertions(+), 55 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f8189a4b3e13..69511e663234 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
| @@ -25,7 +25,7 @@ | |||
| 25 | #include <linux/rcupdate.h> | 25 | #include <linux/rcupdate.h> |
| 26 | #include <linux/pfn.h> | 26 | #include <linux/pfn.h> |
| 27 | #include <linux/kmemleak.h> | 27 | #include <linux/kmemleak.h> |
| 28 | 28 | #include <linux/highmem.h> | |
| 29 | #include <asm/atomic.h> | 29 | #include <asm/atomic.h> |
| 30 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
| 31 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
| @@ -168,11 +168,9 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, | |||
| 168 | next = pgd_addr_end(addr, end); | 168 | next = pgd_addr_end(addr, end); |
| 169 | err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); | 169 | err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); |
| 170 | if (err) | 170 | if (err) |
| 171 | break; | 171 | return err; |
| 172 | } while (pgd++, addr = next, addr != end); | 172 | } while (pgd++, addr = next, addr != end); |
| 173 | 173 | ||
| 174 | if (unlikely(err)) | ||
| 175 | return err; | ||
| 176 | return nr; | 174 | return nr; |
| 177 | } | 175 | } |
| 178 | 176 | ||
| @@ -186,7 +184,7 @@ static int vmap_page_range(unsigned long start, unsigned long end, | |||
| 186 | return ret; | 184 | return ret; |
| 187 | } | 185 | } |
| 188 | 186 | ||
| 189 | static inline int is_vmalloc_or_module_addr(const void *x) | 187 | int is_vmalloc_or_module_addr(const void *x) |
| 190 | { | 188 | { |
| 191 | /* | 189 | /* |
| 192 | * ARM, x86-64 and sparc64 put modules in a special place, | 190 | * ARM, x86-64 and sparc64 put modules in a special place, |
| @@ -265,6 +263,7 @@ struct vmap_area { | |||
| 265 | static DEFINE_SPINLOCK(vmap_area_lock); | 263 | static DEFINE_SPINLOCK(vmap_area_lock); |
| 266 | static struct rb_root vmap_area_root = RB_ROOT; | 264 | static struct rb_root vmap_area_root = RB_ROOT; |
| 267 | static LIST_HEAD(vmap_area_list); | 265 | static LIST_HEAD(vmap_area_list); |
| 266 | static unsigned long vmap_area_pcpu_hole; | ||
| 268 | 267 | ||
| 269 | static struct vmap_area *__find_vmap_area(unsigned long addr) | 268 | static struct vmap_area *__find_vmap_area(unsigned long addr) |
| 270 | { | 269 | { |
| @@ -431,6 +430,15 @@ static void __free_vmap_area(struct vmap_area *va) | |||
| 431 | RB_CLEAR_NODE(&va->rb_node); | 430 | RB_CLEAR_NODE(&va->rb_node); |
| 432 | list_del_rcu(&va->list); | 431 | list_del_rcu(&va->list); |
| 433 | 432 | ||
| 433 | /* | ||
| 434 | * Track the highest possible candidate for pcpu area | ||
| 435 | * allocation. Areas outside of vmalloc area can be returned | ||
| 436 | * here too, so consider only end addresses which fall inside | ||
| 437 | * vmalloc area proper. | ||
| 438 | */ | ||
| 439 | if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) | ||
| 440 | vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); | ||
| 441 | |||
| 434 | call_rcu(&va->rcu_head, rcu_free_va); | 442 | call_rcu(&va->rcu_head, rcu_free_va); |
| 435 | } | 443 | } |
| 436 | 444 | ||
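The new vmap_area_pcpu_hole bookkeeping above remembers the highest freed end address that lies inside the vmalloc range; pcpu_get_vm_areas() further down starts its top-down scan from that point. A minimal standalone sketch of the invariant (plain C, illustrative only, not kernel code):

    /* Illustrative model of the hole tracking added above. */
    static unsigned long pcpu_hole;        /* set to VMALLOC_END at init time */

    static void note_freed_range(unsigned long va_end,
                                 unsigned long vmalloc_start,
                                 unsigned long vmalloc_end)
    {
            /* Only end addresses inside the vmalloc range are candidates. */
            if (va_end > vmalloc_start && va_end <= vmalloc_end &&
                va_end > pcpu_hole)
                    pcpu_hole = va_end;
    }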
| @@ -1038,6 +1046,9 @@ void __init vmalloc_init(void) | |||
| 1038 | va->va_end = va->va_start + tmp->size; | 1046 | va->va_end = va->va_start + tmp->size; |
| 1039 | __insert_vmap_area(va); | 1047 | __insert_vmap_area(va); |
| 1040 | } | 1048 | } |
| 1049 | |||
| 1050 | vmap_area_pcpu_hole = VMALLOC_END; | ||
| 1051 | |||
| 1041 | vmap_initialized = true; | 1052 | vmap_initialized = true; |
| 1042 | } | 1053 | } |
| 1043 | 1054 | ||
| @@ -1122,13 +1133,34 @@ EXPORT_SYMBOL_GPL(map_vm_area); | |||
| 1122 | DEFINE_RWLOCK(vmlist_lock); | 1133 | DEFINE_RWLOCK(vmlist_lock); |
| 1123 | struct vm_struct *vmlist; | 1134 | struct vm_struct *vmlist; |
| 1124 | 1135 | ||
| 1136 | static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, | ||
| 1137 | unsigned long flags, void *caller) | ||
| 1138 | { | ||
| 1139 | struct vm_struct *tmp, **p; | ||
| 1140 | |||
| 1141 | vm->flags = flags; | ||
| 1142 | vm->addr = (void *)va->va_start; | ||
| 1143 | vm->size = va->va_end - va->va_start; | ||
| 1144 | vm->caller = caller; | ||
| 1145 | va->private = vm; | ||
| 1146 | va->flags |= VM_VM_AREA; | ||
| 1147 | |||
| 1148 | write_lock(&vmlist_lock); | ||
| 1149 | for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { | ||
| 1150 | if (tmp->addr >= vm->addr) | ||
| 1151 | break; | ||
| 1152 | } | ||
| 1153 | vm->next = *p; | ||
| 1154 | *p = vm; | ||
| 1155 | write_unlock(&vmlist_lock); | ||
| 1156 | } | ||
| 1157 | |||
| 1125 | static struct vm_struct *__get_vm_area_node(unsigned long size, | 1158 | static struct vm_struct *__get_vm_area_node(unsigned long size, |
| 1126 | unsigned long flags, unsigned long start, unsigned long end, | 1159 | unsigned long flags, unsigned long start, unsigned long end, |
| 1127 | int node, gfp_t gfp_mask, void *caller) | 1160 | int node, gfp_t gfp_mask, void *caller) |
| 1128 | { | 1161 | { |
| 1129 | static struct vmap_area *va; | 1162 | static struct vmap_area *va; |
| 1130 | struct vm_struct *area; | 1163 | struct vm_struct *area; |
| 1131 | struct vm_struct *tmp, **p; | ||
| 1132 | unsigned long align = 1; | 1164 | unsigned long align = 1; |
| 1133 | 1165 | ||
| 1134 | BUG_ON(in_interrupt()); | 1166 | BUG_ON(in_interrupt()); |
| @@ -1147,7 +1179,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, | |||
| 1147 | if (unlikely(!size)) | 1179 | if (unlikely(!size)) |
| 1148 | return NULL; | 1180 | return NULL; |
| 1149 | 1181 | ||
| 1150 | area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); | 1182 | area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); |
| 1151 | if (unlikely(!area)) | 1183 | if (unlikely(!area)) |
| 1152 | return NULL; | 1184 | return NULL; |
| 1153 | 1185 | ||
| @@ -1162,25 +1194,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, | |||
| 1162 | return NULL; | 1194 | return NULL; |
| 1163 | } | 1195 | } |
| 1164 | 1196 | ||
| 1165 | area->flags = flags; | 1197 | insert_vmalloc_vm(area, va, flags, caller); |
| 1166 | area->addr = (void *)va->va_start; | ||
| 1167 | area->size = size; | ||
| 1168 | area->pages = NULL; | ||
| 1169 | area->nr_pages = 0; | ||
| 1170 | area->phys_addr = 0; | ||
| 1171 | area->caller = caller; | ||
| 1172 | va->private = area; | ||
| 1173 | va->flags |= VM_VM_AREA; | ||
| 1174 | |||
| 1175 | write_lock(&vmlist_lock); | ||
| 1176 | for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { | ||
| 1177 | if (tmp->addr >= area->addr) | ||
| 1178 | break; | ||
| 1179 | } | ||
| 1180 | area->next = *p; | ||
| 1181 | *p = area; | ||
| 1182 | write_unlock(&vmlist_lock); | ||
| 1183 | |||
| 1184 | return area; | 1198 | return area; |
| 1185 | } | 1199 | } |
| 1186 | 1200 | ||
| @@ -1256,17 +1270,21 @@ struct vm_struct *remove_vm_area(const void *addr) | |||
| 1256 | if (va && va->flags & VM_VM_AREA) { | 1270 | if (va && va->flags & VM_VM_AREA) { |
| 1257 | struct vm_struct *vm = va->private; | 1271 | struct vm_struct *vm = va->private; |
| 1258 | struct vm_struct *tmp, **p; | 1272 | struct vm_struct *tmp, **p; |
| 1259 | 1273 | /* | |
| 1260 | vmap_debug_free_range(va->va_start, va->va_end); | 1274 | * remove from list and disallow access to this vm_struct |
| 1261 | free_unmap_vmap_area(va); | 1275 | * before unmap. (address range confliction is maintained by |
| 1262 | vm->size -= PAGE_SIZE; | 1276 | * vmap.) |
| 1263 | 1277 | */ | |
| 1264 | write_lock(&vmlist_lock); | 1278 | write_lock(&vmlist_lock); |
| 1265 | for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) | 1279 | for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) |
| 1266 | ; | 1280 | ; |
| 1267 | *p = tmp->next; | 1281 | *p = tmp->next; |
| 1268 | write_unlock(&vmlist_lock); | 1282 | write_unlock(&vmlist_lock); |
| 1269 | 1283 | ||
| 1284 | vmap_debug_free_range(va->va_start, va->va_end); | ||
| 1285 | free_unmap_vmap_area(va); | ||
| 1286 | vm->size -= PAGE_SIZE; | ||
| 1287 | |||
| 1270 | return vm; | 1288 | return vm; |
| 1271 | } | 1289 | } |
| 1272 | return NULL; | 1290 | return NULL; |
| @@ -1368,7 +1386,7 @@ void *vmap(struct page **pages, unsigned int count, | |||
| 1368 | 1386 | ||
| 1369 | might_sleep(); | 1387 | might_sleep(); |
| 1370 | 1388 | ||
| 1371 | if (count > num_physpages) | 1389 | if (count > totalram_pages) |
| 1372 | return NULL; | 1390 | return NULL; |
| 1373 | 1391 | ||
| 1374 | area = get_vm_area_caller((count << PAGE_SHIFT), flags, | 1392 | area = get_vm_area_caller((count << PAGE_SHIFT), flags, |
| @@ -1475,7 +1493,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | |||
| 1475 | unsigned long real_size = size; | 1493 | unsigned long real_size = size; |
| 1476 | 1494 | ||
| 1477 | size = PAGE_ALIGN(size); | 1495 | size = PAGE_ALIGN(size); |
| 1478 | if (!size || (size >> PAGE_SHIFT) > num_physpages) | 1496 | if (!size || (size >> PAGE_SHIFT) > totalram_pages) |
| 1479 | return NULL; | 1497 | return NULL; |
| 1480 | 1498 | ||
| 1481 | area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END, | 1499 | area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END, |
| @@ -1625,10 +1643,120 @@ void *vmalloc_32_user(unsigned long size) | |||
| 1625 | } | 1643 | } |
| 1626 | EXPORT_SYMBOL(vmalloc_32_user); | 1644 | EXPORT_SYMBOL(vmalloc_32_user); |
| 1627 | 1645 | ||
| 1646 | /* | ||
| 1647 | * Small helper routine: copy contents from addr into buf. | ||
| 1648 | * If the page is not present, fill with zeroes. | ||
| 1649 | */ | ||
| 1650 | |||
| 1651 | static int aligned_vread(char *buf, char *addr, unsigned long count) | ||
| 1652 | { | ||
| 1653 | struct page *p; | ||
| 1654 | int copied = 0; | ||
| 1655 | |||
| 1656 | while (count) { | ||
| 1657 | unsigned long offset, length; | ||
| 1658 | |||
| 1659 | offset = (unsigned long)addr & ~PAGE_MASK; | ||
| 1660 | length = PAGE_SIZE - offset; | ||
| 1661 | if (length > count) | ||
| 1662 | length = count; | ||
| 1663 | p = vmalloc_to_page(addr); | ||
| 1664 | /* | ||
| 1665 | * To do safe access to this _mapped_ area, we need a | ||
| 1666 | * lock. But adding a lock here means that we need to add | ||
| 1667 | * overhead of vmalloc()/vfree() calls for this _debug_ | ||
| 1668 | * interface, rarely used. Instead of that, we'll use | ||
| 1669 | * kmap() and get small overhead in this access function. | ||
| 1670 | */ | ||
| 1671 | if (p) { | ||
| 1672 | /* | ||
| 1673 | * we can expect USER0 is not used (see vread/vwrite's | ||
| 1674 | * function description) | ||
| 1675 | */ | ||
| 1676 | void *map = kmap_atomic(p, KM_USER0); | ||
| 1677 | memcpy(buf, map + offset, length); | ||
| 1678 | kunmap_atomic(map, KM_USER0); | ||
| 1679 | } else | ||
| 1680 | memset(buf, 0, length); | ||
| 1681 | |||
| 1682 | addr += length; | ||
| 1683 | buf += length; | ||
| 1684 | copied += length; | ||
| 1685 | count -= length; | ||
| 1686 | } | ||
| 1687 | return copied; | ||
| 1688 | } | ||
| 1689 | |||
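aligned_vread()/aligned_vwrite() above work one page at a time: the request is split at page boundaries so each chunk can be kmap'ed or zero-filled independently. The per-iteration arithmetic, pulled out into a self-contained user-space sketch (the 4 KiB PAGE_SIZE is an assumption for illustration, not taken from this patch):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    /* How many bytes of 'count' fall into the page containing 'addr'? */
    static unsigned long chunk_in_page(unsigned long addr, unsigned long count)
    {
            unsigned long offset = addr & ~PAGE_MASK;  /* offset inside page */
            unsigned long length = PAGE_SIZE - offset; /* bytes left in page */

            return length > count ? count : length;
    }

    int main(void)
    {
            /* A 5000-byte read starting 100 bytes into a page spans two chunks. */
            printf("%lu\n", chunk_in_page(0x1000 + 100, 5000));        /* 3996 */
            printf("%lu\n", chunk_in_page(0x2000, 5000 - 3996));       /* 1004 */
            return 0;
    }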
| 1690 | static int aligned_vwrite(char *buf, char *addr, unsigned long count) | ||
| 1691 | { | ||
| 1692 | struct page *p; | ||
| 1693 | int copied = 0; | ||
| 1694 | |||
| 1695 | while (count) { | ||
| 1696 | unsigned long offset, length; | ||
| 1697 | |||
| 1698 | offset = (unsigned long)addr & ~PAGE_MASK; | ||
| 1699 | length = PAGE_SIZE - offset; | ||
| 1700 | if (length > count) | ||
| 1701 | length = count; | ||
| 1702 | p = vmalloc_to_page(addr); | ||
| 1703 | /* | ||
| 1704 | * To do safe access to this _mapped_ area, we need a | ||
| 1705 | * lock. But adding a lock here means that we need to add | ||
| 1706 | * overhead of vmalloc()/vfree() calls for this _debug_ | ||
| 1707 | * interface, rarely used. Instead of that, we'll use | ||
| 1708 | * kmap() and get small overhead in this access function. | ||
| 1709 | */ | ||
| 1710 | if (p) { | ||
| 1711 | /* | ||
| 1712 | * we can expect USER0 is not used (see vread/vwrite's | ||
| 1713 | * function description) | ||
| 1714 | */ | ||
| 1715 | void *map = kmap_atomic(p, KM_USER0); | ||
| 1716 | memcpy(map + offset, buf, length); | ||
| 1717 | kunmap_atomic(map, KM_USER0); | ||
| 1718 | } | ||
| 1719 | addr += length; | ||
| 1720 | buf += length; | ||
| 1721 | copied += length; | ||
| 1722 | count -= length; | ||
| 1723 | } | ||
| 1724 | return copied; | ||
| 1725 | } | ||
| 1726 | |||
| 1727 | /** | ||
| 1728 | * vread() - read vmalloc area in a safe way. | ||
| 1729 | * @buf: buffer for reading data | ||
| 1730 | * @addr: vm address. | ||
| 1731 | * @count: number of bytes to be read. | ||
| 1732 | * | ||
| 1733 | * Returns # of bytes by which addr and buf should be increased | ||
| 1734 | * (same number as @count). Returns 0 if [addr...addr+count) doesn't | ||
| 1735 | * include any intersection with an alive vmalloc area. | ||
| 1736 | * | ||
| 1737 | * This function checks that addr is a valid vmalloc'ed area, and | ||
| 1738 | * copies data from that area to a given buffer. If the given memory range | ||
| 1739 | * of [addr...addr+count) includes some valid address, data is copied to | ||
| 1740 | * the proper area of @buf. If there are memory holes, they'll be zero-filled. | ||
| 1741 | * IOREMAP area is treated as memory hole and no copy is done. | ||
| 1742 | * | ||
| 1743 | * If [addr...addr+count) doesn't include any intersection with an alive | ||
| 1744 | * vm_struct area, it returns 0. | ||
| 1745 | * @buf should be a kernel buffer. Because this function uses KM_USER0, | ||
| 1746 | * the caller should guarantee KM_USER0 is not used. | ||
| 1747 | * | ||
| 1748 | * Note: In usual ops, vread() is never necessary because the caller | ||
| 1749 | * should know vmalloc() area is valid and can use memcpy(). | ||
| 1750 | * This is for routines which have to access vmalloc area without | ||
| 1751 | * any information, such as /dev/kmem. | ||
| 1752 | * | ||
| 1753 | */ | ||
| 1754 | |||
| 1628 | long vread(char *buf, char *addr, unsigned long count) | 1755 | long vread(char *buf, char *addr, unsigned long count) |
| 1629 | { | 1756 | { |
| 1630 | struct vm_struct *tmp; | 1757 | struct vm_struct *tmp; |
| 1631 | char *vaddr, *buf_start = buf; | 1758 | char *vaddr, *buf_start = buf; |
| 1759 | unsigned long buflen = count; | ||
| 1632 | unsigned long n; | 1760 | unsigned long n; |
| 1633 | 1761 | ||
| 1634 | /* Don't allow overflow */ | 1762 | /* Don't allow overflow */ |
| @@ -1636,7 +1764,7 @@ long vread(char *buf, char *addr, unsigned long count) | |||
| 1636 | count = -(unsigned long) addr; | 1764 | count = -(unsigned long) addr; |
| 1637 | 1765 | ||
| 1638 | read_lock(&vmlist_lock); | 1766 | read_lock(&vmlist_lock); |
| 1639 | for (tmp = vmlist; tmp; tmp = tmp->next) { | 1767 | for (tmp = vmlist; count && tmp; tmp = tmp->next) { |
| 1640 | vaddr = (char *) tmp->addr; | 1768 | vaddr = (char *) tmp->addr; |
| 1641 | if (addr >= vaddr + tmp->size - PAGE_SIZE) | 1769 | if (addr >= vaddr + tmp->size - PAGE_SIZE) |
| 1642 | continue; | 1770 | continue; |
| @@ -1649,32 +1777,72 @@ long vread(char *buf, char *addr, unsigned long count) | |||
| 1649 | count--; | 1777 | count--; |
| 1650 | } | 1778 | } |
| 1651 | n = vaddr + tmp->size - PAGE_SIZE - addr; | 1779 | n = vaddr + tmp->size - PAGE_SIZE - addr; |
| 1652 | do { | 1780 | if (n > count) |
| 1653 | if (count == 0) | 1781 | n = count; |
| 1654 | goto finished; | 1782 | if (!(tmp->flags & VM_IOREMAP)) |
| 1655 | *buf = *addr; | 1783 | aligned_vread(buf, addr, n); |
| 1656 | buf++; | 1784 | else /* IOREMAP area is treated as memory hole */ |
| 1657 | addr++; | 1785 | memset(buf, 0, n); |
| 1658 | count--; | 1786 | buf += n; |
| 1659 | } while (--n > 0); | 1787 | addr += n; |
| 1788 | count -= n; | ||
| 1660 | } | 1789 | } |
| 1661 | finished: | 1790 | finished: |
| 1662 | read_unlock(&vmlist_lock); | 1791 | read_unlock(&vmlist_lock); |
| 1663 | return buf - buf_start; | 1792 | |
| 1793 | if (buf == buf_start) | ||
| 1794 | return 0; | ||
| 1795 | /* zero-fill memory holes */ | ||
| 1796 | if (buf != buf_start + buflen) | ||
| 1797 | memset(buf, 0, buflen - (buf - buf_start)); | ||
| 1798 | |||
| 1799 | return buflen; | ||
| 1664 | } | 1800 | } |
| 1665 | 1801 | ||
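As the comment above says, vread() exists for callers such as /dev/kmem that must probe vmalloc space without knowing whether it is mapped. A hedged sketch of such a caller (the function name and error handling are illustrative, not part of this patch; only vread(), kmalloc(), copy_to_user() and kfree() are relied on):

    /* Illustrative caller only: copy a vmalloc-space range out to user space.
     * Holes and VM_IOREMAP regions come back zero-filled, so no extra
     * fault handling is needed here. */
    static long dump_vmalloc_range(char __user *ubuf, char *kaddr,
                                   unsigned long len)
    {
            char *kbuf;
            long ret;

            kbuf = kmalloc(len, GFP_KERNEL);
            if (!kbuf)
                    return -ENOMEM;

            ret = vread(kbuf, kaddr, len);  /* 0: no live vmalloc area hit */
            if (ret > 0 && copy_to_user(ubuf, kbuf, ret))
                    ret = -EFAULT;

            kfree(kbuf);
            return ret;
    }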
| 1802 | /** | ||
| 1803 | * vwrite() - write vmalloc area in a safe way. | ||
| 1804 | * @buf: buffer for source data | ||
| 1805 | * @addr: vm address. | ||
| 1806 | * @count: number of bytes to be written. | ||
| 1807 | * | ||
| 1808 | * Returns # of bytes by which addr and buf should be increased | ||
| 1809 | * (same number as @count). | ||
| 1810 | * If [addr...addr+count) doesn't include any intersection with a valid | ||
| 1811 | * vmalloc area, it returns 0. | ||
| 1812 | * | ||
| 1813 | * This function checks that addr is a valid vmalloc'ed area, and | ||
| 1814 | * copies data from a buffer to the given addr. If the specified range of | ||
| 1815 | * [addr...addr+count) includes some valid address, data is copied from | ||
| 1816 | * the proper area of @buf. If there are memory holes, nothing is copied to them. | ||
| 1817 | * IOREMAP area is treated as memory hole and no copy is done. | ||
| 1818 | * | ||
| 1819 | * If [addr...addr+count) doesn't include any intersection with an alive | ||
| 1820 | * vm_struct area, it returns 0. | ||
| 1821 | * @buf should be a kernel buffer. Because this function uses KM_USER0, | ||
| 1822 | * the caller should guarantee KM_USER0 is not used. | ||
| 1823 | * | ||
| 1824 | * Note: In usual ops, vwrite() is never necessary because the caller | ||
| 1825 | * should know vmalloc() area is valid and can use memcpy(). | ||
| 1826 | * This is for routines which have to access vmalloc area without | ||
| 1827 | * any information, such as /dev/kmem. | ||
| 1828 | * | ||
| 1829 | * The caller should guarantee KM_USER1 is not used. | ||
| 1830 | */ | ||
| 1831 | |||
| 1666 | long vwrite(char *buf, char *addr, unsigned long count) | 1832 | long vwrite(char *buf, char *addr, unsigned long count) |
| 1667 | { | 1833 | { |
| 1668 | struct vm_struct *tmp; | 1834 | struct vm_struct *tmp; |
| 1669 | char *vaddr, *buf_start = buf; | 1835 | char *vaddr; |
| 1670 | unsigned long n; | 1836 | unsigned long n, buflen; |
| 1837 | int copied = 0; | ||
| 1671 | 1838 | ||
| 1672 | /* Don't allow overflow */ | 1839 | /* Don't allow overflow */ |
| 1673 | if ((unsigned long) addr + count < count) | 1840 | if ((unsigned long) addr + count < count) |
| 1674 | count = -(unsigned long) addr; | 1841 | count = -(unsigned long) addr; |
| 1842 | buflen = count; | ||
| 1675 | 1843 | ||
| 1676 | read_lock(&vmlist_lock); | 1844 | read_lock(&vmlist_lock); |
| 1677 | for (tmp = vmlist; tmp; tmp = tmp->next) { | 1845 | for (tmp = vmlist; count && tmp; tmp = tmp->next) { |
| 1678 | vaddr = (char *) tmp->addr; | 1846 | vaddr = (char *) tmp->addr; |
| 1679 | if (addr >= vaddr + tmp->size - PAGE_SIZE) | 1847 | if (addr >= vaddr + tmp->size - PAGE_SIZE) |
| 1680 | continue; | 1848 | continue; |
| @@ -1686,18 +1854,21 @@ long vwrite(char *buf, char *addr, unsigned long count) | |||
| 1686 | count--; | 1854 | count--; |
| 1687 | } | 1855 | } |
| 1688 | n = vaddr + tmp->size - PAGE_SIZE - addr; | 1856 | n = vaddr + tmp->size - PAGE_SIZE - addr; |
| 1689 | do { | 1857 | if (n > count) |
| 1690 | if (count == 0) | 1858 | n = count; |
| 1691 | goto finished; | 1859 | if (!(tmp->flags & VM_IOREMAP)) { |
| 1692 | *addr = *buf; | 1860 | aligned_vwrite(buf, addr, n); |
| 1693 | buf++; | 1861 | copied++; |
| 1694 | addr++; | 1862 | } |
| 1695 | count--; | 1863 | buf += n; |
| 1696 | } while (--n > 0); | 1864 | addr += n; |
| 1865 | count -= n; | ||
| 1697 | } | 1866 | } |
| 1698 | finished: | 1867 | finished: |
| 1699 | read_unlock(&vmlist_lock); | 1868 | read_unlock(&vmlist_lock); |
| 1700 | return buf - buf_start; | 1869 | if (!copied) |
| 1870 | return 0; | ||
| 1871 | return buflen; | ||
| 1701 | } | 1872 | } |
| 1702 | 1873 | ||
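The write side is symmetric; a correspondingly hedged sketch of a /dev/kmem-style writer (illustrative name; vwrite() silently skips unmapped pages and VM_IOREMAP regions):

    /* Illustrative caller only: push a user buffer into a vmalloc range. */
    static long poke_vmalloc_range(char *kaddr, const char __user *ubuf,
                                   unsigned long len)
    {
            char *kbuf = kmalloc(len, GFP_KERNEL);
            long ret;

            if (!kbuf)
                    return -ENOMEM;
            if (copy_from_user(kbuf, ubuf, len))
                    ret = -EFAULT;
            else
                    ret = vwrite(kbuf, kaddr, len); /* 0: nothing intersected */

            kfree(kbuf);
            return ret;
    }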
| 1703 | /** | 1874 | /** |
| @@ -1818,6 +1989,286 @@ void free_vm_area(struct vm_struct *area) | |||
| 1818 | } | 1989 | } |
| 1819 | EXPORT_SYMBOL_GPL(free_vm_area); | 1990 | EXPORT_SYMBOL_GPL(free_vm_area); |
| 1820 | 1991 | ||
| 1992 | static struct vmap_area *node_to_va(struct rb_node *n) | ||
| 1993 | { | ||
| 1994 | return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; | ||
| 1995 | } | ||
| 1996 | |||
| 1997 | /** | ||
| 1998 | * pvm_find_next_prev - find the next and prev vmap_area surrounding @end | ||
| 1999 | * @end: target address | ||
| 2000 | * @pnext: out arg for the next vmap_area | ||
| 2001 | * @pprev: out arg for the previous vmap_area | ||
| 2002 | * | ||
| 2003 | * Returns: %true if either or both of next and prev are found, | ||
| 2004 | * %false if no vmap_area exists | ||
| 2005 | * | ||
| 2006 | * Find the vmap_areas whose end addresses enclose @end, i.e. if not | ||
| 2007 | * NULL, (*pnext)->va_end > @end and (*pprev)->va_end <= @end. | ||
| 2008 | */ | ||
| 2009 | static bool pvm_find_next_prev(unsigned long end, | ||
| 2010 | struct vmap_area **pnext, | ||
| 2011 | struct vmap_area **pprev) | ||
| 2012 | { | ||
| 2013 | struct rb_node *n = vmap_area_root.rb_node; | ||
| 2014 | struct vmap_area *va = NULL; | ||
| 2015 | |||
| 2016 | while (n) { | ||
| 2017 | va = rb_entry(n, struct vmap_area, rb_node); | ||
| 2018 | if (end < va->va_end) | ||
| 2019 | n = n->rb_left; | ||
| 2020 | else if (end > va->va_end) | ||
| 2021 | n = n->rb_right; | ||
| 2022 | else | ||
| 2023 | break; | ||
| 2024 | } | ||
| 2025 | |||
| 2026 | if (!va) | ||
| 2027 | return false; | ||
| 2028 | |||
| 2029 | if (va->va_end > end) { | ||
| 2030 | *pnext = va; | ||
| 2031 | *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); | ||
| 2032 | } else { | ||
| 2033 | *pprev = va; | ||
| 2034 | *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); | ||
| 2035 | } | ||
| 2036 | return true; | ||
| 2037 | } | ||
| 2038 | |||
| 2039 | /** | ||
| 2040 | * pvm_determine_end - find the highest aligned address between two vmap_areas | ||
| 2041 | * @pnext: in/out arg for the next vmap_area | ||
| 2042 | * @pprev: in/out arg for the previous vmap_area | ||
| 2043 | * @align: alignment | ||
| 2044 | * | ||
| 2045 | * Returns: determined end address | ||
| 2046 | * | ||
| 2047 | * Find the highest aligned address between *@pnext and *@pprev below | ||
| 2048 | * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned | ||
| 2049 | * down address is between the end addresses of the two vmap_areas. | ||
| 2050 | * | ||
| 2051 | * Please note that the address returned by this function may fall | ||
| 2052 | * inside *@pnext vmap_area. The caller is responsible for checking | ||
| 2053 | * that. | ||
| 2054 | */ | ||
| 2055 | static unsigned long pvm_determine_end(struct vmap_area **pnext, | ||
| 2056 | struct vmap_area **pprev, | ||
| 2057 | unsigned long align) | ||
| 2058 | { | ||
| 2059 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); | ||
| 2060 | unsigned long addr; | ||
| 2061 | |||
| 2062 | if (*pnext) | ||
| 2063 | addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); | ||
| 2064 | else | ||
| 2065 | addr = vmalloc_end; | ||
| 2066 | |||
| 2067 | while (*pprev && (*pprev)->va_end > addr) { | ||
| 2068 | *pnext = *pprev; | ||
| 2069 | *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); | ||
| 2070 | } | ||
| 2071 | |||
| 2072 | return addr; | ||
| 2073 | } | ||
| 2074 | |||
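pvm_find_next_prev() and pvm_determine_end() together form a cursor that walks free gaps in vmalloc space from the top down; pcpu_get_vm_areas() below drives them in exactly this way. A simplified, hedged sketch of the pattern for a single area (illustrative only; no locking, underflow check or lazy-purge retry):

    /* Simplified single-area version of the scan loop below (illustrative). */
    static unsigned long find_hole_topdown(unsigned long limit,
                                           unsigned long size,
                                           unsigned long align)
    {
            struct vmap_area *next, *prev;
            unsigned long end;

            if (!pvm_find_next_prev(limit, &next, &prev))
                    return (VMALLOC_END & ~(align - 1)) - size; /* empty tree */

            end = pvm_determine_end(&next, &prev, align);
            while (prev && prev->va_end > end - size) {
                    /* hole below 'end' is too small: step below 'prev' */
                    next = prev;
                    prev = node_to_va(rb_prev(&next->rb_node));
                    end = pvm_determine_end(&next, &prev, align);
            }
            return end - size;   /* base of a hole of at least 'size' bytes */
    }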
| 2075 | /** | ||
| 2076 | * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator | ||
| 2077 | * @offsets: array containing offset of each area | ||
| 2078 | * @sizes: array containing size of each area | ||
| 2079 | * @nr_vms: the number of areas to allocate | ||
| 2080 | * @align: alignment, all entries in @offsets and @sizes must be aligned to this | ||
| 2081 | * @gfp_mask: allocation mask | ||
| 2082 | * | ||
| 2083 | * Returns: kmalloc'd vm_struct pointer array pointing to allocated | ||
| 2084 | * vm_structs on success, %NULL on failure | ||
| 2085 | * | ||
| 2086 | * The percpu allocator wants to use congruent vm areas so that it can | ||
| 2087 | * maintain the offsets among percpu areas. This function allocates | ||
| 2088 | * congruent vmalloc areas for it. These areas tend to be scattered | ||
| 2089 | * pretty far apart, with the distance between two areas easily going up | ||
| 2090 | * to gigabytes. To avoid interacting with regular vmallocs, these areas | ||
| 2091 | * are allocated from the top. | ||
| 2092 | * | ||
| 2093 | * Despite its complicated look, this allocator is rather simple. It | ||
| 2094 | * does everything top-down and scans areas from the end looking for a | ||
| 2095 | * matching slot. While scanning, if any of the areas overlaps with an | ||
| 2096 | * existing vmap_area, the base address is pulled down to fit the | ||
| 2097 | * area. Scanning is repeated till all the areas fit and then all | ||
| 2098 | * necessary data structures are inserted and the result is returned. | ||
| 2099 | */ | ||
| 2100 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, | ||
| 2101 | const size_t *sizes, int nr_vms, | ||
| 2102 | size_t align, gfp_t gfp_mask) | ||
| 2103 | { | ||
| 2104 | const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); | ||
| 2105 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); | ||
| 2106 | struct vmap_area **vas, *prev, *next; | ||
| 2107 | struct vm_struct **vms; | ||
| 2108 | int area, area2, last_area, term_area; | ||
| 2109 | unsigned long base, start, end, last_end; | ||
| 2110 | bool purged = false; | ||
| 2111 | |||
| 2112 | gfp_mask &= GFP_RECLAIM_MASK; | ||
| 2113 | |||
| 2114 | /* verify parameters and allocate data structures */ | ||
| 2115 | BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); | ||
| 2116 | for (last_area = 0, area = 0; area < nr_vms; area++) { | ||
| 2117 | start = offsets[area]; | ||
| 2118 | end = start + sizes[area]; | ||
| 2119 | |||
| 2120 | /* is everything aligned properly? */ | ||
| 2121 | BUG_ON(!IS_ALIGNED(offsets[area], align)); | ||
| 2122 | BUG_ON(!IS_ALIGNED(sizes[area], align)); | ||
| 2123 | |||
| 2124 | /* detect the area with the highest address */ | ||
| 2125 | if (start > offsets[last_area]) | ||
| 2126 | last_area = area; | ||
| 2127 | |||
| 2128 | for (area2 = 0; area2 < nr_vms; area2++) { | ||
| 2129 | unsigned long start2 = offsets[area2]; | ||
| 2130 | unsigned long end2 = start2 + sizes[area2]; | ||
| 2131 | |||
| 2132 | if (area2 == area) | ||
| 2133 | continue; | ||
| 2134 | |||
| 2135 | BUG_ON(start2 >= start && start2 < end); | ||
| 2136 | BUG_ON(end2 <= end && end2 > start); | ||
| 2137 | } | ||
| 2138 | } | ||
| 2139 | last_end = offsets[last_area] + sizes[last_area]; | ||
| 2140 | |||
| 2141 | if (vmalloc_end - vmalloc_start < last_end) { | ||
| 2142 | WARN_ON(true); | ||
| 2143 | return NULL; | ||
| 2144 | } | ||
| 2145 | |||
| 2146 | vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask); | ||
| 2147 | vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask); | ||
| 2148 | if (!vas || !vms) | ||
| 2149 | goto err_free; | ||
| 2150 | |||
| 2151 | for (area = 0; area < nr_vms; area++) { | ||
| 2152 | vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask); | ||
| 2153 | vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask); | ||
| 2154 | if (!vas[area] || !vms[area]) | ||
| 2155 | goto err_free; | ||
| 2156 | } | ||
| 2157 | retry: | ||
| 2158 | spin_lock(&vmap_area_lock); | ||
| 2159 | |||
| 2160 | /* start scanning - we scan from the top, begin with the last area */ | ||
| 2161 | area = term_area = last_area; | ||
| 2162 | start = offsets[area]; | ||
| 2163 | end = start + sizes[area]; | ||
| 2164 | |||
| 2165 | if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { | ||
| 2166 | base = vmalloc_end - last_end; | ||
| 2167 | goto found; | ||
| 2168 | } | ||
| 2169 | base = pvm_determine_end(&next, &prev, align) - end; | ||
| 2170 | |||
| 2171 | while (true) { | ||
| 2172 | BUG_ON(next && next->va_end <= base + end); | ||
| 2173 | BUG_ON(prev && prev->va_end > base + end); | ||
| 2174 | |||
| 2175 | /* | ||
| 2176 | * base might have underflowed, add last_end before | ||
| 2177 | * comparing. | ||
| 2178 | */ | ||
| 2179 | if (base + last_end < vmalloc_start + last_end) { | ||
| 2180 | spin_unlock(&vmap_area_lock); | ||
| 2181 | if (!purged) { | ||
| 2182 | purge_vmap_area_lazy(); | ||
| 2183 | purged = true; | ||
| 2184 | goto retry; | ||
| 2185 | } | ||
| 2186 | goto err_free; | ||
| 2187 | } | ||
| 2188 | |||
| 2189 | /* | ||
| 2190 | * If next overlaps, move base downwards so that it's | ||
| 2191 | * right below next and then recheck. | ||
| 2192 | */ | ||
| 2193 | if (next && next->va_start < base + end) { | ||
| 2194 | base = pvm_determine_end(&next, &prev, align) - end; | ||
| 2195 | term_area = area; | ||
| 2196 | continue; | ||
| 2197 | } | ||
| 2198 | |||
| 2199 | /* | ||
| 2200 | * If prev overlaps, shift down next and prev and move | ||
| 2201 | * base so that it's right below new next and then | ||
| 2202 | * recheck. | ||
| 2203 | */ | ||
| 2204 | if (prev && prev->va_end > base + start) { | ||
| 2205 | next = prev; | ||
| 2206 | prev = node_to_va(rb_prev(&next->rb_node)); | ||
| 2207 | base = pvm_determine_end(&next, &prev, align) - end; | ||
| 2208 | term_area = area; | ||
| 2209 | continue; | ||
| 2210 | } | ||
| 2211 | |||
| 2212 | /* | ||
| 2213 | * This area fits, move on to the previous one. If | ||
| 2214 | * the previous one is the terminal one, we're done. | ||
| 2215 | */ | ||
| 2216 | area = (area + nr_vms - 1) % nr_vms; | ||
| 2217 | if (area == term_area) | ||
| 2218 | break; | ||
| 2219 | start = offsets[area]; | ||
| 2220 | end = start + sizes[area]; | ||
| 2221 | pvm_find_next_prev(base + end, &next, &prev); | ||
| 2222 | } | ||
| 2223 | found: | ||
| 2224 | /* we've found a fitting base, insert all va's */ | ||
| 2225 | for (area = 0; area < nr_vms; area++) { | ||
| 2226 | struct vmap_area *va = vas[area]; | ||
| 2227 | |||
| 2228 | va->va_start = base + offsets[area]; | ||
| 2229 | va->va_end = va->va_start + sizes[area]; | ||
| 2230 | __insert_vmap_area(va); | ||
| 2231 | } | ||
| 2232 | |||
| 2233 | vmap_area_pcpu_hole = base + offsets[last_area]; | ||
| 2234 | |||
| 2235 | spin_unlock(&vmap_area_lock); | ||
| 2236 | |||
| 2237 | /* insert all vm's */ | ||
| 2238 | for (area = 0; area < nr_vms; area++) | ||
| 2239 | insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, | ||
| 2240 | pcpu_get_vm_areas); | ||
| 2241 | |||
| 2242 | kfree(vas); | ||
| 2243 | return vms; | ||
| 2244 | |||
| 2245 | err_free: | ||
| 2246 | for (area = 0; area < nr_vms; area++) { | ||
| 2247 | if (vas) | ||
| 2248 | kfree(vas[area]); | ||
| 2249 | if (vms) | ||
| 2250 | kfree(vms[area]); | ||
| 2251 | } | ||
| 2252 | kfree(vas); | ||
| 2253 | kfree(vms); | ||
| 2254 | return NULL; | ||
| 2255 | } | ||
| 2256 | |||
| 2257 | /** | ||
| 2258 | * pcpu_free_vm_areas - free vmalloc areas for percpu allocator | ||
| 2259 | * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() | ||
| 2260 | * @nr_vms: the number of allocated areas | ||
| 2261 | * | ||
| 2262 | * Free vm_structs and the array allocated by pcpu_get_vm_areas(). | ||
| 2263 | */ | ||
| 2264 | void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) | ||
| 2265 | { | ||
| 2266 | int i; | ||
| 2267 | |||
| 2268 | for (i = 0; i < nr_vms; i++) | ||
| 2269 | free_vm_area(vms[i]); | ||
| 2270 | kfree(vms); | ||
| 2271 | } | ||
| 1821 | 2272 | ||
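A hedged usage sketch of the new pair of interfaces: the caller (in practice the percpu allocator) asks for congruent areas and later releases them with pcpu_free_vm_areas(). The offsets and sizes are made-up illustrative values, not how mm/percpu.c computes them:

    /* Illustrative only: reserve two congruent 64 KiB areas 1 MiB apart. */
    static int pcpu_vm_areas_demo(void)
    {
            const unsigned long offsets[] = { 0, 1UL << 20 };
            const size_t sizes[] = { 64 << 10, 64 << 10 };
            struct vm_struct **vms;

            vms = pcpu_get_vm_areas(offsets, sizes, 2, PAGE_SIZE, GFP_KERNEL);
            if (!vms)
                    return -ENOMEM;

            /* vms[1]->addr - vms[0]->addr == 1 MiB, as requested. */
            pcpu_free_vm_areas(vms, 2);
            return 0;
    }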
| 1822 | #ifdef CONFIG_PROC_FS | 2273 | #ifdef CONFIG_PROC_FS |
| 1823 | static void *s_start(struct seq_file *m, loff_t *pos) | 2274 | static void *s_start(struct seq_file *m, loff_t *pos) |
