diff options
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- | mm/vmalloc.c | 338 |
1 files changed, 317 insertions, 21 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f8189a4b3e13..204b8243d8ab 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -265,6 +265,7 @@ struct vmap_area { | |||
265 | static DEFINE_SPINLOCK(vmap_area_lock); | 265 | static DEFINE_SPINLOCK(vmap_area_lock); |
266 | static struct rb_root vmap_area_root = RB_ROOT; | 266 | static struct rb_root vmap_area_root = RB_ROOT; |
267 | static LIST_HEAD(vmap_area_list); | 267 | static LIST_HEAD(vmap_area_list); |
268 | static unsigned long vmap_area_pcpu_hole; | ||
268 | 269 | ||
269 | static struct vmap_area *__find_vmap_area(unsigned long addr) | 270 | static struct vmap_area *__find_vmap_area(unsigned long addr) |
270 | { | 271 | { |
@@ -431,6 +432,15 @@ static void __free_vmap_area(struct vmap_area *va) | |||
431 | RB_CLEAR_NODE(&va->rb_node); | 432 | RB_CLEAR_NODE(&va->rb_node); |
432 | list_del_rcu(&va->list); | 433 | list_del_rcu(&va->list); |
433 | 434 | ||
435 | /* | ||
436 | * Track the highest possible candidate for pcpu area | ||
437 | * allocation. Areas outside of vmalloc area can be returned | ||
438 | * here too, consider only end addresses which fall inside | ||
439 | * vmalloc area proper. | ||
440 | */ | ||
441 | if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) | ||
442 | vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); | ||
443 | |||
434 | call_rcu(&va->rcu_head, rcu_free_va); | 444 | call_rcu(&va->rcu_head, rcu_free_va); |
435 | } | 445 | } |
436 | 446 | ||
@@ -1038,6 +1048,9 @@ void __init vmalloc_init(void) | |||
1038 | va->va_end = va->va_start + tmp->size; | 1048 | va->va_end = va->va_start + tmp->size; |
1039 | __insert_vmap_area(va); | 1049 | __insert_vmap_area(va); |
1040 | } | 1050 | } |
1051 | |||
1052 | vmap_area_pcpu_hole = VMALLOC_END; | ||
1053 | |||
1041 | vmap_initialized = true; | 1054 | vmap_initialized = true; |
1042 | } | 1055 | } |
1043 | 1056 | ||
@@ -1122,13 +1135,34 @@ EXPORT_SYMBOL_GPL(map_vm_area); | |||
1122 | DEFINE_RWLOCK(vmlist_lock); | 1135 | DEFINE_RWLOCK(vmlist_lock); |
1123 | struct vm_struct *vmlist; | 1136 | struct vm_struct *vmlist; |
1124 | 1137 | ||
1138 | static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, | ||
1139 | unsigned long flags, void *caller) | ||
1140 | { | ||
1141 | struct vm_struct *tmp, **p; | ||
1142 | |||
1143 | vm->flags = flags; | ||
1144 | vm->addr = (void *)va->va_start; | ||
1145 | vm->size = va->va_end - va->va_start; | ||
1146 | vm->caller = caller; | ||
1147 | va->private = vm; | ||
1148 | va->flags |= VM_VM_AREA; | ||
1149 | |||
1150 | write_lock(&vmlist_lock); | ||
1151 | for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { | ||
1152 | if (tmp->addr >= vm->addr) | ||
1153 | break; | ||
1154 | } | ||
1155 | vm->next = *p; | ||
1156 | *p = vm; | ||
1157 | write_unlock(&vmlist_lock); | ||
1158 | } | ||
1159 | |||
1125 | static struct vm_struct *__get_vm_area_node(unsigned long size, | 1160 | static struct vm_struct *__get_vm_area_node(unsigned long size, |
1126 | unsigned long flags, unsigned long start, unsigned long end, | 1161 | unsigned long flags, unsigned long start, unsigned long end, |
1127 | int node, gfp_t gfp_mask, void *caller) | 1162 | int node, gfp_t gfp_mask, void *caller) |
1128 | { | 1163 | { |
1129 | static struct vmap_area *va; | 1164 | static struct vmap_area *va; |
1130 | struct vm_struct *area; | 1165 | struct vm_struct *area; |
1131 | struct vm_struct *tmp, **p; | ||
1132 | unsigned long align = 1; | 1166 | unsigned long align = 1; |
1133 | 1167 | ||
1134 | BUG_ON(in_interrupt()); | 1168 | BUG_ON(in_interrupt()); |
@@ -1147,7 +1181,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, | |||
1147 | if (unlikely(!size)) | 1181 | if (unlikely(!size)) |
1148 | return NULL; | 1182 | return NULL; |
1149 | 1183 | ||
1150 | area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); | 1184 | area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); |
1151 | if (unlikely(!area)) | 1185 | if (unlikely(!area)) |
1152 | return NULL; | 1186 | return NULL; |
1153 | 1187 | ||
@@ -1162,25 +1196,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, | |||
1162 | return NULL; | 1196 | return NULL; |
1163 | } | 1197 | } |
1164 | 1198 | ||
1165 | area->flags = flags; | 1199 | insert_vmalloc_vm(area, va, flags, caller); |
1166 | area->addr = (void *)va->va_start; | ||
1167 | area->size = size; | ||
1168 | area->pages = NULL; | ||
1169 | area->nr_pages = 0; | ||
1170 | area->phys_addr = 0; | ||
1171 | area->caller = caller; | ||
1172 | va->private = area; | ||
1173 | va->flags |= VM_VM_AREA; | ||
1174 | |||
1175 | write_lock(&vmlist_lock); | ||
1176 | for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { | ||
1177 | if (tmp->addr >= area->addr) | ||
1178 | break; | ||
1179 | } | ||
1180 | area->next = *p; | ||
1181 | *p = area; | ||
1182 | write_unlock(&vmlist_lock); | ||
1183 | |||
1184 | return area; | 1200 | return area; |
1185 | } | 1201 | } |
1186 | 1202 | ||
@@ -1818,6 +1834,286 @@ void free_vm_area(struct vm_struct *area) | |||
1818 | } | 1834 | } |
1819 | EXPORT_SYMBOL_GPL(free_vm_area); | 1835 | EXPORT_SYMBOL_GPL(free_vm_area); |
1820 | 1836 | ||
1837 | static struct vmap_area *node_to_va(struct rb_node *n) | ||
1838 | { | ||
1839 | return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; | ||
1840 | } | ||
1841 | |||
1842 | /** | ||
1843 | * pvm_find_next_prev - find the next and prev vmap_area surrounding @end | ||
1844 | * @end: target address | ||
1845 | * @pnext: out arg for the next vmap_area | ||
1846 | * @pprev: out arg for the previous vmap_area | ||
1847 | * | ||
1848 | * Returns: %true if either or both of next and prev are found, | ||
1849 | * %false if no vmap_area exists | ||
1850 | * | ||
1851 | * Find vmap_areas end addresses of which enclose @end. ie. if not | ||
1852 | * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. | ||
1853 | */ | ||
1854 | static bool pvm_find_next_prev(unsigned long end, | ||
1855 | struct vmap_area **pnext, | ||
1856 | struct vmap_area **pprev) | ||
1857 | { | ||
1858 | struct rb_node *n = vmap_area_root.rb_node; | ||
1859 | struct vmap_area *va = NULL; | ||
1860 | |||
1861 | while (n) { | ||
1862 | va = rb_entry(n, struct vmap_area, rb_node); | ||
1863 | if (end < va->va_end) | ||
1864 | n = n->rb_left; | ||
1865 | else if (end > va->va_end) | ||
1866 | n = n->rb_right; | ||
1867 | else | ||
1868 | break; | ||
1869 | } | ||
1870 | |||
1871 | if (!va) | ||
1872 | return false; | ||
1873 | |||
1874 | if (va->va_end > end) { | ||
1875 | *pnext = va; | ||
1876 | *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); | ||
1877 | } else { | ||
1878 | *pprev = va; | ||
1879 | *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); | ||
1880 | } | ||
1881 | return true; | ||
1882 | } | ||
1883 | |||
1884 | /** | ||
1885 | * pvm_determine_end - find the highest aligned address between two vmap_areas | ||
1886 | * @pnext: in/out arg for the next vmap_area | ||
1887 | * @pprev: in/out arg for the previous vmap_area | ||
1888 | * @align: alignment | ||
1889 | * | ||
1890 | * Returns: determined end address | ||
1891 | * | ||
1892 | * Find the highest aligned address between *@pnext and *@pprev below | ||
1893 | * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned | ||
1894 | * down address is between the end addresses of the two vmap_areas. | ||
1895 | * | ||
1896 | * Please note that the address returned by this function may fall | ||
1897 | * inside *@pnext vmap_area. The caller is responsible for checking | ||
1898 | * that. | ||
1899 | */ | ||
1900 | static unsigned long pvm_determine_end(struct vmap_area **pnext, | ||
1901 | struct vmap_area **pprev, | ||
1902 | unsigned long align) | ||
1903 | { | ||
1904 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); | ||
1905 | unsigned long addr; | ||
1906 | |||
1907 | if (*pnext) | ||
1908 | addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); | ||
1909 | else | ||
1910 | addr = vmalloc_end; | ||
1911 | |||
1912 | while (*pprev && (*pprev)->va_end > addr) { | ||
1913 | *pnext = *pprev; | ||
1914 | *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); | ||
1915 | } | ||
1916 | |||
1917 | return addr; | ||
1918 | } | ||
1919 | |||
1920 | /** | ||
1921 | * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator | ||
1922 | * @offsets: array containing offset of each area | ||
1923 | * @sizes: array containing size of each area | ||
1924 | * @nr_vms: the number of areas to allocate | ||
1925 | * @align: alignment, all entries in @offsets and @sizes must be aligned to this | ||
1926 | * @gfp_mask: allocation mask | ||
1927 | * | ||
1928 | * Returns: kmalloc'd vm_struct pointer array pointing to allocated | ||
1929 | * vm_structs on success, %NULL on failure | ||
1930 | * | ||
1931 | * Percpu allocator wants to use congruent vm areas so that it can | ||
1932 | * maintain the offsets among percpu areas. This function allocates | ||
1933 | * congruent vmalloc areas for it. These areas tend to be scattered | ||
1934 | * pretty far, distance between two areas easily going up to | ||
1935 | * gigabytes. To avoid interacting with regular vmallocs, these areas | ||
1936 | * are allocated from top. | ||
1937 | * | ||
1938 | * Despite its complicated look, this allocator is rather simple. It | ||
1939 | * does everything top-down and scans areas from the end looking for | ||
1940 | * matching slot. While scanning, if any of the areas overlaps with | ||
1941 | * existing vmap_area, the base address is pulled down to fit the | ||
1942 | * area. Scanning is repeated till all the areas fit and then all | ||
1943 | * necessary data structres are inserted and the result is returned. | ||
1944 | */ | ||
1945 | struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, | ||
1946 | const size_t *sizes, int nr_vms, | ||
1947 | size_t align, gfp_t gfp_mask) | ||
1948 | { | ||
1949 | const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); | ||
1950 | const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); | ||
1951 | struct vmap_area **vas, *prev, *next; | ||
1952 | struct vm_struct **vms; | ||
1953 | int area, area2, last_area, term_area; | ||
1954 | unsigned long base, start, end, last_end; | ||
1955 | bool purged = false; | ||
1956 | |||
1957 | gfp_mask &= GFP_RECLAIM_MASK; | ||
1958 | |||
1959 | /* verify parameters and allocate data structures */ | ||
1960 | BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); | ||
1961 | for (last_area = 0, area = 0; area < nr_vms; area++) { | ||
1962 | start = offsets[area]; | ||
1963 | end = start + sizes[area]; | ||
1964 | |||
1965 | /* is everything aligned properly? */ | ||
1966 | BUG_ON(!IS_ALIGNED(offsets[area], align)); | ||
1967 | BUG_ON(!IS_ALIGNED(sizes[area], align)); | ||
1968 | |||
1969 | /* detect the area with the highest address */ | ||
1970 | if (start > offsets[last_area]) | ||
1971 | last_area = area; | ||
1972 | |||
1973 | for (area2 = 0; area2 < nr_vms; area2++) { | ||
1974 | unsigned long start2 = offsets[area2]; | ||
1975 | unsigned long end2 = start2 + sizes[area2]; | ||
1976 | |||
1977 | if (area2 == area) | ||
1978 | continue; | ||
1979 | |||
1980 | BUG_ON(start2 >= start && start2 < end); | ||
1981 | BUG_ON(end2 <= end && end2 > start); | ||
1982 | } | ||
1983 | } | ||
1984 | last_end = offsets[last_area] + sizes[last_area]; | ||
1985 | |||
1986 | if (vmalloc_end - vmalloc_start < last_end) { | ||
1987 | WARN_ON(true); | ||
1988 | return NULL; | ||
1989 | } | ||
1990 | |||
1991 | vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask); | ||
1992 | vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask); | ||
1993 | if (!vas || !vms) | ||
1994 | goto err_free; | ||
1995 | |||
1996 | for (area = 0; area < nr_vms; area++) { | ||
1997 | vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask); | ||
1998 | vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask); | ||
1999 | if (!vas[area] || !vms[area]) | ||
2000 | goto err_free; | ||
2001 | } | ||
2002 | retry: | ||
2003 | spin_lock(&vmap_area_lock); | ||
2004 | |||
2005 | /* start scanning - we scan from the top, begin with the last area */ | ||
2006 | area = term_area = last_area; | ||
2007 | start = offsets[area]; | ||
2008 | end = start + sizes[area]; | ||
2009 | |||
2010 | if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { | ||
2011 | base = vmalloc_end - last_end; | ||
2012 | goto found; | ||
2013 | } | ||
2014 | base = pvm_determine_end(&next, &prev, align) - end; | ||
2015 | |||
2016 | while (true) { | ||
2017 | BUG_ON(next && next->va_end <= base + end); | ||
2018 | BUG_ON(prev && prev->va_end > base + end); | ||
2019 | |||
2020 | /* | ||
2021 | * base might have underflowed, add last_end before | ||
2022 | * comparing. | ||
2023 | */ | ||
2024 | if (base + last_end < vmalloc_start + last_end) { | ||
2025 | spin_unlock(&vmap_area_lock); | ||
2026 | if (!purged) { | ||
2027 | purge_vmap_area_lazy(); | ||
2028 | purged = true; | ||
2029 | goto retry; | ||
2030 | } | ||
2031 | goto err_free; | ||
2032 | } | ||
2033 | |||
2034 | /* | ||
2035 | * If next overlaps, move base downwards so that it's | ||
2036 | * right below next and then recheck. | ||
2037 | */ | ||
2038 | if (next && next->va_start < base + end) { | ||
2039 | base = pvm_determine_end(&next, &prev, align) - end; | ||
2040 | term_area = area; | ||
2041 | continue; | ||
2042 | } | ||
2043 | |||
2044 | /* | ||
2045 | * If prev overlaps, shift down next and prev and move | ||
2046 | * base so that it's right below new next and then | ||
2047 | * recheck. | ||
2048 | */ | ||
2049 | if (prev && prev->va_end > base + start) { | ||
2050 | next = prev; | ||
2051 | prev = node_to_va(rb_prev(&next->rb_node)); | ||
2052 | base = pvm_determine_end(&next, &prev, align) - end; | ||
2053 | term_area = area; | ||
2054 | continue; | ||
2055 | } | ||
2056 | |||
2057 | /* | ||
2058 | * This area fits, move on to the previous one. If | ||
2059 | * the previous one is the terminal one, we're done. | ||
2060 | */ | ||
2061 | area = (area + nr_vms - 1) % nr_vms; | ||
2062 | if (area == term_area) | ||
2063 | break; | ||
2064 | start = offsets[area]; | ||
2065 | end = start + sizes[area]; | ||
2066 | pvm_find_next_prev(base + end, &next, &prev); | ||
2067 | } | ||
2068 | found: | ||
2069 | /* we've found a fitting base, insert all va's */ | ||
2070 | for (area = 0; area < nr_vms; area++) { | ||
2071 | struct vmap_area *va = vas[area]; | ||
2072 | |||
2073 | va->va_start = base + offsets[area]; | ||
2074 | va->va_end = va->va_start + sizes[area]; | ||
2075 | __insert_vmap_area(va); | ||
2076 | } | ||
2077 | |||
2078 | vmap_area_pcpu_hole = base + offsets[last_area]; | ||
2079 | |||
2080 | spin_unlock(&vmap_area_lock); | ||
2081 | |||
2082 | /* insert all vm's */ | ||
2083 | for (area = 0; area < nr_vms; area++) | ||
2084 | insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, | ||
2085 | pcpu_get_vm_areas); | ||
2086 | |||
2087 | kfree(vas); | ||
2088 | return vms; | ||
2089 | |||
2090 | err_free: | ||
2091 | for (area = 0; area < nr_vms; area++) { | ||
2092 | if (vas) | ||
2093 | kfree(vas[area]); | ||
2094 | if (vms) | ||
2095 | kfree(vms[area]); | ||
2096 | } | ||
2097 | kfree(vas); | ||
2098 | kfree(vms); | ||
2099 | return NULL; | ||
2100 | } | ||
2101 | |||
2102 | /** | ||
2103 | * pcpu_free_vm_areas - free vmalloc areas for percpu allocator | ||
2104 | * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() | ||
2105 | * @nr_vms: the number of allocated areas | ||
2106 | * | ||
2107 | * Free vm_structs and the array allocated by pcpu_get_vm_areas(). | ||
2108 | */ | ||
2109 | void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) | ||
2110 | { | ||
2111 | int i; | ||
2112 | |||
2113 | for (i = 0; i < nr_vms; i++) | ||
2114 | free_vm_area(vms[i]); | ||
2115 | kfree(vms); | ||
2116 | } | ||
1821 | 2117 | ||
1822 | #ifdef CONFIG_PROC_FS | 2118 | #ifdef CONFIG_PROC_FS |
1823 | static void *s_start(struct seq_file *m, loff_t *pos) | 2119 | static void *s_start(struct seq_file *m, loff_t *pos) |