Diffstat (limited to 'mm/vmalloc.c')
 -rw-r--r--  mm/vmalloc.c  293
 1 file changed, 293 insertions(+), 0 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2eb461c3a46e..204b8243d8ab 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -265,6 +265,7 @@ struct vmap_area {
 static DEFINE_SPINLOCK(vmap_area_lock);
 static struct rb_root vmap_area_root = RB_ROOT;
 static LIST_HEAD(vmap_area_list);
+static unsigned long vmap_area_pcpu_hole;
 
 static struct vmap_area *__find_vmap_area(unsigned long addr)
 {
@@ -431,6 +432,15 @@ static void __free_vmap_area(struct vmap_area *va)
 	RB_CLEAR_NODE(&va->rb_node);
 	list_del_rcu(&va->list);
 
+	/*
+	 * Track the highest possible candidate for pcpu area
+	 * allocation.  Areas outside of the vmalloc area can be returned
+	 * here too; consider only end addresses which fall inside the
+	 * vmalloc area proper.
+	 */
+	if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
+		vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
+
 	call_rcu(&va->rcu_head, rcu_free_va);
 }
 
@@ -1038,6 +1048,9 @@ void __init vmalloc_init(void)
 		va->va_end = va->va_start + tmp->size;
 		__insert_vmap_area(va);
 	}
+
+	vmap_area_pcpu_hole = VMALLOC_END;
+
 	vmap_initialized = true;
 }
 
@@ -1821,6 +1834,286 @@ void free_vm_area(struct vm_struct *area)
 }
 EXPORT_SYMBOL_GPL(free_vm_area);
 
+static struct vmap_area *node_to_va(struct rb_node *n)
+{
+	return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
+}
+
+/**
+ * pvm_find_next_prev - find the next and prev vmap_area surrounding @end
+ * @end: target address
+ * @pnext: out arg for the next vmap_area
+ * @pprev: out arg for the previous vmap_area
+ *
+ * Returns: %true if either or both of next and prev are found,
+ *	    %false if no vmap_area exists
+ *
+ * Find the vmap_areas whose end addresses enclose @end, i.e. if not
+ * NULL, *pnext->va_end > @end and *pprev->va_end <= @end.
+ */
+static bool pvm_find_next_prev(unsigned long end,
+			       struct vmap_area **pnext,
+			       struct vmap_area **pprev)
+{
+	struct rb_node *n = vmap_area_root.rb_node;
+	struct vmap_area *va = NULL;
+
+	while (n) {
+		va = rb_entry(n, struct vmap_area, rb_node);
+		if (end < va->va_end)
+			n = n->rb_left;
+		else if (end > va->va_end)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	if (!va)
+		return false;
+
+	if (va->va_end > end) {
+		*pnext = va;
+		*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
+	} else {
+		*pprev = va;
+		*pnext = node_to_va(rb_next(&(*pprev)->rb_node));
+	}
+	return true;
+}
+
+/**
+ * pvm_determine_end - find the highest aligned address between two vmap_areas
+ * @pnext: in/out arg for the next vmap_area
+ * @pprev: in/out arg for the previous vmap_area
+ * @align: alignment
+ *
+ * Returns: determined end address
+ *
+ * Find the highest aligned address between *@pnext and *@pprev below
+ * VMALLOC_END.  *@pnext and *@pprev are adjusted so that the
+ * aligned-down address lies between the end addresses of the two
+ * vmap_areas.
+ *
+ * Please note that the address returned by this function may fall
+ * inside the *@pnext vmap_area.  The caller is responsible for
+ * checking that.
+ */
+static unsigned long pvm_determine_end(struct vmap_area **pnext,
+				       struct vmap_area **pprev,
+				       unsigned long align)
+{
+	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
+	unsigned long addr;
+
+	if (*pnext)
+		addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
+	else
+		addr = vmalloc_end;
+
+	while (*pprev && (*pprev)->va_end > addr) {
+		*pnext = *pprev;
+		*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
+	}
+
+	return addr;
+}
+
+/**
+ * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
+ * @offsets: array containing offset of each area
+ * @sizes: array containing size of each area
+ * @nr_vms: the number of areas to allocate
+ * @align: alignment, all entries in @offsets and @sizes must be aligned to this
+ * @gfp_mask: allocation mask
+ *
+ * Returns: kmalloc'd vm_struct pointer array pointing to allocated
+ *	    vm_structs on success, %NULL on failure
+ *
+ * The percpu allocator wants to use congruent vm areas so that it can
+ * maintain the offsets among percpu areas.  This function allocates
+ * congruent vmalloc areas for it.  These areas tend to be scattered
+ * far apart, with the distance between two areas easily going up to
+ * gigabytes.  To avoid interacting with regular vmallocs, these areas
+ * are allocated from the top.
+ *
+ * Despite its complicated look, this allocator is rather simple.  It
+ * does everything top-down and scans areas from the end looking for a
+ * matching slot.  While scanning, if any of the areas overlaps with an
+ * existing vmap_area, the base address is pulled down to fit the
+ * area.  Scanning is repeated until all the areas fit, and then all
+ * necessary data structures are inserted and the result is returned.
+ */
+struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
+				     const size_t *sizes, int nr_vms,
+				     size_t align, gfp_t gfp_mask)
+{
+	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
+	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
+	struct vmap_area **vas, *prev, *next;
+	struct vm_struct **vms;
+	int area, area2, last_area, term_area;
+	unsigned long base, start, end, last_end;
+	bool purged = false;
+
+	gfp_mask &= GFP_RECLAIM_MASK;
+
+	/* verify parameters and allocate data structures */
+	BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
+	for (last_area = 0, area = 0; area < nr_vms; area++) {
+		start = offsets[area];
+		end = start + sizes[area];
+
+		/* is everything aligned properly? */
+		BUG_ON(!IS_ALIGNED(offsets[area], align));
+		BUG_ON(!IS_ALIGNED(sizes[area], align));
+
+		/* detect the area with the highest address */
+		if (start > offsets[last_area])
+			last_area = area;
+
+		for (area2 = 0; area2 < nr_vms; area2++) {
+			unsigned long start2 = offsets[area2];
+			unsigned long end2 = start2 + sizes[area2];
+
+			if (area2 == area)
+				continue;
+
+			BUG_ON(start2 >= start && start2 < end);
+			BUG_ON(end2 <= end && end2 > start);
+		}
+	}
+	last_end = offsets[last_area] + sizes[last_area];
+
+	if (vmalloc_end - vmalloc_start < last_end) {
+		WARN_ON(true);
+		return NULL;
+	}
+
+	vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
+	vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
+	if (!vas || !vms)
+		goto err_free;
+
+	for (area = 0; area < nr_vms; area++) {
+		vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
+		vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
+		if (!vas[area] || !vms[area])
+			goto err_free;
+	}
+retry:
+	spin_lock(&vmap_area_lock);
+
+	/* start scanning - we scan from the top, begin with the last area */
+	area = term_area = last_area;
+	start = offsets[area];
+	end = start + sizes[area];
+
+	if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
+		base = vmalloc_end - last_end;
+		goto found;
+	}
+	base = pvm_determine_end(&next, &prev, align) - end;
+
+	while (true) {
+		BUG_ON(next && next->va_end <= base + end);
+		BUG_ON(prev && prev->va_end > base + end);
+
+		/*
+		 * base might have underflowed, add last_end before
+		 * comparing.
+		 */
+		if (base + last_end < vmalloc_start + last_end) {
+			spin_unlock(&vmap_area_lock);
+			if (!purged) {
+				purge_vmap_area_lazy();
+				purged = true;
+				goto retry;
+			}
+			goto err_free;
+		}
+
+		/*
+		 * If next overlaps, move base downwards so that it's
+		 * right below next and then recheck.
+		 */
+		if (next && next->va_start < base + end) {
+			base = pvm_determine_end(&next, &prev, align) - end;
+			term_area = area;
+			continue;
+		}
+
+		/*
+		 * If prev overlaps, shift down next and prev and move
+		 * base so that it's right below the new next and then
+		 * recheck.
+		 */
+		if (prev && prev->va_end > base + start) {
+			next = prev;
+			prev = node_to_va(rb_prev(&next->rb_node));
+			base = pvm_determine_end(&next, &prev, align) - end;
+			term_area = area;
+			continue;
+		}
+
+		/*
+		 * This area fits, move on to the previous one.  If
+		 * the previous one is the terminal one, we're done.
+		 */
+		area = (area + nr_vms - 1) % nr_vms;
+		if (area == term_area)
+			break;
+		start = offsets[area];
+		end = start + sizes[area];
+		pvm_find_next_prev(base + end, &next, &prev);
+	}
+found:
+	/* we've found a fitting base, insert all va's */
+	for (area = 0; area < nr_vms; area++) {
+		struct vmap_area *va = vas[area];
+
+		va->va_start = base + offsets[area];
+		va->va_end = va->va_start + sizes[area];
+		__insert_vmap_area(va);
+	}
+
+	vmap_area_pcpu_hole = base + offsets[last_area];
+
+	spin_unlock(&vmap_area_lock);
+
+	/* insert all vm's */
+	for (area = 0; area < nr_vms; area++)
+		insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
+				  pcpu_get_vm_areas);
+
+	kfree(vas);
+	return vms;
+
+err_free:
+	for (area = 0; area < nr_vms; area++) {
+		if (vas)
+			kfree(vas[area]);
+		if (vms)
+			kfree(vms[area]);
+	}
+	kfree(vas);
+	kfree(vms);
+	return NULL;
+}
+
+/**
+ * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
+ * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
+ * @nr_vms: the number of allocated areas
+ *
+ * Free vm_structs and the array allocated by pcpu_get_vm_areas().
+ */
+void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
+{
+	int i;
+
+	for (i = 0; i < nr_vms; i++)
+		free_vm_area(vms[i]);
+	kfree(vms);
+}
 
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
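
The patch exports a pair of interfaces: pcpu_get_vm_areas() reserves a set of congruent areas at constant offsets from a single base address, and pcpu_free_vm_areas() releases them. As a minimal sketch of how a caller such as the percpu first-chunk allocator might drive this pair, the snippet below reserves two congruent areas and tears them down again. The names, group count, offsets, and sizes are illustrative assumptions, not values taken from this commit:

#include <linux/vmalloc.h>
#include <linux/gfp.h>
#include <linux/errno.h>
#include <linux/kernel.h>	/* ARRAY_SIZE() */

/* hypothetical layout: two groups, 1MB apart, 64KB of vmalloc space each */
static const unsigned long grp_offsets[] = { 0, 1UL << 20 };
static const size_t grp_sizes[] = { 64 << 10, 64 << 10 };

static int example_reserve_groups(void)
{
	struct vm_struct **vms;

	/* every offset and size must be aligned to the requested align */
	vms = pcpu_get_vm_areas(grp_offsets, grp_sizes,
				ARRAY_SIZE(grp_sizes), PAGE_SIZE,
				GFP_KERNEL);
	if (!vms)
		return -ENOMEM;

	/*
	 * Congruence guarantee: all areas hang off one base, so
	 * vms[i]->addr == vms[0]->addr + grp_offsets[i], and a single
	 * per-unit offset works for every group.
	 */

	/* frees each area and also kfree()s the vms array itself */
	pcpu_free_vm_areas(vms, ARRAY_SIZE(grp_sizes));
	return 0;
}

Note the ownership convention visible in the diff: pcpu_get_vm_areas() returns a kmalloc'd array (it kfree()s only the temporary vas array on success), and pcpu_free_vm_areas() kfree()s that array after freeing the areas, so the caller never frees vms directly.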
