diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/vmalloc.c | 975 |
1 files changed, 842 insertions, 133 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index bba06c41fc59..712ae47af0bf 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * Numa awareness, Christoph Lameter, SGI, June 2005 | 8 | * Numa awareness, Christoph Lameter, SGI, June 2005 |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/vmalloc.h> | ||
11 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
12 | #include <linux/module.h> | 13 | #include <linux/module.h> |
13 | #include <linux/highmem.h> | 14 | #include <linux/highmem.h> |
@@ -18,16 +19,17 @@ | |||
18 | #include <linux/debugobjects.h> | 19 | #include <linux/debugobjects.h> |
19 | #include <linux/vmalloc.h> | 20 | #include <linux/vmalloc.h> |
20 | #include <linux/kallsyms.h> | 21 | #include <linux/kallsyms.h> |
22 | #include <linux/list.h> | ||
23 | #include <linux/rbtree.h> | ||
24 | #include <linux/radix-tree.h> | ||
25 | #include <linux/rcupdate.h> | ||
21 | 26 | ||
27 | #include <asm/atomic.h> | ||
22 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
23 | #include <asm/tlbflush.h> | 29 | #include <asm/tlbflush.h> |
24 | 30 | ||
25 | 31 | ||
26 | DEFINE_RWLOCK(vmlist_lock); | 32 | /*** Page table manipulation functions ***/ |
27 | struct vm_struct *vmlist; | ||
28 | |||
29 | static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | ||
30 | int node, void *caller); | ||
31 | 33 | ||
32 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) | 34 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) |
33 | { | 35 | { |
@@ -40,8 +42,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) | |||
40 | } while (pte++, addr += PAGE_SIZE, addr != end); | 42 | } while (pte++, addr += PAGE_SIZE, addr != end); |
41 | } | 43 | } |
42 | 44 | ||
43 | static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr, | 45 | static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) |
44 | unsigned long end) | ||
45 | { | 46 | { |
46 | pmd_t *pmd; | 47 | pmd_t *pmd; |
47 | unsigned long next; | 48 | unsigned long next; |
@@ -55,8 +56,7 @@ static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr, | |||
55 | } while (pmd++, addr = next, addr != end); | 56 | } while (pmd++, addr = next, addr != end); |
56 | } | 57 | } |
57 | 58 | ||
58 | static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr, | 59 | static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) |
59 | unsigned long end) | ||
60 | { | 60 | { |
61 | pud_t *pud; | 61 | pud_t *pud; |
62 | unsigned long next; | 62 | unsigned long next; |
@@ -70,12 +70,10 @@ static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr, | |||
70 | } while (pud++, addr = next, addr != end); | 70 | } while (pud++, addr = next, addr != end); |
71 | } | 71 | } |
72 | 72 | ||
73 | void unmap_kernel_range(unsigned long addr, unsigned long size) | 73 | static void vunmap_page_range(unsigned long addr, unsigned long end) |
74 | { | 74 | { |
75 | pgd_t *pgd; | 75 | pgd_t *pgd; |
76 | unsigned long next; | 76 | unsigned long next; |
77 | unsigned long start = addr; | ||
78 | unsigned long end = addr + size; | ||
79 | 77 | ||
80 | BUG_ON(addr >= end); | 78 | BUG_ON(addr >= end); |
81 | pgd = pgd_offset_k(addr); | 79 | pgd = pgd_offset_k(addr); |
@@ -86,35 +84,36 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) | |||
86 | continue; | 84 | continue; |
87 | vunmap_pud_range(pgd, addr, next); | 85 | vunmap_pud_range(pgd, addr, next); |
88 | } while (pgd++, addr = next, addr != end); | 86 | } while (pgd++, addr = next, addr != end); |
89 | flush_tlb_kernel_range(start, end); | ||
90 | } | ||
91 | |||
92 | static void unmap_vm_area(struct vm_struct *area) | ||
93 | { | ||
94 | unmap_kernel_range((unsigned long)area->addr, area->size); | ||
95 | } | 87 | } |
96 | 88 | ||
97 | static int vmap_pte_range(pmd_t *pmd, unsigned long addr, | 89 | static int vmap_pte_range(pmd_t *pmd, unsigned long addr, |
98 | unsigned long end, pgprot_t prot, struct page ***pages) | 90 | unsigned long end, pgprot_t prot, struct page **pages, int *nr) |
99 | { | 91 | { |
100 | pte_t *pte; | 92 | pte_t *pte; |
101 | 93 | ||
94 | /* | ||
95 | * nr is a running index into the array which helps higher level | ||
96 | * callers keep track of where we're up to. | ||
97 | */ | ||
98 | |||
102 | pte = pte_alloc_kernel(pmd, addr); | 99 | pte = pte_alloc_kernel(pmd, addr); |
103 | if (!pte) | 100 | if (!pte) |
104 | return -ENOMEM; | 101 | return -ENOMEM; |
105 | do { | 102 | do { |
106 | struct page *page = **pages; | 103 | struct page *page = pages[*nr]; |
107 | WARN_ON(!pte_none(*pte)); | 104 | |
108 | if (!page) | 105 | if (WARN_ON(!pte_none(*pte))) |
106 | return -EBUSY; | ||
107 | if (WARN_ON(!page)) | ||
109 | return -ENOMEM; | 108 | return -ENOMEM; |
110 | set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); | 109 | set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); |
111 | (*pages)++; | 110 | (*nr)++; |
112 | } while (pte++, addr += PAGE_SIZE, addr != end); | 111 | } while (pte++, addr += PAGE_SIZE, addr != end); |
113 | return 0; | 112 | return 0; |
114 | } | 113 | } |
115 | 114 | ||
116 | static inline int vmap_pmd_range(pud_t *pud, unsigned long addr, | 115 | static int vmap_pmd_range(pud_t *pud, unsigned long addr, |
117 | unsigned long end, pgprot_t prot, struct page ***pages) | 116 | unsigned long end, pgprot_t prot, struct page **pages, int *nr) |
118 | { | 117 | { |
119 | pmd_t *pmd; | 118 | pmd_t *pmd; |
120 | unsigned long next; | 119 | unsigned long next; |
@@ -124,14 +123,14 @@ static inline int vmap_pmd_range(pud_t *pud, unsigned long addr, | |||
124 | return -ENOMEM; | 123 | return -ENOMEM; |
125 | do { | 124 | do { |
126 | next = pmd_addr_end(addr, end); | 125 | next = pmd_addr_end(addr, end); |
127 | if (vmap_pte_range(pmd, addr, next, prot, pages)) | 126 | if (vmap_pte_range(pmd, addr, next, prot, pages, nr)) |
128 | return -ENOMEM; | 127 | return -ENOMEM; |
129 | } while (pmd++, addr = next, addr != end); | 128 | } while (pmd++, addr = next, addr != end); |
130 | return 0; | 129 | return 0; |
131 | } | 130 | } |
132 | 131 | ||
133 | static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr, | 132 | static int vmap_pud_range(pgd_t *pgd, unsigned long addr, |
134 | unsigned long end, pgprot_t prot, struct page ***pages) | 133 | unsigned long end, pgprot_t prot, struct page **pages, int *nr) |
135 | { | 134 | { |
136 | pud_t *pud; | 135 | pud_t *pud; |
137 | unsigned long next; | 136 | unsigned long next; |
@@ -141,44 +140,49 @@ static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr, | |||
141 | return -ENOMEM; | 140 | return -ENOMEM; |
142 | do { | 141 | do { |
143 | next = pud_addr_end(addr, end); | 142 | next = pud_addr_end(addr, end); |
144 | if (vmap_pmd_range(pud, addr, next, prot, pages)) | 143 | if (vmap_pmd_range(pud, addr, next, prot, pages, nr)) |
145 | return -ENOMEM; | 144 | return -ENOMEM; |
146 | } while (pud++, addr = next, addr != end); | 145 | } while (pud++, addr = next, addr != end); |
147 | return 0; | 146 | return 0; |
148 | } | 147 | } |
149 | 148 | ||
150 | int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) | 149 | /* |
150 | * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and | ||
151 | * will have pfns corresponding to the "pages" array. | ||
152 | * | ||
153 | * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] | ||
154 | */ | ||
155 | static int vmap_page_range(unsigned long addr, unsigned long end, | ||
156 | pgprot_t prot, struct page **pages) | ||
151 | { | 157 | { |
152 | pgd_t *pgd; | 158 | pgd_t *pgd; |
153 | unsigned long next; | 159 | unsigned long next; |
154 | unsigned long addr = (unsigned long) area->addr; | 160 | int err = 0; |
155 | unsigned long end = addr + area->size - PAGE_SIZE; | 161 | int nr = 0; |
156 | int err; | ||
157 | 162 | ||
158 | BUG_ON(addr >= end); | 163 | BUG_ON(addr >= end); |
159 | pgd = pgd_offset_k(addr); | 164 | pgd = pgd_offset_k(addr); |
160 | do { | 165 | do { |
161 | next = pgd_addr_end(addr, end); | 166 | next = pgd_addr_end(addr, end); |
162 | err = vmap_pud_range(pgd, addr, next, prot, pages); | 167 | err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); |
163 | if (err) | 168 | if (err) |
164 | break; | 169 | break; |
165 | } while (pgd++, addr = next, addr != end); | 170 | } while (pgd++, addr = next, addr != end); |
166 | flush_cache_vmap((unsigned long) area->addr, end); | 171 | flush_cache_vmap(addr, end); |
167 | return err; | 172 | |
173 | if (unlikely(err)) | ||
174 | return err; | ||
175 | return nr; | ||
168 | } | 176 | } |
169 | EXPORT_SYMBOL_GPL(map_vm_area); | ||
170 | 177 | ||
171 | /* | 178 | /* |
172 | * Map a vmalloc()-space virtual address to the physical page. | 179 | * Walk a vmap address to the struct page it maps. |
173 | */ | 180 | */ |
174 | struct page *vmalloc_to_page(const void *vmalloc_addr) | 181 | struct page *vmalloc_to_page(const void *vmalloc_addr) |
175 | { | 182 | { |
176 | unsigned long addr = (unsigned long) vmalloc_addr; | 183 | unsigned long addr = (unsigned long) vmalloc_addr; |
177 | struct page *page = NULL; | 184 | struct page *page = NULL; |
178 | pgd_t *pgd = pgd_offset_k(addr); | 185 | pgd_t *pgd = pgd_offset_k(addr); |
179 | pud_t *pud; | ||
180 | pmd_t *pmd; | ||
181 | pte_t *ptep, pte; | ||
182 | 186 | ||
183 | /* | 187 | /* |
184 | * XXX we might need to change this if we add VIRTUAL_BUG_ON for | 188 | * XXX we might need to change this if we add VIRTUAL_BUG_ON for |
@@ -188,10 +192,12 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) | |||
188 | !is_module_address(addr)); | 192 | !is_module_address(addr)); |
189 | 193 | ||
190 | if (!pgd_none(*pgd)) { | 194 | if (!pgd_none(*pgd)) { |
191 | pud = pud_offset(pgd, addr); | 195 | pud_t *pud = pud_offset(pgd, addr); |
192 | if (!pud_none(*pud)) { | 196 | if (!pud_none(*pud)) { |
193 | pmd = pmd_offset(pud, addr); | 197 | pmd_t *pmd = pmd_offset(pud, addr); |
194 | if (!pmd_none(*pmd)) { | 198 | if (!pmd_none(*pmd)) { |
199 | pte_t *ptep, pte; | ||
200 | |||
195 | ptep = pte_offset_map(pmd, addr); | 201 | ptep = pte_offset_map(pmd, addr); |
196 | pte = *ptep; | 202 | pte = *ptep; |
197 | if (pte_present(pte)) | 203 | if (pte_present(pte)) |
@@ -213,13 +219,751 @@ unsigned long vmalloc_to_pfn(const void *vmalloc_addr) | |||
213 | } | 219 | } |
214 | EXPORT_SYMBOL(vmalloc_to_pfn); | 220 | EXPORT_SYMBOL(vmalloc_to_pfn); |
215 | 221 | ||
216 | static struct vm_struct * | 222 | |
217 | __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start, | 223 | /*** Global kva allocator ***/ |
218 | unsigned long end, int node, gfp_t gfp_mask, void *caller) | 224 | |
225 | #define VM_LAZY_FREE 0x01 | ||
226 | #define VM_LAZY_FREEING 0x02 | ||
227 | #define VM_VM_AREA 0x04 | ||
228 | |||
229 | struct vmap_area { | ||
230 | unsigned long va_start; | ||
231 | unsigned long va_end; | ||
232 | unsigned long flags; | ||
233 | struct rb_node rb_node; /* address sorted rbtree */ | ||
234 | struct list_head list; /* address sorted list */ | ||
235 | struct list_head purge_list; /* "lazy purge" list */ | ||
236 | void *private; | ||
237 | struct rcu_head rcu_head; | ||
238 | }; | ||
239 | |||
240 | static DEFINE_SPINLOCK(vmap_area_lock); | ||
241 | static struct rb_root vmap_area_root = RB_ROOT; | ||
242 | static LIST_HEAD(vmap_area_list); | ||
243 | |||
244 | static struct vmap_area *__find_vmap_area(unsigned long addr) | ||
219 | { | 245 | { |
220 | struct vm_struct **p, *tmp, *area; | 246 | struct rb_node *n = vmap_area_root.rb_node; |
221 | unsigned long align = 1; | 247 | |
248 | while (n) { | ||
249 | struct vmap_area *va; | ||
250 | |||
251 | va = rb_entry(n, struct vmap_area, rb_node); | ||
252 | if (addr < va->va_start) | ||
253 | n = n->rb_left; | ||
254 | else if (addr > va->va_start) | ||
255 | n = n->rb_right; | ||
256 | else | ||
257 | return va; | ||
258 | } | ||
259 | |||
260 | return NULL; | ||
261 | } | ||
262 | |||
263 | static void __insert_vmap_area(struct vmap_area *va) | ||
264 | { | ||
265 | struct rb_node **p = &vmap_area_root.rb_node; | ||
266 | struct rb_node *parent = NULL; | ||
267 | struct rb_node *tmp; | ||
268 | |||
269 | while (*p) { | ||
270 | struct vmap_area *tmp; | ||
271 | |||
272 | parent = *p; | ||
273 | tmp = rb_entry(parent, struct vmap_area, rb_node); | ||
274 | if (va->va_start < tmp->va_end) | ||
275 | p = &(*p)->rb_left; | ||
276 | else if (va->va_end > tmp->va_start) | ||
277 | p = &(*p)->rb_right; | ||
278 | else | ||
279 | BUG(); | ||
280 | } | ||
281 | |||
282 | rb_link_node(&va->rb_node, parent, p); | ||
283 | rb_insert_color(&va->rb_node, &vmap_area_root); | ||
284 | |||
285 | /* address-sort this list so it is usable like the vmlist */ | ||
286 | tmp = rb_prev(&va->rb_node); | ||
287 | if (tmp) { | ||
288 | struct vmap_area *prev; | ||
289 | prev = rb_entry(tmp, struct vmap_area, rb_node); | ||
290 | list_add_rcu(&va->list, &prev->list); | ||
291 | } else | ||
292 | list_add_rcu(&va->list, &vmap_area_list); | ||
293 | } | ||
294 | |||
295 | static void purge_vmap_area_lazy(void); | ||
296 | |||
297 | /* | ||
298 | * Allocate a region of KVA of the specified size and alignment, within the | ||
299 | * vstart and vend. | ||
300 | */ | ||
301 | static struct vmap_area *alloc_vmap_area(unsigned long size, | ||
302 | unsigned long align, | ||
303 | unsigned long vstart, unsigned long vend, | ||
304 | int node, gfp_t gfp_mask) | ||
305 | { | ||
306 | struct vmap_area *va; | ||
307 | struct rb_node *n; | ||
308 | unsigned long addr; | ||
309 | int purged = 0; | ||
310 | |||
311 | BUG_ON(size & ~PAGE_MASK); | ||
312 | |||
313 | addr = ALIGN(vstart, align); | ||
314 | |||
315 | va = kmalloc_node(sizeof(struct vmap_area), | ||
316 | gfp_mask & GFP_RECLAIM_MASK, node); | ||
317 | if (unlikely(!va)) | ||
318 | return ERR_PTR(-ENOMEM); | ||
319 | |||
320 | retry: | ||
321 | spin_lock(&vmap_area_lock); | ||
322 | /* XXX: could have a last_hole cache */ | ||
323 | n = vmap_area_root.rb_node; | ||
324 | if (n) { | ||
325 | struct vmap_area *first = NULL; | ||
326 | |||
327 | do { | ||
328 | struct vmap_area *tmp; | ||
329 | tmp = rb_entry(n, struct vmap_area, rb_node); | ||
330 | if (tmp->va_end >= addr) { | ||
331 | if (!first && tmp->va_start < addr + size) | ||
332 | first = tmp; | ||
333 | n = n->rb_left; | ||
334 | } else { | ||
335 | first = tmp; | ||
336 | n = n->rb_right; | ||
337 | } | ||
338 | } while (n); | ||
339 | |||
340 | if (!first) | ||
341 | goto found; | ||
342 | |||
343 | if (first->va_end < addr) { | ||
344 | n = rb_next(&first->rb_node); | ||
345 | if (n) | ||
346 | first = rb_entry(n, struct vmap_area, rb_node); | ||
347 | else | ||
348 | goto found; | ||
349 | } | ||
350 | |||
351 | while (addr + size >= first->va_start && addr + size <= vend) { | ||
352 | addr = ALIGN(first->va_end + PAGE_SIZE, align); | ||
353 | |||
354 | n = rb_next(&first->rb_node); | ||
355 | if (n) | ||
356 | first = rb_entry(n, struct vmap_area, rb_node); | ||
357 | else | ||
358 | goto found; | ||
359 | } | ||
360 | } | ||
361 | found: | ||
362 | if (addr + size > vend) { | ||
363 | spin_unlock(&vmap_area_lock); | ||
364 | if (!purged) { | ||
365 | purge_vmap_area_lazy(); | ||
366 | purged = 1; | ||
367 | goto retry; | ||
368 | } | ||
369 | if (printk_ratelimit()) | ||
370 | printk(KERN_WARNING "vmap allocation failed: " | ||
371 | "use vmalloc=<size> to increase size.\n"); | ||
372 | return ERR_PTR(-EBUSY); | ||
373 | } | ||
374 | |||
375 | BUG_ON(addr & (align-1)); | ||
376 | |||
377 | va->va_start = addr; | ||
378 | va->va_end = addr + size; | ||
379 | va->flags = 0; | ||
380 | __insert_vmap_area(va); | ||
381 | spin_unlock(&vmap_area_lock); | ||
382 | |||
383 | return va; | ||
384 | } | ||
385 | |||
386 | static void rcu_free_va(struct rcu_head *head) | ||
387 | { | ||
388 | struct vmap_area *va = container_of(head, struct vmap_area, rcu_head); | ||
389 | |||
390 | kfree(va); | ||
391 | } | ||
392 | |||
393 | static void __free_vmap_area(struct vmap_area *va) | ||
394 | { | ||
395 | BUG_ON(RB_EMPTY_NODE(&va->rb_node)); | ||
396 | rb_erase(&va->rb_node, &vmap_area_root); | ||
397 | RB_CLEAR_NODE(&va->rb_node); | ||
398 | list_del_rcu(&va->list); | ||
399 | |||
400 | call_rcu(&va->rcu_head, rcu_free_va); | ||
401 | } | ||
402 | |||
403 | /* | ||
404 | * Free a region of KVA allocated by alloc_vmap_area | ||
405 | */ | ||
406 | static void free_vmap_area(struct vmap_area *va) | ||
407 | { | ||
408 | spin_lock(&vmap_area_lock); | ||
409 | __free_vmap_area(va); | ||
410 | spin_unlock(&vmap_area_lock); | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * Clear the pagetable entries of a given vmap_area | ||
415 | */ | ||
416 | static void unmap_vmap_area(struct vmap_area *va) | ||
417 | { | ||
418 | vunmap_page_range(va->va_start, va->va_end); | ||
419 | } | ||
420 | |||
421 | /* | ||
422 | * lazy_max_pages is the maximum amount of virtual address space we gather up | ||
423 | * before attempting to purge with a TLB flush. | ||
424 | * | ||
425 | * There is a tradeoff here: a larger number will cover more kernel page tables | ||
426 | * and take slightly longer to purge, but it will linearly reduce the number of | ||
427 | * global TLB flushes that must be performed. It would seem natural to scale | ||
428 | * this number up linearly with the number of CPUs (because vmapping activity | ||
429 | * could also scale linearly with the number of CPUs), however it is likely | ||
430 | * that in practice, workloads might be constrained in other ways that mean | ||
431 | * vmap activity will not scale linearly with CPUs. Also, I want to be | ||
432 | * conservative and not introduce a big latency on huge systems, so go with | ||
433 | * a less aggressive log scale. It will still be an improvement over the old | ||
434 | * code, and it will be simple to change the scale factor if we find that it | ||
435 | * becomes a problem on bigger systems. | ||
436 | */ | ||
437 | static unsigned long lazy_max_pages(void) | ||
438 | { | ||
439 | unsigned int log; | ||
440 | |||
441 | log = fls(num_online_cpus()); | ||
442 | |||
443 | return log * (32UL * 1024 * 1024 / PAGE_SIZE); | ||
444 | } | ||
445 | |||
446 | static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); | ||
447 | |||
448 | /* | ||
449 | * Purges all lazily-freed vmap areas. | ||
450 | * | ||
451 | * If sync is 0 then don't purge if there is already a purge in progress. | ||
452 | * If force_flush is 1, then flush kernel TLBs between *start and *end even | ||
453 | * if we found no lazy vmap areas to unmap (callers can use this to optimise | ||
454 | * their own TLB flushing). | ||
455 | * Returns with *start = min(*start, lowest purged address) | ||
456 | * *end = max(*end, highest purged address) | ||
457 | */ | ||
458 | static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | ||
459 | int sync, int force_flush) | ||
460 | { | ||
461 | static DEFINE_SPINLOCK(purge_lock); | ||
462 | LIST_HEAD(valist); | ||
463 | struct vmap_area *va; | ||
464 | int nr = 0; | ||
465 | |||
466 | /* | ||
467 | * If sync is 0 but force_flush is 1, we'll go sync anyway but callers | ||
468 | * should not expect such behaviour. This just simplifies locking for | ||
469 | * the case that isn't actually used at the moment anyway. | ||
470 | */ | ||
471 | if (!sync && !force_flush) { | ||
472 | if (!spin_trylock(&purge_lock)) | ||
473 | return; | ||
474 | } else | ||
475 | spin_lock(&purge_lock); | ||
476 | |||
477 | rcu_read_lock(); | ||
478 | list_for_each_entry_rcu(va, &vmap_area_list, list) { | ||
479 | if (va->flags & VM_LAZY_FREE) { | ||
480 | if (va->va_start < *start) | ||
481 | *start = va->va_start; | ||
482 | if (va->va_end > *end) | ||
483 | *end = va->va_end; | ||
484 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; | ||
485 | unmap_vmap_area(va); | ||
486 | list_add_tail(&va->purge_list, &valist); | ||
487 | va->flags |= VM_LAZY_FREEING; | ||
488 | va->flags &= ~VM_LAZY_FREE; | ||
489 | } | ||
490 | } | ||
491 | rcu_read_unlock(); | ||
492 | |||
493 | if (nr) { | ||
494 | BUG_ON(nr > atomic_read(&vmap_lazy_nr)); | ||
495 | atomic_sub(nr, &vmap_lazy_nr); | ||
496 | } | ||
497 | |||
498 | if (nr || force_flush) | ||
499 | flush_tlb_kernel_range(*start, *end); | ||
500 | |||
501 | if (nr) { | ||
502 | spin_lock(&vmap_area_lock); | ||
503 | list_for_each_entry(va, &valist, purge_list) | ||
504 | __free_vmap_area(va); | ||
505 | spin_unlock(&vmap_area_lock); | ||
506 | } | ||
507 | spin_unlock(&purge_lock); | ||
508 | } | ||
509 | |||
510 | /* | ||
511 | * Kick off a purge of the outstanding lazy areas. | ||
512 | */ | ||
513 | static void purge_vmap_area_lazy(void) | ||
514 | { | ||
515 | unsigned long start = ULONG_MAX, end = 0; | ||
516 | |||
517 | __purge_vmap_area_lazy(&start, &end, 0, 0); | ||
518 | } | ||
519 | |||
520 | /* | ||
521 | * Free and unmap a vmap area | ||
522 | */ | ||
523 | static void free_unmap_vmap_area(struct vmap_area *va) | ||
524 | { | ||
525 | va->flags |= VM_LAZY_FREE; | ||
526 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); | ||
527 | if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) | ||
528 | purge_vmap_area_lazy(); | ||
529 | } | ||
530 | |||
531 | static struct vmap_area *find_vmap_area(unsigned long addr) | ||
532 | { | ||
533 | struct vmap_area *va; | ||
534 | |||
535 | spin_lock(&vmap_area_lock); | ||
536 | va = __find_vmap_area(addr); | ||
537 | spin_unlock(&vmap_area_lock); | ||
538 | |||
539 | return va; | ||
540 | } | ||
541 | |||
/*
 * Lazily free the vmap area found for @addr.  It is a BUG if the lookup
 * finds nothing.
 */
static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *area = find_vmap_area(addr);

	BUG_ON(!area);
	free_unmap_vmap_area(area);
}
550 | |||
551 | |||
552 | /*** Per cpu kva allocator ***/ | ||
553 | |||
/*
 * vmap space is limited, especially on 32 bit architectures, so the size
 * of a per-cpu vmap block is derived from the vmalloc space such that
 * there is room for at least 16 blocks per CPU.
 *
 * VMALLOC_START and VMALLOC_END are not constants, so
 * (VMALLOC_END-VMALLOC_START) cannot be used here; guess the size of the
 * vmalloc space instead (only a rough idea is needed).
 */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE		(128UL*1024*1024)
#else
#define VMALLOC_SPACE		(128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
#define VMAP_MAX_ALLOC		BITS_PER_LONG	/* 256K with 4K pages */
#define VMAP_BBMAP_BITS_MAX	1024	/* 4MB with 4K pages */
#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */

/*
 * Pages per vmap block: VMALLOC_PAGES / NR_CPUS / 16, clamped to
 * [VMAP_BBMAP_BITS_MIN, VMAP_BBMAP_BITS_MAX].
 *
 * NOTE(review): vb_free() and addr_to_vb_idx() mask addresses with
 * (VMAP_BLOCK_SIZE - 1), which presumably requires this value to be a
 * power of two -- confirm for NR_CPUS values that are not.
 */
#define VMAP_BBMAP_BITS		VMAP_MIN(VMAP_BBMAP_BITS_MAX,		\
					VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
						VMALLOC_PAGES / NR_CPUS / 16))

/* Size in bytes of one vmap block */
#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
580 | |||
581 | struct vmap_block_queue { | ||
582 | spinlock_t lock; | ||
583 | struct list_head free; | ||
584 | struct list_head dirty; | ||
585 | unsigned int nr_dirty; | ||
586 | }; | ||
587 | |||
588 | struct vmap_block { | ||
589 | spinlock_t lock; | ||
590 | struct vmap_area *va; | ||
591 | struct vmap_block_queue *vbq; | ||
592 | unsigned long free, dirty; | ||
593 | DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); | ||
594 | DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); | ||
595 | union { | ||
596 | struct { | ||
597 | struct list_head free_list; | ||
598 | struct list_head dirty_list; | ||
599 | }; | ||
600 | struct rcu_head rcu_head; | ||
601 | }; | ||
602 | }; | ||
603 | |||
604 | /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ | ||
605 | static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue); | ||
606 | |||
607 | /* | ||
608 | * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block | ||
609 | * in the free path. Could get rid of this if we change the API to return a | ||
610 | * "cookie" from alloc, to be passed to free. But no big deal yet. | ||
611 | */ | ||
612 | static DEFINE_SPINLOCK(vmap_block_tree_lock); | ||
613 | static RADIX_TREE(vmap_block_tree, GFP_ATOMIC); | ||
614 | |||
615 | /* | ||
616 | * We should probably have a fallback mechanism to allocate virtual memory | ||
617 | * out of partially filled vmap blocks. However vmap block sizing should be | ||
618 | * fairly reasonable according to the vmalloc size, so it shouldn't be a | ||
619 | * big problem. | ||
620 | */ | ||
621 | |||
622 | static unsigned long addr_to_vb_idx(unsigned long addr) | ||
623 | { | ||
624 | addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1); | ||
625 | addr /= VMAP_BLOCK_SIZE; | ||
626 | return addr; | ||
627 | } | ||
628 | |||
629 | static struct vmap_block *new_vmap_block(gfp_t gfp_mask) | ||
630 | { | ||
631 | struct vmap_block_queue *vbq; | ||
632 | struct vmap_block *vb; | ||
633 | struct vmap_area *va; | ||
634 | unsigned long vb_idx; | ||
635 | int node, err; | ||
636 | |||
637 | node = numa_node_id(); | ||
638 | |||
639 | vb = kmalloc_node(sizeof(struct vmap_block), | ||
640 | gfp_mask & GFP_RECLAIM_MASK, node); | ||
641 | if (unlikely(!vb)) | ||
642 | return ERR_PTR(-ENOMEM); | ||
643 | |||
644 | va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE, | ||
645 | VMALLOC_START, VMALLOC_END, | ||
646 | node, gfp_mask); | ||
647 | if (unlikely(IS_ERR(va))) { | ||
648 | kfree(vb); | ||
649 | return ERR_PTR(PTR_ERR(va)); | ||
650 | } | ||
651 | |||
652 | err = radix_tree_preload(gfp_mask); | ||
653 | if (unlikely(err)) { | ||
654 | kfree(vb); | ||
655 | free_vmap_area(va); | ||
656 | return ERR_PTR(err); | ||
657 | } | ||
658 | |||
659 | spin_lock_init(&vb->lock); | ||
660 | vb->va = va; | ||
661 | vb->free = VMAP_BBMAP_BITS; | ||
662 | vb->dirty = 0; | ||
663 | bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS); | ||
664 | bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS); | ||
665 | INIT_LIST_HEAD(&vb->free_list); | ||
666 | INIT_LIST_HEAD(&vb->dirty_list); | ||
667 | |||
668 | vb_idx = addr_to_vb_idx(va->va_start); | ||
669 | spin_lock(&vmap_block_tree_lock); | ||
670 | err = radix_tree_insert(&vmap_block_tree, vb_idx, vb); | ||
671 | spin_unlock(&vmap_block_tree_lock); | ||
672 | BUG_ON(err); | ||
673 | radix_tree_preload_end(); | ||
674 | |||
675 | vbq = &get_cpu_var(vmap_block_queue); | ||
676 | vb->vbq = vbq; | ||
677 | spin_lock(&vbq->lock); | ||
678 | list_add(&vb->free_list, &vbq->free); | ||
679 | spin_unlock(&vbq->lock); | ||
680 | put_cpu_var(vmap_cpu_blocks); | ||
681 | |||
682 | return vb; | ||
683 | } | ||
684 | |||
685 | static void rcu_free_vb(struct rcu_head *head) | ||
686 | { | ||
687 | struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head); | ||
688 | |||
689 | kfree(vb); | ||
690 | } | ||
691 | |||
692 | static void free_vmap_block(struct vmap_block *vb) | ||
693 | { | ||
694 | struct vmap_block *tmp; | ||
695 | unsigned long vb_idx; | ||
696 | |||
697 | spin_lock(&vb->vbq->lock); | ||
698 | if (!list_empty(&vb->free_list)) | ||
699 | list_del(&vb->free_list); | ||
700 | if (!list_empty(&vb->dirty_list)) | ||
701 | list_del(&vb->dirty_list); | ||
702 | spin_unlock(&vb->vbq->lock); | ||
703 | |||
704 | vb_idx = addr_to_vb_idx(vb->va->va_start); | ||
705 | spin_lock(&vmap_block_tree_lock); | ||
706 | tmp = radix_tree_delete(&vmap_block_tree, vb_idx); | ||
707 | spin_unlock(&vmap_block_tree_lock); | ||
708 | BUG_ON(tmp != vb); | ||
709 | |||
710 | free_unmap_vmap_area(vb->va); | ||
711 | call_rcu(&vb->rcu_head, rcu_free_vb); | ||
712 | } | ||
713 | |||
714 | static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | ||
715 | { | ||
716 | struct vmap_block_queue *vbq; | ||
717 | struct vmap_block *vb; | ||
718 | unsigned long addr = 0; | ||
719 | unsigned int order; | ||
720 | |||
721 | BUG_ON(size & ~PAGE_MASK); | ||
722 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | ||
723 | order = get_order(size); | ||
724 | |||
725 | again: | ||
726 | rcu_read_lock(); | ||
727 | vbq = &get_cpu_var(vmap_block_queue); | ||
728 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | ||
729 | int i; | ||
730 | |||
731 | spin_lock(&vb->lock); | ||
732 | i = bitmap_find_free_region(vb->alloc_map, | ||
733 | VMAP_BBMAP_BITS, order); | ||
734 | |||
735 | if (i >= 0) { | ||
736 | addr = vb->va->va_start + (i << PAGE_SHIFT); | ||
737 | BUG_ON(addr_to_vb_idx(addr) != | ||
738 | addr_to_vb_idx(vb->va->va_start)); | ||
739 | vb->free -= 1UL << order; | ||
740 | if (vb->free == 0) { | ||
741 | spin_lock(&vbq->lock); | ||
742 | list_del_init(&vb->free_list); | ||
743 | spin_unlock(&vbq->lock); | ||
744 | } | ||
745 | spin_unlock(&vb->lock); | ||
746 | break; | ||
747 | } | ||
748 | spin_unlock(&vb->lock); | ||
749 | } | ||
750 | put_cpu_var(vmap_cpu_blocks); | ||
751 | rcu_read_unlock(); | ||
752 | |||
753 | if (!addr) { | ||
754 | vb = new_vmap_block(gfp_mask); | ||
755 | if (IS_ERR(vb)) | ||
756 | return vb; | ||
757 | goto again; | ||
758 | } | ||
759 | |||
760 | return (void *)addr; | ||
761 | } | ||
762 | |||
763 | static void vb_free(const void *addr, unsigned long size) | ||
764 | { | ||
765 | unsigned long offset; | ||
766 | unsigned long vb_idx; | ||
767 | unsigned int order; | ||
768 | struct vmap_block *vb; | ||
769 | |||
770 | BUG_ON(size & ~PAGE_MASK); | ||
771 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | ||
772 | order = get_order(size); | ||
773 | |||
774 | offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); | ||
775 | |||
776 | vb_idx = addr_to_vb_idx((unsigned long)addr); | ||
777 | rcu_read_lock(); | ||
778 | vb = radix_tree_lookup(&vmap_block_tree, vb_idx); | ||
779 | rcu_read_unlock(); | ||
780 | BUG_ON(!vb); | ||
781 | |||
782 | spin_lock(&vb->lock); | ||
783 | bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order); | ||
784 | if (!vb->dirty) { | ||
785 | spin_lock(&vb->vbq->lock); | ||
786 | list_add(&vb->dirty_list, &vb->vbq->dirty); | ||
787 | spin_unlock(&vb->vbq->lock); | ||
788 | } | ||
789 | vb->dirty += 1UL << order; | ||
790 | if (vb->dirty == VMAP_BBMAP_BITS) { | ||
791 | BUG_ON(vb->free || !list_empty(&vb->free_list)); | ||
792 | spin_unlock(&vb->lock); | ||
793 | free_vmap_block(vb); | ||
794 | } else | ||
795 | spin_unlock(&vb->lock); | ||
796 | } | ||
797 | |||
798 | /** | ||
799 | * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer | ||
800 | * | ||
801 | * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily | ||
802 | * to amortize TLB flushing overheads. What this means is that any page you | ||
803 | * have now, may, in a former life, have been mapped into kernel virtual | ||
804 | * address by the vmap layer and so there might be some CPUs with TLB entries | ||
805 | * still referencing that page (additional to the regular 1:1 kernel mapping). | ||
806 | * | ||
807 | * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can | ||
808 | * be sure that none of the pages we have control over will have any aliases | ||
809 | * from the vmap layer. | ||
810 | */ | ||
811 | void vm_unmap_aliases(void) | ||
812 | { | ||
813 | unsigned long start = ULONG_MAX, end = 0; | ||
814 | int cpu; | ||
815 | int flush = 0; | ||
816 | |||
817 | for_each_possible_cpu(cpu) { | ||
818 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); | ||
819 | struct vmap_block *vb; | ||
820 | |||
821 | rcu_read_lock(); | ||
822 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | ||
823 | int i; | ||
824 | |||
825 | spin_lock(&vb->lock); | ||
826 | i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS); | ||
827 | while (i < VMAP_BBMAP_BITS) { | ||
828 | unsigned long s, e; | ||
829 | int j; | ||
830 | j = find_next_zero_bit(vb->dirty_map, | ||
831 | VMAP_BBMAP_BITS, i); | ||
832 | |||
833 | s = vb->va->va_start + (i << PAGE_SHIFT); | ||
834 | e = vb->va->va_start + (j << PAGE_SHIFT); | ||
835 | vunmap_page_range(s, e); | ||
836 | flush = 1; | ||
837 | |||
838 | if (s < start) | ||
839 | start = s; | ||
840 | if (e > end) | ||
841 | end = e; | ||
842 | |||
843 | i = j; | ||
844 | i = find_next_bit(vb->dirty_map, | ||
845 | VMAP_BBMAP_BITS, i); | ||
846 | } | ||
847 | spin_unlock(&vb->lock); | ||
848 | } | ||
849 | rcu_read_unlock(); | ||
850 | } | ||
851 | |||
852 | __purge_vmap_area_lazy(&start, &end, 1, flush); | ||
853 | } | ||
854 | EXPORT_SYMBOL_GPL(vm_unmap_aliases); | ||
855 | |||
856 | /** | ||
857 | * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram | ||
858 | * @mem: the pointer returned by vm_map_ram | ||
859 | * @count: the count passed to that vm_map_ram call (cannot unmap partial) | ||
860 | */ | ||
861 | void vm_unmap_ram(const void *mem, unsigned int count) | ||
862 | { | ||
863 | unsigned long size = count << PAGE_SHIFT; | ||
864 | unsigned long addr = (unsigned long)mem; | ||
865 | |||
866 | BUG_ON(!addr); | ||
867 | BUG_ON(addr < VMALLOC_START); | ||
868 | BUG_ON(addr > VMALLOC_END); | ||
869 | BUG_ON(addr & (PAGE_SIZE-1)); | ||
870 | |||
871 | debug_check_no_locks_freed(mem, size); | ||
872 | |||
873 | if (likely(count <= VMAP_MAX_ALLOC)) | ||
874 | vb_free(mem, size); | ||
875 | else | ||
876 | free_unmap_vmap_area_addr(addr); | ||
877 | } | ||
878 | EXPORT_SYMBOL(vm_unmap_ram); | ||
879 | |||
880 | /** | ||
881 | * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space) | ||
882 | * @pages: an array of pointers to the pages to be mapped | ||
883 | * @count: number of pages | ||
884 | * @node: prefer to allocate data structures on this node | ||
885 | * @prot: memory protection to use. PAGE_KERNEL for regular RAM | ||
886 | * @returns: a pointer to the address that has been mapped, or NULL on failure | ||
887 | */ | ||
888 | void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) | ||
889 | { | ||
890 | unsigned long size = count << PAGE_SHIFT; | ||
222 | unsigned long addr; | 891 | unsigned long addr; |
892 | void *mem; | ||
893 | |||
894 | if (likely(count <= VMAP_MAX_ALLOC)) { | ||
895 | mem = vb_alloc(size, GFP_KERNEL); | ||
896 | if (IS_ERR(mem)) | ||
897 | return NULL; | ||
898 | addr = (unsigned long)mem; | ||
899 | } else { | ||
900 | struct vmap_area *va; | ||
901 | va = alloc_vmap_area(size, PAGE_SIZE, | ||
902 | VMALLOC_START, VMALLOC_END, node, GFP_KERNEL); | ||
903 | if (IS_ERR(va)) | ||
904 | return NULL; | ||
905 | |||
906 | addr = va->va_start; | ||
907 | mem = (void *)addr; | ||
908 | } | ||
909 | if (vmap_page_range(addr, addr + size, prot, pages) < 0) { | ||
910 | vm_unmap_ram(mem, count); | ||
911 | return NULL; | ||
912 | } | ||
913 | return mem; | ||
914 | } | ||
915 | EXPORT_SYMBOL(vm_map_ram); | ||
916 | |||
917 | void __init vmalloc_init(void) | ||
918 | { | ||
919 | int i; | ||
920 | |||
921 | for_each_possible_cpu(i) { | ||
922 | struct vmap_block_queue *vbq; | ||
923 | |||
924 | vbq = &per_cpu(vmap_block_queue, i); | ||
925 | spin_lock_init(&vbq->lock); | ||
926 | INIT_LIST_HEAD(&vbq->free); | ||
927 | INIT_LIST_HEAD(&vbq->dirty); | ||
928 | vbq->nr_dirty = 0; | ||
929 | } | ||
930 | } | ||
931 | |||
932 | void unmap_kernel_range(unsigned long addr, unsigned long size) | ||
933 | { | ||
934 | unsigned long end = addr + size; | ||
935 | vunmap_page_range(addr, end); | ||
936 | flush_tlb_kernel_range(addr, end); | ||
937 | } | ||
938 | |||
939 | int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) | ||
940 | { | ||
941 | unsigned long addr = (unsigned long)area->addr; | ||
942 | unsigned long end = addr + area->size - PAGE_SIZE; | ||
943 | int err; | ||
944 | |||
945 | err = vmap_page_range(addr, end, prot, *pages); | ||
946 | if (err > 0) { | ||
947 | *pages += err; | ||
948 | err = 0; | ||
949 | } | ||
950 | |||
951 | return err; | ||
952 | } | ||
953 | EXPORT_SYMBOL_GPL(map_vm_area); | ||
954 | |||
955 | /*** Old vmalloc interfaces ***/ | ||
956 | DEFINE_RWLOCK(vmlist_lock); | ||
957 | struct vm_struct *vmlist; | ||
958 | |||
959 | static struct vm_struct *__get_vm_area_node(unsigned long size, | ||
960 | unsigned long flags, unsigned long start, unsigned long end, | ||
961 | int node, gfp_t gfp_mask, void *caller) | ||
962 | { | ||
963 | static struct vmap_area *va; | ||
964 | struct vm_struct *area; | ||
965 | struct vm_struct *tmp, **p; | ||
966 | unsigned long align = 1; | ||
223 | 967 | ||
224 | BUG_ON(in_interrupt()); | 968 | BUG_ON(in_interrupt()); |
225 | if (flags & VM_IOREMAP) { | 969 | if (flags & VM_IOREMAP) { |
@@ -232,13 +976,12 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start, | |||
232 | 976 | ||
233 | align = 1ul << bit; | 977 | align = 1ul << bit; |
234 | } | 978 | } |
235 | addr = ALIGN(start, align); | 979 | |
236 | size = PAGE_ALIGN(size); | 980 | size = PAGE_ALIGN(size); |
237 | if (unlikely(!size)) | 981 | if (unlikely(!size)) |
238 | return NULL; | 982 | return NULL; |
239 | 983 | ||
240 | area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); | 984 | area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node); |
241 | |||
242 | if (unlikely(!area)) | 985 | if (unlikely(!area)) |
243 | return NULL; | 986 | return NULL; |
244 | 987 | ||
@@ -247,48 +990,32 @@ __get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start, | |||
247 | */ | 990 | */ |
248 | size += PAGE_SIZE; | 991 | size += PAGE_SIZE; |
249 | 992 | ||
250 | write_lock(&vmlist_lock); | 993 | va = alloc_vmap_area(size, align, start, end, node, gfp_mask); |
251 | for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) { | 994 | if (IS_ERR(va)) { |
252 | if ((unsigned long)tmp->addr < addr) { | 995 | kfree(area); |
253 | if((unsigned long)tmp->addr + tmp->size >= addr) | 996 | return NULL; |
254 | addr = ALIGN(tmp->size + | ||
255 | (unsigned long)tmp->addr, align); | ||
256 | continue; | ||
257 | } | ||
258 | if ((size + addr) < addr) | ||
259 | goto out; | ||
260 | if (size + addr <= (unsigned long)tmp->addr) | ||
261 | goto found; | ||
262 | addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align); | ||
263 | if (addr > end - size) | ||
264 | goto out; | ||
265 | } | 997 | } |
266 | if ((size + addr) < addr) | ||
267 | goto out; | ||
268 | if (addr > end - size) | ||
269 | goto out; | ||
270 | |||
271 | found: | ||
272 | area->next = *p; | ||
273 | *p = area; | ||
274 | 998 | ||
275 | area->flags = flags; | 999 | area->flags = flags; |
276 | area->addr = (void *)addr; | 1000 | area->addr = (void *)va->va_start; |
277 | area->size = size; | 1001 | area->size = size; |
278 | area->pages = NULL; | 1002 | area->pages = NULL; |
279 | area->nr_pages = 0; | 1003 | area->nr_pages = 0; |
280 | area->phys_addr = 0; | 1004 | area->phys_addr = 0; |
281 | area->caller = caller; | 1005 | area->caller = caller; |
1006 | va->private = area; | ||
1007 | va->flags |= VM_VM_AREA; | ||
1008 | |||
1009 | write_lock(&vmlist_lock); | ||
1010 | for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) { | ||
1011 | if (tmp->addr >= area->addr) | ||
1012 | break; | ||
1013 | } | ||
1014 | area->next = *p; | ||
1015 | *p = area; | ||
282 | write_unlock(&vmlist_lock); | 1016 | write_unlock(&vmlist_lock); |
283 | 1017 | ||
284 | return area; | 1018 | return area; |
285 | |||
286 | out: | ||
287 | write_unlock(&vmlist_lock); | ||
288 | kfree(area); | ||
289 | if (printk_ratelimit()) | ||
290 | printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n"); | ||
291 | return NULL; | ||
292 | } | 1019 | } |
293 | 1020 | ||
294 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, | 1021 | struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, |
@@ -328,39 +1055,15 @@ struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, | |||
328 | gfp_mask, __builtin_return_address(0)); | 1055 | gfp_mask, __builtin_return_address(0)); |
329 | } | 1056 | } |
330 | 1057 | ||
331 | /* Caller must hold vmlist_lock */ | 1058 | static struct vm_struct *find_vm_area(const void *addr) |
332 | static struct vm_struct *__find_vm_area(const void *addr) | ||
333 | { | 1059 | { |
334 | struct vm_struct *tmp; | 1060 | struct vmap_area *va; |
335 | 1061 | ||
336 | for (tmp = vmlist; tmp != NULL; tmp = tmp->next) { | 1062 | va = find_vmap_area((unsigned long)addr); |
337 | if (tmp->addr == addr) | 1063 | if (va && va->flags & VM_VM_AREA) |
338 | break; | 1064 | return va->private; |
339 | } | ||
340 | |||
341 | return tmp; | ||
342 | } | ||
343 | |||
344 | /* Caller must hold vmlist_lock */ | ||
345 | static struct vm_struct *__remove_vm_area(const void *addr) | ||
346 | { | ||
347 | struct vm_struct **p, *tmp; | ||
348 | 1065 | ||
349 | for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) { | ||
350 | if (tmp->addr == addr) | ||
351 | goto found; | ||
352 | } | ||
353 | return NULL; | 1066 | return NULL; |
354 | |||
355 | found: | ||
356 | unmap_vm_area(tmp); | ||
357 | *p = tmp->next; | ||
358 | |||
359 | /* | ||
360 | * Remove the guard page. | ||
361 | */ | ||
362 | tmp->size -= PAGE_SIZE; | ||
363 | return tmp; | ||
364 | } | 1067 | } |
365 | 1068 | ||
366 | /** | 1069 | /** |
@@ -373,11 +1076,24 @@ found: | |||
373 | */ | 1076 | */ |
374 | struct vm_struct *remove_vm_area(const void *addr) | 1077 | struct vm_struct *remove_vm_area(const void *addr) |
375 | { | 1078 | { |
376 | struct vm_struct *v; | 1079 | struct vmap_area *va; |
377 | write_lock(&vmlist_lock); | 1080 | |
378 | v = __remove_vm_area(addr); | 1081 | va = find_vmap_area((unsigned long)addr); |
379 | write_unlock(&vmlist_lock); | 1082 | if (va && va->flags & VM_VM_AREA) { |
380 | return v; | 1083 | struct vm_struct *vm = va->private; |
1084 | struct vm_struct *tmp, **p; | ||
1085 | free_unmap_vmap_area(va); | ||
1086 | vm->size -= PAGE_SIZE; | ||
1087 | |||
1088 | write_lock(&vmlist_lock); | ||
1089 | for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) | ||
1090 | ; | ||
1091 | *p = tmp->next; | ||
1092 | write_unlock(&vmlist_lock); | ||
1093 | |||
1094 | return vm; | ||
1095 | } | ||
1096 | return NULL; | ||
381 | } | 1097 | } |
382 | 1098 | ||
383 | static void __vunmap(const void *addr, int deallocate_pages) | 1099 | static void __vunmap(const void *addr, int deallocate_pages) |
@@ -487,6 +1203,8 @@ void *vmap(struct page **pages, unsigned int count, | |||
487 | } | 1203 | } |
488 | EXPORT_SYMBOL(vmap); | 1204 | EXPORT_SYMBOL(vmap); |
489 | 1205 | ||
1206 | static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | ||
1207 | int node, void *caller); | ||
490 | static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, | 1208 | static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, |
491 | pgprot_t prot, int node, void *caller) | 1209 | pgprot_t prot, int node, void *caller) |
492 | { | 1210 | { |
@@ -613,10 +1331,8 @@ void *vmalloc_user(unsigned long size) | |||
613 | 1331 | ||
614 | ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); | 1332 | ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); |
615 | if (ret) { | 1333 | if (ret) { |
616 | write_lock(&vmlist_lock); | 1334 | area = find_vm_area(ret); |
617 | area = __find_vm_area(ret); | ||
618 | area->flags |= VM_USERMAP; | 1335 | area->flags |= VM_USERMAP; |
619 | write_unlock(&vmlist_lock); | ||
620 | } | 1336 | } |
621 | return ret; | 1337 | return ret; |
622 | } | 1338 | } |
@@ -696,10 +1412,8 @@ void *vmalloc_32_user(unsigned long size) | |||
696 | 1412 | ||
697 | ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL); | 1413 | ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL); |
698 | if (ret) { | 1414 | if (ret) { |
699 | write_lock(&vmlist_lock); | 1415 | area = find_vm_area(ret); |
700 | area = __find_vm_area(ret); | ||
701 | area->flags |= VM_USERMAP; | 1416 | area->flags |= VM_USERMAP; |
702 | write_unlock(&vmlist_lock); | ||
703 | } | 1417 | } |
704 | return ret; | 1418 | return ret; |
705 | } | 1419 | } |
@@ -800,26 +1514,25 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, | |||
800 | struct vm_struct *area; | 1514 | struct vm_struct *area; |
801 | unsigned long uaddr = vma->vm_start; | 1515 | unsigned long uaddr = vma->vm_start; |
802 | unsigned long usize = vma->vm_end - vma->vm_start; | 1516 | unsigned long usize = vma->vm_end - vma->vm_start; |
803 | int ret; | ||
804 | 1517 | ||
805 | if ((PAGE_SIZE-1) & (unsigned long)addr) | 1518 | if ((PAGE_SIZE-1) & (unsigned long)addr) |
806 | return -EINVAL; | 1519 | return -EINVAL; |
807 | 1520 | ||
808 | read_lock(&vmlist_lock); | 1521 | area = find_vm_area(addr); |
809 | area = __find_vm_area(addr); | ||
810 | if (!area) | 1522 | if (!area) |
811 | goto out_einval_locked; | 1523 | return -EINVAL; |
812 | 1524 | ||
813 | if (!(area->flags & VM_USERMAP)) | 1525 | if (!(area->flags & VM_USERMAP)) |
814 | goto out_einval_locked; | 1526 | return -EINVAL; |
815 | 1527 | ||
816 | if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE) | 1528 | if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE) |
817 | goto out_einval_locked; | 1529 | return -EINVAL; |
818 | read_unlock(&vmlist_lock); | ||
819 | 1530 | ||
820 | addr += pgoff << PAGE_SHIFT; | 1531 | addr += pgoff << PAGE_SHIFT; |
821 | do { | 1532 | do { |
822 | struct page *page = vmalloc_to_page(addr); | 1533 | struct page *page = vmalloc_to_page(addr); |
1534 | int ret; | ||
1535 | |||
823 | ret = vm_insert_page(vma, uaddr, page); | 1536 | ret = vm_insert_page(vma, uaddr, page); |
824 | if (ret) | 1537 | if (ret) |
825 | return ret; | 1538 | return ret; |
@@ -832,11 +1545,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, | |||
832 | /* Prevent "things" like memory migration? VM_flags need a cleanup... */ | 1545 | /* Prevent "things" like memory migration? VM_flags need a cleanup... */ |
833 | vma->vm_flags |= VM_RESERVED; | 1546 | vma->vm_flags |= VM_RESERVED; |
834 | 1547 | ||
835 | return ret; | 1548 | return 0; |
836 | |||
837 | out_einval_locked: | ||
838 | read_unlock(&vmlist_lock); | ||
839 | return -EINVAL; | ||
840 | } | 1549 | } |
841 | EXPORT_SYMBOL(remap_vmalloc_range); | 1550 | EXPORT_SYMBOL(remap_vmalloc_range); |
842 | 1551 | ||