diff options
Diffstat (limited to 'mm/mremap.c')
-rw-r--r-- | mm/mremap.c | 426 |
1 files changed, 426 insertions, 0 deletions
diff --git a/mm/mremap.c b/mm/mremap.c new file mode 100644 index 000000000000..0d1c1b9c7a0a --- /dev/null +++ b/mm/mremap.c | |||
@@ -0,0 +1,426 @@ | |||
1 | /* | ||
2 | * mm/mremap.c | ||
3 | * | ||
4 | * (C) Copyright 1996 Linus Torvalds | ||
5 | * | ||
6 | * Address space accounting code <alan@redhat.com> | ||
7 | * (C) Copyright 2002 Red Hat Inc, All Rights Reserved | ||
8 | */ | ||
9 | |||
10 | #include <linux/mm.h> | ||
11 | #include <linux/hugetlb.h> | ||
12 | #include <linux/slab.h> | ||
13 | #include <linux/shm.h> | ||
14 | #include <linux/mman.h> | ||
15 | #include <linux/swap.h> | ||
16 | #include <linux/fs.h> | ||
17 | #include <linux/highmem.h> | ||
18 | #include <linux/security.h> | ||
19 | #include <linux/syscalls.h> | ||
20 | |||
21 | #include <asm/uaccess.h> | ||
22 | #include <asm/cacheflush.h> | ||
23 | #include <asm/tlbflush.h> | ||
24 | |||
25 | static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr) | ||
26 | { | ||
27 | pgd_t *pgd; | ||
28 | pud_t *pud; | ||
29 | pmd_t *pmd; | ||
30 | pte_t *pte = NULL; | ||
31 | |||
32 | pgd = pgd_offset(mm, addr); | ||
33 | if (pgd_none_or_clear_bad(pgd)) | ||
34 | goto end; | ||
35 | |||
36 | pud = pud_offset(pgd, addr); | ||
37 | if (pud_none_or_clear_bad(pud)) | ||
38 | goto end; | ||
39 | |||
40 | pmd = pmd_offset(pud, addr); | ||
41 | if (pmd_none_or_clear_bad(pmd)) | ||
42 | goto end; | ||
43 | |||
44 | pte = pte_offset_map_nested(pmd, addr); | ||
45 | if (pte_none(*pte)) { | ||
46 | pte_unmap_nested(pte); | ||
47 | pte = NULL; | ||
48 | } | ||
49 | end: | ||
50 | return pte; | ||
51 | } | ||
52 | |||
53 | static pte_t *get_one_pte_map(struct mm_struct *mm, unsigned long addr) | ||
54 | { | ||
55 | pgd_t *pgd; | ||
56 | pud_t *pud; | ||
57 | pmd_t *pmd; | ||
58 | |||
59 | pgd = pgd_offset(mm, addr); | ||
60 | if (pgd_none_or_clear_bad(pgd)) | ||
61 | return NULL; | ||
62 | |||
63 | pud = pud_offset(pgd, addr); | ||
64 | if (pud_none_or_clear_bad(pud)) | ||
65 | return NULL; | ||
66 | |||
67 | pmd = pmd_offset(pud, addr); | ||
68 | if (pmd_none_or_clear_bad(pmd)) | ||
69 | return NULL; | ||
70 | |||
71 | return pte_offset_map(pmd, addr); | ||
72 | } | ||
73 | |||
74 | static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr) | ||
75 | { | ||
76 | pgd_t *pgd; | ||
77 | pud_t *pud; | ||
78 | pmd_t *pmd; | ||
79 | pte_t *pte = NULL; | ||
80 | |||
81 | pgd = pgd_offset(mm, addr); | ||
82 | |||
83 | pud = pud_alloc(mm, pgd, addr); | ||
84 | if (!pud) | ||
85 | return NULL; | ||
86 | pmd = pmd_alloc(mm, pud, addr); | ||
87 | if (pmd) | ||
88 | pte = pte_alloc_map(mm, pmd, addr); | ||
89 | return pte; | ||
90 | } | ||
91 | |||
92 | static int | ||
93 | move_one_page(struct vm_area_struct *vma, unsigned long old_addr, | ||
94 | struct vm_area_struct *new_vma, unsigned long new_addr) | ||
95 | { | ||
96 | struct address_space *mapping = NULL; | ||
97 | struct mm_struct *mm = vma->vm_mm; | ||
98 | int error = 0; | ||
99 | pte_t *src, *dst; | ||
100 | |||
101 | if (vma->vm_file) { | ||
102 | /* | ||
103 | * Subtle point from Rajesh Venkatasubramanian: before | ||
104 | * moving file-based ptes, we must lock vmtruncate out, | ||
105 | * since it might clean the dst vma before the src vma, | ||
106 | * and we propagate stale pages into the dst afterward. | ||
107 | */ | ||
108 | mapping = vma->vm_file->f_mapping; | ||
109 | spin_lock(&mapping->i_mmap_lock); | ||
110 | if (new_vma->vm_truncate_count && | ||
111 | new_vma->vm_truncate_count != vma->vm_truncate_count) | ||
112 | new_vma->vm_truncate_count = 0; | ||
113 | } | ||
114 | spin_lock(&mm->page_table_lock); | ||
115 | |||
116 | src = get_one_pte_map_nested(mm, old_addr); | ||
117 | if (src) { | ||
118 | /* | ||
119 | * Look to see whether alloc_one_pte_map needs to perform a | ||
120 | * memory allocation. If it does then we need to drop the | ||
121 | * atomic kmap | ||
122 | */ | ||
123 | dst = get_one_pte_map(mm, new_addr); | ||
124 | if (unlikely(!dst)) { | ||
125 | pte_unmap_nested(src); | ||
126 | if (mapping) | ||
127 | spin_unlock(&mapping->i_mmap_lock); | ||
128 | dst = alloc_one_pte_map(mm, new_addr); | ||
129 | if (mapping && !spin_trylock(&mapping->i_mmap_lock)) { | ||
130 | spin_unlock(&mm->page_table_lock); | ||
131 | spin_lock(&mapping->i_mmap_lock); | ||
132 | spin_lock(&mm->page_table_lock); | ||
133 | } | ||
134 | src = get_one_pte_map_nested(mm, old_addr); | ||
135 | } | ||
136 | /* | ||
137 | * Since alloc_one_pte_map can drop and re-acquire | ||
138 | * page_table_lock, we should re-check the src entry... | ||
139 | */ | ||
140 | if (src) { | ||
141 | if (dst) { | ||
142 | pte_t pte; | ||
143 | pte = ptep_clear_flush(vma, old_addr, src); | ||
144 | set_pte_at(mm, new_addr, dst, pte); | ||
145 | } else | ||
146 | error = -ENOMEM; | ||
147 | pte_unmap_nested(src); | ||
148 | } | ||
149 | if (dst) | ||
150 | pte_unmap(dst); | ||
151 | } | ||
152 | spin_unlock(&mm->page_table_lock); | ||
153 | if (mapping) | ||
154 | spin_unlock(&mapping->i_mmap_lock); | ||
155 | return error; | ||
156 | } | ||
157 | |||
158 | static unsigned long move_page_tables(struct vm_area_struct *vma, | ||
159 | unsigned long old_addr, struct vm_area_struct *new_vma, | ||
160 | unsigned long new_addr, unsigned long len) | ||
161 | { | ||
162 | unsigned long offset; | ||
163 | |||
164 | flush_cache_range(vma, old_addr, old_addr + len); | ||
165 | |||
166 | /* | ||
167 | * This is not the clever way to do this, but we're taking the | ||
168 | * easy way out on the assumption that most remappings will be | ||
169 | * only a few pages.. This also makes error recovery easier. | ||
170 | */ | ||
171 | for (offset = 0; offset < len; offset += PAGE_SIZE) { | ||
172 | if (move_one_page(vma, old_addr + offset, | ||
173 | new_vma, new_addr + offset) < 0) | ||
174 | break; | ||
175 | cond_resched(); | ||
176 | } | ||
177 | return offset; | ||
178 | } | ||
179 | |||
180 | static unsigned long move_vma(struct vm_area_struct *vma, | ||
181 | unsigned long old_addr, unsigned long old_len, | ||
182 | unsigned long new_len, unsigned long new_addr) | ||
183 | { | ||
184 | struct mm_struct *mm = vma->vm_mm; | ||
185 | struct vm_area_struct *new_vma; | ||
186 | unsigned long vm_flags = vma->vm_flags; | ||
187 | unsigned long new_pgoff; | ||
188 | unsigned long moved_len; | ||
189 | unsigned long excess = 0; | ||
190 | int split = 0; | ||
191 | |||
192 | /* | ||
193 | * We'd prefer to avoid failure later on in do_munmap: | ||
194 | * which may split one vma into three before unmapping. | ||
195 | */ | ||
196 | if (mm->map_count >= sysctl_max_map_count - 3) | ||
197 | return -ENOMEM; | ||
198 | |||
199 | new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); | ||
200 | new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff); | ||
201 | if (!new_vma) | ||
202 | return -ENOMEM; | ||
203 | |||
204 | moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len); | ||
205 | if (moved_len < old_len) { | ||
206 | /* | ||
207 | * On error, move entries back from new area to old, | ||
208 | * which will succeed since page tables still there, | ||
209 | * and then proceed to unmap new area instead of old. | ||
210 | */ | ||
211 | move_page_tables(new_vma, new_addr, vma, old_addr, moved_len); | ||
212 | vma = new_vma; | ||
213 | old_len = new_len; | ||
214 | old_addr = new_addr; | ||
215 | new_addr = -ENOMEM; | ||
216 | } | ||
217 | |||
218 | /* Conceal VM_ACCOUNT so old reservation is not undone */ | ||
219 | if (vm_flags & VM_ACCOUNT) { | ||
220 | vma->vm_flags &= ~VM_ACCOUNT; | ||
221 | excess = vma->vm_end - vma->vm_start - old_len; | ||
222 | if (old_addr > vma->vm_start && | ||
223 | old_addr + old_len < vma->vm_end) | ||
224 | split = 1; | ||
225 | } | ||
226 | |||
227 | if (do_munmap(mm, old_addr, old_len) < 0) { | ||
228 | /* OOM: unable to split vma, just get accounts right */ | ||
229 | vm_unacct_memory(excess >> PAGE_SHIFT); | ||
230 | excess = 0; | ||
231 | } | ||
232 | |||
233 | /* Restore VM_ACCOUNT if one or two pieces of vma left */ | ||
234 | if (excess) { | ||
235 | vma->vm_flags |= VM_ACCOUNT; | ||
236 | if (split) | ||
237 | vma->vm_next->vm_flags |= VM_ACCOUNT; | ||
238 | } | ||
239 | |||
240 | mm->total_vm += new_len >> PAGE_SHIFT; | ||
241 | __vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT); | ||
242 | if (vm_flags & VM_LOCKED) { | ||
243 | mm->locked_vm += new_len >> PAGE_SHIFT; | ||
244 | if (new_len > old_len) | ||
245 | make_pages_present(new_addr + old_len, | ||
246 | new_addr + new_len); | ||
247 | } | ||
248 | |||
249 | return new_addr; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * Expand (or shrink) an existing mapping, potentially moving it at the | ||
254 | * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) | ||
255 | * | ||
256 | * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise | ||
257 | * This option implies MREMAP_MAYMOVE. | ||
258 | */ | ||
259 | unsigned long do_mremap(unsigned long addr, | ||
260 | unsigned long old_len, unsigned long new_len, | ||
261 | unsigned long flags, unsigned long new_addr) | ||
262 | { | ||
263 | struct vm_area_struct *vma; | ||
264 | unsigned long ret = -EINVAL; | ||
265 | unsigned long charged = 0; | ||
266 | |||
267 | if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) | ||
268 | goto out; | ||
269 | |||
270 | if (addr & ~PAGE_MASK) | ||
271 | goto out; | ||
272 | |||
273 | old_len = PAGE_ALIGN(old_len); | ||
274 | new_len = PAGE_ALIGN(new_len); | ||
275 | |||
276 | /* | ||
277 | * We allow a zero old-len as a special case | ||
278 | * for DOS-emu "duplicate shm area" thing. But | ||
279 | * a zero new-len is nonsensical. | ||
280 | */ | ||
281 | if (!new_len) | ||
282 | goto out; | ||
283 | |||
284 | /* new_addr is only valid if MREMAP_FIXED is specified */ | ||
285 | if (flags & MREMAP_FIXED) { | ||
286 | if (new_addr & ~PAGE_MASK) | ||
287 | goto out; | ||
288 | if (!(flags & MREMAP_MAYMOVE)) | ||
289 | goto out; | ||
290 | |||
291 | if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) | ||
292 | goto out; | ||
293 | |||
294 | /* Check if the location we're moving into overlaps the | ||
295 | * old location at all, and fail if it does. | ||
296 | */ | ||
297 | if ((new_addr <= addr) && (new_addr+new_len) > addr) | ||
298 | goto out; | ||
299 | |||
300 | if ((addr <= new_addr) && (addr+old_len) > new_addr) | ||
301 | goto out; | ||
302 | |||
303 | ret = do_munmap(current->mm, new_addr, new_len); | ||
304 | if (ret) | ||
305 | goto out; | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * Always allow a shrinking remap: that just unmaps | ||
310 | * the unnecessary pages.. | ||
311 | * do_munmap does all the needed commit accounting | ||
312 | */ | ||
313 | if (old_len >= new_len) { | ||
314 | ret = do_munmap(current->mm, addr+new_len, old_len - new_len); | ||
315 | if (ret && old_len != new_len) | ||
316 | goto out; | ||
317 | ret = addr; | ||
318 | if (!(flags & MREMAP_FIXED) || (new_addr == addr)) | ||
319 | goto out; | ||
320 | old_len = new_len; | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * Ok, we need to grow.. or relocate. | ||
325 | */ | ||
326 | ret = -EFAULT; | ||
327 | vma = find_vma(current->mm, addr); | ||
328 | if (!vma || vma->vm_start > addr) | ||
329 | goto out; | ||
330 | if (is_vm_hugetlb_page(vma)) { | ||
331 | ret = -EINVAL; | ||
332 | goto out; | ||
333 | } | ||
334 | /* We can't remap across vm area boundaries */ | ||
335 | if (old_len > vma->vm_end - addr) | ||
336 | goto out; | ||
337 | if (vma->vm_flags & VM_DONTEXPAND) { | ||
338 | if (new_len > old_len) | ||
339 | goto out; | ||
340 | } | ||
341 | if (vma->vm_flags & VM_LOCKED) { | ||
342 | unsigned long locked, lock_limit; | ||
343 | locked = current->mm->locked_vm << PAGE_SHIFT; | ||
344 | lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; | ||
345 | locked += new_len - old_len; | ||
346 | ret = -EAGAIN; | ||
347 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) | ||
348 | goto out; | ||
349 | } | ||
350 | ret = -ENOMEM; | ||
351 | if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len) | ||
352 | > current->signal->rlim[RLIMIT_AS].rlim_cur) | ||
353 | goto out; | ||
354 | |||
355 | if (vma->vm_flags & VM_ACCOUNT) { | ||
356 | charged = (new_len - old_len) >> PAGE_SHIFT; | ||
357 | if (security_vm_enough_memory(charged)) | ||
358 | goto out_nc; | ||
359 | } | ||
360 | |||
361 | /* old_len exactly to the end of the area.. | ||
362 | * And we're not relocating the area. | ||
363 | */ | ||
364 | if (old_len == vma->vm_end - addr && | ||
365 | !((flags & MREMAP_FIXED) && (addr != new_addr)) && | ||
366 | (old_len != new_len || !(flags & MREMAP_MAYMOVE))) { | ||
367 | unsigned long max_addr = TASK_SIZE; | ||
368 | if (vma->vm_next) | ||
369 | max_addr = vma->vm_next->vm_start; | ||
370 | /* can we just expand the current mapping? */ | ||
371 | if (max_addr - addr >= new_len) { | ||
372 | int pages = (new_len - old_len) >> PAGE_SHIFT; | ||
373 | |||
374 | vma_adjust(vma, vma->vm_start, | ||
375 | addr + new_len, vma->vm_pgoff, NULL); | ||
376 | |||
377 | current->mm->total_vm += pages; | ||
378 | __vm_stat_account(vma->vm_mm, vma->vm_flags, | ||
379 | vma->vm_file, pages); | ||
380 | if (vma->vm_flags & VM_LOCKED) { | ||
381 | current->mm->locked_vm += pages; | ||
382 | make_pages_present(addr + old_len, | ||
383 | addr + new_len); | ||
384 | } | ||
385 | ret = addr; | ||
386 | goto out; | ||
387 | } | ||
388 | } | ||
389 | |||
390 | /* | ||
391 | * We weren't able to just expand or shrink the area, | ||
392 | * we need to create a new one and move it.. | ||
393 | */ | ||
394 | ret = -ENOMEM; | ||
395 | if (flags & MREMAP_MAYMOVE) { | ||
396 | if (!(flags & MREMAP_FIXED)) { | ||
397 | unsigned long map_flags = 0; | ||
398 | if (vma->vm_flags & VM_MAYSHARE) | ||
399 | map_flags |= MAP_SHARED; | ||
400 | |||
401 | new_addr = get_unmapped_area(vma->vm_file, 0, new_len, | ||
402 | vma->vm_pgoff, map_flags); | ||
403 | ret = new_addr; | ||
404 | if (new_addr & ~PAGE_MASK) | ||
405 | goto out; | ||
406 | } | ||
407 | ret = move_vma(vma, addr, old_len, new_len, new_addr); | ||
408 | } | ||
409 | out: | ||
410 | if (ret & ~PAGE_MASK) | ||
411 | vm_unacct_memory(charged); | ||
412 | out_nc: | ||
413 | return ret; | ||
414 | } | ||
415 | |||
416 | asmlinkage unsigned long sys_mremap(unsigned long addr, | ||
417 | unsigned long old_len, unsigned long new_len, | ||
418 | unsigned long flags, unsigned long new_addr) | ||
419 | { | ||
420 | unsigned long ret; | ||
421 | |||
422 | down_write(¤t->mm->mmap_sem); | ||
423 | ret = do_mremap(addr, old_len, new_len, flags, new_addr); | ||
424 | up_write(¤t->mm->mmap_sem); | ||
425 | return ret; | ||
426 | } | ||