Diffstat (limited to 'mm/mmap.c')

 -rw-r--r--   mm/mmap.c | 189
 1 file changed, 183 insertions(+), 6 deletions(-)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -26,12 +26,15 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#include "internal.h"
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)        (0)
 #endif
@@ -72,8 +75,9 @@ pgprot_t protection_map[16] = {
 
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-        return protection_map[vm_flags &
-                                (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+        return __pgprot(pgprot_val(protection_map[vm_flags &
+                                (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
+                        pgprot_val(arch_vm_get_page_prot(vm_flags)));
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
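The vm_get_page_prot() hunk above stops returning the protection_map[] entry directly and instead ORs in whatever extra protection bits the architecture reports via arch_vm_get_page_prot(). For context, a minimal sketch of the usual call site (the helper name below is illustrative, not part of this patch):

#include <linux/mm.h>

/* Sketch: how mmap_region()-style code derives the pte protection for a
 * new vma from its VM_* flags.  With the change above, any
 * architecture-defined protection bits are folded in transparently. */
static void example_set_page_prot(struct vm_area_struct *vma)
{
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
}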
@@ -366,7 +370,7 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
                 if (vma_tmp->vm_end > addr) {
                         vma = vma_tmp;
                         if (vma_tmp->vm_start <= addr)
-                                return vma;
+                                break;
                         __rb_link = &__rb_parent->rb_left;
                 } else {
                         rb_prev = __rb_parent;
@@ -1026,6 +1030,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
         } else {
                 switch (flags & MAP_TYPE) {
                 case MAP_SHARED:
+                        /*
+                         * Ignore pgoff.
+                         */
+                        pgoff = 0;
                         vm_flags |= VM_SHARED | VM_MAYSHARE;
                         break;
                 case MAP_PRIVATE:
@@ -1107,6 +1115,9 @@ munmap_back:
         if (!may_expand_vm(mm, len >> PAGE_SHIFT))
                 return -ENOMEM;
 
+        if (flags & MAP_NORESERVE)
+                vm_flags |= VM_NORESERVE;
+
         if (accountable && (!(flags & MAP_NORESERVE) ||
                             sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
                 if (vm_flags & VM_SHARED) {
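The two do_mmap_pgoff() hunks above change how user-supplied mmap() arguments are handled: anonymous MAP_SHARED mappings now have their file offset forced to zero, and MAP_NORESERVE is now also recorded on the vma as VM_NORESERVE instead of only steering the accounting check below it. A userspace sketch of a call that exercises both paths (illustrative only):

#include <stddef.h>
#include <sys/mman.h>

/* Map 'len' bytes of anonymous shared memory without reserving swap.
 * The offset argument is irrelevant for MAP_ANONYMOUS|MAP_SHARED; the
 * pgoff = 0 hunk above makes the kernel ignore it explicitly. */
static void *map_shared_noreserve(size_t len)
{
        return mmap(NULL, len, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
}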
@@ -1762,7 +1773,7 @@ static void unmap_region(struct mm_struct *mm,
         update_hiwater_rss(mm);
         unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
-        free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+        free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
                                  next? next->vm_start: 0);
         tlb_finish_mmu(tlb, start, end);
 }
@@ -1806,7 +1817,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
         struct mempolicy *pol;
         struct vm_area_struct *new;
 
-        if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+        if (is_vm_hugetlb_page(vma) && (addr &
+                                        ~(huge_page_mask(hstate_vma(vma)))))
                 return -EINVAL;
 
         if (mm->map_count >= sysctl_max_map_count)
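The split_vma() hunk above replaces the single global HPAGE_MASK with the huge page mask of the vma's own hstate, so a hugetlb vma may only be split on a boundary of its own huge page size. A minimal sketch of that alignment rule, using the same hstate_vma()/huge_page_mask() accessors (the helper itself is hypothetical):

#include <linux/hugetlb.h>
#include <linux/mm.h>

/* Hypothetical helper: true if addr is a legal split point inside a
 * hugetlb vma, i.e. aligned to that vma's own huge page size. */
static inline bool hugetlb_split_ok(struct vm_area_struct *vma,
                                    unsigned long addr)
{
        return (addr & ~huge_page_mask(hstate_vma(vma))) == 0;
}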
@@ -2054,6 +2066,7 @@ void exit_mmap(struct mm_struct *mm)
 
         /* mm's last user has gone, and its about to be pulled down */
         arch_exit_mmap(mm);
+        mmu_notifier_release(mm);
 
         lru_add_drain();
         flush_cache_mm(mm);
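exit_mmap() now calls mmu_notifier_release() right after arch_exit_mmap(), so any registered notifier is told to stop using the address space before the VMAs and page tables are torn down. A hedged sketch of a minimal notifier that only implements that callback (the names below are illustrative; the ops structure and callback signature come from the mmu notifier API this file now includes):

#include <linux/mmu_notifier.h>

/* Invoked from mmu_notifier_release() in exit_mmap() above, while the
 * mm is still valid; a real user would tear down its secondary TLB or
 * external mappings of this mm here. */
static void demo_notifier_release(struct mmu_notifier *mn,
                                  struct mm_struct *mm)
{
        /* stop referencing pages of 'mm' from now on */
}

static const struct mmu_notifier_ops demo_notifier_ops = {
        .release = demo_notifier_release,
};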
@@ -2062,7 +2075,7 @@ void exit_mmap(struct mm_struct *mm)
         /* Use -1 here to ensure all VMAs in the mm are unmapped */
         end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
-        free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+        free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
         tlb_finish_mmu(tlb, 0, end);
 
         /*
@@ -2261,3 +2274,167 @@ int install_special_mapping(struct mm_struct *mm,
 
         return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
+{
+        if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+                /*
+                 * The LSB of head.next can't change from under us
+                 * because we hold the mm_all_locks_mutex.
+                 */
+                spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+                /*
+                 * We can safely modify head.next after taking the
+                 * anon_vma->lock. If some other vma in this mm shares
+                 * the same anon_vma we won't take it again.
+                 *
+                 * No need of atomic instructions here, head.next
+                 * can't change from under us thanks to the
+                 * anon_vma->lock.
+                 */
+                if (__test_and_set_bit(0, (unsigned long *)
+                                       &anon_vma->head.next))
+                        BUG();
+        }
+}
+
+static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
+{
+        if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+                /*
+                 * AS_MM_ALL_LOCKS can't change from under us because
+                 * we hold the mm_all_locks_mutex.
+                 *
+                 * Operations on ->flags have to be atomic because
+                 * even if AS_MM_ALL_LOCKS is stable thanks to the
+                 * mm_all_locks_mutex, there may be other cpus
+                 * changing other bitflags in parallel to us.
+                 */
+                if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+                        BUG();
+                spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+        }
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to avoid new
+ * anon_vmas to be associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid to take the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks are expensive operations
+ * that may have to take thousand of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+        struct vm_area_struct *vma;
+        int ret = -EINTR;
+
+        BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+        mutex_lock(&mm_all_locks_mutex);
+
+        for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                if (signal_pending(current))
+                        goto out_unlock;
+                if (vma->vm_file && vma->vm_file->f_mapping)
+                        vm_lock_mapping(mm, vma->vm_file->f_mapping);
+        }
+
+        for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                if (signal_pending(current))
+                        goto out_unlock;
+                if (vma->anon_vma)
+                        vm_lock_anon_vma(mm, vma->anon_vma);
+        }
+
+        ret = 0;
+
+out_unlock:
+        if (ret)
+                mm_drop_all_locks(mm);
+
+        return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+        if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+                /*
+                 * The LSB of head.next can't change to 0 from under
+                 * us because we hold the mm_all_locks_mutex.
+                 *
+                 * We must however clear the bitflag before unlocking
+                 * the vma so the users using the anon_vma->head will
+                 * never see our bitflag.
+                 *
+                 * No need of atomic instructions here, head.next
+                 * can't change from under us until we release the
+                 * anon_vma->lock.
+                 */
+                if (!__test_and_clear_bit(0, (unsigned long *)
+                                          &anon_vma->head.next))
+                        BUG();
+                spin_unlock(&anon_vma->lock);
+        }
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+        if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+                /*
+                 * AS_MM_ALL_LOCKS can't change to 0 from under us
+                 * because we hold the mm_all_locks_mutex.
+                 */
+                spin_unlock(&mapping->i_mmap_lock);
+                if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+                                        &mapping->flags))
+                        BUG();
+        }
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+        struct vm_area_struct *vma;
+
+        BUG_ON(down_read_trylock(&mm->mmap_sem));
+        BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+        for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                if (vma->anon_vma)
+                        vm_unlock_anon_vma(vma->anon_vma);
+                if (vma->vm_file && vma->vm_file->f_mapping)
+                        vm_unlock_mapping(vma->vm_file->f_mapping);
+        }
+
+        mutex_unlock(&mm_all_locks_mutex);
+}
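The new mm_take_all_locks()/mm_drop_all_locks() pair added above is what allows a subsystem (such as an mmu notifier user) to exclude every pte/vma/mm operation on a live mm. A usage sketch under the locking rules stated in the block comment above; the wrapper is illustrative and not the actual mmu_notifier_register() implementation:

#include <linux/mm.h>
#include <linux/sched.h>

/* Run fn(mm) with every i_mmap_lock and anon_vma lock of 'mm' held.
 * Per the rules above, mmap_sem must be held for writing across the
 * whole section, and mm_take_all_locks() may fail with -EINTR. */
static int with_all_mm_locks(struct mm_struct *mm,
                             void (*fn)(struct mm_struct *))
{
        int ret;

        down_write(&mm->mmap_sem);
        ret = mm_take_all_locks(mm);
        if (!ret) {
                fn(mm);
                mm_drop_all_locks(mm);
        }
        up_write(&mm->mmap_sem);
        return ret;
}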