Diffstat (limited to 'mm/mmap.c')
-rw-r--r--   mm/mmap.c | 180
1 file changed, 176 insertions, 4 deletions
@@ -26,12 +26,15 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#include "internal.h"
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
@@ -367,7 +370,7 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
 		if (vma_tmp->vm_end > addr) {
 			vma = vma_tmp;
 			if (vma_tmp->vm_start <= addr)
-				return vma;
+				break;
 			__rb_link = &__rb_parent->rb_left;
 		} else {
 			rb_prev = __rb_parent;
@@ -1108,6 +1111,9 @@ munmap_back:
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
+	if (flags & MAP_NORESERVE)
+		vm_flags |= VM_NORESERVE;
+
 	if (accountable && (!(flags & MAP_NORESERVE) ||
 			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
 		if (vm_flags & VM_SHARED) {
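The new lines above propagate MAP_NORESERVE from the mmap() flags into the vma's vm_flags as VM_NORESERVE. A minimal userspace sketch of a caller that requests such a mapping (the 64 MB size is arbitrary and only illustrative); note that, per the unchanged check below the added lines, the flag still does not skip accounting when overcommit is set to OVERCOMMIT_NEVER:

    #include <sys/mman.h>
    #include <stdio.h>

    int main(void)
    {
            size_t len = 64UL << 20;	/* 64 MB, arbitrary */

            /* Anonymous mapping with no swap-space reservation requested. */
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }
            munmap(p, len);
            return 0;
    }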
@@ -1763,7 +1769,7 @@ static void unmap_region(struct mm_struct *mm,
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
 				 next? next->vm_start: 0);
 	tlb_finish_mmu(tlb, start, end);
 }
@@ -1807,7 +1813,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
 
-	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+	if (is_vm_hugetlb_page(vma) && (addr &
+					~(huge_page_mask(hstate_vma(vma)))))
 		return -EINVAL;
 
 	if (mm->map_count >= sysctl_max_map_count)
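The split_vma() change above replaces the single global HPAGE_MASK with the mask of the hstate backing this particular vma, since hugetlbfs now supports more than one huge page size. A standalone sketch of the same alignment arithmetic, with a 2 MB huge page assumed purely for illustration (the kernel obtains the mask via huge_page_mask(hstate_vma(vma))):

    #include <stdio.h>

    /* ~(size - 1): the per-size equivalent of the old global HPAGE_MASK. */
    static unsigned long hpage_mask(unsigned long hpage_size)
    {
            return ~(hpage_size - 1);
    }

    int main(void)
    {
            unsigned long mask = hpage_mask(2UL << 20);	/* 2 MB assumed */
            unsigned long addr = 0x40100000UL;		/* 1 MB into a huge page */

            /* split_vma() returns -EINVAL for such an unaligned split point. */
            printf("addr %#lx is %shuge-page aligned\n", addr,
                   (addr & ~mask) ? "not " : "");
            return 0;
    }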
@@ -2055,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm)
 
 	/* mm's last user has gone, and its about to be pulled down */
 	arch_exit_mmap(mm);
+	mmu_notifier_release(mm);
 
 	lru_add_drain();
 	flush_cache_mm(mm);
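exit_mmap() now calls mmu_notifier_release() right after arch_exit_mmap(), so any notifier attached to the mm gets its ->release() callback before the address space is torn down. A hedged sketch of a minimal in-kernel user of the notifier interface added elsewhere in this series (my_release/my_attach are made-up names; in this series, registration takes mmap_sem and mm_take_all_locks() internally):

    #include <linux/kernel.h>
    #include <linux/mm_types.h>
    #include <linux/mmu_notifier.h>

    /* Hypothetical notifier: log when the mm is about to be pulled down. */
    static void my_release(struct mmu_notifier *mn, struct mm_struct *mm)
    {
            /* Runs from exit_mmap() via mmu_notifier_release(mm). */
            printk(KERN_INFO "mm %p is going away\n", mm);
    }

    static const struct mmu_notifier_ops my_ops = {
            .release = my_release,
    };

    static struct mmu_notifier my_mn = {
            .ops = &my_ops,
    };

    static int my_attach(struct mm_struct *mm)
    {
            return mmu_notifier_register(&my_mn, mm);
    }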
@@ -2063,7 +2071,7 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 
 	/*
@@ -2262,3 +2270,167 @@ int install_special_mapping(struct mm_struct *mm,
 
 	return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
+{
+	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+		/*
+		 * We can safely modify head.next after taking the
+		 * anon_vma->lock. If some other vma in this mm shares
+		 * the same anon_vma we won't take it again.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us thanks to the
+		 * anon_vma->lock.
+		 */
+		if (__test_and_set_bit(0, (unsigned long *)
+				       &anon_vma->head.next))
+			BUG();
+	}
+}
+
+static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
+{
+	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change from under us because
+		 * we hold the mm_all_locks_mutex.
+		 *
+		 * Operations on ->flags have to be atomic because
+		 * even if AS_MM_ALL_LOCKS is stable thanks to the
+		 * mm_all_locks_mutex, there may be other cpus
+		 * changing other bitflags in parallel to us.
+		 */
+		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+			BUG();
+		spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+	}
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to avoid new
+ * anon_vmas to be associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid to take the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks are expensive operations
+ * that may have to take thousand of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret = -EINTR;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+	mutex_lock(&mm_all_locks_mutex);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_lock_mapping(mm, vma->vm_file->f_mapping);
+	}
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->anon_vma)
+			vm_lock_anon_vma(mm, vma->anon_vma);
+	}
+
+	ret = 0;
+
+out_unlock:
+	if (ret)
+		mm_drop_all_locks(mm);
+
+	return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change to 0 from under
+		 * us because we hold the mm_all_locks_mutex.
+		 *
+		 * We must however clear the bitflag before unlocking
+		 * the vma so the users using the anon_vma->head will
+		 * never see our bitflag.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us until we release the
+		 * anon_vma->lock.
+		 */
+		if (!__test_and_clear_bit(0, (unsigned long *)
+					  &anon_vma->head.next))
+			BUG();
+		spin_unlock(&anon_vma->lock);
+	}
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change to 0 from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_unlock(&mapping->i_mmap_lock);
+		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+					&mapping->flags))
+			BUG();
+	}
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->anon_vma)
+			vm_unlock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_unlock_mapping(vma->vm_file->f_mapping);
+	}
+
+	mutex_unlock(&mm_all_locks_mutex);
+}
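Taken together with the comment above mm_take_all_locks(), the intended calling pattern looks roughly like the sketch below (with_all_locks() and update_mm are hypothetical names; the mmu notifier registration path added elsewhere in this series is the real caller). Note that the failure path of mm_take_all_locks() already drops whatever locks it managed to take, so the caller only calls mm_drop_all_locks() on success:

    /* Sketch: mmap_sem must be held for writing across the whole section. */
    static int with_all_locks(struct mm_struct *mm,
                              void (*update_mm)(struct mm_struct *))
    {
            int ret;

            down_write(&mm->mmap_sem);
            ret = mm_take_all_locks(mm);	/* -EINTR if a signal is pending */
            if (!ret) {
                    /*
                     * Every i_mmap_lock and anon_vma->lock in this mm is
                     * held, so no pte/vma operation can run concurrently
                     * with update_mm().
                     */
                    update_mm(mm);
                    mm_drop_all_locks(mm);
            }
            up_write(&mm->mmap_sem);

            return ret;
    }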