|  |  |  |
|---|---|---|
| author | Ingo Molnar <mingo@elte.hu> | 2008-08-14 06:19:59 -0400 |
| committer | Ingo Molnar <mingo@elte.hu> | 2008-08-14 06:19:59 -0400 |
| commit | 8d7ccaa545490cdffdfaff0842436a8dd85cf47b | |
| tree | 8129b5907161bc6ae26deb3645ce1e280c5e1f51 | /mm/mmap.c |
| parent | b2139aa0eec330c711c5a279db361e5ef1178e78 | |
| parent | 30a2f3c60a84092c8084dfe788b710f8d0768cd4 | |
Merge commit 'v2.6.27-rc3' into x86/prototypes

Conflicts:
	include/asm-x86/dma-mapping.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm/mmap.c')
|  |  |  |
|---|---|---|
| -rw-r--r-- | mm/mmap.c | 180 |

1 file changed, 176 insertions(+), 4 deletions(-)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -26,12 +26,15 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#include "internal.h"
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
@@ -367,7 +370,7 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
 		if (vma_tmp->vm_end > addr) {
 			vma = vma_tmp;
 			if (vma_tmp->vm_start <= addr)
-				return vma;
+				break;
 			__rb_link = &__rb_parent->rb_left;
 		} else {
 			rb_prev = __rb_parent;
@@ -1108,6 +1111,9 @@ munmap_back:
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
+	if (flags & MAP_NORESERVE)
+		vm_flags |= VM_NORESERVE;
+
 	if (accountable && (!(flags & MAP_NORESERVE) ||
 			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
 		if (vm_flags & VM_SHARED) {
@@ -1763,7 +1769,7 @@ static void unmap_region(struct mm_struct *mm,
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
 				 next? next->vm_start: 0);
 	tlb_finish_mmu(tlb, start, end);
 }
@@ -1807,7 +1813,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
 
-	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+	if (is_vm_hugetlb_page(vma) && (addr &
+					~(huge_page_mask(hstate_vma(vma)))))
 		return -EINVAL;
 
 	if (mm->map_count >= sysctl_max_map_count)
@@ -2055,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm)
 
 	/* mm's last user has gone, and its about to be pulled down */
 	arch_exit_mmap(mm);
+	mmu_notifier_release(mm);
 
 	lru_add_drain();
 	flush_cache_mm(mm);
@@ -2063,7 +2071,7 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 
 	/*
@@ -2262,3 +2270,167 @@ int install_special_mapping(struct mm_struct *mm,
 
 	return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
+{
+	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+		/*
+		 * We can safely modify head.next after taking the
+		 * anon_vma->lock. If some other vma in this mm shares
+		 * the same anon_vma we won't take it again.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us thanks to the
+		 * anon_vma->lock.
+		 */
+		if (__test_and_set_bit(0, (unsigned long *)
+				       &anon_vma->head.next))
+			BUG();
+	}
+}
+
+static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
+{
+	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change from under us because
+		 * we hold the mm_all_locks_mutex.
+		 *
+		 * Operations on ->flags have to be atomic because
+		 * even if AS_MM_ALL_LOCKS is stable thanks to the
+		 * mm_all_locks_mutex, there may be other cpus
+		 * changing other bitflags in parallel to us.
+		 */
+		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+			BUG();
+		spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+	}
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to avoid new
+ * anon_vmas to be associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid to take the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks are expensive operations
+ * that may have to take thousand of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret = -EINTR;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+	mutex_lock(&mm_all_locks_mutex);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_lock_mapping(mm, vma->vm_file->f_mapping);
+	}
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->anon_vma)
+			vm_lock_anon_vma(mm, vma->anon_vma);
+	}
+
+	ret = 0;
+
+out_unlock:
+	if (ret)
+		mm_drop_all_locks(mm);
+
+	return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change to 0 from under
+		 * us because we hold the mm_all_locks_mutex.
+		 *
+		 * We must however clear the bitflag before unlocking
+		 * the vma so the users using the anon_vma->head will
+		 * never see our bitflag.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us until we release the
+		 * anon_vma->lock.
+		 */
+		if (!__test_and_clear_bit(0, (unsigned long *)
+					  &anon_vma->head.next))
+			BUG();
+		spin_unlock(&anon_vma->lock);
+	}
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change to 0 from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_unlock(&mapping->i_mmap_lock);
+		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+					&mapping->flags))
+			BUG();
+	}
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->anon_vma)
+			vm_unlock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_unlock_mapping(vma->vm_file->f_mapping);
+	}
+
+	mutex_unlock(&mm_all_locks_mutex);
+}
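
A note on the MAP_NORESERVE hunk at `munmap_back:` above (`vm_flags |= VM_NORESERVE;`): the flag it records in the vma comes straight from userspace mmap(2). Below is a minimal userspace sketch, not part of this patch, showing how the flag is typically requested; the 1 GiB size is just an arbitrary example value.

```c
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

int main(void)
{
	/* Arbitrary example size: a large, sparsely used anonymous mapping. */
	size_t len = (size_t)1 << 30;

	/*
	 * MAP_NORESERVE asks the kernel not to reserve swap space for the
	 * mapping; pages are still allocated lazily on first touch.
	 */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	((char *)p)[0] = 1;	/* touching a page faults it in */
	munmap(p, len);
	return EXIT_SUCCESS;
}
```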

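The block comment above mm_take_all_locks() spells out the calling convention for the new pair of primitives: take mmap_sem for writing first, keep it held until mm_drop_all_locks() returns, and be prepared for mm_take_all_locks() to fail with -EINTR when a signal is pending (its error path drops whatever it already took). The sketch below is a hypothetical caller, not something this patch adds; freeze_address_space() is an illustrative name, and the real in-tree consumer is the mmu notifier registration path that the same merge brings in.

```c
/*
 * Hypothetical caller sketch (not part of this patch), following the
 * convention documented above: mmap_sem is held for writing across the
 * whole take/drop window, and -EINTR from mm_take_all_locks() is passed
 * back to the caller.
 */
#include <linux/mm.h>
#include <linux/rwsem.h>

static int freeze_address_space(struct mm_struct *mm)
{
	int ret;

	down_write(&mm->mmap_sem);	/* must be held before taking all locks */

	ret = mm_take_all_locks(mm);	/* -EINTR if a signal is pending */
	if (ret)
		goto out;		/* failure path already dropped the locks */

	/*
	 * Every i_mmap_lock and anon_vma lock of this mm is now held, so
	 * try_to_unmap, vmtruncate and page faults cannot race with us.
	 * ... perform the work that must not race with the VM here ...
	 */

	mm_drop_all_locks(mm);
out:
	up_write(&mm->mmap_sem);
	return ret;
}
```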