Diffstat (limited to 'mm/mmap.c')
 mm/mmap.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 176 insertions(+), 4 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 1d102b956fd8..339cf5c4d5d8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -26,12 +26,15 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#include "internal.h"
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
@@ -367,7 +370,7 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
 		if (vma_tmp->vm_end > addr) {
 			vma = vma_tmp;
 			if (vma_tmp->vm_start <= addr)
-				return vma;
+				break;
 			__rb_link = &__rb_parent->rb_left;
 		} else {
 			rb_prev = __rb_parent;
@@ -1108,6 +1111,9 @@ munmap_back:
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
+	if (flags & MAP_NORESERVE)
+		vm_flags |= VM_NORESERVE;
+
 	if (accountable && (!(flags & MAP_NORESERVE) ||
 			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
 		if (vm_flags & VM_SHARED) {
@@ -1763,7 +1769,7 @@ static void unmap_region(struct mm_struct *mm,
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
 				 next? next->vm_start: 0);
 	tlb_finish_mmu(tlb, start, end);
 }
@@ -1807,7 +1813,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
 
-	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+	if (is_vm_hugetlb_page(vma) && (addr &
+					~(huge_page_mask(hstate_vma(vma)))))
 		return -EINVAL;
 
 	if (mm->map_count >= sysctl_max_map_count)
@@ -2055,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm)
 
 	/* mm's last user has gone, and its about to be pulled down */
 	arch_exit_mmap(mm);
+	mmu_notifier_release(mm);
 
 	lru_add_drain();
 	flush_cache_mm(mm);
@@ -2063,7 +2071,7 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 
 	/*
@@ -2262,3 +2270,167 @@ int install_special_mapping(struct mm_struct *mm,
 
 	return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
+{
+	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+		/*
+		 * We can safely modify head.next after taking the
+		 * anon_vma->lock. If some other vma in this mm shares
+		 * the same anon_vma we won't take it again.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us thanks to the
+		 * anon_vma->lock.
+		 */
+		if (__test_and_set_bit(0, (unsigned long *)
+				       &anon_vma->head.next))
+			BUG();
+	}
+}
+
+static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
+{
+	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change from under us because
+		 * we hold the mm_all_locks_mutex.
+		 *
+		 * Operations on ->flags have to be atomic because
+		 * even if AS_MM_ALL_LOCKS is stable thanks to the
+		 * mm_all_locks_mutex, there may be other cpus
+		 * changing other bitflags in parallel to us.
+		 */
+		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+			BUG();
+		spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+	}
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to avoid new
+ * anon_vmas to be associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid to take the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks are expensive operations
+ * that may have to take thousand of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret = -EINTR;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+	mutex_lock(&mm_all_locks_mutex);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_lock_mapping(mm, vma->vm_file->f_mapping);
+	}
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->anon_vma)
+			vm_lock_anon_vma(mm, vma->anon_vma);
+	}
+
+	ret = 0;
+
+out_unlock:
+	if (ret)
+		mm_drop_all_locks(mm);
+
+	return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change to 0 from under
+		 * us because we hold the mm_all_locks_mutex.
+		 *
+		 * We must however clear the bitflag before unlocking
+		 * the vma so the users using the anon_vma->head will
+		 * never see our bitflag.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us until we release the
+		 * anon_vma->lock.
+		 */
+		if (!__test_and_clear_bit(0, (unsigned long *)
+					  &anon_vma->head.next))
+			BUG();
+		spin_unlock(&anon_vma->lock);
+	}
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change to 0 from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_unlock(&mapping->i_mmap_lock);
+		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+					&mapping->flags))
+			BUG();
+	}
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->anon_vma)
+			vm_unlock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_unlock_mapping(vma->vm_file->f_mapping);
+	}
+
+	mutex_unlock(&mm_all_locks_mutex);
+}
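
For reference, the sketch below shows how a caller of the new helpers might be structured. It is a hypothetical example, not part of the patch: the function name example_register_notifier() is invented, and it assumes the 2.6.27-era API visible in the diff above (mmap_sem as an rw_semaphore held for write across the whole sequence, mm_take_all_locks() returning 0 or -EINTR and dropping its partial locks itself on failure, mm_drop_all_locks() called before mmap_sem is released).

#include <linux/mm.h>

/* Hypothetical caller, for illustration only. */
static int example_register_notifier(struct mm_struct *mm)
{
	int ret;

	down_write(&mm->mmap_sem);	/* required by mm_take_all_locks() */

	ret = mm_take_all_locks(mm);
	if (ret)
		goto out;		/* -EINTR: interrupted by a signal */

	/*
	 * Every i_mmap_lock and anon_vma->lock in this mm is now held,
	 * so no pte/vma teardown can run concurrently; publish whatever
	 * state those paths must observe afterwards (e.g. hook up a
	 * notifier), then drop the locks again.
	 */

	mm_drop_all_locks(mm);		/* must precede up_write() */
out:
	up_write(&mm->mmap_sem);
	return ret;
}

Note that on the error path the caller only releases mmap_sem: the out_unlock path inside mm_take_all_locks() has already undone any locks taken before the signal was noticed.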