Diffstat (limited to 'mm/mmap.c')
 mm/mmap.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 183 insertions(+), 6 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 3354fdd83d4b..e7a5a68a9c2e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -26,12 +26,15 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 
+#include "internal.h"
+
 #ifndef arch_mmap_check
 #define arch_mmap_check(addr, len, flags)	(0)
 #endif
@@ -72,8 +75,9 @@ pgprot_t protection_map[16] = {
 
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-	return protection_map[vm_flags &
-				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	return __pgprot(pgprot_val(protection_map[vm_flags &
+				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
+			pgprot_val(arch_vm_get_page_prot(vm_flags)));
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
@@ -366,7 +370,7 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
 		if (vma_tmp->vm_end > addr) {
 			vma = vma_tmp;
 			if (vma_tmp->vm_start <= addr)
-				return vma;
+				break;
 			__rb_link = &__rb_parent->rb_left;
 		} else {
 			rb_prev = __rb_parent;
@@ -1026,6 +1030,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
 	} else {
 		switch (flags & MAP_TYPE) {
 		case MAP_SHARED:
+			/*
+			 * Ignore pgoff.
+			 */
+			pgoff = 0;
 			vm_flags |= VM_SHARED | VM_MAYSHARE;
 			break;
 		case MAP_PRIVATE:
@@ -1107,6 +1115,9 @@ munmap_back:
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
+	if (flags & MAP_NORESERVE)
+		vm_flags |= VM_NORESERVE;
+
 	if (accountable && (!(flags & MAP_NORESERVE) ||
 			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
 		if (vm_flags & VM_SHARED) {
@@ -1762,7 +1773,7 @@ static void unmap_region(struct mm_struct *mm,
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+	free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
 				 next? next->vm_start: 0);
 	tlb_finish_mmu(tlb, start, end);
 }
@@ -1806,7 +1817,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
 
-	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+	if (is_vm_hugetlb_page(vma) && (addr &
+					~(huge_page_mask(hstate_vma(vma)))))
 		return -EINVAL;
 
 	if (mm->map_count >= sysctl_max_map_count)
@@ -2054,6 +2066,7 @@ void exit_mmap(struct mm_struct *mm)
 
 	/* mm's last user has gone, and its about to be pulled down */
 	arch_exit_mmap(mm);
+	mmu_notifier_release(mm);
 
 	lru_add_drain();
 	flush_cache_mm(mm);
@@ -2062,7 +2075,7 @@ void exit_mmap(struct mm_struct *mm)
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
 
 	/*
@@ -2261,3 +2274,167 @@ int install_special_mapping(struct mm_struct *mm,
 
 	return 0;
 }
+
+static DEFINE_MUTEX(mm_all_locks_mutex);
+
+static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
+{
+	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+		/*
+		 * We can safely modify head.next after taking the
+		 * anon_vma->lock. If some other vma in this mm shares
+		 * the same anon_vma we won't take it again.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us thanks to the
+		 * anon_vma->lock.
+		 */
+		if (__test_and_set_bit(0, (unsigned long *)
+				       &anon_vma->head.next))
+			BUG();
+	}
+}
+
+static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
+{
+	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change from under us because
+		 * we hold the mm_all_locks_mutex.
+		 *
+		 * Operations on ->flags have to be atomic because
+		 * even if AS_MM_ALL_LOCKS is stable thanks to the
+		 * mm_all_locks_mutex, there may be other cpus
+		 * changing other bitflags in parallel to us.
+		 */
+		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
+			BUG();
+		spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+	}
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults.
+ *
+ * The caller must take the mmap_sem in write mode before calling
+ * mm_take_all_locks(). The caller isn't allowed to release the
+ * mmap_sem until mm_drop_all_locks() returns.
+ *
+ * mmap_sem in write mode is required in order to block all operations
+ * that could modify pagetables and free pages without need of
+ * altering the vma layout (for example populate_range() with
+ * nonlinear vmas). It's also needed in write mode to avoid new
+ * anon_vmas to be associated with existing vmas.
+ *
+ * A single task can't take more than one mm_take_all_locks() in a row
+ * or it would deadlock.
+ *
+ * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * mapping->flags avoid to take the same lock twice, if more than one
+ * vma in this mm is backed by the same anon_vma or address_space.
+ *
+ * We can take all the locks in random order because the VM code
+ * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * takes more than one of them in a row. Secondly we're protected
+ * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ *
+ * mm_take_all_locks() and mm_drop_all_locks are expensive operations
+ * that may have to take thousand of locks.
+ *
+ * mm_take_all_locks() can fail if it's interrupted by signals.
+ */
+int mm_take_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int ret = -EINTR;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+
+	mutex_lock(&mm_all_locks_mutex);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_lock_mapping(mm, vma->vm_file->f_mapping);
+	}
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->anon_vma)
+			vm_lock_anon_vma(mm, vma->anon_vma);
+	}
+
+	ret = 0;
+
+out_unlock:
+	if (ret)
+		mm_drop_all_locks(mm);
+
+	return ret;
+}
+
+static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+		/*
+		 * The LSB of head.next can't change to 0 from under
+		 * us because we hold the mm_all_locks_mutex.
+		 *
+		 * We must however clear the bitflag before unlocking
+		 * the vma so the users using the anon_vma->head will
+		 * never see our bitflag.
+		 *
+		 * No need of atomic instructions here, head.next
+		 * can't change from under us until we release the
+		 * anon_vma->lock.
+		 */
+		if (!__test_and_clear_bit(0, (unsigned long *)
+					  &anon_vma->head.next))
+			BUG();
+		spin_unlock(&anon_vma->lock);
+	}
+}
+
+static void vm_unlock_mapping(struct address_space *mapping)
+{
+	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
+		/*
+		 * AS_MM_ALL_LOCKS can't change to 0 from under us
+		 * because we hold the mm_all_locks_mutex.
+		 */
+		spin_unlock(&mapping->i_mmap_lock);
+		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
+					&mapping->flags))
+			BUG();
+	}
+}
+
+/*
+ * The mmap_sem cannot be released by the caller until
+ * mm_drop_all_locks() returns.
+ */
+void mm_drop_all_locks(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	BUG_ON(down_read_trylock(&mm->mmap_sem));
+	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->anon_vma)
+			vm_unlock_anon_vma(vma->anon_vma);
+		if (vma->vm_file && vma->vm_file->f_mapping)
+			vm_unlock_mapping(vma->vm_file->f_mapping);
+	}
+
+	mutex_unlock(&mm_all_locks_mutex);
+}
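
For reference, the calling contract documented in the comment block above (take mmap_sem for writing, call mm_take_all_locks(), keep mmap_sem held until mm_drop_all_locks() returns) would look roughly like the sketch below. This is not part of the patch: the function name example_take_all_locks_user() and the work done inside the critical section are hypothetical placeholders; only the locking order follows the rules stated in the diff. Presumably the MMU notifier registration path introduced alongside this change is the intended consumer of this pairing.

/*
 * Minimal sketch, not from this patch: how a caller is expected to
 * pair mm_take_all_locks()/mm_drop_all_locks() per the comments above.
 */
static int example_take_all_locks_user(struct mm_struct *mm)
{
	int ret;

	down_write(&mm->mmap_sem);	/* must hold mmap_sem for write first */
	ret = mm_take_all_locks(mm);	/* returns -EINTR if a signal is pending */
	if (ret)
		goto out;

	/* ... work that must exclude page faults, rmap and truncation ... */

	mm_drop_all_locks(mm);		/* mmap_sem is still held at this point */
out:
	up_write(&mm->mmap_sem);	/* only now may mmap_sem be released */
	return ret;
}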