path: root/mm/rmap.c
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c  172
1 file changed, 137 insertions(+), 35 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 522e4a93cad..3a39b518a65 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -24,8 +24,8 @@
  * inode->i_alloc_sem (vmtruncate_range)
  * mm->mmap_sem
  * page->flags PG_locked (lock_page)
- * mapping->i_mmap_lock
- * anon_vma->lock
+ * mapping->i_mmap_mutex
+ * anon_vma->mutex
  * mm->page_table_lock or pte_lock
  * zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  * swap_lock (in swap_duplicate, swap_info_get)
@@ -40,7 +40,7 @@
  *
  * (code doesn't rely on that order so it could be switched around)
  * ->tasklist_lock
- * anon_vma->lock (memory_failure, collect_procs_anon)
+ * anon_vma->mutex (memory_failure, collect_procs_anon)
  * pte map lock
  */
 
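The two hunks above only rename entries in the file's lock-ordering comment, but the rule they document is the usual one: code that needs several of these locks takes them top-down in the listed order and releases them in reverse. A minimal userspace illustration of that discipline (pthread mutexes with invented names standing in for mapping->i_mmap_mutex and anon_vma->mutex; this is an analogy, not kernel code):

#include <stdio.h>
#include <pthread.h>

/* Stand-ins for mapping->i_mmap_mutex and anon_vma->mutex. */
static pthread_mutex_t i_mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t anon_vma_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Every path that needs both locks takes them in the documented order
 * (i_mmap_mutex before anon_vma_mutex) and drops them in reverse, so no
 * two paths can each hold one lock while waiting for the other.
 */
static void both_locked_section(void (*work)(void))
{
	pthread_mutex_lock(&i_mmap_mutex);
	pthread_mutex_lock(&anon_vma_mutex);
	work();
	pthread_mutex_unlock(&anon_vma_mutex);
	pthread_mutex_unlock(&i_mmap_mutex);
}

static void work(void)
{
	puts("both locks held");
}

int main(void)
{
	both_locked_section(work);
	return 0;
}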
@@ -86,6 +86,29 @@ static inline struct anon_vma *anon_vma_alloc(void)
 static inline void anon_vma_free(struct anon_vma *anon_vma)
 {
 	VM_BUG_ON(atomic_read(&anon_vma->refcount));
+
+	/*
+	 * Synchronize against page_lock_anon_vma() such that
+	 * we can safely hold the lock without the anon_vma getting
+	 * freed.
+	 *
+	 * Relies on the full mb implied by the atomic_dec_and_test() from
+	 * put_anon_vma() against the acquire barrier implied by
+	 * mutex_trylock() from page_lock_anon_vma(). This orders:
+	 *
+	 * page_lock_anon_vma()		VS	put_anon_vma()
+	 *   mutex_trylock()			  atomic_dec_and_test()
+	 *   LOCK				  MB
+	 *   atomic_read()			  mutex_is_locked()
+	 *
+	 * LOCK should suffice since the actual taking of the lock must
+	 * happen _before_ what follows.
+	 */
+	if (mutex_is_locked(&anon_vma->root->mutex)) {
+		anon_vma_lock(anon_vma);
+		anon_vma_unlock(anon_vma);
+	}
+
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
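The comment added above encodes a wait-for-lock-holder pattern: the thread that drops the last reference only frees the anon_vma after any racing mutex_trylock() from page_lock_anon_vma() has let go of the mutex again. A rough userspace analogue of that pattern, using pthreads and C11 atomics with invented names (obj, try_lock_obj and put_obj stand in for the anon_vma, the trylock fast path, and put_anon_vma()/anon_vma_free(); the kernel's RCU/SLAB_DESTROY_BY_RCU protection of the trylock itself is not modelled, and the unconditional lock/unlock replaces the kernel's mutex_is_locked() check):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	pthread_mutex_t lock;	/* stands in for anon_vma->root->mutex */
	atomic_int refcount;	/* stands in for anon_vma->refcount */
};

/* Fast path, like the mutex_trylock() in page_lock_anon_vma(). */
static int try_lock_obj(struct obj *o)
{
	if (pthread_mutex_trylock(&o->lock) != 0)
		return 0;			/* contended: caller must fall back */
	if (atomic_load(&o->refcount) == 0) {
		pthread_mutex_unlock(&o->lock);	/* object is already dying */
		return 0;
	}
	return 1;	/* locked; object stays valid until we unlock */
}

/* Release side, like put_anon_vma()/anon_vma_free(). */
static void put_obj(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) != 1)
		return;				/* not the last reference */
	/*
	 * Last reference gone.  A racing try_lock_obj() may still hold the
	 * mutex after seeing a non-zero refcount; taking and dropping the
	 * mutex here waits that holder out before the memory is released,
	 * as the hunk above does (the kernel only bothers when
	 * mutex_is_locked(); this sketch does it unconditionally).
	 */
	pthread_mutex_lock(&o->lock);
	pthread_mutex_unlock(&o->lock);
	pthread_mutex_destroy(&o->lock);
	free(o);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (!o)
		return 1;
	pthread_mutex_init(&o->lock, NULL);
	atomic_init(&o->refcount, 1);

	if (try_lock_obj(o)) {
		/* ... use the object under the lock ... */
		pthread_mutex_unlock(&o->lock);
	}
	put_obj(o);	/* drops the last reference and frees */
	return 0;
}

The key property is that holding the lock keeps the object alive even after its refcount has reached zero, because the freer blocks on that same lock before freeing.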
@@ -307,7 +330,7 @@ static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
 
-	spin_lock_init(&anon_vma->lock);
+	mutex_init(&anon_vma->mutex);
 	atomic_set(&anon_vma->refcount, 0);
 	INIT_LIST_HEAD(&anon_vma->head);
 }
@@ -320,12 +343,26 @@ void __init anon_vma_init(void)
 }
 
 /*
- * Getting a lock on a stable anon_vma from a page off the LRU is
- * tricky: page_lock_anon_vma rely on RCU to guard against the races.
+ * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
+ *
+ * Since there is no serialization what so ever against page_remove_rmap()
+ * the best this function can do is return a locked anon_vma that might
+ * have been relevant to this page.
+ *
+ * The page might have been remapped to a different anon_vma or the anon_vma
+ * returned may already be freed (and even reused).
+ *
+ * All users of this function must be very careful when walking the anon_vma
+ * chain and verify that the page in question is indeed mapped in it
+ * [ something equivalent to page_mapped_in_vma() ].
+ *
+ * Since anon_vma's slab is DESTROY_BY_RCU and we know from page_remove_rmap()
+ * that the anon_vma pointer from page->mapping is valid if there is a
+ * mapcount, we can dereference the anon_vma after observing those.
  */
-struct anon_vma *__page_lock_anon_vma(struct page *page)
+struct anon_vma *page_get_anon_vma(struct page *page)
 {
-	struct anon_vma *anon_vma, *root_anon_vma;
+	struct anon_vma *anon_vma = NULL;
 	unsigned long anon_mapping;
 
 	rcu_read_lock();
@@ -336,32 +373,97 @@ struct anon_vma *__page_lock_anon_vma(struct page *page)
 		goto out;
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
-	root_anon_vma = ACCESS_ONCE(anon_vma->root);
-	spin_lock(&root_anon_vma->lock);
+	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
+		anon_vma = NULL;
+		goto out;
+	}
 
 	/*
 	 * If this page is still mapped, then its anon_vma cannot have been
-	 * freed. But if it has been unmapped, we have no security against
-	 * the anon_vma structure being freed and reused (for another anon_vma:
-	 * SLAB_DESTROY_BY_RCU guarantees that - so the spin_lock above cannot
-	 * corrupt): with anon_vma_prepare() or anon_vma_fork() redirecting
-	 * anon_vma->root before page_unlock_anon_vma() is called to unlock.
+	 * freed. But if it has been unmapped, we have no security against the
+	 * anon_vma structure being freed and reused (for another anon_vma:
+	 * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero()
+	 * above cannot corrupt).
 	 */
-	if (page_mapped(page))
-		return anon_vma;
+	if (!page_mapped(page)) {
+		put_anon_vma(anon_vma);
+		anon_vma = NULL;
+	}
+out:
+	rcu_read_unlock();
+
+	return anon_vma;
+}
+
+/*
+ * Similar to page_get_anon_vma() except it locks the anon_vma.
+ *
+ * Its a little more complex as it tries to keep the fast path to a single
+ * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
+ * reference like with page_get_anon_vma() and then block on the mutex.
+ */
+struct anon_vma *page_lock_anon_vma(struct page *page)
+{
+	struct anon_vma *anon_vma = NULL;
+	unsigned long anon_mapping;
+
+	rcu_read_lock();
+	anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
+	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
+		goto out;
+	if (!page_mapped(page))
+		goto out;
+
+	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
+	if (mutex_trylock(&anon_vma->root->mutex)) {
+		/*
+		 * If we observe a !0 refcount, then holding the lock ensures
+		 * the anon_vma will not go away, see __put_anon_vma().
+		 */
+		if (!atomic_read(&anon_vma->refcount)) {
+			anon_vma_unlock(anon_vma);
+			anon_vma = NULL;
+		}
+		goto out;
+	}
+
+	/* trylock failed, we got to sleep */
+	if (!atomic_inc_not_zero(&anon_vma->refcount)) {
+		anon_vma = NULL;
+		goto out;
+	}
+
+	if (!page_mapped(page)) {
+		put_anon_vma(anon_vma);
+		anon_vma = NULL;
+		goto out;
+	}
+
+	/* we pinned the anon_vma, its safe to sleep */
+	rcu_read_unlock();
+	anon_vma_lock(anon_vma);
+
+	if (atomic_dec_and_test(&anon_vma->refcount)) {
+		/*
+		 * Oops, we held the last refcount, release the lock
+		 * and bail -- can't simply use put_anon_vma() because
+		 * we'll deadlock on the anon_vma_lock() recursion.
+		 */
+		anon_vma_unlock(anon_vma);
+		__put_anon_vma(anon_vma);
+		anon_vma = NULL;
+	}
+
+	return anon_vma;
 
-	spin_unlock(&root_anon_vma->lock);
 out:
 	rcu_read_unlock();
-	return NULL;
+	return anon_vma;
 }
 
 void page_unlock_anon_vma(struct anon_vma *anon_vma)
-	__releases(&anon_vma->root->lock)
-	__releases(RCU)
 {
 	anon_vma_unlock(anon_vma);
-	rcu_read_unlock();
 }
 
 /*
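For context on how the new pair is consumed: callers such as try_to_unmap_anon() in this same file take the lock, walk the anon_vma chain, re-validate every vma against the page (the vma_address() == -EFAULT check the comment above page_get_anon_vma() asks for), and only then drop the lock. A schematic of that caller pattern (walk_anon_page is a made-up name, the per-vma work is elided, and this is an illustration of the locking discipline rather than a verbatim kernel function; it only builds inside mm/rmap.c where vma_address() is defined):

static int walk_anon_page(struct page *page)
{
	struct anon_vma *anon_vma;
	struct anon_vma_chain *avc;
	int ret = SWAP_AGAIN;

	anon_vma = page_lock_anon_vma(page);
	if (!anon_vma)
		return ret;		/* no longer anon, or no longer mapped */

	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(page, vma);

		if (address == -EFAULT)
			continue;	/* page is not mapped in this vma */
		/* ... operate on (page, vma, address) here ... */
	}

	page_unlock_anon_vma(anon_vma);
	return ret;
}

Holding the anon_vma mutex across the walk keeps the chain stable, while the per-vma re-validation covers the case where the locked anon_vma is no longer the one the page is mapped into.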
@@ -646,14 +748,14 @@ static int page_referenced_file(struct page *page,
 	 * The page lock not only makes sure that page->mapping cannot
 	 * suddenly be NULLified by truncation, it makes sure that the
 	 * structure at mapping cannot be freed and reused yet,
-	 * so we can safely take mapping->i_mmap_lock.
+	 * so we can safely take mapping->i_mmap_mutex.
 	 */
 	BUG_ON(!PageLocked(page));
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 
 	/*
-	 * i_mmap_lock does not stabilize mapcount at all, but mapcount
+	 * i_mmap_mutex does not stabilize mapcount at all, but mapcount
 	 * is more likely to be accurate if we note it after spinning.
 	 */
 	mapcount = page_mapcount(page);
@@ -675,7 +777,7 @@ static int page_referenced_file(struct page *page,
 			break;
 	}
 
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return referenced;
 }
 
@@ -762,7 +864,7 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
 
 	BUG_ON(PageAnon(page));
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		if (vma->vm_flags & VM_SHARED) {
 			unsigned long address = vma_address(page, vma);
@@ -771,7 +873,7 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
 			ret += page_mkclean_one(page, vma, address);
 		}
 	}
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }
 
@@ -1119,7 +1221,7 @@ out_mlock:
 	/*
 	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
 	 * unstable result and race. Plus, We can't wait here because
-	 * we now hold anon_vma->lock or mapping->i_mmap_lock.
+	 * we now hold anon_vma->mutex or mapping->i_mmap_mutex.
 	 * if trylock failed, the page remain in evictable lru and later
 	 * vmscan could retry to move the page to unevictable lru if the
 	 * page is actually mlocked.
@@ -1345,7 +1447,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	unsigned long max_nl_size = 0;
 	unsigned int mapcount;
 
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
@@ -1391,7 +1493,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	mapcount = page_mapcount(page);
 	if (!mapcount)
 		goto out;
-	cond_resched_lock(&mapping->i_mmap_lock);
+	cond_resched();
 
 	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
 	if (max_nl_cursor == 0)
@@ -1413,7 +1515,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 			}
 			vma->vm_private_data = (void *) max_nl_cursor;
 		}
-		cond_resched_lock(&mapping->i_mmap_lock);
+		cond_resched();
 		max_nl_cursor += CLUSTER_SIZE;
 	} while (max_nl_cursor <= max_nl_size);
 
@@ -1425,7 +1527,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
 		vma->vm_private_data = NULL;
 out:
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }
 
@@ -1544,7 +1646,7 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
 
 	if (!mapping)
 		return ret;
-	spin_lock(&mapping->i_mmap_lock);
+	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
@@ -1558,7 +1660,7 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
 	 * never contain migration ptes. Decide what to do about this
 	 * limitation to linear when we need rmap_walk() on nonlinear.
 	 */
-	spin_unlock(&mapping->i_mmap_lock);
+	mutex_unlock(&mapping->i_mmap_mutex);
 	return ret;
 }
 