author	Rik van Riel <riel@redhat.com>	2010-08-09 20:18:40 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-08-09 23:44:55 -0400
commit	012f18004da33ba672e3c60838cc4898126174d3 (patch)
tree	990382f9f8c0d885463ac9195b8e9a18043f716d /mm
parent	5c341ee1dfc8fe69d66b1c8b19e463c6d7201ae1 (diff)
mm: always lock the root (oldest) anon_vma
Always (and only) lock the root (oldest) anon_vma whenever we do something
in an anon_vma. The recently introduced anon_vma scalability is due to
the rmap code scanning only the VMAs that need to be scanned. Many common
operations still took the anon_vma lock on the root anon_vma, so always
taking that lock is not expected to introduce any scalability issues.

However, always taking the same lock does mean we only need to take one
lock, which means rmap_walk on pages from any anon_vma in the vma is
excluded from occurring during an munmap, expand_stack or other operation
that needs to exclude rmap_walk and similar functions.

Also add the proper locking to vma_adjust.

Signed-off-by: Rik van Riel <riel@redhat.com>
Tested-by: Larry Woodman <lwoodman@redhat.com>
Acked-by: Larry Woodman <lwoodman@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
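[Editorial note] The mm/ hunks below rely on anon_vma_lock() and anon_vma_unlock() resolving to the root anon_vma's spinlock; that helper change lives in include/linux/rmap.h, outside the mm/-limited diffstat shown on this page. A minimal sketch of what the helpers plausibly look like after this commit, assuming struct anon_vma has gained a ->root pointer to the oldest anon_vma in its group (as the hunks below imply); this is an inference from the diff, not code shown in it:

/* Sketch only: forward both helpers to the root's lock, so that any
 * anon_vma in a fork group serializes on one and the same spinlock. */
static inline void anon_vma_lock(struct anon_vma *anon_vma)
{
	spin_lock(&anon_vma->root->lock);
}

static inline void anon_vma_unlock(struct anon_vma *anon_vma)
{
	spin_unlock(&anon_vma->root->lock);
}

This is consistent with the ksm.c and migrate.c hunks, which pass &anon_vma->root->lock to atomic_dec_and_lock() and then release it through plain anon_vma_unlock().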
Diffstat (limited to 'mm')
-rw-r--r--	mm/ksm.c	2
-rw-r--r--	mm/migrate.c	2
-rw-r--r--	mm/mmap.c	30
3 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index eb9f6806ed51..da6037c261f1 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -325,7 +325,7 @@ static void drop_anon_vma(struct rmap_item *rmap_item)
 {
 	struct anon_vma *anon_vma = rmap_item->anon_vma;
 
-	if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
+	if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
 		int empty = list_empty(&anon_vma->head);
 		anon_vma_unlock(anon_vma);
 		if (empty)
diff --git a/mm/migrate.c b/mm/migrate.c
index 1855f869917d..5208fa1d9712 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -682,7 +682,7 @@ skip_unmap:
 rcu_unlock:
 
 	/* Drop an anon_vma reference if we took one */
-	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
+	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
 		int empty = list_empty(&anon_vma->head);
 		anon_vma_unlock(anon_vma);
 		if (empty)
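[Editorial note] Both hunks above hinge on the contract of atomic_dec_and_lock(): it decrements the reference count and takes the spinlock only when the count reaches zero, returning true with the lock held. That is why the lock passed in must be the same one anon_vma_unlock() later releases, hence the switch to &anon_vma->root->lock. A userspace analogue of that contract, for illustration only (not the kernel's implementation):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Return true with *lock held iff this call dropped the last
 * reference; mirrors the atomic_dec_and_lock() contract. */
static bool dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
	int old = atomic_load(cnt);

	/* Fast path: drop a reference that is provably not the last,
	 * without touching the lock at all. */
	while (old > 1)
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return false;

	/* Possibly the last reference: take the lock, then re-check. */
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(cnt, 1) == 1)
		return true;	/* count hit zero; caller holds lock */
	pthread_mutex_unlock(lock);
	return false;
}

The caller that gets back true owns both the lock and the job of tearing the object down, which is exactly the "if (empty) ..." pattern in the two hunks.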
diff --git a/mm/mmap.c b/mm/mmap.c
index f5db18decc2e..fb89360a2120 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -506,6 +506,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct vm_area_struct *importer = NULL;
 	struct address_space *mapping = NULL;
 	struct prio_tree_root *root = NULL;
+	struct anon_vma *anon_vma = NULL;
 	struct file *file = vma->vm_file;
 	long adjust_next = 0;
 	int remove_next = 0;
@@ -578,6 +579,17 @@ again: remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
+	/*
+	 * When changing only vma->vm_end, we don't really need anon_vma
+	 * lock. This is a fairly rare case by itself, but the anon_vma
+	 * lock may be shared between many sibling processes. Skipping
+	 * the lock for brk adjustments makes a difference sometimes.
+	 */
+	if (vma->anon_vma && (insert || importer || start != vma->vm_start)) {
+		anon_vma = vma->anon_vma;
+		anon_vma_lock(anon_vma);
+	}
+
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
 		vma_prio_tree_remove(vma, root);
@@ -617,6 +629,8 @@ again: remove_next = 1 + (end > next->vm_end);
 		__insert_vm_struct(mm, insert);
 	}
 
+	if (anon_vma)
+		anon_vma_unlock(anon_vma);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);
 
@@ -2470,23 +2484,23 @@ static DEFINE_MUTEX(mm_all_locks_mutex);
 
 static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
 {
-	if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+	if (!test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
 		/*
 		 * The LSB of head.next can't change from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+		spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
 		/*
 		 * We can safely modify head.next after taking the
-		 * anon_vma->lock. If some other vma in this mm shares
+		 * anon_vma->root->lock. If some other vma in this mm shares
 		 * the same anon_vma we won't take it again.
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us thanks to the
-		 * anon_vma->lock.
+		 * anon_vma->root->lock.
 		 */
 		if (__test_and_set_bit(0, (unsigned long *)
-				       &anon_vma->head.next))
+				       &anon_vma->root->head.next))
 			BUG();
 	}
 }
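[Editorial note] The test_bit/__test_and_set_bit dance above works because list heads are word-aligned, so bit 0 of head.next is always clear and can be borrowed as an "this anon_vma group is already locked" marker while mm_all_locks_mutex keeps it stable; the commit simply moves the marker to the root's list head so one mark covers the whole group. A standalone illustration of tagging an aligned pointer's low bit (hypothetical demo, not kernel code; the kernel builds with -fno-strict-aliasing, which licenses the cast mirrored here):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct list_node { struct list_node *next; };

int main(void)
{
	struct list_node head = { .next = &head };
	uintptr_t *slot = (uintptr_t *)&head.next;

	assert((*slot & 1) == 0);      /* aligned pointer: bit 0 is free */
	*slot |= 1;                    /* mark: ~ __test_and_set_bit(0, ...) */
	printf("marked, real pointer still %p\n",
	       (void *)(*slot & ~(uintptr_t)1));
	*slot &= ~(uintptr_t)1;        /* unmark: ~ __test_and_clear_bit(0, ...) */
	assert(head.next == &head);    /* pointer restored intact */
	return 0;
}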
@@ -2577,7 +2591,7 @@ out_unlock:
 
 static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
 {
-	if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+	if (test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
 		/*
 		 * The LSB of head.next can't change to 0 from under
 		 * us because we hold the mm_all_locks_mutex.
@@ -2588,10 +2602,10 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us until we release the
-		 * anon_vma->lock.
+		 * anon_vma->root->lock.
 		 */
 		if (!__test_and_clear_bit(0, (unsigned long *)
-					  &anon_vma->head.next))
+					  &anon_vma->root->head.next))
 			BUG();
 		anon_vma_unlock(anon_vma);
 	}