author    Ingo Molnar <mingo@kernel.org>    2012-12-02 14:56:46 -0500
committer Mel Gorman <mgorman@suse.de>      2012-12-11 09:43:00 -0500
commit    5a505085f043e8380f83610f79642853c051e2f1
tree      c9e86ee196b1ef87350ee9b268a515e2eae4d307
parent    d28d433512f4f387e2563c14db45a7bb8a338b1a
mm/rmap: Convert the struct anon_vma::mutex to an rwsem
Convert the struct anon_vma::mutex to an rwsem, which will help in
solving a page-migration scalability problem. (Addressed in a separate
patch.)

The conversion is simple and straightforward: in every case where we
mutex_lock()ed we'll now down_write().

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
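The mapping applied throughout the patch is mechanical: a mutex becomes an
rw_semaphore, and every former exclusive acquisition becomes a write
acquisition. As a minimal stand-alone sketch of that pattern (the demo
struct and functions below are hypothetical illustrations, not code from
this patch):

	#include <linux/rwsem.h>

	struct demo {
		struct rw_semaphore rwsem;	/* was: struct mutex mutex; */
	};

	static void demo_init(struct demo *d)
	{
		init_rwsem(&d->rwsem);		/* was: mutex_init(&d->mutex); */
	}

	static void demo_modify(struct demo *d)
	{
		down_write(&d->rwsem);		/* was: mutex_lock(&d->mutex); */
		/* ... modify the protected structure exclusively ... */
		up_write(&d->rwsem);		/* was: mutex_unlock(&d->mutex); */
	}

Taken only for writing like this, the rwsem behaves exactly as the mutex
did; the read-side benefit is left to the follow-up patch.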
-rw-r--r--  include/linux/rmap.h | 16 ++++++++--------
-rw-r--r--  mm/huge_memory.c     |  4 ++--
-rw-r--r--  mm/mmap.c            |  8 ++++----
-rw-r--r--  mm/rmap.c            | 22 +++++++++++-----------
4 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bfe1f4780644..f3f41d242e25 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -7,7 +7,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/memcontrol.h>
 
 /*
@@ -25,8 +25,8 @@
  * pointing to this anon_vma once its vma list is empty.
  */
 struct anon_vma {
 	struct anon_vma *root;		/* Root of this anon_vma tree */
-	struct mutex mutex;		/* Serialize access to vma list */
+	struct rw_semaphore rwsem;	/* W: modification, R: walking the list */
 	/*
 	 * The refcount is taken on an anon_vma when there is no
 	 * guarantee that the vma of page tables will exist for
@@ -64,7 +64,7 @@ struct anon_vma_chain {
 	struct vm_area_struct *vma;
 	struct anon_vma *anon_vma;
 	struct list_head same_vma;	/* locked by mmap_sem & page_table_lock */
-	struct rb_node rb;		/* locked by anon_vma->mutex */
+	struct rb_node rb;		/* locked by anon_vma->rwsem */
 	unsigned long rb_subtree_last;
 #ifdef CONFIG_DEBUG_VM_RB
 	unsigned long cached_vma_start, cached_vma_last;
@@ -108,24 +108,24 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_lock(&anon_vma->root->mutex);
+		down_write(&anon_vma->root->rwsem);
 }
 
 static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_unlock(&anon_vma->root->mutex);
+		up_write(&anon_vma->root->rwsem);
 }
 
 static inline void anon_vma_lock(struct anon_vma *anon_vma)
 {
-	mutex_lock(&anon_vma->root->mutex);
+	down_write(&anon_vma->root->rwsem);
 }
 
 static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 {
-	mutex_unlock(&anon_vma->root->mutex);
+	up_write(&anon_vma->root->rwsem);
 }
 
 /*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 711baf84b153..acd37fe55eb7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1292,7 +1292,7 @@ static int __split_huge_page_splitting(struct page *page,
 	 * We can't temporarily set the pmd to null in order
 	 * to split it, the pmd must remain marked huge at all
 	 * times or the VM won't take the pmd_trans_huge paths
-	 * and it won't wait on the anon_vma->root->mutex to
+	 * and it won't wait on the anon_vma->root->rwsem to
 	 * serialize against split_huge_page*.
 	 */
 	pmdp_splitting_flush(vma, address, pmd);
@@ -1495,7 +1495,7 @@ static int __split_huge_page_map(struct page *page,
 	return ret;
 }
 
-/* must be called with anon_vma->root->mutex hold */
+/* must be called with anon_vma->root->rwsem held */
 static void __split_huge_page(struct page *page,
 			      struct anon_vma *anon_vma)
 {
diff --git a/mm/mmap.c b/mm/mmap.c
index 9a796c41e7d9..88408632da66 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2561,15 +2561,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
 		 * The LSB of head.next can't change from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
+		down_write(&anon_vma->root->rwsem);
 		/*
 		 * We can safely modify head.next after taking the
-		 * anon_vma->root->mutex. If some other vma in this mm shares
+		 * anon_vma->root->rwsem. If some other vma in this mm shares
 		 * the same anon_vma we won't take it again.
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us thanks to the
-		 * anon_vma->root->mutex.
+		 * anon_vma->root->rwsem.
 		 */
 		if (__test_and_set_bit(0, (unsigned long *)
 				       &anon_vma->root->rb_root.rb_node))
@@ -2671,7 +2671,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us until we release the
-		 * anon_vma->root->mutex.
+		 * anon_vma->root->rwsem.
 		 */
 		if (!__test_and_clear_bit(0, (unsigned long *)
 					  &anon_vma->root->rb_root.rb_node))
diff --git a/mm/rmap.c b/mm/rmap.c
index 2ee1ef0f317b..6e3ee3b82798 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -24,7 +24,7 @@
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
  *       mapping->i_mmap_mutex
- *         anon_vma->mutex
+ *         anon_vma->rwsem
  *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  *               swap_lock (in swap_duplicate, swap_info_get)
@@ -37,7 +37,7 @@
  *                           in arch-dependent flush_dcache_mmap_lock,
  *                           within bdi.wb->list_lock in __sync_single_inode)
  *
- * anon_vma->mutex,mapping->i_mutex      (memory_failure, collect_procs_anon)
+ * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
  *   ->tasklist_lock
  *     pte map lock
  */
@@ -103,7 +103,7 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
 	 * LOCK should suffice since the actual taking of the lock must
 	 * happen _before_ what follows.
 	 */
-	if (mutex_is_locked(&anon_vma->root->mutex)) {
+	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
 		anon_vma_lock(anon_vma);
 		anon_vma_unlock(anon_vma);
 	}
@@ -219,9 +219,9 @@ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct
 	struct anon_vma *new_root = anon_vma->root;
 	if (new_root != root) {
 		if (WARN_ON_ONCE(root))
-			mutex_unlock(&root->mutex);
+			up_write(&root->rwsem);
 		root = new_root;
-		mutex_lock(&root->mutex);
+		down_write(&root->rwsem);
 	}
 	return root;
 }
@@ -229,7 +229,7 @@ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct
 static inline void unlock_anon_vma_root(struct anon_vma *root)
 {
 	if (root)
-		mutex_unlock(&root->mutex);
+		up_write(&root->rwsem);
 }
 
 /*
@@ -349,7 +349,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
 	/*
 	 * Iterate the list once more, it now only contains empty and unlinked
 	 * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
-	 * needing to acquire the anon_vma->root->mutex.
+	 * needing to write-acquire the anon_vma->root->rwsem.
 	 */
 	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma = avc->anon_vma;
@@ -365,7 +365,7 @@ static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
 
-	mutex_init(&anon_vma->mutex);
+	init_rwsem(&anon_vma->rwsem);
 	atomic_set(&anon_vma->refcount, 0);
 	anon_vma->rb_root = RB_ROOT;
 }
@@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
 	root_anon_vma = ACCESS_ONCE(anon_vma->root);
-	if (mutex_trylock(&root_anon_vma->mutex)) {
+	if (down_write_trylock(&root_anon_vma->rwsem)) {
 		/*
 		 * If the page is still mapped, then this anon_vma is still
 		 * its anon_vma, and holding the mutex ensures that it will
 		 * not go away, see anon_vma_free().
 		 */
 		if (!page_mapped(page)) {
-			mutex_unlock(&root_anon_vma->mutex);
+			up_write(&root_anon_vma->rwsem);
 			anon_vma = NULL;
 		}
 		goto out;
@@ -1299,7 +1299,7 @@ out_mlock:
 	/*
 	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
 	 * unstable result and race. Plus, We can't wait here because
-	 * we now hold anon_vma->mutex or mapping->i_mmap_mutex.
+	 * we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
 	 * if trylock failed, the page remain in evictable lru and later
 	 * vmscan could retry to move the page to unevictable lru if the
 	 * page is actually mlocked.
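The new "W: modification, R: walking the list" annotation on the rwsem
field points at the follow-up work: anon_vma list walkers can eventually
take the lock for reading, so multiple walks proceed in parallel. A
minimal sketch of that read-side pattern, reusing the hypothetical demo
struct from above (not part of this patch, which is deliberately
write-only):

	static void demo_walk(struct demo *d)
	{
		down_read(&d->rwsem);	/* several walkers may hold this concurrently */
		/* ... walk the protected structure without modifying it ... */
		up_read(&d->rwsem);
	}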