author	Davidlohr Bueso <dave@stgolabs.net>	2014-12-12 19:54:24 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-12-13 15:42:45 -0500
commit	c8c06efa8b552608493b7066c234cfa82c47fcea (patch)
tree	7e206c669149766fb5a77a3ef85cdd4fac63be78 /mm
parent	83cde9e8ba95d180eaefefe834958fbf7008cf39 (diff)
mm: convert i_mmap_mutex to rwsem
The i_mmap_mutex is a close cousin of the anon vma lock, both protecting
similar data, one for file backed pages and the other for anon memory. To
this end, this lock can also be a rwsem. In addition, there are some
important opportunities to share the lock when there are no tree
modifications.

This conversion is straightforward. For now, all users take the write lock.

[sfr@canb.auug.org.au: update fremap.c]
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
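The part of the conversion that gives the mm/ diff below its shape lives outside mm/ and is therefore not in this diffstat: the field in struct address_space changes type, and the i_mmap_lock_write()/i_mmap_unlock_write() helpers introduced by the parent commit (83cde9e8ba95) are pointed at rwsem primitives. A minimal sketch of that part, assuming those helper names and eliding the rest of the struct:

	/* include/linux/fs.h: the lock protecting the i_mmap interval tree
	 * becomes a read-write semaphore; only the write side is used so far.
	 */
	struct address_space {
		/* ... other fields elided ... */
		struct rw_semaphore	i_mmap_rwsem;	/* was: struct mutex i_mmap_mutex */
		/* ... */
	};

	/* include/linux/mm.h: wrappers so callers in mm/ need not care which
	 * primitive backs the lock.
	 */
	static inline void i_mmap_lock_write(struct address_space *mapping)
	{
		down_write(&mapping->i_mmap_rwsem);
	}

	static inline void i_mmap_unlock_write(struct address_space *mapping)
	{
		up_write(&mapping->i_mmap_rwsem);
	}

With every caller on the write side, the rwsem behaves exactly like the old mutex; the benefit comes later, when read-mostly paths can take the lock shared.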
Diffstat (limited to 'mm')
-rw-r--r--	mm/filemap.c	10
-rw-r--r--	mm/hugetlb.c	10
-rw-r--r--	mm/mmap.c	8
-rw-r--r--	mm/mremap.c	2
-rw-r--r--	mm/rmap.c	6
5 files changed, 18 insertions, 18 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 14b4642279f1..e8905bc3cbd7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -62,16 +62,16 @@
 /*
  * Lock ordering:
  *
- *  ->i_mmap_mutex		(truncate_pagecache)
+ *  ->i_mmap_rwsem		(truncate_pagecache)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock		(exclusive_swap_page, others)
  *        ->mapping->tree_lock
  *
  *  ->i_mutex
- *    ->i_mmap_mutex		(truncate->unmap_mapping_range)
+ *    ->i_mmap_rwsem		(truncate->unmap_mapping_range)
  *
  *  ->mmap_sem
- *    ->i_mmap_mutex
+ *    ->i_mmap_rwsem
  *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
  *        ->mapping->tree_lock	(arch-dependent flush_dcache_mmap_lock)
  *
@@ -85,7 +85,7 @@
  *    sb_lock			(fs/fs-writeback.c)
  *    ->mapping->tree_lock	(__sync_single_inode)
  *
- *  ->i_mmap_mutex
+ *  ->i_mmap_rwsem
  *    ->anon_vma.lock		(vma_adjust)
  *
  *  ->anon_vma.lock
@@ -105,7 +105,7 @@
  *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
  *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
  *
- *  ->i_mmap_mutex
+ *  ->i_mmap_rwsem
  *    ->tasklist_lock		(memory_failure, collect_procs_ao)
  */
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ffe19304cc09..989cb032eaf5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2726,9 +2726,9 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
  * on its way out. We're lucky that the flag has such an appropriate
  * name, and can in fact be safely cleared here. We could clear it
  * before the __unmap_hugepage_range above, but all that's necessary
- * is to clear it before releasing the i_mmap_mutex. This works
+ * is to clear it before releasing the i_mmap_rwsem. This works
  * because in the context this is called, the VMA is about to be
- * destroyed and the i_mmap_mutex is held.
+ * destroyed and the i_mmap_rwsem is held.
  */
 	vma->vm_flags &= ~VM_MAYSHARE;
 }
@@ -3370,9 +3370,9 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		spin_unlock(ptl);
 	}
 	/*
-	 * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare
+	 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
 	 * may have cleared our pud entry and done put_page on the page table:
-	 * once we release i_mmap_mutex, another task can do the final put_page
+	 * once we release i_mmap_rwsem, another task can do the final put_page
 	 * and that page table be reused and filled with junk.
 	 */
 	flush_tlb_range(vma, start, end);
@@ -3525,7 +3525,7 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
  * and returns the corresponding pte. While this is not necessary for the
  * !shared pmd case because we can allocate the pmd later as well, it makes the
  * code much cleaner. pmd allocation is essential for the shared case because
- * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * pud has to be populated inside the same i_mmap_rwsem section - otherwise
  * racing tasks could either miss the sharing (see huge_pte_offset) or select a
  * bad pmd for sharing.
  */
diff --git a/mm/mmap.c b/mm/mmap.c
index ecd6ecf48778..0d84b2f86f3b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -232,7 +232,7 @@ error:
 }
 
 /*
- * Requires inode->i_mapping->i_mmap_mutex
+ * Requires inode->i_mapping->i_mmap_rwsem
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 		struct file *file, struct address_space *mapping)
@@ -2791,7 +2791,7 @@ void exit_mmap(struct mm_struct *mm)
 
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap tree. If vm_file is non-NULL
- * then i_mmap_mutex is taken here.
+ * then i_mmap_rwsem is taken here.
  */
 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
@@ -3086,7 +3086,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 		 */
 		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
 			BUG();
-		mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
+		down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
 	}
 }
 
@@ -3113,7 +3113,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
+ * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
diff --git a/mm/mremap.c b/mm/mremap.c
index 426b448d6447..84aa36f9f308 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -99,7 +99,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	spinlock_t *old_ptl, *new_ptl;
 
 	/*
-	 * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
+	 * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
 	 * locks to ensure that rmap will always observe either the old or the
 	 * new ptes. This is the easiest way to avoid races with
 	 * truncate_pagecache(), page migration, etc...
diff --git a/mm/rmap.c b/mm/rmap.c
index bea03f6bec61..18247f89f1a8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,7 +23,7 @@
  * inode->i_mutex	(while writing or truncating, not reading or faulting)
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
- *       mapping->i_mmap_mutex
+ *       mapping->i_mmap_rwsem
  *         anon_vma->rwsem
  *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
@@ -1260,7 +1260,7 @@ out_mlock:
 	/*
 	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
 	 * unstable result and race. Plus, We can't wait here because
-	 * we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
+	 * we now hold anon_vma->rwsem or mapping->i_mmap_rwsem.
 	 * if trylock failed, the page remain in evictable lru and later
 	 * vmscan could retry to move the page to unevictable lru if the
 	 * page is actually mlocked.
@@ -1684,7 +1684,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 	 * The page lock not only makes sure that page->mapping cannot
 	 * suddenly be NULLified by truncation, it makes sure that the
 	 * structure at mapping cannot be freed and reused yet,
-	 * so we can safely take mapping->i_mmap_mutex.
+	 * so we can safely take mapping->i_mmap_rwsem.
 	 */
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 