diff options
author | Davidlohr Bueso <dave@stgolabs.net> | 2014-12-12 19:54:24 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-13 15:42:45 -0500 |
commit | c8c06efa8b552608493b7066c234cfa82c47fcea (patch) | |
tree | 7e206c669149766fb5a77a3ef85cdd4fac63be78 /mm | |
parent | 83cde9e8ba95d180eaefefe834958fbf7008cf39 (diff) |
mm: convert i_mmap_mutex to rwsem
The i_mmap_mutex is a close cousin of the anon_vma lock, both protecting
similar data, one for file-backed pages and the other for anon memory.  To
this end, this lock can also be an rwsem.  In addition, there are some
important opportunities to share the lock when there are no tree
modifications.
This conversion is straightforward. For now, all users take the write
lock.
[sfr@canb.auug.org.au: update fremap.c]
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 10 | ||||
-rw-r--r-- | mm/hugetlb.c | 10 | ||||
-rw-r--r-- | mm/mmap.c | 8 | ||||
-rw-r--r-- | mm/mremap.c | 2 | ||||
-rw-r--r-- | mm/rmap.c | 6 |
5 files changed, 18 insertions, 18 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 14b4642279f1..e8905bc3cbd7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -62,16 +62,16 @@ | |||
62 | /* | 62 | /* |
63 | * Lock ordering: | 63 | * Lock ordering: |
64 | * | 64 | * |
65 | * ->i_mmap_mutex (truncate_pagecache) | 65 | * ->i_mmap_rwsem (truncate_pagecache) |
66 | * ->private_lock (__free_pte->__set_page_dirty_buffers) | 66 | * ->private_lock (__free_pte->__set_page_dirty_buffers) |
67 | * ->swap_lock (exclusive_swap_page, others) | 67 | * ->swap_lock (exclusive_swap_page, others) |
68 | * ->mapping->tree_lock | 68 | * ->mapping->tree_lock |
69 | * | 69 | * |
70 | * ->i_mutex | 70 | * ->i_mutex |
71 | * ->i_mmap_mutex (truncate->unmap_mapping_range) | 71 | * ->i_mmap_rwsem (truncate->unmap_mapping_range) |
72 | * | 72 | * |
73 | * ->mmap_sem | 73 | * ->mmap_sem |
74 | * ->i_mmap_mutex | 74 | * ->i_mmap_rwsem |
75 | * ->page_table_lock or pte_lock (various, mainly in memory.c) | 75 | * ->page_table_lock or pte_lock (various, mainly in memory.c) |
76 | * ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock) | 76 | * ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock) |
77 | * | 77 | * |
@@ -85,7 +85,7 @@ | |||
85 | * sb_lock (fs/fs-writeback.c) | 85 | * sb_lock (fs/fs-writeback.c) |
86 | * ->mapping->tree_lock (__sync_single_inode) | 86 | * ->mapping->tree_lock (__sync_single_inode) |
87 | * | 87 | * |
88 | * ->i_mmap_mutex | 88 | * ->i_mmap_rwsem |
89 | * ->anon_vma.lock (vma_adjust) | 89 | * ->anon_vma.lock (vma_adjust) |
90 | * | 90 | * |
91 | * ->anon_vma.lock | 91 | * ->anon_vma.lock |
@@ -105,7 +105,7 @@ | |||
105 | * ->inode->i_lock (zap_pte_range->set_page_dirty) | 105 | * ->inode->i_lock (zap_pte_range->set_page_dirty) |
106 | * ->private_lock (zap_pte_range->__set_page_dirty_buffers) | 106 | * ->private_lock (zap_pte_range->__set_page_dirty_buffers) |
107 | * | 107 | * |
108 | * ->i_mmap_mutex | 108 | * ->i_mmap_rwsem |
109 | * ->tasklist_lock (memory_failure, collect_procs_ao) | 109 | * ->tasklist_lock (memory_failure, collect_procs_ao) |
110 | */ | 110 | */ |
111 | 111 | ||
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ffe19304cc09..989cb032eaf5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2726,9 +2726,9 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, | |||
2726 | * on its way out. We're lucky that the flag has such an appropriate | 2726 | * on its way out. We're lucky that the flag has such an appropriate |
2727 | * name, and can in fact be safely cleared here. We could clear it | 2727 | * name, and can in fact be safely cleared here. We could clear it |
2728 | * before the __unmap_hugepage_range above, but all that's necessary | 2728 | * before the __unmap_hugepage_range above, but all that's necessary |
2729 | * is to clear it before releasing the i_mmap_mutex. This works | 2729 | * is to clear it before releasing the i_mmap_rwsem. This works |
2730 | * because in the context this is called, the VMA is about to be | 2730 | * because in the context this is called, the VMA is about to be |
2731 | * destroyed and the i_mmap_mutex is held. | 2731 | * destroyed and the i_mmap_rwsem is held. |
2732 | */ | 2732 | */ |
2733 | vma->vm_flags &= ~VM_MAYSHARE; | 2733 | vma->vm_flags &= ~VM_MAYSHARE; |
2734 | } | 2734 | } |
@@ -3370,9 +3370,9 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, | |||
3370 | spin_unlock(ptl); | 3370 | spin_unlock(ptl); |
3371 | } | 3371 | } |
3372 | /* | 3372 | /* |
3373 | * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare | 3373 | * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare |
3374 | * may have cleared our pud entry and done put_page on the page table: | 3374 | * may have cleared our pud entry and done put_page on the page table: |
3375 | * once we release i_mmap_mutex, another task can do the final put_page | 3375 | * once we release i_mmap_rwsem, another task can do the final put_page |
3376 | * and that page table be reused and filled with junk. | 3376 | * and that page table be reused and filled with junk. |
3377 | */ | 3377 | */ |
3378 | flush_tlb_range(vma, start, end); | 3378 | flush_tlb_range(vma, start, end); |
@@ -3525,7 +3525,7 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) | |||
3525 | * and returns the corresponding pte. While this is not necessary for the | 3525 | * and returns the corresponding pte. While this is not necessary for the |
3526 | * !shared pmd case because we can allocate the pmd later as well, it makes the | 3526 | * !shared pmd case because we can allocate the pmd later as well, it makes the |
3527 | * code much cleaner. pmd allocation is essential for the shared case because | 3527 | * code much cleaner. pmd allocation is essential for the shared case because |
3528 | * pud has to be populated inside the same i_mmap_mutex section - otherwise | 3528 | * pud has to be populated inside the same i_mmap_rwsem section - otherwise |
3529 | * racing tasks could either miss the sharing (see huge_pte_offset) or select a | 3529 | * racing tasks could either miss the sharing (see huge_pte_offset) or select a |
3530 | * bad pmd for sharing. | 3530 | * bad pmd for sharing. |
3531 | */ | 3531 | */ |
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -232,7 +232,7 @@ error:
232 | } | 232 | } |
233 | 233 | ||
234 | /* | 234 | /* |
235 | * Requires inode->i_mapping->i_mmap_mutex | 235 | * Requires inode->i_mapping->i_mmap_rwsem |
236 | */ | 236 | */ |
237 | static void __remove_shared_vm_struct(struct vm_area_struct *vma, | 237 | static void __remove_shared_vm_struct(struct vm_area_struct *vma, |
238 | struct file *file, struct address_space *mapping) | 238 | struct file *file, struct address_space *mapping) |
@@ -2791,7 +2791,7 @@ void exit_mmap(struct mm_struct *mm) | |||
2791 | 2791 | ||
2792 | /* Insert vm structure into process list sorted by address | 2792 | /* Insert vm structure into process list sorted by address |
2793 | * and into the inode's i_mmap tree. If vm_file is non-NULL | 2793 | * and into the inode's i_mmap tree. If vm_file is non-NULL |
2794 | * then i_mmap_mutex is taken here. | 2794 | * then i_mmap_rwsem is taken here. |
2795 | */ | 2795 | */ |
2796 | int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) | 2796 | int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) |
2797 | { | 2797 | { |
@@ -3086,7 +3086,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) | |||
3086 | */ | 3086 | */ |
3087 | if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) | 3087 | if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) |
3088 | BUG(); | 3088 | BUG(); |
3089 | mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem); | 3089 | down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem); |
3090 | } | 3090 | } |
3091 | } | 3091 | } |
3092 | 3092 | ||
@@ -3113,7 +3113,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) | |||
3113 | * vma in this mm is backed by the same anon_vma or address_space. | 3113 | * vma in this mm is backed by the same anon_vma or address_space. |
3114 | * | 3114 | * |
3115 | * We can take all the locks in random order because the VM code | 3115 | * We can take all the locks in random order because the VM code |
3116 | * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never | 3116 | * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never |
3117 | * takes more than one of them in a row. Secondly we're protected | 3117 | * takes more than one of them in a row. Secondly we're protected |
3118 | * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. | 3118 | * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. |
3119 | * | 3119 | * |
diff --git a/mm/mremap.c b/mm/mremap.c
index 426b448d6447..84aa36f9f308 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -99,7 +99,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, | |||
99 | spinlock_t *old_ptl, *new_ptl; | 99 | spinlock_t *old_ptl, *new_ptl; |
100 | 100 | ||
101 | /* | 101 | /* |
102 | * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma | 102 | * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma |
103 | * locks to ensure that rmap will always observe either the old or the | 103 | * locks to ensure that rmap will always observe either the old or the |
104 | * new ptes. This is the easiest way to avoid races with | 104 | * new ptes. This is the easiest way to avoid races with |
105 | * truncate_pagecache(), page migration, etc... | 105 | * truncate_pagecache(), page migration, etc... |
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,7 +23,7 @@
23 | * inode->i_mutex (while writing or truncating, not reading or faulting) | 23 | * inode->i_mutex (while writing or truncating, not reading or faulting) |
24 | * mm->mmap_sem | 24 | * mm->mmap_sem |
25 | * page->flags PG_locked (lock_page) | 25 | * page->flags PG_locked (lock_page) |
26 | * mapping->i_mmap_mutex | 26 | * mapping->i_mmap_rwsem |
27 | * anon_vma->rwsem | 27 | * anon_vma->rwsem |
28 | * mm->page_table_lock or pte_lock | 28 | * mm->page_table_lock or pte_lock |
29 | * zone->lru_lock (in mark_page_accessed, isolate_lru_page) | 29 | * zone->lru_lock (in mark_page_accessed, isolate_lru_page) |
@@ -1260,7 +1260,7 @@ out_mlock: | |||
1260 | /* | 1260 | /* |
1261 | * We need mmap_sem locking, Otherwise VM_LOCKED check makes | 1261 | * We need mmap_sem locking, Otherwise VM_LOCKED check makes |
1262 | * unstable result and race. Plus, We can't wait here because | 1262 | * unstable result and race. Plus, We can't wait here because |
1263 | * we now hold anon_vma->rwsem or mapping->i_mmap_mutex. | 1263 | * we now hold anon_vma->rwsem or mapping->i_mmap_rwsem. |
1264 | * if trylock failed, the page remain in evictable lru and later | 1264 | * if trylock failed, the page remain in evictable lru and later |
1265 | * vmscan could retry to move the page to unevictable lru if the | 1265 | * vmscan could retry to move the page to unevictable lru if the |
1266 | * page is actually mlocked. | 1266 | * page is actually mlocked. |
@@ -1684,7 +1684,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) | |||
1684 | * The page lock not only makes sure that page->mapping cannot | 1684 | * The page lock not only makes sure that page->mapping cannot |
1685 | * suddenly be NULLified by truncation, it makes sure that the | 1685 | * suddenly be NULLified by truncation, it makes sure that the |
1686 | * structure at mapping cannot be freed and reused yet, | 1686 | * structure at mapping cannot be freed and reused yet, |
1687 | * so we can safely take mapping->i_mmap_mutex. | 1687 | * so we can safely take mapping->i_mmap_rwsem. |
1688 | */ | 1688 | */ |
1689 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 1689 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
1690 | 1690 | ||