author     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>   2016-01-15 19:57:31 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>         2016-01-15 20:56:32 -0500
commit     88f306b68cbb36e500da4b9601b2e3d13dd683c4 (patch)
tree       e0373c2ee59446b3f8f9c2bfae9f75ff05e73f6c /mm
parent     d645fc0eabbe783d34a14fbb31768ad8571f0ad4 (diff)
mm: fix locking order in mm_take_all_locks()
Dmitry Vyukov has reported[1] a possible deadlock (triggered by his syzkaller fuzzer):

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&hugetlbfs_i_mmap_rwsem_key);
                               lock(&mapping->i_mmap_rwsem);
                               lock(&hugetlbfs_i_mmap_rwsem_key);
  lock(&mapping->i_mmap_rwsem);

Both traces point to mm_take_all_locks() as the source of the problem.  It
does not take care of the ordering of hugetlbfs_i_mmap_rwsem_key (aka
mapping->i_mmap_rwsem for hugetlb mappings) vs. i_mmap_rwsem.

huge_pmd_share() does memory allocation under hugetlbfs_i_mmap_rwsem_key,
and the allocator can take i_mmap_rwsem if it hits reclaim.  So we need to
take i_mmap_rwsem from all hugetlb VMAs before taking i_mmap_rwsem from the
rest of the VMAs.

The patch also documents the locking order for hugetlbfs_i_mmap_rwsem_key.

[1] http://lkml.kernel.org/r/CACT4Y+Zu95tBs-0EvdiAKzUOsb4tczRRfCRTpLr4bg_OP9HuVg@mail.gmail.com

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
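Note: the diff below splits the single mapping-locking loop in mm_take_all_locks() into two passes: first every hugetlb-backed mapping's i_mmap_rwsem, then every other mapping's. The following is a minimal userspace sketch of that two-pass discipline, not kernel code; struct mapping, its is_hugetlb flag and the take_all_locks()/drop_all_locks() helpers are hypothetical stand-ins for vma->vm_file->f_mapping, is_vm_hugetlb_page() and the loops added by the patch.

/*
 * Userspace illustration only (hypothetical names, not kernel code):
 * lock every "mapping", hugetlb-backed ones first, mirroring the two
 * loops this patch adds to mm_take_all_locks().
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct mapping {
	pthread_rwlock_t i_mmap_rwsem;	/* stand-in for mapping->i_mmap_rwsem */
	bool is_hugetlb;		/* stand-in for is_vm_hugetlb_page(vma) */
};

static void take_all_locks(struct mapping *maps, int n)
{
	/* Pass 1: all hugetlb-class locks (hugetlbfs_i_mmap_rwsem_key). */
	for (int i = 0; i < n; i++)
		if (maps[i].is_hugetlb)
			pthread_rwlock_wrlock(&maps[i].i_mmap_rwsem);

	/* Pass 2: all remaining i_mmap_rwsem locks. */
	for (int i = 0; i < n; i++)
		if (!maps[i].is_hugetlb)
			pthread_rwlock_wrlock(&maps[i].i_mmap_rwsem);
}

static void drop_all_locks(struct mapping *maps, int n)
{
	for (int i = 0; i < n; i++)
		pthread_rwlock_unlock(&maps[i].i_mmap_rwsem);
}

int main(void)
{
	struct mapping maps[3] = {
		{ PTHREAD_RWLOCK_INITIALIZER, false },
		{ PTHREAD_RWLOCK_INITIALIZER, true  },
		{ PTHREAD_RWLOCK_INITIALIZER, false },
	};

	take_all_locks(maps, 3);
	puts("all mapping locks held, hugetlb ones first");
	drop_all_locks(maps, 3);
	return 0;
}

Built with cc -pthread, this always acquires every hugetlb-class rwlock before any regular one, which is the ordering invariant the patch restores and which huge_pmd_share()'s allocation-under-lock requires.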
Diffstat (limited to 'mm')
-rw-r--r--  mm/mmap.c  25
-rw-r--r--  mm/rmap.c  31
2 files changed, 36 insertions(+), 20 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index b3f00b616b81..84b12624ceb0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3184,10 +3184,16 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * mapping->flags avoid to take the same lock twice, if more than one
  * vma in this mm is backed by the same anon_vma or address_space.
  *
- * We can take all the locks in random order because the VM code
- * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never
- * takes more than one of them in a row. Secondly we're protected
- * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
+ * We take locks in following order, accordingly to comment at beginning
+ * of mm/rmap.c:
+ *   - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for
+ *     hugetlb mapping);
+ *   - all i_mmap_rwsem locks;
+ *   - all anon_vma->rwseml
+ *
+ * We can take all locks within these types randomly because the VM code
+ * doesn't nest them and we protected from parallel mm_take_all_locks() by
+ * mm_all_locks_mutex.
  *
  * mm_take_all_locks() and mm_drop_all_locks are expensive operations
  * that may have to take thousand of locks.
@@ -3206,7 +3212,16 @@ int mm_take_all_locks(struct mm_struct *mm)
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (signal_pending(current))
 			goto out_unlock;
-		if (vma->vm_file && vma->vm_file->f_mapping)
+		if (vma->vm_file && vma->vm_file->f_mapping &&
+				is_vm_hugetlb_page(vma))
+			vm_lock_mapping(mm, vma->vm_file->f_mapping);
+	}
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (signal_pending(current))
+			goto out_unlock;
+		if (vma->vm_file && vma->vm_file->f_mapping &&
+				!is_vm_hugetlb_page(vma))
 			vm_lock_mapping(mm, vma->vm_file->f_mapping);
 	}
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 68af2e32f7ed..79f3bf047f38 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,21 +23,22 @@
  * inode->i_mutex       (while writing or truncating, not reading or faulting)
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
- *       mapping->i_mmap_rwsem
- *         anon_vma->rwsem
- *           mm->page_table_lock or pte_lock
- *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
- *             swap_lock (in swap_duplicate, swap_info_get)
- *               mmlist_lock (in mmput, drain_mmlist and others)
- *               mapping->private_lock (in __set_page_dirty_buffers)
- *                 mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
- *                   mapping->tree_lock (widely used)
- *               inode->i_lock (in set_page_dirty's __mark_inode_dirty)
- *               bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
- *                 sb_lock (within inode_lock in fs/fs-writeback.c)
- *                 mapping->tree_lock (widely used, in set_page_dirty,
- *                           in arch-dependent flush_dcache_mmap_lock,
- *                           within bdi.wb->list_lock in __sync_single_inode)
+ *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
+ *         mapping->i_mmap_rwsem
+ *           anon_vma->rwsem
+ *             mm->page_table_lock or pte_lock
+ *               zone->lru_lock (in mark_page_accessed, isolate_lru_page)
+ *               swap_lock (in swap_duplicate, swap_info_get)
+ *                 mmlist_lock (in mmput, drain_mmlist and others)
+ *                 mapping->private_lock (in __set_page_dirty_buffers)
+ *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
+ *                     mapping->tree_lock (widely used)
+ *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
+ *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
+ *                   sb_lock (within inode_lock in fs/fs-writeback.c)
+ *                   mapping->tree_lock (widely used, in set_page_dirty,
+ *                             in arch-dependent flush_dcache_mmap_lock,
+ *                             within bdi.wb->list_lock in __sync_single_inode)
  *
  * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
  *   ->tasklist_lock