summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph/snap.c
diff options
context:
space:
mode:
author Yan, Zheng <zyan@redhat.com> 2019-05-18 08:39:55 -0400
committer Ilya Dryomov <idryomov@gmail.com> 2019-06-05 14:34:39 -0400
commit 3e1d0452edceebb903d23db53201013c940bf000 (patch)
tree 1a6065a9b2fcd52395ae35778cf9de0ad43d0b30 /fs/ceph/snap.c
parent 1cf89a8dee5e6e9d4fcb81b571a54d40068dfbb7 (diff)
ceph: avoid iput_final() while holding mutex or in dispatch thread
iput_final() may wait for readahead pages. The wait can cause a deadlock. For example:

  Workqueue: ceph-msgr ceph_con_workfn [libceph]
  Call Trace:
    schedule+0x36/0x80
    io_schedule+0x16/0x40
    __lock_page+0x101/0x140
    truncate_inode_pages_range+0x556/0x9f0
    truncate_inode_pages_final+0x4d/0x60
    evict+0x182/0x1a0
    iput+0x1d2/0x220
    iterate_session_caps+0x82/0x230 [ceph]
    dispatch+0x678/0xa80 [ceph]
    ceph_con_workfn+0x95b/0x1560 [libceph]
    process_one_work+0x14d/0x410
    worker_thread+0x4b/0x460
    kthread+0x105/0x140
    ret_from_fork+0x22/0x40

  Workqueue: ceph-msgr ceph_con_workfn [libceph]
  Call Trace:
    __schedule+0x3d6/0x8b0
    schedule+0x36/0x80
    schedule_preempt_disabled+0xe/0x10
    mutex_lock+0x2f/0x40
    ceph_check_caps+0x505/0xa80 [ceph]
    ceph_put_wrbuffer_cap_refs+0x1e5/0x2c0 [ceph]
    writepages_finish+0x2d3/0x410 [ceph]
    __complete_request+0x26/0x60 [libceph]
    handle_reply+0x6c8/0xa10 [libceph]
    dispatch+0x29a/0xbb0 [libceph]
    ceph_con_workfn+0x95b/0x1560 [libceph]
    process_one_work+0x14d/0x410
    worker_thread+0x4b/0x460
    kthread+0x105/0x140
    ret_from_fork+0x22/0x40

In the above example, truncate_inode_pages_range() waits for readahead pages while holding s_mutex. ceph_check_caps() waits for s_mutex and blocks the OSD dispatch thread, so later OSD replies (for the readahead) can't be handled.

ceph_check_caps() may also lock snap_rwsem for read, so a similar deadlock can happen if iput_final() is called while holding snap_rwsem.

In general, it's not good to call iput_final() inside MDS/OSD dispatch threads or while holding any mutex.

The fix is to introduce ceph_async_iput(), which calls iput_final() in a workqueue.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph/snap.c')
-rw-r--r-- fs/ceph/snap.c 16
1 files changed, 11 insertions, 5 deletions
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index b26e12cd8ec3..72c6c022f02b 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -648,13 +648,15 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
648 if (!inode) 648 if (!inode)
649 continue; 649 continue;
650 spin_unlock(&realm->inodes_with_caps_lock); 650 spin_unlock(&realm->inodes_with_caps_lock);
651 iput(lastinode); 651 /* avoid calling iput_final() while holding
652 * mdsc->snap_rwsem or in mds dispatch threads */
653 ceph_async_iput(lastinode);
652 lastinode = inode; 654 lastinode = inode;
653 ceph_queue_cap_snap(ci); 655 ceph_queue_cap_snap(ci);
654 spin_lock(&realm->inodes_with_caps_lock); 656 spin_lock(&realm->inodes_with_caps_lock);
655 } 657 }
656 spin_unlock(&realm->inodes_with_caps_lock); 658 spin_unlock(&realm->inodes_with_caps_lock);
657 iput(lastinode); 659 ceph_async_iput(lastinode);
658 660
659 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); 661 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
660} 662}
@@ -806,7 +808,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
806 ihold(inode); 808 ihold(inode);
807 spin_unlock(&mdsc->snap_flush_lock); 809 spin_unlock(&mdsc->snap_flush_lock);
808 ceph_flush_snaps(ci, &session); 810 ceph_flush_snaps(ci, &session);
809 iput(inode); 811 /* avoid calling iput_final() while holding
812 * session->s_mutex or in mds dispatch threads */
813 ceph_async_iput(inode);
810 spin_lock(&mdsc->snap_flush_lock); 814 spin_lock(&mdsc->snap_flush_lock);
811 } 815 }
812 spin_unlock(&mdsc->snap_flush_lock); 816 spin_unlock(&mdsc->snap_flush_lock);
@@ -950,12 +954,14 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
950 ceph_get_snap_realm(mdsc, realm); 954 ceph_get_snap_realm(mdsc, realm);
951 ceph_put_snap_realm(mdsc, oldrealm); 955 ceph_put_snap_realm(mdsc, oldrealm);
952 956
953 iput(inode); 957 /* avoid calling iput_final() while holding
958 * mdsc->snap_rwsem or in mds dispatch threads */
959 ceph_async_iput(inode);
954 continue; 960 continue;
955 961
956skip_inode: 962skip_inode:
957 spin_unlock(&ci->i_ceph_lock); 963 spin_unlock(&ci->i_ceph_lock);
958 iput(inode); 964 ceph_async_iput(inode);
959 } 965 }
960 966
961 /* we may have taken some of the old realm's children. */ 967 /* we may have taken some of the old realm's children. */