diff options
author | Yan, Zheng <zyan@redhat.com> | 2019-05-18 08:39:55 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2019-06-05 14:34:39 -0400 |
commit | 3e1d0452edceebb903d23db53201013c940bf000 (patch) | |
tree | 1a6065a9b2fcd52395ae35778cf9de0ad43d0b30 /fs/ceph/snap.c | |
parent | 1cf89a8dee5e6e9d4fcb81b571a54d40068dfbb7 (diff) |
ceph: avoid iput_final() while holding mutex or in dispatch thread
iput_final() may wait for readahead pages. The wait can cause a deadlock.
For example:
Workqueue: ceph-msgr ceph_con_workfn [libceph]
Call Trace:
schedule+0x36/0x80
io_schedule+0x16/0x40
__lock_page+0x101/0x140
truncate_inode_pages_range+0x556/0x9f0
truncate_inode_pages_final+0x4d/0x60
evict+0x182/0x1a0
iput+0x1d2/0x220
iterate_session_caps+0x82/0x230 [ceph]
dispatch+0x678/0xa80 [ceph]
ceph_con_workfn+0x95b/0x1560 [libceph]
process_one_work+0x14d/0x410
worker_thread+0x4b/0x460
kthread+0x105/0x140
ret_from_fork+0x22/0x40
Workqueue: ceph-msgr ceph_con_workfn [libceph]
Call Trace:
__schedule+0x3d6/0x8b0
schedule+0x36/0x80
schedule_preempt_disabled+0xe/0x10
mutex_lock+0x2f/0x40
ceph_check_caps+0x505/0xa80 [ceph]
ceph_put_wrbuffer_cap_refs+0x1e5/0x2c0 [ceph]
writepages_finish+0x2d3/0x410 [ceph]
__complete_request+0x26/0x60 [libceph]
handle_reply+0x6c8/0xa10 [libceph]
dispatch+0x29a/0xbb0 [libceph]
ceph_con_workfn+0x95b/0x1560 [libceph]
process_one_work+0x14d/0x410
worker_thread+0x4b/0x460
kthread+0x105/0x140
ret_from_fork+0x22/0x40
In the above example, truncate_inode_pages_range() waits for readahead pages
while holding s_mutex. ceph_check_caps() waits for s_mutex and blocks the
OSD dispatch thread. Later OSD replies (for readahead) can't be handled.
ceph_check_caps() may also lock snap_rwsem for read. So a similar deadlock
can happen if iput_final() is called while holding snap_rwsem.
In general, it's not good to call iput_final() inside MDS/OSD dispatch
threads or while holding any mutex.
The fix is to introduce ceph_async_iput(), which calls iput_final() in a
workqueue.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph/snap.c')
-rw-r--r-- | fs/ceph/snap.c | 16 |
1 files changed, 11 insertions, 5 deletions
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index b26e12cd8ec3..72c6c022f02b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -648,13 +648,15 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) | |||
648 | if (!inode) | 648 | if (!inode) |
649 | continue; | 649 | continue; |
650 | spin_unlock(&realm->inodes_with_caps_lock); | 650 | spin_unlock(&realm->inodes_with_caps_lock); |
651 | iput(lastinode); | 651 | /* avoid calling iput_final() while holding |
652 | * mdsc->snap_rwsem or in mds dispatch threads */ | ||
653 | ceph_async_iput(lastinode); | ||
652 | lastinode = inode; | 654 | lastinode = inode; |
653 | ceph_queue_cap_snap(ci); | 655 | ceph_queue_cap_snap(ci); |
654 | spin_lock(&realm->inodes_with_caps_lock); | 656 | spin_lock(&realm->inodes_with_caps_lock); |
655 | } | 657 | } |
656 | spin_unlock(&realm->inodes_with_caps_lock); | 658 | spin_unlock(&realm->inodes_with_caps_lock); |
657 | iput(lastinode); | 659 | ceph_async_iput(lastinode); |
658 | 660 | ||
659 | dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); | 661 | dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); |
660 | } | 662 | } |
@@ -806,7 +808,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc) | |||
806 | ihold(inode); | 808 | ihold(inode); |
807 | spin_unlock(&mdsc->snap_flush_lock); | 809 | spin_unlock(&mdsc->snap_flush_lock); |
808 | ceph_flush_snaps(ci, &session); | 810 | ceph_flush_snaps(ci, &session); |
809 | iput(inode); | 811 | /* avoid calling iput_final() while holding |
812 | * session->s_mutex or in mds dispatch threads */ | ||
813 | ceph_async_iput(inode); | ||
810 | spin_lock(&mdsc->snap_flush_lock); | 814 | spin_lock(&mdsc->snap_flush_lock); |
811 | } | 815 | } |
812 | spin_unlock(&mdsc->snap_flush_lock); | 816 | spin_unlock(&mdsc->snap_flush_lock); |
@@ -950,12 +954,14 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
950 | ceph_get_snap_realm(mdsc, realm); | 954 | ceph_get_snap_realm(mdsc, realm); |
951 | ceph_put_snap_realm(mdsc, oldrealm); | 955 | ceph_put_snap_realm(mdsc, oldrealm); |
952 | 956 | ||
953 | iput(inode); | 957 | /* avoid calling iput_final() while holding |
958 | * mdsc->snap_rwsem or mds in dispatch threads */ | ||
959 | ceph_async_iput(inode); | ||
954 | continue; | 960 | continue; |
955 | 961 | ||
956 | skip_inode: | 962 | skip_inode: |
957 | spin_unlock(&ci->i_ceph_lock); | 963 | spin_unlock(&ci->i_ceph_lock); |
958 | iput(inode); | 964 | ceph_async_iput(inode); |
959 | } | 965 | } |
960 | 966 | ||
961 | /* we may have taken some of the old realm's children. */ | 967 | /* we may have taken some of the old realm's children. */ |