diff options
author | Yan, Zheng <zyan@redhat.com> | 2019-05-18 08:39:55 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2019-06-05 14:34:39 -0400 |
commit | 3e1d0452edceebb903d23db53201013c940bf000 (patch) | |
tree | 1a6065a9b2fcd52395ae35778cf9de0ad43d0b30 /fs/ceph/caps.c | |
parent | 1cf89a8dee5e6e9d4fcb81b571a54d40068dfbb7 (diff) |
ceph: avoid iput_final() while holding mutex or in dispatch thread
iput_final() may wait for readahead pages. The wait can cause a deadlock.
For example:
Workqueue: ceph-msgr ceph_con_workfn [libceph]
Call Trace:
schedule+0x36/0x80
io_schedule+0x16/0x40
__lock_page+0x101/0x140
truncate_inode_pages_range+0x556/0x9f0
truncate_inode_pages_final+0x4d/0x60
evict+0x182/0x1a0
iput+0x1d2/0x220
iterate_session_caps+0x82/0x230 [ceph]
dispatch+0x678/0xa80 [ceph]
ceph_con_workfn+0x95b/0x1560 [libceph]
process_one_work+0x14d/0x410
worker_thread+0x4b/0x460
kthread+0x105/0x140
ret_from_fork+0x22/0x40
Workqueue: ceph-msgr ceph_con_workfn [libceph]
Call Trace:
__schedule+0x3d6/0x8b0
schedule+0x36/0x80
schedule_preempt_disabled+0xe/0x10
mutex_lock+0x2f/0x40
ceph_check_caps+0x505/0xa80 [ceph]
ceph_put_wrbuffer_cap_refs+0x1e5/0x2c0 [ceph]
writepages_finish+0x2d3/0x410 [ceph]
__complete_request+0x26/0x60 [libceph]
handle_reply+0x6c8/0xa10 [libceph]
dispatch+0x29a/0xbb0 [libceph]
ceph_con_workfn+0x95b/0x1560 [libceph]
process_one_work+0x14d/0x410
worker_thread+0x4b/0x460
kthread+0x105/0x140
ret_from_fork+0x22/0x40
In the above example, truncate_inode_pages_range() waits for readahead pages
while holding s_mutex. ceph_check_caps() waits for s_mutex and blocks the
OSD dispatch thread. As a result, later OSD replies (for readahead) can't be handled.
ceph_check_caps() may also lock snap_rwsem for read, so a similar deadlock
can happen if iput_final() is called while holding snap_rwsem.
In general, it's not good to call iput_final() inside MDS/OSD dispatch
threads or while holding any mutex.
The fix is to introduce ceph_async_iput(), which calls iput_final() in a
workqueue.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- | fs/ceph/caps.c | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 72f8e1311392..52a2b90621cd 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2992,8 +2992,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2992 | } | 2992 | } |
2993 | if (complete_capsnap) | 2993 | if (complete_capsnap) |
2994 | wake_up_all(&ci->i_cap_wq); | 2994 | wake_up_all(&ci->i_cap_wq); |
2995 | while (put-- > 0) | 2995 | while (put-- > 0) { |
2996 | iput(inode); | 2996 | /* avoid calling iput_final() in osd dispatch threads */ |
2997 | ceph_async_iput(inode); | ||
2998 | } | ||
2997 | } | 2999 | } |
2998 | 3000 | ||
2999 | /* | 3001 | /* |
@@ -3964,8 +3966,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3964 | done: | 3966 | done: |
3965 | mutex_unlock(&session->s_mutex); | 3967 | mutex_unlock(&session->s_mutex); |
3966 | done_unlocked: | 3968 | done_unlocked: |
3967 | iput(inode); | ||
3968 | ceph_put_string(extra_info.pool_ns); | 3969 | ceph_put_string(extra_info.pool_ns); |
3970 | /* avoid calling iput_final() in mds dispatch threads */ | ||
3971 | ceph_async_iput(inode); | ||
3969 | return; | 3972 | return; |
3970 | 3973 | ||
3971 | flush_cap_releases: | 3974 | flush_cap_releases: |
@@ -4011,7 +4014,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) | |||
4011 | if (inode) { | 4014 | if (inode) { |
4012 | dout("check_delayed_caps on %p\n", inode); | 4015 | dout("check_delayed_caps on %p\n", inode); |
4013 | ceph_check_caps(ci, flags, NULL); | 4016 | ceph_check_caps(ci, flags, NULL); |
4014 | iput(inode); | 4017 | /* avoid calling iput_final() in tick thread */ |
4018 | ceph_async_iput(inode); | ||
4015 | } | 4019 | } |
4016 | } | 4020 | } |
4017 | spin_unlock(&mdsc->cap_delay_lock); | 4021 | spin_unlock(&mdsc->cap_delay_lock); |