diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2018-05-21 10:00:29 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2018-06-04 14:45:58 -0400 |
commit | 88bc1922c273c95e84a8955e657401f9bc63a80b (patch) | |
tree | 4f5f0e45685fc055e2ed6643ed62804b088291de | |
parent | 26df726bcdfacf69335de716e7b78c517bc1df65 (diff) |
libceph: defer __complete_request() to a workqueue
In the common case, req->r_callback is called by handle_reply() on the
ceph-msgr worker thread without any locks. If handle_reply() fails, it
is called with both osd->lock and osdc->lock. In the map check case,
it is called with just osdc->lock but held for write. Finally, if the
request is aborted because of -ENOSPC or by ceph_osdc_abort_requests(),
it is called directly on the submitter's thread, again with both locks.
req->r_callback on the submitter's thread is relatively new (introduced
in 4.12) and ripe for deadlocks -- e.g. writeback worker thread waiting
on itself:
inode_wait_for_writeback+0x26/0x40
evict+0xb5/0x1a0
iput+0x1d2/0x220
ceph_put_wrbuffer_cap_refs+0xe0/0x2c0 [ceph]
writepages_finish+0x2d3/0x410 [ceph]
__complete_request+0x26/0x60 [libceph]
complete_request+0x2e/0x70 [libceph]
__submit_request+0x256/0x330 [libceph]
submit_request+0x2b/0x30 [libceph]
ceph_osdc_start_request+0x25/0x40 [libceph]
ceph_writepages_start+0xdfe/0x1320 [ceph]
do_writepages+0x1f/0x70
__writeback_single_inode+0x45/0x330
writeback_sb_inodes+0x26a/0x600
__writeback_inodes_wb+0x92/0xc0
wb_writeback+0x274/0x330
wb_workfn+0x2d5/0x3b0
Defer __complete_request() to a workqueue in all failure cases so it's
never on the same thread as ceph_osdc_start_request() and always called
with no locks held.
Link: http://tracker.ceph.com/issues/23978
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
-rw-r--r-- | include/linux/ceph/osd_client.h | 2 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 19 |
2 files changed, 20 insertions, 1 deletions
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 874c31c01f80..d4191bde95a4 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
@@ -170,6 +170,7 @@ struct ceph_osd_request { | |||
170 | u64 r_tid; /* unique for this client */ | 170 | u64 r_tid; /* unique for this client */ |
171 | struct rb_node r_node; | 171 | struct rb_node r_node; |
172 | struct rb_node r_mc_node; /* map check */ | 172 | struct rb_node r_mc_node; /* map check */ |
173 | struct work_struct r_complete_work; | ||
173 | struct ceph_osd *r_osd; | 174 | struct ceph_osd *r_osd; |
174 | 175 | ||
175 | struct ceph_osd_request_target r_t; | 176 | struct ceph_osd_request_target r_t; |
@@ -360,6 +361,7 @@ struct ceph_osd_client { | |||
360 | struct ceph_msgpool msgpool_op_reply; | 361 | struct ceph_msgpool msgpool_op_reply; |
361 | 362 | ||
362 | struct workqueue_struct *notify_wq; | 363 | struct workqueue_struct *notify_wq; |
364 | struct workqueue_struct *completion_wq; | ||
363 | }; | 365 | }; |
364 | 366 | ||
365 | static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) | 367 | static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a78f578a2da7..a4c12c37aa90 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -2329,6 +2329,14 @@ static void __complete_request(struct ceph_osd_request *req) | |||
2329 | ceph_osdc_put_request(req); | 2329 | ceph_osdc_put_request(req); |
2330 | } | 2330 | } |
2331 | 2331 | ||
2332 | static void complete_request_workfn(struct work_struct *work) | ||
2333 | { | ||
2334 | struct ceph_osd_request *req = | ||
2335 | container_of(work, struct ceph_osd_request, r_complete_work); | ||
2336 | |||
2337 | __complete_request(req); | ||
2338 | } | ||
2339 | |||
2332 | /* | 2340 | /* |
2333 | * This is open-coded in handle_reply(). | 2341 | * This is open-coded in handle_reply(). |
2334 | */ | 2342 | */ |
@@ -2338,7 +2346,9 @@ static void complete_request(struct ceph_osd_request *req, int err) | |||
2338 | 2346 | ||
2339 | req->r_result = err; | 2347 | req->r_result = err; |
2340 | finish_request(req); | 2348 | finish_request(req); |
2341 | __complete_request(req); | 2349 | |
2350 | INIT_WORK(&req->r_complete_work, complete_request_workfn); | ||
2351 | queue_work(req->r_osdc->completion_wq, &req->r_complete_work); | ||
2342 | } | 2352 | } |
2343 | 2353 | ||
2344 | static void cancel_map_check(struct ceph_osd_request *req) | 2354 | static void cancel_map_check(struct ceph_osd_request *req) |
@@ -5058,6 +5068,10 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
5058 | if (!osdc->notify_wq) | 5068 | if (!osdc->notify_wq) |
5059 | goto out_msgpool_reply; | 5069 | goto out_msgpool_reply; |
5060 | 5070 | ||
5071 | osdc->completion_wq = create_singlethread_workqueue("ceph-completion"); | ||
5072 | if (!osdc->completion_wq) | ||
5073 | goto out_notify_wq; | ||
5074 | |||
5061 | schedule_delayed_work(&osdc->timeout_work, | 5075 | schedule_delayed_work(&osdc->timeout_work, |
5062 | osdc->client->options->osd_keepalive_timeout); | 5076 | osdc->client->options->osd_keepalive_timeout); |
5063 | schedule_delayed_work(&osdc->osds_timeout_work, | 5077 | schedule_delayed_work(&osdc->osds_timeout_work, |
@@ -5065,6 +5079,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
5065 | 5079 | ||
5066 | return 0; | 5080 | return 0; |
5067 | 5081 | ||
5082 | out_notify_wq: | ||
5083 | destroy_workqueue(osdc->notify_wq); | ||
5068 | out_msgpool_reply: | 5084 | out_msgpool_reply: |
5069 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); | 5085 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); |
5070 | out_msgpool: | 5086 | out_msgpool: |
@@ -5079,6 +5095,7 @@ out: | |||
5079 | 5095 | ||
5080 | void ceph_osdc_stop(struct ceph_osd_client *osdc) | 5096 | void ceph_osdc_stop(struct ceph_osd_client *osdc) |
5081 | { | 5097 | { |
5098 | destroy_workqueue(osdc->completion_wq); | ||
5082 | destroy_workqueue(osdc->notify_wq); | 5099 | destroy_workqueue(osdc->notify_wq); |
5083 | cancel_delayed_work_sync(&osdc->timeout_work); | 5100 | cancel_delayed_work_sync(&osdc->timeout_work); |
5084 | cancel_delayed_work_sync(&osdc->osds_timeout_work); | 5101 | cancel_delayed_work_sync(&osdc->osds_timeout_work); |