aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2018-05-21 10:00:29 -0400
committerIlya Dryomov <idryomov@gmail.com>2018-06-04 14:45:58 -0400
commit88bc1922c273c95e84a8955e657401f9bc63a80b (patch)
tree4f5f0e45685fc055e2ed6643ed62804b088291de
parent26df726bcdfacf69335de716e7b78c517bc1df65 (diff)
libceph: defer __complete_request() to a workqueue
In the common case, req->r_callback is called by handle_reply() on the ceph-msgr worker thread without any locks. If handle_reply() fails, it is called with both osd->lock and osdc->lock. In the map check case, it is called with just osdc->lock but held for write. Finally, if the request is aborted because of -ENOSPC or by ceph_osdc_abort_requests(), it is called directly on the submitter's thread, again with both locks. req->r_callback on the submitter's thread is relatively new (introduced in 4.12) and ripe for deadlocks -- e.g. writeback worker thread waiting on itself: inode_wait_for_writeback+0x26/0x40 evict+0xb5/0x1a0 iput+0x1d2/0x220 ceph_put_wrbuffer_cap_refs+0xe0/0x2c0 [ceph] writepages_finish+0x2d3/0x410 [ceph] __complete_request+0x26/0x60 [libceph] complete_request+0x2e/0x70 [libceph] __submit_request+0x256/0x330 [libceph] submit_request+0x2b/0x30 [libceph] ceph_osdc_start_request+0x25/0x40 [libceph] ceph_writepages_start+0xdfe/0x1320 [ceph] do_writepages+0x1f/0x70 __writeback_single_inode+0x45/0x330 writeback_sb_inodes+0x26a/0x600 __writeback_inodes_wb+0x92/0xc0 wb_writeback+0x274/0x330 wb_workfn+0x2d5/0x3b0 Defer __complete_request() to a workqueue in all failure cases so it's never on the same thread as ceph_osdc_start_request() and always called with no locks held. Link: http://tracker.ceph.com/issues/23978 Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Acked-by: Jeff Layton <jlayton@redhat.com> Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
-rw-r--r--include/linux/ceph/osd_client.h2
-rw-r--r--net/ceph/osd_client.c19
2 files changed, 20 insertions, 1 deletions
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 874c31c01f80..d4191bde95a4 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -170,6 +170,7 @@ struct ceph_osd_request {
170 u64 r_tid; /* unique for this client */ 170 u64 r_tid; /* unique for this client */
171 struct rb_node r_node; 171 struct rb_node r_node;
172 struct rb_node r_mc_node; /* map check */ 172 struct rb_node r_mc_node; /* map check */
173 struct work_struct r_complete_work;
173 struct ceph_osd *r_osd; 174 struct ceph_osd *r_osd;
174 175
175 struct ceph_osd_request_target r_t; 176 struct ceph_osd_request_target r_t;
@@ -360,6 +361,7 @@ struct ceph_osd_client {
360 struct ceph_msgpool msgpool_op_reply; 361 struct ceph_msgpool msgpool_op_reply;
361 362
362 struct workqueue_struct *notify_wq; 363 struct workqueue_struct *notify_wq;
364 struct workqueue_struct *completion_wq;
363}; 365};
364 366
365static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) 367static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a78f578a2da7..a4c12c37aa90 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2329,6 +2329,14 @@ static void __complete_request(struct ceph_osd_request *req)
2329 ceph_osdc_put_request(req); 2329 ceph_osdc_put_request(req);
2330} 2330}
2331 2331
2332static void complete_request_workfn(struct work_struct *work)
2333{
2334 struct ceph_osd_request *req =
2335 container_of(work, struct ceph_osd_request, r_complete_work);
2336
2337 __complete_request(req);
2338}
2339
2332/* 2340/*
2333 * This is open-coded in handle_reply(). 2341 * This is open-coded in handle_reply().
2334 */ 2342 */
@@ -2338,7 +2346,9 @@ static void complete_request(struct ceph_osd_request *req, int err)
2338 2346
2339 req->r_result = err; 2347 req->r_result = err;
2340 finish_request(req); 2348 finish_request(req);
2341 __complete_request(req); 2349
2350 INIT_WORK(&req->r_complete_work, complete_request_workfn);
2351 queue_work(req->r_osdc->completion_wq, &req->r_complete_work);
2342} 2352}
2343 2353
2344static void cancel_map_check(struct ceph_osd_request *req) 2354static void cancel_map_check(struct ceph_osd_request *req)
@@ -5058,6 +5068,10 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
5058 if (!osdc->notify_wq) 5068 if (!osdc->notify_wq)
5059 goto out_msgpool_reply; 5069 goto out_msgpool_reply;
5060 5070
5071 osdc->completion_wq = create_singlethread_workqueue("ceph-completion");
5072 if (!osdc->completion_wq)
5073 goto out_notify_wq;
5074
5061 schedule_delayed_work(&osdc->timeout_work, 5075 schedule_delayed_work(&osdc->timeout_work,
5062 osdc->client->options->osd_keepalive_timeout); 5076 osdc->client->options->osd_keepalive_timeout);
5063 schedule_delayed_work(&osdc->osds_timeout_work, 5077 schedule_delayed_work(&osdc->osds_timeout_work,
@@ -5065,6 +5079,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
5065 5079
5066 return 0; 5080 return 0;
5067 5081
5082out_notify_wq:
5083 destroy_workqueue(osdc->notify_wq);
5068out_msgpool_reply: 5084out_msgpool_reply:
5069 ceph_msgpool_destroy(&osdc->msgpool_op_reply); 5085 ceph_msgpool_destroy(&osdc->msgpool_op_reply);
5070out_msgpool: 5086out_msgpool:
@@ -5079,6 +5095,7 @@ out:
5079 5095
5080void ceph_osdc_stop(struct ceph_osd_client *osdc) 5096void ceph_osdc_stop(struct ceph_osd_client *osdc)
5081{ 5097{
5098 destroy_workqueue(osdc->completion_wq);
5082 destroy_workqueue(osdc->notify_wq); 5099 destroy_workqueue(osdc->notify_wq);
5083 cancel_delayed_work_sync(&osdc->timeout_work); 5100 cancel_delayed_work_sync(&osdc->timeout_work);
5084 cancel_delayed_work_sync(&osdc->osds_timeout_work); 5101 cancel_delayed_work_sync(&osdc->osds_timeout_work);