aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/osd_client.c
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2009-10-08 19:55:47 -0400
committer: Sage Weil <sage@newdream.net> 2009-10-09 14:58:03 -0400
commit: c1ea8823be69ebebaface912142190e910711984 (patch)
tree: d834392aebd65a1d538e23b54b63172ca427f29c /fs/ceph/osd_client.c
parent: 0656d11ba6ffa3dee0e8916a1903f96185651217 (diff)
ceph: fix osd request submission race
The osd request submission path registers the request, drops and retakes the request_mutex, then sends it to the OSD. A racing kick_requests could send it during that interval, causing the same msg to be sent twice and BUGing in the msgr. Fix by only sending the message if it hasn't been touched by other threads. Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/osd_client.c')
-rw-r--r-- fs/ceph/osd_client.c 30
1 files changed, 19 insertions, 11 deletions
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 978593a4f466..d14019dd6868 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -837,7 +837,8 @@ static void kick_requests(struct ceph_osd_client *osdc,
837 } 837 }
838 838
839kick: 839kick:
840 dout("kicking tid %llu osd%d\n", req->r_tid, req->r_osd->o_osd); 840 dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
841 req->r_osd->o_osd);
841 req->r_flags |= CEPH_OSD_FLAG_RETRY; 842 req->r_flags |= CEPH_OSD_FLAG_RETRY;
842 err = __send_request(osdc, req); 843 err = __send_request(osdc, req);
843 if (err) { 844 if (err) {
@@ -1016,7 +1017,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
1016 struct ceph_osd_request *req, 1017 struct ceph_osd_request *req,
1017 bool nofail) 1018 bool nofail)
1018{ 1019{
1019 int rc; 1020 int rc = 0;
1020 1021
1021 req->r_request->pages = req->r_pages; 1022 req->r_request->pages = req->r_pages;
1022 req->r_request->nr_pages = req->r_num_pages; 1023 req->r_request->nr_pages = req->r_num_pages;
@@ -1025,15 +1026,22 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
1025 1026
1026 down_read(&osdc->map_sem); 1027 down_read(&osdc->map_sem);
1027 mutex_lock(&osdc->request_mutex); 1028 mutex_lock(&osdc->request_mutex);
1028 rc = __send_request(osdc, req); 1029 /*
1029 if (rc) { 1030 * a racing kick_requests() may have sent the message for us
1030 if (nofail) { 1031 * while we dropped request_mutex above, so only send now if
1031 dout("osdc_start_request failed send, marking %lld\n", 1032 * the request still hasn't been touched yet.
1032 req->r_tid); 1033 */
1033 req->r_resend = true; 1034 if (req->r_sent == 0) {
1034 rc = 0; 1035 rc = __send_request(osdc, req);
1035 } else { 1036 if (rc) {
1036 __unregister_request(osdc, req); 1037 if (nofail) {
1038 dout("osdc_start_request failed send, "
1039 " marking %lld\n", req->r_tid);
1040 req->r_resend = true;
1041 rc = 0;
1042 } else {
1043 __unregister_request(osdc, req);
1044 }
1037 } 1045 }
1038 } 1046 }
1039 mutex_unlock(&osdc->request_mutex); 1047 mutex_unlock(&osdc->request_mutex);