aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph/osd_client.c
diff options
context:
space:
mode:
author Alex Elder <elder@inktank.com> 2012-12-19 16:52:36 -0500
committer Alex Elder <elder@inktank.com> 2012-12-27 21:27:04 -0500
commit ab60b16d3c31b9bd9fd5b39f97dc42c52a50b67d (patch)
tree f5ac3a40c46f6587ddfc3c98bc4a12d299fd6e36 /net/ceph/osd_client.c
parent c3e946ce7276faf0b302acd25c7b874edbeba661 (diff)
libceph: move linger requests sooner in kick_requests()
The kick_requests() function is called by ceph_osdc_handle_map() when an osd map change has been indicated. Its purpose is to re-queue any request whose target osd is different from what it was when it was originally sent. It is structured as two loops, one for incomplete but registered requests, and a second for handling completed linger requests. As a special case, in the first loop if a request marked to linger has not yet completed, it is moved from the request list to the linger list. This is a quick and dirty way to have the second loop handle sending the request along with all the other linger requests. Because of the way it's done now, however, this quick and dirty solution can result in these incomplete linger requests never getting re-sent as desired. The problem lies in the fact that the second loop only arranges for a linger request to be sent if it appears its target osd has changed. This is the proper handling for *completed* linger requests (it avoids issuing the same linger request twice to the same osd). But although the linger requests added to the list in the first loop may have been sent, they have not yet completed, so they need to be re-sent regardless of whether their target osd has changed. The first required fix is that we need to avoid calling __map_request() in the first loop on any incomplete linger request. Otherwise the subsequent __map_request() call in the second loop will find the target osd has not changed and will therefore not re-send the request. Second, we need to be sure that a sent but incomplete linger request gets re-sent. If the target osd is the same with the new osd map as it was when the request was originally sent, this won't happen. This can be fixed through careful handling when we move these requests from the request list to the linger list, by unregistering the request *before* it is registered as a linger request. 
This works because a side-effect of unregistering the request is to make the request's r_osd pointer be NULL, and *that* will ensure the second loop actually re-sends the linger request. Processing of such a request is done at that point, so continue with the next one once it's been moved. Signed-off-by: Alex Elder <elder@inktank.com> Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'net/ceph/osd_client.c')
-rw-r--r-- net/ceph/osd_client.c | 30
1 files changed, 19 insertions, 11 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 780caf6b0491..0174c04edac0 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1284,6 +1284,24 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
1284 for (p = rb_first(&osdc->requests); p; ) { 1284 for (p = rb_first(&osdc->requests); p; ) {
1285 req = rb_entry(p, struct ceph_osd_request, r_node); 1285 req = rb_entry(p, struct ceph_osd_request, r_node);
1286 p = rb_next(p); 1286 p = rb_next(p);
1287
1288 /*
1289 * For linger requests that have not yet been
1290 * registered, move them to the linger list; they'll
1291 * be sent to the osd in the loop below. Unregister
1292 * the request before re-registering it as a linger
1293 * request to ensure the __map_request() below
1294 * will decide it needs to be sent.
1295 */
1296 if (req->r_linger && list_empty(&req->r_linger_item)) {
1297 dout("%p tid %llu restart on osd%d\n",
1298 req, req->r_tid,
1299 req->r_osd ? req->r_osd->o_osd : -1);
1300 __unregister_request(osdc, req);
1301 __register_linger_request(osdc, req);
1302 continue;
1303 }
1304
1287 err = __map_request(osdc, req, force_resend); 1305 err = __map_request(osdc, req, force_resend);
1288 if (err < 0) 1306 if (err < 0)
1289 continue; /* error */ 1307 continue; /* error */
@@ -1298,17 +1316,6 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
1298 req->r_flags |= CEPH_OSD_FLAG_RETRY; 1316 req->r_flags |= CEPH_OSD_FLAG_RETRY;
1299 } 1317 }
1300 } 1318 }
1301 if (req->r_linger && list_empty(&req->r_linger_item)) {
1302 /*
1303 * register as a linger so that we will
1304 * re-submit below and get a new tid
1305 */
1306 dout("%p tid %llu restart on osd%d\n",
1307 req, req->r_tid,
1308 req->r_osd ? req->r_osd->o_osd : -1);
1309 __register_linger_request(osdc, req);
1310 __unregister_request(osdc, req);
1311 }
1312 } 1319 }
1313 1320
1314 list_for_each_entry_safe(req, nreq, &osdc->req_linger, 1321 list_for_each_entry_safe(req, nreq, &osdc->req_linger,
@@ -1316,6 +1323,7 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
1316 dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); 1323 dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
1317 1324
1318 err = __map_request(osdc, req, force_resend); 1325 err = __map_request(osdc, req, force_resend);
1326 dout("__map_request returned %d\n", err);
1319 if (err == 0) 1327 if (err == 0)
1320 continue; /* no change and no osd was specified */ 1328 continue; /* no change and no osd was specified */
1321 if (err < 0) 1329 if (err < 0)