aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorSage Weil <sage@newdream.net>2011-07-26 14:27:24 -0400
committerSage Weil <sage@newdream.net>2011-07-26 14:27:24 -0400
commit4cf9d544631c92809cb94ea680c71df56e9437aa (patch)
tree419e162dfb25e69ef1a89c56a318ad322cf21053 /net
parent8f04d42276048b3baff5a5d8fa769f433c62b63e (diff)
libceph: don't time out osd requests that haven't been received
Keep track of when an outgoing message is ACKed (i.e., the server fully received it and, presumably, queued it for processing). Time out OSD requests only if it's been too long since they've been received. This prevents timeouts and connection thrashing when the OSDs are simply busy and are throttling the requests they read off the network. Reviewed-by: Yehuda Sadeh <yehuda@hq.newdream.net> Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'net')
-rw-r--r--net/ceph/messenger.c12
-rw-r--r--net/ceph/osd_client.c6
2 files changed, 11 insertions, 7 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 78b55f49de7c..c340e2e0765b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -486,13 +486,10 @@ static void prepare_write_message(struct ceph_connection *con)
486 m = list_first_entry(&con->out_queue, 486 m = list_first_entry(&con->out_queue,
487 struct ceph_msg, list_head); 487 struct ceph_msg, list_head);
488 con->out_msg = m; 488 con->out_msg = m;
489 if (test_bit(LOSSYTX, &con->state)) { 489
490 list_del_init(&m->list_head); 490 /* put message on sent list */
491 } else { 491 ceph_msg_get(m);
492 /* put message on sent list */ 492 list_move_tail(&m->list_head, &con->out_sent);
493 ceph_msg_get(m);
494 list_move_tail(&m->list_head, &con->out_sent);
495 }
496 493
497 /* 494 /*
498 * only assign outgoing seq # if we haven't sent this message 495 * only assign outgoing seq # if we haven't sent this message
@@ -1399,6 +1396,7 @@ static void process_ack(struct ceph_connection *con)
1399 break; 1396 break;
1400 dout("got ack for seq %llu type %d at %p\n", seq, 1397 dout("got ack for seq %llu type %d at %p\n", seq,
1401 le16_to_cpu(m->hdr.type), m); 1398 le16_to_cpu(m->hdr.type), m);
1399 m->ack_stamp = jiffies;
1402 ceph_msg_remove(m); 1400 ceph_msg_remove(m);
1403 } 1401 }
1404 prepare_read_tag(con); 1402 prepare_read_tag(con);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 7330c2757c0c..ce310eee708d 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1085,9 +1085,15 @@ static void handle_timeout(struct work_struct *work)
1085 req = list_entry(osdc->req_lru.next, struct ceph_osd_request, 1085 req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
1086 r_req_lru_item); 1086 r_req_lru_item);
1087 1087
1088 /* hasn't been long enough since we sent it? */
1088 if (time_before(jiffies, req->r_stamp + timeout)) 1089 if (time_before(jiffies, req->r_stamp + timeout))
1089 break; 1090 break;
1090 1091
1092 /* hasn't been long enough since it was acked? */
1093 if (req->r_request->ack_stamp == 0 ||
1094 time_before(jiffies, req->r_request->ack_stamp + timeout))
1095 break;
1096
1091 BUG_ON(req == last_req && req->r_stamp == last_stamp); 1097 BUG_ON(req == last_req && req->r_stamp == last_stamp);
1092 last_req = req; 1098 last_req = req;
1093 last_stamp = req->r_stamp; 1099 last_stamp = req->r_stamp;