aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
author Sage Weil <sage@newdream.net> 2010-03-22 17:51:18 -0400
committer Sage Weil <sage@newdream.net> 2010-03-23 10:47:01 -0400
commit 87b315a5b5cec5d7086494b203577602f5befc8c (patch)
tree 18e1638619a51cdf4605bad5b4270841450cf211 /fs/ceph
parent 3dd72fc0e6dc49c79fa9e7cd7c654deac7ccaa29 (diff)
ceph: avoid reopening osd connections when address hasn't changed
We get a fault callback on _every_ tcp connection fault. Normally, we want to reopen the connection when that happens. If the address we have is bad, however, and connection attempts always result in a connection refused or similar error, explicitly closing and reopening the msgr connection just prevents the messenger's backoff logic from kicking in. The result can be a console full of

  [ 3974.417106] ceph: osd11 10.3.14.138:6800 connection failed
  [ 3974.423295] ceph: osd11 10.3.14.138:6800 connection failed
  [ 3974.429709] ceph: osd11 10.3.14.138:6800 connection failed

Instead, if we get a fault, and have outstanding requests, but the osd address hasn't changed and the connection never successfully connected in the first place, do nothing to the osd connection. The messenger layer will back off and retry periodically, because we never connected and thus the lossy bit is not set.

Instead, touch each request's r_stamp so that handle_timeout can tell the request is still alive and kicking.

Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/messenger.c 8
-rw-r--r-- fs/ceph/messenger.h 1
-rw-r--r-- fs/ceph/osd_client.c 15
3 files changed, 23 insertions, 1 deletions
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 983285540945..a32f0f896d9f 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -366,6 +366,14 @@ void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
366} 366}
367 367
368/* 368/*
369 * return true if this connection ever successfully opened
370 */
371bool ceph_con_opened(struct ceph_connection *con)
372{
373 return con->connect_seq > 0;
374}
375
376/*
369 * generic get/put 377 * generic get/put
370 */ 378 */
371struct ceph_connection *ceph_con_get(struct ceph_connection *con) 379struct ceph_connection *ceph_con_get(struct ceph_connection *con)
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index 4caaa5911110..a343dae73cdc 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -223,6 +223,7 @@ extern void ceph_con_init(struct ceph_messenger *msgr,
223 struct ceph_connection *con); 223 struct ceph_connection *con);
224extern void ceph_con_open(struct ceph_connection *con, 224extern void ceph_con_open(struct ceph_connection *con,
225 struct ceph_entity_addr *addr); 225 struct ceph_entity_addr *addr);
226extern bool ceph_con_opened(struct ceph_connection *con);
226extern void ceph_con_close(struct ceph_connection *con); 227extern void ceph_con_close(struct ceph_connection *con);
227extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); 228extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
228extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); 229extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 04359217ea6c..c7b4dedaace6 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -413,11 +413,22 @@ static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
413 */ 413 */
414static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) 414static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
415{ 415{
416 struct ceph_osd_request *req;
416 int ret = 0; 417 int ret = 0;
417 418
418 dout("__reset_osd %p osd%d\n", osd, osd->o_osd); 419 dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
419 if (list_empty(&osd->o_requests)) { 420 if (list_empty(&osd->o_requests)) {
420 __remove_osd(osdc, osd); 421 __remove_osd(osdc, osd);
422 } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
423 &osd->o_con.peer_addr,
424 sizeof(osd->o_con.peer_addr)) == 0 &&
425 !ceph_con_opened(&osd->o_con)) {
426 dout(" osd addr hasn't changed and connection never opened,"
427 " letting msgr retry");
428 /* touch each r_stamp for handle_timeout()'s benefit */
429 list_for_each_entry(req, &osd->o_requests, r_osd_item)
430 req->r_stamp = jiffies;
431 ret = -EAGAIN;
421 } else { 432 } else {
422 ceph_con_close(&osd->o_con); 433 ceph_con_close(&osd->o_con);
423 ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); 434 ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
@@ -862,7 +873,9 @@ static int __kick_requests(struct ceph_osd_client *osdc,
862 873
863 dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); 874 dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
864 if (kickosd) { 875 if (kickosd) {
865 __reset_osd(osdc, kickosd); 876 err = __reset_osd(osdc, kickosd);
877 if (err == -EAGAIN)
878 return 1;
866 } else { 879 } else {
867 for (p = rb_first(&osdc->osds); p; p = n) { 880 for (p = rb_first(&osdc->osds); p; p = n) {
868 struct ceph_osd *osd = 881 struct ceph_osd *osd =