about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2012-11-28 15:28:24 -0500
committer Alex Elder <elder@inktank.com> 2012-12-13 09:13:06 -0500
commit 83aff95eb9d60aff5497e9f44a2ae906b86d8e88 (patch)
tree b191a3640990e4b7cc13b2db423449b90086f71b
parent cfc84c9f73ab8a6933bd4f36efac1196cddad581 (diff)
libceph: remove 'osdtimeout' option
This would reset a connection with any OSD that had an outstanding
request that was taking more than N seconds. The idea was that if the
OSD was buggy, the client could compensate by resending the request.

In reality, this only served to hide server bugs, and we haven't
actually seen such a bug in quite a while. Moreover, the userspace
client code never did this.

More importantly, often the request is taking a long time because the
OSD is trying to recover, or overloaded, and killing the connection
and retrying would only make the situation worse by giving the OSD
more work to do.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
-rw-r--r-- fs/ceph/super.c 2
-rw-r--r-- include/linux/ceph/libceph.h 2
-rw-r--r-- net/ceph/ceph_common.c 3
-rw-r--r-- net/ceph/osd_client.c 47
4 files changed, 5 insertions, 49 deletions
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 2f586b0e5e0f..fcda1c73a1e5 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -403,8 +403,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
403 seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); 403 seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
404 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) 404 if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
405 seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); 405 seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
406 if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
407 seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
408 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) 406 if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
409 seq_printf(m, ",osdkeepalivetimeout=%d", 407 seq_printf(m, ",osdkeepalivetimeout=%d",
410 opt->osd_keepalive_timeout); 408 opt->osd_keepalive_timeout);
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 42624789b06f..317aff8feb0a 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -43,7 +43,6 @@ struct ceph_options {
43 struct ceph_entity_addr my_addr; 43 struct ceph_entity_addr my_addr;
44 int mount_timeout; 44 int mount_timeout;
45 int osd_idle_ttl; 45 int osd_idle_ttl;
46 int osd_timeout;
47 int osd_keepalive_timeout; 46 int osd_keepalive_timeout;
48 47
49 /* 48 /*
@@ -63,7 +62,6 @@ struct ceph_options {
63 * defaults 62 * defaults
64 */ 63 */
65#define CEPH_MOUNT_TIMEOUT_DEFAULT 60 64#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
66#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */
67#define CEPH_OSD_KEEPALIVE_DEFAULT 5 65#define CEPH_OSD_KEEPALIVE_DEFAULT 5
68#define CEPH_OSD_IDLE_TTL_DEFAULT 60 66#define CEPH_OSD_IDLE_TTL_DEFAULT 60
69 67
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a8020293f342..ee71ea26777a 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name,
305 305
306 /* start with defaults */ 306 /* start with defaults */
307 opt->flags = CEPH_OPT_DEFAULT; 307 opt->flags = CEPH_OPT_DEFAULT;
308 opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
309 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; 308 opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
310 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ 309 opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
311 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ 310 opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
@@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name,
391 390
392 /* misc */ 391 /* misc */
393 case Opt_osdtimeout: 392 case Opt_osdtimeout:
394 opt->osd_timeout = intval; 393 pr_warning("ignoring deprecated osdtimeout option\n");
395 break; 394 break;
396 case Opt_osdkeepalivetimeout: 395 case Opt_osdkeepalivetimeout:
397 opt->osd_keepalive_timeout = intval; 396 opt->osd_keepalive_timeout = intval;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ccbdfbba9e53..7ebfe13267e6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -608,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
608 } 608 }
609} 609}
610 610
611static void kick_osd_requests(struct ceph_osd_client *osdc,
612 struct ceph_osd *kickosd)
613{
614 mutex_lock(&osdc->request_mutex);
615 __kick_osd_requests(osdc, kickosd);
616 mutex_unlock(&osdc->request_mutex);
617}
618
619/* 611/*
620 * If the osd connection drops, we need to resubmit all requests. 612 * If the osd connection drops, we need to resubmit all requests.
621 */ 613 */
@@ -629,7 +621,9 @@ static void osd_reset(struct ceph_connection *con)
629 dout("osd_reset osd%d\n", osd->o_osd); 621 dout("osd_reset osd%d\n", osd->o_osd);
630 osdc = osd->o_osdc; 622 osdc = osd->o_osdc;
631 down_read(&osdc->map_sem); 623 down_read(&osdc->map_sem);
632 kick_osd_requests(osdc, osd); 624 mutex_lock(&osdc->request_mutex);
625 __kick_osd_requests(osdc, osd);
626 mutex_unlock(&osdc->request_mutex);
633 send_queued(osdc); 627 send_queued(osdc);
634 up_read(&osdc->map_sem); 628 up_read(&osdc->map_sem);
635} 629}
@@ -1091,12 +1085,10 @@ static void handle_timeout(struct work_struct *work)
1091{ 1085{
1092 struct ceph_osd_client *osdc = 1086 struct ceph_osd_client *osdc =
1093 container_of(work, struct ceph_osd_client, timeout_work.work); 1087 container_of(work, struct ceph_osd_client, timeout_work.work);
1094 struct ceph_osd_request *req, *last_req = NULL; 1088 struct ceph_osd_request *req;
1095 struct ceph_osd *osd; 1089 struct ceph_osd *osd;
1096 unsigned long timeout = osdc->client->options->osd_timeout * HZ;
1097 unsigned long keepalive = 1090 unsigned long keepalive =
1098 osdc->client->options->osd_keepalive_timeout * HZ; 1091 osdc->client->options->osd_keepalive_timeout * HZ;
1099 unsigned long last_stamp = 0;
1100 struct list_head slow_osds; 1092 struct list_head slow_osds;
1101 dout("timeout\n"); 1093 dout("timeout\n");
1102 down_read(&osdc->map_sem); 1094 down_read(&osdc->map_sem);
@@ -1106,37 +1098,6 @@ static void handle_timeout(struct work_struct *work)
1106 mutex_lock(&osdc->request_mutex); 1098 mutex_lock(&osdc->request_mutex);
1107 1099
1108 /* 1100 /*
1109 * reset osds that appear to be _really_ unresponsive. this
1110 * is a failsafe measure.. we really shouldn't be getting to
1111 * this point if the system is working properly. the monitors
1112 * should mark the osd as failed and we should find out about
1113 * it from an updated osd map.
1114 */
1115 while (timeout && !list_empty(&osdc->req_lru)) {
1116 req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
1117 r_req_lru_item);
1118
1119 /* hasn't been long enough since we sent it? */
1120 if (time_before(jiffies, req->r_stamp + timeout))
1121 break;
1122
1123 /* hasn't been long enough since it was acked? */
1124 if (req->r_request->ack_stamp == 0 ||
1125 time_before(jiffies, req->r_request->ack_stamp + timeout))
1126 break;
1127
1128 BUG_ON(req == last_req && req->r_stamp == last_stamp);
1129 last_req = req;
1130 last_stamp = req->r_stamp;
1131
1132 osd = req->r_osd;
1133 BUG_ON(!osd);
1134 pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
1135 req->r_tid, osd->o_osd);
1136 __kick_osd_requests(osdc, osd);
1137 }
1138
1139 /*
1140 * ping osds that are a bit slow. this ensures that if there 1101 * ping osds that are a bit slow. this ensures that if there
1141 * is a break in the TCP connection we will notice, and reopen 1102 * is a break in the TCP connection we will notice, and reopen
1142 * a connection with that osd (from the fault callback). 1103 * a connection with that osd (from the fault callback).