diff options
author | Sage Weil <sage@inktank.com> | 2012-11-28 15:28:24 -0500 |
---|---|---|
committer | Alex Elder <elder@inktank.com> | 2012-12-13 09:13:06 -0500 |
commit | 83aff95eb9d60aff5497e9f44a2ae906b86d8e88 (patch) | |
tree | b191a3640990e4b7cc13b2db423449b90086f71b /net | |
parent | cfc84c9f73ab8a6933bd4f36efac1196cddad581 (diff) |
libceph: remove 'osdtimeout' option
The 'osdtimeout' option would reset the connection to any OSD that had an
outstanding request taking more than N seconds. The idea was that if the
OSD was buggy, the client could compensate by resending the request.
In reality, this only served to hide server bugs, and we haven't
actually seen such a bug in quite a while. Moreover, the userspace
client code never did this.
More importantly, the request is often taking a long time because the
OSD is trying to recover, or is overloaded, and killing the connection
and retrying would only make the situation worse by giving the OSD
more work to do.
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
Diffstat (limited to 'net')
-rw-r--r-- | net/ceph/ceph_common.c | 3 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 47 |
2 files changed, 5 insertions, 45 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a8020293f342..ee71ea26777a 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
@@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name, | |||
305 | 305 | ||
306 | /* start with defaults */ | 306 | /* start with defaults */ |
307 | opt->flags = CEPH_OPT_DEFAULT; | 307 | opt->flags = CEPH_OPT_DEFAULT; |
308 | opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | ||
309 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 308 | opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; |
310 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | 309 | opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ |
311 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | 310 | opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ |
@@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name, | |||
391 | 390 | ||
392 | /* misc */ | 391 | /* misc */ |
393 | case Opt_osdtimeout: | 392 | case Opt_osdtimeout: |
394 | opt->osd_timeout = intval; | 393 | pr_warning("ignoring deprecated osdtimeout option\n"); |
395 | break; | 394 | break; |
396 | case Opt_osdkeepalivetimeout: | 395 | case Opt_osdkeepalivetimeout: |
397 | opt->osd_keepalive_timeout = intval; | 396 | opt->osd_keepalive_timeout = intval; |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ccbdfbba9e53..7ebfe13267e6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -608,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, | |||
608 | } | 608 | } |
609 | } | 609 | } |
610 | 610 | ||
611 | static void kick_osd_requests(struct ceph_osd_client *osdc, | ||
612 | struct ceph_osd *kickosd) | ||
613 | { | ||
614 | mutex_lock(&osdc->request_mutex); | ||
615 | __kick_osd_requests(osdc, kickosd); | ||
616 | mutex_unlock(&osdc->request_mutex); | ||
617 | } | ||
618 | |||
619 | /* | 611 | /* |
620 | * If the osd connection drops, we need to resubmit all requests. | 612 | * If the osd connection drops, we need to resubmit all requests. |
621 | */ | 613 | */ |
@@ -629,7 +621,9 @@ static void osd_reset(struct ceph_connection *con) | |||
629 | dout("osd_reset osd%d\n", osd->o_osd); | 621 | dout("osd_reset osd%d\n", osd->o_osd); |
630 | osdc = osd->o_osdc; | 622 | osdc = osd->o_osdc; |
631 | down_read(&osdc->map_sem); | 623 | down_read(&osdc->map_sem); |
632 | kick_osd_requests(osdc, osd); | 624 | mutex_lock(&osdc->request_mutex); |
625 | __kick_osd_requests(osdc, osd); | ||
626 | mutex_unlock(&osdc->request_mutex); | ||
633 | send_queued(osdc); | 627 | send_queued(osdc); |
634 | up_read(&osdc->map_sem); | 628 | up_read(&osdc->map_sem); |
635 | } | 629 | } |
@@ -1091,12 +1085,10 @@ static void handle_timeout(struct work_struct *work) | |||
1091 | { | 1085 | { |
1092 | struct ceph_osd_client *osdc = | 1086 | struct ceph_osd_client *osdc = |
1093 | container_of(work, struct ceph_osd_client, timeout_work.work); | 1087 | container_of(work, struct ceph_osd_client, timeout_work.work); |
1094 | struct ceph_osd_request *req, *last_req = NULL; | 1088 | struct ceph_osd_request *req; |
1095 | struct ceph_osd *osd; | 1089 | struct ceph_osd *osd; |
1096 | unsigned long timeout = osdc->client->options->osd_timeout * HZ; | ||
1097 | unsigned long keepalive = | 1090 | unsigned long keepalive = |
1098 | osdc->client->options->osd_keepalive_timeout * HZ; | 1091 | osdc->client->options->osd_keepalive_timeout * HZ; |
1099 | unsigned long last_stamp = 0; | ||
1100 | struct list_head slow_osds; | 1092 | struct list_head slow_osds; |
1101 | dout("timeout\n"); | 1093 | dout("timeout\n"); |
1102 | down_read(&osdc->map_sem); | 1094 | down_read(&osdc->map_sem); |
@@ -1106,37 +1098,6 @@ static void handle_timeout(struct work_struct *work) | |||
1106 | mutex_lock(&osdc->request_mutex); | 1098 | mutex_lock(&osdc->request_mutex); |
1107 | 1099 | ||
1108 | /* | 1100 | /* |
1109 | * reset osds that appear to be _really_ unresponsive. this | ||
1110 | * is a failsafe measure.. we really shouldn't be getting to | ||
1111 | * this point if the system is working properly. the monitors | ||
1112 | * should mark the osd as failed and we should find out about | ||
1113 | * it from an updated osd map. | ||
1114 | */ | ||
1115 | while (timeout && !list_empty(&osdc->req_lru)) { | ||
1116 | req = list_entry(osdc->req_lru.next, struct ceph_osd_request, | ||
1117 | r_req_lru_item); | ||
1118 | |||
1119 | /* hasn't been long enough since we sent it? */ | ||
1120 | if (time_before(jiffies, req->r_stamp + timeout)) | ||
1121 | break; | ||
1122 | |||
1123 | /* hasn't been long enough since it was acked? */ | ||
1124 | if (req->r_request->ack_stamp == 0 || | ||
1125 | time_before(jiffies, req->r_request->ack_stamp + timeout)) | ||
1126 | break; | ||
1127 | |||
1128 | BUG_ON(req == last_req && req->r_stamp == last_stamp); | ||
1129 | last_req = req; | ||
1130 | last_stamp = req->r_stamp; | ||
1131 | |||
1132 | osd = req->r_osd; | ||
1133 | BUG_ON(!osd); | ||
1134 | pr_warning(" tid %llu timed out on osd%d, will reset osd\n", | ||
1135 | req->r_tid, osd->o_osd); | ||
1136 | __kick_osd_requests(osdc, osd); | ||
1137 | } | ||
1138 | |||
1139 | /* | ||
1140 | * ping osds that are a bit slow. this ensures that if there | 1101 | * ping osds that are a bit slow. this ensures that if there |
1141 | * is a break in the TCP connection we will notice, and reopen | 1102 | * is a break in the TCP connection we will notice, and reopen |
1142 | * a connection with that osd (from the fault callback). | 1103 | * a connection with that osd (from the fault callback). |