libceph: remove 'osdtimeout' option

This would reset a connection with any OSD that had an outstanding request that was taking more than N seconds. The idea was that if the OSD was buggy, the client could compensate by resending the request.

In reality, this only served to hide server bugs, and we haven't actually seen such a bug in quite a while. Moreover, the userspace client code never did this.

More importantly, often the request is taking a long time because the OSD is trying to recover, or overloaded, and killing the connection and retrying would only make the situation worse by giving the OSD more work to do.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
author: Sage Weil <sage@inktank.com> 2012-11-28 15:28:24 -0500
committer: Alex Elder <elder@inktank.com> 2012-12-13 09:13:06 -0500
commit: 83aff95eb9d60aff5497e9f44a2ae906b86d8e88 (patch)
tree: b191a3640990e4b7cc13b2db423449b90086f71b /net/ceph/osd_client.c
parent: cfc84c9f73ab8a6933bd4f36efac1196cddad581 (diff)
1 files changed, 4 insertions, 43 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ccbdfbba9e53..7ebfe13267e6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -608,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
        }
 }
-static void kick_osd_requests(struct ceph_osd_client *osdc,
-                              struct ceph_osd *kickosd)
-{
-        mutex_lock(&osdc->request_mutex);
-        __kick_osd_requests(osdc, kickosd);
-        mutex_unlock(&osdc->request_mutex);
-}
 /*
 * If the osd connection drops, we need to resubmit all requests.
 */
@@ -629,7 +621,9 @@ static void osd_reset(struct ceph_connection *con)
        dout("osd_reset osd%d\n", osd->o_osd);
        osdc = osd->o_osdc;
        down_read(&osdc->map_sem);
-        kick_osd_requests(osdc, osd);
+        mutex_lock(&osdc->request_mutex);
+        __kick_osd_requests(osdc, osd);
+        mutex_unlock(&osdc->request_mutex);
        send_queued(osdc);
        up_read(&osdc->map_sem);
 }
@@ -1091,12 +1085,10 @@ static void handle_timeout(struct work_struct *work)
 {
        struct ceph_osd_client *osdc =
                container_of(work, struct ceph_osd_client, timeout_work.work);
-        struct ceph_osd_request *req, *last_req = NULL;
+        struct ceph_osd_request *req;
        struct ceph_osd *osd;
-        unsigned long timeout = osdc->client->options->osd_timeout * HZ;
        unsigned long keepalive =
                osdc->client->options->osd_keepalive_timeout * HZ;
-        unsigned long last_stamp = 0;
        struct list_head slow_osds;
        dout("timeout\n");
        down_read(&osdc->map_sem);
@@ -1106,37 +1098,6 @@ static void handle_timeout(struct work_struct *work)
        mutex_lock(&osdc->request_mutex);
        /*
-         * reset osds that appear to be _really_ unresponsive.  this
-         * is a failsafe measure.. we really shouldn't be getting to
-         * this point if the system is working properly.  the monitors
-         * should mark the osd as failed and we should find out about
-         * it from an updated osd map.
-         */
-        while (timeout && !list_empty(&osdc->req_lru)) {
-                req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
-                                 r_req_lru_item);
-                /* hasn't been long enough since we sent it? */
-                if (time_before(jiffies, req->r_stamp + timeout))
-                        break;
-                /* hasn't been long enough since it was acked? */
-                if (req->r_request->ack_stamp == 0 ||
-                    time_before(jiffies, req->r_request->ack_stamp + timeout))
-                        break;
-                BUG_ON(req == last_req && req->r_stamp == last_stamp);
-                last_req = req;
-                last_stamp = req->r_stamp;
-                osd = req->r_osd;
-                BUG_ON(!osd);
-                pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
-                           req->r_tid, osd->o_osd);
-                __kick_osd_requests(osdc, osd);
-        }
-        /*
         * ping osds that are a bit slow.  this ensures that if there
         * is a break in the TCP connection we will notice, and reopen
         * a connection with that osd (from the fault callback).
author	Sage Weil <sage@inktank.com>	2012-11-28 15:28:24 -0500
committer	Alex Elder <elder@inktank.com>	2012-12-13 09:13:06 -0500
commit	83aff95eb9d60aff5497e9f44a2ae906b86d8e88 (patch)
tree	b191a3640990e4b7cc13b2db423449b90086f71b /net/ceph/osd_client.c
parent	cfc84c9f73ab8a6933bd4f36efac1196cddad581 (diff)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ccbdfbba9e53..7ebfe13267e6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -608,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
608	}	608	}
609	}	609	}
610		610
611	static void kick_osd_requests(struct ceph_osd_client *osdc,
612	struct ceph_osd *kickosd)
613	{
614	mutex_lock(&osdc->request_mutex);
615	__kick_osd_requests(osdc, kickosd);
616	mutex_unlock(&osdc->request_mutex);
617	}
618
619	/*	611	/*
620	* If the osd connection drops, we need to resubmit all requests.	612	* If the osd connection drops, we need to resubmit all requests.
621	*/	613	*/
@@ -629,7 +621,9 @@ static void osd_reset(struct ceph_connection *con)
629	dout("osd_reset osd%d\n", osd->o_osd);	621	dout("osd_reset osd%d\n", osd->o_osd);
630	osdc = osd->o_osdc;	622	osdc = osd->o_osdc;
631	down_read(&osdc->map_sem);	623	down_read(&osdc->map_sem);
632	kick_osd_requests(osdc, osd);	624	mutex_lock(&osdc->request_mutex);
		625	__kick_osd_requests(osdc, osd);
		626	mutex_unlock(&osdc->request_mutex);
633	send_queued(osdc);	627	send_queued(osdc);
634	up_read(&osdc->map_sem);	628	up_read(&osdc->map_sem);
635	}	629	}
@@ -1091,12 +1085,10 @@ static void handle_timeout(struct work_struct *work)
1091	{	1085	{
1092	struct ceph_osd_client *osdc =	1086	struct ceph_osd_client *osdc =
1093	container_of(work, struct ceph_osd_client, timeout_work.work);	1087	container_of(work, struct ceph_osd_client, timeout_work.work);
1094	struct ceph_osd_request *req, *last_req = NULL;	1088	struct ceph_osd_request *req;
1095	struct ceph_osd *osd;	1089	struct ceph_osd *osd;
1096	unsigned long timeout = osdc->client->options->osd_timeout * HZ;
1097	unsigned long keepalive =	1090	unsigned long keepalive =
1098	osdc->client->options->osd_keepalive_timeout * HZ;	1091	osdc->client->options->osd_keepalive_timeout * HZ;
1099	unsigned long last_stamp = 0;
1100	struct list_head slow_osds;	1092	struct list_head slow_osds;
1101	dout("timeout\n");	1093	dout("timeout\n");
1102	down_read(&osdc->map_sem);	1094	down_read(&osdc->map_sem);
@@ -1106,37 +1098,6 @@ static void handle_timeout(struct work_struct *work)
1106	mutex_lock(&osdc->request_mutex);	1098	mutex_lock(&osdc->request_mutex);
1107		1099
1108	/*	1100	/*
1109	* reset osds that appear to be _really_ unresponsive. this
1110	* is a failsafe measure.. we really shouldn't be getting to
1111	* this point if the system is working properly. the monitors
1112	* should mark the osd as failed and we should find out about
1113	* it from an updated osd map.
1114	*/
1115	while (timeout && !list_empty(&osdc->req_lru)) {
1116	req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
1117	r_req_lru_item);
1118
1119	/* hasn't been long enough since we sent it? */
1120	if (time_before(jiffies, req->r_stamp + timeout))
1121	break;
1122
1123	/* hasn't been long enough since it was acked? */
1124	if (req->r_request->ack_stamp == 0 ||
1125	time_before(jiffies, req->r_request->ack_stamp + timeout))
1126	break;
1127
1128	BUG_ON(req == last_req && req->r_stamp == last_stamp);
1129	last_req = req;
1130	last_stamp = req->r_stamp;
1131
1132	osd = req->r_osd;
1133	BUG_ON(!osd);
1134	pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
1135	req->r_tid, osd->o_osd);
1136	__kick_osd_requests(osdc, osd);
1137	}
1138
1139	/*
1140	* ping osds that are a bit slow. this ensures that if there	1101	* ping osds that are a bit slow. this ensures that if there
1141	* is a break in the TCP connection we will notice, and reopen	1102	* is a break in the TCP connection we will notice, and reopen
1142	* a connection with that osd (from the fault callback).	1103	* a connection with that osd (from the fault callback).