From 153a008bf7915ea9127341409170cb197d111282 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 15 Feb 2010 12:11:51 -0800 Subject: ceph: reset osd connections after fault A single osd connection fault (e.g. tcp disconnect) wasn't reopening the connection, which causes all current and future requests for that osd to hang. Signed-off-by: Sage Weil --- fs/ceph/osd_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 7f8a26fdcc2c..fa0f73703954 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -369,7 +369,6 @@ static void osd_reset(struct ceph_connection *con) return; dout("osd_reset osd%d\n", osd->o_osd); osdc = osd->o_osdc; - osd->o_incarnation++; down_read(&osdc->map_sem); kick_requests(osdc, osd); up_read(&osdc->map_sem); @@ -921,7 +920,9 @@ static void kick_requests(struct ceph_osd_client *osdc, dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); mutex_lock(&osdc->request_mutex); - if (!kickosd) { + if (kickosd) { + __reset_osd(osdc, kickosd); + } else { for (p = rb_first(&osdc->osds); p; p = n) { struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); -- cgit v1.2.2