From: Sage Weil Date: Mon, 15 Feb 2010 20:11:51 +0000 (-0800) Subject: ceph: reset osd connections after fault X-Git-Url: http://drtracing.org/?a=commitdiff_plain;h=153a008bf7915ea9127341409170cb197d111282;p=deliverable%2Flinux.git ceph: reset osd connections after fault A single osd connection fault (e.g. tcp disconnect) wasn't reopening the connection, which causes all current and future requests for that osd to hang. Signed-off-by: Sage Weil --- diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 7f8a26fdcc2c..fa0f73703954 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -369,7 +369,6 @@ static void osd_reset(struct ceph_connection *con) return; dout("osd_reset osd%d\n", osd->o_osd); osdc = osd->o_osdc; - osd->o_incarnation++; down_read(&osdc->map_sem); kick_requests(osdc, osd); up_read(&osdc->map_sem); @@ -921,7 +920,9 @@ static void kick_requests(struct ceph_osd_client *osdc, dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); mutex_lock(&osdc->request_mutex); - if (!kickosd) { + if (kickosd) { + __reset_osd(osdc, kickosd); + } else { for (p = rb_first(&osdc->osds); p; p = n) { struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);