aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Sage Weil <sage@newdream.net> 2010-02-25 15:40:45 -0500
committer Sage Weil <sage@newdream.net> 2010-02-25 15:40:45 -0500
commit e80a52d14f868059e8ec790c9fae88cdb8a1df98 (patch)
tree 8353f33245509d7c3bd7fb25f5c254df6db8db9b /fs
parent 161fd65ac934608345aed35226fc889ea3b0b500 (diff)
ceph: fix connection fault STANDBY check
Move any out_sent messages to out_queue _before_ checking if out_queue is
empty and going to STANDBY, or else we may drop something that was never
acked.

And clean up the code a bit (less goto).

Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ceph/messenger.c 31
1 files changed, 13 insertions, 18 deletions
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 9ea7b763c8d..0ddc2c75f6b 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -1853,32 +1853,27 @@ static void ceph_fault(struct ceph_connection *con)
1853 con->in_msg = NULL; 1853 con->in_msg = NULL;
1854 } 1854 }
1855 1855
1856 /* Requeue anything that hasn't been acked */
1857 list_splice_init(&con->out_sent, &con->out_queue);
1856 1858
1857 /* If there are no messages in the queue, place the connection 1859 /* If there are no messages in the queue, place the connection
1858 * in a STANDBY state (i.e., don't try to reconnect just yet). */ 1860 * in a STANDBY state (i.e., don't try to reconnect just yet). */
1859 if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { 1861 if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
1860 dout("fault setting STANDBY\n"); 1862 dout("fault setting STANDBY\n");
1861 set_bit(STANDBY, &con->state); 1863 set_bit(STANDBY, &con->state);
1862 mutex_unlock(&con->mutex); 1864 } else {
1863 goto out; 1865 /* retry after a delay. */
1866 if (con->delay == 0)
1867 con->delay = BASE_DELAY_INTERVAL;
1868 else if (con->delay < MAX_DELAY_INTERVAL)
1869 con->delay *= 2;
1870 dout("fault queueing %p delay %lu\n", con, con->delay);
1871 con->ops->get(con);
1872 if (queue_delayed_work(ceph_msgr_wq, &con->work,
1873 round_jiffies_relative(con->delay)) == 0)
1874 con->ops->put(con);
1864 } 1875 }
1865 1876
1866 /* Requeue anything that hasn't been acked, and retry after a
1867 * delay. */
1868 list_splice_init(&con->out_sent, &con->out_queue);
1869
1870 if (con->delay == 0)
1871 con->delay = BASE_DELAY_INTERVAL;
1872 else if (con->delay < MAX_DELAY_INTERVAL)
1873 con->delay *= 2;
1874
1875 /* explicitly schedule work to try to reconnect again later. */
1876 dout("fault queueing %p delay %lu\n", con, con->delay);
1877 con->ops->get(con);
1878 if (queue_delayed_work(ceph_msgr_wq, &con->work,
1879 round_jiffies_relative(con->delay)) == 0)
1880 con->ops->put(con);
1881
1882out_unlock: 1877out_unlock:
1883 mutex_unlock(&con->mutex); 1878 mutex_unlock(&con->mutex);
1884out: 1879out: