diff options
author | Sage Weil <sage@newdream.net> | 2010-02-25 15:40:45 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-02-25 15:40:45 -0500 |
commit | e80a52d14f868059e8ec790c9fae88cdb8a1df98 (patch) | |
tree | 8353f33245509d7c3bd7fb25f5c254df6db8db9b /fs/ceph/messenger.c | |
parent | 161fd65ac934608345aed35226fc889ea3b0b500 (diff) |
ceph: fix connection fault STANDBY check
Move any out_sent messages to out_queue _before_ checking if
out_queue is empty and going to STANDBY, or else we may drop
something that was never acked.
And clean up the code a bit (less goto).
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/messenger.c')
-rw-r--r-- | fs/ceph/messenger.c | 31 |
1 files changed, 13 insertions, 18 deletions
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 9ea7b763c8dc..0ddc2c75f6b4 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -1853,32 +1853,27 @@ static void ceph_fault(struct ceph_connection *con) | |||
1853 | con->in_msg = NULL; | 1853 | con->in_msg = NULL; |
1854 | } | 1854 | } |
1855 | 1855 | ||
1856 | /* Requeue anything that hasn't been acked */ | ||
1857 | list_splice_init(&con->out_sent, &con->out_queue); | ||
1856 | 1858 | ||
1857 | /* If there are no messages in the queue, place the connection | 1859 | /* If there are no messages in the queue, place the connection |
1858 | * in a STANDBY state (i.e., don't try to reconnect just yet). */ | 1860 | * in a STANDBY state (i.e., don't try to reconnect just yet). */ |
1859 | if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { | 1861 | if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { |
1860 | dout("fault setting STANDBY\n"); | 1862 | dout("fault setting STANDBY\n"); |
1861 | set_bit(STANDBY, &con->state); | 1863 | set_bit(STANDBY, &con->state); |
1862 | mutex_unlock(&con->mutex); | 1864 | } else { |
1863 | goto out; | 1865 | /* retry after a delay. */ |
1866 | if (con->delay == 0) | ||
1867 | con->delay = BASE_DELAY_INTERVAL; | ||
1868 | else if (con->delay < MAX_DELAY_INTERVAL) | ||
1869 | con->delay *= 2; | ||
1870 | dout("fault queueing %p delay %lu\n", con, con->delay); | ||
1871 | con->ops->get(con); | ||
1872 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | ||
1873 | round_jiffies_relative(con->delay)) == 0) | ||
1874 | con->ops->put(con); | ||
1864 | } | 1875 | } |
1865 | 1876 | ||
1866 | /* Requeue anything that hasn't been acked, and retry after a | ||
1867 | * delay. */ | ||
1868 | list_splice_init(&con->out_sent, &con->out_queue); | ||
1869 | |||
1870 | if (con->delay == 0) | ||
1871 | con->delay = BASE_DELAY_INTERVAL; | ||
1872 | else if (con->delay < MAX_DELAY_INTERVAL) | ||
1873 | con->delay *= 2; | ||
1874 | |||
1875 | /* explicitly schedule work to try to reconnect again later. */ | ||
1876 | dout("fault queueing %p delay %lu\n", con, con->delay); | ||
1877 | con->ops->get(con); | ||
1878 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | ||
1879 | round_jiffies_relative(con->delay)) == 0) | ||
1880 | con->ops->put(con); | ||
1881 | |||
1882 | out_unlock: | 1877 | out_unlock: |
1883 | mutex_unlock(&con->mutex); | 1878 | mutex_unlock(&con->mutex); |
1884 | out: | 1879 | out: |