diff options
author | Sage Weil <sage@newdream.net> | 2010-02-15 15:05:09 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-02-17 01:01:07 -0500 |
commit | 91e45ce38946a8efa21fefbc65d023ca3c0b434f (patch) | |
tree | 704146df9a3db46820684841cceffaabe5bc21cd /fs | |
parent | e2663ab60de59d20fa33da3528f6d5359f8eb003 (diff) |
ceph: cancel delayed work when closing connection
Cancelling the pending delayed work in ceph_con_close() ensures that, if and
when we reopen the connection, we can requeue work on it immediately instead
of waiting for an old timer to expire.
In ceph_fault(), queue the new delayed work while holding con->mutex to avoid any race.
This fixes clients failing to reconnect to the MDS because the
client_reconnect message arrived too late, having waited for an old
delayed-work timeout to expire.
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/messenger.c | 7 |
1 file changed, 5 insertions, 2 deletions
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index acf383f6a9cd..ca2ad0e5bb28 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -344,6 +344,7 @@ void ceph_con_close(struct ceph_connection *con) | |||
344 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ | 344 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ |
345 | mutex_lock(&con->mutex); | 345 | mutex_lock(&con->mutex); |
346 | reset_connection(con); | 346 | reset_connection(con); |
347 | cancel_delayed_work(&con->work); | ||
347 | mutex_unlock(&con->mutex); | 348 | mutex_unlock(&con->mutex); |
348 | queue_con(con); | 349 | queue_con(con); |
349 | } | 350 | } |
@@ -1841,6 +1842,8 @@ static void ceph_fault(struct ceph_connection *con) | |||
1841 | clear_bit(BUSY, &con->state); /* to avoid an improbable race */ | 1842 | clear_bit(BUSY, &con->state); /* to avoid an improbable race */ |
1842 | 1843 | ||
1843 | mutex_lock(&con->mutex); | 1844 | mutex_lock(&con->mutex); |
1845 | if (test_bit(CLOSED, &con->state)) | ||
1846 | goto out_unlock; | ||
1844 | 1847 | ||
1845 | con_close_socket(con); | 1848 | con_close_socket(con); |
1846 | 1849 | ||
@@ -1876,8 +1879,6 @@ static void ceph_fault(struct ceph_connection *con) | |||
1876 | else if (con->delay < MAX_DELAY_INTERVAL) | 1879 | else if (con->delay < MAX_DELAY_INTERVAL) |
1877 | con->delay *= 2; | 1880 | con->delay *= 2; |
1878 | 1881 | ||
1879 | mutex_unlock(&con->mutex); | ||
1880 | |||
1881 | /* explicitly schedule work to try to reconnect again later. */ | 1882 | /* explicitly schedule work to try to reconnect again later. */ |
1882 | dout("fault queueing %p delay %lu\n", con, con->delay); | 1883 | dout("fault queueing %p delay %lu\n", con, con->delay); |
1883 | con->ops->get(con); | 1884 | con->ops->get(con); |
@@ -1885,6 +1886,8 @@ static void ceph_fault(struct ceph_connection *con) | |||
1885 | round_jiffies_relative(con->delay)) == 0) | 1886 | round_jiffies_relative(con->delay)) == 0) |
1886 | con->ops->put(con); | 1887 | con->ops->put(con); |
1887 | 1888 | ||
1889 | out_unlock: | ||
1890 | mutex_unlock(&con->mutex); | ||
1888 | out: | 1891 | out: |
1889 | if (con->ops->fault) | 1892 | if (con->ops->fault) |
1890 | con->ops->fault(con); | 1893 | con->ops->fault(con); |