diff options
author | Sage Weil <sage@newdream.net> | 2011-03-04 15:24:28 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2011-03-04 15:24:28 -0500 |
commit | 60bf8bf8815e6adea4c1d0423578c3b8000e2ec8 (patch) | |
tree | ce7140c32a3f177816f4029a88eb99e15bda6943 | |
parent | 692d20f576fb26f62c83f80dbf3ea899998391b7 (diff) |
libceph: fix msgr backoff
With commit f363e45f we replaced a bunch of hacky workqueue mutual
exclusion logic with the WQ_NON_REENTRANT flag. One piece of fallout is
that the exponential backoff breaks in certain cases:
* con_work attempts to connect.
* we get an immediate failure, and the socket state change handler queues
immediate work.
* con_work calls ceph_fault, we decide to back off, but can't queue delayed
work.
In this case, we add a BACKOFF bit to make con_work reschedule delayed work
next time it runs (which should be immediately).
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | include/linux/ceph/messenger.h | 1 | ||||
-rw-r--r-- | net/ceph/messenger.c | 30 |
2 files changed, 29 insertions, 2 deletions
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index c3011beac30d..eb31e108a64d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -123,6 +123,7 @@ struct ceph_msg_pos { | |||
123 | #define SOCK_CLOSED 11 /* socket state changed to closed */ | 123 | #define SOCK_CLOSED 11 /* socket state changed to closed */ |
124 | #define OPENING 13 /* open connection w/ (possibly new) peer */ | 124 | #define OPENING 13 /* open connection w/ (possibly new) peer */ |
125 | #define DEAD 14 /* dead, about to kfree */ | 125 | #define DEAD 14 /* dead, about to kfree */ |
126 | #define BACKOFF 15 | ||
126 | 127 | ||
127 | /* | 128 | /* |
128 | * A single connection with another host. | 129 | * A single connection with another host. |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 6bd5025f6220..46fbc422ba74 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -1949,6 +1949,19 @@ static void con_work(struct work_struct *work) | |||
1949 | work.work); | 1949 | work.work); |
1950 | 1950 | ||
1951 | mutex_lock(&con->mutex); | 1951 | mutex_lock(&con->mutex); |
1952 | if (test_and_clear_bit(BACKOFF, &con->state)) { | ||
1953 | dout("con_work %p backing off\n", con); | ||
1954 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | ||
1955 | round_jiffies_relative(con->delay))) { | ||
1956 | dout("con_work %p backoff %lu\n", con, con->delay); | ||
1957 | mutex_unlock(&con->mutex); | ||
1958 | return; | ||
1959 | } else { | ||
1960 | con->ops->put(con); | ||
1961 | dout("con_work %p FAILED to back off %lu\n", con, | ||
1962 | con->delay); | ||
1963 | } | ||
1964 | } | ||
1952 | 1965 | ||
1953 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ | 1966 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ |
1954 | dout("con_work CLOSED\n"); | 1967 | dout("con_work CLOSED\n"); |
@@ -2017,11 +2030,24 @@ static void ceph_fault(struct ceph_connection *con) | |||
2017 | con->delay = BASE_DELAY_INTERVAL; | 2030 | con->delay = BASE_DELAY_INTERVAL; |
2018 | else if (con->delay < MAX_DELAY_INTERVAL) | 2031 | else if (con->delay < MAX_DELAY_INTERVAL) |
2019 | con->delay *= 2; | 2032 | con->delay *= 2; |
2020 | dout("fault queueing %p delay %lu\n", con, con->delay); | ||
2021 | con->ops->get(con); | 2033 | con->ops->get(con); |
2022 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | 2034 | if (queue_delayed_work(ceph_msgr_wq, &con->work, |
2023 | round_jiffies_relative(con->delay)) == 0) | 2035 | round_jiffies_relative(con->delay))) { |
2036 | dout("fault queued %p delay %lu\n", con, con->delay); | ||
2037 | } else { | ||
2024 | con->ops->put(con); | 2038 | con->ops->put(con); |
2039 | dout("fault failed to queue %p delay %lu, backoff\n", | ||
2040 | con, con->delay); | ||
2041 | /* | ||
2042 | * In many cases we see a socket state change | ||
2043 | * while con_work is running and end up | ||
2044 | * queuing (non-delayed) work, such that we | ||
2045 | * can't backoff with a delay. Set a flag so | ||
2046 | * that when con_work restarts we schedule the | ||
2047 | * delay then. | ||
2048 | */ | ||
2049 | set_bit(BACKOFF, &con->state); | ||
2050 | } | ||
2025 | } | 2051 | } |
2026 | 2052 | ||
2027 | out_unlock: | 2053 | out_unlock: |