aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2010-11-01 18:49:23 -0400
committer: Sage Weil <sage@newdream.net> 2010-11-01 18:49:23 -0400
commit: df9f86faf3ee610527ed02031fe7dd3c8b752e44 (patch)
tree: 361ecdca449c3f80d45ff33a291ad0ae544d7470 /net
parent: 2f56f56ad991edd51ffd0baf1182245ee1277a04 (diff)
ceph: fix small seq message skipping
If the client gets out of sync with the server message sequence number, we normally skip low seq messages (ones we already received). The skip code was also incrementing the expected seq, such that all subsequent messages also appeared old and got skipped, eventually leading to a timeout on the osd connection.

This resulted in some lagging requests and console messages like

[233480.882885] ceph: skipping osd22 10.138.138.13:6804 seq 2016, expected 2017
[233480.882919] ceph: skipping osd22 10.138.138.13:6804 seq 2017, expected 2018
[233480.882963] ceph: skipping osd22 10.138.138.13:6804 seq 2018, expected 2019
[233480.883488] ceph: skipping osd22 10.138.138.13:6804 seq 2019, expected 2020
[233485.219558] ceph: skipping osd22 10.138.138.13:6804 seq 2020, expected 2021
[233485.906595] ceph: skipping osd22 10.138.138.13:6804 seq 2021, expected 2022
[233490.379536] ceph: skipping osd22 10.138.138.13:6804 seq 2022, expected 2023
[233495.523260] ceph: skipping osd22 10.138.138.13:6804 seq 2023, expected 2024
[233495.923194] ceph: skipping osd22 10.138.138.13:6804 seq 2024, expected 2025
[233500.534614] ceph: tid 6023602 timed out on osd22, will reset osd

Reported-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ceph/messenger.c 3
1 files changed, 1 insertions, 2 deletions
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0e8157ee5d43..d379abf873bc 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1532,14 +1532,13 @@ static int read_partial_message(struct ceph_connection *con)
1532 /* verify seq# */ 1532 /* verify seq# */
1533 seq = le64_to_cpu(con->in_hdr.seq); 1533 seq = le64_to_cpu(con->in_hdr.seq);
1534 if ((s64)seq - (s64)con->in_seq < 1) { 1534 if ((s64)seq - (s64)con->in_seq < 1) {
1535 pr_info("skipping %s%lld %s seq %lld, expected %lld\n", 1535 pr_info("skipping %s%lld %s seq %lld expected %lld\n",
1536 ENTITY_NAME(con->peer_name), 1536 ENTITY_NAME(con->peer_name),
1537 ceph_pr_addr(&con->peer_addr.in_addr), 1537 ceph_pr_addr(&con->peer_addr.in_addr),
1538 seq, con->in_seq + 1); 1538 seq, con->in_seq + 1);
1539 con->in_base_pos = -front_len - middle_len - data_len - 1539 con->in_base_pos = -front_len - middle_len - data_len -
1540 sizeof(m->footer); 1540 sizeof(m->footer);
1541 con->in_tag = CEPH_MSGR_TAG_READY; 1541 con->in_tag = CEPH_MSGR_TAG_READY;
1542 con->in_seq++;
1543 return 0; 1542 return 0;
1544 } else if ((s64)seq - (s64)con->in_seq > 1) { 1543 } else if ((s64)seq - (s64)con->in_seq > 1) {
1545 pr_err("read_partial_message bad seq %lld expected %lld\n", 1544 pr_err("read_partial_message bad seq %lld expected %lld\n",