summary | refs | log | tree | commit | diff | stats
path: root/net/ceph
diff options
context:
space:
mode:
author Ilya Dryomov <idryomov@gmail.com> 2015-05-11 10:53:10 -0400
committer Ilya Dryomov <idryomov@gmail.com> 2015-05-20 14:02:14 -0400
commit b0494532214bdfbf241e94fabab5dd46f7b82631 (patch)
tree 0e0a3057667e958118daf8ef835cb7f996b8ad8a /net/ceph
parent e26081808edadfd257c6c9d81014e3b25e9a6118 (diff)
libceph: request a new osdmap if lingering request maps to no osd
This commit does two things. First, if there are any homeless lingering requests, we now request a new osdmap even if the osdmap that is being processed brought no changes, i.e. if a given lingering request turned homeless in one of the previous epochs and remained homeless in the current epoch. Not doing so leaves us with a stale osdmap and as a result we may miss our window for reestablishing the watch and lose notifies.

MON=1 OSD=1:

    # cat linger-needmap.sh
    #!/bin/bash
    rbd create --size 1 test
    DEV=$(rbd map test)
    ceph osd out 0
    rbd map dne/dne # obtain a new osdmap as a side effect (!)
    sleep 1
    ceph osd in 0
    rbd resize --size 2 test
    # rbd info test | grep size -> 2M
    # blockdev --getsize $DEV -> 1M

N.B.: Not obtaining a new osdmap in between "osd out" and "osd in" above is enough to make it miss that resize notify, but that is a bug^Wlimitation of ceph watch/notify v1.

Second, homeless lingering requests are now kicked just like those lingering requests whose mapping has changed. This is mainly to recognize that a homeless lingering request makes no sense and to preserve the invariant that a registered lingering request is not sitting on any of r_req_lru_item lists. This spares us a WARN_ON, which commit ba9d114ec557 ("libceph: clear r_req_lru_item in __unregister_linger_request()") tried to fix the _wrong_ way.

Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
Diffstat (limited to 'net/ceph')
-rw-r--r-- net/ceph/osd_client.c 31
1 files changed, 20 insertions, 11 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 41a4abc7e98e..31d4b1ebff01 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2017,20 +2017,29 @@ static void kick_requests(struct ceph_osd_client *osdc, bool force_resend,
2017 err = __map_request(osdc, req, 2017 err = __map_request(osdc, req,
2018 force_resend || force_resend_writes); 2018 force_resend || force_resend_writes);
2019 dout("__map_request returned %d\n", err); 2019 dout("__map_request returned %d\n", err);
2020 if (err == 0)
2021 continue; /* no change and no osd was specified */
2022 if (err < 0) 2020 if (err < 0)
2023 continue; /* hrm! */ 2021 continue; /* hrm! */
2024 if (req->r_osd == NULL) { 2022 if (req->r_osd == NULL || err > 0) {
2025 dout("tid %llu maps to no valid osd\n", req->r_tid); 2023 if (req->r_osd == NULL) {
2026 needmap++; /* request a newer map */ 2024 dout("lingering %p tid %llu maps to no osd\n",
2027 continue; 2025 req, req->r_tid);
2028 } 2026 /*
2027 * A homeless lingering request makes
2028 * no sense, as its job is to keep
2029 * a particular OSD connection open.
2030 * Request a newer map and kick the
2031 * request, knowing that it won't be
2032 * resent until we actually get a map
2033 * that can tell us where to send it.
2034 */
2035 needmap++;
2036 }
2029 2037
2030 dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, 2038 dout("kicking lingering %p tid %llu osd%d\n", req,
2031 req->r_osd ? req->r_osd->o_osd : -1); 2039 req->r_tid, req->r_osd ? req->r_osd->o_osd : -1);
2032 __register_request(osdc, req); 2040 __register_request(osdc, req);
2033 __unregister_linger_request(osdc, req); 2041 __unregister_linger_request(osdc, req);
2042 }
2034 } 2043 }
2035 reset_changed_osds(osdc); 2044 reset_changed_osds(osdc);
2036 mutex_unlock(&osdc->request_mutex); 2045 mutex_unlock(&osdc->request_mutex);