diff options
author | Sage Weil <sage@newdream.net> | 2010-03-16 18:28:54 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-03-23 10:46:56 -0400 |
commit | 80fc7314a7e26e8d2e4ba5b3d8cc2d4aeb750015 (patch) | |
tree | a8f5c8ee76ac0b566332317388b716e70112088b /fs/ceph/mds_client.c | |
parent | 916623da10e270c7e9e802a7ddfe1ec8f890982d (diff) |
ceph: fix mds sync() race with completing requests
The wait_unsafe_requests() helper dropped the mdsc mutex to wait
for each request to complete, and then examined r_node to get the
next request after retaking the lock. But the request completion
removes the request from the tree, so r_node was always undefined
at this point. Since it's a small race, it usually led to a
valid request, but not always. The result was an occasional
crash in rb_next() while dereferencing node->rb_left.
Fix this by clearing the rb_node when removing the request from
the request tree, and not walking off into the weeds when we
are done waiting for a request. Since the request we waited on
will _always_ be out of the request tree, take a ref on the next
request, in the hopes that it won't be. But if it is, it's ok:
we can start over from the beginning (and traverse over older read
requests again).
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r-- | fs/ceph/mds_client.c | 27 |
1 files changed, 20 insertions, 7 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a2600101ec22..5ec864198368 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -529,6 +529,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
529 | { | 529 | { |
530 | dout("__unregister_request %p tid %lld\n", req, req->r_tid); | 530 | dout("__unregister_request %p tid %lld\n", req, req->r_tid); |
531 | rb_erase(&req->r_node, &mdsc->request_tree); | 531 | rb_erase(&req->r_node, &mdsc->request_tree); |
532 | RB_CLEAR_NODE(&req->r_node); | ||
532 | ceph_mdsc_put_request(req); | 533 | ceph_mdsc_put_request(req); |
533 | 534 | ||
534 | if (req->r_unsafe_dir) { | 535 | if (req->r_unsafe_dir) { |
@@ -2682,29 +2683,41 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) | |||
2682 | */ | 2683 | */ |
2683 | static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) | 2684 | static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) |
2684 | { | 2685 | { |
2685 | struct ceph_mds_request *req = NULL; | 2686 | struct ceph_mds_request *req = NULL, *nextreq; |
2686 | struct rb_node *n; | 2687 | struct rb_node *n; |
2687 | 2688 | ||
2688 | mutex_lock(&mdsc->mutex); | 2689 | mutex_lock(&mdsc->mutex); |
2689 | dout("wait_unsafe_requests want %lld\n", want_tid); | 2690 | dout("wait_unsafe_requests want %lld\n", want_tid); |
2691 | restart: | ||
2690 | req = __get_oldest_req(mdsc); | 2692 | req = __get_oldest_req(mdsc); |
2691 | while (req && req->r_tid <= want_tid) { | 2693 | while (req && req->r_tid <= want_tid) { |
2694 | /* find next request */ | ||
2695 | n = rb_next(&req->r_node); | ||
2696 | if (n) | ||
2697 | nextreq = rb_entry(n, struct ceph_mds_request, r_node); | ||
2698 | else | ||
2699 | nextreq = NULL; | ||
2692 | if ((req->r_op & CEPH_MDS_OP_WRITE)) { | 2700 | if ((req->r_op & CEPH_MDS_OP_WRITE)) { |
2693 | /* write op */ | 2701 | /* write op */ |
2694 | ceph_mdsc_get_request(req); | 2702 | ceph_mdsc_get_request(req); |
2703 | if (nextreq) | ||
2704 | ceph_mdsc_get_request(nextreq); | ||
2695 | mutex_unlock(&mdsc->mutex); | 2705 | mutex_unlock(&mdsc->mutex); |
2696 | dout("wait_unsafe_requests wait on %llu (want %llu)\n", | 2706 | dout("wait_unsafe_requests wait on %llu (want %llu)\n", |
2697 | req->r_tid, want_tid); | 2707 | req->r_tid, want_tid); |
2698 | wait_for_completion(&req->r_safe_completion); | 2708 | wait_for_completion(&req->r_safe_completion); |
2699 | mutex_lock(&mdsc->mutex); | 2709 | mutex_lock(&mdsc->mutex); |
2700 | n = rb_next(&req->r_node); | ||
2701 | ceph_mdsc_put_request(req); | 2710 | ceph_mdsc_put_request(req); |
2702 | } else { | 2711 | if (!nextreq) |
2703 | n = rb_next(&req->r_node); | 2712 | break; /* next dne before, so we're done! */ |
2713 | if (RB_EMPTY_NODE(&nextreq->r_node)) { | ||
2714 | /* next request was removed from tree */ | ||
2715 | ceph_mdsc_put_request(nextreq); | ||
2716 | goto restart; | ||
2717 | } | ||
2718 | ceph_mdsc_put_request(nextreq); /* won't go away */ | ||
2704 | } | 2719 | } |
2705 | if (!n) | 2720 | req = nextreq; |
2706 | break; | ||
2707 | req = rb_entry(n, struct ceph_mds_request, r_node); | ||
2708 | } | 2721 | } |
2709 | mutex_unlock(&mdsc->mutex); | 2722 | mutex_unlock(&mdsc->mutex); |
2710 | dout("wait_unsafe_requests done\n"); | 2723 | dout("wait_unsafe_requests done\n"); |