diff options
author | Sage Weil <sage@newdream.net> | 2009-11-09 15:05:48 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2009-11-09 15:06:07 -0500 |
commit | 685f9a5d14194fc35db73e5e7370740ccc14b64a (patch) | |
tree | 4c34ac2348149d91ad5e08cd105207810f0afdc9 | |
parent | fb690390e305ea51e1883b105c7d3c52d7100ba5 (diff) |
ceph: do not confuse stale and dead (unreconnected) caps
We were using cap_gen to track both stale caps (caps that timed out
due to temporarily losing touch with the MDS) and dead caps that did not
reconnect after an MDS failure. Introduce a recon_gen counter to track
reconnections to restarted MDSs and kill dead caps based on that instead.
Rename the cap's gen field to cap_gen while we're at it to make it clearer
which is which.
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | fs/ceph/caps.c | 20 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 9 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 2 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 |
4 files changed, 26 insertions, 9 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 8b863dbec70c..775e6f6fc970 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -609,7 +609,8 @@ retry: | |||
609 | cap->seq = seq; | 609 | cap->seq = seq; |
610 | cap->issue_seq = seq; | 610 | cap->issue_seq = seq; |
611 | cap->mseq = mseq; | 611 | cap->mseq = mseq; |
612 | cap->gen = session->s_cap_gen; | 612 | cap->cap_gen = session->s_cap_gen; |
613 | cap->recon_gen = session->s_recon_gen; | ||
613 | 614 | ||
614 | if (fmode >= 0) | 615 | if (fmode >= 0) |
615 | __ceph_get_fmode(ci, fmode); | 616 | __ceph_get_fmode(ci, fmode); |
@@ -626,17 +627,25 @@ retry: | |||
626 | static int __cap_is_valid(struct ceph_cap *cap) | 627 | static int __cap_is_valid(struct ceph_cap *cap) |
627 | { | 628 | { |
628 | unsigned long ttl; | 629 | unsigned long ttl; |
629 | u32 gen; | 630 | u32 gen, recon_gen; |
630 | 631 | ||
631 | spin_lock(&cap->session->s_cap_lock); | 632 | spin_lock(&cap->session->s_cap_lock); |
632 | gen = cap->session->s_cap_gen; | 633 | gen = cap->session->s_cap_gen; |
634 | recon_gen = cap->session->s_recon_gen; | ||
633 | ttl = cap->session->s_cap_ttl; | 635 | ttl = cap->session->s_cap_ttl; |
634 | spin_unlock(&cap->session->s_cap_lock); | 636 | spin_unlock(&cap->session->s_cap_lock); |
635 | 637 | ||
636 | if (cap->gen < gen || time_after_eq(jiffies, ttl)) { | 638 | if (cap->recon_gen != recon_gen) { |
639 | dout("__cap_is_valid %p cap %p issued %s " | ||
640 | "but DEAD (recon_gen %u vs %u)\n", &cap->ci->vfs_inode, | ||
641 | cap, ceph_cap_string(cap->issued), cap->recon_gen, | ||
642 | recon_gen); | ||
643 | return 0; | ||
644 | } | ||
645 | if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { | ||
637 | dout("__cap_is_valid %p cap %p issued %s " | 646 | dout("__cap_is_valid %p cap %p issued %s " |
638 | "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode, | 647 | "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode, |
639 | cap, ceph_cap_string(cap->issued), cap->gen, gen); | 648 | cap, ceph_cap_string(cap->issued), cap->cap_gen, gen); |
640 | return 0; | 649 | return 0; |
641 | } | 650 | } |
642 | 651 | ||
@@ -2203,7 +2212,8 @@ restart: | |||
2203 | issued = __ceph_caps_issued(ci, &implemented); | 2212 | issued = __ceph_caps_issued(ci, &implemented); |
2204 | issued |= implemented | __ceph_caps_dirty(ci); | 2213 | issued |= implemented | __ceph_caps_dirty(ci); |
2205 | 2214 | ||
2206 | cap->gen = session->s_cap_gen; | 2215 | cap->cap_gen = session->s_cap_gen; |
2216 | cap->recon_gen = session->s_recon_gen; | ||
2207 | 2217 | ||
2208 | __check_cap_issue(ci, cap, newcaps); | 2218 | __check_cap_issue(ci, cap, newcaps); |
2209 | 2219 | ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 210cb6623ea2..828417ae16f9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -329,6 +329,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
329 | ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); | 329 | ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); |
330 | 330 | ||
331 | spin_lock_init(&s->s_cap_lock); | 331 | spin_lock_init(&s->s_cap_lock); |
332 | s->s_recon_gen = 0; | ||
332 | s->s_cap_gen = 0; | 333 | s->s_cap_gen = 0; |
333 | s->s_cap_ttl = 0; | 334 | s->s_cap_ttl = 0; |
334 | s->s_renew_requested = 0; | 335 | s->s_renew_requested = 0; |
@@ -738,10 +739,11 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, | |||
738 | struct ceph_mds_session *session = arg; | 739 | struct ceph_mds_session *session = arg; |
739 | 740 | ||
740 | spin_lock(&inode->i_lock); | 741 | spin_lock(&inode->i_lock); |
741 | if (cap->gen != session->s_cap_gen) { | 742 | if (cap->recon_gen != session->s_recon_gen) { |
742 | pr_err("failed reconnect %p %llx.%llx cap %p " | 743 | pr_err("failed reconnect %p %llx.%llx cap %p " |
743 | "(gen %d < session %d)\n", inode, ceph_vinop(inode), | 744 | "(recon_gen %d < session %d)\n", inode, |
744 | cap, cap->gen, session->s_cap_gen); | 745 | ceph_vinop(inode), cap, |
746 | cap->recon_gen, session->s_recon_gen); | ||
745 | __ceph_remove_cap(cap, NULL); | 747 | __ceph_remove_cap(cap, NULL); |
746 | } | 748 | } |
747 | wake_up(&ceph_inode(inode)->i_cap_wq); | 749 | wake_up(&ceph_inode(inode)->i_cap_wq); |
@@ -2050,6 +2052,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) | |||
2050 | 2052 | ||
2051 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; | 2053 | session->s_state = CEPH_MDS_SESSION_RECONNECTING; |
2052 | session->s_seq = 0; | 2054 | session->s_seq = 0; |
2055 | session->s_recon_gen++; | ||
2053 | 2056 | ||
2054 | ceph_con_open(&session->s_con, | 2057 | ceph_con_open(&session->s_con, |
2055 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); | 2058 | ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index f566e9c84295..c0846b1c482b 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -98,6 +98,8 @@ struct ceph_mds_session { | |||
98 | u64 s_seq; /* incoming msg seq # */ | 98 | u64 s_seq; /* incoming msg seq # */ |
99 | struct mutex s_mutex; /* serialize session messages */ | 99 | struct mutex s_mutex; /* serialize session messages */ |
100 | 100 | ||
101 | int s_recon_gen; /* inc on reconnect to recovered mds */ | ||
102 | |||
101 | struct ceph_connection s_con; | 103 | struct ceph_connection s_con; |
102 | 104 | ||
103 | /* protected by s_cap_lock */ | 105 | /* protected by s_cap_lock */ |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 05947b96c524..25793559a2e5 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -169,7 +169,9 @@ struct ceph_cap { | |||
169 | int issued; /* latest, from the mds */ | 169 | int issued; /* latest, from the mds */ |
170 | int implemented; /* implemented superset of issued (for revocation) */ | 170 | int implemented; /* implemented superset of issued (for revocation) */ |
171 | int mds_wanted; | 171 | int mds_wanted; |
172 | u32 seq, issue_seq, mseq, gen; | 172 | u32 seq, issue_seq, mseq; |
173 | u32 cap_gen; /* active/stale cycle */ | ||
174 | u32 recon_gen; /* mds restart reconnect cycle */ | ||
173 | unsigned long last_used; | 175 | unsigned long last_used; |
174 | struct list_head caps_item; | 176 | struct list_head caps_item; |
175 | }; | 177 | }; |