diff options
author | Greg Farnum <gregf@hq.newdream.net> | 2010-06-22 18:58:01 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-08-01 23:11:41 -0400 |
commit | e55b71f802fd448a79275ba7b263fe1a8639be5f (patch) | |
tree | fa17d2b0fbf1bc5e76a5e9ddac6c19a6bbe8d2bc | |
parent | 2bc50259fa0aa1868f8b2ba1d374406cb3c57f72 (diff) |
ceph: handle ESTALE properly; on receipt, resend to the authority if the request wasn't sent there
Signed-off-by: Greg Farnum <gregf@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | fs/ceph/mds_client.c | 41 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 2 |
2 files changed, 35 insertions, 8 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a546e0ddb8e3..34d215ff4c82 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1628,6 +1628,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1628 | 1628 | ||
1629 | req->r_mds = mds; | 1629 | req->r_mds = mds; |
1630 | req->r_attempts++; | 1630 | req->r_attempts++; |
1631 | if (req->r_inode) { | ||
1632 | struct ceph_cap *cap = | ||
1633 | ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds); | ||
1634 | |||
1635 | if (cap) | ||
1636 | req->r_sent_on_mseq = cap->mseq; | ||
1637 | else | ||
1638 | req->r_sent_on_mseq = -1; | ||
1639 | } | ||
1631 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, | 1640 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, |
1632 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); | 1641 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); |
1633 | 1642 | ||
@@ -1962,21 +1971,39 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1962 | result = le32_to_cpu(head->result); | 1971 | result = le32_to_cpu(head->result); |
1963 | 1972 | ||
1964 | /* | 1973 | /* |
1965 | * Tolerate 2 consecutive ESTALEs from the same mds. | 1974 | * Handle an ESTALE |
1966 | * FIXME: we should be looking at the cap migrate_seq. | 1975 | * if we're not talking to the authority, send to them |
1976 | * if the authority has changed while we weren't looking, | ||
1977 | * send to new authority | ||
1978 | * Otherwise we just have to return an ESTALE | ||
1967 | */ | 1979 | */ |
1968 | if (result == -ESTALE) { | 1980 | if (result == -ESTALE) { |
1969 | req->r_direct_mode = USE_AUTH_MDS; | 1981 | dout("got ESTALE on request %llu", req->r_tid); |
1970 | req->r_num_stale++; | 1982 | if (!req->r_inode) ; //do nothing; not an authority problem |
1971 | if (req->r_num_stale <= 2) { | 1983 | else if (req->r_direct_mode != USE_AUTH_MDS) { |
1984 | dout("not using auth, setting for that now"); | ||
1985 | req->r_direct_mode = USE_AUTH_MDS; | ||
1972 | __do_request(mdsc, req); | 1986 | __do_request(mdsc, req); |
1973 | mutex_unlock(&mdsc->mutex); | 1987 | mutex_unlock(&mdsc->mutex); |
1974 | goto out; | 1988 | goto out; |
1989 | } else { | ||
1990 | struct ceph_inode_info *ci = ceph_inode(req->r_inode); | ||
1991 | struct ceph_cap *cap = | ||
1992 | ceph_get_cap_for_mds(ci, req->r_mds);; | ||
1993 | |||
1994 | dout("already using auth"); | ||
1995 | if ((!cap || cap != ci->i_auth_cap) || | ||
1996 | (cap->mseq != req->r_sent_on_mseq)) { | ||
1997 | dout("but cap changed, so resending"); | ||
1998 | __do_request(mdsc, req); | ||
1999 | mutex_unlock(&mdsc->mutex); | ||
2000 | goto out; | ||
2001 | } | ||
1975 | } | 2002 | } |
1976 | } else { | 2003 | dout("have to return ESTALE on request %llu", req->r_tid); |
1977 | req->r_num_stale = 0; | ||
1978 | } | 2004 | } |
1979 | 2005 | ||
2006 | |||
1980 | if (head->safe) { | 2007 | if (head->safe) { |
1981 | req->r_got_safe = true; | 2008 | req->r_got_safe = true; |
1982 | __unregister_request(mdsc, req); | 2009 | __unregister_request(mdsc, req); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c86be30e8707..ab7e89f5e344 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -208,8 +208,8 @@ struct ceph_mds_request { | |||
208 | 208 | ||
209 | int r_attempts; /* resend attempts */ | 209 | int r_attempts; /* resend attempts */ |
210 | int r_num_fwd; /* number of forward attempts */ | 210 | int r_num_fwd; /* number of forward attempts */ |
211 | int r_num_stale; | ||
212 | int r_resend_mds; /* mds to resend to next, if any*/ | 211 | int r_resend_mds; /* mds to resend to next, if any*/ |
212 | u32 r_sent_on_mseq; /* cap mseq request was sent at*/ | ||
213 | 213 | ||
214 | struct kref r_kref; | 214 | struct kref r_kref; |
215 | struct list_head r_wait; | 215 | struct list_head r_wait; |