author    Linus Torvalds <torvalds@linux-foundation.org>  2013-11-26 21:02:46 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-11-26 21:02:46 -0500
commit    4f9e5df211a8591b3fb5c154ecc7ec5d4ecd6b79 (patch)
tree      ba4e3980540d5bf082bc38f0107b628b2b726a25 /fs/ceph
parent    0e4b0743bbe5807535ba1b0389281f9a4c1b2bb7 (diff)
parent    ff638b7df5a9264024a6448bdfde2b2bf5d1994a (diff)
Merge branch 'for-linus-bugs' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull ceph bug-fixes from Sage Weil:
 "These include a couple fixes to the new fscache code that went in
  during the last cycle (which will need to go stable@ shortly as
  well), a couple client-side directory fragmentation fixes, a fix for
  a race in the cap release queuing path, and a couple race fixes in
  the request abort and resend code.

  Obviously some of this could have gone into 3.12 final, but I
  preferred to overtest rather than send things in for a late -rc, and
  then my travel schedule intervened"

* 'for-linus-bugs' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: allocate non-zero page to fscache in readpage()
  ceph: wake up 'safe' waiters when unregistering request
  ceph: cleanup aborted requests when re-sending requests.
  ceph: handle race between cap reconnect and cap release
  ceph: set caps count after composing cap reconnect message
  ceph: queue cap release in __ceph_remove_cap()
  ceph: handle frag mismatch between readdir request and reply
  ceph: remove outdated frag information
  ceph: hung on ceph fscache invalidate in some cases
Diffstat (limited to 'fs/ceph')
-rw-r--r--  fs/ceph/addr.c          2
-rw-r--r--  fs/ceph/cache.c         3
-rw-r--r--  fs/ceph/caps.c         27
-rw-r--r--  fs/ceph/dir.c          11
-rw-r--r--  fs/ceph/inode.c        49
-rw-r--r--  fs/ceph/mds_client.c   61
-rw-r--r--  fs/ceph/mds_client.h    1
-rw-r--r--  fs/ceph/super.h         8
8 files changed, 121 insertions(+), 41 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6df8bd481425..1e561c059539 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -216,7 +216,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 	}
 	SetPageUptodate(page);
 
-	if (err == 0)
+	if (err >= 0)
 		ceph_readpage_to_fscache(inode, page);
 
 out:
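
For context on the err >= 0 change: in this read path a successful read reports the number of bytes read, not just 0, so gating the fscache store on err == 0 skipped every page that actually contained data. A minimal userspace sketch of that return convention (the names and the standalone program are illustrative, not kernel code):

    #include <stdio.h>

    /* Stand-in for the OSD read helper: returns bytes read (>= 0) on
     * success, or a negative errno-style value on failure. */
    static int read_object(char *buf, int len)
    {
            (void)buf;                      /* content doesn't matter here */
            return len < 100 ? len : 100;   /* e.g. a 100-byte object */
    }

    int main(void)
    {
            char page[4096];
            int err = read_object(page, sizeof(page));

            if (err >= 0)   /* "err == 0" would only match empty reads */
                    printf("store %d-byte page to fscache\n", err);
            return 0;
    }
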
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 7db2e6ca4b8f..8c44fdd4e1c3 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -324,6 +324,9 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 
+	if (!PageFsCache(page))
+		return;
+
 	fscache_wait_on_page_write(ci->fscache, page);
 	fscache_uncache_page(ci->fscache, page);
 }
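
The added PageFsCache() test is the whole of the "hung on ceph fscache invalidate" fix: fscache_wait_on_page_write() sleeps until a cache write on the page completes, and for a page fscache was never tracking that completion never arrives. A hedged sketch of the guard-before-blocking-wait shape (all names here are stubs, not the fscache API):

    #include <stdbool.h>

    struct page_stub {
            bool fscache;               /* models the PG_fscache bit */
    };

    /* Models fscache_wait_on_page_write(): in the kernel this sleeps
     * until an in-flight cache write finishes, so it would sleep
     * forever for a page fscache was never asked to write. */
    static void wait_on_cache_write(struct page_stub *p) { (void)p; }

    static void invalidate_page(struct page_stub *p)
    {
            if (!p->fscache)        /* the added guard: never cached, */
                    return;         /* so there is nothing to wait on */
            wait_on_cache_write(p);
            p->fscache = false;     /* fscache_uncache_page() */
    }

    int main(void)
    {
            struct page_stub never_cached = { .fscache = false };
            invalidate_page(&never_cached);   /* returns immediately */
            return 0;
    }
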
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 13976c33332e..3c0a4bd74996 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
  * caller should hold i_ceph_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
-void __ceph_remove_cap(struct ceph_cap *cap)
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 {
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_inode_info *ci = cap->ci;
@@ -909,6 +909,16 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
+	/*
+	 * s_cap_reconnect is protected by s_cap_lock. no one changes
+	 * s_cap_gen while session is in the reconnect state.
+	 */
+	if (queue_release &&
+	    (!session->s_cap_reconnect ||
+	     cap->cap_gen == session->s_cap_gen))
+		__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+				    cap->mseq, cap->issue_seq);
+
 	if (session->s_cap_iterator == cap) {
 		/* not yet, we are iterating over this very cap */
 		dout("__ceph_remove_cap delaying %p removal from session %p\n",
@@ -1023,7 +1033,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
 	struct ceph_mds_cap_release *head;
 	struct ceph_mds_cap_item *item;
 
-	spin_lock(&session->s_cap_lock);
 	BUG_ON(!session->s_num_cap_releases);
 	msg = list_first_entry(&session->s_cap_releases,
 			       struct ceph_msg, list_head);
@@ -1052,7 +1061,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
 		     (int)CEPH_CAPS_PER_RELEASE,
 		     (int)msg->front.iov_len);
 	}
-	spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -1067,12 +1075,8 @@ void ceph_queue_caps_release(struct inode *inode)
 	p = rb_first(&ci->i_caps);
 	while (p) {
 		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
-		struct ceph_mds_session *session = cap->session;
-
-		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-				    cap->mseq, cap->issue_seq);
 		p = rb_next(p);
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, true);
 	}
 }
 
@@ -2791,7 +2795,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			}
 			spin_unlock(&mdsc->cap_dirty_lock);
 		}
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, false);
 	}
 	/* else, we already released it */
 
@@ -2931,9 +2935,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	if (!inode) {
 		dout(" i don't have ino %llx\n", vino.ino);
 
-		if (op == CEPH_CAP_OP_IMPORT)
+		if (op == CEPH_CAP_OP_IMPORT) {
+			spin_lock(&session->s_cap_lock);
 			__queue_cap_release(session, vino.ino, cap_id,
 					    mseq, seq);
+			spin_unlock(&session->s_cap_lock);
+		}
 		goto flush_cap_releases;
 	}
 
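
The predicate added to __ceph_remove_cap() is the heart of the "race between cap reconnect and cap release" fix: while a reconnect message is being composed, an explicit release is queued only for a cap whose generation already matches the session's, i.e. one that encode_caps_cb() has already written into the reconnect. A cap removed before being encoded simply never appears in the reconnect, which drops it implicitly. A hedged sketch of just that decision (the struct fields mirror the patch; everything else is scaffolding):

    #include <stdbool.h>
    #include <stdio.h>

    struct session_stub { int s_cap_reconnect; int s_cap_gen; };
    struct cap_stub     { int cap_gen; };

    /* Mirrors the condition in __ceph_remove_cap(); both fields are
     * read under s_cap_lock in the real code. */
    static bool should_queue_release(const struct session_stub *s,
                                     const struct cap_stub *c)
    {
            return !s->s_cap_reconnect || c->cap_gen == s->s_cap_gen;
    }

    int main(void)
    {
            struct session_stub s = { .s_cap_reconnect = 1, .s_cap_gen = 5 };
            struct cap_stub not_encoded = { .cap_gen = 4 };
            struct cap_stub encoded     = { .cap_gen = 5 };

            printf("not yet encoded: %d, already encoded: %d\n",
                   should_queue_release(&s, &not_encoded),   /* 0: skip */
                   should_queue_release(&s, &encoded));      /* 1: queue */
            return 0;
    }
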
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 868b61d56cac..2a0bcaeb189a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -352,8 +352,18 @@ more:
 	}
 
 	/* note next offset and last dentry name */
+	rinfo = &req->r_reply_info;
+	if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+		frag = le32_to_cpu(rinfo->dir_dir->frag);
+		if (ceph_frag_is_leftmost(frag))
+			fi->next_offset = 2;
+		else
+			fi->next_offset = 0;
+		off = fi->next_offset;
+	}
 	fi->offset = fi->next_offset;
 	fi->last_readdir = req;
+	fi->frag = frag;
 
 	if (req->r_reply_info.dir_end) {
 		kfree(fi->last_name);
@@ -363,7 +373,6 @@ more:
 		else
 			fi->next_offset = 0;
 	} else {
-		rinfo = &req->r_reply_info;
 		err = note_last_dentry(fi,
 				       rinfo->dir_dname[rinfo->dir_nr-1],
 				       rinfo->dir_dname_len[rinfo->dir_nr-1]);
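
The dir.c hunks and the inode.c changes below apply the same restart rule when the MDS replies for a different fragment than the one requested: offsets 0 and 1 are reserved for "." and "..", which exist only in the leftmost fragment. A hedged userspace sketch of the helpers involved, reconstructed from my reading of ceph_frag.h (high 8 bits of a frag are the split depth, low 24 bits its value; verify against the header before relying on this):

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t frag_value(uint32_t f)  { return f & 0xffffffu; }
    static int frag_is_leftmost(uint32_t f) { return frag_value(f) == 0; }

    /* Mirrors ceph_make_fpos(): fragment in the high 32 bits of the
     * directory file position, per-fragment offset in the low 32. */
    static int64_t make_fpos(uint32_t frag, uint32_t off)
    {
            return ((int64_t)frag << 32) | (int64_t)off;
    }

    int main(void)
    {
            uint32_t frag = 0x01800000;  /* 1-bit split, right half */
            uint32_t off = frag_is_leftmost(frag) ? 2 : 0;

            printf("restart readdir at fpos 0x%llx\n",
                   (unsigned long long)make_fpos(frag, off));
            return 0;
    }
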
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8549a48115f7..9a8e396aed89 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,6 +577,8 @@ static int fill_inode(struct inode *inode,
 	int issued = 0, implemented;
 	struct timespec mtime, atime, ctime;
 	u32 nsplits;
+	struct ceph_inode_frag *frag;
+	struct rb_node *rb_node;
 	struct ceph_buffer *xattr_blob = NULL;
 	int err = 0;
 	int queue_trunc = 0;
@@ -751,15 +753,38 @@ no_change:
 	/* FIXME: move me up, if/when version reflects fragtree changes */
 	nsplits = le32_to_cpu(info->fragtree.nsplits);
 	mutex_lock(&ci->i_fragtree_mutex);
+	rb_node = rb_first(&ci->i_fragtree);
 	for (i = 0; i < nsplits; i++) {
 		u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
-		struct ceph_inode_frag *frag = __get_or_create_frag(ci, id);
-
-		if (IS_ERR(frag))
-			continue;
+		frag = NULL;
+		while (rb_node) {
+			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+			if (ceph_frag_compare(frag->frag, id) >= 0) {
+				if (frag->frag != id)
+					frag = NULL;
+				else
+					rb_node = rb_next(rb_node);
+				break;
+			}
+			rb_node = rb_next(rb_node);
+			rb_erase(&frag->node, &ci->i_fragtree);
+			kfree(frag);
+			frag = NULL;
+		}
+		if (!frag) {
+			frag = __get_or_create_frag(ci, id);
+			if (IS_ERR(frag))
+				continue;
+		}
 		frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
 		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
 	}
+	while (rb_node) {
+		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+		rb_node = rb_next(rb_node);
+		rb_erase(&frag->node, &ci->i_fragtree);
+		kfree(frag);
+	}
 	mutex_unlock(&ci->i_fragtree_mutex);
 
 	/* were we issued a capability? */
@@ -1250,8 +1275,20 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	int err = 0, i;
 	struct inode *snapdir = NULL;
 	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
-	u64 frag = le32_to_cpu(rhead->args.readdir.frag);
 	struct ceph_dentry_info *di;
+	u64 r_readdir_offset = req->r_readdir_offset;
+	u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+
+	if (rinfo->dir_dir &&
+	    le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+		dout("readdir_prepopulate got new frag %x -> %x\n",
+		     frag, le32_to_cpu(rinfo->dir_dir->frag));
+		frag = le32_to_cpu(rinfo->dir_dir->frag);
+		if (ceph_frag_is_leftmost(frag))
+			r_readdir_offset = 2;
+		else
+			r_readdir_offset = 0;
+	}
 
 	if (req->r_aborted)
 		return readdir_prepopulate_inodes_only(req, session);
@@ -1315,7 +1352,7 @@ retry_lookup:
 		}
 
 		di = dn->d_fsdata;
-		di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
+		di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
 
 		/* inode */
 		if (dn->d_inode) {
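
The fill_inode() rewrite above replaces per-split __get_or_create_frag() lookups with one merge pass: the existing rb-tree and the server's splits array are both sorted, so a single forward walk can reuse matching entries, create missing ones, and delete everything the server no longer reports ("remove outdated frag information"). A hedged sketch of the same two-pointer walk over sorted arrays (the kernel iterates an rb-tree, which yields the same order):

    #include <stdio.h>

    /* Walk 'tree' (current entries) against 'want' (entries the server
     * reports), both sorted ascending; print the fate of each id. */
    static void merge_walk(const int *tree, int ntree,
                           const int *want, int nwant)
    {
            int t = 0;

            for (int w = 0; w < nwant; w++) {
                    while (t < ntree && tree[t] < want[w])
                            printf("drop %d\n", tree[t++]);  /* rb_erase+kfree */
                    if (t < ntree && tree[t] == want[w])
                            printf("keep %d\n", tree[t++]);  /* reuse frag */
                    else
                            printf("create %d\n", want[w]);  /* __get_or_create_frag */
            }
            while (t < ntree)                /* trailing stale entries */
                    printf("drop %d\n", tree[t++]);
    }

    int main(void)
    {
            int tree[] = { 1, 3, 5, 7 };
            int want[] = { 3, 4, 7 };

            /* prints: drop 1, keep 3, create 4, drop 5, keep 7 */
            merge_walk(tree, 4, want, 3);
            return 0;
    }
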
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b7bda5d9611d..d90861f45210 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -43,6 +43,7 @@
  */
 
 struct ceph_reconnect_state {
+	int nr_caps;
 	struct ceph_pagelist *pagelist;
 	bool flock;
 };
@@ -443,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	INIT_LIST_HEAD(&s->s_waiting);
 	INIT_LIST_HEAD(&s->s_unsafe);
 	s->s_num_cap_releases = 0;
+	s->s_cap_reconnect = 0;
 	s->s_cap_iterator = NULL;
 	INIT_LIST_HEAD(&s->s_cap_releases);
 	INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -642,6 +644,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 		req->r_unsafe_dir = NULL;
 	}
 
+	complete_all(&req->r_safe_completion);
+
 	ceph_mdsc_put_request(req);
 }
 
@@ -986,7 +990,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
 	spin_lock(&ci->i_ceph_lock);
-	__ceph_remove_cap(cap);
+	__ceph_remove_cap(cap, false);
 	if (!__ceph_is_any_real_caps(ci)) {
 		struct ceph_mds_client *mdsc =
 			ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1231,9 +1235,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 	session->s_trim_caps--;
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
-		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-				    cap->mseq, cap->issue_seq);
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, true);
 	} else {
 		/* try to drop referring dentries */
 		spin_unlock(&ci->i_ceph_lock);
@@ -1416,7 +1418,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 	unsigned num;
 
 	dout("discard_cap_releases mds%d\n", session->s_mds);
-	spin_lock(&session->s_cap_lock);
 
 	/* zero out the in-progress message */
 	msg = list_first_entry(&session->s_cap_releases,
@@ -1443,8 +1444,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 		msg->front.iov_len = sizeof(*head);
 		list_add(&msg->list_head, &session->s_cap_releases);
 	}
-
-	spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -1875,8 +1874,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
 	int mds = -1;
 	int err = -EAGAIN;
 
-	if (req->r_err || req->r_got_result)
+	if (req->r_err || req->r_got_result) {
+		if (req->r_aborted)
+			__unregister_request(mdsc, req);
 		goto out;
+	}
 
 	if (req->r_timeout &&
 	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2186,7 +2188,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	if (head->safe) {
 		req->r_got_safe = true;
 		__unregister_request(mdsc, req);
-		complete_all(&req->r_safe_completion);
 
 		if (req->r_got_unsafe) {
 			/*
@@ -2238,8 +2239,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
 	if (err == 0) {
 		if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
-				    req->r_op == CEPH_MDS_OP_LSSNAP) &&
-		    rinfo->dir_nr)
+				    req->r_op == CEPH_MDS_OP_LSSNAP))
 			ceph_readdir_prepopulate(req, req->r_session);
 		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
 	}
@@ -2490,6 +2490,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	cap->seq = 0;        /* reset cap seq */
 	cap->issue_seq = 0;  /* and issue_seq */
 	cap->mseq = 0;       /* and migrate_seq */
+	cap->cap_gen = cap->session->s_cap_gen;
 
 	if (recon_state->flock) {
 		rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2552,6 +2553,8 @@ encode_again:
 	} else {
 		err = ceph_pagelist_append(pagelist, &rec, reclen);
 	}
+
+	recon_state->nr_caps++;
 out_free:
 	kfree(path);
 out_dput:
@@ -2579,6 +2582,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	struct rb_node *p;
 	int mds = session->s_mds;
 	int err = -ENOMEM;
+	int s_nr_caps;
 	struct ceph_pagelist *pagelist;
 	struct ceph_reconnect_state recon_state;
 
@@ -2610,20 +2614,38 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	dout("session %p state %s\n", session,
 	     session_state_name(session->s_state));
 
+	spin_lock(&session->s_gen_ttl_lock);
+	session->s_cap_gen++;
+	spin_unlock(&session->s_gen_ttl_lock);
+
+	spin_lock(&session->s_cap_lock);
+	/*
+	 * notify __ceph_remove_cap() that we are composing cap reconnect.
+	 * If a cap get released before being added to the cap reconnect,
+	 * __ceph_remove_cap() should skip queuing cap release.
+	 */
+	session->s_cap_reconnect = 1;
 	/* drop old cap expires; we're about to reestablish that state */
 	discard_cap_releases(mdsc, session);
+	spin_unlock(&session->s_cap_lock);
 
 	/* traverse this session's caps */
-	err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
+	s_nr_caps = session->s_nr_caps;
+	err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
 	if (err)
 		goto fail;
 
+	recon_state.nr_caps = 0;
 	recon_state.pagelist = pagelist;
 	recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
 	err = iterate_session_caps(session, encode_caps_cb, &recon_state);
 	if (err < 0)
 		goto fail;
 
+	spin_lock(&session->s_cap_lock);
+	session->s_cap_reconnect = 0;
+	spin_unlock(&session->s_cap_lock);
+
 	/*
 	 * snaprealms. we provide mds with the ino, seq (version), and
 	 * parent for all of our realms. If the mds has any newer info,
@@ -2646,11 +2668,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 
 	if (recon_state.flock)
 		reply->hdr.version = cpu_to_le16(2);
-	if (pagelist->length) {
-		/* set up outbound data if we have any */
-		reply->hdr.data_len = cpu_to_le32(pagelist->length);
-		ceph_msg_data_add_pagelist(reply, pagelist);
+
+	/* raced with cap release? */
+	if (s_nr_caps != recon_state.nr_caps) {
+		struct page *page = list_first_entry(&pagelist->head,
+						     struct page, lru);
+		__le32 *addr = kmap_atomic(page);
+		*addr = cpu_to_le32(recon_state.nr_caps);
+		kunmap_atomic(addr);
 	}
+
+	reply->hdr.data_len = cpu_to_le32(pagelist->length);
+	ceph_msg_data_add_pagelist(reply, pagelist);
 	ceph_con_send(&session->s_con, reply);
 
 	mutex_unlock(&session->s_mutex);
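
The "set caps count after composing cap reconnect message" fix works by encoding session->s_nr_caps up front (the wire format puts the count first), counting the records actually emitted in recon_state.nr_caps, and patching the 32-bit count at the head of the message if a concurrent release shrank the set; the kernel does the patch through kmap_atomic() on the pagelist's first page. A hedged sketch of the encode-then-patch pattern over a flat buffer (byte-order handling omitted; the kernel uses cpu_to_le32):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
            uint8_t buf[64];
            size_t len = 0;
            uint32_t expected = 3, written = 0;

            /* 1. the count goes first, before the records are known */
            memcpy(buf + len, &expected, sizeof(expected));
            len += sizeof(expected);

            /* 2. emit records; a racing release may remove one */
            for (uint32_t i = 0; i < expected; i++) {
                    if (i == 1)
                            continue;        /* cap released mid-walk */
                    memcpy(buf + len, &i, sizeof(i));  /* fake record */
                    len += sizeof(i);
                    written++;
            }

            /* 3. mismatch? rewrite the count header in place */
            if (written != expected)
                    memcpy(buf, &written, sizeof(written));

            printf("sent %zu bytes claiming %u caps\n", len, written);
            return 0;
    }
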
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fbbe517..4c053d099ae4 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
 	struct list_head  s_caps;     /* all caps issued by this session */
 	int               s_nr_caps, s_trim_caps;
 	int               s_num_cap_releases;
+	int               s_cap_reconnect;
 	struct list_head  s_cap_releases; /* waiting cap_release messages */
 	struct list_head  s_cap_releases_done; /* ready to send */
 	struct ceph_cap  *s_cap_iterator;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6014b0a3c405..ef4ac38bb614 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -741,13 +741,7 @@ extern int ceph_add_cap(struct inode *inode,
 			 int fmode, unsigned issued, unsigned wanted,
 			 unsigned cap, unsigned seq, u64 realmino, int flags,
 			 struct ceph_cap_reservation *caps_reservation);
-extern void __ceph_remove_cap(struct ceph_cap *cap);
-static inline void ceph_remove_cap(struct ceph_cap *cap)
-{
-	spin_lock(&cap->ci->i_ceph_lock);
-	__ceph_remove_cap(cap);
-	spin_unlock(&cap->ci->i_ceph_lock);
-}
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
 			 struct ceph_cap *cap);
 