author      Linus Torvalds <torvalds@linux-foundation.org>    2013-11-26 21:02:46 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org>    2013-11-26 21:02:46 -0500
commit      4f9e5df211a8591b3fb5c154ecc7ec5d4ecd6b79 (patch)
tree        ba4e3980540d5bf082bc38f0107b628b2b726a25 /fs/ceph
parent      0e4b0743bbe5807535ba1b0389281f9a4c1b2bb7 (diff)
parent      ff638b7df5a9264024a6448bdfde2b2bf5d1994a (diff)
Merge branch 'for-linus-bugs' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph bug-fixes from Sage Weil:
"These include a couple fixes to the new fscache code that went in
during the last cycle (which will need to go stable@ shortly as well),
a couple client-side directory fragmentation fixes, a fix for a race
in the cap release queuing path, and a couple race fixes in the
request abort and resend code.
Obviously some of this could have gone into 3.12 final, but I
preferred to overtest rather than send things in for a late -rc, and
then my travel schedule intervened"
* 'for-linus-bugs' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: allocate non-zero page to fscache in readpage()
ceph: wake up 'safe' waiters when unregistering request
ceph: cleanup aborted requests when re-sending requests.
ceph: handle race between cap reconnect and cap release
ceph: set caps count after composing cap reconnect message
ceph: queue cap release in __ceph_remove_cap()
ceph: handle frag mismatch between readdir request and reply
ceph: remove outdated frag information
ceph: hung on ceph fscache invalidate in some cases
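
A thread running through the cap fixes below: __ceph_remove_cap() grows a
queue_release argument, and a new per-session s_cap_reconnect flag tells it
whether a cap reconnect message is currently being composed. For orientation,
here is a stand-alone model of that guard, plain user-space C with simplified
types, not the kernel structures in the diffs that follow:

#include <stdbool.h>
#include <stdio.h>

struct session {
        int s_cap_reconnect;    /* composing a cap reconnect message? */
        int s_cap_gen;          /* current session generation */
};

struct cap {
        int cap_gen;            /* generation the cap was issued under */
};

/* mirrors the condition added to __ceph_remove_cap() in fs/ceph/caps.c */
static bool should_queue_release(const struct session *s,
                                 const struct cap *c, bool queue_release)
{
        return queue_release &&
               (!s->s_cap_reconnect || c->cap_gen == s->s_cap_gen);
}

int main(void)
{
        struct session s = { .s_cap_reconnect = 1, .s_cap_gen = 2 };
        struct cap stale = { .cap_gen = 1 }, fresh = { .cap_gen = 2 };

        /* cap from the old generation released mid-reconnect: skip it */
        printf("stale cap: queue=%d\n", should_queue_release(&s, &stale, true));
        /* cap already re-added under the new generation: queue it */
        printf("fresh cap: queue=%d\n", should_queue_release(&s, &fresh, true));
        return 0;
}

In the kernel this check sits under s_cap_lock, the same lock under which
send_mds_reconnect() toggles s_cap_reconnect, so the two cannot race.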
Diffstat (limited to 'fs/ceph')
-rw-r--r--   fs/ceph/addr.c        |  2
-rw-r--r--   fs/ceph/cache.c       |  3
-rw-r--r--   fs/ceph/caps.c        | 27
-rw-r--r--   fs/ceph/dir.c         | 11
-rw-r--r--   fs/ceph/inode.c       | 49
-rw-r--r--   fs/ceph/mds_client.c  | 61
-rw-r--r--   fs/ceph/mds_client.h  |  1
-rw-r--r--   fs/ceph/super.h       |  8

8 files changed, 121 insertions(+), 41 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6df8bd481425..1e561c059539 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -216,7 +216,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 	}
 	SetPageUptodate(page);
 
-	if (err == 0)
+	if (err >= 0)
 		ceph_readpage_to_fscache(inode, page);
 
 out:
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 7db2e6ca4b8f..8c44fdd4e1c3 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -324,6 +324,9 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 
+	if (!PageFsCache(page))
+		return;
+
 	fscache_wait_on_page_write(ci->fscache, page);
 	fscache_uncache_page(ci->fscache, page);
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 13976c33332e..3c0a4bd74996 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
  * caller should hold i_ceph_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
-void __ceph_remove_cap(struct ceph_cap *cap)
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 {
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_inode_info *ci = cap->ci;
@@ -909,6 +909,16 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
+	/*
+	 * s_cap_reconnect is protected by s_cap_lock. no one changes
+	 * s_cap_gen while session is in the reconnect state.
+	 */
+	if (queue_release &&
+	    (!session->s_cap_reconnect ||
+	     cap->cap_gen == session->s_cap_gen))
+		__queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+				    cap->mseq, cap->issue_seq);
+
 	if (session->s_cap_iterator == cap) {
 		/* not yet, we are iterating over this very cap */
 		dout("__ceph_remove_cap delaying %p removal from session %p\n",
@@ -1023,7 +1033,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
 	struct ceph_mds_cap_release *head;
 	struct ceph_mds_cap_item *item;
 
-	spin_lock(&session->s_cap_lock);
 	BUG_ON(!session->s_num_cap_releases);
 	msg = list_first_entry(&session->s_cap_releases,
 			       struct ceph_msg, list_head);
@@ -1052,7 +1061,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
 		     (int)CEPH_CAPS_PER_RELEASE,
 		     (int)msg->front.iov_len);
 	}
-	spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -1067,12 +1075,8 @@ void ceph_queue_caps_release(struct inode *inode)
 	p = rb_first(&ci->i_caps);
 	while (p) {
 		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
-		struct ceph_mds_session *session = cap->session;
-
-		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-				    cap->mseq, cap->issue_seq);
 		p = rb_next(p);
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, true);
 	}
 }
 
@@ -2791,7 +2795,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			}
 			spin_unlock(&mdsc->cap_dirty_lock);
 		}
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, false);
 	}
 	/* else, we already released it */
 
@@ -2931,9 +2935,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	if (!inode) {
 		dout(" i don't have ino %llx\n", vino.ino);
 
-		if (op == CEPH_CAP_OP_IMPORT)
+		if (op == CEPH_CAP_OP_IMPORT) {
+			spin_lock(&session->s_cap_lock);
 			__queue_cap_release(session, vino.ino, cap_id,
 					    mseq, seq);
+			spin_unlock(&session->s_cap_lock);
+		}
 		goto flush_cap_releases;
 	}
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 868b61d56cac..2a0bcaeb189a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -352,8 +352,18 @@ more:
 		}
 
 		/* note next offset and last dentry name */
+		rinfo = &req->r_reply_info;
+		if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+			frag = le32_to_cpu(rinfo->dir_dir->frag);
+			if (ceph_frag_is_leftmost(frag))
+				fi->next_offset = 2;
+			else
+				fi->next_offset = 0;
+			off = fi->next_offset;
+		}
 		fi->offset = fi->next_offset;
 		fi->last_readdir = req;
+		fi->frag = frag;
 
 		if (req->r_reply_info.dir_end) {
 			kfree(fi->last_name);
@@ -363,7 +373,6 @@ more:
 			else
 				fi->next_offset = 0;
 		} else {
-			rinfo = &req->r_reply_info;
 			err = note_last_dentry(fi,
 				       rinfo->dir_dname[rinfo->dir_nr-1],
 				       rinfo->dir_dname_len[rinfo->dir_nr-1]);
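
The same fragment-offset rule appears again in the inode.c hunks below
(ceph_readdir_prepopulate). As a sketch of why the reset lands on 2 or 0,
here is a self-contained model; frag_is_leftmost() is a simplified stand-in
for the kernel's ceph_frag_is_leftmost(), assuming the usual encoding with
the fragment value in the low 24 bits:

#include <stdint.h>
#include <stdio.h>

/*
 * Simplified stand-in: the leftmost fragment is the one whose value
 * bits (low 24 bits of the frag) are all zero.
 */
static int frag_is_leftmost(uint32_t frag)
{
        return (frag & 0xffffffu) == 0;
}

/* offset to restart readdir at when switching to a new fragment */
static unsigned next_offset_for(uint32_t frag)
{
        /* slots 0 and 1 of the leftmost fragment are "." and ".." */
        return frag_is_leftmost(frag) ? 2 : 0;
}

int main(void)
{
        uint32_t leftmost = 0x01000000;  /* 1 split bit, value 0 */
        uint32_t other    = 0x01800000;  /* 1 split bit, value 1 */

        printf("leftmost frag restarts at %u\n", next_offset_for(leftmost));
        printf("other frag restarts at %u\n", next_offset_for(other));
        return 0;
}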
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8549a48115f7..9a8e396aed89 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,6 +577,8 @@ static int fill_inode(struct inode *inode,
 	int issued = 0, implemented;
 	struct timespec mtime, atime, ctime;
 	u32 nsplits;
+	struct ceph_inode_frag *frag;
+	struct rb_node *rb_node;
 	struct ceph_buffer *xattr_blob = NULL;
 	int err = 0;
 	int queue_trunc = 0;
@@ -751,15 +753,38 @@ no_change:
 	/* FIXME: move me up, if/when version reflects fragtree changes */
 	nsplits = le32_to_cpu(info->fragtree.nsplits);
 	mutex_lock(&ci->i_fragtree_mutex);
+	rb_node = rb_first(&ci->i_fragtree);
 	for (i = 0; i < nsplits; i++) {
 		u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
-		struct ceph_inode_frag *frag = __get_or_create_frag(ci, id);
-
-		if (IS_ERR(frag))
-			continue;
+		frag = NULL;
+		while (rb_node) {
+			frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+			if (ceph_frag_compare(frag->frag, id) >= 0) {
+				if (frag->frag != id)
+					frag = NULL;
+				else
+					rb_node = rb_next(rb_node);
+				break;
+			}
+			rb_node = rb_next(rb_node);
+			rb_erase(&frag->node, &ci->i_fragtree);
+			kfree(frag);
+			frag = NULL;
+		}
+		if (!frag) {
+			frag = __get_or_create_frag(ci, id);
+			if (IS_ERR(frag))
+				continue;
+		}
 		frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
 		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
 	}
+	while (rb_node) {
+		frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+		rb_node = rb_next(rb_node);
+		rb_erase(&frag->node, &ci->i_fragtree);
+		kfree(frag);
+	}
 	mutex_unlock(&ci->i_fragtree_mutex);
 
 	/* were we issued a capability? */
@@ -1250,8 +1275,20 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	int err = 0, i;
 	struct inode *snapdir = NULL;
 	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
-	u64 frag = le32_to_cpu(rhead->args.readdir.frag);
 	struct ceph_dentry_info *di;
+	u64 r_readdir_offset = req->r_readdir_offset;
+	u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+
+	if (rinfo->dir_dir &&
+	    le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+		dout("readdir_prepopulate got new frag %x -> %x\n",
+		     frag, le32_to_cpu(rinfo->dir_dir->frag));
+		frag = le32_to_cpu(rinfo->dir_dir->frag);
+		if (ceph_frag_is_leftmost(frag))
+			r_readdir_offset = 2;
+		else
+			r_readdir_offset = 0;
+	}
 
 	if (req->r_aborted)
 		return readdir_prepopulate_inodes_only(req, session);
@@ -1315,7 +1352,7 @@ retry_lookup:
 		}
 
 		di = dn->d_fsdata;
-		di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
+		di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
 
 		/* inode */
 		if (dn->d_inode) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b7bda5d9611d..d90861f45210 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -43,6 +43,7 @@
  */
 
 struct ceph_reconnect_state {
+	int nr_caps;
 	struct ceph_pagelist *pagelist;
 	bool flock;
 };
@@ -443,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	INIT_LIST_HEAD(&s->s_waiting);
 	INIT_LIST_HEAD(&s->s_unsafe);
 	s->s_num_cap_releases = 0;
+	s->s_cap_reconnect = 0;
 	s->s_cap_iterator = NULL;
 	INIT_LIST_HEAD(&s->s_cap_releases);
 	INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -642,6 +644,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 		req->r_unsafe_dir = NULL;
 	}
 
+	complete_all(&req->r_safe_completion);
+
 	ceph_mdsc_put_request(req);
 }
 
@@ -986,7 +990,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
 	spin_lock(&ci->i_ceph_lock);
-	__ceph_remove_cap(cap);
+	__ceph_remove_cap(cap, false);
 	if (!__ceph_is_any_real_caps(ci)) {
 		struct ceph_mds_client *mdsc =
 			ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1231,9 +1235,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
 	session->s_trim_caps--;
 	if (oissued) {
 		/* we aren't the only cap.. just remove us */
-		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-				    cap->mseq, cap->issue_seq);
-		__ceph_remove_cap(cap);
+		__ceph_remove_cap(cap, true);
 	} else {
 		/* try to drop referring dentries */
 		spin_unlock(&ci->i_ceph_lock);
@@ -1416,7 +1418,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 	unsigned num;
 
 	dout("discard_cap_releases mds%d\n", session->s_mds);
-	spin_lock(&session->s_cap_lock);
 
 	/* zero out the in-progress message */
 	msg = list_first_entry(&session->s_cap_releases,
@@ -1443,8 +1444,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
 		msg->front.iov_len = sizeof(*head);
 		list_add(&msg->list_head, &session->s_cap_releases);
 	}
-
-	spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -1875,8 +1874,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
 	int mds = -1;
 	int err = -EAGAIN;
 
-	if (req->r_err || req->r_got_result)
+	if (req->r_err || req->r_got_result) {
+		if (req->r_aborted)
+			__unregister_request(mdsc, req);
 		goto out;
+	}
 
 	if (req->r_timeout &&
 	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2186,7 +2188,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	if (head->safe) {
 		req->r_got_safe = true;
 		__unregister_request(mdsc, req);
-		complete_all(&req->r_safe_completion);
 
 		if (req->r_got_unsafe) {
 			/*
@@ -2238,8 +2239,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
 	if (err == 0) {
 		if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
-				    req->r_op == CEPH_MDS_OP_LSSNAP) &&
-		    rinfo->dir_nr)
+				    req->r_op == CEPH_MDS_OP_LSSNAP))
 			ceph_readdir_prepopulate(req, req->r_session);
 		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
 	}
@@ -2490,6 +2490,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	cap->seq = 0;        /* reset cap seq */
 	cap->issue_seq = 0;  /* and issue_seq */
 	cap->mseq = 0;       /* and migrate_seq */
+	cap->cap_gen = cap->session->s_cap_gen;
 
 	if (recon_state->flock) {
 		rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2552,6 +2553,8 @@ encode_again:
 	} else {
 		err = ceph_pagelist_append(pagelist, &rec, reclen);
 	}
+
+	recon_state->nr_caps++;
 out_free:
 	kfree(path);
 out_dput:
@@ -2579,6 +2582,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	struct rb_node *p;
 	int mds = session->s_mds;
 	int err = -ENOMEM;
+	int s_nr_caps;
 	struct ceph_pagelist *pagelist;
 	struct ceph_reconnect_state recon_state;
 
@@ -2610,20 +2614,38 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	dout("session %p state %s\n", session,
 	     session_state_name(session->s_state));
 
+	spin_lock(&session->s_gen_ttl_lock);
+	session->s_cap_gen++;
+	spin_unlock(&session->s_gen_ttl_lock);
+
+	spin_lock(&session->s_cap_lock);
+	/*
+	 * notify __ceph_remove_cap() that we are composing cap reconnect.
+	 * If a cap gets released before being added to the cap reconnect,
+	 * __ceph_remove_cap() should skip queuing cap release.
+	 */
+	session->s_cap_reconnect = 1;
 	/* drop old cap expires; we're about to reestablish that state */
 	discard_cap_releases(mdsc, session);
+	spin_unlock(&session->s_cap_lock);
 
 	/* traverse this session's caps */
-	err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
+	s_nr_caps = session->s_nr_caps;
+	err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
 	if (err)
 		goto fail;
 
+	recon_state.nr_caps = 0;
 	recon_state.pagelist = pagelist;
 	recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
 	err = iterate_session_caps(session, encode_caps_cb, &recon_state);
 	if (err < 0)
 		goto fail;
 
+	spin_lock(&session->s_cap_lock);
+	session->s_cap_reconnect = 0;
+	spin_unlock(&session->s_cap_lock);
+
 	/*
 	 * snaprealms. we provide mds with the ino, seq (version), and
 	 * parent for all of our realms. If the mds has any newer info,
@@ -2646,11 +2668,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 
 	if (recon_state.flock)
 		reply->hdr.version = cpu_to_le16(2);
-	if (pagelist->length) {
-		/* set up outbound data if we have any */
-		reply->hdr.data_len = cpu_to_le32(pagelist->length);
-		ceph_msg_data_add_pagelist(reply, pagelist);
+
+	/* raced with cap release? */
+	if (s_nr_caps != recon_state.nr_caps) {
+		struct page *page = list_first_entry(&pagelist->head,
+						     struct page, lru);
+		__le32 *addr = kmap_atomic(page);
+		*addr = cpu_to_le32(recon_state.nr_caps);
+		kunmap_atomic(addr);
 	}
+
+	reply->hdr.data_len = cpu_to_le32(pagelist->length);
+	ceph_msg_data_add_pagelist(reply, pagelist);
 	ceph_con_send(&session->s_con, reply);
 
 	mutex_unlock(&session->s_mutex);
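
The last two send_mds_reconnect() hunks work as a pair: the cap count is
encoded into the pagelist before the session's caps are iterated, so when a
cap is released mid-walk the 32-bit count at the head of the pagelist has to
be rewritten afterward. A stand-alone model of that fix-up (a flat buffer in
place of the pagelist, host byte order in place of cpu_to_le32(), and the
kernel patches the first page via kmap_atomic() instead):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        uint8_t buf[64];                /* stands in for the pagelist */
        uint32_t s_nr_caps = 3;         /* cap count snapshotted up front */
        uint32_t nr_caps = 0;           /* caps actually encoded */
        size_t off = 0;

        /* the wire format puts the count first, before the cap records */
        memcpy(buf + off, &s_nr_caps, sizeof(s_nr_caps));
        off += sizeof(s_nr_caps);

        /* pretend one cap was released while iterating: only 2 encoded */
        nr_caps = 2;

        /* raced with cap release?  rewrite the count in place */
        if (s_nr_caps != nr_caps)
                memcpy(buf, &nr_caps, sizeof(nr_caps));

        uint32_t sent;
        memcpy(&sent, buf, sizeof(sent));
        printf("cap count on the wire: %u\n", sent);
        return 0;
}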
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fbbe517..4c053d099ae4 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
 	struct list_head  s_caps;     /* all caps issued by this session */
 	int               s_nr_caps, s_trim_caps;
 	int               s_num_cap_releases;
+	int               s_cap_reconnect;
 	struct list_head  s_cap_releases; /* waiting cap_release messages */
 	struct list_head  s_cap_releases_done; /* ready to send */
 	struct ceph_cap  *s_cap_iterator;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6014b0a3c405..ef4ac38bb614 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -741,13 +741,7 @@ extern int ceph_add_cap(struct inode *inode,
 			int fmode, unsigned issued, unsigned wanted,
 			unsigned cap, unsigned seq, u64 realmino, int flags,
 			struct ceph_cap_reservation *caps_reservation);
-extern void __ceph_remove_cap(struct ceph_cap *cap);
-static inline void ceph_remove_cap(struct ceph_cap *cap)
-{
-	spin_lock(&cap->ci->i_ceph_lock);
-	__ceph_remove_cap(cap);
-	spin_unlock(&cap->ci->i_ceph_lock);
-}
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
 			 struct ceph_cap *cap);
 