author    Linus Torvalds <torvalds@linux-foundation.org>    2013-11-26 21:02:46 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2013-11-26 21:02:46 -0500
commit    4f9e5df211a8591b3fb5c154ecc7ec5d4ecd6b79
tree      ba4e3980540d5bf082bc38f0107b628b2b726a25
parent    0e4b0743bbe5807535ba1b0389281f9a4c1b2bb7
parent    ff638b7df5a9264024a6448bdfde2b2bf5d1994a
Merge branch 'for-linus-bugs' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph bug-fixes from Sage Weil:
"These include a couple fixes to the new fscache code that went in
during the last cycle (which will need to go stable@ shortly as well),
a couple client-side directory fragmentation fixes, a fix for a race
in the cap release queuing path, and a couple race fixes in the
request abort and resend code.
Obviously some of this could have gone into 3.12 final, but I
preferred to overtest rather than send things in for a late -rc, and
then my travel schedule intervened"
* 'for-linus-bugs' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: allocate non-zero page to fscache in readpage()
ceph: wake up 'safe' waiters when unregistering request
ceph: cleanup aborted requests when re-sending requests.
ceph: handle race between cap reconnect and cap release
ceph: set caps count after composing cap reconnect message
ceph: queue cap release in __ceph_remove_cap()
ceph: handle frag mismatch between readdir request and reply
ceph: remove outdated frag information
ceph: hung on ceph fscache invalidate in some cases
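The race addressed by "queue cap release in __ceph_remove_cap()" comes down to a session-level flag, set while the reconnect message is being composed, that tells the cap-removal path whether it may still queue a release for a cap of the current generation. The sketch below is a simplified stand-in, not the ceph code itself: plain structs and a single pthread mutex replace the kernel's ceph_mds_session state and its two spinlocks (s_gen_ttl_lock for the generation, s_cap_lock for the flag), and the release queue is stubbed out. Only the locking pattern mirrors the fs/ceph/caps.c and fs/ceph/mds_client.c hunks further down.

/* Simplified model of the s_cap_reconnect handshake in this merge.
 * All types are stand-ins; the kernel splits the work across
 * s_gen_ttl_lock and s_cap_lock, collapsed here into one mutex. */
#include <pthread.h>
#include <stdbool.h>

struct session {
        pthread_mutex_t cap_lock;   /* stands in for s_cap_lock */
        int cap_reconnect;          /* stands in for s_cap_reconnect */
        int cap_gen;                /* stands in for s_cap_gen */
};

struct cap {
        struct session *session;
        int cap_gen;                /* generation the cap was issued under */
};

static void queue_cap_release(struct session *s, struct cap *c)
{
        /* in the kernel this appends to s_cap_releases; omitted here */
        (void)s; (void)c;
}

/* Mirrors the new __ceph_remove_cap(cap, queue_release) logic: only
 * queue a release if no reconnect is being composed, or the cap still
 * belongs to the current generation. */
static void remove_cap(struct cap *c, bool queue_release)
{
        struct session *s = c->session;

        pthread_mutex_lock(&s->cap_lock);
        if (queue_release &&
            (!s->cap_reconnect || c->cap_gen == s->cap_gen))
                queue_cap_release(s, c);
        pthread_mutex_unlock(&s->cap_lock);
}

/* Mirrors the start of send_mds_reconnect(): bump the generation and
 * raise the flag before walking the session's caps. */
static void begin_reconnect(struct session *s)
{
        pthread_mutex_lock(&s->cap_lock);
        s->cap_gen++;
        s->cap_reconnect = 1;
        pthread_mutex_unlock(&s->cap_lock);
}

int main(void)
{
        struct session s = { PTHREAD_MUTEX_INITIALIZER, 0, 1 };
        struct cap c = { &s, 1 };

        remove_cap(&c, true);   /* no reconnect in progress: release queued */
        begin_reconnect(&s);    /* generation becomes 2, flag set */
        remove_cap(&c, true);   /* stale generation during reconnect: skipped */
        return 0;
}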
 fs/ceph/addr.c       |  2
 fs/ceph/cache.c      |  3
 fs/ceph/caps.c       | 27
 fs/ceph/dir.c        | 11
 fs/ceph/inode.c      | 49
 fs/ceph/mds_client.c | 61
 fs/ceph/mds_client.h |  1
 fs/ceph/super.h      |  8
 8 files changed, 121 insertions(+), 41 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6df8bd481425..1e561c059539 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -216,7 +216,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
         }
         SetPageUptodate(page);
 
-        if (err == 0)
+        if (err >= 0)
                 ceph_readpage_to_fscache(inode, page);
 
 out:
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 7db2e6ca4b8f..8c44fdd4e1c3 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -324,6 +324,9 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
 {
         struct ceph_inode_info *ci = ceph_inode(inode);
 
+        if (!PageFsCache(page))
+                return;
+
         fscache_wait_on_page_write(ci->fscache, page);
         fscache_uncache_page(ci->fscache, page);
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 13976c33332e..3c0a4bd74996 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
  * caller should hold i_ceph_lock.
  * caller will not hold session s_mutex if called from destroy_inode.
  */
-void __ceph_remove_cap(struct ceph_cap *cap)
+void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 {
         struct ceph_mds_session *session = cap->session;
         struct ceph_inode_info *ci = cap->ci;
@@ -909,6 +909,16 @@ void __ceph_remove_cap(struct ceph_cap *cap)
 
         /* remove from session list */
         spin_lock(&session->s_cap_lock);
+        /*
+         * s_cap_reconnect is protected by s_cap_lock. no one changes
+         * s_cap_gen while session is in the reconnect state.
+         */
+        if (queue_release &&
+            (!session->s_cap_reconnect ||
+             cap->cap_gen == session->s_cap_gen))
+                __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
+                                    cap->mseq, cap->issue_seq);
+
         if (session->s_cap_iterator == cap) {
                 /* not yet, we are iterating over this very cap */
                 dout("__ceph_remove_cap delaying %p removal from session %p\n",
@@ -1023,7 +1033,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
         struct ceph_mds_cap_release *head;
         struct ceph_mds_cap_item *item;
 
-        spin_lock(&session->s_cap_lock);
         BUG_ON(!session->s_num_cap_releases);
         msg = list_first_entry(&session->s_cap_releases,
                                struct ceph_msg, list_head);
@@ -1052,7 +1061,6 @@ void __queue_cap_release(struct ceph_mds_session *session,
                      (int)CEPH_CAPS_PER_RELEASE,
                      (int)msg->front.iov_len);
         }
-        spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -1067,12 +1075,8 @@ void ceph_queue_caps_release(struct inode *inode)
         p = rb_first(&ci->i_caps);
         while (p) {
                 struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
-                struct ceph_mds_session *session = cap->session;
-
-                __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-                                    cap->mseq, cap->issue_seq);
                 p = rb_next(p);
-                __ceph_remove_cap(cap);
+                __ceph_remove_cap(cap, true);
         }
 }
 
@@ -2791,7 +2795,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
                         }
                         spin_unlock(&mdsc->cap_dirty_lock);
                 }
-                __ceph_remove_cap(cap);
+                __ceph_remove_cap(cap, false);
         }
         /* else, we already released it */
 
@@ -2931,9 +2935,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
         if (!inode) {
                 dout(" i don't have ino %llx\n", vino.ino);
 
-                if (op == CEPH_CAP_OP_IMPORT)
+                if (op == CEPH_CAP_OP_IMPORT) {
+                        spin_lock(&session->s_cap_lock);
                         __queue_cap_release(session, vino.ino, cap_id,
                                             mseq, seq);
+                        spin_unlock(&session->s_cap_lock);
+                }
                 goto flush_cap_releases;
         }
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 868b61d56cac..2a0bcaeb189a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -352,8 +352,18 @@ more:
         }
 
         /* note next offset and last dentry name */
+        rinfo = &req->r_reply_info;
+        if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+                frag = le32_to_cpu(rinfo->dir_dir->frag);
+                if (ceph_frag_is_leftmost(frag))
+                        fi->next_offset = 2;
+                else
+                        fi->next_offset = 0;
+                off = fi->next_offset;
+        }
         fi->offset = fi->next_offset;
         fi->last_readdir = req;
+        fi->frag = frag;
 
         if (req->r_reply_info.dir_end) {
                 kfree(fi->last_name);
@@ -363,7 +373,6 @@ more:
                 else
                         fi->next_offset = 0;
         } else {
-                rinfo = &req->r_reply_info;
                 err = note_last_dentry(fi,
                                rinfo->dir_dname[rinfo->dir_nr-1],
                                rinfo->dir_dname_len[rinfo->dir_nr-1]);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8549a48115f7..9a8e396aed89 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,6 +577,8 @@ static int fill_inode(struct inode *inode,
         int issued = 0, implemented;
         struct timespec mtime, atime, ctime;
         u32 nsplits;
+        struct ceph_inode_frag *frag;
+        struct rb_node *rb_node;
         struct ceph_buffer *xattr_blob = NULL;
         int err = 0;
         int queue_trunc = 0;
@@ -751,15 +753,38 @@ no_change:
         /* FIXME: move me up, if/when version reflects fragtree changes */
         nsplits = le32_to_cpu(info->fragtree.nsplits);
         mutex_lock(&ci->i_fragtree_mutex);
+        rb_node = rb_first(&ci->i_fragtree);
         for (i = 0; i < nsplits; i++) {
                 u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
-                struct ceph_inode_frag *frag = __get_or_create_frag(ci, id);
-
-                if (IS_ERR(frag))
-                        continue;
+                frag = NULL;
+                while (rb_node) {
+                        frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+                        if (ceph_frag_compare(frag->frag, id) >= 0) {
+                                if (frag->frag != id)
+                                        frag = NULL;
+                                else
+                                        rb_node = rb_next(rb_node);
+                                break;
+                        }
+                        rb_node = rb_next(rb_node);
+                        rb_erase(&frag->node, &ci->i_fragtree);
+                        kfree(frag);
+                        frag = NULL;
+                }
+                if (!frag) {
+                        frag = __get_or_create_frag(ci, id);
+                        if (IS_ERR(frag))
+                                continue;
+                }
                 frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
                 dout(" frag %x split by %d\n", frag->frag, frag->split_by);
         }
+        while (rb_node) {
+                frag = rb_entry(rb_node, struct ceph_inode_frag, node);
+                rb_node = rb_next(rb_node);
+                rb_erase(&frag->node, &ci->i_fragtree);
+                kfree(frag);
+        }
         mutex_unlock(&ci->i_fragtree_mutex);
 
         /* were we issued a capability? */
@@ -1250,8 +1275,20 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
         int err = 0, i;
         struct inode *snapdir = NULL;
         struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
-        u64 frag = le32_to_cpu(rhead->args.readdir.frag);
         struct ceph_dentry_info *di;
+        u64 r_readdir_offset = req->r_readdir_offset;
+        u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+
+        if (rinfo->dir_dir &&
+            le32_to_cpu(rinfo->dir_dir->frag) != frag) {
+                dout("readdir_prepopulate got new frag %x -> %x\n",
+                     frag, le32_to_cpu(rinfo->dir_dir->frag));
+                frag = le32_to_cpu(rinfo->dir_dir->frag);
+                if (ceph_frag_is_leftmost(frag))
+                        r_readdir_offset = 2;
+                else
+                        r_readdir_offset = 0;
+        }
 
         if (req->r_aborted)
                 return readdir_prepopulate_inodes_only(req, session);
@@ -1315,7 +1352,7 @@ retry_lookup:
                 }
 
                 di = dn->d_fsdata;
-                di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
+                di->offset = ceph_make_fpos(frag, i + r_readdir_offset);
 
                 /* inode */
                 if (dn->d_inode) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b7bda5d9611d..d90861f45210 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -43,6 +43,7 @@
  */
 
 struct ceph_reconnect_state {
+        int nr_caps;
         struct ceph_pagelist *pagelist;
         bool flock;
 };
@@ -443,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
         INIT_LIST_HEAD(&s->s_waiting);
         INIT_LIST_HEAD(&s->s_unsafe);
         s->s_num_cap_releases = 0;
+        s->s_cap_reconnect = 0;
         s->s_cap_iterator = NULL;
         INIT_LIST_HEAD(&s->s_cap_releases);
         INIT_LIST_HEAD(&s->s_cap_releases_done);
@@ -642,6 +644,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
                 req->r_unsafe_dir = NULL;
         }
 
+        complete_all(&req->r_safe_completion);
+
         ceph_mdsc_put_request(req);
 }
 
@@ -986,7 +990,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
         dout("removing cap %p, ci is %p, inode is %p\n",
              cap, ci, &ci->vfs_inode);
         spin_lock(&ci->i_ceph_lock);
-        __ceph_remove_cap(cap);
+        __ceph_remove_cap(cap, false);
         if (!__ceph_is_any_real_caps(ci)) {
                 struct ceph_mds_client *mdsc =
                         ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1231,9 +1235,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
         session->s_trim_caps--;
         if (oissued) {
                 /* we aren't the only cap.. just remove us */
-                __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-                                    cap->mseq, cap->issue_seq);
-                __ceph_remove_cap(cap);
+                __ceph_remove_cap(cap, true);
         } else {
                 /* try to drop referring dentries */
                 spin_unlock(&ci->i_ceph_lock);
@@ -1416,7 +1418,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
         unsigned num;
 
         dout("discard_cap_releases mds%d\n", session->s_mds);
-        spin_lock(&session->s_cap_lock);
 
         /* zero out the in-progress message */
         msg = list_first_entry(&session->s_cap_releases,
@@ -1443,8 +1444,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc,
                 msg->front.iov_len = sizeof(*head);
                 list_add(&msg->list_head, &session->s_cap_releases);
         }
-
-        spin_unlock(&session->s_cap_lock);
 }
 
 /*
@@ -1875,8 +1874,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
         int mds = -1;
         int err = -EAGAIN;
 
-        if (req->r_err || req->r_got_result)
+        if (req->r_err || req->r_got_result) {
+                if (req->r_aborted)
+                        __unregister_request(mdsc, req);
                 goto out;
+        }
 
         if (req->r_timeout &&
             time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2186,7 +2188,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
         if (head->safe) {
                 req->r_got_safe = true;
                 __unregister_request(mdsc, req);
-                complete_all(&req->r_safe_completion);
 
                 if (req->r_got_unsafe) {
                         /*
@@ -2238,8 +2239,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
                 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
                 if (err == 0) {
                         if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
-                                            req->r_op == CEPH_MDS_OP_LSSNAP) &&
-                            rinfo->dir_nr)
+                                            req->r_op == CEPH_MDS_OP_LSSNAP))
                                 ceph_readdir_prepopulate(req, req->r_session);
                         ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
                 }
@@ -2490,6 +2490,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
         cap->seq = 0;        /* reset cap seq */
         cap->issue_seq = 0;  /* and issue_seq */
         cap->mseq = 0;       /* and migrate_seq */
+        cap->cap_gen = cap->session->s_cap_gen;
 
         if (recon_state->flock) {
                 rec.v2.cap_id = cpu_to_le64(cap->cap_id);
@@ -2552,6 +2553,8 @@ encode_again:
         } else {
                 err = ceph_pagelist_append(pagelist, &rec, reclen);
         }
+
+        recon_state->nr_caps++;
 out_free:
         kfree(path);
 out_dput:
@@ -2579,6 +2582,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
         struct rb_node *p;
         int mds = session->s_mds;
         int err = -ENOMEM;
+        int s_nr_caps;
         struct ceph_pagelist *pagelist;
         struct ceph_reconnect_state recon_state;
 
@@ -2610,20 +2614,38 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
         dout("session %p state %s\n", session,
              session_state_name(session->s_state));
 
+        spin_lock(&session->s_gen_ttl_lock);
+        session->s_cap_gen++;
+        spin_unlock(&session->s_gen_ttl_lock);
+
+        spin_lock(&session->s_cap_lock);
+        /*
+         * notify __ceph_remove_cap() that we are composing cap reconnect.
+         * If a cap get released before being added to the cap reconnect,
+         * __ceph_remove_cap() should skip queuing cap release.
+         */
+        session->s_cap_reconnect = 1;
         /* drop old cap expires; we're about to reestablish that state */
         discard_cap_releases(mdsc, session);
+        spin_unlock(&session->s_cap_lock);
 
         /* traverse this session's caps */
-        err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
+        s_nr_caps = session->s_nr_caps;
+        err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
         if (err)
                 goto fail;
 
+        recon_state.nr_caps = 0;
         recon_state.pagelist = pagelist;
         recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
         err = iterate_session_caps(session, encode_caps_cb, &recon_state);
         if (err < 0)
                 goto fail;
 
+        spin_lock(&session->s_cap_lock);
+        session->s_cap_reconnect = 0;
+        spin_unlock(&session->s_cap_lock);
+
         /*
          * snaprealms. we provide mds with the ino, seq (version), and
          * parent for all of our realms. If the mds has any newer info,
@@ -2646,11 +2668,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 
         if (recon_state.flock)
                 reply->hdr.version = cpu_to_le16(2);
-        if (pagelist->length) {
-                /* set up outbound data if we have any */
-                reply->hdr.data_len = cpu_to_le32(pagelist->length);
-                ceph_msg_data_add_pagelist(reply, pagelist);
+
+        /* raced with cap release? */
+        if (s_nr_caps != recon_state.nr_caps) {
+                struct page *page = list_first_entry(&pagelist->head,
+                                                     struct page, lru);
+                __le32 *addr = kmap_atomic(page);
+                *addr = cpu_to_le32(recon_state.nr_caps);
+                kunmap_atomic(addr);
         }
+
+        reply->hdr.data_len = cpu_to_le32(pagelist->length);
+        ceph_msg_data_add_pagelist(reply, pagelist);
         ceph_con_send(&session->s_con, reply);
 
         mutex_unlock(&session->s_mutex);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index c2a19fbbe517..4c053d099ae4 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -132,6 +132,7 @@ struct ceph_mds_session {
         struct list_head  s_caps;     /* all caps issued by this session */
         int               s_nr_caps, s_trim_caps;
         int               s_num_cap_releases;
+        int               s_cap_reconnect;
         struct list_head  s_cap_releases; /* waiting cap_release messages */
         struct list_head  s_cap_releases_done; /* ready to send */
         struct ceph_cap  *s_cap_iterator;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6014b0a3c405..ef4ac38bb614 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -741,13 +741,7 @@ extern int ceph_add_cap(struct inode *inode,
                         int fmode, unsigned issued, unsigned wanted,
                         unsigned cap, unsigned seq, u64 realmino, int flags,
                         struct ceph_cap_reservation *caps_reservation);
-extern void __ceph_remove_cap(struct ceph_cap *cap);
-static inline void ceph_remove_cap(struct ceph_cap *cap)
-{
-        spin_lock(&cap->ci->i_ceph_lock);
-        __ceph_remove_cap(cap);
-        spin_unlock(&cap->ci->i_ceph_lock);
-}
+extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
                          struct ceph_cap *cap);
 
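The "set caps count after composing cap reconnect message" fix has a simple shape worth spelling out: the cap count is encoded at the head of the reconnect payload before the session's caps are walked, so if fewer caps end up encoded (because some were released concurrently), the already-written count is patched in place afterwards. The following stand-alone sketch uses a plain byte buffer instead of the kernel's ceph_pagelist and kmap_atomic(); only the "patch the count afterwards" pattern mirrors the change above, and the encode helper is a made-up stand-in.

/* Simplified model of the reconnect payload count fix. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned char payload[4096];
static size_t payload_len;

static void encode_u32(uint32_t v)
{
        memcpy(payload + payload_len, &v, sizeof(v));  /* endianness ignored for brevity */
        payload_len += sizeof(v);
}

int main(void)
{
        uint32_t expected_caps = 5;  /* analogue of session->s_nr_caps at the start */
        uint32_t encoded_caps = 0;   /* analogue of recon_state.nr_caps */

        encode_u32(expected_caps);   /* count goes in first, as in the kernel */

        for (uint32_t i = 0; i < expected_caps; i++) {
                if (i == 3)
                        continue;            /* pretend this cap was released mid-walk */
                encode_u32(0xdeadbeef);      /* stand-in for one encoded cap record */
                encoded_caps++;
        }

        /* raced with cap release? patch the count already written at offset 0,
         * as the patch does by remapping the first page of the pagelist */
        if (encoded_caps != expected_caps)
                memcpy(payload, &encoded_caps, sizeof(encoded_caps));

        printf("encoded %u caps in %zu bytes\n", encoded_caps, payload_len);
        return 0;
}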
