aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/caps.c
diff options
context:
space:
mode:
authorYan, Zheng <zheng.z.yan@intel.com>2013-11-24 01:44:38 -0500
committerYan, Zheng <zheng.z.yan@intel.com>2014-01-21 03:30:31 -0500
commit11df2dfb610d68e8050c2183c344b1002351a99d (patch)
tree55bf3640e8553a1e2b3f0067cce0e321e20b83d1 /fs/ceph/caps.c
parent5d72d13c425bb41f7752962f168fb402b86b7ac0 (diff)
ceph: add imported caps when handling cap export message
Version 3 cap export message includes information about the imported caps. It allows us to add the imported caps if the corresponding cap import message still hasn't been received. This allows us to handle the situation where the importer MDS crashes and the cap import message is missing. Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r--fs/ceph/caps.c220
1 files changed, 144 insertions, 76 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 98f3ca4a5ddf..17543383545c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -555,21 +555,34 @@ retry:
555 cap->ci = ci; 555 cap->ci = ci;
556 __insert_cap_node(ci, cap); 556 __insert_cap_node(ci, cap);
557 557
558 /* clear out old exporting info? (i.e. on cap import) */
559 if (ci->i_cap_exporting_mds == mds) {
560 ci->i_cap_exporting_issued = 0;
561 ci->i_cap_exporting_mseq = 0;
562 ci->i_cap_exporting_mds = -1;
563 }
564
565 /* add to session cap list */ 558 /* add to session cap list */
566 cap->session = session; 559 cap->session = session;
567 spin_lock(&session->s_cap_lock); 560 spin_lock(&session->s_cap_lock);
568 list_add_tail(&cap->session_caps, &session->s_caps); 561 list_add_tail(&cap->session_caps, &session->s_caps);
569 session->s_nr_caps++; 562 session->s_nr_caps++;
570 spin_unlock(&session->s_cap_lock); 563 spin_unlock(&session->s_cap_lock);
571 } else if (new_cap) 564 } else {
572 ceph_put_cap(mdsc, new_cap); 565 if (new_cap)
566 ceph_put_cap(mdsc, new_cap);
567
568 /*
569 * auth mds of the inode changed. we received the cap export
570 * message, but still haven't received the cap import message.
571 * handle_cap_export() updated the new auth MDS' cap.
572 *
573 * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
574 * a message that was sent before the cap import message. So
575 * don't remove caps.
576 */
577 if (ceph_seq_cmp(seq, cap->seq) <= 0) {
578 WARN_ON(cap != ci->i_auth_cap);
579 WARN_ON(cap->cap_id != cap_id);
580 seq = cap->seq;
581 mseq = cap->mseq;
582 issued |= cap->issued;
583 flags |= CEPH_CAP_FLAG_AUTH;
584 }
585 }
573 586
574 if (!ci->i_snap_realm) { 587 if (!ci->i_snap_realm) {
575 /* 588 /*
@@ -612,15 +625,8 @@ retry:
612 ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) 625 ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0)
613 ci->i_auth_cap = cap; 626 ci->i_auth_cap = cap;
614 ci->i_cap_exporting_issued = 0; 627 ci->i_cap_exporting_issued = 0;
615 } else if (ci->i_auth_cap == cap) { 628 } else {
616 ci->i_auth_cap = NULL; 629 WARN_ON(ci->i_auth_cap == cap);
617 spin_lock(&mdsc->cap_dirty_lock);
618 if (!list_empty(&ci->i_dirty_item)) {
619 dout(" moving %p to cap_dirty_migrating\n", inode);
620 list_move(&ci->i_dirty_item,
621 &mdsc->cap_dirty_migrating);
622 }
623 spin_unlock(&mdsc->cap_dirty_lock);
624 } 630 }
625 631
626 dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", 632 dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
@@ -889,7 +895,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
889 */ 895 */
890static int __ceph_is_any_caps(struct ceph_inode_info *ci) 896static int __ceph_is_any_caps(struct ceph_inode_info *ci)
891{ 897{
892 return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_mds >= 0; 898 return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued;
893} 899}
894 900
895int ceph_is_any_caps(struct inode *inode) 901int ceph_is_any_caps(struct inode *inode)
@@ -1396,13 +1402,10 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1396 ci->i_snap_realm->cached_context); 1402 ci->i_snap_realm->cached_context);
1397 dout(" inode %p now dirty snapc %p auth cap %p\n", 1403 dout(" inode %p now dirty snapc %p auth cap %p\n",
1398 &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); 1404 &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
1405 WARN_ON(!ci->i_auth_cap);
1399 BUG_ON(!list_empty(&ci->i_dirty_item)); 1406 BUG_ON(!list_empty(&ci->i_dirty_item));
1400 spin_lock(&mdsc->cap_dirty_lock); 1407 spin_lock(&mdsc->cap_dirty_lock);
1401 if (ci->i_auth_cap) 1408 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
1402 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
1403 else
1404 list_add(&ci->i_dirty_item,
1405 &mdsc->cap_dirty_migrating);
1406 spin_unlock(&mdsc->cap_dirty_lock); 1409 spin_unlock(&mdsc->cap_dirty_lock);
1407 if (ci->i_flushing_caps == 0) { 1410 if (ci->i_flushing_caps == 0) {
1408 ihold(inode); 1411 ihold(inode);
@@ -2421,6 +2424,22 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2421 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, 2424 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
2422 inode->i_size); 2425 inode->i_size);
2423 2426
2427
2428 /*
2429 * auth mds of the inode changed. we received the cap export message,
2430 * but still haven't received the cap import message. handle_cap_export
2431 * updated the new auth MDS' cap.
2432 *
2433 * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
2434 * that was sent before the cap import message. So don't remove caps.
2435 */
2436 if (ceph_seq_cmp(seq, cap->seq) <= 0) {
2437 WARN_ON(cap != ci->i_auth_cap);
2438 WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
2439 seq = cap->seq;
2440 newcaps |= cap->issued;
2441 }
2442
2424 /* 2443 /*
2425 * If CACHE is being revoked, and we have no dirty buffers, 2444 * If CACHE is being revoked, and we have no dirty buffers,
2426 * try to invalidate (once). (If there are dirty buffers, we 2445 * try to invalidate (once). (If there are dirty buffers, we
@@ -2447,6 +2466,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2447 issued |= implemented | __ceph_caps_dirty(ci); 2466 issued |= implemented | __ceph_caps_dirty(ci);
2448 2467
2449 cap->cap_gen = session->s_cap_gen; 2468 cap->cap_gen = session->s_cap_gen;
2469 cap->seq = seq;
2450 2470
2451 __check_cap_issue(ci, cap, newcaps); 2471 __check_cap_issue(ci, cap, newcaps);
2452 2472
@@ -2497,6 +2517,10 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2497 le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, 2517 le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
2498 &atime); 2518 &atime);
2499 2519
2520
2521 /* file layout may have changed */
2522 ci->i_layout = grant->layout;
2523
2500 /* max size increase? */ 2524 /* max size increase? */
2501 if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { 2525 if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
2502 dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); 2526 dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
@@ -2525,11 +2549,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2525 check_caps = 1; 2549 check_caps = 1;
2526 } 2550 }
2527 2551
2528 cap->seq = seq;
2529
2530 /* file layout may have changed */
2531 ci->i_layout = grant->layout;
2532
2533 /* revocation, grant, or no-op? */ 2552 /* revocation, grant, or no-op? */
2534 if (cap->issued & ~newcaps) { 2553 if (cap->issued & ~newcaps) {
2535 int revoking = cap->issued & ~newcaps; 2554 int revoking = cap->issued & ~newcaps;
@@ -2755,65 +2774,114 @@ static void handle_cap_trunc(struct inode *inode,
2755 * caller holds s_mutex 2774 * caller holds s_mutex
2756 */ 2775 */
2757static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, 2776static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2758 struct ceph_mds_session *session, 2777 struct ceph_mds_cap_peer *ph,
2759 int *open_target_sessions) 2778 struct ceph_mds_session *session)
2760{ 2779{
2761 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 2780 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
2781 struct ceph_mds_session *tsession = NULL;
2782 struct ceph_cap *cap, *tcap;
2762 struct ceph_inode_info *ci = ceph_inode(inode); 2783 struct ceph_inode_info *ci = ceph_inode(inode);
2763 int mds = session->s_mds; 2784 u64 t_cap_id;
2764 unsigned mseq = le32_to_cpu(ex->migrate_seq); 2785 unsigned mseq = le32_to_cpu(ex->migrate_seq);
2765 struct ceph_cap *cap = NULL, *t; 2786 unsigned t_seq, t_mseq;
2766 struct rb_node *p; 2787 int target, issued;
2767 int remember = 1; 2788 int mds = session->s_mds;
2768 2789
2769 dout("handle_cap_export inode %p ci %p mds%d mseq %d\n", 2790 if (ph) {
2770 inode, ci, mds, mseq); 2791 t_cap_id = le64_to_cpu(ph->cap_id);
2792 t_seq = le32_to_cpu(ph->seq);
2793 t_mseq = le32_to_cpu(ph->mseq);
2794 target = le32_to_cpu(ph->mds);
2795 } else {
2796 t_cap_id = t_seq = t_mseq = 0;
2797 target = -1;
2798 }
2771 2799
2800 dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n",
2801 inode, ci, mds, mseq, target);
2802retry:
2772 spin_lock(&ci->i_ceph_lock); 2803 spin_lock(&ci->i_ceph_lock);
2804 cap = __get_cap_for_mds(ci, mds);
2805 if (!cap)
2806 goto out_unlock;
2773 2807
2774 /* make sure we haven't seen a higher mseq */ 2808 if (target < 0) {
2775 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { 2809 __ceph_remove_cap(cap, false);
2776 t = rb_entry(p, struct ceph_cap, ci_node); 2810 goto out_unlock;
2777 if (ceph_seq_cmp(t->mseq, mseq) > 0) {
2778 dout(" higher mseq on cap from mds%d\n",
2779 t->session->s_mds);
2780 remember = 0;
2781 }
2782 if (t->session->s_mds == mds)
2783 cap = t;
2784 } 2811 }
2785 2812
2786 if (cap) { 2813 /*
2787 if (remember) { 2814 * now we know we haven't received the cap import message yet
2788 /* make note */ 2815 * because the exported cap still exist.
2789 ci->i_cap_exporting_mds = mds; 2816 */
2790 ci->i_cap_exporting_mseq = mseq;
2791 ci->i_cap_exporting_issued = cap->issued;
2792
2793 /*
2794 * make sure we have open sessions with all possible
2795 * export targets, so that we get the matching IMPORT
2796 */
2797 *open_target_sessions = 1;
2798 2817
2799 /* 2818 issued = cap->issued;
2800 * we can't flush dirty caps that we've seen the 2819 WARN_ON(issued != cap->implemented);
2801 * EXPORT but no IMPORT for 2820
2802 */ 2821 tcap = __get_cap_for_mds(ci, target);
2803 spin_lock(&mdsc->cap_dirty_lock); 2822 if (tcap) {
2804 if (!list_empty(&ci->i_dirty_item)) { 2823 /* already have caps from the target */
2805 dout(" moving %p to cap_dirty_migrating\n", 2824 if (tcap->cap_id != t_cap_id ||
2806 inode); 2825 ceph_seq_cmp(tcap->seq, t_seq) < 0) {
2807 list_move(&ci->i_dirty_item, 2826 dout(" updating import cap %p mds%d\n", tcap, target);
2808 &mdsc->cap_dirty_migrating); 2827 tcap->cap_id = t_cap_id;
2828 tcap->seq = t_seq - 1;
2829 tcap->issue_seq = t_seq - 1;
2830 tcap->mseq = t_mseq;
2831 tcap->issued |= issued;
2832 tcap->implemented |= issued;
2833 if (cap == ci->i_auth_cap)
2834 ci->i_auth_cap = tcap;
2835 if (ci->i_flushing_caps && ci->i_auth_cap == tcap) {
2836 spin_lock(&mdsc->cap_dirty_lock);
2837 list_move_tail(&ci->i_flushing_item,
2838 &tcap->session->s_cap_flushing);
2839 spin_unlock(&mdsc->cap_dirty_lock);
2809 } 2840 }
2810 spin_unlock(&mdsc->cap_dirty_lock);
2811 } 2841 }
2812 __ceph_remove_cap(cap, false); 2842 __ceph_remove_cap(cap, false);
2843 goto out_unlock;
2844 }
2845
2846 if (tsession) {
2847 int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
2848 spin_unlock(&ci->i_ceph_lock);
2849 /* add placeholder for the export target */
2850 ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
2851 t_seq - 1, t_mseq, (u64)-1, flag, NULL);
2852 goto retry;
2813 } 2853 }
2814 /* else, we already released it */
2815 2854
2816 spin_unlock(&ci->i_ceph_lock); 2855 spin_unlock(&ci->i_ceph_lock);
2856 mutex_unlock(&session->s_mutex);
2857
2858 /* open target session */
2859 tsession = ceph_mdsc_open_export_target_session(mdsc, target);
2860 if (!IS_ERR(tsession)) {
2861 if (mds > target) {
2862 mutex_lock(&session->s_mutex);
2863 mutex_lock_nested(&tsession->s_mutex,
2864 SINGLE_DEPTH_NESTING);
2865 } else {
2866 mutex_lock(&tsession->s_mutex);
2867 mutex_lock_nested(&session->s_mutex,
2868 SINGLE_DEPTH_NESTING);
2869 }
2870 ceph_add_cap_releases(mdsc, tsession);
2871 } else {
2872 WARN_ON(1);
2873 tsession = NULL;
2874 target = -1;
2875 }
2876 goto retry;
2877
2878out_unlock:
2879 spin_unlock(&ci->i_ceph_lock);
2880 mutex_unlock(&session->s_mutex);
2881 if (tsession) {
2882 mutex_unlock(&tsession->s_mutex);
2883 ceph_put_mds_session(tsession);
2884 }
2817} 2885}
2818 2886
2819/* 2887/*
@@ -2915,7 +2983,6 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2915 void *flock; 2983 void *flock;
2916 void *end; 2984 void *end;
2917 u32 flock_len; 2985 u32 flock_len;
2918 int open_target_sessions = 0;
2919 2986
2920 dout("handle_caps from mds%d\n", mds); 2987 dout("handle_caps from mds%d\n", mds);
2921 2988
@@ -2954,6 +3021,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2954 if (p + sizeof(*peer) > end) 3021 if (p + sizeof(*peer) > end)
2955 goto bad; 3022 goto bad;
2956 peer = p; 3023 peer = p;
3024 } else if (op == CEPH_CAP_OP_EXPORT) {
3025 /* recorded in unused fields */
3026 peer = (void *)&h->size;
2957 } 3027 }
2958 } 3028 }
2959 3029
@@ -2989,8 +3059,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2989 goto done; 3059 goto done;
2990 3060
2991 case CEPH_CAP_OP_EXPORT: 3061 case CEPH_CAP_OP_EXPORT:
2992 handle_cap_export(inode, h, session, &open_target_sessions); 3062 handle_cap_export(inode, h, peer, session);
2993 goto done; 3063 goto done_unlocked;
2994 3064
2995 case CEPH_CAP_OP_IMPORT: 3065 case CEPH_CAP_OP_IMPORT:
2996 handle_cap_import(mdsc, inode, h, peer, session, 3066 handle_cap_import(mdsc, inode, h, peer, session,
@@ -3045,8 +3115,6 @@ done:
3045done_unlocked: 3115done_unlocked:
3046 if (inode) 3116 if (inode)
3047 iput(inode); 3117 iput(inode);
3048 if (open_target_sessions)
3049 ceph_mdsc_open_export_target_sessions(mdsc, session);
3050 return; 3118 return;
3051 3119
3052bad: 3120bad: