diff options
-rw-r--r-- | fs/ceph/caps.c | 220 | ||||
-rw-r--r-- | fs/ceph/inode.c | 4 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 |
3 files changed, 146 insertions, 82 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 98f3ca4a5ddf..17543383545c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -555,21 +555,34 @@ retry: | |||
555 | cap->ci = ci; | 555 | cap->ci = ci; |
556 | __insert_cap_node(ci, cap); | 556 | __insert_cap_node(ci, cap); |
557 | 557 | ||
558 | /* clear out old exporting info? (i.e. on cap import) */ | ||
559 | if (ci->i_cap_exporting_mds == mds) { | ||
560 | ci->i_cap_exporting_issued = 0; | ||
561 | ci->i_cap_exporting_mseq = 0; | ||
562 | ci->i_cap_exporting_mds = -1; | ||
563 | } | ||
564 | |||
565 | /* add to session cap list */ | 558 | /* add to session cap list */ |
566 | cap->session = session; | 559 | cap->session = session; |
567 | spin_lock(&session->s_cap_lock); | 560 | spin_lock(&session->s_cap_lock); |
568 | list_add_tail(&cap->session_caps, &session->s_caps); | 561 | list_add_tail(&cap->session_caps, &session->s_caps); |
569 | session->s_nr_caps++; | 562 | session->s_nr_caps++; |
570 | spin_unlock(&session->s_cap_lock); | 563 | spin_unlock(&session->s_cap_lock); |
571 | } else if (new_cap) | 564 | } else { |
572 | ceph_put_cap(mdsc, new_cap); | 565 | if (new_cap) |
566 | ceph_put_cap(mdsc, new_cap); | ||
567 | |||
568 | /* | ||
569 | * auth mds of the inode changed. we received the cap export | ||
570 | * message, but still haven't received the cap import message. | ||
571 | * handle_cap_export() updated the new auth MDS' cap. | ||
572 | * | ||
573 | * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing | ||
574 | * a message that was sent before the cap import message. So | ||
575 | * don't remove caps. | ||
576 | */ | ||
577 | if (ceph_seq_cmp(seq, cap->seq) <= 0) { | ||
578 | WARN_ON(cap != ci->i_auth_cap); | ||
579 | WARN_ON(cap->cap_id != cap_id); | ||
580 | seq = cap->seq; | ||
581 | mseq = cap->mseq; | ||
582 | issued |= cap->issued; | ||
583 | flags |= CEPH_CAP_FLAG_AUTH; | ||
584 | } | ||
585 | } | ||
573 | 586 | ||
574 | if (!ci->i_snap_realm) { | 587 | if (!ci->i_snap_realm) { |
575 | /* | 588 | /* |
@@ -612,15 +625,8 @@ retry: | |||
612 | ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) | 625 | ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) |
613 | ci->i_auth_cap = cap; | 626 | ci->i_auth_cap = cap; |
614 | ci->i_cap_exporting_issued = 0; | 627 | ci->i_cap_exporting_issued = 0; |
615 | } else if (ci->i_auth_cap == cap) { | 628 | } else { |
616 | ci->i_auth_cap = NULL; | 629 | WARN_ON(ci->i_auth_cap == cap); |
617 | spin_lock(&mdsc->cap_dirty_lock); | ||
618 | if (!list_empty(&ci->i_dirty_item)) { | ||
619 | dout(" moving %p to cap_dirty_migrating\n", inode); | ||
620 | list_move(&ci->i_dirty_item, | ||
621 | &mdsc->cap_dirty_migrating); | ||
622 | } | ||
623 | spin_unlock(&mdsc->cap_dirty_lock); | ||
624 | } | 630 | } |
625 | 631 | ||
626 | dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", | 632 | dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", |
@@ -889,7 +895,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) | |||
889 | */ | 895 | */ |
890 | static int __ceph_is_any_caps(struct ceph_inode_info *ci) | 896 | static int __ceph_is_any_caps(struct ceph_inode_info *ci) |
891 | { | 897 | { |
892 | return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_mds >= 0; | 898 | return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued; |
893 | } | 899 | } |
894 | 900 | ||
895 | int ceph_is_any_caps(struct inode *inode) | 901 | int ceph_is_any_caps(struct inode *inode) |
@@ -1396,13 +1402,10 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1396 | ci->i_snap_realm->cached_context); | 1402 | ci->i_snap_realm->cached_context); |
1397 | dout(" inode %p now dirty snapc %p auth cap %p\n", | 1403 | dout(" inode %p now dirty snapc %p auth cap %p\n", |
1398 | &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); | 1404 | &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); |
1405 | WARN_ON(!ci->i_auth_cap); | ||
1399 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 1406 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
1400 | spin_lock(&mdsc->cap_dirty_lock); | 1407 | spin_lock(&mdsc->cap_dirty_lock); |
1401 | if (ci->i_auth_cap) | 1408 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); |
1402 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); | ||
1403 | else | ||
1404 | list_add(&ci->i_dirty_item, | ||
1405 | &mdsc->cap_dirty_migrating); | ||
1406 | spin_unlock(&mdsc->cap_dirty_lock); | 1409 | spin_unlock(&mdsc->cap_dirty_lock); |
1407 | if (ci->i_flushing_caps == 0) { | 1410 | if (ci->i_flushing_caps == 0) { |
1408 | ihold(inode); | 1411 | ihold(inode); |
@@ -2421,6 +2424,22 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2421 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, | 2424 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, |
2422 | inode->i_size); | 2425 | inode->i_size); |
2423 | 2426 | ||
2427 | |||
2428 | /* | ||
2429 | * auth mds of the inode changed. we received the cap export message, | ||
2430 | * but still haven't received the cap import message. handle_cap_export | ||
2431 | * updated the new auth MDS' cap. | ||
2432 | * | ||
2433 | * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message | ||
2434 | * that was sent before the cap import message. So don't remove caps. | ||
2435 | */ | ||
2436 | if (ceph_seq_cmp(seq, cap->seq) <= 0) { | ||
2437 | WARN_ON(cap != ci->i_auth_cap); | ||
2438 | WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id)); | ||
2439 | seq = cap->seq; | ||
2440 | newcaps |= cap->issued; | ||
2441 | } | ||
2442 | |||
2424 | /* | 2443 | /* |
2425 | * If CACHE is being revoked, and we have no dirty buffers, | 2444 | * If CACHE is being revoked, and we have no dirty buffers, |
2426 | * try to invalidate (once). (If there are dirty buffers, we | 2445 | * try to invalidate (once). (If there are dirty buffers, we |
@@ -2447,6 +2466,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2447 | issued |= implemented | __ceph_caps_dirty(ci); | 2466 | issued |= implemented | __ceph_caps_dirty(ci); |
2448 | 2467 | ||
2449 | cap->cap_gen = session->s_cap_gen; | 2468 | cap->cap_gen = session->s_cap_gen; |
2469 | cap->seq = seq; | ||
2450 | 2470 | ||
2451 | __check_cap_issue(ci, cap, newcaps); | 2471 | __check_cap_issue(ci, cap, newcaps); |
2452 | 2472 | ||
@@ -2497,6 +2517,10 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2497 | le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, | 2517 | le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, |
2498 | &atime); | 2518 | &atime); |
2499 | 2519 | ||
2520 | |||
2521 | /* file layout may have changed */ | ||
2522 | ci->i_layout = grant->layout; | ||
2523 | |||
2500 | /* max size increase? */ | 2524 | /* max size increase? */ |
2501 | if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { | 2525 | if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { |
2502 | dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); | 2526 | dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); |
@@ -2525,11 +2549,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2525 | check_caps = 1; | 2549 | check_caps = 1; |
2526 | } | 2550 | } |
2527 | 2551 | ||
2528 | cap->seq = seq; | ||
2529 | |||
2530 | /* file layout may have changed */ | ||
2531 | ci->i_layout = grant->layout; | ||
2532 | |||
2533 | /* revocation, grant, or no-op? */ | 2552 | /* revocation, grant, or no-op? */ |
2534 | if (cap->issued & ~newcaps) { | 2553 | if (cap->issued & ~newcaps) { |
2535 | int revoking = cap->issued & ~newcaps; | 2554 | int revoking = cap->issued & ~newcaps; |
@@ -2755,65 +2774,114 @@ static void handle_cap_trunc(struct inode *inode, | |||
2755 | * caller holds s_mutex | 2774 | * caller holds s_mutex |
2756 | */ | 2775 | */ |
2757 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | 2776 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, |
2758 | struct ceph_mds_session *session, | 2777 | struct ceph_mds_cap_peer *ph, |
2759 | int *open_target_sessions) | 2778 | struct ceph_mds_session *session) |
2760 | { | 2779 | { |
2761 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | 2780 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
2781 | struct ceph_mds_session *tsession = NULL; | ||
2782 | struct ceph_cap *cap, *tcap; | ||
2762 | struct ceph_inode_info *ci = ceph_inode(inode); | 2783 | struct ceph_inode_info *ci = ceph_inode(inode); |
2763 | int mds = session->s_mds; | 2784 | u64 t_cap_id; |
2764 | unsigned mseq = le32_to_cpu(ex->migrate_seq); | 2785 | unsigned mseq = le32_to_cpu(ex->migrate_seq); |
2765 | struct ceph_cap *cap = NULL, *t; | 2786 | unsigned t_seq, t_mseq; |
2766 | struct rb_node *p; | 2787 | int target, issued; |
2767 | int remember = 1; | 2788 | int mds = session->s_mds; |
2768 | 2789 | ||
2769 | dout("handle_cap_export inode %p ci %p mds%d mseq %d\n", | 2790 | if (ph) { |
2770 | inode, ci, mds, mseq); | 2791 | t_cap_id = le64_to_cpu(ph->cap_id); |
2792 | t_seq = le32_to_cpu(ph->seq); | ||
2793 | t_mseq = le32_to_cpu(ph->mseq); | ||
2794 | target = le32_to_cpu(ph->mds); | ||
2795 | } else { | ||
2796 | t_cap_id = t_seq = t_mseq = 0; | ||
2797 | target = -1; | ||
2798 | } | ||
2771 | 2799 | ||
2800 | dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n", | ||
2801 | inode, ci, mds, mseq, target); | ||
2802 | retry: | ||
2772 | spin_lock(&ci->i_ceph_lock); | 2803 | spin_lock(&ci->i_ceph_lock); |
2804 | cap = __get_cap_for_mds(ci, mds); | ||
2805 | if (!cap) | ||
2806 | goto out_unlock; | ||
2773 | 2807 | ||
2774 | /* make sure we haven't seen a higher mseq */ | 2808 | if (target < 0) { |
2775 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { | 2809 | __ceph_remove_cap(cap, false); |
2776 | t = rb_entry(p, struct ceph_cap, ci_node); | 2810 | goto out_unlock; |
2777 | if (ceph_seq_cmp(t->mseq, mseq) > 0) { | ||
2778 | dout(" higher mseq on cap from mds%d\n", | ||
2779 | t->session->s_mds); | ||
2780 | remember = 0; | ||
2781 | } | ||
2782 | if (t->session->s_mds == mds) | ||
2783 | cap = t; | ||
2784 | } | 2811 | } |
2785 | 2812 | ||
2786 | if (cap) { | 2813 | /* |
2787 | if (remember) { | 2814 | * now we know we haven't received the cap import message yet |
2788 | /* make note */ | 2815 | * because the exported cap still exists. |
2789 | ci->i_cap_exporting_mds = mds; | 2816 | */ |
2790 | ci->i_cap_exporting_mseq = mseq; | ||
2791 | ci->i_cap_exporting_issued = cap->issued; | ||
2792 | |||
2793 | /* | ||
2794 | * make sure we have open sessions with all possible | ||
2795 | * export targets, so that we get the matching IMPORT | ||
2796 | */ | ||
2797 | *open_target_sessions = 1; | ||
2798 | 2817 | ||
2799 | /* | 2818 | issued = cap->issued; |
2800 | * we can't flush dirty caps that we've seen the | 2819 | WARN_ON(issued != cap->implemented); |
2801 | * EXPORT but no IMPORT for | 2820 | |
2802 | */ | 2821 | tcap = __get_cap_for_mds(ci, target); |
2803 | spin_lock(&mdsc->cap_dirty_lock); | 2822 | if (tcap) { |
2804 | if (!list_empty(&ci->i_dirty_item)) { | 2823 | /* already have caps from the target */ |
2805 | dout(" moving %p to cap_dirty_migrating\n", | 2824 | if (tcap->cap_id != t_cap_id || |
2806 | inode); | 2825 | ceph_seq_cmp(tcap->seq, t_seq) < 0) { |
2807 | list_move(&ci->i_dirty_item, | 2826 | dout(" updating import cap %p mds%d\n", tcap, target); |
2808 | &mdsc->cap_dirty_migrating); | 2827 | tcap->cap_id = t_cap_id; |
2828 | tcap->seq = t_seq - 1; | ||
2829 | tcap->issue_seq = t_seq - 1; | ||
2830 | tcap->mseq = t_mseq; | ||
2831 | tcap->issued |= issued; | ||
2832 | tcap->implemented |= issued; | ||
2833 | if (cap == ci->i_auth_cap) | ||
2834 | ci->i_auth_cap = tcap; | ||
2835 | if (ci->i_flushing_caps && ci->i_auth_cap == tcap) { | ||
2836 | spin_lock(&mdsc->cap_dirty_lock); | ||
2837 | list_move_tail(&ci->i_flushing_item, | ||
2838 | &tcap->session->s_cap_flushing); | ||
2839 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2809 | } | 2840 | } |
2810 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2811 | } | 2841 | } |
2812 | __ceph_remove_cap(cap, false); | 2842 | __ceph_remove_cap(cap, false); |
2843 | goto out_unlock; | ||
2844 | } | ||
2845 | |||
2846 | if (tsession) { | ||
2847 | int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; | ||
2848 | spin_unlock(&ci->i_ceph_lock); | ||
2849 | /* add placeholder for the export target */ | ||
2850 | ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, | ||
2851 | t_seq - 1, t_mseq, (u64)-1, flag, NULL); | ||
2852 | goto retry; | ||
2813 | } | 2853 | } |
2814 | /* else, we already released it */ | ||
2815 | 2854 | ||
2816 | spin_unlock(&ci->i_ceph_lock); | 2855 | spin_unlock(&ci->i_ceph_lock); |
2856 | mutex_unlock(&session->s_mutex); | ||
2857 | |||
2858 | /* open target session */ | ||
2859 | tsession = ceph_mdsc_open_export_target_session(mdsc, target); | ||
2860 | if (!IS_ERR(tsession)) { | ||
2861 | if (mds > target) { | ||
2862 | mutex_lock(&session->s_mutex); | ||
2863 | mutex_lock_nested(&tsession->s_mutex, | ||
2864 | SINGLE_DEPTH_NESTING); | ||
2865 | } else { | ||
2866 | mutex_lock(&tsession->s_mutex); | ||
2867 | mutex_lock_nested(&session->s_mutex, | ||
2868 | SINGLE_DEPTH_NESTING); | ||
2869 | } | ||
2870 | ceph_add_cap_releases(mdsc, tsession); | ||
2871 | } else { | ||
2872 | WARN_ON(1); | ||
2873 | tsession = NULL; | ||
2874 | target = -1; | ||
2875 | } | ||
2876 | goto retry; | ||
2877 | |||
2878 | out_unlock: | ||
2879 | spin_unlock(&ci->i_ceph_lock); | ||
2880 | mutex_unlock(&session->s_mutex); | ||
2881 | if (tsession) { | ||
2882 | mutex_unlock(&tsession->s_mutex); | ||
2883 | ceph_put_mds_session(tsession); | ||
2884 | } | ||
2817 | } | 2885 | } |
2818 | 2886 | ||
2819 | /* | 2887 | /* |
@@ -2915,7 +2983,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2915 | void *flock; | 2983 | void *flock; |
2916 | void *end; | 2984 | void *end; |
2917 | u32 flock_len; | 2985 | u32 flock_len; |
2918 | int open_target_sessions = 0; | ||
2919 | 2986 | ||
2920 | dout("handle_caps from mds%d\n", mds); | 2987 | dout("handle_caps from mds%d\n", mds); |
2921 | 2988 | ||
@@ -2954,6 +3021,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2954 | if (p + sizeof(*peer) > end) | 3021 | if (p + sizeof(*peer) > end) |
2955 | goto bad; | 3022 | goto bad; |
2956 | peer = p; | 3023 | peer = p; |
3024 | } else if (op == CEPH_CAP_OP_EXPORT) { | ||
3025 | /* recorded in unused fields */ | ||
3026 | peer = (void *)&h->size; | ||
2957 | } | 3027 | } |
2958 | } | 3028 | } |
2959 | 3029 | ||
@@ -2989,8 +3059,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2989 | goto done; | 3059 | goto done; |
2990 | 3060 | ||
2991 | case CEPH_CAP_OP_EXPORT: | 3061 | case CEPH_CAP_OP_EXPORT: |
2992 | handle_cap_export(inode, h, session, &open_target_sessions); | 3062 | handle_cap_export(inode, h, peer, session); |
2993 | goto done; | 3063 | goto done_unlocked; |
2994 | 3064 | ||
2995 | case CEPH_CAP_OP_IMPORT: | 3065 | case CEPH_CAP_OP_IMPORT: |
2996 | handle_cap_import(mdsc, inode, h, peer, session, | 3066 | handle_cap_import(mdsc, inode, h, peer, session, |
@@ -3045,8 +3115,6 @@ done: | |||
3045 | done_unlocked: | 3115 | done_unlocked: |
3046 | if (inode) | 3116 | if (inode) |
3047 | iput(inode); | 3117 | iput(inode); |
3048 | if (open_target_sessions) | ||
3049 | ceph_mdsc_open_export_target_sessions(mdsc, session); | ||
3050 | return; | 3118 | return; |
3051 | 3119 | ||
3052 | bad: | 3120 | bad: |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 3db97ba15a06..6fc10a7d7c59 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -336,12 +336,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
336 | ci->i_hold_caps_min = 0; | 336 | ci->i_hold_caps_min = 0; |
337 | ci->i_hold_caps_max = 0; | 337 | ci->i_hold_caps_max = 0; |
338 | INIT_LIST_HEAD(&ci->i_cap_delay_list); | 338 | INIT_LIST_HEAD(&ci->i_cap_delay_list); |
339 | ci->i_cap_exporting_mds = 0; | ||
340 | ci->i_cap_exporting_mseq = 0; | ||
341 | ci->i_cap_exporting_issued = 0; | ||
342 | INIT_LIST_HEAD(&ci->i_cap_snaps); | 339 | INIT_LIST_HEAD(&ci->i_cap_snaps); |
343 | ci->i_head_snapc = NULL; | 340 | ci->i_head_snapc = NULL; |
344 | ci->i_snap_caps = 0; | 341 | ci->i_snap_caps = 0; |
342 | ci->i_cap_exporting_issued = 0; | ||
345 | 343 | ||
346 | for (i = 0; i < CEPH_FILE_MODE_NUM; i++) | 344 | for (i = 0; i < CEPH_FILE_MODE_NUM; i++) |
347 | ci->i_nr_by_mode[i] = 0; | 345 | ci->i_nr_by_mode[i] = 0; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a6ba32fb0d49..c299f7d19bf3 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -287,14 +287,12 @@ struct ceph_inode_info { | |||
287 | unsigned long i_hold_caps_min; /* jiffies */ | 287 | unsigned long i_hold_caps_min; /* jiffies */ |
288 | unsigned long i_hold_caps_max; /* jiffies */ | 288 | unsigned long i_hold_caps_max; /* jiffies */ |
289 | struct list_head i_cap_delay_list; /* for delayed cap release to mds */ | 289 | struct list_head i_cap_delay_list; /* for delayed cap release to mds */ |
290 | int i_cap_exporting_mds; /* to handle cap migration between */ | ||
291 | unsigned i_cap_exporting_mseq; /* mds's. */ | ||
292 | unsigned i_cap_exporting_issued; | ||
293 | struct ceph_cap_reservation i_cap_migration_resv; | 290 | struct ceph_cap_reservation i_cap_migration_resv; |
294 | struct list_head i_cap_snaps; /* snapped state pending flush to mds */ | 291 | struct list_head i_cap_snaps; /* snapped state pending flush to mds */ |
295 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or | 292 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or |
296 | dirty|flushing caps */ | 293 | dirty|flushing caps */ |
297 | unsigned i_snap_caps; /* cap bits for snapped files */ | 294 | unsigned i_snap_caps; /* cap bits for snapped files */ |
295 | unsigned i_cap_exporting_issued; | ||
298 | 296 | ||
299 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ | 297 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ |
300 | 298 | ||