diff options
author | Yan, Zheng <zheng.z.yan@intel.com> | 2014-04-18 01:20:27 -0400 |
---|---|---|
committer | Yan, Zheng <zheng.z.yan@intel.com> | 2014-06-05 21:29:53 -0400 |
commit | 2cd698be9a3d3a0f8f3c66814eac34144c31954c (patch) | |
tree | e5e3bd01ef76b0345742b12887a637ad821598e7 /fs/ceph/caps.c | |
parent | d9df2783507943316b305e177e5b1c157200c76f (diff) |
ceph: handle cap import atomically
cap import messages are processed by both handle_cap_import() and
handle_cap_grant(). These two functions are not executed in the same
atomic context, so they can race with cap release.
The fix is to make handle_cap_import() not release the i_ceph_lock when
it returns. Let handle_cap_grant() release the lock after it finishes
its job.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- | fs/ceph/caps.c | 99 |
1 files changed, 54 insertions, 45 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 73a42f504357..9f2c99c34e92 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2379,23 +2379,20 @@ static void invalidate_aliases(struct inode *inode) | |||
2379 | * actually be a revocation if it specifies a smaller cap set.) | 2379 | * actually be a revocation if it specifies a smaller cap set.) |
2380 | * | 2380 | * |
2381 | * caller holds s_mutex and i_ceph_lock, we drop both. | 2381 | * caller holds s_mutex and i_ceph_lock, we drop both. |
2382 | * | ||
2383 | * return value: | ||
2384 | * 0 - ok | ||
2385 | * 1 - check_caps on auth cap only (writeback) | ||
2386 | * 2 - check_caps (ack revoke) | ||
2387 | */ | 2382 | */ |
2388 | static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | 2383 | static void handle_cap_grant(struct ceph_mds_client *mdsc, |
2384 | struct inode *inode, struct ceph_mds_caps *grant, | ||
2385 | void *snaptrace, int snaptrace_len, | ||
2386 | struct ceph_buffer *xattr_buf, | ||
2389 | struct ceph_mds_session *session, | 2387 | struct ceph_mds_session *session, |
2390 | struct ceph_cap *cap, | 2388 | struct ceph_cap *cap, int issued) |
2391 | struct ceph_buffer *xattr_buf) | 2389 | __releases(ci->i_ceph_lock) |
2392 | __releases(ci->i_ceph_lock) | ||
2393 | { | 2390 | { |
2394 | struct ceph_inode_info *ci = ceph_inode(inode); | 2391 | struct ceph_inode_info *ci = ceph_inode(inode); |
2395 | int mds = session->s_mds; | 2392 | int mds = session->s_mds; |
2396 | int seq = le32_to_cpu(grant->seq); | 2393 | int seq = le32_to_cpu(grant->seq); |
2397 | int newcaps = le32_to_cpu(grant->caps); | 2394 | int newcaps = le32_to_cpu(grant->caps); |
2398 | int issued, implemented, used, wanted, dirty; | 2395 | int used, wanted, dirty; |
2399 | u64 size = le64_to_cpu(grant->size); | 2396 | u64 size = le64_to_cpu(grant->size); |
2400 | u64 max_size = le64_to_cpu(grant->max_size); | 2397 | u64 max_size = le64_to_cpu(grant->max_size); |
2401 | struct timespec mtime, atime, ctime; | 2398 | struct timespec mtime, atime, ctime; |
@@ -2449,10 +2446,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2449 | } | 2446 | } |
2450 | 2447 | ||
2451 | /* side effects now are allowed */ | 2448 | /* side effects now are allowed */ |
2452 | |||
2453 | issued = __ceph_caps_issued(ci, &implemented); | ||
2454 | issued |= implemented | __ceph_caps_dirty(ci); | ||
2455 | |||
2456 | cap->cap_gen = session->s_cap_gen; | 2449 | cap->cap_gen = session->s_cap_gen; |
2457 | cap->seq = seq; | 2450 | cap->seq = seq; |
2458 | 2451 | ||
@@ -2585,6 +2578,17 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2585 | 2578 | ||
2586 | spin_unlock(&ci->i_ceph_lock); | 2579 | spin_unlock(&ci->i_ceph_lock); |
2587 | 2580 | ||
2581 | if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { | ||
2582 | down_write(&mdsc->snap_rwsem); | ||
2583 | ceph_update_snap_trace(mdsc, snaptrace, | ||
2584 | snaptrace + snaptrace_len, false); | ||
2585 | downgrade_write(&mdsc->snap_rwsem); | ||
2586 | kick_flushing_inode_caps(mdsc, session, inode); | ||
2587 | up_read(&mdsc->snap_rwsem); | ||
2588 | if (newcaps & ~issued) | ||
2589 | wake = 1; | ||
2590 | } | ||
2591 | |||
2588 | if (queue_trunc) { | 2592 | if (queue_trunc) { |
2589 | ceph_queue_vmtruncate(inode); | 2593 | ceph_queue_vmtruncate(inode); |
2590 | ceph_queue_revalidate(inode); | 2594 | ceph_queue_revalidate(inode); |
@@ -2886,21 +2890,22 @@ out_unlock: | |||
2886 | } | 2890 | } |
2887 | 2891 | ||
2888 | /* | 2892 | /* |
2889 | * Handle cap IMPORT. If there are temp bits from an older EXPORT, | 2893 | * Handle cap IMPORT. |
2890 | * clean them up. | ||
2891 | * | 2894 | * |
2892 | * caller holds s_mutex. | 2895 | * caller holds s_mutex. acquires i_ceph_lock |
2893 | */ | 2896 | */ |
2894 | static void handle_cap_import(struct ceph_mds_client *mdsc, | 2897 | static void handle_cap_import(struct ceph_mds_client *mdsc, |
2895 | struct inode *inode, struct ceph_mds_caps *im, | 2898 | struct inode *inode, struct ceph_mds_caps *im, |
2896 | struct ceph_mds_cap_peer *ph, | 2899 | struct ceph_mds_cap_peer *ph, |
2897 | struct ceph_mds_session *session, | 2900 | struct ceph_mds_session *session, |
2898 | void *snaptrace, int snaptrace_len) | 2901 | struct ceph_cap **target_cap, int *old_issued) |
2902 | __acquires(ci->i_ceph_lock) | ||
2899 | { | 2903 | { |
2900 | struct ceph_inode_info *ci = ceph_inode(inode); | 2904 | struct ceph_inode_info *ci = ceph_inode(inode); |
2901 | struct ceph_cap *cap, *new_cap = NULL; | 2905 | struct ceph_cap *cap, *ocap, *new_cap = NULL; |
2902 | int mds = session->s_mds; | 2906 | int mds = session->s_mds; |
2903 | unsigned issued = le32_to_cpu(im->caps); | 2907 | int issued; |
2908 | unsigned caps = le32_to_cpu(im->caps); | ||
2904 | unsigned wanted = le32_to_cpu(im->wanted); | 2909 | unsigned wanted = le32_to_cpu(im->wanted); |
2905 | unsigned seq = le32_to_cpu(im->seq); | 2910 | unsigned seq = le32_to_cpu(im->seq); |
2906 | unsigned mseq = le32_to_cpu(im->migrate_seq); | 2911 | unsigned mseq = le32_to_cpu(im->migrate_seq); |
@@ -2929,44 +2934,43 @@ retry: | |||
2929 | new_cap = ceph_get_cap(mdsc, NULL); | 2934 | new_cap = ceph_get_cap(mdsc, NULL); |
2930 | goto retry; | 2935 | goto retry; |
2931 | } | 2936 | } |
2937 | cap = new_cap; | ||
2938 | } else { | ||
2939 | if (new_cap) { | ||
2940 | ceph_put_cap(mdsc, new_cap); | ||
2941 | new_cap = NULL; | ||
2942 | } | ||
2932 | } | 2943 | } |
2933 | 2944 | ||
2934 | ceph_add_cap(inode, session, cap_id, -1, issued, wanted, seq, mseq, | 2945 | __ceph_caps_issued(ci, &issued); |
2946 | issued |= __ceph_caps_dirty(ci); | ||
2947 | |||
2948 | ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq, | ||
2935 | realmino, CEPH_CAP_FLAG_AUTH, &new_cap); | 2949 | realmino, CEPH_CAP_FLAG_AUTH, &new_cap); |
2936 | 2950 | ||
2937 | cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; | 2951 | ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; |
2938 | if (cap && cap->cap_id == p_cap_id) { | 2952 | if (ocap && ocap->cap_id == p_cap_id) { |
2939 | dout(" remove export cap %p mds%d flags %d\n", | 2953 | dout(" remove export cap %p mds%d flags %d\n", |
2940 | cap, peer, ph->flags); | 2954 | ocap, peer, ph->flags); |
2941 | if ((ph->flags & CEPH_CAP_FLAG_AUTH) && | 2955 | if ((ph->flags & CEPH_CAP_FLAG_AUTH) && |
2942 | (cap->seq != le32_to_cpu(ph->seq) || | 2956 | (ocap->seq != le32_to_cpu(ph->seq) || |
2943 | cap->mseq != le32_to_cpu(ph->mseq))) { | 2957 | ocap->mseq != le32_to_cpu(ph->mseq))) { |
2944 | pr_err("handle_cap_import: mismatched seq/mseq: " | 2958 | pr_err("handle_cap_import: mismatched seq/mseq: " |
2945 | "ino (%llx.%llx) mds%d seq %d mseq %d " | 2959 | "ino (%llx.%llx) mds%d seq %d mseq %d " |
2946 | "importer mds%d has peer seq %d mseq %d\n", | 2960 | "importer mds%d has peer seq %d mseq %d\n", |
2947 | ceph_vinop(inode), peer, cap->seq, | 2961 | ceph_vinop(inode), peer, ocap->seq, |
2948 | cap->mseq, mds, le32_to_cpu(ph->seq), | 2962 | ocap->mseq, mds, le32_to_cpu(ph->seq), |
2949 | le32_to_cpu(ph->mseq)); | 2963 | le32_to_cpu(ph->mseq)); |
2950 | } | 2964 | } |
2951 | __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); | 2965 | __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); |
2952 | } | 2966 | } |
2953 | 2967 | ||
2954 | /* make sure we re-request max_size, if necessary */ | 2968 | /* make sure we re-request max_size, if necessary */ |
2955 | ci->i_wanted_max_size = 0; | 2969 | ci->i_wanted_max_size = 0; |
2956 | ci->i_requested_max_size = 0; | 2970 | ci->i_requested_max_size = 0; |
2957 | spin_unlock(&ci->i_ceph_lock); | ||
2958 | |||
2959 | wake_up_all(&ci->i_cap_wq); | ||
2960 | 2971 | ||
2961 | down_write(&mdsc->snap_rwsem); | 2972 | *old_issued = issued; |
2962 | ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len, | 2973 | *target_cap = cap; |
2963 | false); | ||
2964 | downgrade_write(&mdsc->snap_rwsem); | ||
2965 | kick_flushing_inode_caps(mdsc, session, inode); | ||
2966 | up_read(&mdsc->snap_rwsem); | ||
2967 | |||
2968 | if (new_cap) | ||
2969 | ceph_put_cap(mdsc, new_cap); | ||
2970 | } | 2974 | } |
2971 | 2975 | ||
2972 | /* | 2976 | /* |
@@ -2986,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2986 | struct ceph_mds_caps *h; | 2990 | struct ceph_mds_caps *h; |
2987 | struct ceph_mds_cap_peer *peer = NULL; | 2991 | struct ceph_mds_cap_peer *peer = NULL; |
2988 | int mds = session->s_mds; | 2992 | int mds = session->s_mds; |
2989 | int op; | 2993 | int op, issued; |
2990 | u32 seq, mseq; | 2994 | u32 seq, mseq; |
2991 | struct ceph_vino vino; | 2995 | struct ceph_vino vino; |
2992 | u64 cap_id; | 2996 | u64 cap_id; |
@@ -3078,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3078 | 3082 | ||
3079 | case CEPH_CAP_OP_IMPORT: | 3083 | case CEPH_CAP_OP_IMPORT: |
3080 | handle_cap_import(mdsc, inode, h, peer, session, | 3084 | handle_cap_import(mdsc, inode, h, peer, session, |
3081 | snaptrace, snaptrace_len); | 3085 | &cap, &issued); |
3086 | handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, | ||
3087 | msg->middle, session, cap, issued); | ||
3088 | goto done_unlocked; | ||
3082 | } | 3089 | } |
3083 | 3090 | ||
3084 | /* the rest require a cap */ | 3091 | /* the rest require a cap */ |
@@ -3095,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3095 | switch (op) { | 3102 | switch (op) { |
3096 | case CEPH_CAP_OP_REVOKE: | 3103 | case CEPH_CAP_OP_REVOKE: |
3097 | case CEPH_CAP_OP_GRANT: | 3104 | case CEPH_CAP_OP_GRANT: |
3098 | case CEPH_CAP_OP_IMPORT: | 3105 | __ceph_caps_issued(ci, &issued); |
3099 | handle_cap_grant(inode, h, session, cap, msg->middle); | 3106 | issued |= __ceph_caps_dirty(ci); |
3107 | handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, | ||
3108 | session, cap, issued); | ||
3100 | goto done_unlocked; | 3109 | goto done_unlocked; |
3101 | 3110 | ||
3102 | case CEPH_CAP_OP_FLUSH_ACK: | 3111 | case CEPH_CAP_OP_FLUSH_ACK: |