diff options
author | Yan, Zheng <zyan@redhat.com> | 2019-01-01 03:28:33 -0500 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2019-03-05 12:55:16 -0500 |
commit | 81c5a1487e52a316e5e7d79e9911376648a79e85 (patch) | |
tree | b716d8a8ec3d7b012197776fafd4c3f68c31c33c /fs/ceph | |
parent | 84bf39509bea5b9f936281c4c660e75099fcd15f (diff) |
ceph: split large reconnect into multiple messages
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/caps.c | 6 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 290 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 3 | ||||
-rw-r--r-- | fs/ceph/snap.c | 3 |
4 files changed, 243 insertions, 59 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index bba28a5034ba..0eaf1b48c431 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2393,6 +2393,12 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, | |||
2393 | if ((cap->issued & ci->i_flushing_caps) != | 2393 | if ((cap->issued & ci->i_flushing_caps) != |
2394 | ci->i_flushing_caps) { | 2394 | ci->i_flushing_caps) { |
2395 | ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; | 2395 | ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; |
2396 | /* encode_caps_cb() also will reset these sequence | ||
2397 | * numbers. make sure sequence numbers in cap flush | ||
2398 | * message match later reconnect message */ | ||
2399 | cap->seq = 0; | ||
2400 | cap->issue_seq = 0; | ||
2401 | cap->mseq = 0; | ||
2396 | __kick_flushing_caps(mdsc, session, ci, | 2402 | __kick_flushing_caps(mdsc, session, ci, |
2397 | oldest_flush_tid); | 2403 | oldest_flush_tid); |
2398 | } else { | 2404 | } else { |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 04f18095e306..cce4e4b9ea57 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -20,6 +20,8 @@ | |||
20 | #include <linux/ceph/auth.h> | 20 | #include <linux/ceph/auth.h> |
21 | #include <linux/ceph/debugfs.h> | 21 | #include <linux/ceph/debugfs.h> |
22 | 22 | ||
23 | #define RECONNECT_MAX_SIZE (INT_MAX - PAGE_SIZE) | ||
24 | |||
23 | /* | 25 | /* |
24 | * A cluster of MDS (metadata server) daemons is responsible for | 26 | * A cluster of MDS (metadata server) daemons is responsible for |
25 | * managing the file system namespace (the directory hierarchy and | 27 | * managing the file system namespace (the directory hierarchy and |
@@ -46,9 +48,11 @@ | |||
46 | */ | 48 | */ |
47 | 49 | ||
48 | struct ceph_reconnect_state { | 50 | struct ceph_reconnect_state { |
49 | int nr_caps; | 51 | struct ceph_mds_session *session; |
52 | int nr_caps, nr_realms; | ||
50 | struct ceph_pagelist *pagelist; | 53 | struct ceph_pagelist *pagelist; |
51 | unsigned msg_version; | 54 | unsigned msg_version; |
55 | bool allow_multi; | ||
52 | }; | 56 | }; |
53 | 57 | ||
54 | static void __wake_requests(struct ceph_mds_client *mdsc, | 58 | static void __wake_requests(struct ceph_mds_client *mdsc, |
@@ -2985,6 +2989,82 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
2985 | mutex_unlock(&mdsc->mutex); | 2989 | mutex_unlock(&mdsc->mutex); |
2986 | } | 2990 | } |
2987 | 2991 | ||
2992 | static int send_reconnect_partial(struct ceph_reconnect_state *recon_state) | ||
2993 | { | ||
2994 | struct ceph_msg *reply; | ||
2995 | struct ceph_pagelist *_pagelist; | ||
2996 | struct page *page; | ||
2997 | __le32 *addr; | ||
2998 | int err = -ENOMEM; | ||
2999 | |||
3000 | if (!recon_state->allow_multi) | ||
3001 | return -ENOSPC; | ||
3002 | |||
3003 | /* can't handle message that contains both caps and realm */ | ||
3004 | BUG_ON(!recon_state->nr_caps == !recon_state->nr_realms); | ||
3005 | |||
3006 | /* pre-allocate new pagelist */ | ||
3007 | _pagelist = ceph_pagelist_alloc(GFP_NOFS); | ||
3008 | if (!_pagelist) | ||
3009 | return -ENOMEM; | ||
3010 | |||
3011 | reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false); | ||
3012 | if (!reply) | ||
3013 | goto fail_msg; | ||
3014 | |||
3015 | /* placeholder for nr_caps */ | ||
3016 | err = ceph_pagelist_encode_32(_pagelist, 0); | ||
3017 | if (err < 0) | ||
3018 | goto fail; | ||
3019 | |||
3020 | if (recon_state->nr_caps) { | ||
3021 | /* currently encoding caps */ | ||
3022 | err = ceph_pagelist_encode_32(recon_state->pagelist, 0); | ||
3023 | if (err) | ||
3024 | goto fail; | ||
3025 | } else { | ||
3026 | /* placeholder for nr_realms (currently encoding realms) */ | ||
3027 | err = ceph_pagelist_encode_32(_pagelist, 0); | ||
3028 | if (err < 0) | ||
3029 | goto fail; | ||
3030 | } | ||
3031 | |||
3032 | err = ceph_pagelist_encode_8(recon_state->pagelist, 1); | ||
3033 | if (err) | ||
3034 | goto fail; | ||
3035 | |||
3036 | page = list_first_entry(&recon_state->pagelist->head, struct page, lru); | ||
3037 | addr = kmap_atomic(page); | ||
3038 | if (recon_state->nr_caps) { | ||
3039 | /* currently encoding caps */ | ||
3040 | *addr = cpu_to_le32(recon_state->nr_caps); | ||
3041 | } else { | ||
3042 | /* currently encoding realms */ | ||
3043 | *(addr + 1) = cpu_to_le32(recon_state->nr_realms); | ||
3044 | } | ||
3045 | kunmap_atomic(addr); | ||
3046 | |||
3047 | reply->hdr.version = cpu_to_le16(5); | ||
3048 | reply->hdr.compat_version = cpu_to_le16(4); | ||
3049 | |||
3050 | reply->hdr.data_len = cpu_to_le32(recon_state->pagelist->length); | ||
3051 | ceph_msg_data_add_pagelist(reply, recon_state->pagelist); | ||
3052 | |||
3053 | ceph_con_send(&recon_state->session->s_con, reply); | ||
3054 | ceph_pagelist_release(recon_state->pagelist); | ||
3055 | |||
3056 | recon_state->pagelist = _pagelist; | ||
3057 | recon_state->nr_caps = 0; | ||
3058 | recon_state->nr_realms = 0; | ||
3059 | recon_state->msg_version = 5; | ||
3060 | return 0; | ||
3061 | fail: | ||
3062 | ceph_msg_put(reply); | ||
3063 | fail_msg: | ||
3064 | ceph_pagelist_release(_pagelist); | ||
3065 | return err; | ||
3066 | } | ||
3067 | |||
2988 | /* | 3068 | /* |
2989 | * Encode information about a cap for a reconnect with the MDS. | 3069 | * Encode information about a cap for a reconnect with the MDS. |
2990 | */ | 3070 | */ |
@@ -3004,9 +3084,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
3004 | dout(" adding %p ino %llx.%llx cap %p %lld %s\n", | 3084 | dout(" adding %p ino %llx.%llx cap %p %lld %s\n", |
3005 | inode, ceph_vinop(inode), cap, cap->cap_id, | 3085 | inode, ceph_vinop(inode), cap, cap->cap_id, |
3006 | ceph_cap_string(cap->issued)); | 3086 | ceph_cap_string(cap->issued)); |
3007 | err = ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); | ||
3008 | if (err) | ||
3009 | return err; | ||
3010 | 3087 | ||
3011 | spin_lock(&ci->i_ceph_lock); | 3088 | spin_lock(&ci->i_ceph_lock); |
3012 | cap->seq = 0; /* reset cap seq */ | 3089 | cap->seq = 0; /* reset cap seq */ |
@@ -3046,7 +3123,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
3046 | if (recon_state->msg_version >= 2) { | 3123 | if (recon_state->msg_version >= 2) { |
3047 | int num_fcntl_locks, num_flock_locks; | 3124 | int num_fcntl_locks, num_flock_locks; |
3048 | struct ceph_filelock *flocks = NULL; | 3125 | struct ceph_filelock *flocks = NULL; |
3049 | size_t struct_len, total_len = 0; | 3126 | size_t struct_len, total_len = sizeof(u64); |
3050 | u8 struct_v = 0; | 3127 | u8 struct_v = 0; |
3051 | 3128 | ||
3052 | encode_again: | 3129 | encode_again: |
@@ -3081,7 +3158,7 @@ encode_again: | |||
3081 | 3158 | ||
3082 | if (recon_state->msg_version >= 3) { | 3159 | if (recon_state->msg_version >= 3) { |
3083 | /* version, compat_version and struct_len */ | 3160 | /* version, compat_version and struct_len */ |
3084 | total_len = 2 * sizeof(u8) + sizeof(u32); | 3161 | total_len += 2 * sizeof(u8) + sizeof(u32); |
3085 | struct_v = 2; | 3162 | struct_v = 2; |
3086 | } | 3163 | } |
3087 | /* | 3164 | /* |
@@ -3098,12 +3175,19 @@ encode_again: | |||
3098 | struct_len += sizeof(u64); /* snap_follows */ | 3175 | struct_len += sizeof(u64); /* snap_follows */ |
3099 | 3176 | ||
3100 | total_len += struct_len; | 3177 | total_len += struct_len; |
3101 | err = ceph_pagelist_reserve(pagelist, total_len); | 3178 | |
3102 | if (err) { | 3179 | if (pagelist->length + total_len > RECONNECT_MAX_SIZE) { |
3103 | kfree(flocks); | 3180 | err = send_reconnect_partial(recon_state); |
3104 | goto out_err; | 3181 | if (err) |
3182 | goto out_freeflocks; | ||
3183 | pagelist = recon_state->pagelist; | ||
3105 | } | 3184 | } |
3106 | 3185 | ||
3186 | err = ceph_pagelist_reserve(pagelist, total_len); | ||
3187 | if (err) | ||
3188 | goto out_freeflocks; | ||
3189 | |||
3190 | ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); | ||
3107 | if (recon_state->msg_version >= 3) { | 3191 | if (recon_state->msg_version >= 3) { |
3108 | ceph_pagelist_encode_8(pagelist, struct_v); | 3192 | ceph_pagelist_encode_8(pagelist, struct_v); |
3109 | ceph_pagelist_encode_8(pagelist, 1); | 3193 | ceph_pagelist_encode_8(pagelist, 1); |
@@ -3115,7 +3199,7 @@ encode_again: | |||
3115 | num_fcntl_locks, num_flock_locks); | 3199 | num_fcntl_locks, num_flock_locks); |
3116 | if (struct_v >= 2) | 3200 | if (struct_v >= 2) |
3117 | ceph_pagelist_encode_64(pagelist, snap_follows); | 3201 | ceph_pagelist_encode_64(pagelist, snap_follows); |
3118 | 3202 | out_freeflocks: | |
3119 | kfree(flocks); | 3203 | kfree(flocks); |
3120 | } else { | 3204 | } else { |
3121 | u64 pathbase = 0; | 3205 | u64 pathbase = 0; |
@@ -3136,20 +3220,81 @@ encode_again: | |||
3136 | } | 3220 | } |
3137 | 3221 | ||
3138 | err = ceph_pagelist_reserve(pagelist, | 3222 | err = ceph_pagelist_reserve(pagelist, |
3139 | pathlen + sizeof(u32) + sizeof(rec.v1)); | 3223 | sizeof(u64) + sizeof(u32) + |
3224 | pathlen + sizeof(rec.v1)); | ||
3140 | if (err) { | 3225 | if (err) { |
3141 | kfree(path); | 3226 | goto out_freepath; |
3142 | goto out_err; | ||
3143 | } | 3227 | } |
3144 | 3228 | ||
3229 | ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); | ||
3145 | ceph_pagelist_encode_string(pagelist, path, pathlen); | 3230 | ceph_pagelist_encode_string(pagelist, path, pathlen); |
3146 | ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); | 3231 | ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); |
3147 | 3232 | out_freepath: | |
3148 | kfree(path); | 3233 | kfree(path); |
3149 | } | 3234 | } |
3150 | 3235 | ||
3151 | recon_state->nr_caps++; | ||
3152 | out_err: | 3236 | out_err: |
3237 | if (err >= 0) | ||
3238 | recon_state->nr_caps++; | ||
3239 | return err; | ||
3240 | } | ||
3241 | |||
3242 | static int encode_snap_realms(struct ceph_mds_client *mdsc, | ||
3243 | struct ceph_reconnect_state *recon_state) | ||
3244 | { | ||
3245 | struct rb_node *p; | ||
3246 | struct ceph_pagelist *pagelist = recon_state->pagelist; | ||
3247 | int err = 0; | ||
3248 | |||
3249 | if (recon_state->msg_version >= 4) { | ||
3250 | err = ceph_pagelist_encode_32(pagelist, mdsc->num_snap_realms); | ||
3251 | if (err < 0) | ||
3252 | goto fail; | ||
3253 | } | ||
3254 | |||
3255 | /* | ||
3256 | * snaprealms. we provide mds with the ino, seq (version), and | ||
3257 | * parent for all of our realms. If the mds has any newer info, | ||
3258 | * it will tell us. | ||
3259 | */ | ||
3260 | for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) { | ||
3261 | struct ceph_snap_realm *realm = | ||
3262 | rb_entry(p, struct ceph_snap_realm, node); | ||
3263 | struct ceph_mds_snaprealm_reconnect sr_rec; | ||
3264 | |||
3265 | if (recon_state->msg_version >= 4) { | ||
3266 | size_t need = sizeof(u8) * 2 + sizeof(u32) + | ||
3267 | sizeof(sr_rec); | ||
3268 | |||
3269 | if (pagelist->length + need > RECONNECT_MAX_SIZE) { | ||
3270 | err = send_reconnect_partial(recon_state); | ||
3271 | if (err) | ||
3272 | goto fail; | ||
3273 | pagelist = recon_state->pagelist; | ||
3274 | } | ||
3275 | |||
3276 | err = ceph_pagelist_reserve(pagelist, need); | ||
3277 | if (err) | ||
3278 | goto fail; | ||
3279 | |||
3280 | ceph_pagelist_encode_8(pagelist, 1); | ||
3281 | ceph_pagelist_encode_8(pagelist, 1); | ||
3282 | ceph_pagelist_encode_32(pagelist, sizeof(sr_rec)); | ||
3283 | } | ||
3284 | |||
3285 | dout(" adding snap realm %llx seq %lld parent %llx\n", | ||
3286 | realm->ino, realm->seq, realm->parent_ino); | ||
3287 | sr_rec.ino = cpu_to_le64(realm->ino); | ||
3288 | sr_rec.seq = cpu_to_le64(realm->seq); | ||
3289 | sr_rec.parent = cpu_to_le64(realm->parent_ino); | ||
3290 | |||
3291 | err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec)); | ||
3292 | if (err) | ||
3293 | goto fail; | ||
3294 | |||
3295 | recon_state->nr_realms++; | ||
3296 | } | ||
3297 | fail: | ||
3153 | return err; | 3298 | return err; |
3154 | } | 3299 | } |
3155 | 3300 | ||
@@ -3170,18 +3315,17 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
3170 | struct ceph_mds_session *session) | 3315 | struct ceph_mds_session *session) |
3171 | { | 3316 | { |
3172 | struct ceph_msg *reply; | 3317 | struct ceph_msg *reply; |
3173 | struct rb_node *p; | ||
3174 | int mds = session->s_mds; | 3318 | int mds = session->s_mds; |
3175 | int err = -ENOMEM; | 3319 | int err = -ENOMEM; |
3176 | int s_nr_caps; | 3320 | struct ceph_reconnect_state recon_state = { |
3177 | struct ceph_pagelist *pagelist; | 3321 | .session = session, |
3178 | struct ceph_reconnect_state recon_state; | 3322 | }; |
3179 | LIST_HEAD(dispose); | 3323 | LIST_HEAD(dispose); |
3180 | 3324 | ||
3181 | pr_info("mds%d reconnect start\n", mds); | 3325 | pr_info("mds%d reconnect start\n", mds); |
3182 | 3326 | ||
3183 | pagelist = ceph_pagelist_alloc(GFP_NOFS); | 3327 | recon_state.pagelist = ceph_pagelist_alloc(GFP_NOFS); |
3184 | if (!pagelist) | 3328 | if (!recon_state.pagelist) |
3185 | goto fail_nopagelist; | 3329 | goto fail_nopagelist; |
3186 | 3330 | ||
3187 | reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false); | 3331 | reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false); |
@@ -3225,63 +3369,90 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
3225 | /* replay unsafe requests */ | 3369 | /* replay unsafe requests */ |
3226 | replay_unsafe_requests(mdsc, session); | 3370 | replay_unsafe_requests(mdsc, session); |
3227 | 3371 | ||
3372 | ceph_early_kick_flushing_caps(mdsc, session); | ||
3373 | |||
3228 | down_read(&mdsc->snap_rwsem); | 3374 | down_read(&mdsc->snap_rwsem); |
3229 | 3375 | ||
3230 | /* traverse this session's caps */ | 3376 | /* placeholder for nr_caps */ |
3231 | s_nr_caps = session->s_nr_caps; | 3377 | err = ceph_pagelist_encode_32(recon_state.pagelist, 0); |
3232 | err = ceph_pagelist_encode_32(pagelist, s_nr_caps); | ||
3233 | if (err) | 3378 | if (err) |
3234 | goto fail; | 3379 | goto fail; |
3235 | 3380 | ||
3236 | recon_state.nr_caps = 0; | 3381 | if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) { |
3237 | recon_state.pagelist = pagelist; | ||
3238 | if (session->s_con.peer_features & CEPH_FEATURE_MDSENC) | ||
3239 | recon_state.msg_version = 3; | 3382 | recon_state.msg_version = 3; |
3240 | else | 3383 | recon_state.allow_multi = true; |
3384 | } else if (session->s_con.peer_features & CEPH_FEATURE_MDSENC) { | ||
3385 | recon_state.msg_version = 3; | ||
3386 | } else { | ||
3241 | recon_state.msg_version = 2; | 3387 | recon_state.msg_version = 2; |
3388 | } | ||
3389 | /* traverse this session's caps */ | ||
3242 | err = iterate_session_caps(session, encode_caps_cb, &recon_state); | 3390 | err = iterate_session_caps(session, encode_caps_cb, &recon_state); |
3243 | if (err < 0) | ||
3244 | goto fail; | ||
3245 | 3391 | ||
3246 | spin_lock(&session->s_cap_lock); | 3392 | spin_lock(&session->s_cap_lock); |
3247 | session->s_cap_reconnect = 0; | 3393 | session->s_cap_reconnect = 0; |
3248 | spin_unlock(&session->s_cap_lock); | 3394 | spin_unlock(&session->s_cap_lock); |
3249 | 3395 | ||
3250 | /* | 3396 | if (err < 0) |
3251 | * snaprealms. we provide mds with the ino, seq (version), and | 3397 | goto fail; |
3252 | * parent for all of our realms. If the mds has any newer info, | ||
3253 | * it will tell us. | ||
3254 | */ | ||
3255 | for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) { | ||
3256 | struct ceph_snap_realm *realm = | ||
3257 | rb_entry(p, struct ceph_snap_realm, node); | ||
3258 | struct ceph_mds_snaprealm_reconnect sr_rec; | ||
3259 | 3398 | ||
3260 | dout(" adding snap realm %llx seq %lld parent %llx\n", | 3399 | /* check if all realms can be encoded into current message */ |
3261 | realm->ino, realm->seq, realm->parent_ino); | 3400 | if (mdsc->num_snap_realms) { |
3262 | sr_rec.ino = cpu_to_le64(realm->ino); | 3401 | size_t total_len = |
3263 | sr_rec.seq = cpu_to_le64(realm->seq); | 3402 | recon_state.pagelist->length + |
3264 | sr_rec.parent = cpu_to_le64(realm->parent_ino); | 3403 | mdsc->num_snap_realms * |
3265 | err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec)); | 3404 | sizeof(struct ceph_mds_snaprealm_reconnect); |
3266 | if (err) | 3405 | if (recon_state.msg_version >= 4) { |
3267 | goto fail; | 3406 | /* number of realms */ |
3407 | total_len += sizeof(u32); | ||
3408 | /* version, compat_version and struct_len */ | ||
3409 | total_len += mdsc->num_snap_realms * | ||
3410 | (2 * sizeof(u8) + sizeof(u32)); | ||
3411 | } | ||
3412 | if (total_len > RECONNECT_MAX_SIZE) { | ||
3413 | if (!recon_state.allow_multi) { | ||
3414 | err = -ENOSPC; | ||
3415 | goto fail; | ||
3416 | } | ||
3417 | if (recon_state.nr_caps) { | ||
3418 | err = send_reconnect_partial(&recon_state); | ||
3419 | if (err) | ||
3420 | goto fail; | ||
3421 | } | ||
3422 | recon_state.msg_version = 5; | ||
3423 | } | ||
3268 | } | 3424 | } |
3269 | 3425 | ||
3270 | reply->hdr.version = cpu_to_le16(recon_state.msg_version); | 3426 | err = encode_snap_realms(mdsc, &recon_state); |
3427 | if (err < 0) | ||
3428 | goto fail; | ||
3429 | |||
3430 | if (recon_state.msg_version >= 5) { | ||
3431 | err = ceph_pagelist_encode_8(recon_state.pagelist, 0); | ||
3432 | if (err < 0) | ||
3433 | goto fail; | ||
3434 | } | ||
3271 | 3435 | ||
3272 | /* raced with cap release? */ | 3436 | if (recon_state.nr_caps || recon_state.nr_realms) { |
3273 | if (s_nr_caps != recon_state.nr_caps) { | 3437 | struct page *page = |
3274 | struct page *page = list_first_entry(&pagelist->head, | 3438 | list_first_entry(&recon_state.pagelist->head, |
3275 | struct page, lru); | 3439 | struct page, lru); |
3276 | __le32 *addr = kmap_atomic(page); | 3440 | __le32 *addr = kmap_atomic(page); |
3277 | *addr = cpu_to_le32(recon_state.nr_caps); | 3441 | if (recon_state.nr_caps) { |
3442 | WARN_ON(recon_state.nr_realms != mdsc->num_snap_realms); | ||
3443 | *addr = cpu_to_le32(recon_state.nr_caps); | ||
3444 | } else if (recon_state.msg_version >= 4) { | ||
3445 | *(addr + 1) = cpu_to_le32(recon_state.nr_realms); | ||
3446 | } | ||
3278 | kunmap_atomic(addr); | 3447 | kunmap_atomic(addr); |
3279 | } | 3448 | } |
3280 | 3449 | ||
3281 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 3450 | reply->hdr.version = cpu_to_le16(recon_state.msg_version); |
3282 | ceph_msg_data_add_pagelist(reply, pagelist); | 3451 | if (recon_state.msg_version >= 4) |
3452 | reply->hdr.compat_version = cpu_to_le16(4); | ||
3283 | 3453 | ||
3284 | ceph_early_kick_flushing_caps(mdsc, session); | 3454 | reply->hdr.data_len = cpu_to_le32(recon_state.pagelist->length); |
3455 | ceph_msg_data_add_pagelist(reply, recon_state.pagelist); | ||
3285 | 3456 | ||
3286 | ceph_con_send(&session->s_con, reply); | 3457 | ceph_con_send(&session->s_con, reply); |
3287 | 3458 | ||
@@ -3292,7 +3463,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
3292 | mutex_unlock(&mdsc->mutex); | 3463 | mutex_unlock(&mdsc->mutex); |
3293 | 3464 | ||
3294 | up_read(&mdsc->snap_rwsem); | 3465 | up_read(&mdsc->snap_rwsem); |
3295 | ceph_pagelist_release(pagelist); | 3466 | ceph_pagelist_release(recon_state.pagelist); |
3296 | return; | 3467 | return; |
3297 | 3468 | ||
3298 | fail: | 3469 | fail: |
@@ -3300,7 +3471,7 @@ fail: | |||
3300 | up_read(&mdsc->snap_rwsem); | 3471 | up_read(&mdsc->snap_rwsem); |
3301 | mutex_unlock(&session->s_mutex); | 3472 | mutex_unlock(&session->s_mutex); |
3302 | fail_nomsg: | 3473 | fail_nomsg: |
3303 | ceph_pagelist_release(pagelist); | 3474 | ceph_pagelist_release(recon_state.pagelist); |
3304 | fail_nopagelist: | 3475 | fail_nopagelist: |
3305 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); | 3476 | pr_err("error %d preparing reconnect for mds%d\n", err, mds); |
3306 | return; | 3477 | return; |
@@ -3698,6 +3869,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) | |||
3698 | init_rwsem(&mdsc->snap_rwsem); | 3869 | init_rwsem(&mdsc->snap_rwsem); |
3699 | mdsc->snap_realms = RB_ROOT; | 3870 | mdsc->snap_realms = RB_ROOT; |
3700 | INIT_LIST_HEAD(&mdsc->snap_empty); | 3871 | INIT_LIST_HEAD(&mdsc->snap_empty); |
3872 | mdsc->num_snap_realms = 0; | ||
3701 | spin_lock_init(&mdsc->snap_empty_lock); | 3873 | spin_lock_init(&mdsc->snap_empty_lock); |
3702 | mdsc->last_tid = 0; | 3874 | mdsc->last_tid = 0; |
3703 | mdsc->oldest_tid = 0; | 3875 | mdsc->oldest_tid = 0; |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 0d3264cf3334..4f962642fee4 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -21,11 +21,13 @@ | |||
21 | #define CEPHFS_FEATURE_REPLY_ENCODING 9 | 21 | #define CEPHFS_FEATURE_REPLY_ENCODING 9 |
22 | #define CEPHFS_FEATURE_RECLAIM_CLIENT 10 | 22 | #define CEPHFS_FEATURE_RECLAIM_CLIENT 10 |
23 | #define CEPHFS_FEATURE_LAZY_CAP_WANTED 11 | 23 | #define CEPHFS_FEATURE_LAZY_CAP_WANTED 11 |
24 | #define CEPHFS_FEATURE_MULTI_RECONNECT 12 | ||
24 | 25 | ||
25 | #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ | 26 | #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ |
26 | 0, 1, 2, 3, 4, 5, 6, 7, \ | 27 | 0, 1, 2, 3, 4, 5, 6, 7, \ |
27 | CEPHFS_FEATURE_MIMIC, \ | 28 | CEPHFS_FEATURE_MIMIC, \ |
28 | CEPHFS_FEATURE_LAZY_CAP_WANTED, \ | 29 | CEPHFS_FEATURE_LAZY_CAP_WANTED, \ |
30 | CEPHFS_FEATURE_MULTI_RECONNECT, \ | ||
29 | } | 31 | } |
30 | #define CEPHFS_FEATURES_CLIENT_REQUIRED {} | 32 | #define CEPHFS_FEATURES_CLIENT_REQUIRED {} |
31 | 33 | ||
@@ -342,6 +344,7 @@ struct ceph_mds_client { | |||
342 | struct rw_semaphore snap_rwsem; | 344 | struct rw_semaphore snap_rwsem; |
343 | struct rb_root snap_realms; | 345 | struct rb_root snap_realms; |
344 | struct list_head snap_empty; | 346 | struct list_head snap_empty; |
347 | int num_snap_realms; | ||
345 | spinlock_t snap_empty_lock; /* protect snap_empty */ | 348 | spinlock_t snap_empty_lock; /* protect snap_empty */ |
346 | 349 | ||
347 | u64 last_tid; /* most recent mds request */ | 350 | u64 last_tid; /* most recent mds request */ |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index f74193da0e09..dfc25ceeffed 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -124,6 +124,8 @@ static struct ceph_snap_realm *ceph_create_snap_realm( | |||
124 | INIT_LIST_HEAD(&realm->inodes_with_caps); | 124 | INIT_LIST_HEAD(&realm->inodes_with_caps); |
125 | spin_lock_init(&realm->inodes_with_caps_lock); | 125 | spin_lock_init(&realm->inodes_with_caps_lock); |
126 | __insert_snap_realm(&mdsc->snap_realms, realm); | 126 | __insert_snap_realm(&mdsc->snap_realms, realm); |
127 | mdsc->num_snap_realms++; | ||
128 | |||
127 | dout("create_snap_realm %llx %p\n", realm->ino, realm); | 129 | dout("create_snap_realm %llx %p\n", realm->ino, realm); |
128 | return realm; | 130 | return realm; |
129 | } | 131 | } |
@@ -175,6 +177,7 @@ static void __destroy_snap_realm(struct ceph_mds_client *mdsc, | |||
175 | dout("__destroy_snap_realm %p %llx\n", realm, realm->ino); | 177 | dout("__destroy_snap_realm %p %llx\n", realm, realm->ino); |
176 | 178 | ||
177 | rb_erase(&realm->node, &mdsc->snap_realms); | 179 | rb_erase(&realm->node, &mdsc->snap_realms); |
180 | mdsc->num_snap_realms--; | ||
178 | 181 | ||
179 | if (realm->parent) { | 182 | if (realm->parent) { |
180 | list_del_init(&realm->child_item); | 183 | list_del_init(&realm->child_item); |