author     Yan, Zheng <zyan@redhat.com>         2015-05-14 05:22:42 -0400
committer  Ilya Dryomov <idryomov@gmail.com>    2015-06-25 04:49:29 -0400
commit     745a8e3bccbc6adae69a98ddc525e529aa44636e (patch)
tree       96d8106a0800a839d8f6bffaf7485b92dcf78a6c /fs/ceph/caps.c
parent     affbc19a68f9966ad65a773db405f78e2bafc07b (diff)
ceph: don't pre-allocate space for cap release messages
Previously we pre-allocated a cap release message for each cap, which
wastes lots of memory when there are a large number of caps. This patch
makes the code stop pre-allocating cap release messages. Instead, we
add the corresponding ceph_cap struct to a list when releasing a cap.
Later, when a flush of cap releases is needed, we allocate the cap
release messages dynamically.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
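
To make the new scheme concrete, below is a minimal userspace C sketch of the
idea in this message: releasing a cap only queues the cap struct on a
per-session list, and release messages are built later when a flush is
requested. The types and names here are illustrative stand-ins, not the actual
fs/ceph structures, and the LIFO push is a simplification (the kernel appends
to session->s_cap_releases with list_add_tail under s_cap_lock).

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* Illustrative stand-ins for ceph_cap and ceph_mds_session. */
struct cap {
        uint64_t ino;
        uint64_t cap_id;
        struct cap *next;
};

struct session {
        struct cap *release_head;       /* models session->s_cap_releases */
        int num_cap_releases;           /* models session->s_num_cap_releases */
};

/* On release, only queue the cap struct -- no message is allocated here. */
static void queue_cap_release(struct session *s, struct cap *cap)
{
        cap->next = s->release_head;
        s->release_head = cap;
        s->num_cap_releases++;
}

/* At flush time, walk the queue and emit release entries dynamically.
 * A real implementation would batch entries into message buffers. */
static void flush_cap_releases(struct session *s)
{
        while (s->release_head) {
                struct cap *cap = s->release_head;

                s->release_head = cap->next;
                s->num_cap_releases--;
                printf("release ino %llx cap_id %llu\n",
                       (unsigned long long)cap->ino,
                       (unsigned long long)cap->cap_id);
                free(cap);
        }
}

int main(void)
{
        struct session s = { NULL, 0 };
        struct cap *c = malloc(sizeof(*c));

        if (!c)
                return 1;
        c->ino = 0x10000000001ULL;
        c->cap_id = 42;
        c->next = NULL;
        queue_cap_release(&s, c);
        flush_cap_releases(&s);
        return 0;
}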
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r--  fs/ceph/caps.c | 85
1 file changed, 29 insertions(+), 56 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index bbd969e16a01..245ca381a6dc 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -926,16 +926,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 
         /* remove from session list */
         spin_lock(&session->s_cap_lock);
-        /*
-         * s_cap_reconnect is protected by s_cap_lock. no one changes
-         * s_cap_gen while session is in the reconnect state.
-         */
-        if (queue_release &&
-            (!session->s_cap_reconnect ||
-             cap->cap_gen == session->s_cap_gen))
-                __queue_cap_release(session, ci->i_vino.ino, cap->cap_id,
-                                    cap->mseq, cap->issue_seq);
-
         if (session->s_cap_iterator == cap) {
                 /* not yet, we are iterating over this very cap */
                 dout("__ceph_remove_cap delaying %p removal from session %p\n",
@@ -948,6 +938,25 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
         }
         /* protect backpointer with s_cap_lock: see iterate_session_caps */
         cap->ci = NULL;
+
+        /*
+         * s_cap_reconnect is protected by s_cap_lock. no one changes
+         * s_cap_gen while session is in the reconnect state.
+         */
+        if (queue_release &&
+            (!session->s_cap_reconnect || cap->cap_gen == session->s_cap_gen)) {
+                cap->queue_release = 1;
+                if (removed) {
+                        list_add_tail(&cap->session_caps,
+                                      &session->s_cap_releases);
+                        session->s_num_cap_releases++;
+                        removed = 0;
+                }
+        } else {
+                cap->queue_release = 0;
+        }
+        cap->cap_ino = ci->i_vino.ino;
+
         spin_unlock(&session->s_cap_lock);
 
         /* remove from inode list */
@@ -1053,44 +1062,6 @@ static int send_cap_msg(struct ceph_mds_session *session,
         return 0;
 }
 
-void __queue_cap_release(struct ceph_mds_session *session,
-                         u64 ino, u64 cap_id, u32 migrate_seq,
-                         u32 issue_seq)
-{
-        struct ceph_msg *msg;
-        struct ceph_mds_cap_release *head;
-        struct ceph_mds_cap_item *item;
-
-        BUG_ON(!session->s_num_cap_releases);
-        msg = list_first_entry(&session->s_cap_releases,
-                               struct ceph_msg, list_head);
-
-        dout(" adding %llx release to mds%d msg %p (%d left)\n",
-             ino, session->s_mds, msg, session->s_num_cap_releases);
-
-        BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
-        head = msg->front.iov_base;
-        le32_add_cpu(&head->num, 1);
-        item = msg->front.iov_base + msg->front.iov_len;
-        item->ino = cpu_to_le64(ino);
-        item->cap_id = cpu_to_le64(cap_id);
-        item->migrate_seq = cpu_to_le32(migrate_seq);
-        item->seq = cpu_to_le32(issue_seq);
-
-        session->s_num_cap_releases--;
-
-        msg->front.iov_len += sizeof(*item);
-        if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
-                dout(" release msg %p full\n", msg);
-                list_move_tail(&msg->list_head, &session->s_cap_releases_done);
-        } else {
-                dout(" release msg %p at %d/%d (%d)\n", msg,
-                     (int)le32_to_cpu(head->num),
-                     (int)CEPH_CAPS_PER_RELEASE,
-                     (int)msg->front.iov_len);
-        }
-}
-
 /*
  * Queue cap releases when an inode is dropped from our cache. Since
  * inode is about to be destroyed, there is no need for i_ceph_lock.
@@ -3051,7 +3022,6 @@ retry:
                         mutex_lock_nested(&session->s_mutex,
                                           SINGLE_DEPTH_NESTING);
                 }
-                ceph_add_cap_releases(mdsc, tsession);
                 new_cap = ceph_get_cap(mdsc, NULL);
         } else {
                 WARN_ON(1);
@@ -3247,16 +3217,20 @@ void ceph_handle_caps(struct ceph_mds_session *session,
         dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
              (unsigned)seq);
 
-        if (op == CEPH_CAP_OP_IMPORT)
-                ceph_add_cap_releases(mdsc, session);
-
         if (!inode) {
                 dout(" i don't have ino %llx\n", vino.ino);
 
                 if (op == CEPH_CAP_OP_IMPORT) {
+                        cap = ceph_get_cap(mdsc, NULL);
+                        cap->cap_ino = vino.ino;
+                        cap->queue_release = 1;
+                        cap->cap_id = cap_id;
+                        cap->mseq = mseq;
+                        cap->seq = seq;
                         spin_lock(&session->s_cap_lock);
-                        __queue_cap_release(session, vino.ino, cap_id,
-                                            mseq, seq);
+                        list_add_tail(&cap->session_caps,
+                                      &session->s_cap_releases);
+                        session->s_num_cap_releases++;
                         spin_unlock(&session->s_cap_lock);
                 }
                 goto flush_cap_releases;
@@ -3332,11 +3306,10 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
 flush_cap_releases:
         /*
-         * send any full release message to try to move things
+         * send any cap release message to try to move things
          * along for the mds (who clearly thinks we still have this
          * cap).
          */
-        ceph_add_cap_releases(mdsc, session);
         ceph_send_cap_releases(mdsc, session);
 
 done:
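
As a side note, the check added to __ceph_remove_cap() in the first hunks only
queues a release when the session is not reconnecting or the cap's generation
matches the session's generation; releases for stale caps from an older
generation are simply dropped. A standalone C sketch of just that decision,
using illustrative types rather than the kernel's ceph_mds_session/ceph_cap
structures, might look like this:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins, not the kernel's session/cap structures. */
struct sess {
        bool reconnecting;      /* models session->s_cap_reconnect */
        unsigned int cap_gen;   /* models session->s_cap_gen */
};

struct cap {
        unsigned int cap_gen;   /* generation the cap was issued under */
};

/* Queue a release only if we are not reconnecting, or the cap belongs
 * to the current generation; otherwise the release is dropped. */
static bool should_queue_release(const struct sess *s, const struct cap *cap)
{
        return !s->reconnecting || cap->cap_gen == s->cap_gen;
}

int main(void)
{
        struct sess s = { true, 5 };
        struct cap stale = { 4 };
        struct cap fresh = { 5 };

        printf("stale cap queued: %d\n", should_queue_release(&s, &stale)); /* 0 */
        printf("fresh cap queued: %d\n", should_queue_release(&s, &fresh)); /* 1 */
        return 0;
}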