diff options
author | Yan, Zheng <zyan@redhat.com> | 2015-05-14 05:22:42 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2015-06-25 04:49:29 -0400 |
commit | 745a8e3bccbc6adae69a98ddc525e529aa44636e (patch) | |
tree | 96d8106a0800a839d8f6bffaf7485b92dcf78a6c /fs/ceph | |
parent | affbc19a68f9966ad65a773db405f78e2bafc07b (diff) |
ceph: don't pre-allocate space for cap release messages
Previously we pre-allocated a cap release message for each cap. This
wastes lots of memory when there is a large number of caps. This patch
makes the code not pre-allocate the cap release messages. Instead,
we add the corresponding ceph_cap struct to a list when releasing a
cap. Later, when a flush of cap releases is needed, we allocate the cap
release messages dynamically.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/caps.c | 85 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 223 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 3 | ||||
-rw-r--r-- | fs/ceph/super.h | 20 |
4 files changed, 129 insertions, 202 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index bbd969e16a01..245ca381a6dc 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -926,16 +926,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) | |||
926 | 926 | ||
927 | /* remove from session list */ | 927 | /* remove from session list */ |
928 | spin_lock(&session->s_cap_lock); | 928 | spin_lock(&session->s_cap_lock); |
929 | /* | ||
930 | * s_cap_reconnect is protected by s_cap_lock. no one changes | ||
931 | * s_cap_gen while session is in the reconnect state. | ||
932 | */ | ||
933 | if (queue_release && | ||
934 | (!session->s_cap_reconnect || | ||
935 | cap->cap_gen == session->s_cap_gen)) | ||
936 | __queue_cap_release(session, ci->i_vino.ino, cap->cap_id, | ||
937 | cap->mseq, cap->issue_seq); | ||
938 | |||
939 | if (session->s_cap_iterator == cap) { | 929 | if (session->s_cap_iterator == cap) { |
940 | /* not yet, we are iterating over this very cap */ | 930 | /* not yet, we are iterating over this very cap */ |
941 | dout("__ceph_remove_cap delaying %p removal from session %p\n", | 931 | dout("__ceph_remove_cap delaying %p removal from session %p\n", |
@@ -948,6 +938,25 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) | |||
948 | } | 938 | } |
949 | /* protect backpointer with s_cap_lock: see iterate_session_caps */ | 939 | /* protect backpointer with s_cap_lock: see iterate_session_caps */ |
950 | cap->ci = NULL; | 940 | cap->ci = NULL; |
941 | |||
942 | /* | ||
943 | * s_cap_reconnect is protected by s_cap_lock. no one changes | ||
944 | * s_cap_gen while session is in the reconnect state. | ||
945 | */ | ||
946 | if (queue_release && | ||
947 | (!session->s_cap_reconnect || cap->cap_gen == session->s_cap_gen)) { | ||
948 | cap->queue_release = 1; | ||
949 | if (removed) { | ||
950 | list_add_tail(&cap->session_caps, | ||
951 | &session->s_cap_releases); | ||
952 | session->s_num_cap_releases++; | ||
953 | removed = 0; | ||
954 | } | ||
955 | } else { | ||
956 | cap->queue_release = 0; | ||
957 | } | ||
958 | cap->cap_ino = ci->i_vino.ino; | ||
959 | |||
951 | spin_unlock(&session->s_cap_lock); | 960 | spin_unlock(&session->s_cap_lock); |
952 | 961 | ||
953 | /* remove from inode list */ | 962 | /* remove from inode list */ |
@@ -1053,44 +1062,6 @@ static int send_cap_msg(struct ceph_mds_session *session, | |||
1053 | return 0; | 1062 | return 0; |
1054 | } | 1063 | } |
1055 | 1064 | ||
1056 | void __queue_cap_release(struct ceph_mds_session *session, | ||
1057 | u64 ino, u64 cap_id, u32 migrate_seq, | ||
1058 | u32 issue_seq) | ||
1059 | { | ||
1060 | struct ceph_msg *msg; | ||
1061 | struct ceph_mds_cap_release *head; | ||
1062 | struct ceph_mds_cap_item *item; | ||
1063 | |||
1064 | BUG_ON(!session->s_num_cap_releases); | ||
1065 | msg = list_first_entry(&session->s_cap_releases, | ||
1066 | struct ceph_msg, list_head); | ||
1067 | |||
1068 | dout(" adding %llx release to mds%d msg %p (%d left)\n", | ||
1069 | ino, session->s_mds, msg, session->s_num_cap_releases); | ||
1070 | |||
1071 | BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); | ||
1072 | head = msg->front.iov_base; | ||
1073 | le32_add_cpu(&head->num, 1); | ||
1074 | item = msg->front.iov_base + msg->front.iov_len; | ||
1075 | item->ino = cpu_to_le64(ino); | ||
1076 | item->cap_id = cpu_to_le64(cap_id); | ||
1077 | item->migrate_seq = cpu_to_le32(migrate_seq); | ||
1078 | item->seq = cpu_to_le32(issue_seq); | ||
1079 | |||
1080 | session->s_num_cap_releases--; | ||
1081 | |||
1082 | msg->front.iov_len += sizeof(*item); | ||
1083 | if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { | ||
1084 | dout(" release msg %p full\n", msg); | ||
1085 | list_move_tail(&msg->list_head, &session->s_cap_releases_done); | ||
1086 | } else { | ||
1087 | dout(" release msg %p at %d/%d (%d)\n", msg, | ||
1088 | (int)le32_to_cpu(head->num), | ||
1089 | (int)CEPH_CAPS_PER_RELEASE, | ||
1090 | (int)msg->front.iov_len); | ||
1091 | } | ||
1092 | } | ||
1093 | |||
1094 | /* | 1065 | /* |
1095 | * Queue cap releases when an inode is dropped from our cache. Since | 1066 | * Queue cap releases when an inode is dropped from our cache. Since |
1096 | * inode is about to be destroyed, there is no need for i_ceph_lock. | 1067 | * inode is about to be destroyed, there is no need for i_ceph_lock. |
@@ -3051,7 +3022,6 @@ retry: | |||
3051 | mutex_lock_nested(&session->s_mutex, | 3022 | mutex_lock_nested(&session->s_mutex, |
3052 | SINGLE_DEPTH_NESTING); | 3023 | SINGLE_DEPTH_NESTING); |
3053 | } | 3024 | } |
3054 | ceph_add_cap_releases(mdsc, tsession); | ||
3055 | new_cap = ceph_get_cap(mdsc, NULL); | 3025 | new_cap = ceph_get_cap(mdsc, NULL); |
3056 | } else { | 3026 | } else { |
3057 | WARN_ON(1); | 3027 | WARN_ON(1); |
@@ -3247,16 +3217,20 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3247 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, | 3217 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, |
3248 | (unsigned)seq); | 3218 | (unsigned)seq); |
3249 | 3219 | ||
3250 | if (op == CEPH_CAP_OP_IMPORT) | ||
3251 | ceph_add_cap_releases(mdsc, session); | ||
3252 | |||
3253 | if (!inode) { | 3220 | if (!inode) { |
3254 | dout(" i don't have ino %llx\n", vino.ino); | 3221 | dout(" i don't have ino %llx\n", vino.ino); |
3255 | 3222 | ||
3256 | if (op == CEPH_CAP_OP_IMPORT) { | 3223 | if (op == CEPH_CAP_OP_IMPORT) { |
3224 | cap = ceph_get_cap(mdsc, NULL); | ||
3225 | cap->cap_ino = vino.ino; | ||
3226 | cap->queue_release = 1; | ||
3227 | cap->cap_id = cap_id; | ||
3228 | cap->mseq = mseq; | ||
3229 | cap->seq = seq; | ||
3257 | spin_lock(&session->s_cap_lock); | 3230 | spin_lock(&session->s_cap_lock); |
3258 | __queue_cap_release(session, vino.ino, cap_id, | 3231 | list_add_tail(&cap->session_caps, |
3259 | mseq, seq); | 3232 | &session->s_cap_releases); |
3233 | session->s_num_cap_releases++; | ||
3260 | spin_unlock(&session->s_cap_lock); | 3234 | spin_unlock(&session->s_cap_lock); |
3261 | } | 3235 | } |
3262 | goto flush_cap_releases; | 3236 | goto flush_cap_releases; |
@@ -3332,11 +3306,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3332 | 3306 | ||
3333 | flush_cap_releases: | 3307 | flush_cap_releases: |
3334 | /* | 3308 | /* |
3335 | * send any full release message to try to move things | 3309 | * send any cap release message to try to move things |
3336 | * along for the mds (who clearly thinks we still have this | 3310 | * along for the mds (who clearly thinks we still have this |
3337 | * cap). | 3311 | * cap). |
3338 | */ | 3312 | */ |
3339 | ceph_add_cap_releases(mdsc, session); | ||
3340 | ceph_send_cap_releases(mdsc, session); | 3313 | ceph_send_cap_releases(mdsc, session); |
3341 | 3314 | ||
3342 | done: | 3315 | done: |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 2bb9264b9225..76eb14489bfa 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -458,7 +458,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
458 | s->s_cap_reconnect = 0; | 458 | s->s_cap_reconnect = 0; |
459 | s->s_cap_iterator = NULL; | 459 | s->s_cap_iterator = NULL; |
460 | INIT_LIST_HEAD(&s->s_cap_releases); | 460 | INIT_LIST_HEAD(&s->s_cap_releases); |
461 | INIT_LIST_HEAD(&s->s_cap_releases_done); | ||
462 | INIT_LIST_HEAD(&s->s_cap_flushing); | 461 | INIT_LIST_HEAD(&s->s_cap_flushing); |
463 | INIT_LIST_HEAD(&s->s_cap_snaps_flushing); | 462 | INIT_LIST_HEAD(&s->s_cap_snaps_flushing); |
464 | 463 | ||
@@ -998,27 +997,25 @@ void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, | |||
998 | * session caps | 997 | * session caps |
999 | */ | 998 | */ |
1000 | 999 | ||
1001 | /* | 1000 | /* caller holds s_cap_lock, we drop it */ |
1002 | * Free preallocated cap messages assigned to this session | 1001 | static void cleanup_cap_releases(struct ceph_mds_client *mdsc, |
1003 | */ | 1002 | struct ceph_mds_session *session) |
1004 | static void cleanup_cap_releases(struct ceph_mds_session *session) | 1003 | __releases(session->s_cap_lock) |
1005 | { | 1004 | { |
1006 | struct ceph_msg *msg; | 1005 | LIST_HEAD(tmp_list); |
1006 | list_splice_init(&session->s_cap_releases, &tmp_list); | ||
1007 | session->s_num_cap_releases = 0; | ||
1008 | spin_unlock(&session->s_cap_lock); | ||
1007 | 1009 | ||
1008 | spin_lock(&session->s_cap_lock); | 1010 | dout("cleanup_cap_releases mds%d\n", session->s_mds); |
1009 | while (!list_empty(&session->s_cap_releases)) { | 1011 | while (!list_empty(&tmp_list)) { |
1010 | msg = list_first_entry(&session->s_cap_releases, | 1012 | struct ceph_cap *cap; |
1011 | struct ceph_msg, list_head); | 1013 | /* zero out the in-progress message */ |
1012 | list_del_init(&msg->list_head); | 1014 | cap = list_first_entry(&tmp_list, |
1013 | ceph_msg_put(msg); | 1015 | struct ceph_cap, session_caps); |
1014 | } | 1016 | list_del(&cap->session_caps); |
1015 | while (!list_empty(&session->s_cap_releases_done)) { | 1017 | ceph_put_cap(mdsc, cap); |
1016 | msg = list_first_entry(&session->s_cap_releases_done, | ||
1017 | struct ceph_msg, list_head); | ||
1018 | list_del_init(&msg->list_head); | ||
1019 | ceph_msg_put(msg); | ||
1020 | } | 1018 | } |
1021 | spin_unlock(&session->s_cap_lock); | ||
1022 | } | 1019 | } |
1023 | 1020 | ||
1024 | static void cleanup_session_requests(struct ceph_mds_client *mdsc, | 1021 | static void cleanup_session_requests(struct ceph_mds_client *mdsc, |
@@ -1095,10 +1092,16 @@ static int iterate_session_caps(struct ceph_mds_session *session, | |||
1095 | dout("iterate_session_caps finishing cap %p removal\n", | 1092 | dout("iterate_session_caps finishing cap %p removal\n", |
1096 | cap); | 1093 | cap); |
1097 | BUG_ON(cap->session != session); | 1094 | BUG_ON(cap->session != session); |
1095 | cap->session = NULL; | ||
1098 | list_del_init(&cap->session_caps); | 1096 | list_del_init(&cap->session_caps); |
1099 | session->s_nr_caps--; | 1097 | session->s_nr_caps--; |
1100 | cap->session = NULL; | 1098 | if (cap->queue_release) { |
1101 | old_cap = cap; /* put_cap it w/o locks held */ | 1099 | list_add_tail(&cap->session_caps, |
1100 | &session->s_cap_releases); | ||
1101 | session->s_num_cap_releases++; | ||
1102 | } else { | ||
1103 | old_cap = cap; /* put_cap it w/o locks held */ | ||
1104 | } | ||
1102 | } | 1105 | } |
1103 | if (ret < 0) | 1106 | if (ret < 0) |
1104 | goto out; | 1107 | goto out; |
@@ -1191,11 +1194,12 @@ static void remove_session_caps(struct ceph_mds_session *session) | |||
1191 | spin_lock(&session->s_cap_lock); | 1194 | spin_lock(&session->s_cap_lock); |
1192 | } | 1195 | } |
1193 | } | 1196 | } |
1194 | spin_unlock(&session->s_cap_lock); | 1197 | |
1198 | // drop cap expires and unlock s_cap_lock | ||
1199 | cleanup_cap_releases(session->s_mdsc, session); | ||
1195 | 1200 | ||
1196 | BUG_ON(session->s_nr_caps > 0); | 1201 | BUG_ON(session->s_nr_caps > 0); |
1197 | BUG_ON(!list_empty(&session->s_cap_flushing)); | 1202 | BUG_ON(!list_empty(&session->s_cap_flushing)); |
1198 | cleanup_cap_releases(session); | ||
1199 | } | 1203 | } |
1200 | 1204 | ||
1201 | /* | 1205 | /* |
@@ -1418,76 +1422,10 @@ static int trim_caps(struct ceph_mds_client *mdsc, | |||
1418 | session->s_trim_caps = 0; | 1422 | session->s_trim_caps = 0; |
1419 | } | 1423 | } |
1420 | 1424 | ||
1421 | ceph_add_cap_releases(mdsc, session); | ||
1422 | ceph_send_cap_releases(mdsc, session); | 1425 | ceph_send_cap_releases(mdsc, session); |
1423 | return 0; | 1426 | return 0; |
1424 | } | 1427 | } |
1425 | 1428 | ||
1426 | /* | ||
1427 | * Allocate cap_release messages. If there is a partially full message | ||
1428 | * in the queue, try to allocate enough to cover it's remainder, so that | ||
1429 | * we can send it immediately. | ||
1430 | * | ||
1431 | * Called under s_mutex. | ||
1432 | */ | ||
1433 | int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | ||
1434 | struct ceph_mds_session *session) | ||
1435 | { | ||
1436 | struct ceph_msg *msg, *partial = NULL; | ||
1437 | struct ceph_mds_cap_release *head; | ||
1438 | int err = -ENOMEM; | ||
1439 | int extra = mdsc->fsc->mount_options->cap_release_safety; | ||
1440 | int num; | ||
1441 | |||
1442 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, | ||
1443 | extra); | ||
1444 | |||
1445 | spin_lock(&session->s_cap_lock); | ||
1446 | |||
1447 | if (!list_empty(&session->s_cap_releases)) { | ||
1448 | msg = list_first_entry(&session->s_cap_releases, | ||
1449 | struct ceph_msg, | ||
1450 | list_head); | ||
1451 | head = msg->front.iov_base; | ||
1452 | num = le32_to_cpu(head->num); | ||
1453 | if (num) { | ||
1454 | dout(" partial %p with (%d/%d)\n", msg, num, | ||
1455 | (int)CEPH_CAPS_PER_RELEASE); | ||
1456 | extra += CEPH_CAPS_PER_RELEASE - num; | ||
1457 | partial = msg; | ||
1458 | } | ||
1459 | } | ||
1460 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { | ||
1461 | spin_unlock(&session->s_cap_lock); | ||
1462 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, | ||
1463 | GFP_NOFS, false); | ||
1464 | if (!msg) | ||
1465 | goto out_unlocked; | ||
1466 | dout("add_cap_releases %p msg %p now %d\n", session, msg, | ||
1467 | (int)msg->front.iov_len); | ||
1468 | head = msg->front.iov_base; | ||
1469 | head->num = cpu_to_le32(0); | ||
1470 | msg->front.iov_len = sizeof(*head); | ||
1471 | spin_lock(&session->s_cap_lock); | ||
1472 | list_add(&msg->list_head, &session->s_cap_releases); | ||
1473 | session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; | ||
1474 | } | ||
1475 | |||
1476 | if (partial) { | ||
1477 | head = partial->front.iov_base; | ||
1478 | num = le32_to_cpu(head->num); | ||
1479 | dout(" queueing partial %p with %d/%d\n", partial, num, | ||
1480 | (int)CEPH_CAPS_PER_RELEASE); | ||
1481 | list_move_tail(&partial->list_head, | ||
1482 | &session->s_cap_releases_done); | ||
1483 | session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num; | ||
1484 | } | ||
1485 | err = 0; | ||
1486 | spin_unlock(&session->s_cap_lock); | ||
1487 | out_unlocked: | ||
1488 | return err; | ||
1489 | } | ||
1490 | |||
1491 | static int check_cap_flush(struct ceph_inode_info *ci, | 1429 | static int check_cap_flush(struct ceph_inode_info *ci, |
1492 | u64 want_flush_seq, u64 want_snap_seq) | 1430 | u64 want_flush_seq, u64 want_snap_seq) |
1493 | { | 1431 | { |
@@ -1590,60 +1528,74 @@ static void wait_caps_flush(struct ceph_mds_client *mdsc, | |||
1590 | void ceph_send_cap_releases(struct ceph_mds_client *mdsc, | 1528 | void ceph_send_cap_releases(struct ceph_mds_client *mdsc, |
1591 | struct ceph_mds_session *session) | 1529 | struct ceph_mds_session *session) |
1592 | { | 1530 | { |
1593 | struct ceph_msg *msg; | 1531 | struct ceph_msg *msg = NULL; |
1532 | struct ceph_mds_cap_release *head; | ||
1533 | struct ceph_mds_cap_item *item; | ||
1534 | struct ceph_cap *cap; | ||
1535 | LIST_HEAD(tmp_list); | ||
1536 | int num_cap_releases; | ||
1594 | 1537 | ||
1595 | dout("send_cap_releases mds%d\n", session->s_mds); | ||
1596 | spin_lock(&session->s_cap_lock); | 1538 | spin_lock(&session->s_cap_lock); |
1597 | while (!list_empty(&session->s_cap_releases_done)) { | 1539 | again: |
1598 | msg = list_first_entry(&session->s_cap_releases_done, | 1540 | list_splice_init(&session->s_cap_releases, &tmp_list); |
1599 | struct ceph_msg, list_head); | 1541 | num_cap_releases = session->s_num_cap_releases; |
1600 | list_del_init(&msg->list_head); | 1542 | session->s_num_cap_releases = 0; |
1601 | spin_unlock(&session->s_cap_lock); | ||
1602 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | ||
1603 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); | ||
1604 | ceph_con_send(&session->s_con, msg); | ||
1605 | spin_lock(&session->s_cap_lock); | ||
1606 | } | ||
1607 | spin_unlock(&session->s_cap_lock); | 1543 | spin_unlock(&session->s_cap_lock); |
1608 | } | ||
1609 | |||
1610 | static void discard_cap_releases(struct ceph_mds_client *mdsc, | ||
1611 | struct ceph_mds_session *session) | ||
1612 | { | ||
1613 | struct ceph_msg *msg; | ||
1614 | struct ceph_mds_cap_release *head; | ||
1615 | unsigned num; | ||
1616 | 1544 | ||
1617 | dout("discard_cap_releases mds%d\n", session->s_mds); | 1545 | while (!list_empty(&tmp_list)) { |
1546 | if (!msg) { | ||
1547 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, | ||
1548 | PAGE_CACHE_SIZE, GFP_NOFS, false); | ||
1549 | if (!msg) | ||
1550 | goto out_err; | ||
1551 | head = msg->front.iov_base; | ||
1552 | head->num = cpu_to_le32(0); | ||
1553 | msg->front.iov_len = sizeof(*head); | ||
1554 | } | ||
1555 | cap = list_first_entry(&tmp_list, struct ceph_cap, | ||
1556 | session_caps); | ||
1557 | list_del(&cap->session_caps); | ||
1558 | num_cap_releases--; | ||
1618 | 1559 | ||
1619 | if (!list_empty(&session->s_cap_releases)) { | ||
1620 | /* zero out the in-progress message */ | ||
1621 | msg = list_first_entry(&session->s_cap_releases, | ||
1622 | struct ceph_msg, list_head); | ||
1623 | head = msg->front.iov_base; | 1560 | head = msg->front.iov_base; |
1624 | num = le32_to_cpu(head->num); | 1561 | le32_add_cpu(&head->num, 1); |
1625 | dout("discard_cap_releases mds%d %p %u\n", | 1562 | item = msg->front.iov_base + msg->front.iov_len; |
1626 | session->s_mds, msg, num); | 1563 | item->ino = cpu_to_le64(cap->cap_ino); |
1627 | head->num = cpu_to_le32(0); | 1564 | item->cap_id = cpu_to_le64(cap->cap_id); |
1628 | msg->front.iov_len = sizeof(*head); | 1565 | item->migrate_seq = cpu_to_le32(cap->mseq); |
1629 | session->s_num_cap_releases += num; | 1566 | item->seq = cpu_to_le32(cap->issue_seq); |
1567 | msg->front.iov_len += sizeof(*item); | ||
1568 | |||
1569 | ceph_put_cap(mdsc, cap); | ||
1570 | |||
1571 | if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { | ||
1572 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | ||
1573 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); | ||
1574 | ceph_con_send(&session->s_con, msg); | ||
1575 | msg = NULL; | ||
1576 | } | ||
1630 | } | 1577 | } |
1631 | 1578 | ||
1632 | /* requeue completed messages */ | 1579 | BUG_ON(num_cap_releases != 0); |
1633 | while (!list_empty(&session->s_cap_releases_done)) { | ||
1634 | msg = list_first_entry(&session->s_cap_releases_done, | ||
1635 | struct ceph_msg, list_head); | ||
1636 | list_del_init(&msg->list_head); | ||
1637 | 1580 | ||
1638 | head = msg->front.iov_base; | 1581 | spin_lock(&session->s_cap_lock); |
1639 | num = le32_to_cpu(head->num); | 1582 | if (!list_empty(&session->s_cap_releases)) |
1640 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, | 1583 | goto again; |
1641 | num); | 1584 | spin_unlock(&session->s_cap_lock); |
1642 | session->s_num_cap_releases += num; | 1585 | |
1643 | head->num = cpu_to_le32(0); | 1586 | if (msg) { |
1644 | msg->front.iov_len = sizeof(*head); | 1587 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
1645 | list_add(&msg->list_head, &session->s_cap_releases); | 1588 | dout("send_cap_releases mds%d %p\n", session->s_mds, msg); |
1589 | ceph_con_send(&session->s_con, msg); | ||
1646 | } | 1590 | } |
1591 | return; | ||
1592 | out_err: | ||
1593 | pr_err("send_cap_releases mds%d, failed to allocate message\n", | ||
1594 | session->s_mds); | ||
1595 | spin_lock(&session->s_cap_lock); | ||
1596 | list_splice(&tmp_list, &session->s_cap_releases); | ||
1597 | session->s_num_cap_releases += num_cap_releases; | ||
1598 | spin_unlock(&session->s_cap_lock); | ||
1647 | } | 1599 | } |
1648 | 1600 | ||
1649 | /* | 1601 | /* |
@@ -2529,7 +2481,6 @@ out_err: | |||
2529 | } | 2481 | } |
2530 | mutex_unlock(&mdsc->mutex); | 2482 | mutex_unlock(&mdsc->mutex); |
2531 | 2483 | ||
2532 | ceph_add_cap_releases(mdsc, req->r_session); | ||
2533 | mutex_unlock(&session->s_mutex); | 2484 | mutex_unlock(&session->s_mutex); |
2534 | 2485 | ||
2535 | /* kick calling process */ | 2486 | /* kick calling process */ |
@@ -2921,8 +2872,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2921 | */ | 2872 | */ |
2922 | session->s_cap_reconnect = 1; | 2873 | session->s_cap_reconnect = 1; |
2923 | /* drop old cap expires; we're about to reestablish that state */ | 2874 | /* drop old cap expires; we're about to reestablish that state */ |
2924 | discard_cap_releases(mdsc, session); | 2875 | cleanup_cap_releases(mdsc, session); |
2925 | spin_unlock(&session->s_cap_lock); | ||
2926 | 2876 | ||
2927 | /* trim unused caps to reduce MDS's cache rejoin time */ | 2877 | /* trim unused caps to reduce MDS's cache rejoin time */ |
2928 | if (mdsc->fsc->sb->s_root) | 2878 | if (mdsc->fsc->sb->s_root) |
@@ -3385,7 +3335,6 @@ static void delayed_work(struct work_struct *work) | |||
3385 | send_renew_caps(mdsc, s); | 3335 | send_renew_caps(mdsc, s); |
3386 | else | 3336 | else |
3387 | ceph_con_keepalive(&s->s_con); | 3337 | ceph_con_keepalive(&s->s_con); |
3388 | ceph_add_cap_releases(mdsc, s); | ||
3389 | if (s->s_state == CEPH_MDS_SESSION_OPEN || | 3338 | if (s->s_state == CEPH_MDS_SESSION_OPEN || |
3390 | s->s_state == CEPH_MDS_SESSION_HUNG) | 3339 | s->s_state == CEPH_MDS_SESSION_HUNG) |
3391 | ceph_send_cap_releases(mdsc, s); | 3340 | ceph_send_cap_releases(mdsc, s); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index bf24d88cfeb2..294fa23a7df6 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -139,7 +139,6 @@ struct ceph_mds_session { | |||
139 | int s_cap_reconnect; | 139 | int s_cap_reconnect; |
140 | int s_readonly; | 140 | int s_readonly; |
141 | struct list_head s_cap_releases; /* waiting cap_release messages */ | 141 | struct list_head s_cap_releases; /* waiting cap_release messages */ |
142 | struct list_head s_cap_releases_done; /* ready to send */ | ||
143 | struct ceph_cap *s_cap_iterator; | 142 | struct ceph_cap *s_cap_iterator; |
144 | 143 | ||
145 | /* protected by mutex */ | 144 | /* protected by mutex */ |
@@ -389,8 +388,6 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) | |||
389 | kref_put(&req->r_kref, ceph_mdsc_release_request); | 388 | kref_put(&req->r_kref, ceph_mdsc_release_request); |
390 | } | 389 | } |
391 | 390 | ||
392 | extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | ||
393 | struct ceph_mds_session *session); | ||
394 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, | 391 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, |
395 | struct ceph_mds_session *session); | 392 | struct ceph_mds_session *session); |
396 | 393 | ||
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 4ef1ae92c2a6..c4961353d058 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -122,11 +122,21 @@ struct ceph_cap { | |||
122 | struct rb_node ci_node; /* per-ci cap tree */ | 122 | struct rb_node ci_node; /* per-ci cap tree */ |
123 | struct ceph_mds_session *session; | 123 | struct ceph_mds_session *session; |
124 | struct list_head session_caps; /* per-session caplist */ | 124 | struct list_head session_caps; /* per-session caplist */ |
125 | int mds; | ||
126 | u64 cap_id; /* unique cap id (mds provided) */ | 125 | u64 cap_id; /* unique cap id (mds provided) */ |
127 | int issued; /* latest, from the mds */ | 126 | union { |
128 | int implemented; /* implemented superset of issued (for revocation) */ | 127 | /* in-use caps */ |
129 | int mds_wanted; | 128 | struct { |
129 | int issued; /* latest, from the mds */ | ||
130 | int implemented; /* implemented superset of | ||
131 | issued (for revocation) */ | ||
132 | int mds, mds_wanted; | ||
133 | }; | ||
134 | /* caps to release */ | ||
135 | struct { | ||
136 | u64 cap_ino; | ||
137 | int queue_release; | ||
138 | }; | ||
139 | }; | ||
130 | u32 seq, issue_seq, mseq; | 140 | u32 seq, issue_seq, mseq; |
131 | u32 cap_gen; /* active/stale cycle */ | 141 | u32 cap_gen; /* active/stale cycle */ |
132 | unsigned long last_used; | 142 | unsigned long last_used; |
@@ -845,8 +855,6 @@ extern void ceph_put_cap(struct ceph_mds_client *mdsc, | |||
845 | struct ceph_cap *cap); | 855 | struct ceph_cap *cap); |
846 | extern int ceph_is_any_caps(struct inode *inode); | 856 | extern int ceph_is_any_caps(struct inode *inode); |
847 | 857 | ||
848 | extern void __queue_cap_release(struct ceph_mds_session *session, u64 ino, | ||
849 | u64 cap_id, u32 migrate_seq, u32 issue_seq); | ||
850 | extern void ceph_queue_caps_release(struct inode *inode); | 858 | extern void ceph_queue_caps_release(struct inode *inode); |
851 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); | 859 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); |
852 | extern int ceph_fsync(struct file *file, loff_t start, loff_t end, | 860 | extern int ceph_fsync(struct file *file, loff_t start, loff_t end, |