diff options
author | Sage Weil <sage@newdream.net> | 2009-12-23 15:12:31 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2009-12-23 15:12:31 -0500 |
commit | 58bb3b374b07a2a43315213f00a48a5ffd6d0915 (patch) | |
tree | 04599b1f6c5f8bf501a1070b5ab7269a9a97fece /fs/ceph/messenger.c | |
parent | 04a419f908b5291ff7e8ffd7aa351fa0ac0c08af (diff) |
ceph: support ceph_pagelist for message payload
The ceph_pagelist is a simple list of whole pages, strung together via
their lru list_head. It facilitates encoding to a "buffer" of unknown
size. Allow its use in place of the ceph_msg page vector.
This will be used to fix the huge buffer preallocation woes of MDS
reconnection.
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/messenger.c')
-rw-r--r-- | fs/ceph/messenger.c | 24 |
1 file changed, 20 insertions, 4 deletions
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c | |||
index 68052f664280..c1106e8360f0 100644 | |||
--- a/fs/ceph/messenger.c | |||
+++ b/fs/ceph/messenger.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include "super.h" | 13 | #include "super.h" |
14 | #include "messenger.h" | 14 | #include "messenger.h" |
15 | #include "decode.h" | 15 | #include "decode.h" |
16 | #include "pagelist.h" | ||
16 | 17 | ||
17 | /* | 18 | /* |
18 | * Ceph uses the messenger to exchange ceph_msg messages with other | 19 | * Ceph uses the messenger to exchange ceph_msg messages with other |
@@ -728,6 +729,11 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
728 | page = msg->pages[con->out_msg_pos.page]; | 729 | page = msg->pages[con->out_msg_pos.page]; |
729 | if (crc) | 730 | if (crc) |
730 | kaddr = kmap(page); | 731 | kaddr = kmap(page); |
732 | } else if (msg->pagelist) { | ||
733 | page = list_first_entry(&msg->pagelist->head, | ||
734 | struct page, lru); | ||
735 | if (crc) | ||
736 | kaddr = kmap(page); | ||
731 | } else { | 737 | } else { |
732 | page = con->msgr->zero_page; | 738 | page = con->msgr->zero_page; |
733 | if (crc) | 739 | if (crc) |
@@ -750,7 +756,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
750 | MSG_DONTWAIT | MSG_NOSIGNAL | | 756 | MSG_DONTWAIT | MSG_NOSIGNAL | |
751 | MSG_MORE); | 757 | MSG_MORE); |
752 | 758 | ||
753 | if (crc && msg->pages) | 759 | if (crc && (msg->pages || msg->pagelist)) |
754 | kunmap(page); | 760 | kunmap(page); |
755 | 761 | ||
756 | if (ret <= 0) | 762 | if (ret <= 0) |
@@ -762,6 +768,9 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
762 | con->out_msg_pos.page_pos = 0; | 768 | con->out_msg_pos.page_pos = 0; |
763 | con->out_msg_pos.page++; | 769 | con->out_msg_pos.page++; |
764 | con->out_msg_pos.did_page_crc = 0; | 770 | con->out_msg_pos.did_page_crc = 0; |
771 | if (msg->pagelist) | ||
772 | list_move_tail(&page->lru, | ||
773 | &msg->pagelist->head); | ||
765 | } | 774 | } |
766 | } | 775 | } |
767 | 776 | ||
@@ -1051,13 +1060,13 @@ static int process_banner(struct ceph_connection *con) | |||
1051 | &con->actual_peer_addr) && | 1060 | &con->actual_peer_addr) && |
1052 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && | 1061 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
1053 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | 1062 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
1054 | pr_err("wrong peer, want %s/%d, " | 1063 | pr_warning("wrong peer, want %s/%d, " |
1055 | "got %s/%d, wtf\n", | 1064 | "got %s/%d\n", |
1056 | pr_addr(&con->peer_addr.in_addr), | 1065 | pr_addr(&con->peer_addr.in_addr), |
1057 | con->peer_addr.nonce, | 1066 | con->peer_addr.nonce, |
1058 | pr_addr(&con->actual_peer_addr.in_addr), | 1067 | pr_addr(&con->actual_peer_addr.in_addr), |
1059 | con->actual_peer_addr.nonce); | 1068 | con->actual_peer_addr.nonce); |
1060 | con->error_msg = "protocol error, wrong peer"; | 1069 | con->error_msg = "wrong peer at address"; |
1061 | return -1; | 1070 | return -1; |
1062 | } | 1071 | } |
1063 | 1072 | ||
@@ -2096,6 +2105,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, | |||
2096 | /* data */ | 2105 | /* data */ |
2097 | m->nr_pages = calc_pages_for(page_off, page_len); | 2106 | m->nr_pages = calc_pages_for(page_off, page_len); |
2098 | m->pages = pages; | 2107 | m->pages = pages; |
2108 | m->pagelist = NULL; | ||
2099 | 2109 | ||
2100 | dout("ceph_msg_new %p page %d~%d -> %d\n", m, page_off, page_len, | 2110 | dout("ceph_msg_new %p page %d~%d -> %d\n", m, page_off, page_len, |
2101 | m->nr_pages); | 2111 | m->nr_pages); |
@@ -2181,6 +2191,12 @@ void ceph_msg_last_put(struct kref *kref) | |||
2181 | m->nr_pages = 0; | 2191 | m->nr_pages = 0; |
2182 | m->pages = NULL; | 2192 | m->pages = NULL; |
2183 | 2193 | ||
2194 | if (m->pagelist) { | ||
2195 | ceph_pagelist_release(m->pagelist); | ||
2196 | kfree(m->pagelist); | ||
2197 | m->pagelist = NULL; | ||
2198 | } | ||
2199 | |||
2184 | if (m->pool) | 2200 | if (m->pool) |
2185 | ceph_msgpool_put(m->pool, m); | 2201 | ceph_msgpool_put(m->pool, m); |
2186 | else | 2202 | else |