diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-28 13:01:29 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-28 13:01:29 -0400 |
commit | 56b59b429b4c26e5e730bc8c3d837de9f7d0a966 (patch) | |
tree | 191bf87e438a3985ccb7e3c5382fab8d31f94edb /net | |
parent | 9a7259d5c8978bbeb5fdcf64b168f8470d8208a6 (diff) | |
parent | c666601a935b94cc0f3310339411b6940de751ba (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates for 3.4-rc1 from Sage Weil:
"Alex has been busy. There are a range of rbd and libceph cleanups,
especially surrounding device setup and teardown, and a few critical
fixes in that code. There are more cleanups in the messenger code,
virtual xattrs, a fix for CRC calculation/checks, and lots of other
miscellaneous stuff.
There's a patch from Amon Ott to make inos behave a bit better on
32-bit boxes, some decode check fixes from Xi Wang, a network
throttling fix from Jim Schutt, and a couple of RBD fixes from Josh
Durgin.
No new functionality, just a lot of cleanup and bug fixing."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (65 commits)
rbd: move snap_rwsem to the device, rename to header_rwsem
ceph: fix three bugs, two in ceph_vxattrcb_file_layout()
libceph: isolate kmap() call in write_partial_msg_pages()
libceph: rename "page_shift" variable to something sensible
libceph: get rid of zero_page_address
libceph: only call kernel_sendpage() via helper
libceph: use kernel_sendpage() for sending zeroes
libceph: fix inverted crc option logic
libceph: some simple changes
libceph: small refactor in write_partial_kvec()
libceph: do crc calculations outside loop
libceph: separate CRC calculation from byte swapping
libceph: use "do" in CRC-related Boolean variables
ceph: ensure Boolean options support both senses
libceph: a few small changes
libceph: make ceph_tcp_connect() return int
libceph: encapsulate some messenger cleanup code
libceph: make ceph_msgr_wq private
libceph: encapsulate connection kvec operations
libceph: move prepare_write_banner()
...
Diffstat (limited to 'net')
-rw-r--r-- | net/ceph/ceph_common.c | 26 | ||||
-rw-r--r-- | net/ceph/messenger.c | 456 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 3 |
3 files changed, 255 insertions, 230 deletions
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 761ad9d6cc3b..cc913193d992 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
@@ -201,7 +201,9 @@ enum { | |||
201 | Opt_ip, | 201 | Opt_ip, |
202 | Opt_last_string, | 202 | Opt_last_string, |
203 | /* string args above */ | 203 | /* string args above */ |
204 | Opt_share, | ||
204 | Opt_noshare, | 205 | Opt_noshare, |
206 | Opt_crc, | ||
205 | Opt_nocrc, | 207 | Opt_nocrc, |
206 | }; | 208 | }; |
207 | 209 | ||
@@ -217,7 +219,9 @@ static match_table_t opt_tokens = { | |||
217 | {Opt_key, "key=%s"}, | 219 | {Opt_key, "key=%s"}, |
218 | {Opt_ip, "ip=%s"}, | 220 | {Opt_ip, "ip=%s"}, |
219 | /* string args above */ | 221 | /* string args above */ |
222 | {Opt_share, "share"}, | ||
220 | {Opt_noshare, "noshare"}, | 223 | {Opt_noshare, "noshare"}, |
224 | {Opt_crc, "crc"}, | ||
221 | {Opt_nocrc, "nocrc"}, | 225 | {Opt_nocrc, "nocrc"}, |
222 | {-1, NULL} | 226 | {-1, NULL} |
223 | }; | 227 | }; |
@@ -277,10 +281,11 @@ out: | |||
277 | return err; | 281 | return err; |
278 | } | 282 | } |
279 | 283 | ||
280 | int ceph_parse_options(struct ceph_options **popt, char *options, | 284 | struct ceph_options * |
281 | const char *dev_name, const char *dev_name_end, | 285 | ceph_parse_options(char *options, const char *dev_name, |
282 | int (*parse_extra_token)(char *c, void *private), | 286 | const char *dev_name_end, |
283 | void *private) | 287 | int (*parse_extra_token)(char *c, void *private), |
288 | void *private) | ||
284 | { | 289 | { |
285 | struct ceph_options *opt; | 290 | struct ceph_options *opt; |
286 | const char *c; | 291 | const char *c; |
@@ -289,7 +294,7 @@ int ceph_parse_options(struct ceph_options **popt, char *options, | |||
289 | 294 | ||
290 | opt = kzalloc(sizeof(*opt), GFP_KERNEL); | 295 | opt = kzalloc(sizeof(*opt), GFP_KERNEL); |
291 | if (!opt) | 296 | if (!opt) |
292 | return err; | 297 | return ERR_PTR(-ENOMEM); |
293 | opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), | 298 | opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), |
294 | GFP_KERNEL); | 299 | GFP_KERNEL); |
295 | if (!opt->mon_addr) | 300 | if (!opt->mon_addr) |
@@ -398,10 +403,16 @@ int ceph_parse_options(struct ceph_options **popt, char *options, | |||
398 | opt->mount_timeout = intval; | 403 | opt->mount_timeout = intval; |
399 | break; | 404 | break; |
400 | 405 | ||
406 | case Opt_share: | ||
407 | opt->flags &= ~CEPH_OPT_NOSHARE; | ||
408 | break; | ||
401 | case Opt_noshare: | 409 | case Opt_noshare: |
402 | opt->flags |= CEPH_OPT_NOSHARE; | 410 | opt->flags |= CEPH_OPT_NOSHARE; |
403 | break; | 411 | break; |
404 | 412 | ||
413 | case Opt_crc: | ||
414 | opt->flags &= ~CEPH_OPT_NOCRC; | ||
415 | break; | ||
405 | case Opt_nocrc: | 416 | case Opt_nocrc: |
406 | opt->flags |= CEPH_OPT_NOCRC; | 417 | opt->flags |= CEPH_OPT_NOCRC; |
407 | break; | 418 | break; |
@@ -412,12 +423,11 @@ int ceph_parse_options(struct ceph_options **popt, char *options, | |||
412 | } | 423 | } |
413 | 424 | ||
414 | /* success */ | 425 | /* success */ |
415 | *popt = opt; | 426 | return opt; |
416 | return 0; | ||
417 | 427 | ||
418 | out: | 428 | out: |
419 | ceph_destroy_options(opt); | 429 | ceph_destroy_options(opt); |
420 | return err; | 430 | return ERR_PTR(err); |
421 | } | 431 | } |
422 | EXPORT_SYMBOL(ceph_parse_options); | 432 | EXPORT_SYMBOL(ceph_parse_options); |
423 | 433 | ||
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index ad5b70801f37..f0993af2ae4d 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -38,48 +38,54 @@ static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; | |||
38 | static struct lock_class_key socket_class; | 38 | static struct lock_class_key socket_class; |
39 | #endif | 39 | #endif |
40 | 40 | ||
41 | /* | ||
42 | * When skipping (ignoring) a block of input we read it into a "skip | ||
43 | * buffer," which is this many bytes in size. | ||
44 | */ | ||
45 | #define SKIP_BUF_SIZE 1024 | ||
41 | 46 | ||
42 | static void queue_con(struct ceph_connection *con); | 47 | static void queue_con(struct ceph_connection *con); |
43 | static void con_work(struct work_struct *); | 48 | static void con_work(struct work_struct *); |
44 | static void ceph_fault(struct ceph_connection *con); | 49 | static void ceph_fault(struct ceph_connection *con); |
45 | 50 | ||
46 | /* | 51 | /* |
47 | * nicely render a sockaddr as a string. | 52 | * Nicely render a sockaddr as a string. An array of formatted |
53 | * strings is used, to approximate reentrancy. | ||
48 | */ | 54 | */ |
49 | #define MAX_ADDR_STR 20 | 55 | #define ADDR_STR_COUNT_LOG 5 /* log2(# address strings in array) */ |
50 | #define MAX_ADDR_STR_LEN 60 | 56 | #define ADDR_STR_COUNT (1 << ADDR_STR_COUNT_LOG) |
51 | static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; | 57 | #define ADDR_STR_COUNT_MASK (ADDR_STR_COUNT - 1) |
52 | static DEFINE_SPINLOCK(addr_str_lock); | 58 | #define MAX_ADDR_STR_LEN 64 /* 54 is enough */ |
53 | static int last_addr_str; | 59 | |
60 | static char addr_str[ADDR_STR_COUNT][MAX_ADDR_STR_LEN]; | ||
61 | static atomic_t addr_str_seq = ATOMIC_INIT(0); | ||
62 | |||
63 | static struct page *zero_page; /* used in certain error cases */ | ||
54 | 64 | ||
55 | const char *ceph_pr_addr(const struct sockaddr_storage *ss) | 65 | const char *ceph_pr_addr(const struct sockaddr_storage *ss) |
56 | { | 66 | { |
57 | int i; | 67 | int i; |
58 | char *s; | 68 | char *s; |
59 | struct sockaddr_in *in4 = (void *)ss; | 69 | struct sockaddr_in *in4 = (struct sockaddr_in *) ss; |
60 | struct sockaddr_in6 *in6 = (void *)ss; | 70 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; |
61 | 71 | ||
62 | spin_lock(&addr_str_lock); | 72 | i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK; |
63 | i = last_addr_str++; | ||
64 | if (last_addr_str == MAX_ADDR_STR) | ||
65 | last_addr_str = 0; | ||
66 | spin_unlock(&addr_str_lock); | ||
67 | s = addr_str[i]; | 73 | s = addr_str[i]; |
68 | 74 | ||
69 | switch (ss->ss_family) { | 75 | switch (ss->ss_family) { |
70 | case AF_INET: | 76 | case AF_INET: |
71 | snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, | 77 | snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr, |
72 | (unsigned int)ntohs(in4->sin_port)); | 78 | ntohs(in4->sin_port)); |
73 | break; | 79 | break; |
74 | 80 | ||
75 | case AF_INET6: | 81 | case AF_INET6: |
76 | snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, | 82 | snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%hu", &in6->sin6_addr, |
77 | (unsigned int)ntohs(in6->sin6_port)); | 83 | ntohs(in6->sin6_port)); |
78 | break; | 84 | break; |
79 | 85 | ||
80 | default: | 86 | default: |
81 | snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)", | 87 | snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)", |
82 | (int)ss->ss_family); | 88 | ss->ss_family); |
83 | } | 89 | } |
84 | 90 | ||
85 | return s; | 91 | return s; |
@@ -95,22 +101,43 @@ static void encode_my_addr(struct ceph_messenger *msgr) | |||
95 | /* | 101 | /* |
96 | * work queue for all reading and writing to/from the socket. | 102 | * work queue for all reading and writing to/from the socket. |
97 | */ | 103 | */ |
98 | struct workqueue_struct *ceph_msgr_wq; | 104 | static struct workqueue_struct *ceph_msgr_wq; |
105 | |||
106 | void _ceph_msgr_exit(void) | ||
107 | { | ||
108 | if (ceph_msgr_wq) { | ||
109 | destroy_workqueue(ceph_msgr_wq); | ||
110 | ceph_msgr_wq = NULL; | ||
111 | } | ||
112 | |||
113 | BUG_ON(zero_page == NULL); | ||
114 | kunmap(zero_page); | ||
115 | page_cache_release(zero_page); | ||
116 | zero_page = NULL; | ||
117 | } | ||
99 | 118 | ||
100 | int ceph_msgr_init(void) | 119 | int ceph_msgr_init(void) |
101 | { | 120 | { |
121 | BUG_ON(zero_page != NULL); | ||
122 | zero_page = ZERO_PAGE(0); | ||
123 | page_cache_get(zero_page); | ||
124 | |||
102 | ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); | 125 | ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); |
103 | if (!ceph_msgr_wq) { | 126 | if (ceph_msgr_wq) |
104 | pr_err("msgr_init failed to create workqueue\n"); | 127 | return 0; |
105 | return -ENOMEM; | 128 | |
106 | } | 129 | pr_err("msgr_init failed to create workqueue\n"); |
107 | return 0; | 130 | _ceph_msgr_exit(); |
131 | |||
132 | return -ENOMEM; | ||
108 | } | 133 | } |
109 | EXPORT_SYMBOL(ceph_msgr_init); | 134 | EXPORT_SYMBOL(ceph_msgr_init); |
110 | 135 | ||
111 | void ceph_msgr_exit(void) | 136 | void ceph_msgr_exit(void) |
112 | { | 137 | { |
113 | destroy_workqueue(ceph_msgr_wq); | 138 | BUG_ON(ceph_msgr_wq == NULL); |
139 | |||
140 | _ceph_msgr_exit(); | ||
114 | } | 141 | } |
115 | EXPORT_SYMBOL(ceph_msgr_exit); | 142 | EXPORT_SYMBOL(ceph_msgr_exit); |
116 | 143 | ||
@@ -128,8 +155,8 @@ EXPORT_SYMBOL(ceph_msgr_flush); | |||
128 | /* data available on socket, or listen socket received a connect */ | 155 | /* data available on socket, or listen socket received a connect */ |
129 | static void ceph_data_ready(struct sock *sk, int count_unused) | 156 | static void ceph_data_ready(struct sock *sk, int count_unused) |
130 | { | 157 | { |
131 | struct ceph_connection *con = | 158 | struct ceph_connection *con = sk->sk_user_data; |
132 | (struct ceph_connection *)sk->sk_user_data; | 159 | |
133 | if (sk->sk_state != TCP_CLOSE_WAIT) { | 160 | if (sk->sk_state != TCP_CLOSE_WAIT) { |
134 | dout("ceph_data_ready on %p state = %lu, queueing work\n", | 161 | dout("ceph_data_ready on %p state = %lu, queueing work\n", |
135 | con, con->state); | 162 | con, con->state); |
@@ -140,26 +167,30 @@ static void ceph_data_ready(struct sock *sk, int count_unused) | |||
140 | /* socket has buffer space for writing */ | 167 | /* socket has buffer space for writing */ |
141 | static void ceph_write_space(struct sock *sk) | 168 | static void ceph_write_space(struct sock *sk) |
142 | { | 169 | { |
143 | struct ceph_connection *con = | 170 | struct ceph_connection *con = sk->sk_user_data; |
144 | (struct ceph_connection *)sk->sk_user_data; | ||
145 | 171 | ||
146 | /* only queue to workqueue if there is data we want to write. */ | 172 | /* only queue to workqueue if there is data we want to write, |
173 | * and there is sufficient space in the socket buffer to accept | ||
174 | * more data. clear SOCK_NOSPACE so that ceph_write_space() | ||
175 | * doesn't get called again until try_write() fills the socket | ||
176 | * buffer. See net/ipv4/tcp_input.c:tcp_check_space() | ||
177 | * and net/core/stream.c:sk_stream_write_space(). | ||
178 | */ | ||
147 | if (test_bit(WRITE_PENDING, &con->state)) { | 179 | if (test_bit(WRITE_PENDING, &con->state)) { |
148 | dout("ceph_write_space %p queueing write work\n", con); | 180 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { |
149 | queue_con(con); | 181 | dout("ceph_write_space %p queueing write work\n", con); |
182 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
183 | queue_con(con); | ||
184 | } | ||
150 | } else { | 185 | } else { |
151 | dout("ceph_write_space %p nothing to write\n", con); | 186 | dout("ceph_write_space %p nothing to write\n", con); |
152 | } | 187 | } |
153 | |||
154 | /* since we have our own write_space, clear the SOCK_NOSPACE flag */ | ||
155 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
156 | } | 188 | } |
157 | 189 | ||
158 | /* socket's state has changed */ | 190 | /* socket's state has changed */ |
159 | static void ceph_state_change(struct sock *sk) | 191 | static void ceph_state_change(struct sock *sk) |
160 | { | 192 | { |
161 | struct ceph_connection *con = | 193 | struct ceph_connection *con = sk->sk_user_data; |
162 | (struct ceph_connection *)sk->sk_user_data; | ||
163 | 194 | ||
164 | dout("ceph_state_change %p state = %lu sk_state = %u\n", | 195 | dout("ceph_state_change %p state = %lu sk_state = %u\n", |
165 | con, con->state, sk->sk_state); | 196 | con, con->state, sk->sk_state); |
@@ -184,6 +215,8 @@ static void ceph_state_change(struct sock *sk) | |||
184 | dout("ceph_state_change TCP_ESTABLISHED\n"); | 215 | dout("ceph_state_change TCP_ESTABLISHED\n"); |
185 | queue_con(con); | 216 | queue_con(con); |
186 | break; | 217 | break; |
218 | default: /* Everything else is uninteresting */ | ||
219 | break; | ||
187 | } | 220 | } |
188 | } | 221 | } |
189 | 222 | ||
@@ -194,7 +227,7 @@ static void set_sock_callbacks(struct socket *sock, | |||
194 | struct ceph_connection *con) | 227 | struct ceph_connection *con) |
195 | { | 228 | { |
196 | struct sock *sk = sock->sk; | 229 | struct sock *sk = sock->sk; |
197 | sk->sk_user_data = (void *)con; | 230 | sk->sk_user_data = con; |
198 | sk->sk_data_ready = ceph_data_ready; | 231 | sk->sk_data_ready = ceph_data_ready; |
199 | sk->sk_write_space = ceph_write_space; | 232 | sk->sk_write_space = ceph_write_space; |
200 | sk->sk_state_change = ceph_state_change; | 233 | sk->sk_state_change = ceph_state_change; |
@@ -208,7 +241,7 @@ static void set_sock_callbacks(struct socket *sock, | |||
208 | /* | 241 | /* |
209 | * initiate connection to a remote socket. | 242 | * initiate connection to a remote socket. |
210 | */ | 243 | */ |
211 | static struct socket *ceph_tcp_connect(struct ceph_connection *con) | 244 | static int ceph_tcp_connect(struct ceph_connection *con) |
212 | { | 245 | { |
213 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; | 246 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; |
214 | struct socket *sock; | 247 | struct socket *sock; |
@@ -218,8 +251,7 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||
218 | ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, | 251 | ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, |
219 | IPPROTO_TCP, &sock); | 252 | IPPROTO_TCP, &sock); |
220 | if (ret) | 253 | if (ret) |
221 | return ERR_PTR(ret); | 254 | return ret; |
222 | con->sock = sock; | ||
223 | sock->sk->sk_allocation = GFP_NOFS; | 255 | sock->sk->sk_allocation = GFP_NOFS; |
224 | 256 | ||
225 | #ifdef CONFIG_LOCKDEP | 257 | #ifdef CONFIG_LOCKDEP |
@@ -236,19 +268,17 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con) | |||
236 | dout("connect %s EINPROGRESS sk_state = %u\n", | 268 | dout("connect %s EINPROGRESS sk_state = %u\n", |
237 | ceph_pr_addr(&con->peer_addr.in_addr), | 269 | ceph_pr_addr(&con->peer_addr.in_addr), |
238 | sock->sk->sk_state); | 270 | sock->sk->sk_state); |
239 | ret = 0; | 271 | } else if (ret < 0) { |
240 | } | ||
241 | if (ret < 0) { | ||
242 | pr_err("connect %s error %d\n", | 272 | pr_err("connect %s error %d\n", |
243 | ceph_pr_addr(&con->peer_addr.in_addr), ret); | 273 | ceph_pr_addr(&con->peer_addr.in_addr), ret); |
244 | sock_release(sock); | 274 | sock_release(sock); |
245 | con->sock = NULL; | ||
246 | con->error_msg = "connect error"; | 275 | con->error_msg = "connect error"; |
276 | |||
277 | return ret; | ||
247 | } | 278 | } |
279 | con->sock = sock; | ||
248 | 280 | ||
249 | if (ret < 0) | 281 | return 0; |
250 | return ERR_PTR(ret); | ||
251 | return sock; | ||
252 | } | 282 | } |
253 | 283 | ||
254 | static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) | 284 | static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) |
@@ -284,6 +314,19 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, | |||
284 | return r; | 314 | return r; |
285 | } | 315 | } |
286 | 316 | ||
317 | static int ceph_tcp_sendpage(struct socket *sock, struct page *page, | ||
318 | int offset, size_t size, int more) | ||
319 | { | ||
320 | int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); | ||
321 | int ret; | ||
322 | |||
323 | ret = kernel_sendpage(sock, page, offset, size, flags); | ||
324 | if (ret == -EAGAIN) | ||
325 | ret = 0; | ||
326 | |||
327 | return ret; | ||
328 | } | ||
329 | |||
287 | 330 | ||
288 | /* | 331 | /* |
289 | * Shutdown/close the socket for the given connection. | 332 | * Shutdown/close the socket for the given connection. |
@@ -391,22 +434,23 @@ bool ceph_con_opened(struct ceph_connection *con) | |||
391 | */ | 434 | */ |
392 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) | 435 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) |
393 | { | 436 | { |
394 | dout("con_get %p nref = %d -> %d\n", con, | 437 | int nref = __atomic_add_unless(&con->nref, 1, 0); |
395 | atomic_read(&con->nref), atomic_read(&con->nref) + 1); | 438 | |
396 | if (atomic_inc_not_zero(&con->nref)) | 439 | dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1); |
397 | return con; | 440 | |
398 | return NULL; | 441 | return nref ? con : NULL; |
399 | } | 442 | } |
400 | 443 | ||
401 | void ceph_con_put(struct ceph_connection *con) | 444 | void ceph_con_put(struct ceph_connection *con) |
402 | { | 445 | { |
403 | dout("con_put %p nref = %d -> %d\n", con, | 446 | int nref = atomic_dec_return(&con->nref); |
404 | atomic_read(&con->nref), atomic_read(&con->nref) - 1); | 447 | |
405 | BUG_ON(atomic_read(&con->nref) == 0); | 448 | BUG_ON(nref < 0); |
406 | if (atomic_dec_and_test(&con->nref)) { | 449 | if (nref == 0) { |
407 | BUG_ON(con->sock); | 450 | BUG_ON(con->sock); |
408 | kfree(con); | 451 | kfree(con); |
409 | } | 452 | } |
453 | dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref); | ||
410 | } | 454 | } |
411 | 455 | ||
412 | /* | 456 | /* |
@@ -442,14 +486,35 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) | |||
442 | return ret; | 486 | return ret; |
443 | } | 487 | } |
444 | 488 | ||
489 | static void ceph_con_out_kvec_reset(struct ceph_connection *con) | ||
490 | { | ||
491 | con->out_kvec_left = 0; | ||
492 | con->out_kvec_bytes = 0; | ||
493 | con->out_kvec_cur = &con->out_kvec[0]; | ||
494 | } | ||
495 | |||
496 | static void ceph_con_out_kvec_add(struct ceph_connection *con, | ||
497 | size_t size, void *data) | ||
498 | { | ||
499 | int index; | ||
500 | |||
501 | index = con->out_kvec_left; | ||
502 | BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); | ||
503 | |||
504 | con->out_kvec[index].iov_len = size; | ||
505 | con->out_kvec[index].iov_base = data; | ||
506 | con->out_kvec_left++; | ||
507 | con->out_kvec_bytes += size; | ||
508 | } | ||
445 | 509 | ||
446 | /* | 510 | /* |
447 | * Prepare footer for currently outgoing message, and finish things | 511 | * Prepare footer for currently outgoing message, and finish things |
448 | * off. Assumes out_kvec* are already valid.. we just add on to the end. | 512 | * off. Assumes out_kvec* are already valid.. we just add on to the end. |
449 | */ | 513 | */ |
450 | static void prepare_write_message_footer(struct ceph_connection *con, int v) | 514 | static void prepare_write_message_footer(struct ceph_connection *con) |
451 | { | 515 | { |
452 | struct ceph_msg *m = con->out_msg; | 516 | struct ceph_msg *m = con->out_msg; |
517 | int v = con->out_kvec_left; | ||
453 | 518 | ||
454 | dout("prepare_write_message_footer %p\n", con); | 519 | dout("prepare_write_message_footer %p\n", con); |
455 | con->out_kvec_is_msg = true; | 520 | con->out_kvec_is_msg = true; |
@@ -467,9 +532,9 @@ static void prepare_write_message_footer(struct ceph_connection *con, int v) | |||
467 | static void prepare_write_message(struct ceph_connection *con) | 532 | static void prepare_write_message(struct ceph_connection *con) |
468 | { | 533 | { |
469 | struct ceph_msg *m; | 534 | struct ceph_msg *m; |
470 | int v = 0; | 535 | u32 crc; |
471 | 536 | ||
472 | con->out_kvec_bytes = 0; | 537 | ceph_con_out_kvec_reset(con); |
473 | con->out_kvec_is_msg = true; | 538 | con->out_kvec_is_msg = true; |
474 | con->out_msg_done = false; | 539 | con->out_msg_done = false; |
475 | 540 | ||
@@ -477,16 +542,13 @@ static void prepare_write_message(struct ceph_connection *con) | |||
477 | * TCP packet that's a good thing. */ | 542 | * TCP packet that's a good thing. */ |
478 | if (con->in_seq > con->in_seq_acked) { | 543 | if (con->in_seq > con->in_seq_acked) { |
479 | con->in_seq_acked = con->in_seq; | 544 | con->in_seq_acked = con->in_seq; |
480 | con->out_kvec[v].iov_base = &tag_ack; | 545 | ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); |
481 | con->out_kvec[v++].iov_len = 1; | ||
482 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | 546 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); |
483 | con->out_kvec[v].iov_base = &con->out_temp_ack; | 547 | ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), |
484 | con->out_kvec[v++].iov_len = sizeof(con->out_temp_ack); | 548 | &con->out_temp_ack); |
485 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); | ||
486 | } | 549 | } |
487 | 550 | ||
488 | m = list_first_entry(&con->out_queue, | 551 | m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); |
489 | struct ceph_msg, list_head); | ||
490 | con->out_msg = m; | 552 | con->out_msg = m; |
491 | 553 | ||
492 | /* put message on sent list */ | 554 | /* put message on sent list */ |
@@ -510,30 +572,26 @@ static void prepare_write_message(struct ceph_connection *con) | |||
510 | BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); | 572 | BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); |
511 | 573 | ||
512 | /* tag + hdr + front + middle */ | 574 | /* tag + hdr + front + middle */ |
513 | con->out_kvec[v].iov_base = &tag_msg; | 575 | ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); |
514 | con->out_kvec[v++].iov_len = 1; | 576 | ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); |
515 | con->out_kvec[v].iov_base = &m->hdr; | 577 | ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); |
516 | con->out_kvec[v++].iov_len = sizeof(m->hdr); | 578 | |
517 | con->out_kvec[v++] = m->front; | ||
518 | if (m->middle) | 579 | if (m->middle) |
519 | con->out_kvec[v++] = m->middle->vec; | 580 | ceph_con_out_kvec_add(con, m->middle->vec.iov_len, |
520 | con->out_kvec_left = v; | 581 | m->middle->vec.iov_base); |
521 | con->out_kvec_bytes += 1 + sizeof(m->hdr) + m->front.iov_len + | ||
522 | (m->middle ? m->middle->vec.iov_len : 0); | ||
523 | con->out_kvec_cur = con->out_kvec; | ||
524 | 582 | ||
525 | /* fill in crc (except data pages), footer */ | 583 | /* fill in crc (except data pages), footer */ |
526 | con->out_msg->hdr.crc = | 584 | crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); |
527 | cpu_to_le32(crc32c(0, (void *)&m->hdr, | 585 | con->out_msg->hdr.crc = cpu_to_le32(crc); |
528 | sizeof(m->hdr) - sizeof(m->hdr.crc))); | ||
529 | con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; | 586 | con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; |
530 | con->out_msg->footer.front_crc = | 587 | |
531 | cpu_to_le32(crc32c(0, m->front.iov_base, m->front.iov_len)); | 588 | crc = crc32c(0, m->front.iov_base, m->front.iov_len); |
532 | if (m->middle) | 589 | con->out_msg->footer.front_crc = cpu_to_le32(crc); |
533 | con->out_msg->footer.middle_crc = | 590 | if (m->middle) { |
534 | cpu_to_le32(crc32c(0, m->middle->vec.iov_base, | 591 | crc = crc32c(0, m->middle->vec.iov_base, |
535 | m->middle->vec.iov_len)); | 592 | m->middle->vec.iov_len); |
536 | else | 593 | con->out_msg->footer.middle_crc = cpu_to_le32(crc); |
594 | } else | ||
537 | con->out_msg->footer.middle_crc = 0; | 595 | con->out_msg->footer.middle_crc = 0; |
538 | con->out_msg->footer.data_crc = 0; | 596 | con->out_msg->footer.data_crc = 0; |
539 | dout("prepare_write_message front_crc %u data_crc %u\n", | 597 | dout("prepare_write_message front_crc %u data_crc %u\n", |
@@ -549,11 +607,11 @@ static void prepare_write_message(struct ceph_connection *con) | |||
549 | else | 607 | else |
550 | con->out_msg_pos.page_pos = 0; | 608 | con->out_msg_pos.page_pos = 0; |
551 | con->out_msg_pos.data_pos = 0; | 609 | con->out_msg_pos.data_pos = 0; |
552 | con->out_msg_pos.did_page_crc = 0; | 610 | con->out_msg_pos.did_page_crc = false; |
553 | con->out_more = 1; /* data + footer will follow */ | 611 | con->out_more = 1; /* data + footer will follow */ |
554 | } else { | 612 | } else { |
555 | /* no, queue up footer too and be done */ | 613 | /* no, queue up footer too and be done */ |
556 | prepare_write_message_footer(con, v); | 614 | prepare_write_message_footer(con); |
557 | } | 615 | } |
558 | 616 | ||
559 | set_bit(WRITE_PENDING, &con->state); | 617 | set_bit(WRITE_PENDING, &con->state); |
@@ -568,14 +626,14 @@ static void prepare_write_ack(struct ceph_connection *con) | |||
568 | con->in_seq_acked, con->in_seq); | 626 | con->in_seq_acked, con->in_seq); |
569 | con->in_seq_acked = con->in_seq; | 627 | con->in_seq_acked = con->in_seq; |
570 | 628 | ||
571 | con->out_kvec[0].iov_base = &tag_ack; | 629 | ceph_con_out_kvec_reset(con); |
572 | con->out_kvec[0].iov_len = 1; | 630 | |
631 | ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); | ||
632 | |||
573 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | 633 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); |
574 | con->out_kvec[1].iov_base = &con->out_temp_ack; | 634 | ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), |
575 | con->out_kvec[1].iov_len = sizeof(con->out_temp_ack); | 635 | &con->out_temp_ack); |
576 | con->out_kvec_left = 2; | 636 | |
577 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); | ||
578 | con->out_kvec_cur = con->out_kvec; | ||
579 | con->out_more = 1; /* more will follow.. eventually.. */ | 637 | con->out_more = 1; /* more will follow.. eventually.. */ |
580 | set_bit(WRITE_PENDING, &con->state); | 638 | set_bit(WRITE_PENDING, &con->state); |
581 | } | 639 | } |
@@ -586,11 +644,8 @@ static void prepare_write_ack(struct ceph_connection *con) | |||
586 | static void prepare_write_keepalive(struct ceph_connection *con) | 644 | static void prepare_write_keepalive(struct ceph_connection *con) |
587 | { | 645 | { |
588 | dout("prepare_write_keepalive %p\n", con); | 646 | dout("prepare_write_keepalive %p\n", con); |
589 | con->out_kvec[0].iov_base = &tag_keepalive; | 647 | ceph_con_out_kvec_reset(con); |
590 | con->out_kvec[0].iov_len = 1; | 648 | ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); |
591 | con->out_kvec_left = 1; | ||
592 | con->out_kvec_bytes = 1; | ||
593 | con->out_kvec_cur = con->out_kvec; | ||
594 | set_bit(WRITE_PENDING, &con->state); | 649 | set_bit(WRITE_PENDING, &con->state); |
595 | } | 650 | } |
596 | 651 | ||
@@ -619,12 +674,9 @@ static int prepare_connect_authorizer(struct ceph_connection *con) | |||
619 | con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); | 674 | con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); |
620 | con->out_connect.authorizer_len = cpu_to_le32(auth_len); | 675 | con->out_connect.authorizer_len = cpu_to_le32(auth_len); |
621 | 676 | ||
622 | if (auth_len) { | 677 | if (auth_len) |
623 | con->out_kvec[con->out_kvec_left].iov_base = auth_buf; | 678 | ceph_con_out_kvec_add(con, auth_len, auth_buf); |
624 | con->out_kvec[con->out_kvec_left].iov_len = auth_len; | 679 | |
625 | con->out_kvec_left++; | ||
626 | con->out_kvec_bytes += auth_len; | ||
627 | } | ||
628 | return 0; | 680 | return 0; |
629 | } | 681 | } |
630 | 682 | ||
@@ -634,22 +686,18 @@ static int prepare_connect_authorizer(struct ceph_connection *con) | |||
634 | static void prepare_write_banner(struct ceph_messenger *msgr, | 686 | static void prepare_write_banner(struct ceph_messenger *msgr, |
635 | struct ceph_connection *con) | 687 | struct ceph_connection *con) |
636 | { | 688 | { |
637 | int len = strlen(CEPH_BANNER); | 689 | ceph_con_out_kvec_reset(con); |
690 | ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); | ||
691 | ceph_con_out_kvec_add(con, sizeof (msgr->my_enc_addr), | ||
692 | &msgr->my_enc_addr); | ||
638 | 693 | ||
639 | con->out_kvec[0].iov_base = CEPH_BANNER; | ||
640 | con->out_kvec[0].iov_len = len; | ||
641 | con->out_kvec[1].iov_base = &msgr->my_enc_addr; | ||
642 | con->out_kvec[1].iov_len = sizeof(msgr->my_enc_addr); | ||
643 | con->out_kvec_left = 2; | ||
644 | con->out_kvec_bytes = len + sizeof(msgr->my_enc_addr); | ||
645 | con->out_kvec_cur = con->out_kvec; | ||
646 | con->out_more = 0; | 694 | con->out_more = 0; |
647 | set_bit(WRITE_PENDING, &con->state); | 695 | set_bit(WRITE_PENDING, &con->state); |
648 | } | 696 | } |
649 | 697 | ||
650 | static int prepare_write_connect(struct ceph_messenger *msgr, | 698 | static int prepare_write_connect(struct ceph_messenger *msgr, |
651 | struct ceph_connection *con, | 699 | struct ceph_connection *con, |
652 | int after_banner) | 700 | int include_banner) |
653 | { | 701 | { |
654 | unsigned global_seq = get_global_seq(con->msgr, 0); | 702 | unsigned global_seq = get_global_seq(con->msgr, 0); |
655 | int proto; | 703 | int proto; |
@@ -678,22 +726,18 @@ static int prepare_write_connect(struct ceph_messenger *msgr, | |||
678 | con->out_connect.protocol_version = cpu_to_le32(proto); | 726 | con->out_connect.protocol_version = cpu_to_le32(proto); |
679 | con->out_connect.flags = 0; | 727 | con->out_connect.flags = 0; |
680 | 728 | ||
681 | if (!after_banner) { | 729 | if (include_banner) |
682 | con->out_kvec_left = 0; | 730 | prepare_write_banner(msgr, con); |
683 | con->out_kvec_bytes = 0; | 731 | else |
684 | } | 732 | ceph_con_out_kvec_reset(con); |
685 | con->out_kvec[con->out_kvec_left].iov_base = &con->out_connect; | 733 | ceph_con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect); |
686 | con->out_kvec[con->out_kvec_left].iov_len = sizeof(con->out_connect); | 734 | |
687 | con->out_kvec_left++; | ||
688 | con->out_kvec_bytes += sizeof(con->out_connect); | ||
689 | con->out_kvec_cur = con->out_kvec; | ||
690 | con->out_more = 0; | 735 | con->out_more = 0; |
691 | set_bit(WRITE_PENDING, &con->state); | 736 | set_bit(WRITE_PENDING, &con->state); |
692 | 737 | ||
693 | return prepare_connect_authorizer(con); | 738 | return prepare_connect_authorizer(con); |
694 | } | 739 | } |
695 | 740 | ||
696 | |||
697 | /* | 741 | /* |
698 | * write as much of pending kvecs to the socket as we can. | 742 | * write as much of pending kvecs to the socket as we can. |
699 | * 1 -> done | 743 | * 1 -> done |
@@ -714,17 +758,18 @@ static int write_partial_kvec(struct ceph_connection *con) | |||
714 | con->out_kvec_bytes -= ret; | 758 | con->out_kvec_bytes -= ret; |
715 | if (con->out_kvec_bytes == 0) | 759 | if (con->out_kvec_bytes == 0) |
716 | break; /* done */ | 760 | break; /* done */ |
717 | while (ret > 0) { | 761 | |
718 | if (ret >= con->out_kvec_cur->iov_len) { | 762 | /* account for full iov entries consumed */ |
719 | ret -= con->out_kvec_cur->iov_len; | 763 | while (ret >= con->out_kvec_cur->iov_len) { |
720 | con->out_kvec_cur++; | 764 | BUG_ON(!con->out_kvec_left); |
721 | con->out_kvec_left--; | 765 | ret -= con->out_kvec_cur->iov_len; |
722 | } else { | 766 | con->out_kvec_cur++; |
723 | con->out_kvec_cur->iov_len -= ret; | 767 | con->out_kvec_left--; |
724 | con->out_kvec_cur->iov_base += ret; | 768 | } |
725 | ret = 0; | 769 | /* and for a partially-consumed entry */ |
726 | break; | 770 | if (ret) { |
727 | } | 771 | con->out_kvec_cur->iov_len -= ret; |
772 | con->out_kvec_cur->iov_base += ret; | ||
728 | } | 773 | } |
729 | } | 774 | } |
730 | con->out_kvec_left = 0; | 775 | con->out_kvec_left = 0; |
@@ -773,7 +818,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
773 | struct ceph_msg *msg = con->out_msg; | 818 | struct ceph_msg *msg = con->out_msg; |
774 | unsigned data_len = le32_to_cpu(msg->hdr.data_len); | 819 | unsigned data_len = le32_to_cpu(msg->hdr.data_len); |
775 | size_t len; | 820 | size_t len; |
776 | int crc = con->msgr->nocrc; | 821 | bool do_datacrc = !con->msgr->nocrc; |
777 | int ret; | 822 | int ret; |
778 | int total_max_write; | 823 | int total_max_write; |
779 | int in_trail = 0; | 824 | int in_trail = 0; |
@@ -790,9 +835,8 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
790 | 835 | ||
791 | while (data_len > con->out_msg_pos.data_pos) { | 836 | while (data_len > con->out_msg_pos.data_pos) { |
792 | struct page *page = NULL; | 837 | struct page *page = NULL; |
793 | void *kaddr = NULL; | ||
794 | int max_write = PAGE_SIZE; | 838 | int max_write = PAGE_SIZE; |
795 | int page_shift = 0; | 839 | int bio_offset = 0; |
796 | 840 | ||
797 | total_max_write = data_len - trail_len - | 841 | total_max_write = data_len - trail_len - |
798 | con->out_msg_pos.data_pos; | 842 | con->out_msg_pos.data_pos; |
@@ -811,58 +855,47 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
811 | 855 | ||
812 | page = list_first_entry(&msg->trail->head, | 856 | page = list_first_entry(&msg->trail->head, |
813 | struct page, lru); | 857 | struct page, lru); |
814 | if (crc) | ||
815 | kaddr = kmap(page); | ||
816 | max_write = PAGE_SIZE; | 858 | max_write = PAGE_SIZE; |
817 | } else if (msg->pages) { | 859 | } else if (msg->pages) { |
818 | page = msg->pages[con->out_msg_pos.page]; | 860 | page = msg->pages[con->out_msg_pos.page]; |
819 | if (crc) | ||
820 | kaddr = kmap(page); | ||
821 | } else if (msg->pagelist) { | 861 | } else if (msg->pagelist) { |
822 | page = list_first_entry(&msg->pagelist->head, | 862 | page = list_first_entry(&msg->pagelist->head, |
823 | struct page, lru); | 863 | struct page, lru); |
824 | if (crc) | ||
825 | kaddr = kmap(page); | ||
826 | #ifdef CONFIG_BLOCK | 864 | #ifdef CONFIG_BLOCK |
827 | } else if (msg->bio) { | 865 | } else if (msg->bio) { |
828 | struct bio_vec *bv; | 866 | struct bio_vec *bv; |
829 | 867 | ||
830 | bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); | 868 | bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); |
831 | page = bv->bv_page; | 869 | page = bv->bv_page; |
832 | page_shift = bv->bv_offset; | 870 | bio_offset = bv->bv_offset; |
833 | if (crc) | ||
834 | kaddr = kmap(page) + page_shift; | ||
835 | max_write = bv->bv_len; | 871 | max_write = bv->bv_len; |
836 | #endif | 872 | #endif |
837 | } else { | 873 | } else { |
838 | page = con->msgr->zero_page; | 874 | page = zero_page; |
839 | if (crc) | ||
840 | kaddr = page_address(con->msgr->zero_page); | ||
841 | } | 875 | } |
842 | len = min_t(int, max_write - con->out_msg_pos.page_pos, | 876 | len = min_t(int, max_write - con->out_msg_pos.page_pos, |
843 | total_max_write); | 877 | total_max_write); |
844 | 878 | ||
845 | if (crc && !con->out_msg_pos.did_page_crc) { | 879 | if (do_datacrc && !con->out_msg_pos.did_page_crc) { |
846 | void *base = kaddr + con->out_msg_pos.page_pos; | 880 | void *base; |
881 | u32 crc; | ||
847 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); | 882 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); |
883 | char *kaddr; | ||
848 | 884 | ||
885 | kaddr = kmap(page); | ||
849 | BUG_ON(kaddr == NULL); | 886 | BUG_ON(kaddr == NULL); |
850 | con->out_msg->footer.data_crc = | 887 | base = kaddr + con->out_msg_pos.page_pos + bio_offset; |
851 | cpu_to_le32(crc32c(tmpcrc, base, len)); | 888 | crc = crc32c(tmpcrc, base, len); |
852 | con->out_msg_pos.did_page_crc = 1; | 889 | con->out_msg->footer.data_crc = cpu_to_le32(crc); |
890 | con->out_msg_pos.did_page_crc = true; | ||
853 | } | 891 | } |
854 | ret = kernel_sendpage(con->sock, page, | 892 | ret = ceph_tcp_sendpage(con->sock, page, |
855 | con->out_msg_pos.page_pos + page_shift, | 893 | con->out_msg_pos.page_pos + bio_offset, |
856 | len, | 894 | len, 1); |
857 | MSG_DONTWAIT | MSG_NOSIGNAL | | 895 | |
858 | MSG_MORE); | 896 | if (do_datacrc) |
859 | |||
860 | if (crc && | ||
861 | (msg->pages || msg->pagelist || msg->bio || in_trail)) | ||
862 | kunmap(page); | 897 | kunmap(page); |
863 | 898 | ||
864 | if (ret == -EAGAIN) | ||
865 | ret = 0; | ||
866 | if (ret <= 0) | 899 | if (ret <= 0) |
867 | goto out; | 900 | goto out; |
868 | 901 | ||
@@ -871,7 +904,7 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
871 | if (ret == len) { | 904 | if (ret == len) { |
872 | con->out_msg_pos.page_pos = 0; | 905 | con->out_msg_pos.page_pos = 0; |
873 | con->out_msg_pos.page++; | 906 | con->out_msg_pos.page++; |
874 | con->out_msg_pos.did_page_crc = 0; | 907 | con->out_msg_pos.did_page_crc = false; |
875 | if (in_trail) | 908 | if (in_trail) |
876 | list_move_tail(&page->lru, | 909 | list_move_tail(&page->lru, |
877 | &msg->trail->head); | 910 | &msg->trail->head); |
@@ -888,12 +921,10 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
888 | dout("write_partial_msg_pages %p msg %p done\n", con, msg); | 921 | dout("write_partial_msg_pages %p msg %p done\n", con, msg); |
889 | 922 | ||
890 | /* prepare and queue up footer, too */ | 923 | /* prepare and queue up footer, too */ |
891 | if (!crc) | 924 | if (!do_datacrc) |
892 | con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; | 925 | con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; |
893 | con->out_kvec_bytes = 0; | 926 | ceph_con_out_kvec_reset(con); |
894 | con->out_kvec_left = 0; | 927 | prepare_write_message_footer(con); |
895 | con->out_kvec_cur = con->out_kvec; | ||
896 | prepare_write_message_footer(con, 0); | ||
897 | ret = 1; | 928 | ret = 1; |
898 | out: | 929 | out: |
899 | return ret; | 930 | return ret; |
@@ -907,12 +938,9 @@ static int write_partial_skip(struct ceph_connection *con) | |||
907 | int ret; | 938 | int ret; |
908 | 939 | ||
909 | while (con->out_skip > 0) { | 940 | while (con->out_skip > 0) { |
910 | struct kvec iov = { | 941 | size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); |
911 | .iov_base = page_address(con->msgr->zero_page), | ||
912 | .iov_len = min(con->out_skip, (int)PAGE_CACHE_SIZE) | ||
913 | }; | ||
914 | 942 | ||
915 | ret = ceph_tcp_sendmsg(con->sock, &iov, 1, iov.iov_len, 1); | 943 | ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1); |
916 | if (ret <= 0) | 944 | if (ret <= 0) |
917 | goto out; | 945 | goto out; |
918 | con->out_skip -= ret; | 946 | con->out_skip -= ret; |
@@ -1085,8 +1113,8 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) | |||
1085 | static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, | 1113 | static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, |
1086 | char delim, const char **ipend) | 1114 | char delim, const char **ipend) |
1087 | { | 1115 | { |
1088 | struct sockaddr_in *in4 = (void *)ss; | 1116 | struct sockaddr_in *in4 = (struct sockaddr_in *) ss; |
1089 | struct sockaddr_in6 *in6 = (void *)ss; | 1117 | struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; |
1090 | 1118 | ||
1091 | memset(ss, 0, sizeof(*ss)); | 1119 | memset(ss, 0, sizeof(*ss)); |
1092 | 1120 | ||
@@ -1512,10 +1540,9 @@ static int read_partial_message_section(struct ceph_connection *con, | |||
1512 | if (ret <= 0) | 1540 | if (ret <= 0) |
1513 | return ret; | 1541 | return ret; |
1514 | section->iov_len += ret; | 1542 | section->iov_len += ret; |
1515 | if (section->iov_len == sec_len) | ||
1516 | *crc = crc32c(0, section->iov_base, | ||
1517 | section->iov_len); | ||
1518 | } | 1543 | } |
1544 | if (section->iov_len == sec_len) | ||
1545 | *crc = crc32c(0, section->iov_base, section->iov_len); | ||
1519 | 1546 | ||
1520 | return 1; | 1547 | return 1; |
1521 | } | 1548 | } |
@@ -1527,7 +1554,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | |||
1527 | 1554 | ||
1528 | static int read_partial_message_pages(struct ceph_connection *con, | 1555 | static int read_partial_message_pages(struct ceph_connection *con, |
1529 | struct page **pages, | 1556 | struct page **pages, |
1530 | unsigned data_len, int datacrc) | 1557 | unsigned data_len, bool do_datacrc) |
1531 | { | 1558 | { |
1532 | void *p; | 1559 | void *p; |
1533 | int ret; | 1560 | int ret; |
@@ -1540,7 +1567,7 @@ static int read_partial_message_pages(struct ceph_connection *con, | |||
1540 | p = kmap(pages[con->in_msg_pos.page]); | 1567 | p = kmap(pages[con->in_msg_pos.page]); |
1541 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | 1568 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, |
1542 | left); | 1569 | left); |
1543 | if (ret > 0 && datacrc) | 1570 | if (ret > 0 && do_datacrc) |
1544 | con->in_data_crc = | 1571 | con->in_data_crc = |
1545 | crc32c(con->in_data_crc, | 1572 | crc32c(con->in_data_crc, |
1546 | p + con->in_msg_pos.page_pos, ret); | 1573 | p + con->in_msg_pos.page_pos, ret); |
@@ -1560,7 +1587,7 @@ static int read_partial_message_pages(struct ceph_connection *con, | |||
1560 | #ifdef CONFIG_BLOCK | 1587 | #ifdef CONFIG_BLOCK |
1561 | static int read_partial_message_bio(struct ceph_connection *con, | 1588 | static int read_partial_message_bio(struct ceph_connection *con, |
1562 | struct bio **bio_iter, int *bio_seg, | 1589 | struct bio **bio_iter, int *bio_seg, |
1563 | unsigned data_len, int datacrc) | 1590 | unsigned data_len, bool do_datacrc) |
1564 | { | 1591 | { |
1565 | struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); | 1592 | struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg); |
1566 | void *p; | 1593 | void *p; |
@@ -1576,7 +1603,7 @@ static int read_partial_message_bio(struct ceph_connection *con, | |||
1576 | 1603 | ||
1577 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | 1604 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, |
1578 | left); | 1605 | left); |
1579 | if (ret > 0 && datacrc) | 1606 | if (ret > 0 && do_datacrc) |
1580 | con->in_data_crc = | 1607 | con->in_data_crc = |
1581 | crc32c(con->in_data_crc, | 1608 | crc32c(con->in_data_crc, |
1582 | p + con->in_msg_pos.page_pos, ret); | 1609 | p + con->in_msg_pos.page_pos, ret); |
@@ -1603,9 +1630,10 @@ static int read_partial_message(struct ceph_connection *con) | |||
1603 | int ret; | 1630 | int ret; |
1604 | int to, left; | 1631 | int to, left; |
1605 | unsigned front_len, middle_len, data_len; | 1632 | unsigned front_len, middle_len, data_len; |
1606 | int datacrc = con->msgr->nocrc; | 1633 | bool do_datacrc = !con->msgr->nocrc; |
1607 | int skip; | 1634 | int skip; |
1608 | u64 seq; | 1635 | u64 seq; |
1636 | u32 crc; | ||
1609 | 1637 | ||
1610 | dout("read_partial_message con %p msg %p\n", con, m); | 1638 | dout("read_partial_message con %p msg %p\n", con, m); |
1611 | 1639 | ||
@@ -1618,17 +1646,16 @@ static int read_partial_message(struct ceph_connection *con) | |||
1618 | if (ret <= 0) | 1646 | if (ret <= 0) |
1619 | return ret; | 1647 | return ret; |
1620 | con->in_base_pos += ret; | 1648 | con->in_base_pos += ret; |
1621 | if (con->in_base_pos == sizeof(con->in_hdr)) { | ||
1622 | u32 crc = crc32c(0, (void *)&con->in_hdr, | ||
1623 | sizeof(con->in_hdr) - sizeof(con->in_hdr.crc)); | ||
1624 | if (crc != le32_to_cpu(con->in_hdr.crc)) { | ||
1625 | pr_err("read_partial_message bad hdr " | ||
1626 | " crc %u != expected %u\n", | ||
1627 | crc, con->in_hdr.crc); | ||
1628 | return -EBADMSG; | ||
1629 | } | ||
1630 | } | ||
1631 | } | 1649 | } |
1650 | |||
1651 | crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc)); | ||
1652 | if (cpu_to_le32(crc) != con->in_hdr.crc) { | ||
1653 | pr_err("read_partial_message bad hdr " | ||
1654 | " crc %u != expected %u\n", | ||
1655 | crc, con->in_hdr.crc); | ||
1656 | return -EBADMSG; | ||
1657 | } | ||
1658 | |||
1632 | front_len = le32_to_cpu(con->in_hdr.front_len); | 1659 | front_len = le32_to_cpu(con->in_hdr.front_len); |
1633 | if (front_len > CEPH_MSG_MAX_FRONT_LEN) | 1660 | if (front_len > CEPH_MSG_MAX_FRONT_LEN) |
1634 | return -EIO; | 1661 | return -EIO; |
@@ -1714,7 +1741,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1714 | while (con->in_msg_pos.data_pos < data_len) { | 1741 | while (con->in_msg_pos.data_pos < data_len) { |
1715 | if (m->pages) { | 1742 | if (m->pages) { |
1716 | ret = read_partial_message_pages(con, m->pages, | 1743 | ret = read_partial_message_pages(con, m->pages, |
1717 | data_len, datacrc); | 1744 | data_len, do_datacrc); |
1718 | if (ret <= 0) | 1745 | if (ret <= 0) |
1719 | return ret; | 1746 | return ret; |
1720 | #ifdef CONFIG_BLOCK | 1747 | #ifdef CONFIG_BLOCK |
@@ -1722,7 +1749,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1722 | 1749 | ||
1723 | ret = read_partial_message_bio(con, | 1750 | ret = read_partial_message_bio(con, |
1724 | &m->bio_iter, &m->bio_seg, | 1751 | &m->bio_iter, &m->bio_seg, |
1725 | data_len, datacrc); | 1752 | data_len, do_datacrc); |
1726 | if (ret <= 0) | 1753 | if (ret <= 0) |
1727 | return ret; | 1754 | return ret; |
1728 | #endif | 1755 | #endif |
@@ -1757,7 +1784,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1757 | m, con->in_middle_crc, m->footer.middle_crc); | 1784 | m, con->in_middle_crc, m->footer.middle_crc); |
1758 | return -EBADMSG; | 1785 | return -EBADMSG; |
1759 | } | 1786 | } |
1760 | if (datacrc && | 1787 | if (do_datacrc && |
1761 | (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && | 1788 | (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && |
1762 | con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { | 1789 | con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { |
1763 | pr_err("read_partial_message %p data crc %u != exp. %u\n", m, | 1790 | pr_err("read_partial_message %p data crc %u != exp. %u\n", m, |
@@ -1819,7 +1846,6 @@ more: | |||
1819 | 1846 | ||
1820 | /* open the socket first? */ | 1847 | /* open the socket first? */ |
1821 | if (con->sock == NULL) { | 1848 | if (con->sock == NULL) { |
1822 | prepare_write_banner(msgr, con); | ||
1823 | prepare_write_connect(msgr, con, 1); | 1849 | prepare_write_connect(msgr, con, 1); |
1824 | prepare_read_banner(con); | 1850 | prepare_read_banner(con); |
1825 | set_bit(CONNECTING, &con->state); | 1851 | set_bit(CONNECTING, &con->state); |
@@ -1829,11 +1855,9 @@ more: | |||
1829 | con->in_tag = CEPH_MSGR_TAG_READY; | 1855 | con->in_tag = CEPH_MSGR_TAG_READY; |
1830 | dout("try_write initiating connect on %p new state %lu\n", | 1856 | dout("try_write initiating connect on %p new state %lu\n", |
1831 | con, con->state); | 1857 | con, con->state); |
1832 | con->sock = ceph_tcp_connect(con); | 1858 | ret = ceph_tcp_connect(con); |
1833 | if (IS_ERR(con->sock)) { | 1859 | if (ret < 0) { |
1834 | con->sock = NULL; | ||
1835 | con->error_msg = "connect error"; | 1860 | con->error_msg = "connect error"; |
1836 | ret = -1; | ||
1837 | goto out; | 1861 | goto out; |
1838 | } | 1862 | } |
1839 | } | 1863 | } |
@@ -1953,8 +1977,9 @@ more: | |||
1953 | * | 1977 | * |
1954 | * FIXME: there must be a better way to do this! | 1978 | * FIXME: there must be a better way to do this! |
1955 | */ | 1979 | */ |
1956 | static char buf[1024]; | 1980 | static char buf[SKIP_BUF_SIZE]; |
1957 | int skip = min(1024, -con->in_base_pos); | 1981 | int skip = min((int) sizeof (buf), -con->in_base_pos); |
1982 | |||
1958 | dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); | 1983 | dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); |
1959 | ret = ceph_tcp_recvmsg(con->sock, buf, skip); | 1984 | ret = ceph_tcp_recvmsg(con->sock, buf, skip); |
1960 | if (ret <= 0) | 1985 | if (ret <= 0) |
@@ -2216,15 +2241,6 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, | |||
2216 | 2241 | ||
2217 | spin_lock_init(&msgr->global_seq_lock); | 2242 | spin_lock_init(&msgr->global_seq_lock); |
2218 | 2243 | ||
2219 | /* the zero page is needed if a request is "canceled" while the message | ||
2220 | * is being written over the socket */ | ||
2221 | msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); | ||
2222 | if (!msgr->zero_page) { | ||
2223 | kfree(msgr); | ||
2224 | return ERR_PTR(-ENOMEM); | ||
2225 | } | ||
2226 | kmap(msgr->zero_page); | ||
2227 | |||
2228 | if (myaddr) | 2244 | if (myaddr) |
2229 | msgr->inst.addr = *myaddr; | 2245 | msgr->inst.addr = *myaddr; |
2230 | 2246 | ||
@@ -2241,8 +2257,6 @@ EXPORT_SYMBOL(ceph_messenger_create); | |||
2241 | void ceph_messenger_destroy(struct ceph_messenger *msgr) | 2257 | void ceph_messenger_destroy(struct ceph_messenger *msgr) |
2242 | { | 2258 | { |
2243 | dout("destroy %p\n", msgr); | 2259 | dout("destroy %p\n", msgr); |
2244 | kunmap(msgr->zero_page); | ||
2245 | __free_page(msgr->zero_page); | ||
2246 | kfree(msgr); | 2260 | kfree(msgr); |
2247 | dout("destroyed messenger %p\n", msgr); | 2261 | dout("destroyed messenger %p\n", msgr); |
2248 | } | 2262 | } |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index fd863fe76934..29ad46ec9dcf 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -283,7 +283,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
283 | ceph_decode_32_safe(p, end, yes, bad); | 283 | ceph_decode_32_safe(p, end, yes, bad); |
284 | #if BITS_PER_LONG == 32 | 284 | #if BITS_PER_LONG == 32 |
285 | err = -EINVAL; | 285 | err = -EINVAL; |
286 | if (yes > ULONG_MAX / sizeof(struct crush_rule_step)) | 286 | if (yes > (ULONG_MAX - sizeof(*r)) |
287 | / sizeof(struct crush_rule_step)) | ||
287 | goto bad; | 288 | goto bad; |
288 | #endif | 289 | #endif |
289 | r = c->rules[i] = kmalloc(sizeof(*r) + | 290 | r = c->rules[i] = kmalloc(sizeof(*r) + |