 drivers/block/nbd.c | 115 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 89 insertions(+), 26 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7e4287bc19e5..3d1fc37a83b1 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -47,6 +47,8 @@ static DEFINE_MUTEX(nbd_index_mutex);
 struct nbd_sock {
         struct socket *sock;
         struct mutex tx_lock;
+        struct request *pending;
+        int sent;
 };
 
 #define NBD_TIMEDOUT 0
@@ -202,7 +204,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
  * Send or receive packet.
  */
 static int sock_xmit(struct nbd_device *nbd, int index, int send,
-                     struct iov_iter *iter, int msg_flags)
+                     struct iov_iter *iter, int msg_flags, int *sent)
 {
         struct socket *sock = nbd->socks[index]->sock;
         int result;
@@ -237,6 +239,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
                         result = -EPIPE; /* short read */
                         break;
                 }
+                if (sent)
+                        *sent += result;
         } while (msg_data_left(&msg));
 
         tsk_restore_flags(current, pflags, PF_MEMALLOC);
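
For readers following the mechanics: the sock_xmit() change above threads a progress counter through the send loop, so every successfully transmitted chunk is recorded before the loop continues, and an interrupted transfer knows exactly how far it got. A rough userspace equivalent of the pattern (hypothetical xmit_all() helper over plain BSD sockets, not the kernel API):

    #include <errno.h>
    #include <stddef.h>
    #include <sys/socket.h>
    #include <sys/types.h>

    /* Sketch of the sock_xmit() pattern: accumulate progress into *sent
     * on every partial write so the caller can resume after a signal. */
    static ssize_t xmit_all(int fd, const char *buf, size_t len, int *sent)
    {
            size_t off = 0;

            while (off < len) {
                    ssize_t n = send(fd, buf + off, len - off, MSG_NOSIGNAL);
                    if (n < 0) {
                            if (errno == EINTR)
                                    return -EINTR; /* caller resumes via *sent */
                            return -errno;
                    }
                    off += (size_t)n;
                    if (sent)
                            *sent += (int)n;
            }
            return (ssize_t)off;
    }
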
@@ -248,6 +252,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
 static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 {
         struct request *req = blk_mq_rq_from_pdu(cmd);
+        struct nbd_sock *nsock = nbd->socks[index];
         int result;
         struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
         struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
@@ -256,6 +261,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
         struct bio *bio;
         u32 type;
         u32 tag = blk_mq_unique_tag(req);
+        int sent = nsock->sent, skip = 0;
 
         iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
 
@@ -283,6 +289,17 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                 return -EIO;
         }
 
+        /* We did a partial send previously, and we at least sent the whole
+         * request struct, so just go and send the rest of the pages in the
+         * request.
+         */
+        if (sent) {
+                if (sent >= sizeof(request)) {
+                        skip = sent - sizeof(request);
+                        goto send_pages;
+                }
+                iov_iter_advance(&from, sent);
+        }
         request.type = htonl(type);
         if (type != NBD_CMD_FLUSH) {
                 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
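
The resume block above makes the central bookkeeping decision: if the previous attempt got past the fixed-size header, only payload remains and `sent - sizeof(request)` bytes of it must be skipped; otherwise the iterator is advanced partway into the header so the header can be finished first. The same arithmetic as a standalone sketch (hypothetical names, illustration only):

    #include <stddef.h>

    /* Sketch of the resume decision: given how many bytes already went
     * out, compute where to restart in the header and how much payload
     * to skip. */
    static void plan_resume(size_t sent, size_t hdr_len,
                            size_t *hdr_off, size_t *payload_skip)
    {
            if (sent >= hdr_len) {
                    *hdr_off = hdr_len;             /* header fully sent */
                    *payload_skip = sent - hdr_len; /* skip into the data */
            } else {
                    *hdr_off = sent;                /* finish the header */
                    *payload_skip = 0;
            }
    }
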
@@ -294,15 +311,27 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                 cmd, nbdcmd_to_ascii(type),
                 (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
         result = sock_xmit(nbd, index, 1, &from,
-                        (type == NBD_CMD_WRITE) ? MSG_MORE : 0);
+                        (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
         if (result <= 0) {
+                if (result == -ERESTARTSYS) {
+                        /* If we haven't sent anything we can just return BUSY,
+                         * however if we have sent something we need to make
+                         * sure we only allow this req to be sent until we are
+                         * completely done.
+                         */
+                        if (sent) {
+                                nsock->pending = req;
+                                nsock->sent = sent;
+                        }
+                        return BLK_MQ_RQ_QUEUE_BUSY;
+                }
                 dev_err_ratelimited(disk_to_dev(nbd->disk),
                         "Send control failed (result %d)\n", result);
                 return -EIO;
         }
-
+send_pages:
         if (type != NBD_CMD_WRITE)
-                return 0;
+                goto out;
 
         bio = req->bio;
         while (bio) {
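
The -ERESTARTSYS branch above encodes the invariant the whole patch rests on: if nothing has been sent, the request is still intact and can be retried by anyone; once any bytes are on the wire, the socket must remember the request and its progress, because no other command may be interleaved into the NBD byte stream. A toy model of that bookkeeping (hypothetical types and return code, not the blk-mq constants):

    /* Toy model of the partial-send bookkeeping. */
    struct sock_state {
            void *pending; /* request mid-transmission, if any */
            int sent;      /* bytes of it already on the wire */
    };

    static int on_interrupted_send(struct sock_state *ss, void *req, int sent)
    {
            if (sent) {
                    ss->pending = req; /* pin this request to the socket */
                    ss->sent = sent;   /* resume point for the retry */
            }
            return 1; /* "busy": the block layer will retry the request */
    }
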
@@ -318,8 +347,25 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                         cmd, bvec.bv_len);
                 iov_iter_bvec(&from, ITER_BVEC | WRITE,
                                 &bvec, 1, bvec.bv_len);
-                result = sock_xmit(nbd, index, 1, &from, flags);
+                if (skip) {
+                        if (skip >= iov_iter_count(&from)) {
+                                skip -= iov_iter_count(&from);
+                                continue;
+                        }
+                        iov_iter_advance(&from, skip);
+                        skip = 0;
+                }
+                result = sock_xmit(nbd, index, 1, &from, flags, &sent);
                 if (result <= 0) {
+                        if (result == -ERESTARTSYS) {
+                                /* We've already sent the header, we
+                                 * have no choice but to set pending and
+                                 * return BUSY.
+                                 */
+                                nsock->pending = req;
+                                nsock->sent = sent;
+                                return BLK_MQ_RQ_QUEUE_BUSY;
+                        }
                         dev_err(disk_to_dev(nbd->disk),
                                 "Send data failed (result %d)\n",
                                 result);
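
The skip loop above replays the bio walk deterministically on retry: segments that were fully transmitted before the signal are stepped over with `continue`, and the first partially sent segment is entered at the correct offset, after which `skip` stays zero. The same logic over a flat buffer array, here just counting what remains to be sent (hypothetical `struct seg`, illustration only):

    #include <stddef.h>

    struct seg { const char *base; size_t len; };

    /* Sketch of the skip logic: fast-forward over already-sent segments,
     * then resume inside the first partially sent one. */
    static size_t bytes_left(const struct seg *segs, size_t nsegs, size_t skip)
    {
            size_t total = 0;

            for (size_t i = 0; i < nsegs; i++) {
                    size_t off = 0;

                    if (skip) {
                            if (skip >= segs[i].len) {
                                    skip -= segs[i].len; /* whole segment done */
                                    continue;
                            }
                            off = skip; /* resume mid-segment */
                            skip = 0;
                    }
                    total += segs[i].len - off;
            }
            return total;
    }
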
@@ -336,6 +382,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
                 }
                 bio = next;
         }
+out:
+        nsock->pending = NULL;
+        nsock->sent = 0;
         return 0;
 }
 
@@ -353,7 +402,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 
         reply.magic = 0;
         iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
-        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL);
+        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
         if (result <= 0) {
                 if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
                     !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
@@ -395,7 +444,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
                 rq_for_each_segment(bvec, req, iter) {
                         iov_iter_bvec(&to, ITER_BVEC | READ,
                                         &bvec, 1, bvec.bv_len);
-                        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL);
+                        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
                         if (result <= 0) {
                                 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
                                         result);
@@ -482,22 +531,23 @@ static void nbd_clear_que(struct nbd_device *nbd)
 }
 
 
-static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
+static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 {
         struct request *req = blk_mq_rq_from_pdu(cmd);
         struct nbd_device *nbd = cmd->nbd;
         struct nbd_sock *nsock;
+        int ret;
 
         if (index >= nbd->num_connections) {
                 dev_err_ratelimited(disk_to_dev(nbd->disk),
                                     "Attempted send on invalid socket\n");
-                goto error_out;
+                return -EINVAL;
         }
 
         if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
                 dev_err_ratelimited(disk_to_dev(nbd->disk),
                                     "Attempted send on closed socket\n");
-                goto error_out;
+                return -EINVAL;
         }
 
         req->errors = 0;
@@ -508,29 +558,30 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
                 mutex_unlock(&nsock->tx_lock);
                 dev_err_ratelimited(disk_to_dev(nbd->disk),
                                     "Attempted send on closed socket\n");
-                goto error_out;
+                return -EINVAL;
         }
 
-        if (nbd_send_cmd(nbd, cmd, index) != 0) {
-                dev_err_ratelimited(disk_to_dev(nbd->disk),
-                                    "Request send failed\n");
-                req->errors++;
-                nbd_end_request(cmd);
+        /* Handle the case that we have a pending request that was partially
+         * transmitted that _has_ to be serviced first.  We need to call requeue
+         * here so that it gets put _after_ the request that is already on the
+         * dispatch list.
+         */
+        if (unlikely(nsock->pending && nsock->pending != req)) {
+                blk_mq_requeue_request(req, true);
+                ret = 0;
+                goto out;
         }
-
+        ret = nbd_send_cmd(nbd, cmd, index);
+out:
         mutex_unlock(&nsock->tx_lock);
-
-        return;
-
-error_out:
-        req->errors++;
-        nbd_end_request(cmd);
+        return ret;
 }
 
 static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
                 const struct blk_mq_queue_data *bd)
 {
         struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+        int ret;
 
         /*
          * Since we look at the bio's to send the request over the network we
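
The requeue above is what keeps the stream coherent across retries: a connection that still owns a partially sent request refuses every other request and hands it back to the dispatch list, where it lands behind the pending one, so the half-written command is always finished before anything else is transmitted. Modeled abstractly (hypothetical types and callback, not the blk-mq API):

    /* Toy model of the dispatch rule: a connection with a pending,
     * partially sent request accepts only that request; everything else
     * is handed back for later re-dispatch. */
    struct conn {
            void *pending; /* request mid-transmission, if any */
    };

    static int conn_dispatch(struct conn *c, void *req, void (*requeue)(void *))
    {
            if (c->pending && c->pending != req) {
                    requeue(req); /* retry after the pending request completes */
                    return 0;
            }
            /* ... (re)send req, clearing c->pending once fully sent ... */
            return 0;
    }
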
@@ -543,10 +594,20 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
          */
         init_completion(&cmd->send_complete);
         blk_mq_start_request(bd->rq);
-        nbd_handle_cmd(cmd, hctx->queue_num);
+
+        /* We can be called directly from the user space process, which means we
+         * could possibly have signals pending so our sendmsg will fail.  In
+         * this case we need to return that we are busy, otherwise error out as
+         * appropriate.
+         */
+        ret = nbd_handle_cmd(cmd, hctx->queue_num);
+        if (ret < 0)
+                ret = BLK_MQ_RQ_QUEUE_ERROR;
+        if (!ret)
+                ret = BLK_MQ_RQ_QUEUE_OK;
         complete(&cmd->send_complete);
 
-        return BLK_MQ_RQ_QUEUE_OK;
+        return ret;
 }
 
 static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
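
The translation in nbd_queue_rq() above maps nbd_handle_cmd()'s three outcomes onto blk-mq verdicts: a negative errno becomes an error completion, zero becomes OK, and the positive BLK_MQ_RQ_QUEUE_BUSY produced by a signal-interrupted send passes through untouched so the block layer re-dispatches the request. As a sketch with stand-in constants (the real values live in the blk-mq headers of this kernel generation):

    enum { QUEUE_OK = 0, QUEUE_BUSY = 1, QUEUE_ERROR = 2 };

    /* Sketch of the return-value translation: only the errno case and
     * the success case need mapping; BUSY is already a queue verdict. */
    static int map_queue_rc(int ret)
    {
            if (ret < 0)
                    return QUEUE_ERROR; /* hard failure: complete with error */
            if (ret == 0)
                    return QUEUE_OK;    /* command fully sent */
            return ret;                 /* QUEUE_BUSY: partial send, retry */
    }
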
@@ -581,6 +642,8 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
 
         mutex_init(&nsock->tx_lock);
         nsock->sock = sock;
+        nsock->pending = NULL;
+        nsock->sent = 0;
         socks[nbd->num_connections++] = nsock;
 
         if (max_part)
@@ -634,7 +697,7 @@ static void send_disconnects(struct nbd_device *nbd)
 
         for (i = 0; i < nbd->num_connections; i++) {
                 iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
-                ret = sock_xmit(nbd, i, 1, &from, 0);
+                ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
                 if (ret <= 0)
                         dev_err(disk_to_dev(nbd->disk),
                                 "Send disconnect failed %d\n", ret);