author	Josef Bacik <jbacik@fb.com>	2017-03-24 14:08:26 -0400
committer	Jens Axboe <axboe@fb.com>	2017-03-24 17:42:47 -0400
commit	9dd5d3ab49f74e1a3fab92c432a7600bd9081ccc (patch)
tree	ab7de5bd26fc932bed7ee9d3dd2610197ce98e16
parent	93efe9817e651607d83e5f100076ae62d0ce0b93 (diff)
nbd: handle ERESTARTSYS properly
We can submit IO in a process's context, which means there can be pending signals. This isn't a fatal error for NBD, but it does require some finesse. If the signal happens before we transmit anything then we are ok: just requeue the request and carry on. However, if we've done a partial transmit we can't allow anything else to be transmitted on this socket until we transmit the remaining part of the request. Deal with this by keeping track of how much we've sent for the current request, and if we get an ERESTARTSYS during any part of our transmission, save the state of that request and requeue the IO. If anybody tries to submit a request that isn't our pending request, requeue that request until we are able to service the one that is pending.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--	drivers/block/nbd.c	115
1 file changed, 89 insertions, 26 deletions
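The core of the fix is byte-level bookkeeping around the send path, so an interrupted transmission can be resumed instead of restarted. Below is a minimal userspace sketch of the same pattern, with send(2) and EINTR standing in for the kernel's sock_sendmsg() and -ERESTARTSYS; send_all() and its arguments are illustrative helpers, not part of the driver:

#include <errno.h>
#include <stddef.h>
#include <sys/socket.h>

/*
 * Userspace analogue of the bookkeeping this patch adds to sock_xmit():
 * count every byte that goes out so that a send interrupted by a signal
 * can be resumed later from the same offset instead of from byte zero.
 */
static int send_all(int fd, const char *buf, size_t len, size_t *sent)
{
	while (*sent < len) {
		ssize_t n = send(fd, buf + *sent, len - *sent, MSG_NOSIGNAL);

		if (n < 0) {
			if (errno == EINTR)
				return -EINTR;	/* progress preserved in *sent */
			return -errno;		/* hard error, caller fails the IO */
		}
		*sent += n;			/* the counter survives the retry */
	}
	return 0;
}

In the driver below, nsock->sent plays the role of *sent, and the retry happens when blk-mq redrives the request after BLK_MQ_RQ_QUEUE_BUSY.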
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7e4287bc19e5..3d1fc37a83b1 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -47,6 +47,8 @@ static DEFINE_MUTEX(nbd_index_mutex);
 struct nbd_sock {
 	struct socket *sock;
 	struct mutex tx_lock;
+	struct request *pending;
+	int sent;
 };
 
 #define NBD_TIMEDOUT			0
@@ -202,7 +204,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
  * Send or receive packet.
  */
 static int sock_xmit(struct nbd_device *nbd, int index, int send,
-		     struct iov_iter *iter, int msg_flags)
+		     struct iov_iter *iter, int msg_flags, int *sent)
 {
 	struct socket *sock = nbd->socks[index]->sock;
 	int result;
@@ -237,6 +239,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
 			result = -EPIPE; /* short read */
 			break;
 		}
+		if (sent)
+			*sent += result;
 	} while (msg_data_left(&msg));
 
 	tsk_restore_flags(current, pflags, PF_MEMALLOC);
@@ -248,6 +252,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
 static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 {
 	struct request *req = blk_mq_rq_from_pdu(cmd);
+	struct nbd_sock *nsock = nbd->socks[index];
 	int result;
 	struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
 	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
@@ -256,6 +261,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	struct bio *bio;
 	u32 type;
 	u32 tag = blk_mq_unique_tag(req);
+	int sent = nsock->sent, skip = 0;
 
 	iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
 
@@ -283,6 +289,17 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 		return -EIO;
 	}
 
+	/* We did a partial send previously, and we at least sent the whole
+	 * request struct, so just go and send the rest of the pages in the
+	 * request.
+	 */
+	if (sent) {
+		if (sent >= sizeof(request)) {
+			skip = sent - sizeof(request);
+			goto send_pages;
+		}
+		iov_iter_advance(&from, sent);
+	}
 	request.type = htonl(type);
 	if (type != NBD_CMD_FLUSH) {
 		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
@@ -294,15 +311,27 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 		cmd, nbdcmd_to_ascii(type),
 		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
 	result = sock_xmit(nbd, index, 1, &from,
-			(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
+			(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
 	if (result <= 0) {
+		if (result == -ERESTARTSYS) {
+			/* If we haven't sent anything we can just return BUSY,
+			 * however if we have sent something we need to make
+			 * sure we only allow this req to be sent until we are
+			 * completely done.
+			 */
+			if (sent) {
+				nsock->pending = req;
+				nsock->sent = sent;
+			}
+			return BLK_MQ_RQ_QUEUE_BUSY;
+		}
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 			"Send control failed (result %d)\n", result);
 		return -EIO;
 	}
-
+send_pages:
 	if (type != NBD_CMD_WRITE)
-		return 0;
+		goto out;
 
 	bio = req->bio;
 	while (bio) {
@@ -318,8 +347,25 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 			cmd, bvec.bv_len);
 		iov_iter_bvec(&from, ITER_BVEC | WRITE,
 			      &bvec, 1, bvec.bv_len);
-		result = sock_xmit(nbd, index, 1, &from, flags);
+		if (skip) {
+			if (skip >= iov_iter_count(&from)) {
+				skip -= iov_iter_count(&from);
+				continue;
+			}
+			iov_iter_advance(&from, skip);
+			skip = 0;
+		}
+		result = sock_xmit(nbd, index, 1, &from, flags, &sent);
 		if (result <= 0) {
+			if (result == -ERESTARTSYS) {
+				/* We've already sent the header, we
+				 * have no choice but to set pending and
+				 * return BUSY.
+				 */
+				nsock->pending = req;
+				nsock->sent = sent;
+				return BLK_MQ_RQ_QUEUE_BUSY;
+			}
 			dev_err(disk_to_dev(nbd->disk),
 				"Send data failed (result %d)\n",
 				result);
@@ -336,6 +382,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 		}
 		bio = next;
 	}
+out:
+	nsock->pending = NULL;
+	nsock->sent = 0;
 	return 0;
 }
 
@@ -353,7 +402,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 
 	reply.magic = 0;
 	iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
-	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL);
+	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
 	if (result <= 0) {
 		if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
 		    !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
@@ -395,7 +444,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 		rq_for_each_segment(bvec, req, iter) {
 			iov_iter_bvec(&to, ITER_BVEC | READ,
 				      &bvec, 1, bvec.bv_len);
-			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL);
+			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
 			if (result <= 0) {
 				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
 					result);
@@ -482,22 +531,23 @@ static void nbd_clear_que(struct nbd_device *nbd)
 }
 
 
-static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
+static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 {
 	struct request *req = blk_mq_rq_from_pdu(cmd);
 	struct nbd_device *nbd = cmd->nbd;
 	struct nbd_sock *nsock;
+	int ret;
 
 	if (index >= nbd->num_connections) {
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 				    "Attempted send on invalid socket\n");
-		goto error_out;
+		return -EINVAL;
 	}
 
 	if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 				    "Attempted send on closed socket\n");
-		goto error_out;
+		return -EINVAL;
 	}
 
 	req->errors = 0;
@@ -508,29 +558,30 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
 		mutex_unlock(&nsock->tx_lock);
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 				    "Attempted send on closed socket\n");
-		goto error_out;
+		return -EINVAL;
 	}
 
-	if (nbd_send_cmd(nbd, cmd, index) != 0) {
-		dev_err_ratelimited(disk_to_dev(nbd->disk),
-				    "Request send failed\n");
-		req->errors++;
-		nbd_end_request(cmd);
+	/* Handle the case that we have a pending request that was partially
+	 * transmitted that _has_ to be serviced first.  We need to call requeue
+	 * here so that it gets put _after_ the request that is already on the
+	 * dispatch list.
+	 */
+	if (unlikely(nsock->pending && nsock->pending != req)) {
+		blk_mq_requeue_request(req, true);
+		ret = 0;
+		goto out;
 	}
-
+	ret = nbd_send_cmd(nbd, cmd, index);
+out:
 	mutex_unlock(&nsock->tx_lock);
-
-	return;
-
-error_out:
-	req->errors++;
-	nbd_end_request(cmd);
+	return ret;
 }
 
 static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 		    const struct blk_mq_queue_data *bd)
 {
 	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+	int ret;
 
 	/*
 	 * Since we look at the bio's to send the request over the network we
@@ -543,10 +594,20 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 */
 	init_completion(&cmd->send_complete);
 	blk_mq_start_request(bd->rq);
-	nbd_handle_cmd(cmd, hctx->queue_num);
+
+	/* We can be called directly from the user space process, which means we
+	 * could possibly have signals pending so our sendmsg will fail.  In
+	 * this case we need to return that we are busy, otherwise error out as
+	 * appropriate.
+	 */
+	ret = nbd_handle_cmd(cmd, hctx->queue_num);
+	if (ret < 0)
+		ret = BLK_MQ_RQ_QUEUE_ERROR;
+	if (!ret)
+		ret = BLK_MQ_RQ_QUEUE_OK;
 	complete(&cmd->send_complete);
 
-	return BLK_MQ_RQ_QUEUE_OK;
+	return ret;
 }
 
 static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
@@ -581,6 +642,8 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
 
 	mutex_init(&nsock->tx_lock);
 	nsock->sock = sock;
+	nsock->pending = NULL;
+	nsock->sent = 0;
 	socks[nbd->num_connections++] = nsock;
 
 	if (max_part)
@@ -634,7 +697,7 @@ static void send_disconnects(struct nbd_device *nbd)
 
 	for (i = 0; i < nbd->num_connections; i++) {
 		iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
-		ret = sock_xmit(nbd, i, 1, &from, 0);
+		ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
 		if (ret <= 0)
 			dev_err(disk_to_dev(nbd->disk),
 				"Send disconnect failed %d\n", ret);