about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author    Josef Bacik <josef@toxicpanda.com>  2017-04-06 17:01:57 -0400
committer Jens Axboe <axboe@fb.com>           2017-04-17 11:58:42 -0400
commit    f3733247ae7c5fc0fa4c7303438f9e18a6ebb5ec (patch)
tree      4742034116698892b2da029876df3342372531f4
parent    9b1355d5e3f094ef671033f38c4357cd1455c571 (diff)
nbd: handle single path failures gracefully
Currently if we have multiple connections and one of them goes down we will
tear down the whole device. However there's no reason we need to do this as
we could have other connections that are working fine. Deal with this by
keeping track of the state of the different connections, and if we lose one
we mark it as dead and send all IO destined for that socket to one of the
other healthy sockets. Any outstanding requests that were on the dead socket
will timeout and be re-submitted properly.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--  drivers/block/nbd.c | 151
1 file changed, 125 insertions(+), 26 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9dcd5ddce94f..7044ef76a19d 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -49,6 +49,8 @@ struct nbd_sock {
49 struct mutex tx_lock; 49 struct mutex tx_lock;
50 struct request *pending; 50 struct request *pending;
51 int sent; 51 int sent;
52 bool dead;
53 int fallback_index;
52}; 54};
53 55
54#define NBD_TIMEDOUT 0 56#define NBD_TIMEDOUT 0
@@ -82,6 +84,7 @@ struct nbd_device {
82 84
83struct nbd_cmd { 85struct nbd_cmd {
84 struct nbd_device *nbd; 86 struct nbd_device *nbd;
87 int index;
85 struct completion send_complete; 88 struct completion send_complete;
86}; 89};
87 90
@@ -124,6 +127,15 @@ static const char *nbdcmd_to_ascii(int cmd)
124 return "invalid"; 127 return "invalid";
125} 128}
126 129
130static void nbd_mark_nsock_dead(struct nbd_sock *nsock)
131{
132 if (!nsock->dead)
133 kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
134 nsock->dead = true;
135 nsock->pending = NULL;
136 nsock->sent = 0;
137}
138
127static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) 139static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
128{ 140{
129 if (bdev->bd_openers <= 1) 141 if (bdev->bd_openers <= 1)
@@ -191,7 +203,31 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
191 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); 203 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
192 struct nbd_device *nbd = cmd->nbd; 204 struct nbd_device *nbd = cmd->nbd;
193 205
194 dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); 206 if (nbd->num_connections > 1) {
207 dev_err_ratelimited(nbd_to_dev(nbd),
208 "Connection timed out, retrying\n");
209 mutex_lock(&nbd->config_lock);
210 /*
211 * Hooray we have more connections, requeue this IO, the submit
212 * path will put it on a real connection.
213 */
214 if (nbd->socks && nbd->num_connections > 1) {
215 if (cmd->index < nbd->num_connections) {
216 struct nbd_sock *nsock =
217 nbd->socks[cmd->index];
218 mutex_lock(&nsock->tx_lock);
219 nbd_mark_nsock_dead(nsock);
220 mutex_unlock(&nsock->tx_lock);
221 }
222 mutex_unlock(&nbd->config_lock);
223 blk_mq_requeue_request(req, true);
224 return BLK_EH_NOT_HANDLED;
225 }
226 mutex_unlock(&nbd->config_lock);
227 } else {
228 dev_err_ratelimited(nbd_to_dev(nbd),
229 "Connection timed out\n");
230 }
195 set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); 231 set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
196 req->errors = -EIO; 232 req->errors = -EIO;
197 233
@@ -301,6 +337,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
301 } 337 }
302 iov_iter_advance(&from, sent); 338 iov_iter_advance(&from, sent);
303 } 339 }
340 cmd->index = index;
304 request.type = htonl(type); 341 request.type = htonl(type);
305 if (type != NBD_CMD_FLUSH) { 342 if (type != NBD_CMD_FLUSH) {
306 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); 343 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
@@ -328,7 +365,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
328 } 365 }
329 dev_err_ratelimited(disk_to_dev(nbd->disk), 366 dev_err_ratelimited(disk_to_dev(nbd->disk),
330 "Send control failed (result %d)\n", result); 367 "Send control failed (result %d)\n", result);
331 return -EIO; 368 return -EAGAIN;
332 } 369 }
333send_pages: 370send_pages:
334 if (type != NBD_CMD_WRITE) 371 if (type != NBD_CMD_WRITE)
@@ -370,7 +407,7 @@ send_pages:
370 dev_err(disk_to_dev(nbd->disk), 407 dev_err(disk_to_dev(nbd->disk),
371 "Send data failed (result %d)\n", 408 "Send data failed (result %d)\n",
372 result); 409 result);
373 return -EIO; 410 return -EAGAIN;
374 } 411 }
375 /* 412 /*
376 * The completion might already have come in, 413 * The completion might already have come in,
@@ -389,6 +426,12 @@ out:
389 return 0; 426 return 0;
390} 427}
391 428
429static int nbd_disconnected(struct nbd_device *nbd)
430{
431 return test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) ||
432 test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags);
433}
434
392/* NULL returned = something went wrong, inform userspace */ 435/* NULL returned = something went wrong, inform userspace */
393static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) 436static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
394{ 437{
@@ -405,8 +448,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
405 iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); 448 iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
406 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); 449 result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
407 if (result <= 0) { 450 if (result <= 0) {
408 if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && 451 if (!nbd_disconnected(nbd))
409 !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
410 dev_err(disk_to_dev(nbd->disk), 452 dev_err(disk_to_dev(nbd->disk),
411 "Receive control failed (result %d)\n", result); 453 "Receive control failed (result %d)\n", result);
412 return ERR_PTR(result); 454 return ERR_PTR(result);
@@ -449,8 +491,19 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
449 if (result <= 0) { 491 if (result <= 0) {
450 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", 492 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
451 result); 493 result);
452 req->errors = -EIO; 494 /*
453 return cmd; 495 * If we've disconnected or we only have 1
496 * connection then we need to make sure we
497 * complete this request, otherwise error out
498 * and let the timeout stuff handle resubmitting
499 * this request onto another connection.
500 */
501 if (nbd_disconnected(nbd) ||
502 nbd->num_connections <= 1) {
503 req->errors = -EIO;
504 return cmd;
505 }
506 return ERR_PTR(-EIO);
454 } 507 }
455 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", 508 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
456 cmd, bvec.bv_len); 509 cmd, bvec.bv_len);
@@ -495,19 +548,17 @@ static void recv_work(struct work_struct *work)
495 while (1) { 548 while (1) {
496 cmd = nbd_read_stat(nbd, args->index); 549 cmd = nbd_read_stat(nbd, args->index);
497 if (IS_ERR(cmd)) { 550 if (IS_ERR(cmd)) {
551 struct nbd_sock *nsock = nbd->socks[args->index];
552
553 mutex_lock(&nsock->tx_lock);
554 nbd_mark_nsock_dead(nsock);
555 mutex_unlock(&nsock->tx_lock);
498 ret = PTR_ERR(cmd); 556 ret = PTR_ERR(cmd);
499 break; 557 break;
500 } 558 }
501 559
502 nbd_end_request(cmd); 560 nbd_end_request(cmd);
503 } 561 }
504
505 /*
506 * We got an error, shut everybody down if this wasn't the result of a
507 * disconnect request.
508 */
509 if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
510 sock_shutdown(nbd);
511 atomic_dec(&nbd->recv_threads); 562 atomic_dec(&nbd->recv_threads);
512 wake_up(&nbd->recv_wq); 563 wake_up(&nbd->recv_wq);
513} 564}
@@ -531,6 +582,47 @@ static void nbd_clear_que(struct nbd_device *nbd)
531 dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); 582 dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
532} 583}
533 584
585static int find_fallback(struct nbd_device *nbd, int index)
586{
587 int new_index = -1;
588 struct nbd_sock *nsock = nbd->socks[index];
589 int fallback = nsock->fallback_index;
590
591 if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags))
592 return new_index;
593
594 if (nbd->num_connections <= 1) {
595 dev_err_ratelimited(disk_to_dev(nbd->disk),
596 "Attempted send on invalid socket\n");
597 return new_index;
598 }
599
600 if (fallback >= 0 && fallback < nbd->num_connections &&
601 !nbd->socks[fallback]->dead)
602 return fallback;
603
604 if (nsock->fallback_index < 0 ||
605 nsock->fallback_index >= nbd->num_connections ||
606 nbd->socks[nsock->fallback_index]->dead) {
607 int i;
608 for (i = 0; i < nbd->num_connections; i++) {
609 if (i == index)
610 continue;
611 if (!nbd->socks[i]->dead) {
612 new_index = i;
613 break;
614 }
615 }
616 nsock->fallback_index = new_index;
617 if (new_index < 0) {
618 dev_err_ratelimited(disk_to_dev(nbd->disk),
619 "Dead connection, failed to find a fallback\n");
620 return new_index;
621 }
622 }
623 new_index = nsock->fallback_index;
624 return new_index;
625}
534 626
535static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) 627static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
536{ 628{
@@ -544,22 +636,16 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
544 "Attempted send on invalid socket\n"); 636 "Attempted send on invalid socket\n");
545 return -EINVAL; 637 return -EINVAL;
546 } 638 }
547
548 if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
549 dev_err_ratelimited(disk_to_dev(nbd->disk),
550 "Attempted send on closed socket\n");
551 return -EINVAL;
552 }
553
554 req->errors = 0; 639 req->errors = 0;
555 640again:
556 nsock = nbd->socks[index]; 641 nsock = nbd->socks[index];
557 mutex_lock(&nsock->tx_lock); 642 mutex_lock(&nsock->tx_lock);
558 if (unlikely(!nsock->sock)) { 643 if (nsock->dead) {
644 index = find_fallback(nbd, index);
559 mutex_unlock(&nsock->tx_lock); 645 mutex_unlock(&nsock->tx_lock);
560 dev_err_ratelimited(disk_to_dev(nbd->disk), 646 if (index < 0)
561 "Attempted send on closed socket\n"); 647 return -EIO;
562 return -EINVAL; 648 goto again;
563 } 649 }
564 650
565 /* Handle the case that we have a pending request that was partially 651 /* Handle the case that we have a pending request that was partially
@@ -572,7 +658,18 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
572 ret = 0; 658 ret = 0;
573 goto out; 659 goto out;
574 } 660 }
661 /*
662 * Some failures are related to the link going down, so anything that
663 * returns EAGAIN can be retried on a different socket.
664 */
575 ret = nbd_send_cmd(nbd, cmd, index); 665 ret = nbd_send_cmd(nbd, cmd, index);
666 if (ret == -EAGAIN) {
667 dev_err_ratelimited(disk_to_dev(nbd->disk),
668 "Request send failed trying another connection\n");
669 nbd_mark_nsock_dead(nsock);
670 mutex_unlock(&nsock->tx_lock);
671 goto again;
672 }
576out: 673out:
577 mutex_unlock(&nsock->tx_lock); 674 mutex_unlock(&nsock->tx_lock);
578 return ret; 675 return ret;
@@ -646,6 +743,8 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
646 743
647 nbd->socks = socks; 744 nbd->socks = socks;
648 745
746 nsock->fallback_index = -1;
747 nsock->dead = false;
649 mutex_init(&nsock->tx_lock); 748 mutex_init(&nsock->tx_lock);
650 nsock->sock = sock; 749 nsock->sock = sock;
651 nsock->pending = NULL; 750 nsock->pending = NULL;