aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2008-03-04 17:29:29 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-03-04 19:35:17 -0500
commita35e63efa1fb18c6f20f38e3ddf3f8ffbcf0f6e7 (patch)
tree8dddd54c45ebaad84a6178765d29d9536df944d1
parent466634488e80968f12e73dd1fe6af5c37a1fbfe2 (diff)
md: fix deadlock in md/raid1 and md/raid10 when handling a read error
When handling a read error, we freeze the array to stop any other IO while attempting to over-write with correct data. This is done in the raid1d(raid10d) thread and must wait for all submitted IO to complete (except for requests that failed and are sitting in the retry queue - these are counted in ->nr_queued and will stay there during a freeze). However write requests need attention from raid1d as bitmap updates might be required. This can cause a deadlock as raid1 is waiting for requests to finish that themselves need attention from raid1d. So we create a new function 'flush_pending_writes' to give that attention, and call it in freeze_array to be sure that we aren't waiting on raid1d. Thanks to "K.Tanaka" <k-tanaka@ce.jp.nec.com> for finding and reporting this problem. Cc: "K.Tanaka" <k-tanaka@ce.jp.nec.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/md/raid1.c62
-rw-r--r--drivers/md/raid10.c62
2 files changed, 81 insertions, 43 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 5c7fef091cec..38f076a3400d 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -592,6 +592,37 @@ static int raid1_congested(void *data, int bits)
592} 592}
593 593
594 594
595static int flush_pending_writes(conf_t *conf)
596{
597 /* Any writes that have been queued but are awaiting
598 * bitmap updates get flushed here.
599 * We return 1 if any requests were actually submitted.
600 */
601 int rv = 0;
602
603 spin_lock_irq(&conf->device_lock);
604
605 if (conf->pending_bio_list.head) {
606 struct bio *bio;
607 bio = bio_list_get(&conf->pending_bio_list);
608 blk_remove_plug(conf->mddev->queue);
609 spin_unlock_irq(&conf->device_lock);
610 /* flush any pending bitmap writes to
611 * disk before proceeding w/ I/O */
612 bitmap_unplug(conf->mddev->bitmap);
613
614 while (bio) { /* submit pending writes */
615 struct bio *next = bio->bi_next;
616 bio->bi_next = NULL;
617 generic_make_request(bio);
618 bio = next;
619 }
620 rv = 1;
621 } else
622 spin_unlock_irq(&conf->device_lock);
623 return rv;
624}
625
595/* Barriers.... 626/* Barriers....
596 * Sometimes we need to suspend IO while we do something else, 627 * Sometimes we need to suspend IO while we do something else,
597 * either some resync/recovery, or reconfigure the array. 628 * either some resync/recovery, or reconfigure the array.
@@ -681,7 +712,8 @@ static void freeze_array(conf_t *conf)
681 wait_event_lock_irq(conf->wait_barrier, 712 wait_event_lock_irq(conf->wait_barrier,
682 conf->barrier+conf->nr_pending == conf->nr_queued+2, 713 conf->barrier+conf->nr_pending == conf->nr_queued+2,
683 conf->resync_lock, 714 conf->resync_lock,
684 raid1_unplug(conf->mddev->queue)); 715 ({ flush_pending_writes(conf);
716 raid1_unplug(conf->mddev->queue); }));
685 spin_unlock_irq(&conf->resync_lock); 717 spin_unlock_irq(&conf->resync_lock);
686} 718}
687static void unfreeze_array(conf_t *conf) 719static void unfreeze_array(conf_t *conf)
@@ -907,6 +939,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
907 blk_plug_device(mddev->queue); 939 blk_plug_device(mddev->queue);
908 spin_unlock_irqrestore(&conf->device_lock, flags); 940 spin_unlock_irqrestore(&conf->device_lock, flags);
909 941
942 /* In case raid1d snuck into freeze_array */
943 wake_up(&conf->wait_barrier);
944
910 if (do_sync) 945 if (do_sync)
911 md_wakeup_thread(mddev->thread); 946 md_wakeup_thread(mddev->thread);
912#if 0 947#if 0
@@ -1473,28 +1508,14 @@ static void raid1d(mddev_t *mddev)
1473 1508
1474 for (;;) { 1509 for (;;) {
1475 char b[BDEVNAME_SIZE]; 1510 char b[BDEVNAME_SIZE];
1476 spin_lock_irqsave(&conf->device_lock, flags);
1477
1478 if (conf->pending_bio_list.head) {
1479 bio = bio_list_get(&conf->pending_bio_list);
1480 blk_remove_plug(mddev->queue);
1481 spin_unlock_irqrestore(&conf->device_lock, flags);
1482 /* flush any pending bitmap writes to disk before proceeding w/ I/O */
1483 bitmap_unplug(mddev->bitmap);
1484 1511
1485 while (bio) { /* submit pending writes */ 1512 unplug += flush_pending_writes(conf);
1486 struct bio *next = bio->bi_next;
1487 bio->bi_next = NULL;
1488 generic_make_request(bio);
1489 bio = next;
1490 }
1491 unplug = 1;
1492 1513
1493 continue; 1514 spin_lock_irqsave(&conf->device_lock, flags);
1494 } 1515 if (list_empty(head)) {
1495 1516 spin_unlock_irqrestore(&conf->device_lock, flags);
1496 if (list_empty(head))
1497 break; 1517 break;
1518 }
1498 r1_bio = list_entry(head->prev, r1bio_t, retry_list); 1519 r1_bio = list_entry(head->prev, r1bio_t, retry_list);
1499 list_del(head->prev); 1520 list_del(head->prev);
1500 conf->nr_queued--; 1521 conf->nr_queued--;
@@ -1590,7 +1611,6 @@ static void raid1d(mddev_t *mddev)
1590 } 1611 }
1591 } 1612 }
1592 } 1613 }
1593 spin_unlock_irqrestore(&conf->device_lock, flags);
1594 if (unplug) 1614 if (unplug)
1595 unplug_slaves(mddev); 1615 unplug_slaves(mddev);
1596} 1616}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 017f58113c33..5de42d87bf4e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -629,7 +629,36 @@ static int raid10_congested(void *data, int bits)
629 return ret; 629 return ret;
630} 630}
631 631
632 632static int flush_pending_writes(conf_t *conf)
633{
634 /* Any writes that have been queued but are awaiting
635 * bitmap updates get flushed here.
636 * We return 1 if any requests were actually submitted.
637 */
638 int rv = 0;
639
640 spin_lock_irq(&conf->device_lock);
641
642 if (conf->pending_bio_list.head) {
643 struct bio *bio;
644 bio = bio_list_get(&conf->pending_bio_list);
645 blk_remove_plug(conf->mddev->queue);
646 spin_unlock_irq(&conf->device_lock);
647 /* flush any pending bitmap writes to disk
648 * before proceeding w/ I/O */
649 bitmap_unplug(conf->mddev->bitmap);
650
651 while (bio) { /* submit pending writes */
652 struct bio *next = bio->bi_next;
653 bio->bi_next = NULL;
654 generic_make_request(bio);
655 bio = next;
656 }
657 rv = 1;
658 } else
659 spin_unlock_irq(&conf->device_lock);
660 return rv;
661}
633/* Barriers.... 662/* Barriers....
634 * Sometimes we need to suspend IO while we do something else, 663 * Sometimes we need to suspend IO while we do something else,
635 * either some resync/recovery, or reconfigure the array. 664 * either some resync/recovery, or reconfigure the array.
@@ -720,7 +749,8 @@ static void freeze_array(conf_t *conf)
720 wait_event_lock_irq(conf->wait_barrier, 749 wait_event_lock_irq(conf->wait_barrier,
721 conf->barrier+conf->nr_pending == conf->nr_queued+2, 750 conf->barrier+conf->nr_pending == conf->nr_queued+2,
722 conf->resync_lock, 751 conf->resync_lock,
723 raid10_unplug(conf->mddev->queue)); 752 ({ flush_pending_writes(conf);
753 raid10_unplug(conf->mddev->queue); }));
724 spin_unlock_irq(&conf->resync_lock); 754 spin_unlock_irq(&conf->resync_lock);
725} 755}
726 756
@@ -892,6 +922,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
892 blk_plug_device(mddev->queue); 922 blk_plug_device(mddev->queue);
893 spin_unlock_irqrestore(&conf->device_lock, flags); 923 spin_unlock_irqrestore(&conf->device_lock, flags);
894 924
925 /* In case raid10d snuck in to freeze_array */
926 wake_up(&conf->wait_barrier);
927
895 if (do_sync) 928 if (do_sync)
896 md_wakeup_thread(mddev->thread); 929 md_wakeup_thread(mddev->thread);
897 930
@@ -1464,28 +1497,14 @@ static void raid10d(mddev_t *mddev)
1464 1497
1465 for (;;) { 1498 for (;;) {
1466 char b[BDEVNAME_SIZE]; 1499 char b[BDEVNAME_SIZE];
1467 spin_lock_irqsave(&conf->device_lock, flags);
1468 1500
1469 if (conf->pending_bio_list.head) { 1501 unplug += flush_pending_writes(conf);
1470 bio = bio_list_get(&conf->pending_bio_list);
1471 blk_remove_plug(mddev->queue);
1472 spin_unlock_irqrestore(&conf->device_lock, flags);
1473 /* flush any pending bitmap writes to disk before proceeding w/ I/O */
1474 bitmap_unplug(mddev->bitmap);
1475 1502
1476 while (bio) { /* submit pending writes */ 1503 spin_lock_irqsave(&conf->device_lock, flags);
1477 struct bio *next = bio->bi_next; 1504 if (list_empty(head)) {
1478 bio->bi_next = NULL; 1505 spin_unlock_irqrestore(&conf->device_lock, flags);
1479 generic_make_request(bio);
1480 bio = next;
1481 }
1482 unplug = 1;
1483
1484 continue;
1485 }
1486
1487 if (list_empty(head))
1488 break; 1506 break;
1507 }
1489 r10_bio = list_entry(head->prev, r10bio_t, retry_list); 1508 r10_bio = list_entry(head->prev, r10bio_t, retry_list);
1490 list_del(head->prev); 1509 list_del(head->prev);
1491 conf->nr_queued--; 1510 conf->nr_queued--;
@@ -1548,7 +1567,6 @@ static void raid10d(mddev_t *mddev)
1548 } 1567 }
1549 } 1568 }
1550 } 1569 }
1551 spin_unlock_irqrestore(&conf->device_lock, flags);
1552 if (unplug) 1570 if (unplug)
1553 unplug_slaves(mddev); 1571 unplug_slaves(mddev);
1554} 1572}