aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--drivers/md/raid10.c87
1 files changed, 60 insertions, 27 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 017f58113c33..32389d2f18fc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -537,7 +537,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
537 current_distance = abs(r10_bio->devs[slot].addr - 537 current_distance = abs(r10_bio->devs[slot].addr -
538 conf->mirrors[disk].head_position); 538 conf->mirrors[disk].head_position);
539 539
540 /* Find the disk whose head is closest */ 540 /* Find the disk whose head is closest,
541 * or - for far > 1 - find the closest to partition beginning */
541 542
542 for (nslot = slot; nslot < conf->copies; nslot++) { 543 for (nslot = slot; nslot < conf->copies; nslot++) {
543 int ndisk = r10_bio->devs[nslot].devnum; 544 int ndisk = r10_bio->devs[nslot].devnum;
@@ -557,8 +558,13 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
557 slot = nslot; 558 slot = nslot;
558 break; 559 break;
559 } 560 }
560 new_distance = abs(r10_bio->devs[nslot].addr - 561
561 conf->mirrors[ndisk].head_position); 562 /* for far > 1 always use the lowest address */
563 if (conf->far_copies > 1)
564 new_distance = r10_bio->devs[nslot].addr;
565 else
566 new_distance = abs(r10_bio->devs[nslot].addr -
567 conf->mirrors[ndisk].head_position);
562 if (new_distance < current_distance) { 568 if (new_distance < current_distance) {
563 current_distance = new_distance; 569 current_distance = new_distance;
564 disk = ndisk; 570 disk = ndisk;
@@ -629,7 +635,36 @@ static int raid10_congested(void *data, int bits)
629 return ret; 635 return ret;
630} 636}
631 637
632 638static int flush_pending_writes(conf_t *conf)
639{
640 /* Any writes that have been queued but are awaiting
641 * bitmap updates get flushed here.
642 * We return 1 if any requests were actually submitted.
643 */
644 int rv = 0;
645
646 spin_lock_irq(&conf->device_lock);
647
648 if (conf->pending_bio_list.head) {
649 struct bio *bio;
650 bio = bio_list_get(&conf->pending_bio_list);
651 blk_remove_plug(conf->mddev->queue);
652 spin_unlock_irq(&conf->device_lock);
653 /* flush any pending bitmap writes to disk
654 * before proceeding w/ I/O */
655 bitmap_unplug(conf->mddev->bitmap);
656
657 while (bio) { /* submit pending writes */
658 struct bio *next = bio->bi_next;
659 bio->bi_next = NULL;
660 generic_make_request(bio);
661 bio = next;
662 }
663 rv = 1;
664 } else
665 spin_unlock_irq(&conf->device_lock);
666 return rv;
667}
633/* Barriers.... 668/* Barriers....
634 * Sometimes we need to suspend IO while we do something else, 669 * Sometimes we need to suspend IO while we do something else,
635 * either some resync/recovery, or reconfigure the array. 670 * either some resync/recovery, or reconfigure the array.
@@ -712,15 +747,23 @@ static void freeze_array(conf_t *conf)
712 /* stop syncio and normal IO and wait for everything to 747 /* stop syncio and normal IO and wait for everything to
713 * go quiet. 748 * go quiet.
714 * We increment barrier and nr_waiting, and then 749 * We increment barrier and nr_waiting, and then
715 * wait until barrier+nr_pending match nr_queued+2 750 * wait until nr_pending match nr_queued+1
751 * This is called in the context of one normal IO request
752 * that has failed. Thus any sync request that might be pending
753 * will be blocked by nr_pending, and we need to wait for
754 * pending IO requests to complete or be queued for re-try.
755 * Thus the number queued (nr_queued) plus this request (1)
756 * must match the number of pending IOs (nr_pending) before
757 * we continue.
716 */ 758 */
717 spin_lock_irq(&conf->resync_lock); 759 spin_lock_irq(&conf->resync_lock);
718 conf->barrier++; 760 conf->barrier++;
719 conf->nr_waiting++; 761 conf->nr_waiting++;
720 wait_event_lock_irq(conf->wait_barrier, 762 wait_event_lock_irq(conf->wait_barrier,
721 conf->barrier+conf->nr_pending == conf->nr_queued+2, 763 conf->nr_pending == conf->nr_queued+1,
722 conf->resync_lock, 764 conf->resync_lock,
723 raid10_unplug(conf->mddev->queue)); 765 ({ flush_pending_writes(conf);
766 raid10_unplug(conf->mddev->queue); }));
724 spin_unlock_irq(&conf->resync_lock); 767 spin_unlock_irq(&conf->resync_lock);
725} 768}
726 769
@@ -892,6 +935,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
892 blk_plug_device(mddev->queue); 935 blk_plug_device(mddev->queue);
893 spin_unlock_irqrestore(&conf->device_lock, flags); 936 spin_unlock_irqrestore(&conf->device_lock, flags);
894 937
938 /* In case raid10d snuck in to freeze_array */
939 wake_up(&conf->wait_barrier);
940
895 if (do_sync) 941 if (do_sync)
896 md_wakeup_thread(mddev->thread); 942 md_wakeup_thread(mddev->thread);
897 943
@@ -1464,28 +1510,14 @@ static void raid10d(mddev_t *mddev)
1464 1510
1465 for (;;) { 1511 for (;;) {
1466 char b[BDEVNAME_SIZE]; 1512 char b[BDEVNAME_SIZE];
1467 spin_lock_irqsave(&conf->device_lock, flags);
1468 1513
1469 if (conf->pending_bio_list.head) { 1514 unplug += flush_pending_writes(conf);
1470 bio = bio_list_get(&conf->pending_bio_list);
1471 blk_remove_plug(mddev->queue);
1472 spin_unlock_irqrestore(&conf->device_lock, flags);
1473 /* flush any pending bitmap writes to disk before proceeding w/ I/O */
1474 bitmap_unplug(mddev->bitmap);
1475
1476 while (bio) { /* submit pending writes */
1477 struct bio *next = bio->bi_next;
1478 bio->bi_next = NULL;
1479 generic_make_request(bio);
1480 bio = next;
1481 }
1482 unplug = 1;
1483
1484 continue;
1485 }
1486 1515
1487 if (list_empty(head)) 1516 spin_lock_irqsave(&conf->device_lock, flags);
1517 if (list_empty(head)) {
1518 spin_unlock_irqrestore(&conf->device_lock, flags);
1488 break; 1519 break;
1520 }
1489 r10_bio = list_entry(head->prev, r10bio_t, retry_list); 1521 r10_bio = list_entry(head->prev, r10bio_t, retry_list);
1490 list_del(head->prev); 1522 list_del(head->prev);
1491 conf->nr_queued--; 1523 conf->nr_queued--;
@@ -1548,7 +1580,6 @@ static void raid10d(mddev_t *mddev)
1548 } 1580 }
1549 } 1581 }
1550 } 1582 }
1551 spin_unlock_irqrestore(&conf->device_lock, flags);
1552 if (unplug) 1583 if (unplug)
1553 unplug_slaves(mddev); 1584 unplug_slaves(mddev);
1554} 1585}
@@ -1787,6 +1818,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1787 if (j == conf->copies) { 1818 if (j == conf->copies) {
1788 /* Cannot recover, so abort the recovery */ 1819 /* Cannot recover, so abort the recovery */
1789 put_buf(r10_bio); 1820 put_buf(r10_bio);
1821 if (rb2)
1822 atomic_dec(&rb2->remaining);
1790 r10_bio = rb2; 1823 r10_bio = rb2;
1791 if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) 1824 if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
1792 printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", 1825 printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",