diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 87 |
1 files changed, 60 insertions, 27 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 017f58113c33..32389d2f18fc 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -537,7 +537,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) | |||
537 | current_distance = abs(r10_bio->devs[slot].addr - | 537 | current_distance = abs(r10_bio->devs[slot].addr - |
538 | conf->mirrors[disk].head_position); | 538 | conf->mirrors[disk].head_position); |
539 | 539 | ||
540 | /* Find the disk whose head is closest */ | 540 | /* Find the disk whose head is closest, |
541 | * or - for far > 1 - find the closest to partition beginning */ | ||
541 | 542 | ||
542 | for (nslot = slot; nslot < conf->copies; nslot++) { | 543 | for (nslot = slot; nslot < conf->copies; nslot++) { |
543 | int ndisk = r10_bio->devs[nslot].devnum; | 544 | int ndisk = r10_bio->devs[nslot].devnum; |
@@ -557,8 +558,13 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) | |||
557 | slot = nslot; | 558 | slot = nslot; |
558 | break; | 559 | break; |
559 | } | 560 | } |
560 | new_distance = abs(r10_bio->devs[nslot].addr - | 561 | |
561 | conf->mirrors[ndisk].head_position); | 562 | /* for far > 1 always use the lowest address */ |
563 | if (conf->far_copies > 1) | ||
564 | new_distance = r10_bio->devs[nslot].addr; | ||
565 | else | ||
566 | new_distance = abs(r10_bio->devs[nslot].addr - | ||
567 | conf->mirrors[ndisk].head_position); | ||
562 | if (new_distance < current_distance) { | 568 | if (new_distance < current_distance) { |
563 | current_distance = new_distance; | 569 | current_distance = new_distance; |
564 | disk = ndisk; | 570 | disk = ndisk; |
@@ -629,7 +635,36 @@ static int raid10_congested(void *data, int bits) | |||
629 | return ret; | 635 | return ret; |
630 | } | 636 | } |
631 | 637 | ||
632 | 638 | static int flush_pending_writes(conf_t *conf) | |
639 | { | ||
640 | /* Any writes that have been queued but are awaiting | ||
641 | * bitmap updates get flushed here. | ||
642 | * We return 1 if any requests were actually submitted. | ||
643 | */ | ||
644 | int rv = 0; | ||
645 | |||
646 | spin_lock_irq(&conf->device_lock); | ||
647 | |||
648 | if (conf->pending_bio_list.head) { | ||
649 | struct bio *bio; | ||
650 | bio = bio_list_get(&conf->pending_bio_list); | ||
651 | blk_remove_plug(conf->mddev->queue); | ||
652 | spin_unlock_irq(&conf->device_lock); | ||
653 | /* flush any pending bitmap writes to disk | ||
654 | * before proceeding w/ I/O */ | ||
655 | bitmap_unplug(conf->mddev->bitmap); | ||
656 | |||
657 | while (bio) { /* submit pending writes */ | ||
658 | struct bio *next = bio->bi_next; | ||
659 | bio->bi_next = NULL; | ||
660 | generic_make_request(bio); | ||
661 | bio = next; | ||
662 | } | ||
663 | rv = 1; | ||
664 | } else | ||
665 | spin_unlock_irq(&conf->device_lock); | ||
666 | return rv; | ||
667 | } | ||
633 | /* Barriers.... | 668 | /* Barriers.... |
634 | * Sometimes we need to suspend IO while we do something else, | 669 | * Sometimes we need to suspend IO while we do something else, |
635 | * either some resync/recovery, or reconfigure the array. | 670 | * either some resync/recovery, or reconfigure the array. |
@@ -712,15 +747,23 @@ static void freeze_array(conf_t *conf) | |||
712 | /* stop syncio and normal IO and wait for everything to | 747 | /* stop syncio and normal IO and wait for everything to |
713 | * go quiet. | 748 | * go quiet. |
714 | * We increment barrier and nr_waiting, and then | 749 | * We increment barrier and nr_waiting, and then |
715 | * wait until barrier+nr_pending match nr_queued+2 | 750 | * wait until nr_pending match nr_queued+1 |
751 | * This is called in the context of one normal IO request | ||
752 | * that has failed. Thus any sync request that might be pending | ||
753 | * will be blocked by nr_pending, and we need to wait for | ||
754 | * pending IO requests to complete or be queued for re-try. | ||
755 | * Thus the number queued (nr_queued) plus this request (1) | ||
756 | * must match the number of pending IOs (nr_pending) before | ||
757 | * we continue. | ||
716 | */ | 758 | */ |
717 | spin_lock_irq(&conf->resync_lock); | 759 | spin_lock_irq(&conf->resync_lock); |
718 | conf->barrier++; | 760 | conf->barrier++; |
719 | conf->nr_waiting++; | 761 | conf->nr_waiting++; |
720 | wait_event_lock_irq(conf->wait_barrier, | 762 | wait_event_lock_irq(conf->wait_barrier, |
721 | conf->barrier+conf->nr_pending == conf->nr_queued+2, | 763 | conf->nr_pending == conf->nr_queued+1, |
722 | conf->resync_lock, | 764 | conf->resync_lock, |
723 | raid10_unplug(conf->mddev->queue)); | 765 | ({ flush_pending_writes(conf); |
766 | raid10_unplug(conf->mddev->queue); })); | ||
724 | spin_unlock_irq(&conf->resync_lock); | 767 | spin_unlock_irq(&conf->resync_lock); |
725 | } | 768 | } |
726 | 769 | ||
@@ -892,6 +935,9 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
892 | blk_plug_device(mddev->queue); | 935 | blk_plug_device(mddev->queue); |
893 | spin_unlock_irqrestore(&conf->device_lock, flags); | 936 | spin_unlock_irqrestore(&conf->device_lock, flags); |
894 | 937 | ||
938 | /* In case raid10d snuck in to freeze_array */ | ||
939 | wake_up(&conf->wait_barrier); | ||
940 | |||
895 | if (do_sync) | 941 | if (do_sync) |
896 | md_wakeup_thread(mddev->thread); | 942 | md_wakeup_thread(mddev->thread); |
897 | 943 | ||
@@ -1464,28 +1510,14 @@ static void raid10d(mddev_t *mddev) | |||
1464 | 1510 | ||
1465 | for (;;) { | 1511 | for (;;) { |
1466 | char b[BDEVNAME_SIZE]; | 1512 | char b[BDEVNAME_SIZE]; |
1467 | spin_lock_irqsave(&conf->device_lock, flags); | ||
1468 | 1513 | ||
1469 | if (conf->pending_bio_list.head) { | 1514 | unplug += flush_pending_writes(conf); |
1470 | bio = bio_list_get(&conf->pending_bio_list); | ||
1471 | blk_remove_plug(mddev->queue); | ||
1472 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
1473 | /* flush any pending bitmap writes to disk before proceeding w/ I/O */ | ||
1474 | bitmap_unplug(mddev->bitmap); | ||
1475 | |||
1476 | while (bio) { /* submit pending writes */ | ||
1477 | struct bio *next = bio->bi_next; | ||
1478 | bio->bi_next = NULL; | ||
1479 | generic_make_request(bio); | ||
1480 | bio = next; | ||
1481 | } | ||
1482 | unplug = 1; | ||
1483 | |||
1484 | continue; | ||
1485 | } | ||
1486 | 1515 | ||
1487 | if (list_empty(head)) | 1516 | spin_lock_irqsave(&conf->device_lock, flags); |
1517 | if (list_empty(head)) { | ||
1518 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
1488 | break; | 1519 | break; |
1520 | } | ||
1489 | r10_bio = list_entry(head->prev, r10bio_t, retry_list); | 1521 | r10_bio = list_entry(head->prev, r10bio_t, retry_list); |
1490 | list_del(head->prev); | 1522 | list_del(head->prev); |
1491 | conf->nr_queued--; | 1523 | conf->nr_queued--; |
@@ -1548,7 +1580,6 @@ static void raid10d(mddev_t *mddev) | |||
1548 | } | 1580 | } |
1549 | } | 1581 | } |
1550 | } | 1582 | } |
1551 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
1552 | if (unplug) | 1583 | if (unplug) |
1553 | unplug_slaves(mddev); | 1584 | unplug_slaves(mddev); |
1554 | } | 1585 | } |
@@ -1787,6 +1818,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1787 | if (j == conf->copies) { | 1818 | if (j == conf->copies) { |
1788 | /* Cannot recover, so abort the recovery */ | 1819 | /* Cannot recover, so abort the recovery */ |
1789 | put_buf(r10_bio); | 1820 | put_buf(r10_bio); |
1821 | if (rb2) | ||
1822 | atomic_dec(&rb2->remaining); | ||
1790 | r10_bio = rb2; | 1823 | r10_bio = rb2; |
1791 | if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) | 1824 | if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) |
1792 | printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", | 1825 | printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", |