author	Tomi Valkeinen <tomi.valkeinen@ti.com>	2012-09-03 02:26:33 -0400
committer	Tomi Valkeinen <tomi.valkeinen@ti.com>	2012-09-03 02:26:33 -0400
commit	c50e86ce7c2961a41f2f7aa6e4fd6c99229ba205 (patch)
tree	4ea36009719bd8fc523239fe1bdccb90f0dce3ae /drivers/md/raid1.c
parent	14d33d384693eb6083396199de516fdef320f7af (diff)
parent	4cbe5a555fa58a79b6ecbb6c531b8bab0650778d (diff)
Merge tag 'v3.6-rc4'
Merge 3.6-rc4 to get latest OMAP and device tree fixes.
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--	drivers/md/raid1.c	244
1 file changed, 188 insertions, 56 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a9c7981ddd24..611b5f797618 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -46,6 +46,20 @@
  */
 #define NR_RAID1_BIOS 256
 
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error. To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio *)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context. So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
+
 /* When there are this many requests queue to be written by
  * the raid1 thread, we become 'congested' to provide back-pressure
  * for writeback.
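The two macros introduced above are sentinels, not real bios: the integers 1 and 2 are cast to struct bio * so that a slot in an r1bio's bios[] array can record "reads were redirected away from this device" or "a write over a known bad block succeeded, clean up later" without allocating anything, and BIO_SPECIAL() lets completion code tell the markers apart from pointers it may actually dereference. A minimal userspace sketch of the same sentinel-pointer idiom follows; struct fake_bio, the slot array and the messages are illustrative stand-ins, not kernel types.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for struct bio; only pointer identity matters. */
struct fake_bio { int id; };

/* Sentinel values: small integers cast to pointers, never dereferenced. */
#define IO_BLOCKED   ((struct fake_bio *)1)
#define IO_MADE_GOOD ((struct fake_bio *)2)
#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)

static void complete_slot(struct fake_bio *slot, int dev)
{
	if (slot == NULL) {
		printf("dev %d: no request\n", dev);
	} else if (slot == IO_BLOCKED) {
		printf("dev %d: read redirected, device left untouched\n", dev);
	} else if (slot == IO_MADE_GOOD) {
		printf("dev %d: write over bad block succeeded, clear record later\n", dev);
	} else {
		/* Only a real pointer is safe to dereference. */
		printf("dev %d: real bio %d completed\n", dev, slot->id);
		free(slot);
	}
}

int main(void)
{
	struct fake_bio *slots[3];
	struct fake_bio *real = malloc(sizeof(*real));

	real->id = 42;
	slots[0] = real;          /* normal I/O */
	slots[1] = IO_BLOCKED;    /* bad block: skip this mirror */
	slots[2] = IO_MADE_GOOD;  /* bad block overwritten successfully */

	for (int dev = 0; dev < 3; dev++) {
		if (slots[dev] && !BIO_SPECIAL(slots[dev]))
			printf("dev %d holds a real bio\n", dev);
		complete_slot(slots[dev], dev);
	}
	return 0;
}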
@@ -483,12 +497,14 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	const sector_t this_sector = r1_bio->sector;
 	int sectors;
 	int best_good_sectors;
-	int start_disk;
-	int best_disk;
-	int i;
+	int best_disk, best_dist_disk, best_pending_disk;
+	int has_nonrot_disk;
+	int disk;
 	sector_t best_dist;
+	unsigned int min_pending;
 	struct md_rdev *rdev;
 	int choose_first;
+	int choose_next_idle;
 
 	rcu_read_lock();
 	/*
@@ -499,26 +515,26 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 retry:
 	sectors = r1_bio->sectors;
 	best_disk = -1;
+	best_dist_disk = -1;
 	best_dist = MaxSector;
+	best_pending_disk = -1;
+	min_pending = UINT_MAX;
 	best_good_sectors = 0;
+	has_nonrot_disk = 0;
+	choose_next_idle = 0;
 
 	if (conf->mddev->recovery_cp < MaxSector &&
-	    (this_sector + sectors >= conf->next_resync)) {
+	    (this_sector + sectors >= conf->next_resync))
 		choose_first = 1;
-		start_disk = 0;
-	} else {
+	else
 		choose_first = 0;
-		start_disk = conf->last_used;
-	}
 
-	for (i = 0 ; i < conf->raid_disks * 2 ; i++) {
+	for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
 		sector_t dist;
 		sector_t first_bad;
 		int bad_sectors;
-
-		int disk = start_disk + i;
-		if (disk >= conf->raid_disks)
-			disk -= conf->raid_disks;
+		unsigned int pending;
+		bool nonrot;
 
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
@@ -577,22 +593,77 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		} else
 			best_good_sectors = sectors;
 
+		nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+		has_nonrot_disk |= nonrot;
+		pending = atomic_read(&rdev->nr_pending);
 		dist = abs(this_sector - conf->mirrors[disk].head_position);
-		if (choose_first
-		    /* Don't change to another disk for sequential reads */
-		    || conf->next_seq_sect == this_sector
-		    || dist == 0
-		    /* If device is idle, use it */
-		    || atomic_read(&rdev->nr_pending) == 0) {
+		if (choose_first) {
 			best_disk = disk;
 			break;
 		}
+		/* Don't change to another disk for sequential reads */
+		if (conf->mirrors[disk].next_seq_sect == this_sector
+		    || dist == 0) {
+			int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
+			struct raid1_info *mirror = &conf->mirrors[disk];
+
+			best_disk = disk;
+			/*
+			 * If buffered sequential IO size exceeds optimal
+			 * iosize, check if there is idle disk. If yes, choose
+			 * the idle disk. read_balance could already choose an
+			 * idle disk before noticing it's a sequential IO in
+			 * this disk. This doesn't matter because this disk
+			 * will idle, next time it will be utilized after the
+			 * first disk has IO size exceeds optimal iosize. In
+			 * this way, iosize of the first disk will be optimal
+			 * iosize at least. iosize of the second disk might be
+			 * small, but not a big deal since when the second disk
+			 * starts IO, the first disk is likely still busy.
+			 */
+			if (nonrot && opt_iosize > 0 &&
+			    mirror->seq_start != MaxSector &&
+			    mirror->next_seq_sect > opt_iosize &&
+			    mirror->next_seq_sect - opt_iosize >=
+			    mirror->seq_start) {
+				choose_next_idle = 1;
+				continue;
+			}
+			break;
+		}
+		/* If device is idle, use it */
+		if (pending == 0) {
+			best_disk = disk;
+			break;
+		}
+
+		if (choose_next_idle)
+			continue;
+
+		if (min_pending > pending) {
+			min_pending = pending;
+			best_pending_disk = disk;
+		}
+
 		if (dist < best_dist) {
 			best_dist = dist;
-			best_disk = disk;
+			best_dist_disk = disk;
 		}
 	}
 
+	/*
+	 * If all disks are rotational, choose the closest disk. If any disk is
+	 * non-rotational, choose the disk with less pending request even the
+	 * disk is rotational, which might/might not be optimal for raids with
+	 * mixed ratation/non-rotational disks depending on workload.
+	 */
+	if (best_disk == -1) {
+		if (has_nonrot_disk)
+			best_disk = best_pending_disk;
+		else
+			best_disk = best_dist_disk;
+	}
+
 	if (best_disk >= 0) {
 		rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
 		if (!rdev)
@@ -606,8 +677,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 			goto retry;
 		}
 		sectors = best_good_sectors;
-		conf->next_seq_sect = this_sector + sectors;
-		conf->last_used = best_disk;
+
+		if (conf->mirrors[best_disk].next_seq_sect != this_sector)
+			conf->mirrors[best_disk].seq_start = this_sector;
+
+		conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
 	}
 	rcu_read_unlock();
 	*max_sectors = sectors;
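Taken together, the read_balance() hunks above drop the old round-robin starting point (conf->last_used) and instead track two fallback candidates on every pass: the disk whose head sits closest to the request (the old heuristic, good for rotating media) and the disk with the fewest pending requests (better for SSDs), while sequential reads stay on their current disk and any idle disk is taken immediately. A minimal sketch of that selection policy, using a simplified mirror array and omitting the RCU, bad-block and optimal-iosize handling, might look like this:

#include <stdbool.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>

/* Simplified stand-in for one mirror; field names are illustrative only. */
struct mirror {
	long long head_position; /* last known head position (sectors) */
	unsigned int nr_pending; /* in-flight requests */
	bool nonrot;             /* non-rotational (SSD)? */
};

/* Pick a mirror for a read at 'sector', mimicking the two-candidate policy:
 * remember the closest disk and the least-loaded disk, then choose between
 * them depending on whether any SSD is present in the array.
 */
static int pick_mirror(struct mirror *m, int n, long long sector)
{
	int best_dist_disk = -1, best_pending_disk = -1;
	long long best_dist = LLONG_MAX;
	unsigned int min_pending = UINT_MAX;
	bool has_nonrot = false;

	for (int disk = 0; disk < n; disk++) {
		long long dist = llabs(sector - m[disk].head_position);

		has_nonrot |= m[disk].nonrot;

		if (m[disk].nr_pending == 0)
			return disk;		/* idle disk: use it at once */

		if (m[disk].nr_pending < min_pending) {
			min_pending = m[disk].nr_pending;
			best_pending_disk = disk;
		}
		if (dist < best_dist) {
			best_dist = dist;
			best_dist_disk = disk;
		}
	}
	/* No idle disk: SSDs care about queue depth, spinning disks about seeks. */
	return has_nonrot ? best_pending_disk : best_dist_disk;
}

int main(void)
{
	struct mirror m[2] = {
		{ .head_position = 0,    .nr_pending = 3, .nonrot = true },
		{ .head_position = 5000, .nr_pending = 1, .nonrot = false },
	};
	printf("read at sector 10 -> mirror %d\n", pick_mirror(m, 2, 10));
	return 0;
}

With the sample array in main(), no disk is idle and one member is non-rotational, so the least-loaded mirror wins over the closer one.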
@@ -870,10 +944,48 @@ do_sync_io:
 	pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
 
+struct raid1_plug_cb {
+	struct blk_plug_cb	cb;
+	struct bio_list		pending;
+	int			pending_cnt;
+};
+
+static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+	struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
+						  cb);
+	struct mddev *mddev = plug->cb.data;
+	struct r1conf *conf = mddev->private;
+	struct bio *bio;
+
+	if (from_schedule) {
+		spin_lock_irq(&conf->device_lock);
+		bio_list_merge(&conf->pending_bio_list, &plug->pending);
+		conf->pending_count += plug->pending_cnt;
+		spin_unlock_irq(&conf->device_lock);
+		md_wakeup_thread(mddev->thread);
+		kfree(plug);
+		return;
+	}
+
+	/* we aren't scheduling, so we can do the write-out directly. */
+	bio = bio_list_get(&plug->pending);
+	bitmap_unplug(mddev->bitmap);
+	wake_up(&conf->wait_barrier);
+
+	while (bio) { /* submit pending writes */
+		struct bio *next = bio->bi_next;
+		bio->bi_next = NULL;
+		generic_make_request(bio);
+		bio = next;
+	}
+	kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
 	struct r1conf *conf = mddev->private;
-	struct mirror_info *mirror;
+	struct raid1_info *mirror;
 	struct r1bio *r1_bio;
 	struct bio *read_bio;
 	int i, disks;
@@ -883,7 +995,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	struct md_rdev *blocked_rdev;
-	int plugged;
+	struct blk_plug_cb *cb;
+	struct raid1_plug_cb *plug = NULL;
 	int first_clone;
 	int sectors_handled;
 	int max_sectors;
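raid1_unplug() above is the callback attached to a block plug: while the submitting task keeps its plug open, write bios collect on the plug's private bio_list instead of the shared conf->pending_bio_list, and the whole batch is flushed when the plug is released — submitted directly if the task unplugged itself, or handed to the raid1 thread when the unplug comes from the scheduler and sleeping work must be avoided. A standalone sketch of this "batch per caller, flush on unplug" pattern, with a plain linked list standing in for the block-layer plug machinery, could look as follows:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Minimal stand-in for a request; the real code batches struct bio. */
struct req {
	int id;
	struct req *next;
};

/* Per-caller "plug": requests parked here while the caller keeps submitting. */
struct plug {
	struct req *pending;
	int pending_cnt;
};

/* Shared queue normally drained by a background thread. */
static struct req *thread_queue;

static void plug_add(struct plug *p, struct req *r)
{
	r->next = p->pending;
	p->pending = r;
	p->pending_cnt++;
}

/* On unplug: if we are called from the scheduler we must not do the
 * (possibly blocking) write-out ourselves, so hand the batch to the
 * worker; otherwise submit the whole batch directly.
 */
static void unplug(struct plug *p, bool from_schedule)
{
	if (from_schedule) {
		while (p->pending) {
			struct req *r = p->pending;
			p->pending = r->next;
			r->next = thread_queue;
			thread_queue = r;
		}
		printf("deferred %d requests to the worker thread\n", p->pending_cnt);
		p->pending_cnt = 0;
		return;
	}
	while (p->pending) {
		struct req *r = p->pending;
		p->pending = r->next;
		printf("submitting request %d directly\n", r->id);
		free(r);
	}
	p->pending_cnt = 0;
}

int main(void)
{
	struct plug p = { NULL, 0 };

	for (int i = 0; i < 3; i++) {
		struct req *r = malloc(sizeof(*r));
		r->id = i;
		plug_add(&p, r);	/* batched, nothing submitted yet */
	}
	unplug(&p, false);		/* caller unplugs: batch goes out in one go */
	return 0;
}

As in the hunk, writes from one submitter are kept together and go out back to back at unplug time instead of being woken through the shared queue one at a time.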
@@ -1034,7 +1147,6 @@ read_again:
 	 * the bad blocks. Each set of writes gets it's own r1bio
 	 * with a set of bios attached.
 	 */
-	plugged = mddev_check_plugged(mddev);
 
 	disks = conf->raid_disks * 2;
  retry_write:
@@ -1187,10 +1299,23 @@ read_again:
 		mbio->bi_private = r1_bio;
 
 		atomic_inc(&r1_bio->remaining);
+
+		cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
+		if (cb)
+			plug = container_of(cb, struct raid1_plug_cb, cb);
+		else
+			plug = NULL;
 		spin_lock_irqsave(&conf->device_lock, flags);
-		bio_list_add(&conf->pending_bio_list, mbio);
-		conf->pending_count++;
+		if (plug) {
+			bio_list_add(&plug->pending, mbio);
+			plug->pending_cnt++;
+		} else {
+			bio_list_add(&conf->pending_bio_list, mbio);
+			conf->pending_count++;
+		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
+		if (!plug)
+			md_wakeup_thread(mddev->thread);
 	}
 	/* Mustn't call r1_bio_write_done before this next test,
 	 * as it could result in the bio being freed.
@@ -1213,9 +1338,6 @@ read_again:
 
 	/* In case raid1d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
-
-	if (do_sync || !bitmap || !plugged)
-		md_wakeup_thread(mddev->thread);
 }
 
 static void status(struct seq_file *seq, struct mddev *mddev)
@@ -1367,7 +1489,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	struct r1conf *conf = mddev->private;
 	int err = -EEXIST;
 	int mirror = 0;
-	struct mirror_info *p;
+	struct raid1_info *p;
 	int first = 0;
 	int last = conf->raid_disks - 1;
 	struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -1436,7 +1558,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 	struct r1conf *conf = mddev->private;
 	int err = 0;
 	int number = rdev->raid_disk;
-	struct mirror_info *p = conf->mirrors+ number;
+	struct raid1_info *p = conf->mirrors + number;
 
 	if (rdev != p->rdev)
 		p = conf->mirrors + conf->raid_disks + number;
@@ -1821,8 +1943,14 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
 		/* if we're here, all write(s) have completed, so clean up */
-		md_done_sync(mddev, r1_bio->sectors, 1);
-		put_buf(r1_bio);
+		int s = r1_bio->sectors;
+		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+		    test_bit(R1BIO_WriteError, &r1_bio->state))
+			reschedule_retry(r1_bio);
+		else {
+			put_buf(r1_bio);
+			md_done_sync(mddev, s, 1);
+		}
 	}
 }
 
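The sync_request_write() change above keeps the final accounting out of the completion path whenever bad-block bookkeeping is pending: if the r1bio carries R1BIO_MadeGood or R1BIO_WriteError it is pushed back to the raid1 thread with reschedule_retry() (the bad-block list update must run in process context, as the IO_MADE_GOOD comment near the top of the file notes), and only the clean case finishes inline. A rough sketch of that fast-path/slow-path split, with a plain struct standing in for the r1bio state bits, might be:

#include <stdio.h>
#include <stdbool.h>

/* Illustrative completion record; the real code uses struct r1bio with
 * R1BIO_MadeGood / R1BIO_WriteError bits in r1_bio->state.
 */
struct sync_done {
	int sectors;
	bool made_good;   /* wrote over a known bad block: must update records */
	bool write_error; /* a write failed: must record new bad blocks */
};

/* Pretend worker hand-off: work that may sleep goes here instead of
 * being done in the (interrupt-context) completion path.
 */
static void reschedule_to_worker(struct sync_done *d)
{
	printf("deferring %d sectors to worker for bad-block bookkeeping\n",
	       d->sectors);
}

static void sync_write_done(struct sync_done *d)
{
	if (d->made_good || d->write_error) {
		/* Updating the bad-block list can sleep, so hand off. */
		reschedule_to_worker(d);
		return;
	}
	/* Fast path: nothing to record, account the sectors right away. */
	printf("resync of %d sectors complete\n", d->sectors);
}

int main(void)
{
	struct sync_done clean = { .sectors = 128 };
	struct sync_done fixed = { .sectors = 64, .made_good = true };

	sync_write_done(&clean);
	sync_write_done(&fixed);
	return 0;
}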
@@ -2170,8 +2298,7 @@ static void raid1d(struct mddev *mddev)
 	blk_start_plug(&plug);
 	for (;;) {
 
-		if (atomic_read(&mddev->plug_cnt) == 0)
-			flush_pending_writes(conf);
+		flush_pending_writes(conf);
 
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (list_empty(head)) {
@@ -2368,6 +2495,18 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 				bio->bi_rw = READ;
 				bio->bi_end_io = end_sync_read;
 				read_targets++;
+			} else if (!test_bit(WriteErrorSeen, &rdev->flags) &&
+				test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
+				!test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
+				/*
+				 * The device is suitable for reading (InSync),
+				 * but has bad block(s) here. Let's try to correct them,
+				 * if we are doing resync or repair. Otherwise, leave
+				 * this device alone for this sync request.
+				 */
+				bio->bi_rw = WRITE;
+				bio->bi_end_io = end_sync_write;
+				write_targets++;
 			}
 		}
 		if (bio->bi_end_io) {
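The new else-if branch above changes what a resync or repair pass does with an in-sync device that has bad blocks in the current window: instead of being skipped, it becomes a write target so the bad region is rewritten from a good copy, while a check pass (MD_RECOVERY_CHECK) still leaves it alone. A simplified sketch of how one sync window might classify devices under that rule — illustrative flags only, not the kernel's rdev/MD_RECOVERY_* state — is:

#include <stdio.h>
#include <stdbool.h>

enum role { SKIP, READ_SRC, WRITE_TGT };

/* Simplified per-device state for one resync window; names are illustrative. */
struct dev {
	bool in_sync;        /* device holds valid data for this array */
	bool bad_block_here; /* known bad block inside the current window */
	bool write_error_seen;
};

/* Classify one device for a resync window, mirroring the new rule:
 * an in-sync device with a bad block in the window becomes a write
 * target during resync/repair, so the bad region gets rewritten.
 */
static enum role classify(const struct dev *d, bool have_read_src,
			  bool doing_repair)
{
	if (!d->in_sync)
		return WRITE_TGT;               /* being recovered: always write */
	if (!d->bad_block_here)
		return have_read_src ? SKIP : READ_SRC;
	if (!d->write_error_seen && doing_repair)
		return WRITE_TGT;               /* rewrite the bad block from a good copy */
	return SKIP;                            /* leave it alone for this window */
}

int main(void)
{
	struct dev devs[3] = {
		{ .in_sync = true },                          /* clean copy */
		{ .in_sync = true, .bad_block_here = true },  /* readable but has a bad block */
		{ .in_sync = false },                         /* spare being rebuilt */
	};
	bool have_read_src = false;

	for (int i = 0; i < 3; i++) {
		enum role r = classify(&devs[i], have_read_src, true);
		if (r == READ_SRC)
			have_read_src = true;
		printf("dev %d -> %s\n", i,
		       r == READ_SRC ? "read source" :
		       r == WRITE_TGT ? "write target" : "skip");
	}
	return 0;
}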
@@ -2425,7 +2564,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 		/* There is nowhere to write, so all non-sync
 		 * drives must be failed - so we are finished
 		 */
-		sector_t rv = max_sector - sector_nr;
+		sector_t rv;
+		if (min_bad > 0)
+			max_sector = sector_nr + min_bad;
+		rv = max_sector - sector_nr;
 		*skipped = 1;
 		put_buf(r1_bio);
 		return rv;
@@ -2488,9 +2630,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 	 */
 	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		atomic_set(&r1_bio->remaining, read_targets);
-		for (i = 0; i < conf->raid_disks * 2; i++) {
+		for (i = 0; i < conf->raid_disks * 2 && read_targets; i++) {
 			bio = r1_bio->bios[i];
 			if (bio->bi_end_io == end_sync_read) {
+				read_targets--;
 				md_sync_acct(bio->bi_bdev, nr_sectors);
 				generic_make_request(bio);
 			}
@@ -2517,7 +2660,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 {
 	struct r1conf *conf;
 	int i;
-	struct mirror_info *disk;
+	struct raid1_info *disk;
 	struct md_rdev *rdev;
 	int err = -ENOMEM;
 
@@ -2525,7 +2668,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	if (!conf)
 		goto abort;
 
-	conf->mirrors = kzalloc(sizeof(struct mirror_info)
+	conf->mirrors = kzalloc(sizeof(struct raid1_info)
 				* mddev->raid_disks * 2,
 				GFP_KERNEL);
 	if (!conf->mirrors)
@@ -2568,6 +2711,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 			mddev->merge_check_needed = 1;
 
 		disk->head_position = 0;
+		disk->seq_start = MaxSector;
 	}
 	conf->raid_disks = mddev->raid_disks;
 	conf->mddev = mddev;
@@ -2581,7 +2725,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	conf->recovery_disabled = mddev->recovery_disabled - 1;
 
 	err = -EIO;
-	conf->last_used = -1;
 	for (i = 0; i < conf->raid_disks * 2; i++) {
 
 		disk = conf->mirrors + i;
@@ -2607,21 +2750,11 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 			if (disk->rdev &&
 			    (disk->rdev->saved_raid_disk < 0))
 				conf->fullsync = 1;
-		} else if (conf->last_used < 0)
-			/*
-			 * The first working device is used as a
-			 * starting point to read balancing.
-			 */
-			conf->last_used = i;
+		}
 	}
 
-	if (conf->last_used < 0) {
-		printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
-		       mdname(mddev));
-		goto abort;
-	}
 	err = -ENOMEM;
-	conf->thread = md_register_thread(raid1d, mddev, NULL);
+	conf->thread = md_register_thread(raid1d, mddev, "raid1");
 	if (!conf->thread) {
 		printk(KERN_ERR
 		       "md/raid1:%s: couldn't allocate thread\n",
@@ -2794,7 +2927,7 @@ static int raid1_reshape(struct mddev *mddev)
 	 */
 	mempool_t *newpool, *oldpool;
 	struct pool_info *newpoolinfo;
-	struct mirror_info *newmirrors;
+	struct raid1_info *newmirrors;
 	struct r1conf *conf = mddev->private;
 	int cnt, raid_disks;
 	unsigned long flags;
@@ -2837,7 +2970,7 @@ static int raid1_reshape(struct mddev *mddev)
 		kfree(newpoolinfo);
 		return -ENOMEM;
 	}
-	newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2,
+	newmirrors = kzalloc(sizeof(struct raid1_info) * raid_disks * 2,
 			     GFP_KERNEL);
 	if (!newmirrors) {
 		kfree(newpoolinfo);
@@ -2876,7 +3009,6 @@ static int raid1_reshape(struct mddev *mddev)
 	conf->raid_disks = mddev->raid_disks = raid_disks;
 	mddev->delta_disks = 0;
 
-	conf->last_used = 0; /* just make sure it is in-range */
 	lower_barrier(conf);
 
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);