Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- | drivers/md/raid1.c | 242 |
1 file changed, 194 insertions, 48 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 1db5de52d376..ff1dbec864af 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -12,6 +12,15 @@ | |||
12 | * Fixes to reconstruction by Jakob Østergaard <jakob@ostenfeld.dk> | 12 | * Fixes to reconstruction by Jakob Østergaard <jakob@ostenfeld.dk> |
13 | * Various fixes by Neil Brown <neilb@cse.unsw.edu.au> | 13 | * Various fixes by Neil Brown <neilb@cse.unsw.edu.au> |
14 | * | 14 | * |
15 | * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support | ||
16 | * bitmapped intelligence in resync: | ||
17 | * | ||
18 | * - bitmap marked during normal i/o | ||
19 | * - bitmap used to skip nondirty blocks during sync | ||
20 | * | ||
21 | * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology: | ||
22 | * - persistent bitmap code | ||
23 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | 24 | * This program is free software; you can redistribute it and/or modify |
16 | * it under the terms of the GNU General Public License as published by | 25 | * it under the terms of the GNU General Public License as published by |
17 | * the Free Software Foundation; either version 2, or (at your option) | 26 | * the Free Software Foundation; either version 2, or (at your option) |
@@ -22,7 +31,16 @@ | |||
22 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 31 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
23 | */ | 32 | */ |
24 | 33 | ||
34 | #include "dm-bio-list.h" | ||
25 | #include <linux/raid/raid1.h> | 35 | #include <linux/raid/raid1.h> |
36 | #include <linux/raid/bitmap.h> | ||
37 | |||
38 | #define DEBUG 0 | ||
39 | #if DEBUG | ||
40 | #define PRINTK(x...) printk(x) | ||
41 | #else | ||
42 | #define PRINTK(x...) | ||
43 | #endif | ||
26 | 44 | ||
27 | /* | 45 | /* |
28 | * Number of guaranteed r1bios in case of extreme VM load: | 46 | * Number of guaranteed r1bios in case of extreme VM load: |
@@ -287,9 +305,11 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int | |||
287 | /* | 305 | /* |
288 | * this branch is our 'one mirror IO has finished' event handler: | 306 | * this branch is our 'one mirror IO has finished' event handler: |
289 | */ | 307 | */ |
290 | if (!uptodate) | 308 | if (!uptodate) { |
291 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | 309 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); |
292 | else | 310 | /* an I/O failed, we can't clear the bitmap */ |
311 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
312 | } else | ||
293 | /* | 313 | /* |
294 | * Set R1BIO_Uptodate in our master bio, so that | 314 | * Set R1BIO_Uptodate in our master bio, so that |
295 | * we will return a good error code to the higher | 315 | * we will return a good error code to the higher |
@@ -309,6 +329,10 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int | |||
309 | * already. | 329 | * already. |
310 | */ | 330 | */ |
311 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 331 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
332 | /* clear the bitmap if all writes complete successfully */ | ||
333 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, | ||
334 | r1_bio->sectors, | ||
335 | !test_bit(R1BIO_Degraded, &r1_bio->state)); | ||
312 | md_write_end(r1_bio->mddev); | 336 | md_write_end(r1_bio->mddev); |
313 | raid_end_bio_io(r1_bio); | 337 | raid_end_bio_io(r1_bio); |
314 | } | 338 | } |
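This hunk is the core of the write-path change: each mirror write records failure in R1BIO_Degraded, and only the last completion passes the accumulated verdict to bitmap_endwrite, so the dirty bits are cleared only when every leg succeeded. Below is a minimal userspace model of that last-writer decision; the struct and function names are invented for illustration, and C11 atomics stand in for the kernel's bio accounting.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct r1_write {
        atomic_int  remaining;   /* mirror writes still in flight */
        atomic_bool degraded;    /* any failed leg => bits must stay dirty */
    };

    /* Called once per mirror write completion, possibly concurrently. */
    static void end_write(struct r1_write *w, bool uptodate, bool *clear_bits)
    {
        if (!uptodate)
            atomic_store(&w->degraded, true);
        /* only the last completion decides; fetch_sub returns the old value */
        if (atomic_fetch_sub(&w->remaining, 1) == 1)
            *clear_bits = !atomic_load(&w->degraded);
    }

    int main(void)
    {
        struct r1_write w = { .remaining = 2, .degraded = false };
        bool clear = false;
        end_write(&w, true, &clear);    /* first mirror succeeds */
        end_write(&w, false, &clear);   /* second fails: keep bits dirty */
        printf("clear bitmap bits: %s\n", clear ? "yes" : "no");
        return 0;
    }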
@@ -458,7 +482,10 @@ static void unplug_slaves(mddev_t *mddev) | |||
458 | 482 | ||
459 | static void raid1_unplug(request_queue_t *q) | 483 | static void raid1_unplug(request_queue_t *q) |
460 | { | 484 | { |
461 | unplug_slaves(q->queuedata); | 485 | mddev_t *mddev = q->queuedata; |
486 | |||
487 | unplug_slaves(mddev); | ||
488 | md_wakeup_thread(mddev->thread); | ||
462 | } | 489 | } |
463 | 490 | ||
464 | static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, | 491 | static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, |
@@ -501,16 +528,16 @@ static void device_barrier(conf_t *conf, sector_t sect) | |||
501 | { | 528 | { |
502 | spin_lock_irq(&conf->resync_lock); | 529 | spin_lock_irq(&conf->resync_lock); |
503 | wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), | 530 | wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), |
504 | conf->resync_lock, unplug_slaves(conf->mddev)); | 531 | conf->resync_lock, raid1_unplug(conf->mddev->queue)); |
505 | 532 | ||
506 | if (!conf->barrier++) { | 533 | if (!conf->barrier++) { |
507 | wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, | 534 | wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, |
508 | conf->resync_lock, unplug_slaves(conf->mddev)); | 535 | conf->resync_lock, raid1_unplug(conf->mddev->queue)); |
509 | if (conf->nr_pending) | 536 | if (conf->nr_pending) |
510 | BUG(); | 537 | BUG(); |
511 | } | 538 | } |
512 | wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, | 539 | wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, |
513 | conf->resync_lock, unplug_slaves(conf->mddev)); | 540 | conf->resync_lock, raid1_unplug(conf->mddev->queue)); |
514 | conf->next_resync = sect; | 541 | conf->next_resync = sect; |
515 | spin_unlock_irq(&conf->resync_lock); | 542 | spin_unlock_irq(&conf->resync_lock); |
516 | } | 543 | } |
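The wait loops above now call raid1_unplug instead of unplug_slaves, because raid1_unplug also wakes the raid1d thread: once writes can be parked on pending_bio_list (see make_request below), a barrier that merely unplugged the member queues could wait forever on writes nobody will ever submit. A toy pthreads model of that "kick the worker each time round the wait loop" shape, with nothing md-specific about it:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  idle = PTHREAD_COND_INITIALIZER;
    static pthread_cond_t  work = PTHREAD_COND_INITIALIZER;
    static int nr_pending;              /* writes parked for the worker */

    /* Worker: drain parked writes, telling waiters after each one. */
    static void *worker(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock);
        while (nr_pending > 0) {
            nr_pending--;               /* "submit" one parked write */
            pthread_cond_broadcast(&idle);
            if (nr_pending > 0)
                pthread_cond_wait(&work, &lock);
        }
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    /* Barrier side: kick the worker every time round, then sleep,
     * as raid1_unplug()'s md_wakeup_thread() does in the wait loops. */
    static void wait_until_idle(void)
    {
        pthread_mutex_lock(&lock);
        while (nr_pending > 0) {
            pthread_cond_signal(&work);
            pthread_cond_wait(&idle, &lock);
        }
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        pthread_t t;
        nr_pending = 3;
        pthread_create(&t, NULL, worker, NULL);
        wait_until_idle();
        pthread_join(t, NULL);
        puts("all parked writes submitted");
        return 0;
    }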
@@ -522,14 +549,20 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
522 | mirror_info_t *mirror; | 549 | mirror_info_t *mirror; |
523 | r1bio_t *r1_bio; | 550 | r1bio_t *r1_bio; |
524 | struct bio *read_bio; | 551 | struct bio *read_bio; |
525 | int i, disks; | 552 | int i, targets = 0, disks; |
526 | mdk_rdev_t *rdev; | 553 | mdk_rdev_t *rdev; |
554 | struct bitmap *bitmap = mddev->bitmap; | ||
555 | unsigned long flags; | ||
556 | struct bio_list bl; | ||
557 | |||
527 | 558 | ||
528 | /* | 559 | /* |
529 | * Register the new request and wait if the reconstruction | 560 | * Register the new request and wait if the reconstruction |
530 | * thread has put up a bar for new requests. | 561 | * thread has put up a bar for new requests. |
531 | * Continue immediately if no resync is active currently. | 562 | * Continue immediately if no resync is active currently. |
532 | */ | 563 | */ |
564 | md_write_start(mddev, bio); /* wait on superblock update early */ | ||
565 | |||
533 | spin_lock_irq(&conf->resync_lock); | 566 | spin_lock_irq(&conf->resync_lock); |
534 | wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); | 567 | wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); |
535 | conf->nr_pending++; | 568 | conf->nr_pending++; |
@@ -552,7 +585,7 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
552 | 585 | ||
553 | r1_bio->master_bio = bio; | 586 | r1_bio->master_bio = bio; |
554 | r1_bio->sectors = bio->bi_size >> 9; | 587 | r1_bio->sectors = bio->bi_size >> 9; |
555 | 588 | r1_bio->state = 0; | |
556 | r1_bio->mddev = mddev; | 589 | r1_bio->mddev = mddev; |
557 | r1_bio->sector = bio->bi_sector; | 590 | r1_bio->sector = bio->bi_sector; |
558 | 591 | ||
@@ -595,6 +628,13 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
595 | * bios[x] to bio | 628 | * bios[x] to bio |
596 | */ | 629 | */ |
597 | disks = conf->raid_disks; | 630 | disks = conf->raid_disks; |
631 | #if 0 | ||
632 | { static int first=1; | ||
633 | if (first) printk("First Write sector %llu disks %d\n", | ||
634 | (unsigned long long)r1_bio->sector, disks); | ||
635 | first = 0; | ||
636 | } | ||
637 | #endif | ||
598 | rcu_read_lock(); | 638 | rcu_read_lock(); |
599 | for (i = 0; i < disks; i++) { | 639 | for (i = 0; i < disks; i++) { |
600 | if ((rdev=conf->mirrors[i].rdev) != NULL && | 640 | if ((rdev=conf->mirrors[i].rdev) != NULL && |
@@ -605,13 +645,21 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
605 | r1_bio->bios[i] = NULL; | 645 | r1_bio->bios[i] = NULL; |
606 | } else | 646 | } else |
607 | r1_bio->bios[i] = bio; | 647 | r1_bio->bios[i] = bio; |
648 | targets++; | ||
608 | } else | 649 | } else |
609 | r1_bio->bios[i] = NULL; | 650 | r1_bio->bios[i] = NULL; |
610 | } | 651 | } |
611 | rcu_read_unlock(); | 652 | rcu_read_unlock(); |
612 | 653 | ||
613 | atomic_set(&r1_bio->remaining, 1); | 654 | if (targets < conf->raid_disks) { |
614 | md_write_start(mddev); | 655 | /* array is degraded, we will not clear the bitmap |
656 | * on I/O completion (see raid1_end_write_request) */ | ||
657 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
658 | } | ||
659 | |||
660 | atomic_set(&r1_bio->remaining, 0); | ||
661 | |||
662 | bio_list_init(&bl); | ||
615 | for (i = 0; i < disks; i++) { | 663 | for (i = 0; i < disks; i++) { |
616 | struct bio *mbio; | 664 | struct bio *mbio; |
617 | if (!r1_bio->bios[i]) | 665 | if (!r1_bio->bios[i]) |
@@ -627,14 +675,23 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
627 | mbio->bi_private = r1_bio; | 675 | mbio->bi_private = r1_bio; |
628 | 676 | ||
629 | atomic_inc(&r1_bio->remaining); | 677 | atomic_inc(&r1_bio->remaining); |
630 | generic_make_request(mbio); | ||
631 | } | ||
632 | 678 | ||
633 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 679 | bio_list_add(&bl, mbio); |
634 | md_write_end(mddev); | ||
635 | raid_end_bio_io(r1_bio); | ||
636 | } | 680 | } |
637 | 681 | ||
682 | bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors); | ||
683 | spin_lock_irqsave(&conf->device_lock, flags); | ||
684 | bio_list_merge(&conf->pending_bio_list, &bl); | ||
685 | bio_list_init(&bl); | ||
686 | |||
687 | blk_plug_device(mddev->queue); | ||
688 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
689 | |||
690 | #if 0 | ||
691 | while ((bio = bio_list_pop(&bl)) != NULL) | ||
692 | generic_make_request(bio); | ||
693 | #endif | ||
694 | |||
638 | return 0; | 695 | return 0; |
639 | } | 696 | } |
640 | 697 | ||
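The rewritten tail of make_request is the delayed-submission half of the bitmap scheme: the mirror bios are collected on a local bio_list, bitmap_startwrite dirties the in-memory bitmap, and the bios are merged onto conf->pending_bio_list under device_lock with the queue left plugged for raid1d to drain. A sketch of that park-then-hand-off pattern using a mutex and an intrusive list (userspace model; bio_node is an invented stand-in for struct bio):

    #include <pthread.h>
    #include <stdio.h>

    struct bio_node { struct bio_node *next; long sector; };

    static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct bio_node *pending_head;
    static struct bio_node **pending_tail = &pending_head;

    /* make_request side: append a whole chain, like bio_list_merge() */
    static void park_writes(struct bio_node *chain)
    {
        pthread_mutex_lock(&device_lock);
        *pending_tail = chain;
        while (*pending_tail)
            pending_tail = &(*pending_tail)->next;
        pthread_mutex_unlock(&device_lock);
    }

    /* raid1d side: detach everything under the lock, like bio_list_get() */
    static struct bio_node *get_parked(void)
    {
        pthread_mutex_lock(&device_lock);
        struct bio_node *list = pending_head;
        pending_head = NULL;
        pending_tail = &pending_head;
        pthread_mutex_unlock(&device_lock);
        return list;
    }

    int main(void)
    {
        struct bio_node b = { NULL, 200 }, a = { &b, 100 };
        park_writes(&a);
        for (struct bio_node *n = get_parked(); n; n = n->next)
            printf("would submit sector %ld\n", n->sector);
        return 0;
    }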
@@ -714,7 +771,7 @@ static void close_sync(conf_t *conf) | |||
714 | { | 771 | { |
715 | spin_lock_irq(&conf->resync_lock); | 772 | spin_lock_irq(&conf->resync_lock); |
716 | wait_event_lock_irq(conf->wait_resume, !conf->barrier, | 773 | wait_event_lock_irq(conf->wait_resume, !conf->barrier, |
717 | conf->resync_lock, unplug_slaves(conf->mddev)); | 774 | conf->resync_lock, raid1_unplug(conf->mddev->queue)); |
718 | spin_unlock_irq(&conf->resync_lock); | 775 | spin_unlock_irq(&conf->resync_lock); |
719 | 776 | ||
720 | if (conf->barrier) BUG(); | 777 | if (conf->barrier) BUG(); |
@@ -754,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
754 | { | 811 | { |
755 | conf_t *conf = mddev->private; | 812 | conf_t *conf = mddev->private; |
756 | int found = 0; | 813 | int found = 0; |
757 | int mirror; | 814 | int mirror = 0; |
758 | mirror_info_t *p; | 815 | mirror_info_t *p; |
759 | 816 | ||
817 | if (rdev->saved_raid_disk >= 0 && | ||
818 | conf->mirrors[rdev->saved_raid_disk].rdev == NULL) | ||
819 | mirror = rdev->saved_raid_disk; | ||
760 | for (mirror=0; mirror < mddev->raid_disks; mirror++) | 820 | for (mirror=0; mirror < mddev->raid_disks; mirror++) |
761 | if ( !(p=conf->mirrors+mirror)->rdev) { | 821 | if ( !(p=conf->mirrors+mirror)->rdev) { |
762 | 822 | ||
@@ -773,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
773 | p->head_position = 0; | 833 | p->head_position = 0; |
774 | rdev->raid_disk = mirror; | 834 | rdev->raid_disk = mirror; |
775 | found = 1; | 835 | found = 1; |
836 | if (rdev->saved_raid_disk != mirror) | ||
837 | conf->fullsync = 1; | ||
776 | p->rdev = rdev; | 838 | p->rdev = rdev; |
777 | break; | 839 | break; |
778 | } | 840 | } |
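raid1_add_disk now tries the slot recorded in rdev->saved_raid_disk first; a device that returns to its old slot can be brought back with a bitmap-driven partial resync, while any other placement sets conf->fullsync. A small model of that placement rule (the array-of-ids representation is an assumption for illustration, not md's data structures):

    #include <stdio.h>

    /* Pick a mirror slot for a returning device: try its old slot first.
     * slots[] holds a device id per slot, -1 meaning empty; *fullsync is
     * set when bitmap-based partial recovery would be unsafe. */
    static int pick_slot(const int *slots, int nslots, int saved, int *fullsync)
    {
        int start = 0;
        if (saved >= 0 && saved < nslots && slots[saved] < 0)
            start = saved;              /* old home is free: take it */
        for (int i = start; i < nslots; i++)
            if (slots[i] < 0) {
                if (i != saved)
                    *fullsync = 1;      /* new slot: bitmap alone not enough */
                return i;
            }
        return -1;                      /* no free slot */
    }

    int main(void)
    {
        int slots[3] = { -1, -1, 5 };   /* slot 2 already holds device 5 */
        int fullsync = 0;
        int s = pick_slot(slots, 3, 1, &fullsync);  /* device came from slot 1 */
        printf("placed in slot %d, fullsync=%d\n", s, fullsync);
        return 0;
    }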
@@ -828,10 +890,11 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) | |||
828 | * or re-read if the read failed. | 890 | * or re-read if the read failed. |
829 | * We don't do much here, just schedule handling by raid1d | 891 | * We don't do much here, just schedule handling by raid1d |
830 | */ | 892 | */ |
831 | if (!uptodate) | 893 | if (!uptodate) { |
832 | md_error(r1_bio->mddev, | 894 | md_error(r1_bio->mddev, |
833 | conf->mirrors[r1_bio->read_disk].rdev); | 895 | conf->mirrors[r1_bio->read_disk].rdev); |
834 | else | 896 | set_bit(R1BIO_Degraded, &r1_bio->state); |
897 | } else | ||
835 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 898 | set_bit(R1BIO_Uptodate, &r1_bio->state); |
836 | rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); | 899 | rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); |
837 | reschedule_retry(r1_bio); | 900 | reschedule_retry(r1_bio); |
@@ -855,8 +918,10 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) | |||
855 | mirror = i; | 918 | mirror = i; |
856 | break; | 919 | break; |
857 | } | 920 | } |
858 | if (!uptodate) | 921 | if (!uptodate) { |
859 | md_error(mddev, conf->mirrors[mirror].rdev); | 922 | md_error(mddev, conf->mirrors[mirror].rdev); |
923 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
924 | } | ||
860 | update_head_pos(mirror, r1_bio); | 925 | update_head_pos(mirror, r1_bio); |
861 | 926 | ||
862 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 927 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
@@ -876,6 +941,9 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
876 | 941 | ||
877 | bio = r1_bio->bios[r1_bio->read_disk]; | 942 | bio = r1_bio->bios[r1_bio->read_disk]; |
878 | 943 | ||
944 | /* | ||
945 | if (r1_bio->sector == 0) printk("First sync write starts\n"); | ||
946 | */ | ||
879 | /* | 947 | /* |
880 | * schedule writes | 948 | * schedule writes |
881 | */ | 949 | */ |
@@ -903,10 +971,12 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
903 | atomic_inc(&conf->mirrors[i].rdev->nr_pending); | 971 | atomic_inc(&conf->mirrors[i].rdev->nr_pending); |
904 | atomic_inc(&r1_bio->remaining); | 972 | atomic_inc(&r1_bio->remaining); |
905 | md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); | 973 | md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); |
974 | |||
906 | generic_make_request(wbio); | 975 | generic_make_request(wbio); |
907 | } | 976 | } |
908 | 977 | ||
909 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 978 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
979 | /* if we're here, all write(s) have completed, so clean up */ | ||
910 | md_done_sync(mddev, r1_bio->sectors, 1); | 980 | md_done_sync(mddev, r1_bio->sectors, 1); |
911 | put_buf(r1_bio); | 981 | put_buf(r1_bio); |
912 | } | 982 | } |
@@ -931,11 +1001,30 @@ static void raid1d(mddev_t *mddev) | |||
931 | mdk_rdev_t *rdev; | 1001 | mdk_rdev_t *rdev; |
932 | 1002 | ||
933 | md_check_recovery(mddev); | 1003 | md_check_recovery(mddev); |
934 | md_handle_safemode(mddev); | ||
935 | 1004 | ||
936 | for (;;) { | 1005 | for (;;) { |
937 | char b[BDEVNAME_SIZE]; | 1006 | char b[BDEVNAME_SIZE]; |
938 | spin_lock_irqsave(&conf->device_lock, flags); | 1007 | spin_lock_irqsave(&conf->device_lock, flags); |
1008 | |||
1009 | if (conf->pending_bio_list.head) { | ||
1010 | bio = bio_list_get(&conf->pending_bio_list); | ||
1011 | blk_remove_plug(mddev->queue); | ||
1012 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
1013 | /* flush any pending bitmap writes to disk before proceeding w/ I/O */ | ||
1014 | if (bitmap_unplug(mddev->bitmap) != 0) | ||
1015 | printk("%s: bitmap file write failed!\n", mdname(mddev)); | ||
1016 | |||
1017 | while (bio) { /* submit pending writes */ | ||
1018 | struct bio *next = bio->bi_next; | ||
1019 | bio->bi_next = NULL; | ||
1020 | generic_make_request(bio); | ||
1021 | bio = next; | ||
1022 | } | ||
1023 | unplug = 1; | ||
1024 | |||
1025 | continue; | ||
1026 | } | ||
1027 | |||
939 | if (list_empty(head)) | 1028 | if (list_empty(head)) |
940 | break; | 1029 | break; |
941 | r1_bio = list_entry(head->prev, r1bio_t, retry_list); | 1030 | r1_bio = list_entry(head->prev, r1bio_t, retry_list); |
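The new branch in raid1d above enforces the write-intent ordering: detach the parked bios under device_lock, push the dirty bitmap pages out with bitmap_unplug, and only then issue the data writes. A crash between the two steps leaves bits set for clean data, which merely costs resync work; the opposite order could lose the record of an in-flight write. A compilable model of the ordering (flush_bitmap and submit are stand-ins, not kernel calls):

    #include <stdio.h>

    struct bio_node { struct bio_node *next; long sector; };

    /* Stand-in for bitmap_unplug(): returns nonzero on failure. */
    static int flush_bitmap(void)
    {
        puts("bitmap pages on stable storage");
        return 0;
    }

    static void submit(struct bio_node *bio)
    {
        printf("data write at sector %ld\n", bio->sector);
    }

    /* Daemon step: bits reach disk strictly before the data they cover. */
    static void drain(struct bio_node *list)
    {
        if (flush_bitmap() != 0)
            fprintf(stderr, "bitmap file write failed!\n");
        while (list) {
            struct bio_node *next = list->next;
            list->next = NULL;          /* as raid1d clears bi_next */
            submit(list);
            list = next;
        }
    }

    int main(void)
    {
        struct bio_node b = { NULL, 200 }, a = { &b, 100 };
        drain(&a);
        return 0;
    }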
@@ -1009,7 +1098,7 @@ static int init_resync(conf_t *conf) | |||
1009 | * that can be installed to exclude normal IO requests. | 1098 | * that can be installed to exclude normal IO requests. |
1010 | */ | 1099 | */ |
1011 | 1100 | ||
1012 | static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | 1101 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) |
1013 | { | 1102 | { |
1014 | conf_t *conf = mddev_to_conf(mddev); | 1103 | conf_t *conf = mddev_to_conf(mddev); |
1015 | mirror_info_t *mirror; | 1104 | mirror_info_t *mirror; |
@@ -1019,17 +1108,43 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1019 | int disk; | 1108 | int disk; |
1020 | int i; | 1109 | int i; |
1021 | int write_targets = 0; | 1110 | int write_targets = 0; |
1111 | int sync_blocks; | ||
1022 | 1112 | ||
1023 | if (!conf->r1buf_pool) | 1113 | if (!conf->r1buf_pool) |
1114 | { | ||
1115 | /* | ||
1116 | printk("sync start - bitmap %p\n", mddev->bitmap); | ||
1117 | */ | ||
1024 | if (init_resync(conf)) | 1118 | if (init_resync(conf)) |
1025 | return -ENOMEM; | 1119 | return 0; |
1120 | } | ||
1026 | 1121 | ||
1027 | max_sector = mddev->size << 1; | 1122 | max_sector = mddev->size << 1; |
1028 | if (sector_nr >= max_sector) { | 1123 | if (sector_nr >= max_sector) { |
1124 | /* If we aborted, we need to abort the | ||
1125 | * sync on the 'current' bitmap chunk (there will | ||
1126 | * only be one in raid1 resync). | ||
1127 | * We can find the current address in mddev->curr_resync | ||
1128 | */ | ||
1129 | if (!conf->fullsync) { | ||
1130 | if (mddev->curr_resync < max_sector) | ||
1131 | bitmap_end_sync(mddev->bitmap, | ||
1132 | mddev->curr_resync, | ||
1133 | &sync_blocks, 1); | ||
1134 | bitmap_close_sync(mddev->bitmap); | ||
1135 | } | ||
1136 | if (mddev->curr_resync >= max_sector) | ||
1137 | conf->fullsync = 0; | ||
1029 | close_sync(conf); | 1138 | close_sync(conf); |
1030 | return 0; | 1139 | return 0; |
1031 | } | 1140 | } |
1032 | 1141 | ||
1142 | if (!conf->fullsync && | ||
1143 | !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) { | ||
1144 | /* We can skip this block, and probably several more */ | ||
1145 | *skipped = 1; | ||
1146 | return sync_blocks; | ||
1147 | } | ||
1033 | /* | 1148 | /* |
1034 | * If there is non-resync activity waiting for us then | 1149 | * If there is non-resync activity waiting for us then |
1035 | * put in a delay to throttle resync. | 1150 | * put in a delay to throttle resync. |
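With a bitmap available, sync_request first asks bitmap_start_sync whether the chunk containing sector_nr is dirty at all; if not, the whole run of clean sectors is reported through *skipped and returned without any I/O. A userspace model of that lookup follows; the chunk size and bit layout here are assumptions for illustration, not md's actual on-disk format:

    #include <stdbool.h>
    #include <stdio.h>

    #define CHUNK_SECTORS 2048L  /* assumed chunk size: 1MiB of 512B sectors */

    /* Stand-in for bitmap_start_sync(): true if the chunk holding 'sector'
     * is dirty; *blocks always gets the sectors left in that chunk. */
    static bool chunk_needs_sync(const unsigned char *dirty,
                                 long sector, long *blocks)
    {
        long chunk = sector / CHUNK_SECTORS;
        *blocks = (chunk + 1) * CHUNK_SECTORS - sector;
        return dirty[chunk / 8] & (1u << (chunk % 8));
    }

    int main(void)
    {
        unsigned char dirty[1] = { 0x02 };  /* only chunk 1 is dirty */
        long blocks;
        if (!chunk_needs_sync(dirty, 0, &blocks))
            /* the caller sets *skipped = 1 and returns this count */
            printf("skip %ld clean sectors\n", blocks);
        return 0;
    }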
@@ -1068,6 +1183,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1068 | 1183 | ||
1069 | r1_bio->mddev = mddev; | 1184 | r1_bio->mddev = mddev; |
1070 | r1_bio->sector = sector_nr; | 1185 | r1_bio->sector = sector_nr; |
1186 | r1_bio->state = 0; | ||
1071 | set_bit(R1BIO_IsSync, &r1_bio->state); | 1187 | set_bit(R1BIO_IsSync, &r1_bio->state); |
1072 | r1_bio->read_disk = disk; | 1188 | r1_bio->read_disk = disk; |
1073 | 1189 | ||
@@ -1102,18 +1218,24 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1102 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; | 1218 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; |
1103 | bio->bi_private = r1_bio; | 1219 | bio->bi_private = r1_bio; |
1104 | } | 1220 | } |
1221 | |||
1222 | if (write_targets + 1 < conf->raid_disks) | ||
1223 | /* array degraded, can't clear bitmap */ | ||
1224 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
1225 | |||
1105 | if (write_targets == 0) { | 1226 | if (write_targets == 0) { |
1106 | /* There is nowhere to write, so all non-sync | 1227 | /* There is nowhere to write, so all non-sync |
1107 | * drives must be failed - so we are finished | 1228 | * drives must be failed - so we are finished |
1108 | */ | 1229 | */ |
1109 | int rv = max_sector - sector_nr; | 1230 | sector_t rv = max_sector - sector_nr; |
1110 | md_done_sync(mddev, rv, 1); | 1231 | *skipped = 1; |
1111 | put_buf(r1_bio); | 1232 | put_buf(r1_bio); |
1112 | rdev_dec_pending(conf->mirrors[disk].rdev, mddev); | 1233 | rdev_dec_pending(conf->mirrors[disk].rdev, mddev); |
1113 | return rv; | 1234 | return rv; |
1114 | } | 1235 | } |
1115 | 1236 | ||
1116 | nr_sectors = 0; | 1237 | nr_sectors = 0; |
1238 | sync_blocks = 0; | ||
1117 | do { | 1239 | do { |
1118 | struct page *page; | 1240 | struct page *page; |
1119 | int len = PAGE_SIZE; | 1241 | int len = PAGE_SIZE; |
@@ -1121,6 +1243,17 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1121 | len = (max_sector - sector_nr) << 9; | 1243 | len = (max_sector - sector_nr) << 9; |
1122 | if (len == 0) | 1244 | if (len == 0) |
1123 | break; | 1245 | break; |
1246 | if (!conf->fullsync) { | ||
1247 | if (sync_blocks == 0) { | ||
1248 | if (!bitmap_start_sync(mddev->bitmap, | ||
1249 | sector_nr, &sync_blocks)) | ||
1250 | break; | ||
1251 | if (sync_blocks < (PAGE_SIZE>>9)) | ||
1252 | BUG(); | ||
1253 | if (len > (sync_blocks<<9)) len = sync_blocks<<9; | ||
1254 | } | ||
1255 | } | ||
1256 | |||
1124 | for (i=0 ; i < conf->raid_disks; i++) { | 1257 | for (i=0 ; i < conf->raid_disks; i++) { |
1125 | bio = r1_bio->bios[i]; | 1258 | bio = r1_bio->bios[i]; |
1126 | if (bio->bi_end_io) { | 1259 | if (bio->bi_end_io) { |
@@ -1143,6 +1276,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) | |||
1143 | } | 1276 | } |
1144 | nr_sectors += len>>9; | 1277 | nr_sectors += len>>9; |
1145 | sector_nr += len>>9; | 1278 | sector_nr += len>>9; |
1279 | sync_blocks -= (len>>9); | ||
1146 | } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); | 1280 | } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); |
1147 | bio_full: | 1281 | bio_full: |
1148 | bio = r1_bio->bios[disk]; | 1282 | bio = r1_bio->bios[disk]; |
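Inside the page-filling loop, each bio page is clamped so a single r1_bio never crosses the bitmap chunk being synced: len starts at one page, is trimmed to the end of the device, then to sync_blocks, which is consumed as pages are added. The arithmetic, modeled in sectors (the kernel mixes bytes and sectors via the <<9 shifts):

    #include <stdio.h>

    #define PAGE_SECTORS 8L  /* PAGE_SIZE >> 9 with 4KiB pages */

    static long min_l(long a, long b) { return a < b ? a : b; }

    int main(void)
    {
        long max_sector = 100, sector_nr = 90, sync_blocks = 6;

        while (sector_nr < max_sector && sync_blocks > 0) {
            long len = PAGE_SECTORS;                  /* one page at a time */
            len = min_l(len, max_sector - sector_nr); /* end of device */
            len = min_l(len, sync_blocks);            /* end of dirty chunk */
            printf("sync %ld sectors at %ld\n", len, sector_nr);
            sector_nr   += len;
            sync_blocks -= len;            /* as the hunk above decrements */
        }
        return 0;
    }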
@@ -1231,6 +1365,9 @@ static int run(mddev_t *mddev) | |||
1231 | init_waitqueue_head(&conf->wait_idle); | 1365 | init_waitqueue_head(&conf->wait_idle); |
1232 | init_waitqueue_head(&conf->wait_resume); | 1366 | init_waitqueue_head(&conf->wait_resume); |
1233 | 1367 | ||
1368 | bio_list_init(&conf->pending_bio_list); | ||
1369 | bio_list_init(&conf->flushing_bio_list); | ||
1370 | |||
1234 | if (!conf->working_disks) { | 1371 | if (!conf->working_disks) { |
1235 | printk(KERN_ERR "raid1: no operational mirrors for %s\n", | 1372 | printk(KERN_ERR "raid1: no operational mirrors for %s\n", |
1236 | mdname(mddev)); | 1373 | mdname(mddev)); |
@@ -1259,16 +1396,15 @@ static int run(mddev_t *mddev) | |||
1259 | conf->last_used = j; | 1396 | conf->last_used = j; |
1260 | 1397 | ||
1261 | 1398 | ||
1262 | 1399 | mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); | |
1263 | { | 1400 | if (!mddev->thread) { |
1264 | mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); | 1401 | printk(KERN_ERR |
1265 | if (!mddev->thread) { | 1402 | "raid1: couldn't allocate thread for %s\n", |
1266 | printk(KERN_ERR | 1403 | mdname(mddev)); |
1267 | "raid1: couldn't allocate thread for %s\n", | 1404 | goto out_free_conf; |
1268 | mdname(mddev)); | ||
1269 | goto out_free_conf; | ||
1270 | } | ||
1271 | } | 1405 | } |
1406 | if (mddev->bitmap) mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; | ||
1407 | |||
1272 | printk(KERN_INFO | 1408 | printk(KERN_INFO |
1273 | "raid1: raid set %s active with %d out of %d mirrors\n", | 1409 | "raid1: raid set %s active with %d out of %d mirrors\n", |
1274 | mdname(mddev), mddev->raid_disks - mddev->degraded, | 1410 | mdname(mddev), mddev->raid_disks - mddev->degraded, |
@@ -1291,10 +1427,8 @@ out_free_conf: | |||
1291 | if (conf) { | 1427 | if (conf) { |
1292 | if (conf->r1bio_pool) | 1428 | if (conf->r1bio_pool) |
1293 | mempool_destroy(conf->r1bio_pool); | 1429 | mempool_destroy(conf->r1bio_pool); |
1294 | if (conf->mirrors) | 1430 | kfree(conf->mirrors); |
1295 | kfree(conf->mirrors); | 1431 | kfree(conf->poolinfo); |
1296 | if (conf->poolinfo) | ||
1297 | kfree(conf->poolinfo); | ||
1298 | kfree(conf); | 1432 | kfree(conf); |
1299 | mddev->private = NULL; | 1433 | mddev->private = NULL; |
1300 | } | 1434 | } |
@@ -1311,10 +1445,8 @@ static int stop(mddev_t *mddev) | |||
1311 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 1445 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
1312 | if (conf->r1bio_pool) | 1446 | if (conf->r1bio_pool) |
1313 | mempool_destroy(conf->r1bio_pool); | 1447 | mempool_destroy(conf->r1bio_pool); |
1314 | if (conf->mirrors) | 1448 | kfree(conf->mirrors); |
1315 | kfree(conf->mirrors); | 1449 | kfree(conf->poolinfo); |
1316 | if (conf->poolinfo) | ||
1317 | kfree(conf->poolinfo); | ||
1318 | kfree(conf); | 1450 | kfree(conf); |
1319 | mddev->private = NULL; | 1451 | mddev->private = NULL; |
1320 | return 0; | 1452 | return 0; |
@@ -1349,17 +1481,26 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) | |||
1349 | * We allocate a new r1bio_pool if we can. | 1481 | * We allocate a new r1bio_pool if we can. |
1350 | * Then raise a device barrier and wait until all IO stops. | 1482 | * Then raise a device barrier and wait until all IO stops. |
1351 | * Then resize conf->mirrors and swap in the new r1bio pool. | 1483 | * Then resize conf->mirrors and swap in the new r1bio pool. |
1484 | * | ||
1485 | * At the same time, we "pack" the devices so that all the missing | ||
1486 | * devices have the higher raid_disk numbers. | ||
1352 | */ | 1487 | */ |
1353 | mempool_t *newpool, *oldpool; | 1488 | mempool_t *newpool, *oldpool; |
1354 | struct pool_info *newpoolinfo; | 1489 | struct pool_info *newpoolinfo; |
1355 | mirror_info_t *newmirrors; | 1490 | mirror_info_t *newmirrors; |
1356 | conf_t *conf = mddev_to_conf(mddev); | 1491 | conf_t *conf = mddev_to_conf(mddev); |
1492 | int cnt; | ||
1357 | 1493 | ||
1358 | int d; | 1494 | int d, d2; |
1359 | 1495 | ||
1360 | for (d= raid_disks; d < conf->raid_disks; d++) | 1496 | if (raid_disks < conf->raid_disks) { |
1361 | if (conf->mirrors[d].rdev) | 1497 | cnt=0; |
1498 | for (d= 0; d < conf->raid_disks; d++) | ||
1499 | if (conf->mirrors[d].rdev) | ||
1500 | cnt++; | ||
1501 | if (cnt > raid_disks) | ||
1362 | return -EBUSY; | 1502 | return -EBUSY; |
1503 | } | ||
1363 | 1504 | ||
1364 | newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL); | 1505 | newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL); |
1365 | if (!newpoolinfo) | 1506 | if (!newpoolinfo) |
@@ -1384,14 +1525,18 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) | |||
1384 | spin_lock_irq(&conf->resync_lock); | 1525 | spin_lock_irq(&conf->resync_lock); |
1385 | conf->barrier++; | 1526 | conf->barrier++; |
1386 | wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, | 1527 | wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, |
1387 | conf->resync_lock, unplug_slaves(mddev)); | 1528 | conf->resync_lock, raid1_unplug(mddev->queue)); |
1388 | spin_unlock_irq(&conf->resync_lock); | 1529 | spin_unlock_irq(&conf->resync_lock); |
1389 | 1530 | ||
1390 | /* ok, everything is stopped */ | 1531 | /* ok, everything is stopped */ |
1391 | oldpool = conf->r1bio_pool; | 1532 | oldpool = conf->r1bio_pool; |
1392 | conf->r1bio_pool = newpool; | 1533 | conf->r1bio_pool = newpool; |
1393 | for (d=0; d < raid_disks && d < conf->raid_disks; d++) | 1534 | |
1394 | newmirrors[d] = conf->mirrors[d]; | 1535 | for (d=d2=0; d < conf->raid_disks; d++) |
1536 | if (conf->mirrors[d].rdev) { | ||
1537 | conf->mirrors[d].rdev->raid_disk = d2; | ||
1538 | newmirrors[d2++].rdev = conf->mirrors[d].rdev; | ||
1539 | } | ||
1395 | kfree(conf->mirrors); | 1540 | kfree(conf->mirrors); |
1396 | conf->mirrors = newmirrors; | 1541 | conf->mirrors = newmirrors; |
1397 | kfree(conf->poolinfo); | 1542 | kfree(conf->poolinfo); |
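The replacement loop packs the surviving devices toward slot 0 and renumbers their raid_disk fields, so that when shrinking, the empty slots are exactly the high-numbered ones being removed. A self-contained model of the d/d2 pass (slots hold illustrative device ids, with -1 marking an empty slot):

    #include <stdio.h>

    /* Compact occupied slots to the front, renumbering as we go;
     * models the for (d=d2=0; ...) loop in raid1_reshape above. */
    static int pack_slots(int *slots, int n)
    {
        int d2 = 0;
        for (int d = 0; d < n; d++)
            if (slots[d] >= 0)
                slots[d2++] = slots[d];  /* device's new raid_disk */
        for (int d = d2; d < n; d++)
            slots[d] = -1;               /* holes now at the high numbers */
        return d2;                       /* working-device count */
    }

    int main(void)
    {
        int slots[4] = { 7, -1, 9, -1 };
        int cnt = pack_slots(slots, 4);
        printf("%d devices; slot0=%d slot1=%d\n", cnt, slots[0], slots[1]);
        return 0;
    }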
@@ -1400,6 +1545,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) | |||
1400 | mddev->degraded += (raid_disks - conf->raid_disks); | 1545 | mddev->degraded += (raid_disks - conf->raid_disks); |
1401 | conf->raid_disks = mddev->raid_disks = raid_disks; | 1546 | conf->raid_disks = mddev->raid_disks = raid_disks; |
1402 | 1547 | ||
1548 | conf->last_used = 0; /* just make sure it is in-range */ | ||
1403 | spin_lock_irq(&conf->resync_lock); | 1549 | spin_lock_irq(&conf->resync_lock); |
1404 | conf->barrier--; | 1550 | conf->barrier--; |
1405 | spin_unlock_irq(&conf->resync_lock); | 1551 | spin_unlock_irq(&conf->resync_lock); |