aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--drivers/md/raid1.c242
1 files changed, 194 insertions, 48 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 1db5de52d376..ff1dbec864af 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -12,6 +12,15 @@
12 * Fixes to reconstruction by Jakob Østergaard" <jakob@ostenfeld.dk> 12 * Fixes to reconstruction by Jakob Østergaard" <jakob@ostenfeld.dk>
13 * Various fixes by Neil Brown <neilb@cse.unsw.edu.au> 13 * Various fixes by Neil Brown <neilb@cse.unsw.edu.au>
14 * 14 *
15 * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support
16 * bitmapped intelligence in resync:
17 *
18 * - bitmap marked during normal i/o
19 * - bitmap used to skip nondirty blocks during sync
20 *
21 * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology:
22 * - persistent bitmap code
23 *
15 * This program is free software; you can redistribute it and/or modify 24 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by 25 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2, or (at your option) 26 * the Free Software Foundation; either version 2, or (at your option)
@@ -22,7 +31,16 @@
22 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 31 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 32 */
24 33
34#include "dm-bio-list.h"
25#include <linux/raid/raid1.h> 35#include <linux/raid/raid1.h>
36#include <linux/raid/bitmap.h>
37
38#define DEBUG 0
39#if DEBUG
40#define PRINTK(x...) printk(x)
41#else
42#define PRINTK(x...)
43#endif
26 44
27/* 45/*
28 * Number of guaranteed r1bios in case of extreme VM load: 46 * Number of guaranteed r1bios in case of extreme VM load:
@@ -287,9 +305,11 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
287 /* 305 /*
288 * this branch is our 'one mirror IO has finished' event handler: 306 * this branch is our 'one mirror IO has finished' event handler:
289 */ 307 */
290 if (!uptodate) 308 if (!uptodate) {
291 md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); 309 md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
292 else 310 /* an I/O failed, we can't clear the bitmap */
311 set_bit(R1BIO_Degraded, &r1_bio->state);
312 } else
293 /* 313 /*
294 * Set R1BIO_Uptodate in our master bio, so that 314 * Set R1BIO_Uptodate in our master bio, so that
295 * we will return a good error code to the higher 315 * we will return a good error code to the higher
@@ -309,6 +329,10 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
309 * already. 329 * already.
310 */ 330 */
311 if (atomic_dec_and_test(&r1_bio->remaining)) { 331 if (atomic_dec_and_test(&r1_bio->remaining)) {
332 /* clear the bitmap if all writes complete successfully */
333 bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
334 r1_bio->sectors,
335 !test_bit(R1BIO_Degraded, &r1_bio->state));
312 md_write_end(r1_bio->mddev); 336 md_write_end(r1_bio->mddev);
313 raid_end_bio_io(r1_bio); 337 raid_end_bio_io(r1_bio);
314 } 338 }
@@ -458,7 +482,10 @@ static void unplug_slaves(mddev_t *mddev)
458 482
459static void raid1_unplug(request_queue_t *q) 483static void raid1_unplug(request_queue_t *q)
460{ 484{
461 unplug_slaves(q->queuedata); 485 mddev_t *mddev = q->queuedata;
486
487 unplug_slaves(mddev);
488 md_wakeup_thread(mddev->thread);
462} 489}
463 490
464static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, 491static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
@@ -501,16 +528,16 @@ static void device_barrier(conf_t *conf, sector_t sect)
501{ 528{
502 spin_lock_irq(&conf->resync_lock); 529 spin_lock_irq(&conf->resync_lock);
503 wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), 530 wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
504 conf->resync_lock, unplug_slaves(conf->mddev)); 531 conf->resync_lock, raid1_unplug(conf->mddev->queue));
505 532
506 if (!conf->barrier++) { 533 if (!conf->barrier++) {
507 wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, 534 wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
508 conf->resync_lock, unplug_slaves(conf->mddev)); 535 conf->resync_lock, raid1_unplug(conf->mddev->queue));
509 if (conf->nr_pending) 536 if (conf->nr_pending)
510 BUG(); 537 BUG();
511 } 538 }
512 wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, 539 wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
513 conf->resync_lock, unplug_slaves(conf->mddev)); 540 conf->resync_lock, raid1_unplug(conf->mddev->queue));
514 conf->next_resync = sect; 541 conf->next_resync = sect;
515 spin_unlock_irq(&conf->resync_lock); 542 spin_unlock_irq(&conf->resync_lock);
516} 543}
@@ -522,14 +549,20 @@ static int make_request(request_queue_t *q, struct bio * bio)
522 mirror_info_t *mirror; 549 mirror_info_t *mirror;
523 r1bio_t *r1_bio; 550 r1bio_t *r1_bio;
524 struct bio *read_bio; 551 struct bio *read_bio;
525 int i, disks; 552 int i, targets = 0, disks;
526 mdk_rdev_t *rdev; 553 mdk_rdev_t *rdev;
554 struct bitmap *bitmap = mddev->bitmap;
555 unsigned long flags;
556 struct bio_list bl;
557
527 558
528 /* 559 /*
529 * Register the new request and wait if the reconstruction 560 * Register the new request and wait if the reconstruction
530 * thread has put up a bar for new requests. 561 * thread has put up a bar for new requests.
531 * Continue immediately if no resync is active currently. 562 * Continue immediately if no resync is active currently.
532 */ 563 */
564 md_write_start(mddev, bio); /* wait on superblock update early */
565
533 spin_lock_irq(&conf->resync_lock); 566 spin_lock_irq(&conf->resync_lock);
534 wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); 567 wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
535 conf->nr_pending++; 568 conf->nr_pending++;
@@ -552,7 +585,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
552 585
553 r1_bio->master_bio = bio; 586 r1_bio->master_bio = bio;
554 r1_bio->sectors = bio->bi_size >> 9; 587 r1_bio->sectors = bio->bi_size >> 9;
555 588 r1_bio->state = 0;
556 r1_bio->mddev = mddev; 589 r1_bio->mddev = mddev;
557 r1_bio->sector = bio->bi_sector; 590 r1_bio->sector = bio->bi_sector;
558 591
@@ -595,6 +628,13 @@ static int make_request(request_queue_t *q, struct bio * bio)
595 * bios[x] to bio 628 * bios[x] to bio
596 */ 629 */
597 disks = conf->raid_disks; 630 disks = conf->raid_disks;
631#if 0
632 { static int first=1;
633 if (first) printk("First Write sector %llu disks %d\n",
634 (unsigned long long)r1_bio->sector, disks);
635 first = 0;
636 }
637#endif
598 rcu_read_lock(); 638 rcu_read_lock();
599 for (i = 0; i < disks; i++) { 639 for (i = 0; i < disks; i++) {
600 if ((rdev=conf->mirrors[i].rdev) != NULL && 640 if ((rdev=conf->mirrors[i].rdev) != NULL &&
@@ -605,13 +645,21 @@ static int make_request(request_queue_t *q, struct bio * bio)
605 r1_bio->bios[i] = NULL; 645 r1_bio->bios[i] = NULL;
606 } else 646 } else
607 r1_bio->bios[i] = bio; 647 r1_bio->bios[i] = bio;
648 targets++;
608 } else 649 } else
609 r1_bio->bios[i] = NULL; 650 r1_bio->bios[i] = NULL;
610 } 651 }
611 rcu_read_unlock(); 652 rcu_read_unlock();
612 653
613 atomic_set(&r1_bio->remaining, 1); 654 if (targets < conf->raid_disks) {
614 md_write_start(mddev); 655 /* array is degraded, we will not clear the bitmap
656 * on I/O completion (see raid1_end_write_request) */
657 set_bit(R1BIO_Degraded, &r1_bio->state);
658 }
659
660 atomic_set(&r1_bio->remaining, 0);
661
662 bio_list_init(&bl);
615 for (i = 0; i < disks; i++) { 663 for (i = 0; i < disks; i++) {
616 struct bio *mbio; 664 struct bio *mbio;
617 if (!r1_bio->bios[i]) 665 if (!r1_bio->bios[i])
@@ -627,14 +675,23 @@ static int make_request(request_queue_t *q, struct bio * bio)
627 mbio->bi_private = r1_bio; 675 mbio->bi_private = r1_bio;
628 676
629 atomic_inc(&r1_bio->remaining); 677 atomic_inc(&r1_bio->remaining);
630 generic_make_request(mbio);
631 }
632 678
633 if (atomic_dec_and_test(&r1_bio->remaining)) { 679 bio_list_add(&bl, mbio);
634 md_write_end(mddev);
635 raid_end_bio_io(r1_bio);
636 } 680 }
637 681
682 bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors);
683 spin_lock_irqsave(&conf->device_lock, flags);
684 bio_list_merge(&conf->pending_bio_list, &bl);
685 bio_list_init(&bl);
686
687 blk_plug_device(mddev->queue);
688 spin_unlock_irqrestore(&conf->device_lock, flags);
689
690#if 0
691 while ((bio = bio_list_pop(&bl)) != NULL)
692 generic_make_request(bio);
693#endif
694
638 return 0; 695 return 0;
639} 696}
640 697
@@ -714,7 +771,7 @@ static void close_sync(conf_t *conf)
714{ 771{
715 spin_lock_irq(&conf->resync_lock); 772 spin_lock_irq(&conf->resync_lock);
716 wait_event_lock_irq(conf->wait_resume, !conf->barrier, 773 wait_event_lock_irq(conf->wait_resume, !conf->barrier,
717 conf->resync_lock, unplug_slaves(conf->mddev)); 774 conf->resync_lock, raid1_unplug(conf->mddev->queue));
718 spin_unlock_irq(&conf->resync_lock); 775 spin_unlock_irq(&conf->resync_lock);
719 776
720 if (conf->barrier) BUG(); 777 if (conf->barrier) BUG();
@@ -754,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
754{ 811{
755 conf_t *conf = mddev->private; 812 conf_t *conf = mddev->private;
756 int found = 0; 813 int found = 0;
757 int mirror; 814 int mirror = 0;
758 mirror_info_t *p; 815 mirror_info_t *p;
759 816
817 if (rdev->saved_raid_disk >= 0 &&
818 conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
819 mirror = rdev->saved_raid_disk;
760 for (mirror=0; mirror < mddev->raid_disks; mirror++) 820 for (mirror=0; mirror < mddev->raid_disks; mirror++)
761 if ( !(p=conf->mirrors+mirror)->rdev) { 821 if ( !(p=conf->mirrors+mirror)->rdev) {
762 822
@@ -773,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
773 p->head_position = 0; 833 p->head_position = 0;
774 rdev->raid_disk = mirror; 834 rdev->raid_disk = mirror;
775 found = 1; 835 found = 1;
836 if (rdev->saved_raid_disk != mirror)
837 conf->fullsync = 1;
776 p->rdev = rdev; 838 p->rdev = rdev;
777 break; 839 break;
778 } 840 }
@@ -828,10 +890,11 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
828 * or re-read if the read failed. 890 * or re-read if the read failed.
829 * We don't do much here, just schedule handling by raid1d 891 * We don't do much here, just schedule handling by raid1d
830 */ 892 */
831 if (!uptodate) 893 if (!uptodate) {
832 md_error(r1_bio->mddev, 894 md_error(r1_bio->mddev,
833 conf->mirrors[r1_bio->read_disk].rdev); 895 conf->mirrors[r1_bio->read_disk].rdev);
834 else 896 set_bit(R1BIO_Degraded, &r1_bio->state);
897 } else
835 set_bit(R1BIO_Uptodate, &r1_bio->state); 898 set_bit(R1BIO_Uptodate, &r1_bio->state);
836 rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); 899 rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
837 reschedule_retry(r1_bio); 900 reschedule_retry(r1_bio);
@@ -855,8 +918,10 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
855 mirror = i; 918 mirror = i;
856 break; 919 break;
857 } 920 }
858 if (!uptodate) 921 if (!uptodate) {
859 md_error(mddev, conf->mirrors[mirror].rdev); 922 md_error(mddev, conf->mirrors[mirror].rdev);
923 set_bit(R1BIO_Degraded, &r1_bio->state);
924 }
860 update_head_pos(mirror, r1_bio); 925 update_head_pos(mirror, r1_bio);
861 926
862 if (atomic_dec_and_test(&r1_bio->remaining)) { 927 if (atomic_dec_and_test(&r1_bio->remaining)) {
@@ -876,6 +941,9 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
876 941
877 bio = r1_bio->bios[r1_bio->read_disk]; 942 bio = r1_bio->bios[r1_bio->read_disk];
878 943
944/*
945 if (r1_bio->sector == 0) printk("First sync write startss\n");
946*/
879 /* 947 /*
880 * schedule writes 948 * schedule writes
881 */ 949 */
@@ -903,10 +971,12 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
903 atomic_inc(&conf->mirrors[i].rdev->nr_pending); 971 atomic_inc(&conf->mirrors[i].rdev->nr_pending);
904 atomic_inc(&r1_bio->remaining); 972 atomic_inc(&r1_bio->remaining);
905 md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); 973 md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
974
906 generic_make_request(wbio); 975 generic_make_request(wbio);
907 } 976 }
908 977
909 if (atomic_dec_and_test(&r1_bio->remaining)) { 978 if (atomic_dec_and_test(&r1_bio->remaining)) {
979 /* if we're here, all write(s) have completed, so clean up */
910 md_done_sync(mddev, r1_bio->sectors, 1); 980 md_done_sync(mddev, r1_bio->sectors, 1);
911 put_buf(r1_bio); 981 put_buf(r1_bio);
912 } 982 }
@@ -931,11 +1001,30 @@ static void raid1d(mddev_t *mddev)
931 mdk_rdev_t *rdev; 1001 mdk_rdev_t *rdev;
932 1002
933 md_check_recovery(mddev); 1003 md_check_recovery(mddev);
934 md_handle_safemode(mddev);
935 1004
936 for (;;) { 1005 for (;;) {
937 char b[BDEVNAME_SIZE]; 1006 char b[BDEVNAME_SIZE];
938 spin_lock_irqsave(&conf->device_lock, flags); 1007 spin_lock_irqsave(&conf->device_lock, flags);
1008
1009 if (conf->pending_bio_list.head) {
1010 bio = bio_list_get(&conf->pending_bio_list);
1011 blk_remove_plug(mddev->queue);
1012 spin_unlock_irqrestore(&conf->device_lock, flags);
1013 /* flush any pending bitmap writes to disk before proceeding w/ I/O */
1014 if (bitmap_unplug(mddev->bitmap) != 0)
1015 printk("%s: bitmap file write failed!\n", mdname(mddev));
1016
1017 while (bio) { /* submit pending writes */
1018 struct bio *next = bio->bi_next;
1019 bio->bi_next = NULL;
1020 generic_make_request(bio);
1021 bio = next;
1022 }
1023 unplug = 1;
1024
1025 continue;
1026 }
1027
939 if (list_empty(head)) 1028 if (list_empty(head))
940 break; 1029 break;
941 r1_bio = list_entry(head->prev, r1bio_t, retry_list); 1030 r1_bio = list_entry(head->prev, r1bio_t, retry_list);
@@ -1009,7 +1098,7 @@ static int init_resync(conf_t *conf)
1009 * that can be installed to exclude normal IO requests. 1098 * that can be installed to exclude normal IO requests.
1010 */ 1099 */
1011 1100
1012static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) 1101static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
1013{ 1102{
1014 conf_t *conf = mddev_to_conf(mddev); 1103 conf_t *conf = mddev_to_conf(mddev);
1015 mirror_info_t *mirror; 1104 mirror_info_t *mirror;
@@ -1019,17 +1108,43 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
1019 int disk; 1108 int disk;
1020 int i; 1109 int i;
1021 int write_targets = 0; 1110 int write_targets = 0;
1111 int sync_blocks;
1022 1112
1023 if (!conf->r1buf_pool) 1113 if (!conf->r1buf_pool)
1114 {
1115/*
1116 printk("sync start - bitmap %p\n", mddev->bitmap);
1117*/
1024 if (init_resync(conf)) 1118 if (init_resync(conf))
1025 return -ENOMEM; 1119 return 0;
1120 }
1026 1121
1027 max_sector = mddev->size << 1; 1122 max_sector = mddev->size << 1;
1028 if (sector_nr >= max_sector) { 1123 if (sector_nr >= max_sector) {
1124 /* If we aborted, we need to abort the
1125 * sync on the 'current' bitmap chunk (there will
1126 * only be one in raid1 resync).
1127 * We can find the current address in mddev->curr_resync
1128 */
1129 if (!conf->fullsync) {
1130 if (mddev->curr_resync < max_sector)
1131 bitmap_end_sync(mddev->bitmap,
1132 mddev->curr_resync,
1133 &sync_blocks, 1);
1134 bitmap_close_sync(mddev->bitmap);
1135 }
1136 if (mddev->curr_resync >= max_sector)
1137 conf->fullsync = 0;
1029 close_sync(conf); 1138 close_sync(conf);
1030 return 0; 1139 return 0;
1031 } 1140 }
1032 1141
1142 if (!conf->fullsync &&
1143 !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks)) {
1144 /* We can skip this block, and probably several more */
1145 *skipped = 1;
1146 return sync_blocks;
1147 }
1033 /* 1148 /*
1034 * If there is non-resync activity waiting for us then 1149 * If there is non-resync activity waiting for us then
1035 * put in a delay to throttle resync. 1150 * put in a delay to throttle resync.
@@ -1068,6 +1183,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
1068 1183
1069 r1_bio->mddev = mddev; 1184 r1_bio->mddev = mddev;
1070 r1_bio->sector = sector_nr; 1185 r1_bio->sector = sector_nr;
1186 r1_bio->state = 0;
1071 set_bit(R1BIO_IsSync, &r1_bio->state); 1187 set_bit(R1BIO_IsSync, &r1_bio->state);
1072 r1_bio->read_disk = disk; 1188 r1_bio->read_disk = disk;
1073 1189
@@ -1102,18 +1218,24 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
1102 bio->bi_bdev = conf->mirrors[i].rdev->bdev; 1218 bio->bi_bdev = conf->mirrors[i].rdev->bdev;
1103 bio->bi_private = r1_bio; 1219 bio->bi_private = r1_bio;
1104 } 1220 }
1221
1222 if (write_targets + 1 < conf->raid_disks)
1223 /* array degraded, can't clear bitmap */
1224 set_bit(R1BIO_Degraded, &r1_bio->state);
1225
1105 if (write_targets == 0) { 1226 if (write_targets == 0) {
1106 /* There is nowhere to write, so all non-sync 1227 /* There is nowhere to write, so all non-sync
1107 * drives must be failed - so we are finished 1228 * drives must be failed - so we are finished
1108 */ 1229 */
1109 int rv = max_sector - sector_nr; 1230 sector_t rv = max_sector - sector_nr;
1110 md_done_sync(mddev, rv, 1); 1231 *skipped = 1;
1111 put_buf(r1_bio); 1232 put_buf(r1_bio);
1112 rdev_dec_pending(conf->mirrors[disk].rdev, mddev); 1233 rdev_dec_pending(conf->mirrors[disk].rdev, mddev);
1113 return rv; 1234 return rv;
1114 } 1235 }
1115 1236
1116 nr_sectors = 0; 1237 nr_sectors = 0;
1238 sync_blocks = 0;
1117 do { 1239 do {
1118 struct page *page; 1240 struct page *page;
1119 int len = PAGE_SIZE; 1241 int len = PAGE_SIZE;
@@ -1121,6 +1243,17 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
1121 len = (max_sector - sector_nr) << 9; 1243 len = (max_sector - sector_nr) << 9;
1122 if (len == 0) 1244 if (len == 0)
1123 break; 1245 break;
1246 if (!conf->fullsync) {
1247 if (sync_blocks == 0) {
1248 if (!bitmap_start_sync(mddev->bitmap,
1249 sector_nr, &sync_blocks))
1250 break;
1251 if (sync_blocks < (PAGE_SIZE>>9))
1252 BUG();
1253 if (len > (sync_blocks<<9)) len = sync_blocks<<9;
1254 }
1255 }
1256
1124 for (i=0 ; i < conf->raid_disks; i++) { 1257 for (i=0 ; i < conf->raid_disks; i++) {
1125 bio = r1_bio->bios[i]; 1258 bio = r1_bio->bios[i];
1126 if (bio->bi_end_io) { 1259 if (bio->bi_end_io) {
@@ -1143,6 +1276,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
1143 } 1276 }
1144 nr_sectors += len>>9; 1277 nr_sectors += len>>9;
1145 sector_nr += len>>9; 1278 sector_nr += len>>9;
1279 sync_blocks -= (len>>9);
1146 } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); 1280 } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
1147 bio_full: 1281 bio_full:
1148 bio = r1_bio->bios[disk]; 1282 bio = r1_bio->bios[disk];
@@ -1231,6 +1365,9 @@ static int run(mddev_t *mddev)
1231 init_waitqueue_head(&conf->wait_idle); 1365 init_waitqueue_head(&conf->wait_idle);
1232 init_waitqueue_head(&conf->wait_resume); 1366 init_waitqueue_head(&conf->wait_resume);
1233 1367
1368 bio_list_init(&conf->pending_bio_list);
1369 bio_list_init(&conf->flushing_bio_list);
1370
1234 if (!conf->working_disks) { 1371 if (!conf->working_disks) {
1235 printk(KERN_ERR "raid1: no operational mirrors for %s\n", 1372 printk(KERN_ERR "raid1: no operational mirrors for %s\n",
1236 mdname(mddev)); 1373 mdname(mddev));
@@ -1259,16 +1396,15 @@ static int run(mddev_t *mddev)
1259 conf->last_used = j; 1396 conf->last_used = j;
1260 1397
1261 1398
1262 1399 mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
1263 { 1400 if (!mddev->thread) {
1264 mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); 1401 printk(KERN_ERR
1265 if (!mddev->thread) { 1402 "raid1: couldn't allocate thread for %s\n",
1266 printk(KERN_ERR 1403 mdname(mddev));
1267 "raid1: couldn't allocate thread for %s\n", 1404 goto out_free_conf;
1268 mdname(mddev));
1269 goto out_free_conf;
1270 }
1271 } 1405 }
1406 if (mddev->bitmap) mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
1407
1272 printk(KERN_INFO 1408 printk(KERN_INFO
1273 "raid1: raid set %s active with %d out of %d mirrors\n", 1409 "raid1: raid set %s active with %d out of %d mirrors\n",
1274 mdname(mddev), mddev->raid_disks - mddev->degraded, 1410 mdname(mddev), mddev->raid_disks - mddev->degraded,
@@ -1291,10 +1427,8 @@ out_free_conf:
1291 if (conf) { 1427 if (conf) {
1292 if (conf->r1bio_pool) 1428 if (conf->r1bio_pool)
1293 mempool_destroy(conf->r1bio_pool); 1429 mempool_destroy(conf->r1bio_pool);
1294 if (conf->mirrors) 1430 kfree(conf->mirrors);
1295 kfree(conf->mirrors); 1431 kfree(conf->poolinfo);
1296 if (conf->poolinfo)
1297 kfree(conf->poolinfo);
1298 kfree(conf); 1432 kfree(conf);
1299 mddev->private = NULL; 1433 mddev->private = NULL;
1300 } 1434 }
@@ -1311,10 +1445,8 @@ static int stop(mddev_t *mddev)
1311 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ 1445 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
1312 if (conf->r1bio_pool) 1446 if (conf->r1bio_pool)
1313 mempool_destroy(conf->r1bio_pool); 1447 mempool_destroy(conf->r1bio_pool);
1314 if (conf->mirrors) 1448 kfree(conf->mirrors);
1315 kfree(conf->mirrors); 1449 kfree(conf->poolinfo);
1316 if (conf->poolinfo)
1317 kfree(conf->poolinfo);
1318 kfree(conf); 1450 kfree(conf);
1319 mddev->private = NULL; 1451 mddev->private = NULL;
1320 return 0; 1452 return 0;
@@ -1349,17 +1481,26 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
1349 * We allocate a new r1bio_pool if we can. 1481 * We allocate a new r1bio_pool if we can.
1350 * Then raise a device barrier and wait until all IO stops. 1482 * Then raise a device barrier and wait until all IO stops.
1351 * Then resize conf->mirrors and swap in the new r1bio pool. 1483 * Then resize conf->mirrors and swap in the new r1bio pool.
1484 *
1485 * At the same time, we "pack" the devices so that all the missing
1486 * devices have the higher raid_disk numbers.
1352 */ 1487 */
1353 mempool_t *newpool, *oldpool; 1488 mempool_t *newpool, *oldpool;
1354 struct pool_info *newpoolinfo; 1489 struct pool_info *newpoolinfo;
1355 mirror_info_t *newmirrors; 1490 mirror_info_t *newmirrors;
1356 conf_t *conf = mddev_to_conf(mddev); 1491 conf_t *conf = mddev_to_conf(mddev);
1492 int cnt;
1357 1493
1358 int d; 1494 int d, d2;
1359 1495
1360 for (d= raid_disks; d < conf->raid_disks; d++) 1496 if (raid_disks < conf->raid_disks) {
1361 if (conf->mirrors[d].rdev) 1497 cnt=0;
1498 for (d= 0; d < conf->raid_disks; d++)
1499 if (conf->mirrors[d].rdev)
1500 cnt++;
1501 if (cnt > raid_disks)
1362 return -EBUSY; 1502 return -EBUSY;
1503 }
1363 1504
1364 newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL); 1505 newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL);
1365 if (!newpoolinfo) 1506 if (!newpoolinfo)
@@ -1384,14 +1525,18 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
1384 spin_lock_irq(&conf->resync_lock); 1525 spin_lock_irq(&conf->resync_lock);
1385 conf->barrier++; 1526 conf->barrier++;
1386 wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, 1527 wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
1387 conf->resync_lock, unplug_slaves(mddev)); 1528 conf->resync_lock, raid1_unplug(mddev->queue));
1388 spin_unlock_irq(&conf->resync_lock); 1529 spin_unlock_irq(&conf->resync_lock);
1389 1530
1390 /* ok, everything is stopped */ 1531 /* ok, everything is stopped */
1391 oldpool = conf->r1bio_pool; 1532 oldpool = conf->r1bio_pool;
1392 conf->r1bio_pool = newpool; 1533 conf->r1bio_pool = newpool;
1393 for (d=0; d < raid_disks && d < conf->raid_disks; d++) 1534
1394 newmirrors[d] = conf->mirrors[d]; 1535 for (d=d2=0; d < conf->raid_disks; d++)
1536 if (conf->mirrors[d].rdev) {
1537 conf->mirrors[d].rdev->raid_disk = d2;
1538 newmirrors[d2++].rdev = conf->mirrors[d].rdev;
1539 }
1395 kfree(conf->mirrors); 1540 kfree(conf->mirrors);
1396 conf->mirrors = newmirrors; 1541 conf->mirrors = newmirrors;
1397 kfree(conf->poolinfo); 1542 kfree(conf->poolinfo);
@@ -1400,6 +1545,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
1400 mddev->degraded += (raid_disks - conf->raid_disks); 1545 mddev->degraded += (raid_disks - conf->raid_disks);
1401 conf->raid_disks = mddev->raid_disks = raid_disks; 1546 conf->raid_disks = mddev->raid_disks = raid_disks;
1402 1547
1548 conf->last_used = 0; /* just make sure it is in-range */
1403 spin_lock_irq(&conf->resync_lock); 1549 spin_lock_irq(&conf->resync_lock);
1404 conf->barrier--; 1550 conf->barrier--;
1405 spin_unlock_irq(&conf->resync_lock); 1551 spin_unlock_irq(&conf->resync_lock);