about summary refs log tree commit diff stats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 525
1 files changed, 393 insertions, 132 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d899204d3743..0c6b5b6baff6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -19,6 +19,9 @@
19 19
20 Neil Brown <neilb@cse.unsw.edu.au>. 20 Neil Brown <neilb@cse.unsw.edu.au>.
21 21
22 - persistent bitmap code
23 Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
24
22 This program is free software; you can redistribute it and/or modify 25 This program is free software; you can redistribute it and/or modify
23 it under the terms of the GNU General Public License as published by 26 it under the terms of the GNU General Public License as published by
24 the Free Software Foundation; either version 2, or (at your option) 27 the Free Software Foundation; either version 2, or (at your option)
@@ -33,6 +36,7 @@
33#include <linux/config.h> 36#include <linux/config.h>
34#include <linux/linkage.h> 37#include <linux/linkage.h>
35#include <linux/raid/md.h> 38#include <linux/raid/md.h>
39#include <linux/raid/bitmap.h>
36#include <linux/sysctl.h> 40#include <linux/sysctl.h>
37#include <linux/devfs_fs_kernel.h> 41#include <linux/devfs_fs_kernel.h>
38#include <linux/buffer_head.h> /* for invalidate_bdev */ 42#include <linux/buffer_head.h> /* for invalidate_bdev */
@@ -40,6 +44,8 @@
40 44
41#include <linux/init.h> 45#include <linux/init.h>
42 46
47#include <linux/file.h>
48
43#ifdef CONFIG_KMOD 49#ifdef CONFIG_KMOD
44#include <linux/kmod.h> 50#include <linux/kmod.h>
45#endif 51#endif
@@ -189,8 +195,7 @@ static mddev_t * mddev_find(dev_t unit)
189 if (mddev->unit == unit) { 195 if (mddev->unit == unit) {
190 mddev_get(mddev); 196 mddev_get(mddev);
191 spin_unlock(&all_mddevs_lock); 197 spin_unlock(&all_mddevs_lock);
192 if (new) 198 kfree(new);
193 kfree(new);
194 return mddev; 199 return mddev;
195 } 200 }
196 201
@@ -218,6 +223,8 @@ static mddev_t * mddev_find(dev_t unit)
218 INIT_LIST_HEAD(&new->all_mddevs); 223 INIT_LIST_HEAD(&new->all_mddevs);
219 init_timer(&new->safemode_timer); 224 init_timer(&new->safemode_timer);
220 atomic_set(&new->active, 1); 225 atomic_set(&new->active, 1);
226 spin_lock_init(&new->write_lock);
227 init_waitqueue_head(&new->sb_wait);
221 228
222 new->queue = blk_alloc_queue(GFP_KERNEL); 229 new->queue = blk_alloc_queue(GFP_KERNEL);
223 if (!new->queue) { 230 if (!new->queue) {
@@ -320,6 +327,40 @@ static void free_disk_sb(mdk_rdev_t * rdev)
320} 327}
321 328
322 329
330static int super_written(struct bio *bio, unsigned int bytes_done, int error)
331{
332 mdk_rdev_t *rdev = bio->bi_private;
333 if (bio->bi_size)
334 return 1;
335
336 if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags))
337 md_error(rdev->mddev, rdev);
338
339 if (atomic_dec_and_test(&rdev->mddev->pending_writes))
340 wake_up(&rdev->mddev->sb_wait);
341 return 0;
342}
343
344void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
345 sector_t sector, int size, struct page *page)
346{
347 /* write first size bytes of page to sector of rdev
348 * Increment mddev->pending_writes before returning
349 * and decrement it on completion, waking up sb_wait
350 * if zero is reached.
351 * If an error occurred, call md_error
352 */
353 struct bio *bio = bio_alloc(GFP_NOIO, 1);
354
355 bio->bi_bdev = rdev->bdev;
356 bio->bi_sector = sector;
357 bio_add_page(bio, page, size, 0);
358 bio->bi_private = rdev;
359 bio->bi_end_io = super_written;
360 atomic_inc(&mddev->pending_writes);
361 submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio);
362}
363
323static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) 364static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
324{ 365{
325 if (bio->bi_size) 366 if (bio->bi_size)
@@ -329,7 +370,7 @@ static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
329 return 0; 370 return 0;
330} 371}
331 372
332static int sync_page_io(struct block_device *bdev, sector_t sector, int size, 373int sync_page_io(struct block_device *bdev, sector_t sector, int size,
333 struct page *page, int rw) 374 struct page *page, int rw)
334{ 375{
335 struct bio *bio = bio_alloc(GFP_NOIO, 1); 376 struct bio *bio = bio_alloc(GFP_NOIO, 1);
@@ -416,11 +457,8 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
416 ret = 1; 457 ret = 1;
417 458
418abort: 459abort:
419 if (tmp1) 460 kfree(tmp1);
420 kfree(tmp1); 461 kfree(tmp2);
421 if (tmp2)
422 kfree(tmp2);
423
424 return ret; 462 return ret;
425} 463}
426 464
@@ -569,6 +607,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
569 mdp_disk_t *desc; 607 mdp_disk_t *desc;
570 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); 608 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
571 609
610 rdev->raid_disk = -1;
611 rdev->in_sync = 0;
572 if (mddev->raid_disks == 0) { 612 if (mddev->raid_disks == 0) {
573 mddev->major_version = 0; 613 mddev->major_version = 0;
574 mddev->minor_version = sb->minor_version; 614 mddev->minor_version = sb->minor_version;
@@ -599,16 +639,35 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
599 memcpy(mddev->uuid+12,&sb->set_uuid3, 4); 639 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
600 640
601 mddev->max_disks = MD_SB_DISKS; 641 mddev->max_disks = MD_SB_DISKS;
602 } else { 642
603 __u64 ev1; 643 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
604 ev1 = md_event(sb); 644 mddev->bitmap_file == NULL) {
645 if (mddev->level != 1) {
646 /* FIXME use a better test */
647 printk(KERN_WARNING "md: bitmaps only support for raid1\n");
648 return -EINVAL;
649 }
650 mddev->bitmap_offset = (MD_SB_BYTES >> 9);
651 }
652
653 } else if (mddev->pers == NULL) {
654 /* Insist on good event counter while assembling */
655 __u64 ev1 = md_event(sb);
605 ++ev1; 656 ++ev1;
606 if (ev1 < mddev->events) 657 if (ev1 < mddev->events)
607 return -EINVAL; 658 return -EINVAL;
608 } 659 } else if (mddev->bitmap) {
660 /* if adding to array with a bitmap, then we can accept an
661 * older device ... but not too old.
662 */
663 __u64 ev1 = md_event(sb);
664 if (ev1 < mddev->bitmap->events_cleared)
665 return 0;
666 } else /* just a hot-add of a new device, leave raid_disk at -1 */
667 return 0;
668
609 if (mddev->level != LEVEL_MULTIPATH) { 669 if (mddev->level != LEVEL_MULTIPATH) {
610 rdev->raid_disk = -1; 670 rdev->faulty = 0;
611 rdev->in_sync = rdev->faulty = 0;
612 desc = sb->disks + rdev->desc_nr; 671 desc = sb->disks + rdev->desc_nr;
613 672
614 if (desc->state & (1<<MD_DISK_FAULTY)) 673 if (desc->state & (1<<MD_DISK_FAULTY))
@@ -618,7 +677,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
618 rdev->in_sync = 1; 677 rdev->in_sync = 1;
619 rdev->raid_disk = desc->raid_disk; 678 rdev->raid_disk = desc->raid_disk;
620 } 679 }
621 } 680 } else /* MULTIPATH are always insync */
681 rdev->in_sync = 1;
622 return 0; 682 return 0;
623} 683}
624 684
@@ -683,6 +743,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
683 sb->layout = mddev->layout; 743 sb->layout = mddev->layout;
684 sb->chunk_size = mddev->chunk_size; 744 sb->chunk_size = mddev->chunk_size;
685 745
746 if (mddev->bitmap && mddev->bitmap_file == NULL)
747 sb->state |= (1<<MD_SB_BITMAP_PRESENT);
748
686 sb->disks[0].state = (1<<MD_DISK_REMOVED); 749 sb->disks[0].state = (1<<MD_DISK_REMOVED);
687 ITERATE_RDEV(mddev,rdev2,tmp) { 750 ITERATE_RDEV(mddev,rdev2,tmp) {
688 mdp_disk_t *d; 751 mdp_disk_t *d;
@@ -780,7 +843,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
780 case 0: 843 case 0:
781 sb_offset = rdev->bdev->bd_inode->i_size >> 9; 844 sb_offset = rdev->bdev->bd_inode->i_size >> 9;
782 sb_offset -= 8*2; 845 sb_offset -= 8*2;
783 sb_offset &= ~(4*2-1); 846 sb_offset &= ~(sector_t)(4*2-1);
784 /* convert from sectors to K */ 847 /* convert from sectors to K */
785 sb_offset /= 2; 848 sb_offset /= 2;
786 break; 849 break;
@@ -860,6 +923,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
860{ 923{
861 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); 924 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
862 925
926 rdev->raid_disk = -1;
927 rdev->in_sync = 0;
863 if (mddev->raid_disks == 0) { 928 if (mddev->raid_disks == 0) {
864 mddev->major_version = 1; 929 mddev->major_version = 1;
865 mddev->patch_version = 0; 930 mddev->patch_version = 0;
@@ -877,13 +942,30 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
877 memcpy(mddev->uuid, sb->set_uuid, 16); 942 memcpy(mddev->uuid, sb->set_uuid, 16);
878 943
879 mddev->max_disks = (4096-256)/2; 944 mddev->max_disks = (4096-256)/2;
880 } else { 945
881 __u64 ev1; 946 if ((le32_to_cpu(sb->feature_map) & 1) &&
882 ev1 = le64_to_cpu(sb->events); 947 mddev->bitmap_file == NULL ) {
948 if (mddev->level != 1) {
949 printk(KERN_WARNING "md: bitmaps only supported for raid1\n");
950 return -EINVAL;
951 }
952 mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
953 }
954 } else if (mddev->pers == NULL) {
955 /* Insist of good event counter while assembling */
956 __u64 ev1 = le64_to_cpu(sb->events);
883 ++ev1; 957 ++ev1;
884 if (ev1 < mddev->events) 958 if (ev1 < mddev->events)
885 return -EINVAL; 959 return -EINVAL;
886 } 960 } else if (mddev->bitmap) {
961 /* If adding to array with a bitmap, then we can accept an
962 * older device, but not too old.
963 */
964 __u64 ev1 = le64_to_cpu(sb->events);
965 if (ev1 < mddev->bitmap->events_cleared)
966 return 0;
967 } else /* just a hot-add of a new device, leave raid_disk at -1 */
968 return 0;
887 969
888 if (mddev->level != LEVEL_MULTIPATH) { 970 if (mddev->level != LEVEL_MULTIPATH) {
889 int role; 971 int role;
@@ -891,14 +973,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
891 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); 973 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
892 switch(role) { 974 switch(role) {
893 case 0xffff: /* spare */ 975 case 0xffff: /* spare */
894 rdev->in_sync = 0;
895 rdev->faulty = 0; 976 rdev->faulty = 0;
896 rdev->raid_disk = -1;
897 break; 977 break;
898 case 0xfffe: /* faulty */ 978 case 0xfffe: /* faulty */
899 rdev->in_sync = 0;
900 rdev->faulty = 1; 979 rdev->faulty = 1;
901 rdev->raid_disk = -1;
902 break; 980 break;
903 default: 981 default:
904 rdev->in_sync = 1; 982 rdev->in_sync = 1;
@@ -906,7 +984,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
906 rdev->raid_disk = role; 984 rdev->raid_disk = role;
907 break; 985 break;
908 } 986 }
909 } 987 } else /* MULTIPATH are always insync */
988 rdev->in_sync = 1;
989
910 return 0; 990 return 0;
911} 991}
912 992
@@ -933,6 +1013,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
933 else 1013 else
934 sb->resync_offset = cpu_to_le64(0); 1014 sb->resync_offset = cpu_to_le64(0);
935 1015
1016 if (mddev->bitmap && mddev->bitmap_file == NULL) {
1017 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
1018 sb->feature_map = cpu_to_le32(1);
1019 }
1020
936 max_dev = 0; 1021 max_dev = 0;
937 ITERATE_RDEV(mddev,rdev2,tmp) 1022 ITERATE_RDEV(mddev,rdev2,tmp)
938 if (rdev2->desc_nr+1 > max_dev) 1023 if (rdev2->desc_nr+1 > max_dev)
@@ -1196,8 +1281,11 @@ void md_print_devices(void)
1196 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); 1281 printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
1197 printk("md: **********************************\n"); 1282 printk("md: **********************************\n");
1198 ITERATE_MDDEV(mddev,tmp) { 1283 ITERATE_MDDEV(mddev,tmp) {
1199 printk("%s: ", mdname(mddev));
1200 1284
1285 if (mddev->bitmap)
1286 bitmap_print_sb(mddev->bitmap);
1287 else
1288 printk("%s: ", mdname(mddev));
1201 ITERATE_RDEV(mddev,rdev,tmp2) 1289 ITERATE_RDEV(mddev,rdev,tmp2)
1202 printk("<%s>", bdevname(rdev->bdev,b)); 1290 printk("<%s>", bdevname(rdev->bdev,b));
1203 printk("\n"); 1291 printk("\n");
@@ -1210,30 +1298,6 @@ void md_print_devices(void)
1210} 1298}
1211 1299
1212 1300
1213static int write_disk_sb(mdk_rdev_t * rdev)
1214{
1215 char b[BDEVNAME_SIZE];
1216 if (!rdev->sb_loaded) {
1217 MD_BUG();
1218 return 1;
1219 }
1220 if (rdev->faulty) {
1221 MD_BUG();
1222 return 1;
1223 }
1224
1225 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
1226 bdevname(rdev->bdev,b),
1227 (unsigned long long)rdev->sb_offset);
1228
1229 if (sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE))
1230 return 0;
1231
1232 printk("md: write_disk_sb failed for device %s\n",
1233 bdevname(rdev->bdev,b));
1234 return 1;
1235}
1236
1237static void sync_sbs(mddev_t * mddev) 1301static void sync_sbs(mddev_t * mddev)
1238{ 1302{
1239 mdk_rdev_t *rdev; 1303 mdk_rdev_t *rdev;
@@ -1248,12 +1312,14 @@ static void sync_sbs(mddev_t * mddev)
1248 1312
1249static void md_update_sb(mddev_t * mddev) 1313static void md_update_sb(mddev_t * mddev)
1250{ 1314{
1251 int err, count = 100; 1315 int err;
1252 struct list_head *tmp; 1316 struct list_head *tmp;
1253 mdk_rdev_t *rdev; 1317 mdk_rdev_t *rdev;
1318 int sync_req;
1254 1319
1255 mddev->sb_dirty = 0;
1256repeat: 1320repeat:
1321 spin_lock(&mddev->write_lock);
1322 sync_req = mddev->in_sync;
1257 mddev->utime = get_seconds(); 1323 mddev->utime = get_seconds();
1258 mddev->events ++; 1324 mddev->events ++;
1259 1325
@@ -1266,20 +1332,26 @@ repeat:
1266 MD_BUG(); 1332 MD_BUG();
1267 mddev->events --; 1333 mddev->events --;
1268 } 1334 }
1335 mddev->sb_dirty = 2;
1269 sync_sbs(mddev); 1336 sync_sbs(mddev);
1270 1337
1271 /* 1338 /*
1272 * do not write anything to disk if using 1339 * do not write anything to disk if using
1273 * nonpersistent superblocks 1340 * nonpersistent superblocks
1274 */ 1341 */
1275 if (!mddev->persistent) 1342 if (!mddev->persistent) {
1343 mddev->sb_dirty = 0;
1344 spin_unlock(&mddev->write_lock);
1345 wake_up(&mddev->sb_wait);
1276 return; 1346 return;
1347 }
1348 spin_unlock(&mddev->write_lock);
1277 1349
1278 dprintk(KERN_INFO 1350 dprintk(KERN_INFO
1279 "md: updating %s RAID superblock on device (in sync %d)\n", 1351 "md: updating %s RAID superblock on device (in sync %d)\n",
1280 mdname(mddev),mddev->in_sync); 1352 mdname(mddev),mddev->in_sync);
1281 1353
1282 err = 0; 1354 err = bitmap_update_sb(mddev->bitmap);
1283 ITERATE_RDEV(mddev,rdev,tmp) { 1355 ITERATE_RDEV(mddev,rdev,tmp) {
1284 char b[BDEVNAME_SIZE]; 1356 char b[BDEVNAME_SIZE];
1285 dprintk(KERN_INFO "md: "); 1357 dprintk(KERN_INFO "md: ");
@@ -1288,22 +1360,32 @@ repeat:
1288 1360
1289 dprintk("%s ", bdevname(rdev->bdev,b)); 1361 dprintk("%s ", bdevname(rdev->bdev,b));
1290 if (!rdev->faulty) { 1362 if (!rdev->faulty) {
1291 err += write_disk_sb(rdev); 1363 md_super_write(mddev,rdev,
1364 rdev->sb_offset<<1, MD_SB_BYTES,
1365 rdev->sb_page);
1366 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
1367 bdevname(rdev->bdev,b),
1368 (unsigned long long)rdev->sb_offset);
1369
1292 } else 1370 } else
1293 dprintk(")\n"); 1371 dprintk(")\n");
1294 if (!err && mddev->level == LEVEL_MULTIPATH) 1372 if (mddev->level == LEVEL_MULTIPATH)
1295 /* only need to write one superblock... */ 1373 /* only need to write one superblock... */
1296 break; 1374 break;
1297 } 1375 }
1298 if (err) { 1376 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
1299 if (--count) { 1377 /* if there was a failure, sb_dirty was set to 1, and we re-write super */
1300 printk(KERN_ERR "md: errors occurred during superblock" 1378
1301 " update, repeating\n"); 1379 spin_lock(&mddev->write_lock);
1302 goto repeat; 1380 if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) {
1303 } 1381 /* have to write it out again */
1304 printk(KERN_ERR \ 1382 spin_unlock(&mddev->write_lock);
1305 "md: excessive errors occurred during superblock update, exiting\n"); 1383 goto repeat;
1306 } 1384 }
1385 mddev->sb_dirty = 0;
1386 spin_unlock(&mddev->write_lock);
1387 wake_up(&mddev->sb_wait);
1388
1307} 1389}
1308 1390
1309/* 1391/*
@@ -1607,12 +1689,19 @@ static int do_md_run(mddev_t * mddev)
1607 1689
1608 mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ 1690 mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
1609 1691
1610 err = mddev->pers->run(mddev); 1692 /* before we start the array running, initialise the bitmap */
1693 err = bitmap_create(mddev);
1694 if (err)
1695 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
1696 mdname(mddev), err);
1697 else
1698 err = mddev->pers->run(mddev);
1611 if (err) { 1699 if (err) {
1612 printk(KERN_ERR "md: pers->run() failed ...\n"); 1700 printk(KERN_ERR "md: pers->run() failed ...\n");
1613 module_put(mddev->pers->owner); 1701 module_put(mddev->pers->owner);
1614 mddev->pers = NULL; 1702 mddev->pers = NULL;
1615 return -EINVAL; 1703 bitmap_destroy(mddev);
1704 return err;
1616 } 1705 }
1617 atomic_set(&mddev->writes_pending,0); 1706 atomic_set(&mddev->writes_pending,0);
1618 mddev->safemode = 0; 1707 mddev->safemode = 0;
@@ -1725,6 +1814,14 @@ static int do_md_stop(mddev_t * mddev, int ro)
1725 if (ro) 1814 if (ro)
1726 set_disk_ro(disk, 1); 1815 set_disk_ro(disk, 1);
1727 } 1816 }
1817
1818 bitmap_destroy(mddev);
1819 if (mddev->bitmap_file) {
1820 atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1);
1821 fput(mddev->bitmap_file);
1822 mddev->bitmap_file = NULL;
1823 }
1824
1728 /* 1825 /*
1729 * Free resources if final stop 1826 * Free resources if final stop
1730 */ 1827 */
@@ -1983,6 +2080,42 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
1983 return 0; 2080 return 0;
1984} 2081}
1985 2082
2083static int get_bitmap_file(mddev_t * mddev, void * arg)
2084{
2085 mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */
2086 char *ptr, *buf = NULL;
2087 int err = -ENOMEM;
2088
2089 file = kmalloc(sizeof(*file), GFP_KERNEL);
2090 if (!file)
2091 goto out;
2092
2093 /* bitmap disabled, zero the first byte and copy out */
2094 if (!mddev->bitmap || !mddev->bitmap->file) {
2095 file->pathname[0] = '\0';
2096 goto copy_out;
2097 }
2098
2099 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL);
2100 if (!buf)
2101 goto out;
2102
2103 ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname));
2104 if (!ptr)
2105 goto out;
2106
2107 strcpy(file->pathname, ptr);
2108
2109copy_out:
2110 err = 0;
2111 if (copy_to_user(arg, file, sizeof(*file)))
2112 err = -EFAULT;
2113out:
2114 kfree(buf);
2115 kfree(file);
2116 return err;
2117}
2118
1986static int get_disk_info(mddev_t * mddev, void __user * arg) 2119static int get_disk_info(mddev_t * mddev, void __user * arg)
1987{ 2120{
1988 mdu_disk_info_t info; 2121 mdu_disk_info_t info;
@@ -2078,11 +2211,25 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
2078 PTR_ERR(rdev)); 2211 PTR_ERR(rdev));
2079 return PTR_ERR(rdev); 2212 return PTR_ERR(rdev);
2080 } 2213 }
2214 /* set save_raid_disk if appropriate */
2215 if (!mddev->persistent) {
2216 if (info->state & (1<<MD_DISK_SYNC) &&
2217 info->raid_disk < mddev->raid_disks)
2218 rdev->raid_disk = info->raid_disk;
2219 else
2220 rdev->raid_disk = -1;
2221 } else
2222 super_types[mddev->major_version].
2223 validate_super(mddev, rdev);
2224 rdev->saved_raid_disk = rdev->raid_disk;
2225
2081 rdev->in_sync = 0; /* just to be sure */ 2226 rdev->in_sync = 0; /* just to be sure */
2082 rdev->raid_disk = -1; 2227 rdev->raid_disk = -1;
2083 err = bind_rdev_to_array(rdev, mddev); 2228 err = bind_rdev_to_array(rdev, mddev);
2084 if (err) 2229 if (err)
2085 export_rdev(rdev); 2230 export_rdev(rdev);
2231
2232 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2086 if (mddev->thread) 2233 if (mddev->thread)
2087 md_wakeup_thread(mddev->thread); 2234 md_wakeup_thread(mddev->thread);
2088 return err; 2235 return err;
@@ -2256,6 +2403,49 @@ abort_export:
2256 return err; 2403 return err;
2257} 2404}
2258 2405
2406/* similar to deny_write_access, but accounts for our holding a reference
2407 * to the file ourselves */
2408static int deny_bitmap_write_access(struct file * file)
2409{
2410 struct inode *inode = file->f_mapping->host;
2411
2412 spin_lock(&inode->i_lock);
2413 if (atomic_read(&inode->i_writecount) > 1) {
2414 spin_unlock(&inode->i_lock);
2415 return -ETXTBSY;
2416 }
2417 atomic_set(&inode->i_writecount, -1);
2418 spin_unlock(&inode->i_lock);
2419
2420 return 0;
2421}
2422
2423static int set_bitmap_file(mddev_t *mddev, int fd)
2424{
2425 int err;
2426
2427 if (mddev->pers)
2428 return -EBUSY;
2429
2430 mddev->bitmap_file = fget(fd);
2431
2432 if (mddev->bitmap_file == NULL) {
2433 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
2434 mdname(mddev));
2435 return -EBADF;
2436 }
2437
2438 err = deny_bitmap_write_access(mddev->bitmap_file);
2439 if (err) {
2440 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
2441 mdname(mddev));
2442 fput(mddev->bitmap_file);
2443 mddev->bitmap_file = NULL;
2444 } else
2445 mddev->bitmap_offset = 0; /* file overrides offset */
2446 return err;
2447}
2448
2259/* 2449/*
2260 * set_array_info is used two different ways 2450 * set_array_info is used two different ways
2261 * The original usage is when creating a new array. 2451 * The original usage is when creating a new array.
@@ -2567,8 +2757,10 @@ static int md_ioctl(struct inode *inode, struct file *file,
2567 /* 2757 /*
2568 * Commands querying/configuring an existing array: 2758 * Commands querying/configuring an existing array:
2569 */ 2759 */
2570 /* if we are initialised yet, only ADD_NEW_DISK or STOP_ARRAY is allowed */ 2760 /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY,
2571 if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) { 2761 * RUN_ARRAY, and SET_BITMAP_FILE are allowed */
2762 if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY
2763 && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE) {
2572 err = -ENODEV; 2764 err = -ENODEV;
2573 goto abort_unlock; 2765 goto abort_unlock;
2574 } 2766 }
@@ -2582,6 +2774,10 @@ static int md_ioctl(struct inode *inode, struct file *file,
2582 err = get_array_info(mddev, argp); 2774 err = get_array_info(mddev, argp);
2583 goto done_unlock; 2775 goto done_unlock;
2584 2776
2777 case GET_BITMAP_FILE:
2778 err = get_bitmap_file(mddev, (void *)arg);
2779 goto done_unlock;
2780
2585 case GET_DISK_INFO: 2781 case GET_DISK_INFO:
2586 err = get_disk_info(mddev, argp); 2782 err = get_disk_info(mddev, argp);
2587 goto done_unlock; 2783 goto done_unlock;
@@ -2662,6 +2858,10 @@ static int md_ioctl(struct inode *inode, struct file *file,
2662 err = do_md_run (mddev); 2858 err = do_md_run (mddev);
2663 goto done_unlock; 2859 goto done_unlock;
2664 2860
2861 case SET_BITMAP_FILE:
2862 err = set_bitmap_file(mddev, (int)arg);
2863 goto done_unlock;
2864
2665 default: 2865 default:
2666 if (_IOC_TYPE(cmd) == MD_MAJOR) 2866 if (_IOC_TYPE(cmd) == MD_MAJOR)
2667 printk(KERN_WARNING "md: %s(pid %d) used" 2867 printk(KERN_WARNING "md: %s(pid %d) used"
@@ -2773,8 +2973,9 @@ static int md_thread(void * arg)
2773 while (thread->run) { 2973 while (thread->run) {
2774 void (*run)(mddev_t *); 2974 void (*run)(mddev_t *);
2775 2975
2776 wait_event_interruptible(thread->wqueue, 2976 wait_event_interruptible_timeout(thread->wqueue,
2777 test_bit(THREAD_WAKEUP, &thread->flags)); 2977 test_bit(THREAD_WAKEUP, &thread->flags),
2978 thread->timeout);
2778 if (current->flags & PF_FREEZE) 2979 if (current->flags & PF_FREEZE)
2779 refrigerator(PF_FREEZE); 2980 refrigerator(PF_FREEZE);
2780 2981
@@ -2820,6 +3021,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
2820 thread->run = run; 3021 thread->run = run;
2821 thread->mddev = mddev; 3022 thread->mddev = mddev;
2822 thread->name = name; 3023 thread->name = name;
3024 thread->timeout = MAX_SCHEDULE_TIMEOUT;
2823 ret = kernel_thread(md_thread, thread, 0); 3025 ret = kernel_thread(md_thread, thread, 0);
2824 if (ret < 0) { 3026 if (ret < 0) {
2825 kfree(thread); 3027 kfree(thread);
@@ -2858,13 +3060,13 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
2858 3060
2859 if (!rdev || rdev->faulty) 3061 if (!rdev || rdev->faulty)
2860 return; 3062 return;
2861 3063/*
2862 dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", 3064 dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
2863 mdname(mddev), 3065 mdname(mddev),
2864 MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev), 3066 MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev),
2865 __builtin_return_address(0),__builtin_return_address(1), 3067 __builtin_return_address(0),__builtin_return_address(1),
2866 __builtin_return_address(2),__builtin_return_address(3)); 3068 __builtin_return_address(2),__builtin_return_address(3));
2867 3069*/
2868 if (!mddev->pers->error_handler) 3070 if (!mddev->pers->error_handler)
2869 return; 3071 return;
2870 mddev->pers->error_handler(mddev,rdev); 3072 mddev->pers->error_handler(mddev,rdev);
@@ -3018,6 +3220,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
3018 struct list_head *tmp2; 3220 struct list_head *tmp2;
3019 mdk_rdev_t *rdev; 3221 mdk_rdev_t *rdev;
3020 int i; 3222 int i;
3223 struct bitmap *bitmap;
3021 3224
3022 if (v == (void*)1) { 3225 if (v == (void*)1) {
3023 seq_printf(seq, "Personalities : "); 3226 seq_printf(seq, "Personalities : ");
@@ -3070,10 +3273,35 @@ static int md_seq_show(struct seq_file *seq, void *v)
3070 if (mddev->pers) { 3273 if (mddev->pers) {
3071 mddev->pers->status (seq, mddev); 3274 mddev->pers->status (seq, mddev);
3072 seq_printf(seq, "\n "); 3275 seq_printf(seq, "\n ");
3073 if (mddev->curr_resync > 2) 3276 if (mddev->curr_resync > 2) {
3074 status_resync (seq, mddev); 3277 status_resync (seq, mddev);
3075 else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) 3278 seq_printf(seq, "\n ");
3076 seq_printf(seq, " resync=DELAYED"); 3279 } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
3280 seq_printf(seq, " resync=DELAYED\n ");
3281 } else
3282 seq_printf(seq, "\n ");
3283
3284 if ((bitmap = mddev->bitmap)) {
3285 unsigned long chunk_kb;
3286 unsigned long flags;
3287 spin_lock_irqsave(&bitmap->lock, flags);
3288 chunk_kb = bitmap->chunksize >> 10;
3289 seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
3290 "%lu%s chunk",
3291 bitmap->pages - bitmap->missing_pages,
3292 bitmap->pages,
3293 (bitmap->pages - bitmap->missing_pages)
3294 << (PAGE_SHIFT - 10),
3295 chunk_kb ? chunk_kb : bitmap->chunksize,
3296 chunk_kb ? "KB" : "B");
3297 if (bitmap->file) {
3298 seq_printf(seq, ", file: ");
3299 seq_path(seq, bitmap->file->f_vfsmnt,
3300 bitmap->file->f_dentry," \t\n");
3301 }
3302
3303 seq_printf(seq, "\n");
3304 spin_unlock_irqrestore(&bitmap->lock, flags);
3077 } 3305 }
3078 3306
3079 seq_printf(seq, "\n"); 3307 seq_printf(seq, "\n");
@@ -3176,19 +3404,28 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
3176} 3404}
3177 3405
3178 3406
3179void md_write_start(mddev_t *mddev) 3407/* md_write_start(mddev, bi)
3408 * If we need to update some array metadata (e.g. 'active' flag
3409 * in superblock) before writing, schedule a superblock update
3410 * and wait for it to complete.
3411 */
3412void md_write_start(mddev_t *mddev, struct bio *bi)
3180{ 3413{
3181 if (!atomic_read(&mddev->writes_pending)) { 3414 DEFINE_WAIT(w);
3182 mddev_lock_uninterruptible(mddev); 3415 if (bio_data_dir(bi) != WRITE)
3416 return;
3417
3418 atomic_inc(&mddev->writes_pending);
3419 if (mddev->in_sync) {
3420 spin_lock(&mddev->write_lock);
3183 if (mddev->in_sync) { 3421 if (mddev->in_sync) {
3184 mddev->in_sync = 0; 3422 mddev->in_sync = 0;
3185 del_timer(&mddev->safemode_timer); 3423 mddev->sb_dirty = 1;
3186 md_update_sb(mddev); 3424 md_wakeup_thread(mddev->thread);
3187 } 3425 }
3188 atomic_inc(&mddev->writes_pending); 3426 spin_unlock(&mddev->write_lock);
3189 mddev_unlock(mddev); 3427 }
3190 } else 3428 wait_event(mddev->sb_wait, mddev->sb_dirty==0);
3191 atomic_inc(&mddev->writes_pending);
3192} 3429}
3193 3430
3194void md_write_end(mddev_t *mddev) 3431void md_write_end(mddev_t *mddev)
@@ -3201,37 +3438,6 @@ void md_write_end(mddev_t *mddev)
3201 } 3438 }
3202} 3439}
3203 3440
3204static inline void md_enter_safemode(mddev_t *mddev)
3205{
3206 if (!mddev->safemode) return;
3207 if (mddev->safemode == 2 &&
3208 (atomic_read(&mddev->writes_pending) || mddev->in_sync ||
3209 mddev->recovery_cp != MaxSector))
3210 return; /* avoid the lock */
3211 mddev_lock_uninterruptible(mddev);
3212 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
3213 !mddev->in_sync && mddev->recovery_cp == MaxSector) {
3214 mddev->in_sync = 1;
3215 md_update_sb(mddev);
3216 }
3217 mddev_unlock(mddev);
3218
3219 if (mddev->safemode == 1)
3220 mddev->safemode = 0;
3221}
3222
3223void md_handle_safemode(mddev_t *mddev)
3224{
3225 if (signal_pending(current)) {
3226 printk(KERN_INFO "md: %s in immediate safe mode\n",
3227 mdname(mddev));
3228 mddev->safemode = 2;
3229 flush_signals(current);
3230 }
3231 md_enter_safemode(mddev);
3232}
3233
3234
3235static DECLARE_WAIT_QUEUE_HEAD(resync_wait); 3441static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
3236 3442
3237#define SYNC_MARKS 10 3443#define SYNC_MARKS 10
@@ -3241,12 +3447,13 @@ static void md_do_sync(mddev_t *mddev)
3241 mddev_t *mddev2; 3447 mddev_t *mddev2;
3242 unsigned int currspeed = 0, 3448 unsigned int currspeed = 0,
3243 window; 3449 window;
3244 sector_t max_sectors,j; 3450 sector_t max_sectors,j, io_sectors;
3245 unsigned long mark[SYNC_MARKS]; 3451 unsigned long mark[SYNC_MARKS];
3246 sector_t mark_cnt[SYNC_MARKS]; 3452 sector_t mark_cnt[SYNC_MARKS];
3247 int last_mark,m; 3453 int last_mark,m;
3248 struct list_head *tmp; 3454 struct list_head *tmp;
3249 sector_t last_check; 3455 sector_t last_check;
3456 int skipped = 0;
3250 3457
3251 /* just incase thread restarts... */ 3458 /* just incase thread restarts... */
3252 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) 3459 if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -3312,7 +3519,7 @@ static void md_do_sync(mddev_t *mddev)
3312 3519
3313 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) 3520 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
3314 /* resync follows the size requested by the personality, 3521 /* resync follows the size requested by the personality,
3315 * which default to physical size, but can be virtual size 3522 * which defaults to physical size, but can be virtual size
3316 */ 3523 */
3317 max_sectors = mddev->resync_max_sectors; 3524 max_sectors = mddev->resync_max_sectors;
3318 else 3525 else
@@ -3327,13 +3534,15 @@ static void md_do_sync(mddev_t *mddev)
3327 sysctl_speed_limit_max); 3534 sysctl_speed_limit_max);
3328 3535
3329 is_mddev_idle(mddev); /* this also initializes IO event counters */ 3536 is_mddev_idle(mddev); /* this also initializes IO event counters */
3330 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) 3537 /* we don't use the checkpoint if there's a bitmap */
3538 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap)
3331 j = mddev->recovery_cp; 3539 j = mddev->recovery_cp;
3332 else 3540 else
3333 j = 0; 3541 j = 0;
3542 io_sectors = 0;
3334 for (m = 0; m < SYNC_MARKS; m++) { 3543 for (m = 0; m < SYNC_MARKS; m++) {
3335 mark[m] = jiffies; 3544 mark[m] = jiffies;
3336 mark_cnt[m] = j; 3545 mark_cnt[m] = io_sectors;
3337 } 3546 }
3338 last_mark = 0; 3547 last_mark = 0;
3339 mddev->resync_mark = mark[last_mark]; 3548 mddev->resync_mark = mark[last_mark];
@@ -3358,21 +3567,29 @@ static void md_do_sync(mddev_t *mddev)
3358 } 3567 }
3359 3568
3360 while (j < max_sectors) { 3569 while (j < max_sectors) {
3361 int sectors; 3570 sector_t sectors;
3362 3571
3363 sectors = mddev->pers->sync_request(mddev, j, currspeed < sysctl_speed_limit_min); 3572 skipped = 0;
3364 if (sectors < 0) { 3573 sectors = mddev->pers->sync_request(mddev, j, &skipped,
3574 currspeed < sysctl_speed_limit_min);
3575 if (sectors == 0) {
3365 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 3576 set_bit(MD_RECOVERY_ERR, &mddev->recovery);
3366 goto out; 3577 goto out;
3367 } 3578 }
3368 atomic_add(sectors, &mddev->recovery_active); 3579
3580 if (!skipped) { /* actual IO requested */
3581 io_sectors += sectors;
3582 atomic_add(sectors, &mddev->recovery_active);
3583 }
3584
3369 j += sectors; 3585 j += sectors;
3370 if (j>1) mddev->curr_resync = j; 3586 if (j>1) mddev->curr_resync = j;
3371 3587
3372 if (last_check + window > j || j == max_sectors) 3588
3589 if (last_check + window > io_sectors || j == max_sectors)
3373 continue; 3590 continue;
3374 3591
3375 last_check = j; 3592 last_check = io_sectors;
3376 3593
3377 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) || 3594 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
3378 test_bit(MD_RECOVERY_ERR, &mddev->recovery)) 3595 test_bit(MD_RECOVERY_ERR, &mddev->recovery))
@@ -3386,7 +3603,7 @@ static void md_do_sync(mddev_t *mddev)
3386 mddev->resync_mark = mark[next]; 3603 mddev->resync_mark = mark[next];
3387 mddev->resync_mark_cnt = mark_cnt[next]; 3604 mddev->resync_mark_cnt = mark_cnt[next];
3388 mark[next] = jiffies; 3605 mark[next] = jiffies;
3389 mark_cnt[next] = j - atomic_read(&mddev->recovery_active); 3606 mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active);
3390 last_mark = next; 3607 last_mark = next;
3391 } 3608 }
3392 3609
@@ -3413,7 +3630,8 @@ static void md_do_sync(mddev_t *mddev)
3413 mddev->queue->unplug_fn(mddev->queue); 3630 mddev->queue->unplug_fn(mddev->queue);
3414 cond_resched(); 3631 cond_resched();
3415 3632
3416 currspeed = ((unsigned long)(j-mddev->resync_mark_cnt))/2/((jiffies-mddev->resync_mark)/HZ +1) +1; 3633 currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
3634 /((jiffies-mddev->resync_mark)/HZ +1) +1;
3417 3635
3418 if (currspeed > sysctl_speed_limit_min) { 3636 if (currspeed > sysctl_speed_limit_min) {
3419 if ((currspeed > sysctl_speed_limit_max) || 3637 if ((currspeed > sysctl_speed_limit_max) ||
@@ -3433,7 +3651,7 @@ static void md_do_sync(mddev_t *mddev)
3433 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); 3651 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
3434 3652
3435 /* tell personality that we are finished */ 3653 /* tell personality that we are finished */
3436 mddev->pers->sync_request(mddev, max_sectors, 1); 3654 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
3437 3655
3438 if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && 3656 if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
3439 mddev->curr_resync > 2 && 3657 mddev->curr_resync > 2 &&
@@ -3447,7 +3665,6 @@ static void md_do_sync(mddev_t *mddev)
3447 mddev->recovery_cp = MaxSector; 3665 mddev->recovery_cp = MaxSector;
3448 } 3666 }
3449 3667
3450 md_enter_safemode(mddev);
3451 skip: 3668 skip:
3452 mddev->curr_resync = 0; 3669 mddev->curr_resync = 0;
3453 wake_up(&resync_wait); 3670 wake_up(&resync_wait);
@@ -3484,20 +3701,48 @@ void md_check_recovery(mddev_t *mddev)
3484 struct list_head *rtmp; 3701 struct list_head *rtmp;
3485 3702
3486 3703
3487 dprintk(KERN_INFO "md: recovery thread got woken up ...\n"); 3704 if (mddev->bitmap)
3705 bitmap_daemon_work(mddev->bitmap);
3488 3706
3489 if (mddev->ro) 3707 if (mddev->ro)
3490 return; 3708 return;
3709
3710 if (signal_pending(current)) {
3711 if (mddev->pers->sync_request) {
3712 printk(KERN_INFO "md: %s in immediate safe mode\n",
3713 mdname(mddev));
3714 mddev->safemode = 2;
3715 }
3716 flush_signals(current);
3717 }
3718
3491 if ( ! ( 3719 if ( ! (
3492 mddev->sb_dirty || 3720 mddev->sb_dirty ||
3493 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || 3721 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
3494 test_bit(MD_RECOVERY_DONE, &mddev->recovery) 3722 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
3723 (mddev->safemode == 1) ||
3724 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
3725 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
3495 )) 3726 ))
3496 return; 3727 return;
3728
3497 if (mddev_trylock(mddev)==0) { 3729 if (mddev_trylock(mddev)==0) {
3498 int spares =0; 3730 int spares =0;
3731
3732 spin_lock(&mddev->write_lock);
3733 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
3734 !mddev->in_sync && mddev->recovery_cp == MaxSector) {
3735 mddev->in_sync = 1;
3736 mddev->sb_dirty = 1;
3737 }
3738 if (mddev->safemode == 1)
3739 mddev->safemode = 0;
3740 spin_unlock(&mddev->write_lock);
3741
3499 if (mddev->sb_dirty) 3742 if (mddev->sb_dirty)
3500 md_update_sb(mddev); 3743 md_update_sb(mddev);
3744
3745
3501 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && 3746 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
3502 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { 3747 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
3503 /* resync/recovery still happening */ 3748 /* resync/recovery still happening */
@@ -3515,6 +3760,14 @@ void md_check_recovery(mddev_t *mddev)
3515 mddev->pers->spare_active(mddev); 3760 mddev->pers->spare_active(mddev);
3516 } 3761 }
3517 md_update_sb(mddev); 3762 md_update_sb(mddev);
3763
3764 /* if array is no-longer degraded, then any saved_raid_disk
3765 * information must be scrapped
3766 */
3767 if (!mddev->degraded)
3768 ITERATE_RDEV(mddev,rdev,rtmp)
3769 rdev->saved_raid_disk = -1;
3770
3518 mddev->recovery = 0; 3771 mddev->recovery = 0;
3519 /* flag recovery needed just to double check */ 3772 /* flag recovery needed just to double check */
3520 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3773 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -3557,6 +3810,13 @@ void md_check_recovery(mddev_t *mddev)
3557 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 3810 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
3558 if (!spares) 3811 if (!spares)
3559 set_bit(MD_RECOVERY_SYNC, &mddev->recovery); 3812 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
3813 if (spares && mddev->bitmap && ! mddev->bitmap->file) {
3814 /* We are adding a device or devices to an array
3815 * which has the bitmap stored on all devices.
3816 * So make sure all bitmap pages get written
3817 */
3818 bitmap_write_all(mddev->bitmap);
3819 }
3560 mddev->sync_thread = md_register_thread(md_do_sync, 3820 mddev->sync_thread = md_register_thread(md_do_sync,
3561 mddev, 3821 mddev,
3562 "%s_resync"); 3822 "%s_resync");
@@ -3624,6 +3884,8 @@ static int __init md_init(void)
3624 " MD_SB_DISKS=%d\n", 3884 " MD_SB_DISKS=%d\n",
3625 MD_MAJOR_VERSION, MD_MINOR_VERSION, 3885 MD_MAJOR_VERSION, MD_MINOR_VERSION,
3626 MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); 3886 MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS);
3887 printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR,
3888 BITMAP_MINOR);
3627 3889
3628 if (register_blkdev(MAJOR_NR, "md")) 3890 if (register_blkdev(MAJOR_NR, "md"))
3629 return -1; 3891 return -1;
@@ -3739,7 +4001,6 @@ EXPORT_SYMBOL(md_error);
3739EXPORT_SYMBOL(md_done_sync); 4001EXPORT_SYMBOL(md_done_sync);
3740EXPORT_SYMBOL(md_write_start); 4002EXPORT_SYMBOL(md_write_start);
3741EXPORT_SYMBOL(md_write_end); 4003EXPORT_SYMBOL(md_write_end);
3742EXPORT_SYMBOL(md_handle_safemode);
3743EXPORT_SYMBOL(md_register_thread); 4004EXPORT_SYMBOL(md_register_thread);
3744EXPORT_SYMBOL(md_unregister_thread); 4005EXPORT_SYMBOL(md_unregister_thread);
3745EXPORT_SYMBOL(md_wakeup_thread); 4006EXPORT_SYMBOL(md_wakeup_thread);