diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 525 |
1 files changed, 393 insertions, 132 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index d899204d3743..0c6b5b6baff6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -19,6 +19,9 @@ | |||
19 | 19 | ||
20 | Neil Brown <neilb@cse.unsw.edu.au>. | 20 | Neil Brown <neilb@cse.unsw.edu.au>. |
21 | 21 | ||
22 | - persistent bitmap code | ||
23 | Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. | ||
24 | |||
22 | This program is free software; you can redistribute it and/or modify | 25 | This program is free software; you can redistribute it and/or modify |
23 | it under the terms of the GNU General Public License as published by | 26 | it under the terms of the GNU General Public License as published by |
24 | the Free Software Foundation; either version 2, or (at your option) | 27 | the Free Software Foundation; either version 2, or (at your option) |
@@ -33,6 +36,7 @@ | |||
33 | #include <linux/config.h> | 36 | #include <linux/config.h> |
34 | #include <linux/linkage.h> | 37 | #include <linux/linkage.h> |
35 | #include <linux/raid/md.h> | 38 | #include <linux/raid/md.h> |
39 | #include <linux/raid/bitmap.h> | ||
36 | #include <linux/sysctl.h> | 40 | #include <linux/sysctl.h> |
37 | #include <linux/devfs_fs_kernel.h> | 41 | #include <linux/devfs_fs_kernel.h> |
38 | #include <linux/buffer_head.h> /* for invalidate_bdev */ | 42 | #include <linux/buffer_head.h> /* for invalidate_bdev */ |
@@ -40,6 +44,8 @@ | |||
40 | 44 | ||
41 | #include <linux/init.h> | 45 | #include <linux/init.h> |
42 | 46 | ||
47 | #include <linux/file.h> | ||
48 | |||
43 | #ifdef CONFIG_KMOD | 49 | #ifdef CONFIG_KMOD |
44 | #include <linux/kmod.h> | 50 | #include <linux/kmod.h> |
45 | #endif | 51 | #endif |
@@ -189,8 +195,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
189 | if (mddev->unit == unit) { | 195 | if (mddev->unit == unit) { |
190 | mddev_get(mddev); | 196 | mddev_get(mddev); |
191 | spin_unlock(&all_mddevs_lock); | 197 | spin_unlock(&all_mddevs_lock); |
192 | if (new) | 198 | kfree(new); |
193 | kfree(new); | ||
194 | return mddev; | 199 | return mddev; |
195 | } | 200 | } |
196 | 201 | ||
@@ -218,6 +223,8 @@ static mddev_t * mddev_find(dev_t unit) | |||
218 | INIT_LIST_HEAD(&new->all_mddevs); | 223 | INIT_LIST_HEAD(&new->all_mddevs); |
219 | init_timer(&new->safemode_timer); | 224 | init_timer(&new->safemode_timer); |
220 | atomic_set(&new->active, 1); | 225 | atomic_set(&new->active, 1); |
226 | spin_lock_init(&new->write_lock); | ||
227 | init_waitqueue_head(&new->sb_wait); | ||
221 | 228 | ||
222 | new->queue = blk_alloc_queue(GFP_KERNEL); | 229 | new->queue = blk_alloc_queue(GFP_KERNEL); |
223 | if (!new->queue) { | 230 | if (!new->queue) { |
@@ -320,6 +327,40 @@ static void free_disk_sb(mdk_rdev_t * rdev) | |||
320 | } | 327 | } |
321 | 328 | ||
322 | 329 | ||
330 | static int super_written(struct bio *bio, unsigned int bytes_done, int error) | ||
331 | { | ||
332 | mdk_rdev_t *rdev = bio->bi_private; | ||
333 | if (bio->bi_size) | ||
334 | return 1; | ||
335 | |||
336 | if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
337 | md_error(rdev->mddev, rdev); | ||
338 | |||
339 | if (atomic_dec_and_test(&rdev->mddev->pending_writes)) | ||
340 | wake_up(&rdev->mddev->sb_wait); | ||
341 | return 0; | ||
342 | } | ||
343 | |||
344 | void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | ||
345 | sector_t sector, int size, struct page *page) | ||
346 | { | ||
347 | /* write first size bytes of page to sector of rdev | ||
348 | * Increment mddev->pending_writes before returning | ||
349 | * and decrement it on completion, waking up sb_wait | ||
350 | * if zero is reached. | ||
351 | * If an error occurred, call md_error | ||
352 | */ | ||
353 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | ||
354 | |||
355 | bio->bi_bdev = rdev->bdev; | ||
356 | bio->bi_sector = sector; | ||
357 | bio_add_page(bio, page, size, 0); | ||
358 | bio->bi_private = rdev; | ||
359 | bio->bi_end_io = super_written; | ||
360 | atomic_inc(&mddev->pending_writes); | ||
361 | submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio); | ||
362 | } | ||
363 | |||
323 | static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) | 364 | static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) |
324 | { | 365 | { |
325 | if (bio->bi_size) | 366 | if (bio->bi_size) |
@@ -329,7 +370,7 @@ static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) | |||
329 | return 0; | 370 | return 0; |
330 | } | 371 | } |
331 | 372 | ||
332 | static int sync_page_io(struct block_device *bdev, sector_t sector, int size, | 373 | int sync_page_io(struct block_device *bdev, sector_t sector, int size, |
333 | struct page *page, int rw) | 374 | struct page *page, int rw) |
334 | { | 375 | { |
335 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | 376 | struct bio *bio = bio_alloc(GFP_NOIO, 1); |
@@ -416,11 +457,8 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) | |||
416 | ret = 1; | 457 | ret = 1; |
417 | 458 | ||
418 | abort: | 459 | abort: |
419 | if (tmp1) | 460 | kfree(tmp1); |
420 | kfree(tmp1); | 461 | kfree(tmp2); |
421 | if (tmp2) | ||
422 | kfree(tmp2); | ||
423 | |||
424 | return ret; | 462 | return ret; |
425 | } | 463 | } |
426 | 464 | ||
@@ -569,6 +607,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
569 | mdp_disk_t *desc; | 607 | mdp_disk_t *desc; |
570 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); | 608 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); |
571 | 609 | ||
610 | rdev->raid_disk = -1; | ||
611 | rdev->in_sync = 0; | ||
572 | if (mddev->raid_disks == 0) { | 612 | if (mddev->raid_disks == 0) { |
573 | mddev->major_version = 0; | 613 | mddev->major_version = 0; |
574 | mddev->minor_version = sb->minor_version; | 614 | mddev->minor_version = sb->minor_version; |
@@ -599,16 +639,35 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
599 | memcpy(mddev->uuid+12,&sb->set_uuid3, 4); | 639 | memcpy(mddev->uuid+12,&sb->set_uuid3, 4); |
600 | 640 | ||
601 | mddev->max_disks = MD_SB_DISKS; | 641 | mddev->max_disks = MD_SB_DISKS; |
602 | } else { | 642 | |
603 | __u64 ev1; | 643 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && |
604 | ev1 = md_event(sb); | 644 | mddev->bitmap_file == NULL) { |
645 | if (mddev->level != 1) { | ||
646 | /* FIXME use a better test */ | ||
647 | printk(KERN_WARNING "md: bitmaps only support for raid1\n"); | ||
648 | return -EINVAL; | ||
649 | } | ||
650 | mddev->bitmap_offset = (MD_SB_BYTES >> 9); | ||
651 | } | ||
652 | |||
653 | } else if (mddev->pers == NULL) { | ||
654 | /* Insist on good event counter while assembling */ | ||
655 | __u64 ev1 = md_event(sb); | ||
605 | ++ev1; | 656 | ++ev1; |
606 | if (ev1 < mddev->events) | 657 | if (ev1 < mddev->events) |
607 | return -EINVAL; | 658 | return -EINVAL; |
608 | } | 659 | } else if (mddev->bitmap) { |
660 | /* if adding to array with a bitmap, then we can accept an | ||
661 | * older device ... but not too old. | ||
662 | */ | ||
663 | __u64 ev1 = md_event(sb); | ||
664 | if (ev1 < mddev->bitmap->events_cleared) | ||
665 | return 0; | ||
666 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | ||
667 | return 0; | ||
668 | |||
609 | if (mddev->level != LEVEL_MULTIPATH) { | 669 | if (mddev->level != LEVEL_MULTIPATH) { |
610 | rdev->raid_disk = -1; | 670 | rdev->faulty = 0; |
611 | rdev->in_sync = rdev->faulty = 0; | ||
612 | desc = sb->disks + rdev->desc_nr; | 671 | desc = sb->disks + rdev->desc_nr; |
613 | 672 | ||
614 | if (desc->state & (1<<MD_DISK_FAULTY)) | 673 | if (desc->state & (1<<MD_DISK_FAULTY)) |
@@ -618,7 +677,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
618 | rdev->in_sync = 1; | 677 | rdev->in_sync = 1; |
619 | rdev->raid_disk = desc->raid_disk; | 678 | rdev->raid_disk = desc->raid_disk; |
620 | } | 679 | } |
621 | } | 680 | } else /* MULTIPATH are always insync */ |
681 | rdev->in_sync = 1; | ||
622 | return 0; | 682 | return 0; |
623 | } | 683 | } |
624 | 684 | ||
@@ -683,6 +743,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
683 | sb->layout = mddev->layout; | 743 | sb->layout = mddev->layout; |
684 | sb->chunk_size = mddev->chunk_size; | 744 | sb->chunk_size = mddev->chunk_size; |
685 | 745 | ||
746 | if (mddev->bitmap && mddev->bitmap_file == NULL) | ||
747 | sb->state |= (1<<MD_SB_BITMAP_PRESENT); | ||
748 | |||
686 | sb->disks[0].state = (1<<MD_DISK_REMOVED); | 749 | sb->disks[0].state = (1<<MD_DISK_REMOVED); |
687 | ITERATE_RDEV(mddev,rdev2,tmp) { | 750 | ITERATE_RDEV(mddev,rdev2,tmp) { |
688 | mdp_disk_t *d; | 751 | mdp_disk_t *d; |
@@ -780,7 +843,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
780 | case 0: | 843 | case 0: |
781 | sb_offset = rdev->bdev->bd_inode->i_size >> 9; | 844 | sb_offset = rdev->bdev->bd_inode->i_size >> 9; |
782 | sb_offset -= 8*2; | 845 | sb_offset -= 8*2; |
783 | sb_offset &= ~(4*2-1); | 846 | sb_offset &= ~(sector_t)(4*2-1); |
784 | /* convert from sectors to K */ | 847 | /* convert from sectors to K */ |
785 | sb_offset /= 2; | 848 | sb_offset /= 2; |
786 | break; | 849 | break; |
@@ -860,6 +923,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
860 | { | 923 | { |
861 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); | 924 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); |
862 | 925 | ||
926 | rdev->raid_disk = -1; | ||
927 | rdev->in_sync = 0; | ||
863 | if (mddev->raid_disks == 0) { | 928 | if (mddev->raid_disks == 0) { |
864 | mddev->major_version = 1; | 929 | mddev->major_version = 1; |
865 | mddev->patch_version = 0; | 930 | mddev->patch_version = 0; |
@@ -877,13 +942,30 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
877 | memcpy(mddev->uuid, sb->set_uuid, 16); | 942 | memcpy(mddev->uuid, sb->set_uuid, 16); |
878 | 943 | ||
879 | mddev->max_disks = (4096-256)/2; | 944 | mddev->max_disks = (4096-256)/2; |
880 | } else { | 945 | |
881 | __u64 ev1; | 946 | if ((le32_to_cpu(sb->feature_map) & 1) && |
882 | ev1 = le64_to_cpu(sb->events); | 947 | mddev->bitmap_file == NULL ) { |
948 | if (mddev->level != 1) { | ||
949 | printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); | ||
950 | return -EINVAL; | ||
951 | } | ||
952 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); | ||
953 | } | ||
954 | } else if (mddev->pers == NULL) { | ||
955 | /* Insist on good event counter while assembling */ | ||
956 | __u64 ev1 = le64_to_cpu(sb->events); | ||
883 | ++ev1; | 957 | ++ev1; |
884 | if (ev1 < mddev->events) | 958 | if (ev1 < mddev->events) |
885 | return -EINVAL; | 959 | return -EINVAL; |
886 | } | 960 | } else if (mddev->bitmap) { |
961 | /* If adding to array with a bitmap, then we can accept an | ||
962 | * older device, but not too old. | ||
963 | */ | ||
964 | __u64 ev1 = le64_to_cpu(sb->events); | ||
965 | if (ev1 < mddev->bitmap->events_cleared) | ||
966 | return 0; | ||
967 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | ||
968 | return 0; | ||
887 | 969 | ||
888 | if (mddev->level != LEVEL_MULTIPATH) { | 970 | if (mddev->level != LEVEL_MULTIPATH) { |
889 | int role; | 971 | int role; |
@@ -891,14 +973,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
891 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 973 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); |
892 | switch(role) { | 974 | switch(role) { |
893 | case 0xffff: /* spare */ | 975 | case 0xffff: /* spare */ |
894 | rdev->in_sync = 0; | ||
895 | rdev->faulty = 0; | 976 | rdev->faulty = 0; |
896 | rdev->raid_disk = -1; | ||
897 | break; | 977 | break; |
898 | case 0xfffe: /* faulty */ | 978 | case 0xfffe: /* faulty */ |
899 | rdev->in_sync = 0; | ||
900 | rdev->faulty = 1; | 979 | rdev->faulty = 1; |
901 | rdev->raid_disk = -1; | ||
902 | break; | 980 | break; |
903 | default: | 981 | default: |
904 | rdev->in_sync = 1; | 982 | rdev->in_sync = 1; |
@@ -906,7 +984,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
906 | rdev->raid_disk = role; | 984 | rdev->raid_disk = role; |
907 | break; | 985 | break; |
908 | } | 986 | } |
909 | } | 987 | } else /* MULTIPATH are always insync */ |
988 | rdev->in_sync = 1; | ||
989 | |||
910 | return 0; | 990 | return 0; |
911 | } | 991 | } |
912 | 992 | ||
@@ -933,6 +1013,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
933 | else | 1013 | else |
934 | sb->resync_offset = cpu_to_le64(0); | 1014 | sb->resync_offset = cpu_to_le64(0); |
935 | 1015 | ||
1016 | if (mddev->bitmap && mddev->bitmap_file == NULL) { | ||
1017 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | ||
1018 | sb->feature_map = cpu_to_le32(1); | ||
1019 | } | ||
1020 | |||
936 | max_dev = 0; | 1021 | max_dev = 0; |
937 | ITERATE_RDEV(mddev,rdev2,tmp) | 1022 | ITERATE_RDEV(mddev,rdev2,tmp) |
938 | if (rdev2->desc_nr+1 > max_dev) | 1023 | if (rdev2->desc_nr+1 > max_dev) |
@@ -1196,8 +1281,11 @@ void md_print_devices(void) | |||
1196 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); | 1281 | printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); |
1197 | printk("md: **********************************\n"); | 1282 | printk("md: **********************************\n"); |
1198 | ITERATE_MDDEV(mddev,tmp) { | 1283 | ITERATE_MDDEV(mddev,tmp) { |
1199 | printk("%s: ", mdname(mddev)); | ||
1200 | 1284 | ||
1285 | if (mddev->bitmap) | ||
1286 | bitmap_print_sb(mddev->bitmap); | ||
1287 | else | ||
1288 | printk("%s: ", mdname(mddev)); | ||
1201 | ITERATE_RDEV(mddev,rdev,tmp2) | 1289 | ITERATE_RDEV(mddev,rdev,tmp2) |
1202 | printk("<%s>", bdevname(rdev->bdev,b)); | 1290 | printk("<%s>", bdevname(rdev->bdev,b)); |
1203 | printk("\n"); | 1291 | printk("\n"); |
@@ -1210,30 +1298,6 @@ void md_print_devices(void) | |||
1210 | } | 1298 | } |
1211 | 1299 | ||
1212 | 1300 | ||
1213 | static int write_disk_sb(mdk_rdev_t * rdev) | ||
1214 | { | ||
1215 | char b[BDEVNAME_SIZE]; | ||
1216 | if (!rdev->sb_loaded) { | ||
1217 | MD_BUG(); | ||
1218 | return 1; | ||
1219 | } | ||
1220 | if (rdev->faulty) { | ||
1221 | MD_BUG(); | ||
1222 | return 1; | ||
1223 | } | ||
1224 | |||
1225 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | ||
1226 | bdevname(rdev->bdev,b), | ||
1227 | (unsigned long long)rdev->sb_offset); | ||
1228 | |||
1229 | if (sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE)) | ||
1230 | return 0; | ||
1231 | |||
1232 | printk("md: write_disk_sb failed for device %s\n", | ||
1233 | bdevname(rdev->bdev,b)); | ||
1234 | return 1; | ||
1235 | } | ||
1236 | |||
1237 | static void sync_sbs(mddev_t * mddev) | 1301 | static void sync_sbs(mddev_t * mddev) |
1238 | { | 1302 | { |
1239 | mdk_rdev_t *rdev; | 1303 | mdk_rdev_t *rdev; |
@@ -1248,12 +1312,14 @@ static void sync_sbs(mddev_t * mddev) | |||
1248 | 1312 | ||
1249 | static void md_update_sb(mddev_t * mddev) | 1313 | static void md_update_sb(mddev_t * mddev) |
1250 | { | 1314 | { |
1251 | int err, count = 100; | 1315 | int err; |
1252 | struct list_head *tmp; | 1316 | struct list_head *tmp; |
1253 | mdk_rdev_t *rdev; | 1317 | mdk_rdev_t *rdev; |
1318 | int sync_req; | ||
1254 | 1319 | ||
1255 | mddev->sb_dirty = 0; | ||
1256 | repeat: | 1320 | repeat: |
1321 | spin_lock(&mddev->write_lock); | ||
1322 | sync_req = mddev->in_sync; | ||
1257 | mddev->utime = get_seconds(); | 1323 | mddev->utime = get_seconds(); |
1258 | mddev->events ++; | 1324 | mddev->events ++; |
1259 | 1325 | ||
@@ -1266,20 +1332,26 @@ repeat: | |||
1266 | MD_BUG(); | 1332 | MD_BUG(); |
1267 | mddev->events --; | 1333 | mddev->events --; |
1268 | } | 1334 | } |
1335 | mddev->sb_dirty = 2; | ||
1269 | sync_sbs(mddev); | 1336 | sync_sbs(mddev); |
1270 | 1337 | ||
1271 | /* | 1338 | /* |
1272 | * do not write anything to disk if using | 1339 | * do not write anything to disk if using |
1273 | * nonpersistent superblocks | 1340 | * nonpersistent superblocks |
1274 | */ | 1341 | */ |
1275 | if (!mddev->persistent) | 1342 | if (!mddev->persistent) { |
1343 | mddev->sb_dirty = 0; | ||
1344 | spin_unlock(&mddev->write_lock); | ||
1345 | wake_up(&mddev->sb_wait); | ||
1276 | return; | 1346 | return; |
1347 | } | ||
1348 | spin_unlock(&mddev->write_lock); | ||
1277 | 1349 | ||
1278 | dprintk(KERN_INFO | 1350 | dprintk(KERN_INFO |
1279 | "md: updating %s RAID superblock on device (in sync %d)\n", | 1351 | "md: updating %s RAID superblock on device (in sync %d)\n", |
1280 | mdname(mddev),mddev->in_sync); | 1352 | mdname(mddev),mddev->in_sync); |
1281 | 1353 | ||
1282 | err = 0; | 1354 | err = bitmap_update_sb(mddev->bitmap); |
1283 | ITERATE_RDEV(mddev,rdev,tmp) { | 1355 | ITERATE_RDEV(mddev,rdev,tmp) { |
1284 | char b[BDEVNAME_SIZE]; | 1356 | char b[BDEVNAME_SIZE]; |
1285 | dprintk(KERN_INFO "md: "); | 1357 | dprintk(KERN_INFO "md: "); |
@@ -1288,22 +1360,32 @@ repeat: | |||
1288 | 1360 | ||
1289 | dprintk("%s ", bdevname(rdev->bdev,b)); | 1361 | dprintk("%s ", bdevname(rdev->bdev,b)); |
1290 | if (!rdev->faulty) { | 1362 | if (!rdev->faulty) { |
1291 | err += write_disk_sb(rdev); | 1363 | md_super_write(mddev,rdev, |
1364 | rdev->sb_offset<<1, MD_SB_BYTES, | ||
1365 | rdev->sb_page); | ||
1366 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | ||
1367 | bdevname(rdev->bdev,b), | ||
1368 | (unsigned long long)rdev->sb_offset); | ||
1369 | |||
1292 | } else | 1370 | } else |
1293 | dprintk(")\n"); | 1371 | dprintk(")\n"); |
1294 | if (!err && mddev->level == LEVEL_MULTIPATH) | 1372 | if (mddev->level == LEVEL_MULTIPATH) |
1295 | /* only need to write one superblock... */ | 1373 | /* only need to write one superblock... */ |
1296 | break; | 1374 | break; |
1297 | } | 1375 | } |
1298 | if (err) { | 1376 | wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); |
1299 | if (--count) { | 1377 | /* if there was a failure, sb_dirty was set to 1, and we re-write super */ |
1300 | printk(KERN_ERR "md: errors occurred during superblock" | 1378 | |
1301 | " update, repeating\n"); | 1379 | spin_lock(&mddev->write_lock); |
1302 | goto repeat; | 1380 | if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) { |
1303 | } | 1381 | /* have to write it out again */ |
1304 | printk(KERN_ERR \ | 1382 | spin_unlock(&mddev->write_lock); |
1305 | "md: excessive errors occurred during superblock update, exiting\n"); | 1383 | goto repeat; |
1306 | } | 1384 | } |
1385 | mddev->sb_dirty = 0; | ||
1386 | spin_unlock(&mddev->write_lock); | ||
1387 | wake_up(&mddev->sb_wait); | ||
1388 | |||
1307 | } | 1389 | } |
1308 | 1390 | ||
1309 | /* | 1391 | /* |
@@ -1607,12 +1689,19 @@ static int do_md_run(mddev_t * mddev) | |||
1607 | 1689 | ||
1608 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ | 1690 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ |
1609 | 1691 | ||
1610 | err = mddev->pers->run(mddev); | 1692 | /* before we start the array running, initialise the bitmap */ |
1693 | err = bitmap_create(mddev); | ||
1694 | if (err) | ||
1695 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", | ||
1696 | mdname(mddev), err); | ||
1697 | else | ||
1698 | err = mddev->pers->run(mddev); | ||
1611 | if (err) { | 1699 | if (err) { |
1612 | printk(KERN_ERR "md: pers->run() failed ...\n"); | 1700 | printk(KERN_ERR "md: pers->run() failed ...\n"); |
1613 | module_put(mddev->pers->owner); | 1701 | module_put(mddev->pers->owner); |
1614 | mddev->pers = NULL; | 1702 | mddev->pers = NULL; |
1615 | return -EINVAL; | 1703 | bitmap_destroy(mddev); |
1704 | return err; | ||
1616 | } | 1705 | } |
1617 | atomic_set(&mddev->writes_pending,0); | 1706 | atomic_set(&mddev->writes_pending,0); |
1618 | mddev->safemode = 0; | 1707 | mddev->safemode = 0; |
@@ -1725,6 +1814,14 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
1725 | if (ro) | 1814 | if (ro) |
1726 | set_disk_ro(disk, 1); | 1815 | set_disk_ro(disk, 1); |
1727 | } | 1816 | } |
1817 | |||
1818 | bitmap_destroy(mddev); | ||
1819 | if (mddev->bitmap_file) { | ||
1820 | atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1); | ||
1821 | fput(mddev->bitmap_file); | ||
1822 | mddev->bitmap_file = NULL; | ||
1823 | } | ||
1824 | |||
1728 | /* | 1825 | /* |
1729 | * Free resources if final stop | 1826 | * Free resources if final stop |
1730 | */ | 1827 | */ |
@@ -1983,6 +2080,42 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
1983 | return 0; | 2080 | return 0; |
1984 | } | 2081 | } |
1985 | 2082 | ||
2083 | static int get_bitmap_file(mddev_t * mddev, void * arg) | ||
2084 | { | ||
2085 | mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ | ||
2086 | char *ptr, *buf = NULL; | ||
2087 | int err = -ENOMEM; | ||
2088 | |||
2089 | file = kmalloc(sizeof(*file), GFP_KERNEL); | ||
2090 | if (!file) | ||
2091 | goto out; | ||
2092 | |||
2093 | /* bitmap disabled, zero the first byte and copy out */ | ||
2094 | if (!mddev->bitmap || !mddev->bitmap->file) { | ||
2095 | file->pathname[0] = '\0'; | ||
2096 | goto copy_out; | ||
2097 | } | ||
2098 | |||
2099 | buf = kmalloc(sizeof(file->pathname), GFP_KERNEL); | ||
2100 | if (!buf) | ||
2101 | goto out; | ||
2102 | |||
2103 | ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname)); | ||
2104 | if (!ptr) | ||
2105 | goto out; | ||
2106 | |||
2107 | strcpy(file->pathname, ptr); | ||
2108 | |||
2109 | copy_out: | ||
2110 | err = 0; | ||
2111 | if (copy_to_user(arg, file, sizeof(*file))) | ||
2112 | err = -EFAULT; | ||
2113 | out: | ||
2114 | kfree(buf); | ||
2115 | kfree(file); | ||
2116 | return err; | ||
2117 | } | ||
2118 | |||
1986 | static int get_disk_info(mddev_t * mddev, void __user * arg) | 2119 | static int get_disk_info(mddev_t * mddev, void __user * arg) |
1987 | { | 2120 | { |
1988 | mdu_disk_info_t info; | 2121 | mdu_disk_info_t info; |
@@ -2078,11 +2211,25 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
2078 | PTR_ERR(rdev)); | 2211 | PTR_ERR(rdev)); |
2079 | return PTR_ERR(rdev); | 2212 | return PTR_ERR(rdev); |
2080 | } | 2213 | } |
2214 | /* set save_raid_disk if appropriate */ | ||
2215 | if (!mddev->persistent) { | ||
2216 | if (info->state & (1<<MD_DISK_SYNC) && | ||
2217 | info->raid_disk < mddev->raid_disks) | ||
2218 | rdev->raid_disk = info->raid_disk; | ||
2219 | else | ||
2220 | rdev->raid_disk = -1; | ||
2221 | } else | ||
2222 | super_types[mddev->major_version]. | ||
2223 | validate_super(mddev, rdev); | ||
2224 | rdev->saved_raid_disk = rdev->raid_disk; | ||
2225 | |||
2081 | rdev->in_sync = 0; /* just to be sure */ | 2226 | rdev->in_sync = 0; /* just to be sure */ |
2082 | rdev->raid_disk = -1; | 2227 | rdev->raid_disk = -1; |
2083 | err = bind_rdev_to_array(rdev, mddev); | 2228 | err = bind_rdev_to_array(rdev, mddev); |
2084 | if (err) | 2229 | if (err) |
2085 | export_rdev(rdev); | 2230 | export_rdev(rdev); |
2231 | |||
2232 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
2086 | if (mddev->thread) | 2233 | if (mddev->thread) |
2087 | md_wakeup_thread(mddev->thread); | 2234 | md_wakeup_thread(mddev->thread); |
2088 | return err; | 2235 | return err; |
@@ -2256,6 +2403,49 @@ abort_export: | |||
2256 | return err; | 2403 | return err; |
2257 | } | 2404 | } |
2258 | 2405 | ||
2406 | /* similar to deny_write_access, but accounts for our holding a reference | ||
2407 | * to the file ourselves */ | ||
2408 | static int deny_bitmap_write_access(struct file * file) | ||
2409 | { | ||
2410 | struct inode *inode = file->f_mapping->host; | ||
2411 | |||
2412 | spin_lock(&inode->i_lock); | ||
2413 | if (atomic_read(&inode->i_writecount) > 1) { | ||
2414 | spin_unlock(&inode->i_lock); | ||
2415 | return -ETXTBSY; | ||
2416 | } | ||
2417 | atomic_set(&inode->i_writecount, -1); | ||
2418 | spin_unlock(&inode->i_lock); | ||
2419 | |||
2420 | return 0; | ||
2421 | } | ||
2422 | |||
2423 | static int set_bitmap_file(mddev_t *mddev, int fd) | ||
2424 | { | ||
2425 | int err; | ||
2426 | |||
2427 | if (mddev->pers) | ||
2428 | return -EBUSY; | ||
2429 | |||
2430 | mddev->bitmap_file = fget(fd); | ||
2431 | |||
2432 | if (mddev->bitmap_file == NULL) { | ||
2433 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", | ||
2434 | mdname(mddev)); | ||
2435 | return -EBADF; | ||
2436 | } | ||
2437 | |||
2438 | err = deny_bitmap_write_access(mddev->bitmap_file); | ||
2439 | if (err) { | ||
2440 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | ||
2441 | mdname(mddev)); | ||
2442 | fput(mddev->bitmap_file); | ||
2443 | mddev->bitmap_file = NULL; | ||
2444 | } else | ||
2445 | mddev->bitmap_offset = 0; /* file overrides offset */ | ||
2446 | return err; | ||
2447 | } | ||
2448 | |||
2259 | /* | 2449 | /* |
2260 | * set_array_info is used two different ways | 2450 | * set_array_info is used two different ways |
2261 | * The original usage is when creating a new array. | 2451 | * The original usage is when creating a new array. |
@@ -2567,8 +2757,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2567 | /* | 2757 | /* |
2568 | * Commands querying/configuring an existing array: | 2758 | * Commands querying/configuring an existing array: |
2569 | */ | 2759 | */ |
2570 | /* if we are initialised yet, only ADD_NEW_DISK or STOP_ARRAY is allowed */ | 2760 | /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY, |
2571 | if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) { | 2761 | * RUN_ARRAY, and SET_BITMAP_FILE are allowed */ |
2762 | if (!mddev->raid_disks && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY | ||
2763 | && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE) { | ||
2572 | err = -ENODEV; | 2764 | err = -ENODEV; |
2573 | goto abort_unlock; | 2765 | goto abort_unlock; |
2574 | } | 2766 | } |
@@ -2582,6 +2774,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2582 | err = get_array_info(mddev, argp); | 2774 | err = get_array_info(mddev, argp); |
2583 | goto done_unlock; | 2775 | goto done_unlock; |
2584 | 2776 | ||
2777 | case GET_BITMAP_FILE: | ||
2778 | err = get_bitmap_file(mddev, (void *)arg); | ||
2779 | goto done_unlock; | ||
2780 | |||
2585 | case GET_DISK_INFO: | 2781 | case GET_DISK_INFO: |
2586 | err = get_disk_info(mddev, argp); | 2782 | err = get_disk_info(mddev, argp); |
2587 | goto done_unlock; | 2783 | goto done_unlock; |
@@ -2662,6 +2858,10 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2662 | err = do_md_run (mddev); | 2858 | err = do_md_run (mddev); |
2663 | goto done_unlock; | 2859 | goto done_unlock; |
2664 | 2860 | ||
2861 | case SET_BITMAP_FILE: | ||
2862 | err = set_bitmap_file(mddev, (int)arg); | ||
2863 | goto done_unlock; | ||
2864 | |||
2665 | default: | 2865 | default: |
2666 | if (_IOC_TYPE(cmd) == MD_MAJOR) | 2866 | if (_IOC_TYPE(cmd) == MD_MAJOR) |
2667 | printk(KERN_WARNING "md: %s(pid %d) used" | 2867 | printk(KERN_WARNING "md: %s(pid %d) used" |
@@ -2773,8 +2973,9 @@ static int md_thread(void * arg) | |||
2773 | while (thread->run) { | 2973 | while (thread->run) { |
2774 | void (*run)(mddev_t *); | 2974 | void (*run)(mddev_t *); |
2775 | 2975 | ||
2776 | wait_event_interruptible(thread->wqueue, | 2976 | wait_event_interruptible_timeout(thread->wqueue, |
2777 | test_bit(THREAD_WAKEUP, &thread->flags)); | 2977 | test_bit(THREAD_WAKEUP, &thread->flags), |
2978 | thread->timeout); | ||
2778 | if (current->flags & PF_FREEZE) | 2979 | if (current->flags & PF_FREEZE) |
2779 | refrigerator(PF_FREEZE); | 2980 | refrigerator(PF_FREEZE); |
2780 | 2981 | ||
@@ -2820,6 +3021,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
2820 | thread->run = run; | 3021 | thread->run = run; |
2821 | thread->mddev = mddev; | 3022 | thread->mddev = mddev; |
2822 | thread->name = name; | 3023 | thread->name = name; |
3024 | thread->timeout = MAX_SCHEDULE_TIMEOUT; | ||
2823 | ret = kernel_thread(md_thread, thread, 0); | 3025 | ret = kernel_thread(md_thread, thread, 0); |
2824 | if (ret < 0) { | 3026 | if (ret < 0) { |
2825 | kfree(thread); | 3027 | kfree(thread); |
@@ -2858,13 +3060,13 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
2858 | 3060 | ||
2859 | if (!rdev || rdev->faulty) | 3061 | if (!rdev || rdev->faulty) |
2860 | return; | 3062 | return; |
2861 | 3063 | /* | |
2862 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", | 3064 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", |
2863 | mdname(mddev), | 3065 | mdname(mddev), |
2864 | MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev), | 3066 | MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev), |
2865 | __builtin_return_address(0),__builtin_return_address(1), | 3067 | __builtin_return_address(0),__builtin_return_address(1), |
2866 | __builtin_return_address(2),__builtin_return_address(3)); | 3068 | __builtin_return_address(2),__builtin_return_address(3)); |
2867 | 3069 | */ | |
2868 | if (!mddev->pers->error_handler) | 3070 | if (!mddev->pers->error_handler) |
2869 | return; | 3071 | return; |
2870 | mddev->pers->error_handler(mddev,rdev); | 3072 | mddev->pers->error_handler(mddev,rdev); |
@@ -3018,6 +3220,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3018 | struct list_head *tmp2; | 3220 | struct list_head *tmp2; |
3019 | mdk_rdev_t *rdev; | 3221 | mdk_rdev_t *rdev; |
3020 | int i; | 3222 | int i; |
3223 | struct bitmap *bitmap; | ||
3021 | 3224 | ||
3022 | if (v == (void*)1) { | 3225 | if (v == (void*)1) { |
3023 | seq_printf(seq, "Personalities : "); | 3226 | seq_printf(seq, "Personalities : "); |
@@ -3070,10 +3273,35 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3070 | if (mddev->pers) { | 3273 | if (mddev->pers) { |
3071 | mddev->pers->status (seq, mddev); | 3274 | mddev->pers->status (seq, mddev); |
3072 | seq_printf(seq, "\n "); | 3275 | seq_printf(seq, "\n "); |
3073 | if (mddev->curr_resync > 2) | 3276 | if (mddev->curr_resync > 2) { |
3074 | status_resync (seq, mddev); | 3277 | status_resync (seq, mddev); |
3075 | else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) | 3278 | seq_printf(seq, "\n "); |
3076 | seq_printf(seq, " resync=DELAYED"); | 3279 | } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) |
3280 | seq_printf(seq, " resync=DELAYED\n "); | ||
3281 | } else | ||
3282 | seq_printf(seq, "\n "); | ||
3283 | |||
3284 | if ((bitmap = mddev->bitmap)) { | ||
3285 | unsigned long chunk_kb; | ||
3286 | unsigned long flags; | ||
3287 | spin_lock_irqsave(&bitmap->lock, flags); | ||
3288 | chunk_kb = bitmap->chunksize >> 10; | ||
3289 | seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " | ||
3290 | "%lu%s chunk", | ||
3291 | bitmap->pages - bitmap->missing_pages, | ||
3292 | bitmap->pages, | ||
3293 | (bitmap->pages - bitmap->missing_pages) | ||
3294 | << (PAGE_SHIFT - 10), | ||
3295 | chunk_kb ? chunk_kb : bitmap->chunksize, | ||
3296 | chunk_kb ? "KB" : "B"); | ||
3297 | if (bitmap->file) { | ||
3298 | seq_printf(seq, ", file: "); | ||
3299 | seq_path(seq, bitmap->file->f_vfsmnt, | ||
3300 | bitmap->file->f_dentry," \t\n"); | ||
3301 | } | ||
3302 | |||
3303 | seq_printf(seq, "\n"); | ||
3304 | spin_unlock_irqrestore(&bitmap->lock, flags); | ||
3077 | } | 3305 | } |
3078 | 3306 | ||
3079 | seq_printf(seq, "\n"); | 3307 | seq_printf(seq, "\n"); |
@@ -3176,19 +3404,28 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) | |||
3176 | } | 3404 | } |
3177 | 3405 | ||
3178 | 3406 | ||
3179 | void md_write_start(mddev_t *mddev) | 3407 | /* md_write_start(mddev, bi) |
3408 | * If we need to update some array metadata (e.g. 'active' flag | ||
3409 | * in superblock) before writing, schedule a superblock update | ||
3410 | * and wait for it to complete. | ||
3411 | */ | ||
3412 | void md_write_start(mddev_t *mddev, struct bio *bi) | ||
3180 | { | 3413 | { |
3181 | if (!atomic_read(&mddev->writes_pending)) { | 3414 | DEFINE_WAIT(w); |
3182 | mddev_lock_uninterruptible(mddev); | 3415 | if (bio_data_dir(bi) != WRITE) |
3416 | return; | ||
3417 | |||
3418 | atomic_inc(&mddev->writes_pending); | ||
3419 | if (mddev->in_sync) { | ||
3420 | spin_lock(&mddev->write_lock); | ||
3183 | if (mddev->in_sync) { | 3421 | if (mddev->in_sync) { |
3184 | mddev->in_sync = 0; | 3422 | mddev->in_sync = 0; |
3185 | del_timer(&mddev->safemode_timer); | 3423 | mddev->sb_dirty = 1; |
3186 | md_update_sb(mddev); | 3424 | md_wakeup_thread(mddev->thread); |
3187 | } | 3425 | } |
3188 | atomic_inc(&mddev->writes_pending); | 3426 | spin_unlock(&mddev->write_lock); |
3189 | mddev_unlock(mddev); | 3427 | } |
3190 | } else | 3428 | wait_event(mddev->sb_wait, mddev->sb_dirty==0); |
3191 | atomic_inc(&mddev->writes_pending); | ||
3192 | } | 3429 | } |
3193 | 3430 | ||
3194 | void md_write_end(mddev_t *mddev) | 3431 | void md_write_end(mddev_t *mddev) |
@@ -3201,37 +3438,6 @@ void md_write_end(mddev_t *mddev) | |||
3201 | } | 3438 | } |
3202 | } | 3439 | } |
3203 | 3440 | ||
3204 | static inline void md_enter_safemode(mddev_t *mddev) | ||
3205 | { | ||
3206 | if (!mddev->safemode) return; | ||
3207 | if (mddev->safemode == 2 && | ||
3208 | (atomic_read(&mddev->writes_pending) || mddev->in_sync || | ||
3209 | mddev->recovery_cp != MaxSector)) | ||
3210 | return; /* avoid the lock */ | ||
3211 | mddev_lock_uninterruptible(mddev); | ||
3212 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | ||
3213 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | ||
3214 | mddev->in_sync = 1; | ||
3215 | md_update_sb(mddev); | ||
3216 | } | ||
3217 | mddev_unlock(mddev); | ||
3218 | |||
3219 | if (mddev->safemode == 1) | ||
3220 | mddev->safemode = 0; | ||
3221 | } | ||
3222 | |||
3223 | void md_handle_safemode(mddev_t *mddev) | ||
3224 | { | ||
3225 | if (signal_pending(current)) { | ||
3226 | printk(KERN_INFO "md: %s in immediate safe mode\n", | ||
3227 | mdname(mddev)); | ||
3228 | mddev->safemode = 2; | ||
3229 | flush_signals(current); | ||
3230 | } | ||
3231 | md_enter_safemode(mddev); | ||
3232 | } | ||
3233 | |||
3234 | |||
3235 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | 3441 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); |
3236 | 3442 | ||
3237 | #define SYNC_MARKS 10 | 3443 | #define SYNC_MARKS 10 |
@@ -3241,12 +3447,13 @@ static void md_do_sync(mddev_t *mddev) | |||
3241 | mddev_t *mddev2; | 3447 | mddev_t *mddev2; |
3242 | unsigned int currspeed = 0, | 3448 | unsigned int currspeed = 0, |
3243 | window; | 3449 | window; |
3244 | sector_t max_sectors,j; | 3450 | sector_t max_sectors,j, io_sectors; |
3245 | unsigned long mark[SYNC_MARKS]; | 3451 | unsigned long mark[SYNC_MARKS]; |
3246 | sector_t mark_cnt[SYNC_MARKS]; | 3452 | sector_t mark_cnt[SYNC_MARKS]; |
3247 | int last_mark,m; | 3453 | int last_mark,m; |
3248 | struct list_head *tmp; | 3454 | struct list_head *tmp; |
3249 | sector_t last_check; | 3455 | sector_t last_check; |
3456 | int skipped = 0; | ||
3250 | 3457 | ||
3251 | /* just incase thread restarts... */ | 3458 | /* just incase thread restarts... */ |
3252 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) | 3459 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) |
@@ -3312,7 +3519,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3312 | 3519 | ||
3313 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 3520 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) |
3314 | /* resync follows the size requested by the personality, | 3521 | /* resync follows the size requested by the personality, |
3315 | * which default to physical size, but can be virtual size | 3522 | * which defaults to physical size, but can be virtual size |
3316 | */ | 3523 | */ |
3317 | max_sectors = mddev->resync_max_sectors; | 3524 | max_sectors = mddev->resync_max_sectors; |
3318 | else | 3525 | else |
@@ -3327,13 +3534,15 @@ static void md_do_sync(mddev_t *mddev) | |||
3327 | sysctl_speed_limit_max); | 3534 | sysctl_speed_limit_max); |
3328 | 3535 | ||
3329 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 3536 | is_mddev_idle(mddev); /* this also initializes IO event counters */ |
3330 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 3537 | /* we don't use the checkpoint if there's a bitmap */ |
3538 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap) | ||
3331 | j = mddev->recovery_cp; | 3539 | j = mddev->recovery_cp; |
3332 | else | 3540 | else |
3333 | j = 0; | 3541 | j = 0; |
3542 | io_sectors = 0; | ||
3334 | for (m = 0; m < SYNC_MARKS; m++) { | 3543 | for (m = 0; m < SYNC_MARKS; m++) { |
3335 | mark[m] = jiffies; | 3544 | mark[m] = jiffies; |
3336 | mark_cnt[m] = j; | 3545 | mark_cnt[m] = io_sectors; |
3337 | } | 3546 | } |
3338 | last_mark = 0; | 3547 | last_mark = 0; |
3339 | mddev->resync_mark = mark[last_mark]; | 3548 | mddev->resync_mark = mark[last_mark]; |
@@ -3358,21 +3567,29 @@ static void md_do_sync(mddev_t *mddev) | |||
3358 | } | 3567 | } |
3359 | 3568 | ||
3360 | while (j < max_sectors) { | 3569 | while (j < max_sectors) { |
3361 | int sectors; | 3570 | sector_t sectors; |
3362 | 3571 | ||
3363 | sectors = mddev->pers->sync_request(mddev, j, currspeed < sysctl_speed_limit_min); | 3572 | skipped = 0; |
3364 | if (sectors < 0) { | 3573 | sectors = mddev->pers->sync_request(mddev, j, &skipped, |
3574 | currspeed < sysctl_speed_limit_min); | ||
3575 | if (sectors == 0) { | ||
3365 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 3576 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); |
3366 | goto out; | 3577 | goto out; |
3367 | } | 3578 | } |
3368 | atomic_add(sectors, &mddev->recovery_active); | 3579 | |
3580 | if (!skipped) { /* actual IO requested */ | ||
3581 | io_sectors += sectors; | ||
3582 | atomic_add(sectors, &mddev->recovery_active); | ||
3583 | } | ||
3584 | |||
3369 | j += sectors; | 3585 | j += sectors; |
3370 | if (j>1) mddev->curr_resync = j; | 3586 | if (j>1) mddev->curr_resync = j; |
3371 | 3587 | ||
3372 | if (last_check + window > j || j == max_sectors) | 3588 | |
3589 | if (last_check + window > io_sectors || j == max_sectors) | ||
3373 | continue; | 3590 | continue; |
3374 | 3591 | ||
3375 | last_check = j; | 3592 | last_check = io_sectors; |
3376 | 3593 | ||
3377 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) || | 3594 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) || |
3378 | test_bit(MD_RECOVERY_ERR, &mddev->recovery)) | 3595 | test_bit(MD_RECOVERY_ERR, &mddev->recovery)) |
@@ -3386,7 +3603,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3386 | mddev->resync_mark = mark[next]; | 3603 | mddev->resync_mark = mark[next]; |
3387 | mddev->resync_mark_cnt = mark_cnt[next]; | 3604 | mddev->resync_mark_cnt = mark_cnt[next]; |
3388 | mark[next] = jiffies; | 3605 | mark[next] = jiffies; |
3389 | mark_cnt[next] = j - atomic_read(&mddev->recovery_active); | 3606 | mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active); |
3390 | last_mark = next; | 3607 | last_mark = next; |
3391 | } | 3608 | } |
3392 | 3609 | ||
@@ -3413,7 +3630,8 @@ static void md_do_sync(mddev_t *mddev) | |||
3413 | mddev->queue->unplug_fn(mddev->queue); | 3630 | mddev->queue->unplug_fn(mddev->queue); |
3414 | cond_resched(); | 3631 | cond_resched(); |
3415 | 3632 | ||
3416 | currspeed = ((unsigned long)(j-mddev->resync_mark_cnt))/2/((jiffies-mddev->resync_mark)/HZ +1) +1; | 3633 | currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 |
3634 | /((jiffies-mddev->resync_mark)/HZ +1) +1; | ||
3417 | 3635 | ||
3418 | if (currspeed > sysctl_speed_limit_min) { | 3636 | if (currspeed > sysctl_speed_limit_min) { |
3419 | if ((currspeed > sysctl_speed_limit_max) || | 3637 | if ((currspeed > sysctl_speed_limit_max) || |
@@ -3433,7 +3651,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3433 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); | 3651 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); |
3434 | 3652 | ||
3435 | /* tell personality that we are finished */ | 3653 | /* tell personality that we are finished */ |
3436 | mddev->pers->sync_request(mddev, max_sectors, 1); | 3654 | mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); |
3437 | 3655 | ||
3438 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && | 3656 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && |
3439 | mddev->curr_resync > 2 && | 3657 | mddev->curr_resync > 2 && |
@@ -3447,7 +3665,6 @@ static void md_do_sync(mddev_t *mddev) | |||
3447 | mddev->recovery_cp = MaxSector; | 3665 | mddev->recovery_cp = MaxSector; |
3448 | } | 3666 | } |
3449 | 3667 | ||
3450 | md_enter_safemode(mddev); | ||
3451 | skip: | 3668 | skip: |
3452 | mddev->curr_resync = 0; | 3669 | mddev->curr_resync = 0; |
3453 | wake_up(&resync_wait); | 3670 | wake_up(&resync_wait); |
@@ -3484,20 +3701,48 @@ void md_check_recovery(mddev_t *mddev) | |||
3484 | struct list_head *rtmp; | 3701 | struct list_head *rtmp; |
3485 | 3702 | ||
3486 | 3703 | ||
3487 | dprintk(KERN_INFO "md: recovery thread got woken up ...\n"); | 3704 | if (mddev->bitmap) |
3705 | bitmap_daemon_work(mddev->bitmap); | ||
3488 | 3706 | ||
3489 | if (mddev->ro) | 3707 | if (mddev->ro) |
3490 | return; | 3708 | return; |
3709 | |||
3710 | if (signal_pending(current)) { | ||
3711 | if (mddev->pers->sync_request) { | ||
3712 | printk(KERN_INFO "md: %s in immediate safe mode\n", | ||
3713 | mdname(mddev)); | ||
3714 | mddev->safemode = 2; | ||
3715 | } | ||
3716 | flush_signals(current); | ||
3717 | } | ||
3718 | |||
3491 | if ( ! ( | 3719 | if ( ! ( |
3492 | mddev->sb_dirty || | 3720 | mddev->sb_dirty || |
3493 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || | 3721 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || |
3494 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) | 3722 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || |
3723 | (mddev->safemode == 1) || | ||
3724 | (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) | ||
3725 | && !mddev->in_sync && mddev->recovery_cp == MaxSector) | ||
3495 | )) | 3726 | )) |
3496 | return; | 3727 | return; |
3728 | |||
3497 | if (mddev_trylock(mddev)==0) { | 3729 | if (mddev_trylock(mddev)==0) { |
3498 | int spares =0; | 3730 | int spares =0; |
3731 | |||
3732 | spin_lock(&mddev->write_lock); | ||
3733 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | ||
3734 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | ||
3735 | mddev->in_sync = 1; | ||
3736 | mddev->sb_dirty = 1; | ||
3737 | } | ||
3738 | if (mddev->safemode == 1) | ||
3739 | mddev->safemode = 0; | ||
3740 | spin_unlock(&mddev->write_lock); | ||
3741 | |||
3499 | if (mddev->sb_dirty) | 3742 | if (mddev->sb_dirty) |
3500 | md_update_sb(mddev); | 3743 | md_update_sb(mddev); |
3744 | |||
3745 | |||
3501 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && | 3746 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && |
3502 | !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { | 3747 | !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { |
3503 | /* resync/recovery still happening */ | 3748 | /* resync/recovery still happening */ |
@@ -3515,6 +3760,14 @@ void md_check_recovery(mddev_t *mddev) | |||
3515 | mddev->pers->spare_active(mddev); | 3760 | mddev->pers->spare_active(mddev); |
3516 | } | 3761 | } |
3517 | md_update_sb(mddev); | 3762 | md_update_sb(mddev); |
3763 | |||
3764 | /* if array is no-longer degraded, then any saved_raid_disk | ||
3765 | * information must be scrapped | ||
3766 | */ | ||
3767 | if (!mddev->degraded) | ||
3768 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
3769 | rdev->saved_raid_disk = -1; | ||
3770 | |||
3518 | mddev->recovery = 0; | 3771 | mddev->recovery = 0; |
3519 | /* flag recovery needed just to double check */ | 3772 | /* flag recovery needed just to double check */ |
3520 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3773 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -3557,6 +3810,13 @@ void md_check_recovery(mddev_t *mddev) | |||
3557 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 3810 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
3558 | if (!spares) | 3811 | if (!spares) |
3559 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 3812 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
3813 | if (spares && mddev->bitmap && ! mddev->bitmap->file) { | ||
3814 | /* We are adding a device or devices to an array | ||
3815 | * which has the bitmap stored on all devices. | ||
3816 | * So make sure all bitmap pages get written | ||
3817 | */ | ||
3818 | bitmap_write_all(mddev->bitmap); | ||
3819 | } | ||
3560 | mddev->sync_thread = md_register_thread(md_do_sync, | 3820 | mddev->sync_thread = md_register_thread(md_do_sync, |
3561 | mddev, | 3821 | mddev, |
3562 | "%s_resync"); | 3822 | "%s_resync"); |
@@ -3624,6 +3884,8 @@ static int __init md_init(void) | |||
3624 | " MD_SB_DISKS=%d\n", | 3884 | " MD_SB_DISKS=%d\n", |
3625 | MD_MAJOR_VERSION, MD_MINOR_VERSION, | 3885 | MD_MAJOR_VERSION, MD_MINOR_VERSION, |
3626 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); | 3886 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); |
3887 | printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR, | ||
3888 | BITMAP_MINOR); | ||
3627 | 3889 | ||
3628 | if (register_blkdev(MAJOR_NR, "md")) | 3890 | if (register_blkdev(MAJOR_NR, "md")) |
3629 | return -1; | 3891 | return -1; |
@@ -3739,7 +4001,6 @@ EXPORT_SYMBOL(md_error); | |||
3739 | EXPORT_SYMBOL(md_done_sync); | 4001 | EXPORT_SYMBOL(md_done_sync); |
3740 | EXPORT_SYMBOL(md_write_start); | 4002 | EXPORT_SYMBOL(md_write_start); |
3741 | EXPORT_SYMBOL(md_write_end); | 4003 | EXPORT_SYMBOL(md_write_end); |
3742 | EXPORT_SYMBOL(md_handle_safemode); | ||
3743 | EXPORT_SYMBOL(md_register_thread); | 4004 | EXPORT_SYMBOL(md_register_thread); |
3744 | EXPORT_SYMBOL(md_unregister_thread); | 4005 | EXPORT_SYMBOL(md_unregister_thread); |
3745 | EXPORT_SYMBOL(md_wakeup_thread); | 4006 | EXPORT_SYMBOL(md_wakeup_thread); |