diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 636 |
1 files changed, 520 insertions, 116 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 9ecf51ee596f..adf960d8a7c9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -131,6 +131,8 @@ static ctl_table raid_root_table[] = { | |||
131 | 131 | ||
132 | static struct block_device_operations md_fops; | 132 | static struct block_device_operations md_fops; |
133 | 133 | ||
134 | static int start_readonly; | ||
135 | |||
134 | /* | 136 | /* |
135 | * Enables iteration over all existing md arrays | 137 | * Enables iteration over all existing md arrays |
136 | * all_mddevs_lock protects this list. | 138 | * all_mddevs_lock protects this list. |
@@ -181,7 +183,7 @@ static void mddev_put(mddev_t *mddev) | |||
181 | if (!mddev->raid_disks && list_empty(&mddev->disks)) { | 183 | if (!mddev->raid_disks && list_empty(&mddev->disks)) { |
182 | list_del(&mddev->all_mddevs); | 184 | list_del(&mddev->all_mddevs); |
183 | blk_put_queue(mddev->queue); | 185 | blk_put_queue(mddev->queue); |
184 | kfree(mddev); | 186 | kobject_unregister(&mddev->kobj); |
185 | } | 187 | } |
186 | spin_unlock(&all_mddevs_lock); | 188 | spin_unlock(&all_mddevs_lock); |
187 | } | 189 | } |
@@ -330,18 +332,46 @@ static void free_disk_sb(mdk_rdev_t * rdev) | |||
330 | static int super_written(struct bio *bio, unsigned int bytes_done, int error) | 332 | static int super_written(struct bio *bio, unsigned int bytes_done, int error) |
331 | { | 333 | { |
332 | mdk_rdev_t *rdev = bio->bi_private; | 334 | mdk_rdev_t *rdev = bio->bi_private; |
335 | mddev_t *mddev = rdev->mddev; | ||
333 | if (bio->bi_size) | 336 | if (bio->bi_size) |
334 | return 1; | 337 | return 1; |
335 | 338 | ||
336 | if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) | 339 | if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) |
337 | md_error(rdev->mddev, rdev); | 340 | md_error(mddev, rdev); |
338 | 341 | ||
339 | if (atomic_dec_and_test(&rdev->mddev->pending_writes)) | 342 | if (atomic_dec_and_test(&mddev->pending_writes)) |
340 | wake_up(&rdev->mddev->sb_wait); | 343 | wake_up(&mddev->sb_wait); |
341 | bio_put(bio); | 344 | bio_put(bio); |
342 | return 0; | 345 | return 0; |
343 | } | 346 | } |
344 | 347 | ||
348 | static int super_written_barrier(struct bio *bio, unsigned int bytes_done, int error) | ||
349 | { | ||
350 | struct bio *bio2 = bio->bi_private; | ||
351 | mdk_rdev_t *rdev = bio2->bi_private; | ||
352 | mddev_t *mddev = rdev->mddev; | ||
353 | if (bio->bi_size) | ||
354 | return 1; | ||
355 | |||
356 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && | ||
357 | error == -EOPNOTSUPP) { | ||
358 | unsigned long flags; | ||
359 | /* barriers don't appear to be supported :-( */ | ||
360 | set_bit(BarriersNotsupp, &rdev->flags); | ||
361 | mddev->barriers_work = 0; | ||
362 | spin_lock_irqsave(&mddev->write_lock, flags); | ||
363 | bio2->bi_next = mddev->biolist; | ||
364 | mddev->biolist = bio2; | ||
365 | spin_unlock_irqrestore(&mddev->write_lock, flags); | ||
366 | wake_up(&mddev->sb_wait); | ||
367 | bio_put(bio); | ||
368 | return 0; | ||
369 | } | ||
370 | bio_put(bio2); | ||
371 | bio->bi_private = rdev; | ||
372 | return super_written(bio, bytes_done, error); | ||
373 | } | ||
374 | |||
345 | void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | 375 | void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, |
346 | sector_t sector, int size, struct page *page) | 376 | sector_t sector, int size, struct page *page) |
347 | { | 377 | { |
@@ -350,16 +380,54 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | |||
350 | * and decrement it on completion, waking up sb_wait | 380 | * and decrement it on completion, waking up sb_wait |
351 | * if zero is reached. | 381 | * if zero is reached. |
352 | * If an error occurred, call md_error | 382 | * If an error occurred, call md_error |
383 | * | ||
384 | * As we might need to resubmit the request if BIO_RW_BARRIER | ||
385 | * causes EOPNOTSUPP, we allocate a spare bio... | ||
353 | */ | 386 | */ |
354 | struct bio *bio = bio_alloc(GFP_NOIO, 1); | 387 | struct bio *bio = bio_alloc(GFP_NOIO, 1); |
388 | int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNC); | ||
355 | 389 | ||
356 | bio->bi_bdev = rdev->bdev; | 390 | bio->bi_bdev = rdev->bdev; |
357 | bio->bi_sector = sector; | 391 | bio->bi_sector = sector; |
358 | bio_add_page(bio, page, size, 0); | 392 | bio_add_page(bio, page, size, 0); |
359 | bio->bi_private = rdev; | 393 | bio->bi_private = rdev; |
360 | bio->bi_end_io = super_written; | 394 | bio->bi_end_io = super_written; |
395 | bio->bi_rw = rw; | ||
396 | |||
361 | atomic_inc(&mddev->pending_writes); | 397 | atomic_inc(&mddev->pending_writes); |
362 | submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio); | 398 | if (!test_bit(BarriersNotsupp, &rdev->flags)) { |
399 | struct bio *rbio; | ||
400 | rw |= (1<<BIO_RW_BARRIER); | ||
401 | rbio = bio_clone(bio, GFP_NOIO); | ||
402 | rbio->bi_private = bio; | ||
403 | rbio->bi_end_io = super_written_barrier; | ||
404 | submit_bio(rw, rbio); | ||
405 | } else | ||
406 | submit_bio(rw, bio); | ||
407 | } | ||
408 | |||
409 | void md_super_wait(mddev_t *mddev) | ||
410 | { | ||
411 | /* wait for all superblock writes that were scheduled to complete. | ||
412 | * if any had to be retried (due to BARRIER problems), retry them | ||
413 | */ | ||
414 | DEFINE_WAIT(wq); | ||
415 | for(;;) { | ||
416 | prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE); | ||
417 | if (atomic_read(&mddev->pending_writes)==0) | ||
418 | break; | ||
419 | while (mddev->biolist) { | ||
420 | struct bio *bio; | ||
421 | spin_lock_irq(&mddev->write_lock); | ||
422 | bio = mddev->biolist; | ||
423 | mddev->biolist = bio->bi_next ; | ||
424 | bio->bi_next = NULL; | ||
425 | spin_unlock_irq(&mddev->write_lock); | ||
426 | submit_bio(bio->bi_rw, bio); | ||
427 | } | ||
428 | schedule(); | ||
429 | } | ||
430 | finish_wait(&mddev->sb_wait, &wq); | ||
363 | } | 431 | } |
364 | 432 | ||
365 | static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) | 433 | static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) |
@@ -610,7 +678,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
610 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); | 678 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); |
611 | 679 | ||
612 | rdev->raid_disk = -1; | 680 | rdev->raid_disk = -1; |
613 | rdev->in_sync = 0; | 681 | rdev->flags = 0; |
614 | if (mddev->raid_disks == 0) { | 682 | if (mddev->raid_disks == 0) { |
615 | mddev->major_version = 0; | 683 | mddev->major_version = 0; |
616 | mddev->minor_version = sb->minor_version; | 684 | mddev->minor_version = sb->minor_version; |
@@ -671,21 +739,19 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
671 | return 0; | 739 | return 0; |
672 | 740 | ||
673 | if (mddev->level != LEVEL_MULTIPATH) { | 741 | if (mddev->level != LEVEL_MULTIPATH) { |
674 | rdev->faulty = 0; | ||
675 | rdev->flags = 0; | ||
676 | desc = sb->disks + rdev->desc_nr; | 742 | desc = sb->disks + rdev->desc_nr; |
677 | 743 | ||
678 | if (desc->state & (1<<MD_DISK_FAULTY)) | 744 | if (desc->state & (1<<MD_DISK_FAULTY)) |
679 | rdev->faulty = 1; | 745 | set_bit(Faulty, &rdev->flags); |
680 | else if (desc->state & (1<<MD_DISK_SYNC) && | 746 | else if (desc->state & (1<<MD_DISK_SYNC) && |
681 | desc->raid_disk < mddev->raid_disks) { | 747 | desc->raid_disk < mddev->raid_disks) { |
682 | rdev->in_sync = 1; | 748 | set_bit(In_sync, &rdev->flags); |
683 | rdev->raid_disk = desc->raid_disk; | 749 | rdev->raid_disk = desc->raid_disk; |
684 | } | 750 | } |
685 | if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) | 751 | if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) |
686 | set_bit(WriteMostly, &rdev->flags); | 752 | set_bit(WriteMostly, &rdev->flags); |
687 | } else /* MULTIPATH are always insync */ | 753 | } else /* MULTIPATH are always insync */ |
688 | rdev->in_sync = 1; | 754 | set_bit(In_sync, &rdev->flags); |
689 | return 0; | 755 | return 0; |
690 | } | 756 | } |
691 | 757 | ||
@@ -699,6 +765,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
699 | mdk_rdev_t *rdev2; | 765 | mdk_rdev_t *rdev2; |
700 | int next_spare = mddev->raid_disks; | 766 | int next_spare = mddev->raid_disks; |
701 | 767 | ||
768 | |||
702 | /* make rdev->sb match mddev data.. | 769 | /* make rdev->sb match mddev data.. |
703 | * | 770 | * |
704 | * 1/ zero out disks | 771 | * 1/ zero out disks |
@@ -758,23 +825,27 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
758 | sb->disks[0].state = (1<<MD_DISK_REMOVED); | 825 | sb->disks[0].state = (1<<MD_DISK_REMOVED); |
759 | ITERATE_RDEV(mddev,rdev2,tmp) { | 826 | ITERATE_RDEV(mddev,rdev2,tmp) { |
760 | mdp_disk_t *d; | 827 | mdp_disk_t *d; |
761 | if (rdev2->raid_disk >= 0 && rdev2->in_sync && !rdev2->faulty) | 828 | int desc_nr; |
762 | rdev2->desc_nr = rdev2->raid_disk; | 829 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) |
830 | && !test_bit(Faulty, &rdev2->flags)) | ||
831 | desc_nr = rdev2->raid_disk; | ||
763 | else | 832 | else |
764 | rdev2->desc_nr = next_spare++; | 833 | desc_nr = next_spare++; |
834 | rdev2->desc_nr = desc_nr; | ||
765 | d = &sb->disks[rdev2->desc_nr]; | 835 | d = &sb->disks[rdev2->desc_nr]; |
766 | nr_disks++; | 836 | nr_disks++; |
767 | d->number = rdev2->desc_nr; | 837 | d->number = rdev2->desc_nr; |
768 | d->major = MAJOR(rdev2->bdev->bd_dev); | 838 | d->major = MAJOR(rdev2->bdev->bd_dev); |
769 | d->minor = MINOR(rdev2->bdev->bd_dev); | 839 | d->minor = MINOR(rdev2->bdev->bd_dev); |
770 | if (rdev2->raid_disk >= 0 && rdev->in_sync && !rdev2->faulty) | 840 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) |
841 | && !test_bit(Faulty, &rdev2->flags)) | ||
771 | d->raid_disk = rdev2->raid_disk; | 842 | d->raid_disk = rdev2->raid_disk; |
772 | else | 843 | else |
773 | d->raid_disk = rdev2->desc_nr; /* compatibility */ | 844 | d->raid_disk = rdev2->desc_nr; /* compatibility */ |
774 | if (rdev2->faulty) { | 845 | if (test_bit(Faulty, &rdev2->flags)) { |
775 | d->state = (1<<MD_DISK_FAULTY); | 846 | d->state = (1<<MD_DISK_FAULTY); |
776 | failed++; | 847 | failed++; |
777 | } else if (rdev2->in_sync) { | 848 | } else if (test_bit(In_sync, &rdev2->flags)) { |
778 | d->state = (1<<MD_DISK_ACTIVE); | 849 | d->state = (1<<MD_DISK_ACTIVE); |
779 | d->state |= (1<<MD_DISK_SYNC); | 850 | d->state |= (1<<MD_DISK_SYNC); |
780 | active++; | 851 | active++; |
@@ -787,7 +858,6 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
787 | if (test_bit(WriteMostly, &rdev2->flags)) | 858 | if (test_bit(WriteMostly, &rdev2->flags)) |
788 | d->state |= (1<<MD_DISK_WRITEMOSTLY); | 859 | d->state |= (1<<MD_DISK_WRITEMOSTLY); |
789 | } | 860 | } |
790 | |||
791 | /* now set the "removed" and "faulty" bits on any missing devices */ | 861 | /* now set the "removed" and "faulty" bits on any missing devices */ |
792 | for (i=0 ; i < mddev->raid_disks ; i++) { | 862 | for (i=0 ; i < mddev->raid_disks ; i++) { |
793 | mdp_disk_t *d = &sb->disks[i]; | 863 | mdp_disk_t *d = &sb->disks[i]; |
@@ -944,7 +1014,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
944 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); | 1014 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); |
945 | 1015 | ||
946 | rdev->raid_disk = -1; | 1016 | rdev->raid_disk = -1; |
947 | rdev->in_sync = 0; | 1017 | rdev->flags = 0; |
948 | if (mddev->raid_disks == 0) { | 1018 | if (mddev->raid_disks == 0) { |
949 | mddev->major_version = 1; | 1019 | mddev->major_version = 1; |
950 | mddev->patch_version = 0; | 1020 | mddev->patch_version = 0; |
@@ -996,22 +1066,19 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
996 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 1066 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); |
997 | switch(role) { | 1067 | switch(role) { |
998 | case 0xffff: /* spare */ | 1068 | case 0xffff: /* spare */ |
999 | rdev->faulty = 0; | ||
1000 | break; | 1069 | break; |
1001 | case 0xfffe: /* faulty */ | 1070 | case 0xfffe: /* faulty */ |
1002 | rdev->faulty = 1; | 1071 | set_bit(Faulty, &rdev->flags); |
1003 | break; | 1072 | break; |
1004 | default: | 1073 | default: |
1005 | rdev->in_sync = 1; | 1074 | set_bit(In_sync, &rdev->flags); |
1006 | rdev->faulty = 0; | ||
1007 | rdev->raid_disk = role; | 1075 | rdev->raid_disk = role; |
1008 | break; | 1076 | break; |
1009 | } | 1077 | } |
1010 | rdev->flags = 0; | ||
1011 | if (sb->devflags & WriteMostly1) | 1078 | if (sb->devflags & WriteMostly1) |
1012 | set_bit(WriteMostly, &rdev->flags); | 1079 | set_bit(WriteMostly, &rdev->flags); |
1013 | } else /* MULTIPATH are always insync */ | 1080 | } else /* MULTIPATH are always insync */ |
1014 | rdev->in_sync = 1; | 1081 | set_bit(In_sync, &rdev->flags); |
1015 | 1082 | ||
1016 | return 0; | 1083 | return 0; |
1017 | } | 1084 | } |
@@ -1055,9 +1122,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1055 | 1122 | ||
1056 | ITERATE_RDEV(mddev,rdev2,tmp) { | 1123 | ITERATE_RDEV(mddev,rdev2,tmp) { |
1057 | i = rdev2->desc_nr; | 1124 | i = rdev2->desc_nr; |
1058 | if (rdev2->faulty) | 1125 | if (test_bit(Faulty, &rdev2->flags)) |
1059 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1126 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1060 | else if (rdev2->in_sync) | 1127 | else if (test_bit(In_sync, &rdev2->flags)) |
1061 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); | 1128 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); |
1062 | else | 1129 | else |
1063 | sb->dev_roles[i] = cpu_to_le16(0xffff); | 1130 | sb->dev_roles[i] = cpu_to_le16(0xffff); |
@@ -1115,6 +1182,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1115 | { | 1182 | { |
1116 | mdk_rdev_t *same_pdev; | 1183 | mdk_rdev_t *same_pdev; |
1117 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; | 1184 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; |
1185 | struct kobject *ko; | ||
1118 | 1186 | ||
1119 | if (rdev->mddev) { | 1187 | if (rdev->mddev) { |
1120 | MD_BUG(); | 1188 | MD_BUG(); |
@@ -1143,10 +1211,22 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1143 | if (find_rdev_nr(mddev, rdev->desc_nr)) | 1211 | if (find_rdev_nr(mddev, rdev->desc_nr)) |
1144 | return -EBUSY; | 1212 | return -EBUSY; |
1145 | } | 1213 | } |
1214 | bdevname(rdev->bdev,b); | ||
1215 | if (kobject_set_name(&rdev->kobj, "dev-%s", b) < 0) | ||
1216 | return -ENOMEM; | ||
1146 | 1217 | ||
1147 | list_add(&rdev->same_set, &mddev->disks); | 1218 | list_add(&rdev->same_set, &mddev->disks); |
1148 | rdev->mddev = mddev; | 1219 | rdev->mddev = mddev; |
1149 | printk(KERN_INFO "md: bind<%s>\n", bdevname(rdev->bdev,b)); | 1220 | printk(KERN_INFO "md: bind<%s>\n", b); |
1221 | |||
1222 | rdev->kobj.parent = &mddev->kobj; | ||
1223 | kobject_add(&rdev->kobj); | ||
1224 | |||
1225 | if (rdev->bdev->bd_part) | ||
1226 | ko = &rdev->bdev->bd_part->kobj; | ||
1227 | else | ||
1228 | ko = &rdev->bdev->bd_disk->kobj; | ||
1229 | sysfs_create_link(&rdev->kobj, ko, "block"); | ||
1150 | return 0; | 1230 | return 0; |
1151 | } | 1231 | } |
1152 | 1232 | ||
@@ -1160,6 +1240,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
1160 | list_del_init(&rdev->same_set); | 1240 | list_del_init(&rdev->same_set); |
1161 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); | 1241 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); |
1162 | rdev->mddev = NULL; | 1242 | rdev->mddev = NULL; |
1243 | sysfs_remove_link(&rdev->kobj, "block"); | ||
1244 | kobject_del(&rdev->kobj); | ||
1163 | } | 1245 | } |
1164 | 1246 | ||
1165 | /* | 1247 | /* |
@@ -1215,7 +1297,7 @@ static void export_rdev(mdk_rdev_t * rdev) | |||
1215 | md_autodetect_dev(rdev->bdev->bd_dev); | 1297 | md_autodetect_dev(rdev->bdev->bd_dev); |
1216 | #endif | 1298 | #endif |
1217 | unlock_rdev(rdev); | 1299 | unlock_rdev(rdev); |
1218 | kfree(rdev); | 1300 | kobject_put(&rdev->kobj); |
1219 | } | 1301 | } |
1220 | 1302 | ||
1221 | static void kick_rdev_from_array(mdk_rdev_t * rdev) | 1303 | static void kick_rdev_from_array(mdk_rdev_t * rdev) |
@@ -1287,7 +1369,8 @@ static void print_rdev(mdk_rdev_t *rdev) | |||
1287 | char b[BDEVNAME_SIZE]; | 1369 | char b[BDEVNAME_SIZE]; |
1288 | printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n", | 1370 | printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n", |
1289 | bdevname(rdev->bdev,b), (unsigned long long)rdev->size, | 1371 | bdevname(rdev->bdev,b), (unsigned long long)rdev->size, |
1290 | rdev->faulty, rdev->in_sync, rdev->desc_nr); | 1372 | test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags), |
1373 | rdev->desc_nr); | ||
1291 | if (rdev->sb_loaded) { | 1374 | if (rdev->sb_loaded) { |
1292 | printk(KERN_INFO "md: rdev superblock:\n"); | 1375 | printk(KERN_INFO "md: rdev superblock:\n"); |
1293 | print_sb((mdp_super_t*)page_address(rdev->sb_page)); | 1376 | print_sb((mdp_super_t*)page_address(rdev->sb_page)); |
@@ -1344,7 +1427,7 @@ static void md_update_sb(mddev_t * mddev) | |||
1344 | int sync_req; | 1427 | int sync_req; |
1345 | 1428 | ||
1346 | repeat: | 1429 | repeat: |
1347 | spin_lock(&mddev->write_lock); | 1430 | spin_lock_irq(&mddev->write_lock); |
1348 | sync_req = mddev->in_sync; | 1431 | sync_req = mddev->in_sync; |
1349 | mddev->utime = get_seconds(); | 1432 | mddev->utime = get_seconds(); |
1350 | mddev->events ++; | 1433 | mddev->events ++; |
@@ -1367,11 +1450,11 @@ repeat: | |||
1367 | */ | 1450 | */ |
1368 | if (!mddev->persistent) { | 1451 | if (!mddev->persistent) { |
1369 | mddev->sb_dirty = 0; | 1452 | mddev->sb_dirty = 0; |
1370 | spin_unlock(&mddev->write_lock); | 1453 | spin_unlock_irq(&mddev->write_lock); |
1371 | wake_up(&mddev->sb_wait); | 1454 | wake_up(&mddev->sb_wait); |
1372 | return; | 1455 | return; |
1373 | } | 1456 | } |
1374 | spin_unlock(&mddev->write_lock); | 1457 | spin_unlock_irq(&mddev->write_lock); |
1375 | 1458 | ||
1376 | dprintk(KERN_INFO | 1459 | dprintk(KERN_INFO |
1377 | "md: updating %s RAID superblock on device (in sync %d)\n", | 1460 | "md: updating %s RAID superblock on device (in sync %d)\n", |
@@ -1381,11 +1464,11 @@ repeat: | |||
1381 | ITERATE_RDEV(mddev,rdev,tmp) { | 1464 | ITERATE_RDEV(mddev,rdev,tmp) { |
1382 | char b[BDEVNAME_SIZE]; | 1465 | char b[BDEVNAME_SIZE]; |
1383 | dprintk(KERN_INFO "md: "); | 1466 | dprintk(KERN_INFO "md: "); |
1384 | if (rdev->faulty) | 1467 | if (test_bit(Faulty, &rdev->flags)) |
1385 | dprintk("(skipping faulty "); | 1468 | dprintk("(skipping faulty "); |
1386 | 1469 | ||
1387 | dprintk("%s ", bdevname(rdev->bdev,b)); | 1470 | dprintk("%s ", bdevname(rdev->bdev,b)); |
1388 | if (!rdev->faulty) { | 1471 | if (!test_bit(Faulty, &rdev->flags)) { |
1389 | md_super_write(mddev,rdev, | 1472 | md_super_write(mddev,rdev, |
1390 | rdev->sb_offset<<1, rdev->sb_size, | 1473 | rdev->sb_offset<<1, rdev->sb_size, |
1391 | rdev->sb_page); | 1474 | rdev->sb_page); |
@@ -1399,21 +1482,106 @@ repeat: | |||
1399 | /* only need to write one superblock... */ | 1482 | /* only need to write one superblock... */ |
1400 | break; | 1483 | break; |
1401 | } | 1484 | } |
1402 | wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); | 1485 | md_super_wait(mddev); |
1403 | /* if there was a failure, sb_dirty was set to 1, and we re-write super */ | 1486 | /* if there was a failure, sb_dirty was set to 1, and we re-write super */ |
1404 | 1487 | ||
1405 | spin_lock(&mddev->write_lock); | 1488 | spin_lock_irq(&mddev->write_lock); |
1406 | if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) { | 1489 | if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) { |
1407 | /* have to write it out again */ | 1490 | /* have to write it out again */ |
1408 | spin_unlock(&mddev->write_lock); | 1491 | spin_unlock_irq(&mddev->write_lock); |
1409 | goto repeat; | 1492 | goto repeat; |
1410 | } | 1493 | } |
1411 | mddev->sb_dirty = 0; | 1494 | mddev->sb_dirty = 0; |
1412 | spin_unlock(&mddev->write_lock); | 1495 | spin_unlock_irq(&mddev->write_lock); |
1413 | wake_up(&mddev->sb_wait); | 1496 | wake_up(&mddev->sb_wait); |
1414 | 1497 | ||
1415 | } | 1498 | } |
1416 | 1499 | ||
1500 | struct rdev_sysfs_entry { | ||
1501 | struct attribute attr; | ||
1502 | ssize_t (*show)(mdk_rdev_t *, char *); | ||
1503 | ssize_t (*store)(mdk_rdev_t *, const char *, size_t); | ||
1504 | }; | ||
1505 | |||
1506 | static ssize_t | ||
1507 | state_show(mdk_rdev_t *rdev, char *page) | ||
1508 | { | ||
1509 | char *sep = ""; | ||
1510 | int len=0; | ||
1511 | |||
1512 | if (test_bit(Faulty, &rdev->flags)) { | ||
1513 | len+= sprintf(page+len, "%sfaulty",sep); | ||
1514 | sep = ","; | ||
1515 | } | ||
1516 | if (test_bit(In_sync, &rdev->flags)) { | ||
1517 | len += sprintf(page+len, "%sin_sync",sep); | ||
1518 | sep = ","; | ||
1519 | } | ||
1520 | if (!test_bit(Faulty, &rdev->flags) && | ||
1521 | !test_bit(In_sync, &rdev->flags)) { | ||
1522 | len += sprintf(page+len, "%sspare", sep); | ||
1523 | sep = ","; | ||
1524 | } | ||
1525 | return len+sprintf(page+len, "\n"); | ||
1526 | } | ||
1527 | |||
1528 | static struct rdev_sysfs_entry | ||
1529 | rdev_state = __ATTR_RO(state); | ||
1530 | |||
1531 | static ssize_t | ||
1532 | super_show(mdk_rdev_t *rdev, char *page) | ||
1533 | { | ||
1534 | if (rdev->sb_loaded && rdev->sb_size) { | ||
1535 | memcpy(page, page_address(rdev->sb_page), rdev->sb_size); | ||
1536 | return rdev->sb_size; | ||
1537 | } else | ||
1538 | return 0; | ||
1539 | } | ||
1540 | static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super); | ||
1541 | |||
1542 | static struct attribute *rdev_default_attrs[] = { | ||
1543 | &rdev_state.attr, | ||
1544 | &rdev_super.attr, | ||
1545 | NULL, | ||
1546 | }; | ||
1547 | static ssize_t | ||
1548 | rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | ||
1549 | { | ||
1550 | struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); | ||
1551 | mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); | ||
1552 | |||
1553 | if (!entry->show) | ||
1554 | return -EIO; | ||
1555 | return entry->show(rdev, page); | ||
1556 | } | ||
1557 | |||
1558 | static ssize_t | ||
1559 | rdev_attr_store(struct kobject *kobj, struct attribute *attr, | ||
1560 | const char *page, size_t length) | ||
1561 | { | ||
1562 | struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); | ||
1563 | mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj); | ||
1564 | |||
1565 | if (!entry->store) | ||
1566 | return -EIO; | ||
1567 | return entry->store(rdev, page, length); | ||
1568 | } | ||
1569 | |||
1570 | static void rdev_free(struct kobject *ko) | ||
1571 | { | ||
1572 | mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj); | ||
1573 | kfree(rdev); | ||
1574 | } | ||
1575 | static struct sysfs_ops rdev_sysfs_ops = { | ||
1576 | .show = rdev_attr_show, | ||
1577 | .store = rdev_attr_store, | ||
1578 | }; | ||
1579 | static struct kobj_type rdev_ktype = { | ||
1580 | .release = rdev_free, | ||
1581 | .sysfs_ops = &rdev_sysfs_ops, | ||
1582 | .default_attrs = rdev_default_attrs, | ||
1583 | }; | ||
1584 | |||
1417 | /* | 1585 | /* |
1418 | * Import a device. If 'super_format' >= 0, then sanity check the superblock | 1586 | * Import a device. If 'super_format' >= 0, then sanity check the superblock |
1419 | * | 1587 | * |
@@ -1445,11 +1613,15 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
1445 | if (err) | 1613 | if (err) |
1446 | goto abort_free; | 1614 | goto abort_free; |
1447 | 1615 | ||
1616 | rdev->kobj.parent = NULL; | ||
1617 | rdev->kobj.ktype = &rdev_ktype; | ||
1618 | kobject_init(&rdev->kobj); | ||
1619 | |||
1448 | rdev->desc_nr = -1; | 1620 | rdev->desc_nr = -1; |
1449 | rdev->faulty = 0; | 1621 | rdev->flags = 0; |
1450 | rdev->in_sync = 0; | ||
1451 | rdev->data_offset = 0; | 1622 | rdev->data_offset = 0; |
1452 | atomic_set(&rdev->nr_pending, 0); | 1623 | atomic_set(&rdev->nr_pending, 0); |
1624 | atomic_set(&rdev->read_errors, 0); | ||
1453 | 1625 | ||
1454 | size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; | 1626 | size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; |
1455 | if (!size) { | 1627 | if (!size) { |
@@ -1537,7 +1709,7 @@ static void analyze_sbs(mddev_t * mddev) | |||
1537 | if (mddev->level == LEVEL_MULTIPATH) { | 1709 | if (mddev->level == LEVEL_MULTIPATH) { |
1538 | rdev->desc_nr = i++; | 1710 | rdev->desc_nr = i++; |
1539 | rdev->raid_disk = rdev->desc_nr; | 1711 | rdev->raid_disk = rdev->desc_nr; |
1540 | rdev->in_sync = 1; | 1712 | set_bit(In_sync, &rdev->flags); |
1541 | } | 1713 | } |
1542 | } | 1714 | } |
1543 | 1715 | ||
@@ -1551,6 +1723,162 @@ static void analyze_sbs(mddev_t * mddev) | |||
1551 | 1723 | ||
1552 | } | 1724 | } |
1553 | 1725 | ||
1726 | static ssize_t | ||
1727 | level_show(mddev_t *mddev, char *page) | ||
1728 | { | ||
1729 | mdk_personality_t *p = mddev->pers; | ||
1730 | if (p == NULL && mddev->raid_disks == 0) | ||
1731 | return 0; | ||
1732 | if (mddev->level >= 0) | ||
1733 | return sprintf(page, "RAID-%d\n", mddev->level); | ||
1734 | else | ||
1735 | return sprintf(page, "%s\n", p->name); | ||
1736 | } | ||
1737 | |||
1738 | static struct md_sysfs_entry md_level = __ATTR_RO(level); | ||
1739 | |||
1740 | static ssize_t | ||
1741 | raid_disks_show(mddev_t *mddev, char *page) | ||
1742 | { | ||
1743 | if (mddev->raid_disks == 0) | ||
1744 | return 0; | ||
1745 | return sprintf(page, "%d\n", mddev->raid_disks); | ||
1746 | } | ||
1747 | |||
1748 | static struct md_sysfs_entry md_raid_disks = __ATTR_RO(raid_disks); | ||
1749 | |||
1750 | static ssize_t | ||
1751 | action_show(mddev_t *mddev, char *page) | ||
1752 | { | ||
1753 | char *type = "idle"; | ||
1754 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | ||
1755 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) { | ||
1756 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | ||
1757 | if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
1758 | type = "resync"; | ||
1759 | else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) | ||
1760 | type = "check"; | ||
1761 | else | ||
1762 | type = "repair"; | ||
1763 | } else | ||
1764 | type = "recover"; | ||
1765 | } | ||
1766 | return sprintf(page, "%s\n", type); | ||
1767 | } | ||
1768 | |||
1769 | static ssize_t | ||
1770 | action_store(mddev_t *mddev, const char *page, size_t len) | ||
1771 | { | ||
1772 | if (!mddev->pers || !mddev->pers->sync_request) | ||
1773 | return -EINVAL; | ||
1774 | |||
1775 | if (strcmp(page, "idle")==0 || strcmp(page, "idle\n")==0) { | ||
1776 | if (mddev->sync_thread) { | ||
1777 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
1778 | md_unregister_thread(mddev->sync_thread); | ||
1779 | mddev->sync_thread = NULL; | ||
1780 | mddev->recovery = 0; | ||
1781 | } | ||
1782 | return len; | ||
1783 | } | ||
1784 | |||
1785 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | ||
1786 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) | ||
1787 | return -EBUSY; | ||
1788 | if (strcmp(page, "resync")==0 || strcmp(page, "resync\n")==0 || | ||
1789 | strcmp(page, "recover")==0 || strcmp(page, "recover\n")==0) | ||
1790 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
1791 | else { | ||
1792 | if (strcmp(page, "check")==0 || strcmp(page, "check\n")==0) | ||
1793 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
1794 | else if (strcmp(page, "repair")!=0 && strcmp(page, "repair\n")!=0) | ||
1795 | return -EINVAL; | ||
1796 | set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); | ||
1797 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
1798 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
1799 | } | ||
1800 | md_wakeup_thread(mddev->thread); | ||
1801 | return len; | ||
1802 | } | ||
1803 | |||
1804 | static ssize_t | ||
1805 | mismatch_cnt_show(mddev_t *mddev, char *page) | ||
1806 | { | ||
1807 | return sprintf(page, "%llu\n", | ||
1808 | (unsigned long long) mddev->resync_mismatches); | ||
1809 | } | ||
1810 | |||
1811 | static struct md_sysfs_entry | ||
1812 | md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); | ||
1813 | |||
1814 | |||
1815 | static struct md_sysfs_entry | ||
1816 | md_mismatches = __ATTR_RO(mismatch_cnt); | ||
1817 | |||
1818 | static struct attribute *md_default_attrs[] = { | ||
1819 | &md_level.attr, | ||
1820 | &md_raid_disks.attr, | ||
1821 | NULL, | ||
1822 | }; | ||
1823 | |||
1824 | static struct attribute *md_redundancy_attrs[] = { | ||
1825 | &md_scan_mode.attr, | ||
1826 | &md_mismatches.attr, | ||
1827 | NULL, | ||
1828 | }; | ||
1829 | static struct attribute_group md_redundancy_group = { | ||
1830 | .name = NULL, | ||
1831 | .attrs = md_redundancy_attrs, | ||
1832 | }; | ||
1833 | |||
1834 | |||
1835 | static ssize_t | ||
1836 | md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | ||
1837 | { | ||
1838 | struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr); | ||
1839 | mddev_t *mddev = container_of(kobj, struct mddev_s, kobj); | ||
1840 | ssize_t rv; | ||
1841 | |||
1842 | if (!entry->show) | ||
1843 | return -EIO; | ||
1844 | mddev_lock(mddev); | ||
1845 | rv = entry->show(mddev, page); | ||
1846 | mddev_unlock(mddev); | ||
1847 | return rv; | ||
1848 | } | ||
1849 | |||
1850 | static ssize_t | ||
1851 | md_attr_store(struct kobject *kobj, struct attribute *attr, | ||
1852 | const char *page, size_t length) | ||
1853 | { | ||
1854 | struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr); | ||
1855 | mddev_t *mddev = container_of(kobj, struct mddev_s, kobj); | ||
1856 | ssize_t rv; | ||
1857 | |||
1858 | if (!entry->store) | ||
1859 | return -EIO; | ||
1860 | mddev_lock(mddev); | ||
1861 | rv = entry->store(mddev, page, length); | ||
1862 | mddev_unlock(mddev); | ||
1863 | return rv; | ||
1864 | } | ||
1865 | |||
1866 | static void md_free(struct kobject *ko) | ||
1867 | { | ||
1868 | mddev_t *mddev = container_of(ko, mddev_t, kobj); | ||
1869 | kfree(mddev); | ||
1870 | } | ||
1871 | |||
1872 | static struct sysfs_ops md_sysfs_ops = { | ||
1873 | .show = md_attr_show, | ||
1874 | .store = md_attr_store, | ||
1875 | }; | ||
1876 | static struct kobj_type md_ktype = { | ||
1877 | .release = md_free, | ||
1878 | .sysfs_ops = &md_sysfs_ops, | ||
1879 | .default_attrs = md_default_attrs, | ||
1880 | }; | ||
1881 | |||
1554 | int mdp_major = 0; | 1882 | int mdp_major = 0; |
1555 | 1883 | ||
1556 | static struct kobject *md_probe(dev_t dev, int *part, void *data) | 1884 | static struct kobject *md_probe(dev_t dev, int *part, void *data) |
@@ -1592,6 +1920,11 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
1592 | add_disk(disk); | 1920 | add_disk(disk); |
1593 | mddev->gendisk = disk; | 1921 | mddev->gendisk = disk; |
1594 | up(&disks_sem); | 1922 | up(&disks_sem); |
1923 | mddev->kobj.parent = &disk->kobj; | ||
1924 | mddev->kobj.k_name = NULL; | ||
1925 | snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); | ||
1926 | mddev->kobj.ktype = &md_ktype; | ||
1927 | kobject_register(&mddev->kobj); | ||
1595 | return NULL; | 1928 | return NULL; |
1596 | } | 1929 | } |
1597 | 1930 | ||
@@ -1663,7 +1996,7 @@ static int do_md_run(mddev_t * mddev) | |||
1663 | 1996 | ||
1664 | /* devices must have minimum size of one chunk */ | 1997 | /* devices must have minimum size of one chunk */ |
1665 | ITERATE_RDEV(mddev,rdev,tmp) { | 1998 | ITERATE_RDEV(mddev,rdev,tmp) { |
1666 | if (rdev->faulty) | 1999 | if (test_bit(Faulty, &rdev->flags)) |
1667 | continue; | 2000 | continue; |
1668 | if (rdev->size < chunk_size / 1024) { | 2001 | if (rdev->size < chunk_size / 1024) { |
1669 | printk(KERN_WARNING | 2002 | printk(KERN_WARNING |
@@ -1691,7 +2024,7 @@ static int do_md_run(mddev_t * mddev) | |||
1691 | * Also find largest hardsector size | 2024 | * Also find largest hardsector size |
1692 | */ | 2025 | */ |
1693 | ITERATE_RDEV(mddev,rdev,tmp) { | 2026 | ITERATE_RDEV(mddev,rdev,tmp) { |
1694 | if (rdev->faulty) | 2027 | if (test_bit(Faulty, &rdev->flags)) |
1695 | continue; | 2028 | continue; |
1696 | sync_blockdev(rdev->bdev); | 2029 | sync_blockdev(rdev->bdev); |
1697 | invalidate_bdev(rdev->bdev, 0); | 2030 | invalidate_bdev(rdev->bdev, 0); |
@@ -1715,6 +2048,10 @@ static int do_md_run(mddev_t * mddev) | |||
1715 | 2048 | ||
1716 | mddev->recovery = 0; | 2049 | mddev->recovery = 0; |
1717 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ | 2050 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ |
2051 | mddev->barriers_work = 1; | ||
2052 | |||
2053 | if (start_readonly) | ||
2054 | mddev->ro = 2; /* read-only, but switch on first write */ | ||
1718 | 2055 | ||
1719 | /* before we start the array running, initialise the bitmap */ | 2056 | /* before we start the array running, initialise the bitmap */ |
1720 | err = bitmap_create(mddev); | 2057 | err = bitmap_create(mddev); |
@@ -1730,12 +2067,24 @@ static int do_md_run(mddev_t * mddev) | |||
1730 | bitmap_destroy(mddev); | 2067 | bitmap_destroy(mddev); |
1731 | return err; | 2068 | return err; |
1732 | } | 2069 | } |
2070 | if (mddev->pers->sync_request) | ||
2071 | sysfs_create_group(&mddev->kobj, &md_redundancy_group); | ||
2072 | else if (mddev->ro == 2) /* auto-readonly not meaningful */ | ||
2073 | mddev->ro = 0; | ||
2074 | |||
1733 | atomic_set(&mddev->writes_pending,0); | 2075 | atomic_set(&mddev->writes_pending,0); |
1734 | mddev->safemode = 0; | 2076 | mddev->safemode = 0; |
1735 | mddev->safemode_timer.function = md_safemode_timeout; | 2077 | mddev->safemode_timer.function = md_safemode_timeout; |
1736 | mddev->safemode_timer.data = (unsigned long) mddev; | 2078 | mddev->safemode_timer.data = (unsigned long) mddev; |
1737 | mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */ | 2079 | mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */ |
1738 | mddev->in_sync = 1; | 2080 | mddev->in_sync = 1; |
2081 | |||
2082 | ITERATE_RDEV(mddev,rdev,tmp) | ||
2083 | if (rdev->raid_disk >= 0) { | ||
2084 | char nm[20]; | ||
2085 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
2086 | sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); | ||
2087 | } | ||
1739 | 2088 | ||
1740 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 2089 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
1741 | md_wakeup_thread(mddev->thread); | 2090 | md_wakeup_thread(mddev->thread); |
@@ -1821,16 +2170,19 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
1821 | 2170 | ||
1822 | if (ro) { | 2171 | if (ro) { |
1823 | err = -ENXIO; | 2172 | err = -ENXIO; |
1824 | if (mddev->ro) | 2173 | if (mddev->ro==1) |
1825 | goto out; | 2174 | goto out; |
1826 | mddev->ro = 1; | 2175 | mddev->ro = 1; |
1827 | } else { | 2176 | } else { |
1828 | bitmap_flush(mddev); | 2177 | bitmap_flush(mddev); |
1829 | wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); | 2178 | md_super_wait(mddev); |
1830 | if (mddev->ro) | 2179 | if (mddev->ro) |
1831 | set_disk_ro(disk, 0); | 2180 | set_disk_ro(disk, 0); |
1832 | blk_queue_make_request(mddev->queue, md_fail_request); | 2181 | blk_queue_make_request(mddev->queue, md_fail_request); |
1833 | mddev->pers->stop(mddev); | 2182 | mddev->pers->stop(mddev); |
2183 | if (mddev->pers->sync_request) | ||
2184 | sysfs_remove_group(&mddev->kobj, &md_redundancy_group); | ||
2185 | |||
1834 | module_put(mddev->pers->owner); | 2186 | module_put(mddev->pers->owner); |
1835 | mddev->pers = NULL; | 2187 | mddev->pers = NULL; |
1836 | if (mddev->ro) | 2188 | if (mddev->ro) |
@@ -1857,9 +2209,18 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
1857 | * Free resources if final stop | 2209 | * Free resources if final stop |
1858 | */ | 2210 | */ |
1859 | if (!ro) { | 2211 | if (!ro) { |
2212 | mdk_rdev_t *rdev; | ||
2213 | struct list_head *tmp; | ||
1860 | struct gendisk *disk; | 2214 | struct gendisk *disk; |
1861 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); | 2215 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); |
1862 | 2216 | ||
2217 | ITERATE_RDEV(mddev,rdev,tmp) | ||
2218 | if (rdev->raid_disk >= 0) { | ||
2219 | char nm[20]; | ||
2220 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
2221 | sysfs_remove_link(&mddev->kobj, nm); | ||
2222 | } | ||
2223 | |||
1863 | export_array(mddev); | 2224 | export_array(mddev); |
1864 | 2225 | ||
1865 | mddev->array_size = 0; | 2226 | mddev->array_size = 0; |
@@ -2012,7 +2373,7 @@ static int autostart_array(dev_t startdev) | |||
2012 | return err; | 2373 | return err; |
2013 | } | 2374 | } |
2014 | 2375 | ||
2015 | if (start_rdev->faulty) { | 2376 | if (test_bit(Faulty, &start_rdev->flags)) { |
2016 | printk(KERN_WARNING | 2377 | printk(KERN_WARNING |
2017 | "md: can not autostart based on faulty %s!\n", | 2378 | "md: can not autostart based on faulty %s!\n", |
2018 | bdevname(start_rdev->bdev,b)); | 2379 | bdevname(start_rdev->bdev,b)); |
@@ -2071,11 +2432,11 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
2071 | nr=working=active=failed=spare=0; | 2432 | nr=working=active=failed=spare=0; |
2072 | ITERATE_RDEV(mddev,rdev,tmp) { | 2433 | ITERATE_RDEV(mddev,rdev,tmp) { |
2073 | nr++; | 2434 | nr++; |
2074 | if (rdev->faulty) | 2435 | if (test_bit(Faulty, &rdev->flags)) |
2075 | failed++; | 2436 | failed++; |
2076 | else { | 2437 | else { |
2077 | working++; | 2438 | working++; |
2078 | if (rdev->in_sync) | 2439 | if (test_bit(In_sync, &rdev->flags)) |
2079 | active++; | 2440 | active++; |
2080 | else | 2441 | else |
2081 | spare++; | 2442 | spare++; |
@@ -2166,9 +2527,9 @@ static int get_disk_info(mddev_t * mddev, void __user * arg) | |||
2166 | info.minor = MINOR(rdev->bdev->bd_dev); | 2527 | info.minor = MINOR(rdev->bdev->bd_dev); |
2167 | info.raid_disk = rdev->raid_disk; | 2528 | info.raid_disk = rdev->raid_disk; |
2168 | info.state = 0; | 2529 | info.state = 0; |
2169 | if (rdev->faulty) | 2530 | if (test_bit(Faulty, &rdev->flags)) |
2170 | info.state |= (1<<MD_DISK_FAULTY); | 2531 | info.state |= (1<<MD_DISK_FAULTY); |
2171 | else if (rdev->in_sync) { | 2532 | else if (test_bit(In_sync, &rdev->flags)) { |
2172 | info.state |= (1<<MD_DISK_ACTIVE); | 2533 | info.state |= (1<<MD_DISK_ACTIVE); |
2173 | info.state |= (1<<MD_DISK_SYNC); | 2534 | info.state |= (1<<MD_DISK_SYNC); |
2174 | } | 2535 | } |
@@ -2261,7 +2622,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
2261 | validate_super(mddev, rdev); | 2622 | validate_super(mddev, rdev); |
2262 | rdev->saved_raid_disk = rdev->raid_disk; | 2623 | rdev->saved_raid_disk = rdev->raid_disk; |
2263 | 2624 | ||
2264 | rdev->in_sync = 0; /* just to be sure */ | 2625 | clear_bit(In_sync, &rdev->flags); /* just to be sure */ |
2265 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) | 2626 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) |
2266 | set_bit(WriteMostly, &rdev->flags); | 2627 | set_bit(WriteMostly, &rdev->flags); |
2267 | 2628 | ||
@@ -2299,11 +2660,11 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
2299 | else | 2660 | else |
2300 | rdev->raid_disk = -1; | 2661 | rdev->raid_disk = -1; |
2301 | 2662 | ||
2302 | rdev->faulty = 0; | 2663 | rdev->flags = 0; |
2664 | |||
2303 | if (rdev->raid_disk < mddev->raid_disks) | 2665 | if (rdev->raid_disk < mddev->raid_disks) |
2304 | rdev->in_sync = (info->state & (1<<MD_DISK_SYNC)); | 2666 | if (info->state & (1<<MD_DISK_SYNC)) |
2305 | else | 2667 | set_bit(In_sync, &rdev->flags); |
2306 | rdev->in_sync = 0; | ||
2307 | 2668 | ||
2308 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) | 2669 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) |
2309 | set_bit(WriteMostly, &rdev->flags); | 2670 | set_bit(WriteMostly, &rdev->flags); |
@@ -2402,14 +2763,14 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) | |||
2402 | goto abort_export; | 2763 | goto abort_export; |
2403 | } | 2764 | } |
2404 | 2765 | ||
2405 | if (rdev->faulty) { | 2766 | if (test_bit(Faulty, &rdev->flags)) { |
2406 | printk(KERN_WARNING | 2767 | printk(KERN_WARNING |
2407 | "md: can not hot-add faulty %s disk to %s!\n", | 2768 | "md: can not hot-add faulty %s disk to %s!\n", |
2408 | bdevname(rdev->bdev,b), mdname(mddev)); | 2769 | bdevname(rdev->bdev,b), mdname(mddev)); |
2409 | err = -EINVAL; | 2770 | err = -EINVAL; |
2410 | goto abort_export; | 2771 | goto abort_export; |
2411 | } | 2772 | } |
2412 | rdev->in_sync = 0; | 2773 | clear_bit(In_sync, &rdev->flags); |
2413 | rdev->desc_nr = -1; | 2774 | rdev->desc_nr = -1; |
2414 | bind_rdev_to_array(rdev, mddev); | 2775 | bind_rdev_to_array(rdev, mddev); |
2415 | 2776 | ||
@@ -2929,12 +3290,22 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2929 | 3290 | ||
2930 | /* | 3291 | /* |
2931 | * The remaining ioctls are changing the state of the | 3292 | * The remaining ioctls are changing the state of the |
2932 | * superblock, so we do not allow read-only arrays | 3293 | * superblock, so we do not allow them on read-only arrays. |
2933 | * here: | 3294 | * However non-MD ioctls (e.g. get-size) will still come through |
3295 | * here and hit the 'default' below, so only disallow | ||
3296 | * 'md' ioctls, and switch to rw mode if started auto-readonly. | ||
2934 | */ | 3297 | */ |
2935 | if (mddev->ro) { | 3298 | if (_IOC_TYPE(cmd) == MD_MAJOR && |
2936 | err = -EROFS; | 3299 | mddev->ro && mddev->pers) { |
2937 | goto abort_unlock; | 3300 | if (mddev->ro == 2) { |
3301 | mddev->ro = 0; | ||
3302 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
3303 | md_wakeup_thread(mddev->thread); | ||
3304 | |||
3305 | } else { | ||
3306 | err = -EROFS; | ||
3307 | goto abort_unlock; | ||
3308 | } | ||
2938 | } | 3309 | } |
2939 | 3310 | ||
2940 | switch (cmd) | 3311 | switch (cmd) |
@@ -3064,21 +3435,17 @@ static int md_thread(void * arg) | |||
3064 | */ | 3435 | */ |
3065 | 3436 | ||
3066 | allow_signal(SIGKILL); | 3437 | allow_signal(SIGKILL); |
3067 | complete(thread->event); | ||
3068 | while (!kthread_should_stop()) { | 3438 | while (!kthread_should_stop()) { |
3069 | void (*run)(mddev_t *); | ||
3070 | 3439 | ||
3071 | wait_event_interruptible_timeout(thread->wqueue, | 3440 | wait_event_timeout(thread->wqueue, |
3072 | test_bit(THREAD_WAKEUP, &thread->flags) | 3441 | test_bit(THREAD_WAKEUP, &thread->flags) |
3073 | || kthread_should_stop(), | 3442 | || kthread_should_stop(), |
3074 | thread->timeout); | 3443 | thread->timeout); |
3075 | try_to_freeze(); | 3444 | try_to_freeze(); |
3076 | 3445 | ||
3077 | clear_bit(THREAD_WAKEUP, &thread->flags); | 3446 | clear_bit(THREAD_WAKEUP, &thread->flags); |
3078 | 3447 | ||
3079 | run = thread->run; | 3448 | thread->run(thread->mddev); |
3080 | if (run) | ||
3081 | run(thread->mddev); | ||
3082 | } | 3449 | } |
3083 | 3450 | ||
3084 | return 0; | 3451 | return 0; |
@@ -3097,7 +3464,6 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
3097 | const char *name) | 3464 | const char *name) |
3098 | { | 3465 | { |
3099 | mdk_thread_t *thread; | 3466 | mdk_thread_t *thread; |
3100 | struct completion event; | ||
3101 | 3467 | ||
3102 | thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL); | 3468 | thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL); |
3103 | if (!thread) | 3469 | if (!thread) |
@@ -3106,18 +3472,14 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
3106 | memset(thread, 0, sizeof(mdk_thread_t)); | 3472 | memset(thread, 0, sizeof(mdk_thread_t)); |
3107 | init_waitqueue_head(&thread->wqueue); | 3473 | init_waitqueue_head(&thread->wqueue); |
3108 | 3474 | ||
3109 | init_completion(&event); | ||
3110 | thread->event = &event; | ||
3111 | thread->run = run; | 3475 | thread->run = run; |
3112 | thread->mddev = mddev; | 3476 | thread->mddev = mddev; |
3113 | thread->name = name; | ||
3114 | thread->timeout = MAX_SCHEDULE_TIMEOUT; | 3477 | thread->timeout = MAX_SCHEDULE_TIMEOUT; |
3115 | thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev)); | 3478 | thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev)); |
3116 | if (IS_ERR(thread->tsk)) { | 3479 | if (IS_ERR(thread->tsk)) { |
3117 | kfree(thread); | 3480 | kfree(thread); |
3118 | return NULL; | 3481 | return NULL; |
3119 | } | 3482 | } |
3120 | wait_for_completion(&event); | ||
3121 | return thread; | 3483 | return thread; |
3122 | } | 3484 | } |
3123 | 3485 | ||
@@ -3136,7 +3498,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
3136 | return; | 3498 | return; |
3137 | } | 3499 | } |
3138 | 3500 | ||
3139 | if (!rdev || rdev->faulty) | 3501 | if (!rdev || test_bit(Faulty, &rdev->flags)) |
3140 | return; | 3502 | return; |
3141 | /* | 3503 | /* |
3142 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", | 3504 | dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", |
@@ -3322,8 +3684,10 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3322 | seq_printf(seq, "%s : %sactive", mdname(mddev), | 3684 | seq_printf(seq, "%s : %sactive", mdname(mddev), |
3323 | mddev->pers ? "" : "in"); | 3685 | mddev->pers ? "" : "in"); |
3324 | if (mddev->pers) { | 3686 | if (mddev->pers) { |
3325 | if (mddev->ro) | 3687 | if (mddev->ro==1) |
3326 | seq_printf(seq, " (read-only)"); | 3688 | seq_printf(seq, " (read-only)"); |
3689 | if (mddev->ro==2) | ||
3690 | seq_printf(seq, "(auto-read-only)"); | ||
3327 | seq_printf(seq, " %s", mddev->pers->name); | 3691 | seq_printf(seq, " %s", mddev->pers->name); |
3328 | } | 3692 | } |
3329 | 3693 | ||
@@ -3334,7 +3698,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3334 | bdevname(rdev->bdev,b), rdev->desc_nr); | 3698 | bdevname(rdev->bdev,b), rdev->desc_nr); |
3335 | if (test_bit(WriteMostly, &rdev->flags)) | 3699 | if (test_bit(WriteMostly, &rdev->flags)) |
3336 | seq_printf(seq, "(W)"); | 3700 | seq_printf(seq, "(W)"); |
3337 | if (rdev->faulty) { | 3701 | if (test_bit(Faulty, &rdev->flags)) { |
3338 | seq_printf(seq, "(F)"); | 3702 | seq_printf(seq, "(F)"); |
3339 | continue; | 3703 | continue; |
3340 | } else if (rdev->raid_disk < 0) | 3704 | } else if (rdev->raid_disk < 0) |
@@ -3363,11 +3727,15 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3363 | if (mddev->pers) { | 3727 | if (mddev->pers) { |
3364 | mddev->pers->status (seq, mddev); | 3728 | mddev->pers->status (seq, mddev); |
3365 | seq_printf(seq, "\n "); | 3729 | seq_printf(seq, "\n "); |
3366 | if (mddev->curr_resync > 2) { | 3730 | if (mddev->pers->sync_request) { |
3367 | status_resync (seq, mddev); | 3731 | if (mddev->curr_resync > 2) { |
3368 | seq_printf(seq, "\n "); | 3732 | status_resync (seq, mddev); |
3369 | } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) | 3733 | seq_printf(seq, "\n "); |
3370 | seq_printf(seq, " resync=DELAYED\n "); | 3734 | } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) |
3735 | seq_printf(seq, "\tresync=DELAYED\n "); | ||
3736 | else if (mddev->recovery_cp < MaxSector) | ||
3737 | seq_printf(seq, "\tresync=PENDING\n "); | ||
3738 | } | ||
3371 | } else | 3739 | } else |
3372 | seq_printf(seq, "\n "); | 3740 | seq_printf(seq, "\n "); |
3373 | 3741 | ||
@@ -3504,15 +3872,22 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
3504 | if (bio_data_dir(bi) != WRITE) | 3872 | if (bio_data_dir(bi) != WRITE) |
3505 | return; | 3873 | return; |
3506 | 3874 | ||
3875 | BUG_ON(mddev->ro == 1); | ||
3876 | if (mddev->ro == 2) { | ||
3877 | /* need to switch to read/write */ | ||
3878 | mddev->ro = 0; | ||
3879 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
3880 | md_wakeup_thread(mddev->thread); | ||
3881 | } | ||
3507 | atomic_inc(&mddev->writes_pending); | 3882 | atomic_inc(&mddev->writes_pending); |
3508 | if (mddev->in_sync) { | 3883 | if (mddev->in_sync) { |
3509 | spin_lock(&mddev->write_lock); | 3884 | spin_lock_irq(&mddev->write_lock); |
3510 | if (mddev->in_sync) { | 3885 | if (mddev->in_sync) { |
3511 | mddev->in_sync = 0; | 3886 | mddev->in_sync = 0; |
3512 | mddev->sb_dirty = 1; | 3887 | mddev->sb_dirty = 1; |
3513 | md_wakeup_thread(mddev->thread); | 3888 | md_wakeup_thread(mddev->thread); |
3514 | } | 3889 | } |
3515 | spin_unlock(&mddev->write_lock); | 3890 | spin_unlock_irq(&mddev->write_lock); |
3516 | } | 3891 | } |
3517 | wait_event(mddev->sb_wait, mddev->sb_dirty==0); | 3892 | wait_event(mddev->sb_wait, mddev->sb_dirty==0); |
3518 | } | 3893 | } |
@@ -3568,9 +3943,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3568 | mddev->curr_resync = 2; | 3943 | mddev->curr_resync = 2; |
3569 | 3944 | ||
3570 | try_again: | 3945 | try_again: |
3571 | if (signal_pending(current) || | 3946 | if (kthread_should_stop()) { |
3572 | kthread_should_stop()) { | ||
3573 | flush_signals(current); | ||
3574 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 3947 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
3575 | goto skip; | 3948 | goto skip; |
3576 | } | 3949 | } |
@@ -3590,9 +3963,8 @@ static void md_do_sync(mddev_t *mddev) | |||
3590 | * time 'round when curr_resync == 2 | 3963 | * time 'round when curr_resync == 2 |
3591 | */ | 3964 | */ |
3592 | continue; | 3965 | continue; |
3593 | prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); | 3966 | prepare_to_wait(&resync_wait, &wq, TASK_UNINTERRUPTIBLE); |
3594 | if (!signal_pending(current) && | 3967 | if (!kthread_should_stop() && |
3595 | !kthread_should_stop() && | ||
3596 | mddev2->curr_resync >= mddev->curr_resync) { | 3968 | mddev2->curr_resync >= mddev->curr_resync) { |
3597 | printk(KERN_INFO "md: delaying resync of %s" | 3969 | printk(KERN_INFO "md: delaying resync of %s" |
3598 | " until %s has finished resync (they" | 3970 | " until %s has finished resync (they" |
@@ -3608,12 +3980,13 @@ static void md_do_sync(mddev_t *mddev) | |||
3608 | } | 3980 | } |
3609 | } while (mddev->curr_resync < 2); | 3981 | } while (mddev->curr_resync < 2); |
3610 | 3982 | ||
3611 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 3983 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
3612 | /* resync follows the size requested by the personality, | 3984 | /* resync follows the size requested by the personality, |
3613 | * which defaults to physical size, but can be virtual size | 3985 | * which defaults to physical size, but can be virtual size |
3614 | */ | 3986 | */ |
3615 | max_sectors = mddev->resync_max_sectors; | 3987 | max_sectors = mddev->resync_max_sectors; |
3616 | else | 3988 | mddev->resync_mismatches = 0; |
3989 | } else | ||
3617 | /* recovery follows the physical size of devices */ | 3990 | /* recovery follows the physical size of devices */ |
3618 | max_sectors = mddev->size << 1; | 3991 | max_sectors = mddev->size << 1; |
3619 | 3992 | ||
@@ -3626,7 +3999,8 @@ static void md_do_sync(mddev_t *mddev) | |||
3626 | 3999 | ||
3627 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 4000 | is_mddev_idle(mddev); /* this also initializes IO event counters */ |
3628 | /* we don't use the checkpoint if there's a bitmap */ | 4001 | /* we don't use the checkpoint if there's a bitmap */ |
3629 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap) | 4002 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap |
4003 | && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
3630 | j = mddev->recovery_cp; | 4004 | j = mddev->recovery_cp; |
3631 | else | 4005 | else |
3632 | j = 0; | 4006 | j = 0; |
@@ -3699,13 +4073,12 @@ static void md_do_sync(mddev_t *mddev) | |||
3699 | } | 4073 | } |
3700 | 4074 | ||
3701 | 4075 | ||
3702 | if (signal_pending(current) || kthread_should_stop()) { | 4076 | if (kthread_should_stop()) { |
3703 | /* | 4077 | /* |
3704 | * got a signal, exit. | 4078 | * got a signal, exit. |
3705 | */ | 4079 | */ |
3706 | printk(KERN_INFO | 4080 | printk(KERN_INFO |
3707 | "md: md_do_sync() got signal ... exiting\n"); | 4081 | "md: md_do_sync() got signal ... exiting\n"); |
3708 | flush_signals(current); | ||
3709 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 4082 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
3710 | goto out; | 4083 | goto out; |
3711 | } | 4084 | } |
@@ -3727,7 +4100,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3727 | if (currspeed > sysctl_speed_limit_min) { | 4100 | if (currspeed > sysctl_speed_limit_min) { |
3728 | if ((currspeed > sysctl_speed_limit_max) || | 4101 | if ((currspeed > sysctl_speed_limit_max) || |
3729 | !is_mddev_idle(mddev)) { | 4102 | !is_mddev_idle(mddev)) { |
3730 | msleep_interruptible(250); | 4103 | msleep(250); |
3731 | goto repeat; | 4104 | goto repeat; |
3732 | } | 4105 | } |
3733 | } | 4106 | } |
@@ -3820,7 +4193,7 @@ void md_check_recovery(mddev_t *mddev) | |||
3820 | if (mddev_trylock(mddev)==0) { | 4193 | if (mddev_trylock(mddev)==0) { |
3821 | int spares =0; | 4194 | int spares =0; |
3822 | 4195 | ||
3823 | spin_lock(&mddev->write_lock); | 4196 | spin_lock_irq(&mddev->write_lock); |
3824 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 4197 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && |
3825 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | 4198 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { |
3826 | mddev->in_sync = 1; | 4199 | mddev->in_sync = 1; |
@@ -3828,7 +4201,7 @@ void md_check_recovery(mddev_t *mddev) | |||
3828 | } | 4201 | } |
3829 | if (mddev->safemode == 1) | 4202 | if (mddev->safemode == 1) |
3830 | mddev->safemode = 0; | 4203 | mddev->safemode = 0; |
3831 | spin_unlock(&mddev->write_lock); | 4204 | spin_unlock_irq(&mddev->write_lock); |
3832 | 4205 | ||
3833 | if (mddev->sb_dirty) | 4206 | if (mddev->sb_dirty) |
3834 | md_update_sb(mddev); | 4207 | md_update_sb(mddev); |
@@ -3864,9 +4237,13 @@ void md_check_recovery(mddev_t *mddev) | |||
3864 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4237 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
3865 | goto unlock; | 4238 | goto unlock; |
3866 | } | 4239 | } |
3867 | if (mddev->recovery) | 4240 | /* Clear some bits that don't mean anything, but |
3868 | /* probably just the RECOVERY_NEEDED flag */ | 4241 | * might be left set |
3869 | mddev->recovery = 0; | 4242 | */ |
4243 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
4244 | clear_bit(MD_RECOVERY_ERR, &mddev->recovery); | ||
4245 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
4246 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); | ||
3870 | 4247 | ||
3871 | /* no recovery is running. | 4248 | /* no recovery is running. |
3872 | * remove any failed drives, then | 4249 | * remove any failed drives, then |
@@ -3876,31 +4253,41 @@ void md_check_recovery(mddev_t *mddev) | |||
3876 | */ | 4253 | */ |
3877 | ITERATE_RDEV(mddev,rdev,rtmp) | 4254 | ITERATE_RDEV(mddev,rdev,rtmp) |
3878 | if (rdev->raid_disk >= 0 && | 4255 | if (rdev->raid_disk >= 0 && |
3879 | (rdev->faulty || ! rdev->in_sync) && | 4256 | (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) && |
3880 | atomic_read(&rdev->nr_pending)==0) { | 4257 | atomic_read(&rdev->nr_pending)==0) { |
3881 | if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) | 4258 | if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) { |
4259 | char nm[20]; | ||
4260 | sprintf(nm,"rd%d", rdev->raid_disk); | ||
4261 | sysfs_remove_link(&mddev->kobj, nm); | ||
3882 | rdev->raid_disk = -1; | 4262 | rdev->raid_disk = -1; |
4263 | } | ||
3883 | } | 4264 | } |
3884 | 4265 | ||
3885 | if (mddev->degraded) { | 4266 | if (mddev->degraded) { |
3886 | ITERATE_RDEV(mddev,rdev,rtmp) | 4267 | ITERATE_RDEV(mddev,rdev,rtmp) |
3887 | if (rdev->raid_disk < 0 | 4268 | if (rdev->raid_disk < 0 |
3888 | && !rdev->faulty) { | 4269 | && !test_bit(Faulty, &rdev->flags)) { |
3889 | if (mddev->pers->hot_add_disk(mddev,rdev)) | 4270 | if (mddev->pers->hot_add_disk(mddev,rdev)) { |
4271 | char nm[20]; | ||
4272 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
4273 | sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); | ||
3890 | spares++; | 4274 | spares++; |
3891 | else | 4275 | } else |
3892 | break; | 4276 | break; |
3893 | } | 4277 | } |
3894 | } | 4278 | } |
3895 | 4279 | ||
3896 | if (!spares && (mddev->recovery_cp == MaxSector )) { | 4280 | if (spares) { |
3897 | /* nothing we can do ... */ | 4281 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
4282 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
4283 | } else if (mddev->recovery_cp < MaxSector) { | ||
4284 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
4285 | } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | ||
4286 | /* nothing to be done ... */ | ||
3898 | goto unlock; | 4287 | goto unlock; |
3899 | } | 4288 | |
3900 | if (mddev->pers->sync_request) { | 4289 | if (mddev->pers->sync_request) { |
3901 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 4290 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
3902 | if (!spares) | ||
3903 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
3904 | if (spares && mddev->bitmap && ! mddev->bitmap->file) { | 4291 | if (spares && mddev->bitmap && ! mddev->bitmap->file) { |
3905 | /* We are adding a device or devices to an array | 4292 | /* We are adding a device or devices to an array |
3906 | * which has the bitmap stored on all devices. | 4293 | * which has the bitmap stored on all devices. |
@@ -3975,7 +4362,7 @@ static int __init md_init(void) | |||
3975 | " MD_SB_DISKS=%d\n", | 4362 | " MD_SB_DISKS=%d\n", |
3976 | MD_MAJOR_VERSION, MD_MINOR_VERSION, | 4363 | MD_MAJOR_VERSION, MD_MINOR_VERSION, |
3977 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); | 4364 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); |
3978 | printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR, | 4365 | printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR_HI, |
3979 | BITMAP_MINOR); | 4366 | BITMAP_MINOR); |
3980 | 4367 | ||
3981 | if (register_blkdev(MAJOR_NR, "md")) | 4368 | if (register_blkdev(MAJOR_NR, "md")) |
@@ -4039,7 +4426,7 @@ static void autostart_arrays(int part) | |||
4039 | if (IS_ERR(rdev)) | 4426 | if (IS_ERR(rdev)) |
4040 | continue; | 4427 | continue; |
4041 | 4428 | ||
4042 | if (rdev->faulty) { | 4429 | if (test_bit(Faulty, &rdev->flags)) { |
4043 | MD_BUG(); | 4430 | MD_BUG(); |
4044 | continue; | 4431 | continue; |
4045 | } | 4432 | } |
@@ -4086,6 +4473,23 @@ static __exit void md_exit(void) | |||
4086 | module_init(md_init) | 4473 | module_init(md_init) |
4087 | module_exit(md_exit) | 4474 | module_exit(md_exit) |
4088 | 4475 | ||
4476 | static int get_ro(char *buffer, struct kernel_param *kp) | ||
4477 | { | ||
4478 | return sprintf(buffer, "%d", start_readonly); | ||
4479 | } | ||
4480 | static int set_ro(const char *val, struct kernel_param *kp) | ||
4481 | { | ||
4482 | char *e; | ||
4483 | int num = simple_strtoul(val, &e, 10); | ||
4484 | if (*val && (*e == '\0' || *e == '\n')) { | ||
4485 | start_readonly = num; | ||
4486 | return 0;; | ||
4487 | } | ||
4488 | return -EINVAL; | ||
4489 | } | ||
4490 | |||
4491 | module_param_call(start_ro, set_ro, get_ro, NULL, 0600); | ||
4492 | |||
4089 | EXPORT_SYMBOL(register_md_personality); | 4493 | EXPORT_SYMBOL(register_md_personality); |
4090 | EXPORT_SYMBOL(unregister_md_personality); | 4494 | EXPORT_SYMBOL(unregister_md_personality); |
4091 | EXPORT_SYMBOL(md_error); | 4495 | EXPORT_SYMBOL(md_error); |