Diffstat (limited to 'drivers/md')

 drivers/md/bitmap.c    |  34
 drivers/md/md.c        | 636
 drivers/md/multipath.c |  31
 drivers/md/raid1.c     | 216
 drivers/md/raid10.c    |  63
 drivers/md/raid5.c     | 261
 drivers/md/raid6main.c |  40
 7 files changed, 952 insertions(+), 329 deletions(-)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 01654fcabc52..51315302a85e 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -21,7 +21,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -272,7 +271,8 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
 		return ERR_PTR(-ENOMEM);
 
 	ITERATE_RDEV(mddev, rdev, tmp) {
-		if (! rdev->in_sync || rdev->faulty)
+		if (! test_bit(In_sync, &rdev->flags)
+		    || test_bit(Faulty, &rdev->flags))
 			continue;
 
 		target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);
@@ -292,7 +292,8 @@ static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wai
 	struct list_head *tmp;
 
 	ITERATE_RDEV(mddev, rdev, tmp)
-		if (rdev->in_sync && !rdev->faulty)
+		if (test_bit(In_sync, &rdev->flags)
+		    && !test_bit(Faulty, &rdev->flags))
 			md_super_write(mddev, rdev,
 				       (rdev->sb_offset<<1) + offset
 				       + page->index * (PAGE_SIZE/512),
@@ -300,7 +301,7 @@ static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wai
 			       page);
 
 	if (wait)
-		wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+		md_super_wait(mddev);
 	return 0;
 }
 
@@ -481,7 +482,8 @@ static int bitmap_read_sb(struct bitmap *bitmap)
 	/* verify that the bitmap-specific fields are valid */
 	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
 		reason = "bad magic";
-	else if (sb->version != cpu_to_le32(BITMAP_MAJOR))
+	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
+		 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
 		reason = "unrecognized superblock version";
 	else if (chunksize < 512 || chunksize > (1024 * 1024 * 4))
 		reason = "bitmap chunksize out of range (512B - 4MB)";
@@ -526,6 +528,8 @@ success:
 	bitmap->daemon_lastrun = jiffies;
 	bitmap->max_write_behind = write_behind;
 	bitmap->flags |= sb->state;
+	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
+		bitmap->flags |= BITMAP_HOSTENDIAN;
 	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
 	if (sb->state & BITMAP_STALE)
 		bitmap->events_cleared = bitmap->mddev->events;
@@ -763,7 +767,10 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 
 	/* set the bit */
 	kaddr = kmap_atomic(page, KM_USER0);
-	set_bit(bit, kaddr);
+	if (bitmap->flags & BITMAP_HOSTENDIAN)
+		set_bit(bit, kaddr);
+	else
+		ext2_set_bit(bit, kaddr);
 	kunmap_atomic(kaddr, KM_USER0);
 	PRINTK("set file bit %lu page %lu\n", bit, page->index);
 
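
The set_bit()/ext2_set_bit() split above is the heart of the new BITMAP_HOSTENDIAN handling: set_bit() addresses bits inside a native unsigned long, so its on-disk layout depends on the host's endianness, while ext2_set_bit() always uses a little-endian byte layout that is identical on every architecture. A self-contained user-space sketch of the difference (illustrative helpers, not the kernel implementations):

    #include <stdio.h>

    /* Little-endian layout, as ext2_set_bit() guarantees: bit n lives
     * in byte n/8 at position n%8 on every architecture. */
    static void le_set_bit(int nr, unsigned char *addr)
    {
        addr[nr / 8] |= 1u << (nr % 8);
    }

    /* Host-endian layout, as set_bit() gives: bit n lives inside a
     * native unsigned long, so the byte it lands in differs between
     * little- and big-endian machines. */
    static void host_set_bit(int nr, unsigned long *addr)
    {
        addr[nr / (8 * sizeof(long))] |= 1UL << (nr % (8 * sizeof(long)));
    }

    int main(void)
    {
        unsigned char le[8] = {0};
        unsigned long host = 0;

        le_set_bit(9, le);      /* always byte 1, bit 1 */
        host_set_bit(9, &host); /* byte 1 on LE, byte 6 on 64-bit BE */
        printf("le[1]=%#x host=%#lx\n", le[1], host);
        return 0;
    }

Moving a host-endian bitmap file between machines of different endianness would scatter the set bits, which is why the little-endian layout stays the portable default and the host-endian variant gets its own superblock version.
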
@@ -821,8 +828,7 @@ int bitmap_unplug(struct bitmap *bitmap)
 				wake_up_process(bitmap->writeback_daemon->tsk));
 		spin_unlock_irq(&bitmap->write_lock);
 	} else
-		wait_event(bitmap->mddev->sb_wait,
-			   atomic_read(&bitmap->mddev->pending_writes)==0);
+		md_super_wait(bitmap->mddev);
 	}
 	return 0;
 }
@@ -890,6 +896,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 	oldindex = ~0L;
 
 	for (i = 0; i < chunks; i++) {
+		int b;
 		index = file_page_index(i);
 		bit = file_page_offset(i);
 		if (index != oldindex) { /* this is a new page, read it in */
@@ -938,7 +945,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 
 			bitmap->filemap[bitmap->file_pages++] = page;
 		}
-		if (test_bit(bit, page_address(page))) {
+		if (bitmap->flags & BITMAP_HOSTENDIAN)
+			b = test_bit(bit, page_address(page));
+		else
+			b = ext2_test_bit(bit, page_address(page));
+		if (b) {
 			/* if the disk bit is set, set the memory bit */
 			bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
 					       ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
@@ -1096,7 +1107,10 @@ int bitmap_daemon_work(struct bitmap *bitmap)
 						  -1);
 
 			/* clear the bit */
-			clear_bit(file_page_offset(j), page_address(page));
+			if (bitmap->flags & BITMAP_HOSTENDIAN)
+				clear_bit(file_page_offset(j), page_address(page));
+			else
+				ext2_clear_bit(file_page_offset(j), page_address(page));
 		}
 	}
 	spin_unlock_irqrestore(&bitmap->lock, flags);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9ecf51ee596f..adf960d8a7c9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -131,6 +131,8 @@ static ctl_table raid_root_table[] = {
 
 static struct block_device_operations md_fops;
 
+static int start_readonly;
+
 /*
  * Enables to iterate over all existing md arrays
  * all_mddevs_lock protects this list.
@@ -181,7 +183,7 @@ static void mddev_put(mddev_t *mddev)
 	if (!mddev->raid_disks && list_empty(&mddev->disks)) {
 		list_del(&mddev->all_mddevs);
 		blk_put_queue(mddev->queue);
-		kfree(mddev);
+		kobject_unregister(&mddev->kobj);
 	}
 	spin_unlock(&all_mddevs_lock);
 }
@@ -330,18 +332,46 @@ static void free_disk_sb(mdk_rdev_t * rdev)
 static int super_written(struct bio *bio, unsigned int bytes_done, int error)
 {
 	mdk_rdev_t *rdev = bio->bi_private;
+	mddev_t *mddev = rdev->mddev;
 	if (bio->bi_size)
 		return 1;
 
 	if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags))
-		md_error(rdev->mddev, rdev);
+		md_error(mddev, rdev);
 
-	if (atomic_dec_and_test(&rdev->mddev->pending_writes))
-		wake_up(&rdev->mddev->sb_wait);
+	if (atomic_dec_and_test(&mddev->pending_writes))
+		wake_up(&mddev->sb_wait);
 	bio_put(bio);
 	return 0;
 }
 
+static int super_written_barrier(struct bio *bio, unsigned int bytes_done, int error)
+{
+	struct bio *bio2 = bio->bi_private;
+	mdk_rdev_t *rdev = bio2->bi_private;
+	mddev_t *mddev = rdev->mddev;
+	if (bio->bi_size)
+		return 1;
+
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+	    error == -EOPNOTSUPP) {
+		unsigned long flags;
+		/* barriers don't appear to be supported :-( */
+		set_bit(BarriersNotsupp, &rdev->flags);
+		mddev->barriers_work = 0;
+		spin_lock_irqsave(&mddev->write_lock, flags);
+		bio2->bi_next = mddev->biolist;
+		mddev->biolist = bio2;
+		spin_unlock_irqrestore(&mddev->write_lock, flags);
+		wake_up(&mddev->sb_wait);
+		bio_put(bio);
+		return 0;
+	}
+	bio_put(bio2);
+	bio->bi_private = rdev;
+	return super_written(bio, bytes_done, error);
+}
+
 void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 		    sector_t sector, int size, struct page *page)
 {
@@ -350,16 +380,54 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 	 * and decrement it on completion, waking up sb_wait
 	 * if zero is reached.
 	 * If an error occurred, call md_error
+	 *
+	 * As we might need to resubmit the request if BIO_RW_BARRIER
+	 * causes ENOTSUPP, we allocate a spare bio...
 	 */
 	struct bio *bio = bio_alloc(GFP_NOIO, 1);
+	int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNC);
 
 	bio->bi_bdev = rdev->bdev;
 	bio->bi_sector = sector;
 	bio_add_page(bio, page, size, 0);
 	bio->bi_private = rdev;
 	bio->bi_end_io = super_written;
+	bio->bi_rw = rw;
+
 	atomic_inc(&mddev->pending_writes);
-	submit_bio((1<<BIO_RW)|(1<<BIO_RW_SYNC), bio);
+	if (!test_bit(BarriersNotsupp, &rdev->flags)) {
+		struct bio *rbio;
+		rw |= (1<<BIO_RW_BARRIER);
+		rbio = bio_clone(bio, GFP_NOIO);
+		rbio->bi_private = bio;
+		rbio->bi_end_io = super_written_barrier;
+		submit_bio(rw, rbio);
+	} else
+		submit_bio(rw, bio);
+}
+
+void md_super_wait(mddev_t *mddev)
+{
+	/* wait for all superblock writes that were scheduled to complete.
+	 * if any had to be retried (due to BARRIER problems), retry them
+	 */
+	DEFINE_WAIT(wq);
+	for(;;) {
+		prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(&mddev->pending_writes)==0)
+			break;
+		while (mddev->biolist) {
+			struct bio *bio;
+			spin_lock_irq(&mddev->write_lock);
+			bio = mddev->biolist;
+			mddev->biolist = bio->bi_next ;
+			bio->bi_next = NULL;
+			spin_unlock_irq(&mddev->write_lock);
+			submit_bio(bio->bi_rw, bio);
+		}
+		schedule();
+	}
+	finish_wait(&mddev->sb_wait, &wq);
 }
 
 static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
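
md_super_write() above clones the bio before submitting, so a failed barrier attempt does not lose the original request: the clone goes down with BIO_RW_BARRIER, and when the device rejects it with -EOPNOTSUPP, super_written_barrier() records that barriers do not work and parks the original bio on mddev->biolist for md_super_wait() to resubmit as a plain write. A user-space sketch of that submit-and-fallback shape (illustrative names throughout, not the kernel API):

    #include <errno.h>
    #include <stddef.h>
    #include <stdio.h>

    struct req {
        int barrier;
        struct req *next;
    };

    static struct req *retry_list;  /* cf. mddev->biolist */
    static int barriers_work = 1;   /* cf. mddev->barriers_work */
    static int pending;             /* cf. mddev->pending_writes */

    /* stand-in for a device that does not support barriers */
    static int device_write(struct req *r)
    {
        return r->barrier ? -EOPNOTSUPP : 0;
    }

    static void submit(struct req *r)
    {
        pending++;
        if (device_write(r) == -EOPNOTSUPP) {
            barriers_work = 0;
            r->barrier = 0;          /* strip the barrier... */
            r->next = retry_list;    /* ...and park it for retry */
            retry_list = r;
            return;                  /* still counted as pending */
        }
        pending--;                   /* completed */
    }

    /* cf. md_super_wait(): retry parked requests, wait for the rest */
    static void super_wait(void)
    {
        while (pending) {
            struct req *r = retry_list;
            retry_list = NULL;
            while (r) {
                struct req *next = r->next;
                if (device_write(r) == 0)
                    pending--;
                r = next;
            }
        }
    }

    int main(void)
    {
        struct req r = { .barrier = 1 };
        submit(&r);
        super_wait();
        printf("done, barriers_work=%d\n", barriers_work);
        return 0;
    }

Parking the bio on a list and letting the waiting task resubmit it keeps the retry out of the completion handler, which runs in interrupt context and must not block; that is also why super_written_barrier() takes write_lock with spin_lock_irqsave(), and why later hunks switch md_update_sb(), md_write_start() and md_check_recovery() to the _irq lock variants.
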
@@ -610,7 +678,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 	mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
 
 	rdev->raid_disk = -1;
-	rdev->in_sync = 0;
+	rdev->flags = 0;
 	if (mddev->raid_disks == 0) {
 		mddev->major_version = 0;
 		mddev->minor_version = sb->minor_version;
@@ -671,21 +739,19 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		return 0;
 
 	if (mddev->level != LEVEL_MULTIPATH) {
-		rdev->faulty = 0;
-		rdev->flags = 0;
 		desc = sb->disks + rdev->desc_nr;
 
 		if (desc->state & (1<<MD_DISK_FAULTY))
-			rdev->faulty = 1;
+			set_bit(Faulty, &rdev->flags);
 		else if (desc->state & (1<<MD_DISK_SYNC) &&
 			 desc->raid_disk < mddev->raid_disks) {
-			rdev->in_sync = 1;
+			set_bit(In_sync, &rdev->flags);
 			rdev->raid_disk = desc->raid_disk;
 		}
 		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
 			set_bit(WriteMostly, &rdev->flags);
 	} else /* MULTIPATH are always insync */
-		rdev->in_sync = 1;
+		set_bit(In_sync, &rdev->flags);
 	return 0;
 }
 
@@ -699,6 +765,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	mdk_rdev_t *rdev2;
 	int next_spare = mddev->raid_disks;
 
+
 	/* make rdev->sb match mddev data..
 	 *
 	 * 1/ zero out disks
@@ -758,23 +825,27 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	sb->disks[0].state = (1<<MD_DISK_REMOVED);
 	ITERATE_RDEV(mddev,rdev2,tmp) {
 		mdp_disk_t *d;
-		if (rdev2->raid_disk >= 0 && rdev2->in_sync && !rdev2->faulty)
-			rdev2->desc_nr = rdev2->raid_disk;
+		int desc_nr;
+		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
+		    && !test_bit(Faulty, &rdev2->flags))
+			desc_nr = rdev2->raid_disk;
 		else
-			rdev2->desc_nr = next_spare++;
+			desc_nr = next_spare++;
+		rdev2->desc_nr = desc_nr;
 		d = &sb->disks[rdev2->desc_nr];
 		nr_disks++;
 		d->number = rdev2->desc_nr;
 		d->major = MAJOR(rdev2->bdev->bd_dev);
 		d->minor = MINOR(rdev2->bdev->bd_dev);
-		if (rdev2->raid_disk >= 0 && rdev->in_sync && !rdev2->faulty)
+		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
+		    && !test_bit(Faulty, &rdev2->flags))
 			d->raid_disk = rdev2->raid_disk;
 		else
 			d->raid_disk = rdev2->desc_nr; /* compatibility */
-		if (rdev2->faulty) {
+		if (test_bit(Faulty, &rdev2->flags)) {
 			d->state = (1<<MD_DISK_FAULTY);
 			failed++;
-		} else if (rdev2->in_sync) {
+		} else if (test_bit(In_sync, &rdev2->flags)) {
 			d->state = (1<<MD_DISK_ACTIVE);
 			d->state |= (1<<MD_DISK_SYNC);
 			active++;
@@ -787,7 +858,6 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		if (test_bit(WriteMostly, &rdev2->flags))
 			d->state |= (1<<MD_DISK_WRITEMOSTLY);
 	}
-
 	/* now set the "removed" and "faulty" bits on any missing devices */
 	for (i=0 ; i < mddev->raid_disks ; i++) {
 		mdp_disk_t *d = &sb->disks[i];
@@ -944,7 +1014,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 	struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
 
 	rdev->raid_disk = -1;
-	rdev->in_sync = 0;
+	rdev->flags = 0;
 	if (mddev->raid_disks == 0) {
 		mddev->major_version = 1;
 		mddev->patch_version = 0;
@@ -996,22 +1066,19 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
 		switch(role) {
 		case 0xffff: /* spare */
-			rdev->faulty = 0;
 			break;
 		case 0xfffe: /* faulty */
-			rdev->faulty = 1;
+			set_bit(Faulty, &rdev->flags);
 			break;
 		default:
-			rdev->in_sync = 1;
-			rdev->faulty = 0;
+			set_bit(In_sync, &rdev->flags);
 			rdev->raid_disk = role;
 			break;
 		}
-		rdev->flags = 0;
 		if (sb->devflags & WriteMostly1)
 			set_bit(WriteMostly, &rdev->flags);
 	} else /* MULTIPATH are always insync */
-		rdev->in_sync = 1;
+		set_bit(In_sync, &rdev->flags);
 
 	return 0;
 }
@@ -1055,9 +1122,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	ITERATE_RDEV(mddev,rdev2,tmp) {
 		i = rdev2->desc_nr;
-		if (rdev2->faulty)
+		if (test_bit(Faulty, &rdev2->flags))
 			sb->dev_roles[i] = cpu_to_le16(0xfffe);
-		else if (rdev2->in_sync)
+		else if (test_bit(In_sync, &rdev2->flags))
 			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
 		else
 			sb->dev_roles[i] = cpu_to_le16(0xffff);
@@ -1115,6 +1182,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 {
 	mdk_rdev_t *same_pdev;
 	char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
+	struct kobject *ko;
 
 	if (rdev->mddev) {
 		MD_BUG();
@@ -1143,10 +1211,22 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 		if (find_rdev_nr(mddev, rdev->desc_nr))
 			return -EBUSY;
 	}
+	bdevname(rdev->bdev,b);
+	if (kobject_set_name(&rdev->kobj, "dev-%s", b) < 0)
+		return -ENOMEM;
 
 	list_add(&rdev->same_set, &mddev->disks);
 	rdev->mddev = mddev;
-	printk(KERN_INFO "md: bind<%s>\n", bdevname(rdev->bdev,b));
+	printk(KERN_INFO "md: bind<%s>\n", b);
+
+	rdev->kobj.parent = &mddev->kobj;
+	kobject_add(&rdev->kobj);
+
+	if (rdev->bdev->bd_part)
+		ko = &rdev->bdev->bd_part->kobj;
+	else
+		ko = &rdev->bdev->bd_disk->kobj;
+	sysfs_create_link(&rdev->kobj, ko, "block");
 	return 0;
 }
 
@@ -1160,6 +1240,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 	list_del_init(&rdev->same_set);
 	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
 	rdev->mddev = NULL;
+	sysfs_remove_link(&rdev->kobj, "block");
+	kobject_del(&rdev->kobj);
 }
 
 /*
@@ -1215,7 +1297,7 @@ static void export_rdev(mdk_rdev_t * rdev)
 	md_autodetect_dev(rdev->bdev->bd_dev);
 #endif
 	unlock_rdev(rdev);
-	kfree(rdev);
+	kobject_put(&rdev->kobj);
 }
 
 static void kick_rdev_from_array(mdk_rdev_t * rdev)
@@ -1287,7 +1369,8 @@ static void print_rdev(mdk_rdev_t *rdev)
 	char b[BDEVNAME_SIZE];
 	printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
 	       bdevname(rdev->bdev,b), (unsigned long long)rdev->size,
-	       rdev->faulty, rdev->in_sync, rdev->desc_nr);
+	       test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
+	       rdev->desc_nr);
 	if (rdev->sb_loaded) {
 		printk(KERN_INFO "md: rdev superblock:\n");
 		print_sb((mdp_super_t*)page_address(rdev->sb_page));
@@ -1344,7 +1427,7 @@ static void md_update_sb(mddev_t * mddev)
 	int sync_req;
 
 repeat:
-	spin_lock(&mddev->write_lock);
+	spin_lock_irq(&mddev->write_lock);
 	sync_req = mddev->in_sync;
 	mddev->utime = get_seconds();
 	mddev->events ++;
@@ -1367,11 +1450,11 @@ repeat:
 	 */
 	if (!mddev->persistent) {
 		mddev->sb_dirty = 0;
-		spin_unlock(&mddev->write_lock);
+		spin_unlock_irq(&mddev->write_lock);
 		wake_up(&mddev->sb_wait);
 		return;
 	}
-	spin_unlock(&mddev->write_lock);
+	spin_unlock_irq(&mddev->write_lock);
 
 	dprintk(KERN_INFO
 		"md: updating %s RAID superblock on device (in sync %d)\n",
@@ -1381,11 +1464,11 @@ repeat:
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		char b[BDEVNAME_SIZE];
 		dprintk(KERN_INFO "md: ");
-		if (rdev->faulty)
+		if (test_bit(Faulty, &rdev->flags))
 			dprintk("(skipping faulty ");
 
 		dprintk("%s ", bdevname(rdev->bdev,b));
-		if (!rdev->faulty) {
+		if (!test_bit(Faulty, &rdev->flags)) {
 			md_super_write(mddev,rdev,
 				       rdev->sb_offset<<1, rdev->sb_size,
 				       rdev->sb_page);
@@ -1399,21 +1482,106 @@ repeat:
 		/* only need to write one superblock... */
 		break;
 	}
-	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+	md_super_wait(mddev);
 	/* if there was a failure, sb_dirty was set to 1, and we re-write super */
 
-	spin_lock(&mddev->write_lock);
+	spin_lock_irq(&mddev->write_lock);
 	if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) {
 		/* have to write it out again */
-		spin_unlock(&mddev->write_lock);
+		spin_unlock_irq(&mddev->write_lock);
 		goto repeat;
 	}
 	mddev->sb_dirty = 0;
-	spin_unlock(&mddev->write_lock);
+	spin_unlock_irq(&mddev->write_lock);
 	wake_up(&mddev->sb_wait);
 
 }
 
+struct rdev_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(mdk_rdev_t *, char *);
+	ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
+};
+
+static ssize_t
+state_show(mdk_rdev_t *rdev, char *page)
+{
+	char *sep = "";
+	int len=0;
+
+	if (test_bit(Faulty, &rdev->flags)) {
+		len+= sprintf(page+len, "%sfaulty",sep);
+		sep = ",";
+	}
+	if (test_bit(In_sync, &rdev->flags)) {
+		len += sprintf(page+len, "%sin_sync",sep);
+		sep = ",";
+	}
+	if (!test_bit(Faulty, &rdev->flags) &&
+	    !test_bit(In_sync, &rdev->flags)) {
+		len += sprintf(page+len, "%sspare", sep);
+		sep = ",";
+	}
+	return len+sprintf(page+len, "\n");
+}
+
+static struct rdev_sysfs_entry
+rdev_state = __ATTR_RO(state);
+
+static ssize_t
+super_show(mdk_rdev_t *rdev, char *page)
+{
+	if (rdev->sb_loaded && rdev->sb_size) {
+		memcpy(page, page_address(rdev->sb_page), rdev->sb_size);
+		return rdev->sb_size;
+	} else
+		return 0;
+}
+static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super);
+
+static struct attribute *rdev_default_attrs[] = {
+	&rdev_state.attr,
+	&rdev_super.attr,
+	NULL,
+};
+static ssize_t
+rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+	struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
+	mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+
+	if (!entry->show)
+		return -EIO;
+	return entry->show(rdev, page);
+}
+
+static ssize_t
+rdev_attr_store(struct kobject *kobj, struct attribute *attr,
+		const char *page, size_t length)
+{
+	struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
+	mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+
+	if (!entry->store)
+		return -EIO;
+	return entry->store(rdev, page, length);
+}
+
+static void rdev_free(struct kobject *ko)
+{
+	mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
+	kfree(rdev);
+}
+static struct sysfs_ops rdev_sysfs_ops = {
+	.show = rdev_attr_show,
+	.store = rdev_attr_store,
+};
+static struct kobj_type rdev_ktype = {
+	.release = rdev_free,
+	.sysfs_ops = &rdev_sysfs_ops,
+	.default_attrs = rdev_default_attrs,
+};
+
 /*
  * Import a device. If 'super_format' >= 0, then sanity check the superblock
  *
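
The rdev_attr_show()/rdev_attr_store() dispatchers above are the standard kobject attribute pattern: sysfs hands the generic ops a bare struct attribute pointer, and container_of() recovers the enclosing object because both the attribute entry and the kobject are embedded in it. A minimal user-space illustration of that recovery step (illustrative struct names):

    #include <stddef.h>
    #include <stdio.h>

    /* the same pointer arithmetic the kernel's container_of() does */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct attribute {
        const char *name;
    };

    struct device_sketch {
        int id;
        struct attribute attr;  /* embedded, like the kobj in mdk_rdev_t */
    };

    static void show(struct attribute *attr)
    {
        struct device_sketch *dev =
            container_of(attr, struct device_sketch, attr);
        printf("attr '%s' belongs to device %d\n", attr->name, dev->id);
    }

    int main(void)
    {
        struct device_sketch d = { .id = 3, .attr = { .name = "state" } };
        show(&d.attr);  /* prints: attr 'state' belongs to device 3 */
        return 0;
    }

Embedding the kobject in mdk_rdev_t is also what lets rdev_free() release the whole structure with a single kfree(), and why export_rdev() above replaces kfree(rdev) with kobject_put(&rdev->kobj): the last reference holder, not the caller, now frees the device.
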
@@ -1445,11 +1613,15 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
 	if (err)
 		goto abort_free;
 
+	rdev->kobj.parent = NULL;
+	rdev->kobj.ktype = &rdev_ktype;
+	kobject_init(&rdev->kobj);
+
 	rdev->desc_nr = -1;
-	rdev->faulty = 0;
-	rdev->in_sync = 0;
+	rdev->flags = 0;
 	rdev->data_offset = 0;
 	atomic_set(&rdev->nr_pending, 0);
+	atomic_set(&rdev->read_errors, 0);
 
 	size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
 	if (!size) {
@@ -1537,7 +1709,7 @@ static void analyze_sbs(mddev_t * mddev)
 		if (mddev->level == LEVEL_MULTIPATH) {
 			rdev->desc_nr = i++;
 			rdev->raid_disk = rdev->desc_nr;
-			rdev->in_sync = 1;
+			set_bit(In_sync, &rdev->flags);
 		}
 	}
 
@@ -1551,6 +1723,162 @@ static void analyze_sbs(mddev_t * mddev)
 
 }
 
+static ssize_t
+level_show(mddev_t *mddev, char *page)
+{
+	mdk_personality_t *p = mddev->pers;
+	if (p == NULL && mddev->raid_disks == 0)
+		return 0;
+	if (mddev->level >= 0)
+		return sprintf(page, "RAID-%d\n", mddev->level);
+	else
+		return sprintf(page, "%s\n", p->name);
+}
+
+static struct md_sysfs_entry md_level = __ATTR_RO(level);
+
+static ssize_t
+raid_disks_show(mddev_t *mddev, char *page)
+{
+	if (mddev->raid_disks == 0)
+		return 0;
+	return sprintf(page, "%d\n", mddev->raid_disks);
+}
+
+static struct md_sysfs_entry md_raid_disks = __ATTR_RO(raid_disks);
+
+static ssize_t
+action_show(mddev_t *mddev, char *page)
+{
+	char *type = "idle";
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+	    test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) {
+		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+			if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+				type = "resync";
+			else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+				type = "check";
+			else
+				type = "repair";
+		} else
+			type = "recover";
+	}
+	return sprintf(page, "%s\n", type);
+}
+
+static ssize_t
+action_store(mddev_t *mddev, const char *page, size_t len)
+{
+	if (!mddev->pers || !mddev->pers->sync_request)
+		return -EINVAL;
+
+	if (strcmp(page, "idle")==0 || strcmp(page, "idle\n")==0) {
+		if (mddev->sync_thread) {
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+			md_unregister_thread(mddev->sync_thread);
+			mddev->sync_thread = NULL;
+			mddev->recovery = 0;
+		}
+		return len;
+	}
+
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+	    test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+		return -EBUSY;
+	if (strcmp(page, "resync")==0 || strcmp(page, "resync\n")==0 ||
+	    strcmp(page, "recover")==0 || strcmp(page, "recover\n")==0)
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	else {
+		if (strcmp(page, "check")==0 || strcmp(page, "check\n")==0)
+			set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+		else if (strcmp(page, "repair")!=0 && strcmp(page, "repair\n")!=0)
+			return -EINVAL;
+		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	}
+	md_wakeup_thread(mddev->thread);
+	return len;
+}
+
+static ssize_t
+mismatch_cnt_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%llu\n",
+		       (unsigned long long) mddev->resync_mismatches);
+}
+
+static struct md_sysfs_entry
+md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
+
+
+static struct md_sysfs_entry
+md_mismatches = __ATTR_RO(mismatch_cnt);
+
+static struct attribute *md_default_attrs[] = {
+	&md_level.attr,
+	&md_raid_disks.attr,
+	NULL,
+};
+
+static struct attribute *md_redundancy_attrs[] = {
+	&md_scan_mode.attr,
+	&md_mismatches.attr,
+	NULL,
+};
+static struct attribute_group md_redundancy_group = {
+	.name = NULL,
+	.attrs = md_redundancy_attrs,
+};
+
+
+static ssize_t
+md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+	struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
+	mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
+	ssize_t rv;
+
+	if (!entry->show)
+		return -EIO;
+	mddev_lock(mddev);
+	rv = entry->show(mddev, page);
+	mddev_unlock(mddev);
+	return rv;
+}
+
+static ssize_t
+md_attr_store(struct kobject *kobj, struct attribute *attr,
+	      const char *page, size_t length)
+{
+	struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
+	mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
+	ssize_t rv;
+
+	if (!entry->store)
+		return -EIO;
+	mddev_lock(mddev);
+	rv = entry->store(mddev, page, length);
+	mddev_unlock(mddev);
+	return rv;
+}
+
+static void md_free(struct kobject *ko)
+{
+	mddev_t *mddev = container_of(ko, mddev_t, kobj);
+	kfree(mddev);
+}
+
+static struct sysfs_ops md_sysfs_ops = {
+	.show = md_attr_show,
+	.store = md_attr_store,
+};
+static struct kobj_type md_ktype = {
+	.release = md_free,
+	.sysfs_ops = &md_sysfs_ops,
+	.default_attrs = md_default_attrs,
+};
+
 int mdp_major = 0;
 
 static struct kobject *md_probe(dev_t dev, int *part, void *data)
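
The next hunk parents this mddev kobject to the array's gendisk and names it "md", so the attributes defined above should surface as /sys/block/mdX/md/level and raid_disks, plus sync_action and mismatch_cnt for personalities that provide a sync_request method (the path here is inferred from that kobject layout). A small user-space sketch of driving the new interface:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* kick off a read-only consistency check on md0 by writing "check"
     * to the sync_action attribute created by md_scan_mode above */
    int main(void)
    {
        int fd = open("/sys/block/md0/md/sync_action", O_WRONLY);
        if (fd < 0) {
            perror("open sync_action");
            return 1;
        }
        if (write(fd, "check", 5) != 5)
            perror("write");
        close(fd);
        return 0;
    }

While the pass runs, reading sync_action back should report "check"; afterwards mismatch_cnt exposes mddev->resync_mismatches, and writing "idle" interrupts a running pass via md_unregister_thread(), as action_store() above implements.
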
@@ -1592,6 +1920,11 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 	add_disk(disk);
 	mddev->gendisk = disk;
 	up(&disks_sem);
+	mddev->kobj.parent = &disk->kobj;
+	mddev->kobj.k_name = NULL;
+	snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md");
+	mddev->kobj.ktype = &md_ktype;
+	kobject_register(&mddev->kobj);
 	return NULL;
 }
 
@@ -1663,7 +1996,7 @@ static int do_md_run(mddev_t * mddev)
 
 	/* devices must have minimum size of one chunk */
 	ITERATE_RDEV(mddev,rdev,tmp) {
-		if (rdev->faulty)
+		if (test_bit(Faulty, &rdev->flags))
 			continue;
 		if (rdev->size < chunk_size / 1024) {
 			printk(KERN_WARNING
@@ -1691,7 +2024,7 @@ static int do_md_run(mddev_t * mddev)
 	 * Also find largest hardsector size
 	 */
 	ITERATE_RDEV(mddev,rdev,tmp) {
-		if (rdev->faulty)
+		if (test_bit(Faulty, &rdev->flags))
 			continue;
 		sync_blockdev(rdev->bdev);
 		invalidate_bdev(rdev->bdev, 0);
@@ -1715,6 +2048,10 @@ static int do_md_run(mddev_t * mddev)
 
 	mddev->recovery = 0;
 	mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
+	mddev->barriers_work = 1;
+
+	if (start_readonly)
+		mddev->ro = 2; /* read-only, but switch on first write */
 
 	/* before we start the array running, initialise the bitmap */
 	err = bitmap_create(mddev);
@@ -1730,12 +2067,24 @@ static int do_md_run(mddev_t * mddev)
 		bitmap_destroy(mddev);
 		return err;
 	}
+	if (mddev->pers->sync_request)
+		sysfs_create_group(&mddev->kobj, &md_redundancy_group);
+	else if (mddev->ro == 2) /* auto-readonly not meaningful */
+		mddev->ro = 0;
+
 	atomic_set(&mddev->writes_pending,0);
 	mddev->safemode = 0;
 	mddev->safemode_timer.function = md_safemode_timeout;
 	mddev->safemode_timer.data = (unsigned long) mddev;
 	mddev->safemode_delay = (20 * HZ)/1000 +1; /* 20 msec delay */
 	mddev->in_sync = 1;
+
+	ITERATE_RDEV(mddev,rdev,tmp)
+		if (rdev->raid_disk >= 0) {
+			char nm[20];
+			sprintf(nm, "rd%d", rdev->raid_disk);
+			sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
+		}
 
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
@@ -1821,16 +2170,19 @@ static int do_md_stop(mddev_t * mddev, int ro)
 
 	if (ro) {
 		err = -ENXIO;
-		if (mddev->ro)
+		if (mddev->ro==1)
 			goto out;
 		mddev->ro = 1;
 	} else {
 		bitmap_flush(mddev);
-		wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+		md_super_wait(mddev);
 		if (mddev->ro)
 			set_disk_ro(disk, 0);
 		blk_queue_make_request(mddev->queue, md_fail_request);
 		mddev->pers->stop(mddev);
+		if (mddev->pers->sync_request)
+			sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
+
 		module_put(mddev->pers->owner);
 		mddev->pers = NULL;
 		if (mddev->ro)
@@ -1857,9 +2209,18 @@ static int do_md_stop(mddev_t * mddev, int ro)
 	 * Free resources if final stop
 	 */
 	if (!ro) {
+		mdk_rdev_t *rdev;
+		struct list_head *tmp;
 		struct gendisk *disk;
 		printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
 
+		ITERATE_RDEV(mddev,rdev,tmp)
+			if (rdev->raid_disk >= 0) {
+				char nm[20];
+				sprintf(nm, "rd%d", rdev->raid_disk);
+				sysfs_remove_link(&mddev->kobj, nm);
+			}
+
 		export_array(mddev);
 
 		mddev->array_size = 0;
@@ -2012,7 +2373,7 @@ static int autostart_array(dev_t startdev)
 		return err;
 	}
 
-	if (start_rdev->faulty) {
+	if (test_bit(Faulty, &start_rdev->flags)) {
 		printk(KERN_WARNING
 			"md: can not autostart based on faulty %s!\n",
 			bdevname(start_rdev->bdev,b));
@@ -2071,11 +2432,11 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
 	nr=working=active=failed=spare=0;
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		nr++;
-		if (rdev->faulty)
+		if (test_bit(Faulty, &rdev->flags))
 			failed++;
 		else {
 			working++;
-			if (rdev->in_sync)
+			if (test_bit(In_sync, &rdev->flags))
 				active++;
 			else
 				spare++;
@@ -2166,9 +2527,9 @@ static int get_disk_info(mddev_t * mddev, void __user * arg)
 		info.minor = MINOR(rdev->bdev->bd_dev);
 		info.raid_disk = rdev->raid_disk;
 		info.state = 0;
-		if (rdev->faulty)
+		if (test_bit(Faulty, &rdev->flags))
 			info.state |= (1<<MD_DISK_FAULTY);
-		else if (rdev->in_sync) {
+		else if (test_bit(In_sync, &rdev->flags)) {
 			info.state |= (1<<MD_DISK_ACTIVE);
 			info.state |= (1<<MD_DISK_SYNC);
 		}
@@ -2261,7 +2622,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 			validate_super(mddev, rdev);
 		rdev->saved_raid_disk = rdev->raid_disk;
 
-		rdev->in_sync = 0; /* just to be sure */
+		clear_bit(In_sync, &rdev->flags); /* just to be sure */
 		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
 			set_bit(WriteMostly, &rdev->flags);
 
@@ -2299,11 +2660,11 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 		else
 			rdev->raid_disk = -1;
 
-		rdev->faulty = 0;
+		rdev->flags = 0;
+
 		if (rdev->raid_disk < mddev->raid_disks)
-			rdev->in_sync = (info->state & (1<<MD_DISK_SYNC));
-		else
-			rdev->in_sync = 0;
+			if (info->state & (1<<MD_DISK_SYNC))
+				set_bit(In_sync, &rdev->flags);
 
 		if (info->state & (1<<MD_DISK_WRITEMOSTLY))
 			set_bit(WriteMostly, &rdev->flags);
@@ -2402,14 +2763,14 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
 		goto abort_export;
 	}
 
-	if (rdev->faulty) {
+	if (test_bit(Faulty, &rdev->flags)) {
 		printk(KERN_WARNING
 			"md: can not hot-add faulty %s disk to %s!\n",
 			bdevname(rdev->bdev,b), mdname(mddev));
 		err = -EINVAL;
 		goto abort_export;
 	}
-	rdev->in_sync = 0;
+	clear_bit(In_sync, &rdev->flags);
 	rdev->desc_nr = -1;
 	bind_rdev_to_array(rdev, mddev);
 
@@ -2929,12 +3290,22 @@ static int md_ioctl(struct inode *inode, struct file *file,
 
 	/*
 	 * The remaining ioctls are changing the state of the
-	 * superblock, so we do not allow read-only arrays
-	 * here:
+	 * superblock, so we do not allow them on read-only arrays.
+	 * However non-MD ioctls (e.g. get-size) will still come through
+	 * here and hit the 'default' below, so only disallow
+	 * 'md' ioctls, and switch to rw mode if started auto-readonly.
 	 */
-	if (mddev->ro) {
-		err = -EROFS;
-		goto abort_unlock;
+	if (_IOC_TYPE(cmd) == MD_MAJOR &&
+	    mddev->ro && mddev->pers) {
+		if (mddev->ro == 2) {
+			mddev->ro = 0;
+			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+			md_wakeup_thread(mddev->thread);
+
+		} else {
+			err = -EROFS;
+			goto abort_unlock;
+		}
 	}
 
 	switch (cmd)
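
With this change mddev->ro is effectively a three-state field rather than a boolean: 0 is read-write, 1 is hard read-only, and 2 (set from start_readonly in do_md_run()) is auto-read-only, promoted to read-write by the first write or, as above, the first state-changing md ioctl. A compact sketch of the rule (illustrative names, not kernel code):

    #include <errno.h>

    enum md_ro_state {
        MD_RDWR      = 0,  /* normal read-write */
        MD_RDONLY    = 1,  /* explicit read-only: changes get -EROFS */
        MD_AUTO_READ = 2,  /* read-only until something writes */
    };

    /* called before any superblock-changing operation */
    static int allow_state_change(int *ro)
    {
        if (*ro == MD_AUTO_READ) {
            *ro = MD_RDWR;  /* promote, then allow the change */
            return 0;
        }
        return (*ro == MD_RDONLY) ? -EROFS : 0;
    }

The _IOC_TYPE(cmd) == MD_MAJOR test is what lets generic block ioctls (get-size and friends) fall through to the 'default' case untouched even on a read-only array.
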
@@ -3064,21 +3435,17 @@ static int md_thread(void * arg)
 	 */
 
 	allow_signal(SIGKILL);
-	complete(thread->event);
 	while (!kthread_should_stop()) {
-		void (*run)(mddev_t *);
 
-		wait_event_interruptible_timeout(thread->wqueue,
+		wait_event_timeout(thread->wqueue,
 					 test_bit(THREAD_WAKEUP, &thread->flags)
 					 || kthread_should_stop(),
 					 thread->timeout);
 		try_to_freeze();
 
 		clear_bit(THREAD_WAKEUP, &thread->flags);
 
-		run = thread->run;
-		if (run)
-			run(thread->mddev);
+		thread->run(thread->mddev);
 	}
 
 	return 0;
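
The simplified loop leans on the kthread API: kthread_run() has already started the task (making the completion handshake removed from md_register_thread() below redundant), and kthread_stop() both sets the stop flag and wakes the thread, so the signal-based wakeups and the interruptible wait can go. A user-space pthread analogue of the resulting wake-or-stop loop (illustrative names):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct worker {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        bool wakeup;        /* cf. THREAD_WAKEUP */
        bool should_stop;   /* cf. kthread_should_stop() */
    };

    static void *worker_fn(void *data)
    {
        struct worker *w = data;

        pthread_mutex_lock(&w->lock);
        while (!w->should_stop) {
            while (!w->wakeup && !w->should_stop)
                pthread_cond_wait(&w->cond, &w->lock);
            w->wakeup = false;
            pthread_mutex_unlock(&w->lock);
            puts("one unit of work");  /* cf. thread->run(mddev) */
            pthread_mutex_lock(&w->lock);
        }
        pthread_mutex_unlock(&w->lock);
        return NULL;
    }

    int main(void)
    {
        struct worker w = { PTHREAD_MUTEX_INITIALIZER,
                            PTHREAD_COND_INITIALIZER, false, false };
        pthread_t t;

        pthread_create(&t, NULL, worker_fn, &w);

        pthread_mutex_lock(&w.lock);
        w.wakeup = true;            /* cf. md_wakeup_thread() */
        pthread_cond_signal(&w.cond);
        pthread_mutex_unlock(&w.lock);

        pthread_mutex_lock(&w.lock);
        w.should_stop = true;       /* cf. kthread_stop() */
        pthread_cond_signal(&w.cond);
        pthread_mutex_unlock(&w.lock);

        pthread_join(t, NULL);
        return 0;
    }

As in md_thread(), the body may run once more after a stop request; the guarantee is only that the loop terminates promptly, not that pending work is cancelled.
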
@@ -3097,7 +3464,6 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
 				 const char *name)
 {
 	mdk_thread_t *thread;
-	struct completion event;
 
 	thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL);
 	if (!thread)
@@ -3106,18 +3472,14 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
 	memset(thread, 0, sizeof(mdk_thread_t));
 	init_waitqueue_head(&thread->wqueue);
 
-	init_completion(&event);
-	thread->event = &event;
 	thread->run = run;
 	thread->mddev = mddev;
-	thread->name = name;
 	thread->timeout = MAX_SCHEDULE_TIMEOUT;
 	thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev));
 	if (IS_ERR(thread->tsk)) {
 		kfree(thread);
 		return NULL;
 	}
-	wait_for_completion(&event);
 	return thread;
 }
 
@@ -3136,7 +3498,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 		return;
 	}
 
-	if (!rdev || rdev->faulty)
+	if (!rdev || test_bit(Faulty, &rdev->flags))
 		return;
 /*
 	dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
@@ -3322,8 +3684,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "%s : %sactive", mdname(mddev),
 			   mddev->pers ? "" : "in");
 		if (mddev->pers) {
-			if (mddev->ro)
+			if (mddev->ro==1)
 				seq_printf(seq, " (read-only)");
+			if (mddev->ro==2)
+				seq_printf(seq, "(auto-read-only)");
 			seq_printf(seq, " %s", mddev->pers->name);
 		}
 
@@ -3334,7 +3698,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 				bdevname(rdev->bdev,b), rdev->desc_nr);
 			if (test_bit(WriteMostly, &rdev->flags))
 				seq_printf(seq, "(W)");
-			if (rdev->faulty) {
+			if (test_bit(Faulty, &rdev->flags)) {
 				seq_printf(seq, "(F)");
 				continue;
 			} else if (rdev->raid_disk < 0)
@@ -3363,11 +3727,15 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		if (mddev->pers) {
 			mddev->pers->status (seq, mddev);
 			seq_printf(seq, "\n ");
-			if (mddev->curr_resync > 2) {
-				status_resync (seq, mddev);
-				seq_printf(seq, "\n ");
-			} else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
-				seq_printf(seq, " resync=DELAYED\n ");
+			if (mddev->pers->sync_request) {
+				if (mddev->curr_resync > 2) {
+					status_resync (seq, mddev);
+					seq_printf(seq, "\n ");
+				} else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
+					seq_printf(seq, "\tresync=DELAYED\n ");
+				else if (mddev->recovery_cp < MaxSector)
+					seq_printf(seq, "\tresync=PENDING\n ");
+			}
 		} else
 			seq_printf(seq, "\n ");
 
@@ -3504,15 +3872,22 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
 	if (bio_data_dir(bi) != WRITE)
 		return;
 
+	BUG_ON(mddev->ro == 1);
+	if (mddev->ro == 2) {
+		/* need to switch to read/write */
+		mddev->ro = 0;
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
+	}
 	atomic_inc(&mddev->writes_pending);
 	if (mddev->in_sync) {
-		spin_lock(&mddev->write_lock);
+		spin_lock_irq(&mddev->write_lock);
 		if (mddev->in_sync) {
 			mddev->in_sync = 0;
 			mddev->sb_dirty = 1;
 			md_wakeup_thread(mddev->thread);
 		}
-		spin_unlock(&mddev->write_lock);
+		spin_unlock_irq(&mddev->write_lock);
 	}
 	wait_event(mddev->sb_wait, mddev->sb_dirty==0);
 }
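
md_write_start() reads mddev->in_sync once without the lock and again under it, so the common case (the array already marked dirty) pays no locking cost while the clean-to-dirty transition still happens exactly once; the lock becomes the _irq variant because super_written_barrier() now takes write_lock from interrupt context. A compact user-space sketch of the recheck pattern (illustrative names):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool in_sync = true;

    static void write_start(void)
    {
        if (in_sync) {                     /* cheap unlocked peek */
            pthread_mutex_lock(&write_lock);
            if (in_sync) {                 /* re-check under the lock */
                in_sync = false;           /* mark superblock dirty once */
                /* ...wake the md thread to write it out... */
            }
            pthread_mutex_unlock(&write_lock);
        }
    }
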
@@ -3568,9 +3943,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3568 | mddev->curr_resync = 2; | 3943 | mddev->curr_resync = 2; |
3569 | 3944 | ||
3570 | try_again: | 3945 | try_again: |
3571 | if (signal_pending(current) || | 3946 | if (kthread_should_stop()) { |
3572 | kthread_should_stop()) { | ||
3573 | flush_signals(current); | ||
3574 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 3947 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
3575 | goto skip; | 3948 | goto skip; |
3576 | } | 3949 | } |
@@ -3590,9 +3963,8 @@ static void md_do_sync(mddev_t *mddev) | |||
3590 | * time 'round when curr_resync == 2 | 3963 | * time 'round when curr_resync == 2 |
3591 | */ | 3964 | */ |
3592 | continue; | 3965 | continue; |
3593 | prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); | 3966 | prepare_to_wait(&resync_wait, &wq, TASK_UNINTERRUPTIBLE); |
3594 | if (!signal_pending(current) && | 3967 | if (!kthread_should_stop() && |
3595 | !kthread_should_stop() && | ||
3596 | mddev2->curr_resync >= mddev->curr_resync) { | 3968 | mddev2->curr_resync >= mddev->curr_resync) { |
3597 | printk(KERN_INFO "md: delaying resync of %s" | 3969 | printk(KERN_INFO "md: delaying resync of %s" |
3598 | " until %s has finished resync (they" | 3970 | " until %s has finished resync (they" |
@@ -3608,12 +3980,13 @@ static void md_do_sync(mddev_t *mddev) | |||
3608 | } | 3980 | } |
3609 | } while (mddev->curr_resync < 2); | 3981 | } while (mddev->curr_resync < 2); |
3610 | 3982 | ||
3611 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 3983 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
3612 | /* resync follows the size requested by the personality, | 3984 | /* resync follows the size requested by the personality, |
3613 | * which defaults to physical size, but can be virtual size | 3985 | * which defaults to physical size, but can be virtual size |
3614 | */ | 3986 | */ |
3615 | max_sectors = mddev->resync_max_sectors; | 3987 | max_sectors = mddev->resync_max_sectors; |
3616 | else | 3988 | mddev->resync_mismatches = 0; |
3989 | } else | ||
3617 | /* recovery follows the physical size of devices */ | 3990 | /* recovery follows the physical size of devices */ |
3618 | max_sectors = mddev->size << 1; | 3991 | max_sectors = mddev->size << 1; |
3619 | 3992 | ||
@@ -3626,7 +3999,8 @@ static void md_do_sync(mddev_t *mddev) | |||
3626 | 3999 | ||
3627 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 4000 | is_mddev_idle(mddev); /* this also initializes IO event counters */ |
3628 | /* we don't use the checkpoint if there's a bitmap */ | 4001 | /* we don't use the checkpoint if there's a bitmap */ |
3629 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap) | 4002 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap |
4003 | && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
3630 | j = mddev->recovery_cp; | 4004 | j = mddev->recovery_cp; |
3631 | else | 4005 | else |
3632 | j = 0; | 4006 | j = 0; |
@@ -3699,13 +4073,12 @@ static void md_do_sync(mddev_t *mddev) | |||
3699 | } | 4073 | } |
3700 | 4074 | ||
3701 | 4075 | ||
3702 | if (signal_pending(current) || kthread_should_stop()) { | 4076 | if (kthread_should_stop()) { |
3703 | /* | 4077 | /* |
3704 | * got a signal, exit. | 4078 | * got a signal, exit. |
3705 | */ | 4079 | */ |
3706 | printk(KERN_INFO | 4080 | printk(KERN_INFO |
3707 | "md: md_do_sync() got signal ... exiting\n"); | 4081 | "md: md_do_sync() got signal ... exiting\n"); |
3708 | flush_signals(current); | ||
3709 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 4082 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
3710 | goto out; | 4083 | goto out; |
3711 | } | 4084 | } |
@@ -3727,7 +4100,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3727 | if (currspeed > sysctl_speed_limit_min) { | 4100 | if (currspeed > sysctl_speed_limit_min) { |
3728 | if ((currspeed > sysctl_speed_limit_max) || | 4101 | if ((currspeed > sysctl_speed_limit_max) || |
3729 | !is_mddev_idle(mddev)) { | 4102 | !is_mddev_idle(mddev)) { |
3730 | msleep_interruptible(250); | 4103 | msleep(250); |
3731 | goto repeat; | 4104 | goto repeat; |
3732 | } | 4105 | } |
3733 | } | 4106 | } |
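With signals out of the picture, the throttle sleep becomes a plain msleep(250); an interruptible sleep would have nothing left to interrupt it. For context, a simplified sketch of the throttle loop this hunk sits in:

        /* Simplified from the code around this hunk: */
        if (currspeed > sysctl_speed_limit_min &&
            (currspeed > sysctl_speed_limit_max || !is_mddev_idle(mddev))) {
                msleep(250);            /* back off, then re-measure */
                goto repeat;
        }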
@@ -3820,7 +4193,7 @@ void md_check_recovery(mddev_t *mddev) | |||
3820 | if (mddev_trylock(mddev)==0) { | 4193 | if (mddev_trylock(mddev)==0) { |
3821 | int spares =0; | 4194 | int spares =0; |
3822 | 4195 | ||
3823 | spin_lock(&mddev->write_lock); | 4196 | spin_lock_irq(&mddev->write_lock); |
3824 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 4197 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && |
3825 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | 4198 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { |
3826 | mddev->in_sync = 1; | 4199 | mddev->in_sync = 1; |
@@ -3828,7 +4201,7 @@ void md_check_recovery(mddev_t *mddev) | |||
3828 | } | 4201 | } |
3829 | if (mddev->safemode == 1) | 4202 | if (mddev->safemode == 1) |
3830 | mddev->safemode = 0; | 4203 | mddev->safemode = 0; |
3831 | spin_unlock(&mddev->write_lock); | 4204 | spin_unlock_irq(&mddev->write_lock); |
3832 | 4205 | ||
3833 | if (mddev->sb_dirty) | 4206 | if (mddev->sb_dirty) |
3834 | md_update_sb(mddev); | 4207 | md_update_sb(mddev); |
@@ -3864,9 +4237,13 @@ void md_check_recovery(mddev_t *mddev) | |||
3864 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4237 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
3865 | goto unlock; | 4238 | goto unlock; |
3866 | } | 4239 | } |
3867 | if (mddev->recovery) | 4240 | /* Clear some bits that don't mean anything, but |
3868 | /* probably just the RECOVERY_NEEDED flag */ | 4241 | * might be left set |
3869 | mddev->recovery = 0; | 4242 | */ |
4243 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
4244 | clear_bit(MD_RECOVERY_ERR, &mddev->recovery); | ||
4245 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
4246 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); | ||
3870 | 4247 | ||
3871 | /* no recovery is running. | 4248 | /* no recovery is running. |
3872 | * remove any failed drives, then | 4249 | * remove any failed drives, then |
@@ -3876,31 +4253,41 @@ void md_check_recovery(mddev_t *mddev) | |||
3876 | */ | 4253 | */ |
3877 | ITERATE_RDEV(mddev,rdev,rtmp) | 4254 | ITERATE_RDEV(mddev,rdev,rtmp) |
3878 | if (rdev->raid_disk >= 0 && | 4255 | if (rdev->raid_disk >= 0 && |
3879 | (rdev->faulty || ! rdev->in_sync) && | 4256 | (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) && |
3880 | atomic_read(&rdev->nr_pending)==0) { | 4257 | atomic_read(&rdev->nr_pending)==0) { |
3881 | if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) | 4258 | if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) { |
4259 | char nm[20]; | ||
4260 | sprintf(nm,"rd%d", rdev->raid_disk); | ||
4261 | sysfs_remove_link(&mddev->kobj, nm); | ||
3882 | rdev->raid_disk = -1; | 4262 | rdev->raid_disk = -1; |
4263 | } | ||
3883 | } | 4264 | } |
3884 | 4265 | ||
3885 | if (mddev->degraded) { | 4266 | if (mddev->degraded) { |
3886 | ITERATE_RDEV(mddev,rdev,rtmp) | 4267 | ITERATE_RDEV(mddev,rdev,rtmp) |
3887 | if (rdev->raid_disk < 0 | 4268 | if (rdev->raid_disk < 0 |
3888 | && !rdev->faulty) { | 4269 | && !test_bit(Faulty, &rdev->flags)) { |
3889 | if (mddev->pers->hot_add_disk(mddev,rdev)) | 4270 | if (mddev->pers->hot_add_disk(mddev,rdev)) { |
4271 | char nm[20]; | ||
4272 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
4273 | sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); | ||
3890 | spares++; | 4274 | spares++; |
3891 | else | 4275 | } else |
3892 | break; | 4276 | break; |
3893 | } | 4277 | } |
3894 | } | 4278 | } |
3895 | 4279 | ||
3896 | if (!spares && (mddev->recovery_cp == MaxSector )) { | 4280 | if (spares) { |
3897 | /* nothing we can do ... */ | 4281 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
4282 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
4283 | } else if (mddev->recovery_cp < MaxSector) { | ||
4284 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
4285 | } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | ||
4286 | /* nothing to be done ... */ | ||
3898 | goto unlock; | 4287 | goto unlock; |
3899 | } | 4288 | |
3900 | if (mddev->pers->sync_request) { | 4289 | if (mddev->pers->sync_request) { |
3901 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 4290 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
3902 | if (!spares) | ||
3903 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
3904 | if (spares && mddev->bitmap && ! mddev->bitmap->file) { | 4291 | if (spares && mddev->bitmap && ! mddev->bitmap->file) { |
3905 | /* We are adding a device or devices to an array | 4292 | /* We are adding a device or devices to an array |
3906 | * which has the bitmap stored on all devices. | 4293 | * which has the bitmap stored on all devices. |
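The rewritten tail of md_check_recovery() turns the old "no spares and no checkpoint, bail" test into an explicit policy. A sketch of the resulting decision order (inferred from the hunk; the MD_RECOVERY_REQUESTED case is an assumption based on how that bit is used elsewhere in this series):

        /* 1. spares added            -> recovery: clear SYNC and CHECK
         * 2. recovery_cp < MaxSector -> resync from checkpoint: set SYNC
         * 3. SYNC already set        -> honour it (e.g. a user-requested
         *                               check via MD_RECOVERY_REQUESTED)
         * 4. otherwise               -> nothing to do, goto unlock
         */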
@@ -3975,7 +4362,7 @@ static int __init md_init(void) | |||
3975 | " MD_SB_DISKS=%d\n", | 4362 | " MD_SB_DISKS=%d\n", |
3976 | MD_MAJOR_VERSION, MD_MINOR_VERSION, | 4363 | MD_MAJOR_VERSION, MD_MINOR_VERSION, |
3977 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); | 4364 | MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); |
3978 | printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR, | 4365 | printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR_HI, |
3979 | BITMAP_MINOR); | 4366 | BITMAP_MINOR); |
3980 | 4367 | ||
3981 | if (register_blkdev(MAJOR_NR, "md")) | 4368 | if (register_blkdev(MAJOR_NR, "md")) |
@@ -4039,7 +4426,7 @@ static void autostart_arrays(int part) | |||
4039 | if (IS_ERR(rdev)) | 4426 | if (IS_ERR(rdev)) |
4040 | continue; | 4427 | continue; |
4041 | 4428 | ||
4042 | if (rdev->faulty) { | 4429 | if (test_bit(Faulty, &rdev->flags)) { |
4043 | MD_BUG(); | 4430 | MD_BUG(); |
4044 | continue; | 4431 | continue; |
4045 | } | 4432 | } |
@@ -4086,6 +4473,23 @@ static __exit void md_exit(void) | |||
4086 | module_init(md_init) | 4473 | module_init(md_init) |
4087 | module_exit(md_exit) | 4474 | module_exit(md_exit) |
4088 | 4475 | ||
4476 | static int get_ro(char *buffer, struct kernel_param *kp) | ||
4477 | { | ||
4478 | return sprintf(buffer, "%d", start_readonly); | ||
4479 | } | ||
4480 | static int set_ro(const char *val, struct kernel_param *kp) | ||
4481 | { | ||
4482 | char *e; | ||
4483 | int num = simple_strtoul(val, &e, 10); | ||
4484 | if (*val && (*e == '\0' || *e == '\n')) { | ||
4485 | start_readonly = num; | ||
4486 | return 0; | ||
4487 | } | ||
4488 | return -EINVAL; | ||
4489 | } | ||
4490 | |||
4491 | module_param_call(start_ro, set_ro, get_ro, NULL, 0600); | ||
4492 | |||
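module_param_call() exposes start_readonly as a writable 'start_ro' parameter (mode 0600), so it can be flipped at runtime as well as set on the module command line; on a typical build that would be /sys/module/md_mod/parameters/start_ro (path assumed). The consumer is not in this excerpt; presumably array start-up does something like:

        /* Assumed consumer of start_readonly, not shown in this diff: */
        if (start_readonly)
                mddev->ro = 2;  /* 'read-auto'; cleared by md_write_start() */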
4089 | EXPORT_SYMBOL(register_md_personality); | 4493 | EXPORT_SYMBOL(register_md_personality); |
4090 | EXPORT_SYMBOL(unregister_md_personality); | 4494 | EXPORT_SYMBOL(unregister_md_personality); |
4091 | EXPORT_SYMBOL(md_error); | 4495 | EXPORT_SYMBOL(md_error); |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index c06f4474192b..145cdc5ad008 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -63,8 +63,8 @@ static int multipath_map (multipath_conf_t *conf) | |||
63 | 63 | ||
64 | rcu_read_lock(); | 64 | rcu_read_lock(); |
65 | for (i = 0; i < disks; i++) { | 65 | for (i = 0; i < disks; i++) { |
66 | mdk_rdev_t *rdev = conf->multipaths[i].rdev; | 66 | mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); |
67 | if (rdev && rdev->in_sync) { | 67 | if (rdev && test_bit(In_sync, &rdev->flags)) { |
68 | atomic_inc(&rdev->nr_pending); | 68 | atomic_inc(&rdev->nr_pending); |
69 | rcu_read_unlock(); | 69 | rcu_read_unlock(); |
70 | return i; | 70 | return i; |
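The multipath, raid1 and raid10 hunks below all apply the same two conversions: per-device booleans (rdev->in_sync, rdev->faulty) become test_bit(In_sync, ...) / test_bit(Faulty, ...) on a shared rdev->flags word, and rdev pointers read under rcu_read_lock() go through rcu_dereference(), with rcu_assign_pointer() on the publish side. The reader-side pattern, condensed from multipath_map() above:

        rcu_read_lock();
        rdev = rcu_dereference(conf->multipaths[i].rdev);
        if (rdev && test_bit(In_sync, &rdev->flags)) {
                atomic_inc(&rdev->nr_pending);  /* pin before unlocking */
                rcu_read_unlock();
                return i;
        }
        rcu_read_unlock();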
@@ -139,8 +139,9 @@ static void unplug_slaves(mddev_t *mddev) | |||
139 | 139 | ||
140 | rcu_read_lock(); | 140 | rcu_read_lock(); |
141 | for (i=0; i<mddev->raid_disks; i++) { | 141 | for (i=0; i<mddev->raid_disks; i++) { |
142 | mdk_rdev_t *rdev = conf->multipaths[i].rdev; | 142 | mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); |
143 | if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) { | 143 | if (rdev && !test_bit(Faulty, &rdev->flags) |
144 | && atomic_read(&rdev->nr_pending)) { | ||
144 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); | 145 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); |
145 | 146 | ||
146 | atomic_inc(&rdev->nr_pending); | 147 | atomic_inc(&rdev->nr_pending); |
@@ -211,7 +212,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev) | |||
211 | for (i = 0; i < conf->raid_disks; i++) | 212 | for (i = 0; i < conf->raid_disks; i++) |
212 | seq_printf (seq, "%s", | 213 | seq_printf (seq, "%s", |
213 | conf->multipaths[i].rdev && | 214 | conf->multipaths[i].rdev && |
214 | conf->multipaths[i].rdev->in_sync ? "U" : "_"); | 215 | test_bit(In_sync, &conf->multipaths[i].rdev->flags) ? "U" : "_"); |
215 | seq_printf (seq, "]"); | 216 | seq_printf (seq, "]"); |
216 | } | 217 | } |
217 | 218 | ||
@@ -224,8 +225,8 @@ static int multipath_issue_flush(request_queue_t *q, struct gendisk *disk, | |||
224 | 225 | ||
225 | rcu_read_lock(); | 226 | rcu_read_lock(); |
226 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { | 227 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { |
227 | mdk_rdev_t *rdev = conf->multipaths[i].rdev; | 228 | mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); |
228 | if (rdev && !rdev->faulty) { | 229 | if (rdev && !test_bit(Faulty, &rdev->flags)) { |
229 | struct block_device *bdev = rdev->bdev; | 230 | struct block_device *bdev = rdev->bdev; |
230 | request_queue_t *r_queue = bdev_get_queue(bdev); | 231 | request_queue_t *r_queue = bdev_get_queue(bdev); |
231 | 232 | ||
@@ -265,10 +266,10 @@ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) | |||
265 | /* | 266 | /* |
266 | * Mark disk as unusable | 267 | * Mark disk as unusable |
267 | */ | 268 | */ |
268 | if (!rdev->faulty) { | 269 | if (!test_bit(Faulty, &rdev->flags)) { |
269 | char b[BDEVNAME_SIZE]; | 270 | char b[BDEVNAME_SIZE]; |
270 | rdev->in_sync = 0; | 271 | clear_bit(In_sync, &rdev->flags); |
271 | rdev->faulty = 1; | 272 | set_bit(Faulty, &rdev->flags); |
272 | mddev->sb_dirty = 1; | 273 | mddev->sb_dirty = 1; |
273 | conf->working_disks--; | 274 | conf->working_disks--; |
274 | printk(KERN_ALERT "multipath: IO failure on %s," | 275 | printk(KERN_ALERT "multipath: IO failure on %s," |
@@ -298,7 +299,7 @@ static void print_multipath_conf (multipath_conf_t *conf) | |||
298 | tmp = conf->multipaths + i; | 299 | tmp = conf->multipaths + i; |
299 | if (tmp->rdev) | 300 | if (tmp->rdev) |
300 | printk(" disk%d, o:%d, dev:%s\n", | 301 | printk(" disk%d, o:%d, dev:%s\n", |
301 | i,!tmp->rdev->faulty, | 302 | i,!test_bit(Faulty, &tmp->rdev->flags), |
302 | bdevname(tmp->rdev->bdev,b)); | 303 | bdevname(tmp->rdev->bdev,b)); |
303 | } | 304 | } |
304 | } | 305 | } |
@@ -330,8 +331,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
330 | 331 | ||
331 | conf->working_disks++; | 332 | conf->working_disks++; |
332 | rdev->raid_disk = path; | 333 | rdev->raid_disk = path; |
333 | rdev->in_sync = 1; | 334 | set_bit(In_sync, &rdev->flags); |
334 | p->rdev = rdev; | 335 | rcu_assign_pointer(p->rdev, rdev); |
335 | found = 1; | 336 | found = 1; |
336 | } | 337 | } |
337 | 338 | ||
@@ -350,7 +351,7 @@ static int multipath_remove_disk(mddev_t *mddev, int number) | |||
350 | 351 | ||
351 | rdev = p->rdev; | 352 | rdev = p->rdev; |
352 | if (rdev) { | 353 | if (rdev) { |
353 | if (rdev->in_sync || | 354 | if (test_bit(In_sync, &rdev->flags) || |
354 | atomic_read(&rdev->nr_pending)) { | 355 | atomic_read(&rdev->nr_pending)) { |
355 | printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number); | 356 | printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number); |
356 | err = -EBUSY; | 357 | err = -EBUSY; |
@@ -482,7 +483,7 @@ static int multipath_run (mddev_t *mddev) | |||
482 | mddev->queue->max_sectors > (PAGE_SIZE>>9)) | 483 | mddev->queue->max_sectors > (PAGE_SIZE>>9)) |
483 | blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | 484 | blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); |
484 | 485 | ||
485 | if (!rdev->faulty) | 486 | if (!test_bit(Faulty, &rdev->flags)) |
486 | conf->working_disks++; | 487 | conf->working_disks++; |
487 | } | 488 | } |
488 | 489 | ||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e16f473bcf46..2da9d3ba902d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -301,7 +301,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int | |||
301 | { | 301 | { |
302 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 302 | int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
303 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); | 303 | r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); |
304 | int mirror, behind; | 304 | int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); |
305 | conf_t *conf = mddev_to_conf(r1_bio->mddev); | 305 | conf_t *conf = mddev_to_conf(r1_bio->mddev); |
306 | 306 | ||
307 | if (bio->bi_size) | 307 | if (bio->bi_size) |
@@ -311,47 +311,54 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int | |||
311 | if (r1_bio->bios[mirror] == bio) | 311 | if (r1_bio->bios[mirror] == bio) |
312 | break; | 312 | break; |
313 | 313 | ||
314 | /* | 314 | if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) { |
315 | * this branch is our 'one mirror IO has finished' event handler: | 315 | set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags); |
316 | */ | 316 | set_bit(R1BIO_BarrierRetry, &r1_bio->state); |
317 | if (!uptodate) { | 317 | r1_bio->mddev->barriers_work = 0; |
318 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | 318 | } else { |
319 | /* an I/O failed, we can't clear the bitmap */ | ||
320 | set_bit(R1BIO_Degraded, &r1_bio->state); | ||
321 | } else | ||
322 | /* | 319 | /* |
323 | * Set R1BIO_Uptodate in our master bio, so that | 320 | * this branch is our 'one mirror IO has finished' event handler: |
324 | * we will return a good error code to the higher | ||
325 | * levels even if IO on some other mirrored buffer fails. | ||
326 | * | ||
327 | * The 'master' represents the composite IO operation to | ||
328 | * user-side. So if something waits for IO, then it will | ||
329 | * wait for the 'master' bio. | ||
330 | */ | 321 | */ |
331 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 322 | r1_bio->bios[mirror] = NULL; |
332 | 323 | bio_put(bio); | |
333 | update_head_pos(mirror, r1_bio); | 324 | if (!uptodate) { |
334 | 325 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | |
335 | behind = test_bit(R1BIO_BehindIO, &r1_bio->state); | 326 | /* an I/O failed, we can't clear the bitmap */ |
336 | if (behind) { | 327 | set_bit(R1BIO_Degraded, &r1_bio->state); |
337 | if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) | 328 | } else |
338 | atomic_dec(&r1_bio->behind_remaining); | 329 | /* |
339 | 330 | * Set R1BIO_Uptodate in our master bio, so that | |
340 | /* In behind mode, we ACK the master bio once the I/O has safely | 331 | * we will return a good error code to the higher |
341 | * reached all non-writemostly disks. Setting the Returned bit | 332 | * levels even if IO on some other mirrored buffer fails. |
342 | * ensures that this gets done only once -- we don't ever want to | 333 | * |
343 | * return -EIO here, instead we'll wait */ | 334 | * The 'master' represents the composite IO operation to |
344 | 335 | * user-side. So if something waits for IO, then it will | |
345 | if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && | 336 | * wait for the 'master' bio. |
346 | test_bit(R1BIO_Uptodate, &r1_bio->state)) { | 337 | */ |
347 | /* Maybe we can return now */ | 338 | set_bit(R1BIO_Uptodate, &r1_bio->state); |
348 | if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { | 339 | |
349 | struct bio *mbio = r1_bio->master_bio; | 340 | update_head_pos(mirror, r1_bio); |
350 | PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", | 341 | |
351 | (unsigned long long) mbio->bi_sector, | 342 | if (behind) { |
352 | (unsigned long long) mbio->bi_sector + | 343 | if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) |
353 | (mbio->bi_size >> 9) - 1); | 344 | atomic_dec(&r1_bio->behind_remaining); |
354 | bio_endio(mbio, mbio->bi_size, 0); | 345 | |
346 | /* In behind mode, we ACK the master bio once the I/O has safely | ||
347 | * reached all non-writemostly disks. Setting the Returned bit | ||
348 | * ensures that this gets done only once -- we don't ever want to | ||
349 | * return -EIO here, instead we'll wait */ | ||
350 | |||
351 | if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && | ||
352 | test_bit(R1BIO_Uptodate, &r1_bio->state)) { | ||
353 | /* Maybe we can return now */ | ||
354 | if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { | ||
355 | struct bio *mbio = r1_bio->master_bio; | ||
356 | PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", | ||
357 | (unsigned long long) mbio->bi_sector, | ||
358 | (unsigned long long) mbio->bi_sector + | ||
359 | (mbio->bi_size >> 9) - 1); | ||
360 | bio_endio(mbio, mbio->bi_size, 0); | ||
361 | } | ||
355 | } | 362 | } |
356 | } | 363 | } |
357 | } | 364 | } |
@@ -361,8 +368,16 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int | |||
361 | * already. | 368 | * already. |
362 | */ | 369 | */ |
363 | if (atomic_dec_and_test(&r1_bio->remaining)) { | 370 | if (atomic_dec_and_test(&r1_bio->remaining)) { |
371 | if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { | ||
372 | reschedule_retry(r1_bio); | ||
373 | /* Don't dec_pending yet, we want to hold | ||
374 | * the reference over the retry | ||
375 | */ | ||
376 | return 0; | ||
377 | } | ||
364 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { | 378 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { |
365 | /* free extra copy of the data pages */ | 379 | /* free extra copy of the data pages */ |
380 | /* FIXME bio has been freed!!! */ | ||
366 | int i = bio->bi_vcnt; | 381 | int i = bio->bi_vcnt; |
367 | while (i--) | 382 | while (i--) |
368 | __free_page(bio->bi_io_vec[i].bv_page); | 383 | __free_page(bio->bi_io_vec[i].bv_page); |
@@ -416,12 +431,12 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
416 | /* Choose the first operational device, for consistency */ | 431 | /* Choose the first operational device, for consistency */ |
417 | new_disk = 0; | 432 | new_disk = 0; |
418 | 433 | ||
419 | for (rdev = conf->mirrors[new_disk].rdev; | 434 | for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev); |
420 | !rdev || !rdev->in_sync | 435 | !rdev || !test_bit(In_sync, &rdev->flags) |
421 | || test_bit(WriteMostly, &rdev->flags); | 436 | || test_bit(WriteMostly, &rdev->flags); |
422 | rdev = conf->mirrors[++new_disk].rdev) { | 437 | rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) { |
423 | 438 | ||
424 | if (rdev && rdev->in_sync) | 439 | if (rdev && test_bit(In_sync, &rdev->flags)) |
425 | wonly_disk = new_disk; | 440 | wonly_disk = new_disk; |
426 | 441 | ||
427 | if (new_disk == conf->raid_disks - 1) { | 442 | if (new_disk == conf->raid_disks - 1) { |
@@ -434,12 +449,12 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
434 | 449 | ||
435 | 450 | ||
436 | /* make sure the disk is operational */ | 451 | /* make sure the disk is operational */ |
437 | for (rdev = conf->mirrors[new_disk].rdev; | 452 | for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev); |
438 | !rdev || !rdev->in_sync || | 453 | !rdev || !test_bit(In_sync, &rdev->flags) || |
439 | test_bit(WriteMostly, &rdev->flags); | 454 | test_bit(WriteMostly, &rdev->flags); |
440 | rdev = conf->mirrors[new_disk].rdev) { | 455 | rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) { |
441 | 456 | ||
442 | if (rdev && rdev->in_sync) | 457 | if (rdev && test_bit(In_sync, &rdev->flags)) |
443 | wonly_disk = new_disk; | 458 | wonly_disk = new_disk; |
444 | 459 | ||
445 | if (new_disk <= 0) | 460 | if (new_disk <= 0) |
@@ -474,10 +489,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
474 | disk = conf->raid_disks; | 489 | disk = conf->raid_disks; |
475 | disk--; | 490 | disk--; |
476 | 491 | ||
477 | rdev = conf->mirrors[disk].rdev; | 492 | rdev = rcu_dereference(conf->mirrors[disk].rdev); |
478 | 493 | ||
479 | if (!rdev || | 494 | if (!rdev || |
480 | !rdev->in_sync || | 495 | !test_bit(In_sync, &rdev->flags) || |
481 | test_bit(WriteMostly, &rdev->flags)) | 496 | test_bit(WriteMostly, &rdev->flags)) |
482 | continue; | 497 | continue; |
483 | 498 | ||
@@ -496,11 +511,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) | |||
496 | 511 | ||
497 | 512 | ||
498 | if (new_disk >= 0) { | 513 | if (new_disk >= 0) { |
499 | rdev = conf->mirrors[new_disk].rdev; | 514 | rdev = rcu_dereference(conf->mirrors[new_disk].rdev); |
500 | if (!rdev) | 515 | if (!rdev) |
501 | goto retry; | 516 | goto retry; |
502 | atomic_inc(&rdev->nr_pending); | 517 | atomic_inc(&rdev->nr_pending); |
503 | if (!rdev->in_sync) { | 518 | if (!test_bit(In_sync, &rdev->flags)) { |
504 | /* cannot risk returning a device that failed | 519 | /* cannot risk returning a device that failed |
505 | * before we inc'ed nr_pending | 520 | * before we inc'ed nr_pending |
506 | */ | 521 | */ |
@@ -522,8 +537,8 @@ static void unplug_slaves(mddev_t *mddev) | |||
522 | 537 | ||
523 | rcu_read_lock(); | 538 | rcu_read_lock(); |
524 | for (i=0; i<mddev->raid_disks; i++) { | 539 | for (i=0; i<mddev->raid_disks; i++) { |
525 | mdk_rdev_t *rdev = conf->mirrors[i].rdev; | 540 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
526 | if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) { | 541 | if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { |
527 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); | 542 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); |
528 | 543 | ||
529 | atomic_inc(&rdev->nr_pending); | 544 | atomic_inc(&rdev->nr_pending); |
@@ -556,8 +571,8 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, | |||
556 | 571 | ||
557 | rcu_read_lock(); | 572 | rcu_read_lock(); |
558 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { | 573 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { |
559 | mdk_rdev_t *rdev = conf->mirrors[i].rdev; | 574 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
560 | if (rdev && !rdev->faulty) { | 575 | if (rdev && !test_bit(Faulty, &rdev->flags)) { |
561 | struct block_device *bdev = rdev->bdev; | 576 | struct block_device *bdev = rdev->bdev; |
562 | request_queue_t *r_queue = bdev_get_queue(bdev); | 577 | request_queue_t *r_queue = bdev_get_queue(bdev); |
563 | 578 | ||
@@ -648,8 +663,9 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
648 | struct bio_list bl; | 663 | struct bio_list bl; |
649 | struct page **behind_pages = NULL; | 664 | struct page **behind_pages = NULL; |
650 | const int rw = bio_data_dir(bio); | 665 | const int rw = bio_data_dir(bio); |
666 | int do_barriers; | ||
651 | 667 | ||
652 | if (unlikely(bio_barrier(bio))) { | 668 | if (unlikely(!mddev->barriers_work && bio_barrier(bio))) { |
653 | bio_endio(bio, bio->bi_size, -EOPNOTSUPP); | 669 | bio_endio(bio, bio->bi_size, -EOPNOTSUPP); |
654 | return 0; | 670 | return 0; |
655 | } | 671 | } |
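Barrier writes are now attempted optimistically: this early -EOPNOTSUPP rejection only triggers once mddev->barriers_work has been cleared, which raid1_end_write_request() does the first time a member device fails a BIO_RW_BARRIER request with -ENOTSUPP (the affected r1bio is flagged R1BIO_BarrierRetry and resubmitted by raid1d() without the barrier). The submit side, condensed from the hunks that follow:

        do_barriers = bio->bi_rw & BIO_RW_BARRIER;
        if (do_barriers)
                set_bit(R1BIO_Barrier, &r1_bio->state);
        /* ... per-mirror bio setup ... */
        mbio->bi_rw = WRITE | do_barriers;      /* propagated to each mirror */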
@@ -728,10 +744,10 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
728 | #endif | 744 | #endif |
729 | rcu_read_lock(); | 745 | rcu_read_lock(); |
730 | for (i = 0; i < disks; i++) { | 746 | for (i = 0; i < disks; i++) { |
731 | if ((rdev=conf->mirrors[i].rdev) != NULL && | 747 | if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL && |
732 | !rdev->faulty) { | 748 | !test_bit(Faulty, &rdev->flags)) { |
733 | atomic_inc(&rdev->nr_pending); | 749 | atomic_inc(&rdev->nr_pending); |
734 | if (rdev->faulty) { | 750 | if (test_bit(Faulty, &rdev->flags)) { |
735 | atomic_dec(&rdev->nr_pending); | 751 | atomic_dec(&rdev->nr_pending); |
736 | r1_bio->bios[i] = NULL; | 752 | r1_bio->bios[i] = NULL; |
737 | } else | 753 | } else |
@@ -759,6 +775,10 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
759 | atomic_set(&r1_bio->remaining, 0); | 775 | atomic_set(&r1_bio->remaining, 0); |
760 | atomic_set(&r1_bio->behind_remaining, 0); | 776 | atomic_set(&r1_bio->behind_remaining, 0); |
761 | 777 | ||
778 | do_barriers = bio->bi_rw & BIO_RW_BARRIER; | ||
779 | if (do_barriers) | ||
780 | set_bit(R1BIO_Barrier, &r1_bio->state); | ||
781 | |||
762 | bio_list_init(&bl); | 782 | bio_list_init(&bl); |
763 | for (i = 0; i < disks; i++) { | 783 | for (i = 0; i < disks; i++) { |
764 | struct bio *mbio; | 784 | struct bio *mbio; |
@@ -771,7 +791,7 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
771 | mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; | 791 | mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; |
772 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; | 792 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; |
773 | mbio->bi_end_io = raid1_end_write_request; | 793 | mbio->bi_end_io = raid1_end_write_request; |
774 | mbio->bi_rw = WRITE; | 794 | mbio->bi_rw = WRITE | do_barriers; |
775 | mbio->bi_private = r1_bio; | 795 | mbio->bi_private = r1_bio; |
776 | 796 | ||
777 | if (behind_pages) { | 797 | if (behind_pages) { |
@@ -824,7 +844,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) | |||
824 | for (i = 0; i < conf->raid_disks; i++) | 844 | for (i = 0; i < conf->raid_disks; i++) |
825 | seq_printf(seq, "%s", | 845 | seq_printf(seq, "%s", |
826 | conf->mirrors[i].rdev && | 846 | conf->mirrors[i].rdev && |
827 | conf->mirrors[i].rdev->in_sync ? "U" : "_"); | 847 | test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_"); |
828 | seq_printf(seq, "]"); | 848 | seq_printf(seq, "]"); |
829 | } | 849 | } |
830 | 850 | ||
@@ -840,14 +860,14 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
840 | * next level up know. | 860 | * next level up know. |
841 | * else mark the drive as failed | 861 | * else mark the drive as failed |
842 | */ | 862 | */ |
843 | if (rdev->in_sync | 863 | if (test_bit(In_sync, &rdev->flags) |
844 | && conf->working_disks == 1) | 864 | && conf->working_disks == 1) |
845 | /* | 865 | /* |
846 | * Don't fail the drive, act as though we were just a | 866 | * Don't fail the drive, act as though we were just a |
847 | * normal single drive | 867 | * normal single drive |
848 | */ | 868 | */ |
849 | return; | 869 | return; |
850 | if (rdev->in_sync) { | 870 | if (test_bit(In_sync, &rdev->flags)) { |
851 | mddev->degraded++; | 871 | mddev->degraded++; |
852 | conf->working_disks--; | 872 | conf->working_disks--; |
853 | /* | 873 | /* |
@@ -855,8 +875,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
855 | */ | 875 | */ |
856 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 876 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); |
857 | } | 877 | } |
858 | rdev->in_sync = 0; | 878 | clear_bit(In_sync, &rdev->flags); |
859 | rdev->faulty = 1; | 879 | set_bit(Faulty, &rdev->flags); |
860 | mddev->sb_dirty = 1; | 880 | mddev->sb_dirty = 1; |
861 | printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n" | 881 | printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n" |
862 | " Operation continuing on %d devices\n", | 882 | " Operation continuing on %d devices\n", |
@@ -881,7 +901,7 @@ static void print_conf(conf_t *conf) | |||
881 | tmp = conf->mirrors + i; | 901 | tmp = conf->mirrors + i; |
882 | if (tmp->rdev) | 902 | if (tmp->rdev) |
883 | printk(" disk %d, wo:%d, o:%d, dev:%s\n", | 903 | printk(" disk %d, wo:%d, o:%d, dev:%s\n", |
884 | i, !tmp->rdev->in_sync, !tmp->rdev->faulty, | 904 | i, !test_bit(In_sync, &tmp->rdev->flags), !test_bit(Faulty, &tmp->rdev->flags), |
885 | bdevname(tmp->rdev->bdev,b)); | 905 | bdevname(tmp->rdev->bdev,b)); |
886 | } | 906 | } |
887 | } | 907 | } |
@@ -913,11 +933,11 @@ static int raid1_spare_active(mddev_t *mddev) | |||
913 | for (i = 0; i < conf->raid_disks; i++) { | 933 | for (i = 0; i < conf->raid_disks; i++) { |
914 | tmp = conf->mirrors + i; | 934 | tmp = conf->mirrors + i; |
915 | if (tmp->rdev | 935 | if (tmp->rdev |
916 | && !tmp->rdev->faulty | 936 | && !test_bit(Faulty, &tmp->rdev->flags) |
917 | && !tmp->rdev->in_sync) { | 937 | && !test_bit(In_sync, &tmp->rdev->flags)) { |
918 | conf->working_disks++; | 938 | conf->working_disks++; |
919 | mddev->degraded--; | 939 | mddev->degraded--; |
920 | tmp->rdev->in_sync = 1; | 940 | set_bit(In_sync, &tmp->rdev->flags); |
921 | } | 941 | } |
922 | } | 942 | } |
923 | 943 | ||
@@ -954,7 +974,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
954 | found = 1; | 974 | found = 1; |
955 | if (rdev->saved_raid_disk != mirror) | 975 | if (rdev->saved_raid_disk != mirror) |
956 | conf->fullsync = 1; | 976 | conf->fullsync = 1; |
957 | p->rdev = rdev; | 977 | rcu_assign_pointer(p->rdev, rdev); |
958 | break; | 978 | break; |
959 | } | 979 | } |
960 | 980 | ||
@@ -972,7 +992,7 @@ static int raid1_remove_disk(mddev_t *mddev, int number) | |||
972 | print_conf(conf); | 992 | print_conf(conf); |
973 | rdev = p->rdev; | 993 | rdev = p->rdev; |
974 | if (rdev) { | 994 | if (rdev) { |
975 | if (rdev->in_sync || | 995 | if (test_bit(In_sync, &rdev->flags) || |
976 | atomic_read(&rdev->nr_pending)) { | 996 | atomic_read(&rdev->nr_pending)) { |
977 | err = -EBUSY; | 997 | err = -EBUSY; |
978 | goto abort; | 998 | goto abort; |
@@ -1153,6 +1173,36 @@ static void raid1d(mddev_t *mddev) | |||
1153 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { | 1173 | if (test_bit(R1BIO_IsSync, &r1_bio->state)) { |
1154 | sync_request_write(mddev, r1_bio); | 1174 | sync_request_write(mddev, r1_bio); |
1155 | unplug = 1; | 1175 | unplug = 1; |
1176 | } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { | ||
1177 | /* some requests in the r1bio were BIO_RW_BARRIER | ||
1178 | * requests which failed with -ENOTSUPP. Hohumm.. | ||
1179 | * Better resubmit without the barrier. | ||
1180 | * We know which devices to resubmit for, because | ||
1181 | * all others have had their bios[] entry cleared. | ||
1182 | */ | ||
1183 | int i; | ||
1184 | clear_bit(R1BIO_BarrierRetry, &r1_bio->state); | ||
1185 | clear_bit(R1BIO_Barrier, &r1_bio->state); | ||
1186 | for (i=0; i < conf->raid_disks; i++) | ||
1187 | if (r1_bio->bios[i]) { | ||
1188 | struct bio_vec *bvec; | ||
1189 | int j; | ||
1190 | |||
1191 | bio = bio_clone(r1_bio->master_bio, GFP_NOIO); | ||
1192 | /* copy pages from the failed bio, as | ||
1193 | * this might be a write-behind device */ | ||
1194 | __bio_for_each_segment(bvec, bio, j, 0) | ||
1195 | bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page; | ||
1196 | bio_put(r1_bio->bios[i]); | ||
1197 | bio->bi_sector = r1_bio->sector + | ||
1198 | conf->mirrors[i].rdev->data_offset; | ||
1199 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; | ||
1200 | bio->bi_end_io = raid1_end_write_request; | ||
1201 | bio->bi_rw = WRITE; | ||
1202 | bio->bi_private = r1_bio; | ||
1203 | r1_bio->bios[i] = bio; | ||
1204 | generic_make_request(bio); | ||
1205 | } | ||
1156 | } else { | 1206 | } else { |
1157 | int disk; | 1207 | int disk; |
1158 | bio = r1_bio->bios[r1_bio->read_disk]; | 1208 | bio = r1_bio->bios[r1_bio->read_disk]; |
@@ -1260,7 +1310,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1260 | * This call to bitmap_start_sync doesn't actually record anything | 1310 | * This call to bitmap_start_sync doesn't actually record anything |
1261 | */ | 1311 | */ |
1262 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && | 1312 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && |
1263 | !conf->fullsync) { | 1313 | !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { |
1264 | /* We can skip this block, and probably several more */ | 1314 | /* We can skip this block, and probably several more */ |
1265 | *skipped = 1; | 1315 | *skipped = 1; |
1266 | return sync_blocks; | 1316 | return sync_blocks; |
@@ -1282,11 +1332,11 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1282 | /* make sure disk is operational */ | 1332 | /* make sure disk is operational */ |
1283 | wonly = disk; | 1333 | wonly = disk; |
1284 | while (conf->mirrors[disk].rdev == NULL || | 1334 | while (conf->mirrors[disk].rdev == NULL || |
1285 | !conf->mirrors[disk].rdev->in_sync || | 1335 | !test_bit(In_sync, &conf->mirrors[disk].rdev->flags) || |
1286 | test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags) | 1336 | test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags) |
1287 | ) { | 1337 | ) { |
1288 | if (conf->mirrors[disk].rdev && | 1338 | if (conf->mirrors[disk].rdev && |
1289 | conf->mirrors[disk].rdev->in_sync) | 1339 | test_bit(In_sync, &conf->mirrors[disk].rdev->flags)) |
1290 | wonly = disk; | 1340 | wonly = disk; |
1291 | if (disk <= 0) | 1341 | if (disk <= 0) |
1292 | disk = conf->raid_disks; | 1342 | disk = conf->raid_disks; |
@@ -1333,11 +1383,12 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1333 | bio->bi_rw = READ; | 1383 | bio->bi_rw = READ; |
1334 | bio->bi_end_io = end_sync_read; | 1384 | bio->bi_end_io = end_sync_read; |
1335 | } else if (conf->mirrors[i].rdev == NULL || | 1385 | } else if (conf->mirrors[i].rdev == NULL || |
1336 | conf->mirrors[i].rdev->faulty) { | 1386 | test_bit(Faulty, &conf->mirrors[i].rdev->flags)) { |
1337 | still_degraded = 1; | 1387 | still_degraded = 1; |
1338 | continue; | 1388 | continue; |
1339 | } else if (!conf->mirrors[i].rdev->in_sync || | 1389 | } else if (!test_bit(In_sync, &conf->mirrors[i].rdev->flags) || |
1340 | sector_nr + RESYNC_SECTORS > mddev->recovery_cp) { | 1390 | sector_nr + RESYNC_SECTORS > mddev->recovery_cp || |
1391 | test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { | ||
1341 | bio->bi_rw = WRITE; | 1392 | bio->bi_rw = WRITE; |
1342 | bio->bi_end_io = end_sync_write; | 1393 | bio->bi_end_io = end_sync_write; |
1343 | write_targets ++; | 1394 | write_targets ++; |
@@ -1371,8 +1422,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1371 | break; | 1422 | break; |
1372 | if (sync_blocks == 0) { | 1423 | if (sync_blocks == 0) { |
1373 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, | 1424 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, |
1374 | &sync_blocks, still_degraded) && | 1425 | &sync_blocks, still_degraded) && |
1375 | !conf->fullsync) | 1426 | !conf->fullsync && |
1427 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
1376 | break; | 1428 | break; |
1377 | if (sync_blocks < (PAGE_SIZE>>9)) | 1429 | if (sync_blocks < (PAGE_SIZE>>9)) |
1378 | BUG(); | 1430 | BUG(); |
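Both bitmap short-cuts in raid1's sync_request() gain a !test_bit(MD_RECOVERY_REQUESTED, ...) guard: a user-requested pass has to read every block even where the bitmap says nothing is dirty, otherwise a consistency check could not count mismatches (md_do_sync() zeroes mddev->resync_mismatches for the same reason in the md.c hunk above). The guarded skip reduces to:

        if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
            !conf->fullsync &&
            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
                *skipped = 1;   /* only safe for an ordinary resync */
                return sync_blocks;
        }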
@@ -1478,7 +1530,7 @@ static int run(mddev_t *mddev) | |||
1478 | blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | 1530 | blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); |
1479 | 1531 | ||
1480 | disk->head_position = 0; | 1532 | disk->head_position = 0; |
1481 | if (!rdev->faulty && rdev->in_sync) | 1533 | if (!test_bit(Faulty, &rdev->flags) && test_bit(In_sync, &rdev->flags)) |
1482 | conf->working_disks++; | 1534 | conf->working_disks++; |
1483 | } | 1535 | } |
1484 | conf->raid_disks = mddev->raid_disks; | 1536 | conf->raid_disks = mddev->raid_disks; |
@@ -1518,7 +1570,7 @@ static int run(mddev_t *mddev) | |||
1518 | */ | 1570 | */ |
1519 | for (j = 0; j < conf->raid_disks && | 1571 | for (j = 0; j < conf->raid_disks && |
1520 | (!conf->mirrors[j].rdev || | 1572 | (!conf->mirrors[j].rdev || |
1521 | !conf->mirrors[j].rdev->in_sync) ; j++) | 1573 | !test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++) |
1522 | /* nothing */; | 1574 | /* nothing */; |
1523 | conf->last_used = j; | 1575 | conf->last_used = j; |
1524 | 1576 | ||
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index bbe40e9cf923..867f06ae33d9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -496,6 +496,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) | |||
496 | int disk, slot, nslot; | 496 | int disk, slot, nslot; |
497 | const int sectors = r10_bio->sectors; | 497 | const int sectors = r10_bio->sectors; |
498 | sector_t new_distance, current_distance; | 498 | sector_t new_distance, current_distance; |
499 | mdk_rdev_t *rdev; | ||
499 | 500 | ||
500 | raid10_find_phys(conf, r10_bio); | 501 | raid10_find_phys(conf, r10_bio); |
501 | rcu_read_lock(); | 502 | rcu_read_lock(); |
@@ -510,8 +511,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) | |||
510 | slot = 0; | 511 | slot = 0; |
511 | disk = r10_bio->devs[slot].devnum; | 512 | disk = r10_bio->devs[slot].devnum; |
512 | 513 | ||
513 | while (!conf->mirrors[disk].rdev || | 514 | while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL || |
514 | !conf->mirrors[disk].rdev->in_sync) { | 515 | !test_bit(In_sync, &rdev->flags)) { |
515 | slot++; | 516 | slot++; |
516 | if (slot == conf->copies) { | 517 | if (slot == conf->copies) { |
517 | slot = 0; | 518 | slot = 0; |
@@ -527,8 +528,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) | |||
527 | /* make sure the disk is operational */ | 528 | /* make sure the disk is operational */ |
528 | slot = 0; | 529 | slot = 0; |
529 | disk = r10_bio->devs[slot].devnum; | 530 | disk = r10_bio->devs[slot].devnum; |
530 | while (!conf->mirrors[disk].rdev || | 531 | while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL || |
531 | !conf->mirrors[disk].rdev->in_sync) { | 532 | !test_bit(In_sync, &rdev->flags)) { |
532 | slot ++; | 533 | slot ++; |
533 | if (slot == conf->copies) { | 534 | if (slot == conf->copies) { |
534 | disk = -1; | 535 | disk = -1; |
@@ -547,11 +548,11 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) | |||
547 | int ndisk = r10_bio->devs[nslot].devnum; | 548 | int ndisk = r10_bio->devs[nslot].devnum; |
548 | 549 | ||
549 | 550 | ||
550 | if (!conf->mirrors[ndisk].rdev || | 551 | if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL || |
551 | !conf->mirrors[ndisk].rdev->in_sync) | 552 | !test_bit(In_sync, &rdev->flags)) |
552 | continue; | 553 | continue; |
553 | 554 | ||
554 | if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) { | 555 | if (!atomic_read(&rdev->nr_pending)) { |
555 | disk = ndisk; | 556 | disk = ndisk; |
556 | slot = nslot; | 557 | slot = nslot; |
557 | break; | 558 | break; |
@@ -569,7 +570,7 @@ rb_out: | |||
569 | r10_bio->read_slot = slot; | 570 | r10_bio->read_slot = slot; |
570 | /* conf->next_seq_sect = this_sector + sectors;*/ | 571 | /* conf->next_seq_sect = this_sector + sectors;*/ |
571 | 572 | ||
572 | if (disk >= 0 && conf->mirrors[disk].rdev) | 573 | if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL) |
573 | atomic_inc(&conf->mirrors[disk].rdev->nr_pending); | 574 | atomic_inc(&conf->mirrors[disk].rdev->nr_pending); |
574 | rcu_read_unlock(); | 575 | rcu_read_unlock(); |
575 | 576 | ||
@@ -583,8 +584,8 @@ static void unplug_slaves(mddev_t *mddev) | |||
583 | 584 | ||
584 | rcu_read_lock(); | 585 | rcu_read_lock(); |
585 | for (i=0; i<mddev->raid_disks; i++) { | 586 | for (i=0; i<mddev->raid_disks; i++) { |
586 | mdk_rdev_t *rdev = conf->mirrors[i].rdev; | 587 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
587 | if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) { | 588 | if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { |
588 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); | 589 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); |
589 | 590 | ||
590 | atomic_inc(&rdev->nr_pending); | 591 | atomic_inc(&rdev->nr_pending); |
@@ -614,8 +615,8 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk, | |||
614 | 615 | ||
615 | rcu_read_lock(); | 616 | rcu_read_lock(); |
616 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { | 617 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { |
617 | mdk_rdev_t *rdev = conf->mirrors[i].rdev; | 618 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
618 | if (rdev && !rdev->faulty) { | 619 | if (rdev && !test_bit(Faulty, &rdev->flags)) { |
619 | struct block_device *bdev = rdev->bdev; | 620 | struct block_device *bdev = rdev->bdev; |
620 | request_queue_t *r_queue = bdev_get_queue(bdev); | 621 | request_queue_t *r_queue = bdev_get_queue(bdev); |
621 | 622 | ||
@@ -768,9 +769,10 @@ static int make_request(request_queue_t *q, struct bio * bio) | |||
768 | rcu_read_lock(); | 769 | rcu_read_lock(); |
769 | for (i = 0; i < conf->copies; i++) { | 770 | for (i = 0; i < conf->copies; i++) { |
770 | int d = r10_bio->devs[i].devnum; | 771 | int d = r10_bio->devs[i].devnum; |
771 | if (conf->mirrors[d].rdev && | 772 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev); |
772 | !conf->mirrors[d].rdev->faulty) { | 773 | if (rdev && |
773 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); | 774 | !test_bit(Faulty, &rdev->flags)) { |
775 | atomic_inc(&rdev->nr_pending); | ||
774 | r10_bio->devs[i].bio = bio; | 776 | r10_bio->devs[i].bio = bio; |
775 | } else | 777 | } else |
776 | r10_bio->devs[i].bio = NULL; | 778 | r10_bio->devs[i].bio = NULL; |
@@ -824,7 +826,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) | |||
824 | for (i = 0; i < conf->raid_disks; i++) | 826 | for (i = 0; i < conf->raid_disks; i++) |
825 | seq_printf(seq, "%s", | 827 | seq_printf(seq, "%s", |
826 | conf->mirrors[i].rdev && | 828 | conf->mirrors[i].rdev && |
827 | conf->mirrors[i].rdev->in_sync ? "U" : "_"); | 829 | test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_"); |
828 | seq_printf(seq, "]"); | 830 | seq_printf(seq, "]"); |
829 | } | 831 | } |
830 | 832 | ||
@@ -839,7 +841,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
839 | * next level up know. | 841 | * next level up know. |
840 | * else mark the drive as failed | 842 | * else mark the drive as failed |
841 | */ | 843 | */ |
842 | if (rdev->in_sync | 844 | if (test_bit(In_sync, &rdev->flags) |
843 | && conf->working_disks == 1) | 845 | && conf->working_disks == 1) |
844 | /* | 846 | /* |
845 | * Don't fail the drive, just return an IO error. | 847 | * Don't fail the drive, just return an IO error. |
@@ -849,7 +851,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
849 | * really dead" tests... | 851 | * really dead" tests... |
850 | */ | 852 | */ |
851 | return; | 853 | return; |
852 | if (rdev->in_sync) { | 854 | if (test_bit(In_sync, &rdev->flags)) { |
853 | mddev->degraded++; | 855 | mddev->degraded++; |
854 | conf->working_disks--; | 856 | conf->working_disks--; |
855 | /* | 857 | /* |
@@ -857,8 +859,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
857 | */ | 859 | */ |
858 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 860 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); |
859 | } | 861 | } |
860 | rdev->in_sync = 0; | 862 | clear_bit(In_sync, &rdev->flags); |
861 | rdev->faulty = 1; | 863 | set_bit(Faulty, &rdev->flags); |
862 | mddev->sb_dirty = 1; | 864 | mddev->sb_dirty = 1; |
863 | printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" | 865 | printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" |
864 | " Operation continuing on %d devices\n", | 866 | " Operation continuing on %d devices\n", |
@@ -883,7 +885,8 @@ static void print_conf(conf_t *conf) | |||
883 | tmp = conf->mirrors + i; | 885 | tmp = conf->mirrors + i; |
884 | if (tmp->rdev) | 886 | if (tmp->rdev) |
885 | printk(" disk %d, wo:%d, o:%d, dev:%s\n", | 887 | printk(" disk %d, wo:%d, o:%d, dev:%s\n", |
886 | i, !tmp->rdev->in_sync, !tmp->rdev->faulty, | 888 | i, !test_bit(In_sync, &tmp->rdev->flags), |
889 | !test_bit(Faulty, &tmp->rdev->flags), | ||
887 | bdevname(tmp->rdev->bdev,b)); | 890 | bdevname(tmp->rdev->bdev,b)); |
888 | } | 891 | } |
889 | } | 892 | } |
@@ -936,11 +939,11 @@ static int raid10_spare_active(mddev_t *mddev) | |||
936 | for (i = 0; i < conf->raid_disks; i++) { | 939 | for (i = 0; i < conf->raid_disks; i++) { |
937 | tmp = conf->mirrors + i; | 940 | tmp = conf->mirrors + i; |
938 | if (tmp->rdev | 941 | if (tmp->rdev |
939 | && !tmp->rdev->faulty | 942 | && !test_bit(Faulty, &tmp->rdev->flags) |
940 | && !tmp->rdev->in_sync) { | 943 | && !test_bit(In_sync, &tmp->rdev->flags)) { |
941 | conf->working_disks++; | 944 | conf->working_disks++; |
942 | mddev->degraded--; | 945 | mddev->degraded--; |
943 | tmp->rdev->in_sync = 1; | 946 | set_bit(In_sync, &tmp->rdev->flags); |
944 | } | 947 | } |
945 | } | 948 | } |
946 | 949 | ||
@@ -980,7 +983,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
980 | p->head_position = 0; | 983 | p->head_position = 0; |
981 | rdev->raid_disk = mirror; | 984 | rdev->raid_disk = mirror; |
982 | found = 1; | 985 | found = 1; |
983 | p->rdev = rdev; | 986 | rcu_assign_pointer(p->rdev, rdev); |
984 | break; | 987 | break; |
985 | } | 988 | } |
986 | 989 | ||
@@ -998,7 +1001,7 @@ static int raid10_remove_disk(mddev_t *mddev, int number) | |||
998 | print_conf(conf); | 1001 | print_conf(conf); |
999 | rdev = p->rdev; | 1002 | rdev = p->rdev; |
1000 | if (rdev) { | 1003 | if (rdev) { |
1001 | if (rdev->in_sync || | 1004 | if (test_bit(In_sync, &rdev->flags) || |
1002 | atomic_read(&rdev->nr_pending)) { | 1005 | atomic_read(&rdev->nr_pending)) { |
1003 | err = -EBUSY; | 1006 | err = -EBUSY; |
1004 | goto abort; | 1007 | goto abort; |
@@ -1414,7 +1417,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1414 | 1417 | ||
1415 | for (i=0 ; i<conf->raid_disks; i++) | 1418 | for (i=0 ; i<conf->raid_disks; i++) |
1416 | if (conf->mirrors[i].rdev && | 1419 | if (conf->mirrors[i].rdev && |
1417 | !conf->mirrors[i].rdev->in_sync) { | 1420 | !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) { |
1418 | /* want to reconstruct this device */ | 1421 | /* want to reconstruct this device */ |
1419 | r10bio_t *rb2 = r10_bio; | 1422 | r10bio_t *rb2 = r10_bio; |
1420 | 1423 | ||
@@ -1435,7 +1438,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1435 | for (j=0; j<conf->copies;j++) { | 1438 | for (j=0; j<conf->copies;j++) { |
1436 | int d = r10_bio->devs[j].devnum; | 1439 | int d = r10_bio->devs[j].devnum; |
1437 | if (conf->mirrors[d].rdev && | 1440 | if (conf->mirrors[d].rdev && |
1438 | conf->mirrors[d].rdev->in_sync) { | 1441 | test_bit(In_sync, &conf->mirrors[d].rdev->flags)) { |
1439 | /* This is where we read from */ | 1442 | /* This is where we read from */ |
1440 | bio = r10_bio->devs[0].bio; | 1443 | bio = r10_bio->devs[0].bio; |
1441 | bio->bi_next = biolist; | 1444 | bio->bi_next = biolist; |
@@ -1511,7 +1514,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1511 | bio = r10_bio->devs[i].bio; | 1514 | bio = r10_bio->devs[i].bio; |
1512 | bio->bi_end_io = NULL; | 1515 | bio->bi_end_io = NULL; |
1513 | if (conf->mirrors[d].rdev == NULL || | 1516 | if (conf->mirrors[d].rdev == NULL || |
1514 | conf->mirrors[d].rdev->faulty) | 1517 | test_bit(Faulty, &conf->mirrors[d].rdev->flags)) |
1515 | continue; | 1518 | continue; |
1516 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); | 1519 | atomic_inc(&conf->mirrors[d].rdev->nr_pending); |
1517 | atomic_inc(&r10_bio->remaining); | 1520 | atomic_inc(&r10_bio->remaining); |
@@ -1697,7 +1700,7 @@ static int run(mddev_t *mddev) | |||
1697 | mddev->queue->max_sectors = (PAGE_SIZE>>9); | 1700 | mddev->queue->max_sectors = (PAGE_SIZE>>9); |
1698 | 1701 | ||
1699 | disk->head_position = 0; | 1702 | disk->head_position = 0; |
1700 | if (!rdev->faulty && rdev->in_sync) | 1703 | if (!test_bit(Faulty, &rdev->flags) && test_bit(In_sync, &rdev->flags)) |
1701 | conf->working_disks++; | 1704 | conf->working_disks++; |
1702 | } | 1705 | } |
1703 | conf->raid_disks = mddev->raid_disks; | 1706 | conf->raid_disks = mddev->raid_disks; |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1223e98ecd70..e2a40283e323 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -293,9 +293,31 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
293 | return sh; | 293 | return sh; |
294 | } | 294 | } |
295 | 295 | ||
296 | static int grow_stripes(raid5_conf_t *conf, int num) | 296 | static int grow_one_stripe(raid5_conf_t *conf) |
297 | { | 297 | { |
298 | struct stripe_head *sh; | 298 | struct stripe_head *sh; |
299 | sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); | ||
300 | if (!sh) | ||
301 | return 0; | ||
302 | memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); | ||
303 | sh->raid_conf = conf; | ||
304 | spin_lock_init(&sh->lock); | ||
305 | |||
306 | if (grow_buffers(sh, conf->raid_disks)) { | ||
307 | shrink_buffers(sh, conf->raid_disks); | ||
308 | kmem_cache_free(conf->slab_cache, sh); | ||
309 | return 0; | ||
310 | } | ||
311 | /* we just created an active stripe so... */ | ||
312 | atomic_set(&sh->count, 1); | ||
313 | atomic_inc(&conf->active_stripes); | ||
314 | INIT_LIST_HEAD(&sh->lru); | ||
315 | release_stripe(sh); | ||
316 | return 1; | ||
317 | } | ||
318 | |||
319 | static int grow_stripes(raid5_conf_t *conf, int num) | ||
320 | { | ||
299 | kmem_cache_t *sc; | 321 | kmem_cache_t *sc; |
300 | int devs = conf->raid_disks; | 322 | int devs = conf->raid_disks; |
301 | 323 | ||
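grow_stripes() is refactored so that a single stripe_head can be allocated (grow_one_stripe()) or freed (drop_one_stripe(), below) in isolation, rather than only in bulk at setup and teardown. The natural use is resizing the stripe cache at runtime; assuming some external knob supplies a target (not shown in this diff), a caller could be as simple as:

        /* Hypothetical resize loop built on the new primitives: */
        while (conf->max_nr_stripes < target &&
               grow_one_stripe(conf))
                conf->max_nr_stripes++;
        while (conf->max_nr_stripes > target &&
               drop_one_stripe(conf))
                conf->max_nr_stripes--; /* stops early if stripes are busy */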
@@ -308,48 +330,39 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
308 | return 1; | 330 | return 1; |
309 | conf->slab_cache = sc; | 331 | conf->slab_cache = sc; |
310 | while (num--) { | 332 | while (num--) { |
311 | sh = kmem_cache_alloc(sc, GFP_KERNEL); | 333 | if (!grow_one_stripe(conf)) |
312 | if (!sh) | ||
313 | return 1; | ||
314 | memset(sh, 0, sizeof(*sh) + (devs-1)*sizeof(struct r5dev)); | ||
315 | sh->raid_conf = conf; | ||
316 | spin_lock_init(&sh->lock); | ||
317 | |||
318 | if (grow_buffers(sh, conf->raid_disks)) { | ||
319 | shrink_buffers(sh, conf->raid_disks); | ||
320 | kmem_cache_free(sc, sh); | ||
321 | return 1; | 334 | return 1; |
322 | } | ||
323 | /* we just created an active stripe so... */ | ||
324 | atomic_set(&sh->count, 1); | ||
325 | atomic_inc(&conf->active_stripes); | ||
326 | INIT_LIST_HEAD(&sh->lru); | ||
327 | release_stripe(sh); | ||
328 | } | 335 | } |
329 | return 0; | 336 | return 0; |
330 | } | 337 | } |
331 | 338 | ||
332 | static void shrink_stripes(raid5_conf_t *conf) | 339 | static int drop_one_stripe(raid5_conf_t *conf) |
333 | { | 340 | { |
334 | struct stripe_head *sh; | 341 | struct stripe_head *sh; |
335 | 342 | ||
336 | while (1) { | 343 | spin_lock_irq(&conf->device_lock); |
337 | spin_lock_irq(&conf->device_lock); | 344 | sh = get_free_stripe(conf); |
338 | sh = get_free_stripe(conf); | 345 | spin_unlock_irq(&conf->device_lock); |
339 | spin_unlock_irq(&conf->device_lock); | 346 | if (!sh) |
340 | if (!sh) | 347 | return 0; |
341 | break; | 348 | if (atomic_read(&sh->count)) |
342 | if (atomic_read(&sh->count)) | 349 | BUG(); |
343 | BUG(); | 350 | shrink_buffers(sh, conf->raid_disks); |
344 | shrink_buffers(sh, conf->raid_disks); | 351 | kmem_cache_free(conf->slab_cache, sh); |
345 | kmem_cache_free(conf->slab_cache, sh); | 352 | atomic_dec(&conf->active_stripes); |
346 | atomic_dec(&conf->active_stripes); | 353 | return 1; |
347 | } | 354 | } |
355 | |||
356 | static void shrink_stripes(raid5_conf_t *conf) | ||
357 | { | ||
358 | while (drop_one_stripe(conf)) | ||
359 | ; | ||
360 | |||
348 | kmem_cache_destroy(conf->slab_cache); | 361 | kmem_cache_destroy(conf->slab_cache); |
349 | conf->slab_cache = NULL; | 362 | conf->slab_cache = NULL; |
350 | } | 363 | } |
351 | 364 | ||
352 | static int raid5_end_read_request (struct bio * bi, unsigned int bytes_done, | 365 | static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done, |
353 | int error) | 366 | int error) |
354 | { | 367 | { |
355 | struct stripe_head *sh = bi->bi_private; | 368 | struct stripe_head *sh = bi->bi_private; |
@@ -401,10 +414,35 @@ static int raid5_end_read_request (struct bio * bi, unsigned int bytes_done, | |||
401 | } | 414 | } |
402 | #else | 415 | #else |
403 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | 416 | set_bit(R5_UPTODATE, &sh->dev[i].flags); |
404 | #endif | 417 | #endif |
418 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { | ||
419 | printk("R5: read error corrected!!\n"); | ||
420 | clear_bit(R5_ReadError, &sh->dev[i].flags); | ||
421 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | ||
422 | } | ||
423 | if (atomic_read(&conf->disks[i].rdev->read_errors)) | ||
424 | atomic_set(&conf->disks[i].rdev->read_errors, 0); | ||
405 | } else { | 425 | } else { |
406 | md_error(conf->mddev, conf->disks[i].rdev); | 426 | int retry = 0; |
407 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 427 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
428 | atomic_inc(&conf->disks[i].rdev->read_errors); | ||
429 | if (conf->mddev->degraded) | ||
430 | printk("R5: read error not correctable.\n"); | ||
431 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | ||
432 | /* Oh, no!!! */ | ||
433 | printk("R5: read error NOT corrected!!\n"); | ||
434 | else if (atomic_read(&conf->disks[i].rdev->read_errors) | ||
435 | > conf->max_nr_stripes) | ||
436 | printk("raid5: Too many read errors, failing device.\n"); | ||
437 | else | ||
438 | retry = 1; | ||
439 | if (retry) | ||
440 | set_bit(R5_ReadError, &sh->dev[i].flags); | ||
441 | else { | ||
442 | clear_bit(R5_ReadError, &sh->dev[i].flags); | ||
443 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | ||
444 | md_error(conf->mddev, conf->disks[i].rdev); | ||
445 | } | ||
408 | } | 446 | } |
409 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); | 447 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); |
410 | #if 0 | 448 | #if 0 |
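raid5_end_read_request() now separates four read-failure cases instead of calling md_error() unconditionally, and keeps a per-rdev read_errors count that any successful read resets. The R5_ReadError/R5_ReWrite bits suggest a retry-via-rewrite scheme; the write-back step itself is outside this excerpt, so the flow below is an inference:

        /* Assumed flow for a failed read on device i:
         *   set R5_ReadError -> handle_stripe() treats the device as
         *   failed and recomputes the block from parity -> R5_ReWrite
         *   set, recomputed data written back -> block re-read:
         *     success: "read error corrected", both bits cleared
         *     failure with R5_ReWrite already set: give up, md_error()
         */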
@@ -487,19 +525,19 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
487 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | 525 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; |
488 | PRINTK("raid5: error called\n"); | 526 | PRINTK("raid5: error called\n"); |
489 | 527 | ||
490 | if (!rdev->faulty) { | 528 | if (!test_bit(Faulty, &rdev->flags)) { |
491 | mddev->sb_dirty = 1; | 529 | mddev->sb_dirty = 1; |
492 | if (rdev->in_sync) { | 530 | if (test_bit(In_sync, &rdev->flags)) { |
493 | conf->working_disks--; | 531 | conf->working_disks--; |
494 | mddev->degraded++; | 532 | mddev->degraded++; |
495 | conf->failed_disks++; | 533 | conf->failed_disks++; |
496 | rdev->in_sync = 0; | 534 | clear_bit(In_sync, &rdev->flags); |
497 | /* | 535 | /* |
498 | * if recovery was running, make sure it aborts. | 536 | * if recovery was running, make sure it aborts. |
499 | */ | 537 | */ |
500 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 538 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); |
501 | } | 539 | } |
502 | rdev->faulty = 1; | 540 | set_bit(Faulty, &rdev->flags); |
503 | printk (KERN_ALERT | 541 | printk (KERN_ALERT |
504 | "raid5: Disk failure on %s, disabling device." | 542 | "raid5: Disk failure on %s, disabling device." |
505 | " Operation continuing on %d devices\n", | 543 | " Operation continuing on %d devices\n", |
@@ -965,7 +1003,13 @@ static void handle_stripe(struct stripe_head *sh) | |||
965 | } | 1003 | } |
966 | if (dev->written) written++; | 1004 | if (dev->written) written++; |
967 | rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ | 1005 | rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ |
968 | if (!rdev || !rdev->in_sync) { | 1006 | if (!rdev || !test_bit(In_sync, &rdev->flags)) { |
1007 | /* The ReadError flag will just be confusing now */ ||
1008 | clear_bit(R5_ReadError, &dev->flags); | ||
1009 | clear_bit(R5_ReWrite, &dev->flags); | ||
1010 | } | ||
1011 | if (!rdev || !test_bit(In_sync, &rdev->flags) | ||
1012 | || test_bit(R5_ReadError, &dev->flags)) { | ||
969 | failed++; | 1013 | failed++; |
970 | failed_num = i; | 1014 | failed_num = i; |
971 | } else | 1015 | } else |
@@ -980,6 +1024,14 @@ static void handle_stripe(struct stripe_head *sh) | |||
980 | if (failed > 1 && to_read+to_write+written) { | 1024 | if (failed > 1 && to_read+to_write+written) { |
981 | for (i=disks; i--; ) { | 1025 | for (i=disks; i--; ) { |
982 | int bitmap_end = 0; | 1026 | int bitmap_end = 0; |
1027 | |||
1028 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { | ||
1029 | mdk_rdev_t *rdev = conf->disks[i].rdev; | ||
1030 | if (rdev && test_bit(In_sync, &rdev->flags)) | ||
1031 | /* multiple read failures in one stripe */ | ||
1032 | md_error(conf->mddev, rdev); | ||
1033 | } | ||
1034 | |||
983 | spin_lock_irq(&conf->device_lock); | 1035 | spin_lock_irq(&conf->device_lock); |
984 | /* fail all writes first */ | 1036 | /* fail all writes first */ |
985 | bi = sh->dev[i].towrite; | 1037 | bi = sh->dev[i].towrite; |
@@ -1015,7 +1067,8 @@ static void handle_stripe(struct stripe_head *sh) | |||
1015 | } | 1067 | } |
1016 | 1068 | ||
1017 | /* fail any reads if this device is non-operational */ | 1069 | /* fail any reads if this device is non-operational */ |
1018 | if (!test_bit(R5_Insync, &sh->dev[i].flags)) { | 1070 | if (!test_bit(R5_Insync, &sh->dev[i].flags) || |
1071 | test_bit(R5_ReadError, &sh->dev[i].flags)) { | ||
1019 | bi = sh->dev[i].toread; | 1072 | bi = sh->dev[i].toread; |
1020 | sh->dev[i].toread = NULL; | 1073 | sh->dev[i].toread = NULL; |
1021 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) | 1074 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) |
@@ -1247,6 +1300,11 @@ static void handle_stripe(struct stripe_head *sh) | |||
1247 | !memcmp(pagea, pagea+4, STRIPE_SIZE-4)) { | 1300 | !memcmp(pagea, pagea+4, STRIPE_SIZE-4)) { |
1248 | /* parity is correct (on disc, not in buffer any more) */ | 1301 | /* parity is correct (on disc, not in buffer any more) */ |
1249 | set_bit(STRIPE_INSYNC, &sh->state); | 1302 | set_bit(STRIPE_INSYNC, &sh->state); |
1303 | } else { | ||
1304 | conf->mddev->resync_mismatches += STRIPE_SECTORS; | ||
1305 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) | ||
1306 | /* don't try to repair!! */ | ||
1307 | set_bit(STRIPE_INSYNC, &sh->state); | ||
1250 | } | 1308 | } |
1251 | } | 1309 | } |
1252 | if (!test_bit(STRIPE_INSYNC, &sh->state)) { | 1310 | if (!test_bit(STRIPE_INSYNC, &sh->state)) { |
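Note on the parity-check hunk above: it gives a user-requested "check" pass read-only semantics. A parity mismatch always bumps mddev->resync_mismatches, but when MD_RECOVERY_CHECK is set the stripe is simply declared in sync rather than repaired; only a "repair" pass (MD_RECOVERY_CHECK clear) falls through to the parity rewrite below. The companion hunk further down gates bitmap-based block skipping on !MD_RECOVERY_REQUESTED for the same reason: a requested check must visit every block. Sketch of the policy (names as in the hunk):

	/* on a detected parity mismatch: */
	conf->mddev->resync_mismatches += STRIPE_SECTORS;	/* always count */
	if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
		set_bit(STRIPE_INSYNC, &sh->state);		/* check: report only */
	/* otherwise STRIPE_INSYNC stays clear and the parity block is
	 * recomputed and rewritten (repair) */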
@@ -1274,7 +1332,27 @@ static void handle_stripe(struct stripe_head *sh) | |||
1274 | md_done_sync(conf->mddev, STRIPE_SECTORS,1); | 1332 | md_done_sync(conf->mddev, STRIPE_SECTORS,1); |
1275 | clear_bit(STRIPE_SYNCING, &sh->state); | 1333 | clear_bit(STRIPE_SYNCING, &sh->state); |
1276 | } | 1334 | } |
1277 | 1335 | ||
1336 | /* If the failed drive is just a ReadError, then we might need to progress | ||
1337 | * the repair/check process | ||
1338 | */ | ||
1339 | if (failed == 1 && ! conf->mddev->ro && | ||
1340 | test_bit(R5_ReadError, &sh->dev[failed_num].flags) | ||
1341 | && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags) | ||
1342 | && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags) | ||
1343 | ) { | ||
1344 | dev = &sh->dev[failed_num]; | ||
1345 | if (!test_bit(R5_ReWrite, &dev->flags)) { | ||
1346 | set_bit(R5_Wantwrite, &dev->flags); | ||
1347 | set_bit(R5_ReWrite, &dev->flags); | ||
1348 | set_bit(R5_LOCKED, &dev->flags); | ||
1349 | } else { | ||
1350 | /* let's read it back */ | ||
1351 | set_bit(R5_Wantread, &dev->flags); | ||
1352 | set_bit(R5_LOCKED, &dev->flags); | ||
1353 | } | ||
1354 | } | ||
1355 | |||
1278 | spin_unlock(&sh->lock); | 1356 | spin_unlock(&sh->lock); |
1279 | 1357 | ||
1280 | while ((bi=return_bi)) { | 1358 | while ((bi=return_bi)) { |
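Note on the ReadError-repair block above: once the stripe's data has been reconstructed from the other devices, a correctable read error is repaired in two steps. The first pass sets R5_Wantwrite plus R5_ReWrite to overwrite the bad block with the rebuilt data (giving the drive a chance to reallocate the sector); the next pass sets R5_Wantread to read it back and verify. If that re-read fails too, raid5_end_read_request() sees R5_ReWrite already set and fails the device through md_error(). A condensed sketch (the helper name is illustrative, not in the patch):

	static void progress_read_repair(struct r5dev *dev)
	{
		if (!test_bit(R5_ReWrite, &dev->flags)) {
			set_bit(R5_Wantwrite, &dev->flags);	/* step 1: rewrite */
			set_bit(R5_ReWrite, &dev->flags);
		} else {
			set_bit(R5_Wantread, &dev->flags);	/* step 2: verify */
		}
		set_bit(R5_LOCKED, &dev->flags);
	}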
@@ -1305,8 +1383,8 @@ static void handle_stripe(struct stripe_head *sh) | |||
1305 | bi->bi_end_io = raid5_end_read_request; | 1383 | bi->bi_end_io = raid5_end_read_request; |
1306 | 1384 | ||
1307 | rcu_read_lock(); | 1385 | rcu_read_lock(); |
1308 | rdev = conf->disks[i].rdev; | 1386 | rdev = rcu_dereference(conf->disks[i].rdev); |
1309 | if (rdev && rdev->faulty) | 1387 | if (rdev && test_bit(Faulty, &rdev->flags)) |
1310 | rdev = NULL; | 1388 | rdev = NULL; |
1311 | if (rdev) | 1389 | if (rdev) |
1312 | atomic_inc(&rdev->nr_pending); | 1390 | atomic_inc(&rdev->nr_pending); |
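Note on the RCU hunks here and below: access to conf->disks[i].rdev now follows the usual RCU discipline. Readers fetch the pointer with rcu_dereference() under rcu_read_lock() and pin the device by bumping nr_pending before the lock is dropped, while raid5_add_disk() publishes a new device with rcu_assign_pointer(). The reader side, as a sketch matching these hunks:

	rcu_read_lock();
	rdev = rcu_dereference(conf->disks[i].rdev);
	if (rdev && !test_bit(Faulty, &rdev->flags))
		atomic_inc(&rdev->nr_pending);	/* keeps the device across the I/O */
	else
		rdev = NULL;
	rcu_read_unlock();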
@@ -1379,8 +1457,8 @@ static void unplug_slaves(mddev_t *mddev) | |||
1379 | 1457 | ||
1380 | rcu_read_lock(); | 1458 | rcu_read_lock(); |
1381 | for (i=0; i<mddev->raid_disks; i++) { | 1459 | for (i=0; i<mddev->raid_disks; i++) { |
1382 | mdk_rdev_t *rdev = conf->disks[i].rdev; | 1460 | mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); |
1383 | if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) { | 1461 | if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { |
1384 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); | 1462 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); |
1385 | 1463 | ||
1386 | atomic_inc(&rdev->nr_pending); | 1464 | atomic_inc(&rdev->nr_pending); |
@@ -1424,8 +1502,8 @@ static int raid5_issue_flush(request_queue_t *q, struct gendisk *disk, | |||
1424 | 1502 | ||
1425 | rcu_read_lock(); | 1503 | rcu_read_lock(); |
1426 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { | 1504 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { |
1427 | mdk_rdev_t *rdev = conf->disks[i].rdev; | 1505 | mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); |
1428 | if (rdev && !rdev->faulty) { | 1506 | if (rdev && !test_bit(Faulty, &rdev->flags)) { |
1429 | struct block_device *bdev = rdev->bdev; | 1507 | struct block_device *bdev = rdev->bdev; |
1430 | request_queue_t *r_queue = bdev_get_queue(bdev); | 1508 | request_queue_t *r_queue = bdev_get_queue(bdev); |
1431 | 1509 | ||
@@ -1567,6 +1645,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1567 | return rv; | 1645 | return rv; |
1568 | } | 1646 | } |
1569 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && | 1647 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && |
1648 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && | ||
1570 | !conf->fullsync && sync_blocks >= STRIPE_SECTORS) { | 1649 | !conf->fullsync && sync_blocks >= STRIPE_SECTORS) { |
1571 | /* we can skip this block, and probably more */ | 1650 | /* we can skip this block, and probably more */ |
1572 | sync_blocks /= STRIPE_SECTORS; | 1651 | sync_blocks /= STRIPE_SECTORS; |
@@ -1663,6 +1742,74 @@ static void raid5d (mddev_t *mddev) | |||
1663 | PRINTK("--- raid5d inactive\n"); | 1742 | PRINTK("--- raid5d inactive\n"); |
1664 | } | 1743 | } |
1665 | 1744 | ||
1745 | static ssize_t | ||
1746 | raid5_show_stripe_cache_size(mddev_t *mddev, char *page) | ||
1747 | { | ||
1748 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
1749 | if (conf) | ||
1750 | return sprintf(page, "%d\n", conf->max_nr_stripes); | ||
1751 | else | ||
1752 | return 0; | ||
1753 | } | ||
1754 | |||
1755 | static ssize_t | ||
1756 | raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | ||
1757 | { | ||
1758 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
1759 | char *end; | ||
1760 | int new; | ||
1761 | if (len >= PAGE_SIZE) | ||
1762 | return -EINVAL; | ||
1763 | if (!conf) | ||
1764 | return -ENODEV; | ||
1765 | |||
1766 | new = simple_strtoul(page, &end, 10); | ||
1767 | if (!*page || (*end && *end != '\n') ) | ||
1768 | return -EINVAL; | ||
1769 | if (new <= 16 || new > 32768) | ||
1770 | return -EINVAL; | ||
1771 | while (new < conf->max_nr_stripes) { | ||
1772 | if (drop_one_stripe(conf)) | ||
1773 | conf->max_nr_stripes--; | ||
1774 | else | ||
1775 | break; | ||
1776 | } | ||
1777 | while (new > conf->max_nr_stripes) { | ||
1778 | if (grow_one_stripe(conf)) | ||
1779 | conf->max_nr_stripes++; | ||
1780 | else break; | ||
1781 | } | ||
1782 | return len; | ||
1783 | } | ||
1784 | |||
1785 | static struct md_sysfs_entry | ||
1786 | raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, | ||
1787 | raid5_show_stripe_cache_size, | ||
1788 | raid5_store_stripe_cache_size); | ||
1789 | |||
1790 | static ssize_t | ||
1791 | stripe_cache_active_show(mddev_t *mddev, char *page) | ||
1792 | { | ||
1793 | raid5_conf_t *conf = mddev_to_conf(mddev); | ||
1794 | if (conf) | ||
1795 | return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); | ||
1796 | else | ||
1797 | return 0; | ||
1798 | } | ||
1799 | |||
1800 | static struct md_sysfs_entry | ||
1801 | raid5_stripecache_active = __ATTR_RO(stripe_cache_active); | ||
1802 | |||
1803 | static struct attribute *raid5_attrs[] = { | ||
1804 | &raid5_stripecache_size.attr, | ||
1805 | &raid5_stripecache_active.attr, | ||
1806 | NULL, | ||
1807 | }; | ||
1808 | static struct attribute_group raid5_attrs_group = { | ||
1809 | .name = NULL, | ||
1810 | .attrs = raid5_attrs, | ||
1811 | }; | ||
1812 | |||
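Note on the sysfs block above: these entries expose the stripe cache through md's kobject once run() registers the group. stripe_cache_size is read-write (the store handler accepts 17-32768 and grows or shrinks the cache incrementally via grow_one_stripe()/drop_one_stripe()); stripe_cache_active is read-only. At runtime that should make something like "echo 4096 > /sys/block/md0/md/stripe_cache_size" work (device name illustrative). Each stripe's cost is the per-stripe term of the memory estimate run() already prints:

	/* from run(): total cache footprint, in KiB */
	memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
		 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;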
1666 | static int run(mddev_t *mddev) | 1813 | static int run(mddev_t *mddev) |
1667 | { | 1814 | { |
1668 | raid5_conf_t *conf; | 1815 | raid5_conf_t *conf; |
@@ -1709,7 +1856,7 @@ static int run(mddev_t *mddev) | |||
1709 | 1856 | ||
1710 | disk->rdev = rdev; | 1857 | disk->rdev = rdev; |
1711 | 1858 | ||
1712 | if (rdev->in_sync) { | 1859 | if (test_bit(In_sync, &rdev->flags)) { |
1713 | char b[BDEVNAME_SIZE]; | 1860 | char b[BDEVNAME_SIZE]; |
1714 | printk(KERN_INFO "raid5: device %s operational as raid" | 1861 | printk(KERN_INFO "raid5: device %s operational as raid" |
1715 | " disk %d\n", bdevname(rdev->bdev,b), | 1862 | " disk %d\n", bdevname(rdev->bdev,b), |
@@ -1804,6 +1951,7 @@ memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | |||
1804 | } | 1951 | } |
1805 | 1952 | ||
1806 | /* Ok, everything is just fine now */ | 1953 | /* Ok, everything is just fine now */ |
1954 | sysfs_create_group(&mddev->kobj, &raid5_attrs_group); | ||
1807 | 1955 | ||
1808 | if (mddev->bitmap) | 1956 | if (mddev->bitmap) |
1809 | mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; | 1957 | mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; |
@@ -1828,7 +1976,7 @@ abort: | |||
1828 | 1976 | ||
1829 | 1977 | ||
1830 | 1978 | ||
1831 | static int stop (mddev_t *mddev) | 1979 | static int stop(mddev_t *mddev) |
1832 | { | 1980 | { |
1833 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | 1981 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; |
1834 | 1982 | ||
@@ -1837,6 +1985,7 @@ static int stop (mddev_t *mddev) | |||
1837 | shrink_stripes(conf); | 1985 | shrink_stripes(conf); |
1838 | free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); | 1986 | free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); |
1839 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 1987 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ |
1988 | sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); | ||
1840 | kfree(conf); | 1989 | kfree(conf); |
1841 | mddev->private = NULL; | 1990 | mddev->private = NULL; |
1842 | return 0; | 1991 | return 0; |
@@ -1887,7 +2036,7 @@ static void status (struct seq_file *seq, mddev_t *mddev) | |||
1887 | for (i = 0; i < conf->raid_disks; i++) | 2036 | for (i = 0; i < conf->raid_disks; i++) |
1888 | seq_printf (seq, "%s", | 2037 | seq_printf (seq, "%s", |
1889 | conf->disks[i].rdev && | 2038 | conf->disks[i].rdev && |
1890 | conf->disks[i].rdev->in_sync ? "U" : "_"); | 2039 | test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_"); |
1891 | seq_printf (seq, "]"); | 2040 | seq_printf (seq, "]"); |
1892 | #if RAID5_DEBUG | 2041 | #if RAID5_DEBUG |
1893 | #define D(x) \ | 2042 | #define D(x) \ |
@@ -1914,7 +2063,7 @@ static void print_raid5_conf (raid5_conf_t *conf) | |||
1914 | tmp = conf->disks + i; | 2063 | tmp = conf->disks + i; |
1915 | if (tmp->rdev) | 2064 | if (tmp->rdev) |
1916 | printk(" disk %d, o:%d, dev:%s\n", | 2065 | printk(" disk %d, o:%d, dev:%s\n", |
1917 | i, !tmp->rdev->faulty, | 2066 | i, !test_bit(Faulty, &tmp->rdev->flags), |
1918 | bdevname(tmp->rdev->bdev,b)); | 2067 | bdevname(tmp->rdev->bdev,b)); |
1919 | } | 2068 | } |
1920 | } | 2069 | } |
@@ -1928,12 +2077,12 @@ static int raid5_spare_active(mddev_t *mddev) | |||
1928 | for (i = 0; i < conf->raid_disks; i++) { | 2077 | for (i = 0; i < conf->raid_disks; i++) { |
1929 | tmp = conf->disks + i; | 2078 | tmp = conf->disks + i; |
1930 | if (tmp->rdev | 2079 | if (tmp->rdev |
1931 | && !tmp->rdev->faulty | 2080 | && !test_bit(Faulty, &tmp->rdev->flags) |
1932 | && !tmp->rdev->in_sync) { | 2081 | && !test_bit(In_sync, &tmp->rdev->flags)) { |
1933 | mddev->degraded--; | 2082 | mddev->degraded--; |
1934 | conf->failed_disks--; | 2083 | conf->failed_disks--; |
1935 | conf->working_disks++; | 2084 | conf->working_disks++; |
1936 | tmp->rdev->in_sync = 1; | 2085 | set_bit(In_sync, &tmp->rdev->flags); |
1937 | } | 2086 | } |
1938 | } | 2087 | } |
1939 | print_raid5_conf(conf); | 2088 | print_raid5_conf(conf); |
@@ -1950,7 +2099,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number) | |||
1950 | print_raid5_conf(conf); | 2099 | print_raid5_conf(conf); |
1951 | rdev = p->rdev; | 2100 | rdev = p->rdev; |
1952 | if (rdev) { | 2101 | if (rdev) { |
1953 | if (rdev->in_sync || | 2102 | if (test_bit(In_sync, &rdev->flags) || |
1954 | atomic_read(&rdev->nr_pending)) { | 2103 | atomic_read(&rdev->nr_pending)) { |
1955 | err = -EBUSY; | 2104 | err = -EBUSY; |
1956 | goto abort; | 2105 | goto abort; |
@@ -1985,12 +2134,12 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1985 | */ | 2134 | */ |
1986 | for (disk=0; disk < mddev->raid_disks; disk++) | 2135 | for (disk=0; disk < mddev->raid_disks; disk++) |
1987 | if ((p=conf->disks + disk)->rdev == NULL) { | 2136 | if ((p=conf->disks + disk)->rdev == NULL) { |
1988 | rdev->in_sync = 0; | 2137 | clear_bit(In_sync, &rdev->flags); |
1989 | rdev->raid_disk = disk; | 2138 | rdev->raid_disk = disk; |
1990 | found = 1; | 2139 | found = 1; |
1991 | if (rdev->saved_raid_disk != disk) | 2140 | if (rdev->saved_raid_disk != disk) |
1992 | conf->fullsync = 1; | 2141 | conf->fullsync = 1; |
1993 | p->rdev = rdev; | 2142 | rcu_assign_pointer(p->rdev, rdev); |
1994 | break; | 2143 | break; |
1995 | } | 2144 | } |
1996 | print_raid5_conf(conf); | 2145 | print_raid5_conf(conf); |
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 775786947701..eae5a35629c5 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c | |||
@@ -507,19 +507,19 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
507 | raid6_conf_t *conf = (raid6_conf_t *) mddev->private; | 507 | raid6_conf_t *conf = (raid6_conf_t *) mddev->private; |
508 | PRINTK("raid6: error called\n"); | 508 | PRINTK("raid6: error called\n"); |
509 | 509 | ||
510 | if (!rdev->faulty) { | 510 | if (!test_bit(Faulty, &rdev->flags)) { |
511 | mddev->sb_dirty = 1; | 511 | mddev->sb_dirty = 1; |
512 | if (rdev->in_sync) { | 512 | if (test_bit(In_sync, &rdev->flags)) { |
513 | conf->working_disks--; | 513 | conf->working_disks--; |
514 | mddev->degraded++; | 514 | mddev->degraded++; |
515 | conf->failed_disks++; | 515 | conf->failed_disks++; |
516 | rdev->in_sync = 0; | 516 | clear_bit(In_sync, &rdev->flags); |
517 | /* | 517 | /* |
518 | * if recovery was running, make sure it aborts. | 518 | * if recovery was running, make sure it aborts. |
519 | */ | 519 | */ |
520 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 520 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); |
521 | } | 521 | } |
522 | rdev->faulty = 1; | 522 | set_bit(Faulty, &rdev->flags); |
523 | printk (KERN_ALERT | 523 | printk (KERN_ALERT |
524 | "raid6: Disk failure on %s, disabling device." | 524 | "raid6: Disk failure on %s, disabling device." |
525 | " Operation continuing on %d devices\n", | 525 | " Operation continuing on %d devices\n", |
@@ -1071,7 +1071,7 @@ static void handle_stripe(struct stripe_head *sh) | |||
1071 | } | 1071 | } |
1072 | if (dev->written) written++; | 1072 | if (dev->written) written++; |
1073 | rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ | 1073 | rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ |
1074 | if (!rdev || !rdev->in_sync) { | 1074 | if (!rdev || !test_bit(In_sync, &rdev->flags)) { |
1075 | if ( failed < 2 ) | 1075 | if ( failed < 2 ) |
1076 | failed_num[failed] = i; | 1076 | failed_num[failed] = i; |
1077 | failed++; | 1077 | failed++; |
@@ -1464,8 +1464,8 @@ static void handle_stripe(struct stripe_head *sh) | |||
1464 | bi->bi_end_io = raid6_end_read_request; | 1464 | bi->bi_end_io = raid6_end_read_request; |
1465 | 1465 | ||
1466 | rcu_read_lock(); | 1466 | rcu_read_lock(); |
1467 | rdev = conf->disks[i].rdev; | 1467 | rdev = rcu_dereference(conf->disks[i].rdev); |
1468 | if (rdev && rdev->faulty) | 1468 | if (rdev && test_bit(Faulty, &rdev->flags)) |
1469 | rdev = NULL; | 1469 | rdev = NULL; |
1470 | if (rdev) | 1470 | if (rdev) |
1471 | atomic_inc(&rdev->nr_pending); | 1471 | atomic_inc(&rdev->nr_pending); |
@@ -1538,8 +1538,8 @@ static void unplug_slaves(mddev_t *mddev) | |||
1538 | 1538 | ||
1539 | rcu_read_lock(); | 1539 | rcu_read_lock(); |
1540 | for (i=0; i<mddev->raid_disks; i++) { | 1540 | for (i=0; i<mddev->raid_disks; i++) { |
1541 | mdk_rdev_t *rdev = conf->disks[i].rdev; | 1541 | mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); |
1542 | if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) { | 1542 | if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { |
1543 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); | 1543 | request_queue_t *r_queue = bdev_get_queue(rdev->bdev); |
1544 | 1544 | ||
1545 | atomic_inc(&rdev->nr_pending); | 1545 | atomic_inc(&rdev->nr_pending); |
@@ -1583,8 +1583,8 @@ static int raid6_issue_flush(request_queue_t *q, struct gendisk *disk, | |||
1583 | 1583 | ||
1584 | rcu_read_lock(); | 1584 | rcu_read_lock(); |
1585 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { | 1585 | for (i=0; i<mddev->raid_disks && ret == 0; i++) { |
1586 | mdk_rdev_t *rdev = conf->disks[i].rdev; | 1586 | mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev); |
1587 | if (rdev && !rdev->faulty) { | 1587 | if (rdev && !test_bit(Faulty, &rdev->flags)) { |
1588 | struct block_device *bdev = rdev->bdev; | 1588 | struct block_device *bdev = rdev->bdev; |
1589 | request_queue_t *r_queue = bdev_get_queue(bdev); | 1589 | request_queue_t *r_queue = bdev_get_queue(bdev); |
1590 | 1590 | ||
@@ -1868,7 +1868,7 @@ static int run(mddev_t *mddev) | |||
1868 | 1868 | ||
1869 | disk->rdev = rdev; | 1869 | disk->rdev = rdev; |
1870 | 1870 | ||
1871 | if (rdev->in_sync) { | 1871 | if (test_bit(In_sync, &rdev->flags)) { |
1872 | char b[BDEVNAME_SIZE]; | 1872 | char b[BDEVNAME_SIZE]; |
1873 | printk(KERN_INFO "raid6: device %s operational as raid" | 1873 | printk(KERN_INFO "raid6: device %s operational as raid" |
1874 | " disk %d\n", bdevname(rdev->bdev,b), | 1874 | " disk %d\n", bdevname(rdev->bdev,b), |
@@ -2052,7 +2052,7 @@ static void status (struct seq_file *seq, mddev_t *mddev) | |||
2052 | for (i = 0; i < conf->raid_disks; i++) | 2052 | for (i = 0; i < conf->raid_disks; i++) |
2053 | seq_printf (seq, "%s", | 2053 | seq_printf (seq, "%s", |
2054 | conf->disks[i].rdev && | 2054 | conf->disks[i].rdev && |
2055 | conf->disks[i].rdev->in_sync ? "U" : "_"); | 2055 | test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_"); |
2056 | seq_printf (seq, "]"); | 2056 | seq_printf (seq, "]"); |
2057 | #if RAID6_DUMPSTATE | 2057 | #if RAID6_DUMPSTATE |
2058 | seq_printf (seq, "\n"); | 2058 | seq_printf (seq, "\n"); |
@@ -2078,7 +2078,7 @@ static void print_raid6_conf (raid6_conf_t *conf) | |||
2078 | tmp = conf->disks + i; | 2078 | tmp = conf->disks + i; |
2079 | if (tmp->rdev) | 2079 | if (tmp->rdev) |
2080 | printk(" disk %d, o:%d, dev:%s\n", | 2080 | printk(" disk %d, o:%d, dev:%s\n", |
2081 | i, !tmp->rdev->faulty, | 2081 | i, !test_bit(Faulty, &tmp->rdev->flags), |
2082 | bdevname(tmp->rdev->bdev,b)); | 2082 | bdevname(tmp->rdev->bdev,b)); |
2083 | } | 2083 | } |
2084 | } | 2084 | } |
@@ -2092,12 +2092,12 @@ static int raid6_spare_active(mddev_t *mddev) | |||
2092 | for (i = 0; i < conf->raid_disks; i++) { | 2092 | for (i = 0; i < conf->raid_disks; i++) { |
2093 | tmp = conf->disks + i; | 2093 | tmp = conf->disks + i; |
2094 | if (tmp->rdev | 2094 | if (tmp->rdev |
2095 | && !tmp->rdev->faulty | 2095 | && !test_bit(Faulty, &tmp->rdev->flags) |
2096 | && !tmp->rdev->in_sync) { | 2096 | && !test_bit(In_sync, &tmp->rdev->flags)) { |
2097 | mddev->degraded--; | 2097 | mddev->degraded--; |
2098 | conf->failed_disks--; | 2098 | conf->failed_disks--; |
2099 | conf->working_disks++; | 2099 | conf->working_disks++; |
2100 | tmp->rdev->in_sync = 1; | 2100 | set_bit(In_sync, &tmp->rdev->flags); |
2101 | } | 2101 | } |
2102 | } | 2102 | } |
2103 | print_raid6_conf(conf); | 2103 | print_raid6_conf(conf); |
@@ -2114,7 +2114,7 @@ static int raid6_remove_disk(mddev_t *mddev, int number) | |||
2114 | print_raid6_conf(conf); | 2114 | print_raid6_conf(conf); |
2115 | rdev = p->rdev; | 2115 | rdev = p->rdev; |
2116 | if (rdev) { | 2116 | if (rdev) { |
2117 | if (rdev->in_sync || | 2117 | if (test_bit(In_sync, &rdev->flags) || |
2118 | atomic_read(&rdev->nr_pending)) { | 2118 | atomic_read(&rdev->nr_pending)) { |
2119 | err = -EBUSY; | 2119 | err = -EBUSY; |
2120 | goto abort; | 2120 | goto abort; |
@@ -2149,12 +2149,12 @@ static int raid6_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
2149 | */ | 2149 | */ |
2150 | for (disk=0; disk < mddev->raid_disks; disk++) | 2150 | for (disk=0; disk < mddev->raid_disks; disk++) |
2151 | if ((p=conf->disks + disk)->rdev == NULL) { | 2151 | if ((p=conf->disks + disk)->rdev == NULL) { |
2152 | rdev->in_sync = 0; | 2152 | clear_bit(In_sync, &rdev->flags); |
2153 | rdev->raid_disk = disk; | 2153 | rdev->raid_disk = disk; |
2154 | found = 1; | 2154 | found = 1; |
2155 | if (rdev->saved_raid_disk != disk) | 2155 | if (rdev->saved_raid_disk != disk) |
2156 | conf->fullsync = 1; | 2156 | conf->fullsync = 1; |
2157 | p->rdev = rdev; | 2157 | rcu_assign_pointer(p->rdev, rdev); |
2158 | break; | 2158 | break; |
2159 | } | 2159 | } |
2160 | print_raid6_conf(conf); | 2160 | print_raid6_conf(conf); |