diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 17 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 1 | ||||
-rw-r--r-- | drivers/md/md.c | 87 | ||||
-rw-r--r-- | drivers/md/multipath.c | 3 | ||||
-rw-r--r-- | drivers/md/raid1.c | 29 | ||||
-rw-r--r-- | drivers/md/raid10.c | 16 | ||||
-rw-r--r-- | drivers/md/raid5.c | 68 |
7 files changed, 159 insertions, 62 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index c14dacdacfac..b26927ce889c 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -203,17 +203,6 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) | |||
203 | * bitmap file handling - read and write the bitmap file and its superblock | 203 | * bitmap file handling - read and write the bitmap file and its superblock |
204 | */ | 204 | */ |
205 | 205 | ||
206 | /* copy the pathname of a file to a buffer */ | ||
207 | char *file_path(struct file *file, char *buf, int count) | ||
208 | { | ||
209 | if (!buf) | ||
210 | return NULL; | ||
211 | |||
212 | buf = d_path(&file->f_path, buf, count); | ||
213 | |||
214 | return IS_ERR(buf) ? NULL : buf; | ||
215 | } | ||
216 | |||
217 | /* | 206 | /* |
218 | * basic page I/O operations | 207 | * basic page I/O operations |
219 | */ | 208 | */ |
@@ -721,11 +710,13 @@ static void bitmap_file_kick(struct bitmap *bitmap) | |||
721 | if (bitmap->file) { | 710 | if (bitmap->file) { |
722 | path = kmalloc(PAGE_SIZE, GFP_KERNEL); | 711 | path = kmalloc(PAGE_SIZE, GFP_KERNEL); |
723 | if (path) | 712 | if (path) |
724 | ptr = file_path(bitmap->file, path, PAGE_SIZE); | 713 | ptr = d_path(&bitmap->file->f_path, path, |
714 | PAGE_SIZE); | ||
715 | |||
725 | 716 | ||
726 | printk(KERN_ALERT | 717 | printk(KERN_ALERT |
727 | "%s: kicking failed bitmap file %s from array!\n", | 718 | "%s: kicking failed bitmap file %s from array!\n", |
728 | bmname(bitmap), ptr ? ptr : ""); | 719 | bmname(bitmap), IS_ERR(ptr) ? "" : ptr); |
729 | 720 | ||
730 | kfree(path); | 721 | kfree(path); |
731 | } else | 722 | } else |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 835def11419d..ab6a61db63ce 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c | |||
@@ -432,6 +432,7 @@ static int crypt_convert(struct crypt_config *cc, | |||
432 | case 0: | 432 | case 0: |
433 | atomic_dec(&ctx->pending); | 433 | atomic_dec(&ctx->pending); |
434 | ctx->sector++; | 434 | ctx->sector++; |
435 | cond_resched(); | ||
435 | continue; | 436 | continue; |
436 | 437 | ||
437 | /* error */ | 438 | /* error */ |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 83eb78b00137..2580ac1b9b0f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock); | |||
74 | 74 | ||
75 | static void md_print_devices(void); | 75 | static void md_print_devices(void); |
76 | 76 | ||
77 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | ||
78 | |||
77 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | 79 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } |
78 | 80 | ||
79 | /* | 81 | /* |
@@ -274,6 +276,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
274 | atomic_set(&new->active, 1); | 276 | atomic_set(&new->active, 1); |
275 | spin_lock_init(&new->write_lock); | 277 | spin_lock_init(&new->write_lock); |
276 | init_waitqueue_head(&new->sb_wait); | 278 | init_waitqueue_head(&new->sb_wait); |
279 | init_waitqueue_head(&new->recovery_wait); | ||
277 | new->reshape_position = MaxSector; | 280 | new->reshape_position = MaxSector; |
278 | new->resync_max = MaxSector; | 281 | new->resync_max = MaxSector; |
279 | new->level = LEVEL_NONE; | 282 | new->level = LEVEL_NONE; |
@@ -3013,6 +3016,36 @@ degraded_show(mddev_t *mddev, char *page) | |||
3013 | static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded); | 3016 | static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded); |
3014 | 3017 | ||
3015 | static ssize_t | 3018 | static ssize_t |
3019 | sync_force_parallel_show(mddev_t *mddev, char *page) | ||
3020 | { | ||
3021 | return sprintf(page, "%d\n", mddev->parallel_resync); | ||
3022 | } | ||
3023 | |||
3024 | static ssize_t | ||
3025 | sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len) | ||
3026 | { | ||
3027 | long n; | ||
3028 | |||
3029 | if (strict_strtol(buf, 10, &n)) | ||
3030 | return -EINVAL; | ||
3031 | |||
3032 | if (n != 0 && n != 1) | ||
3033 | return -EINVAL; | ||
3034 | |||
3035 | mddev->parallel_resync = n; | ||
3036 | |||
3037 | if (mddev->sync_thread) | ||
3038 | wake_up(&resync_wait); | ||
3039 | |||
3040 | return len; | ||
3041 | } | ||
3042 | |||
3043 | /* force parallel resync, even with shared block devices */ | ||
3044 | static struct md_sysfs_entry md_sync_force_parallel = | ||
3045 | __ATTR(sync_force_parallel, S_IRUGO|S_IWUSR, | ||
3046 | sync_force_parallel_show, sync_force_parallel_store); | ||
3047 | |||
3048 | static ssize_t | ||
3016 | sync_speed_show(mddev_t *mddev, char *page) | 3049 | sync_speed_show(mddev_t *mddev, char *page) |
3017 | { | 3050 | { |
3018 | unsigned long resync, dt, db; | 3051 | unsigned long resync, dt, db; |
@@ -3187,6 +3220,7 @@ static struct attribute *md_redundancy_attrs[] = { | |||
3187 | &md_sync_min.attr, | 3220 | &md_sync_min.attr, |
3188 | &md_sync_max.attr, | 3221 | &md_sync_max.attr, |
3189 | &md_sync_speed.attr, | 3222 | &md_sync_speed.attr, |
3223 | &md_sync_force_parallel.attr, | ||
3190 | &md_sync_completed.attr, | 3224 | &md_sync_completed.attr, |
3191 | &md_max_sync.attr, | 3225 | &md_max_sync.attr, |
3192 | &md_suspend_lo.attr, | 3226 | &md_suspend_lo.attr, |
@@ -3691,6 +3725,8 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3691 | 3725 | ||
3692 | module_put(mddev->pers->owner); | 3726 | module_put(mddev->pers->owner); |
3693 | mddev->pers = NULL; | 3727 | mddev->pers = NULL; |
3728 | /* tell userspace to handle 'inactive' */ | ||
3729 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
3694 | 3730 | ||
3695 | set_capacity(disk, 0); | 3731 | set_capacity(disk, 0); |
3696 | mddev->changed = 1; | 3732 | mddev->changed = 1; |
@@ -3861,8 +3897,10 @@ static void autorun_devices(int part) | |||
3861 | 3897 | ||
3862 | md_probe(dev, NULL, NULL); | 3898 | md_probe(dev, NULL, NULL); |
3863 | mddev = mddev_find(dev); | 3899 | mddev = mddev_find(dev); |
3864 | if (!mddev) { | 3900 | if (!mddev || !mddev->gendisk) { |
3865 | printk(KERN_ERR | 3901 | if (mddev) |
3902 | mddev_put(mddev); | ||
3903 | printk(KERN_ERR | ||
3866 | "md: cannot allocate memory for md drive.\n"); | 3904 | "md: cannot allocate memory for md drive.\n"); |
3867 | break; | 3905 | break; |
3868 | } | 3906 | } |
@@ -3987,8 +4025,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg) | |||
3987 | if (!buf) | 4025 | if (!buf) |
3988 | goto out; | 4026 | goto out; |
3989 | 4027 | ||
3990 | ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname)); | 4028 | ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname)); |
3991 | if (!ptr) | 4029 | if (IS_ERR(ptr)) |
3992 | goto out; | 4030 | goto out; |
3993 | 4031 | ||
3994 | strcpy(file->pathname, ptr); | 4032 | strcpy(file->pathname, ptr); |
@@ -5399,7 +5437,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) | |||
5399 | atomic_sub(blocks, &mddev->recovery_active); | 5437 | atomic_sub(blocks, &mddev->recovery_active); |
5400 | wake_up(&mddev->recovery_wait); | 5438 | wake_up(&mddev->recovery_wait); |
5401 | if (!ok) { | 5439 | if (!ok) { |
5402 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 5440 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5403 | md_wakeup_thread(mddev->thread); | 5441 | md_wakeup_thread(mddev->thread); |
5404 | // stop recovery, signal do_sync .... | 5442 | // stop recovery, signal do_sync .... |
5405 | } | 5443 | } |
@@ -5435,8 +5473,11 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
5435 | md_wakeup_thread(mddev->thread); | 5473 | md_wakeup_thread(mddev->thread); |
5436 | } | 5474 | } |
5437 | spin_unlock_irq(&mddev->write_lock); | 5475 | spin_unlock_irq(&mddev->write_lock); |
5476 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
5438 | } | 5477 | } |
5439 | wait_event(mddev->sb_wait, mddev->flags==0); | 5478 | wait_event(mddev->sb_wait, |
5479 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && | ||
5480 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); | ||
5440 | } | 5481 | } |
5441 | 5482 | ||
5442 | void md_write_end(mddev_t *mddev) | 5483 | void md_write_end(mddev_t *mddev) |
@@ -5471,13 +5512,17 @@ void md_allow_write(mddev_t *mddev) | |||
5471 | mddev->safemode = 1; | 5512 | mddev->safemode = 1; |
5472 | spin_unlock_irq(&mddev->write_lock); | 5513 | spin_unlock_irq(&mddev->write_lock); |
5473 | md_update_sb(mddev, 0); | 5514 | md_update_sb(mddev, 0); |
5515 | |||
5516 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
5517 | /* wait for the dirty state to be recorded in the metadata */ | ||
5518 | wait_event(mddev->sb_wait, | ||
5519 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && | ||
5520 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); | ||
5474 | } else | 5521 | } else |
5475 | spin_unlock_irq(&mddev->write_lock); | 5522 | spin_unlock_irq(&mddev->write_lock); |
5476 | } | 5523 | } |
5477 | EXPORT_SYMBOL_GPL(md_allow_write); | 5524 | EXPORT_SYMBOL_GPL(md_allow_write); |
5478 | 5525 | ||
5479 | static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | ||
5480 | |||
5481 | #define SYNC_MARKS 10 | 5526 | #define SYNC_MARKS 10 |
5482 | #define SYNC_MARK_STEP (3*HZ) | 5527 | #define SYNC_MARK_STEP (3*HZ) |
5483 | void md_do_sync(mddev_t *mddev) | 5528 | void md_do_sync(mddev_t *mddev) |
@@ -5541,8 +5586,9 @@ void md_do_sync(mddev_t *mddev) | |||
5541 | for_each_mddev(mddev2, tmp) { | 5586 | for_each_mddev(mddev2, tmp) { |
5542 | if (mddev2 == mddev) | 5587 | if (mddev2 == mddev) |
5543 | continue; | 5588 | continue; |
5544 | if (mddev2->curr_resync && | 5589 | if (!mddev->parallel_resync |
5545 | match_mddev_units(mddev,mddev2)) { | 5590 | && mddev2->curr_resync |
5591 | && match_mddev_units(mddev, mddev2)) { | ||
5546 | DEFINE_WAIT(wq); | 5592 | DEFINE_WAIT(wq); |
5547 | if (mddev < mddev2 && mddev->curr_resync == 2) { | 5593 | if (mddev < mddev2 && mddev->curr_resync == 2) { |
5548 | /* arbitrarily yield */ | 5594 | /* arbitrarily yield */ |
@@ -5622,7 +5668,6 @@ void md_do_sync(mddev_t *mddev) | |||
5622 | window/2,(unsigned long long) max_sectors/2); | 5668 | window/2,(unsigned long long) max_sectors/2); |
5623 | 5669 | ||
5624 | atomic_set(&mddev->recovery_active, 0); | 5670 | atomic_set(&mddev->recovery_active, 0); |
5625 | init_waitqueue_head(&mddev->recovery_wait); | ||
5626 | last_check = 0; | 5671 | last_check = 0; |
5627 | 5672 | ||
5628 | if (j>2) { | 5673 | if (j>2) { |
@@ -5647,7 +5692,7 @@ void md_do_sync(mddev_t *mddev) | |||
5647 | sectors = mddev->pers->sync_request(mddev, j, &skipped, | 5692 | sectors = mddev->pers->sync_request(mddev, j, &skipped, |
5648 | currspeed < speed_min(mddev)); | 5693 | currspeed < speed_min(mddev)); |
5649 | if (sectors == 0) { | 5694 | if (sectors == 0) { |
5650 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 5695 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5651 | goto out; | 5696 | goto out; |
5652 | } | 5697 | } |
5653 | 5698 | ||
@@ -5670,8 +5715,7 @@ void md_do_sync(mddev_t *mddev) | |||
5670 | 5715 | ||
5671 | last_check = io_sectors; | 5716 | last_check = io_sectors; |
5672 | 5717 | ||
5673 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) || | 5718 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
5674 | test_bit(MD_RECOVERY_ERR, &mddev->recovery)) | ||
5675 | break; | 5719 | break; |
5676 | 5720 | ||
5677 | repeat: | 5721 | repeat: |
@@ -5725,8 +5769,7 @@ void md_do_sync(mddev_t *mddev) | |||
5725 | /* tell personality that we are finished */ | 5769 | /* tell personality that we are finished */ |
5726 | mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); | 5770 | mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); |
5727 | 5771 | ||
5728 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && | 5772 | if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && |
5729 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && | ||
5730 | mddev->curr_resync > 2) { | 5773 | mddev->curr_resync > 2) { |
5731 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 5774 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
5732 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5775 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
@@ -5795,7 +5838,10 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
5795 | } | 5838 | } |
5796 | 5839 | ||
5797 | if (mddev->degraded) { | 5840 | if (mddev->degraded) { |
5798 | rdev_for_each(rdev, rtmp, mddev) | 5841 | rdev_for_each(rdev, rtmp, mddev) { |
5842 | if (rdev->raid_disk >= 0 && | ||
5843 | !test_bit(In_sync, &rdev->flags)) | ||
5844 | spares++; | ||
5799 | if (rdev->raid_disk < 0 | 5845 | if (rdev->raid_disk < 0 |
5800 | && !test_bit(Faulty, &rdev->flags)) { | 5846 | && !test_bit(Faulty, &rdev->flags)) { |
5801 | rdev->recovery_offset = 0; | 5847 | rdev->recovery_offset = 0; |
@@ -5813,6 +5859,7 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
5813 | } else | 5859 | } else |
5814 | break; | 5860 | break; |
5815 | } | 5861 | } |
5862 | } | ||
5816 | } | 5863 | } |
5817 | return spares; | 5864 | return spares; |
5818 | } | 5865 | } |
@@ -5826,7 +5873,7 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
5826 | * to do that as needed. | 5873 | * to do that as needed. |
5827 | * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in | 5874 | * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in |
5828 | * "->recovery" and create a thread at ->sync_thread. | 5875 | * "->recovery" and create a thread at ->sync_thread. |
5829 | * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR) | 5876 | * When the thread finishes it sets MD_RECOVERY_DONE |
5830 | * and wakeups up this thread which will reap the thread and finish up. | 5877 | * and wakeups up this thread which will reap the thread and finish up. |
5831 | * This thread also removes any faulty devices (with nr_pending == 0). | 5878 | * This thread also removes any faulty devices (with nr_pending == 0). |
5832 | * | 5879 | * |
@@ -5901,8 +5948,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5901 | /* resync has finished, collect result */ | 5948 | /* resync has finished, collect result */ |
5902 | md_unregister_thread(mddev->sync_thread); | 5949 | md_unregister_thread(mddev->sync_thread); |
5903 | mddev->sync_thread = NULL; | 5950 | mddev->sync_thread = NULL; |
5904 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && | 5951 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
5905 | !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | ||
5906 | /* success...*/ | 5952 | /* success...*/ |
5907 | /* activate any spares */ | 5953 | /* activate any spares */ |
5908 | mddev->pers->spare_active(mddev); | 5954 | mddev->pers->spare_active(mddev); |
@@ -5926,7 +5972,6 @@ void md_check_recovery(mddev_t *mddev) | |||
5926 | * might be left set | 5972 | * might be left set |
5927 | */ | 5973 | */ |
5928 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5974 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
5929 | clear_bit(MD_RECOVERY_ERR, &mddev->recovery); | ||
5930 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); | 5975 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5931 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); | 5976 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); |
5932 | 5977 | ||
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 4f4d1f383842..e968116e0de9 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -327,7 +327,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number) | |||
327 | if (rdev) { | 327 | if (rdev) { |
328 | if (test_bit(In_sync, &rdev->flags) || | 328 | if (test_bit(In_sync, &rdev->flags) || |
329 | atomic_read(&rdev->nr_pending)) { | 329 | atomic_read(&rdev->nr_pending)) { |
330 | printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number); | 330 | printk(KERN_ERR "hot-remove-disk, slot %d is identified" |
331 | " but is still operational!\n", number); | ||
331 | err = -EBUSY; | 332 | err = -EBUSY; |
332 | goto abort; | 333 | goto abort; |
333 | } | 334 | } |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ac409b7d83f5..c610b947218a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -773,7 +773,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
773 | r1bio_t *r1_bio; | 773 | r1bio_t *r1_bio; |
774 | struct bio *read_bio; | 774 | struct bio *read_bio; |
775 | int i, targets = 0, disks; | 775 | int i, targets = 0, disks; |
776 | struct bitmap *bitmap = mddev->bitmap; | 776 | struct bitmap *bitmap; |
777 | unsigned long flags; | 777 | unsigned long flags; |
778 | struct bio_list bl; | 778 | struct bio_list bl; |
779 | struct page **behind_pages = NULL; | 779 | struct page **behind_pages = NULL; |
@@ -802,6 +802,8 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
802 | 802 | ||
803 | wait_barrier(conf); | 803 | wait_barrier(conf); |
804 | 804 | ||
805 | bitmap = mddev->bitmap; | ||
806 | |||
805 | disk_stat_inc(mddev->gendisk, ios[rw]); | 807 | disk_stat_inc(mddev->gendisk, ios[rw]); |
806 | disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); | 808 | disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); |
807 | 809 | ||
@@ -1025,7 +1027,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1025 | /* | 1027 | /* |
1026 | * if recovery is running, make sure it aborts. | 1028 | * if recovery is running, make sure it aborts. |
1027 | */ | 1029 | */ |
1028 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 1030 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
1029 | } else | 1031 | } else |
1030 | set_bit(Faulty, &rdev->flags); | 1032 | set_bit(Faulty, &rdev->flags); |
1031 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 1033 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
@@ -1146,6 +1148,14 @@ static int raid1_remove_disk(mddev_t *mddev, int number) | |||
1146 | err = -EBUSY; | 1148 | err = -EBUSY; |
1147 | goto abort; | 1149 | goto abort; |
1148 | } | 1150 | } |
1151 | /* Only remove non-faulty devices if recovery | ||
1152 | * is not possible. | ||
1153 | */ | ||
1154 | if (!test_bit(Faulty, &rdev->flags) && | ||
1155 | mddev->degraded < conf->raid_disks) { | ||
1156 | err = -EBUSY; | ||
1157 | goto abort; | ||
1158 | } | ||
1149 | p->rdev = NULL; | 1159 | p->rdev = NULL; |
1150 | synchronize_rcu(); | 1160 | synchronize_rcu(); |
1151 | if (atomic_read(&rdev->nr_pending)) { | 1161 | if (atomic_read(&rdev->nr_pending)) { |
@@ -1282,6 +1292,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
1282 | rdev_dec_pending(conf->mirrors[i].rdev, mddev); | 1292 | rdev_dec_pending(conf->mirrors[i].rdev, mddev); |
1283 | } else { | 1293 | } else { |
1284 | /* fixup the bio for reuse */ | 1294 | /* fixup the bio for reuse */ |
1295 | int size; | ||
1285 | sbio->bi_vcnt = vcnt; | 1296 | sbio->bi_vcnt = vcnt; |
1286 | sbio->bi_size = r1_bio->sectors << 9; | 1297 | sbio->bi_size = r1_bio->sectors << 9; |
1287 | sbio->bi_idx = 0; | 1298 | sbio->bi_idx = 0; |
@@ -1295,10 +1306,20 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
1295 | sbio->bi_sector = r1_bio->sector + | 1306 | sbio->bi_sector = r1_bio->sector + |
1296 | conf->mirrors[i].rdev->data_offset; | 1307 | conf->mirrors[i].rdev->data_offset; |
1297 | sbio->bi_bdev = conf->mirrors[i].rdev->bdev; | 1308 | sbio->bi_bdev = conf->mirrors[i].rdev->bdev; |
1298 | for (j = 0; j < vcnt ; j++) | 1309 | size = sbio->bi_size; |
1299 | memcpy(page_address(sbio->bi_io_vec[j].bv_page), | 1310 | for (j = 0; j < vcnt ; j++) { |
1311 | struct bio_vec *bi; | ||
1312 | bi = &sbio->bi_io_vec[j]; | ||
1313 | bi->bv_offset = 0; | ||
1314 | if (size > PAGE_SIZE) | ||
1315 | bi->bv_len = PAGE_SIZE; | ||
1316 | else | ||
1317 | bi->bv_len = size; | ||
1318 | size -= PAGE_SIZE; | ||
1319 | memcpy(page_address(bi->bv_page), | ||
1300 | page_address(pbio->bi_io_vec[j].bv_page), | 1320 | page_address(pbio->bi_io_vec[j].bv_page), |
1301 | PAGE_SIZE); | 1321 | PAGE_SIZE); |
1322 | } | ||
1302 | 1323 | ||
1303 | } | 1324 | } |
1304 | } | 1325 | } |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8536ede1e712..a71277b640ab 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1020,7 +1020,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1020 | /* | 1020 | /* |
1021 | * if recovery is running, make sure it aborts. | 1021 | * if recovery is running, make sure it aborts. |
1022 | */ | 1022 | */ |
1023 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 1023 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
1024 | } | 1024 | } |
1025 | set_bit(Faulty, &rdev->flags); | 1025 | set_bit(Faulty, &rdev->flags); |
1026 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 1026 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
@@ -1171,6 +1171,14 @@ static int raid10_remove_disk(mddev_t *mddev, int number) | |||
1171 | err = -EBUSY; | 1171 | err = -EBUSY; |
1172 | goto abort; | 1172 | goto abort; |
1173 | } | 1173 | } |
1174 | /* Only remove non-faulty devices if recovery | ||
1175 | * is not possible. | ||
1176 | */ | ||
1177 | if (!test_bit(Faulty, &rdev->flags) && | ||
1178 | enough(conf)) { | ||
1179 | err = -EBUSY; | ||
1180 | goto abort; | ||
1181 | } | ||
1174 | p->rdev = NULL; | 1182 | p->rdev = NULL; |
1175 | synchronize_rcu(); | 1183 | synchronize_rcu(); |
1176 | if (atomic_read(&rdev->nr_pending)) { | 1184 | if (atomic_read(&rdev->nr_pending)) { |
@@ -1237,6 +1245,7 @@ static void end_sync_write(struct bio *bio, int error) | |||
1237 | 1245 | ||
1238 | if (!uptodate) | 1246 | if (!uptodate) |
1239 | md_error(mddev, conf->mirrors[d].rdev); | 1247 | md_error(mddev, conf->mirrors[d].rdev); |
1248 | |||
1240 | update_head_pos(i, r10_bio); | 1249 | update_head_pos(i, r10_bio); |
1241 | 1250 | ||
1242 | while (atomic_dec_and_test(&r10_bio->remaining)) { | 1251 | while (atomic_dec_and_test(&r10_bio->remaining)) { |
@@ -1844,7 +1853,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1844 | if (rb2) | 1853 | if (rb2) |
1845 | atomic_dec(&rb2->remaining); | 1854 | atomic_dec(&rb2->remaining); |
1846 | r10_bio = rb2; | 1855 | r10_bio = rb2; |
1847 | if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) | 1856 | if (!test_and_set_bit(MD_RECOVERY_INTR, |
1857 | &mddev->recovery)) | ||
1848 | printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", | 1858 | printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", |
1849 | mdname(mddev)); | 1859 | mdname(mddev)); |
1850 | break; | 1860 | break; |
@@ -2127,6 +2137,8 @@ static int run(mddev_t *mddev) | |||
2127 | !test_bit(In_sync, &disk->rdev->flags)) { | 2137 | !test_bit(In_sync, &disk->rdev->flags)) { |
2128 | disk->head_position = 0; | 2138 | disk->head_position = 0; |
2129 | mddev->degraded++; | 2139 | mddev->degraded++; |
2140 | if (disk->rdev) | ||
2141 | conf->fullsync = 1; | ||
2130 | } | 2142 | } |
2131 | } | 2143 | } |
2132 | 2144 | ||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 93fde48c0f42..3b27df52456b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -94,6 +94,8 @@ | |||
94 | #define __inline__ | 94 | #define __inline__ |
95 | #endif | 95 | #endif |
96 | 96 | ||
97 | #define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) | ||
98 | |||
97 | #if !RAID6_USE_EMPTY_ZERO_PAGE | 99 | #if !RAID6_USE_EMPTY_ZERO_PAGE |
98 | /* In .bss so it's zeroed */ | 100 | /* In .bss so it's zeroed */ |
99 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | 101 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); |
@@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1143 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | 1145 | set_bit(R5_UPTODATE, &sh->dev[i].flags); |
1144 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { | 1146 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { |
1145 | rdev = conf->disks[i].rdev; | 1147 | rdev = conf->disks[i].rdev; |
1146 | printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", | 1148 | printk_rl(KERN_INFO "raid5:%s: read error corrected" |
1147 | mdname(conf->mddev), STRIPE_SECTORS, | 1149 | " (%lu sectors at %llu on %s)\n", |
1148 | (unsigned long long)(sh->sector + rdev->data_offset), | 1150 | mdname(conf->mddev), STRIPE_SECTORS, |
1149 | bdevname(rdev->bdev, b)); | 1151 | (unsigned long long)(sh->sector |
1152 | + rdev->data_offset), | ||
1153 | bdevname(rdev->bdev, b)); | ||
1150 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1154 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1151 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1155 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
1152 | } | 1156 | } |
@@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1160 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 1164 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
1161 | atomic_inc(&rdev->read_errors); | 1165 | atomic_inc(&rdev->read_errors); |
1162 | if (conf->mddev->degraded) | 1166 | if (conf->mddev->degraded) |
1163 | printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", | 1167 | printk_rl(KERN_WARNING |
1164 | mdname(conf->mddev), | 1168 | "raid5:%s: read error not correctable " |
1165 | (unsigned long long)(sh->sector + rdev->data_offset), | 1169 | "(sector %llu on %s).\n", |
1166 | bdn); | 1170 | mdname(conf->mddev), |
1171 | (unsigned long long)(sh->sector | ||
1172 | + rdev->data_offset), | ||
1173 | bdn); | ||
1167 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | 1174 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) |
1168 | /* Oh, no!!! */ | 1175 | /* Oh, no!!! */ |
1169 | printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", | 1176 | printk_rl(KERN_WARNING |
1170 | mdname(conf->mddev), | 1177 | "raid5:%s: read error NOT corrected!! " |
1171 | (unsigned long long)(sh->sector + rdev->data_offset), | 1178 | "(sector %llu on %s).\n", |
1172 | bdn); | 1179 | mdname(conf->mddev), |
1180 | (unsigned long long)(sh->sector | ||
1181 | + rdev->data_offset), | ||
1182 | bdn); | ||
1173 | else if (atomic_read(&rdev->read_errors) | 1183 | else if (atomic_read(&rdev->read_errors) |
1174 | > conf->max_nr_stripes) | 1184 | > conf->max_nr_stripes) |
1175 | printk(KERN_WARNING | 1185 | printk(KERN_WARNING |
@@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1258 | /* | 1268 | /* |
1259 | * if recovery was running, make sure it aborts. | 1269 | * if recovery was running, make sure it aborts. |
1260 | */ | 1270 | */ |
1261 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 1271 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
1262 | } | 1272 | } |
1263 | set_bit(Faulty, &rdev->flags); | 1273 | set_bit(Faulty, &rdev->flags); |
1264 | printk (KERN_ALERT | 1274 | printk (KERN_ALERT |
@@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, | |||
1992 | * have quiesced. | 2002 | * have quiesced. |
1993 | */ | 2003 | */ |
1994 | if ((s->uptodate == disks - 1) && | 2004 | if ((s->uptodate == disks - 1) && |
2005 | (s->failed && disk_idx == s->failed_num) && | ||
1995 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { | 2006 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { |
1996 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | 2007 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); |
1997 | set_bit(R5_Wantcompute, &dev->flags); | 2008 | set_bit(R5_Wantcompute, &dev->flags); |
@@ -2006,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, | |||
2006 | */ | 2017 | */ |
2007 | s->uptodate++; | 2018 | s->uptodate++; |
2008 | return 0; /* uptodate + compute == disks */ | 2019 | return 0; /* uptodate + compute == disks */ |
2009 | } else if ((s->uptodate < disks - 1) && | 2020 | } else if (test_bit(R5_Insync, &dev->flags)) { |
2010 | test_bit(R5_Insync, &dev->flags)) { | ||
2011 | /* Note: we hold off compute operations while checks are | ||
2012 | * in flight, but we still prefer 'compute' over 'read' | ||
2013 | * hence we only read if (uptodate < * disks-1) | ||
2014 | */ | ||
2015 | set_bit(R5_LOCKED, &dev->flags); | 2021 | set_bit(R5_LOCKED, &dev->flags); |
2016 | set_bit(R5_Wantread, &dev->flags); | 2022 | set_bit(R5_Wantread, &dev->flags); |
2017 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | 2023 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) |
@@ -2077,7 +2083,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh, | |||
2077 | /* we would like to get this block, possibly | 2083 | /* we would like to get this block, possibly |
2078 | * by computing it, but we might not be able to | 2084 | * by computing it, but we might not be able to |
2079 | */ | 2085 | */ |
2080 | if (s->uptodate == disks-1) { | 2086 | if ((s->uptodate == disks - 1) && |
2087 | (s->failed && (i == r6s->failed_num[0] || | ||
2088 | i == r6s->failed_num[1]))) { | ||
2081 | pr_debug("Computing stripe %llu block %d\n", | 2089 | pr_debug("Computing stripe %llu block %d\n", |
2082 | (unsigned long long)sh->sector, i); | 2090 | (unsigned long long)sh->sector, i); |
2083 | compute_block_1(sh, i, 0); | 2091 | compute_block_1(sh, i, 0); |
@@ -2635,6 +2643,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2635 | struct r5dev *dev; | 2643 | struct r5dev *dev; |
2636 | unsigned long pending = 0; | 2644 | unsigned long pending = 0; |
2637 | mdk_rdev_t *blocked_rdev = NULL; | 2645 | mdk_rdev_t *blocked_rdev = NULL; |
2646 | int prexor; | ||
2638 | 2647 | ||
2639 | memset(&s, 0, sizeof(s)); | 2648 | memset(&s, 0, sizeof(s)); |
2640 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " | 2649 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " |
@@ -2764,9 +2773,11 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2764 | /* leave prexor set until postxor is done, allows us to distinguish | 2773 | /* leave prexor set until postxor is done, allows us to distinguish |
2765 | * a rmw from a rcw during biodrain | 2774 | * a rmw from a rcw during biodrain |
2766 | */ | 2775 | */ |
2776 | prexor = 0; | ||
2767 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && | 2777 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && |
2768 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { | 2778 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { |
2769 | 2779 | ||
2780 | prexor = 1; | ||
2770 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); | 2781 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); |
2771 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); | 2782 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); |
2772 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | 2783 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); |
@@ -2800,6 +2811,8 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2800 | if (!test_and_set_bit( | 2811 | if (!test_and_set_bit( |
2801 | STRIPE_OP_IO, &sh->ops.pending)) | 2812 | STRIPE_OP_IO, &sh->ops.pending)) |
2802 | sh->ops.count++; | 2813 | sh->ops.count++; |
2814 | if (prexor) | ||
2815 | continue; | ||
2803 | if (!test_bit(R5_Insync, &dev->flags) || | 2816 | if (!test_bit(R5_Insync, &dev->flags) || |
2804 | (i == sh->pd_idx && s.failed == 0)) | 2817 | (i == sh->pd_idx && s.failed == 0)) |
2805 | set_bit(STRIPE_INSYNC, &sh->state); | 2818 | set_bit(STRIPE_INSYNC, &sh->state); |
@@ -2880,6 +2893,8 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2880 | 2893 | ||
2881 | for (i = conf->raid_disks; i--; ) { | 2894 | for (i = conf->raid_disks; i--; ) { |
2882 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | 2895 | set_bit(R5_Wantwrite, &sh->dev[i].flags); |
2896 | set_bit(R5_LOCKED, &dev->flags); | ||
2897 | s.locked++; | ||
2883 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | 2898 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) |
2884 | sh->ops.count++; | 2899 | sh->ops.count++; |
2885 | } | 2900 | } |
@@ -2893,6 +2908,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2893 | conf->raid_disks); | 2908 | conf->raid_disks); |
2894 | s.locked += handle_write_operations5(sh, 1, 1); | 2909 | s.locked += handle_write_operations5(sh, 1, 1); |
2895 | } else if (s.expanded && | 2910 | } else if (s.expanded && |
2911 | s.locked == 0 && | ||
2896 | !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { | 2912 | !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { |
2897 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 2913 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
2898 | atomic_dec(&conf->reshape_stripes); | 2914 | atomic_dec(&conf->reshape_stripes); |
@@ -4287,7 +4303,9 @@ static int run(mddev_t *mddev) | |||
4287 | " disk %d\n", bdevname(rdev->bdev,b), | 4303 | " disk %d\n", bdevname(rdev->bdev,b), |
4288 | raid_disk); | 4304 | raid_disk); |
4289 | working_disks++; | 4305 | working_disks++; |
4290 | } | 4306 | } else |
4307 | /* Cannot rely on bitmap to complete recovery */ | ||
4308 | conf->fullsync = 1; | ||
4291 | } | 4309 | } |
4292 | 4310 | ||
4293 | /* | 4311 | /* |
@@ -4564,6 +4582,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number) | |||
4564 | err = -EBUSY; | 4582 | err = -EBUSY; |
4565 | goto abort; | 4583 | goto abort; |
4566 | } | 4584 | } |
4585 | /* Only remove non-faulty devices if recovery | ||
4586 | * isn't possible. | ||
4587 | */ | ||
4588 | if (!test_bit(Faulty, &rdev->flags) && | ||
4589 | mddev->degraded <= conf->max_degraded) { | ||
4590 | err = -EBUSY; | ||
4591 | goto abort; | ||
4592 | } | ||
4567 | p->rdev = NULL; | 4593 | p->rdev = NULL; |
4568 | synchronize_rcu(); | 4594 | synchronize_rcu(); |
4569 | if (atomic_read(&rdev->nr_pending)) { | 4595 | if (atomic_read(&rdev->nr_pending)) { |