Diffstat (limited to 'drivers/md')
 drivers/md/bitmap.c    | 17
 drivers/md/dm-crypt.c  |  1
 drivers/md/md.c        | 87
 drivers/md/multipath.c |  3
 drivers/md/raid1.c     | 29
 drivers/md/raid10.c    | 16
 drivers/md/raid5.c     | 68
7 files changed, 159 insertions(+), 62 deletions(-)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index c14dacdacfac..b26927ce889c 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -203,17 +203,6 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
  * bitmap file handling - read and write the bitmap file and its superblock
  */
 
-/* copy the pathname of a file to a buffer */
-char *file_path(struct file *file, char *buf, int count)
-{
-	if (!buf)
-		return NULL;
-
-	buf = d_path(&file->f_path, buf, count);
-
-	return IS_ERR(buf) ? NULL : buf;
-}
-
 /*
  * basic page I/O operations
  */
@@ -721,11 +710,13 @@ static void bitmap_file_kick(struct bitmap *bitmap)
 	if (bitmap->file) {
 		path = kmalloc(PAGE_SIZE, GFP_KERNEL);
 		if (path)
-			ptr = file_path(bitmap->file, path, PAGE_SIZE);
+			ptr = d_path(&bitmap->file->f_path, path,
+				     PAGE_SIZE);
+
 
 		printk(KERN_ALERT
 		      "%s: kicking failed bitmap file %s from array!\n",
-		      bmname(bitmap), ptr ? ptr : "");
+		      bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
 
 		kfree(path);
 	} else
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 835def11419d..ab6a61db63ce 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -432,6 +432,7 @@ static int crypt_convert(struct crypt_config *cc,
 		case 0:
 			atomic_dec(&ctx->pending);
 			ctx->sector++;
+			cond_resched();
 			continue;
 
 		/* error */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 83eb78b00137..2580ac1b9b0f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock);
 
 static void md_print_devices(void);
 
+static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
 /*
@@ -274,6 +276,7 @@ static mddev_t * mddev_find(dev_t unit)
 	atomic_set(&new->active, 1);
 	spin_lock_init(&new->write_lock);
 	init_waitqueue_head(&new->sb_wait);
+	init_waitqueue_head(&new->recovery_wait);
 	new->reshape_position = MaxSector;
 	new->resync_max = MaxSector;
 	new->level = LEVEL_NONE;
@@ -3013,6 +3016,36 @@ degraded_show(mddev_t *mddev, char *page)
 static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
 
 static ssize_t
+sync_force_parallel_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%d\n", mddev->parallel_resync);
+}
+
+static ssize_t
+sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	long n;
+
+	if (strict_strtol(buf, 10, &n))
+		return -EINVAL;
+
+	if (n != 0 && n != 1)
+		return -EINVAL;
+
+	mddev->parallel_resync = n;
+
+	if (mddev->sync_thread)
+		wake_up(&resync_wait);
+
+	return len;
+}
+
+/* force parallel resync, even with shared block devices */
+static struct md_sysfs_entry md_sync_force_parallel =
+__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
+       sync_force_parallel_show, sync_force_parallel_store);
+
+static ssize_t
 sync_speed_show(mddev_t *mddev, char *page)
 {
 	unsigned long resync, dt, db;
@@ -3187,6 +3220,7 @@ static struct attribute *md_redundancy_attrs[] = {
 	&md_sync_min.attr,
 	&md_sync_max.attr,
 	&md_sync_speed.attr,
+	&md_sync_force_parallel.attr,
 	&md_sync_completed.attr,
 	&md_max_sync.attr,
 	&md_suspend_lo.attr,
@@ -3691,6 +3725,8 @@ static int do_md_stop(mddev_t * mddev, int mode)
 
 		module_put(mddev->pers->owner);
 		mddev->pers = NULL;
+		/* tell userspace to handle 'inactive' */
+		sysfs_notify(&mddev->kobj, NULL, "array_state");
 
 		set_capacity(disk, 0);
 		mddev->changed = 1;
@@ -3861,8 +3897,10 @@ static void autorun_devices(int part)
 
 		md_probe(dev, NULL, NULL);
 		mddev = mddev_find(dev);
-		if (!mddev) {
-			printk(KERN_ERR
+		if (!mddev || !mddev->gendisk) {
+			if (mddev)
+				mddev_put(mddev);
+			printk(KERN_ERR
 				"md: cannot allocate memory for md drive.\n");
 			break;
 		}
@@ -3987,8 +4025,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg)
 	if (!buf)
 		goto out;
 
-	ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname));
-	if (!ptr)
+	ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
+	if (IS_ERR(ptr))
 		goto out;
 
 	strcpy(file->pathname, ptr);
@@ -5399,7 +5437,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
 	atomic_sub(blocks, &mddev->recovery_active);
 	wake_up(&mddev->recovery_wait);
 	if (!ok) {
-		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
 		// stop recovery, signal do_sync ....
 	}
@@ -5435,8 +5473,11 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
 			md_wakeup_thread(mddev->thread);
 		}
 		spin_unlock_irq(&mddev->write_lock);
+		sysfs_notify(&mddev->kobj, NULL, "array_state");
 	}
-	wait_event(mddev->sb_wait, mddev->flags==0);
+	wait_event(mddev->sb_wait,
+		   !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+		   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
 }
 
 void md_write_end(mddev_t *mddev)
@@ -5471,13 +5512,17 @@ void md_allow_write(mddev_t *mddev)
 		mddev->safemode = 1;
 		spin_unlock_irq(&mddev->write_lock);
 		md_update_sb(mddev, 0);
+
+		sysfs_notify(&mddev->kobj, NULL, "array_state");
+		/* wait for the dirty state to be recorded in the metadata */
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
 	} else
 		spin_unlock_irq(&mddev->write_lock);
 }
 EXPORT_SYMBOL_GPL(md_allow_write);
 
-static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
 #define SYNC_MARKS	10
 #define	SYNC_MARK_STEP	(3*HZ)
 void md_do_sync(mddev_t *mddev)
@@ -5541,8 +5586,9 @@ void md_do_sync(mddev_t *mddev)
 	for_each_mddev(mddev2, tmp) {
 		if (mddev2 == mddev)
 			continue;
-		if (mddev2->curr_resync &&
-		    match_mddev_units(mddev,mddev2)) {
+		if (!mddev->parallel_resync
+		&&  mddev2->curr_resync
+		&&  match_mddev_units(mddev, mddev2)) {
 			DEFINE_WAIT(wq);
 			if (mddev < mddev2 && mddev->curr_resync == 2) {
 				/* arbitrarily yield */
@@ -5622,7 +5668,6 @@ void md_do_sync(mddev_t *mddev)
 		window/2,(unsigned long long) max_sectors/2);
 
 	atomic_set(&mddev->recovery_active, 0);
-	init_waitqueue_head(&mddev->recovery_wait);
 	last_check = 0;
 
 	if (j>2) {
@@ -5647,7 +5692,7 @@ void md_do_sync(mddev_t *mddev)
 		sectors = mddev->pers->sync_request(mddev, j, &skipped,
 						  currspeed < speed_min(mddev));
 		if (sectors == 0) {
-			set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			goto out;
 		}
 
@@ -5670,8 +5715,7 @@ void md_do_sync(mddev_t *mddev)
 
 		last_check = io_sectors;
 
-		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
-		    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 			break;
 
 	repeat:
@@ -5725,8 +5769,7 @@ void md_do_sync(mddev_t *mddev)
 	/* tell personality that we are finished */
 	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
 
-	if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
-	    !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
+	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
 	    mddev->curr_resync > 2) {
 		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5795,7 +5838,10 @@ static int remove_and_add_spares(mddev_t *mddev)
 	}
 
 	if (mddev->degraded) {
-		rdev_for_each(rdev, rtmp, mddev)
+		rdev_for_each(rdev, rtmp, mddev) {
+			if (rdev->raid_disk >= 0 &&
+			    !test_bit(In_sync, &rdev->flags))
+				spares++;
 			if (rdev->raid_disk < 0
 			    && !test_bit(Faulty, &rdev->flags)) {
 				rdev->recovery_offset = 0;
@@ -5813,6 +5859,7 @@ static int remove_and_add_spares(mddev_t *mddev)
 				} else
 					break;
 			}
+		}
 	}
 	return spares;
 }
@@ -5826,7 +5873,7 @@ static int remove_and_add_spares(mddev_t *mddev)
  * to do that as needed.
  * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
  * "->recovery" and create a thread at ->sync_thread.
- * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR)
+ * When the thread finishes it sets MD_RECOVERY_DONE
  * and wakes up this thread which will reap the thread and finish up.
  * This thread also removes any faulty devices (with nr_pending == 0).
  *
@@ -5901,8 +5948,7 @@ void md_check_recovery(mddev_t *mddev)
 		/* resync has finished, collect result */
 		md_unregister_thread(mddev->sync_thread);
 		mddev->sync_thread = NULL;
-		if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
-		    !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+		if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 			/* success...*/
 			/* activate any spares */
 			mddev->pers->spare_active(mddev);
@@ -5926,7 +5972,6 @@ void md_check_recovery(mddev_t *mddev)
 		 * might be left set
 		 */
 		clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-		clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
 		clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 4f4d1f383842..e968116e0de9 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -327,7 +327,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
 	if (rdev) {
 		if (test_bit(In_sync, &rdev->flags) ||
 		    atomic_read(&rdev->nr_pending)) {
-			printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number);
+			printk(KERN_ERR "hot-remove-disk, slot %d is identified"
+			       " but is still operational!\n", number);
 			err = -EBUSY;
 			goto abort;
 		}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ac409b7d83f5..c610b947218a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -773,7 +773,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	r1bio_t *r1_bio;
 	struct bio *read_bio;
 	int i, targets = 0, disks;
-	struct bitmap *bitmap = mddev->bitmap;
+	struct bitmap *bitmap;
 	unsigned long flags;
 	struct bio_list bl;
 	struct page **behind_pages = NULL;
@@ -802,6 +802,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
 
 	wait_barrier(conf);
 
+	bitmap = mddev->bitmap;
+
 	disk_stat_inc(mddev->gendisk, ios[rw]);
 	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
 
@@ -1025,7 +1027,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 		/*
 		 * if recovery is running, make sure it aborts.
 		 */
-		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	} else
 		set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1146,6 +1148,14 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
 			err = -EBUSY;
 			goto abort;
 		}
+		/* Only remove non-faulty devices if recovery
+		 * is not possible.
+		 */
+		if (!test_bit(Faulty, &rdev->flags) &&
+		    mddev->degraded < conf->raid_disks) {
+			err = -EBUSY;
+			goto abort;
+		}
 		p->rdev = NULL;
 		synchronize_rcu();
 		if (atomic_read(&rdev->nr_pending)) {
@@ -1282,6 +1292,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
 		} else {
 			/* fixup the bio for reuse */
+			int size;
 			sbio->bi_vcnt = vcnt;
 			sbio->bi_size = r1_bio->sectors << 9;
 			sbio->bi_idx = 0;
@@ -1295,10 +1306,20 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 			sbio->bi_sector = r1_bio->sector +
 				conf->mirrors[i].rdev->data_offset;
 			sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
-			for (j = 0; j < vcnt ; j++)
-				memcpy(page_address(sbio->bi_io_vec[j].bv_page),
+			size = sbio->bi_size;
+			for (j = 0; j < vcnt ; j++) {
+				struct bio_vec *bi;
+				bi = &sbio->bi_io_vec[j];
+				bi->bv_offset = 0;
+				if (size > PAGE_SIZE)
+					bi->bv_len = PAGE_SIZE;
+				else
+					bi->bv_len = size;
+				size -= PAGE_SIZE;
+				memcpy(page_address(bi->bv_page),
 				       page_address(pbio->bi_io_vec[j].bv_page),
 				       PAGE_SIZE);
+			}
 
 		}
 	}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8536ede1e712..a71277b640ab 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1020,7 +1020,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 		/*
 		 * if recovery is running, make sure it aborts.
 		 */
-		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	}
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1171,6 +1171,14 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
 			err = -EBUSY;
 			goto abort;
 		}
+		/* Only remove non-faulty devices if recovery
+		 * is not possible.
+		 */
+		if (!test_bit(Faulty, &rdev->flags) &&
+		    enough(conf)) {
+			err = -EBUSY;
+			goto abort;
+		}
 		p->rdev = NULL;
 		synchronize_rcu();
 		if (atomic_read(&rdev->nr_pending)) {
@@ -1237,6 +1245,7 @@ static void end_sync_write(struct bio *bio, int error)
 
 	if (!uptodate)
 		md_error(mddev, conf->mirrors[d].rdev);
+
 	update_head_pos(i, r10_bio);
 
 	while (atomic_dec_and_test(&r10_bio->remaining)) {
@@ -1844,7 +1853,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 				if (rb2)
 					atomic_dec(&rb2->remaining);
 				r10_bio = rb2;
-				if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
+				if (!test_and_set_bit(MD_RECOVERY_INTR,
+						      &mddev->recovery))
 					printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
 					       mdname(mddev));
 				break;
@@ -2127,6 +2137,8 @@ static int run(mddev_t *mddev)
 		    !test_bit(In_sync, &disk->rdev->flags)) {
 			disk->head_position = 0;
 			mddev->degraded++;
+			if (disk->rdev)
+				conf->fullsync = 1;
 		}
 	}
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 93fde48c0f42..3b27df52456b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -94,6 +94,8 @@
 #define __inline__
 #endif
 
+#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
+
 #if !RAID6_USE_EMPTY_ZERO_PAGE
 /* In .bss so it's zeroed */
 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
@@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error)
 		set_bit(R5_UPTODATE, &sh->dev[i].flags);
 		if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
 			rdev = conf->disks[i].rdev;
-			printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
-			       mdname(conf->mddev), STRIPE_SECTORS,
-			       (unsigned long long)(sh->sector + rdev->data_offset),
-			       bdevname(rdev->bdev, b));
+			printk_rl(KERN_INFO "raid5:%s: read error corrected"
+				  " (%lu sectors at %llu on %s)\n",
+				  mdname(conf->mddev), STRIPE_SECTORS,
+				  (unsigned long long)(sh->sector
+						       + rdev->data_offset),
+				  bdevname(rdev->bdev, b));
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
 		}
@@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error)
 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
 		atomic_inc(&rdev->read_errors);
 		if (conf->mddev->degraded)
-			printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n",
-			       mdname(conf->mddev),
-			       (unsigned long long)(sh->sector + rdev->data_offset),
-			       bdn);
+			printk_rl(KERN_WARNING
+				  "raid5:%s: read error not correctable "
+				  "(sector %llu on %s).\n",
+				  mdname(conf->mddev),
+				  (unsigned long long)(sh->sector
+						       + rdev->data_offset),
+				  bdn);
 		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
 			/* Oh, no!!! */
-			printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n",
-			       mdname(conf->mddev),
-			       (unsigned long long)(sh->sector + rdev->data_offset),
-			       bdn);
+			printk_rl(KERN_WARNING
+				  "raid5:%s: read error NOT corrected!! "
+				  "(sector %llu on %s).\n",
+				  mdname(conf->mddev),
+				  (unsigned long long)(sh->sector
+						       + rdev->data_offset),
+				  bdn);
 		else if (atomic_read(&rdev->read_errors)
 			 > conf->max_nr_stripes)
 			printk(KERN_WARNING
@@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 		/*
 		 * if recovery was running, make sure it aborts.
 		 */
-		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	}
 	set_bit(Faulty, &rdev->flags);
 	printk (KERN_ALERT
@@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
 		 * have quiesced.
 		 */
 		if ((s->uptodate == disks - 1) &&
+		    (s->failed && disk_idx == s->failed_num) &&
 		    !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
 			set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
 			set_bit(R5_Wantcompute, &dev->flags);
@@ -2006,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
 		 */
 		s->uptodate++;
 		return 0; /* uptodate + compute == disks */
-	} else if ((s->uptodate < disks - 1) &&
-			test_bit(R5_Insync, &dev->flags)) {
-		/* Note: we hold off compute operations while checks are
-		 * in flight, but we still prefer 'compute' over 'read'
-		 * hence we only read if (uptodate < * disks-1)
-		 */
+	} else if (test_bit(R5_Insync, &dev->flags)) {
 		set_bit(R5_LOCKED, &dev->flags);
 		set_bit(R5_Wantread, &dev->flags);
 		if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
@@ -2077,7 +2083,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh,
 			/* we would like to get this block, possibly
 			 * by computing it, but we might not be able to
 			 */
-			if (s->uptodate == disks-1) {
+			if ((s->uptodate == disks - 1) &&
+			    (s->failed && (i == r6s->failed_num[0] ||
+					   i == r6s->failed_num[1]))) {
 				pr_debug("Computing stripe %llu block %d\n",
 					 (unsigned long long)sh->sector, i);
 				compute_block_1(sh, i, 0);
@@ -2635,6 +2643,7 @@ static void handle_stripe5(struct stripe_head *sh)
 	struct r5dev *dev;
 	unsigned long pending = 0;
 	mdk_rdev_t *blocked_rdev = NULL;
+	int prexor;
 
 	memset(&s, 0, sizeof(s));
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2764,9 +2773,11 @@ static void handle_stripe5(struct stripe_head *sh)
 	/* leave prexor set until postxor is done, allows us to distinguish
 	 * a rmw from a rcw during biodrain
 	 */
+	prexor = 0;
 	if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
 	    test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
 
+		prexor = 1;
 		clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
 		clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
 		clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
@@ -2800,6 +2811,8 @@ static void handle_stripe5(struct stripe_head *sh)
 			if (!test_and_set_bit(
 				    STRIPE_OP_IO, &sh->ops.pending))
 				sh->ops.count++;
+			if (prexor)
+				continue;
 			if (!test_bit(R5_Insync, &dev->flags) ||
 			    (i == sh->pd_idx && s.failed == 0))
 				set_bit(STRIPE_INSYNC, &sh->state);
@@ -2880,6 +2893,8 @@ static void handle_stripe5(struct stripe_head *sh)
 
 		for (i = conf->raid_disks; i--; ) {
 			set_bit(R5_Wantwrite, &sh->dev[i].flags);
+			set_bit(R5_LOCKED, &dev->flags);
+			s.locked++;
 			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
 				sh->ops.count++;
 		}
@@ -2893,6 +2908,7 @@ static void handle_stripe5(struct stripe_head *sh)
 					     conf->raid_disks);
 		s.locked += handle_write_operations5(sh, 1, 1);
 	} else if (s.expanded &&
+		   s.locked == 0 &&
 		   !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
@@ -4287,7 +4303,9 @@ static int run(mddev_t *mddev)
4287 " disk %d\n", bdevname(rdev->bdev,b), 4303 " disk %d\n", bdevname(rdev->bdev,b),
4288 raid_disk); 4304 raid_disk);
4289 working_disks++; 4305 working_disks++;
4290 } 4306 } else
4307 /* Cannot rely on bitmap to complete recovery */
4308 conf->fullsync = 1;
4291 } 4309 }
4292 4310
4293 /* 4311 /*
@@ -4564,6 +4582,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
 			err = -EBUSY;
 			goto abort;
 		}
+		/* Only remove non-faulty devices if recovery
+		 * isn't possible.
+		 */
+		if (!test_bit(Faulty, &rdev->flags) &&
+		    mddev->degraded <= conf->max_degraded) {
+			err = -EBUSY;
+			goto abort;
+		}
 		p->rdev = NULL;
 		synchronize_rcu();
 		if (atomic_read(&rdev->nr_pending)) {