aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/bitmap.c 17
-rw-r--r-- drivers/md/linear.c 1
-rw-r--r-- drivers/md/md.c 81
-rw-r--r-- drivers/md/multipath.c 4
-rw-r--r-- drivers/md/raid0.c 1
-rw-r--r-- drivers/md/raid1.c 33
-rw-r--r-- drivers/md/raid10.c 18
-rw-r--r-- drivers/md/raid5.c 80
8 files changed, 168 insertions, 67 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index c14dacdacfac..b26927ce889c 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -203,17 +203,6 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
203 * bitmap file handling - read and write the bitmap file and its superblock 203 * bitmap file handling - read and write the bitmap file and its superblock
204 */ 204 */
205 205
206/* copy the pathname of a file to a buffer */
207char *file_path(struct file *file, char *buf, int count)
208{
209 if (!buf)
210 return NULL;
211
212 buf = d_path(&file->f_path, buf, count);
213
214 return IS_ERR(buf) ? NULL : buf;
215}
216
217/* 206/*
218 * basic page I/O operations 207 * basic page I/O operations
219 */ 208 */
@@ -721,11 +710,13 @@ static void bitmap_file_kick(struct bitmap *bitmap)
721 if (bitmap->file) { 710 if (bitmap->file) {
722 path = kmalloc(PAGE_SIZE, GFP_KERNEL); 711 path = kmalloc(PAGE_SIZE, GFP_KERNEL);
723 if (path) 712 if (path)
724 ptr = file_path(bitmap->file, path, PAGE_SIZE); 713 ptr = d_path(&bitmap->file->f_path, path,
714 PAGE_SIZE);
715
725 716
726 printk(KERN_ALERT 717 printk(KERN_ALERT
727 "%s: kicking failed bitmap file %s from array!\n", 718 "%s: kicking failed bitmap file %s from array!\n",
728 bmname(bitmap), ptr ? ptr : ""); 719 bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
729 720
730 kfree(path); 721 kfree(path);
731 } else 722 } else
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 0b8511776b3e..10748240cb2f 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -250,6 +250,7 @@ static int linear_run (mddev_t *mddev)
250{ 250{
251 linear_conf_t *conf; 251 linear_conf_t *conf;
252 252
253 mddev->queue->queue_lock = &mddev->queue->__queue_lock;
253 conf = linear_conf(mddev, mddev->raid_disks); 254 conf = linear_conf(mddev, mddev->raid_disks);
254 255
255 if (!conf) 256 if (!conf)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 83eb78b00137..7cf512a34ccf 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock);
74 74
75static void md_print_devices(void); 75static void md_print_devices(void);
76 76
77static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
78
77#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } 79#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
78 80
79/* 81/*
@@ -274,6 +276,7 @@ static mddev_t * mddev_find(dev_t unit)
274 atomic_set(&new->active, 1); 276 atomic_set(&new->active, 1);
275 spin_lock_init(&new->write_lock); 277 spin_lock_init(&new->write_lock);
276 init_waitqueue_head(&new->sb_wait); 278 init_waitqueue_head(&new->sb_wait);
279 init_waitqueue_head(&new->recovery_wait);
277 new->reshape_position = MaxSector; 280 new->reshape_position = MaxSector;
278 new->resync_max = MaxSector; 281 new->resync_max = MaxSector;
279 new->level = LEVEL_NONE; 282 new->level = LEVEL_NONE;
@@ -3013,6 +3016,36 @@ degraded_show(mddev_t *mddev, char *page)
3013static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded); 3016static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
3014 3017
3015static ssize_t 3018static ssize_t
3019sync_force_parallel_show(mddev_t *mddev, char *page)
3020{
3021 return sprintf(page, "%d\n", mddev->parallel_resync);
3022}
3023
3024static ssize_t
3025sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
3026{
3027 long n;
3028
3029 if (strict_strtol(buf, 10, &n))
3030 return -EINVAL;
3031
3032 if (n != 0 && n != 1)
3033 return -EINVAL;
3034
3035 mddev->parallel_resync = n;
3036
3037 if (mddev->sync_thread)
3038 wake_up(&resync_wait);
3039
3040 return len;
3041}
3042
3043/* force parallel resync, even with shared block devices */
3044static struct md_sysfs_entry md_sync_force_parallel =
3045__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
3046 sync_force_parallel_show, sync_force_parallel_store);
3047
3048static ssize_t
3016sync_speed_show(mddev_t *mddev, char *page) 3049sync_speed_show(mddev_t *mddev, char *page)
3017{ 3050{
3018 unsigned long resync, dt, db; 3051 unsigned long resync, dt, db;
@@ -3187,6 +3220,7 @@ static struct attribute *md_redundancy_attrs[] = {
3187 &md_sync_min.attr, 3220 &md_sync_min.attr,
3188 &md_sync_max.attr, 3221 &md_sync_max.attr,
3189 &md_sync_speed.attr, 3222 &md_sync_speed.attr,
3223 &md_sync_force_parallel.attr,
3190 &md_sync_completed.attr, 3224 &md_sync_completed.attr,
3191 &md_max_sync.attr, 3225 &md_max_sync.attr,
3192 &md_suspend_lo.attr, 3226 &md_suspend_lo.attr,
@@ -3691,6 +3725,8 @@ static int do_md_stop(mddev_t * mddev, int mode)
3691 3725
3692 module_put(mddev->pers->owner); 3726 module_put(mddev->pers->owner);
3693 mddev->pers = NULL; 3727 mddev->pers = NULL;
3728 /* tell userspace to handle 'inactive' */
3729 sysfs_notify(&mddev->kobj, NULL, "array_state");
3694 3730
3695 set_capacity(disk, 0); 3731 set_capacity(disk, 0);
3696 mddev->changed = 1; 3732 mddev->changed = 1;
@@ -3987,8 +4023,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg)
3987 if (!buf) 4023 if (!buf)
3988 goto out; 4024 goto out;
3989 4025
3990 ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname)); 4026 ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
3991 if (!ptr) 4027 if (IS_ERR(ptr))
3992 goto out; 4028 goto out;
3993 4029
3994 strcpy(file->pathname, ptr); 4030 strcpy(file->pathname, ptr);
@@ -5399,7 +5435,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
5399 atomic_sub(blocks, &mddev->recovery_active); 5435 atomic_sub(blocks, &mddev->recovery_active);
5400 wake_up(&mddev->recovery_wait); 5436 wake_up(&mddev->recovery_wait);
5401 if (!ok) { 5437 if (!ok) {
5402 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 5438 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5403 md_wakeup_thread(mddev->thread); 5439 md_wakeup_thread(mddev->thread);
5404 // stop recovery, signal do_sync .... 5440 // stop recovery, signal do_sync ....
5405 } 5441 }
@@ -5435,8 +5471,11 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
5435 md_wakeup_thread(mddev->thread); 5471 md_wakeup_thread(mddev->thread);
5436 } 5472 }
5437 spin_unlock_irq(&mddev->write_lock); 5473 spin_unlock_irq(&mddev->write_lock);
5474 sysfs_notify(&mddev->kobj, NULL, "array_state");
5438 } 5475 }
5439 wait_event(mddev->sb_wait, mddev->flags==0); 5476 wait_event(mddev->sb_wait,
5477 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
5478 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
5440} 5479}
5441 5480
5442void md_write_end(mddev_t *mddev) 5481void md_write_end(mddev_t *mddev)
@@ -5471,13 +5510,17 @@ void md_allow_write(mddev_t *mddev)
5471 mddev->safemode = 1; 5510 mddev->safemode = 1;
5472 spin_unlock_irq(&mddev->write_lock); 5511 spin_unlock_irq(&mddev->write_lock);
5473 md_update_sb(mddev, 0); 5512 md_update_sb(mddev, 0);
5513
5514 sysfs_notify(&mddev->kobj, NULL, "array_state");
5515 /* wait for the dirty state to be recorded in the metadata */
5516 wait_event(mddev->sb_wait,
5517 !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
5518 !test_bit(MD_CHANGE_PENDING, &mddev->flags));
5474 } else 5519 } else
5475 spin_unlock_irq(&mddev->write_lock); 5520 spin_unlock_irq(&mddev->write_lock);
5476} 5521}
5477EXPORT_SYMBOL_GPL(md_allow_write); 5522EXPORT_SYMBOL_GPL(md_allow_write);
5478 5523
5479static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
5480
5481#define SYNC_MARKS 10 5524#define SYNC_MARKS 10
5482#define SYNC_MARK_STEP (3*HZ) 5525#define SYNC_MARK_STEP (3*HZ)
5483void md_do_sync(mddev_t *mddev) 5526void md_do_sync(mddev_t *mddev)
@@ -5541,8 +5584,9 @@ void md_do_sync(mddev_t *mddev)
5541 for_each_mddev(mddev2, tmp) { 5584 for_each_mddev(mddev2, tmp) {
5542 if (mddev2 == mddev) 5585 if (mddev2 == mddev)
5543 continue; 5586 continue;
5544 if (mddev2->curr_resync && 5587 if (!mddev->parallel_resync
5545 match_mddev_units(mddev,mddev2)) { 5588 && mddev2->curr_resync
5589 && match_mddev_units(mddev, mddev2)) {
5546 DEFINE_WAIT(wq); 5590 DEFINE_WAIT(wq);
5547 if (mddev < mddev2 && mddev->curr_resync == 2) { 5591 if (mddev < mddev2 && mddev->curr_resync == 2) {
5548 /* arbitrarily yield */ 5592 /* arbitrarily yield */
@@ -5622,7 +5666,6 @@ void md_do_sync(mddev_t *mddev)
5622 window/2,(unsigned long long) max_sectors/2); 5666 window/2,(unsigned long long) max_sectors/2);
5623 5667
5624 atomic_set(&mddev->recovery_active, 0); 5668 atomic_set(&mddev->recovery_active, 0);
5625 init_waitqueue_head(&mddev->recovery_wait);
5626 last_check = 0; 5669 last_check = 0;
5627 5670
5628 if (j>2) { 5671 if (j>2) {
@@ -5647,7 +5690,7 @@ void md_do_sync(mddev_t *mddev)
5647 sectors = mddev->pers->sync_request(mddev, j, &skipped, 5690 sectors = mddev->pers->sync_request(mddev, j, &skipped,
5648 currspeed < speed_min(mddev)); 5691 currspeed < speed_min(mddev));
5649 if (sectors == 0) { 5692 if (sectors == 0) {
5650 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 5693 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5651 goto out; 5694 goto out;
5652 } 5695 }
5653 5696
@@ -5670,8 +5713,7 @@ void md_do_sync(mddev_t *mddev)
5670 5713
5671 last_check = io_sectors; 5714 last_check = io_sectors;
5672 5715
5673 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) || 5716 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
5674 test_bit(MD_RECOVERY_ERR, &mddev->recovery))
5675 break; 5717 break;
5676 5718
5677 repeat: 5719 repeat:
@@ -5725,8 +5767,7 @@ void md_do_sync(mddev_t *mddev)
5725 /* tell personality that we are finished */ 5767 /* tell personality that we are finished */
5726 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); 5768 mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
5727 5769
5728 if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && 5770 if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
5729 !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
5730 mddev->curr_resync > 2) { 5771 mddev->curr_resync > 2) {
5731 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 5772 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
5732 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { 5773 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5795,7 +5836,10 @@ static int remove_and_add_spares(mddev_t *mddev)
5795 } 5836 }
5796 5837
5797 if (mddev->degraded) { 5838 if (mddev->degraded) {
5798 rdev_for_each(rdev, rtmp, mddev) 5839 rdev_for_each(rdev, rtmp, mddev) {
5840 if (rdev->raid_disk >= 0 &&
5841 !test_bit(In_sync, &rdev->flags))
5842 spares++;
5799 if (rdev->raid_disk < 0 5843 if (rdev->raid_disk < 0
5800 && !test_bit(Faulty, &rdev->flags)) { 5844 && !test_bit(Faulty, &rdev->flags)) {
5801 rdev->recovery_offset = 0; 5845 rdev->recovery_offset = 0;
@@ -5813,6 +5857,7 @@ static int remove_and_add_spares(mddev_t *mddev)
5813 } else 5857 } else
5814 break; 5858 break;
5815 } 5859 }
5860 }
5816 } 5861 }
5817 return spares; 5862 return spares;
5818} 5863}
@@ -5826,7 +5871,7 @@ static int remove_and_add_spares(mddev_t *mddev)
5826 * to do that as needed. 5871 * to do that as needed.
5827 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in 5872 * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
5828 * "->recovery" and create a thread at ->sync_thread. 5873 * "->recovery" and create a thread at ->sync_thread.
5829 * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR) 5874 * When the thread finishes it sets MD_RECOVERY_DONE
5830 * and wakes up this thread which will reap the thread and finish up. 5875 * and wakes up this thread which will reap the thread and finish up.
5831 * This thread also removes any faulty devices (with nr_pending == 0). 5876 * This thread also removes any faulty devices (with nr_pending == 0).
5832 * 5877 *
@@ -5901,8 +5946,7 @@ void md_check_recovery(mddev_t *mddev)
5901 /* resync has finished, collect result */ 5946 /* resync has finished, collect result */
5902 md_unregister_thread(mddev->sync_thread); 5947 md_unregister_thread(mddev->sync_thread);
5903 mddev->sync_thread = NULL; 5948 mddev->sync_thread = NULL;
5904 if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && 5949 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
5905 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
5906 /* success...*/ 5950 /* success...*/
5907 /* activate any spares */ 5951 /* activate any spares */
5908 mddev->pers->spare_active(mddev); 5952 mddev->pers->spare_active(mddev);
@@ -5926,7 +5970,6 @@ void md_check_recovery(mddev_t *mddev)
5926 * might be left set 5970 * might be left set
5927 */ 5971 */
5928 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 5972 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5929 clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
5930 clear_bit(MD_RECOVERY_INTR, &mddev->recovery); 5973 clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
5931 clear_bit(MD_RECOVERY_DONE, &mddev->recovery); 5974 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
5932 5975
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 42ee1a2dc144..e968116e0de9 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -327,7 +327,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
327 if (rdev) { 327 if (rdev) {
328 if (test_bit(In_sync, &rdev->flags) || 328 if (test_bit(In_sync, &rdev->flags) ||
329 atomic_read(&rdev->nr_pending)) { 329 atomic_read(&rdev->nr_pending)) {
330 printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number); 330 printk(KERN_ERR "hot-remove-disk, slot %d is identified"
331 " but is still operational!\n", number);
331 err = -EBUSY; 332 err = -EBUSY;
332 goto abort; 333 goto abort;
333 } 334 }
@@ -417,6 +418,7 @@ static int multipath_run (mddev_t *mddev)
417 * bookkeeping area. [whatever we allocate in multipath_run(), 418 * bookkeeping area. [whatever we allocate in multipath_run(),
418 * should be freed in multipath_stop()] 419 * should be freed in multipath_stop()]
419 */ 420 */
421 mddev->queue->queue_lock = &mddev->queue->__queue_lock;
420 422
421 conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL); 423 conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
422 mddev->private = conf; 424 mddev->private = conf;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 818b48284096..914c04ddec7c 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -280,6 +280,7 @@ static int raid0_run (mddev_t *mddev)
280 (mddev->chunk_size>>1)-1); 280 (mddev->chunk_size>>1)-1);
281 blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9); 281 blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9);
282 blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1); 282 blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1);
283 mddev->queue->queue_lock = &mddev->queue->__queue_lock;
283 284
284 conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL); 285 conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL);
285 if (!conf) 286 if (!conf)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6778b7cb39bd..c610b947218a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -773,7 +773,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
773 r1bio_t *r1_bio; 773 r1bio_t *r1_bio;
774 struct bio *read_bio; 774 struct bio *read_bio;
775 int i, targets = 0, disks; 775 int i, targets = 0, disks;
776 struct bitmap *bitmap = mddev->bitmap; 776 struct bitmap *bitmap;
777 unsigned long flags; 777 unsigned long flags;
778 struct bio_list bl; 778 struct bio_list bl;
779 struct page **behind_pages = NULL; 779 struct page **behind_pages = NULL;
@@ -802,6 +802,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
802 802
803 wait_barrier(conf); 803 wait_barrier(conf);
804 804
805 bitmap = mddev->bitmap;
806
805 disk_stat_inc(mddev->gendisk, ios[rw]); 807 disk_stat_inc(mddev->gendisk, ios[rw]);
806 disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 808 disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
807 809
@@ -1025,7 +1027,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1025 /* 1027 /*
1026 * if recovery is running, make sure it aborts. 1028 * if recovery is running, make sure it aborts.
1027 */ 1029 */
1028 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 1030 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1029 } else 1031 } else
1030 set_bit(Faulty, &rdev->flags); 1032 set_bit(Faulty, &rdev->flags);
1031 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1033 set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1146,6 +1148,14 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
1146 err = -EBUSY; 1148 err = -EBUSY;
1147 goto abort; 1149 goto abort;
1148 } 1150 }
1151 /* Only remove non-faulty devices if recovery
1152 * is not possible.
1153 */
1154 if (!test_bit(Faulty, &rdev->flags) &&
1155 mddev->degraded < conf->raid_disks) {
1156 err = -EBUSY;
1157 goto abort;
1158 }
1149 p->rdev = NULL; 1159 p->rdev = NULL;
1150 synchronize_rcu(); 1160 synchronize_rcu();
1151 if (atomic_read(&rdev->nr_pending)) { 1161 if (atomic_read(&rdev->nr_pending)) {
@@ -1282,6 +1292,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
1282 rdev_dec_pending(conf->mirrors[i].rdev, mddev); 1292 rdev_dec_pending(conf->mirrors[i].rdev, mddev);
1283 } else { 1293 } else {
1284 /* fixup the bio for reuse */ 1294 /* fixup the bio for reuse */
1295 int size;
1285 sbio->bi_vcnt = vcnt; 1296 sbio->bi_vcnt = vcnt;
1286 sbio->bi_size = r1_bio->sectors << 9; 1297 sbio->bi_size = r1_bio->sectors << 9;
1287 sbio->bi_idx = 0; 1298 sbio->bi_idx = 0;
@@ -1295,10 +1306,20 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
1295 sbio->bi_sector = r1_bio->sector + 1306 sbio->bi_sector = r1_bio->sector +
1296 conf->mirrors[i].rdev->data_offset; 1307 conf->mirrors[i].rdev->data_offset;
1297 sbio->bi_bdev = conf->mirrors[i].rdev->bdev; 1308 sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
1298 for (j = 0; j < vcnt ; j++) 1309 size = sbio->bi_size;
1299 memcpy(page_address(sbio->bi_io_vec[j].bv_page), 1310 for (j = 0; j < vcnt ; j++) {
1311 struct bio_vec *bi;
1312 bi = &sbio->bi_io_vec[j];
1313 bi->bv_offset = 0;
1314 if (size > PAGE_SIZE)
1315 bi->bv_len = PAGE_SIZE;
1316 else
1317 bi->bv_len = size;
1318 size -= PAGE_SIZE;
1319 memcpy(page_address(bi->bv_page),
1300 page_address(pbio->bi_io_vec[j].bv_page), 1320 page_address(pbio->bi_io_vec[j].bv_page),
1301 PAGE_SIZE); 1321 PAGE_SIZE);
1322 }
1302 1323
1303 } 1324 }
1304 } 1325 }
@@ -1935,6 +1956,9 @@ static int run(mddev_t *mddev)
1935 if (!conf->r1bio_pool) 1956 if (!conf->r1bio_pool)
1936 goto out_no_mem; 1957 goto out_no_mem;
1937 1958
1959 spin_lock_init(&conf->device_lock);
1960 mddev->queue->queue_lock = &conf->device_lock;
1961
1938 rdev_for_each(rdev, tmp, mddev) { 1962 rdev_for_each(rdev, tmp, mddev) {
1939 disk_idx = rdev->raid_disk; 1963 disk_idx = rdev->raid_disk;
1940 if (disk_idx >= mddev->raid_disks 1964 if (disk_idx >= mddev->raid_disks
@@ -1958,7 +1982,6 @@ static int run(mddev_t *mddev)
1958 } 1982 }
1959 conf->raid_disks = mddev->raid_disks; 1983 conf->raid_disks = mddev->raid_disks;
1960 conf->mddev = mddev; 1984 conf->mddev = mddev;
1961 spin_lock_init(&conf->device_lock);
1962 INIT_LIST_HEAD(&conf->retry_list); 1985 INIT_LIST_HEAD(&conf->retry_list);
1963 1986
1964 spin_lock_init(&conf->resync_lock); 1987 spin_lock_init(&conf->resync_lock);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index faf3d8912979..1de17da34a95 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1020,7 +1020,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1020 /* 1020 /*
1021 * if recovery is running, make sure it aborts. 1021 * if recovery is running, make sure it aborts.
1022 */ 1022 */
1023 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 1023 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1024 } 1024 }
1025 set_bit(Faulty, &rdev->flags); 1025 set_bit(Faulty, &rdev->flags);
1026 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1026 set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1171,6 +1171,14 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
1171 err = -EBUSY; 1171 err = -EBUSY;
1172 goto abort; 1172 goto abort;
1173 } 1173 }
1174 /* Only remove non-faulty devices if recovery
1175 * is not possible.
1176 */
1177 if (!test_bit(Faulty, &rdev->flags) &&
1178 enough(conf)) {
1179 err = -EBUSY;
1180 goto abort;
1181 }
1174 p->rdev = NULL; 1182 p->rdev = NULL;
1175 synchronize_rcu(); 1183 synchronize_rcu();
1176 if (atomic_read(&rdev->nr_pending)) { 1184 if (atomic_read(&rdev->nr_pending)) {
@@ -1237,6 +1245,7 @@ static void end_sync_write(struct bio *bio, int error)
1237 1245
1238 if (!uptodate) 1246 if (!uptodate)
1239 md_error(mddev, conf->mirrors[d].rdev); 1247 md_error(mddev, conf->mirrors[d].rdev);
1248
1240 update_head_pos(i, r10_bio); 1249 update_head_pos(i, r10_bio);
1241 1250
1242 while (atomic_dec_and_test(&r10_bio->remaining)) { 1251 while (atomic_dec_and_test(&r10_bio->remaining)) {
@@ -1844,7 +1853,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1844 if (rb2) 1853 if (rb2)
1845 atomic_dec(&rb2->remaining); 1854 atomic_dec(&rb2->remaining);
1846 r10_bio = rb2; 1855 r10_bio = rb2;
1847 if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) 1856 if (!test_and_set_bit(MD_RECOVERY_INTR,
1857 &mddev->recovery))
1848 printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", 1858 printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
1849 mdname(mddev)); 1859 mdname(mddev));
1850 break; 1860 break;
@@ -2082,6 +2092,9 @@ static int run(mddev_t *mddev)
2082 goto out_free_conf; 2092 goto out_free_conf;
2083 } 2093 }
2084 2094
2095 spin_lock_init(&conf->device_lock);
2096 mddev->queue->queue_lock = &conf->device_lock;
2097
2085 rdev_for_each(rdev, tmp, mddev) { 2098 rdev_for_each(rdev, tmp, mddev) {
2086 disk_idx = rdev->raid_disk; 2099 disk_idx = rdev->raid_disk;
2087 if (disk_idx >= mddev->raid_disks 2100 if (disk_idx >= mddev->raid_disks
@@ -2103,7 +2116,6 @@ static int run(mddev_t *mddev)
2103 2116
2104 disk->head_position = 0; 2117 disk->head_position = 0;
2105 } 2118 }
2106 spin_lock_init(&conf->device_lock);
2107 INIT_LIST_HEAD(&conf->retry_list); 2119 INIT_LIST_HEAD(&conf->retry_list);
2108 2120
2109 spin_lock_init(&conf->resync_lock); 2121 spin_lock_init(&conf->resync_lock);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 087eee0cb809..c37e256b1176 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -94,6 +94,8 @@
94#define __inline__ 94#define __inline__
95#endif 95#endif
96 96
97#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
98
97#if !RAID6_USE_EMPTY_ZERO_PAGE 99#if !RAID6_USE_EMPTY_ZERO_PAGE
98/* In .bss so it's zeroed */ 100/* In .bss so it's zeroed */
99const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 101const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
@@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error)
1143 set_bit(R5_UPTODATE, &sh->dev[i].flags); 1145 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1144 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 1146 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
1145 rdev = conf->disks[i].rdev; 1147 rdev = conf->disks[i].rdev;
1146 printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", 1148 printk_rl(KERN_INFO "raid5:%s: read error corrected"
1147 mdname(conf->mddev), STRIPE_SECTORS, 1149 " (%lu sectors at %llu on %s)\n",
1148 (unsigned long long)(sh->sector + rdev->data_offset), 1150 mdname(conf->mddev), STRIPE_SECTORS,
1149 bdevname(rdev->bdev, b)); 1151 (unsigned long long)(sh->sector
1152 + rdev->data_offset),
1153 bdevname(rdev->bdev, b));
1150 clear_bit(R5_ReadError, &sh->dev[i].flags); 1154 clear_bit(R5_ReadError, &sh->dev[i].flags);
1151 clear_bit(R5_ReWrite, &sh->dev[i].flags); 1155 clear_bit(R5_ReWrite, &sh->dev[i].flags);
1152 } 1156 }
@@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error)
1160 clear_bit(R5_UPTODATE, &sh->dev[i].flags); 1164 clear_bit(R5_UPTODATE, &sh->dev[i].flags);
1161 atomic_inc(&rdev->read_errors); 1165 atomic_inc(&rdev->read_errors);
1162 if (conf->mddev->degraded) 1166 if (conf->mddev->degraded)
1163 printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", 1167 printk_rl(KERN_WARNING
1164 mdname(conf->mddev), 1168 "raid5:%s: read error not correctable "
1165 (unsigned long long)(sh->sector + rdev->data_offset), 1169 "(sector %llu on %s).\n",
1166 bdn); 1170 mdname(conf->mddev),
1171 (unsigned long long)(sh->sector
1172 + rdev->data_offset),
1173 bdn);
1167 else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) 1174 else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
1168 /* Oh, no!!! */ 1175 /* Oh, no!!! */
1169 printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", 1176 printk_rl(KERN_WARNING
1170 mdname(conf->mddev), 1177 "raid5:%s: read error NOT corrected!! "
1171 (unsigned long long)(sh->sector + rdev->data_offset), 1178 "(sector %llu on %s).\n",
1172 bdn); 1179 mdname(conf->mddev),
1180 (unsigned long long)(sh->sector
1181 + rdev->data_offset),
1182 bdn);
1173 else if (atomic_read(&rdev->read_errors) 1183 else if (atomic_read(&rdev->read_errors)
1174 > conf->max_nr_stripes) 1184 > conf->max_nr_stripes)
1175 printk(KERN_WARNING 1185 printk(KERN_WARNING
@@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1258 /* 1268 /*
1259 * if recovery was running, make sure it aborts. 1269 * if recovery was running, make sure it aborts.
1260 */ 1270 */
1261 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 1271 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1262 } 1272 }
1263 set_bit(Faulty, &rdev->flags); 1273 set_bit(Faulty, &rdev->flags);
1264 printk (KERN_ALERT 1274 printk (KERN_ALERT
@@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
1992 * have quiesced. 2002 * have quiesced.
1993 */ 2003 */
1994 if ((s->uptodate == disks - 1) && 2004 if ((s->uptodate == disks - 1) &&
2005 (s->failed && disk_idx == s->failed_num) &&
1995 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { 2006 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
1996 set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); 2007 set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
1997 set_bit(R5_Wantcompute, &dev->flags); 2008 set_bit(R5_Wantcompute, &dev->flags);
@@ -2077,7 +2088,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh,
2077 /* we would like to get this block, possibly 2088 /* we would like to get this block, possibly
2078 * by computing it, but we might not be able to 2089 * by computing it, but we might not be able to
2079 */ 2090 */
2080 if (s->uptodate == disks-1) { 2091 if ((s->uptodate == disks - 1) &&
2092 (s->failed && (i == r6s->failed_num[0] ||
2093 i == r6s->failed_num[1]))) {
2081 pr_debug("Computing stripe %llu block %d\n", 2094 pr_debug("Computing stripe %llu block %d\n",
2082 (unsigned long long)sh->sector, i); 2095 (unsigned long long)sh->sector, i);
2083 compute_block_1(sh, i, 0); 2096 compute_block_1(sh, i, 0);
@@ -2369,8 +2382,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2369 2382
2370 /* complete a check operation */ 2383 /* complete a check operation */
2371 if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { 2384 if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
2372 clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); 2385 clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
2373 clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); 2386 clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
2374 if (s->failed == 0) { 2387 if (s->failed == 0) {
2375 if (sh->ops.zero_sum_result == 0) 2388 if (sh->ops.zero_sum_result == 0)
2376 /* parity is correct (on disc, 2389 /* parity is correct (on disc,
@@ -2400,16 +2413,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2400 canceled_check = 1; /* STRIPE_INSYNC is not set */ 2413 canceled_check = 1; /* STRIPE_INSYNC is not set */
2401 } 2414 }
2402 2415
2403 /* check if we can clear a parity disk reconstruct */
2404 if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
2405 test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2406
2407 clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
2408 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
2409 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
2410 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2411 }
2412
2413 /* start a new check operation if there are no failures, the stripe is 2416 /* start a new check operation if there are no failures, the stripe is
2414 * not insync, and a repair is not in flight 2417 * not insync, and a repair is not in flight
2415 */ 2418 */
@@ -2424,6 +2427,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2424 } 2427 }
2425 } 2428 }
2426 2429
2430 /* check if we can clear a parity disk reconstruct */
2431 if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
2432 test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2433
2434 clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
2435 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
2436 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
2437 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2438 }
2439
2440
2427 /* Wait for check parity and compute block operations to complete 2441 /* Wait for check parity and compute block operations to complete
2428 * before write-back. If a failure occurred while the check operation 2442 * before write-back. If a failure occurred while the check operation
2429 * was in flight we need to cycle this stripe through handle_stripe 2443 * was in flight we need to cycle this stripe through handle_stripe
@@ -2634,6 +2648,7 @@ static void handle_stripe5(struct stripe_head *sh)
2634 struct r5dev *dev; 2648 struct r5dev *dev;
2635 unsigned long pending = 0; 2649 unsigned long pending = 0;
2636 mdk_rdev_t *blocked_rdev = NULL; 2650 mdk_rdev_t *blocked_rdev = NULL;
2651 int prexor;
2637 2652
2638 memset(&s, 0, sizeof(s)); 2653 memset(&s, 0, sizeof(s));
2639 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " 2654 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2763,9 +2778,11 @@ static void handle_stripe5(struct stripe_head *sh)
2763 /* leave prexor set until postxor is done, allows us to distinguish 2778 /* leave prexor set until postxor is done, allows us to distinguish
2764 * a rmw from a rcw during biodrain 2779 * a rmw from a rcw during biodrain
2765 */ 2780 */
2781 prexor = 0;
2766 if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && 2782 if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
2767 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { 2783 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
2768 2784
2785 prexor = 1;
2769 clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); 2786 clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
2770 clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); 2787 clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
2771 clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); 2788 clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
@@ -2799,6 +2816,8 @@ static void handle_stripe5(struct stripe_head *sh)
2799 if (!test_and_set_bit( 2816 if (!test_and_set_bit(
2800 STRIPE_OP_IO, &sh->ops.pending)) 2817 STRIPE_OP_IO, &sh->ops.pending))
2801 sh->ops.count++; 2818 sh->ops.count++;
2819 if (prexor)
2820 continue;
2802 if (!test_bit(R5_Insync, &dev->flags) || 2821 if (!test_bit(R5_Insync, &dev->flags) ||
2803 (i == sh->pd_idx && s.failed == 0)) 2822 (i == sh->pd_idx && s.failed == 0))
2804 set_bit(STRIPE_INSYNC, &sh->state); 2823 set_bit(STRIPE_INSYNC, &sh->state);
@@ -4256,6 +4275,7 @@ static int run(mddev_t *mddev)
4256 goto abort; 4275 goto abort;
4257 } 4276 }
4258 spin_lock_init(&conf->device_lock); 4277 spin_lock_init(&conf->device_lock);
4278 mddev->queue->queue_lock = &conf->device_lock;
4259 init_waitqueue_head(&conf->wait_for_stripe); 4279 init_waitqueue_head(&conf->wait_for_stripe);
4260 init_waitqueue_head(&conf->wait_for_overlap); 4280 init_waitqueue_head(&conf->wait_for_overlap);
4261 INIT_LIST_HEAD(&conf->handle_list); 4281 INIT_LIST_HEAD(&conf->handle_list);
@@ -4562,6 +4582,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
4562 err = -EBUSY; 4582 err = -EBUSY;
4563 goto abort; 4583 goto abort;
4564 } 4584 }
4585 /* Only remove non-faulty devices if recovery
4586 * isn't possible.
4587 */
4588 if (!test_bit(Faulty, &rdev->flags) &&
4589 mddev->degraded <= conf->max_degraded) {
4590 err = -EBUSY;
4591 goto abort;
4592 }
4565 p->rdev = NULL; 4593 p->rdev = NULL;
4566 synchronize_rcu(); 4594 synchronize_rcu();
4567 if (atomic_read(&rdev->nr_pending)) { 4595 if (atomic_read(&rdev->nr_pending)) {