about | summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 20:30:20 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 20:30:20 -0500
commit509e4aef44eb10e4aef1f81c3c3ff1214671503b (patch)
treeb90daecbc3d05787b0dd141e524b67d8579a6dfd /drivers
parent375b6f5a40a3d8373561a3dbeed440a845e6d379 (diff)
parentbf2cb0dab8c97f00a71875d9b13dbac17a2f47ca (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
  md: Fix removal of extra drives when converting RAID6 to RAID5
  md: range check slot number when manually adding a spare.
  md/raid5: handle manually-added spares in start_reshape.
  md: fix sync_completed reporting for very large drives (>2TB)
  md: allow suspend_lo and suspend_hi to decrease as well as increase.
  md: Don't let implementation detail of curr_resync leak out through sysfs.
  md: separate meta and data devs
  md-new-param-to_sync_page_io
  md-new-param-to-calc_dev_sboffset
  md: Be more careful about clearing flags bit in ->recovery
  md: md_stop_writes requires mddev_lock.
  md/raid5: use sysfs_notify_dirent_safe to avoid NULL pointer
  md: Ensure no IO request to get md device before it is properly initialised.
  md: Fix single printks with multiple KERN_<level>s
  md: fix regression resulting in delays in clearing bits in a bitmap
  md: fix regression with re-adding devices to arrays with no metadata
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/bitmap.c12
-rw-r--r--drivers/md/md.c197
-rw-r--r--drivers/md/md.h13
-rw-r--r--drivers/md/raid1.c33
-rw-r--r--drivers/md/raid10.c17
-rw-r--r--drivers/md/raid5.c16
6 files changed, 172 insertions, 116 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 5a1ffe3527aa..9a35320fb59f 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -210,11 +210,11 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset,
210 || test_bit(Faulty, &rdev->flags)) 210 || test_bit(Faulty, &rdev->flags))
211 continue; 211 continue;
212 212
213 target = rdev->sb_start + offset + index * (PAGE_SIZE/512); 213 target = offset + index * (PAGE_SIZE/512);
214 214
215 if (sync_page_io(rdev, target, 215 if (sync_page_io(rdev, target,
216 roundup(size, bdev_logical_block_size(rdev->bdev)), 216 roundup(size, bdev_logical_block_size(rdev->bdev)),
217 page, READ)) { 217 page, READ, true)) {
218 page->index = index; 218 page->index = index;
219 attach_page_buffers(page, NULL); /* so that free_buffer will 219 attach_page_buffers(page, NULL); /* so that free_buffer will
220 * quietly no-op */ 220 * quietly no-op */
@@ -264,14 +264,18 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
264static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) 264static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
265{ 265{
266 mdk_rdev_t *rdev = NULL; 266 mdk_rdev_t *rdev = NULL;
267 struct block_device *bdev;
267 mddev_t *mddev = bitmap->mddev; 268 mddev_t *mddev = bitmap->mddev;
268 269
269 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { 270 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
270 int size = PAGE_SIZE; 271 int size = PAGE_SIZE;
271 loff_t offset = mddev->bitmap_info.offset; 272 loff_t offset = mddev->bitmap_info.offset;
273
274 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
275
272 if (page->index == bitmap->file_pages-1) 276 if (page->index == bitmap->file_pages-1)
273 size = roundup(bitmap->last_page_size, 277 size = roundup(bitmap->last_page_size,
274 bdev_logical_block_size(rdev->bdev)); 278 bdev_logical_block_size(bdev));
275 /* Just make sure we aren't corrupting data or 279 /* Just make sure we aren't corrupting data or
276 * metadata 280 * metadata
277 */ 281 */
@@ -1542,7 +1546,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1542 wait_event(bitmap->mddev->recovery_wait, 1546 wait_event(bitmap->mddev->recovery_wait,
1543 atomic_read(&bitmap->mddev->recovery_active) == 0); 1547 atomic_read(&bitmap->mddev->recovery_active) == 0);
1544 1548
1545 bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; 1549 bitmap->mddev->curr_resync_completed = sector;
1546 set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); 1550 set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
1547 sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); 1551 sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
1548 s = 0; 1552 s = 0;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7fc090ac9e28..cf8594c5ea21 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -288,10 +288,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
288 int rv; 288 int rv;
289 int cpu; 289 int cpu;
290 290
291 if (mddev == NULL || mddev->pers == NULL) { 291 if (mddev == NULL || mddev->pers == NULL
292 || !mddev->ready) {
292 bio_io_error(bio); 293 bio_io_error(bio);
293 return 0; 294 return 0;
294 } 295 }
296 smp_rmb(); /* Ensure implications of 'active' are visible */
295 rcu_read_lock(); 297 rcu_read_lock();
296 if (mddev->suspended) { 298 if (mddev->suspended) {
297 DEFINE_WAIT(__wait); 299 DEFINE_WAIT(__wait);
@@ -703,9 +705,9 @@ static struct mdk_personality *find_pers(int level, char *clevel)
703} 705}
704 706
705/* return the offset of the super block in 512byte sectors */ 707/* return the offset of the super block in 512byte sectors */
706static inline sector_t calc_dev_sboffset(struct block_device *bdev) 708static inline sector_t calc_dev_sboffset(mdk_rdev_t *rdev)
707{ 709{
708 sector_t num_sectors = i_size_read(bdev->bd_inode) / 512; 710 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
709 return MD_NEW_SIZE_SECTORS(num_sectors); 711 return MD_NEW_SIZE_SECTORS(num_sectors);
710} 712}
711 713
@@ -763,7 +765,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
763 */ 765 */
764 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); 766 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
765 767
766 bio->bi_bdev = rdev->bdev; 768 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
767 bio->bi_sector = sector; 769 bio->bi_sector = sector;
768 bio_add_page(bio, page, size, 0); 770 bio_add_page(bio, page, size, 0);
769 bio->bi_private = rdev; 771 bio->bi_private = rdev;
@@ -793,7 +795,7 @@ static void bi_complete(struct bio *bio, int error)
793} 795}
794 796
795int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, 797int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
796 struct page *page, int rw) 798 struct page *page, int rw, bool metadata_op)
797{ 799{
798 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); 800 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
799 struct completion event; 801 struct completion event;
@@ -801,8 +803,12 @@ int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
801 803
802 rw |= REQ_SYNC | REQ_UNPLUG; 804 rw |= REQ_SYNC | REQ_UNPLUG;
803 805
804 bio->bi_bdev = rdev->bdev; 806 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
805 bio->bi_sector = sector; 807 rdev->meta_bdev : rdev->bdev;
808 if (metadata_op)
809 bio->bi_sector = sector + rdev->sb_start;
810 else
811 bio->bi_sector = sector + rdev->data_offset;
806 bio_add_page(bio, page, size, 0); 812 bio_add_page(bio, page, size, 0);
807 init_completion(&event); 813 init_completion(&event);
808 bio->bi_private = &event; 814 bio->bi_private = &event;
@@ -827,7 +833,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size)
827 return 0; 833 return 0;
828 834
829 835
830 if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ)) 836 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
831 goto fail; 837 goto fail;
832 rdev->sb_loaded = 1; 838 rdev->sb_loaded = 1;
833 return 0; 839 return 0;
@@ -989,7 +995,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
989 * 995 *
990 * It also happens to be a multiple of 4Kb. 996 * It also happens to be a multiple of 4Kb.
991 */ 997 */
992 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 998 rdev->sb_start = calc_dev_sboffset(rdev);
993 999
994 ret = read_disk_sb(rdev, MD_SB_BYTES); 1000 ret = read_disk_sb(rdev, MD_SB_BYTES);
995 if (ret) return ret; 1001 if (ret) return ret;
@@ -1330,7 +1336,7 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1330 return 0; /* component must fit device */ 1336 return 0; /* component must fit device */
1331 if (rdev->mddev->bitmap_info.offset) 1337 if (rdev->mddev->bitmap_info.offset)
1332 return 0; /* can't move bitmap */ 1338 return 0; /* can't move bitmap */
1333 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 1339 rdev->sb_start = calc_dev_sboffset(rdev);
1334 if (!num_sectors || num_sectors > rdev->sb_start) 1340 if (!num_sectors || num_sectors > rdev->sb_start)
1335 num_sectors = rdev->sb_start; 1341 num_sectors = rdev->sb_start;
1336 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, 1342 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
@@ -2465,6 +2471,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2465 if (rdev2->raid_disk == slot) 2471 if (rdev2->raid_disk == slot)
2466 return -EEXIST; 2472 return -EEXIST;
2467 2473
2474 if (slot >= rdev->mddev->raid_disks &&
2475 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2476 return -ENOSPC;
2477
2468 rdev->raid_disk = slot; 2478 rdev->raid_disk = slot;
2469 if (test_bit(In_sync, &rdev->flags)) 2479 if (test_bit(In_sync, &rdev->flags))
2470 rdev->saved_raid_disk = slot; 2480 rdev->saved_raid_disk = slot;
@@ -2482,7 +2492,8 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2482 /* failure here is OK */; 2492 /* failure here is OK */;
2483 /* don't wakeup anyone, leave that to userspace. */ 2493 /* don't wakeup anyone, leave that to userspace. */
2484 } else { 2494 } else {
2485 if (slot >= rdev->mddev->raid_disks) 2495 if (slot >= rdev->mddev->raid_disks &&
2496 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2486 return -ENOSPC; 2497 return -ENOSPC;
2487 rdev->raid_disk = slot; 2498 rdev->raid_disk = slot;
2488 /* assume it is working */ 2499 /* assume it is working */
@@ -3107,7 +3118,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
3107 char nm[20]; 3118 char nm[20];
3108 if (rdev->raid_disk < 0) 3119 if (rdev->raid_disk < 0)
3109 continue; 3120 continue;
3110 if (rdev->new_raid_disk > mddev->raid_disks) 3121 if (rdev->new_raid_disk >= mddev->raid_disks)
3111 rdev->new_raid_disk = -1; 3122 rdev->new_raid_disk = -1;
3112 if (rdev->new_raid_disk == rdev->raid_disk) 3123 if (rdev->new_raid_disk == rdev->raid_disk)
3113 continue; 3124 continue;
@@ -3736,6 +3747,8 @@ action_show(mddev_t *mddev, char *page)
3736 return sprintf(page, "%s\n", type); 3747 return sprintf(page, "%s\n", type);
3737} 3748}
3738 3749
3750static void reap_sync_thread(mddev_t *mddev);
3751
3739static ssize_t 3752static ssize_t
3740action_store(mddev_t *mddev, const char *page, size_t len) 3753action_store(mddev_t *mddev, const char *page, size_t len)
3741{ 3754{
@@ -3750,9 +3763,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
3750 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { 3763 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
3751 if (mddev->sync_thread) { 3764 if (mddev->sync_thread) {
3752 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 3765 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3753 md_unregister_thread(mddev->sync_thread); 3766 reap_sync_thread(mddev);
3754 mddev->sync_thread = NULL;
3755 mddev->recovery = 0;
3756 } 3767 }
3757 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || 3768 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3758 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) 3769 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -3904,7 +3915,7 @@ static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
3904static ssize_t 3915static ssize_t
3905sync_completed_show(mddev_t *mddev, char *page) 3916sync_completed_show(mddev_t *mddev, char *page)
3906{ 3917{
3907 unsigned long max_sectors, resync; 3918 unsigned long long max_sectors, resync;
3908 3919
3909 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 3920 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3910 return sprintf(page, "none\n"); 3921 return sprintf(page, "none\n");
@@ -3915,7 +3926,7 @@ sync_completed_show(mddev_t *mddev, char *page)
3915 max_sectors = mddev->dev_sectors; 3926 max_sectors = mddev->dev_sectors;
3916 3927
3917 resync = mddev->curr_resync_completed; 3928 resync = mddev->curr_resync_completed;
3918 return sprintf(page, "%lu / %lu\n", resync, max_sectors); 3929 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
3919} 3930}
3920 3931
3921static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); 3932static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
@@ -4002,19 +4013,24 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
4002{ 4013{
4003 char *e; 4014 char *e;
4004 unsigned long long new = simple_strtoull(buf, &e, 10); 4015 unsigned long long new = simple_strtoull(buf, &e, 10);
4016 unsigned long long old = mddev->suspend_lo;
4005 4017
4006 if (mddev->pers == NULL || 4018 if (mddev->pers == NULL ||
4007 mddev->pers->quiesce == NULL) 4019 mddev->pers->quiesce == NULL)
4008 return -EINVAL; 4020 return -EINVAL;
4009 if (buf == e || (*e && *e != '\n')) 4021 if (buf == e || (*e && *e != '\n'))
4010 return -EINVAL; 4022 return -EINVAL;
4011 if (new >= mddev->suspend_hi || 4023
4012 (new > mddev->suspend_lo && new < mddev->suspend_hi)) { 4024 mddev->suspend_lo = new;
4013 mddev->suspend_lo = new; 4025 if (new >= old)
4026 /* Shrinking suspended region */
4014 mddev->pers->quiesce(mddev, 2); 4027 mddev->pers->quiesce(mddev, 2);
4015 return len; 4028 else {
4016 } else 4029 /* Expanding suspended region - need to wait */
4017 return -EINVAL; 4030 mddev->pers->quiesce(mddev, 1);
4031 mddev->pers->quiesce(mddev, 0);
4032 }
4033 return len;
4018} 4034}
4019static struct md_sysfs_entry md_suspend_lo = 4035static struct md_sysfs_entry md_suspend_lo =
4020__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); 4036__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
@@ -4031,20 +4047,24 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
4031{ 4047{
4032 char *e; 4048 char *e;
4033 unsigned long long new = simple_strtoull(buf, &e, 10); 4049 unsigned long long new = simple_strtoull(buf, &e, 10);
4050 unsigned long long old = mddev->suspend_hi;
4034 4051
4035 if (mddev->pers == NULL || 4052 if (mddev->pers == NULL ||
4036 mddev->pers->quiesce == NULL) 4053 mddev->pers->quiesce == NULL)
4037 return -EINVAL; 4054 return -EINVAL;
4038 if (buf == e || (*e && *e != '\n')) 4055 if (buf == e || (*e && *e != '\n'))
4039 return -EINVAL; 4056 return -EINVAL;
4040 if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) || 4057
4041 (new > mddev->suspend_lo && new > mddev->suspend_hi)) { 4058 mddev->suspend_hi = new;
4042 mddev->suspend_hi = new; 4059 if (new <= old)
4060 /* Shrinking suspended region */
4061 mddev->pers->quiesce(mddev, 2);
4062 else {
4063 /* Expanding suspended region - need to wait */
4043 mddev->pers->quiesce(mddev, 1); 4064 mddev->pers->quiesce(mddev, 1);
4044 mddev->pers->quiesce(mddev, 0); 4065 mddev->pers->quiesce(mddev, 0);
4045 return len; 4066 }
4046 } else 4067 return len;
4047 return -EINVAL;
4048} 4068}
4049static struct md_sysfs_entry md_suspend_hi = 4069static struct md_sysfs_entry md_suspend_hi =
4050__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); 4070__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
@@ -4422,7 +4442,9 @@ int md_run(mddev_t *mddev)
4422 * We don't want the data to overlap the metadata, 4442 * We don't want the data to overlap the metadata,
4423 * Internal Bitmap issues have been handled elsewhere. 4443 * Internal Bitmap issues have been handled elsewhere.
4424 */ 4444 */
4425 if (rdev->data_offset < rdev->sb_start) { 4445 if (rdev->meta_bdev) {
4446 /* Nothing to check */;
4447 } else if (rdev->data_offset < rdev->sb_start) {
4426 if (mddev->dev_sectors && 4448 if (mddev->dev_sectors &&
4427 rdev->data_offset + mddev->dev_sectors 4449 rdev->data_offset + mddev->dev_sectors
4428 > rdev->sb_start) { 4450 > rdev->sb_start) {
@@ -4556,7 +4578,8 @@ int md_run(mddev_t *mddev)
4556 mddev->safemode_timer.data = (unsigned long) mddev; 4578 mddev->safemode_timer.data = (unsigned long) mddev;
4557 mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ 4579 mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
4558 mddev->in_sync = 1; 4580 mddev->in_sync = 1;
4559 4581 smp_wmb();
4582 mddev->ready = 1;
4560 list_for_each_entry(rdev, &mddev->disks, same_set) 4583 list_for_each_entry(rdev, &mddev->disks, same_set)
4561 if (rdev->raid_disk >= 0) { 4584 if (rdev->raid_disk >= 0) {
4562 char nm[20]; 4585 char nm[20];
@@ -4693,13 +4716,12 @@ static void md_clean(mddev_t *mddev)
4693 mddev->plug = NULL; 4716 mddev->plug = NULL;
4694} 4717}
4695 4718
4696void md_stop_writes(mddev_t *mddev) 4719static void __md_stop_writes(mddev_t *mddev)
4697{ 4720{
4698 if (mddev->sync_thread) { 4721 if (mddev->sync_thread) {
4699 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4722 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4700 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 4723 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4701 md_unregister_thread(mddev->sync_thread); 4724 reap_sync_thread(mddev);
4702 mddev->sync_thread = NULL;
4703 } 4725 }
4704 4726
4705 del_timer_sync(&mddev->safemode_timer); 4727 del_timer_sync(&mddev->safemode_timer);
@@ -4713,10 +4735,18 @@ void md_stop_writes(mddev_t *mddev)
4713 md_update_sb(mddev, 1); 4735 md_update_sb(mddev, 1);
4714 } 4736 }
4715} 4737}
4738
4739void md_stop_writes(mddev_t *mddev)
4740{
4741 mddev_lock(mddev);
4742 __md_stop_writes(mddev);
4743 mddev_unlock(mddev);
4744}
4716EXPORT_SYMBOL_GPL(md_stop_writes); 4745EXPORT_SYMBOL_GPL(md_stop_writes);
4717 4746
4718void md_stop(mddev_t *mddev) 4747void md_stop(mddev_t *mddev)
4719{ 4748{
4749 mddev->ready = 0;
4720 mddev->pers->stop(mddev); 4750 mddev->pers->stop(mddev);
4721 if (mddev->pers->sync_request && mddev->to_remove == NULL) 4751 if (mddev->pers->sync_request && mddev->to_remove == NULL)
4722 mddev->to_remove = &md_redundancy_group; 4752 mddev->to_remove = &md_redundancy_group;
@@ -4736,7 +4766,7 @@ static int md_set_readonly(mddev_t *mddev, int is_open)
4736 goto out; 4766 goto out;
4737 } 4767 }
4738 if (mddev->pers) { 4768 if (mddev->pers) {
4739 md_stop_writes(mddev); 4769 __md_stop_writes(mddev);
4740 4770
4741 err = -ENXIO; 4771 err = -ENXIO;
4742 if (mddev->ro==1) 4772 if (mddev->ro==1)
@@ -4773,7 +4803,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4773 if (mddev->ro) 4803 if (mddev->ro)
4774 set_disk_ro(disk, 0); 4804 set_disk_ro(disk, 0);
4775 4805
4776 md_stop_writes(mddev); 4806 __md_stop_writes(mddev);
4777 md_stop(mddev); 4807 md_stop(mddev);
4778 mddev->queue->merge_bvec_fn = NULL; 4808 mddev->queue->merge_bvec_fn = NULL;
4779 mddev->queue->unplug_fn = NULL; 4809 mddev->queue->unplug_fn = NULL;
@@ -5151,9 +5181,10 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5151 /* set saved_raid_disk if appropriate */ 5181 /* set saved_raid_disk if appropriate */
5152 if (!mddev->persistent) { 5182 if (!mddev->persistent) {
5153 if (info->state & (1<<MD_DISK_SYNC) && 5183 if (info->state & (1<<MD_DISK_SYNC) &&
5154 info->raid_disk < mddev->raid_disks) 5184 info->raid_disk < mddev->raid_disks) {
5155 rdev->raid_disk = info->raid_disk; 5185 rdev->raid_disk = info->raid_disk;
5156 else 5186 set_bit(In_sync, &rdev->flags);
5187 } else
5157 rdev->raid_disk = -1; 5188 rdev->raid_disk = -1;
5158 } else 5189 } else
5159 super_types[mddev->major_version]. 5190 super_types[mddev->major_version].
@@ -5230,7 +5261,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5230 printk(KERN_INFO "md: nonpersistent superblock ...\n"); 5261 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5231 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; 5262 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5232 } else 5263 } else
5233 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 5264 rdev->sb_start = calc_dev_sboffset(rdev);
5234 rdev->sectors = rdev->sb_start; 5265 rdev->sectors = rdev->sb_start;
5235 5266
5236 err = bind_rdev_to_array(rdev, mddev); 5267 err = bind_rdev_to_array(rdev, mddev);
@@ -5297,7 +5328,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
5297 } 5328 }
5298 5329
5299 if (mddev->persistent) 5330 if (mddev->persistent)
5300 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 5331 rdev->sb_start = calc_dev_sboffset(rdev);
5301 else 5332 else
5302 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; 5333 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5303 5334
@@ -5510,7 +5541,6 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
5510 * sb_start or, if that is <data_offset, it must fit before the size 5541 * sb_start or, if that is <data_offset, it must fit before the size
5511 * of each device. If num_sectors is zero, we find the largest size 5542 * of each device. If num_sectors is zero, we find the largest size
5512 * that fits. 5543 * that fits.
5513
5514 */ 5544 */
5515 if (mddev->sync_thread) 5545 if (mddev->sync_thread)
5516 return -EBUSY; 5546 return -EBUSY;
@@ -6033,7 +6063,8 @@ static int md_thread(void * arg)
6033 || kthread_should_stop(), 6063 || kthread_should_stop(),
6034 thread->timeout); 6064 thread->timeout);
6035 6065
6036 if (test_and_clear_bit(THREAD_WAKEUP, &thread->flags)) 6066 clear_bit(THREAD_WAKEUP, &thread->flags);
6067 if (!kthread_should_stop())
6037 thread->run(thread->mddev); 6068 thread->run(thread->mddev);
6038 } 6069 }
6039 6070
@@ -6799,7 +6830,7 @@ void md_do_sync(mddev_t *mddev)
6799 desc, mdname(mddev)); 6830 desc, mdname(mddev));
6800 mddev->curr_resync = j; 6831 mddev->curr_resync = j;
6801 } 6832 }
6802 mddev->curr_resync_completed = mddev->curr_resync; 6833 mddev->curr_resync_completed = j;
6803 6834
6804 while (j < max_sectors) { 6835 while (j < max_sectors) {
6805 sector_t sectors; 6836 sector_t sectors;
@@ -6817,8 +6848,7 @@ void md_do_sync(mddev_t *mddev)
6817 md_unplug(mddev); 6848 md_unplug(mddev);
6818 wait_event(mddev->recovery_wait, 6849 wait_event(mddev->recovery_wait,
6819 atomic_read(&mddev->recovery_active) == 0); 6850 atomic_read(&mddev->recovery_active) == 0);
6820 mddev->curr_resync_completed = 6851 mddev->curr_resync_completed = j;
6821 mddev->curr_resync;
6822 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 6852 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6823 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 6853 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6824 } 6854 }
@@ -7023,6 +7053,45 @@ static int remove_and_add_spares(mddev_t *mddev)
7023 } 7053 }
7024 return spares; 7054 return spares;
7025} 7055}
7056
7057static void reap_sync_thread(mddev_t *mddev)
7058{
7059 mdk_rdev_t *rdev;
7060
7061 /* resync has finished, collect result */
7062 md_unregister_thread(mddev->sync_thread);
7063 mddev->sync_thread = NULL;
7064 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7065 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7066 /* success...*/
7067 /* activate any spares */
7068 if (mddev->pers->spare_active(mddev))
7069 sysfs_notify(&mddev->kobj, NULL,
7070 "degraded");
7071 }
7072 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7073 mddev->pers->finish_reshape)
7074 mddev->pers->finish_reshape(mddev);
7075 md_update_sb(mddev, 1);
7076
7077 /* if array is no-longer degraded, then any saved_raid_disk
7078 * information must be scrapped
7079 */
7080 if (!mddev->degraded)
7081 list_for_each_entry(rdev, &mddev->disks, same_set)
7082 rdev->saved_raid_disk = -1;
7083
7084 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7085 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7086 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7087 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7088 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7089 /* flag recovery needed just to double check */
7090 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7091 sysfs_notify_dirent_safe(mddev->sysfs_action);
7092 md_new_event(mddev);
7093}
7094
7026/* 7095/*
7027 * This routine is regularly called by all per-raid-array threads to 7096 * This routine is regularly called by all per-raid-array threads to
7028 * deal with generic issues like resync and super-block update. 7097 * deal with generic issues like resync and super-block update.
@@ -7047,9 +7116,6 @@ static int remove_and_add_spares(mddev_t *mddev)
7047 */ 7116 */
7048void md_check_recovery(mddev_t *mddev) 7117void md_check_recovery(mddev_t *mddev)
7049{ 7118{
7050 mdk_rdev_t *rdev;
7051
7052
7053 if (mddev->bitmap) 7119 if (mddev->bitmap)
7054 bitmap_daemon_work(mddev); 7120 bitmap_daemon_work(mddev);
7055 7121
@@ -7117,34 +7183,7 @@ void md_check_recovery(mddev_t *mddev)
7117 goto unlock; 7183 goto unlock;
7118 } 7184 }
7119 if (mddev->sync_thread) { 7185 if (mddev->sync_thread) {
7120 /* resync has finished, collect result */ 7186 reap_sync_thread(mddev);
7121 md_unregister_thread(mddev->sync_thread);
7122 mddev->sync_thread = NULL;
7123 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7124 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7125 /* success...*/
7126 /* activate any spares */
7127 if (mddev->pers->spare_active(mddev))
7128 sysfs_notify(&mddev->kobj, NULL,
7129 "degraded");
7130 }
7131 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7132 mddev->pers->finish_reshape)
7133 mddev->pers->finish_reshape(mddev);
7134 md_update_sb(mddev, 1);
7135
7136 /* if array is no-longer degraded, then any saved_raid_disk
7137 * information must be scrapped
7138 */
7139 if (!mddev->degraded)
7140 list_for_each_entry(rdev, &mddev->disks, same_set)
7141 rdev->saved_raid_disk = -1;
7142
7143 mddev->recovery = 0;
7144 /* flag recovery needed just to double check */
7145 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7146 sysfs_notify_dirent_safe(mddev->sysfs_action);
7147 md_new_event(mddev);
7148 goto unlock; 7187 goto unlock;
7149 } 7188 }
7150 /* Set RUNNING before clearing NEEDED to avoid 7189 /* Set RUNNING before clearing NEEDED to avoid
@@ -7202,7 +7241,11 @@ void md_check_recovery(mddev_t *mddev)
7202 " thread...\n", 7241 " thread...\n",
7203 mdname(mddev)); 7242 mdname(mddev));
7204 /* leave the spares where they are, it shouldn't hurt */ 7243 /* leave the spares where they are, it shouldn't hurt */
7205 mddev->recovery = 0; 7244 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7245 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7246 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7247 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7248 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7206 } else 7249 } else
7207 md_wakeup_thread(mddev->sync_thread); 7250 md_wakeup_thread(mddev->sync_thread);
7208 sysfs_notify_dirent_safe(mddev->sysfs_action); 7251 sysfs_notify_dirent_safe(mddev->sysfs_action);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d05bab55df4e..eec517ced31a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -60,6 +60,12 @@ struct mdk_rdev_s
60 mddev_t *mddev; /* RAID array if running */ 60 mddev_t *mddev; /* RAID array if running */
61 int last_events; /* IO event timestamp */ 61 int last_events; /* IO event timestamp */
62 62
63 /*
64 * If meta_bdev is non-NULL, it means that a separate device is
65 * being used to store the metadata (superblock/bitmap) which
66 * would otherwise be contained on the same device as the data (bdev).
67 */
68 struct block_device *meta_bdev;
63 struct block_device *bdev; /* block device handle */ 69 struct block_device *bdev; /* block device handle */
64 70
65 struct page *sb_page; 71 struct page *sb_page;
@@ -148,7 +154,8 @@ struct mddev_s
148 * are happening, so run/ 154 * are happening, so run/
149 * takeover/stop are not safe 155 * takeover/stop are not safe
150 */ 156 */
151 157 int ready; /* See when safe to pass
158 * IO requests down */
152 struct gendisk *gendisk; 159 struct gendisk *gendisk;
153 160
154 struct kobject kobj; 161 struct kobject kobj;
@@ -497,8 +504,8 @@ extern void md_flush_request(mddev_t *mddev, struct bio *bio);
497extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, 504extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
498 sector_t sector, int size, struct page *page); 505 sector_t sector, int size, struct page *page);
499extern void md_super_wait(mddev_t *mddev); 506extern void md_super_wait(mddev_t *mddev);
500extern int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, 507extern int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
501 struct page *page, int rw); 508 struct page *page, int rw, bool metadata_op);
502extern void md_do_sync(mddev_t *mddev); 509extern void md_do_sync(mddev_t *mddev);
503extern void md_new_event(mddev_t *mddev); 510extern void md_new_event(mddev_t *mddev);
504extern int md_allow_write(mddev_t *mddev); 511extern int md_allow_write(mddev_t *mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 845cf95b612c..a23ffa397ba9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1027,8 +1027,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1027 } else 1027 } else
1028 set_bit(Faulty, &rdev->flags); 1028 set_bit(Faulty, &rdev->flags);
1029 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1029 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1030 printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n" 1030 printk(KERN_ALERT
1031 KERN_ALERT "md/raid1:%s: Operation continuing on %d devices.\n", 1031 "md/raid1:%s: Disk failure on %s, disabling device.\n"
1032 "md/raid1:%s: Operation continuing on %d devices.\n",
1032 mdname(mddev), bdevname(rdev->bdev, b), 1033 mdname(mddev), bdevname(rdev->bdev, b),
1033 mdname(mddev), conf->raid_disks - mddev->degraded); 1034 mdname(mddev), conf->raid_disks - mddev->degraded);
1034} 1035}
@@ -1364,10 +1365,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
1364 */ 1365 */
1365 rdev = conf->mirrors[d].rdev; 1366 rdev = conf->mirrors[d].rdev;
1366 if (sync_page_io(rdev, 1367 if (sync_page_io(rdev,
1367 sect + rdev->data_offset, 1368 sect,
1368 s<<9, 1369 s<<9,
1369 bio->bi_io_vec[idx].bv_page, 1370 bio->bi_io_vec[idx].bv_page,
1370 READ)) { 1371 READ, false)) {
1371 success = 1; 1372 success = 1;
1372 break; 1373 break;
1373 } 1374 }
@@ -1390,10 +1391,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
1390 rdev = conf->mirrors[d].rdev; 1391 rdev = conf->mirrors[d].rdev;
1391 atomic_add(s, &rdev->corrected_errors); 1392 atomic_add(s, &rdev->corrected_errors);
1392 if (sync_page_io(rdev, 1393 if (sync_page_io(rdev,
1393 sect + rdev->data_offset, 1394 sect,
1394 s<<9, 1395 s<<9,
1395 bio->bi_io_vec[idx].bv_page, 1396 bio->bi_io_vec[idx].bv_page,
1396 WRITE) == 0) 1397 WRITE, false) == 0)
1397 md_error(mddev, rdev); 1398 md_error(mddev, rdev);
1398 } 1399 }
1399 d = start; 1400 d = start;
@@ -1405,10 +1406,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
1405 continue; 1406 continue;
1406 rdev = conf->mirrors[d].rdev; 1407 rdev = conf->mirrors[d].rdev;
1407 if (sync_page_io(rdev, 1408 if (sync_page_io(rdev,
1408 sect + rdev->data_offset, 1409 sect,
1409 s<<9, 1410 s<<9,
1410 bio->bi_io_vec[idx].bv_page, 1411 bio->bi_io_vec[idx].bv_page,
1411 READ) == 0) 1412 READ, false) == 0)
1412 md_error(mddev, rdev); 1413 md_error(mddev, rdev);
1413 } 1414 }
1414 } else { 1415 } else {
@@ -1488,10 +1489,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
1488 rdev = conf->mirrors[d].rdev; 1489 rdev = conf->mirrors[d].rdev;
1489 if (rdev && 1490 if (rdev &&
1490 test_bit(In_sync, &rdev->flags) && 1491 test_bit(In_sync, &rdev->flags) &&
1491 sync_page_io(rdev, 1492 sync_page_io(rdev, sect, s<<9,
1492 sect + rdev->data_offset, 1493 conf->tmppage, READ, false))
1493 s<<9,
1494 conf->tmppage, READ))
1495 success = 1; 1494 success = 1;
1496 else { 1495 else {
1497 d++; 1496 d++;
@@ -1514,9 +1513,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
1514 rdev = conf->mirrors[d].rdev; 1513 rdev = conf->mirrors[d].rdev;
1515 if (rdev && 1514 if (rdev &&
1516 test_bit(In_sync, &rdev->flags)) { 1515 test_bit(In_sync, &rdev->flags)) {
1517 if (sync_page_io(rdev, 1516 if (sync_page_io(rdev, sect, s<<9,
1518 sect + rdev->data_offset, 1517 conf->tmppage, WRITE, false)
1519 s<<9, conf->tmppage, WRITE)
1520 == 0) 1518 == 0)
1521 /* Well, this device is dead */ 1519 /* Well, this device is dead */
1522 md_error(mddev, rdev); 1520 md_error(mddev, rdev);
@@ -1531,9 +1529,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
1531 rdev = conf->mirrors[d].rdev; 1529 rdev = conf->mirrors[d].rdev;
1532 if (rdev && 1530 if (rdev &&
1533 test_bit(In_sync, &rdev->flags)) { 1531 test_bit(In_sync, &rdev->flags)) {
1534 if (sync_page_io(rdev, 1532 if (sync_page_io(rdev, sect, s<<9,
1535 sect + rdev->data_offset, 1533 conf->tmppage, READ, false)
1536 s<<9, conf->tmppage, READ)
1537 == 0) 1534 == 0)
1538 /* Well, this device is dead */ 1535 /* Well, this device is dead */
1539 md_error(mddev, rdev); 1536 md_error(mddev, rdev);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0641674827f0..69b659544390 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1051,8 +1051,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1051 } 1051 }
1052 set_bit(Faulty, &rdev->flags); 1052 set_bit(Faulty, &rdev->flags);
1053 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1053 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1054 printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n" 1054 printk(KERN_ALERT
1055 KERN_ALERT "md/raid10:%s: Operation continuing on %d devices.\n", 1055 "md/raid10:%s: Disk failure on %s, disabling device.\n"
1056 "md/raid10:%s: Operation continuing on %d devices.\n",
1056 mdname(mddev), bdevname(rdev->bdev, b), 1057 mdname(mddev), bdevname(rdev->bdev, b),
1057 mdname(mddev), conf->raid_disks - mddev->degraded); 1058 mdname(mddev), conf->raid_disks - mddev->degraded);
1058} 1059}
@@ -1559,9 +1560,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1559 rcu_read_unlock(); 1560 rcu_read_unlock();
1560 success = sync_page_io(rdev, 1561 success = sync_page_io(rdev,
1561 r10_bio->devs[sl].addr + 1562 r10_bio->devs[sl].addr +
1562 sect + rdev->data_offset, 1563 sect,
1563 s<<9, 1564 s<<9,
1564 conf->tmppage, READ); 1565 conf->tmppage, READ, false);
1565 rdev_dec_pending(rdev, mddev); 1566 rdev_dec_pending(rdev, mddev);
1566 rcu_read_lock(); 1567 rcu_read_lock();
1567 if (success) 1568 if (success)
@@ -1598,8 +1599,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1598 atomic_add(s, &rdev->corrected_errors); 1599 atomic_add(s, &rdev->corrected_errors);
1599 if (sync_page_io(rdev, 1600 if (sync_page_io(rdev,
1600 r10_bio->devs[sl].addr + 1601 r10_bio->devs[sl].addr +
1601 sect + rdev->data_offset, 1602 sect,
1602 s<<9, conf->tmppage, WRITE) 1603 s<<9, conf->tmppage, WRITE, false)
1603 == 0) { 1604 == 0) {
1604 /* Well, this device is dead */ 1605 /* Well, this device is dead */
1605 printk(KERN_NOTICE 1606 printk(KERN_NOTICE
@@ -1635,9 +1636,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1635 rcu_read_unlock(); 1636 rcu_read_unlock();
1636 if (sync_page_io(rdev, 1637 if (sync_page_io(rdev,
1637 r10_bio->devs[sl].addr + 1638 r10_bio->devs[sl].addr +
1638 sect + rdev->data_offset, 1639 sect,
1639 s<<9, conf->tmppage, 1640 s<<9, conf->tmppage,
1640 READ) == 0) { 1641 READ, false) == 0) {
1641 /* Well, this device is dead */ 1642 /* Well, this device is dead */
1642 printk(KERN_NOTICE 1643 printk(KERN_NOTICE
1643 "md/raid10:%s: unable to read back " 1644 "md/raid10:%s: unable to read back "
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index dc574f303f8b..5044babfcda0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1721,7 +1721,6 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1721 set_bit(Faulty, &rdev->flags); 1721 set_bit(Faulty, &rdev->flags);
1722 printk(KERN_ALERT 1722 printk(KERN_ALERT
1723 "md/raid:%s: Disk failure on %s, disabling device.\n" 1723 "md/raid:%s: Disk failure on %s, disabling device.\n"
1724 KERN_ALERT
1725 "md/raid:%s: Operation continuing on %d devices.\n", 1724 "md/raid:%s: Operation continuing on %d devices.\n",
1726 mdname(mddev), 1725 mdname(mddev),
1727 bdevname(rdev->bdev, b), 1726 bdevname(rdev->bdev, b),
@@ -4237,7 +4236,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
4237 wait_event(conf->wait_for_overlap, 4236 wait_event(conf->wait_for_overlap,
4238 atomic_read(&conf->reshape_stripes)==0); 4237 atomic_read(&conf->reshape_stripes)==0);
4239 mddev->reshape_position = conf->reshape_progress; 4238 mddev->reshape_position = conf->reshape_progress;
4240 mddev->curr_resync_completed = mddev->curr_resync; 4239 mddev->curr_resync_completed = sector_nr;
4241 conf->reshape_checkpoint = jiffies; 4240 conf->reshape_checkpoint = jiffies;
4242 set_bit(MD_CHANGE_DEVS, &mddev->flags); 4241 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4243 md_wakeup_thread(mddev->thread); 4242 md_wakeup_thread(mddev->thread);
@@ -4338,7 +4337,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
4338 wait_event(conf->wait_for_overlap, 4337 wait_event(conf->wait_for_overlap,
4339 atomic_read(&conf->reshape_stripes) == 0); 4338 atomic_read(&conf->reshape_stripes) == 0);
4340 mddev->reshape_position = conf->reshape_progress; 4339 mddev->reshape_position = conf->reshape_progress;
4341 mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors; 4340 mddev->curr_resync_completed = sector_nr;
4342 conf->reshape_checkpoint = jiffies; 4341 conf->reshape_checkpoint = jiffies;
4343 set_bit(MD_CHANGE_DEVS, &mddev->flags); 4342 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4344 md_wakeup_thread(mddev->thread); 4343 md_wakeup_thread(mddev->thread);
@@ -5339,7 +5338,7 @@ static int raid5_spare_active(mddev_t *mddev)
5339 && !test_bit(Faulty, &tmp->rdev->flags) 5338 && !test_bit(Faulty, &tmp->rdev->flags)
5340 && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { 5339 && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
5341 count++; 5340 count++;
5342 sysfs_notify_dirent(tmp->rdev->sysfs_state); 5341 sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
5343 } 5342 }
5344 } 5343 }
5345 spin_lock_irqsave(&conf->device_lock, flags); 5344 spin_lock_irqsave(&conf->device_lock, flags);
@@ -5528,8 +5527,8 @@ static int raid5_start_reshape(mddev_t *mddev)
5528 return -ENOSPC; 5527 return -ENOSPC;
5529 5528
5530 list_for_each_entry(rdev, &mddev->disks, same_set) 5529 list_for_each_entry(rdev, &mddev->disks, same_set)
5531 if (rdev->raid_disk < 0 && 5530 if ((rdev->raid_disk < 0 || rdev->raid_disk >= conf->raid_disks)
5532 !test_bit(Faulty, &rdev->flags)) 5531 && !test_bit(Faulty, &rdev->flags))
5533 spares++; 5532 spares++;
5534 5533
5535 if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) 5534 if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
@@ -5589,6 +5588,11 @@ static int raid5_start_reshape(mddev_t *mddev)
5589 /* Failure here is OK */; 5588 /* Failure here is OK */;
5590 } else 5589 } else
5591 break; 5590 break;
5591 } else if (rdev->raid_disk >= conf->previous_raid_disks
5592 && !test_bit(Faulty, &rdev->flags)) {
5593 /* This is a spare that was manually added */
5594 set_bit(In_sync, &rdev->flags);
5595 added_devices++;
5592 } 5596 }
5593 5597
5594 /* When a reshape changes the number of devices, ->degraded 5598 /* When a reshape changes the number of devices, ->degraded