about summary refs log tree commit diff stats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 20:30:20 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 20:30:20 -0500
commit509e4aef44eb10e4aef1f81c3c3ff1214671503b (patch)
treeb90daecbc3d05787b0dd141e524b67d8579a6dfd /drivers/md/md.c
parent375b6f5a40a3d8373561a3dbeed440a845e6d379 (diff)
parentbf2cb0dab8c97f00a71875d9b13dbac17a2f47ca (diff)
Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: Fix removal of extra drives when converting RAID6 to RAID5
  md: range check slot number when manually adding a spare.
  md/raid5: handle manually-added spares in start_reshape.
  md: fix sync_completed reporting for very large drives (>2TB)
  md: allow suspend_lo and suspend_hi to decrease as well as increase.
  md: Don't let implementation detail of curr_resync leak out through sysfs.
  md: separate meta and data devs
  md-new-param-to_sync_page_io
  md-new-param-to-calc_dev_sboffset
  md: Be more careful about clearing flags bit in ->recovery
  md: md_stop_writes requires mddev_lock.
  md/raid5: use sysfs_notify_dirent_safe to avoid NULL pointer
  md: Ensure no IO request to get md device before it is properly initialised.
  md: Fix single printks with multiple KERN_&lt;level&gt;s
  md: fix regression resulting in delays in clearing bits in a bitmap
  md: fix regression with re-adding devices to arrays with no metadata
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c197
1 file changed, 120 insertions(+), 77 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7fc090ac9e28..cf8594c5ea21 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -288,10 +288,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
288 int rv; 288 int rv;
289 int cpu; 289 int cpu;
290 290
291 if (mddev == NULL || mddev->pers == NULL) { 291 if (mddev == NULL || mddev->pers == NULL
292 || !mddev->ready) {
292 bio_io_error(bio); 293 bio_io_error(bio);
293 return 0; 294 return 0;
294 } 295 }
296 smp_rmb(); /* Ensure implications of 'active' are visible */
295 rcu_read_lock(); 297 rcu_read_lock();
296 if (mddev->suspended) { 298 if (mddev->suspended) {
297 DEFINE_WAIT(__wait); 299 DEFINE_WAIT(__wait);
@@ -703,9 +705,9 @@ static struct mdk_personality *find_pers(int level, char *clevel)
703} 705}
704 706
705/* return the offset of the super block in 512byte sectors */ 707/* return the offset of the super block in 512byte sectors */
706static inline sector_t calc_dev_sboffset(struct block_device *bdev) 708static inline sector_t calc_dev_sboffset(mdk_rdev_t *rdev)
707{ 709{
708 sector_t num_sectors = i_size_read(bdev->bd_inode) / 512; 710 sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
709 return MD_NEW_SIZE_SECTORS(num_sectors); 711 return MD_NEW_SIZE_SECTORS(num_sectors);
710} 712}
711 713
@@ -763,7 +765,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
763 */ 765 */
764 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); 766 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
765 767
766 bio->bi_bdev = rdev->bdev; 768 bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
767 bio->bi_sector = sector; 769 bio->bi_sector = sector;
768 bio_add_page(bio, page, size, 0); 770 bio_add_page(bio, page, size, 0);
769 bio->bi_private = rdev; 771 bio->bi_private = rdev;
@@ -793,7 +795,7 @@ static void bi_complete(struct bio *bio, int error)
793} 795}
794 796
795int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, 797int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
796 struct page *page, int rw) 798 struct page *page, int rw, bool metadata_op)
797{ 799{
798 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); 800 struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
799 struct completion event; 801 struct completion event;
@@ -801,8 +803,12 @@ int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
801 803
802 rw |= REQ_SYNC | REQ_UNPLUG; 804 rw |= REQ_SYNC | REQ_UNPLUG;
803 805
804 bio->bi_bdev = rdev->bdev; 806 bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
805 bio->bi_sector = sector; 807 rdev->meta_bdev : rdev->bdev;
808 if (metadata_op)
809 bio->bi_sector = sector + rdev->sb_start;
810 else
811 bio->bi_sector = sector + rdev->data_offset;
806 bio_add_page(bio, page, size, 0); 812 bio_add_page(bio, page, size, 0);
807 init_completion(&event); 813 init_completion(&event);
808 bio->bi_private = &event; 814 bio->bi_private = &event;
@@ -827,7 +833,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size)
827 return 0; 833 return 0;
828 834
829 835
830 if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ)) 836 if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
831 goto fail; 837 goto fail;
832 rdev->sb_loaded = 1; 838 rdev->sb_loaded = 1;
833 return 0; 839 return 0;
@@ -989,7 +995,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
989 * 995 *
990 * It also happens to be a multiple of 4Kb. 996 * It also happens to be a multiple of 4Kb.
991 */ 997 */
992 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 998 rdev->sb_start = calc_dev_sboffset(rdev);
993 999
994 ret = read_disk_sb(rdev, MD_SB_BYTES); 1000 ret = read_disk_sb(rdev, MD_SB_BYTES);
995 if (ret) return ret; 1001 if (ret) return ret;
@@ -1330,7 +1336,7 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
1330 return 0; /* component must fit device */ 1336 return 0; /* component must fit device */
1331 if (rdev->mddev->bitmap_info.offset) 1337 if (rdev->mddev->bitmap_info.offset)
1332 return 0; /* can't move bitmap */ 1338 return 0; /* can't move bitmap */
1333 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 1339 rdev->sb_start = calc_dev_sboffset(rdev);
1334 if (!num_sectors || num_sectors > rdev->sb_start) 1340 if (!num_sectors || num_sectors > rdev->sb_start)
1335 num_sectors = rdev->sb_start; 1341 num_sectors = rdev->sb_start;
1336 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, 1342 md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
@@ -2465,6 +2471,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2465 if (rdev2->raid_disk == slot) 2471 if (rdev2->raid_disk == slot)
2466 return -EEXIST; 2472 return -EEXIST;
2467 2473
2474 if (slot >= rdev->mddev->raid_disks &&
2475 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2476 return -ENOSPC;
2477
2468 rdev->raid_disk = slot; 2478 rdev->raid_disk = slot;
2469 if (test_bit(In_sync, &rdev->flags)) 2479 if (test_bit(In_sync, &rdev->flags))
2470 rdev->saved_raid_disk = slot; 2480 rdev->saved_raid_disk = slot;
@@ -2482,7 +2492,8 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
2482 /* failure here is OK */; 2492 /* failure here is OK */;
2483 /* don't wakeup anyone, leave that to userspace. */ 2493 /* don't wakeup anyone, leave that to userspace. */
2484 } else { 2494 } else {
2485 if (slot >= rdev->mddev->raid_disks) 2495 if (slot >= rdev->mddev->raid_disks &&
2496 slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
2486 return -ENOSPC; 2497 return -ENOSPC;
2487 rdev->raid_disk = slot; 2498 rdev->raid_disk = slot;
2488 /* assume it is working */ 2499 /* assume it is working */
@@ -3107,7 +3118,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
3107 char nm[20]; 3118 char nm[20];
3108 if (rdev->raid_disk < 0) 3119 if (rdev->raid_disk < 0)
3109 continue; 3120 continue;
3110 if (rdev->new_raid_disk > mddev->raid_disks) 3121 if (rdev->new_raid_disk >= mddev->raid_disks)
3111 rdev->new_raid_disk = -1; 3122 rdev->new_raid_disk = -1;
3112 if (rdev->new_raid_disk == rdev->raid_disk) 3123 if (rdev->new_raid_disk == rdev->raid_disk)
3113 continue; 3124 continue;
@@ -3736,6 +3747,8 @@ action_show(mddev_t *mddev, char *page)
3736 return sprintf(page, "%s\n", type); 3747 return sprintf(page, "%s\n", type);
3737} 3748}
3738 3749
3750static void reap_sync_thread(mddev_t *mddev);
3751
3739static ssize_t 3752static ssize_t
3740action_store(mddev_t *mddev, const char *page, size_t len) 3753action_store(mddev_t *mddev, const char *page, size_t len)
3741{ 3754{
@@ -3750,9 +3763,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
3750 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { 3763 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
3751 if (mddev->sync_thread) { 3764 if (mddev->sync_thread) {
3752 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 3765 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3753 md_unregister_thread(mddev->sync_thread); 3766 reap_sync_thread(mddev);
3754 mddev->sync_thread = NULL;
3755 mddev->recovery = 0;
3756 } 3767 }
3757 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || 3768 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3758 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) 3769 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -3904,7 +3915,7 @@ static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
3904static ssize_t 3915static ssize_t
3905sync_completed_show(mddev_t *mddev, char *page) 3916sync_completed_show(mddev_t *mddev, char *page)
3906{ 3917{
3907 unsigned long max_sectors, resync; 3918 unsigned long long max_sectors, resync;
3908 3919
3909 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 3920 if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3910 return sprintf(page, "none\n"); 3921 return sprintf(page, "none\n");
@@ -3915,7 +3926,7 @@ sync_completed_show(mddev_t *mddev, char *page)
3915 max_sectors = mddev->dev_sectors; 3926 max_sectors = mddev->dev_sectors;
3916 3927
3917 resync = mddev->curr_resync_completed; 3928 resync = mddev->curr_resync_completed;
3918 return sprintf(page, "%lu / %lu\n", resync, max_sectors); 3929 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
3919} 3930}
3920 3931
3921static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); 3932static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
@@ -4002,19 +4013,24 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
4002{ 4013{
4003 char *e; 4014 char *e;
4004 unsigned long long new = simple_strtoull(buf, &e, 10); 4015 unsigned long long new = simple_strtoull(buf, &e, 10);
4016 unsigned long long old = mddev->suspend_lo;
4005 4017
4006 if (mddev->pers == NULL || 4018 if (mddev->pers == NULL ||
4007 mddev->pers->quiesce == NULL) 4019 mddev->pers->quiesce == NULL)
4008 return -EINVAL; 4020 return -EINVAL;
4009 if (buf == e || (*e && *e != '\n')) 4021 if (buf == e || (*e && *e != '\n'))
4010 return -EINVAL; 4022 return -EINVAL;
4011 if (new >= mddev->suspend_hi || 4023
4012 (new > mddev->suspend_lo && new < mddev->suspend_hi)) { 4024 mddev->suspend_lo = new;
4013 mddev->suspend_lo = new; 4025 if (new >= old)
4026 /* Shrinking suspended region */
4014 mddev->pers->quiesce(mddev, 2); 4027 mddev->pers->quiesce(mddev, 2);
4015 return len; 4028 else {
4016 } else 4029 /* Expanding suspended region - need to wait */
4017 return -EINVAL; 4030 mddev->pers->quiesce(mddev, 1);
4031 mddev->pers->quiesce(mddev, 0);
4032 }
4033 return len;
4018} 4034}
4019static struct md_sysfs_entry md_suspend_lo = 4035static struct md_sysfs_entry md_suspend_lo =
4020__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); 4036__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
@@ -4031,20 +4047,24 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
4031{ 4047{
4032 char *e; 4048 char *e;
4033 unsigned long long new = simple_strtoull(buf, &e, 10); 4049 unsigned long long new = simple_strtoull(buf, &e, 10);
4050 unsigned long long old = mddev->suspend_hi;
4034 4051
4035 if (mddev->pers == NULL || 4052 if (mddev->pers == NULL ||
4036 mddev->pers->quiesce == NULL) 4053 mddev->pers->quiesce == NULL)
4037 return -EINVAL; 4054 return -EINVAL;
4038 if (buf == e || (*e && *e != '\n')) 4055 if (buf == e || (*e && *e != '\n'))
4039 return -EINVAL; 4056 return -EINVAL;
4040 if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) || 4057
4041 (new > mddev->suspend_lo && new > mddev->suspend_hi)) { 4058 mddev->suspend_hi = new;
4042 mddev->suspend_hi = new; 4059 if (new <= old)
4060 /* Shrinking suspended region */
4061 mddev->pers->quiesce(mddev, 2);
4062 else {
4063 /* Expanding suspended region - need to wait */
4043 mddev->pers->quiesce(mddev, 1); 4064 mddev->pers->quiesce(mddev, 1);
4044 mddev->pers->quiesce(mddev, 0); 4065 mddev->pers->quiesce(mddev, 0);
4045 return len; 4066 }
4046 } else 4067 return len;
4047 return -EINVAL;
4048} 4068}
4049static struct md_sysfs_entry md_suspend_hi = 4069static struct md_sysfs_entry md_suspend_hi =
4050__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); 4070__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
@@ -4422,7 +4442,9 @@ int md_run(mddev_t *mddev)
4422 * We don't want the data to overlap the metadata, 4442 * We don't want the data to overlap the metadata,
4423 * Internal Bitmap issues have been handled elsewhere. 4443 * Internal Bitmap issues have been handled elsewhere.
4424 */ 4444 */
4425 if (rdev->data_offset < rdev->sb_start) { 4445 if (rdev->meta_bdev) {
4446 /* Nothing to check */;
4447 } else if (rdev->data_offset < rdev->sb_start) {
4426 if (mddev->dev_sectors && 4448 if (mddev->dev_sectors &&
4427 rdev->data_offset + mddev->dev_sectors 4449 rdev->data_offset + mddev->dev_sectors
4428 > rdev->sb_start) { 4450 > rdev->sb_start) {
@@ -4556,7 +4578,8 @@ int md_run(mddev_t *mddev)
4556 mddev->safemode_timer.data = (unsigned long) mddev; 4578 mddev->safemode_timer.data = (unsigned long) mddev;
4557 mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ 4579 mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
4558 mddev->in_sync = 1; 4580 mddev->in_sync = 1;
4559 4581 smp_wmb();
4582 mddev->ready = 1;
4560 list_for_each_entry(rdev, &mddev->disks, same_set) 4583 list_for_each_entry(rdev, &mddev->disks, same_set)
4561 if (rdev->raid_disk >= 0) { 4584 if (rdev->raid_disk >= 0) {
4562 char nm[20]; 4585 char nm[20];
@@ -4693,13 +4716,12 @@ static void md_clean(mddev_t *mddev)
4693 mddev->plug = NULL; 4716 mddev->plug = NULL;
4694} 4717}
4695 4718
4696void md_stop_writes(mddev_t *mddev) 4719static void __md_stop_writes(mddev_t *mddev)
4697{ 4720{
4698 if (mddev->sync_thread) { 4721 if (mddev->sync_thread) {
4699 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4722 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4700 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 4723 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4701 md_unregister_thread(mddev->sync_thread); 4724 reap_sync_thread(mddev);
4702 mddev->sync_thread = NULL;
4703 } 4725 }
4704 4726
4705 del_timer_sync(&mddev->safemode_timer); 4727 del_timer_sync(&mddev->safemode_timer);
@@ -4713,10 +4735,18 @@ void md_stop_writes(mddev_t *mddev)
4713 md_update_sb(mddev, 1); 4735 md_update_sb(mddev, 1);
4714 } 4736 }
4715} 4737}
4738
4739void md_stop_writes(mddev_t *mddev)
4740{
4741 mddev_lock(mddev);
4742 __md_stop_writes(mddev);
4743 mddev_unlock(mddev);
4744}
4716EXPORT_SYMBOL_GPL(md_stop_writes); 4745EXPORT_SYMBOL_GPL(md_stop_writes);
4717 4746
4718void md_stop(mddev_t *mddev) 4747void md_stop(mddev_t *mddev)
4719{ 4748{
4749 mddev->ready = 0;
4720 mddev->pers->stop(mddev); 4750 mddev->pers->stop(mddev);
4721 if (mddev->pers->sync_request && mddev->to_remove == NULL) 4751 if (mddev->pers->sync_request && mddev->to_remove == NULL)
4722 mddev->to_remove = &md_redundancy_group; 4752 mddev->to_remove = &md_redundancy_group;
@@ -4736,7 +4766,7 @@ static int md_set_readonly(mddev_t *mddev, int is_open)
4736 goto out; 4766 goto out;
4737 } 4767 }
4738 if (mddev->pers) { 4768 if (mddev->pers) {
4739 md_stop_writes(mddev); 4769 __md_stop_writes(mddev);
4740 4770
4741 err = -ENXIO; 4771 err = -ENXIO;
4742 if (mddev->ro==1) 4772 if (mddev->ro==1)
@@ -4773,7 +4803,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4773 if (mddev->ro) 4803 if (mddev->ro)
4774 set_disk_ro(disk, 0); 4804 set_disk_ro(disk, 0);
4775 4805
4776 md_stop_writes(mddev); 4806 __md_stop_writes(mddev);
4777 md_stop(mddev); 4807 md_stop(mddev);
4778 mddev->queue->merge_bvec_fn = NULL; 4808 mddev->queue->merge_bvec_fn = NULL;
4779 mddev->queue->unplug_fn = NULL; 4809 mddev->queue->unplug_fn = NULL;
@@ -5151,9 +5181,10 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5151 /* set saved_raid_disk if appropriate */ 5181 /* set saved_raid_disk if appropriate */
5152 if (!mddev->persistent) { 5182 if (!mddev->persistent) {
5153 if (info->state & (1<<MD_DISK_SYNC) && 5183 if (info->state & (1<<MD_DISK_SYNC) &&
5154 info->raid_disk < mddev->raid_disks) 5184 info->raid_disk < mddev->raid_disks) {
5155 rdev->raid_disk = info->raid_disk; 5185 rdev->raid_disk = info->raid_disk;
5156 else 5186 set_bit(In_sync, &rdev->flags);
5187 } else
5157 rdev->raid_disk = -1; 5188 rdev->raid_disk = -1;
5158 } else 5189 } else
5159 super_types[mddev->major_version]. 5190 super_types[mddev->major_version].
@@ -5230,7 +5261,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5230 printk(KERN_INFO "md: nonpersistent superblock ...\n"); 5261 printk(KERN_INFO "md: nonpersistent superblock ...\n");
5231 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; 5262 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5232 } else 5263 } else
5233 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 5264 rdev->sb_start = calc_dev_sboffset(rdev);
5234 rdev->sectors = rdev->sb_start; 5265 rdev->sectors = rdev->sb_start;
5235 5266
5236 err = bind_rdev_to_array(rdev, mddev); 5267 err = bind_rdev_to_array(rdev, mddev);
@@ -5297,7 +5328,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
5297 } 5328 }
5298 5329
5299 if (mddev->persistent) 5330 if (mddev->persistent)
5300 rdev->sb_start = calc_dev_sboffset(rdev->bdev); 5331 rdev->sb_start = calc_dev_sboffset(rdev);
5301 else 5332 else
5302 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; 5333 rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
5303 5334
@@ -5510,7 +5541,6 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
5510 * sb_start or, if that is <data_offset, it must fit before the size 5541 * sb_start or, if that is <data_offset, it must fit before the size
5511 * of each device. If num_sectors is zero, we find the largest size 5542 * of each device. If num_sectors is zero, we find the largest size
5512 * that fits. 5543 * that fits.
5513
5514 */ 5544 */
5515 if (mddev->sync_thread) 5545 if (mddev->sync_thread)
5516 return -EBUSY; 5546 return -EBUSY;
@@ -6033,7 +6063,8 @@ static int md_thread(void * arg)
6033 || kthread_should_stop(), 6063 || kthread_should_stop(),
6034 thread->timeout); 6064 thread->timeout);
6035 6065
6036 if (test_and_clear_bit(THREAD_WAKEUP, &thread->flags)) 6066 clear_bit(THREAD_WAKEUP, &thread->flags);
6067 if (!kthread_should_stop())
6037 thread->run(thread->mddev); 6068 thread->run(thread->mddev);
6038 } 6069 }
6039 6070
@@ -6799,7 +6830,7 @@ void md_do_sync(mddev_t *mddev)
6799 desc, mdname(mddev)); 6830 desc, mdname(mddev));
6800 mddev->curr_resync = j; 6831 mddev->curr_resync = j;
6801 } 6832 }
6802 mddev->curr_resync_completed = mddev->curr_resync; 6833 mddev->curr_resync_completed = j;
6803 6834
6804 while (j < max_sectors) { 6835 while (j < max_sectors) {
6805 sector_t sectors; 6836 sector_t sectors;
@@ -6817,8 +6848,7 @@ void md_do_sync(mddev_t *mddev)
6817 md_unplug(mddev); 6848 md_unplug(mddev);
6818 wait_event(mddev->recovery_wait, 6849 wait_event(mddev->recovery_wait,
6819 atomic_read(&mddev->recovery_active) == 0); 6850 atomic_read(&mddev->recovery_active) == 0);
6820 mddev->curr_resync_completed = 6851 mddev->curr_resync_completed = j;
6821 mddev->curr_resync;
6822 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 6852 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
6823 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 6853 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6824 } 6854 }
@@ -7023,6 +7053,45 @@ static int remove_and_add_spares(mddev_t *mddev)
7023 } 7053 }
7024 return spares; 7054 return spares;
7025} 7055}
7056
7057static void reap_sync_thread(mddev_t *mddev)
7058{
7059 mdk_rdev_t *rdev;
7060
7061 /* resync has finished, collect result */
7062 md_unregister_thread(mddev->sync_thread);
7063 mddev->sync_thread = NULL;
7064 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7065 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7066 /* success...*/
7067 /* activate any spares */
7068 if (mddev->pers->spare_active(mddev))
7069 sysfs_notify(&mddev->kobj, NULL,
7070 "degraded");
7071 }
7072 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7073 mddev->pers->finish_reshape)
7074 mddev->pers->finish_reshape(mddev);
7075 md_update_sb(mddev, 1);
7076
7077 /* if array is no-longer degraded, then any saved_raid_disk
7078 * information must be scrapped
7079 */
7080 if (!mddev->degraded)
7081 list_for_each_entry(rdev, &mddev->disks, same_set)
7082 rdev->saved_raid_disk = -1;
7083
7084 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7085 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7086 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7087 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7088 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7089 /* flag recovery needed just to double check */
7090 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7091 sysfs_notify_dirent_safe(mddev->sysfs_action);
7092 md_new_event(mddev);
7093}
7094
7026/* 7095/*
7027 * This routine is regularly called by all per-raid-array threads to 7096 * This routine is regularly called by all per-raid-array threads to
7028 * deal with generic issues like resync and super-block update. 7097 * deal with generic issues like resync and super-block update.
@@ -7047,9 +7116,6 @@ static int remove_and_add_spares(mddev_t *mddev)
7047 */ 7116 */
7048void md_check_recovery(mddev_t *mddev) 7117void md_check_recovery(mddev_t *mddev)
7049{ 7118{
7050 mdk_rdev_t *rdev;
7051
7052
7053 if (mddev->bitmap) 7119 if (mddev->bitmap)
7054 bitmap_daemon_work(mddev); 7120 bitmap_daemon_work(mddev);
7055 7121
@@ -7117,34 +7183,7 @@ void md_check_recovery(mddev_t *mddev)
7117 goto unlock; 7183 goto unlock;
7118 } 7184 }
7119 if (mddev->sync_thread) { 7185 if (mddev->sync_thread) {
7120 /* resync has finished, collect result */ 7186 reap_sync_thread(mddev);
7121 md_unregister_thread(mddev->sync_thread);
7122 mddev->sync_thread = NULL;
7123 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7124 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7125 /* success...*/
7126 /* activate any spares */
7127 if (mddev->pers->spare_active(mddev))
7128 sysfs_notify(&mddev->kobj, NULL,
7129 "degraded");
7130 }
7131 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7132 mddev->pers->finish_reshape)
7133 mddev->pers->finish_reshape(mddev);
7134 md_update_sb(mddev, 1);
7135
7136 /* if array is no-longer degraded, then any saved_raid_disk
7137 * information must be scrapped
7138 */
7139 if (!mddev->degraded)
7140 list_for_each_entry(rdev, &mddev->disks, same_set)
7141 rdev->saved_raid_disk = -1;
7142
7143 mddev->recovery = 0;
7144 /* flag recovery needed just to double check */
7145 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7146 sysfs_notify_dirent_safe(mddev->sysfs_action);
7147 md_new_event(mddev);
7148 goto unlock; 7187 goto unlock;
7149 } 7188 }
7150 /* Set RUNNING before clearing NEEDED to avoid 7189 /* Set RUNNING before clearing NEEDED to avoid
@@ -7202,7 +7241,11 @@ void md_check_recovery(mddev_t *mddev)
7202 " thread...\n", 7241 " thread...\n",
7203 mdname(mddev)); 7242 mdname(mddev));
7204 /* leave the spares where they are, it shouldn't hurt */ 7243 /* leave the spares where they are, it shouldn't hurt */
7205 mddev->recovery = 0; 7244 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7245 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7246 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7247 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7248 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7206 } else 7249 } else
7207 md_wakeup_thread(mddev->sync_thread); 7250 md_wakeup_thread(mddev->sync_thread);
7208 sysfs_notify_dirent_safe(mddev->sysfs_action); 7251 sysfs_notify_dirent_safe(mddev->sysfs_action);