aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2013-12-11 18:13:33 -0500
committer NeilBrown <neilb@suse.de> 2014-01-14 00:44:08 -0500
commit 8313b8e57f55b15e5b7f7fc5d1630bbf686a9a97 (patch)
tree 80ba177fd5a1566c593f1cd8a963b9ecd0191225 /drivers
parent e8b849158508565e0cd6bc80061124afc5879160 (diff)
md: fix problem when adding device to read-only array with bitmap.
If an array is started degraded, and then the missing device is found it can be re-added and a minimal bitmap-based recovery will bring it fully up-to-date.

If the array is read-only a recovery would not be allowed. But also if the array is read-only and the missing device was present very recently, then there could be no need for any recovery at all, so we simply include the device in the read-only array without any recovery.

However... if the missing device was removed a little longer ago it could be missing some updates, but if a bitmap is present it will be conditionally accepted pending a bitmap-based update. We don't currently detect this case properly and will include that old device into the read-only array with no recovery even though it really needs a recovery.

This patch keeps track of whether a bitmap-based-recovery is really needed or not in the new Bitmap_sync rdev flag. If that is set, then the device will not be added to a read-only array.

Cc: Andrei Warkentin <andreiw@vmware.com>
Fixes: d70ed2e4fafdbef0800e73942482bb075c21578b
Cc: stable@vger.kernel.org (3.2+)
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/md/md.c 18
-rw-r--r-- drivers/md/md.h 3
2 files changed, 18 insertions, 3 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e60cebf3f519..2a456a5d59a8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1087,6 +1087,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1087 rdev->raid_disk = -1; 1087 rdev->raid_disk = -1;
1088 clear_bit(Faulty, &rdev->flags); 1088 clear_bit(Faulty, &rdev->flags);
1089 clear_bit(In_sync, &rdev->flags); 1089 clear_bit(In_sync, &rdev->flags);
1090 clear_bit(Bitmap_sync, &rdev->flags);
1090 clear_bit(WriteMostly, &rdev->flags); 1091 clear_bit(WriteMostly, &rdev->flags);
1091 1092
1092 if (mddev->raid_disks == 0) { 1093 if (mddev->raid_disks == 0) {
@@ -1165,6 +1166,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1165 */ 1166 */
1166 if (ev1 < mddev->bitmap->events_cleared) 1167 if (ev1 < mddev->bitmap->events_cleared)
1167 return 0; 1168 return 0;
1169 if (ev1 < mddev->events)
1170 set_bit(Bitmap_sync, &rdev->flags);
1168 } else { 1171 } else {
1169 if (ev1 < mddev->events) 1172 if (ev1 < mddev->events)
1170 /* just a hot-add of a new device, leave raid_disk at -1 */ 1173 /* just a hot-add of a new device, leave raid_disk at -1 */
@@ -1573,6 +1576,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1573 rdev->raid_disk = -1; 1576 rdev->raid_disk = -1;
1574 clear_bit(Faulty, &rdev->flags); 1577 clear_bit(Faulty, &rdev->flags);
1575 clear_bit(In_sync, &rdev->flags); 1578 clear_bit(In_sync, &rdev->flags);
1579 clear_bit(Bitmap_sync, &rdev->flags);
1576 clear_bit(WriteMostly, &rdev->flags); 1580 clear_bit(WriteMostly, &rdev->flags);
1577 1581
1578 if (mddev->raid_disks == 0) { 1582 if (mddev->raid_disks == 0) {
@@ -1655,6 +1659,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1655 */ 1659 */
1656 if (ev1 < mddev->bitmap->events_cleared) 1660 if (ev1 < mddev->bitmap->events_cleared)
1657 return 0; 1661 return 0;
1662 if (ev1 < mddev->events)
1663 set_bit(Bitmap_sync, &rdev->flags);
1658 } else { 1664 } else {
1659 if (ev1 < mddev->events) 1665 if (ev1 < mddev->events)
1660 /* just a hot-add of a new device, leave raid_disk at -1 */ 1666 /* just a hot-add of a new device, leave raid_disk at -1 */
@@ -2798,6 +2804,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2798 else 2804 else
2799 rdev->saved_raid_disk = -1; 2805 rdev->saved_raid_disk = -1;
2800 clear_bit(In_sync, &rdev->flags); 2806 clear_bit(In_sync, &rdev->flags);
2807 clear_bit(Bitmap_sync, &rdev->flags);
2801 err = rdev->mddev->pers-> 2808 err = rdev->mddev->pers->
2802 hot_add_disk(rdev->mddev, rdev); 2809 hot_add_disk(rdev->mddev, rdev);
2803 if (err) { 2810 if (err) {
@@ -5770,6 +5777,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5770 info->raid_disk < mddev->raid_disks) { 5777 info->raid_disk < mddev->raid_disks) {
5771 rdev->raid_disk = info->raid_disk; 5778 rdev->raid_disk = info->raid_disk;
5772 set_bit(In_sync, &rdev->flags); 5779 set_bit(In_sync, &rdev->flags);
5780 clear_bit(Bitmap_sync, &rdev->flags);
5773 } else 5781 } else
5774 rdev->raid_disk = -1; 5782 rdev->raid_disk = -1;
5775 } else 5783 } else
@@ -7716,7 +7724,8 @@ static int remove_and_add_spares(struct mddev *mddev,
7716 if (test_bit(Faulty, &rdev->flags)) 7724 if (test_bit(Faulty, &rdev->flags))
7717 continue; 7725 continue;
7718 if (mddev->ro && 7726 if (mddev->ro &&
7719 rdev->saved_raid_disk < 0) 7727 ! (rdev->saved_raid_disk >= 0 &&
7728 !test_bit(Bitmap_sync, &rdev->flags)))
7720 continue; 7729 continue;
7721 7730
7722 rdev->recovery_offset = 0; 7731 rdev->recovery_offset = 0;
@@ -7797,9 +7806,12 @@ void md_check_recovery(struct mddev *mddev)
7797 * As we only add devices that are already in-sync, 7806 * As we only add devices that are already in-sync,
7798 * we can activate the spares immediately. 7807 * we can activate the spares immediately.
7799 */ 7808 */
7800 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7801 remove_and_add_spares(mddev, NULL); 7809 remove_and_add_spares(mddev, NULL);
7802 mddev->pers->spare_active(mddev); 7810 /* There is no thread, but we need to call
7811 * ->spare_active and clear saved_raid_disk
7812 */
7813 md_reap_sync_thread(mddev);
7814 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7803 goto unlock; 7815 goto unlock;
7804 } 7816 }
7805 7817
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 2f5cc8a7ef3e..0095ec84ffc7 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -129,6 +129,9 @@ struct md_rdev {
129enum flag_bits { 129enum flag_bits {
130 Faulty, /* device is known to have a fault */ 130 Faulty, /* device is known to have a fault */
131 In_sync, /* device is in_sync with rest of array */ 131 In_sync, /* device is in_sync with rest of array */
132 Bitmap_sync, /* ..actually, not quite In_sync. Need a
133 * bitmap-based recovery to get fully in sync
134 */
132 Unmerged, /* device is being added to array and should 135 Unmerged, /* device is being added to array and should
133 * be considerred for bvec_merge_fn but not 136 * be considerred for bvec_merge_fn but not
134 * yet for actual IO 137 * yet for actual IO