diff options
author | NeilBrown <neilb@suse.de> | 2013-12-11 18:13:33 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2014-01-25 11:27:12 -0500 |
commit | bb4a65df3097524be403d455176710aee14d41f7 (patch) | |
tree | 51b1817c03b7799797b3c17f719d5acd75d50518 /drivers/md | |
parent | ba8ee5985099dbd8f01ac5dcc7ad8ddf23056e87 (diff) |
md: fix problem when adding device to read-only array with bitmap.
commit 8313b8e57f55b15e5b7f7fc5d1630bbf686a9a97 upstream.
If an array is started degraded and the missing device is then
found, it can be re-added, and a minimal bitmap-based recovery
will bring it fully up-to-date.
If the array is read-only, a recovery would not be allowed.
But also if the array is read-only and the missing device was
present very recently, then there could be no need for any
recovery at all, so we simply include the device in the read-only
array without any recovery.
However, if the missing device was removed a little longer ago,
it could be missing some updates; if a bitmap is present, it will
still be conditionally accepted pending a bitmap-based update. We don't
currently detect this case properly, and so we include that old
device in the read-only array with no recovery even though it really
needs a recovery.
This patch uses the new Bitmap_sync rdev flag to keep track of whether
a bitmap-based recovery is really needed. If that flag is set,
then the device will not be added to a read-only array.
Cc: Andrei Warkentin <andreiw@vmware.com>
Fixes: d70ed2e4fafdbef0800e73942482bb075c21578b
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/md.c | 18 | ||||
-rw-r--r-- | drivers/md/md.h | 3 |
2 files changed, 18 insertions, 3 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 2290b95009de..a2dda416c9cb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -1118,6 +1118,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
1118 | rdev->raid_disk = -1; | 1118 | rdev->raid_disk = -1; |
1119 | clear_bit(Faulty, &rdev->flags); | 1119 | clear_bit(Faulty, &rdev->flags); |
1120 | clear_bit(In_sync, &rdev->flags); | 1120 | clear_bit(In_sync, &rdev->flags); |
1121 | clear_bit(Bitmap_sync, &rdev->flags); | ||
1121 | clear_bit(WriteMostly, &rdev->flags); | 1122 | clear_bit(WriteMostly, &rdev->flags); |
1122 | 1123 | ||
1123 | if (mddev->raid_disks == 0) { | 1124 | if (mddev->raid_disks == 0) { |
@@ -1196,6 +1197,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
1196 | */ | 1197 | */ |
1197 | if (ev1 < mddev->bitmap->events_cleared) | 1198 | if (ev1 < mddev->bitmap->events_cleared) |
1198 | return 0; | 1199 | return 0; |
1200 | if (ev1 < mddev->events) | ||
1201 | set_bit(Bitmap_sync, &rdev->flags); | ||
1199 | } else { | 1202 | } else { |
1200 | if (ev1 < mddev->events) | 1203 | if (ev1 < mddev->events) |
1201 | /* just a hot-add of a new device, leave raid_disk at -1 */ | 1204 | /* just a hot-add of a new device, leave raid_disk at -1 */ |
@@ -1604,6 +1607,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
1604 | rdev->raid_disk = -1; | 1607 | rdev->raid_disk = -1; |
1605 | clear_bit(Faulty, &rdev->flags); | 1608 | clear_bit(Faulty, &rdev->flags); |
1606 | clear_bit(In_sync, &rdev->flags); | 1609 | clear_bit(In_sync, &rdev->flags); |
1610 | clear_bit(Bitmap_sync, &rdev->flags); | ||
1607 | clear_bit(WriteMostly, &rdev->flags); | 1611 | clear_bit(WriteMostly, &rdev->flags); |
1608 | 1612 | ||
1609 | if (mddev->raid_disks == 0) { | 1613 | if (mddev->raid_disks == 0) { |
@@ -1686,6 +1690,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
1686 | */ | 1690 | */ |
1687 | if (ev1 < mddev->bitmap->events_cleared) | 1691 | if (ev1 < mddev->bitmap->events_cleared) |
1688 | return 0; | 1692 | return 0; |
1693 | if (ev1 < mddev->events) | ||
1694 | set_bit(Bitmap_sync, &rdev->flags); | ||
1689 | } else { | 1695 | } else { |
1690 | if (ev1 < mddev->events) | 1696 | if (ev1 < mddev->events) |
1691 | /* just a hot-add of a new device, leave raid_disk at -1 */ | 1697 | /* just a hot-add of a new device, leave raid_disk at -1 */ |
@@ -2829,6 +2835,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2829 | else | 2835 | else |
2830 | rdev->saved_raid_disk = -1; | 2836 | rdev->saved_raid_disk = -1; |
2831 | clear_bit(In_sync, &rdev->flags); | 2837 | clear_bit(In_sync, &rdev->flags); |
2838 | clear_bit(Bitmap_sync, &rdev->flags); | ||
2832 | err = rdev->mddev->pers-> | 2839 | err = rdev->mddev->pers-> |
2833 | hot_add_disk(rdev->mddev, rdev); | 2840 | hot_add_disk(rdev->mddev, rdev); |
2834 | if (err) { | 2841 | if (err) { |
@@ -5761,6 +5768,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) | |||
5761 | info->raid_disk < mddev->raid_disks) { | 5768 | info->raid_disk < mddev->raid_disks) { |
5762 | rdev->raid_disk = info->raid_disk; | 5769 | rdev->raid_disk = info->raid_disk; |
5763 | set_bit(In_sync, &rdev->flags); | 5770 | set_bit(In_sync, &rdev->flags); |
5771 | clear_bit(Bitmap_sync, &rdev->flags); | ||
5764 | } else | 5772 | } else |
5765 | rdev->raid_disk = -1; | 5773 | rdev->raid_disk = -1; |
5766 | } else | 5774 | } else |
@@ -7694,7 +7702,8 @@ static int remove_and_add_spares(struct mddev *mddev, | |||
7694 | if (test_bit(Faulty, &rdev->flags)) | 7702 | if (test_bit(Faulty, &rdev->flags)) |
7695 | continue; | 7703 | continue; |
7696 | if (mddev->ro && | 7704 | if (mddev->ro && |
7697 | rdev->saved_raid_disk < 0) | 7705 | ! (rdev->saved_raid_disk >= 0 && |
7706 | !test_bit(Bitmap_sync, &rdev->flags))) | ||
7698 | continue; | 7707 | continue; |
7699 | 7708 | ||
7700 | rdev->recovery_offset = 0; | 7709 | rdev->recovery_offset = 0; |
@@ -7775,9 +7784,12 @@ void md_check_recovery(struct mddev *mddev) | |||
7775 | * As we only add devices that are already in-sync, | 7784 | * As we only add devices that are already in-sync, |
7776 | * we can activate the spares immediately. | 7785 | * we can activate the spares immediately. |
7777 | */ | 7786 | */ |
7778 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
7779 | remove_and_add_spares(mddev, NULL); | 7787 | remove_and_add_spares(mddev, NULL); |
7780 | mddev->pers->spare_active(mddev); | 7788 | /* There is no thread, but we need to call |
7789 | * ->spare_active and clear saved_raid_disk | ||
7790 | */ | ||
7791 | md_reap_sync_thread(mddev); | ||
7792 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
7781 | goto unlock; | 7793 | goto unlock; |
7782 | } | 7794 | } |
7783 | 7795 | ||
diff --git a/drivers/md/md.h b/drivers/md/md.h index 653f992b687a..ebe748e57416 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -129,6 +129,9 @@ struct md_rdev { | |||
129 | enum flag_bits { | 129 | enum flag_bits { |
130 | Faulty, /* device is known to have a fault */ | 130 | Faulty, /* device is known to have a fault */ |
131 | In_sync, /* device is in_sync with rest of array */ | 131 | In_sync, /* device is in_sync with rest of array */ |
132 | Bitmap_sync, /* ..actually, not quite In_sync. Need a | ||
133 | * bitmap-based recovery to get fully in sync | ||
134 | */ | ||
132 | Unmerged, /* device is being added to array and should | 135 | Unmerged, /* device is being added to array and should |
133 | * be considerred for bvec_merge_fn but not | 136 | * be considerred for bvec_merge_fn but not |
134 | * yet for actual IO | 137 | * yet for actual IO |