diff options
| author | NeilBrown <neilb@suse.de> | 2013-12-11 18:13:33 -0500 |
|---|---|---|
| committer | NeilBrown <neilb@suse.de> | 2014-01-14 00:44:08 -0500 |
| commit | 8313b8e57f55b15e5b7f7fc5d1630bbf686a9a97 (patch) | |
| tree | 80ba177fd5a1566c593f1cd8a963b9ecd0191225 | |
| parent | e8b849158508565e0cd6bc80061124afc5879160 (diff) | |
md: fix problem when adding device to read-only array with bitmap.
If an array is started degraded and the missing device is then
found, it can be re-added, and a minimal bitmap-based recovery
will bring it fully up-to-date.
If the array is read-only, a recovery is not allowed.
However, if the array is read-only and the missing device was
present very recently, there may be no need for any recovery
at all, so we simply include the device in the read-only
array without any recovery.
However, if the missing device was removed a little longer ago,
it could be missing some updates; if a bitmap is present, it will
still be conditionally accepted pending a bitmap-based recovery. We
don't currently detect this case properly, and so include that stale
device in the read-only array with no recovery even though it really
needs one.
This patch tracks whether a bitmap-based recovery is really
needed in the new Bitmap_sync rdev flag. If that flag is set,
the device will not be added to a read-only array.
Cc: Andrei Warkentin <andreiw@vmware.com>
Fixes: d70ed2e4fafdbef0800e73942482bb075c21578b
Cc: stable@vger.kernel.org (3.2+)
Signed-off-by: NeilBrown <neilb@suse.de>
| -rw-r--r-- | drivers/md/md.c | 18 | ||||
| -rw-r--r-- | drivers/md/md.h | 3 |
2 files changed, 18 insertions, 3 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index e60cebf3f519..2a456a5d59a8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -1087,6 +1087,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
| 1087 | rdev->raid_disk = -1; | 1087 | rdev->raid_disk = -1; |
| 1088 | clear_bit(Faulty, &rdev->flags); | 1088 | clear_bit(Faulty, &rdev->flags); |
| 1089 | clear_bit(In_sync, &rdev->flags); | 1089 | clear_bit(In_sync, &rdev->flags); |
| 1090 | clear_bit(Bitmap_sync, &rdev->flags); | ||
| 1090 | clear_bit(WriteMostly, &rdev->flags); | 1091 | clear_bit(WriteMostly, &rdev->flags); |
| 1091 | 1092 | ||
| 1092 | if (mddev->raid_disks == 0) { | 1093 | if (mddev->raid_disks == 0) { |
| @@ -1165,6 +1166,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
| 1165 | */ | 1166 | */ |
| 1166 | if (ev1 < mddev->bitmap->events_cleared) | 1167 | if (ev1 < mddev->bitmap->events_cleared) |
| 1167 | return 0; | 1168 | return 0; |
| 1169 | if (ev1 < mddev->events) | ||
| 1170 | set_bit(Bitmap_sync, &rdev->flags); | ||
| 1168 | } else { | 1171 | } else { |
| 1169 | if (ev1 < mddev->events) | 1172 | if (ev1 < mddev->events) |
| 1170 | /* just a hot-add of a new device, leave raid_disk at -1 */ | 1173 | /* just a hot-add of a new device, leave raid_disk at -1 */ |
| @@ -1573,6 +1576,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
| 1573 | rdev->raid_disk = -1; | 1576 | rdev->raid_disk = -1; |
| 1574 | clear_bit(Faulty, &rdev->flags); | 1577 | clear_bit(Faulty, &rdev->flags); |
| 1575 | clear_bit(In_sync, &rdev->flags); | 1578 | clear_bit(In_sync, &rdev->flags); |
| 1579 | clear_bit(Bitmap_sync, &rdev->flags); | ||
| 1576 | clear_bit(WriteMostly, &rdev->flags); | 1580 | clear_bit(WriteMostly, &rdev->flags); |
| 1577 | 1581 | ||
| 1578 | if (mddev->raid_disks == 0) { | 1582 | if (mddev->raid_disks == 0) { |
| @@ -1655,6 +1659,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
| 1655 | */ | 1659 | */ |
| 1656 | if (ev1 < mddev->bitmap->events_cleared) | 1660 | if (ev1 < mddev->bitmap->events_cleared) |
| 1657 | return 0; | 1661 | return 0; |
| 1662 | if (ev1 < mddev->events) | ||
| 1663 | set_bit(Bitmap_sync, &rdev->flags); | ||
| 1658 | } else { | 1664 | } else { |
| 1659 | if (ev1 < mddev->events) | 1665 | if (ev1 < mddev->events) |
| 1660 | /* just a hot-add of a new device, leave raid_disk at -1 */ | 1666 | /* just a hot-add of a new device, leave raid_disk at -1 */ |
| @@ -2798,6 +2804,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
| 2798 | else | 2804 | else |
| 2799 | rdev->saved_raid_disk = -1; | 2805 | rdev->saved_raid_disk = -1; |
| 2800 | clear_bit(In_sync, &rdev->flags); | 2806 | clear_bit(In_sync, &rdev->flags); |
| 2807 | clear_bit(Bitmap_sync, &rdev->flags); | ||
| 2801 | err = rdev->mddev->pers-> | 2808 | err = rdev->mddev->pers-> |
| 2802 | hot_add_disk(rdev->mddev, rdev); | 2809 | hot_add_disk(rdev->mddev, rdev); |
| 2803 | if (err) { | 2810 | if (err) { |
| @@ -5770,6 +5777,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) | |||
| 5770 | info->raid_disk < mddev->raid_disks) { | 5777 | info->raid_disk < mddev->raid_disks) { |
| 5771 | rdev->raid_disk = info->raid_disk; | 5778 | rdev->raid_disk = info->raid_disk; |
| 5772 | set_bit(In_sync, &rdev->flags); | 5779 | set_bit(In_sync, &rdev->flags); |
| 5780 | clear_bit(Bitmap_sync, &rdev->flags); | ||
| 5773 | } else | 5781 | } else |
| 5774 | rdev->raid_disk = -1; | 5782 | rdev->raid_disk = -1; |
| 5775 | } else | 5783 | } else |
| @@ -7716,7 +7724,8 @@ static int remove_and_add_spares(struct mddev *mddev, | |||
| 7716 | if (test_bit(Faulty, &rdev->flags)) | 7724 | if (test_bit(Faulty, &rdev->flags)) |
| 7717 | continue; | 7725 | continue; |
| 7718 | if (mddev->ro && | 7726 | if (mddev->ro && |
| 7719 | rdev->saved_raid_disk < 0) | 7727 | ! (rdev->saved_raid_disk >= 0 && |
| 7728 | !test_bit(Bitmap_sync, &rdev->flags))) | ||
| 7720 | continue; | 7729 | continue; |
| 7721 | 7730 | ||
| 7722 | rdev->recovery_offset = 0; | 7731 | rdev->recovery_offset = 0; |
| @@ -7797,9 +7806,12 @@ void md_check_recovery(struct mddev *mddev) | |||
| 7797 | * As we only add devices that are already in-sync, | 7806 | * As we only add devices that are already in-sync, |
| 7798 | * we can activate the spares immediately. | 7807 | * we can activate the spares immediately. |
| 7799 | */ | 7808 | */ |
| 7800 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
| 7801 | remove_and_add_spares(mddev, NULL); | 7809 | remove_and_add_spares(mddev, NULL); |
| 7802 | mddev->pers->spare_active(mddev); | 7810 | /* There is no thread, but we need to call |
| 7811 | * ->spare_active and clear saved_raid_disk | ||
| 7812 | */ | ||
| 7813 | md_reap_sync_thread(mddev); | ||
| 7814 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
| 7803 | goto unlock; | 7815 | goto unlock; |
| 7804 | } | 7816 | } |
| 7805 | 7817 | ||
diff --git a/drivers/md/md.h b/drivers/md/md.h index 2f5cc8a7ef3e..0095ec84ffc7 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
| @@ -129,6 +129,9 @@ struct md_rdev { | |||
| 129 | enum flag_bits { | 129 | enum flag_bits { |
| 130 | Faulty, /* device is known to have a fault */ | 130 | Faulty, /* device is known to have a fault */ |
| 131 | In_sync, /* device is in_sync with rest of array */ | 131 | In_sync, /* device is in_sync with rest of array */ |
| 132 | Bitmap_sync, /* ..actually, not quite In_sync. Need a | ||
| 133 | * bitmap-based recovery to get fully in sync | ||
| 134 | */ | ||
| 132 | Unmerged, /* device is being added to array and should | 135 | Unmerged, /* device is being added to array and should |
| 133 | * be considerred for bvec_merge_fn but not | 136 | * be considerred for bvec_merge_fn but not |
| 134 | * yet for actual IO | 137 | * yet for actual IO |
