aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2009-11-13 01:47:00 -0500
committerNeilBrown <neilb@suse.de>2009-11-13 01:47:00 -0500
commitc148ffdcda00b6599b70f8b65e6a1fadd1dbb127 (patch)
tree3d50cc9dbef926f62a588dc1f45f1df304e1bf31
parent7ef90146a14c2bb1de2e22399f147ebec5b74f0b (diff)
md/raid5: Allow dirty-degraded arrays to be assembled when only party is degraded.
Normally is it not safe to allow a raid5 that is both dirty and degraded to be assembled without explicit request from that admin, as it can cause hidden data corruption. This is because 'dirty' means that the parity cannot be trusted, and 'degraded' means that the parity needs to be used. However, if the device that is missing contains only parity, then there is no issue and assembly can continue. This particularly applies when a RAID5 is being converted to a RAID6 and there is an unclean shutdown while the conversion is happening. So check for whether the degraded space only contains parity, and in that case, allow the assembly. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid5.c79
1 files changed, 75 insertions, 4 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ab40529bdabe..d29215d966da 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4823,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4823 return ERR_PTR(-ENOMEM); 4823 return ERR_PTR(-ENOMEM);
4824} 4824}
4825 4825
4826
4827static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
4828{
4829 switch (algo) {
4830 case ALGORITHM_PARITY_0:
4831 if (raid_disk < max_degraded)
4832 return 1;
4833 break;
4834 case ALGORITHM_PARITY_N:
4835 if (raid_disk >= raid_disks - max_degraded)
4836 return 1;
4837 break;
4838 case ALGORITHM_PARITY_0_6:
4839 if (raid_disk == 0 ||
4840 raid_disk == raid_disks - 1)
4841 return 1;
4842 break;
4843 case ALGORITHM_LEFT_ASYMMETRIC_6:
4844 case ALGORITHM_RIGHT_ASYMMETRIC_6:
4845 case ALGORITHM_LEFT_SYMMETRIC_6:
4846 case ALGORITHM_RIGHT_SYMMETRIC_6:
4847 if (raid_disk == raid_disks - 1)
4848 return 1;
4849 }
4850 return 0;
4851}
4852
4826static int run(mddev_t *mddev) 4853static int run(mddev_t *mddev)
4827{ 4854{
4828 raid5_conf_t *conf; 4855 raid5_conf_t *conf;
4829 int working_disks = 0, chunk_size; 4856 int working_disks = 0, chunk_size;
4857 int dirty_parity_disks = 0;
4830 mdk_rdev_t *rdev; 4858 mdk_rdev_t *rdev;
4859 sector_t reshape_offset = 0;
4831 4860
4832 if (mddev->recovery_cp != MaxSector) 4861 if (mddev->recovery_cp != MaxSector)
4833 printk(KERN_NOTICE "raid5: %s is not clean" 4862 printk(KERN_NOTICE "raid5: %s is not clean"
@@ -4861,6 +4890,7 @@ static int run(mddev_t *mddev)
4861 "on a stripe boundary\n"); 4890 "on a stripe boundary\n");
4862 return -EINVAL; 4891 return -EINVAL;
4863 } 4892 }
4893 reshape_offset = here_new * mddev->new_chunk_sectors;
4864 /* here_new is the stripe we will write to */ 4894 /* here_new is the stripe we will write to */
4865 here_old = mddev->reshape_position; 4895 here_old = mddev->reshape_position;
4866 sector_div(here_old, mddev->chunk_sectors * 4896 sector_div(here_old, mddev->chunk_sectors *
@@ -4916,10 +4946,51 @@ static int run(mddev_t *mddev)
4916 /* 4946 /*
4917 * 0 for a fully functional array, 1 or 2 for a degraded array. 4947 * 0 for a fully functional array, 1 or 2 for a degraded array.
4918 */ 4948 */
4919 list_for_each_entry(rdev, &mddev->disks, same_set) 4949 list_for_each_entry(rdev, &mddev->disks, same_set) {
4920 if (rdev->raid_disk >= 0 && 4950 if (rdev->raid_disk < 0)
4921 test_bit(In_sync, &rdev->flags)) 4951 continue;
4952 if (test_bit(In_sync, &rdev->flags))
4922 working_disks++; 4953 working_disks++;
4954 /* This disc is not fully in-sync. However if it
4955 * just stored parity (beyond the recovery_offset),
4956 * when we don't need to be concerned about the
4957 * array being dirty.
4958 * When reshape goes 'backwards', we never have
4959 * partially completed devices, so we only need
4960 * to worry about reshape going forwards.
4961 */
4962 /* Hack because v0.91 doesn't store recovery_offset properly. */
4963 if (mddev->major_version == 0 &&
4964 mddev->minor_version > 90)
4965 rdev->recovery_offset = reshape_offset;
4966
4967 printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n",
4968 rdev->raid_disk, working_disks, conf->prev_algo,
4969 conf->previous_raid_disks, conf->max_degraded,
4970 conf->algorithm, conf->raid_disks,
4971 only_parity(rdev->raid_disk,
4972 conf->prev_algo,
4973 conf->previous_raid_disks,
4974 conf->max_degraded),
4975 only_parity(rdev->raid_disk,
4976 conf->algorithm,
4977 conf->raid_disks,
4978 conf->max_degraded));
4979 if (rdev->recovery_offset < reshape_offset) {
4980 /* We need to check old and new layout */
4981 if (!only_parity(rdev->raid_disk,
4982 conf->algorithm,
4983 conf->raid_disks,
4984 conf->max_degraded))
4985 continue;
4986 }
4987 if (!only_parity(rdev->raid_disk,
4988 conf->prev_algo,
4989 conf->previous_raid_disks,
4990 conf->max_degraded))
4991 continue;
4992 dirty_parity_disks++;
4993 }
4923 4994
4924 mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks) 4995 mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
4925 - working_disks); 4996 - working_disks);
@@ -4935,7 +5006,7 @@ static int run(mddev_t *mddev)
4935 mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); 5006 mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
4936 mddev->resync_max_sectors = mddev->dev_sectors; 5007 mddev->resync_max_sectors = mddev->dev_sectors;
4937 5008
4938 if (mddev->degraded > 0 && 5009 if (mddev->degraded > dirty_parity_disks &&
4939 mddev->recovery_cp != MaxSector) { 5010 mddev->recovery_cp != MaxSector) {
4940 if (mddev->ok_start_degraded) 5011 if (mddev->ok_start_degraded)
4941 printk(KERN_WARNING 5012 printk(KERN_WARNING