aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2011-07-26 21:00:36 -0400
committer: NeilBrown <neilb@suse.de> 2011-07-26 21:00:36 -0400
commit: 700c72138938cf428c74379806886c6b017d6295 (patch)
tree: 93ca6eb628d36e841131aa62a85b45d16b3f45fa /drivers/md
parent: 2bb77736ae5dca0a189829fbb7379d43364a9dac (diff)
md/raid10: Improve decision on whether to fail a device with a read error.
Normally we would fail a device that returns a READ error. However, if doing so causes the array to fail, it is better to leave the device in place and just return the read error to the caller. The current test to decide whether the array will fail is overly simplistic. We have a function 'enough' which can tell whether the array has failed or not, so use it to guide the decision. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/raid10.c 57
1 files changed, 28 insertions, 29 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5583201e5cde..6721cb080354 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -970,6 +970,30 @@ static void status(struct seq_file *seq, mddev_t *mddev)
970 seq_printf(seq, "]"); 970 seq_printf(seq, "]");
971} 971}
972 972
973/* check if there are enough drives for
974 * every block to appear on atleast one.
975 * Don't consider the device numbered 'ignore'
976 * as we might be about to remove it.
977 */
978static int enough(conf_t *conf, int ignore)
979{
980 int first = 0;
981
982 do {
983 int n = conf->copies;
984 int cnt = 0;
985 while (n--) {
986 if (conf->mirrors[first].rdev &&
987 first != ignore)
988 cnt++;
989 first = (first+1) % conf->raid_disks;
990 }
991 if (cnt == 0)
992 return 0;
993 } while (first != 0);
994 return 1;
995}
996
973static void error(mddev_t *mddev, mdk_rdev_t *rdev) 997static void error(mddev_t *mddev, mdk_rdev_t *rdev)
974{ 998{
975 char b[BDEVNAME_SIZE]; 999 char b[BDEVNAME_SIZE];
@@ -982,13 +1006,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
982 * else mark the drive as failed 1006 * else mark the drive as failed
983 */ 1007 */
984 if (test_bit(In_sync, &rdev->flags) 1008 if (test_bit(In_sync, &rdev->flags)
985 && conf->raid_disks-mddev->degraded == 1) 1009 && !enough(conf, rdev->raid_disk))
986 /* 1010 /*
987 * Don't fail the drive, just return an IO error. 1011 * Don't fail the drive, just return an IO error.
988 * The test should really be more sophisticated than
989 * "working_disks == 1", but it isn't critical, and
990 * can wait until we do more sophisticated "is the drive
991 * really dead" tests...
992 */ 1012 */
993 return; 1013 return;
994 if (test_and_clear_bit(In_sync, &rdev->flags)) { 1014 if (test_and_clear_bit(In_sync, &rdev->flags)) {
@@ -1043,27 +1063,6 @@ static void close_sync(conf_t *conf)
1043 conf->r10buf_pool = NULL; 1063 conf->r10buf_pool = NULL;
1044} 1064}
1045 1065
1046/* check if there are enough drives for
1047 * every block to appear on atleast one
1048 */
1049static int enough(conf_t *conf)
1050{
1051 int first = 0;
1052
1053 do {
1054 int n = conf->copies;
1055 int cnt = 0;
1056 while (n--) {
1057 if (conf->mirrors[first].rdev)
1058 cnt++;
1059 first = (first+1) % conf->raid_disks;
1060 }
1061 if (cnt == 0)
1062 return 0;
1063 } while (first != 0);
1064 return 1;
1065}
1066
1067static int raid10_spare_active(mddev_t *mddev) 1066static int raid10_spare_active(mddev_t *mddev)
1068{ 1067{
1069 int i; 1068 int i;
@@ -1107,7 +1106,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
1107 * very different from resync 1106 * very different from resync
1108 */ 1107 */
1109 return -EBUSY; 1108 return -EBUSY;
1110 if (!enough(conf)) 1109 if (!enough(conf, -1))
1111 return -EINVAL; 1110 return -EINVAL;
1112 1111
1113 if (rdev->raid_disk >= 0) 1112 if (rdev->raid_disk >= 0)
@@ -1173,7 +1172,7 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
1173 */ 1172 */
1174 if (!test_bit(Faulty, &rdev->flags) && 1173 if (!test_bit(Faulty, &rdev->flags) &&
1175 mddev->recovery_disabled != p->recovery_disabled && 1174 mddev->recovery_disabled != p->recovery_disabled &&
1176 enough(conf)) { 1175 enough(conf, -1)) {
1177 err = -EBUSY; 1176 err = -EBUSY;
1178 goto abort; 1177 goto abort;
1179 } 1178 }
@@ -2286,7 +2285,7 @@ static int run(mddev_t *mddev)
2286 disk->head_position = 0; 2285 disk->head_position = 0;
2287 } 2286 }
2288 /* need to check that every block has at least one working mirror */ 2287 /* need to check that every block has at least one working mirror */
2289 if (!enough(conf)) { 2288 if (!enough(conf, -1)) {
2290 printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n", 2289 printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
2291 mdname(mddev)); 2290 mdname(mddev));
2292 goto out_free_conf; 2291 goto out_free_conf;