md/raid5: need_this_block: tidy/fix last condition.

That last condition is unclear and over cautious. There are two related issues here. If a partial write is destined for a missing device, then either RMW or RCW can work. We must read all the available blocks. Only then can the missing blocks be calculated, and then the parity update performed. If RMW is not an option, then there is a complication even without partial writes. If we would need to read a missing device to perform the reconstruction, then we must first read every block so the missing device data can be computed. This is the case for RAID6 (which currently does not support RMW) and for times when we don't trust the parity (after a crash) and so are in the process of resyncing it. So make these two cases more clear and separate, and perform the relevant tests more thoroughly. Signed-off-by: NeilBrown <neilb@suse.de>
author: NeilBrown <neilb@suse.de> 2015-02-01 22:03:28 -0500
committer: NeilBrown <neilb@suse.de> 2015-02-03 16:35:51 -0500
commit: ea664c8245f3d5e78d05d1250bc0be0d60e264af (patch)
tree: aedfd219092b1c2a4f08a4e743b058d8e87e350c /drivers/md
parent: a9d56950f763fa3e9d831541e62d223197d2ff60 (diff)
1 files changed, 32 insertions, 10 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index bb42551c1a42..a03cf2d889bf 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2902,6 +2902,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
        struct r5dev *dev = &sh->dev[disk_idx];
        struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
                                  &sh->dev[s->failed_num[1]] };
+        int i;
        if (test_bit(R5_LOCKED, &dev->flags) ||
@@ -2949,16 +2950,37 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
                 * and there is no need to delay that.
                 */
                return 0;
-        if (
-             (sh->raid_conf->level <= 5 && fdev[0]->towrite &&
+        for (i = 0; i < s->failed; i++) {
-              !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
+                if (fdev[i]->towrite &&
-             ((sh->raid_conf->level == 6 ||
+                    !test_bit(R5_UPTODATE, &fdev[i]->flags) &&
-               sh->sector >= sh->raid_conf->mddev->recovery_cp)
+                    !test_bit(R5_OVERWRITE, &fdev[i]->flags))
-              &&
+                        /* If we have a partial write to a failed
-              (s->to_write - s->non_overwrite <
+                         * device, then we will need to reconstruct
-               sh->raid_conf->raid_disks - sh->raid_conf->max_degraded)
+                         * the content of that device, so all other
-              ))
+                         * devices must be read.
-                return 1;
+                         */
+                        return 1;
+        }
+        /* If we are forced to do a reconstruct-write, either because
+         * the current RAID6 implementation only supports that, or
+         * or because parity cannot be trusted and we are currently
+         * recovering it, there is extra need to be careful.
+         * If one of the devices that we would need to read, because
+         * it is not being overwritten (and maybe not written at all)
+         * is missing/faulty, then we need to read everything we can.
+         */
+        if (sh->raid_conf->level != 6 &&
+            sh->sector < sh->raid_conf->mddev->recovery_cp)
+                /* reconstruct-write isn't being forced */
+                return 0;
+        for (i = 0; i < s->failed; i++) {
+                if (!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
+                    !test_bit(R5_OVERWRITE, &fdev[i]->flags))
+                        return 1;
+        }
        return 0;
 }
author	NeilBrown <neilb@suse.de>	2015-02-01 22:03:28 -0500
committer	NeilBrown <neilb@suse.de>	2015-02-03 16:35:51 -0500
commit	ea664c8245f3d5e78d05d1250bc0be0d60e264af (patch)
tree	aedfd219092b1c2a4f08a4e743b058d8e87e350c /drivers/md
parent	a9d56950f763fa3e9d831541e62d223197d2ff60 (diff)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index bb42551c1a42..a03cf2d889bf 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c
@@ -2902,6 +2902,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
2902	struct r5dev *dev = &sh->dev[disk_idx];	2902	struct r5dev *dev = &sh->dev[disk_idx];
2903	struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],	2903	struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
2904	&sh->dev[s->failed_num[1]] };	2904	&sh->dev[s->failed_num[1]] };
		2905	int i;
2905		2906
2906		2907
2907	if (test_bit(R5_LOCKED, &dev->flags) \|\|	2908	if (test_bit(R5_LOCKED, &dev->flags) \|\|
@@ -2949,16 +2950,37 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
2949	* and there is no need to delay that.	2950	* and there is no need to delay that.
2950	*/	2951	*/
2951	return 0;	2952	return 0;
2952	if (	2953
2953	(sh->raid_conf->level <= 5 && fdev[0]->towrite &&	2954	for (i = 0; i < s->failed; i++) {
2954	!test_bit(R5_OVERWRITE, &fdev[0]->flags)) \|\|	2955	if (fdev[i]->towrite &&
2955	((sh->raid_conf->level == 6 \|\|	2956	!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
2956	sh->sector >= sh->raid_conf->mddev->recovery_cp)	2957	!test_bit(R5_OVERWRITE, &fdev[i]->flags))
2957	&&	2958	/* If we have a partial write to a failed
2958	(s->to_write - s->non_overwrite <	2959	* device, then we will need to reconstruct
2959	sh->raid_conf->raid_disks - sh->raid_conf->max_degraded)	2960	* the content of that device, so all other
2960	))	2961	* devices must be read.
2961	return 1;	2962	*/
		2963	return 1;
		2964	}
		2965
		2966	/* If we are forced to do a reconstruct-write, either because
		2967	* the current RAID6 implementation only supports that, or
		2968	* or because parity cannot be trusted and we are currently
		2969	* recovering it, there is extra need to be careful.
		2970	* If one of the devices that we would need to read, because
		2971	* it is not being overwritten (and maybe not written at all)
		2972	* is missing/faulty, then we need to read everything we can.
		2973	*/
		2974	if (sh->raid_conf->level != 6 &&
		2975	sh->sector < sh->raid_conf->mddev->recovery_cp)
		2976	/* reconstruct-write isn't being forced */
		2977	return 0;
		2978	for (i = 0; i < s->failed; i++) {
		2979	if (!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
		2980	!test_bit(R5_OVERWRITE, &fdev[i]->flags))
		2981	return 1;
		2982	}
		2983
2962	return 0;	2984	return 0;
2963	}	2985	}
2964		2986