aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2015-02-01 22:03:28 -0500
committer NeilBrown <neilb@suse.de> 2015-02-03 16:35:51 -0500
commit ea664c8245f3d5e78d05d1250bc0be0d60e264af (patch)
tree aedfd219092b1c2a4f08a4e743b058d8e87e350c /drivers/md
parent a9d56950f763fa3e9d831541e62d223197d2ff60 (diff)
md/raid5: need_this_block: tidy/fix last condition.
That last condition is unclear and over cautious. There are two related issues here. If a partial write is destined for a missing device, then either RMW or RCW can work. We must read all the available blocks. Only then can the missing blocks be calculated, and then the parity update performed. If RMW is not an option, then there is a complication even without partial writes. If we would need to read a missing device to perform the reconstruction, then we must first read every block so the missing device data can be computed. This is the case for RAID6 (which currently does not support RMW) and for times when we don't trust the parity (after a crash) and so are in the process of resyncing it. So make these two cases more clear and separate, and perform the relevant tests more thoroughly. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid5.c42
1 files changed, 32 insertions, 10 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index bb42551c1a42..a03cf2d889bf 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2902,6 +2902,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
2902 struct r5dev *dev = &sh->dev[disk_idx]; 2902 struct r5dev *dev = &sh->dev[disk_idx];
2903 struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], 2903 struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
2904 &sh->dev[s->failed_num[1]] }; 2904 &sh->dev[s->failed_num[1]] };
2905 int i;
2905 2906
2906 2907
2907 if (test_bit(R5_LOCKED, &dev->flags) || 2908 if (test_bit(R5_LOCKED, &dev->flags) ||
@@ -2949,16 +2950,37 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
2949 * and there is no need to delay that. 2950 * and there is no need to delay that.
2950 */ 2951 */
2951 return 0; 2952 return 0;
2952 if ( 2953
2953 (sh->raid_conf->level <= 5 && fdev[0]->towrite && 2954 for (i = 0; i < s->failed; i++) {
2954 !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || 2955 if (fdev[i]->towrite &&
2955 ((sh->raid_conf->level == 6 || 2956 !test_bit(R5_UPTODATE, &fdev[i]->flags) &&
2956 sh->sector >= sh->raid_conf->mddev->recovery_cp) 2957 !test_bit(R5_OVERWRITE, &fdev[i]->flags))
2957 && 2958 /* If we have a partial write to a failed
2958 (s->to_write - s->non_overwrite < 2959 * device, then we will need to reconstruct
2959 sh->raid_conf->raid_disks - sh->raid_conf->max_degraded) 2960 * the content of that device, so all other
2960 )) 2961 * devices must be read.
2961 return 1; 2962 */
2963 return 1;
2964 }
2965
2966 /* If we are forced to do a reconstruct-write, either because
2967 * the current RAID6 implementation only supports that, or
2968 * because parity cannot be trusted and we are currently
2969 * recovering it, there is extra need to be careful.
2970 * If one of the devices that we would need to read, because
2971 * it is not being overwritten (and maybe not written at all)
2972 * is missing/faulty, then we need to read everything we can.
2973 */
2974 if (sh->raid_conf->level != 6 &&
2975 sh->sector < sh->raid_conf->mddev->recovery_cp)
2976 /* reconstruct-write isn't being forced */
2977 return 0;
2978 for (i = 0; i < s->failed; i++) {
2979 if (!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
2980 !test_bit(R5_OVERWRITE, &fdev[i]->flags))
2981 return 1;
2982 }
2983
2962 return 0; 2984 return 0;
2963} 2985}
2964 2986