aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.com> 2015-10-21 22:20:15 -0400
committer NeilBrown <neilb@suse.com> 2015-10-24 01:24:25 -0400
commit 8bce6d35b308d73cdb2ee273c95d711a55be688c (patch)
tree 01b072a83736bae1455d7bb5743d271f91cd6325
parent c340702ca26a628832fade4f133d8160a55c29cc (diff)
md/raid10: fix the 'new' raid10 layout to work correctly.
In Linux 3.9 we introduced a new 'far' layout for RAID10 which was supposed to rotate the replicas differently and so provide better resilience. In particular it could survive more combinations of 2 drive failures. Unfortunately, due to a coding error, this sometimes did what was wanted, sometimes improved less than we hoped, and sometimes - in very unlikely circumstances - put multiple replicas on the same device so the redundancy was harmed. No public user-space tool has created arrays using this layout so it is very unlikely that zero-redundancy arrays actually exist. Probably no arrays using any form of the new layout exist. But we cannot be certain. So use another bit in the 'layout' number and introduce a bug-fixed version of the layout. Also when assembling an array, if it has a zero-redundancy layout, give a warning. Reported-by: Heinz Mauelshagen <heinzm@redhat.com> Signed-off-by: NeilBrown <neilb@suse.com>
-rw-r--r-- drivers/md/raid10.c 22
1 files changed, 20 insertions, 2 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 23de2144ee13..96f365968306 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -39,6 +39,7 @@
39 * far_copies (stored in second byte of layout) 39 * far_copies (stored in second byte of layout)
40 * far_offset (stored in bit 16 of layout ) 40 * far_offset (stored in bit 16 of layout )
41 * use_far_sets (stored in bit 17 of layout ) 41 * use_far_sets (stored in bit 17 of layout )
42 * use_far_sets_bugfixed (stored in bit 18 of layout )
42 * 43 *
43 * The data to be stored is divided into chunks using chunksize. Each device 44 * The data to be stored is divided into chunks using chunksize. Each device
44 * is divided into far_copies sections. In each section, chunks are laid out 45 * is divided into far_copies sections. In each section, chunks are laid out
@@ -1497,6 +1498,8 @@ static void status(struct seq_file *seq, struct mddev *mddev)
1497 seq_printf(seq, " %d offset-copies", conf->geo.far_copies); 1498 seq_printf(seq, " %d offset-copies", conf->geo.far_copies);
1498 else 1499 else
1499 seq_printf(seq, " %d far-copies", conf->geo.far_copies); 1500 seq_printf(seq, " %d far-copies", conf->geo.far_copies);
1501 if (conf->geo.far_set_size != conf->geo.raid_disks)
1502 seq_printf(seq, " %d devices per set", conf->geo.far_set_size);
1500 } 1503 }
1501 seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks, 1504 seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
1502 conf->geo.raid_disks - mddev->degraded); 1505 conf->geo.raid_disks - mddev->degraded);
@@ -3394,7 +3397,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
3394 disks = mddev->raid_disks + mddev->delta_disks; 3397 disks = mddev->raid_disks + mddev->delta_disks;
3395 break; 3398 break;
3396 } 3399 }
3397 if (layout >> 18) 3400 if (layout >> 19)
3398 return -1; 3401 return -1;
3399 if (chunk < (PAGE_SIZE >> 9) || 3402 if (chunk < (PAGE_SIZE >> 9) ||
3400 !is_power_of_2(chunk)) 3403 !is_power_of_2(chunk))
@@ -3406,7 +3409,22 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
3406 geo->near_copies = nc; 3409 geo->near_copies = nc;
3407 geo->far_copies = fc; 3410 geo->far_copies = fc;
3408 geo->far_offset = fo; 3411 geo->far_offset = fo;
3409 geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks; 3412 switch (layout >> 17) {
3413 case 0: /* original layout. simple but not always optimal */
3414 geo->far_set_size = disks;
3415 break;
3416 case 1: /* "improved" layout which was buggy. Hopefully no-one is
3417 * actually using this, but leave code here just in case.*/
3418 geo->far_set_size = disks/fc;
3419 WARN(geo->far_set_size < fc,
3420 "This RAID10 layout does not provide data safety - please backup and create new array\n");
3421 break;
3422 case 2: /* "improved" layout fixed to match documentation */
3423 geo->far_set_size = fc * nc;
3424 break;
3425 default: /* Not a valid layout */
3426 return -1;
3427 }
3410 geo->chunk_mask = chunk - 1; 3428 geo->chunk_mask = chunk - 1;
3411 geo->chunk_shift = ffz(~chunk); 3429 geo->chunk_shift = ffz(~chunk);
3412 return nc*fc; 3430 return nc*fc;