diff options
author | NeilBrown <neilb@suse.com> | 2015-10-21 22:20:15 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.com> | 2015-10-24 01:24:25 -0400 |
commit | 8bce6d35b308d73cdb2ee273c95d711a55be688c (patch) | |
tree | 01b072a83736bae1455d7bb5743d271f91cd6325 | |
parent | c340702ca26a628832fade4f133d8160a55c29cc (diff) |
md/raid10: fix the 'new' raid10 layout to work correctly.
In Linux 3.9 we introduced a new 'far' layout for RAID10 which was
supposed to rotate the replicas differently and so provide better
resilience. In particular it could survive more combinations of 2
drive failures.
Unfortunately, due to a coding error, this sometimes did what was wanted,
sometimes improved less than we hoped, and sometimes - in very
unlikely circumstances - put multiple replicas on the same device so
the redundancy was harmed.
No public user-space tool has created arrays using this layout so it
is very unlikely that zero-redundancy arrays actually exist. Probably
no arrays using any form of the new layout exist. But we cannot be
certain.
So use another bit in the 'layout' number and introduce a bug-fixed
version of the layout.
Also when assembling an array, if it has a zero-redundancy layout,
give a warning.
Reported-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.com>
-rw-r--r-- | drivers/md/raid10.c | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 23de2144ee13..96f365968306 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -39,6 +39,7 @@ | |||
39 | * far_copies (stored in second byte of layout) | 39 | * far_copies (stored in second byte of layout) |
40 | * far_offset (stored in bit 16 of layout ) | 40 | * far_offset (stored in bit 16 of layout ) |
41 | * use_far_sets (stored in bit 17 of layout ) | 41 | * use_far_sets (stored in bit 17 of layout ) |
42 | * use_far_sets_bugfixed (stored in bit 18 of layout ) | ||
42 | * | 43 | * |
43 | * The data to be stored is divided into chunks using chunksize. Each device | 44 | * The data to be stored is divided into chunks using chunksize. Each device |
44 | * is divided into far_copies sections. In each section, chunks are laid out | 45 | * is divided into far_copies sections. In each section, chunks are laid out |
@@ -1497,6 +1498,8 @@ static void status(struct seq_file *seq, struct mddev *mddev) | |||
1497 | seq_printf(seq, " %d offset-copies", conf->geo.far_copies); | 1498 | seq_printf(seq, " %d offset-copies", conf->geo.far_copies); |
1498 | else | 1499 | else |
1499 | seq_printf(seq, " %d far-copies", conf->geo.far_copies); | 1500 | seq_printf(seq, " %d far-copies", conf->geo.far_copies); |
1501 | if (conf->geo.far_set_size != conf->geo.raid_disks) | ||
1502 | seq_printf(seq, " %d devices per set", conf->geo.far_set_size); | ||
1500 | } | 1503 | } |
1501 | seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks, | 1504 | seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks, |
1502 | conf->geo.raid_disks - mddev->degraded); | 1505 | conf->geo.raid_disks - mddev->degraded); |
@@ -3394,7 +3397,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) | |||
3394 | disks = mddev->raid_disks + mddev->delta_disks; | 3397 | disks = mddev->raid_disks + mddev->delta_disks; |
3395 | break; | 3398 | break; |
3396 | } | 3399 | } |
3397 | if (layout >> 18) | 3400 | if (layout >> 19) |
3398 | return -1; | 3401 | return -1; |
3399 | if (chunk < (PAGE_SIZE >> 9) || | 3402 | if (chunk < (PAGE_SIZE >> 9) || |
3400 | !is_power_of_2(chunk)) | 3403 | !is_power_of_2(chunk)) |
@@ -3406,7 +3409,22 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) | |||
3406 | geo->near_copies = nc; | 3409 | geo->near_copies = nc; |
3407 | geo->far_copies = fc; | 3410 | geo->far_copies = fc; |
3408 | geo->far_offset = fo; | 3411 | geo->far_offset = fo; |
3409 | geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks; | 3412 | switch (layout >> 17) { |
3413 | case 0: /* original layout. simple but not always optimal */ | ||
3414 | geo->far_set_size = disks; | ||
3415 | break; | ||
3416 | case 1: /* "improved" layout which was buggy. Hopefully no-one is | ||
3417 | * actually using this, but leave code here just in case.*/ | ||
3418 | geo->far_set_size = disks/fc; | ||
3419 | WARN(geo->far_set_size < fc, | ||
3420 | "This RAID10 layout does not provide data safety - please backup and create new array\n"); | ||
3421 | break; | ||
3422 | case 2: /* "improved" layout fixed to match documentation */ | ||
3423 | geo->far_set_size = fc * nc; | ||
3424 | break; | ||
3425 | default: /* Not a valid layout */ | ||
3426 | return -1; | ||
3427 | } | ||
3410 | geo->chunk_mask = chunk - 1; | 3428 | geo->chunk_mask = chunk - 1; |
3411 | geo->chunk_shift = ffz(~chunk); | 3429 | geo->chunk_shift = ffz(~chunk); |
3412 | return nc*fc; | 3430 | return nc*fc; |