aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid10.c58
-rw-r--r--drivers/md/raid10.h5
2 files changed, 45 insertions, 18 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index de174ad6f8bd..70b58b4bcf89 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -38,21 +38,36 @@
38 * near_copies (stored in low byte of layout) 38 * near_copies (stored in low byte of layout)
39 * far_copies (stored in second byte of layout) 39 * far_copies (stored in second byte of layout)
40 * far_offset (stored in bit 16 of layout ) 40 * far_offset (stored in bit 16 of layout )
41 * use_far_sets (stored in bit 17 of layout )
41 * 42 *
42 * The data to be stored is divided into chunks using chunksize. 43 * The data to be stored is divided into chunks using chunksize. Each device
43 * Each device is divided into far_copies sections. 44 * is divided into far_copies sections. In each section, chunks are laid out
44 * In each section, chunks are laid out in a style similar to raid0, but 45 * in a style similar to raid0, but near_copies copies of each chunk are stored
45 * near_copies copies of each chunk is stored (each on a different drive). 46 * (each on a different drive). The starting device for each section is offset
46 * The starting device for each section is offset near_copies from the starting 47 * near_copies from the starting device of the previous section. Thus there
47 * device of the previous section. 48 * are (near_copies * far_copies) of each chunk, and each is on a different
48 * Thus they are (near_copies*far_copies) of each chunk, and each is on a different 49 * drive. near_copies and far_copies must be at least one, and their product
49 * drive. 50 * is at most raid_disks.
50 * near_copies and far_copies must be at least one, and their product is at most
51 * raid_disks.
52 * 51 *
53 * If far_offset is true, then the far_copies are handled a bit differently. 52 * If far_offset is true, then the far_copies are handled a bit differently.
54 * The copies are still in different stripes, but instead of be very far apart 53 * The copies are still in different stripes, but instead of being very far
55 * on disk, there are adjacent stripes. 54 * apart on disk, they are in adjacent stripes.
55 *
56 * The far and offset algorithms are handled slightly differently if
57 * 'use_far_sets' is true. In this case, the array's devices are grouped into
58 * sets that are (near_copies * far_copies) in size. The far copied stripes
59 * are still shifted by 'near_copies' devices, but this shifting stays confined
60 * to the set rather than the entire array. This is done to improve the number
61 * of device combinations that can fail without causing the array to fail.
62 * Example 'far' algorithm w/o 'use_far_sets' (each letter represents a chunk
63 * on a device):
64 * A B C D A B C D E
65 * ... ...
66 * D A B C E A B C D
67 * Example 'far' algorithm w/ 'use_far_sets' enabled (sets illustrated w/ []'s):
68 * [A B] [C D] [A B] [C D E]
69 * |...| |...| |...| | ... |
70 * [B A] [D C] [B A] [E C D]
56 */ 71 */
57 72
58/* 73/*
@@ -551,14 +566,18 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio)
551 /* and calculate all the others */ 566 /* and calculate all the others */
552 for (n = 0; n < geo->near_copies; n++) { 567 for (n = 0; n < geo->near_copies; n++) {
553 int d = dev; 568 int d = dev;
569 int set;
554 sector_t s = sector; 570 sector_t s = sector;
555 r10bio->devs[slot].devnum = d; 571 r10bio->devs[slot].devnum = d;
556 r10bio->devs[slot].addr = s; 572 r10bio->devs[slot].addr = s;
557 slot++; 573 slot++;
558 574
559 for (f = 1; f < geo->far_copies; f++) { 575 for (f = 1; f < geo->far_copies; f++) {
576 set = d / geo->far_set_size;
560 d += geo->near_copies; 577 d += geo->near_copies;
561 d %= geo->raid_disks; 578 d %= geo->far_set_size;
579 d += geo->far_set_size * set;
580
562 s += geo->stride; 581 s += geo->stride;
563 r10bio->devs[slot].devnum = d; 582 r10bio->devs[slot].devnum = d;
564 r10bio->devs[slot].addr = s; 583 r10bio->devs[slot].addr = s;
@@ -594,6 +613,8 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
594 * or recovery, so reshape isn't happening 613 * or recovery, so reshape isn't happening
595 */ 614 */
596 struct geom *geo = &conf->geo; 615 struct geom *geo = &conf->geo;
616 int far_set_start = (dev / geo->far_set_size) * geo->far_set_size;
617 int far_set_size = geo->far_set_size;
597 618
598 offset = sector & geo->chunk_mask; 619 offset = sector & geo->chunk_mask;
599 if (geo->far_offset) { 620 if (geo->far_offset) {
@@ -601,13 +622,13 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
601 chunk = sector >> geo->chunk_shift; 622 chunk = sector >> geo->chunk_shift;
602 fc = sector_div(chunk, geo->far_copies); 623 fc = sector_div(chunk, geo->far_copies);
603 dev -= fc * geo->near_copies; 624 dev -= fc * geo->near_copies;
604 if (dev < 0) 625 if (dev < far_set_start)
605 dev += geo->raid_disks; 626 dev += far_set_size;
606 } else { 627 } else {
607 while (sector >= geo->stride) { 628 while (sector >= geo->stride) {
608 sector -= geo->stride; 629 sector -= geo->stride;
609 if (dev < geo->near_copies) 630 if (dev < (geo->near_copies + far_set_start))
610 dev += geo->raid_disks - geo->near_copies; 631 dev += far_set_size - geo->near_copies;
611 else 632 else
612 dev -= geo->near_copies; 633 dev -= geo->near_copies;
613 } 634 }
@@ -3438,7 +3459,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
3438 disks = mddev->raid_disks + mddev->delta_disks; 3459 disks = mddev->raid_disks + mddev->delta_disks;
3439 break; 3460 break;
3440 } 3461 }
3441 if (layout >> 17) 3462 if (layout >> 18)
3442 return -1; 3463 return -1;
3443 if (chunk < (PAGE_SIZE >> 9) || 3464 if (chunk < (PAGE_SIZE >> 9) ||
3444 !is_power_of_2(chunk)) 3465 !is_power_of_2(chunk))
@@ -3450,6 +3471,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
3450 geo->near_copies = nc; 3471 geo->near_copies = nc;
3451 geo->far_copies = fc; 3472 geo->far_copies = fc;
3452 geo->far_offset = fo; 3473 geo->far_offset = fo;
3474 geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks;
3453 geo->chunk_mask = chunk - 1; 3475 geo->chunk_mask = chunk - 1;
3454 geo->chunk_shift = ffz(~chunk); 3476 geo->chunk_shift = ffz(~chunk);
3455 return nc*fc; 3477 return nc*fc;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 1054cf602345..157d69e83ff4 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -33,6 +33,11 @@ struct r10conf {
33 * far_offset, in which case it is 33 * far_offset, in which case it is
34 * 1 stripe. 34 * 1 stripe.
35 */ 35 */
36 int far_set_size; /* The number of devices in a set,
37 * where a 'set' is a group of devices that
38 * contain far/offset copies of
39 * each other.
40 */
36 int chunk_shift; /* shift from chunks to sectors */ 41 int chunk_shift; /* shift from chunks to sectors */
37 sector_t chunk_mask; 42 sector_t chunk_mask;
38 } prev, geo; 43 } prev, geo;