diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid10.c | 58 | ||||
-rw-r--r-- | drivers/md/raid10.h | 5 |
2 files changed, 45 insertions, 18 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index de174ad6f8bd..70b58b4bcf89 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -38,21 +38,36 @@ | |||
38 | * near_copies (stored in low byte of layout) | 38 | * near_copies (stored in low byte of layout) |
39 | * far_copies (stored in second byte of layout) | 39 | * far_copies (stored in second byte of layout) |
40 | * far_offset (stored in bit 16 of layout ) | 40 | * far_offset (stored in bit 16 of layout ) |
41 | * use_far_sets (stored in bit 17 of layout ) | ||
41 | * | 42 | * |
42 | * The data to be stored is divided into chunks using chunksize. | 43 | * The data to be stored is divided into chunks using chunksize. Each device |
43 | * Each device is divided into far_copies sections. | 44 | * is divided into far_copies sections. In each section, chunks are laid out |
44 | * In each section, chunks are laid out in a style similar to raid0, but | 45 | * in a style similar to raid0, but near_copies copies of each chunk is stored |
45 | * near_copies copies of each chunk is stored (each on a different drive). | 46 | * (each on a different drive). The starting device for each section is offset |
46 | * The starting device for each section is offset near_copies from the starting | 47 | * near_copies from the starting device of the previous section. Thus there |
47 | * device of the previous section. | 48 | * are (near_copies * far_copies) of each chunk, and each is on a different |
48 | * Thus they are (near_copies*far_copies) of each chunk, and each is on a different | 49 | * drive. near_copies and far_copies must be at least one, and their product |
49 | * drive. | 50 | * is at most raid_disks. |
50 | * near_copies and far_copies must be at least one, and their product is at most | ||
51 | * raid_disks. | ||
52 | * | 51 | * |
53 | * If far_offset is true, then the far_copies are handled a bit differently. | 52 | * If far_offset is true, then the far_copies are handled a bit differently. |
54 | * The copies are still in different stripes, but instead of be very far apart | 53 | * The copies are still in different stripes, but instead of being very far |
55 | * on disk, there are adjacent stripes. | 54 | * apart on disk, there are adjacent stripes. |
55 | * | ||
56 | * The far and offset algorithms are handled slightly differently if | ||
57 | * 'use_far_sets' is true. In this case, the array's devices are grouped into | ||
58 | * sets that are (near_copies * far_copies) in size. The far copied stripes | ||
59 | * are still shifted by 'near_copies' devices, but this shifting stays confined | ||
60 | * to the set rather than the entire array. This is done to improve the number | ||
61 | * of device combinations that can fail without causing the array to fail. | ||
62 | * Example 'far' algorithm w/o 'use_far_sets' (each letter represents a chunk | ||
63 | * on a device): | ||
64 | * A B C D A B C D E | ||
65 | * ... ... | ||
66 | * D A B C E A B C D | ||
67 | * Example 'far' algorithm w/ 'use_far_sets' enabled (sets illustrated w/ []'s): | ||
68 | * [A B] [C D] [A B] [C D E] | ||
69 | * |...| |...| |...| | ... | | ||
70 | * [B A] [D C] [B A] [E C D] | ||
56 | */ | 71 | */ |
57 | 72 | ||
58 | /* | 73 | /* |
@@ -551,14 +566,18 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) | |||
551 | /* and calculate all the others */ | 566 | /* and calculate all the others */ |
552 | for (n = 0; n < geo->near_copies; n++) { | 567 | for (n = 0; n < geo->near_copies; n++) { |
553 | int d = dev; | 568 | int d = dev; |
569 | int set; | ||
554 | sector_t s = sector; | 570 | sector_t s = sector; |
555 | r10bio->devs[slot].devnum = d; | 571 | r10bio->devs[slot].devnum = d; |
556 | r10bio->devs[slot].addr = s; | 572 | r10bio->devs[slot].addr = s; |
557 | slot++; | 573 | slot++; |
558 | 574 | ||
559 | for (f = 1; f < geo->far_copies; f++) { | 575 | for (f = 1; f < geo->far_copies; f++) { |
576 | set = d / geo->far_set_size; | ||
560 | d += geo->near_copies; | 577 | d += geo->near_copies; |
561 | d %= geo->raid_disks; | 578 | d %= geo->far_set_size; |
579 | d += geo->far_set_size * set; | ||
580 | |||
562 | s += geo->stride; | 581 | s += geo->stride; |
563 | r10bio->devs[slot].devnum = d; | 582 | r10bio->devs[slot].devnum = d; |
564 | r10bio->devs[slot].addr = s; | 583 | r10bio->devs[slot].addr = s; |
@@ -594,6 +613,8 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) | |||
594 | * or recovery, so reshape isn't happening | 613 | * or recovery, so reshape isn't happening |
595 | */ | 614 | */ |
596 | struct geom *geo = &conf->geo; | 615 | struct geom *geo = &conf->geo; |
616 | int far_set_start = (dev / geo->far_set_size) * geo->far_set_size; | ||
617 | int far_set_size = geo->far_set_size; | ||
597 | 618 | ||
598 | offset = sector & geo->chunk_mask; | 619 | offset = sector & geo->chunk_mask; |
599 | if (geo->far_offset) { | 620 | if (geo->far_offset) { |
@@ -601,13 +622,13 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) | |||
601 | chunk = sector >> geo->chunk_shift; | 622 | chunk = sector >> geo->chunk_shift; |
602 | fc = sector_div(chunk, geo->far_copies); | 623 | fc = sector_div(chunk, geo->far_copies); |
603 | dev -= fc * geo->near_copies; | 624 | dev -= fc * geo->near_copies; |
604 | if (dev < 0) | 625 | if (dev < far_set_start) |
605 | dev += geo->raid_disks; | 626 | dev += far_set_size; |
606 | } else { | 627 | } else { |
607 | while (sector >= geo->stride) { | 628 | while (sector >= geo->stride) { |
608 | sector -= geo->stride; | 629 | sector -= geo->stride; |
609 | if (dev < geo->near_copies) | 630 | if (dev < (geo->near_copies + far_set_start)) |
610 | dev += geo->raid_disks - geo->near_copies; | 631 | dev += far_set_size - geo->near_copies; |
611 | else | 632 | else |
612 | dev -= geo->near_copies; | 633 | dev -= geo->near_copies; |
613 | } | 634 | } |
@@ -3438,7 +3459,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) | |||
3438 | disks = mddev->raid_disks + mddev->delta_disks; | 3459 | disks = mddev->raid_disks + mddev->delta_disks; |
3439 | break; | 3460 | break; |
3440 | } | 3461 | } |
3441 | if (layout >> 17) | 3462 | if (layout >> 18) |
3442 | return -1; | 3463 | return -1; |
3443 | if (chunk < (PAGE_SIZE >> 9) || | 3464 | if (chunk < (PAGE_SIZE >> 9) || |
3444 | !is_power_of_2(chunk)) | 3465 | !is_power_of_2(chunk)) |
@@ -3450,6 +3471,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) | |||
3450 | geo->near_copies = nc; | 3471 | geo->near_copies = nc; |
3451 | geo->far_copies = fc; | 3472 | geo->far_copies = fc; |
3452 | geo->far_offset = fo; | 3473 | geo->far_offset = fo; |
3474 | geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks; | ||
3453 | geo->chunk_mask = chunk - 1; | 3475 | geo->chunk_mask = chunk - 1; |
3454 | geo->chunk_shift = ffz(~chunk); | 3476 | geo->chunk_shift = ffz(~chunk); |
3455 | return nc*fc; | 3477 | return nc*fc; |
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 1054cf602345..157d69e83ff4 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -33,6 +33,11 @@ struct r10conf { | |||
33 | * far_offset, in which case it is | 33 | * far_offset, in which case it is |
34 | * 1 stripe. | 34 | * 1 stripe. |
35 | */ | 35 | */ |
36 | int far_set_size; /* The number of devices in a set, | ||
37 | * where a 'set' are devices that | ||
38 | * contain far/offset copies of | ||
39 | * each other. | ||
40 | */ | ||
36 | int chunk_shift; /* shift from chunks to sectors */ | 41 | int chunk_shift; /* shift from chunks to sectors */ |
37 | sector_t chunk_mask; | 42 | sector_t chunk_mask; |
38 | } prev, geo; | 43 | } prev, geo; |