diff options
author | NeilBrown <neilb@suse.de> | 2006-06-26 03:27:41 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-26 12:58:37 -0400 |
commit | c93983bf517c100a31e40ef087e19bd3d7aa2d28 (patch) | |
tree | 9361c68d5f00ccd34cbc0a3bc7bc2389ce7f4c3a /drivers/md/raid10.c | |
parent | 7c7546ccf6463edbeee8d9aac6de7be1cd80d08a (diff) |
[PATCH] md: support stripe/offset mode in raid10
The "industry standard" DDF format allows for a stripe/offset layout where
data is duplicated on different stripes. e.g.
A B C D
D A B C
E F G H
H E F G
(columns are drives, rows are stripes, LETTERS are chunks of data).
This is similar to raid10's 'far' mode, but not quite the same. So enhance
'far' mode with a 'far/offset' option which follows the layout of DDF's
stripe/offset.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 64 |
1 files changed, 43 insertions, 21 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 2ca18770575f..4db34edfd87f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -29,6 +29,7 @@ | |||
29 | * raid_disks | 29 | * raid_disks |
30 | * near_copies (stored in low byte of layout) | 30 | * near_copies (stored in low byte of layout) |
31 | * far_copies (stored in second byte of layout) | 31 | * far_copies (stored in second byte of layout) |
32 | * far_offset (stored in bit 16 of layout ) | ||
32 | * | 33 | * |
33 | * The data to be stored is divided into chunks using chunksize. | 34 | * The data to be stored is divided into chunks using chunksize. |
34 | * Each device is divided into far_copies sections. | 35 | * Each device is divided into far_copies sections. |
@@ -36,10 +37,14 @@ | |||
36 | * near_copies copies of each chunk is stored (each on a different drive). | 37 | * near_copies copies of each chunk is stored (each on a different drive). |
37 | * The starting device for each section is offset near_copies from the starting | 38 | * The starting device for each section is offset near_copies from the starting |
38 | * device of the previous section. | 39 | * device of the previous section. |
39 | * Thus there are (near_copies*far_copies) of each chunk, and each is on a different | 40 | * Thus there are (near_copies*far_copies) of each chunk, and each is on a different |
40 | * drive. | 41 | * drive. |
41 | * near_copies and far_copies must be at least one, and their product is at most | 42 | * near_copies and far_copies must be at least one, and their product is at most |
42 | * raid_disks. | 43 | * raid_disks. |
44 | * | ||
45 | * If far_offset is true, then the far_copies are handled a bit differently. | ||
46 | * The copies are still in different stripes, but instead of being very far apart | ||
47 | * on disk, they are in adjacent stripes. | ||
43 | */ | 48 | */ |
44 | 49 | ||
45 | /* | 50 | /* |
@@ -357,8 +362,7 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in | |||
357 | * With this layout, a block is never stored twice on the one device. | 362 | * With this layout, a block is never stored twice on the one device. |
358 | * | 363 | * |
359 | * raid10_find_phys finds the sector offset of a given virtual sector | 364 | * raid10_find_phys finds the sector offset of a given virtual sector |
360 | * on each device that it is on. If a block isn't on a device, | 365 | * on each device that it is on. |
361 | * that entry in the array is set to MaxSector. | ||
362 | * | 366 | * |
363 | * raid10_find_virt does the reverse mapping, from a device and a | 367 | * raid10_find_virt does the reverse mapping, from a device and a |
364 | * sector offset to a virtual address | 368 | * sector offset to a virtual address |
@@ -381,6 +385,8 @@ static void raid10_find_phys(conf_t *conf, r10bio_t *r10bio) | |||
381 | chunk *= conf->near_copies; | 385 | chunk *= conf->near_copies; |
382 | stripe = chunk; | 386 | stripe = chunk; |
383 | dev = sector_div(stripe, conf->raid_disks); | 387 | dev = sector_div(stripe, conf->raid_disks); |
388 | if (conf->far_offset) | ||
389 | stripe *= conf->far_copies; | ||
384 | 390 | ||
385 | sector += stripe << conf->chunk_shift; | 391 | sector += stripe << conf->chunk_shift; |
386 | 392 | ||
@@ -414,16 +420,24 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev) | |||
414 | { | 420 | { |
415 | sector_t offset, chunk, vchunk; | 421 | sector_t offset, chunk, vchunk; |
416 | 422 | ||
417 | while (sector > conf->stride) { | ||
418 | sector -= conf->stride; | ||
419 | if (dev < conf->near_copies) | ||
420 | dev += conf->raid_disks - conf->near_copies; | ||
421 | else | ||
422 | dev -= conf->near_copies; | ||
423 | } | ||
424 | |||
425 | offset = sector & conf->chunk_mask; | 423 | offset = sector & conf->chunk_mask; |
426 | chunk = sector >> conf->chunk_shift; | 424 | if (conf->far_offset) { |
425 | int fc; | ||
426 | chunk = sector >> conf->chunk_shift; | ||
427 | fc = sector_div(chunk, conf->far_copies); | ||
428 | dev -= fc * conf->near_copies; | ||
429 | if (dev < 0) | ||
430 | dev += conf->raid_disks; | ||
431 | } else { | ||
432 | while (sector > conf->stride) { | ||
433 | sector -= conf->stride; | ||
434 | if (dev < conf->near_copies) | ||
435 | dev += conf->raid_disks - conf->near_copies; | ||
436 | else | ||
437 | dev -= conf->near_copies; | ||
438 | } | ||
439 | chunk = sector >> conf->chunk_shift; | ||
440 | } | ||
427 | vchunk = chunk * conf->raid_disks + dev; | 441 | vchunk = chunk * conf->raid_disks + dev; |
428 | sector_div(vchunk, conf->near_copies); | 442 | sector_div(vchunk, conf->near_copies); |
429 | return (vchunk << conf->chunk_shift) + offset; | 443 | return (vchunk << conf->chunk_shift) + offset; |
@@ -900,9 +914,12 @@ static void status(struct seq_file *seq, mddev_t *mddev) | |||
900 | seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); | 914 | seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); |
901 | if (conf->near_copies > 1) | 915 | if (conf->near_copies > 1) |
902 | seq_printf(seq, " %d near-copies", conf->near_copies); | 916 | seq_printf(seq, " %d near-copies", conf->near_copies); |
903 | if (conf->far_copies > 1) | 917 | if (conf->far_copies > 1) { |
904 | seq_printf(seq, " %d far-copies", conf->far_copies); | 918 | if (conf->far_offset) |
905 | 919 | seq_printf(seq, " %d offset-copies", conf->far_copies); | |
920 | else | ||
921 | seq_printf(seq, " %d far-copies", conf->far_copies); | ||
922 | } | ||
906 | seq_printf(seq, " [%d/%d] [", conf->raid_disks, | 923 | seq_printf(seq, " [%d/%d] [", conf->raid_disks, |
907 | conf->working_disks); | 924 | conf->working_disks); |
908 | for (i = 0; i < conf->raid_disks; i++) | 925 | for (i = 0; i < conf->raid_disks; i++) |
@@ -1915,7 +1932,7 @@ static int run(mddev_t *mddev) | |||
1915 | mirror_info_t *disk; | 1932 | mirror_info_t *disk; |
1916 | mdk_rdev_t *rdev; | 1933 | mdk_rdev_t *rdev; |
1917 | struct list_head *tmp; | 1934 | struct list_head *tmp; |
1918 | int nc, fc; | 1935 | int nc, fc, fo; |
1919 | sector_t stride, size; | 1936 | sector_t stride, size; |
1920 | 1937 | ||
1921 | if (mddev->chunk_size == 0) { | 1938 | if (mddev->chunk_size == 0) { |
@@ -1925,8 +1942,9 @@ static int run(mddev_t *mddev) | |||
1925 | 1942 | ||
1926 | nc = mddev->layout & 255; | 1943 | nc = mddev->layout & 255; |
1927 | fc = (mddev->layout >> 8) & 255; | 1944 | fc = (mddev->layout >> 8) & 255; |
1945 | fo = mddev->layout & (1<<16); | ||
1928 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || | 1946 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || |
1929 | (mddev->layout >> 16)) { | 1947 | (mddev->layout >> 17)) { |
1930 | printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", | 1948 | printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", |
1931 | mdname(mddev), mddev->layout); | 1949 | mdname(mddev), mddev->layout); |
1932 | goto out; | 1950 | goto out; |
@@ -1958,12 +1976,16 @@ static int run(mddev_t *mddev) | |||
1958 | conf->near_copies = nc; | 1976 | conf->near_copies = nc; |
1959 | conf->far_copies = fc; | 1977 | conf->far_copies = fc; |
1960 | conf->copies = nc*fc; | 1978 | conf->copies = nc*fc; |
1979 | conf->far_offset = fo; | ||
1961 | conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; | 1980 | conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; |
1962 | conf->chunk_shift = ffz(~mddev->chunk_size) - 9; | 1981 | conf->chunk_shift = ffz(~mddev->chunk_size) - 9; |
1963 | stride = mddev->size >> (conf->chunk_shift-1); | 1982 | if (fo) |
1964 | sector_div(stride, fc); | 1983 | conf->stride = 1 << conf->chunk_shift; |
1965 | conf->stride = stride << conf->chunk_shift; | 1984 | else { |
1966 | 1985 | stride = mddev->size >> (conf->chunk_shift-1); | |
1986 | sector_div(stride, fc); | ||
1987 | conf->stride = stride << conf->chunk_shift; | ||
1988 | } | ||
1967 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, | 1989 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, |
1968 | r10bio_pool_free, conf); | 1990 | r10bio_pool_free, conf); |
1969 | if (!conf->r10bio_pool) { | 1991 | if (!conf->r10bio_pool) { |