aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2006-06-26 03:27:41 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-06-26 12:58:37 -0400
commitc93983bf517c100a31e40ef087e19bd3d7aa2d28 (patch)
tree9361c68d5f00ccd34cbc0a3bc7bc2389ce7f4c3a
parent7c7546ccf6463edbeee8d9aac6de7be1cd80d08a (diff)
[PATCH] md: support stripe/offset mode in raid10
The "industry standard" DDF format allows for a stripe/offset layout where data is duplicated on different stripes, e.g.

    A B C D
    D A B C
    E F G H
    H E F G

(columns are drives, rows are stripes, LETTERS are chunks of data).

This is similar to raid10's 'far' mode, but not quite the same. So enhance 'far' mode with a 'far/offset' option which follows the layout of DDF's stripe/offset.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/md/raid10.c64
-rw-r--r--include/linux/raid/raid10.h7
2 files changed, 49 insertions, 22 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 2ca18770575f..4db34edfd87f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -29,6 +29,7 @@
29 * raid_disks 29 * raid_disks
30 * near_copies (stored in low byte of layout) 30 * near_copies (stored in low byte of layout)
31 * far_copies (stored in second byte of layout) 31 * far_copies (stored in second byte of layout)
32 * far_offset (stored in bit 16 of layout)
32 * 33 *
33 * The data to be stored is divided into chunks using chunksize. 34 * The data to be stored is divided into chunks using chunksize.
34 * Each device is divided into far_copies sections. 35 * Each device is divided into far_copies sections.
@@ -36,10 +37,14 @@
36 * near_copies copies of each chunk is stored (each on a different drive). 37 * near_copies copies of each chunk is stored (each on a different drive).
37 * The starting device for each section is offset near_copies from the starting 38 * The starting device for each section is offset near_copies from the starting
38 * device of the previous section. 39 * device of the previous section.
39 * Thus there are (near_copies*far_copies) of each chunk, and each is on a different 40 * Thus there are (near_copies*far_copies) of each chunk, and each is on a different
40 * drive. 41 * drive.
41 * near_copies and far_copies must be at least one, and their product is at most 42 * near_copies and far_copies must be at least one, and their product is at most
42 * raid_disks. 43 * raid_disks.
44 *
45 * If far_offset is true, then the far_copies are handled a bit differently.
46 * The copies are still in different stripes, but instead of being very far apart
47 * on disk, they are in adjacent stripes.
43 */ 48 */
44 49
45/* 50/*
@@ -357,8 +362,7 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in
357 * With this layout, a block is never stored twice on the one device. 362 * With this layout, a block is never stored twice on the one device.
358 * 363 *
359 * raid10_find_phys finds the sector offset of a given virtual sector 364 * raid10_find_phys finds the sector offset of a given virtual sector
360 * on each device that it is on. If a block isn't on a device, 365 * on each device that it is on.
361 * that entry in the array is set to MaxSector.
362 * 366 *
363 * raid10_find_virt does the reverse mapping, from a device and a 367 * raid10_find_virt does the reverse mapping, from a device and a
364 * sector offset to a virtual address 368 * sector offset to a virtual address
@@ -381,6 +385,8 @@ static void raid10_find_phys(conf_t *conf, r10bio_t *r10bio)
381 chunk *= conf->near_copies; 385 chunk *= conf->near_copies;
382 stripe = chunk; 386 stripe = chunk;
383 dev = sector_div(stripe, conf->raid_disks); 387 dev = sector_div(stripe, conf->raid_disks);
388 if (conf->far_offset)
389 stripe *= conf->far_copies;
384 390
385 sector += stripe << conf->chunk_shift; 391 sector += stripe << conf->chunk_shift;
386 392
@@ -414,16 +420,24 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev)
414{ 420{
415 sector_t offset, chunk, vchunk; 421 sector_t offset, chunk, vchunk;
416 422
417 while (sector > conf->stride) {
418 sector -= conf->stride;
419 if (dev < conf->near_copies)
420 dev += conf->raid_disks - conf->near_copies;
421 else
422 dev -= conf->near_copies;
423 }
424
425 offset = sector & conf->chunk_mask; 423 offset = sector & conf->chunk_mask;
426 chunk = sector >> conf->chunk_shift; 424 if (conf->far_offset) {
425 int fc;
426 chunk = sector >> conf->chunk_shift;
427 fc = sector_div(chunk, conf->far_copies);
428 dev -= fc * conf->near_copies;
429 if (dev < 0)
430 dev += conf->raid_disks;
431 } else {
432 while (sector > conf->stride) {
433 sector -= conf->stride;
434 if (dev < conf->near_copies)
435 dev += conf->raid_disks - conf->near_copies;
436 else
437 dev -= conf->near_copies;
438 }
439 chunk = sector >> conf->chunk_shift;
440 }
427 vchunk = chunk * conf->raid_disks + dev; 441 vchunk = chunk * conf->raid_disks + dev;
428 sector_div(vchunk, conf->near_copies); 442 sector_div(vchunk, conf->near_copies);
429 return (vchunk << conf->chunk_shift) + offset; 443 return (vchunk << conf->chunk_shift) + offset;
@@ -900,9 +914,12 @@ static void status(struct seq_file *seq, mddev_t *mddev)
900 seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); 914 seq_printf(seq, " %dK chunks", mddev->chunk_size/1024);
901 if (conf->near_copies > 1) 915 if (conf->near_copies > 1)
902 seq_printf(seq, " %d near-copies", conf->near_copies); 916 seq_printf(seq, " %d near-copies", conf->near_copies);
903 if (conf->far_copies > 1) 917 if (conf->far_copies > 1) {
904 seq_printf(seq, " %d far-copies", conf->far_copies); 918 if (conf->far_offset)
905 919 seq_printf(seq, " %d offset-copies", conf->far_copies);
920 else
921 seq_printf(seq, " %d far-copies", conf->far_copies);
922 }
906 seq_printf(seq, " [%d/%d] [", conf->raid_disks, 923 seq_printf(seq, " [%d/%d] [", conf->raid_disks,
907 conf->working_disks); 924 conf->working_disks);
908 for (i = 0; i < conf->raid_disks; i++) 925 for (i = 0; i < conf->raid_disks; i++)
@@ -1915,7 +1932,7 @@ static int run(mddev_t *mddev)
1915 mirror_info_t *disk; 1932 mirror_info_t *disk;
1916 mdk_rdev_t *rdev; 1933 mdk_rdev_t *rdev;
1917 struct list_head *tmp; 1934 struct list_head *tmp;
1918 int nc, fc; 1935 int nc, fc, fo;
1919 sector_t stride, size; 1936 sector_t stride, size;
1920 1937
1921 if (mddev->chunk_size == 0) { 1938 if (mddev->chunk_size == 0) {
@@ -1925,8 +1942,9 @@ static int run(mddev_t *mddev)
1925 1942
1926 nc = mddev->layout & 255; 1943 nc = mddev->layout & 255;
1927 fc = (mddev->layout >> 8) & 255; 1944 fc = (mddev->layout >> 8) & 255;
1945 fo = mddev->layout & (1<<16);
1928 if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || 1946 if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
1929 (mddev->layout >> 16)) { 1947 (mddev->layout >> 17)) {
1930 printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", 1948 printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",
1931 mdname(mddev), mddev->layout); 1949 mdname(mddev), mddev->layout);
1932 goto out; 1950 goto out;
@@ -1958,12 +1976,16 @@ static int run(mddev_t *mddev)
1958 conf->near_copies = nc; 1976 conf->near_copies = nc;
1959 conf->far_copies = fc; 1977 conf->far_copies = fc;
1960 conf->copies = nc*fc; 1978 conf->copies = nc*fc;
1979 conf->far_offset = fo;
1961 conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; 1980 conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
1962 conf->chunk_shift = ffz(~mddev->chunk_size) - 9; 1981 conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
1963 stride = mddev->size >> (conf->chunk_shift-1); 1982 if (fo)
1964 sector_div(stride, fc); 1983 conf->stride = 1 << conf->chunk_shift;
1965 conf->stride = stride << conf->chunk_shift; 1984 else {
1966 1985 stride = mddev->size >> (conf->chunk_shift-1);
1986 sector_div(stride, fc);
1987 conf->stride = stride << conf->chunk_shift;
1988 }
1967 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, 1989 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
1968 r10bio_pool_free, conf); 1990 r10bio_pool_free, conf);
1969 if (!conf->r10bio_pool) { 1991 if (!conf->r10bio_pool) {
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index b1103298a8c2..c41e56a7c090 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -24,11 +24,16 @@ struct r10_private_data_s {
24 int far_copies; /* number of copies laid out 24 int far_copies; /* number of copies laid out
25 * at large strides across drives 25 * at large strides across drives
26 */ 26 */
27 int far_offset; /* far_copies are offset by 1 stripe
28 * instead of many
29 */
27 int copies; /* near_copies * far_copies. 30 int copies; /* near_copies * far_copies.
28 * must be <= raid_disks 31 * must be <= raid_disks
29 */ 32 */
30 sector_t stride; /* distance between far copies. 33 sector_t stride; /* distance between far copies.
31 * This is size / far_copies 34 * This is size / far_copies unless
35 * far_offset, in which case it is
36 * 1 stripe.
32 */ 37 */
33 38
34 int chunk_shift; /* shift from chunks to sectors */ 39 int chunk_shift; /* shift from chunks to sectors */