diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 77 |
1 files changed, 53 insertions, 24 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1440935414e6..7f636283a1ba 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -29,6 +29,7 @@ | |||
29 | * raid_disks | 29 | * raid_disks |
30 | * near_copies (stored in low byte of layout) | 30 | * near_copies (stored in low byte of layout) |
31 | * far_copies (stored in second byte of layout) | 31 | * far_copies (stored in second byte of layout) |
32 | * far_offset (stored in bit 16 of layout ) | ||
32 | * | 33 | * |
33 | * The data to be stored is divided into chunks using chunksize. | 34 | * The data to be stored is divided into chunks using chunksize. |
34 | * Each device is divided into far_copies sections. | 35 | * Each device is divided into far_copies sections. |
@@ -36,10 +37,14 @@ | |||
36 | * near_copies copies of each chunk is stored (each on a different drive). | 37 | * near_copies copies of each chunk is stored (each on a different drive). |
37 | * The starting device for each section is offset near_copies from the starting | 38 | * The starting device for each section is offset near_copies from the starting |
38 | * device of the previous section. | 39 | * device of the previous section. |
39 | * Thus there are (near_copies*far_copies) of each chunk, and each is on a different | 40 | * Thus they are (near_copies*far_copies) of each chunk, and each is on a different |
40 | * drive. | 41 | * drive. |
41 | * near_copies and far_copies must be at least one, and their product is at most | 42 | * near_copies and far_copies must be at least one, and their product is at most |
42 | * raid_disks. | 43 | * raid_disks. |
44 | * | ||
45 | * If far_offset is true, then the far_copies are handled a bit differently. | ||
46 | * The copies are still in different stripes, but instead of be very far apart | ||
47 | * on disk, there are adjacent stripes. | ||
43 | */ | 48 | */ |
44 | 49 | ||
45 | /* | 50 | /* |
@@ -357,8 +362,7 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in | |||
357 | * With this layout, and block is never stored twice on the one device. | 362 | * With this layout, and block is never stored twice on the one device. |
358 | * | 363 | * |
359 | * raid10_find_phys finds the sector offset of a given virtual sector | 364 | * raid10_find_phys finds the sector offset of a given virtual sector |
360 | * on each device that it is on. If a block isn't on a device, | 365 | * on each device that it is on. |
361 | * that entry in the array is set to MaxSector. | ||
362 | * | 366 | * |
363 | * raid10_find_virt does the reverse mapping, from a device and a | 367 | * raid10_find_virt does the reverse mapping, from a device and a |
364 | * sector offset to a virtual address | 368 | * sector offset to a virtual address |
@@ -381,6 +385,8 @@ static void raid10_find_phys(conf_t *conf, r10bio_t *r10bio) | |||
381 | chunk *= conf->near_copies; | 385 | chunk *= conf->near_copies; |
382 | stripe = chunk; | 386 | stripe = chunk; |
383 | dev = sector_div(stripe, conf->raid_disks); | 387 | dev = sector_div(stripe, conf->raid_disks); |
388 | if (conf->far_offset) | ||
389 | stripe *= conf->far_copies; | ||
384 | 390 | ||
385 | sector += stripe << conf->chunk_shift; | 391 | sector += stripe << conf->chunk_shift; |
386 | 392 | ||
@@ -414,16 +420,24 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev) | |||
414 | { | 420 | { |
415 | sector_t offset, chunk, vchunk; | 421 | sector_t offset, chunk, vchunk; |
416 | 422 | ||
417 | while (sector > conf->stride) { | ||
418 | sector -= conf->stride; | ||
419 | if (dev < conf->near_copies) | ||
420 | dev += conf->raid_disks - conf->near_copies; | ||
421 | else | ||
422 | dev -= conf->near_copies; | ||
423 | } | ||
424 | |||
425 | offset = sector & conf->chunk_mask; | 423 | offset = sector & conf->chunk_mask; |
426 | chunk = sector >> conf->chunk_shift; | 424 | if (conf->far_offset) { |
425 | int fc; | ||
426 | chunk = sector >> conf->chunk_shift; | ||
427 | fc = sector_div(chunk, conf->far_copies); | ||
428 | dev -= fc * conf->near_copies; | ||
429 | if (dev < 0) | ||
430 | dev += conf->raid_disks; | ||
431 | } else { | ||
432 | while (sector > conf->stride) { | ||
433 | sector -= conf->stride; | ||
434 | if (dev < conf->near_copies) | ||
435 | dev += conf->raid_disks - conf->near_copies; | ||
436 | else | ||
437 | dev -= conf->near_copies; | ||
438 | } | ||
439 | chunk = sector >> conf->chunk_shift; | ||
440 | } | ||
427 | vchunk = chunk * conf->raid_disks + dev; | 441 | vchunk = chunk * conf->raid_disks + dev; |
428 | sector_div(vchunk, conf->near_copies); | 442 | sector_div(vchunk, conf->near_copies); |
429 | return (vchunk << conf->chunk_shift) + offset; | 443 | return (vchunk << conf->chunk_shift) + offset; |
@@ -900,9 +914,12 @@ static void status(struct seq_file *seq, mddev_t *mddev) | |||
900 | seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); | 914 | seq_printf(seq, " %dK chunks", mddev->chunk_size/1024); |
901 | if (conf->near_copies > 1) | 915 | if (conf->near_copies > 1) |
902 | seq_printf(seq, " %d near-copies", conf->near_copies); | 916 | seq_printf(seq, " %d near-copies", conf->near_copies); |
903 | if (conf->far_copies > 1) | 917 | if (conf->far_copies > 1) { |
904 | seq_printf(seq, " %d far-copies", conf->far_copies); | 918 | if (conf->far_offset) |
905 | 919 | seq_printf(seq, " %d offset-copies", conf->far_copies); | |
920 | else | ||
921 | seq_printf(seq, " %d far-copies", conf->far_copies); | ||
922 | } | ||
906 | seq_printf(seq, " [%d/%d] [", conf->raid_disks, | 923 | seq_printf(seq, " [%d/%d] [", conf->raid_disks, |
907 | conf->working_disks); | 924 | conf->working_disks); |
908 | for (i = 0; i < conf->raid_disks; i++) | 925 | for (i = 0; i < conf->raid_disks; i++) |
@@ -1915,7 +1932,7 @@ static int run(mddev_t *mddev) | |||
1915 | mirror_info_t *disk; | 1932 | mirror_info_t *disk; |
1916 | mdk_rdev_t *rdev; | 1933 | mdk_rdev_t *rdev; |
1917 | struct list_head *tmp; | 1934 | struct list_head *tmp; |
1918 | int nc, fc; | 1935 | int nc, fc, fo; |
1919 | sector_t stride, size; | 1936 | sector_t stride, size; |
1920 | 1937 | ||
1921 | if (mddev->chunk_size == 0) { | 1938 | if (mddev->chunk_size == 0) { |
@@ -1925,8 +1942,9 @@ static int run(mddev_t *mddev) | |||
1925 | 1942 | ||
1926 | nc = mddev->layout & 255; | 1943 | nc = mddev->layout & 255; |
1927 | fc = (mddev->layout >> 8) & 255; | 1944 | fc = (mddev->layout >> 8) & 255; |
1945 | fo = mddev->layout & (1<<16); | ||
1928 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || | 1946 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || |
1929 | (mddev->layout >> 16)) { | 1947 | (mddev->layout >> 17)) { |
1930 | printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", | 1948 | printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", |
1931 | mdname(mddev), mddev->layout); | 1949 | mdname(mddev), mddev->layout); |
1932 | goto out; | 1950 | goto out; |
@@ -1958,12 +1976,16 @@ static int run(mddev_t *mddev) | |||
1958 | conf->near_copies = nc; | 1976 | conf->near_copies = nc; |
1959 | conf->far_copies = fc; | 1977 | conf->far_copies = fc; |
1960 | conf->copies = nc*fc; | 1978 | conf->copies = nc*fc; |
1979 | conf->far_offset = fo; | ||
1961 | conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; | 1980 | conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1; |
1962 | conf->chunk_shift = ffz(~mddev->chunk_size) - 9; | 1981 | conf->chunk_shift = ffz(~mddev->chunk_size) - 9; |
1963 | stride = mddev->size >> (conf->chunk_shift-1); | 1982 | if (fo) |
1964 | sector_div(stride, fc); | 1983 | conf->stride = 1 << conf->chunk_shift; |
1965 | conf->stride = stride << conf->chunk_shift; | 1984 | else { |
1966 | 1985 | stride = mddev->size >> (conf->chunk_shift-1); | |
1986 | sector_div(stride, fc); | ||
1987 | conf->stride = stride << conf->chunk_shift; | ||
1988 | } | ||
1967 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, | 1989 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, |
1968 | r10bio_pool_free, conf); | 1990 | r10bio_pool_free, conf); |
1969 | if (!conf->r10bio_pool) { | 1991 | if (!conf->r10bio_pool) { |
@@ -2015,7 +2037,8 @@ static int run(mddev_t *mddev) | |||
2015 | 2037 | ||
2016 | disk = conf->mirrors + i; | 2038 | disk = conf->mirrors + i; |
2017 | 2039 | ||
2018 | if (!disk->rdev) { | 2040 | if (!disk->rdev || |
2041 | !test_bit(In_sync, &rdev->flags)) { | ||
2019 | disk->head_position = 0; | 2042 | disk->head_position = 0; |
2020 | mddev->degraded++; | 2043 | mddev->degraded++; |
2021 | } | 2044 | } |
@@ -2037,7 +2060,13 @@ static int run(mddev_t *mddev) | |||
2037 | /* | 2060 | /* |
2038 | * Ok, everything is just fine now | 2061 | * Ok, everything is just fine now |
2039 | */ | 2062 | */ |
2040 | size = conf->stride * conf->raid_disks; | 2063 | if (conf->far_offset) { |
2064 | size = mddev->size >> (conf->chunk_shift-1); | ||
2065 | size *= conf->raid_disks; | ||
2066 | size <<= conf->chunk_shift; | ||
2067 | sector_div(size, conf->far_copies); | ||
2068 | } else | ||
2069 | size = conf->stride * conf->raid_disks; | ||
2041 | sector_div(size, conf->near_copies); | 2070 | sector_div(size, conf->near_copies); |
2042 | mddev->array_size = size/2; | 2071 | mddev->array_size = size/2; |
2043 | mddev->resync_max_sectors = size; | 2072 | mddev->resync_max_sectors = size; |
@@ -2050,7 +2079,7 @@ static int run(mddev_t *mddev) | |||
2050 | * maybe... | 2079 | * maybe... |
2051 | */ | 2080 | */ |
2052 | { | 2081 | { |
2053 | int stripe = conf->raid_disks * mddev->chunk_size / PAGE_SIZE; | 2082 | int stripe = conf->raid_disks * (mddev->chunk_size / PAGE_SIZE); |
2054 | stripe /= conf->near_copies; | 2083 | stripe /= conf->near_copies; |
2055 | if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) | 2084 | if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) |
2056 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; | 2085 | mddev->queue->backing_dev_info.ra_pages = 2* stripe; |