author	Mike Snitzer <snitzer@redhat.com>	2012-07-27 10:08:02 -0400
committer	Alasdair G Kergon <agk@redhat.com>	2012-07-27 10:08:02 -0400
commit	55f2b8bdb0c7387eb2dc645b9ecbe5d0faa6b54e (patch)
tree	4428d66325bdc8d4f25a374e10b5a78cb0ab3e54 /drivers
parent	33d07c0dfab902a7c5420587984497dc05ab5c9c (diff)
dm thin: support for non power of 2 pool blocksize
Non-power-of-2 blocksize support is needed to properly align thinp IO
on storage that has a non-power-of-2 optimal IO size (e.g. RAID6 10+2).
Use sector_div to support a non-power-of-2 blocksize for the pool's
data device.  This provides performance comparable to the power-of-2
shift-and-mask math used until now (as tested on modern x86_64 hardware).
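
For illustration only (not part of the patch), here is a minimal
userspace sketch of the old and new block/offset math.  The function
sector_div_sketch() is a stand-in for the kernel's sector_div() macro,
which divides a sector_t in place and returns the remainder; the sample
values are hypothetical:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t sector_t;

    /* Stand-in for the kernel's sector_div(): divide in place, return remainder. */
    static uint32_t sector_div_sketch(sector_t *n, uint32_t base)
    {
            uint32_t rem = (uint32_t)(*n % base);
            *n /= base;
            return rem;
    }

    int main(void)
    {
            sector_t bi_sector = 123456;            /* hypothetical bio start sector */
            uint32_t sectors_per_block = 1280;      /* e.g. RAID6 10+2, 64KiB chunks */

            /* Old math, power-of-2 blocksizes only:
             *   block  = bi_sector >> block_shift;
             *   offset = bi_sector & offset_mask;
             */

            /* New math, valid for any block size: */
            sector_t block = bi_sector;
            uint32_t offset = sector_div_sketch(&block, sectors_per_block);

            printf("block %llu, offset %u\n", (unsigned long long)block, offset);
            return 0;
    }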
The kernel currently assumes that limits->discard_granularity is a power
of two, so the thin target only enables discard support if the block
size is a power of two.
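
As a side note on the checks in the diff below: the relaxed constructor
test, block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1), only verifies
that the size is a multiple of the minimum block size, a mask test that
works because the minimum is itself a power of 2.  The stricter discard
gate relies on the classic single-bit test behind the kernel's
is_power_of_2() helper, sketched here for reference:

    /* A power of 2 has exactly one bit set, so n & (n - 1) clears it to 0. */
    static int is_pow2_sketch(unsigned long n)
    {
            return n != 0 && (n & (n - 1)) == 0;
    }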
Eliminate the pool structure's 'block_shift' and 'offset_mask' fields,
and the remaining 4-byte holes.
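
A sketch (not from the patch; field widths assumed, with dm_block_t
taken to be 64-bit) of why the reordering visible in the diff below
removes padding on 64-bit builds:

    #include <stdint.h>

    /* Before: a 32-bit field directly ahead of a 64-bit field forces the
     * compiler to insert a 4-byte hole to keep 8-byte alignment. */
    struct before_reorder {
            uint32_t sectors_per_block;     /* 4 bytes, then a 4-byte hole */
            uint64_t low_water_blocks;      /* must start on an 8-byte boundary */
    };

    /* After: the 64-bit field comes first and the 32-bit field packs
     * behind it; in the real pool struct the following members fill the tail. */
    struct after_reorder {
            uint64_t low_water_blocks;
            uint32_t sectors_per_block;
    };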
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/md/dm-thin.c	59
1 file changed, 37 insertions(+), 22 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 350bcf40485e..f21d318d98f0 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -510,10 +510,8 @@ struct pool {
 	struct block_device *md_dev;
 	struct dm_pool_metadata *pmd;
 
-	uint32_t sectors_per_block;
-	unsigned block_shift;
-	dm_block_t offset_mask;
 	dm_block_t low_water_blocks;
+	uint32_t sectors_per_block;
 
 	struct pool_features pf;
 	unsigned low_water_triggered:1;	/* A dm event has been sent */
@@ -526,8 +524,8 @@ struct pool {
 	struct work_struct worker;
 	struct delayed_work waker;
 
-	unsigned ref_count;
 	unsigned long last_commit_jiffies;
+	unsigned ref_count;
 
 	spinlock_t lock;
 	struct bio_list deferred_bios;
@@ -679,16 +677,21 @@ static void requeue_io(struct thin_c *tc)
 
 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 {
-	return bio->bi_sector >> tc->pool->block_shift;
+	sector_t block_nr = bio->bi_sector;
+
+	(void) sector_div(block_nr, tc->pool->sectors_per_block);
+
+	return block_nr;
 }
 
 static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
 {
 	struct pool *pool = tc->pool;
+	sector_t bi_sector = bio->bi_sector;
 
 	bio->bi_bdev = tc->pool_dev->bdev;
-	bio->bi_sector = (block << pool->block_shift) +
-		(bio->bi_sector & pool->offset_mask);
+	bio->bi_sector = (block * pool->sectors_per_block) +
+			 sector_div(bi_sector, pool->sectors_per_block);
 }
 
 static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -933,9 +936,10 @@ static void process_prepared(struct pool *pool, struct list_head *head,
  */
 static int io_overlaps_block(struct pool *pool, struct bio *bio)
 {
-	return !(bio->bi_sector & pool->offset_mask) &&
-	       (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
+	sector_t bi_sector = bio->bi_sector;
 
+	return !sector_div(bi_sector, pool->sectors_per_block) &&
+	       (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
 }
 
 static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1239,8 +1243,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 			 * part of the discard that is in a subsequent
 			 * block.
 			 */
-			sector_t offset = bio->bi_sector - (block << pool->block_shift);
-			unsigned remaining = (pool->sectors_per_block - offset) << 9;
+			sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
+			unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
 			bio->bi_size = min(bio->bi_size, remaining);
 
 			cell_release_singleton(cell, bio);
@@ -1722,8 +1726,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
 	pool->pmd = pmd;
 	pool->sectors_per_block = block_size;
-	pool->block_shift = ffs(block_size) - 1;
-	pool->offset_mask = block_size - 1;
 	pool->low_water_blocks = 0;
 	pool_features_init(&pool->pf);
 	pool->prison = prison_create(PRISON_CELLS);
@@ -1971,7 +1973,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
 	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
 	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
-	    !is_power_of_2(block_size)) {
+	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
 		ti->error = "Invalid block size";
 		r = -EINVAL;
 		goto out;
@@ -2018,6 +2020,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto out_flags_changed;
 	}
 
+	/*
+	 * The block layer requires discard_granularity to be a power of 2.
+	 */
+	if (pf.discard_enabled && !is_power_of_2(block_size)) {
+		ti->error = "Discard support must be disabled when the block size is not a power of 2";
+		r = -EINVAL;
+		goto out_flags_changed;
+	}
+
 	pt->pool = pool;
 	pt->ti = ti;
 	pt->metadata_dev = metadata_dev;
@@ -2097,7 +2108,8 @@ static int pool_preresume(struct dm_target *ti)
 	int r;
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
-	dm_block_t data_size, sb_data_size;
+	sector_t data_size = ti->len;
+	dm_block_t sb_data_size;
 
 	/*
 	 * Take control of the pool object.
@@ -2106,7 +2118,8 @@ static int pool_preresume(struct dm_target *ti)
 	if (r)
 		return r;
 
-	data_size = ti->len >> pool->block_shift;
+	(void) sector_div(data_size, pool->sectors_per_block);
+
 	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
 	if (r) {
 		DMERR("failed to retrieve data device size");
@@ -2115,7 +2128,7 @@ static int pool_preresume(struct dm_target *ti)
 
 	if (data_size < sb_data_size) {
 		DMERR("pool target too small, is %llu blocks (expected %llu)",
-		      data_size, sb_data_size);
+		      (unsigned long long)data_size, sb_data_size);
 		return -EINVAL;
 
 	} else if (data_size > sb_data_size) {
@@ -2764,19 +2777,21 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 static int thin_iterate_devices(struct dm_target *ti,
 				iterate_devices_callout_fn fn, void *data)
 {
-	dm_block_t blocks;
+	sector_t blocks;
 	struct thin_c *tc = ti->private;
+	struct pool *pool = tc->pool;
 
 	/*
	 * We can't call dm_pool_get_data_dev_size() since that blocks. So
	 * we follow a more convoluted path through to the pool's target.
	 */
-	if (!tc->pool->ti)
+	if (!pool->ti)
 		return 0;	/* nothing is bound */
 
-	blocks = tc->pool->ti->len >> tc->pool->block_shift;
+	blocks = pool->ti->len;
+	(void) sector_div(blocks, pool->sectors_per_block);
 	if (blocks)
-		return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+		return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
 
 	return 0;
 }
@@ -2793,7 +2808,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,