| author | Mike Snitzer <snitzer@redhat.com> | 2012-07-27 10:08:02 -0400 |
|---|---|---|
| committer | Alasdair G Kergon <agk@redhat.com> | 2012-07-27 10:08:02 -0400 |
| commit | 55f2b8bdb0c7387eb2dc645b9ecbe5d0faa6b54e (patch) | |
| tree | 4428d66325bdc8d4f25a374e10b5a78cb0ab3e54 | |
| parent | 33d07c0dfab902a7c5420587984497dc05ab5c9c (diff) | |
dm thin: support for non power of 2 pool blocksize
Non power of 2 blocksize support is needed to properly align thinp IO
on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).
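For example (chunk size assumed for illustration), a 10+2 RAID6 array with a
64 KiB chunk has a full-stripe width of 10 × 64 KiB = 640 KiB = 1280 sectors,
which is not a power of 2, so no power-of-2 pool blocksize can match the
array's optimal IO size.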
Use sector_div to support non power of 2 blocksize for the pool's
data device. This provides comparable performance to the power of 2
math that was performed until now (as tested on modern x86_64 hardware).
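As a rough illustration of the arithmetic (a sketch only, not the patch's
code: the helper name, the header choice and the repeated dm_block_t typedef
are assumptions made for self-containment), sector_div() divides a sector_t
in place and returns the remainder, so one call yields both the block number
and the in-block offset that the old shift/mask pair produced:

```c
#include <linux/device-mapper.h>	/* sector_t; pulls in sector_div() on kernels of this era */

typedef uint64_t dm_block_t;		/* as dm-thin's metadata headers define it */

/*
 * Hypothetical helper, for illustration only: map a bio's start sector
 * to a pool block number plus the offset within that block.
 */
static void sector_to_block_and_offset(sector_t bi_sector,
				       uint32_t sectors_per_block,
				       dm_block_t *block, sector_t *offset)
{
	/*
	 * Power-of-2 only (the old code):
	 *	block  = bi_sector >> block_shift;
	 *	offset = bi_sector & offset_mask;
	 *
	 * Any blocksize: sector_div() divides bi_sector in place and
	 * returns the remainder.
	 */
	*offset = sector_div(bi_sector, sectors_per_block);	/* remainder */
	*block  = bi_sector;					/* quotient  */
}
```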
The kernel currently assumes that limits->discard_granularity is a power
of two so the thin target only enables discard support if the block
size is a power of two.
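That assumption exists because the discard alignment math in the block layer
of this era masks with `granularity - 1`, and masking only equals a true
modulo for powers of two. A tiny standalone demo (plain userspace C, values
picked arbitrarily) makes the difference visible:

```c
#include <stdio.h>

int main(void)
{
	unsigned long long sector = 100;	/* arbitrary sector number    */
	unsigned int pow2  = 128;		/* power-of-2 granularity     */
	unsigned int npow2 = 96;		/* non-power-of-2 granularity */

	/* For a power of 2, masking with (g - 1) matches a real modulo... */
	printf("g=128: mask=%llu mod=%llu\n", sector & (pow2 - 1), sector % pow2);

	/* ...for anything else it silently yields the wrong remainder. */
	printf("g= 96: mask=%llu mod=%llu\n", sector & (npow2 - 1), sector % npow2);

	return 0;
}
```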
Eliminate pool structure's 'block_shift', 'offset_mask' and
remaining 4 byte holes.
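The "holes" are alignment padding. On a typical 64-bit build (an assumption
for this sketch; field names are taken from the diff below), an 8-byte-aligned
member that follows a lone 4-byte member forces 4 bytes of padding, and
swapping their order lets the next 4-byte field pack into that space; a tool
such as pahole reports these holes directly.

```c
/* Padding sketch only; sizes assume a common 64-bit configuration. */
struct before_reorder {
	unsigned ref_count;			/* 4 bytes                      */
						/* 4-byte hole (padding)        */
	unsigned long last_commit_jiffies;	/* 8 bytes, 8-byte aligned      */
	/* spinlock_t lock; ... */
};

struct after_reorder {
	unsigned long last_commit_jiffies;	/* 8 bytes                      */
	unsigned ref_count;			/* 4 bytes                      */
	/* spinlock_t lock; ... */		/* next 4-byte field packs here */
};
```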
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
| -rw-r--r-- | drivers/md/dm-thin.c | 59 |
1 file changed, 37 insertions, 22 deletions
```diff
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 350bcf40485e..f21d318d98f0 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -510,10 +510,8 @@ struct pool {
 	struct block_device *md_dev;
 	struct dm_pool_metadata *pmd;
 
-	uint32_t sectors_per_block;
-	unsigned block_shift;
-	dm_block_t offset_mask;
 	dm_block_t low_water_blocks;
+	uint32_t sectors_per_block;
 
 	struct pool_features pf;
 	unsigned low_water_triggered:1;	/* A dm event has been sent */
@@ -526,8 +524,8 @@ struct pool {
 	struct work_struct worker;
 	struct delayed_work waker;
 
-	unsigned ref_count;
 	unsigned long last_commit_jiffies;
+	unsigned ref_count;
 
 	spinlock_t lock;
 	struct bio_list deferred_bios;
@@ -679,16 +677,21 @@ static void requeue_io(struct thin_c *tc)
 
 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 {
-	return bio->bi_sector >> tc->pool->block_shift;
+	sector_t block_nr = bio->bi_sector;
+
+	(void) sector_div(block_nr, tc->pool->sectors_per_block);
+
+	return block_nr;
 }
 
 static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
 {
 	struct pool *pool = tc->pool;
+	sector_t bi_sector = bio->bi_sector;
 
 	bio->bi_bdev = tc->pool_dev->bdev;
-	bio->bi_sector = (block << pool->block_shift) +
-		(bio->bi_sector & pool->offset_mask);
+	bio->bi_sector = (block * pool->sectors_per_block) +
+			 sector_div(bi_sector, pool->sectors_per_block);
 }
 
 static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -933,9 +936,10 @@ static void process_prepared(struct pool *pool, struct list_head *head,
  */
 static int io_overlaps_block(struct pool *pool, struct bio *bio)
 {
-	return !(bio->bi_sector & pool->offset_mask) &&
-		(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
+	sector_t bi_sector = bio->bi_sector;
 
+	return !sector_div(bi_sector, pool->sectors_per_block) &&
+	       (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
 }
 
 static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1239,8 +1243,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 			 * part of the discard that is in a subsequent
 			 * block.
 			 */
-			sector_t offset = bio->bi_sector - (block << pool->block_shift);
-			unsigned remaining = (pool->sectors_per_block - offset) << 9;
+			sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
+			unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
 			bio->bi_size = min(bio->bi_size, remaining);
 
 			cell_release_singleton(cell, bio);
@@ -1722,8 +1726,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
 	pool->pmd = pmd;
 	pool->sectors_per_block = block_size;
-	pool->block_shift = ffs(block_size) - 1;
-	pool->offset_mask = block_size - 1;
 	pool->low_water_blocks = 0;
 	pool_features_init(&pool->pf);
 	pool->prison = prison_create(PRISON_CELLS);
@@ -1971,7 +1973,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
 	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
 	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
-	    !is_power_of_2(block_size)) {
+	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
 		ti->error = "Invalid block size";
 		r = -EINVAL;
 		goto out;
@@ -2018,6 +2020,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto out_flags_changed;
 	}
 
+	/*
+	 * The block layer requires discard_granularity to be a power of 2.
+	 */
+	if (pf.discard_enabled && !is_power_of_2(block_size)) {
+		ti->error = "Discard support must be disabled when the block size is not a power of 2";
+		r = -EINVAL;
+		goto out_flags_changed;
+	}
+
 	pt->pool = pool;
 	pt->ti = ti;
 	pt->metadata_dev = metadata_dev;
@@ -2097,7 +2108,8 @@ static int pool_preresume(struct dm_target *ti)
 	int r;
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
-	dm_block_t data_size, sb_data_size;
+	sector_t data_size = ti->len;
+	dm_block_t sb_data_size;
 
 	/*
 	 * Take control of the pool object.
@@ -2106,7 +2118,8 @@ static int pool_preresume(struct dm_target *ti)
 	if (r)
 		return r;
 
-	data_size = ti->len >> pool->block_shift;
+	(void) sector_div(data_size, pool->sectors_per_block);
+
 	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
 	if (r) {
 		DMERR("failed to retrieve data device size");
@@ -2115,7 +2128,7 @@ static int pool_preresume(struct dm_target *ti)
 
 	if (data_size < sb_data_size) {
 		DMERR("pool target too small, is %llu blocks (expected %llu)",
-		      data_size, sb_data_size);
+		      (unsigned long long)data_size, sb_data_size);
 		return -EINVAL;
 
 	} else if (data_size > sb_data_size) {
@@ -2764,19 +2777,21 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 static int thin_iterate_devices(struct dm_target *ti,
 				iterate_devices_callout_fn fn, void *data)
 {
-	dm_block_t blocks;
+	sector_t blocks;
 	struct thin_c *tc = ti->private;
+	struct pool *pool = tc->pool;
 
 	/*
 	 * We can't call dm_pool_get_data_dev_size() since that blocks. So
 	 * we follow a more convoluted path through to the pool's target.
 	 */
-	if (!tc->pool->ti)
+	if (!pool->ti)
 		return 0; /* nothing is bound */
 
-	blocks = tc->pool->ti->len >> tc->pool->block_shift;
+	blocks = pool->ti->len;
+	(void) sector_div(blocks, pool->sectors_per_block);
 	if (blocks)
-		return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+		return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
 
 	return 0;
 }
@@ -2793,7 +2808,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
```
