author    Mike Snitzer <snitzer@redhat.com>        2012-07-27 10:08:02 -0400
committer Alasdair G Kergon <agk@redhat.com>       2012-07-27 10:08:02 -0400
commit    55f2b8bdb0c7387eb2dc645b9ecbe5d0faa6b54e (patch)
tree      4428d66325bdc8d4f25a374e10b5a78cb0ab3e54 /drivers/md
parent    33d07c0dfab902a7c5420587984497dc05ab5c9c (diff)
dm thin: support for non power of 2 pool blocksize
Non power of 2 blocksize support is needed to properly align thinp IO
on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2).

Use sector_div to support non power of 2 blocksize for the pool's data
device.  This provides comparable performance to the power of 2 math
that was performed until now (as tested on modern x86_64 hardware).

The kernel currently assumes that limits->discard_granularity is a
power of two so the thin target only enables discard support if the
block size is a power of two.

Eliminate pool structure's 'block_shift', 'offset_mask' and remaining
4 byte holes.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
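For illustration only (not part of this patch): a minimal user-space sketch of the arithmetic the patch switches to. In the kernel, sector_div(x, y) divides the 64-bit sector count x by y in place and returns the remainder, so a single division yields both the block number (as in get_bio_block) and the intra-block offset (as in remap), replacing the old shift/mask math that only worked for power of 2 block sizes. The sample values below are made up.

    /* User-space sketch of the sector_div arithmetic; values are hypothetical. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint64_t bi_sector = 123456789;     /* bio's starting sector */
    	uint32_t sectors_per_block = 24;    /* non power of 2 pool block size, in sectors */

    	/*
    	 * Old math, power of 2 block size only:
    	 *   block  = bi_sector >> block_shift;
    	 *   offset = bi_sector & offset_mask;
    	 */

    	/* New math, any block size; sector_div() does the / and % in one call. */
    	uint64_t block  = bi_sector / sectors_per_block;  /* quotient, cf. get_bio_block() */
    	uint64_t offset = bi_sector % sectors_per_block;  /* remainder, cf. sector_div() in remap() */

    	printf("sector %llu -> block %llu, offset %llu sectors\n",
    	       (unsigned long long)bi_sector,
    	       (unsigned long long)block,
    	       (unsigned long long)offset);
    	return 0;
    }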
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-thin.c | 59
1 file changed, 37 insertions, 22 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 350bcf40485e..f21d318d98f0 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -510,10 +510,8 @@ struct pool {
 	struct block_device *md_dev;
 	struct dm_pool_metadata *pmd;
 
-	uint32_t sectors_per_block;
-	unsigned block_shift;
-	dm_block_t offset_mask;
 	dm_block_t low_water_blocks;
+	uint32_t sectors_per_block;
 
 	struct pool_features pf;
 	unsigned low_water_triggered:1;	/* A dm event has been sent */
@@ -526,8 +524,8 @@ struct pool {
 	struct work_struct worker;
 	struct delayed_work waker;
 
-	unsigned ref_count;
 	unsigned long last_commit_jiffies;
+	unsigned ref_count;
 
 	spinlock_t lock;
 	struct bio_list deferred_bios;
@@ -679,16 +677,21 @@ static void requeue_io(struct thin_c *tc)
 
 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 {
-	return bio->bi_sector >> tc->pool->block_shift;
+	sector_t block_nr = bio->bi_sector;
+
+	(void) sector_div(block_nr, tc->pool->sectors_per_block);
+
+	return block_nr;
 }
 
 static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
 {
 	struct pool *pool = tc->pool;
+	sector_t bi_sector = bio->bi_sector;
 
 	bio->bi_bdev = tc->pool_dev->bdev;
-	bio->bi_sector = (block << pool->block_shift) +
-			(bio->bi_sector & pool->offset_mask);
+	bio->bi_sector = (block * pool->sectors_per_block) +
+			 sector_div(bi_sector, pool->sectors_per_block);
 }
 
 static void remap_to_origin(struct thin_c *tc, struct bio *bio)
@@ -933,9 +936,10 @@ static void process_prepared(struct pool *pool, struct list_head *head,
  */
 static int io_overlaps_block(struct pool *pool, struct bio *bio)
 {
-	return !(bio->bi_sector & pool->offset_mask) &&
-		(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
+	sector_t bi_sector = bio->bi_sector;
 
+	return !sector_div(bi_sector, pool->sectors_per_block) &&
+	       (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
 }
 
 static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1239,8 +1243,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 			 * part of the discard that is in a subsequent
 			 * block.
 			 */
-			sector_t offset = bio->bi_sector - (block << pool->block_shift);
-			unsigned remaining = (pool->sectors_per_block - offset) << 9;
+			sector_t offset = bio->bi_sector - (block * pool->sectors_per_block);
+			unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT;
 			bio->bi_size = min(bio->bi_size, remaining);
 
 			cell_release_singleton(cell, bio);
@@ -1722,8 +1726,6 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 
 	pool->pmd = pmd;
 	pool->sectors_per_block = block_size;
-	pool->block_shift = ffs(block_size) - 1;
-	pool->offset_mask = block_size - 1;
 	pool->low_water_blocks = 0;
 	pool_features_init(&pool->pf);
 	pool->prison = prison_create(PRISON_CELLS);
@@ -1971,7 +1973,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
 	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
 	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
-	    !is_power_of_2(block_size)) {
+	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
 		ti->error = "Invalid block size";
 		r = -EINVAL;
 		goto out;
@@ -2018,6 +2020,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto out_flags_changed;
 	}
 
+	/*
+	 * The block layer requires discard_granularity to be a power of 2.
+	 */
+	if (pf.discard_enabled && !is_power_of_2(block_size)) {
+		ti->error = "Discard support must be disabled when the block size is not a power of 2";
+		r = -EINVAL;
+		goto out_flags_changed;
+	}
+
 	pt->pool = pool;
 	pt->ti = ti;
 	pt->metadata_dev = metadata_dev;
@@ -2097,7 +2108,8 @@ static int pool_preresume(struct dm_target *ti)
 	int r;
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
-	dm_block_t data_size, sb_data_size;
+	sector_t data_size = ti->len;
+	dm_block_t sb_data_size;
 
 	/*
 	 * Take control of the pool object.
@@ -2106,7 +2118,8 @@ static int pool_preresume(struct dm_target *ti)
 	if (r)
 		return r;
 
-	data_size = ti->len >> pool->block_shift;
+	(void) sector_div(data_size, pool->sectors_per_block);
+
 	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
 	if (r) {
 		DMERR("failed to retrieve data device size");
@@ -2115,7 +2128,7 @@ static int pool_preresume(struct dm_target *ti)
 
 	if (data_size < sb_data_size) {
 		DMERR("pool target too small, is %llu blocks (expected %llu)",
-		      data_size, sb_data_size);
+		      (unsigned long long)data_size, sb_data_size);
 		return -EINVAL;
 
 	} else if (data_size > sb_data_size) {
@@ -2764,19 +2777,21 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 static int thin_iterate_devices(struct dm_target *ti,
 				iterate_devices_callout_fn fn, void *data)
 {
-	dm_block_t blocks;
+	sector_t blocks;
 	struct thin_c *tc = ti->private;
+	struct pool *pool = tc->pool;
 
 	/*
 	 * We can't call dm_pool_get_data_dev_size() since that blocks.  So
 	 * we follow a more convoluted path through to the pool's target.
 	 */
-	if (!tc->pool->ti)
+	if (!pool->ti)
 		return 0;	/* nothing is bound */
 
-	blocks = tc->pool->ti->len >> tc->pool->block_shift;
+	blocks = pool->ti->len;
+	(void) sector_div(blocks, pool->sectors_per_block);
 	if (blocks)
-		return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
+		return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
 
 	return 0;
 }
@@ -2793,7 +2808,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,