author	Mike Snitzer <snitzer@redhat.com>	2012-09-26 18:45:47 -0400
committer	Alasdair G Kergon <agk@redhat.com>	2012-09-26 18:45:47 -0400
commit	0424caa14508f19ca8093d36c15250e0331a3a0a (patch)
tree	aa276ebe959e01f0bae7aafe9cb7f140219a6b64 /drivers/md
parent	9bc142dd755d360c08a91ecb107d218787a2e9db (diff)
dm thin: fix discard support for data devices
The discard limits that get established for a thin-pool or thin device
may be incompatible with the pool's data device.  Avoid this by checking
the discard limits of the pool's data device.  If an incompatibility is
found then the pool's 'discard passdown' feature is disabled.

Change thin_io_hints to ensure that a thin device always uses the same
queue limits as its pool device.

Introduce requested_pf to track whether or not the table line originally
contained the no_discard_passdown flag and use this directly for table
output.  We prepare the correct setting for discard_passdown directly in
bind_control_target (called from pool_io_hints) and store it in
adjusted_pf rather than waiting until we have access to pool->pf in
pool_preresume.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
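To make the compatibility checks described above concrete, here is a minimal, self-contained user-space sketch that restates the three arithmetic tests the patch adds in disable_passdown_if_not_supported().  The struct fake_data_limits and the helper passdown_incompatibility() are invented for this illustration only; they are not kernel structures or functions, and the kernel code operates on the data device's struct queue_limits instead.

/* Stand-alone sketch of the passdown compatibility checks (illustrative only). */
#include <stdio.h>

struct fake_data_limits {			/* stand-in for the data device's queue_limits */
	unsigned long max_discard_sectors;	/* largest discard, in 512-byte sectors */
	unsigned long discard_granularity;	/* preferred discard alignment, in bytes */
};

static const char *passdown_incompatibility(const struct fake_data_limits *dl,
					    unsigned long sectors_per_block)
{
	unsigned long block_size = sectors_per_block << 9;	/* sectors -> bytes (SECTOR_SHIFT) */

	if (dl->max_discard_sectors < sectors_per_block)
		return "max discard sectors smaller than a block";
	if (dl->discard_granularity > block_size)
		return "discard granularity larger than a block";
	if (block_size & (dl->discard_granularity - 1))
		return "discard granularity not a factor of block size";
	return NULL;						/* passdown may stay enabled */
}

int main(void)
{
	/* Hypothetical data device: 1MiB max discard, 4KiB granularity; 64KiB pool blocks. */
	struct fake_data_limits dl = { .max_discard_sectors = 2048, .discard_granularity = 4096 };
	const char *reason = passdown_incompatibility(&dl, 128);

	printf("%s\n", reason ? reason : "discard passdown compatible");
	return 0;
}

The final test uses a bitmask, so it checks divisibility exactly only when discard_granularity is a power of two, which is the usual case for that limit.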
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/dm-thin.c	87
1 file changed, 57 insertions(+), 30 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e99f4134dbd7..c29410af1e22 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -580,7 +580,8 @@ struct pool_c {
 	struct dm_target_callbacks callbacks;
 
 	dm_block_t low_water_blocks;
-	struct pool_features pf;
+	struct pool_features requested_pf; /* Features requested during table load */
+	struct pool_features adjusted_pf;  /* Features used after adjusting for constituent devices */
 };
 
 /*
@@ -1848,21 +1849,36 @@ static bool data_dev_supports_discard(struct pool_c *pt)
 
 /*
  * If discard_passdown was enabled verify that the data device
- * supports discards.  Disable discard_passdown if not; otherwise
- * -EOPNOTSUPP will be returned.
+ * supports discards.  Disable discard_passdown if not.
  */
-static void disable_passdown_if_not_supported(struct pool_c *pt,
-					      struct pool_features *pf)
+static void disable_passdown_if_not_supported(struct pool_c *pt)
 {
+	struct pool *pool = pt->pool;
+	struct block_device *data_bdev = pt->data_dev->bdev;
+	struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
+	sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT;
+	const char *reason = NULL;
 	char buf[BDEVNAME_SIZE];
 
-	if (!pf->discard_passdown || data_dev_supports_discard(pt))
+	if (!pt->adjusted_pf.discard_passdown)
 		return;
 
-	DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.",
-	       bdevname(pt->data_dev->bdev, buf));
+	if (!data_dev_supports_discard(pt))
+		reason = "discard unsupported";
+
+	else if (data_limits->max_discard_sectors < pool->sectors_per_block)
+		reason = "max discard sectors smaller than a block";
 
-	pf->discard_passdown = false;
+	else if (data_limits->discard_granularity > block_size)
+		reason = "discard granularity larger than a block";
+
+	else if (block_size & (data_limits->discard_granularity - 1))
+		reason = "discard granularity not a factor of block size";
+
+	if (reason) {
+		DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason);
+		pt->adjusted_pf.discard_passdown = false;
+	}
 }
 
 static int bind_control_target(struct pool *pool, struct dm_target *ti)
1868static int bind_control_target(struct pool *pool, struct dm_target *ti) 1884static int bind_control_target(struct pool *pool, struct dm_target *ti)
@@ -1873,16 +1889,15 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
 	 * We want to make sure that degraded pools are never upgraded.
 	 */
 	enum pool_mode old_mode = pool->pf.mode;
-	enum pool_mode new_mode = pt->pf.mode;
+	enum pool_mode new_mode = pt->adjusted_pf.mode;
 
 	if (old_mode > new_mode)
 		new_mode = old_mode;
 
 	pool->ti = ti;
 	pool->low_water_blocks = pt->low_water_blocks;
-	pool->pf = pt->pf;
+	pool->pf = pt->adjusted_pf;
 
-	disable_passdown_if_not_supported(pt, &pool->pf);
 	set_pool_mode(pool, new_mode);
 
 	return 0;
@@ -2271,7 +2286,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	pt->metadata_dev = metadata_dev;
 	pt->data_dev = data_dev;
 	pt->low_water_blocks = low_water_blocks;
-	pt->pf = pf;
+	pt->adjusted_pf = pt->requested_pf = pf;
 	ti->num_flush_requests = 1;
 
 	/*
@@ -2718,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
 		       format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
 		       (unsigned long)pool->sectors_per_block,
 		       (unsigned long long)pt->low_water_blocks);
-		emit_flags(&pt->pf, result, sz, maxlen);
+		emit_flags(&pt->requested_pf, result, sz, maxlen);
 		break;
 	}
 
@@ -2747,19 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
-static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
+static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
 {
-	/*
-	 * FIXME: these limits may be incompatible with the pool's data device
-	 */
+	struct pool *pool = pt->pool;
+	struct queue_limits *data_limits;
+
 	limits->max_discard_sectors = pool->sectors_per_block;
 
 	/*
-	 * This is just a hint, and not enforced.  We have to cope with
-	 * bios that cover a block partially.  A discard that spans a block
-	 * boundary is not sent to this target.
+	 * discard_granularity is just a hint, and not enforced.
 	 */
-	limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
+	if (pt->adjusted_pf.discard_passdown) {
+		data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
+		limits->discard_granularity = data_limits->discard_granularity;
+	} else
+		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
 }
 
 static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2769,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 	blk_limits_io_min(limits, 0);
 	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
-	if (pool->pf.discard_enabled)
-		set_discard_limits(pool, limits);
+
+	/*
+	 * pt->adjusted_pf is a staging area for the actual features to use.
+	 * They get transferred to the live pool in bind_control_target()
+	 * called from pool_preresume().
+	 */
+	if (!pt->adjusted_pf.discard_enabled)
+		return;
+
+	disable_passdown_if_not_supported(pt);
+
+	set_discard_limits(pt, limits);
 }
 
 static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 3, 0},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -3056,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti,
 	return 0;
 }
 
+/*
+ * A thin device always inherits its queue limits from its pool.
+ */
 static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct thin_c *tc = ti->private;
-	struct pool *pool = tc->pool;
 
-	blk_limits_io_min(limits, 0);
-	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
-	set_discard_limits(pool, limits);
+	*limits = bdev_get_queue(tc->pool_dev->bdev)->limits;
 }
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 3, 0},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,