aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/md/dm-mpath.c11
-rw-r--r--drivers/md/dm-table.c61
-rw-r--r--drivers/md/dm-thin.c135
-rw-r--r--drivers/md/dm-verity.c8
-rw-r--r--drivers/md/dm.c71
-rw-r--r--drivers/md/dm.h1
6 files changed, 209 insertions, 78 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d8abb90a6c2f..034233eefc82 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1555,6 +1555,7 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1555 unsigned long arg) 1555 unsigned long arg)
1556{ 1556{
1557 struct multipath *m = ti->private; 1557 struct multipath *m = ti->private;
1558 struct pgpath *pgpath;
1558 struct block_device *bdev; 1559 struct block_device *bdev;
1559 fmode_t mode; 1560 fmode_t mode;
1560 unsigned long flags; 1561 unsigned long flags;
@@ -1570,12 +1571,14 @@ again:
1570 if (!m->current_pgpath) 1571 if (!m->current_pgpath)
1571 __choose_pgpath(m, 0); 1572 __choose_pgpath(m, 0);
1572 1573
1573 if (m->current_pgpath) { 1574 pgpath = m->current_pgpath;
1574 bdev = m->current_pgpath->path.dev->bdev; 1575
1575 mode = m->current_pgpath->path.dev->mode; 1576 if (pgpath) {
1577 bdev = pgpath->path.dev->bdev;
1578 mode = pgpath->path.dev->mode;
1576 } 1579 }
1577 1580
1578 if (m->queue_io) 1581 if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
1579 r = -EAGAIN; 1582 r = -EAGAIN;
1580 else if (!bdev) 1583 else if (!bdev)
1581 r = -EIO; 1584 r = -EIO;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index f90069029aae..100368eb7991 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1212,6 +1212,41 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
1212 return &t->targets[(KEYS_PER_NODE * n) + k]; 1212 return &t->targets[(KEYS_PER_NODE * n) + k];
1213} 1213}
1214 1214
1215static int count_device(struct dm_target *ti, struct dm_dev *dev,
1216 sector_t start, sector_t len, void *data)
1217{
1218 unsigned *num_devices = data;
1219
1220 (*num_devices)++;
1221
1222 return 0;
1223}
1224
1225/*
1226 * Check whether a table has no data devices attached using each
1227 * target's iterate_devices method.
1228 * Returns false if the result is unknown because a target doesn't
1229 * support iterate_devices.
1230 */
1231bool dm_table_has_no_data_devices(struct dm_table *table)
1232{
1233 struct dm_target *uninitialized_var(ti);
1234 unsigned i = 0, num_devices = 0;
1235
1236 while (i < dm_table_get_num_targets(table)) {
1237 ti = dm_table_get_target(table, i++);
1238
1239 if (!ti->type->iterate_devices)
1240 return false;
1241
1242 ti->type->iterate_devices(ti, count_device, &num_devices);
1243 if (num_devices)
1244 return false;
1245 }
1246
1247 return true;
1248}
1249
1215/* 1250/*
1216 * Establish the new table's queue_limits and validate them. 1251 * Establish the new table's queue_limits and validate them.
1217 */ 1252 */
@@ -1354,17 +1389,25 @@ static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
1354 return q && blk_queue_nonrot(q); 1389 return q && blk_queue_nonrot(q);
1355} 1390}
1356 1391
1357static bool dm_table_is_nonrot(struct dm_table *t) 1392static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
1393 sector_t start, sector_t len, void *data)
1394{
1395 struct request_queue *q = bdev_get_queue(dev->bdev);
1396
1397 return q && !blk_queue_add_random(q);
1398}
1399
1400static bool dm_table_all_devices_attribute(struct dm_table *t,
1401 iterate_devices_callout_fn func)
1358{ 1402{
1359 struct dm_target *ti; 1403 struct dm_target *ti;
1360 unsigned i = 0; 1404 unsigned i = 0;
1361 1405
1362 /* Ensure that all underlying device are non-rotational. */
1363 while (i < dm_table_get_num_targets(t)) { 1406 while (i < dm_table_get_num_targets(t)) {
1364 ti = dm_table_get_target(t, i++); 1407 ti = dm_table_get_target(t, i++);
1365 1408
1366 if (!ti->type->iterate_devices || 1409 if (!ti->type->iterate_devices ||
1367 !ti->type->iterate_devices(ti, device_is_nonrot, NULL)) 1410 !ti->type->iterate_devices(ti, func, NULL))
1368 return 0; 1411 return 0;
1369 } 1412 }
1370 1413
@@ -1396,7 +1439,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1396 if (!dm_table_discard_zeroes_data(t)) 1439 if (!dm_table_discard_zeroes_data(t))
1397 q->limits.discard_zeroes_data = 0; 1440 q->limits.discard_zeroes_data = 0;
1398 1441
1399 if (dm_table_is_nonrot(t)) 1442 /* Ensure that all underlying devices are non-rotational. */
1443 if (dm_table_all_devices_attribute(t, device_is_nonrot))
1400 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 1444 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
1401 else 1445 else
1402 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); 1446 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
@@ -1404,6 +1448,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1404 dm_table_set_integrity(t); 1448 dm_table_set_integrity(t);
1405 1449
1406 /* 1450 /*
1451 * Determine whether or not this queue's I/O timings contribute
1452 * to the entropy pool, Only request-based targets use this.
1453 * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
1454 * have it set.
1455 */
1456 if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
1457 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);
1458
1459 /*
1407 * QUEUE_FLAG_STACKABLE must be set after all queue settings are 1460 * QUEUE_FLAG_STACKABLE must be set after all queue settings are
1408 * visible to other CPUs because, once the flag is set, incoming bios 1461 * visible to other CPUs because, once the flag is set, incoming bios
1409 * are processed by request-based dm, which refers to the queue 1462 * are processed by request-based dm, which refers to the queue
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index af1fc3b2c2ad..c29410af1e22 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -509,9 +509,9 @@ enum pool_mode {
509struct pool_features { 509struct pool_features {
510 enum pool_mode mode; 510 enum pool_mode mode;
511 511
512 unsigned zero_new_blocks:1; 512 bool zero_new_blocks:1;
513 unsigned discard_enabled:1; 513 bool discard_enabled:1;
514 unsigned discard_passdown:1; 514 bool discard_passdown:1;
515}; 515};
516 516
517struct thin_c; 517struct thin_c;
@@ -580,7 +580,8 @@ struct pool_c {
580 struct dm_target_callbacks callbacks; 580 struct dm_target_callbacks callbacks;
581 581
582 dm_block_t low_water_blocks; 582 dm_block_t low_water_blocks;
583 struct pool_features pf; 583 struct pool_features requested_pf; /* Features requested during table load */
584 struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */
584}; 585};
585 586
586/* 587/*
@@ -1839,6 +1840,47 @@ static void __requeue_bios(struct pool *pool)
1839/*---------------------------------------------------------------- 1840/*----------------------------------------------------------------
1840 * Binding of control targets to a pool object 1841 * Binding of control targets to a pool object
1841 *--------------------------------------------------------------*/ 1842 *--------------------------------------------------------------*/
1843static bool data_dev_supports_discard(struct pool_c *pt)
1844{
1845 struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
1846
1847 return q && blk_queue_discard(q);
1848}
1849
1850/*
1851 * If discard_passdown was enabled verify that the data device
1852 * supports discards. Disable discard_passdown if not.
1853 */
1854static void disable_passdown_if_not_supported(struct pool_c *pt)
1855{
1856 struct pool *pool = pt->pool;
1857 struct block_device *data_bdev = pt->data_dev->bdev;
1858 struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
1859 sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT;
1860 const char *reason = NULL;
1861 char buf[BDEVNAME_SIZE];
1862
1863 if (!pt->adjusted_pf.discard_passdown)
1864 return;
1865
1866 if (!data_dev_supports_discard(pt))
1867 reason = "discard unsupported";
1868
1869 else if (data_limits->max_discard_sectors < pool->sectors_per_block)
1870 reason = "max discard sectors smaller than a block";
1871
1872 else if (data_limits->discard_granularity > block_size)
1873 reason = "discard granularity larger than a block";
1874
1875 else if (block_size & (data_limits->discard_granularity - 1))
1876 reason = "discard granularity not a factor of block size";
1877
1878 if (reason) {
1879 DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason);
1880 pt->adjusted_pf.discard_passdown = false;
1881 }
1882}
1883
1842static int bind_control_target(struct pool *pool, struct dm_target *ti) 1884static int bind_control_target(struct pool *pool, struct dm_target *ti)
1843{ 1885{
1844 struct pool_c *pt = ti->private; 1886 struct pool_c *pt = ti->private;
@@ -1847,31 +1889,16 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
1847 * We want to make sure that degraded pools are never upgraded. 1889 * We want to make sure that degraded pools are never upgraded.
1848 */ 1890 */
1849 enum pool_mode old_mode = pool->pf.mode; 1891 enum pool_mode old_mode = pool->pf.mode;
1850 enum pool_mode new_mode = pt->pf.mode; 1892 enum pool_mode new_mode = pt->adjusted_pf.mode;
1851 1893
1852 if (old_mode > new_mode) 1894 if (old_mode > new_mode)
1853 new_mode = old_mode; 1895 new_mode = old_mode;
1854 1896
1855 pool->ti = ti; 1897 pool->ti = ti;
1856 pool->low_water_blocks = pt->low_water_blocks; 1898 pool->low_water_blocks = pt->low_water_blocks;
1857 pool->pf = pt->pf; 1899 pool->pf = pt->adjusted_pf;
1858 set_pool_mode(pool, new_mode);
1859 1900
1860 /* 1901 set_pool_mode(pool, new_mode);
1861 * If discard_passdown was enabled verify that the data device
1862 * supports discards. Disable discard_passdown if not; otherwise
1863 * -EOPNOTSUPP will be returned.
1864 */
1865 /* FIXME: pull this out into a sep fn. */
1866 if (pt->pf.discard_passdown) {
1867 struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
1868 if (!q || !blk_queue_discard(q)) {
1869 char buf[BDEVNAME_SIZE];
1870 DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.",
1871 bdevname(pt->data_dev->bdev, buf));
1872 pool->pf.discard_passdown = 0;
1873 }
1874 }
1875 1902
1876 return 0; 1903 return 0;
1877} 1904}
@@ -1889,9 +1916,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti)
1889static void pool_features_init(struct pool_features *pf) 1916static void pool_features_init(struct pool_features *pf)
1890{ 1917{
1891 pf->mode = PM_WRITE; 1918 pf->mode = PM_WRITE;
1892 pf->zero_new_blocks = 1; 1919 pf->zero_new_blocks = true;
1893 pf->discard_enabled = 1; 1920 pf->discard_enabled = true;
1894 pf->discard_passdown = 1; 1921 pf->discard_passdown = true;
1895} 1922}
1896 1923
1897static void __pool_destroy(struct pool *pool) 1924static void __pool_destroy(struct pool *pool)
@@ -2119,13 +2146,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
2119 argc--; 2146 argc--;
2120 2147
2121 if (!strcasecmp(arg_name, "skip_block_zeroing")) 2148 if (!strcasecmp(arg_name, "skip_block_zeroing"))
2122 pf->zero_new_blocks = 0; 2149 pf->zero_new_blocks = false;
2123 2150
2124 else if (!strcasecmp(arg_name, "ignore_discard")) 2151 else if (!strcasecmp(arg_name, "ignore_discard"))
2125 pf->discard_enabled = 0; 2152 pf->discard_enabled = false;
2126 2153
2127 else if (!strcasecmp(arg_name, "no_discard_passdown")) 2154 else if (!strcasecmp(arg_name, "no_discard_passdown"))
2128 pf->discard_passdown = 0; 2155 pf->discard_passdown = false;
2129 2156
2130 else if (!strcasecmp(arg_name, "read_only")) 2157 else if (!strcasecmp(arg_name, "read_only"))
2131 pf->mode = PM_READ_ONLY; 2158 pf->mode = PM_READ_ONLY;
@@ -2259,8 +2286,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
2259 pt->metadata_dev = metadata_dev; 2286 pt->metadata_dev = metadata_dev;
2260 pt->data_dev = data_dev; 2287 pt->data_dev = data_dev;
2261 pt->low_water_blocks = low_water_blocks; 2288 pt->low_water_blocks = low_water_blocks;
2262 pt->pf = pf; 2289 pt->adjusted_pf = pt->requested_pf = pf;
2263 ti->num_flush_requests = 1; 2290 ti->num_flush_requests = 1;
2291
2264 /* 2292 /*
2265 * Only need to enable discards if the pool should pass 2293 * Only need to enable discards if the pool should pass
2266 * them down to the data device. The thin device's discard 2294 * them down to the data device. The thin device's discard
@@ -2268,12 +2296,14 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
2268 */ 2296 */
2269 if (pf.discard_enabled && pf.discard_passdown) { 2297 if (pf.discard_enabled && pf.discard_passdown) {
2270 ti->num_discard_requests = 1; 2298 ti->num_discard_requests = 1;
2299
2271 /* 2300 /*
2272 * Setting 'discards_supported' circumvents the normal 2301 * Setting 'discards_supported' circumvents the normal
2273 * stacking of discard limits (this keeps the pool and 2302 * stacking of discard limits (this keeps the pool and
2274 * thin devices' discard limits consistent). 2303 * thin devices' discard limits consistent).
2275 */ 2304 */
2276 ti->discards_supported = true; 2305 ti->discards_supported = true;
2306 ti->discard_zeroes_data_unsupported = true;
2277 } 2307 }
2278 ti->private = pt; 2308 ti->private = pt;
2279 2309
@@ -2703,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
2703 format_dev_t(buf2, pt->data_dev->bdev->bd_dev), 2733 format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
2704 (unsigned long)pool->sectors_per_block, 2734 (unsigned long)pool->sectors_per_block,
2705 (unsigned long long)pt->low_water_blocks); 2735 (unsigned long long)pt->low_water_blocks);
2706 emit_flags(&pt->pf, result, sz, maxlen); 2736 emit_flags(&pt->requested_pf, result, sz, maxlen);
2707 break; 2737 break;
2708 } 2738 }
2709 2739
@@ -2732,20 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
2732 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 2762 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
2733} 2763}
2734 2764
2735static void set_discard_limits(struct pool *pool, struct queue_limits *limits) 2765static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits)
2736{ 2766{
2737 /* 2767 struct pool *pool = pt->pool;
2738 * FIXME: these limits may be incompatible with the pool's data device 2768 struct queue_limits *data_limits;
2739 */ 2769
2740 limits->max_discard_sectors = pool->sectors_per_block; 2770 limits->max_discard_sectors = pool->sectors_per_block;
2741 2771
2742 /* 2772 /*
2743 * This is just a hint, and not enforced. We have to cope with 2773 * discard_granularity is just a hint, and not enforced.
2744 * bios that cover a block partially. A discard that spans a block
2745 * boundary is not sent to this target.
2746 */ 2774 */
2747 limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; 2775 if (pt->adjusted_pf.discard_passdown) {
2748 limits->discard_zeroes_data = pool->pf.zero_new_blocks; 2776 data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits;
2777 limits->discard_granularity = data_limits->discard_granularity;
2778 } else
2779 limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
2749} 2780}
2750 2781
2751static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) 2782static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2755,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
2755 2786
2756 blk_limits_io_min(limits, 0); 2787 blk_limits_io_min(limits, 0);
2757 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); 2788 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
2758 if (pool->pf.discard_enabled) 2789
2759 set_discard_limits(pool, limits); 2790 /*
2791 * pt->adjusted_pf is a staging area for the actual features to use.
2792 * They get transferred to the live pool in bind_control_target()
2793 * called from pool_preresume().
2794 */
2795 if (!pt->adjusted_pf.discard_enabled)
2796 return;
2797
2798 disable_passdown_if_not_supported(pt);
2799
2800 set_discard_limits(pt, limits);
2760} 2801}
2761 2802
2762static struct target_type pool_target = { 2803static struct target_type pool_target = {
2763 .name = "thin-pool", 2804 .name = "thin-pool",
2764 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 2805 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
2765 DM_TARGET_IMMUTABLE, 2806 DM_TARGET_IMMUTABLE,
2766 .version = {1, 3, 0}, 2807 .version = {1, 4, 0},
2767 .module = THIS_MODULE, 2808 .module = THIS_MODULE,
2768 .ctr = pool_ctr, 2809 .ctr = pool_ctr,
2769 .dtr = pool_dtr, 2810 .dtr = pool_dtr,
@@ -3042,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti,
3042 return 0; 3083 return 0;
3043} 3084}
3044 3085
3086/*
3087 * A thin device always inherits its queue limits from its pool.
3088 */
3045static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) 3089static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
3046{ 3090{
3047 struct thin_c *tc = ti->private; 3091 struct thin_c *tc = ti->private;
3048 struct pool *pool = tc->pool;
3049 3092
3050 blk_limits_io_min(limits, 0); 3093 *limits = bdev_get_queue(tc->pool_dev->bdev)->limits;
3051 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
3052 set_discard_limits(pool, limits);
3053} 3094}
3054 3095
3055static struct target_type thin_target = { 3096static struct target_type thin_target = {
3056 .name = "thin", 3097 .name = "thin",
3057 .version = {1, 3, 0}, 3098 .version = {1, 4, 0},
3058 .module = THIS_MODULE, 3099 .module = THIS_MODULE,
3059 .ctr = thin_ctr, 3100 .ctr = thin_ctr,
3060 .dtr = thin_dtr, 3101 .dtr = thin_dtr,
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 254d19268ad2..892ae2766aa6 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -718,8 +718,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
718 v->hash_dev_block_bits = ffs(num) - 1; 718 v->hash_dev_block_bits = ffs(num) - 1;
719 719
720 if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || 720 if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 ||
721 num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) != 721 (sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
722 (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) { 722 >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
723 ti->error = "Invalid data blocks"; 723 ti->error = "Invalid data blocks";
724 r = -EINVAL; 724 r = -EINVAL;
725 goto bad; 725 goto bad;
@@ -733,8 +733,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
733 } 733 }
734 734
735 if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 || 735 if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 ||
736 num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) != 736 (sector_t)(num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT))
737 (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) { 737 >> (v->hash_dev_block_bits - SECTOR_SHIFT) != num_ll) {
738 ti->error = "Invalid hash start"; 738 ti->error = "Invalid hash start";
739 r = -EINVAL; 739 r = -EINVAL;
740 goto bad; 740 goto bad;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4e09b6ff5b49..67ffa391edcf 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -865,10 +865,14 @@ static void dm_done(struct request *clone, int error, bool mapped)
865{ 865{
866 int r = error; 866 int r = error;
867 struct dm_rq_target_io *tio = clone->end_io_data; 867 struct dm_rq_target_io *tio = clone->end_io_data;
868 dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; 868 dm_request_endio_fn rq_end_io = NULL;
869 869
870 if (mapped && rq_end_io) 870 if (tio->ti) {
871 r = rq_end_io(tio->ti, clone, error, &tio->info); 871 rq_end_io = tio->ti->type->rq_end_io;
872
873 if (mapped && rq_end_io)
874 r = rq_end_io(tio->ti, clone, error, &tio->info);
875 }
872 876
873 if (r <= 0) 877 if (r <= 0)
874 /* The target wants to complete the I/O */ 878 /* The target wants to complete the I/O */
@@ -1588,15 +1592,6 @@ static int map_request(struct dm_target *ti, struct request *clone,
1588 int r, requeued = 0; 1592 int r, requeued = 0;
1589 struct dm_rq_target_io *tio = clone->end_io_data; 1593 struct dm_rq_target_io *tio = clone->end_io_data;
1590 1594
1591 /*
1592 * Hold the md reference here for the in-flight I/O.
1593 * We can't rely on the reference count by device opener,
1594 * because the device may be closed during the request completion
1595 * when all bios are completed.
1596 * See the comment in rq_completed() too.
1597 */
1598 dm_get(md);
1599
1600 tio->ti = ti; 1595 tio->ti = ti;
1601 r = ti->type->map_rq(ti, clone, &tio->info); 1596 r = ti->type->map_rq(ti, clone, &tio->info);
1602 switch (r) { 1597 switch (r) {
@@ -1628,6 +1623,26 @@ static int map_request(struct dm_target *ti, struct request *clone,
1628 return requeued; 1623 return requeued;
1629} 1624}
1630 1625
1626static struct request *dm_start_request(struct mapped_device *md, struct request *orig)
1627{
1628 struct request *clone;
1629
1630 blk_start_request(orig);
1631 clone = orig->special;
1632 atomic_inc(&md->pending[rq_data_dir(clone)]);
1633
1634 /*
1635 * Hold the md reference here for the in-flight I/O.
1636 * We can't rely on the reference count by device opener,
1637 * because the device may be closed during the request completion
1638 * when all bios are completed.
1639 * See the comment in rq_completed() too.
1640 */
1641 dm_get(md);
1642
1643 return clone;
1644}
1645
1631/* 1646/*
1632 * q->request_fn for request-based dm. 1647 * q->request_fn for request-based dm.
1633 * Called with the queue lock held. 1648 * Called with the queue lock held.
@@ -1657,14 +1672,21 @@ static void dm_request_fn(struct request_queue *q)
1657 pos = blk_rq_pos(rq); 1672 pos = blk_rq_pos(rq);
1658 1673
1659 ti = dm_table_find_target(map, pos); 1674 ti = dm_table_find_target(map, pos);
1660 BUG_ON(!dm_target_is_valid(ti)); 1675 if (!dm_target_is_valid(ti)) {
1676 /*
1677 * Must perform setup, that dm_done() requires,
1678 * before calling dm_kill_unmapped_request
1679 */
1680 DMERR_LIMIT("request attempted access beyond the end of device");
1681 clone = dm_start_request(md, rq);
1682 dm_kill_unmapped_request(clone, -EIO);
1683 continue;
1684 }
1661 1685
1662 if (ti->type->busy && ti->type->busy(ti)) 1686 if (ti->type->busy && ti->type->busy(ti))
1663 goto delay_and_out; 1687 goto delay_and_out;
1664 1688
1665 blk_start_request(rq); 1689 clone = dm_start_request(md, rq);
1666 clone = rq->special;
1667 atomic_inc(&md->pending[rq_data_dir(clone)]);
1668 1690
1669 spin_unlock(q->queue_lock); 1691 spin_unlock(q->queue_lock);
1670 if (map_request(ti, clone, md)) 1692 if (map_request(ti, clone, md))
@@ -1684,8 +1706,6 @@ delay_and_out:
1684 blk_delay_queue(q, HZ / 10); 1706 blk_delay_queue(q, HZ / 10);
1685out: 1707out:
1686 dm_table_put(map); 1708 dm_table_put(map);
1687
1688 return;
1689} 1709}
1690 1710
1691int dm_underlying_device_busy(struct request_queue *q) 1711int dm_underlying_device_busy(struct request_queue *q)
@@ -2409,7 +2429,7 @@ static void dm_queue_flush(struct mapped_device *md)
2409 */ 2429 */
2410struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) 2430struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
2411{ 2431{
2412 struct dm_table *map = ERR_PTR(-EINVAL); 2432 struct dm_table *live_map, *map = ERR_PTR(-EINVAL);
2413 struct queue_limits limits; 2433 struct queue_limits limits;
2414 int r; 2434 int r;
2415 2435
@@ -2419,6 +2439,19 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
2419 if (!dm_suspended_md(md)) 2439 if (!dm_suspended_md(md))
2420 goto out; 2440 goto out;
2421 2441
2442 /*
2443 * If the new table has no data devices, retain the existing limits.
2444 * This helps multipath with queue_if_no_path if all paths disappear,
2445 * then new I/O is queued based on these limits, and then some paths
2446 * reappear.
2447 */
2448 if (dm_table_has_no_data_devices(table)) {
2449 live_map = dm_get_live_table(md);
2450 if (live_map)
2451 limits = md->queue->limits;
2452 dm_table_put(live_map);
2453 }
2454
2422 r = dm_calculate_queue_limits(table, &limits); 2455 r = dm_calculate_queue_limits(table, &limits);
2423 if (r) { 2456 if (r) {
2424 map = ERR_PTR(r); 2457 map = ERR_PTR(r);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 52eef493d266..6a99fefaa743 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -54,6 +54,7 @@ void dm_table_event_callback(struct dm_table *t,
54 void (*fn)(void *), void *context); 54 void (*fn)(void *), void *context);
55struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); 55struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
56struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); 56struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
57bool dm_table_has_no_data_devices(struct dm_table *table);
57int dm_calculate_queue_limits(struct dm_table *table, 58int dm_calculate_queue_limits(struct dm_table *table,
58 struct queue_limits *limits); 59 struct queue_limits *limits);
59void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, 60void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,