aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-12 19:36:31 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-12 19:36:31 -0500
commit802ea9d8645d33d24b7b4cd4537c14f3e698bde0 (patch)
tree9a51a21025fa9a38263aa44883ea2b6af823ea05 /drivers/md
parent8494bcf5b7c4b2416687e233dd34d4c6b6fe5653 (diff)
parenta4afe76b2b922e6197944d7be0be7a18b53175ae (diff)
Merge tag 'dm-3.20-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper changes from Mike Snitzer: - The most significant change this cycle is that request-based DM now supports stacking on top of blk-mq devices. This blk-mq support changes the model request-based DM uses for cloning a request to relying on calling blk_get_request() directly from the underlying blk-mq device. An early consumer of this code is Intel's emerging NVMe hardware; thanks to Keith Busch for working on, and pushing for, these changes. - A few other small fixes and cleanups across other DM targets. * tag 'dm-3.20-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm: inherit QUEUE_FLAG_SG_GAPS flags from underlying queues dm snapshot: remove unnecessary NULL checks before vfree() calls dm mpath: simplify failure path of dm_multipath_init() dm thin metadata: remove unused dm_pool_get_data_block_size() dm ioctl: fix stale comment above dm_get_inactive_table() dm crypt: update url in CONFIG_DM_CRYPT help text dm bufio: fix time comparison to use time_after_eq() dm: use time_in_range() and time_after() dm raid: fix a couple integer overflows dm table: train hybrid target type detection to select blk-mq if appropriate dm: allocate requests in target when stacking on blk-mq devices dm: prepare for allocating blk-mq clone requests in target dm: submit stacked requests in irq enabled context dm: split request structure out from dm_rq_target_io structure dm: remove exports for request-based interfaces without external callers
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig5
-rw-r--r--drivers/md/dm-bufio.c3
-rw-r--r--drivers/md/dm-cache-target.c5
-rw-r--r--drivers/md/dm-ioctl.c4
-rw-r--r--drivers/md/dm-log-userspace-base.c5
-rw-r--r--drivers/md/dm-mpath.c87
-rw-r--r--drivers/md/dm-raid.c16
-rw-r--r--drivers/md/dm-snap-persistent.c14
-rw-r--r--drivers/md/dm-table.c72
-rw-r--r--drivers/md/dm-target.c15
-rw-r--r--drivers/md/dm-thin-metadata.c9
-rw-r--r--drivers/md/dm-thin-metadata.h2
-rw-r--r--drivers/md/dm-thin.c5
-rw-r--r--drivers/md/dm.c347
-rw-r--r--drivers/md/dm.h11
15 files changed, 407 insertions, 193 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index c355a226a024..c39644478aa4 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -231,9 +231,8 @@ config DM_CRYPT
231 transparently encrypts the data on it. You'll need to activate 231 transparently encrypts the data on it. You'll need to activate
232 the ciphers you're going to use in the cryptoapi configuration. 232 the ciphers you're going to use in the cryptoapi configuration.
233 233
234 Information on how to use dm-crypt can be found on 234 For further information on dm-crypt and userspace tools see:
235 235 <http://code.google.com/p/cryptsetup/wiki/DMCrypt>
236 <http://www.saout.de/misc/dm-crypt/>
237 236
238 To compile this code as a module, choose M here: the module will 237 To compile this code as a module, choose M here: the module will
239 be called dm-crypt. 238 be called dm-crypt.
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index c33b49792b87..86dbbc737402 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -11,6 +11,7 @@
11#include <linux/device-mapper.h> 11#include <linux/device-mapper.h>
12#include <linux/dm-io.h> 12#include <linux/dm-io.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/jiffies.h>
14#include <linux/vmalloc.h> 15#include <linux/vmalloc.h>
15#include <linux/shrinker.h> 16#include <linux/shrinker.h>
16#include <linux/module.h> 17#include <linux/module.h>
@@ -1739,7 +1740,7 @@ static unsigned get_max_age_hz(void)
1739 1740
1740static bool older_than(struct dm_buffer *b, unsigned long age_hz) 1741static bool older_than(struct dm_buffer *b, unsigned long age_hz)
1741{ 1742{
1742 return (jiffies - b->last_accessed) >= age_hz; 1743 return time_after_eq(jiffies, b->last_accessed + age_hz);
1743} 1744}
1744 1745
1745static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) 1746static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index e1650539cc2f..7755af351867 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/dm-io.h> 12#include <linux/dm-io.h>
13#include <linux/dm-kcopyd.h> 13#include <linux/dm-kcopyd.h>
14#include <linux/jiffies.h>
14#include <linux/init.h> 15#include <linux/init.h>
15#include <linux/mempool.h> 16#include <linux/mempool.h>
16#include <linux/module.h> 17#include <linux/module.h>
@@ -1562,8 +1563,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
1562 1563
1563static int need_commit_due_to_time(struct cache *cache) 1564static int need_commit_due_to_time(struct cache *cache)
1564{ 1565{
1565 return jiffies < cache->last_commit_jiffies || 1566 return !time_in_range(jiffies, cache->last_commit_jiffies,
1566 jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; 1567 cache->last_commit_jiffies + COMMIT_PERIOD);
1567} 1568}
1568 1569
1569static int commit_if_needed(struct cache *cache) 1570static int commit_if_needed(struct cache *cache)
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 73f791bb9ea4..c8a18e4ee9dc 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -639,8 +639,8 @@ static int check_name(const char *name)
639 639
640/* 640/*
641 * On successful return, the caller must not attempt to acquire 641 * On successful return, the caller must not attempt to acquire
642 * _hash_lock without first calling dm_table_put, because dm_table_destroy 642 * _hash_lock without first calling dm_put_live_table, because dm_table_destroy
643 * waits for this dm_table_put and could be called under this lock. 643 * waits for this dm_put_live_table and could be called under this lock.
644 */ 644 */
645static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *srcu_idx) 645static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *srcu_idx)
646{ 646{
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index b953db6cc229..03177ca0b009 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -6,6 +6,7 @@
6 6
7#include <linux/bio.h> 7#include <linux/bio.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/jiffies.h>
9#include <linux/dm-dirty-log.h> 10#include <linux/dm-dirty-log.h>
10#include <linux/device-mapper.h> 11#include <linux/device-mapper.h>
11#include <linux/dm-log-userspace.h> 12#include <linux/dm-log-userspace.h>
@@ -829,7 +830,7 @@ static int userspace_is_remote_recovering(struct dm_dirty_log *log,
829 int r; 830 int r;
830 uint64_t region64 = region; 831 uint64_t region64 = region;
831 struct log_c *lc = log->context; 832 struct log_c *lc = log->context;
832 static unsigned long long limit; 833 static unsigned long limit;
833 struct { 834 struct {
834 int64_t is_recovering; 835 int64_t is_recovering;
835 uint64_t in_sync_hint; 836 uint64_t in_sync_hint;
@@ -845,7 +846,7 @@ static int userspace_is_remote_recovering(struct dm_dirty_log *log,
845 */ 846 */
846 if (region < lc->in_sync_hint) 847 if (region < lc->in_sync_hint)
847 return 0; 848 return 0;
848 else if (jiffies < limit) 849 else if (time_after(limit, jiffies))
849 return 1; 850 return 1;
850 851
851 limit = jiffies + (HZ / 4); 852 limit = jiffies + (HZ / 4);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 7b6b0f0f831a..d376dc87716e 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -11,6 +11,7 @@
11#include "dm-path-selector.h" 11#include "dm-path-selector.h"
12#include "dm-uevent.h" 12#include "dm-uevent.h"
13 13
14#include <linux/blkdev.h>
14#include <linux/ctype.h> 15#include <linux/ctype.h>
15#include <linux/init.h> 16#include <linux/init.h>
16#include <linux/mempool.h> 17#include <linux/mempool.h>
@@ -378,18 +379,18 @@ static int __must_push_back(struct multipath *m)
378/* 379/*
379 * Map cloned requests 380 * Map cloned requests
380 */ 381 */
381static int multipath_map(struct dm_target *ti, struct request *clone, 382static int __multipath_map(struct dm_target *ti, struct request *clone,
382 union map_info *map_context) 383 union map_info *map_context,
384 struct request *rq, struct request **__clone)
383{ 385{
384 struct multipath *m = (struct multipath *) ti->private; 386 struct multipath *m = (struct multipath *) ti->private;
385 int r = DM_MAPIO_REQUEUE; 387 int r = DM_MAPIO_REQUEUE;
386 size_t nr_bytes = blk_rq_bytes(clone); 388 size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
387 unsigned long flags;
388 struct pgpath *pgpath; 389 struct pgpath *pgpath;
389 struct block_device *bdev; 390 struct block_device *bdev;
390 struct dm_mpath_io *mpio; 391 struct dm_mpath_io *mpio;
391 392
392 spin_lock_irqsave(&m->lock, flags); 393 spin_lock_irq(&m->lock);
393 394
394 /* Do we need to select a new pgpath? */ 395 /* Do we need to select a new pgpath? */
395 if (!m->current_pgpath || 396 if (!m->current_pgpath ||
@@ -411,25 +412,61 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
411 /* ENOMEM, requeue */ 412 /* ENOMEM, requeue */
412 goto out_unlock; 413 goto out_unlock;
413 414
414 bdev = pgpath->path.dev->bdev;
415 clone->q = bdev_get_queue(bdev);
416 clone->rq_disk = bdev->bd_disk;
417 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
418 mpio = map_context->ptr; 415 mpio = map_context->ptr;
419 mpio->pgpath = pgpath; 416 mpio->pgpath = pgpath;
420 mpio->nr_bytes = nr_bytes; 417 mpio->nr_bytes = nr_bytes;
418
419 bdev = pgpath->path.dev->bdev;
420
421 spin_unlock_irq(&m->lock);
422
423 if (clone) {
424 /* Old request-based interface: allocated clone is passed in */
425 clone->q = bdev_get_queue(bdev);
426 clone->rq_disk = bdev->bd_disk;
427 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
428 } else {
429 /* blk-mq request-based interface */
430 *__clone = blk_get_request(bdev_get_queue(bdev),
431 rq_data_dir(rq), GFP_KERNEL);
432 if (IS_ERR(*__clone))
433 /* ENOMEM, requeue */
434 return r;
435 (*__clone)->bio = (*__clone)->biotail = NULL;
436 (*__clone)->rq_disk = bdev->bd_disk;
437 (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
438 }
439
421 if (pgpath->pg->ps.type->start_io) 440 if (pgpath->pg->ps.type->start_io)
422 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, 441 pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
423 &pgpath->path, 442 &pgpath->path,
424 nr_bytes); 443 nr_bytes);
425 r = DM_MAPIO_REMAPPED; 444 return DM_MAPIO_REMAPPED;
426 445
427out_unlock: 446out_unlock:
428 spin_unlock_irqrestore(&m->lock, flags); 447 spin_unlock_irq(&m->lock);
429 448
430 return r; 449 return r;
431} 450}
432 451
452static int multipath_map(struct dm_target *ti, struct request *clone,
453 union map_info *map_context)
454{
455 return __multipath_map(ti, clone, map_context, NULL, NULL);
456}
457
458static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
459 union map_info *map_context,
460 struct request **clone)
461{
462 return __multipath_map(ti, NULL, map_context, rq, clone);
463}
464
465static void multipath_release_clone(struct request *clone)
466{
467 blk_put_request(clone);
468}
469
433/* 470/*
434 * If we run out of usable paths, should we queue I/O or error it? 471 * If we run out of usable paths, should we queue I/O or error it?
435 */ 472 */
@@ -1666,11 +1703,13 @@ out:
1666 *---------------------------------------------------------------*/ 1703 *---------------------------------------------------------------*/
1667static struct target_type multipath_target = { 1704static struct target_type multipath_target = {
1668 .name = "multipath", 1705 .name = "multipath",
1669 .version = {1, 7, 0}, 1706 .version = {1, 8, 0},
1670 .module = THIS_MODULE, 1707 .module = THIS_MODULE,
1671 .ctr = multipath_ctr, 1708 .ctr = multipath_ctr,
1672 .dtr = multipath_dtr, 1709 .dtr = multipath_dtr,
1673 .map_rq = multipath_map, 1710 .map_rq = multipath_map,
1711 .clone_and_map_rq = multipath_clone_and_map,
1712 .release_clone_rq = multipath_release_clone,
1674 .rq_end_io = multipath_end_io, 1713 .rq_end_io = multipath_end_io,
1675 .presuspend = multipath_presuspend, 1714 .presuspend = multipath_presuspend,
1676 .postsuspend = multipath_postsuspend, 1715 .postsuspend = multipath_postsuspend,
@@ -1694,16 +1733,15 @@ static int __init dm_multipath_init(void)
1694 r = dm_register_target(&multipath_target); 1733 r = dm_register_target(&multipath_target);
1695 if (r < 0) { 1734 if (r < 0) {
1696 DMERR("register failed %d", r); 1735 DMERR("register failed %d", r);
1697 kmem_cache_destroy(_mpio_cache); 1736 r = -EINVAL;
1698 return -EINVAL; 1737 goto bad_register_target;
1699 } 1738 }
1700 1739
1701 kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); 1740 kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
1702 if (!kmultipathd) { 1741 if (!kmultipathd) {
1703 DMERR("failed to create workqueue kmpathd"); 1742 DMERR("failed to create workqueue kmpathd");
1704 dm_unregister_target(&multipath_target); 1743 r = -ENOMEM;
1705 kmem_cache_destroy(_mpio_cache); 1744 goto bad_alloc_kmultipathd;
1706 return -ENOMEM;
1707 } 1745 }
1708 1746
1709 /* 1747 /*
@@ -1716,16 +1754,23 @@ static int __init dm_multipath_init(void)
1716 WQ_MEM_RECLAIM); 1754 WQ_MEM_RECLAIM);
1717 if (!kmpath_handlerd) { 1755 if (!kmpath_handlerd) {
1718 DMERR("failed to create workqueue kmpath_handlerd"); 1756 DMERR("failed to create workqueue kmpath_handlerd");
1719 destroy_workqueue(kmultipathd); 1757 r = -ENOMEM;
1720 dm_unregister_target(&multipath_target); 1758 goto bad_alloc_kmpath_handlerd;
1721 kmem_cache_destroy(_mpio_cache);
1722 return -ENOMEM;
1723 } 1759 }
1724 1760
1725 DMINFO("version %u.%u.%u loaded", 1761 DMINFO("version %u.%u.%u loaded",
1726 multipath_target.version[0], multipath_target.version[1], 1762 multipath_target.version[0], multipath_target.version[1],
1727 multipath_target.version[2]); 1763 multipath_target.version[2]);
1728 1764
1765 return 0;
1766
1767bad_alloc_kmpath_handlerd:
1768 destroy_workqueue(kmultipathd);
1769bad_alloc_kmultipathd:
1770 dm_unregister_target(&multipath_target);
1771bad_register_target:
1772 kmem_cache_destroy(_mpio_cache);
1773
1729 return r; 1774 return r;
1730} 1775}
1731 1776
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 777d9ba2acad..88e4c7f24986 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1237,7 +1237,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
1237 argv++; 1237 argv++;
1238 1238
1239 /* Skip over RAID params for now and find out # of devices */ 1239 /* Skip over RAID params for now and find out # of devices */
1240 if (num_raid_params + 1 > argc) { 1240 if (num_raid_params >= argc) {
1241 ti->error = "Arguments do not agree with counts given"; 1241 ti->error = "Arguments do not agree with counts given";
1242 return -EINVAL; 1242 return -EINVAL;
1243 } 1243 }
@@ -1248,6 +1248,12 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
1248 return -EINVAL; 1248 return -EINVAL;
1249 } 1249 }
1250 1250
1251 argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */
1252 if (argc != (num_raid_devs * 2)) {
1253 ti->error = "Supplied RAID devices does not match the count given";
1254 return -EINVAL;
1255 }
1256
1251 rs = context_alloc(ti, rt, (unsigned)num_raid_devs); 1257 rs = context_alloc(ti, rt, (unsigned)num_raid_devs);
1252 if (IS_ERR(rs)) 1258 if (IS_ERR(rs))
1253 return PTR_ERR(rs); 1259 return PTR_ERR(rs);
@@ -1256,16 +1262,8 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
1256 if (ret) 1262 if (ret)
1257 goto bad; 1263 goto bad;
1258 1264
1259 ret = -EINVAL;
1260
1261 argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */
1262 argv += num_raid_params + 1; 1265 argv += num_raid_params + 1;
1263 1266
1264 if (argc != (num_raid_devs * 2)) {
1265 ti->error = "Supplied RAID devices does not match the count given";
1266 goto bad;
1267 }
1268
1269 ret = dev_parms(rs, argv); 1267 ret = dev_parms(rs, argv);
1270 if (ret) 1268 if (ret)
1271 goto bad; 1269 goto bad;
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index d6e88178d22c..808b8419bc48 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -200,16 +200,11 @@ err_area:
200 200
201static void free_area(struct pstore *ps) 201static void free_area(struct pstore *ps)
202{ 202{
203 if (ps->area) 203 vfree(ps->area);
204 vfree(ps->area);
205 ps->area = NULL; 204 ps->area = NULL;
206 205 vfree(ps->zero_area);
207 if (ps->zero_area)
208 vfree(ps->zero_area);
209 ps->zero_area = NULL; 206 ps->zero_area = NULL;
210 207 vfree(ps->header_area);
211 if (ps->header_area)
212 vfree(ps->header_area);
213 ps->header_area = NULL; 208 ps->header_area = NULL;
214} 209}
215 210
@@ -605,8 +600,7 @@ static void persistent_dtr(struct dm_exception_store *store)
605 free_area(ps); 600 free_area(ps);
606 601
607 /* Allocated in persistent_read_metadata */ 602 /* Allocated in persistent_read_metadata */
608 if (ps->callbacks) 603 vfree(ps->callbacks);
609 vfree(ps->callbacks);
610 604
611 kfree(ps); 605 kfree(ps);
612} 606}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 3afae9e062f8..6554d9148927 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -827,10 +827,11 @@ static int dm_table_set_type(struct dm_table *t)
827{ 827{
828 unsigned i; 828 unsigned i;
829 unsigned bio_based = 0, request_based = 0, hybrid = 0; 829 unsigned bio_based = 0, request_based = 0, hybrid = 0;
830 bool use_blk_mq = false;
830 struct dm_target *tgt; 831 struct dm_target *tgt;
831 struct dm_dev_internal *dd; 832 struct dm_dev_internal *dd;
832 struct list_head *devices; 833 struct list_head *devices;
833 unsigned live_md_type; 834 unsigned live_md_type = dm_get_md_type(t->md);
834 835
835 for (i = 0; i < t->num_targets; i++) { 836 for (i = 0; i < t->num_targets; i++) {
836 tgt = t->targets + i; 837 tgt = t->targets + i;
@@ -854,8 +855,8 @@ static int dm_table_set_type(struct dm_table *t)
854 * Determine the type from the live device. 855 * Determine the type from the live device.
855 * Default to bio-based if device is new. 856 * Default to bio-based if device is new.
856 */ 857 */
857 live_md_type = dm_get_md_type(t->md); 858 if (live_md_type == DM_TYPE_REQUEST_BASED ||
858 if (live_md_type == DM_TYPE_REQUEST_BASED) 859 live_md_type == DM_TYPE_MQ_REQUEST_BASED)
859 request_based = 1; 860 request_based = 1;
860 else 861 else
861 bio_based = 1; 862 bio_based = 1;
@@ -869,16 +870,6 @@ static int dm_table_set_type(struct dm_table *t)
869 870
870 BUG_ON(!request_based); /* No targets in this table */ 871 BUG_ON(!request_based); /* No targets in this table */
871 872
872 /* Non-request-stackable devices can't be used for request-based dm */
873 devices = dm_table_get_devices(t);
874 list_for_each_entry(dd, devices, list) {
875 if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev->bdev))) {
876 DMWARN("table load rejected: including"
877 " non-request-stackable devices");
878 return -EINVAL;
879 }
880 }
881
882 /* 873 /*
883 * Request-based dm supports only tables that have a single target now. 874 * Request-based dm supports only tables that have a single target now.
884 * To support multiple targets, request splitting support is needed, 875 * To support multiple targets, request splitting support is needed,
@@ -890,7 +881,37 @@ static int dm_table_set_type(struct dm_table *t)
890 return -EINVAL; 881 return -EINVAL;
891 } 882 }
892 883
893 t->type = DM_TYPE_REQUEST_BASED; 884 /* Non-request-stackable devices can't be used for request-based dm */
885 devices = dm_table_get_devices(t);
886 list_for_each_entry(dd, devices, list) {
887 struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
888
889 if (!blk_queue_stackable(q)) {
890 DMERR("table load rejected: including"
891 " non-request-stackable devices");
892 return -EINVAL;
893 }
894
895 if (q->mq_ops)
896 use_blk_mq = true;
897 }
898
899 if (use_blk_mq) {
900 /* verify _all_ devices in the table are blk-mq devices */
901 list_for_each_entry(dd, devices, list)
902 if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) {
903 DMERR("table load rejected: not all devices"
904 " are blk-mq request-stackable");
905 return -EINVAL;
906 }
907 t->type = DM_TYPE_MQ_REQUEST_BASED;
908
909 } else if (hybrid && list_empty(devices) && live_md_type != DM_TYPE_NONE) {
910 /* inherit live MD type */
911 t->type = live_md_type;
912
913 } else
914 t->type = DM_TYPE_REQUEST_BASED;
894 915
895 return 0; 916 return 0;
896} 917}
@@ -907,7 +928,15 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
907 928
908bool dm_table_request_based(struct dm_table *t) 929bool dm_table_request_based(struct dm_table *t)
909{ 930{
910 return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; 931 unsigned table_type = dm_table_get_type(t);
932
933 return (table_type == DM_TYPE_REQUEST_BASED ||
934 table_type == DM_TYPE_MQ_REQUEST_BASED);
935}
936
937bool dm_table_mq_request_based(struct dm_table *t)
938{
939 return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED;
911} 940}
912 941
913static int dm_table_alloc_md_mempools(struct dm_table *t) 942static int dm_table_alloc_md_mempools(struct dm_table *t)
@@ -1360,6 +1389,14 @@ static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev,
1360 return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); 1389 return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
1361} 1390}
1362 1391
1392static int queue_supports_sg_gaps(struct dm_target *ti, struct dm_dev *dev,
1393 sector_t start, sector_t len, void *data)
1394{
1395 struct request_queue *q = bdev_get_queue(dev->bdev);
1396
1397 return q && !test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags);
1398}
1399
1363static bool dm_table_all_devices_attribute(struct dm_table *t, 1400static bool dm_table_all_devices_attribute(struct dm_table *t,
1364 iterate_devices_callout_fn func) 1401 iterate_devices_callout_fn func)
1365{ 1402{
@@ -1480,6 +1517,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1480 else 1517 else
1481 queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); 1518 queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
1482 1519
1520 if (dm_table_all_devices_attribute(t, queue_supports_sg_gaps))
1521 queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, q);
1522 else
1523 queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, q);
1524
1483 dm_table_set_integrity(t); 1525 dm_table_set_integrity(t);
1484 1526
1485 /* 1527 /*
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 242e3cec397a..925ec1b15e75 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -137,13 +137,26 @@ static int io_err_map_rq(struct dm_target *ti, struct request *clone,
137 return -EIO; 137 return -EIO;
138} 138}
139 139
140static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
141 union map_info *map_context,
142 struct request **clone)
143{
144 return -EIO;
145}
146
147static void io_err_release_clone_rq(struct request *clone)
148{
149}
150
140static struct target_type error_target = { 151static struct target_type error_target = {
141 .name = "error", 152 .name = "error",
142 .version = {1, 2, 0}, 153 .version = {1, 3, 0},
143 .ctr = io_err_ctr, 154 .ctr = io_err_ctr,
144 .dtr = io_err_dtr, 155 .dtr = io_err_dtr,
145 .map = io_err_map, 156 .map = io_err_map,
146 .map_rq = io_err_map_rq, 157 .map_rq = io_err_map_rq,
158 .clone_and_map_rq = io_err_clone_and_map_rq,
159 .release_clone_rq = io_err_release_clone_rq,
147}; 160};
148 161
149int __init dm_target_init(void) 162int __init dm_target_init(void)
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 43adbb863f5a..79f694120ddf 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1635,15 +1635,6 @@ int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
1635 return r; 1635 return r;
1636} 1636}
1637 1637
1638int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result)
1639{
1640 down_read(&pmd->root_lock);
1641 *result = pmd->data_block_size;
1642 up_read(&pmd->root_lock);
1643
1644 return 0;
1645}
1646
1647int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) 1638int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result)
1648{ 1639{
1649 int r = -EINVAL; 1640 int r = -EINVAL;
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index 921d15ee56a0..fac01a96d303 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -182,8 +182,6 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
182int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, 182int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
183 dm_block_t *result); 183 dm_block_t *result);
184 184
185int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result);
186
187int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); 185int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result);
188 186
189int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); 187int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 07705ee181e3..654773cb1eee 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -11,6 +11,7 @@
11#include <linux/device-mapper.h> 11#include <linux/device-mapper.h>
12#include <linux/dm-io.h> 12#include <linux/dm-io.h>
13#include <linux/dm-kcopyd.h> 13#include <linux/dm-kcopyd.h>
14#include <linux/jiffies.h>
14#include <linux/log2.h> 15#include <linux/log2.h>
15#include <linux/list.h> 16#include <linux/list.h>
16#include <linux/rculist.h> 17#include <linux/rculist.h>
@@ -1700,8 +1701,8 @@ static void process_cell_fail(struct thin_c *tc, struct dm_bio_prison_cell *cell
1700 */ 1701 */
1701static int need_commit_due_to_time(struct pool *pool) 1702static int need_commit_due_to_time(struct pool *pool)
1702{ 1703{
1703 return jiffies < pool->last_commit_jiffies || 1704 return !time_in_range(jiffies, pool->last_commit_jiffies,
1704 jiffies > pool->last_commit_jiffies + COMMIT_PERIOD; 1705 pool->last_commit_jiffies + COMMIT_PERIOD);
1705} 1706}
1706 1707
1707#define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node) 1708#define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 68c1b535c52e..ec1444f49de1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -20,6 +20,7 @@
20#include <linux/hdreg.h> 20#include <linux/hdreg.h>
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/wait.h> 22#include <linux/wait.h>
23#include <linux/kthread.h>
23 24
24#include <trace/events/block.h> 25#include <trace/events/block.h>
25 26
@@ -78,7 +79,8 @@ struct dm_io {
78struct dm_rq_target_io { 79struct dm_rq_target_io {
79 struct mapped_device *md; 80 struct mapped_device *md;
80 struct dm_target *ti; 81 struct dm_target *ti;
81 struct request *orig, clone; 82 struct request *orig, *clone;
83 struct kthread_work work;
82 int error; 84 int error;
83 union map_info info; 85 union map_info info;
84}; 86};
@@ -179,6 +181,7 @@ struct mapped_device {
179 * io objects are allocated from here. 181 * io objects are allocated from here.
180 */ 182 */
181 mempool_t *io_pool; 183 mempool_t *io_pool;
184 mempool_t *rq_pool;
182 185
183 struct bio_set *bs; 186 struct bio_set *bs;
184 187
@@ -210,6 +213,9 @@ struct mapped_device {
210 unsigned internal_suspend_count; 213 unsigned internal_suspend_count;
211 214
212 struct dm_stats stats; 215 struct dm_stats stats;
216
217 struct kthread_worker kworker;
218 struct task_struct *kworker_task;
213}; 219};
214 220
215/* 221/*
@@ -217,6 +223,7 @@ struct mapped_device {
217 */ 223 */
218struct dm_md_mempools { 224struct dm_md_mempools {
219 mempool_t *io_pool; 225 mempool_t *io_pool;
226 mempool_t *rq_pool;
220 struct bio_set *bs; 227 struct bio_set *bs;
221}; 228};
222 229
@@ -231,6 +238,7 @@ struct table_device {
231#define RESERVED_MAX_IOS 1024 238#define RESERVED_MAX_IOS 1024
232static struct kmem_cache *_io_cache; 239static struct kmem_cache *_io_cache;
233static struct kmem_cache *_rq_tio_cache; 240static struct kmem_cache *_rq_tio_cache;
241static struct kmem_cache *_rq_cache;
234 242
235/* 243/*
236 * Bio-based DM's mempools' reserved IOs set by the user. 244 * Bio-based DM's mempools' reserved IOs set by the user.
@@ -288,9 +296,14 @@ static int __init local_init(void)
288 if (!_rq_tio_cache) 296 if (!_rq_tio_cache)
289 goto out_free_io_cache; 297 goto out_free_io_cache;
290 298
299 _rq_cache = kmem_cache_create("dm_clone_request", sizeof(struct request),
300 __alignof__(struct request), 0, NULL);
301 if (!_rq_cache)
302 goto out_free_rq_tio_cache;
303
291 r = dm_uevent_init(); 304 r = dm_uevent_init();
292 if (r) 305 if (r)
293 goto out_free_rq_tio_cache; 306 goto out_free_rq_cache;
294 307
295 deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1); 308 deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
296 if (!deferred_remove_workqueue) { 309 if (!deferred_remove_workqueue) {
@@ -312,6 +325,8 @@ out_free_workqueue:
312 destroy_workqueue(deferred_remove_workqueue); 325 destroy_workqueue(deferred_remove_workqueue);
313out_uevent_exit: 326out_uevent_exit:
314 dm_uevent_exit(); 327 dm_uevent_exit();
328out_free_rq_cache:
329 kmem_cache_destroy(_rq_cache);
315out_free_rq_tio_cache: 330out_free_rq_tio_cache:
316 kmem_cache_destroy(_rq_tio_cache); 331 kmem_cache_destroy(_rq_tio_cache);
317out_free_io_cache: 332out_free_io_cache:
@@ -325,6 +340,7 @@ static void local_exit(void)
325 flush_scheduled_work(); 340 flush_scheduled_work();
326 destroy_workqueue(deferred_remove_workqueue); 341 destroy_workqueue(deferred_remove_workqueue);
327 342
343 kmem_cache_destroy(_rq_cache);
328 kmem_cache_destroy(_rq_tio_cache); 344 kmem_cache_destroy(_rq_tio_cache);
329 kmem_cache_destroy(_io_cache); 345 kmem_cache_destroy(_io_cache);
330 unregister_blkdev(_major, _name); 346 unregister_blkdev(_major, _name);
@@ -577,6 +593,17 @@ static void free_rq_tio(struct dm_rq_target_io *tio)
577 mempool_free(tio, tio->md->io_pool); 593 mempool_free(tio, tio->md->io_pool);
578} 594}
579 595
596static struct request *alloc_clone_request(struct mapped_device *md,
597 gfp_t gfp_mask)
598{
599 return mempool_alloc(md->rq_pool, gfp_mask);
600}
601
602static void free_clone_request(struct mapped_device *md, struct request *rq)
603{
604 mempool_free(rq, md->rq_pool);
605}
606
580static int md_in_flight(struct mapped_device *md) 607static int md_in_flight(struct mapped_device *md)
581{ 608{
582 return atomic_read(&md->pending[READ]) + 609 return atomic_read(&md->pending[READ]) +
@@ -992,7 +1019,7 @@ static void end_clone_bio(struct bio *clone, int error)
992 * the md may be freed in dm_put() at the end of this function. 1019 * the md may be freed in dm_put() at the end of this function.
993 * Or do dm_get() before calling this function and dm_put() later. 1020 * Or do dm_get() before calling this function and dm_put() later.
994 */ 1021 */
995static void rq_completed(struct mapped_device *md, int rw, int run_queue) 1022static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
996{ 1023{
997 atomic_dec(&md->pending[rw]); 1024 atomic_dec(&md->pending[rw]);
998 1025
@@ -1020,12 +1047,17 @@ static void free_rq_clone(struct request *clone)
1020 struct dm_rq_target_io *tio = clone->end_io_data; 1047 struct dm_rq_target_io *tio = clone->end_io_data;
1021 1048
1022 blk_rq_unprep_clone(clone); 1049 blk_rq_unprep_clone(clone);
1050 if (clone->q && clone->q->mq_ops)
1051 tio->ti->type->release_clone_rq(clone);
1052 else
1053 free_clone_request(tio->md, clone);
1023 free_rq_tio(tio); 1054 free_rq_tio(tio);
1024} 1055}
1025 1056
1026/* 1057/*
1027 * Complete the clone and the original request. 1058 * Complete the clone and the original request.
1028 * Must be called without queue lock. 1059 * Must be called without clone's queue lock held,
1060 * see end_clone_request() for more details.
1029 */ 1061 */
1030static void dm_end_request(struct request *clone, int error) 1062static void dm_end_request(struct request *clone, int error)
1031{ 1063{
@@ -1054,23 +1086,23 @@ static void dm_end_request(struct request *clone, int error)
1054 1086
1055static void dm_unprep_request(struct request *rq) 1087static void dm_unprep_request(struct request *rq)
1056{ 1088{
1057 struct request *clone = rq->special; 1089 struct dm_rq_target_io *tio = rq->special;
1090 struct request *clone = tio->clone;
1058 1091
1059 rq->special = NULL; 1092 rq->special = NULL;
1060 rq->cmd_flags &= ~REQ_DONTPREP; 1093 rq->cmd_flags &= ~REQ_DONTPREP;
1061 1094
1062 free_rq_clone(clone); 1095 if (clone)
1096 free_rq_clone(clone);
1063} 1097}
1064 1098
1065/* 1099/*
1066 * Requeue the original request of a clone. 1100 * Requeue the original request of a clone.
1067 */ 1101 */
1068void dm_requeue_unmapped_request(struct request *clone) 1102static void dm_requeue_unmapped_original_request(struct mapped_device *md,
1103 struct request *rq)
1069{ 1104{
1070 int rw = rq_data_dir(clone); 1105 int rw = rq_data_dir(rq);
1071 struct dm_rq_target_io *tio = clone->end_io_data;
1072 struct mapped_device *md = tio->md;
1073 struct request *rq = tio->orig;
1074 struct request_queue *q = rq->q; 1106 struct request_queue *q = rq->q;
1075 unsigned long flags; 1107 unsigned long flags;
1076 1108
@@ -1080,9 +1112,15 @@ void dm_requeue_unmapped_request(struct request *clone)
1080 blk_requeue_request(q, rq); 1112 blk_requeue_request(q, rq);
1081 spin_unlock_irqrestore(q->queue_lock, flags); 1113 spin_unlock_irqrestore(q->queue_lock, flags);
1082 1114
1083 rq_completed(md, rw, 0); 1115 rq_completed(md, rw, false);
1116}
1117
1118static void dm_requeue_unmapped_request(struct request *clone)
1119{
1120 struct dm_rq_target_io *tio = clone->end_io_data;
1121
1122 dm_requeue_unmapped_original_request(tio->md, tio->orig);
1084} 1123}
1085EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
1086 1124
1087static void __stop_queue(struct request_queue *q) 1125static void __stop_queue(struct request_queue *q)
1088{ 1126{
@@ -1151,8 +1189,15 @@ static void dm_done(struct request *clone, int error, bool mapped)
1151static void dm_softirq_done(struct request *rq) 1189static void dm_softirq_done(struct request *rq)
1152{ 1190{
1153 bool mapped = true; 1191 bool mapped = true;
1154 struct request *clone = rq->completion_data; 1192 struct dm_rq_target_io *tio = rq->special;
1155 struct dm_rq_target_io *tio = clone->end_io_data; 1193 struct request *clone = tio->clone;
1194
1195 if (!clone) {
1196 blk_end_request_all(rq, tio->error);
1197 rq_completed(tio->md, rq_data_dir(rq), false);
1198 free_rq_tio(tio);
1199 return;
1200 }
1156 1201
1157 if (rq->cmd_flags & REQ_FAILED) 1202 if (rq->cmd_flags & REQ_FAILED)
1158 mapped = false; 1203 mapped = false;
@@ -1164,13 +1209,11 @@ static void dm_softirq_done(struct request *rq)
1164 * Complete the clone and the original request with the error status 1209 * Complete the clone and the original request with the error status
1165 * through softirq context. 1210 * through softirq context.
1166 */ 1211 */
1167static void dm_complete_request(struct request *clone, int error) 1212static void dm_complete_request(struct request *rq, int error)
1168{ 1213{
1169 struct dm_rq_target_io *tio = clone->end_io_data; 1214 struct dm_rq_target_io *tio = rq->special;
1170 struct request *rq = tio->orig;
1171 1215
1172 tio->error = error; 1216 tio->error = error;
1173 rq->completion_data = clone;
1174 blk_complete_request(rq); 1217 blk_complete_request(rq);
1175} 1218}
1176 1219
@@ -1178,40 +1221,40 @@ static void dm_complete_request(struct request *clone, int error)
1178 * Complete the not-mapped clone and the original request with the error status 1221 * Complete the not-mapped clone and the original request with the error status
1179 * through softirq context. 1222 * through softirq context.
1180 * Target's rq_end_io() function isn't called. 1223 * Target's rq_end_io() function isn't called.
1181 * This may be used when the target's map_rq() function fails. 1224 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
1182 */ 1225 */
1183void dm_kill_unmapped_request(struct request *clone, int error) 1226static void dm_kill_unmapped_request(struct request *rq, int error)
1184{ 1227{
1185 struct dm_rq_target_io *tio = clone->end_io_data;
1186 struct request *rq = tio->orig;
1187
1188 rq->cmd_flags |= REQ_FAILED; 1228 rq->cmd_flags |= REQ_FAILED;
1189 dm_complete_request(clone, error); 1229 dm_complete_request(rq, error);
1190} 1230}
1191EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
1192 1231
1193/* 1232/*
1194 * Called with the queue lock held 1233 * Called with the clone's queue lock held
1195 */ 1234 */
1196static void end_clone_request(struct request *clone, int error) 1235static void end_clone_request(struct request *clone, int error)
1197{ 1236{
1198 /* 1237 struct dm_rq_target_io *tio = clone->end_io_data;
1199 * For just cleaning up the information of the queue in which 1238
1200 * the clone was dispatched. 1239 if (!clone->q->mq_ops) {
1201 * The clone is *NOT* freed actually here because it is alloced from 1240 /*
1202 * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. 1241 * For just cleaning up the information of the queue in which
1203 */ 1242 * the clone was dispatched.
1204 __blk_put_request(clone->q, clone); 1243 * The clone is *NOT* freed actually here because it is alloced
1244 * from dm own mempool (REQ_ALLOCED isn't set).
1245 */
1246 __blk_put_request(clone->q, clone);
1247 }
1205 1248
1206 /* 1249 /*
1207 * Actual request completion is done in a softirq context which doesn't 1250 * Actual request completion is done in a softirq context which doesn't
1208 * hold the queue lock. Otherwise, deadlock could occur because: 1251 * hold the clone's queue lock. Otherwise, deadlock could occur because:
1209 * - another request may be submitted by the upper level driver 1252 * - another request may be submitted by the upper level driver
1210 * of the stacking during the completion 1253 * of the stacking during the completion
1211 * - the submission which requires queue lock may be done 1254 * - the submission which requires queue lock may be done
1212 * against this queue 1255 * against this clone's queue
1213 */ 1256 */
1214 dm_complete_request(clone, error); 1257 dm_complete_request(tio->orig, error);
1215} 1258}
1216 1259
1217/* 1260/*
@@ -1689,19 +1732,19 @@ static void dm_request(struct request_queue *q, struct bio *bio)
1689 _dm_request(q, bio); 1732 _dm_request(q, bio);
1690} 1733}
1691 1734
1692void dm_dispatch_request(struct request *rq) 1735static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
1693{ 1736{
1694 int r; 1737 int r;
1695 1738
1696 if (blk_queue_io_stat(rq->q)) 1739 if (blk_queue_io_stat(clone->q))
1697 rq->cmd_flags |= REQ_IO_STAT; 1740 clone->cmd_flags |= REQ_IO_STAT;
1698 1741
1699 rq->start_time = jiffies; 1742 clone->start_time = jiffies;
1700 r = blk_insert_cloned_request(rq->q, rq); 1743 r = blk_insert_cloned_request(clone->q, clone);
1701 if (r) 1744 if (r)
1745 /* must complete clone in terms of original request */
1702 dm_complete_request(rq, r); 1746 dm_complete_request(rq, r);
1703} 1747}
1704EXPORT_SYMBOL_GPL(dm_dispatch_request);
1705 1748
1706static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, 1749static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
1707 void *data) 1750 void *data)
@@ -1718,12 +1761,11 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
1718} 1761}
1719 1762
1720static int setup_clone(struct request *clone, struct request *rq, 1763static int setup_clone(struct request *clone, struct request *rq,
1721 struct dm_rq_target_io *tio) 1764 struct dm_rq_target_io *tio, gfp_t gfp_mask)
1722{ 1765{
1723 int r; 1766 int r;
1724 1767
1725 blk_rq_init(NULL, clone); 1768 r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
1726 r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
1727 dm_rq_bio_constructor, tio); 1769 dm_rq_bio_constructor, tio);
1728 if (r) 1770 if (r)
1729 return r; 1771 return r;
@@ -1734,14 +1776,37 @@ static int setup_clone(struct request *clone, struct request *rq,
1734 clone->end_io = end_clone_request; 1776 clone->end_io = end_clone_request;
1735 clone->end_io_data = tio; 1777 clone->end_io_data = tio;
1736 1778
1779 tio->clone = clone;
1780
1737 return 0; 1781 return 0;
1738} 1782}
1739 1783
1740static struct request *clone_rq(struct request *rq, struct mapped_device *md, 1784static struct request *clone_rq(struct request *rq, struct mapped_device *md,
1741 gfp_t gfp_mask) 1785 struct dm_rq_target_io *tio, gfp_t gfp_mask)
1786{
1787 struct request *clone = alloc_clone_request(md, gfp_mask);
1788
1789 if (!clone)
1790 return NULL;
1791
1792 blk_rq_init(NULL, clone);
1793 if (setup_clone(clone, rq, tio, gfp_mask)) {
1794 /* -ENOMEM */
1795 free_clone_request(md, clone);
1796 return NULL;
1797 }
1798
1799 return clone;
1800}
1801
1802static void map_tio_request(struct kthread_work *work);
1803
1804static struct dm_rq_target_io *prep_tio(struct request *rq,
1805 struct mapped_device *md, gfp_t gfp_mask)
1742{ 1806{
1743 struct request *clone;
1744 struct dm_rq_target_io *tio; 1807 struct dm_rq_target_io *tio;
1808 int srcu_idx;
1809 struct dm_table *table;
1745 1810
1746 tio = alloc_rq_tio(md, gfp_mask); 1811 tio = alloc_rq_tio(md, gfp_mask);
1747 if (!tio) 1812 if (!tio)
@@ -1749,18 +1814,23 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
1749 1814
1750 tio->md = md; 1815 tio->md = md;
1751 tio->ti = NULL; 1816 tio->ti = NULL;
1817 tio->clone = NULL;
1752 tio->orig = rq; 1818 tio->orig = rq;
1753 tio->error = 0; 1819 tio->error = 0;
1754 memset(&tio->info, 0, sizeof(tio->info)); 1820 memset(&tio->info, 0, sizeof(tio->info));
1755 1821 init_kthread_work(&tio->work, map_tio_request);
1756 clone = &tio->clone; 1822
1757 if (setup_clone(clone, rq, tio)) { 1823 table = dm_get_live_table(md, &srcu_idx);
1758 /* -ENOMEM */ 1824 if (!dm_table_mq_request_based(table)) {
1759 free_rq_tio(tio); 1825 if (!clone_rq(rq, md, tio, gfp_mask)) {
1760 return NULL; 1826 dm_put_live_table(md, srcu_idx);
1827 free_rq_tio(tio);
1828 return NULL;
1829 }
1761 } 1830 }
1831 dm_put_live_table(md, srcu_idx);
1762 1832
1763 return clone; 1833 return tio;
1764} 1834}
1765 1835
1766/* 1836/*
@@ -1769,18 +1839,18 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
1769static int dm_prep_fn(struct request_queue *q, struct request *rq) 1839static int dm_prep_fn(struct request_queue *q, struct request *rq)
1770{ 1840{
1771 struct mapped_device *md = q->queuedata; 1841 struct mapped_device *md = q->queuedata;
1772 struct request *clone; 1842 struct dm_rq_target_io *tio;
1773 1843
1774 if (unlikely(rq->special)) { 1844 if (unlikely(rq->special)) {
1775 DMWARN("Already has something in rq->special."); 1845 DMWARN("Already has something in rq->special.");
1776 return BLKPREP_KILL; 1846 return BLKPREP_KILL;
1777 } 1847 }
1778 1848
1779 clone = clone_rq(rq, md, GFP_ATOMIC); 1849 tio = prep_tio(rq, md, GFP_ATOMIC);
1780 if (!clone) 1850 if (!tio)
1781 return BLKPREP_DEFER; 1851 return BLKPREP_DEFER;
1782 1852
1783 rq->special = clone; 1853 rq->special = tio;
1784 rq->cmd_flags |= REQ_DONTPREP; 1854 rq->cmd_flags |= REQ_DONTPREP;
1785 1855
1786 return BLKPREP_OK; 1856 return BLKPREP_OK;
@@ -1788,17 +1858,36 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
1788 1858
1789/* 1859/*
1790 * Returns: 1860 * Returns:
1791 * 0 : the request has been processed (not requeued) 1861 * 0 : the request has been processed
1792 * !0 : the request has been requeued 1862 * DM_MAPIO_REQUEUE : the original request needs to be requeued
1863 * < 0 : the request was completed due to failure
1793 */ 1864 */
1794static int map_request(struct dm_target *ti, struct request *clone, 1865static int map_request(struct dm_target *ti, struct request *rq,
1795 struct mapped_device *md) 1866 struct mapped_device *md)
1796{ 1867{
1797 int r, requeued = 0; 1868 int r;
1798 struct dm_rq_target_io *tio = clone->end_io_data; 1869 struct dm_rq_target_io *tio = rq->special;
1870 struct request *clone = NULL;
1871
1872 if (tio->clone) {
1873 clone = tio->clone;
1874 r = ti->type->map_rq(ti, clone, &tio->info);
1875 } else {
1876 r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
1877 if (r < 0) {
1878 /* The target wants to complete the I/O */
1879 dm_kill_unmapped_request(rq, r);
1880 return r;
1881 }
1882 if (IS_ERR(clone))
1883 return DM_MAPIO_REQUEUE;
1884 if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
1885 /* -ENOMEM */
1886 ti->type->release_clone_rq(clone);
1887 return DM_MAPIO_REQUEUE;
1888 }
1889 }
1799 1890
1800 tio->ti = ti;
1801 r = ti->type->map_rq(ti, clone, &tio->info);
1802 switch (r) { 1891 switch (r) {
1803 case DM_MAPIO_SUBMITTED: 1892 case DM_MAPIO_SUBMITTED:
1804 /* The target has taken the I/O to submit by itself later */ 1893 /* The target has taken the I/O to submit by itself later */
@@ -1806,13 +1895,12 @@ static int map_request(struct dm_target *ti, struct request *clone,
1806 case DM_MAPIO_REMAPPED: 1895 case DM_MAPIO_REMAPPED:
1807 /* The target has remapped the I/O so dispatch it */ 1896 /* The target has remapped the I/O so dispatch it */
1808 trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), 1897 trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
1809 blk_rq_pos(tio->orig)); 1898 blk_rq_pos(rq));
1810 dm_dispatch_request(clone); 1899 dm_dispatch_clone_request(clone, rq);
1811 break; 1900 break;
1812 case DM_MAPIO_REQUEUE: 1901 case DM_MAPIO_REQUEUE:
1813 /* The target wants to requeue the I/O */ 1902 /* The target wants to requeue the I/O */
1814 dm_requeue_unmapped_request(clone); 1903 dm_requeue_unmapped_request(clone);
1815 requeued = 1;
1816 break; 1904 break;
1817 default: 1905 default:
1818 if (r > 0) { 1906 if (r > 0) {
@@ -1821,20 +1909,27 @@ static int map_request(struct dm_target *ti, struct request *clone,
1821 } 1909 }
1822 1910
1823 /* The target wants to complete the I/O */ 1911 /* The target wants to complete the I/O */
1824 dm_kill_unmapped_request(clone, r); 1912 dm_kill_unmapped_request(rq, r);
1825 break; 1913 return r;
1826 } 1914 }
1827 1915
1828 return requeued; 1916 return 0;
1829} 1917}
1830 1918
1831static struct request *dm_start_request(struct mapped_device *md, struct request *orig) 1919static void map_tio_request(struct kthread_work *work)
1832{ 1920{
1833 struct request *clone; 1921 struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
1922 struct request *rq = tio->orig;
1923 struct mapped_device *md = tio->md;
1834 1924
1925 if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
1926 dm_requeue_unmapped_original_request(md, rq);
1927}
1928
1929static void dm_start_request(struct mapped_device *md, struct request *orig)
1930{
1835 blk_start_request(orig); 1931 blk_start_request(orig);
1836 clone = orig->special; 1932 atomic_inc(&md->pending[rq_data_dir(orig)]);
1837 atomic_inc(&md->pending[rq_data_dir(clone)]);
1838 1933
1839 /* 1934 /*
1840 * Hold the md reference here for the in-flight I/O. 1935 * Hold the md reference here for the in-flight I/O.
@@ -1844,8 +1939,6 @@ static struct request *dm_start_request(struct mapped_device *md, struct request
1844 * See the comment in rq_completed() too. 1939 * See the comment in rq_completed() too.
1845 */ 1940 */
1846 dm_get(md); 1941 dm_get(md);
1847
1848 return clone;
1849} 1942}
1850 1943
1851/* 1944/*
@@ -1858,7 +1951,8 @@ static void dm_request_fn(struct request_queue *q)
1858 int srcu_idx; 1951 int srcu_idx;
1859 struct dm_table *map = dm_get_live_table(md, &srcu_idx); 1952 struct dm_table *map = dm_get_live_table(md, &srcu_idx);
1860 struct dm_target *ti; 1953 struct dm_target *ti;
1861 struct request *rq, *clone; 1954 struct request *rq;
1955 struct dm_rq_target_io *tio;
1862 sector_t pos; 1956 sector_t pos;
1863 1957
1864 /* 1958 /*
@@ -1880,34 +1974,29 @@ static void dm_request_fn(struct request_queue *q)
1880 ti = dm_table_find_target(map, pos); 1974 ti = dm_table_find_target(map, pos);
1881 if (!dm_target_is_valid(ti)) { 1975 if (!dm_target_is_valid(ti)) {
1882 /* 1976 /*
1883 * Must perform setup, that dm_done() requires, 1977 * Must perform setup, that rq_completed() requires,
1884 * before calling dm_kill_unmapped_request 1978 * before calling dm_kill_unmapped_request
1885 */ 1979 */
1886 DMERR_LIMIT("request attempted access beyond the end of device"); 1980 DMERR_LIMIT("request attempted access beyond the end of device");
1887 clone = dm_start_request(md, rq); 1981 dm_start_request(md, rq);
1888 dm_kill_unmapped_request(clone, -EIO); 1982 dm_kill_unmapped_request(rq, -EIO);
1889 continue; 1983 continue;
1890 } 1984 }
1891 1985
1892 if (ti->type->busy && ti->type->busy(ti)) 1986 if (ti->type->busy && ti->type->busy(ti))
1893 goto delay_and_out; 1987 goto delay_and_out;
1894 1988
1895 clone = dm_start_request(md, rq); 1989 dm_start_request(md, rq);
1896
1897 spin_unlock(q->queue_lock);
1898 if (map_request(ti, clone, md))
1899 goto requeued;
1900 1990
1991 tio = rq->special;
1992 /* Establish tio->ti before queuing work (map_tio_request) */
1993 tio->ti = ti;
1994 queue_kthread_work(&md->kworker, &tio->work);
1901 BUG_ON(!irqs_disabled()); 1995 BUG_ON(!irqs_disabled());
1902 spin_lock(q->queue_lock);
1903 } 1996 }
1904 1997
1905 goto out; 1998 goto out;
1906 1999
1907requeued:
1908 BUG_ON(!irqs_disabled());
1909 spin_lock(q->queue_lock);
1910
1911delay_and_out: 2000delay_and_out:
1912 blk_delay_queue(q, HZ / 10); 2001 blk_delay_queue(q, HZ / 10);
1913out: 2002out:
@@ -2093,6 +2182,7 @@ static struct mapped_device *alloc_dev(int minor)
2093 INIT_WORK(&md->work, dm_wq_work); 2182 INIT_WORK(&md->work, dm_wq_work);
2094 init_waitqueue_head(&md->eventq); 2183 init_waitqueue_head(&md->eventq);
2095 init_completion(&md->kobj_holder.completion); 2184 init_completion(&md->kobj_holder.completion);
2185 md->kworker_task = NULL;
2096 2186
2097 md->disk->major = _major; 2187 md->disk->major = _major;
2098 md->disk->first_minor = minor; 2188 md->disk->first_minor = minor;
@@ -2153,8 +2243,13 @@ static void free_dev(struct mapped_device *md)
2153 unlock_fs(md); 2243 unlock_fs(md);
2154 bdput(md->bdev); 2244 bdput(md->bdev);
2155 destroy_workqueue(md->wq); 2245 destroy_workqueue(md->wq);
2246
2247 if (md->kworker_task)
2248 kthread_stop(md->kworker_task);
2156 if (md->io_pool) 2249 if (md->io_pool)
2157 mempool_destroy(md->io_pool); 2250 mempool_destroy(md->io_pool);
2251 if (md->rq_pool)
2252 mempool_destroy(md->rq_pool);
2158 if (md->bs) 2253 if (md->bs)
2159 bioset_free(md->bs); 2254 bioset_free(md->bs);
2160 blk_integrity_unregister(md->disk); 2255 blk_integrity_unregister(md->disk);
@@ -2188,23 +2283,24 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
2188 bioset_free(md->bs); 2283 bioset_free(md->bs);
2189 md->bs = p->bs; 2284 md->bs = p->bs;
2190 p->bs = NULL; 2285 p->bs = NULL;
2191 } else if (dm_table_get_type(t) == DM_TYPE_REQUEST_BASED) {
2192 /*
2193 * There's no need to reload with request-based dm
2194 * because the size of front_pad doesn't change.
2195 * Note for future: If you are to reload bioset,
2196 * prep-ed requests in the queue may refer
2197 * to bio from the old bioset, so you must walk
2198 * through the queue to unprep.
2199 */
2200 } 2286 }
2287 /*
2288 * There's no need to reload with request-based dm
2289 * because the size of front_pad doesn't change.
2290 * Note for future: If you are to reload bioset,
2291 * prep-ed requests in the queue may refer
2292 * to bio from the old bioset, so you must walk
2293 * through the queue to unprep.
2294 */
2201 goto out; 2295 goto out;
2202 } 2296 }
2203 2297
2204 BUG_ON(!p || md->io_pool || md->bs); 2298 BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
2205 2299
2206 md->io_pool = p->io_pool; 2300 md->io_pool = p->io_pool;
2207 p->io_pool = NULL; 2301 p->io_pool = NULL;
2302 md->rq_pool = p->rq_pool;
2303 p->rq_pool = NULL;
2208 md->bs = p->bs; 2304 md->bs = p->bs;
2209 p->bs = NULL; 2305 p->bs = NULL;
2210 2306
@@ -2407,6 +2503,14 @@ unsigned dm_get_md_type(struct mapped_device *md)
2407 return md->type; 2503 return md->type;
2408} 2504}
2409 2505
2506static bool dm_md_type_request_based(struct mapped_device *md)
2507{
2508 unsigned table_type = dm_get_md_type(md);
2509
2510 return (table_type == DM_TYPE_REQUEST_BASED ||
2511 table_type == DM_TYPE_MQ_REQUEST_BASED);
2512}
2513
2410struct target_type *dm_get_immutable_target_type(struct mapped_device *md) 2514struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
2411{ 2515{
2412 return md->immutable_target_type; 2516 return md->immutable_target_type;
@@ -2444,6 +2548,11 @@ static int dm_init_request_based_queue(struct mapped_device *md)
2444 blk_queue_prep_rq(md->queue, dm_prep_fn); 2548 blk_queue_prep_rq(md->queue, dm_prep_fn);
2445 blk_queue_lld_busy(md->queue, dm_lld_busy); 2549 blk_queue_lld_busy(md->queue, dm_lld_busy);
2446 2550
2551 /* Also initialize the request-based DM worker thread */
2552 init_kthread_worker(&md->kworker);
2553 md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
2554 "kdmwork-%s", dm_device_name(md));
2555
2447 elv_register_queue(md->queue); 2556 elv_register_queue(md->queue);
2448 2557
2449 return 1; 2558 return 1;
@@ -2454,8 +2563,7 @@ static int dm_init_request_based_queue(struct mapped_device *md)
2454 */ 2563 */
2455int dm_setup_md_queue(struct mapped_device *md) 2564int dm_setup_md_queue(struct mapped_device *md)
2456{ 2565{
2457 if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) && 2566 if (dm_md_type_request_based(md) && !dm_init_request_based_queue(md)) {
2458 !dm_init_request_based_queue(md)) {
2459 DMWARN("Cannot initialize queue for request-based mapped device"); 2567 DMWARN("Cannot initialize queue for request-based mapped device");
2460 return -EINVAL; 2568 return -EINVAL;
2461 } 2569 }
@@ -2534,6 +2642,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
2534 set_bit(DMF_FREEING, &md->flags); 2642 set_bit(DMF_FREEING, &md->flags);
2535 spin_unlock(&_minor_lock); 2643 spin_unlock(&_minor_lock);
2536 2644
2645 if (dm_request_based(md))
2646 flush_kthread_worker(&md->kworker);
2647
2537 if (!dm_suspended_md(md)) { 2648 if (!dm_suspended_md(md)) {
2538 dm_table_presuspend_targets(map); 2649 dm_table_presuspend_targets(map);
2539 dm_table_postsuspend_targets(map); 2650 dm_table_postsuspend_targets(map);
@@ -2777,8 +2888,10 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
2777 * Stop md->queue before flushing md->wq in case request-based 2888 * Stop md->queue before flushing md->wq in case request-based
2778 * dm defers requests to md->wq from md->queue. 2889 * dm defers requests to md->wq from md->queue.
2779 */ 2890 */
2780 if (dm_request_based(md)) 2891 if (dm_request_based(md)) {
2781 stop_queue(md->queue); 2892 stop_queue(md->queue);
2893 flush_kthread_worker(&md->kworker);
2894 }
2782 2895
2783 flush_workqueue(md->wq); 2896 flush_workqueue(md->wq);
2784 2897
@@ -3124,24 +3237,35 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
3124{ 3237{
3125 struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); 3238 struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
3126 struct kmem_cache *cachep; 3239 struct kmem_cache *cachep;
3127 unsigned int pool_size; 3240 unsigned int pool_size = 0;
3128 unsigned int front_pad; 3241 unsigned int front_pad;
3129 3242
3130 if (!pools) 3243 if (!pools)
3131 return NULL; 3244 return NULL;
3132 3245
3133 if (type == DM_TYPE_BIO_BASED) { 3246 switch (type) {
3247 case DM_TYPE_BIO_BASED:
3134 cachep = _io_cache; 3248 cachep = _io_cache;
3135 pool_size = dm_get_reserved_bio_based_ios(); 3249 pool_size = dm_get_reserved_bio_based_ios();
3136 front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); 3250 front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
3137 } else if (type == DM_TYPE_REQUEST_BASED) { 3251 break;
3138 cachep = _rq_tio_cache; 3252 case DM_TYPE_REQUEST_BASED:
3139 pool_size = dm_get_reserved_rq_based_ios(); 3253 pool_size = dm_get_reserved_rq_based_ios();
3254 pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
3255 if (!pools->rq_pool)
3256 goto out;
3257 /* fall through to setup remaining rq-based pools */
3258 case DM_TYPE_MQ_REQUEST_BASED:
3259 cachep = _rq_tio_cache;
3260 if (!pool_size)
3261 pool_size = dm_get_reserved_rq_based_ios();
3140 front_pad = offsetof(struct dm_rq_clone_bio_info, clone); 3262 front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
3141 /* per_bio_data_size is not used. See __bind_mempools(). */ 3263 /* per_bio_data_size is not used. See __bind_mempools(). */
3142 WARN_ON(per_bio_data_size != 0); 3264 WARN_ON(per_bio_data_size != 0);
3143 } else 3265 break;
3266 default:
3144 goto out; 3267 goto out;
3268 }
3145 3269
3146 pools->io_pool = mempool_create_slab_pool(pool_size, cachep); 3270 pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
3147 if (!pools->io_pool) 3271 if (!pools->io_pool)
@@ -3170,6 +3294,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
3170 if (pools->io_pool) 3294 if (pools->io_pool)
3171 mempool_destroy(pools->io_pool); 3295 mempool_destroy(pools->io_pool);
3172 3296
3297 if (pools->rq_pool)
3298 mempool_destroy(pools->rq_pool);
3299
3173 if (pools->bs) 3300 if (pools->bs)
3174 bioset_free(pools->bs); 3301 bioset_free(pools->bs);
3175 3302
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 84b0f9e4ba6c..59f53e79db82 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -34,9 +34,10 @@
34/* 34/*
35 * Type of table and mapped_device's mempool 35 * Type of table and mapped_device's mempool
36 */ 36 */
37#define DM_TYPE_NONE 0 37#define DM_TYPE_NONE 0
38#define DM_TYPE_BIO_BASED 1 38#define DM_TYPE_BIO_BASED 1
39#define DM_TYPE_REQUEST_BASED 2 39#define DM_TYPE_REQUEST_BASED 2
40#define DM_TYPE_MQ_REQUEST_BASED 3
40 41
41/* 42/*
42 * List of devices that a metadevice uses and should open/close. 43 * List of devices that a metadevice uses and should open/close.
@@ -73,6 +74,7 @@ int dm_table_any_busy_target(struct dm_table *t);
73unsigned dm_table_get_type(struct dm_table *t); 74unsigned dm_table_get_type(struct dm_table *t);
74struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); 75struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
75bool dm_table_request_based(struct dm_table *t); 76bool dm_table_request_based(struct dm_table *t);
77bool dm_table_mq_request_based(struct dm_table *t);
76void dm_table_free_md_mempools(struct dm_table *t); 78void dm_table_free_md_mempools(struct dm_table *t);
77struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); 79struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
78 80
@@ -99,7 +101,8 @@ int dm_setup_md_queue(struct mapped_device *md);
99/* 101/*
100 * To check whether the target type is request-based or not (bio-based). 102 * To check whether the target type is request-based or not (bio-based).
101 */ 103 */
102#define dm_target_request_based(t) ((t)->type->map_rq != NULL) 104#define dm_target_request_based(t) (((t)->type->map_rq != NULL) || \
105 ((t)->type->clone_and_map_rq != NULL))
103 106
104/* 107/*
105 * To check whether the target type is a hybrid (capable of being 108 * To check whether the target type is a hybrid (capable of being