author		Mike Snitzer <snitzer@redhat.com>	2015-03-08 00:51:47 -0500
committer	Mike Snitzer <snitzer@redhat.com>	2015-04-15 12:10:16 -0400
commit		bfebd1cdb497a57757c83f5fbf1a29931591e2a4 (patch)
tree		c4ca9675ad22a45eb6e36b130db0d65d574dd2b3
parent		0ce65797a77ee780f62909d3128bf08b9735718b (diff)
dm: add full blk-mq support to request-based DM
Commit e5863d9ad ("dm: allocate requests in target when stacking on blk-mq devices") served as the first step toward fully utilizing blk-mq in request-based DM -- it enabled stacking an old-style (request_fn) request_queue on top of the underlying blk-mq device(s). That first step didn't improve performance of DM multipath on top of fast blk-mq devices (e.g. NVMe) because the top-level old-style request_queue was severely limited by the queue_lock.

The second step offered here enables stacking a blk-mq request_queue on top of the underlying blk-mq device(s). This unlocks significant performance gains on fast blk-mq devices; Keith Busch tested on his NVMe testbed and offered this really positive news:

  "Just providing a performance update. All my fio tests are getting
   roughly equal performance whether accessed through the raw block
   device or the multipath device mapper (~470k IOPS). I could only push
   ~20% of the raw iops through dm before this conversion, so this latest
   tree is looking really solid from a performance standpoint."

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Tested-by: Keith Busch <keith.busch@intel.com>
-rw-r--r--	drivers/md/dm-mpath.c	2
-rw-r--r--	drivers/md/dm-table.c	11
-rw-r--r--	drivers/md/dm.c	317
-rw-r--r--	include/uapi/linux/dm-ioctl.h	4
4 files changed, 261 insertions, 73 deletions
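For orientation before reading the dm.c hunks: the new blk-mq path hangs a blk_mq_tag_set off the mapped_device, sized so that each request's driver payload is the existing struct dm_rq_target_io, and then attaches blk-mq to the request_queue that was already allocated for the device. The sketch below is a condensed, slightly simplified restatement of dm_init_request_based_blk_mq_queue from the diff that follows (numa_node, merge flags, and the sysfs backfill are omitted); it is not additional code in the patch.

static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
{
	struct request_queue *q;
	int err;

	/* one hw queue; each request carries a dm_rq_target_io as its pdu */
	memset(&md->tag_set, 0, sizeof(md->tag_set));
	md->tag_set.ops = &dm_mq_ops;
	md->tag_set.queue_depth = BLKDEV_MAX_RQ;
	md->tag_set.nr_hw_queues = 1;
	md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
	md->tag_set.driver_data = md;

	err = blk_mq_alloc_tag_set(&md->tag_set);
	if (err)
		return err;

	/* attach blk-mq to the md's pre-allocated request_queue */
	q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
	if (IS_ERR(q)) {
		blk_mq_free_tag_set(&md->tag_set);
		return PTR_ERR(q);
	}
	md->queue = q;
	dm_init_md_queue(md);

	/* requests are still mapped from a kthread worker, as before */
	init_rq_based_worker_thread(md);
	return 0;
}

Completion for the blk-mq case is routed through the same dm_softirq_done handler via the tag set's .complete hook, as the dm_mq_ops definition in the diff shows.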
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index add6391f3f8e..c8f07e5a9a17 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1703,7 +1703,7 @@ out:
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 8, 0},
+	.version = {1, 9, 0},
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 057312048b68..66600cab9fa5 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -18,6 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/atomic.h>
+#include <linux/blk-mq.h>
 
 #define DM_MSG_PREFIX "table"
 
@@ -1695,9 +1696,13 @@ void dm_table_run_md_queue_async(struct dm_table *t)
 	md = dm_table_get_md(t);
 	queue = dm_get_md_queue(md);
 	if (queue) {
-		spin_lock_irqsave(queue->queue_lock, flags);
-		blk_run_queue_async(queue);
-		spin_unlock_irqrestore(queue->queue_lock, flags);
+		if (queue->mq_ops)
+			blk_mq_run_hw_queues(queue, true);
+		else {
+			spin_lock_irqsave(queue->queue_lock, flags);
+			blk_run_queue_async(queue);
+			spin_unlock_irqrestore(queue->queue_lock, flags);
+		}
 	}
 }
 EXPORT_SYMBOL(dm_table_run_md_queue_async);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5294e016e92b..3a66baac76ed 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -23,6 +23,7 @@
 #include <linux/kthread.h>
 #include <linux/ktime.h>
 #include <linux/elevator.h> /* for rq_end_sector() */
+#include <linux/blk-mq.h>
 
 #include <trace/events/block.h>
 
@@ -224,6 +225,9 @@ struct mapped_device {
 	int last_rq_rw;
 	sector_t last_rq_pos;
 	ktime_t last_rq_start_time;
+
+	/* for blk-mq request-based DM support */
+	struct blk_mq_tag_set tag_set;
 };
 
 /*
@@ -1025,6 +1029,11 @@ static void end_clone_bio(struct bio *clone, int error)
 	blk_update_request(tio->orig, 0, nr_bytes);
 }
 
+static struct dm_rq_target_io *tio_from_request(struct request *rq)
+{
+	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
+}
+
 /*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
@@ -1048,8 +1057,10 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 	 * queue lock again.
 	 */
 	if (run_queue) {
-		if (!nr_requests_pending ||
-		    (nr_requests_pending >= md->queue->nr_congestion_on))
+		if (md->queue->mq_ops)
+			blk_mq_run_hw_queues(md->queue, true);
+		else if (!nr_requests_pending ||
+			 (nr_requests_pending >= md->queue->nr_congestion_on))
 			blk_run_queue_async(md->queue);
 	}
 
@@ -1062,13 +1073,17 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 static void free_rq_clone(struct request *clone)
 {
 	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct mapped_device *md = tio->md;
 
 	blk_rq_unprep_clone(clone);
+
 	if (clone->q && clone->q->mq_ops)
 		tio->ti->type->release_clone_rq(clone);
 	else
-		free_clone_request(tio->md, clone);
-	free_rq_tio(tio);
+		free_clone_request(md, clone);
+
+	if (!md->queue->mq_ops)
+		free_rq_tio(tio);
 }
 
 /*
@@ -1097,17 +1112,22 @@ static void dm_end_request(struct request *clone, int error)
 	}
 
 	free_rq_clone(clone);
-	blk_end_request_all(rq, error);
+	if (!rq->q->mq_ops)
+		blk_end_request_all(rq, error);
+	else
+		blk_mq_end_request(rq, error);
 	rq_completed(md, rw, true);
 }
 
 static void dm_unprep_request(struct request *rq)
 {
-	struct dm_rq_target_io *tio = rq->special;
+	struct dm_rq_target_io *tio = tio_from_request(rq);
 	struct request *clone = tio->clone;
 
-	rq->special = NULL;
-	rq->cmd_flags &= ~REQ_DONTPREP;
+	if (!rq->q->mq_ops) {
+		rq->special = NULL;
+		rq->cmd_flags &= ~REQ_DONTPREP;
+	}
 
 	if (clone)
 		free_rq_clone(clone);
@@ -1116,18 +1136,29 @@ static void dm_unprep_request(struct request *rq)
 /*
  * Requeue the original request of a clone.
  */
-static void dm_requeue_unmapped_original_request(struct mapped_device *md,
-						 struct request *rq)
+static void old_requeue_request(struct request *rq)
 {
-	int rw = rq_data_dir(rq);
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
-	dm_unprep_request(rq);
-
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_requeue_request(q, rq);
 	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_requeue_unmapped_original_request(struct mapped_device *md,
+						 struct request *rq)
+{
+	int rw = rq_data_dir(rq);
+
+	dm_unprep_request(rq);
+
+	if (!rq->q->mq_ops)
+		old_requeue_request(rq);
+	else {
+		blk_mq_requeue_request(rq);
+		blk_mq_kick_requeue_list(rq->q);
+	}
 
 	rq_completed(md, rw, false);
 }
@@ -1139,35 +1170,44 @@ static void dm_requeue_unmapped_request(struct request *clone)
 	dm_requeue_unmapped_original_request(tio->md, tio->orig);
 }
 
-static void __stop_queue(struct request_queue *q)
-{
-	blk_stop_queue(q);
-}
-
-static void stop_queue(struct request_queue *q)
+static void old_stop_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
+	if (blk_queue_stopped(q))
+		return;
+
 	spin_lock_irqsave(q->queue_lock, flags);
-	__stop_queue(q);
+	blk_stop_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void __start_queue(struct request_queue *q)
+static void stop_queue(struct request_queue *q)
 {
-	if (blk_queue_stopped(q))
-		blk_start_queue(q);
+	if (!q->mq_ops)
+		old_stop_queue(q);
+	else
+		blk_mq_stop_hw_queues(q);
 }
 
-static void start_queue(struct request_queue *q)
+static void old_start_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__start_queue(q);
+	if (blk_queue_stopped(q))
+		blk_start_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+static void start_queue(struct request_queue *q)
+{
+	if (!q->mq_ops)
+		old_start_queue(q);
+	else
+		blk_mq_start_stopped_hw_queues(q, true);
+}
+
 static void dm_done(struct request *clone, int error, bool mapped)
 {
 	int r = error;
@@ -1206,13 +1246,20 @@ static void dm_done(struct request *clone, int error, bool mapped)
 static void dm_softirq_done(struct request *rq)
 {
 	bool mapped = true;
-	struct dm_rq_target_io *tio = rq->special;
+	struct dm_rq_target_io *tio = tio_from_request(rq);
 	struct request *clone = tio->clone;
+	int rw;
 
 	if (!clone) {
-		blk_end_request_all(rq, tio->error);
-		rq_completed(tio->md, rq_data_dir(rq), false);
-		free_rq_tio(tio);
+		rw = rq_data_dir(rq);
+		if (!rq->q->mq_ops) {
+			blk_end_request_all(rq, tio->error);
+			rq_completed(tio->md, rw, false);
+			free_rq_tio(tio);
+		} else {
+			blk_mq_end_request(rq, tio->error);
+			rq_completed(tio->md, rw, false);
+		}
 		return;
 	}
 
@@ -1228,7 +1275,7 @@ static void dm_softirq_done(struct request *rq)
  */
 static void dm_complete_request(struct request *rq, int error)
 {
-	struct dm_rq_target_io *tio = rq->special;
+	struct dm_rq_target_io *tio = tio_from_request(rq);
 
 	tio->error = error;
 	blk_complete_request(rq);
@@ -1247,7 +1294,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
 }
 
 /*
- * Called with the clone's queue lock held
+ * Called with the clone's queue lock held (for non-blk-mq)
  */
 static void end_clone_request(struct request *clone, int error)
 {
@@ -1808,6 +1855,18 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
 
 static void map_tio_request(struct kthread_work *work);
 
+static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
+		     struct mapped_device *md)
+{
+	tio->md = md;
+	tio->ti = NULL;
+	tio->clone = NULL;
+	tio->orig = rq;
+	tio->error = 0;
+	memset(&tio->info, 0, sizeof(tio->info));
+	init_kthread_work(&tio->work, map_tio_request);
+}
+
 static struct dm_rq_target_io *prep_tio(struct request *rq,
 					struct mapped_device *md, gfp_t gfp_mask)
 {
@@ -1819,13 +1878,7 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
 	if (!tio)
 		return NULL;
 
-	tio->md = md;
-	tio->ti = NULL;
-	tio->clone = NULL;
-	tio->orig = rq;
-	tio->error = 0;
-	memset(&tio->info, 0, sizeof(tio->info));
-	init_kthread_work(&tio->work, map_tio_request);
+	init_tio(tio, rq, md);
 
 	table = dm_get_live_table(md, &srcu_idx);
 	if (!dm_table_mq_request_based(table)) {
@@ -1869,11 +1922,11 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
  * DM_MAPIO_REQUEUE : the original request needs to be requeued
  * < 0              : the request was completed due to failure
  */
-static int map_request(struct dm_target *ti, struct request *rq,
+static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 		       struct mapped_device *md)
 {
 	int r;
-	struct dm_rq_target_io *tio = rq->special;
+	struct dm_target *ti = tio->ti;
 	struct request *clone = NULL;
 
 	if (tio->clone) {
@@ -1888,7 +1941,7 @@ static int map_request(struct dm_target *ti, struct request *rq,
 	}
 	if (IS_ERR(clone))
 		return DM_MAPIO_REQUEUE;
-	if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
+	if (setup_clone(clone, rq, tio, GFP_NOIO)) {
 		/* -ENOMEM */
 		ti->type->release_clone_rq(clone);
 		return DM_MAPIO_REQUEUE;
@@ -1929,13 +1982,16 @@ static void map_tio_request(struct kthread_work *work)
 	struct request *rq = tio->orig;
 	struct mapped_device *md = tio->md;
 
-	if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
+	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
 		dm_requeue_unmapped_original_request(md, rq);
 }
 
 static void dm_start_request(struct mapped_device *md, struct request *orig)
 {
-	blk_start_request(orig);
+	if (!orig->q->mq_ops)
+		blk_start_request(orig);
+	else
+		blk_mq_start_request(orig);
 	atomic_inc(&md->pending[rq_data_dir(orig)]);
 
 	if (md->seq_rq_merge_deadline_usecs) {
@@ -2045,7 +2101,7 @@ static void dm_request_fn(struct request_queue *q)
 
 		dm_start_request(md, rq);
 
-		tio = rq->special;
+		tio = tio_from_request(rq);
 		/* Establish tio->ti before queuing work (map_tio_request) */
 		tio->ti = ti;
 		queue_kthread_work(&md->kworker, &tio->work);
@@ -2142,7 +2198,7 @@ static void dm_init_md_queue(struct mapped_device *md)
 {
 	/*
 	 * Request-based dm devices cannot be stacked on top of bio-based dm
-	 * devices.  The type of this dm device has not been decided yet.
+	 * devices.  The type of this dm device may not have been decided yet.
 	 * The type is decided at the first table loading time.
 	 * To prevent problematic device stacking, clear the queue flag
 	 * for request stacking support until then.
@@ -2150,7 +2206,15 @@ static void dm_init_md_queue(struct mapped_device *md)
 	 * This queue is new, so no concurrency on the queue_flags.
 	 */
 	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+}
+
+static void dm_init_old_md_queue(struct mapped_device *md)
+{
+	dm_init_md_queue(md);
 
+	/*
+	 * Initialize aspects of queue that aren't relevant for blk-mq
+	 */
 	md->queue->queuedata = md;
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
 	md->queue->backing_dev_info.congested_data = md;
@@ -2273,6 +2337,7 @@ static void unlock_fs(struct mapped_device *md);
 static void free_dev(struct mapped_device *md)
 {
 	int minor = MINOR(disk_devt(md->disk));
+	bool using_blk_mq = !!md->queue->mq_ops;
 
 	unlock_fs(md);
 	destroy_workqueue(md->wq);
@@ -2298,6 +2363,8 @@ static void free_dev(struct mapped_device *md)
 	del_gendisk(md->disk);
 	put_disk(md->disk);
 	blk_cleanup_queue(md->queue);
+	if (using_blk_mq)
+		blk_mq_free_tag_set(&md->tag_set);
 	bdput(md->bdev);
 	free_minor(minor);
 
@@ -2457,7 +2524,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 	 * This must be done before setting the queue restrictions,
 	 * because request-based dm may be run just after the setting.
 	 */
-	if (dm_table_request_based(t) && !blk_queue_stopped(q))
+	if (dm_table_request_based(t))
 		stop_queue(q);
 
 	__bind_mempools(md, t);
@@ -2539,14 +2606,6 @@ unsigned dm_get_md_type(struct mapped_device *md)
 	return md->type;
 }
 
-static bool dm_md_type_request_based(struct mapped_device *md)
-{
-	unsigned table_type = dm_get_md_type(md);
-
-	return (table_type == DM_TYPE_REQUEST_BASED ||
-		table_type == DM_TYPE_MQ_REQUEST_BASED);
-}
-
 struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
 {
 	return md->immutable_target_type;
@@ -2563,6 +2622,14 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_get_queue_limits);
 
+static void init_rq_based_worker_thread(struct mapped_device *md)
+{
+	/* Initialize the request-based DM worker thread */
+	init_kthread_worker(&md->kworker);
+	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
+				       "kdmwork-%s", dm_device_name(md));
+}
+
 /*
  * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
  */
@@ -2571,29 +2638,131 @@ static int dm_init_request_based_queue(struct mapped_device *md)
 	struct request_queue *q = NULL;
 
 	if (md->queue->elevator)
-		return 1;
+		return 0;
 
 	/* Fully initialize the queue */
 	q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
 	if (!q)
-		return 0;
+		return -EINVAL;
 
 	/* disable dm_request_fn's merge heuristic by default */
 	md->seq_rq_merge_deadline_usecs = 0;
 
 	md->queue = q;
-	dm_init_md_queue(md);
+	dm_init_old_md_queue(md);
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 
-	/* Also initialize the request-based DM worker thread */
-	init_kthread_worker(&md->kworker);
-	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
-				       "kdmwork-%s", dm_device_name(md));
+	init_rq_based_worker_thread(md);
 
 	elv_register_queue(md->queue);
 
-	return 1;
+	return 0;
+}
+
+static int dm_mq_init_request(void *data, struct request *rq,
+			      unsigned int hctx_idx, unsigned int request_idx,
+			      unsigned int numa_node)
+{
+	struct mapped_device *md = data;
+	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+
+	/*
+	 * Must initialize md member of tio, otherwise it won't
+	 * be available in dm_mq_queue_rq.
+	 */
+	tio->md = md;
+
+	return 0;
+}
+
+static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+			  const struct blk_mq_queue_data *bd)
+{
+	struct request *rq = bd->rq;
+	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+	struct mapped_device *md = tio->md;
+	int srcu_idx;
+	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+	struct dm_target *ti;
+	sector_t pos;
+
+	/* always use block 0 to find the target for flushes for now */
+	pos = 0;
+	if (!(rq->cmd_flags & REQ_FLUSH))
+		pos = blk_rq_pos(rq);
+
+	ti = dm_table_find_target(map, pos);
+	if (!dm_target_is_valid(ti)) {
+		dm_put_live_table(md, srcu_idx);
+		DMERR_LIMIT("request attempted access beyond the end of device");
+		/*
+		 * Must perform setup, that rq_completed() requires,
+		 * before returning BLK_MQ_RQ_QUEUE_ERROR
+		 */
+		dm_start_request(md, rq);
+		return BLK_MQ_RQ_QUEUE_ERROR;
+	}
+	dm_put_live_table(md, srcu_idx);
+
+	if (ti->type->busy && ti->type->busy(ti))
+		return BLK_MQ_RQ_QUEUE_BUSY;
+
+	dm_start_request(md, rq);
+
+	/* Init tio using md established in .init_request */
+	init_tio(tio, rq, md);
+
+	/* Establish tio->ti before queuing work (map_tio_request) */
+	tio->ti = ti;
+	queue_kthread_work(&md->kworker, &tio->work);
+
+	return BLK_MQ_RQ_QUEUE_OK;
+}
+
+static struct blk_mq_ops dm_mq_ops = {
+	.queue_rq = dm_mq_queue_rq,
+	.map_queue = blk_mq_map_queue,
+	.complete = dm_softirq_done,
+	.init_request = dm_mq_init_request,
+};
+
+static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
+{
+	struct request_queue *q;
+	int err;
+
+	memset(&md->tag_set, 0, sizeof(md->tag_set));
+	md->tag_set.ops = &dm_mq_ops;
+	md->tag_set.queue_depth = BLKDEV_MAX_RQ;
+	md->tag_set.numa_node = NUMA_NO_NODE;
+	md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	md->tag_set.nr_hw_queues = 1;
+	md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
+	md->tag_set.driver_data = md;
+
+	err = blk_mq_alloc_tag_set(&md->tag_set);
+	if (err)
+		return err;
+
+	q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
+	if (IS_ERR(q)) {
+		err = PTR_ERR(q);
+		goto out_tag_set;
+	}
+	md->queue = q;
+	dm_init_md_queue(md);
+
+	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
+	blk_mq_register_disk(md->disk);
+
+	init_rq_based_worker_thread(md);
+
+	return 0;
+
+out_tag_set:
+	blk_mq_free_tag_set(&md->tag_set);
+	return err;
 }
 
 /*
@@ -2601,15 +2770,29 @@ static int dm_init_request_based_queue(struct mapped_device *md)
  */
 int dm_setup_md_queue(struct mapped_device *md)
 {
-	if (dm_md_type_request_based(md)) {
-		if (!dm_init_request_based_queue(md)) {
+	int r;
+	unsigned md_type = dm_get_md_type(md);
+
+	switch (md_type) {
+	case DM_TYPE_REQUEST_BASED:
+		r = dm_init_request_based_queue(md);
+		if (r) {
 			DMWARN("Cannot initialize queue for request-based mapped device");
-			return -EINVAL;
+			return r;
 		}
-	} else {
-		/* bio-based specific initialization */
+		break;
+	case DM_TYPE_MQ_REQUEST_BASED:
+		r = dm_init_request_based_blk_mq_queue(md);
+		if (r) {
+			DMWARN("Cannot initialize queue for request-based blk-mq mapped device");
+			return r;
+		}
+		break;
+	case DM_TYPE_BIO_BASED:
+		dm_init_old_md_queue(md);
 		blk_queue_make_request(md->queue, dm_make_request);
 		blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+		break;
 	}
 
 	return 0;
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 889f3a5b7b18..eac8c3641f39 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	30
+#define DM_VERSION_MINOR	31
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2014-12-22)"
+#define DM_VERSION_EXTRA	"-ioctl (2015-3-12)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */