diff options
| author | Mike Snitzer <snitzer@redhat.com> | 2015-03-08 00:51:47 -0500 |
|---|---|---|
| committer | Mike Snitzer <snitzer@redhat.com> | 2015-04-15 12:10:16 -0400 |
| commit | bfebd1cdb497a57757c83f5fbf1a29931591e2a4 (patch) | |
| tree | c4ca9675ad22a45eb6e36b130db0d65d574dd2b3 | |
| parent | 0ce65797a77ee780f62909d3128bf08b9735718b (diff) | |
dm: add full blk-mq support to request-based DM
Commit e5863d9ad ("dm: allocate requests in target when stacking on
blk-mq devices") served as the first step toward fully utilizing blk-mq
in request-based DM -- it enabled stacking an old-style (request_fn)
request_queue on top of the underlying blk-mq device(s). That first step
didn't improve performance of DM multipath on top of fast blk-mq devices
(e.g. NVMe) because the top-level old-style request_queue was severely
limited by the queue_lock.
The second step offered here enables stacking a blk-mq request_queue
on top of the underlying blk-mq device(s). This unlocks significant
performance gains on fast blk-mq devices; Keith Busch tested on his NVMe
testbed and offered this really positive news:
"Just providing a performance update. All my fio tests are getting
roughly equal performance whether accessed through the raw block
device or the multipath device mapper (~470k IOPS). I could only push
~20% of the raw iops through dm before this conversion, so this latest
tree is looking really solid from a performance standpoint."
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Tested-by: Keith Busch <keith.busch@intel.com>
| -rw-r--r-- | drivers/md/dm-mpath.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-table.c | 11 | ||||
| -rw-r--r-- | drivers/md/dm.c | 317 | ||||
| -rw-r--r-- | include/uapi/linux/dm-ioctl.h | 4 |
4 files changed, 261 insertions, 73 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index add6391f3f8e..c8f07e5a9a17 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
| @@ -1703,7 +1703,7 @@ out: | |||
| 1703 | *---------------------------------------------------------------*/ | 1703 | *---------------------------------------------------------------*/ |
| 1704 | static struct target_type multipath_target = { | 1704 | static struct target_type multipath_target = { |
| 1705 | .name = "multipath", | 1705 | .name = "multipath", |
| 1706 | .version = {1, 8, 0}, | 1706 | .version = {1, 9, 0}, |
| 1707 | .module = THIS_MODULE, | 1707 | .module = THIS_MODULE, |
| 1708 | .ctr = multipath_ctr, | 1708 | .ctr = multipath_ctr, |
| 1709 | .dtr = multipath_dtr, | 1709 | .dtr = multipath_dtr, |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 057312048b68..66600cab9fa5 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
| 19 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
| 20 | #include <linux/atomic.h> | 20 | #include <linux/atomic.h> |
| 21 | #include <linux/blk-mq.h> | ||
| 21 | 22 | ||
| 22 | #define DM_MSG_PREFIX "table" | 23 | #define DM_MSG_PREFIX "table" |
| 23 | 24 | ||
| @@ -1695,9 +1696,13 @@ void dm_table_run_md_queue_async(struct dm_table *t) | |||
| 1695 | md = dm_table_get_md(t); | 1696 | md = dm_table_get_md(t); |
| 1696 | queue = dm_get_md_queue(md); | 1697 | queue = dm_get_md_queue(md); |
| 1697 | if (queue) { | 1698 | if (queue) { |
| 1698 | spin_lock_irqsave(queue->queue_lock, flags); | 1699 | if (queue->mq_ops) |
| 1699 | blk_run_queue_async(queue); | 1700 | blk_mq_run_hw_queues(queue, true); |
| 1700 | spin_unlock_irqrestore(queue->queue_lock, flags); | 1701 | else { |
| 1702 | spin_lock_irqsave(queue->queue_lock, flags); | ||
| 1703 | blk_run_queue_async(queue); | ||
| 1704 | spin_unlock_irqrestore(queue->queue_lock, flags); | ||
| 1705 | } | ||
| 1701 | } | 1706 | } |
| 1702 | } | 1707 | } |
| 1703 | EXPORT_SYMBOL(dm_table_run_md_queue_async); | 1708 | EXPORT_SYMBOL(dm_table_run_md_queue_async); |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5294e016e92b..3a66baac76ed 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <linux/kthread.h> | 23 | #include <linux/kthread.h> |
| 24 | #include <linux/ktime.h> | 24 | #include <linux/ktime.h> |
| 25 | #include <linux/elevator.h> /* for rq_end_sector() */ | 25 | #include <linux/elevator.h> /* for rq_end_sector() */ |
| 26 | #include <linux/blk-mq.h> | ||
| 26 | 27 | ||
| 27 | #include <trace/events/block.h> | 28 | #include <trace/events/block.h> |
| 28 | 29 | ||
| @@ -224,6 +225,9 @@ struct mapped_device { | |||
| 224 | int last_rq_rw; | 225 | int last_rq_rw; |
| 225 | sector_t last_rq_pos; | 226 | sector_t last_rq_pos; |
| 226 | ktime_t last_rq_start_time; | 227 | ktime_t last_rq_start_time; |
| 228 | |||
| 229 | /* for blk-mq request-based DM support */ | ||
| 230 | struct blk_mq_tag_set tag_set; | ||
| 227 | }; | 231 | }; |
| 228 | 232 | ||
| 229 | /* | 233 | /* |
| @@ -1025,6 +1029,11 @@ static void end_clone_bio(struct bio *clone, int error) | |||
| 1025 | blk_update_request(tio->orig, 0, nr_bytes); | 1029 | blk_update_request(tio->orig, 0, nr_bytes); |
| 1026 | } | 1030 | } |
| 1027 | 1031 | ||
| 1032 | static struct dm_rq_target_io *tio_from_request(struct request *rq) | ||
| 1033 | { | ||
| 1034 | return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); | ||
| 1035 | } | ||
| 1036 | |||
| 1028 | /* | 1037 | /* |
| 1029 | * Don't touch any member of the md after calling this function because | 1038 | * Don't touch any member of the md after calling this function because |
| 1030 | * the md may be freed in dm_put() at the end of this function. | 1039 | * the md may be freed in dm_put() at the end of this function. |
| @@ -1048,8 +1057,10 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) | |||
| 1048 | * queue lock again. | 1057 | * queue lock again. |
| 1049 | */ | 1058 | */ |
| 1050 | if (run_queue) { | 1059 | if (run_queue) { |
| 1051 | if (!nr_requests_pending || | 1060 | if (md->queue->mq_ops) |
| 1052 | (nr_requests_pending >= md->queue->nr_congestion_on)) | 1061 | blk_mq_run_hw_queues(md->queue, true); |
| 1062 | else if (!nr_requests_pending || | ||
| 1063 | (nr_requests_pending >= md->queue->nr_congestion_on)) | ||
| 1053 | blk_run_queue_async(md->queue); | 1064 | blk_run_queue_async(md->queue); |
| 1054 | } | 1065 | } |
| 1055 | 1066 | ||
| @@ -1062,13 +1073,17 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) | |||
| 1062 | static void free_rq_clone(struct request *clone) | 1073 | static void free_rq_clone(struct request *clone) |
| 1063 | { | 1074 | { |
| 1064 | struct dm_rq_target_io *tio = clone->end_io_data; | 1075 | struct dm_rq_target_io *tio = clone->end_io_data; |
| 1076 | struct mapped_device *md = tio->md; | ||
| 1065 | 1077 | ||
| 1066 | blk_rq_unprep_clone(clone); | 1078 | blk_rq_unprep_clone(clone); |
| 1079 | |||
| 1067 | if (clone->q && clone->q->mq_ops) | 1080 | if (clone->q && clone->q->mq_ops) |
| 1068 | tio->ti->type->release_clone_rq(clone); | 1081 | tio->ti->type->release_clone_rq(clone); |
| 1069 | else | 1082 | else |
| 1070 | free_clone_request(tio->md, clone); | 1083 | free_clone_request(md, clone); |
| 1071 | free_rq_tio(tio); | 1084 | |
| 1085 | if (!md->queue->mq_ops) | ||
| 1086 | free_rq_tio(tio); | ||
| 1072 | } | 1087 | } |
| 1073 | 1088 | ||
| 1074 | /* | 1089 | /* |
| @@ -1097,17 +1112,22 @@ static void dm_end_request(struct request *clone, int error) | |||
| 1097 | } | 1112 | } |
| 1098 | 1113 | ||
| 1099 | free_rq_clone(clone); | 1114 | free_rq_clone(clone); |
| 1100 | blk_end_request_all(rq, error); | 1115 | if (!rq->q->mq_ops) |
| 1116 | blk_end_request_all(rq, error); | ||
| 1117 | else | ||
| 1118 | blk_mq_end_request(rq, error); | ||
| 1101 | rq_completed(md, rw, true); | 1119 | rq_completed(md, rw, true); |
| 1102 | } | 1120 | } |
| 1103 | 1121 | ||
| 1104 | static void dm_unprep_request(struct request *rq) | 1122 | static void dm_unprep_request(struct request *rq) |
| 1105 | { | 1123 | { |
| 1106 | struct dm_rq_target_io *tio = rq->special; | 1124 | struct dm_rq_target_io *tio = tio_from_request(rq); |
| 1107 | struct request *clone = tio->clone; | 1125 | struct request *clone = tio->clone; |
| 1108 | 1126 | ||
| 1109 | rq->special = NULL; | 1127 | if (!rq->q->mq_ops) { |
| 1110 | rq->cmd_flags &= ~REQ_DONTPREP; | 1128 | rq->special = NULL; |
| 1129 | rq->cmd_flags &= ~REQ_DONTPREP; | ||
| 1130 | } | ||
| 1111 | 1131 | ||
| 1112 | if (clone) | 1132 | if (clone) |
| 1113 | free_rq_clone(clone); | 1133 | free_rq_clone(clone); |
| @@ -1116,18 +1136,29 @@ static void dm_unprep_request(struct request *rq) | |||
| 1116 | /* | 1136 | /* |
| 1117 | * Requeue the original request of a clone. | 1137 | * Requeue the original request of a clone. |
| 1118 | */ | 1138 | */ |
| 1119 | static void dm_requeue_unmapped_original_request(struct mapped_device *md, | 1139 | static void old_requeue_request(struct request *rq) |
| 1120 | struct request *rq) | ||
| 1121 | { | 1140 | { |
| 1122 | int rw = rq_data_dir(rq); | ||
| 1123 | struct request_queue *q = rq->q; | 1141 | struct request_queue *q = rq->q; |
| 1124 | unsigned long flags; | 1142 | unsigned long flags; |
| 1125 | 1143 | ||
| 1126 | dm_unprep_request(rq); | ||
| 1127 | |||
| 1128 | spin_lock_irqsave(q->queue_lock, flags); | 1144 | spin_lock_irqsave(q->queue_lock, flags); |
| 1129 | blk_requeue_request(q, rq); | 1145 | blk_requeue_request(q, rq); |
| 1130 | spin_unlock_irqrestore(q->queue_lock, flags); | 1146 | spin_unlock_irqrestore(q->queue_lock, flags); |
| 1147 | } | ||
| 1148 | |||
| 1149 | static void dm_requeue_unmapped_original_request(struct mapped_device *md, | ||
| 1150 | struct request *rq) | ||
| 1151 | { | ||
| 1152 | int rw = rq_data_dir(rq); | ||
| 1153 | |||
| 1154 | dm_unprep_request(rq); | ||
| 1155 | |||
| 1156 | if (!rq->q->mq_ops) | ||
| 1157 | old_requeue_request(rq); | ||
| 1158 | else { | ||
| 1159 | blk_mq_requeue_request(rq); | ||
| 1160 | blk_mq_kick_requeue_list(rq->q); | ||
| 1161 | } | ||
| 1131 | 1162 | ||
| 1132 | rq_completed(md, rw, false); | 1163 | rq_completed(md, rw, false); |
| 1133 | } | 1164 | } |
| @@ -1139,35 +1170,44 @@ static void dm_requeue_unmapped_request(struct request *clone) | |||
| 1139 | dm_requeue_unmapped_original_request(tio->md, tio->orig); | 1170 | dm_requeue_unmapped_original_request(tio->md, tio->orig); |
| 1140 | } | 1171 | } |
| 1141 | 1172 | ||
| 1142 | static void __stop_queue(struct request_queue *q) | 1173 | static void old_stop_queue(struct request_queue *q) |
| 1143 | { | ||
| 1144 | blk_stop_queue(q); | ||
| 1145 | } | ||
| 1146 | |||
| 1147 | static void stop_queue(struct request_queue *q) | ||
| 1148 | { | 1174 | { |
| 1149 | unsigned long flags; | 1175 | unsigned long flags; |
| 1150 | 1176 | ||
| 1177 | if (blk_queue_stopped(q)) | ||
| 1178 | return; | ||
| 1179 | |||
| 1151 | spin_lock_irqsave(q->queue_lock, flags); | 1180 | spin_lock_irqsave(q->queue_lock, flags); |
| 1152 | __stop_queue(q); | 1181 | blk_stop_queue(q); |
| 1153 | spin_unlock_irqrestore(q->queue_lock, flags); | 1182 | spin_unlock_irqrestore(q->queue_lock, flags); |
| 1154 | } | 1183 | } |
| 1155 | 1184 | ||
| 1156 | static void __start_queue(struct request_queue *q) | 1185 | static void stop_queue(struct request_queue *q) |
| 1157 | { | 1186 | { |
| 1158 | if (blk_queue_stopped(q)) | 1187 | if (!q->mq_ops) |
| 1159 | blk_start_queue(q); | 1188 | old_stop_queue(q); |
| 1189 | else | ||
| 1190 | blk_mq_stop_hw_queues(q); | ||
| 1160 | } | 1191 | } |
| 1161 | 1192 | ||
| 1162 | static void start_queue(struct request_queue *q) | 1193 | static void old_start_queue(struct request_queue *q) |
| 1163 | { | 1194 | { |
| 1164 | unsigned long flags; | 1195 | unsigned long flags; |
| 1165 | 1196 | ||
| 1166 | spin_lock_irqsave(q->queue_lock, flags); | 1197 | spin_lock_irqsave(q->queue_lock, flags); |
| 1167 | __start_queue(q); | 1198 | if (blk_queue_stopped(q)) |
| 1199 | blk_start_queue(q); | ||
| 1168 | spin_unlock_irqrestore(q->queue_lock, flags); | 1200 | spin_unlock_irqrestore(q->queue_lock, flags); |
| 1169 | } | 1201 | } |
| 1170 | 1202 | ||
| 1203 | static void start_queue(struct request_queue *q) | ||
| 1204 | { | ||
| 1205 | if (!q->mq_ops) | ||
| 1206 | old_start_queue(q); | ||
| 1207 | else | ||
| 1208 | blk_mq_start_stopped_hw_queues(q, true); | ||
| 1209 | } | ||
| 1210 | |||
| 1171 | static void dm_done(struct request *clone, int error, bool mapped) | 1211 | static void dm_done(struct request *clone, int error, bool mapped) |
| 1172 | { | 1212 | { |
| 1173 | int r = error; | 1213 | int r = error; |
| @@ -1206,13 +1246,20 @@ static void dm_done(struct request *clone, int error, bool mapped) | |||
| 1206 | static void dm_softirq_done(struct request *rq) | 1246 | static void dm_softirq_done(struct request *rq) |
| 1207 | { | 1247 | { |
| 1208 | bool mapped = true; | 1248 | bool mapped = true; |
| 1209 | struct dm_rq_target_io *tio = rq->special; | 1249 | struct dm_rq_target_io *tio = tio_from_request(rq); |
| 1210 | struct request *clone = tio->clone; | 1250 | struct request *clone = tio->clone; |
| 1251 | int rw; | ||
| 1211 | 1252 | ||
| 1212 | if (!clone) { | 1253 | if (!clone) { |
| 1213 | blk_end_request_all(rq, tio->error); | 1254 | rw = rq_data_dir(rq); |
| 1214 | rq_completed(tio->md, rq_data_dir(rq), false); | 1255 | if (!rq->q->mq_ops) { |
| 1215 | free_rq_tio(tio); | 1256 | blk_end_request_all(rq, tio->error); |
| 1257 | rq_completed(tio->md, rw, false); | ||
| 1258 | free_rq_tio(tio); | ||
| 1259 | } else { | ||
| 1260 | blk_mq_end_request(rq, tio->error); | ||
| 1261 | rq_completed(tio->md, rw, false); | ||
| 1262 | } | ||
| 1216 | return; | 1263 | return; |
| 1217 | } | 1264 | } |
| 1218 | 1265 | ||
| @@ -1228,7 +1275,7 @@ static void dm_softirq_done(struct request *rq) | |||
| 1228 | */ | 1275 | */ |
| 1229 | static void dm_complete_request(struct request *rq, int error) | 1276 | static void dm_complete_request(struct request *rq, int error) |
| 1230 | { | 1277 | { |
| 1231 | struct dm_rq_target_io *tio = rq->special; | 1278 | struct dm_rq_target_io *tio = tio_from_request(rq); |
| 1232 | 1279 | ||
| 1233 | tio->error = error; | 1280 | tio->error = error; |
| 1234 | blk_complete_request(rq); | 1281 | blk_complete_request(rq); |
| @@ -1247,7 +1294,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error) | |||
| 1247 | } | 1294 | } |
| 1248 | 1295 | ||
| 1249 | /* | 1296 | /* |
| 1250 | * Called with the clone's queue lock held | 1297 | * Called with the clone's queue lock held (for non-blk-mq) |
| 1251 | */ | 1298 | */ |
| 1252 | static void end_clone_request(struct request *clone, int error) | 1299 | static void end_clone_request(struct request *clone, int error) |
| 1253 | { | 1300 | { |
| @@ -1808,6 +1855,18 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, | |||
| 1808 | 1855 | ||
| 1809 | static void map_tio_request(struct kthread_work *work); | 1856 | static void map_tio_request(struct kthread_work *work); |
| 1810 | 1857 | ||
| 1858 | static void init_tio(struct dm_rq_target_io *tio, struct request *rq, | ||
| 1859 | struct mapped_device *md) | ||
| 1860 | { | ||
| 1861 | tio->md = md; | ||
| 1862 | tio->ti = NULL; | ||
| 1863 | tio->clone = NULL; | ||
| 1864 | tio->orig = rq; | ||
| 1865 | tio->error = 0; | ||
| 1866 | memset(&tio->info, 0, sizeof(tio->info)); | ||
| 1867 | init_kthread_work(&tio->work, map_tio_request); | ||
| 1868 | } | ||
| 1869 | |||
| 1811 | static struct dm_rq_target_io *prep_tio(struct request *rq, | 1870 | static struct dm_rq_target_io *prep_tio(struct request *rq, |
| 1812 | struct mapped_device *md, gfp_t gfp_mask) | 1871 | struct mapped_device *md, gfp_t gfp_mask) |
| 1813 | { | 1872 | { |
| @@ -1819,13 +1878,7 @@ static struct dm_rq_target_io *prep_tio(struct request *rq, | |||
| 1819 | if (!tio) | 1878 | if (!tio) |
| 1820 | return NULL; | 1879 | return NULL; |
| 1821 | 1880 | ||
| 1822 | tio->md = md; | 1881 | init_tio(tio, rq, md); |
| 1823 | tio->ti = NULL; | ||
| 1824 | tio->clone = NULL; | ||
| 1825 | tio->orig = rq; | ||
| 1826 | tio->error = 0; | ||
| 1827 | memset(&tio->info, 0, sizeof(tio->info)); | ||
| 1828 | init_kthread_work(&tio->work, map_tio_request); | ||
| 1829 | 1882 | ||
| 1830 | table = dm_get_live_table(md, &srcu_idx); | 1883 | table = dm_get_live_table(md, &srcu_idx); |
| 1831 | if (!dm_table_mq_request_based(table)) { | 1884 | if (!dm_table_mq_request_based(table)) { |
| @@ -1869,11 +1922,11 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) | |||
| 1869 | * DM_MAPIO_REQUEUE : the original request needs to be requeued | 1922 | * DM_MAPIO_REQUEUE : the original request needs to be requeued |
| 1870 | * < 0 : the request was completed due to failure | 1923 | * < 0 : the request was completed due to failure |
| 1871 | */ | 1924 | */ |
| 1872 | static int map_request(struct dm_target *ti, struct request *rq, | 1925 | static int map_request(struct dm_rq_target_io *tio, struct request *rq, |
| 1873 | struct mapped_device *md) | 1926 | struct mapped_device *md) |
| 1874 | { | 1927 | { |
| 1875 | int r; | 1928 | int r; |
| 1876 | struct dm_rq_target_io *tio = rq->special; | 1929 | struct dm_target *ti = tio->ti; |
| 1877 | struct request *clone = NULL; | 1930 | struct request *clone = NULL; |
| 1878 | 1931 | ||
| 1879 | if (tio->clone) { | 1932 | if (tio->clone) { |
| @@ -1888,7 +1941,7 @@ static int map_request(struct dm_target *ti, struct request *rq, | |||
| 1888 | } | 1941 | } |
| 1889 | if (IS_ERR(clone)) | 1942 | if (IS_ERR(clone)) |
| 1890 | return DM_MAPIO_REQUEUE; | 1943 | return DM_MAPIO_REQUEUE; |
| 1891 | if (setup_clone(clone, rq, tio, GFP_KERNEL)) { | 1944 | if (setup_clone(clone, rq, tio, GFP_NOIO)) { |
| 1892 | /* -ENOMEM */ | 1945 | /* -ENOMEM */ |
| 1893 | ti->type->release_clone_rq(clone); | 1946 | ti->type->release_clone_rq(clone); |
| 1894 | return DM_MAPIO_REQUEUE; | 1947 | return DM_MAPIO_REQUEUE; |
| @@ -1929,13 +1982,16 @@ static void map_tio_request(struct kthread_work *work) | |||
| 1929 | struct request *rq = tio->orig; | 1982 | struct request *rq = tio->orig; |
| 1930 | struct mapped_device *md = tio->md; | 1983 | struct mapped_device *md = tio->md; |
| 1931 | 1984 | ||
| 1932 | if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE) | 1985 | if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) |
| 1933 | dm_requeue_unmapped_original_request(md, rq); | 1986 | dm_requeue_unmapped_original_request(md, rq); |
| 1934 | } | 1987 | } |
| 1935 | 1988 | ||
| 1936 | static void dm_start_request(struct mapped_device *md, struct request *orig) | 1989 | static void dm_start_request(struct mapped_device *md, struct request *orig) |
| 1937 | { | 1990 | { |
| 1938 | blk_start_request(orig); | 1991 | if (!orig->q->mq_ops) |
| 1992 | blk_start_request(orig); | ||
| 1993 | else | ||
| 1994 | blk_mq_start_request(orig); | ||
| 1939 | atomic_inc(&md->pending[rq_data_dir(orig)]); | 1995 | atomic_inc(&md->pending[rq_data_dir(orig)]); |
| 1940 | 1996 | ||
| 1941 | if (md->seq_rq_merge_deadline_usecs) { | 1997 | if (md->seq_rq_merge_deadline_usecs) { |
| @@ -2045,7 +2101,7 @@ static void dm_request_fn(struct request_queue *q) | |||
| 2045 | 2101 | ||
| 2046 | dm_start_request(md, rq); | 2102 | dm_start_request(md, rq); |
| 2047 | 2103 | ||
| 2048 | tio = rq->special; | 2104 | tio = tio_from_request(rq); |
| 2049 | /* Establish tio->ti before queuing work (map_tio_request) */ | 2105 | /* Establish tio->ti before queuing work (map_tio_request) */ |
| 2050 | tio->ti = ti; | 2106 | tio->ti = ti; |
| 2051 | queue_kthread_work(&md->kworker, &tio->work); | 2107 | queue_kthread_work(&md->kworker, &tio->work); |
| @@ -2142,7 +2198,7 @@ static void dm_init_md_queue(struct mapped_device *md) | |||
| 2142 | { | 2198 | { |
| 2143 | /* | 2199 | /* |
| 2144 | * Request-based dm devices cannot be stacked on top of bio-based dm | 2200 | * Request-based dm devices cannot be stacked on top of bio-based dm |
| 2145 | * devices. The type of this dm device has not been decided yet. | 2201 | * devices. The type of this dm device may not have been decided yet. |
| 2146 | * The type is decided at the first table loading time. | 2202 | * The type is decided at the first table loading time. |
| 2147 | * To prevent problematic device stacking, clear the queue flag | 2203 | * To prevent problematic device stacking, clear the queue flag |
| 2148 | * for request stacking support until then. | 2204 | * for request stacking support until then. |
| @@ -2150,7 +2206,15 @@ static void dm_init_md_queue(struct mapped_device *md) | |||
| 2150 | * This queue is new, so no concurrency on the queue_flags. | 2206 | * This queue is new, so no concurrency on the queue_flags. |
| 2151 | */ | 2207 | */ |
| 2152 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); | 2208 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); |
| 2209 | } | ||
| 2210 | |||
| 2211 | static void dm_init_old_md_queue(struct mapped_device *md) | ||
| 2212 | { | ||
| 2213 | dm_init_md_queue(md); | ||
| 2153 | 2214 | ||
| 2215 | /* | ||
| 2216 | * Initialize aspects of queue that aren't relevant for blk-mq | ||
| 2217 | */ | ||
| 2154 | md->queue->queuedata = md; | 2218 | md->queue->queuedata = md; |
| 2155 | md->queue->backing_dev_info.congested_fn = dm_any_congested; | 2219 | md->queue->backing_dev_info.congested_fn = dm_any_congested; |
| 2156 | md->queue->backing_dev_info.congested_data = md; | 2220 | md->queue->backing_dev_info.congested_data = md; |
| @@ -2273,6 +2337,7 @@ static void unlock_fs(struct mapped_device *md); | |||
| 2273 | static void free_dev(struct mapped_device *md) | 2337 | static void free_dev(struct mapped_device *md) |
| 2274 | { | 2338 | { |
| 2275 | int minor = MINOR(disk_devt(md->disk)); | 2339 | int minor = MINOR(disk_devt(md->disk)); |
| 2340 | bool using_blk_mq = !!md->queue->mq_ops; | ||
| 2276 | 2341 | ||
| 2277 | unlock_fs(md); | 2342 | unlock_fs(md); |
| 2278 | destroy_workqueue(md->wq); | 2343 | destroy_workqueue(md->wq); |
| @@ -2298,6 +2363,8 @@ static void free_dev(struct mapped_device *md) | |||
| 2298 | del_gendisk(md->disk); | 2363 | del_gendisk(md->disk); |
| 2299 | put_disk(md->disk); | 2364 | put_disk(md->disk); |
| 2300 | blk_cleanup_queue(md->queue); | 2365 | blk_cleanup_queue(md->queue); |
| 2366 | if (using_blk_mq) | ||
| 2367 | blk_mq_free_tag_set(&md->tag_set); | ||
| 2301 | bdput(md->bdev); | 2368 | bdput(md->bdev); |
| 2302 | free_minor(minor); | 2369 | free_minor(minor); |
| 2303 | 2370 | ||
| @@ -2457,7 +2524,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | |||
| 2457 | * This must be done before setting the queue restrictions, | 2524 | * This must be done before setting the queue restrictions, |
| 2458 | * because request-based dm may be run just after the setting. | 2525 | * because request-based dm may be run just after the setting. |
| 2459 | */ | 2526 | */ |
| 2460 | if (dm_table_request_based(t) && !blk_queue_stopped(q)) | 2527 | if (dm_table_request_based(t)) |
| 2461 | stop_queue(q); | 2528 | stop_queue(q); |
| 2462 | 2529 | ||
| 2463 | __bind_mempools(md, t); | 2530 | __bind_mempools(md, t); |
| @@ -2539,14 +2606,6 @@ unsigned dm_get_md_type(struct mapped_device *md) | |||
| 2539 | return md->type; | 2606 | return md->type; |
| 2540 | } | 2607 | } |
| 2541 | 2608 | ||
| 2542 | static bool dm_md_type_request_based(struct mapped_device *md) | ||
| 2543 | { | ||
| 2544 | unsigned table_type = dm_get_md_type(md); | ||
| 2545 | |||
| 2546 | return (table_type == DM_TYPE_REQUEST_BASED || | ||
| 2547 | table_type == DM_TYPE_MQ_REQUEST_BASED); | ||
| 2548 | } | ||
| 2549 | |||
| 2550 | struct target_type *dm_get_immutable_target_type(struct mapped_device *md) | 2609 | struct target_type *dm_get_immutable_target_type(struct mapped_device *md) |
| 2551 | { | 2610 | { |
| 2552 | return md->immutable_target_type; | 2611 | return md->immutable_target_type; |
| @@ -2563,6 +2622,14 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md) | |||
| 2563 | } | 2622 | } |
| 2564 | EXPORT_SYMBOL_GPL(dm_get_queue_limits); | 2623 | EXPORT_SYMBOL_GPL(dm_get_queue_limits); |
| 2565 | 2624 | ||
| 2625 | static void init_rq_based_worker_thread(struct mapped_device *md) | ||
| 2626 | { | ||
| 2627 | /* Initialize the request-based DM worker thread */ | ||
| 2628 | init_kthread_worker(&md->kworker); | ||
| 2629 | md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, | ||
| 2630 | "kdmwork-%s", dm_device_name(md)); | ||
| 2631 | } | ||
| 2632 | |||
| 2566 | /* | 2633 | /* |
| 2567 | * Fully initialize a request-based queue (->elevator, ->request_fn, etc). | 2634 | * Fully initialize a request-based queue (->elevator, ->request_fn, etc). |
| 2568 | */ | 2635 | */ |
| @@ -2571,29 +2638,131 @@ static int dm_init_request_based_queue(struct mapped_device *md) | |||
| 2571 | struct request_queue *q = NULL; | 2638 | struct request_queue *q = NULL; |
| 2572 | 2639 | ||
| 2573 | if (md->queue->elevator) | 2640 | if (md->queue->elevator) |
| 2574 | return 1; | 2641 | return 0; |
| 2575 | 2642 | ||
| 2576 | /* Fully initialize the queue */ | 2643 | /* Fully initialize the queue */ |
| 2577 | q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); | 2644 | q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); |
| 2578 | if (!q) | 2645 | if (!q) |
| 2579 | return 0; | 2646 | return -EINVAL; |
| 2580 | 2647 | ||
| 2581 | /* disable dm_request_fn's merge heuristic by default */ | 2648 | /* disable dm_request_fn's merge heuristic by default */ |
| 2582 | md->seq_rq_merge_deadline_usecs = 0; | 2649 | md->seq_rq_merge_deadline_usecs = 0; |
| 2583 | 2650 | ||
| 2584 | md->queue = q; | 2651 | md->queue = q; |
| 2585 | dm_init_md_queue(md); | 2652 | dm_init_old_md_queue(md); |
| 2586 | blk_queue_softirq_done(md->queue, dm_softirq_done); | 2653 | blk_queue_softirq_done(md->queue, dm_softirq_done); |
| 2587 | blk_queue_prep_rq(md->queue, dm_prep_fn); | 2654 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
| 2588 | 2655 | ||
| 2589 | /* Also initialize the request-based DM worker thread */ | 2656 | init_rq_based_worker_thread(md); |
| 2590 | init_kthread_worker(&md->kworker); | ||
| 2591 | md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, | ||
| 2592 | "kdmwork-%s", dm_device_name(md)); | ||
| 2593 | 2657 | ||
| 2594 | elv_register_queue(md->queue); | 2658 | elv_register_queue(md->queue); |
| 2595 | 2659 | ||
| 2596 | return 1; | 2660 | return 0; |
| 2661 | } | ||
| 2662 | |||
| 2663 | static int dm_mq_init_request(void *data, struct request *rq, | ||
| 2664 | unsigned int hctx_idx, unsigned int request_idx, | ||
| 2665 | unsigned int numa_node) | ||
| 2666 | { | ||
| 2667 | struct mapped_device *md = data; | ||
| 2668 | struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); | ||
| 2669 | |||
| 2670 | /* | ||
| 2671 | * Must initialize md member of tio, otherwise it won't | ||
| 2672 | * be available in dm_mq_queue_rq. | ||
| 2673 | */ | ||
| 2674 | tio->md = md; | ||
| 2675 | |||
| 2676 | return 0; | ||
| 2677 | } | ||
| 2678 | |||
| 2679 | static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, | ||
| 2680 | const struct blk_mq_queue_data *bd) | ||
| 2681 | { | ||
| 2682 | struct request *rq = bd->rq; | ||
| 2683 | struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); | ||
| 2684 | struct mapped_device *md = tio->md; | ||
| 2685 | int srcu_idx; | ||
| 2686 | struct dm_table *map = dm_get_live_table(md, &srcu_idx); | ||
| 2687 | struct dm_target *ti; | ||
| 2688 | sector_t pos; | ||
| 2689 | |||
| 2690 | /* always use block 0 to find the target for flushes for now */ | ||
| 2691 | pos = 0; | ||
| 2692 | if (!(rq->cmd_flags & REQ_FLUSH)) | ||
| 2693 | pos = blk_rq_pos(rq); | ||
| 2694 | |||
| 2695 | ti = dm_table_find_target(map, pos); | ||
| 2696 | if (!dm_target_is_valid(ti)) { | ||
| 2697 | dm_put_live_table(md, srcu_idx); | ||
| 2698 | DMERR_LIMIT("request attempted access beyond the end of device"); | ||
| 2699 | /* | ||
| 2700 | * Must perform setup, that rq_completed() requires, | ||
| 2701 | * before returning BLK_MQ_RQ_QUEUE_ERROR | ||
| 2702 | */ | ||
| 2703 | dm_start_request(md, rq); | ||
| 2704 | return BLK_MQ_RQ_QUEUE_ERROR; | ||
| 2705 | } | ||
| 2706 | dm_put_live_table(md, srcu_idx); | ||
| 2707 | |||
| 2708 | if (ti->type->busy && ti->type->busy(ti)) | ||
| 2709 | return BLK_MQ_RQ_QUEUE_BUSY; | ||
| 2710 | |||
| 2711 | dm_start_request(md, rq); | ||
| 2712 | |||
| 2713 | /* Init tio using md established in .init_request */ | ||
| 2714 | init_tio(tio, rq, md); | ||
| 2715 | |||
| 2716 | /* Establish tio->ti before queuing work (map_tio_request) */ | ||
| 2717 | tio->ti = ti; | ||
| 2718 | queue_kthread_work(&md->kworker, &tio->work); | ||
| 2719 | |||
| 2720 | return BLK_MQ_RQ_QUEUE_OK; | ||
| 2721 | } | ||
| 2722 | |||
| 2723 | static struct blk_mq_ops dm_mq_ops = { | ||
| 2724 | .queue_rq = dm_mq_queue_rq, | ||
| 2725 | .map_queue = blk_mq_map_queue, | ||
| 2726 | .complete = dm_softirq_done, | ||
| 2727 | .init_request = dm_mq_init_request, | ||
| 2728 | }; | ||
| 2729 | |||
| 2730 | static int dm_init_request_based_blk_mq_queue(struct mapped_device *md) | ||
| 2731 | { | ||
| 2732 | struct request_queue *q; | ||
| 2733 | int err; | ||
| 2734 | |||
| 2735 | memset(&md->tag_set, 0, sizeof(md->tag_set)); | ||
| 2736 | md->tag_set.ops = &dm_mq_ops; | ||
| 2737 | md->tag_set.queue_depth = BLKDEV_MAX_RQ; | ||
| 2738 | md->tag_set.numa_node = NUMA_NO_NODE; | ||
| 2739 | md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; | ||
| 2740 | md->tag_set.nr_hw_queues = 1; | ||
| 2741 | md->tag_set.cmd_size = sizeof(struct dm_rq_target_io); | ||
| 2742 | md->tag_set.driver_data = md; | ||
| 2743 | |||
| 2744 | err = blk_mq_alloc_tag_set(&md->tag_set); | ||
| 2745 | if (err) | ||
| 2746 | return err; | ||
| 2747 | |||
| 2748 | q = blk_mq_init_allocated_queue(&md->tag_set, md->queue); | ||
| 2749 | if (IS_ERR(q)) { | ||
| 2750 | err = PTR_ERR(q); | ||
| 2751 | goto out_tag_set; | ||
| 2752 | } | ||
| 2753 | md->queue = q; | ||
| 2754 | dm_init_md_queue(md); | ||
| 2755 | |||
| 2756 | /* backfill 'mq' sysfs registration normally done in blk_register_queue */ | ||
| 2757 | blk_mq_register_disk(md->disk); | ||
| 2758 | |||
| 2759 | init_rq_based_worker_thread(md); | ||
| 2760 | |||
| 2761 | return 0; | ||
| 2762 | |||
| 2763 | out_tag_set: | ||
| 2764 | blk_mq_free_tag_set(&md->tag_set); | ||
| 2765 | return err; | ||
| 2597 | } | 2766 | } |
| 2598 | 2767 | ||
| 2599 | /* | 2768 | /* |
| @@ -2601,15 +2770,29 @@ static int dm_init_request_based_queue(struct mapped_device *md) | |||
| 2601 | */ | 2770 | */ |
| 2602 | int dm_setup_md_queue(struct mapped_device *md) | 2771 | int dm_setup_md_queue(struct mapped_device *md) |
| 2603 | { | 2772 | { |
| 2604 | if (dm_md_type_request_based(md)) { | 2773 | int r; |
| 2605 | if (!dm_init_request_based_queue(md)) { | 2774 | unsigned md_type = dm_get_md_type(md); |
| 2775 | |||
| 2776 | switch (md_type) { | ||
| 2777 | case DM_TYPE_REQUEST_BASED: | ||
| 2778 | r = dm_init_request_based_queue(md); | ||
| 2779 | if (r) { | ||
| 2606 | DMWARN("Cannot initialize queue for request-based mapped device"); | 2780 | DMWARN("Cannot initialize queue for request-based mapped device"); |
| 2607 | return -EINVAL; | 2781 | return r; |
| 2608 | } | 2782 | } |
| 2609 | } else { | 2783 | break; |
| 2610 | /* bio-based specific initialization */ | 2784 | case DM_TYPE_MQ_REQUEST_BASED: |
| 2785 | r = dm_init_request_based_blk_mq_queue(md); | ||
| 2786 | if (r) { | ||
| 2787 | DMWARN("Cannot initialize queue for request-based blk-mq mapped device"); | ||
| 2788 | return r; | ||
| 2789 | } | ||
| 2790 | break; | ||
| 2791 | case DM_TYPE_BIO_BASED: | ||
| 2792 | dm_init_old_md_queue(md); | ||
| 2611 | blk_queue_make_request(md->queue, dm_make_request); | 2793 | blk_queue_make_request(md->queue, dm_make_request); |
| 2612 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | 2794 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); |
| 2795 | break; | ||
| 2613 | } | 2796 | } |
| 2614 | 2797 | ||
| 2615 | return 0; | 2798 | return 0; |
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 889f3a5b7b18..eac8c3641f39 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h | |||
| @@ -267,9 +267,9 @@ enum { | |||
| 267 | #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) | 267 | #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) |
| 268 | 268 | ||
| 269 | #define DM_VERSION_MAJOR 4 | 269 | #define DM_VERSION_MAJOR 4 |
| 270 | #define DM_VERSION_MINOR 30 | 270 | #define DM_VERSION_MINOR 31 |
| 271 | #define DM_VERSION_PATCHLEVEL 0 | 271 | #define DM_VERSION_PATCHLEVEL 0 |
| 272 | #define DM_VERSION_EXTRA "-ioctl (2014-12-22)" | 272 | #define DM_VERSION_EXTRA "-ioctl (2015-3-12)" |
| 273 | 273 | ||
| 274 | /* Status bits */ | 274 | /* Status bits */ |
| 275 | #define DM_READONLY_FLAG (1 << 0) /* In/Out */ | 275 | #define DM_READONLY_FLAG (1 << 0) /* In/Out */ |
