author    Mike Snitzer <snitzer@redhat.com>    2016-05-12 16:28:10 -0400
committer Mike Snitzer <snitzer@redhat.com>    2016-06-10 15:15:44 -0400
commit    4cc96131afce3eaae7c13dff41c6ba771cf10e96 (patch)
tree      1015e8bd091d2c108fb3d100cfd275c25c89afb3
parent    1a89694f7899d39aa58cc6f061e97a17089ac025 (diff)
dm: move request-based code out to dm-rq.[hc]
Add some separation between bio-based and request-based DM core code.

'struct mapped_device' and other DM-core-only structures and functions have
been moved to dm-core.h, and all relevant DM core .c files have been updated
to include dm-core.h rather than dm.h.

DM targets should _never_ include dm-core.h!

[block core merge conflict resolution from Stephen Rothwell]

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
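For context, the include discipline this split establishes can be illustrated with a minimal sketch of a hypothetical bio-based target: targets build only against the public <linux/device-mapper.h> API, while "dm-core.h" stays private to DM core. The "example" target name, its trivial ctr/map hooks, and the pass-through behaviour below are assumptions made for illustration; they are not part of this patch.

/* Illustrative sketch only -- not part of this commit. */
#include <linux/module.h>
#include <linux/device-mapper.h>	/* public target API; never "dm-core.h" */

/* Accept no table arguments and keep no per-target state. */
static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	return 0;
}

/*
 * A real target would redirect bio->bi_bdev to an underlying device and
 * return DM_MAPIO_REMAPPED; this sketch simply completes the bio itself.
 */
static int example_map(struct dm_target *ti, struct bio *bio)
{
	bio_endio(bio);
	return DM_MAPIO_SUBMITTED;
}

static struct target_type example_target = {
	.name    = "example",
	.version = {1, 0, 0},
	.module  = THIS_MODULE,
	.ctr     = example_ctr,
	.map     = example_map,
};

static int __init dm_example_init(void)
{
	return dm_register_target(&example_target);
}

static void __exit dm_example_exit(void)
{
	dm_unregister_target(&example_target);
}

module_init(dm_example_init);
module_exit(dm_example_exit);
MODULE_LICENSE("GPL");

In the patch itself, only the DM core files converted below (dm.c, dm-rq.c, dm-builtin.c, dm-io.c, dm-ioctl.c, dm-kcopyd.c, dm-stats.c, dm-sysfs.c, dm-table.c, dm-target.c) include "dm-core.h"; dm-mpath.c, being a target, is switched to the new dm-rq.h instead.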
-rw-r--r--  drivers/md/Makefile      |    3
-rw-r--r--  drivers/md/dm-builtin.c  |    2
-rw-r--r--  drivers/md/dm-core.h     |  149
-rw-r--r--  drivers/md/dm-io.c       |    2
-rw-r--r--  drivers/md/dm-ioctl.c    |    2
-rw-r--r--  drivers/md/dm-kcopyd.c   |    2
-rw-r--r--  drivers/md/dm-mpath.c    |    4
-rw-r--r--  drivers/md/dm-rq.c       |  959
-rw-r--r--  drivers/md/dm-rq.h       |   64
-rw-r--r--  drivers/md/dm-stats.c    |    2
-rw-r--r--  drivers/md/dm-sysfs.c    |    3
-rw-r--r--  drivers/md/dm-table.c    |    2
-rw-r--r--  drivers/md/dm-target.c   |    2
-rw-r--r--  drivers/md/dm.c          | 1110
-rw-r--r--  drivers/md/dm.h          |   25
15 files changed, 1200 insertions, 1131 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 52ba8dd82821..3cbda1af87a0 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -3,7 +3,8 @@
 #
 
 dm-mod-y	+= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
-		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o
+		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \
+		   dm-rq.o
 dm-multipath-y	+= dm-path-selector.o dm-mpath.o
 dm-snapshot-y	+= dm-snap.o dm-exception-store.o dm-snap-transient.o \
 		    dm-snap-persistent.o
diff --git a/drivers/md/dm-builtin.c b/drivers/md/dm-builtin.c
index 6c9049c51b2b..f092771878c2 100644
--- a/drivers/md/dm-builtin.c
+++ b/drivers/md/dm-builtin.c
@@ -1,4 +1,4 @@
1#include "dm.h" 1#include "dm-core.h"
2 2
3/* 3/*
4 * The kobject release method must not be placed in the module itself, 4 * The kobject release method must not be placed in the module itself,
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
new file mode 100644
index 000000000000..40ceba1fe8be
--- /dev/null
+++ b/drivers/md/dm-core.h
@@ -0,0 +1,149 @@
1/*
2 * Internal header file _only_ for device mapper core
3 *
4 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
5 *
6 * This file is released under the LGPL.
7 */
8
9#ifndef DM_CORE_INTERNAL_H
10#define DM_CORE_INTERNAL_H
11
12#include <linux/kthread.h>
13#include <linux/ktime.h>
14#include <linux/blk-mq.h>
15
16#include <trace/events/block.h>
17
18#include "dm.h"
19
20#define DM_RESERVED_MAX_IOS 1024
21
22struct dm_kobject_holder {
23 struct kobject kobj;
24 struct completion completion;
25};
26
27/*
28 * DM core internal structure that is used directly by dm.c and dm-rq.c
29 * DM targets must _not_ dereference a mapped_device to directly access its members!
30 */
31struct mapped_device {
32 struct srcu_struct io_barrier;
33 struct mutex suspend_lock;
34
35 /*
36 * The current mapping (struct dm_table *).
37 * Use dm_get_live_table{_fast} or take suspend_lock for
38 * dereference.
39 */
40 void __rcu *map;
41
42 struct list_head table_devices;
43 struct mutex table_devices_lock;
44
45 unsigned long flags;
46
47 struct request_queue *queue;
48 int numa_node_id;
49
50 unsigned type;
51 /* Protect queue and type against concurrent access. */
52 struct mutex type_lock;
53
54 atomic_t holders;
55 atomic_t open_count;
56
57 struct dm_target *immutable_target;
58 struct target_type *immutable_target_type;
59
60 struct gendisk *disk;
61 char name[16];
62
63 void *interface_ptr;
64
65 /*
66 * A list of ios that arrived while we were suspended.
67 */
68 atomic_t pending[2];
69 wait_queue_head_t wait;
70 struct work_struct work;
71 spinlock_t deferred_lock;
72 struct bio_list deferred;
73
74 /*
75 * Event handling.
76 */
77 wait_queue_head_t eventq;
78 atomic_t event_nr;
79 atomic_t uevent_seq;
80 struct list_head uevent_list;
81 spinlock_t uevent_lock; /* Protect access to uevent_list */
82
83 /* the number of internal suspends */
84 unsigned internal_suspend_count;
85
86 /*
87 * Processing queue (flush)
88 */
89 struct workqueue_struct *wq;
90
91 /*
92 * io objects are allocated from here.
93 */
94 mempool_t *io_pool;
95 mempool_t *rq_pool;
96
97 struct bio_set *bs;
98
99 /*
100 * freeze/thaw support require holding onto a super block
101 */
102 struct super_block *frozen_sb;
103
104 /* forced geometry settings */
105 struct hd_geometry geometry;
106
107 struct block_device *bdev;
108
109 /* kobject and completion */
110 struct dm_kobject_holder kobj_holder;
111
112 /* zero-length flush that will be cloned and submitted to targets */
113 struct bio flush_bio;
114
115 struct dm_stats stats;
116
117 struct kthread_worker kworker;
118 struct task_struct *kworker_task;
119
120 /* for request-based merge heuristic in dm_request_fn() */
121 unsigned seq_rq_merge_deadline_usecs;
122 int last_rq_rw;
123 sector_t last_rq_pos;
124 ktime_t last_rq_start_time;
125
126 /* for blk-mq request-based DM support */
127 struct blk_mq_tag_set *tag_set;
128 bool use_blk_mq:1;
129 bool init_tio_pdu:1;
130};
131
132void dm_init_md_queue(struct mapped_device *md);
133void dm_init_normal_md_queue(struct mapped_device *md);
134int md_in_flight(struct mapped_device *md);
135void disable_write_same(struct mapped_device *md);
136
137static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
138{
139 return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
140}
141
142unsigned __dm_get_module_param(unsigned *module_param, unsigned def, unsigned max);
143
144static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
145{
146 return !maxlen || strlen(result) + 1 >= maxlen;
147}
148
149#endif
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 0e225fd4a8d1..daa03e41654a 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -5,7 +5,7 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include "dm-core.h"
 
 #include <linux/device-mapper.h>
 
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 2c7ca258c4e4..b59e34595ad8 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -5,7 +5,7 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include "dm-core.h"
 
 #include <linux/module.h>
 #include <linux/vmalloc.h>
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 9da1d54ac6cb..9e9d04cb7d51 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -26,7 +26,7 @@
 #include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>
 
-#include "dm.h"
+#include "dm-core.h"
 
 #define SUB_JOB_SIZE	128
 #define SPLIT_COUNT	8
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 52baf8a5b0f4..e1c07d1ec80b 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -7,7 +7,7 @@
 
 #include <linux/device-mapper.h>
 
-#include "dm.h"
+#include "dm-rq.h"
 #include "dm-path-selector.h"
 #include "dm-uevent.h"
 
@@ -1328,7 +1328,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
	 * during end I/O handling, since those clone requests don't have
	 * bio clones.  If we queue them inside the multipath target,
	 * we need to make bio clones, that requires memory allocation.
-	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+	 * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests
	 * don't have bio clones.)
	 * Instead of queueing the clone request here, we queue the original
	 * request into dm core, which will remake a clone request and
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
new file mode 100644
index 000000000000..787c81b16a26
--- /dev/null
+++ b/drivers/md/dm-rq.c
@@ -0,0 +1,959 @@
1/*
2 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
3 *
4 * This file is released under the GPL.
5 */
6
7#include "dm-core.h"
8#include "dm-rq.h"
9
10#include <linux/elevator.h> /* for rq_end_sector() */
11#include <linux/blk-mq.h>
12
13#define DM_MSG_PREFIX "core-rq"
14
15#define DM_MQ_NR_HW_QUEUES 1
16#define DM_MQ_QUEUE_DEPTH 2048
17static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
18static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
19
20/*
21 * Request-based DM's mempools' reserved IOs set by the user.
22 */
23#define RESERVED_REQUEST_BASED_IOS 256
24static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
25
26#ifdef CONFIG_DM_MQ_DEFAULT
27static bool use_blk_mq = true;
28#else
29static bool use_blk_mq = false;
30#endif
31
32bool dm_use_blk_mq_default(void)
33{
34 return use_blk_mq;
35}
36
37bool dm_use_blk_mq(struct mapped_device *md)
38{
39 return md->use_blk_mq;
40}
41EXPORT_SYMBOL_GPL(dm_use_blk_mq);
42
43unsigned dm_get_reserved_rq_based_ios(void)
44{
45 return __dm_get_module_param(&reserved_rq_based_ios,
46 RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
47}
48EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
49
50static unsigned dm_get_blk_mq_nr_hw_queues(void)
51{
52 return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
53}
54
55static unsigned dm_get_blk_mq_queue_depth(void)
56{
57 return __dm_get_module_param(&dm_mq_queue_depth,
58 DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
59}
60
61int dm_request_based(struct mapped_device *md)
62{
63 return blk_queue_stackable(md->queue);
64}
65
66static void dm_old_start_queue(struct request_queue *q)
67{
68 unsigned long flags;
69
70 spin_lock_irqsave(q->queue_lock, flags);
71 if (blk_queue_stopped(q))
72 blk_start_queue(q);
73 spin_unlock_irqrestore(q->queue_lock, flags);
74}
75
76void dm_start_queue(struct request_queue *q)
77{
78 if (!q->mq_ops)
79 dm_old_start_queue(q);
80 else {
81 blk_mq_start_stopped_hw_queues(q, true);
82 blk_mq_kick_requeue_list(q);
83 }
84}
85
86static void dm_old_stop_queue(struct request_queue *q)
87{
88 unsigned long flags;
89
90 spin_lock_irqsave(q->queue_lock, flags);
91 if (blk_queue_stopped(q)) {
92 spin_unlock_irqrestore(q->queue_lock, flags);
93 return;
94 }
95
96 blk_stop_queue(q);
97 spin_unlock_irqrestore(q->queue_lock, flags);
98}
99
100void dm_stop_queue(struct request_queue *q)
101{
102 if (!q->mq_ops)
103 dm_old_stop_queue(q);
104 else
105 blk_mq_stop_hw_queues(q);
106}
107
108static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
109 gfp_t gfp_mask)
110{
111 return mempool_alloc(md->io_pool, gfp_mask);
112}
113
114static void free_old_rq_tio(struct dm_rq_target_io *tio)
115{
116 mempool_free(tio, tio->md->io_pool);
117}
118
119static struct request *alloc_old_clone_request(struct mapped_device *md,
120 gfp_t gfp_mask)
121{
122 return mempool_alloc(md->rq_pool, gfp_mask);
123}
124
125static void free_old_clone_request(struct mapped_device *md, struct request *rq)
126{
127 mempool_free(rq, md->rq_pool);
128}
129
130/*
131 * Partial completion handling for request-based dm
132 */
133static void end_clone_bio(struct bio *clone)
134{
135 struct dm_rq_clone_bio_info *info =
136 container_of(clone, struct dm_rq_clone_bio_info, clone);
137 struct dm_rq_target_io *tio = info->tio;
138 struct bio *bio = info->orig;
139 unsigned int nr_bytes = info->orig->bi_iter.bi_size;
140 int error = clone->bi_error;
141
142 bio_put(clone);
143
144 if (tio->error)
145 /*
146 * An error has already been detected on the request.
147 * Once error occurred, just let clone->end_io() handle
148 * the remainder.
149 */
150 return;
151 else if (error) {
152 /*
153 * Don't notice the error to the upper layer yet.
154 * The error handling decision is made by the target driver,
155 * when the request is completed.
156 */
157 tio->error = error;
158 return;
159 }
160
161 /*
162 * I/O for the bio successfully completed.
163 * Notice the data completion to the upper layer.
164 */
165
166 /*
167 * bios are processed from the head of the list.
168 * So the completing bio should always be rq->bio.
169 * If it's not, something wrong is happening.
170 */
171 if (tio->orig->bio != bio)
172 DMERR("bio completion is going in the middle of the request");
173
174 /*
175 * Update the original request.
176 * Do not use blk_end_request() here, because it may complete
177 * the original request before the clone, and break the ordering.
178 */
179 blk_update_request(tio->orig, 0, nr_bytes);
180}
181
182static struct dm_rq_target_io *tio_from_request(struct request *rq)
183{
184 return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
185}
186
187static void rq_end_stats(struct mapped_device *md, struct request *orig)
188{
189 if (unlikely(dm_stats_used(&md->stats))) {
190 struct dm_rq_target_io *tio = tio_from_request(orig);
191 tio->duration_jiffies = jiffies - tio->duration_jiffies;
192 dm_stats_account_io(&md->stats, rq_data_dir(orig),
193 blk_rq_pos(orig), tio->n_sectors, true,
194 tio->duration_jiffies, &tio->stats_aux);
195 }
196}
197
198/*
199 * Don't touch any member of the md after calling this function because
200 * the md may be freed in dm_put() at the end of this function.
201 * Or do dm_get() before calling this function and dm_put() later.
202 */
203static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
204{
205 atomic_dec(&md->pending[rw]);
206
207 /* nudge anyone waiting on suspend queue */
208 if (!md_in_flight(md))
209 wake_up(&md->wait);
210
211 /*
212 * Run this off this callpath, as drivers could invoke end_io while
213 * inside their request_fn (and holding the queue lock). Calling
214 * back into ->request_fn() could deadlock attempting to grab the
215 * queue lock again.
216 */
217 if (!md->queue->mq_ops && run_queue)
218 blk_run_queue_async(md->queue);
219
220 /*
221 * dm_put() must be at the end of this function. See the comment above
222 */
223 dm_put(md);
224}
225
226static void free_rq_clone(struct request *clone)
227{
228 struct dm_rq_target_io *tio = clone->end_io_data;
229 struct mapped_device *md = tio->md;
230
231 blk_rq_unprep_clone(clone);
232
233 if (md->type == DM_TYPE_MQ_REQUEST_BASED)
234 /* stacked on blk-mq queue(s) */
235 tio->ti->type->release_clone_rq(clone);
236 else if (!md->queue->mq_ops)
237 /* request_fn queue stacked on request_fn queue(s) */
238 free_old_clone_request(md, clone);
239
240 if (!md->queue->mq_ops)
241 free_old_rq_tio(tio);
242}
243
244/*
245 * Complete the clone and the original request.
246 * Must be called without clone's queue lock held,
247 * see end_clone_request() for more details.
248 */
249static void dm_end_request(struct request *clone, int error)
250{
251 int rw = rq_data_dir(clone);
252 struct dm_rq_target_io *tio = clone->end_io_data;
253 struct mapped_device *md = tio->md;
254 struct request *rq = tio->orig;
255
256 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
257 rq->errors = clone->errors;
258 rq->resid_len = clone->resid_len;
259
260 if (rq->sense)
261 /*
262 * We are using the sense buffer of the original
263 * request.
264 * So setting the length of the sense data is enough.
265 */
266 rq->sense_len = clone->sense_len;
267 }
268
269 free_rq_clone(clone);
270 rq_end_stats(md, rq);
271 if (!rq->q->mq_ops)
272 blk_end_request_all(rq, error);
273 else
274 blk_mq_end_request(rq, error);
275 rq_completed(md, rw, true);
276}
277
278static void dm_unprep_request(struct request *rq)
279{
280 struct dm_rq_target_io *tio = tio_from_request(rq);
281 struct request *clone = tio->clone;
282
283 if (!rq->q->mq_ops) {
284 rq->special = NULL;
285 rq->cmd_flags &= ~REQ_DONTPREP;
286 }
287
288 if (clone)
289 free_rq_clone(clone);
290 else if (!tio->md->queue->mq_ops)
291 free_old_rq_tio(tio);
292}
293
294/*
295 * Requeue the original request of a clone.
296 */
297static void dm_old_requeue_request(struct request *rq)
298{
299 struct request_queue *q = rq->q;
300 unsigned long flags;
301
302 spin_lock_irqsave(q->queue_lock, flags);
303 blk_requeue_request(q, rq);
304 blk_run_queue_async(q);
305 spin_unlock_irqrestore(q->queue_lock, flags);
306}
307
308static void dm_mq_requeue_request(struct request *rq)
309{
310 struct request_queue *q = rq->q;
311 unsigned long flags;
312
313 blk_mq_requeue_request(rq);
314 spin_lock_irqsave(q->queue_lock, flags);
315 if (!blk_queue_stopped(q))
316 blk_mq_kick_requeue_list(q);
317 spin_unlock_irqrestore(q->queue_lock, flags);
318}
319
320static void dm_requeue_original_request(struct mapped_device *md,
321 struct request *rq)
322{
323 int rw = rq_data_dir(rq);
324
325 rq_end_stats(md, rq);
326 dm_unprep_request(rq);
327
328 if (!rq->q->mq_ops)
329 dm_old_requeue_request(rq);
330 else
331 dm_mq_requeue_request(rq);
332
333 rq_completed(md, rw, false);
334}
335
336static void dm_done(struct request *clone, int error, bool mapped)
337{
338 int r = error;
339 struct dm_rq_target_io *tio = clone->end_io_data;
340 dm_request_endio_fn rq_end_io = NULL;
341
342 if (tio->ti) {
343 rq_end_io = tio->ti->type->rq_end_io;
344
345 if (mapped && rq_end_io)
346 r = rq_end_io(tio->ti, clone, error, &tio->info);
347 }
348
349 if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
350 !clone->q->limits.max_write_same_sectors))
351 disable_write_same(tio->md);
352
353 if (r <= 0)
354 /* The target wants to complete the I/O */
355 dm_end_request(clone, r);
356 else if (r == DM_ENDIO_INCOMPLETE)
357 /* The target will handle the I/O */
358 return;
359 else if (r == DM_ENDIO_REQUEUE)
360 /* The target wants to requeue the I/O */
361 dm_requeue_original_request(tio->md, tio->orig);
362 else {
363 DMWARN("unimplemented target endio return value: %d", r);
364 BUG();
365 }
366}
367
368/*
369 * Request completion handler for request-based dm
370 */
371static void dm_softirq_done(struct request *rq)
372{
373 bool mapped = true;
374 struct dm_rq_target_io *tio = tio_from_request(rq);
375 struct request *clone = tio->clone;
376 int rw;
377
378 if (!clone) {
379 rq_end_stats(tio->md, rq);
380 rw = rq_data_dir(rq);
381 if (!rq->q->mq_ops) {
382 blk_end_request_all(rq, tio->error);
383 rq_completed(tio->md, rw, false);
384 free_old_rq_tio(tio);
385 } else {
386 blk_mq_end_request(rq, tio->error);
387 rq_completed(tio->md, rw, false);
388 }
389 return;
390 }
391
392 if (rq->cmd_flags & REQ_FAILED)
393 mapped = false;
394
395 dm_done(clone, tio->error, mapped);
396}
397
398/*
399 * Complete the clone and the original request with the error status
400 * through softirq context.
401 */
402static void dm_complete_request(struct request *rq, int error)
403{
404 struct dm_rq_target_io *tio = tio_from_request(rq);
405
406 tio->error = error;
407 if (!rq->q->mq_ops)
408 blk_complete_request(rq);
409 else
410 blk_mq_complete_request(rq, error);
411}
412
413/*
414 * Complete the not-mapped clone and the original request with the error status
415 * through softirq context.
416 * Target's rq_end_io() function isn't called.
417 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
418 */
419static void dm_kill_unmapped_request(struct request *rq, int error)
420{
421 rq->cmd_flags |= REQ_FAILED;
422 dm_complete_request(rq, error);
423}
424
425/*
426 * Called with the clone's queue lock held (in the case of .request_fn)
427 */
428static void end_clone_request(struct request *clone, int error)
429{
430 struct dm_rq_target_io *tio = clone->end_io_data;
431
432 if (!clone->q->mq_ops) {
433 /*
434 * For just cleaning up the information of the queue in which
435 * the clone was dispatched.
436 * The clone is *NOT* freed actually here because it is alloced
437 * from dm own mempool (REQ_ALLOCED isn't set).
438 */
439 __blk_put_request(clone->q, clone);
440 }
441
442 /*
443 * Actual request completion is done in a softirq context which doesn't
444 * hold the clone's queue lock. Otherwise, deadlock could occur because:
445 * - another request may be submitted by the upper level driver
446 * of the stacking during the completion
447 * - the submission which requires queue lock may be done
448 * against this clone's queue
449 */
450 dm_complete_request(tio->orig, error);
451}
452
453static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
454{
455 int r;
456
457 if (blk_queue_io_stat(clone->q))
458 clone->cmd_flags |= REQ_IO_STAT;
459
460 clone->start_time = jiffies;
461 r = blk_insert_cloned_request(clone->q, clone);
462 if (r)
463 /* must complete clone in terms of original request */
464 dm_complete_request(rq, r);
465}
466
467static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
468 void *data)
469{
470 struct dm_rq_target_io *tio = data;
471 struct dm_rq_clone_bio_info *info =
472 container_of(bio, struct dm_rq_clone_bio_info, clone);
473
474 info->orig = bio_orig;
475 info->tio = tio;
476 bio->bi_end_io = end_clone_bio;
477
478 return 0;
479}
480
481static int setup_clone(struct request *clone, struct request *rq,
482 struct dm_rq_target_io *tio, gfp_t gfp_mask)
483{
484 int r;
485
486 r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
487 dm_rq_bio_constructor, tio);
488 if (r)
489 return r;
490
491 clone->cmd = rq->cmd;
492 clone->cmd_len = rq->cmd_len;
493 clone->sense = rq->sense;
494 clone->end_io = end_clone_request;
495 clone->end_io_data = tio;
496
497 tio->clone = clone;
498
499 return 0;
500}
501
502static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
503 struct dm_rq_target_io *tio, gfp_t gfp_mask)
504{
505 /*
506 * Create clone for use with .request_fn request_queue
507 */
508 struct request *clone;
509
510 clone = alloc_old_clone_request(md, gfp_mask);
511 if (!clone)
512 return NULL;
513
514 blk_rq_init(NULL, clone);
515 if (setup_clone(clone, rq, tio, gfp_mask)) {
516 /* -ENOMEM */
517 free_old_clone_request(md, clone);
518 return NULL;
519 }
520
521 return clone;
522}
523
524static void map_tio_request(struct kthread_work *work);
525
526static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
527 struct mapped_device *md)
528{
529 tio->md = md;
530 tio->ti = NULL;
531 tio->clone = NULL;
532 tio->orig = rq;
533 tio->error = 0;
534 /*
535 * Avoid initializing info for blk-mq; it passes
536 * target-specific data through info.ptr
537 * (see: dm_mq_init_request)
538 */
539 if (!md->init_tio_pdu)
540 memset(&tio->info, 0, sizeof(tio->info));
541 if (md->kworker_task)
542 init_kthread_work(&tio->work, map_tio_request);
543}
544
545static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
546 struct mapped_device *md,
547 gfp_t gfp_mask)
548{
549 struct dm_rq_target_io *tio;
550 int srcu_idx;
551 struct dm_table *table;
552
553 tio = alloc_old_rq_tio(md, gfp_mask);
554 if (!tio)
555 return NULL;
556
557 init_tio(tio, rq, md);
558
559 table = dm_get_live_table(md, &srcu_idx);
560 /*
561 * Must clone a request if this .request_fn DM device
562 * is stacked on .request_fn device(s).
563 */
564 if (!dm_table_mq_request_based(table)) {
565 if (!clone_old_rq(rq, md, tio, gfp_mask)) {
566 dm_put_live_table(md, srcu_idx);
567 free_old_rq_tio(tio);
568 return NULL;
569 }
570 }
571 dm_put_live_table(md, srcu_idx);
572
573 return tio;
574}
575
576/*
577 * Called with the queue lock held.
578 */
579static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
580{
581 struct mapped_device *md = q->queuedata;
582 struct dm_rq_target_io *tio;
583
584 if (unlikely(rq->special)) {
585 DMWARN("Already has something in rq->special.");
586 return BLKPREP_KILL;
587 }
588
589 tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
590 if (!tio)
591 return BLKPREP_DEFER;
592
593 rq->special = tio;
594 rq->cmd_flags |= REQ_DONTPREP;
595
596 return BLKPREP_OK;
597}
598
599/*
600 * Returns:
601 * 0 : the request has been processed
602 * DM_MAPIO_REQUEUE : the original request needs to be requeued
603 * < 0 : the request was completed due to failure
604 */
605static int map_request(struct dm_rq_target_io *tio, struct request *rq,
606 struct mapped_device *md)
607{
608 int r;
609 struct dm_target *ti = tio->ti;
610 struct request *clone = NULL;
611
612 if (tio->clone) {
613 clone = tio->clone;
614 r = ti->type->map_rq(ti, clone, &tio->info);
615 } else {
616 r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
617 if (r < 0) {
618 /* The target wants to complete the I/O */
619 dm_kill_unmapped_request(rq, r);
620 return r;
621 }
622 if (r != DM_MAPIO_REMAPPED)
623 return r;
624 if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
625 /* -ENOMEM */
626 ti->type->release_clone_rq(clone);
627 return DM_MAPIO_REQUEUE;
628 }
629 }
630
631 switch (r) {
632 case DM_MAPIO_SUBMITTED:
633 /* The target has taken the I/O to submit by itself later */
634 break;
635 case DM_MAPIO_REMAPPED:
636 /* The target has remapped the I/O so dispatch it */
637 trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
638 blk_rq_pos(rq));
639 dm_dispatch_clone_request(clone, rq);
640 break;
641 case DM_MAPIO_REQUEUE:
642 /* The target wants to requeue the I/O */
643 dm_requeue_original_request(md, tio->orig);
644 break;
645 default:
646 if (r > 0) {
647 DMWARN("unimplemented target map return value: %d", r);
648 BUG();
649 }
650
651 /* The target wants to complete the I/O */
652 dm_kill_unmapped_request(rq, r);
653 return r;
654 }
655
656 return 0;
657}
658
659static void dm_start_request(struct mapped_device *md, struct request *orig)
660{
661 if (!orig->q->mq_ops)
662 blk_start_request(orig);
663 else
664 blk_mq_start_request(orig);
665 atomic_inc(&md->pending[rq_data_dir(orig)]);
666
667 if (md->seq_rq_merge_deadline_usecs) {
668 md->last_rq_pos = rq_end_sector(orig);
669 md->last_rq_rw = rq_data_dir(orig);
670 md->last_rq_start_time = ktime_get();
671 }
672
673 if (unlikely(dm_stats_used(&md->stats))) {
674 struct dm_rq_target_io *tio = tio_from_request(orig);
675 tio->duration_jiffies = jiffies;
676 tio->n_sectors = blk_rq_sectors(orig);
677 dm_stats_account_io(&md->stats, rq_data_dir(orig),
678 blk_rq_pos(orig), tio->n_sectors, false, 0,
679 &tio->stats_aux);
680 }
681
682 /*
683 * Hold the md reference here for the in-flight I/O.
684 * We can't rely on the reference count by device opener,
685 * because the device may be closed during the request completion
686 * when all bios are completed.
687 * See the comment in rq_completed() too.
688 */
689 dm_get(md);
690}
691
692static void map_tio_request(struct kthread_work *work)
693{
694 struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
695 struct request *rq = tio->orig;
696 struct mapped_device *md = tio->md;
697
698 if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
699 dm_requeue_original_request(md, rq);
700}
701
702ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
703{
704 return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
705}
706
707#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
708
709ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
710 const char *buf, size_t count)
711{
712 unsigned deadline;
713
714 if (!dm_request_based(md) || md->use_blk_mq)
715 return count;
716
717 if (kstrtouint(buf, 10, &deadline))
718 return -EINVAL;
719
720 if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
721 deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
722
723 md->seq_rq_merge_deadline_usecs = deadline;
724
725 return count;
726}
727
728static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
729{
730 ktime_t kt_deadline;
731
732 if (!md->seq_rq_merge_deadline_usecs)
733 return false;
734
735 kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
736 kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
737
738 return !ktime_after(ktime_get(), kt_deadline);
739}
740
741/*
742 * q->request_fn for old request-based dm.
743 * Called with the queue lock held.
744 */
745static void dm_old_request_fn(struct request_queue *q)
746{
747 struct mapped_device *md = q->queuedata;
748 struct dm_target *ti = md->immutable_target;
749 struct request *rq;
750 struct dm_rq_target_io *tio;
751 sector_t pos = 0;
752
753 if (unlikely(!ti)) {
754 int srcu_idx;
755 struct dm_table *map = dm_get_live_table(md, &srcu_idx);
756
757 ti = dm_table_find_target(map, pos);
758 dm_put_live_table(md, srcu_idx);
759 }
760
761 /*
762 * For suspend, check blk_queue_stopped() and increment
763 * ->pending within a single queue_lock not to increment the
764 * number of in-flight I/Os after the queue is stopped in
765 * dm_suspend().
766 */
767 while (!blk_queue_stopped(q)) {
768 rq = blk_peek_request(q);
769 if (!rq)
770 return;
771
772 /* always use block 0 to find the target for flushes for now */
773 pos = 0;
774 if (req_op(rq) != REQ_OP_FLUSH)
775 pos = blk_rq_pos(rq);
776
777 if ((dm_old_request_peeked_before_merge_deadline(md) &&
778 md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
779 md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
780 (ti->type->busy && ti->type->busy(ti))) {
781 blk_delay_queue(q, HZ / 100);
782 return;
783 }
784
785 dm_start_request(md, rq);
786
787 tio = tio_from_request(rq);
788 /* Establish tio->ti before queuing work (map_tio_request) */
789 tio->ti = ti;
790 queue_kthread_work(&md->kworker, &tio->work);
791 BUG_ON(!irqs_disabled());
792 }
793}
794
795/*
796 * Fully initialize a .request_fn request-based queue.
797 */
798int dm_old_init_request_queue(struct mapped_device *md)
799{
800 /* Fully initialize the queue */
801 if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL))
802 return -EINVAL;
803
804 /* disable dm_old_request_fn's merge heuristic by default */
805 md->seq_rq_merge_deadline_usecs = 0;
806
807 dm_init_normal_md_queue(md);
808 blk_queue_softirq_done(md->queue, dm_softirq_done);
809 blk_queue_prep_rq(md->queue, dm_old_prep_fn);
810
811 /* Initialize the request-based DM worker thread */
812 init_kthread_worker(&md->kworker);
813 md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
814 "kdmwork-%s", dm_device_name(md));
815
816 elv_register_queue(md->queue);
817
818 return 0;
819}
820
821static int dm_mq_init_request(void *data, struct request *rq,
822 unsigned int hctx_idx, unsigned int request_idx,
823 unsigned int numa_node)
824{
825 struct mapped_device *md = data;
826 struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
827
828 /*
829 * Must initialize md member of tio, otherwise it won't
830 * be available in dm_mq_queue_rq.
831 */
832 tio->md = md;
833
834 if (md->init_tio_pdu) {
835 /* target-specific per-io data is immediately after the tio */
836 tio->info.ptr = tio + 1;
837 }
838
839 return 0;
840}
841
842static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
843 const struct blk_mq_queue_data *bd)
844{
845 struct request *rq = bd->rq;
846 struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
847 struct mapped_device *md = tio->md;
848 struct dm_target *ti = md->immutable_target;
849
850 if (unlikely(!ti)) {
851 int srcu_idx;
852 struct dm_table *map = dm_get_live_table(md, &srcu_idx);
853
854 ti = dm_table_find_target(map, 0);
855 dm_put_live_table(md, srcu_idx);
856 }
857
858 if (ti->type->busy && ti->type->busy(ti))
859 return BLK_MQ_RQ_QUEUE_BUSY;
860
861 dm_start_request(md, rq);
862
863 /* Init tio using md established in .init_request */
864 init_tio(tio, rq, md);
865
866 /*
867 * Establish tio->ti before calling map_request().
868 */
869 tio->ti = ti;
870
871 /* Direct call is fine since .queue_rq allows allocations */
872 if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
873 /* Undo dm_start_request() before requeuing */
874 rq_end_stats(md, rq);
875 rq_completed(md, rq_data_dir(rq), false);
876 return BLK_MQ_RQ_QUEUE_BUSY;
877 }
878
879 return BLK_MQ_RQ_QUEUE_OK;
880}
881
882static struct blk_mq_ops dm_mq_ops = {
883 .queue_rq = dm_mq_queue_rq,
884 .map_queue = blk_mq_map_queue,
885 .complete = dm_softirq_done,
886 .init_request = dm_mq_init_request,
887};
888
889int dm_mq_init_request_queue(struct mapped_device *md, struct dm_target *immutable_tgt)
890{
891 struct request_queue *q;
892 int err;
893
894 if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) {
895 DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
896 return -EINVAL;
897 }
898
899 md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
900 if (!md->tag_set)
901 return -ENOMEM;
902
903 md->tag_set->ops = &dm_mq_ops;
904 md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
905 md->tag_set->numa_node = md->numa_node_id;
906 md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
907 md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
908 md->tag_set->driver_data = md;
909
910 md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
911 if (immutable_tgt && immutable_tgt->per_io_data_size) {
912 /* any target-specific per-io data is immediately after the tio */
913 md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
914 md->init_tio_pdu = true;
915 }
916
917 err = blk_mq_alloc_tag_set(md->tag_set);
918 if (err)
919 goto out_kfree_tag_set;
920
921 q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
922 if (IS_ERR(q)) {
923 err = PTR_ERR(q);
924 goto out_tag_set;
925 }
926 dm_init_md_queue(md);
927
928 /* backfill 'mq' sysfs registration normally done in blk_register_queue */
929 blk_mq_register_disk(md->disk);
930
931 return 0;
932
933out_tag_set:
934 blk_mq_free_tag_set(md->tag_set);
935out_kfree_tag_set:
936 kfree(md->tag_set);
937
938 return err;
939}
940
941void dm_mq_cleanup_mapped_device(struct mapped_device *md)
942{
943 if (md->tag_set) {
944 blk_mq_free_tag_set(md->tag_set);
945 kfree(md->tag_set);
946 }
947}
948
949module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
950MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
951
952module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
953MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");
954
955module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
956MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");
957
958module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
959MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
new file mode 100644
index 000000000000..1559f6486024
--- /dev/null
+++ b/drivers/md/dm-rq.h
@@ -0,0 +1,64 @@
1/*
2 * Internal header file for device mapper
3 *
4 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
5 *
6 * This file is released under the LGPL.
7 */
8
9#ifndef DM_RQ_INTERNAL_H
10#define DM_RQ_INTERNAL_H
11
12#include <linux/bio.h>
13#include <linux/kthread.h>
14
15#include "dm-stats.h"
16
17struct mapped_device;
18
19/*
20 * One of these is allocated per request.
21 */
22struct dm_rq_target_io {
23 struct mapped_device *md;
24 struct dm_target *ti;
25 struct request *orig, *clone;
26 struct kthread_work work;
27 int error;
28 union map_info info;
29 struct dm_stats_aux stats_aux;
30 unsigned long duration_jiffies;
31 unsigned n_sectors;
32};
33
34/*
35 * For request-based dm - the bio clones we allocate are embedded in these
36 * structs.
37 *
38 * We allocate these with bio_alloc_bioset, using the front_pad parameter when
39 * the bioset is created - this means the bio has to come at the end of the
40 * struct.
41 */
42struct dm_rq_clone_bio_info {
43 struct bio *orig;
44 struct dm_rq_target_io *tio;
45 struct bio clone;
46};
47
48bool dm_use_blk_mq_default(void);
49bool dm_use_blk_mq(struct mapped_device *md);
50
51int dm_old_init_request_queue(struct mapped_device *md);
52int dm_mq_init_request_queue(struct mapped_device *md, struct dm_target *immutable_tgt);
53void dm_mq_cleanup_mapped_device(struct mapped_device *md);
54
55void dm_start_queue(struct request_queue *q);
56void dm_stop_queue(struct request_queue *q);
57
58unsigned dm_get_reserved_rq_based_ios(void);
59
60ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf);
61ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
62 const char *buf, size_t count);
63
64#endif
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 4fba26cd6bdb..38b05f23b96c 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -10,7 +10,7 @@
 #include <linux/module.h>
 #include <linux/device-mapper.h>
 
-#include "dm.h"
+#include "dm-core.h"
 #include "dm-stats.h"
 
 #define DM_MSG_PREFIX "stats"
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c
index 7e818f5f1dc4..c209b8a19b84 100644
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -6,7 +6,8 @@
 
 #include <linux/sysfs.h>
 #include <linux/dm-ioctl.h>
-#include "dm.h"
+#include "dm-core.h"
+#include "dm-rq.h"
 
 struct dm_sysfs_attr {
 	struct attribute attr;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 626a5ec04466..a682d51111dd 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -5,7 +5,7 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include "dm-core.h"
 
 #include <linux/module.h>
 #include <linux/vmalloc.h>
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index a317dd884ba6..5c826b450aad 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -4,7 +4,7 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include "dm-core.h"
 
 #include <linux/module.h>
 #include <linux/init.h>
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index aba7ed9abb3a..8f22527134e9 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -5,13 +5,13 @@
  * This file is released under the GPL.
  */
 
-#include "dm.h"
+#include "dm-core.h"
+#include "dm-rq.h"
 #include "dm-uevent.h"
 
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/moduleparam.h>
 #include <linux/blkpg.h>
 #include <linux/bio.h>
 #include <linux/mempool.h>
@@ -20,14 +20,8 @@
 #include <linux/hdreg.h>
 #include <linux/delay.h>
 #include <linux/wait.h>
-#include <linux/kthread.h>
-#include <linux/ktime.h>
-#include <linux/elevator.h> /* for rq_end_sector() */
-#include <linux/blk-mq.h>
 #include <linux/pr.h>
 
-#include <trace/events/block.h>
-
 #define DM_MSG_PREFIX "core"
 
 #ifdef CONFIG_PRINTK
@@ -63,7 +57,6 @@ static DECLARE_WORK(deferred_remove_work, do_deferred_remove);
 static struct workqueue_struct *deferred_remove_workqueue;
 
 /*
- * For bio-based dm.
  * One of these is allocated per bio.
  */
 struct dm_io {
@@ -76,36 +69,6 @@ struct dm_io {
 	struct dm_stats_aux stats_aux;
 };
 
-/*
- * For request-based dm.
- * One of these is allocated per request.
- */
-struct dm_rq_target_io {
-	struct mapped_device *md;
-	struct dm_target *ti;
-	struct request *orig, *clone;
-	struct kthread_work work;
-	int error;
-	union map_info info;
-	struct dm_stats_aux stats_aux;
-	unsigned long duration_jiffies;
-	unsigned n_sectors;
-};
-
-/*
- * For request-based dm - the bio clones we allocate are embedded in these
- * structs.
- *
- * We allocate these with bio_alloc_bioset, using the front_pad parameter when
- * the bioset is created - this means the bio has to come at the end of the
- * struct.
- */
-struct dm_rq_clone_bio_info {
-	struct bio *orig;
-	struct dm_rq_target_io *tio;
-	struct bio clone;
-};
-
 #define MINOR_ALLOCED ((void *)-1)
 
 /*
@@ -120,130 +83,9 @@ struct dm_rq_clone_bio_info {
120#define DMF_DEFERRED_REMOVE 6 83#define DMF_DEFERRED_REMOVE 6
121#define DMF_SUSPENDED_INTERNALLY 7 84#define DMF_SUSPENDED_INTERNALLY 7
122 85
123/*
124 * Work processed by per-device workqueue.
125 */
126struct mapped_device {
127 struct srcu_struct io_barrier;
128 struct mutex suspend_lock;
129
130 /*
131 * The current mapping (struct dm_table *).
132 * Use dm_get_live_table{_fast} or take suspend_lock for
133 * dereference.
134 */
135 void __rcu *map;
136
137 struct list_head table_devices;
138 struct mutex table_devices_lock;
139
140 unsigned long flags;
141
142 struct request_queue *queue;
143 int numa_node_id;
144
145 unsigned type;
146 /* Protect queue and type against concurrent access. */
147 struct mutex type_lock;
148
149 atomic_t holders;
150 atomic_t open_count;
151
152 struct dm_target *immutable_target;
153 struct target_type *immutable_target_type;
154
155 struct gendisk *disk;
156 char name[16];
157
158 void *interface_ptr;
159
160 /*
161 * A list of ios that arrived while we were suspended.
162 */
163 atomic_t pending[2];
164 wait_queue_head_t wait;
165 struct work_struct work;
166 spinlock_t deferred_lock;
167 struct bio_list deferred;
168
169 /*
170 * Event handling.
171 */
172 wait_queue_head_t eventq;
173 atomic_t event_nr;
174 atomic_t uevent_seq;
175 struct list_head uevent_list;
176 spinlock_t uevent_lock; /* Protect access to uevent_list */
177
178 /* the number of internal suspends */
179 unsigned internal_suspend_count;
180
181 /*
182 * Processing queue (flush)
183 */
184 struct workqueue_struct *wq;
185
186 /*
187 * io objects are allocated from here.
188 */
189 mempool_t *io_pool;
190 mempool_t *rq_pool;
191
192 struct bio_set *bs;
193
194 /*
195 * freeze/thaw support require holding onto a super block
196 */
197 struct super_block *frozen_sb;
198
199 /* forced geometry settings */
200 struct hd_geometry geometry;
201
202 struct block_device *bdev;
203
204 /* kobject and completion */
205 struct dm_kobject_holder kobj_holder;
206
207 /* zero-length flush that will be cloned and submitted to targets */
208 struct bio flush_bio;
209
210 struct dm_stats stats;
211
212 struct kthread_worker kworker;
213 struct task_struct *kworker_task;
214
215 /* for request-based merge heuristic in dm_request_fn() */
216 unsigned seq_rq_merge_deadline_usecs;
217 int last_rq_rw;
218 sector_t last_rq_pos;
219 ktime_t last_rq_start_time;
220
221 /* for blk-mq request-based DM support */
222 struct blk_mq_tag_set *tag_set;
223 bool use_blk_mq:1;
224 bool init_tio_pdu:1;
225};
226
227#ifdef CONFIG_DM_MQ_DEFAULT
228static bool use_blk_mq = true;
229#else
230static bool use_blk_mq = false;
231#endif
232
233#define DM_MQ_NR_HW_QUEUES 1
234#define DM_MQ_QUEUE_DEPTH 2048
235#define DM_NUMA_NODE NUMA_NO_NODE 86#define DM_NUMA_NODE NUMA_NO_NODE
236
237static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
238static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
239static int dm_numa_node = DM_NUMA_NODE; 87static int dm_numa_node = DM_NUMA_NODE;
240 88
241bool dm_use_blk_mq(struct mapped_device *md)
242{
243 return md->use_blk_mq;
244}
245EXPORT_SYMBOL_GPL(dm_use_blk_mq);
246
247/* 89/*
248 * For mempools pre-allocation at the table loading time. 90 * For mempools pre-allocation at the table loading time.
249 */ 91 */
@@ -259,9 +101,6 @@ struct table_device {
259 struct dm_dev dm_dev; 101 struct dm_dev dm_dev;
260}; 102};
261 103
262#define RESERVED_BIO_BASED_IOS 16
263#define RESERVED_REQUEST_BASED_IOS 256
264#define RESERVED_MAX_IOS 1024
265static struct kmem_cache *_io_cache; 104static struct kmem_cache *_io_cache;
266static struct kmem_cache *_rq_tio_cache; 105static struct kmem_cache *_rq_tio_cache;
267static struct kmem_cache *_rq_cache; 106static struct kmem_cache *_rq_cache;
@@ -269,13 +108,9 @@ static struct kmem_cache *_rq_cache;
269/* 108/*
270 * Bio-based DM's mempools' reserved IOs set by the user. 109 * Bio-based DM's mempools' reserved IOs set by the user.
271 */ 110 */
111#define RESERVED_BIO_BASED_IOS 16
272static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS; 112static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
273 113
274/*
275 * Request-based DM's mempools' reserved IOs set by the user.
276 */
277static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
278
279static int __dm_get_module_param_int(int *module_param, int min, int max) 114static int __dm_get_module_param_int(int *module_param, int min, int max)
280{ 115{
281 int param = ACCESS_ONCE(*module_param); 116 int param = ACCESS_ONCE(*module_param);
@@ -297,8 +132,8 @@ static int __dm_get_module_param_int(int *module_param, int min, int max)
297 return param; 132 return param;
298} 133}
299 134
300static unsigned __dm_get_module_param(unsigned *module_param, 135unsigned __dm_get_module_param(unsigned *module_param,
301 unsigned def, unsigned max) 136 unsigned def, unsigned max)
302{ 137{
303 unsigned param = ACCESS_ONCE(*module_param); 138 unsigned param = ACCESS_ONCE(*module_param);
304 unsigned modified_param = 0; 139 unsigned modified_param = 0;
@@ -319,28 +154,10 @@ static unsigned __dm_get_module_param(unsigned *module_param,
319unsigned dm_get_reserved_bio_based_ios(void) 154unsigned dm_get_reserved_bio_based_ios(void)
320{ 155{
321 return __dm_get_module_param(&reserved_bio_based_ios, 156 return __dm_get_module_param(&reserved_bio_based_ios,
322 RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS); 157 RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS);
323} 158}
324EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios); 159EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
325 160
326unsigned dm_get_reserved_rq_based_ios(void)
327{
328 return __dm_get_module_param(&reserved_rq_based_ios,
329 RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
330}
331EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
332
333static unsigned dm_get_blk_mq_nr_hw_queues(void)
334{
335 return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
336}
337
338static unsigned dm_get_blk_mq_queue_depth(void)
339{
340 return __dm_get_module_param(&dm_mq_queue_depth,
341 DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
342}
343
344static unsigned dm_get_numa_node(void) 161static unsigned dm_get_numa_node(void)
345{ 162{
346 return __dm_get_module_param_int(&dm_numa_node, 163 return __dm_get_module_param_int(&dm_numa_node,
@@ -679,29 +496,7 @@ static void free_tio(struct dm_target_io *tio)
679 bio_put(&tio->clone); 496 bio_put(&tio->clone);
680} 497}
681 498
682static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md, 499int md_in_flight(struct mapped_device *md)
683 gfp_t gfp_mask)
684{
685 return mempool_alloc(md->io_pool, gfp_mask);
686}
687
688static void free_old_rq_tio(struct dm_rq_target_io *tio)
689{
690 mempool_free(tio, tio->md->io_pool);
691}
692
693static struct request *alloc_old_clone_request(struct mapped_device *md,
694 gfp_t gfp_mask)
695{
696 return mempool_alloc(md->rq_pool, gfp_mask);
697}
698
699static void free_old_clone_request(struct mapped_device *md, struct request *rq)
700{
701 mempool_free(rq, md->rq_pool);
702}
703
704static int md_in_flight(struct mapped_device *md)
705{ 500{
706 return atomic_read(&md->pending[READ]) + 501 return atomic_read(&md->pending[READ]) +
707 atomic_read(&md->pending[WRITE]); 502 atomic_read(&md->pending[WRITE]);
@@ -1019,7 +814,7 @@ static void dec_pending(struct dm_io *io, int error)
1019 } 814 }
1020} 815}
1021 816
1022static void disable_write_same(struct mapped_device *md) 817void disable_write_same(struct mapped_device *md)
1023{ 818{
1024 struct queue_limits *limits = dm_get_queue_limits(md); 819 struct queue_limits *limits = dm_get_queue_limits(md);
1025 820
@@ -1062,371 +857,6 @@ static void clone_endio(struct bio *bio)
1062} 857}
1063 858
1064/* 859/*
1065 * Partial completion handling for request-based dm
1066 */
1067static void end_clone_bio(struct bio *clone)
1068{
1069 struct dm_rq_clone_bio_info *info =
1070 container_of(clone, struct dm_rq_clone_bio_info, clone);
1071 struct dm_rq_target_io *tio = info->tio;
1072 struct bio *bio = info->orig;
1073 unsigned int nr_bytes = info->orig->bi_iter.bi_size;
1074 int error = clone->bi_error;
1075
1076 bio_put(clone);
1077
1078 if (tio->error)
1079 /*
1080 * An error has already been detected on the request.
1081 * Once error occurred, just let clone->end_io() handle
1082 * the remainder.
1083 */
1084 return;
1085 else if (error) {
1086 /*
1087 * Don't notice the error to the upper layer yet.
1088 * The error handling decision is made by the target driver,
1089 * when the request is completed.
1090 */
1091 tio->error = error;
1092 return;
1093 }
1094
1095 /*
1096 * I/O for the bio successfully completed.
1097 * Notice the data completion to the upper layer.
1098 */
1099
1100 /*
1101 * bios are processed from the head of the list.
1102 * So the completing bio should always be rq->bio.
1103 * If it's not, something wrong is happening.
1104 */
1105 if (tio->orig->bio != bio)
1106 DMERR("bio completion is going in the middle of the request");
1107
1108 /*
1109 * Update the original request.
1110 * Do not use blk_end_request() here, because it may complete
1111 * the original request before the clone, and break the ordering.
1112 */
1113 blk_update_request(tio->orig, 0, nr_bytes);
1114}
1115
1116static struct dm_rq_target_io *tio_from_request(struct request *rq)
1117{
1118 return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
1119}
1120
1121static void rq_end_stats(struct mapped_device *md, struct request *orig)
1122{
1123 if (unlikely(dm_stats_used(&md->stats))) {
1124 struct dm_rq_target_io *tio = tio_from_request(orig);
1125 tio->duration_jiffies = jiffies - tio->duration_jiffies;
1126 dm_stats_account_io(&md->stats, rq_data_dir(orig),
1127 blk_rq_pos(orig), tio->n_sectors, true,
1128 tio->duration_jiffies, &tio->stats_aux);
1129 }
1130}
1131
1132/*
1133 * Don't touch any member of the md after calling this function because
1134 * the md may be freed in dm_put() at the end of this function.
1135 * Or do dm_get() before calling this function and dm_put() later.
1136 */
1137static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
1138{
1139 atomic_dec(&md->pending[rw]);
1140
1141 /* nudge anyone waiting on suspend queue */
1142 if (!md_in_flight(md))
1143 wake_up(&md->wait);
1144
1145 /*
1146 * Run this off this callpath, as drivers could invoke end_io while
1147 * inside their request_fn (and holding the queue lock). Calling
1148 * back into ->request_fn() could deadlock attempting to grab the
1149 * queue lock again.
1150 */
1151 if (!md->queue->mq_ops && run_queue)
1152 blk_run_queue_async(md->queue);
1153
1154 /*
1155 * dm_put() must be at the end of this function. See the comment above
1156 */
1157 dm_put(md);
1158}
1159
1160static void free_rq_clone(struct request *clone)
1161{
1162 struct dm_rq_target_io *tio = clone->end_io_data;
1163 struct mapped_device *md = tio->md;
1164
1165 blk_rq_unprep_clone(clone);
1166
1167 if (md->type == DM_TYPE_MQ_REQUEST_BASED)
1168 /* stacked on blk-mq queue(s) */
1169 tio->ti->type->release_clone_rq(clone);
1170 else if (!md->queue->mq_ops)
1171 /* request_fn queue stacked on request_fn queue(s) */
1172 free_old_clone_request(md, clone);
1173
1174 if (!md->queue->mq_ops)
1175 free_old_rq_tio(tio);
1176}
1177
1178/*
1179 * Complete the clone and the original request.
1180 * Must be called without clone's queue lock held,
1181 * see end_clone_request() for more details.
1182 */
1183static void dm_end_request(struct request *clone, int error)
1184{
1185 int rw = rq_data_dir(clone);
1186 struct dm_rq_target_io *tio = clone->end_io_data;
1187 struct mapped_device *md = tio->md;
1188 struct request *rq = tio->orig;
1189
1190 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
1191 rq->errors = clone->errors;
1192 rq->resid_len = clone->resid_len;
1193
1194 if (rq->sense)
1195 /*
1196 * We are using the sense buffer of the original
1197 * request.
1198 * So setting the length of the sense data is enough.
1199 */
1200 rq->sense_len = clone->sense_len;
1201 }
1202
1203 free_rq_clone(clone);
1204 rq_end_stats(md, rq);
1205 if (!rq->q->mq_ops)
1206 blk_end_request_all(rq, error);
1207 else
1208 blk_mq_end_request(rq, error);
1209 rq_completed(md, rw, true);
1210}
1211
1212static void dm_unprep_request(struct request *rq)
1213{
1214 struct dm_rq_target_io *tio = tio_from_request(rq);
1215 struct request *clone = tio->clone;
1216
1217 if (!rq->q->mq_ops) {
1218 rq->special = NULL;
1219 rq->cmd_flags &= ~REQ_DONTPREP;
1220 }
1221
1222 if (clone)
1223 free_rq_clone(clone);
1224 else if (!tio->md->queue->mq_ops)
1225 free_old_rq_tio(tio);
1226}
1227
1228/*
1229 * Requeue the original request of a clone.
1230 */
1231static void dm_old_requeue_request(struct request *rq)
1232{
1233 struct request_queue *q = rq->q;
1234 unsigned long flags;
1235
1236 spin_lock_irqsave(q->queue_lock, flags);
1237 blk_requeue_request(q, rq);
1238 blk_run_queue_async(q);
1239 spin_unlock_irqrestore(q->queue_lock, flags);
1240}
1241
1242static void dm_mq_requeue_request(struct request *rq)
1243{
1244 struct request_queue *q = rq->q;
1245 unsigned long flags;
1246
1247 blk_mq_requeue_request(rq);
1248 spin_lock_irqsave(q->queue_lock, flags);
1249 if (!blk_queue_stopped(q))
1250 blk_mq_kick_requeue_list(q);
1251 spin_unlock_irqrestore(q->queue_lock, flags);
1252}
1253
1254static void dm_requeue_original_request(struct mapped_device *md,
1255 struct request *rq)
1256{
1257 int rw = rq_data_dir(rq);
1258
1259 rq_end_stats(md, rq);
1260 dm_unprep_request(rq);
1261
1262 if (!rq->q->mq_ops)
1263 dm_old_requeue_request(rq);
1264 else
1265 dm_mq_requeue_request(rq);
1266
1267 rq_completed(md, rw, false);
1268}
1269
1270static void dm_old_stop_queue(struct request_queue *q)
1271{
1272 unsigned long flags;
1273
1274 spin_lock_irqsave(q->queue_lock, flags);
1275 if (blk_queue_stopped(q)) {
1276 spin_unlock_irqrestore(q->queue_lock, flags);
1277 return;
1278 }
1279
1280 blk_stop_queue(q);
1281 spin_unlock_irqrestore(q->queue_lock, flags);
1282}
1283
1284static void dm_stop_queue(struct request_queue *q)
1285{
1286 if (!q->mq_ops)
1287 dm_old_stop_queue(q);
1288 else
1289 blk_mq_stop_hw_queues(q);
1290}
1291
1292static void dm_old_start_queue(struct request_queue *q)
1293{
1294 unsigned long flags;
1295
1296 spin_lock_irqsave(q->queue_lock, flags);
1297 if (blk_queue_stopped(q))
1298 blk_start_queue(q);
1299 spin_unlock_irqrestore(q->queue_lock, flags);
1300}
1301
1302static void dm_start_queue(struct request_queue *q)
1303{
1304 if (!q->mq_ops)
1305 dm_old_start_queue(q);
1306 else {
1307 blk_mq_start_stopped_hw_queues(q, true);
1308 blk_mq_kick_requeue_list(q);
1309 }
1310}
1311
1312static void dm_done(struct request *clone, int error, bool mapped)
1313{
1314 int r = error;
1315 struct dm_rq_target_io *tio = clone->end_io_data;
1316 dm_request_endio_fn rq_end_io = NULL;
1317
1318 if (tio->ti) {
1319 rq_end_io = tio->ti->type->rq_end_io;
1320
1321 if (mapped && rq_end_io)
1322 r = rq_end_io(tio->ti, clone, error, &tio->info);
1323 }
1324
1325 if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
1326 !clone->q->limits.max_write_same_sectors))
1327 disable_write_same(tio->md);
1328
1329 if (r <= 0)
1330 /* The target wants to complete the I/O */
1331 dm_end_request(clone, r);
1332 else if (r == DM_ENDIO_INCOMPLETE)
1333 /* The target will handle the I/O */
1334 return;
1335 else if (r == DM_ENDIO_REQUEUE)
1336 /* The target wants to requeue the I/O */
1337 dm_requeue_original_request(tio->md, tio->orig);
1338 else {
1339 DMWARN("unimplemented target endio return value: %d", r);
1340 BUG();
1341 }
1342}
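
As a rough illustration of the endio contract that dm_done() enforces above, a request-based target's rq_end_io hook could look like the sketch below. The signature matches the rq_end_io call in dm_done(); the function name and the -EAGAIN retry policy are hypothetical and only stand in for a real target's error handling.

#include <linux/device-mapper.h>
#include <linux/errno.h>

/* Hypothetical endio hook for a request-based target (sketch only). */
static int example_rq_end_io(struct dm_target *ti, struct request *clone,
			     int error, union map_info *map_context)
{
	/* Treat -EAGAIN as a transient error and let dm_done() requeue
	 * the original request via dm_requeue_original_request(). */
	if (error == -EAGAIN)
		return DM_ENDIO_REQUEUE;

	/* 0 or a negative errno: dm_done() calls dm_end_request(). */
	return error;
}
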
1343
1344/*
1345 * Request completion handler for request-based dm
1346 */
1347static void dm_softirq_done(struct request *rq)
1348{
1349 bool mapped = true;
1350 struct dm_rq_target_io *tio = tio_from_request(rq);
1351 struct request *clone = tio->clone;
1352 int rw;
1353
1354 if (!clone) {
1355 rq_end_stats(tio->md, rq);
1356 rw = rq_data_dir(rq);
1357 if (!rq->q->mq_ops) {
1358 blk_end_request_all(rq, tio->error);
1359 rq_completed(tio->md, rw, false);
1360 free_old_rq_tio(tio);
1361 } else {
1362 blk_mq_end_request(rq, tio->error);
1363 rq_completed(tio->md, rw, false);
1364 }
1365 return;
1366 }
1367
1368 if (rq->cmd_flags & REQ_FAILED)
1369 mapped = false;
1370
1371 dm_done(clone, tio->error, mapped);
1372}
1373
1374/*
1375 * Complete the clone and the original request with the error status
1376 * through softirq context.
1377 */
1378static void dm_complete_request(struct request *rq, int error)
1379{
1380 struct dm_rq_target_io *tio = tio_from_request(rq);
1381
1382 tio->error = error;
1383 if (!rq->q->mq_ops)
1384 blk_complete_request(rq);
1385 else
1386 blk_mq_complete_request(rq, error);
1387}
1388
1389/*
1390 * Complete the unmapped clone and the original request with the error status
1391 * through softirq context.
1392 * The target's rq_end_io() function isn't called.
1393 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
1394 */
1395static void dm_kill_unmapped_request(struct request *rq, int error)
1396{
1397 rq->cmd_flags |= REQ_FAILED;
1398 dm_complete_request(rq, error);
1399}
1400
1401/*
1402 * Called with the clone's queue lock held (in the case of .request_fn)
1403 */
1404static void end_clone_request(struct request *clone, int error)
1405{
1406 struct dm_rq_target_io *tio = clone->end_io_data;
1407
1408 if (!clone->q->mq_ops) {
1409 /*
1410	 * This only cleans up the bookkeeping of the queue in which
1411	 * the clone was dispatched.
1412	 * The clone is *NOT* actually freed here because it was allocated
1413	 * from dm's own mempool (REQ_ALLOCED isn't set).
1414 */
1415 __blk_put_request(clone->q, clone);
1416 }
1417
1418 /*
1419 * Actual request completion is done in a softirq context which doesn't
1420 * hold the clone's queue lock. Otherwise, deadlock could occur because:
1421	 *     - the upper-level driver of the stack may submit another
1422	 *       request from within the completion
1423	 *     - that submission needs the queue lock and may be issued
1424	 *       against this clone's queue
1425 */
1426 dm_complete_request(tio->orig, error);
1427}
1428
1429/*
1430 * Return maximum size of I/O possible at the supplied sector up to the current 860 * Return maximum size of I/O possible at the supplied sector up to the current
1431 * target boundary. 861 * target boundary.
1432 */ 862 */
@@ -1845,353 +1275,6 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
1845 return BLK_QC_T_NONE; 1275 return BLK_QC_T_NONE;
1846} 1276}
1847 1277
1848int dm_request_based(struct mapped_device *md)
1849{
1850 return blk_queue_stackable(md->queue);
1851}
1852
1853static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
1854{
1855 int r;
1856
1857 if (blk_queue_io_stat(clone->q))
1858 clone->cmd_flags |= REQ_IO_STAT;
1859
1860 clone->start_time = jiffies;
1861 r = blk_insert_cloned_request(clone->q, clone);
1862 if (r)
1863 /* must complete clone in terms of original request */
1864 dm_complete_request(rq, r);
1865}
1866
1867static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
1868 void *data)
1869{
1870 struct dm_rq_target_io *tio = data;
1871 struct dm_rq_clone_bio_info *info =
1872 container_of(bio, struct dm_rq_clone_bio_info, clone);
1873
1874 info->orig = bio_orig;
1875 info->tio = tio;
1876 bio->bi_end_io = end_clone_bio;
1877
1878 return 0;
1879}
1880
1881static int setup_clone(struct request *clone, struct request *rq,
1882 struct dm_rq_target_io *tio, gfp_t gfp_mask)
1883{
1884 int r;
1885
1886 r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
1887 dm_rq_bio_constructor, tio);
1888 if (r)
1889 return r;
1890
1891 clone->cmd = rq->cmd;
1892 clone->cmd_len = rq->cmd_len;
1893 clone->sense = rq->sense;
1894 clone->end_io = end_clone_request;
1895 clone->end_io_data = tio;
1896
1897 tio->clone = clone;
1898
1899 return 0;
1900}
1901
1902static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
1903 struct dm_rq_target_io *tio, gfp_t gfp_mask)
1904{
1905 /*
1906 * Create clone for use with .request_fn request_queue
1907 */
1908 struct request *clone;
1909
1910 clone = alloc_old_clone_request(md, gfp_mask);
1911 if (!clone)
1912 return NULL;
1913
1914 blk_rq_init(NULL, clone);
1915 if (setup_clone(clone, rq, tio, gfp_mask)) {
1916 /* -ENOMEM */
1917 free_old_clone_request(md, clone);
1918 return NULL;
1919 }
1920
1921 return clone;
1922}
1923
1924static void map_tio_request(struct kthread_work *work);
1925
1926static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
1927 struct mapped_device *md)
1928{
1929 tio->md = md;
1930 tio->ti = NULL;
1931 tio->clone = NULL;
1932 tio->orig = rq;
1933 tio->error = 0;
1934 /*
1935 * Avoid initializing info for blk-mq; it passes
1936 * target-specific data through info.ptr
1937 * (see: dm_mq_init_request)
1938 */
1939 if (!md->init_tio_pdu)
1940 memset(&tio->info, 0, sizeof(tio->info));
1941 if (md->kworker_task)
1942 init_kthread_work(&tio->work, map_tio_request);
1943}
1944
1945static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
1946 struct mapped_device *md,
1947 gfp_t gfp_mask)
1948{
1949 struct dm_rq_target_io *tio;
1950 int srcu_idx;
1951 struct dm_table *table;
1952
1953 tio = alloc_old_rq_tio(md, gfp_mask);
1954 if (!tio)
1955 return NULL;
1956
1957 init_tio(tio, rq, md);
1958
1959 table = dm_get_live_table(md, &srcu_idx);
1960 /*
1961 * Must clone a request if this .request_fn DM device
1962 * is stacked on .request_fn device(s).
1963 */
1964 if (!dm_table_mq_request_based(table)) {
1965 if (!clone_old_rq(rq, md, tio, gfp_mask)) {
1966 dm_put_live_table(md, srcu_idx);
1967 free_old_rq_tio(tio);
1968 return NULL;
1969 }
1970 }
1971 dm_put_live_table(md, srcu_idx);
1972
1973 return tio;
1974}
1975
1976/*
1977 * Called with the queue lock held.
1978 */
1979static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
1980{
1981 struct mapped_device *md = q->queuedata;
1982 struct dm_rq_target_io *tio;
1983
1984 if (unlikely(rq->special)) {
1985 DMWARN("Already has something in rq->special.");
1986 return BLKPREP_KILL;
1987 }
1988
1989 tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
1990 if (!tio)
1991 return BLKPREP_DEFER;
1992
1993 rq->special = tio;
1994 rq->cmd_flags |= REQ_DONTPREP;
1995
1996 return BLKPREP_OK;
1997}
1998
1999/*
2000 * Returns:
2001 * 0 : the request has been processed
2002 * DM_MAPIO_REQUEUE : the original request needs to be requeued
2003 * < 0 : the request was completed due to failure
2004 */
2005static int map_request(struct dm_rq_target_io *tio, struct request *rq,
2006 struct mapped_device *md)
2007{
2008 int r;
2009 struct dm_target *ti = tio->ti;
2010 struct request *clone = NULL;
2011
2012 if (tio->clone) {
2013 clone = tio->clone;
2014 r = ti->type->map_rq(ti, clone, &tio->info);
2015 } else {
2016 r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
2017 if (r < 0) {
2018 /* The target wants to complete the I/O */
2019 dm_kill_unmapped_request(rq, r);
2020 return r;
2021 }
2022 if (r != DM_MAPIO_REMAPPED)
2023 return r;
2024 if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
2025 /* -ENOMEM */
2026 ti->type->release_clone_rq(clone);
2027 return DM_MAPIO_REQUEUE;
2028 }
2029 }
2030
2031 switch (r) {
2032 case DM_MAPIO_SUBMITTED:
2033 /* The target has taken the I/O to submit by itself later */
2034 break;
2035 case DM_MAPIO_REMAPPED:
2036 /* The target has remapped the I/O so dispatch it */
2037 trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
2038 blk_rq_pos(rq));
2039 dm_dispatch_clone_request(clone, rq);
2040 break;
2041 case DM_MAPIO_REQUEUE:
2042 /* The target wants to requeue the I/O */
2043 dm_requeue_original_request(md, tio->orig);
2044 break;
2045 default:
2046 if (r > 0) {
2047 DMWARN("unimplemented target map return value: %d", r);
2048 BUG();
2049 }
2050
2051 /* The target wants to complete the I/O */
2052 dm_kill_unmapped_request(rq, r);
2053 return r;
2054 }
2055
2056 return 0;
2057}
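
For reference, a minimal sketch of the clone_and_map_rq/release_clone_rq pair that map_request() drives when the target allocates its own clone. The example_* names and the example_underlying_queue() helper are assumptions for illustration only; blk_mq_alloc_request()/blk_mq_free_request() and the DM_MAPIO_* return values mirror the call sites above, and after DM_MAPIO_REMAPPED it is setup_clone() in DM core that fills in the clone's end_io and command fields before dispatch.

#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/device-mapper.h>

/* Assumed helper (not a real DM API): returns the blk-mq queue of the
 * underlying device this hypothetical target forwards requests to. */
static struct request_queue *example_underlying_queue(struct dm_target *ti);

static int example_clone_and_map_rq(struct dm_target *ti, struct request *rq,
				    union map_info *map_context,
				    struct request **__clone)
{
	struct request_queue *q = example_underlying_queue(ti);
	struct request *clone;

	/* Allocate the clone from the underlying blk-mq queue;
	 * BLK_MQ_REQ_NOWAIT avoids blocking for a free tag. */
	clone = blk_mq_alloc_request(q, rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(clone))
		return DM_MAPIO_REQUEUE;	/* no tag available: retry later */

	*__clone = clone;
	return DM_MAPIO_REMAPPED;	/* DM core runs setup_clone() and dispatches */
}

static void example_release_clone_rq(struct request *clone)
{
	blk_mq_free_request(clone);
}
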
2058
2059static void map_tio_request(struct kthread_work *work)
2060{
2061 struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
2062 struct request *rq = tio->orig;
2063 struct mapped_device *md = tio->md;
2064
2065 if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
2066 dm_requeue_original_request(md, rq);
2067}
2068
2069static void dm_start_request(struct mapped_device *md, struct request *orig)
2070{
2071 if (!orig->q->mq_ops)
2072 blk_start_request(orig);
2073 else
2074 blk_mq_start_request(orig);
2075 atomic_inc(&md->pending[rq_data_dir(orig)]);
2076
2077 if (md->seq_rq_merge_deadline_usecs) {
2078 md->last_rq_pos = rq_end_sector(orig);
2079 md->last_rq_rw = rq_data_dir(orig);
2080 md->last_rq_start_time = ktime_get();
2081 }
2082
2083 if (unlikely(dm_stats_used(&md->stats))) {
2084 struct dm_rq_target_io *tio = tio_from_request(orig);
2085 tio->duration_jiffies = jiffies;
2086 tio->n_sectors = blk_rq_sectors(orig);
2087 dm_stats_account_io(&md->stats, rq_data_dir(orig),
2088 blk_rq_pos(orig), tio->n_sectors, false, 0,
2089 &tio->stats_aux);
2090 }
2091
2092 /*
2093 * Hold the md reference here for the in-flight I/O.
2094	 * We can't rely on the reference count held by the device opener,
2095	 * because the device may be closed during request completion
2096	 * once all bios have completed.
2097 * See the comment in rq_completed() too.
2098 */
2099 dm_get(md);
2100}
2101
2102#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
2103
2104ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
2105{
2106 return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
2107}
2108
2109ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
2110 const char *buf, size_t count)
2111{
2112 unsigned deadline;
2113
2114 if (!dm_request_based(md) || md->use_blk_mq)
2115 return count;
2116
2117 if (kstrtouint(buf, 10, &deadline))
2118 return -EINVAL;
2119
2120 if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
2121 deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
2122
2123 md->seq_rq_merge_deadline_usecs = deadline;
2124
2125 return count;
2126}
2127
2128static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md)
2129{
2130 ktime_t kt_deadline;
2131
2132 if (!md->seq_rq_merge_deadline_usecs)
2133 return false;
2134
2135 kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
2136 kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
2137
2138 return !ktime_after(ktime_get(), kt_deadline);
2139}
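
The deadline checked above is a per-device tunable in microseconds (capped at MAX_SEQ_RQ_MERGE_DEADLINE_USECS) that user space sets through the show/store handlers above; the store is a no-op for bio-based and dm-mq devices. A minimal user-space sketch, assuming the attribute appears as rq_based_seq_io_merge_deadline in the device's dm/ sysfs directory and using dm-0 as a placeholder device name:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Placeholder device and assumed attribute path. */
	const char *path =
		"/sys/block/dm-0/dm/rq_based_seq_io_merge_deadline";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Give sequential requests up to 100 microseconds to merge before
	 * dm_request_fn() dispatches them (0 disables the heuristic). */
	if (write(fd, "100", 3) != 3)
		perror("write");
	close(fd);
	return 0;
}
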
2140
2141/*
2142 * q->request_fn for request-based dm.
2143 * Called with the queue lock held.
2144 */
2145static void dm_request_fn(struct request_queue *q)
2146{
2147 struct mapped_device *md = q->queuedata;
2148 struct dm_target *ti = md->immutable_target;
2149 struct request *rq;
2150 struct dm_rq_target_io *tio;
2151 sector_t pos = 0;
2152
2153 if (unlikely(!ti)) {
2154 int srcu_idx;
2155 struct dm_table *map = dm_get_live_table(md, &srcu_idx);
2156
2157 ti = dm_table_find_target(map, pos);
2158 dm_put_live_table(md, srcu_idx);
2159 }
2160
2161 /*
2162	 * For suspend, check blk_queue_stopped() and increment
2163	 * ->pending while holding a single queue_lock, so that the
2164	 * number of in-flight I/Os is not incremented after the queue
2165	 * has been stopped in dm_suspend().
2166 */
2167 while (!blk_queue_stopped(q)) {
2168 rq = blk_peek_request(q);
2169 if (!rq)
2170 return;
2171
2172 /* always use block 0 to find the target for flushes for now */
2173 pos = 0;
2174 if (req_op(rq) != REQ_OP_FLUSH)
2175 pos = blk_rq_pos(rq);
2176
2177 if ((dm_request_peeked_before_merge_deadline(md) &&
2178 md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
2179 md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
2180 (ti->type->busy && ti->type->busy(ti))) {
2181 blk_delay_queue(q, HZ / 100);
2182 return;
2183 }
2184
2185 dm_start_request(md, rq);
2186
2187 tio = tio_from_request(rq);
2188 /* Establish tio->ti before queuing work (map_tio_request) */
2189 tio->ti = ti;
2190 queue_kthread_work(&md->kworker, &tio->work);
2191 BUG_ON(!irqs_disabled());
2192 }
2193}
2194
2195static int dm_any_congested(void *congested_data, int bdi_bits) 1278static int dm_any_congested(void *congested_data, int bdi_bits)
2196{ 1279{
2197 int r = bdi_bits; 1280 int r = bdi_bits;
@@ -2269,7 +1352,7 @@ static const struct block_device_operations dm_blk_dops;
2269 1352
2270static void dm_wq_work(struct work_struct *work); 1353static void dm_wq_work(struct work_struct *work);
2271 1354
2272static void dm_init_md_queue(struct mapped_device *md) 1355void dm_init_md_queue(struct mapped_device *md)
2273{ 1356{
2274 /* 1357 /*
2275 * Request-based dm devices cannot be stacked on top of bio-based dm 1358 * Request-based dm devices cannot be stacked on top of bio-based dm
@@ -2290,7 +1373,7 @@ static void dm_init_md_queue(struct mapped_device *md)
2290 md->queue->backing_dev_info.congested_data = md; 1373 md->queue->backing_dev_info.congested_data = md;
2291} 1374}
2292 1375
2293static void dm_init_normal_md_queue(struct mapped_device *md) 1376void dm_init_normal_md_queue(struct mapped_device *md)
2294{ 1377{
2295 md->use_blk_mq = false; 1378 md->use_blk_mq = false;
2296 dm_init_md_queue(md); 1379 dm_init_md_queue(md);
@@ -2330,6 +1413,8 @@ static void cleanup_mapped_device(struct mapped_device *md)
2330 bdput(md->bdev); 1413 bdput(md->bdev);
2331 md->bdev = NULL; 1414 md->bdev = NULL;
2332 } 1415 }
1416
1417 dm_mq_cleanup_mapped_device(md);
2333} 1418}
2334 1419
2335/* 1420/*
@@ -2363,7 +1448,7 @@ static struct mapped_device *alloc_dev(int minor)
2363 goto bad_io_barrier; 1448 goto bad_io_barrier;
2364 1449
2365 md->numa_node_id = numa_node_id; 1450 md->numa_node_id = numa_node_id;
2366 md->use_blk_mq = use_blk_mq; 1451 md->use_blk_mq = dm_use_blk_mq_default();
2367 md->init_tio_pdu = false; 1452 md->init_tio_pdu = false;
2368 md->type = DM_TYPE_NONE; 1453 md->type = DM_TYPE_NONE;
2369 mutex_init(&md->suspend_lock); 1454 mutex_init(&md->suspend_lock);
@@ -2448,10 +1533,6 @@ static void free_dev(struct mapped_device *md)
2448 unlock_fs(md); 1533 unlock_fs(md);
2449 1534
2450 cleanup_mapped_device(md); 1535 cleanup_mapped_device(md);
2451 if (md->tag_set) {
2452 blk_mq_free_tag_set(md->tag_set);
2453 kfree(md->tag_set);
2454 }
2455 1536
2456 free_table_devices(&md->table_devices); 1537 free_table_devices(&md->table_devices);
2457 dm_stats_cleanup(&md->stats); 1538 dm_stats_cleanup(&md->stats);
@@ -2657,159 +1738,6 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
2657} 1738}
2658EXPORT_SYMBOL_GPL(dm_get_queue_limits); 1739EXPORT_SYMBOL_GPL(dm_get_queue_limits);
2659 1740
2660static void dm_old_init_rq_based_worker_thread(struct mapped_device *md)
2661{
2662 /* Initialize the request-based DM worker thread */
2663 init_kthread_worker(&md->kworker);
2664 md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
2665 "kdmwork-%s", dm_device_name(md));
2666}
2667
2668/*
2669 * Fully initialize a .request_fn request-based queue.
2670 */
2671static int dm_old_init_request_queue(struct mapped_device *md)
2672{
2673 /* Fully initialize the queue */
2674 if (!blk_init_allocated_queue(md->queue, dm_request_fn, NULL))
2675 return -EINVAL;
2676
2677 /* disable dm_request_fn's merge heuristic by default */
2678 md->seq_rq_merge_deadline_usecs = 0;
2679
2680 dm_init_normal_md_queue(md);
2681 blk_queue_softirq_done(md->queue, dm_softirq_done);
2682 blk_queue_prep_rq(md->queue, dm_old_prep_fn);
2683
2684 dm_old_init_rq_based_worker_thread(md);
2685
2686 elv_register_queue(md->queue);
2687
2688 return 0;
2689}
2690
2691static int dm_mq_init_request(void *data, struct request *rq,
2692 unsigned int hctx_idx, unsigned int request_idx,
2693 unsigned int numa_node)
2694{
2695 struct mapped_device *md = data;
2696 struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
2697
2698 /*
2699 * Must initialize md member of tio, otherwise it won't
2700 * be available in dm_mq_queue_rq.
2701 */
2702 tio->md = md;
2703
2704 if (md->init_tio_pdu) {
2705 /* target-specific per-io data is immediately after the tio */
2706 tio->info.ptr = tio + 1;
2707 }
2708
2709 return 0;
2710}
2711
2712static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
2713 const struct blk_mq_queue_data *bd)
2714{
2715 struct request *rq = bd->rq;
2716 struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
2717 struct mapped_device *md = tio->md;
2718 struct dm_target *ti = md->immutable_target;
2719
2720 if (unlikely(!ti)) {
2721 int srcu_idx;
2722 struct dm_table *map = dm_get_live_table(md, &srcu_idx);
2723
2724 ti = dm_table_find_target(map, 0);
2725 dm_put_live_table(md, srcu_idx);
2726 }
2727
2728 if (ti->type->busy && ti->type->busy(ti))
2729 return BLK_MQ_RQ_QUEUE_BUSY;
2730
2731 dm_start_request(md, rq);
2732
2733 /* Init tio using md established in .init_request */
2734 init_tio(tio, rq, md);
2735
2736 /*
2737 * Establish tio->ti before queuing work (map_tio_request)
2738	 * or making a direct call to map_request().
2739 */
2740 tio->ti = ti;
2741
2742 /* Direct call is fine since .queue_rq allows allocations */
2743 if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
2744 /* Undo dm_start_request() before requeuing */
2745 rq_end_stats(md, rq);
2746 rq_completed(md, rq_data_dir(rq), false);
2747 return BLK_MQ_RQ_QUEUE_BUSY;
2748 }
2749
2750 return BLK_MQ_RQ_QUEUE_OK;
2751}
2752
2753static struct blk_mq_ops dm_mq_ops = {
2754 .queue_rq = dm_mq_queue_rq,
2755 .map_queue = blk_mq_map_queue,
2756 .complete = dm_softirq_done,
2757 .init_request = dm_mq_init_request,
2758};
2759
2760static int dm_mq_init_request_queue(struct mapped_device *md,
2761 struct dm_target *immutable_tgt)
2762{
2763 struct request_queue *q;
2764 int err;
2765
2766 if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) {
2767 DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
2768 return -EINVAL;
2769 }
2770
2771 md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
2772 if (!md->tag_set)
2773 return -ENOMEM;
2774
2775 md->tag_set->ops = &dm_mq_ops;
2776 md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
2777 md->tag_set->numa_node = md->numa_node_id;
2778 md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
2779 md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
2780 md->tag_set->driver_data = md;
2781
2782 md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
2783 if (immutable_tgt && immutable_tgt->per_io_data_size) {
2784 /* any target-specific per-io data is immediately after the tio */
2785 md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
2786 md->init_tio_pdu = true;
2787 }
2788
2789 err = blk_mq_alloc_tag_set(md->tag_set);
2790 if (err)
2791 goto out_kfree_tag_set;
2792
2793 q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
2794 if (IS_ERR(q)) {
2795 err = PTR_ERR(q);
2796 goto out_tag_set;
2797 }
2798 dm_init_md_queue(md);
2799
2800 /* backfill 'mq' sysfs registration normally done in blk_register_queue */
2801 blk_mq_register_disk(md->disk);
2802
2803 return 0;
2804
2805out_tag_set:
2806 blk_mq_free_tag_set(md->tag_set);
2807out_kfree_tag_set:
2808 kfree(md->tag_set);
2809
2810 return err;
2811}
2812
2813static unsigned filter_md_type(unsigned type, struct mapped_device *md) 1741static unsigned filter_md_type(unsigned type, struct mapped_device *md)
2814{ 1742{
2815 if (type == DM_TYPE_BIO_BASED) 1743 if (type == DM_TYPE_BIO_BASED)
@@ -3741,18 +2669,6 @@ MODULE_PARM_DESC(major, "The major number of the device mapper");
3741module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR); 2669module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
3742MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); 2670MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
3743 2671
3744module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
3745MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
3746
3747module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
3748MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");
3749
3750module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
3751MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");
3752
3753module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
3754MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
3755
3756module_param(dm_numa_node, int, S_IRUGO | S_IWUSR); 2672module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
3757MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations"); 2673MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
3758 2674
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 13a758ec0f88..b611b3064a7c 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -13,6 +13,7 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/device-mapper.h> 14#include <linux/device-mapper.h>
15#include <linux/list.h> 15#include <linux/list.h>
16#include <linux/moduleparam.h>
16#include <linux/blkdev.h> 17#include <linux/blkdev.h>
17#include <linux/backing-dev.h> 18#include <linux/backing-dev.h>
18#include <linux/hdreg.h> 19#include <linux/hdreg.h>
@@ -161,16 +162,6 @@ void dm_interface_exit(void);
161/* 162/*
162 * sysfs interface 163 * sysfs interface
163 */ 164 */
164struct dm_kobject_holder {
165 struct kobject kobj;
166 struct completion completion;
167};
168
169static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
170{
171 return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
172}
173
174int dm_sysfs_init(struct mapped_device *md); 165int dm_sysfs_init(struct mapped_device *md);
175void dm_sysfs_exit(struct mapped_device *md); 166void dm_sysfs_exit(struct mapped_device *md);
176struct kobject *dm_kobject(struct mapped_device *md); 167struct kobject *dm_kobject(struct mapped_device *md);
@@ -212,8 +203,6 @@ int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
212void dm_internal_suspend(struct mapped_device *md); 203void dm_internal_suspend(struct mapped_device *md);
213void dm_internal_resume(struct mapped_device *md); 204void dm_internal_resume(struct mapped_device *md);
214 205
215bool dm_use_blk_mq(struct mapped_device *md);
216
217int dm_io_init(void); 206int dm_io_init(void);
218void dm_io_exit(void); 207void dm_io_exit(void);
219 208
@@ -228,18 +217,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
228void dm_free_md_mempools(struct dm_md_mempools *pools); 217void dm_free_md_mempools(struct dm_md_mempools *pools);
229 218
230/* 219/*
231 * Helpers that are used by DM core 220 * Various helpers
232 */ 221 */
233unsigned dm_get_reserved_bio_based_ios(void); 222unsigned dm_get_reserved_bio_based_ios(void);
234unsigned dm_get_reserved_rq_based_ios(void);
235
236static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
237{
238 return !maxlen || strlen(result) + 1 >= maxlen;
239}
240
241ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf);
242ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
243 const char *buf, size_t count);
244 223
245#endif 224#endif