author     Linus Torvalds <torvalds@linux-foundation.org>   2015-04-18 08:14:18 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-04-18 08:14:18 -0400
commit     afad97eee47c1f1f242202e2473929b4ef5d9f43
tree       31f68d70760234b582a28bd3f64311ff5307b7b1   /drivers/md
parent     04b7fe6a4a231871ef681bc95e08fe66992f7b1f
parent     44c144f9c8e8fbd73ede2848da8253b3aae42ec2
Merge tag 'dm-4.1-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- the most extensive changes this cycle are the DM core improvements to
add full blk-mq support to request-based DM.
- disabled by default but user can opt in with CONFIG_DM_MQ_DEFAULT
(see the opt-in sketch after this list)
- depends on some blk-mq changes from Jens' for-4.1/core branch, which
explains why this pull is built on linux-block.git
- update DM to use name_to_dev_t() rather than open-coding a less
capable device parser.
- includes a couple of small improvements to name_to_dev_t() that offer
stricter constraints than DM's code provided.
- improvements to the dm-cache "mq" cache replacement policy.
- a DM crypt crypt_ctr() error path fix and an async crypto deadlock
fix
- a small efficiency improvement for DM crypt decryption by leveraging
immutable biovecs
- add error handling modes for corrupted blocks to DM verity
- a new "log-writes" DM target from Josef Bacik that is meant for file
system developers to test file system integrity at particular points
in the life of a file system (a usage sketch follows the commit list below)
- a few DM log userspace cleanups and fixes
- a few Documentation fixes (for thin, cache, crypt and switch)
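
Not part of the pull message, but a quick opt-in sketch: with a kernel that
includes this series, the blk-mq path for request-based DM can be selected
either at build time or per boot/module load via the dm_mod.use_blk_mq option
described in the DM_MQ_DEFAULT help text below; the exact values shown here
are illustrative.

  CONFIG_DM_MQ_DEFAULT=y            # build-time default (.config)
  dm_mod.use_blk_mq=Y               # kernel command line override
  modprobe dm_mod use_blk_mq=Y      # override when dm-mod is built as a module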
* tag 'dm-4.1-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (34 commits)
dm crypt: fix missing error code return from crypt_ctr error path
dm crypt: fix deadlock when async crypto algorithm returns -EBUSY
dm crypt: leverage immutable biovecs when decrypting on read
dm crypt: update URLs to new cryptsetup project page
dm: add log writes target
dm table: use bool function return values of true/false not 1/0
dm verity: add error handling modes for corrupted blocks
dm thin: remove stale 'trim' message documentation
dm delay: use msecs_to_jiffies for time conversion
dm log userspace base: fix compile warning
dm log userspace transfer: match wait_for_completion_timeout return type
dm table: fall back to getting device using name_to_dev_t()
init: stricter checking of major:minor root= values
init: export name_to_dev_t and mark name argument as const
dm: add 'use_blk_mq' module param and expose in per-device ro sysfs attr
dm: optimize dm_mq_queue_rq to _not_ use kthread if using pure blk-mq
dm: add full blk-mq support to request-based DM
dm: impose configurable deadline for dm_request_fn's merge heuristic
dm sysfs: introduce ability to add writable attributes
dm: don't start current request if it would've merged with the previous
...
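
Also not part of the original message: a minimal usage sketch for the new
log-writes target, following the workflow in the dm-log-writes.c header
comment further down. The device paths, the 'lw' device name and the 'mymark'
label are placeholders.

  # one device used normally (/dev/sdb), one that receives the write log (/dev/sdc)
  dmsetup create lw --table "0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
  mkfs -t ext4 /dev/mapper/lw
  mount /dev/mapper/lw /mnt/test
  # write data and fsync, then record a named mark in the log
  dmsetup message lw 0 mark mymark
  umount /mnt/test

The log on /dev/sdc can then be replayed up to 'mymark' with a userspace
replay tool and its contents checked, as described in the target's header
comment.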
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig                     |  27
-rw-r--r-- | drivers/md/Makefile                    |   1
-rw-r--r-- | drivers/md/dm-cache-policy-mq.c        | 251
-rw-r--r-- | drivers/md/dm-crypt.c                  |  25
-rw-r--r-- | drivers/md/dm-delay.c                  |   2
-rw-r--r-- | drivers/md/dm-log-userspace-base.c     |  91
-rw-r--r-- | drivers/md/dm-log-userspace-transfer.c |   5
-rw-r--r-- | drivers/md/dm-log-writes.c             | 825
-rw-r--r-- | drivers/md/dm-mpath.c                  |   6
-rw-r--r-- | drivers/md/dm-sysfs.c                  |  43
-rw-r--r-- | drivers/md/dm-table.c                  |  71
-rw-r--r-- | drivers/md/dm-verity.c                 | 147
-rw-r--r-- | drivers/md/dm.c                        | 556
-rw-r--r-- | drivers/md/dm.h                        |  10
14 files changed, 1736 insertions, 324 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 63e05e32b462..6ddc983417d5 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -196,6 +196,17 @@ config BLK_DEV_DM | |||
196 | 196 | ||
197 | If unsure, say N. | 197 | If unsure, say N. |
198 | 198 | ||
199 | config DM_MQ_DEFAULT | ||
200 | bool "request-based DM: use blk-mq I/O path by default" | ||
201 | depends on BLK_DEV_DM | ||
202 | ---help--- | ||
203 | This option enables the blk-mq based I/O path for request-based | ||
204 | DM devices by default. With the option the dm_mod.use_blk_mq | ||
205 | module/boot option defaults to Y, without it to N, but it can | ||
206 | still be overridden either way. | ||
207 | |||
208 | If unsure say N. | ||
209 | |||
199 | config DM_DEBUG | 210 | config DM_DEBUG |
200 | bool "Device mapper debugging support" | 211 | bool "Device mapper debugging support" |
201 | depends on BLK_DEV_DM | 212 | depends on BLK_DEV_DM |
@@ -432,4 +443,20 @@ config DM_SWITCH | |||
432 | 443 | ||
433 | If unsure, say N. | 444 | If unsure, say N. |
434 | 445 | ||
446 | config DM_LOG_WRITES | ||
447 | tristate "Log writes target support" | ||
448 | depends on BLK_DEV_DM | ||
449 | ---help--- | ||
450 | This device-mapper target takes two devices, one device to use | ||
451 | normally, one to log all write operations done to the first device. | ||
452 | This is for use by file system developers wishing to verify that | ||
453 | their fs is writing a consistent file system at all times by allowing | ||
454 | them to replay the log in a variety of ways and to check the | ||
455 | contents. | ||
456 | |||
457 | To compile this code as a module, choose M here: the module will | ||
458 | be called dm-log-writes. | ||
459 | |||
460 | If unsure, say N. | ||
461 | |||
435 | endif # MD | 462 | endif # MD |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index a2da532b1c2b..1863feaa5846 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_DM_CACHE) += dm-cache.o | |||
55 | obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o | 55 | obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o |
56 | obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o | 56 | obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o |
57 | obj-$(CONFIG_DM_ERA) += dm-era.o | 57 | obj-$(CONFIG_DM_ERA) += dm-era.o |
58 | obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o | ||
58 | 59 | ||
59 | ifeq ($(CONFIG_DM_UEVENT),y) | 60 | ifeq ($(CONFIG_DM_UEVENT),y) |
60 | dm-mod-objs += dm-uevent.o | 61 | dm-mod-objs += dm-uevent.o |
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 13f547a4eeb6..3ddd1162334d 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -8,6 +8,7 @@ | |||
8 | #include "dm.h" | 8 | #include "dm.h" |
9 | 9 | ||
10 | #include <linux/hash.h> | 10 | #include <linux/hash.h> |
11 | #include <linux/jiffies.h> | ||
11 | #include <linux/module.h> | 12 | #include <linux/module.h> |
12 | #include <linux/mutex.h> | 13 | #include <linux/mutex.h> |
13 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
@@ -124,32 +125,41 @@ static void iot_examine_bio(struct io_tracker *t, struct bio *bio) | |||
124 | * sorted queue. | 125 | * sorted queue. |
125 | */ | 126 | */ |
126 | #define NR_QUEUE_LEVELS 16u | 127 | #define NR_QUEUE_LEVELS 16u |
128 | #define NR_SENTINELS NR_QUEUE_LEVELS * 3 | ||
129 | |||
130 | #define WRITEBACK_PERIOD HZ | ||
127 | 131 | ||
128 | struct queue { | 132 | struct queue { |
133 | unsigned nr_elts; | ||
134 | bool current_writeback_sentinels; | ||
135 | unsigned long next_writeback; | ||
129 | struct list_head qs[NR_QUEUE_LEVELS]; | 136 | struct list_head qs[NR_QUEUE_LEVELS]; |
137 | struct list_head sentinels[NR_SENTINELS]; | ||
130 | }; | 138 | }; |
131 | 139 | ||
132 | static void queue_init(struct queue *q) | 140 | static void queue_init(struct queue *q) |
133 | { | 141 | { |
134 | unsigned i; | 142 | unsigned i; |
135 | 143 | ||
136 | for (i = 0; i < NR_QUEUE_LEVELS; i++) | 144 | q->nr_elts = 0; |
145 | q->current_writeback_sentinels = false; | ||
146 | q->next_writeback = 0; | ||
147 | for (i = 0; i < NR_QUEUE_LEVELS; i++) { | ||
137 | INIT_LIST_HEAD(q->qs + i); | 148 | INIT_LIST_HEAD(q->qs + i); |
149 | INIT_LIST_HEAD(q->sentinels + i); | ||
150 | INIT_LIST_HEAD(q->sentinels + NR_QUEUE_LEVELS + i); | ||
151 | INIT_LIST_HEAD(q->sentinels + (2 * NR_QUEUE_LEVELS) + i); | ||
152 | } | ||
138 | } | 153 | } |
139 | 154 | ||
140 | /* | 155 | static unsigned queue_size(struct queue *q) |
141 | * Checks to see if the queue is empty. | ||
142 | * FIXME: reduce cpu usage. | ||
143 | */ | ||
144 | static bool queue_empty(struct queue *q) | ||
145 | { | 156 | { |
146 | unsigned i; | 157 | return q->nr_elts; |
147 | 158 | } | |
148 | for (i = 0; i < NR_QUEUE_LEVELS; i++) | ||
149 | if (!list_empty(q->qs + i)) | ||
150 | return false; | ||
151 | 159 | ||
152 | return true; | 160 | static bool queue_empty(struct queue *q) |
161 | { | ||
162 | return q->nr_elts == 0; | ||
153 | } | 163 | } |
154 | 164 | ||
155 | /* | 165 | /* |
@@ -157,24 +167,19 @@ static bool queue_empty(struct queue *q) | |||
157 | */ | 167 | */ |
158 | static void queue_push(struct queue *q, unsigned level, struct list_head *elt) | 168 | static void queue_push(struct queue *q, unsigned level, struct list_head *elt) |
159 | { | 169 | { |
170 | q->nr_elts++; | ||
160 | list_add_tail(elt, q->qs + level); | 171 | list_add_tail(elt, q->qs + level); |
161 | } | 172 | } |
162 | 173 | ||
163 | static void queue_remove(struct list_head *elt) | 174 | static void queue_remove(struct queue *q, struct list_head *elt) |
164 | { | 175 | { |
176 | q->nr_elts--; | ||
165 | list_del(elt); | 177 | list_del(elt); |
166 | } | 178 | } |
167 | 179 | ||
168 | /* | 180 | static bool is_sentinel(struct queue *q, struct list_head *h) |
169 | * Shifts all regions down one level. This has no effect on the order of | ||
170 | * the queue. | ||
171 | */ | ||
172 | static void queue_shift_down(struct queue *q) | ||
173 | { | 181 | { |
174 | unsigned level; | 182 | return (h >= q->sentinels) && (h < (q->sentinels + NR_SENTINELS)); |
175 | |||
176 | for (level = 1; level < NR_QUEUE_LEVELS; level++) | ||
177 | list_splice_init(q->qs + level, q->qs + level - 1); | ||
178 | } | 183 | } |
179 | 184 | ||
180 | /* | 185 | /* |
@@ -184,10 +189,12 @@ static void queue_shift_down(struct queue *q) | |||
184 | static struct list_head *queue_peek(struct queue *q) | 189 | static struct list_head *queue_peek(struct queue *q) |
185 | { | 190 | { |
186 | unsigned level; | 191 | unsigned level; |
192 | struct list_head *h; | ||
187 | 193 | ||
188 | for (level = 0; level < NR_QUEUE_LEVELS; level++) | 194 | for (level = 0; level < NR_QUEUE_LEVELS; level++) |
189 | if (!list_empty(q->qs + level)) | 195 | list_for_each(h, q->qs + level) |
190 | return q->qs[level].next; | 196 | if (!is_sentinel(q, h)) |
197 | return h; | ||
191 | 198 | ||
192 | return NULL; | 199 | return NULL; |
193 | } | 200 | } |
@@ -197,16 +204,34 @@ static struct list_head *queue_pop(struct queue *q) | |||
197 | struct list_head *r = queue_peek(q); | 204 | struct list_head *r = queue_peek(q); |
198 | 205 | ||
199 | if (r) { | 206 | if (r) { |
207 | q->nr_elts--; | ||
200 | list_del(r); | 208 | list_del(r); |
201 | |||
202 | /* have we just emptied the bottom level? */ | ||
203 | if (list_empty(q->qs)) | ||
204 | queue_shift_down(q); | ||
205 | } | 209 | } |
206 | 210 | ||
207 | return r; | 211 | return r; |
208 | } | 212 | } |
209 | 213 | ||
214 | /* | ||
215 | * Pops an entry from a level that is not past a sentinel. | ||
216 | */ | ||
217 | static struct list_head *queue_pop_old(struct queue *q) | ||
218 | { | ||
219 | unsigned level; | ||
220 | struct list_head *h; | ||
221 | |||
222 | for (level = 0; level < NR_QUEUE_LEVELS; level++) | ||
223 | list_for_each(h, q->qs + level) { | ||
224 | if (is_sentinel(q, h)) | ||
225 | break; | ||
226 | |||
227 | q->nr_elts--; | ||
228 | list_del(h); | ||
229 | return h; | ||
230 | } | ||
231 | |||
232 | return NULL; | ||
233 | } | ||
234 | |||
210 | static struct list_head *list_pop(struct list_head *lh) | 235 | static struct list_head *list_pop(struct list_head *lh) |
211 | { | 236 | { |
212 | struct list_head *r = lh->next; | 237 | struct list_head *r = lh->next; |
@@ -217,6 +242,62 @@ static struct list_head *list_pop(struct list_head *lh) | |||
217 | return r; | 242 | return r; |
218 | } | 243 | } |
219 | 244 | ||
245 | static struct list_head *writeback_sentinel(struct queue *q, unsigned level) | ||
246 | { | ||
247 | if (q->current_writeback_sentinels) | ||
248 | return q->sentinels + NR_QUEUE_LEVELS + level; | ||
249 | else | ||
250 | return q->sentinels + 2 * NR_QUEUE_LEVELS + level; | ||
251 | } | ||
252 | |||
253 | static void queue_update_writeback_sentinels(struct queue *q) | ||
254 | { | ||
255 | unsigned i; | ||
256 | struct list_head *h; | ||
257 | |||
258 | if (time_after(jiffies, q->next_writeback)) { | ||
259 | for (i = 0; i < NR_QUEUE_LEVELS; i++) { | ||
260 | h = writeback_sentinel(q, i); | ||
261 | list_del(h); | ||
262 | list_add_tail(h, q->qs + i); | ||
263 | } | ||
264 | |||
265 | q->next_writeback = jiffies + WRITEBACK_PERIOD; | ||
266 | q->current_writeback_sentinels = !q->current_writeback_sentinels; | ||
267 | } | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * Sometimes we want to iterate through entries that have been pushed since | ||
272 | * a certain event. We use sentinel entries on the queues to delimit these | ||
273 | * 'tick' events. | ||
274 | */ | ||
275 | static void queue_tick(struct queue *q) | ||
276 | { | ||
277 | unsigned i; | ||
278 | |||
279 | for (i = 0; i < NR_QUEUE_LEVELS; i++) { | ||
280 | list_del(q->sentinels + i); | ||
281 | list_add_tail(q->sentinels + i, q->qs + i); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | typedef void (*iter_fn)(struct list_head *, void *); | ||
286 | static void queue_iterate_tick(struct queue *q, iter_fn fn, void *context) | ||
287 | { | ||
288 | unsigned i; | ||
289 | struct list_head *h; | ||
290 | |||
291 | for (i = 0; i < NR_QUEUE_LEVELS; i++) { | ||
292 | list_for_each_prev(h, q->qs + i) { | ||
293 | if (is_sentinel(q, h)) | ||
294 | break; | ||
295 | |||
296 | fn(h, context); | ||
297 | } | ||
298 | } | ||
299 | } | ||
300 | |||
220 | /*----------------------------------------------------------------*/ | 301 | /*----------------------------------------------------------------*/ |
221 | 302 | ||
222 | /* | 303 | /* |
@@ -232,8 +313,6 @@ struct entry { | |||
232 | */ | 313 | */ |
233 | bool dirty:1; | 314 | bool dirty:1; |
234 | unsigned hit_count; | 315 | unsigned hit_count; |
235 | unsigned generation; | ||
236 | unsigned tick; | ||
237 | }; | 316 | }; |
238 | 317 | ||
239 | /* | 318 | /* |
@@ -481,7 +560,6 @@ static bool in_cache(struct mq_policy *mq, struct entry *e) | |||
481 | */ | 560 | */ |
482 | static void push(struct mq_policy *mq, struct entry *e) | 561 | static void push(struct mq_policy *mq, struct entry *e) |
483 | { | 562 | { |
484 | e->tick = mq->tick; | ||
485 | hash_insert(mq, e); | 563 | hash_insert(mq, e); |
486 | 564 | ||
487 | if (in_cache(mq, e)) | 565 | if (in_cache(mq, e)) |
@@ -496,7 +574,11 @@ static void push(struct mq_policy *mq, struct entry *e) | |||
496 | */ | 574 | */ |
497 | static void del(struct mq_policy *mq, struct entry *e) | 575 | static void del(struct mq_policy *mq, struct entry *e) |
498 | { | 576 | { |
499 | queue_remove(&e->list); | 577 | if (in_cache(mq, e)) |
578 | queue_remove(e->dirty ? &mq->cache_dirty : &mq->cache_clean, &e->list); | ||
579 | else | ||
580 | queue_remove(&mq->pre_cache, &e->list); | ||
581 | |||
500 | hash_remove(e); | 582 | hash_remove(e); |
501 | } | 583 | } |
502 | 584 | ||
@@ -518,18 +600,24 @@ static struct entry *pop(struct mq_policy *mq, struct queue *q) | |||
518 | return e; | 600 | return e; |
519 | } | 601 | } |
520 | 602 | ||
521 | static struct entry *peek(struct queue *q) | 603 | static struct entry *pop_old(struct mq_policy *mq, struct queue *q) |
522 | { | 604 | { |
523 | struct list_head *h = queue_peek(q); | 605 | struct entry *e; |
524 | return h ? container_of(h, struct entry, list) : NULL; | 606 | struct list_head *h = queue_pop_old(q); |
607 | |||
608 | if (!h) | ||
609 | return NULL; | ||
610 | |||
611 | e = container_of(h, struct entry, list); | ||
612 | hash_remove(e); | ||
613 | |||
614 | return e; | ||
525 | } | 615 | } |
526 | 616 | ||
527 | /* | 617 | static struct entry *peek(struct queue *q) |
528 | * Has this entry already been updated? | ||
529 | */ | ||
530 | static bool updated_this_tick(struct mq_policy *mq, struct entry *e) | ||
531 | { | 618 | { |
532 | return mq->tick == e->tick; | 619 | struct list_head *h = queue_peek(q); |
620 | return h ? container_of(h, struct entry, list) : NULL; | ||
533 | } | 621 | } |
534 | 622 | ||
535 | /* | 623 | /* |
@@ -583,20 +671,9 @@ static void check_generation(struct mq_policy *mq) | |||
583 | * Whenever we use an entry we bump up it's hit counter, and push it to the | 671 | * Whenever we use an entry we bump up it's hit counter, and push it to the |
584 | * back to it's current level. | 672 | * back to it's current level. |
585 | */ | 673 | */ |
586 | static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e) | 674 | static void requeue(struct mq_policy *mq, struct entry *e) |
587 | { | 675 | { |
588 | if (updated_this_tick(mq, e)) | ||
589 | return; | ||
590 | |||
591 | e->hit_count++; | ||
592 | mq->hit_count++; | ||
593 | check_generation(mq); | 676 | check_generation(mq); |
594 | |||
595 | /* generation adjustment, to stop the counts increasing forever. */ | ||
596 | /* FIXME: divide? */ | ||
597 | /* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */ | ||
598 | e->generation = mq->generation; | ||
599 | |||
600 | del(mq, e); | 677 | del(mq, e); |
601 | push(mq, e); | 678 | push(mq, e); |
602 | } | 679 | } |
@@ -703,7 +780,7 @@ static int cache_entry_found(struct mq_policy *mq, | |||
703 | struct entry *e, | 780 | struct entry *e, |
704 | struct policy_result *result) | 781 | struct policy_result *result) |
705 | { | 782 | { |
706 | requeue_and_update_tick(mq, e); | 783 | requeue(mq, e); |
707 | 784 | ||
708 | if (in_cache(mq, e)) { | 785 | if (in_cache(mq, e)) { |
709 | result->op = POLICY_HIT; | 786 | result->op = POLICY_HIT; |
@@ -740,8 +817,6 @@ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, | |||
740 | new_e->oblock = e->oblock; | 817 | new_e->oblock = e->oblock; |
741 | new_e->dirty = false; | 818 | new_e->dirty = false; |
742 | new_e->hit_count = e->hit_count; | 819 | new_e->hit_count = e->hit_count; |
743 | new_e->generation = e->generation; | ||
744 | new_e->tick = e->tick; | ||
745 | 820 | ||
746 | del(mq, e); | 821 | del(mq, e); |
747 | free_entry(&mq->pre_cache_pool, e); | 822 | free_entry(&mq->pre_cache_pool, e); |
@@ -757,18 +832,16 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, | |||
757 | int data_dir, struct policy_result *result) | 832 | int data_dir, struct policy_result *result) |
758 | { | 833 | { |
759 | int r = 0; | 834 | int r = 0; |
760 | bool updated = updated_this_tick(mq, e); | ||
761 | 835 | ||
762 | if ((!discarded_oblock && updated) || | 836 | if (!should_promote(mq, e, discarded_oblock, data_dir)) { |
763 | !should_promote(mq, e, discarded_oblock, data_dir)) { | 837 | requeue(mq, e); |
764 | requeue_and_update_tick(mq, e); | ||
765 | result->op = POLICY_MISS; | 838 | result->op = POLICY_MISS; |
766 | 839 | ||
767 | } else if (!can_migrate) | 840 | } else if (!can_migrate) |
768 | r = -EWOULDBLOCK; | 841 | r = -EWOULDBLOCK; |
769 | 842 | ||
770 | else { | 843 | else { |
771 | requeue_and_update_tick(mq, e); | 844 | requeue(mq, e); |
772 | r = pre_cache_to_cache(mq, e, result); | 845 | r = pre_cache_to_cache(mq, e, result); |
773 | } | 846 | } |
774 | 847 | ||
@@ -795,7 +868,6 @@ static void insert_in_pre_cache(struct mq_policy *mq, | |||
795 | e->dirty = false; | 868 | e->dirty = false; |
796 | e->oblock = oblock; | 869 | e->oblock = oblock; |
797 | e->hit_count = 1; | 870 | e->hit_count = 1; |
798 | e->generation = mq->generation; | ||
799 | push(mq, e); | 871 | push(mq, e); |
800 | } | 872 | } |
801 | 873 | ||
@@ -828,7 +900,6 @@ static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, | |||
828 | e->oblock = oblock; | 900 | e->oblock = oblock; |
829 | e->dirty = false; | 901 | e->dirty = false; |
830 | e->hit_count = 1; | 902 | e->hit_count = 1; |
831 | e->generation = mq->generation; | ||
832 | push(mq, e); | 903 | push(mq, e); |
833 | 904 | ||
834 | result->cblock = infer_cblock(&mq->cache_pool, e); | 905 | result->cblock = infer_cblock(&mq->cache_pool, e); |
@@ -905,12 +976,37 @@ static void mq_destroy(struct dm_cache_policy *p) | |||
905 | kfree(mq); | 976 | kfree(mq); |
906 | } | 977 | } |
907 | 978 | ||
979 | static void update_pre_cache_hits(struct list_head *h, void *context) | ||
980 | { | ||
981 | struct entry *e = container_of(h, struct entry, list); | ||
982 | e->hit_count++; | ||
983 | } | ||
984 | |||
985 | static void update_cache_hits(struct list_head *h, void *context) | ||
986 | { | ||
987 | struct mq_policy *mq = context; | ||
988 | struct entry *e = container_of(h, struct entry, list); | ||
989 | e->hit_count++; | ||
990 | mq->hit_count++; | ||
991 | } | ||
992 | |||
908 | static void copy_tick(struct mq_policy *mq) | 993 | static void copy_tick(struct mq_policy *mq) |
909 | { | 994 | { |
910 | unsigned long flags; | 995 | unsigned long flags, tick; |
911 | 996 | ||
912 | spin_lock_irqsave(&mq->tick_lock, flags); | 997 | spin_lock_irqsave(&mq->tick_lock, flags); |
913 | mq->tick = mq->tick_protected; | 998 | tick = mq->tick_protected; |
999 | if (tick != mq->tick) { | ||
1000 | queue_iterate_tick(&mq->pre_cache, update_pre_cache_hits, mq); | ||
1001 | queue_iterate_tick(&mq->cache_dirty, update_cache_hits, mq); | ||
1002 | queue_iterate_tick(&mq->cache_clean, update_cache_hits, mq); | ||
1003 | mq->tick = tick; | ||
1004 | } | ||
1005 | |||
1006 | queue_tick(&mq->pre_cache); | ||
1007 | queue_tick(&mq->cache_dirty); | ||
1008 | queue_tick(&mq->cache_clean); | ||
1009 | queue_update_writeback_sentinels(&mq->cache_dirty); | ||
914 | spin_unlock_irqrestore(&mq->tick_lock, flags); | 1010 | spin_unlock_irqrestore(&mq->tick_lock, flags); |
915 | } | 1011 | } |
916 | 1012 | ||
@@ -1001,7 +1097,6 @@ static int mq_load_mapping(struct dm_cache_policy *p, | |||
1001 | e->oblock = oblock; | 1097 | e->oblock = oblock; |
1002 | e->dirty = false; /* this gets corrected in a minute */ | 1098 | e->dirty = false; /* this gets corrected in a minute */ |
1003 | e->hit_count = hint_valid ? hint : 1; | 1099 | e->hit_count = hint_valid ? hint : 1; |
1004 | e->generation = mq->generation; | ||
1005 | push(mq, e); | 1100 | push(mq, e); |
1006 | 1101 | ||
1007 | return 0; | 1102 | return 0; |
@@ -1012,10 +1107,15 @@ static int mq_save_hints(struct mq_policy *mq, struct queue *q, | |||
1012 | { | 1107 | { |
1013 | int r; | 1108 | int r; |
1014 | unsigned level; | 1109 | unsigned level; |
1110 | struct list_head *h; | ||
1015 | struct entry *e; | 1111 | struct entry *e; |
1016 | 1112 | ||
1017 | for (level = 0; level < NR_QUEUE_LEVELS; level++) | 1113 | for (level = 0; level < NR_QUEUE_LEVELS; level++) |
1018 | list_for_each_entry(e, q->qs + level, list) { | 1114 | list_for_each(h, q->qs + level) { |
1115 | if (is_sentinel(q, h)) | ||
1116 | continue; | ||
1117 | |||
1118 | e = container_of(h, struct entry, list); | ||
1019 | r = fn(context, infer_cblock(&mq->cache_pool, e), | 1119 | r = fn(context, infer_cblock(&mq->cache_pool, e), |
1020 | e->oblock, e->hit_count); | 1120 | e->oblock, e->hit_count); |
1021 | if (r) | 1121 | if (r) |
@@ -1087,10 +1187,27 @@ static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock) | |||
1087 | return r; | 1187 | return r; |
1088 | } | 1188 | } |
1089 | 1189 | ||
1190 | #define CLEAN_TARGET_PERCENTAGE 25 | ||
1191 | |||
1192 | static bool clean_target_met(struct mq_policy *mq) | ||
1193 | { | ||
1194 | /* | ||
1195 | * Cache entries may not be populated. So we cannot rely on the | ||
1196 | * size of the clean queue. | ||
1197 | */ | ||
1198 | unsigned nr_clean = from_cblock(mq->cache_size) - queue_size(&mq->cache_dirty); | ||
1199 | unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_PERCENTAGE / 100; | ||
1200 | |||
1201 | return nr_clean >= target; | ||
1202 | } | ||
1203 | |||
1090 | static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock, | 1204 | static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock, |
1091 | dm_cblock_t *cblock) | 1205 | dm_cblock_t *cblock) |
1092 | { | 1206 | { |
1093 | struct entry *e = pop(mq, &mq->cache_dirty); | 1207 | struct entry *e = pop_old(mq, &mq->cache_dirty); |
1208 | |||
1209 | if (!e && !clean_target_met(mq)) | ||
1210 | e = pop(mq, &mq->cache_dirty); | ||
1094 | 1211 | ||
1095 | if (!e) | 1212 | if (!e) |
1096 | return -ENODATA; | 1213 | return -ENODATA; |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 713a96237a80..9eeea196328a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -228,7 +228,7 @@ static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) | |||
228 | * | 228 | * |
229 | * tcw: Compatible implementation of the block chaining mode used | 229 | * tcw: Compatible implementation of the block chaining mode used |
230 | * by the TrueCrypt device encryption system (prior to version 4.1). | 230 | * by the TrueCrypt device encryption system (prior to version 4.1). |
231 | * For more info see: http://www.truecrypt.org | 231 | * For more info see: https://gitlab.com/cryptsetup/cryptsetup/wikis/TrueCryptOnDiskFormat |
232 | * It operates on full 512 byte sectors and uses CBC | 232 | * It operates on full 512 byte sectors and uses CBC |
233 | * with an IV derived from initial key and the sector number. | 233 | * with an IV derived from initial key and the sector number. |
234 | * In addition, whitening value is applied on every sector, whitening | 234 | * In addition, whitening value is applied on every sector, whitening |
@@ -925,11 +925,10 @@ static int crypt_convert(struct crypt_config *cc, | |||
925 | 925 | ||
926 | switch (r) { | 926 | switch (r) { |
927 | /* async */ | 927 | /* async */ |
928 | case -EINPROGRESS: | ||
928 | case -EBUSY: | 929 | case -EBUSY: |
929 | wait_for_completion(&ctx->restart); | 930 | wait_for_completion(&ctx->restart); |
930 | reinit_completion(&ctx->restart); | 931 | reinit_completion(&ctx->restart); |
931 | /* fall through*/ | ||
932 | case -EINPROGRESS: | ||
933 | ctx->req = NULL; | 932 | ctx->req = NULL; |
934 | ctx->cc_sector++; | 933 | ctx->cc_sector++; |
935 | continue; | 934 | continue; |
@@ -1124,15 +1123,15 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) | |||
1124 | static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) | 1123 | static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) |
1125 | { | 1124 | { |
1126 | struct crypt_config *cc = io->cc; | 1125 | struct crypt_config *cc = io->cc; |
1127 | struct bio *base_bio = io->base_bio; | ||
1128 | struct bio *clone; | 1126 | struct bio *clone; |
1129 | 1127 | ||
1130 | /* | 1128 | /* |
1131 | * The block layer might modify the bvec array, so always | 1129 | * We need the original biovec array in order to decrypt |
1132 | * copy the required bvecs because we need the original | 1130 | * the whole bio data *afterwards* -- thanks to immutable |
1133 | * one in order to decrypt the whole bio data *afterwards*. | 1131 | * biovecs we don't need to worry about the block layer |
1132 | * modifying the biovec array; so leverage bio_clone_fast(). | ||
1134 | */ | 1133 | */ |
1135 | clone = bio_clone_bioset(base_bio, gfp, cc->bs); | 1134 | clone = bio_clone_fast(io->base_bio, gfp, cc->bs); |
1136 | if (!clone) | 1135 | if (!clone) |
1137 | return 1; | 1136 | return 1; |
1138 | 1137 | ||
@@ -1346,10 +1345,8 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, | |||
1346 | struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); | 1345 | struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); |
1347 | struct crypt_config *cc = io->cc; | 1346 | struct crypt_config *cc = io->cc; |
1348 | 1347 | ||
1349 | if (error == -EINPROGRESS) { | 1348 | if (error == -EINPROGRESS) |
1350 | complete(&ctx->restart); | ||
1351 | return; | 1349 | return; |
1352 | } | ||
1353 | 1350 | ||
1354 | if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post) | 1351 | if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post) |
1355 | error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq); | 1352 | error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq); |
@@ -1360,12 +1357,15 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, | |||
1360 | crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); | 1357 | crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); |
1361 | 1358 | ||
1362 | if (!atomic_dec_and_test(&ctx->cc_pending)) | 1359 | if (!atomic_dec_and_test(&ctx->cc_pending)) |
1363 | return; | 1360 | goto done; |
1364 | 1361 | ||
1365 | if (bio_data_dir(io->base_bio) == READ) | 1362 | if (bio_data_dir(io->base_bio) == READ) |
1366 | kcryptd_crypt_read_done(io); | 1363 | kcryptd_crypt_read_done(io); |
1367 | else | 1364 | else |
1368 | kcryptd_crypt_write_io_submit(io, 1); | 1365 | kcryptd_crypt_write_io_submit(io, 1); |
1366 | done: | ||
1367 | if (!completion_done(&ctx->restart)) | ||
1368 | complete(&ctx->restart); | ||
1369 | } | 1369 | } |
1370 | 1370 | ||
1371 | static void kcryptd_crypt(struct work_struct *work) | 1371 | static void kcryptd_crypt(struct work_struct *work) |
@@ -1816,6 +1816,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1816 | if (ret) | 1816 | if (ret) |
1817 | goto bad; | 1817 | goto bad; |
1818 | 1818 | ||
1819 | ret = -EINVAL; | ||
1819 | while (opt_params--) { | 1820 | while (opt_params--) { |
1820 | opt_string = dm_shift_arg(&as); | 1821 | opt_string = dm_shift_arg(&as); |
1821 | if (!opt_string) { | 1822 | if (!opt_string) { |
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 42c3a27a14cc..57b6a1901c91 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -236,7 +236,7 @@ static int delay_bio(struct delay_c *dc, int delay, struct bio *bio) | |||
236 | delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); | 236 | delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); |
237 | 237 | ||
238 | delayed->context = dc; | 238 | delayed->context = dc; |
239 | delayed->expires = expires = jiffies + (delay * HZ / 1000); | 239 | delayed->expires = expires = jiffies + msecs_to_jiffies(delay); |
240 | 240 | ||
241 | mutex_lock(&delayed_bios_lock); | 241 | mutex_lock(&delayed_bios_lock); |
242 | 242 | ||
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 03177ca0b009..058256d2eeea 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -17,7 +17,9 @@ | |||
17 | 17 | ||
18 | #define DM_LOG_USERSPACE_VSN "1.3.0" | 18 | #define DM_LOG_USERSPACE_VSN "1.3.0" |
19 | 19 | ||
20 | struct flush_entry { | 20 | #define FLUSH_ENTRY_POOL_SIZE 16 |
21 | |||
22 | struct dm_dirty_log_flush_entry { | ||
21 | int type; | 23 | int type; |
22 | region_t region; | 24 | region_t region; |
23 | struct list_head list; | 25 | struct list_head list; |
@@ -34,22 +36,14 @@ struct flush_entry { | |||
34 | struct log_c { | 36 | struct log_c { |
35 | struct dm_target *ti; | 37 | struct dm_target *ti; |
36 | struct dm_dev *log_dev; | 38 | struct dm_dev *log_dev; |
37 | uint32_t region_size; | ||
38 | region_t region_count; | ||
39 | uint64_t luid; | ||
40 | char uuid[DM_UUID_LEN]; | ||
41 | 39 | ||
42 | char *usr_argv_str; | 40 | char *usr_argv_str; |
43 | uint32_t usr_argc; | 41 | uint32_t usr_argc; |
44 | 42 | ||
45 | /* | 43 | uint32_t region_size; |
46 | * in_sync_hint gets set when doing is_remote_recovering. It | 44 | region_t region_count; |
47 | * represents the first region that needs recovery. IOW, the | 45 | uint64_t luid; |
48 | * first zero bit of sync_bits. This can be useful for to limit | 46 | char uuid[DM_UUID_LEN]; |
49 | * traffic for calls like is_remote_recovering and get_resync_work, | ||
50 | * but be take care in its use for anything else. | ||
51 | */ | ||
52 | uint64_t in_sync_hint; | ||
53 | 47 | ||
54 | /* | 48 | /* |
55 | * Mark and clear requests are held until a flush is issued | 49 | * Mark and clear requests are held until a flush is issued |
@@ -62,6 +56,15 @@ struct log_c { | |||
62 | struct list_head clear_list; | 56 | struct list_head clear_list; |
63 | 57 | ||
64 | /* | 58 | /* |
59 | * in_sync_hint gets set when doing is_remote_recovering. It | ||
60 | * represents the first region that needs recovery. IOW, the | ||
61 | * first zero bit of sync_bits. This can be useful for to limit | ||
62 | * traffic for calls like is_remote_recovering and get_resync_work, | ||
63 | * but be take care in its use for anything else. | ||
64 | */ | ||
65 | uint64_t in_sync_hint; | ||
66 | |||
67 | /* | ||
65 | * Workqueue for flush of clear region requests. | 68 | * Workqueue for flush of clear region requests. |
66 | */ | 69 | */ |
67 | struct workqueue_struct *dmlog_wq; | 70 | struct workqueue_struct *dmlog_wq; |
@@ -72,19 +75,11 @@ struct log_c { | |||
72 | * Combine userspace flush and mark requests for efficiency. | 75 | * Combine userspace flush and mark requests for efficiency. |
73 | */ | 76 | */ |
74 | uint32_t integrated_flush; | 77 | uint32_t integrated_flush; |
75 | }; | ||
76 | |||
77 | static mempool_t *flush_entry_pool; | ||
78 | 78 | ||
79 | static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) | 79 | mempool_t *flush_entry_pool; |
80 | { | 80 | }; |
81 | return kmalloc(sizeof(struct flush_entry), gfp_mask); | ||
82 | } | ||
83 | 81 | ||
84 | static void flush_entry_free(void *element, void *pool_data) | 82 | static struct kmem_cache *_flush_entry_cache; |
85 | { | ||
86 | kfree(element); | ||
87 | } | ||
88 | 83 | ||
89 | static int userspace_do_request(struct log_c *lc, const char *uuid, | 84 | static int userspace_do_request(struct log_c *lc, const char *uuid, |
90 | int request_type, char *data, size_t data_size, | 85 | int request_type, char *data, size_t data_size, |
@@ -254,6 +249,14 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, | |||
254 | goto out; | 249 | goto out; |
255 | } | 250 | } |
256 | 251 | ||
252 | lc->flush_entry_pool = mempool_create_slab_pool(FLUSH_ENTRY_POOL_SIZE, | ||
253 | _flush_entry_cache); | ||
254 | if (!lc->flush_entry_pool) { | ||
255 | DMERR("Failed to create flush_entry_pool"); | ||
256 | r = -ENOMEM; | ||
257 | goto out; | ||
258 | } | ||
259 | |||
257 | /* | 260 | /* |
258 | * Send table string and get back any opened device. | 261 | * Send table string and get back any opened device. |
259 | */ | 262 | */ |
@@ -310,6 +313,8 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, | |||
310 | out: | 313 | out: |
311 | kfree(devices_rdata); | 314 | kfree(devices_rdata); |
312 | if (r) { | 315 | if (r) { |
316 | if (lc->flush_entry_pool) | ||
317 | mempool_destroy(lc->flush_entry_pool); | ||
313 | kfree(lc); | 318 | kfree(lc); |
314 | kfree(ctr_str); | 319 | kfree(ctr_str); |
315 | } else { | 320 | } else { |
@@ -338,6 +343,8 @@ static void userspace_dtr(struct dm_dirty_log *log) | |||
338 | if (lc->log_dev) | 343 | if (lc->log_dev) |
339 | dm_put_device(lc->ti, lc->log_dev); | 344 | dm_put_device(lc->ti, lc->log_dev); |
340 | 345 | ||
346 | mempool_destroy(lc->flush_entry_pool); | ||
347 | |||
341 | kfree(lc->usr_argv_str); | 348 | kfree(lc->usr_argv_str); |
342 | kfree(lc); | 349 | kfree(lc); |
343 | 350 | ||
@@ -461,7 +468,7 @@ static int userspace_in_sync(struct dm_dirty_log *log, region_t region, | |||
461 | static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list) | 468 | static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list) |
462 | { | 469 | { |
463 | int r = 0; | 470 | int r = 0; |
464 | struct flush_entry *fe; | 471 | struct dm_dirty_log_flush_entry *fe; |
465 | 472 | ||
466 | list_for_each_entry(fe, flush_list, list) { | 473 | list_for_each_entry(fe, flush_list, list) { |
467 | r = userspace_do_request(lc, lc->uuid, fe->type, | 474 | r = userspace_do_request(lc, lc->uuid, fe->type, |
@@ -481,7 +488,7 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list, | |||
481 | int r = 0; | 488 | int r = 0; |
482 | int count; | 489 | int count; |
483 | uint32_t type = 0; | 490 | uint32_t type = 0; |
484 | struct flush_entry *fe, *tmp_fe; | 491 | struct dm_dirty_log_flush_entry *fe, *tmp_fe; |
485 | LIST_HEAD(tmp_list); | 492 | LIST_HEAD(tmp_list); |
486 | uint64_t group[MAX_FLUSH_GROUP_COUNT]; | 493 | uint64_t group[MAX_FLUSH_GROUP_COUNT]; |
487 | 494 | ||
@@ -563,7 +570,8 @@ static int userspace_flush(struct dm_dirty_log *log) | |||
563 | LIST_HEAD(clear_list); | 570 | LIST_HEAD(clear_list); |
564 | int mark_list_is_empty; | 571 | int mark_list_is_empty; |
565 | int clear_list_is_empty; | 572 | int clear_list_is_empty; |
566 | struct flush_entry *fe, *tmp_fe; | 573 | struct dm_dirty_log_flush_entry *fe, *tmp_fe; |
574 | mempool_t *flush_entry_pool = lc->flush_entry_pool; | ||
567 | 575 | ||
568 | spin_lock_irqsave(&lc->flush_lock, flags); | 576 | spin_lock_irqsave(&lc->flush_lock, flags); |
569 | list_splice_init(&lc->mark_list, &mark_list); | 577 | list_splice_init(&lc->mark_list, &mark_list); |
@@ -643,10 +651,10 @@ static void userspace_mark_region(struct dm_dirty_log *log, region_t region) | |||
643 | { | 651 | { |
644 | unsigned long flags; | 652 | unsigned long flags; |
645 | struct log_c *lc = log->context; | 653 | struct log_c *lc = log->context; |
646 | struct flush_entry *fe; | 654 | struct dm_dirty_log_flush_entry *fe; |
647 | 655 | ||
648 | /* Wait for an allocation, but _never_ fail */ | 656 | /* Wait for an allocation, but _never_ fail */ |
649 | fe = mempool_alloc(flush_entry_pool, GFP_NOIO); | 657 | fe = mempool_alloc(lc->flush_entry_pool, GFP_NOIO); |
650 | BUG_ON(!fe); | 658 | BUG_ON(!fe); |
651 | 659 | ||
652 | spin_lock_irqsave(&lc->flush_lock, flags); | 660 | spin_lock_irqsave(&lc->flush_lock, flags); |
@@ -672,7 +680,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region) | |||
672 | { | 680 | { |
673 | unsigned long flags; | 681 | unsigned long flags; |
674 | struct log_c *lc = log->context; | 682 | struct log_c *lc = log->context; |
675 | struct flush_entry *fe; | 683 | struct dm_dirty_log_flush_entry *fe; |
676 | 684 | ||
677 | /* | 685 | /* |
678 | * If we fail to allocate, we skip the clearing of | 686 | * If we fail to allocate, we skip the clearing of |
@@ -680,7 +688,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region) | |||
680 | * to cause the region to be resync'ed when the | 688 | * to cause the region to be resync'ed when the |
681 | * device is activated next time. | 689 | * device is activated next time. |
682 | */ | 690 | */ |
683 | fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); | 691 | fe = mempool_alloc(lc->flush_entry_pool, GFP_ATOMIC); |
684 | if (!fe) { | 692 | if (!fe) { |
685 | DMERR("Failed to allocate memory to clear region."); | 693 | DMERR("Failed to allocate memory to clear region."); |
686 | return; | 694 | return; |
@@ -733,7 +741,6 @@ static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) | |||
733 | static void userspace_set_region_sync(struct dm_dirty_log *log, | 741 | static void userspace_set_region_sync(struct dm_dirty_log *log, |
734 | region_t region, int in_sync) | 742 | region_t region, int in_sync) |
735 | { | 743 | { |
736 | int r; | ||
737 | struct log_c *lc = log->context; | 744 | struct log_c *lc = log->context; |
738 | struct { | 745 | struct { |
739 | region_t r; | 746 | region_t r; |
@@ -743,12 +750,12 @@ static void userspace_set_region_sync(struct dm_dirty_log *log, | |||
743 | pkg.r = region; | 750 | pkg.r = region; |
744 | pkg.i = (int64_t)in_sync; | 751 | pkg.i = (int64_t)in_sync; |
745 | 752 | ||
746 | r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, | 753 | (void) userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, |
747 | (char *)&pkg, sizeof(pkg), NULL, NULL); | 754 | (char *)&pkg, sizeof(pkg), NULL, NULL); |
748 | 755 | ||
749 | /* | 756 | /* |
750 | * It would be nice to be able to report failures. | 757 | * It would be nice to be able to report failures. |
751 | * However, it is easy emough to detect and resolve. | 758 | * However, it is easy enough to detect and resolve. |
752 | */ | 759 | */ |
753 | return; | 760 | return; |
754 | } | 761 | } |
@@ -886,18 +893,16 @@ static int __init userspace_dirty_log_init(void) | |||
886 | { | 893 | { |
887 | int r = 0; | 894 | int r = 0; |
888 | 895 | ||
889 | flush_entry_pool = mempool_create(100, flush_entry_alloc, | 896 | _flush_entry_cache = KMEM_CACHE(dm_dirty_log_flush_entry, 0); |
890 | flush_entry_free, NULL); | 897 | if (!_flush_entry_cache) { |
891 | 898 | DMWARN("Unable to create flush_entry_cache: No memory."); | |
892 | if (!flush_entry_pool) { | ||
893 | DMWARN("Unable to create flush_entry_pool: No memory."); | ||
894 | return -ENOMEM; | 899 | return -ENOMEM; |
895 | } | 900 | } |
896 | 901 | ||
897 | r = dm_ulog_tfr_init(); | 902 | r = dm_ulog_tfr_init(); |
898 | if (r) { | 903 | if (r) { |
899 | DMWARN("Unable to initialize userspace log communications"); | 904 | DMWARN("Unable to initialize userspace log communications"); |
900 | mempool_destroy(flush_entry_pool); | 905 | kmem_cache_destroy(_flush_entry_cache); |
901 | return r; | 906 | return r; |
902 | } | 907 | } |
903 | 908 | ||
@@ -905,7 +910,7 @@ static int __init userspace_dirty_log_init(void) | |||
905 | if (r) { | 910 | if (r) { |
906 | DMWARN("Couldn't register userspace dirty log type"); | 911 | DMWARN("Couldn't register userspace dirty log type"); |
907 | dm_ulog_tfr_exit(); | 912 | dm_ulog_tfr_exit(); |
908 | mempool_destroy(flush_entry_pool); | 913 | kmem_cache_destroy(_flush_entry_cache); |
909 | return r; | 914 | return r; |
910 | } | 915 | } |
911 | 916 | ||
@@ -917,7 +922,7 @@ static void __exit userspace_dirty_log_exit(void) | |||
917 | { | 922 | { |
918 | dm_dirty_log_type_unregister(&_userspace_type); | 923 | dm_dirty_log_type_unregister(&_userspace_type); |
919 | dm_ulog_tfr_exit(); | 924 | dm_ulog_tfr_exit(); |
920 | mempool_destroy(flush_entry_pool); | 925 | kmem_cache_destroy(_flush_entry_cache); |
921 | 926 | ||
922 | DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded"); | 927 | DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded"); |
923 | return; | 928 | return; |
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 39ad9664d397..fdf8ec304f8d 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -172,6 +172,7 @@ int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type, | |||
172 | char *rdata, size_t *rdata_size) | 172 | char *rdata, size_t *rdata_size) |
173 | { | 173 | { |
174 | int r = 0; | 174 | int r = 0; |
175 | unsigned long tmo; | ||
175 | size_t dummy = 0; | 176 | size_t dummy = 0; |
176 | int overhead_size = sizeof(struct dm_ulog_request) + sizeof(struct cn_msg); | 177 | int overhead_size = sizeof(struct dm_ulog_request) + sizeof(struct cn_msg); |
177 | struct dm_ulog_request *tfr = prealloced_ulog_tfr; | 178 | struct dm_ulog_request *tfr = prealloced_ulog_tfr; |
@@ -236,11 +237,11 @@ resend: | |||
236 | goto out; | 237 | goto out; |
237 | } | 238 | } |
238 | 239 | ||
239 | r = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT); | 240 | tmo = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT); |
240 | spin_lock(&receiving_list_lock); | 241 | spin_lock(&receiving_list_lock); |
241 | list_del_init(&(pkg.list)); | 242 | list_del_init(&(pkg.list)); |
242 | spin_unlock(&receiving_list_lock); | 243 | spin_unlock(&receiving_list_lock); |
243 | if (!r) { | 244 | if (!tmo) { |
244 | DMWARN("[%s] Request timed out: [%u/%u] - retrying", | 245 | DMWARN("[%s] Request timed out: [%u/%u] - retrying", |
245 | (strlen(uuid) > 8) ? | 246 | (strlen(uuid) > 8) ? |
246 | (uuid + (strlen(uuid) - 8)) : (uuid), | 247 | (uuid + (strlen(uuid) - 8)) : (uuid), |
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
new file mode 100644
index 000000000000..93e08446a87d
--- /dev/null
+++ b/drivers/md/dm-log-writes.c
@@ -0,0 +1,825 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2014 Facebook. All rights reserved. | ||
3 | * | ||
4 | * This file is released under the GPL. | ||
5 | */ | ||
6 | |||
7 | #include <linux/device-mapper.h> | ||
8 | |||
9 | #include <linux/module.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/blkdev.h> | ||
12 | #include <linux/bio.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/kthread.h> | ||
15 | #include <linux/freezer.h> | ||
16 | |||
17 | #define DM_MSG_PREFIX "log-writes" | ||
18 | |||
19 | /* | ||
20 | * This target will sequentially log all writes to the target device onto the | ||
21 | * log device. This is helpful for replaying writes to check for fs consistency | ||
22 | * at all times. This target provides a mechanism to mark specific events to | ||
23 | * check data at a later time. So for example you would: | ||
24 | * | ||
25 | * write data | ||
26 | * fsync | ||
27 | * dmsetup message /dev/whatever mark mymark | ||
28 | * unmount /mnt/test | ||
29 | * | ||
30 | * Then replay the log up to mymark and check the contents of the replay to | ||
31 | * verify it matches what was written. | ||
32 | * | ||
33 | * We log writes only after they have been flushed, this makes the log describe | ||
34 | * close to the order in which the data hits the actual disk, not its cache. So | ||
35 | * for example the following sequence (W means write, C means complete) | ||
36 | * | ||
37 | * Wa,Wb,Wc,Cc,Ca,FLUSH,FUAd,Cb,CFLUSH,CFUAd | ||
38 | * | ||
39 | * Would result in the log looking like this: | ||
40 | * | ||
41 | * c,a,flush,fuad,b,<other writes>,<next flush> | ||
42 | * | ||
43 | * This is meant to help expose problems where file systems do not properly wait | ||
44 | * on data being written before invoking a FLUSH. FUA bypasses cache so once it | ||
45 | * completes it is added to the log as it should be on disk. | ||
46 | * | ||
47 | * We treat DISCARDs as if they don't bypass cache so that they are logged in | ||
48 | * order of completion along with the normal writes. If we didn't do it this | ||
49 | * way we would process all the discards first and then write all the data, when | ||
50 | * in fact we want to do the data and the discard in the order that they | ||
51 | * completed. | ||
52 | */ | ||
53 | #define LOG_FLUSH_FLAG (1 << 0) | ||
54 | #define LOG_FUA_FLAG (1 << 1) | ||
55 | #define LOG_DISCARD_FLAG (1 << 2) | ||
56 | #define LOG_MARK_FLAG (1 << 3) | ||
57 | |||
58 | #define WRITE_LOG_VERSION 1 | ||
59 | #define WRITE_LOG_MAGIC 0x6a736677736872 | ||
60 | |||
61 | /* | ||
62 | * The disk format for this is braindead simple. | ||
63 | * | ||
64 | * At byte 0 we have our super, followed by the following sequence for | ||
65 | * nr_entries: | ||
66 | * | ||
67 | * [ 1 sector ][ entry->nr_sectors ] | ||
68 | * [log_write_entry][ data written ] | ||
69 | * | ||
70 | * The log_write_entry takes up a full sector so we can have arbitrary length | ||
71 | * marks and it leaves us room for extra content in the future. | ||
72 | */ | ||
73 | |||
74 | /* | ||
75 | * Basic info about the log for userspace. | ||
76 | */ | ||
77 | struct log_write_super { | ||
78 | __le64 magic; | ||
79 | __le64 version; | ||
80 | __le64 nr_entries; | ||
81 | __le32 sectorsize; | ||
82 | }; | ||
83 | |||
84 | /* | ||
85 | * sector - the sector we wrote. | ||
86 | * nr_sectors - the number of sectors we wrote. | ||
87 | * flags - flags for this log entry. | ||
88 | * data_len - the size of the data in this log entry, this is for private log | ||
89 | * entry stuff, the MARK data provided by userspace for example. | ||
90 | */ | ||
91 | struct log_write_entry { | ||
92 | __le64 sector; | ||
93 | __le64 nr_sectors; | ||
94 | __le64 flags; | ||
95 | __le64 data_len; | ||
96 | }; | ||
97 | |||
98 | struct log_writes_c { | ||
99 | struct dm_dev *dev; | ||
100 | struct dm_dev *logdev; | ||
101 | u64 logged_entries; | ||
102 | u32 sectorsize; | ||
103 | atomic_t io_blocks; | ||
104 | atomic_t pending_blocks; | ||
105 | sector_t next_sector; | ||
106 | sector_t end_sector; | ||
107 | bool logging_enabled; | ||
108 | bool device_supports_discard; | ||
109 | spinlock_t blocks_lock; | ||
110 | struct list_head unflushed_blocks; | ||
111 | struct list_head logging_blocks; | ||
112 | wait_queue_head_t wait; | ||
113 | struct task_struct *log_kthread; | ||
114 | }; | ||
115 | |||
116 | struct pending_block { | ||
117 | int vec_cnt; | ||
118 | u64 flags; | ||
119 | sector_t sector; | ||
120 | sector_t nr_sectors; | ||
121 | char *data; | ||
122 | u32 datalen; | ||
123 | struct list_head list; | ||
124 | struct bio_vec vecs[0]; | ||
125 | }; | ||
126 | |||
127 | struct per_bio_data { | ||
128 | struct pending_block *block; | ||
129 | }; | ||
130 | |||
131 | static void put_pending_block(struct log_writes_c *lc) | ||
132 | { | ||
133 | if (atomic_dec_and_test(&lc->pending_blocks)) { | ||
134 | smp_mb__after_atomic(); | ||
135 | if (waitqueue_active(&lc->wait)) | ||
136 | wake_up(&lc->wait); | ||
137 | } | ||
138 | } | ||
139 | |||
140 | static void put_io_block(struct log_writes_c *lc) | ||
141 | { | ||
142 | if (atomic_dec_and_test(&lc->io_blocks)) { | ||
143 | smp_mb__after_atomic(); | ||
144 | if (waitqueue_active(&lc->wait)) | ||
145 | wake_up(&lc->wait); | ||
146 | } | ||
147 | } | ||
148 | |||
149 | static void log_end_io(struct bio *bio, int err) | ||
150 | { | ||
151 | struct log_writes_c *lc = bio->bi_private; | ||
152 | struct bio_vec *bvec; | ||
153 | int i; | ||
154 | |||
155 | if (err) { | ||
156 | unsigned long flags; | ||
157 | |||
158 | DMERR("Error writing log block, error=%d", err); | ||
159 | spin_lock_irqsave(&lc->blocks_lock, flags); | ||
160 | lc->logging_enabled = false; | ||
161 | spin_unlock_irqrestore(&lc->blocks_lock, flags); | ||
162 | } | ||
163 | |||
164 | bio_for_each_segment_all(bvec, bio, i) | ||
165 | __free_page(bvec->bv_page); | ||
166 | |||
167 | put_io_block(lc); | ||
168 | bio_put(bio); | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Meant to be called if there is an error, it will free all the pages | ||
173 | * associated with the block. | ||
174 | */ | ||
175 | static void free_pending_block(struct log_writes_c *lc, | ||
176 | struct pending_block *block) | ||
177 | { | ||
178 | int i; | ||
179 | |||
180 | for (i = 0; i < block->vec_cnt; i++) { | ||
181 | if (block->vecs[i].bv_page) | ||
182 | __free_page(block->vecs[i].bv_page); | ||
183 | } | ||
184 | kfree(block->data); | ||
185 | kfree(block); | ||
186 | put_pending_block(lc); | ||
187 | } | ||
188 | |||
189 | static int write_metadata(struct log_writes_c *lc, void *entry, | ||
190 | size_t entrylen, void *data, size_t datalen, | ||
191 | sector_t sector) | ||
192 | { | ||
193 | struct bio *bio; | ||
194 | struct page *page; | ||
195 | void *ptr; | ||
196 | size_t ret; | ||
197 | |||
198 | bio = bio_alloc(GFP_KERNEL, 1); | ||
199 | if (!bio) { | ||
200 | DMERR("Couldn't alloc log bio"); | ||
201 | goto error; | ||
202 | } | ||
203 | bio->bi_iter.bi_size = 0; | ||
204 | bio->bi_iter.bi_sector = sector; | ||
205 | bio->bi_bdev = lc->logdev->bdev; | ||
206 | bio->bi_end_io = log_end_io; | ||
207 | bio->bi_private = lc; | ||
208 | set_bit(BIO_UPTODATE, &bio->bi_flags); | ||
209 | |||
210 | page = alloc_page(GFP_KERNEL); | ||
211 | if (!page) { | ||
212 | DMERR("Couldn't alloc log page"); | ||
213 | bio_put(bio); | ||
214 | goto error; | ||
215 | } | ||
216 | |||
217 | ptr = kmap_atomic(page); | ||
218 | memcpy(ptr, entry, entrylen); | ||
219 | if (datalen) | ||
220 | memcpy(ptr + entrylen, data, datalen); | ||
221 | memset(ptr + entrylen + datalen, 0, | ||
222 | lc->sectorsize - entrylen - datalen); | ||
223 | kunmap_atomic(ptr); | ||
224 | |||
225 | ret = bio_add_page(bio, page, lc->sectorsize, 0); | ||
226 | if (ret != lc->sectorsize) { | ||
227 | DMERR("Couldn't add page to the log block"); | ||
228 | goto error_bio; | ||
229 | } | ||
230 | submit_bio(WRITE, bio); | ||
231 | return 0; | ||
232 | error_bio: | ||
233 | bio_put(bio); | ||
234 | __free_page(page); | ||
235 | error: | ||
236 | put_io_block(lc); | ||
237 | return -1; | ||
238 | } | ||
239 | |||
240 | static int log_one_block(struct log_writes_c *lc, | ||
241 | struct pending_block *block, sector_t sector) | ||
242 | { | ||
243 | struct bio *bio; | ||
244 | struct log_write_entry entry; | ||
245 | size_t ret; | ||
246 | int i; | ||
247 | |||
248 | entry.sector = cpu_to_le64(block->sector); | ||
249 | entry.nr_sectors = cpu_to_le64(block->nr_sectors); | ||
250 | entry.flags = cpu_to_le64(block->flags); | ||
251 | entry.data_len = cpu_to_le64(block->datalen); | ||
252 | if (write_metadata(lc, &entry, sizeof(entry), block->data, | ||
253 | block->datalen, sector)) { | ||
254 | free_pending_block(lc, block); | ||
255 | return -1; | ||
256 | } | ||
257 | |||
258 | if (!block->vec_cnt) | ||
259 | goto out; | ||
260 | sector++; | ||
261 | |||
262 | bio = bio_alloc(GFP_KERNEL, block->vec_cnt); | ||
263 | if (!bio) { | ||
264 | DMERR("Couldn't alloc log bio"); | ||
265 | goto error; | ||
266 | } | ||
267 | atomic_inc(&lc->io_blocks); | ||
268 | bio->bi_iter.bi_size = 0; | ||
269 | bio->bi_iter.bi_sector = sector; | ||
270 | bio->bi_bdev = lc->logdev->bdev; | ||
271 | bio->bi_end_io = log_end_io; | ||
272 | bio->bi_private = lc; | ||
273 | set_bit(BIO_UPTODATE, &bio->bi_flags); | ||
274 | |||
275 | for (i = 0; i < block->vec_cnt; i++) { | ||
276 | /* | ||
277 | * The page offset is always 0 because we allocate a new page | ||
278 | * for every bvec in the original bio for simplicity sake. | ||
279 | */ | ||
280 | ret = bio_add_page(bio, block->vecs[i].bv_page, | ||
281 | block->vecs[i].bv_len, 0); | ||
282 | if (ret != block->vecs[i].bv_len) { | ||
283 | atomic_inc(&lc->io_blocks); | ||
284 | submit_bio(WRITE, bio); | ||
285 | bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i); | ||
286 | if (!bio) { | ||
287 | DMERR("Couldn't alloc log bio"); | ||
288 | goto error; | ||
289 | } | ||
290 | bio->bi_iter.bi_size = 0; | ||
291 | bio->bi_iter.bi_sector = sector; | ||
292 | bio->bi_bdev = lc->logdev->bdev; | ||
293 | bio->bi_end_io = log_end_io; | ||
294 | bio->bi_private = lc; | ||
295 | set_bit(BIO_UPTODATE, &bio->bi_flags); | ||
296 | |||
297 | ret = bio_add_page(bio, block->vecs[i].bv_page, | ||
298 | block->vecs[i].bv_len, 0); | ||
299 | if (ret != block->vecs[i].bv_len) { | ||
300 | DMERR("Couldn't add page on new bio?"); | ||
301 | bio_put(bio); | ||
302 | goto error; | ||
303 | } | ||
304 | } | ||
305 | sector += block->vecs[i].bv_len >> SECTOR_SHIFT; | ||
306 | } | ||
307 | submit_bio(WRITE, bio); | ||
308 | out: | ||
309 | kfree(block->data); | ||
310 | kfree(block); | ||
311 | put_pending_block(lc); | ||
312 | return 0; | ||
313 | error: | ||
314 | free_pending_block(lc, block); | ||
315 | put_io_block(lc); | ||
316 | return -1; | ||
317 | } | ||
318 | |||
319 | static int log_super(struct log_writes_c *lc) | ||
320 | { | ||
321 | struct log_write_super super; | ||
322 | |||
323 | super.magic = cpu_to_le64(WRITE_LOG_MAGIC); | ||
324 | super.version = cpu_to_le64(WRITE_LOG_VERSION); | ||
325 | super.nr_entries = cpu_to_le64(lc->logged_entries); | ||
326 | super.sectorsize = cpu_to_le32(lc->sectorsize); | ||
327 | |||
328 | if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) { | ||
329 | DMERR("Couldn't write super"); | ||
330 | return -1; | ||
331 | } | ||
332 | |||
333 | return 0; | ||
334 | } | ||
335 | |||
336 | static inline sector_t logdev_last_sector(struct log_writes_c *lc) | ||
337 | { | ||
338 | return i_size_read(lc->logdev->bdev->bd_inode) >> SECTOR_SHIFT; | ||
339 | } | ||
340 | |||
341 | static int log_writes_kthread(void *arg) | ||
342 | { | ||
343 | struct log_writes_c *lc = (struct log_writes_c *)arg; | ||
344 | sector_t sector = 0; | ||
345 | |||
346 | while (!kthread_should_stop()) { | ||
347 | bool super = false; | ||
348 | bool logging_enabled; | ||
349 | struct pending_block *block = NULL; | ||
350 | int ret; | ||
351 | |||
352 | spin_lock_irq(&lc->blocks_lock); | ||
353 | if (!list_empty(&lc->logging_blocks)) { | ||
354 | block = list_first_entry(&lc->logging_blocks, | ||
355 | struct pending_block, list); | ||
356 | list_del_init(&block->list); | ||
357 | if (!lc->logging_enabled) | ||
358 | goto next; | ||
359 | |||
360 | sector = lc->next_sector; | ||
361 | if (block->flags & LOG_DISCARD_FLAG) | ||
362 | lc->next_sector++; | ||
363 | else | ||
364 | lc->next_sector += block->nr_sectors + 1; | ||
365 | |||
366 | /* | ||
367 | * Apparently the size of the device may not be known | ||
368 | * right away, so handle this properly. | ||
369 | */ | ||
370 | if (!lc->end_sector) | ||
371 | lc->end_sector = logdev_last_sector(lc); | ||
372 | if (lc->end_sector && | ||
373 | lc->next_sector >= lc->end_sector) { | ||
374 | DMERR("Ran out of space on the logdev"); | ||
375 | lc->logging_enabled = false; | ||
376 | goto next; | ||
377 | } | ||
378 | lc->logged_entries++; | ||
379 | atomic_inc(&lc->io_blocks); | ||
380 | |||
381 | super = (block->flags & (LOG_FUA_FLAG | LOG_MARK_FLAG)); | ||
382 | if (super) | ||
383 | atomic_inc(&lc->io_blocks); | ||
384 | } | ||
385 | next: | ||
386 | logging_enabled = lc->logging_enabled; | ||
387 | spin_unlock_irq(&lc->blocks_lock); | ||
388 | if (block) { | ||
389 | if (logging_enabled) { | ||
390 | ret = log_one_block(lc, block, sector); | ||
391 | if (!ret && super) | ||
392 | ret = log_super(lc); | ||
393 | if (ret) { | ||
394 | spin_lock_irq(&lc->blocks_lock); | ||
395 | lc->logging_enabled = false; | ||
396 | spin_unlock_irq(&lc->blocks_lock); | ||
397 | } | ||
398 | } else | ||
399 | free_pending_block(lc, block); | ||
400 | continue; | ||
401 | } | ||
402 | |||
403 | if (!try_to_freeze()) { | ||
404 | set_current_state(TASK_INTERRUPTIBLE); | ||
405 | if (!kthread_should_stop() && | ||
406 | !atomic_read(&lc->pending_blocks)) | ||
407 | schedule(); | ||
408 | __set_current_state(TASK_RUNNING); | ||
409 | } | ||
410 | } | ||
411 | return 0; | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * Construct a log-writes mapping: | ||
416 | * log-writes <dev_path> <log_dev_path> | ||
417 | */ | ||
418 | static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) | ||
419 | { | ||
420 | struct log_writes_c *lc; | ||
421 | struct dm_arg_set as; | ||
422 | const char *devname, *logdevname; | ||
423 | |||
424 | as.argc = argc; | ||
425 | as.argv = argv; | ||
426 | |||
427 | if (argc < 2) { | ||
428 | ti->error = "Invalid argument count"; | ||
429 | return -EINVAL; | ||
430 | } | ||
431 | |||
432 | lc = kzalloc(sizeof(struct log_writes_c), GFP_KERNEL); | ||
433 | if (!lc) { | ||
434 | ti->error = "Cannot allocate context"; | ||
435 | return -ENOMEM; | ||
436 | } | ||
437 | spin_lock_init(&lc->blocks_lock); | ||
438 | INIT_LIST_HEAD(&lc->unflushed_blocks); | ||
439 | INIT_LIST_HEAD(&lc->logging_blocks); | ||
440 | init_waitqueue_head(&lc->wait); | ||
441 | lc->sectorsize = 1 << SECTOR_SHIFT; | ||
442 | atomic_set(&lc->io_blocks, 0); | ||
443 | atomic_set(&lc->pending_blocks, 0); | ||
444 | |||
445 | devname = dm_shift_arg(&as); | ||
446 | if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &lc->dev)) { | ||
447 | ti->error = "Device lookup failed"; | ||
448 | goto bad; | ||
449 | } | ||
450 | |||
451 | logdevname = dm_shift_arg(&as); | ||
452 | if (dm_get_device(ti, logdevname, dm_table_get_mode(ti->table), &lc->logdev)) { | ||
453 | ti->error = "Log device lookup failed"; | ||
454 | dm_put_device(ti, lc->dev); | ||
455 | goto bad; | ||
456 | } | ||
457 | |||
458 | lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); | ||
459 | if (!lc->log_kthread) { | ||
460 | ti->error = "Couldn't alloc kthread"; | ||
461 | dm_put_device(ti, lc->dev); | ||
462 | dm_put_device(ti, lc->logdev); | ||
463 | goto bad; | ||
464 | } | ||
465 | |||
466 | /* We put the super at sector 0, start logging at sector 1 */ | ||
467 | lc->next_sector = 1; | ||
468 | lc->logging_enabled = true; | ||
469 | lc->end_sector = logdev_last_sector(lc); | ||
470 | lc->device_supports_discard = true; | ||
471 | |||
472 | ti->num_flush_bios = 1; | ||
473 | ti->flush_supported = true; | ||
474 | ti->num_discard_bios = 1; | ||
475 | ti->discards_supported = true; | ||
476 | ti->per_bio_data_size = sizeof(struct per_bio_data); | ||
477 | ti->private = lc; | ||
478 | return 0; | ||
479 | |||
480 | bad: | ||
481 | kfree(lc); | ||
482 | return -EINVAL; | ||
483 | } | ||
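Given the two positional arguments parsed above, creating an instance is presumably just a matter of handing dmsetup a one-line table; the device paths and sector count below are hypothetical, and the usual dmsetup invocation is assumed rather than shown in this patch:

    # hypothetical: 1 GiB origin on /dev/vdb, log kept on /dev/vdc
    dmsetup create lw --table "0 2097152 log-writes /dev/vdb /dev/vdc"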
484 | |||
485 | static int log_mark(struct log_writes_c *lc, char *data) | ||
486 | { | ||
487 | struct pending_block *block; | ||
488 | size_t maxsize = lc->sectorsize - sizeof(struct log_write_entry); | ||
489 | |||
490 | block = kzalloc(sizeof(struct pending_block), GFP_KERNEL); | ||
491 | if (!block) { | ||
492 | DMERR("Error allocating pending block"); | ||
493 | return -ENOMEM; | ||
494 | } | ||
495 | |||
496 | block->data = kstrndup(data, maxsize, GFP_KERNEL); | ||
497 | if (!block->data) { | ||
498 | DMERR("Error copying mark data"); | ||
499 | kfree(block); | ||
500 | return -ENOMEM; | ||
501 | } | ||
502 | atomic_inc(&lc->pending_blocks); | ||
503 | block->datalen = strlen(block->data); | ||
504 | block->flags |= LOG_MARK_FLAG; | ||
505 | spin_lock_irq(&lc->blocks_lock); | ||
506 | list_add_tail(&block->list, &lc->logging_blocks); | ||
507 | spin_unlock_irq(&lc->blocks_lock); | ||
508 | wake_up_process(lc->log_kthread); | ||
509 | return 0; | ||
510 | } | ||
511 | |||
512 | static void log_writes_dtr(struct dm_target *ti) | ||
513 | { | ||
514 | struct log_writes_c *lc = ti->private; | ||
515 | |||
516 | spin_lock_irq(&lc->blocks_lock); | ||
517 | list_splice_init(&lc->unflushed_blocks, &lc->logging_blocks); | ||
518 | spin_unlock_irq(&lc->blocks_lock); | ||
519 | |||
520 | /* | ||
521 | * This is just nice to have since it'll update the super to include the | ||
522 | * unflushed blocks, if it fails we don't really care. | ||
523 | */ | ||
524 | log_mark(lc, "dm-log-writes-end"); | ||
525 | wake_up_process(lc->log_kthread); | ||
526 | wait_event(lc->wait, !atomic_read(&lc->io_blocks) && | ||
527 | !atomic_read(&lc->pending_blocks)); | ||
528 | kthread_stop(lc->log_kthread); | ||
529 | |||
530 | WARN_ON(!list_empty(&lc->logging_blocks)); | ||
531 | WARN_ON(!list_empty(&lc->unflushed_blocks)); | ||
532 | dm_put_device(ti, lc->dev); | ||
533 | dm_put_device(ti, lc->logdev); | ||
534 | kfree(lc); | ||
535 | } | ||
536 | |||
537 | static void normal_map_bio(struct dm_target *ti, struct bio *bio) | ||
538 | { | ||
539 | struct log_writes_c *lc = ti->private; | ||
540 | |||
541 | bio->bi_bdev = lc->dev->bdev; | ||
542 | } | ||
543 | |||
544 | static int log_writes_map(struct dm_target *ti, struct bio *bio) | ||
545 | { | ||
546 | struct log_writes_c *lc = ti->private; | ||
547 | struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); | ||
548 | struct pending_block *block; | ||
549 | struct bvec_iter iter; | ||
550 | struct bio_vec bv; | ||
551 | size_t alloc_size; | ||
552 | int i = 0; | ||
553 | bool flush_bio = (bio->bi_rw & REQ_FLUSH); | ||
554 | bool fua_bio = (bio->bi_rw & REQ_FUA); | ||
555 | bool discard_bio = (bio->bi_rw & REQ_DISCARD); | ||
556 | |||
557 | pb->block = NULL; | ||
558 | |||
559 | /* Don't bother doing anything if logging has been disabled */ | ||
560 | if (!lc->logging_enabled) | ||
561 | goto map_bio; | ||
562 | |||
563 | /* | ||
564 | * Map reads as normal. | ||
565 | */ | ||
566 | if (bio_data_dir(bio) == READ) | ||
567 | goto map_bio; | ||
568 | |||
569 | /* No sectors and not a flush? Don't care */ | ||
570 | if (!bio_sectors(bio) && !flush_bio) | ||
571 | goto map_bio; | ||
572 | |||
573 | /* | ||
574 | * Discards will have bi_size set but there's no actual data, so just | ||
575 | * allocate the size of the pending block. | ||
576 | */ | ||
577 | if (discard_bio) | ||
578 | alloc_size = sizeof(struct pending_block); | ||
579 | else | ||
580 | alloc_size = sizeof(struct pending_block) + sizeof(struct bio_vec) * bio_segments(bio); | ||
581 | |||
582 | block = kzalloc(alloc_size, GFP_NOIO); | ||
583 | if (!block) { | ||
584 | DMERR("Error allocating pending block"); | ||
585 | spin_lock_irq(&lc->blocks_lock); | ||
586 | lc->logging_enabled = false; | ||
587 | spin_unlock_irq(&lc->blocks_lock); | ||
588 | return -ENOMEM; | ||
589 | } | ||
590 | INIT_LIST_HEAD(&block->list); | ||
591 | pb->block = block; | ||
592 | atomic_inc(&lc->pending_blocks); | ||
593 | |||
594 | if (flush_bio) | ||
595 | block->flags |= LOG_FLUSH_FLAG; | ||
596 | if (fua_bio) | ||
597 | block->flags |= LOG_FUA_FLAG; | ||
598 | if (discard_bio) | ||
599 | block->flags |= LOG_DISCARD_FLAG; | ||
600 | |||
601 | block->sector = bio->bi_iter.bi_sector; | ||
602 | block->nr_sectors = bio_sectors(bio); | ||
603 | |||
604 | /* We don't need the data, just submit */ | ||
605 | if (discard_bio) { | ||
606 | WARN_ON(flush_bio || fua_bio); | ||
607 | if (lc->device_supports_discard) | ||
608 | goto map_bio; | ||
609 | bio_endio(bio, 0); | ||
610 | return DM_MAPIO_SUBMITTED; | ||
611 | } | ||
612 | |||
613 | /* Flush bio, splice the unflushed blocks onto this list and submit */ | ||
614 | if (flush_bio && !bio_sectors(bio)) { | ||
615 | spin_lock_irq(&lc->blocks_lock); | ||
616 | list_splice_init(&lc->unflushed_blocks, &block->list); | ||
617 | spin_unlock_irq(&lc->blocks_lock); | ||
618 | goto map_bio; | ||
619 | } | ||
620 | |||
621 | /* | ||
622 | * We will write this bio somewhere else way later so we need to copy | ||
623 | * the actual contents into new pages so we know the data will always be | ||
624 | * there. | ||
625 | * | ||
626 | * We do this because this could be a bio from O_DIRECT in which case we | ||
627 | * can't just hold onto the page until some later point, we have to | ||
628 | * manually copy the contents. | ||
629 | */ | ||
630 | bio_for_each_segment(bv, bio, iter) { | ||
631 | struct page *page; | ||
632 | void *src, *dst; | ||
633 | |||
634 | page = alloc_page(GFP_NOIO); | ||
635 | if (!page) { | ||
636 | DMERR("Error allocing page"); | ||
637 | free_pending_block(lc, block); | ||
638 | spin_lock_irq(&lc->blocks_lock); | ||
639 | lc->logging_enabled = false; | ||
640 | spin_unlock_irq(&lc->blocks_lock); | ||
641 | return -ENOMEM; | ||
642 | } | ||
643 | |||
644 | src = kmap_atomic(bv.bv_page); | ||
645 | dst = kmap_atomic(page); | ||
646 | memcpy(dst, src + bv.bv_offset, bv.bv_len); | ||
647 | kunmap_atomic(dst); | ||
648 | kunmap_atomic(src); | ||
649 | block->vecs[i].bv_page = page; | ||
650 | block->vecs[i].bv_len = bv.bv_len; | ||
651 | block->vec_cnt++; | ||
652 | i++; | ||
653 | } | ||
654 | |||
655 | /* Had a flush with data in it, weird */ | ||
656 | if (flush_bio) { | ||
657 | spin_lock_irq(&lc->blocks_lock); | ||
658 | list_splice_init(&lc->unflushed_blocks, &block->list); | ||
659 | spin_unlock_irq(&lc->blocks_lock); | ||
660 | } | ||
661 | map_bio: | ||
662 | normal_map_bio(ti, bio); | ||
663 | return DM_MAPIO_REMAPPED; | ||
664 | } | ||
665 | |||
666 | static int normal_end_io(struct dm_target *ti, struct bio *bio, int error) | ||
667 | { | ||
668 | struct log_writes_c *lc = ti->private; | ||
669 | struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); | ||
670 | |||
671 | if (bio_data_dir(bio) == WRITE && pb->block) { | ||
672 | struct pending_block *block = pb->block; | ||
673 | unsigned long flags; | ||
674 | |||
675 | spin_lock_irqsave(&lc->blocks_lock, flags); | ||
676 | if (block->flags & LOG_FLUSH_FLAG) { | ||
677 | list_splice_tail_init(&block->list, &lc->logging_blocks); | ||
678 | list_add_tail(&block->list, &lc->logging_blocks); | ||
679 | wake_up_process(lc->log_kthread); | ||
680 | } else if (block->flags & LOG_FUA_FLAG) { | ||
681 | list_add_tail(&block->list, &lc->logging_blocks); | ||
682 | wake_up_process(lc->log_kthread); | ||
683 | } else | ||
684 | list_add_tail(&block->list, &lc->unflushed_blocks); | ||
685 | spin_unlock_irqrestore(&lc->blocks_lock, flags); | ||
686 | } | ||
687 | |||
688 | return error; | ||
689 | } | ||
690 | |||
691 | /* | ||
692 | * INFO format: <logged entries> <highest allocated sector> | ||
693 | */ | ||
694 | static void log_writes_status(struct dm_target *ti, status_type_t type, | ||
695 | unsigned status_flags, char *result, | ||
696 | unsigned maxlen) | ||
697 | { | ||
698 | unsigned sz = 0; | ||
699 | struct log_writes_c *lc = ti->private; | ||
700 | |||
701 | switch (type) { | ||
702 | case STATUSTYPE_INFO: | ||
703 | DMEMIT("%llu %llu", lc->logged_entries, | ||
704 | (unsigned long long)lc->next_sector - 1); | ||
705 | if (!lc->logging_enabled) | ||
706 | DMEMIT(" logging_disabled"); | ||
707 | break; | ||
708 | |||
709 | case STATUSTYPE_TABLE: | ||
710 | DMEMIT("%s %s", lc->dev->name, lc->logdev->name); | ||
711 | break; | ||
712 | } | ||
713 | } | ||
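For reference, this INFO line is what dmsetup status reports for the target: a device that has logged twelve entries with next_sector at 24577 would presumably show a trailing "12 24576", with " logging_disabled" appended once logging has been shut off.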
714 | |||
715 | static int log_writes_ioctl(struct dm_target *ti, unsigned int cmd, | ||
716 | unsigned long arg) | ||
717 | { | ||
718 | struct log_writes_c *lc = ti->private; | ||
719 | struct dm_dev *dev = lc->dev; | ||
720 | int r = 0; | ||
721 | |||
722 | /* | ||
723 | * Only pass ioctls through if the device sizes match exactly. | ||
724 | */ | ||
725 | if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) | ||
726 | r = scsi_verify_blk_ioctl(NULL, cmd); | ||
727 | |||
728 | return r ? : __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg); | ||
729 | } | ||
730 | |||
731 | static int log_writes_merge(struct dm_target *ti, struct bvec_merge_data *bvm, | ||
732 | struct bio_vec *biovec, int max_size) | ||
733 | { | ||
734 | struct log_writes_c *lc = ti->private; | ||
735 | struct request_queue *q = bdev_get_queue(lc->dev->bdev); | ||
736 | |||
737 | if (!q->merge_bvec_fn) | ||
738 | return max_size; | ||
739 | |||
740 | bvm->bi_bdev = lc->dev->bdev; | ||
741 | bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector); | ||
742 | |||
743 | return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); | ||
744 | } | ||
745 | |||
746 | static int log_writes_iterate_devices(struct dm_target *ti, | ||
747 | iterate_devices_callout_fn fn, | ||
748 | void *data) | ||
749 | { | ||
750 | struct log_writes_c *lc = ti->private; | ||
751 | |||
752 | return fn(ti, lc->dev, 0, ti->len, data); | ||
753 | } | ||
754 | |||
755 | /* | ||
756 | * Messages supported: | ||
757 | * mark <mark data> - specify the marked data. | ||
758 | */ | ||
759 | static int log_writes_message(struct dm_target *ti, unsigned argc, char **argv) | ||
760 | { | ||
761 | int r = -EINVAL; | ||
762 | struct log_writes_c *lc = ti->private; | ||
763 | |||
764 | if (argc != 2) { | ||
765 | DMWARN("Invalid log-writes message arguments, expect 2 arguments, got %d", argc); | ||
766 | return r; | ||
767 | } | ||
768 | |||
769 | if (!strcasecmp(argv[0], "mark")) | ||
770 | r = log_mark(lc, argv[1]); | ||
771 | else | ||
772 | DMWARN("Unrecognised log writes target message received: %s", argv[0]); | ||
773 | |||
774 | return r; | ||
775 | } | ||
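Marks are injected through the normal device-mapper message path, so a test harness would presumably do something like the following at an interesting point in time (device name hypothetical):

    dmsetup message lw 0 mark fsync-done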
776 | |||
777 | static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limits) | ||
778 | { | ||
779 | struct log_writes_c *lc = ti->private; | ||
780 | struct request_queue *q = bdev_get_queue(lc->dev->bdev); | ||
781 | |||
782 | if (!q || !blk_queue_discard(q)) { | ||
783 | lc->device_supports_discard = false; | ||
784 | limits->discard_granularity = 1 << SECTOR_SHIFT; | ||
785 | limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT); | ||
786 | } | ||
787 | } | ||
788 | |||
789 | static struct target_type log_writes_target = { | ||
790 | .name = "log-writes", | ||
791 | .version = {1, 0, 0}, | ||
792 | .module = THIS_MODULE, | ||
793 | .ctr = log_writes_ctr, | ||
794 | .dtr = log_writes_dtr, | ||
795 | .map = log_writes_map, | ||
796 | .end_io = normal_end_io, | ||
797 | .status = log_writes_status, | ||
798 | .ioctl = log_writes_ioctl, | ||
799 | .merge = log_writes_merge, | ||
800 | .message = log_writes_message, | ||
801 | .iterate_devices = log_writes_iterate_devices, | ||
802 | .io_hints = log_writes_io_hints, | ||
803 | }; | ||
804 | |||
805 | static int __init dm_log_writes_init(void) | ||
806 | { | ||
807 | int r = dm_register_target(&log_writes_target); | ||
808 | |||
809 | if (r < 0) | ||
810 | DMERR("register failed %d", r); | ||
811 | |||
812 | return r; | ||
813 | } | ||
814 | |||
815 | static void __exit dm_log_writes_exit(void) | ||
816 | { | ||
817 | dm_unregister_target(&log_writes_target); | ||
818 | } | ||
819 | |||
820 | module_init(dm_log_writes_init); | ||
821 | module_exit(dm_log_writes_exit); | ||
822 | |||
823 | MODULE_DESCRIPTION(DM_NAME " log writes target"); | ||
824 | MODULE_AUTHOR("Josef Bacik <jbacik@fb.com>"); | ||
825 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d376dc87716e..63953477a07c 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -428,7 +428,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone, | |||
428 | } else { | 428 | } else { |
429 | /* blk-mq request-based interface */ | 429 | /* blk-mq request-based interface */ |
430 | *__clone = blk_get_request(bdev_get_queue(bdev), | 430 | *__clone = blk_get_request(bdev_get_queue(bdev), |
431 | rq_data_dir(rq), GFP_KERNEL); | 431 | rq_data_dir(rq), GFP_ATOMIC); |
432 | if (IS_ERR(*__clone)) | 432 | if (IS_ERR(*__clone)) |
433 | /* ENOMEM, requeue */ | 433 | /* ENOMEM, requeue */ |
434 | return r; | 434 | return r; |
@@ -1627,7 +1627,7 @@ static int __pgpath_busy(struct pgpath *pgpath) | |||
1627 | { | 1627 | { |
1628 | struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); | 1628 | struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); |
1629 | 1629 | ||
1630 | return dm_underlying_device_busy(q); | 1630 | return blk_lld_busy(q); |
1631 | } | 1631 | } |
1632 | 1632 | ||
1633 | /* | 1633 | /* |
@@ -1703,7 +1703,7 @@ out: | |||
1703 | *---------------------------------------------------------------*/ | 1703 | *---------------------------------------------------------------*/ |
1704 | static struct target_type multipath_target = { | 1704 | static struct target_type multipath_target = { |
1705 | .name = "multipath", | 1705 | .name = "multipath", |
1706 | .version = {1, 8, 0}, | 1706 | .version = {1, 9, 0}, |
1707 | .module = THIS_MODULE, | 1707 | .module = THIS_MODULE, |
1708 | .ctr = multipath_ctr, | 1708 | .ctr = multipath_ctr, |
1709 | .dtr = multipath_dtr, | 1709 | .dtr = multipath_dtr, |
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c
index c62c5ab6aed5..7e818f5f1dc4 100644
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -11,7 +11,7 @@ | |||
11 | struct dm_sysfs_attr { | 11 | struct dm_sysfs_attr { |
12 | struct attribute attr; | 12 | struct attribute attr; |
13 | ssize_t (*show)(struct mapped_device *, char *); | 13 | ssize_t (*show)(struct mapped_device *, char *); |
14 | ssize_t (*store)(struct mapped_device *, char *); | 14 | ssize_t (*store)(struct mapped_device *, const char *, size_t count); |
15 | }; | 15 | }; |
16 | 16 | ||
17 | #define DM_ATTR_RO(_name) \ | 17 | #define DM_ATTR_RO(_name) \ |
@@ -39,6 +39,31 @@ static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr, | |||
39 | return ret; | 39 | return ret; |
40 | } | 40 | } |
41 | 41 | ||
42 | #define DM_ATTR_RW(_name) \ | ||
43 | struct dm_sysfs_attr dm_attr_##_name = \ | ||
44 | __ATTR(_name, S_IRUGO | S_IWUSR, dm_attr_##_name##_show, dm_attr_##_name##_store) | ||
45 | |||
46 | static ssize_t dm_attr_store(struct kobject *kobj, struct attribute *attr, | ||
47 | const char *page, size_t count) | ||
48 | { | ||
49 | struct dm_sysfs_attr *dm_attr; | ||
50 | struct mapped_device *md; | ||
51 | ssize_t ret; | ||
52 | |||
53 | dm_attr = container_of(attr, struct dm_sysfs_attr, attr); | ||
54 | if (!dm_attr->store) | ||
55 | return -EIO; | ||
56 | |||
57 | md = dm_get_from_kobject(kobj); | ||
58 | if (!md) | ||
59 | return -EINVAL; | ||
60 | |||
61 | ret = dm_attr->store(md, page, count); | ||
62 | dm_put(md); | ||
63 | |||
64 | return ret; | ||
65 | } | ||
66 | |||
42 | static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf) | 67 | static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf) |
43 | { | 68 | { |
44 | if (dm_copy_name_and_uuid(md, buf, NULL)) | 69 | if (dm_copy_name_and_uuid(md, buf, NULL)) |
@@ -64,25 +89,33 @@ static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf) | |||
64 | return strlen(buf); | 89 | return strlen(buf); |
65 | } | 90 | } |
66 | 91 | ||
92 | static ssize_t dm_attr_use_blk_mq_show(struct mapped_device *md, char *buf) | ||
93 | { | ||
94 | sprintf(buf, "%d\n", dm_use_blk_mq(md)); | ||
95 | |||
96 | return strlen(buf); | ||
97 | } | ||
98 | |||
67 | static DM_ATTR_RO(name); | 99 | static DM_ATTR_RO(name); |
68 | static DM_ATTR_RO(uuid); | 100 | static DM_ATTR_RO(uuid); |
69 | static DM_ATTR_RO(suspended); | 101 | static DM_ATTR_RO(suspended); |
102 | static DM_ATTR_RO(use_blk_mq); | ||
103 | static DM_ATTR_RW(rq_based_seq_io_merge_deadline); | ||
70 | 104 | ||
71 | static struct attribute *dm_attrs[] = { | 105 | static struct attribute *dm_attrs[] = { |
72 | &dm_attr_name.attr, | 106 | &dm_attr_name.attr, |
73 | &dm_attr_uuid.attr, | 107 | &dm_attr_uuid.attr, |
74 | &dm_attr_suspended.attr, | 108 | &dm_attr_suspended.attr, |
109 | &dm_attr_use_blk_mq.attr, | ||
110 | &dm_attr_rq_based_seq_io_merge_deadline.attr, | ||
75 | NULL, | 111 | NULL, |
76 | }; | 112 | }; |
77 | 113 | ||
78 | static const struct sysfs_ops dm_sysfs_ops = { | 114 | static const struct sysfs_ops dm_sysfs_ops = { |
79 | .show = dm_attr_show, | 115 | .show = dm_attr_show, |
116 | .store = dm_attr_store, | ||
80 | }; | 117 | }; |
81 | 118 | ||
82 | /* | ||
83 | * dm kobject is embedded in mapped_device structure | ||
84 | * no need to define release function here | ||
85 | */ | ||
86 | static struct kobj_type dm_ktype = { | 119 | static struct kobj_type dm_ktype = { |
87 | .sysfs_ops = &dm_sysfs_ops, | 120 | .sysfs_ops = &dm_sysfs_ops, |
88 | .default_attrs = dm_attrs, | 121 | .default_attrs = dm_attrs, |
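With the store hook wired into dm_sysfs_ops, the two attributes added here become visible wherever the dm kobject is exposed, typically under /sys/block/dm-N/dm/. A rough illustration (the deadline value is in microseconds, going by the seq_rq_merge_deadline_usecs field added to dm.c further down; the exact accepted range is enforced by the store handler there, not shown in this hunk):

    cat /sys/block/dm-0/dm/use_blk_mq
    echo 8000 > /sys/block/dm-0/dm/rq_based_seq_io_merge_deadline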
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 6554d9148927..d9b00b8565c6 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
19 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
20 | #include <linux/atomic.h> | 20 | #include <linux/atomic.h> |
21 | #include <linux/blk-mq.h> | ||
22 | #include <linux/mount.h> | ||
21 | 23 | ||
22 | #define DM_MSG_PREFIX "table" | 24 | #define DM_MSG_PREFIX "table" |
23 | 25 | ||
@@ -372,23 +374,18 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, | |||
372 | int r; | 374 | int r; |
373 | dev_t uninitialized_var(dev); | 375 | dev_t uninitialized_var(dev); |
374 | struct dm_dev_internal *dd; | 376 | struct dm_dev_internal *dd; |
375 | unsigned int major, minor; | ||
376 | struct dm_table *t = ti->table; | 377 | struct dm_table *t = ti->table; |
377 | char dummy; | 378 | struct block_device *bdev; |
378 | 379 | ||
379 | BUG_ON(!t); | 380 | BUG_ON(!t); |
380 | 381 | ||
381 | if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) { | 382 | /* convert the path to a device */ |
382 | /* Extract the major/minor numbers */ | 383 | bdev = lookup_bdev(path); |
383 | dev = MKDEV(major, minor); | 384 | if (IS_ERR(bdev)) { |
384 | if (MAJOR(dev) != major || MINOR(dev) != minor) | 385 | dev = name_to_dev_t(path); |
385 | return -EOVERFLOW; | 386 | if (!dev) |
387 | return -ENODEV; | ||
386 | } else { | 388 | } else { |
387 | /* convert the path to a device */ | ||
388 | struct block_device *bdev = lookup_bdev(path); | ||
389 | |||
390 | if (IS_ERR(bdev)) | ||
391 | return PTR_ERR(bdev); | ||
392 | dev = bdev->bd_dev; | 389 | dev = bdev->bd_dev; |
393 | bdput(bdev); | 390 | bdput(bdev); |
394 | } | 391 | } |
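The effect of this hunk is that the open-coded major:minor parse is replaced by a fallback to name_to_dev_t(): a table line may now name its underlying device as a filesystem path (resolved by lookup_bdev()), as a major:minor pair such as "8:17", or presumably as any other specifier name_to_dev_t() already accepts for root=, such as a PARTUUID= form. For example, a table referencing the device by number (numbers hypothetical):

    dmsetup create linear0 --table "0 409600 linear 8:17 0"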
@@ -939,7 +936,7 @@ bool dm_table_mq_request_based(struct dm_table *t) | |||
939 | return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED; | 936 | return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED; |
940 | } | 937 | } |
941 | 938 | ||
942 | static int dm_table_alloc_md_mempools(struct dm_table *t) | 939 | static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md) |
943 | { | 940 | { |
944 | unsigned type = dm_table_get_type(t); | 941 | unsigned type = dm_table_get_type(t); |
945 | unsigned per_bio_data_size = 0; | 942 | unsigned per_bio_data_size = 0; |
@@ -957,7 +954,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t) | |||
957 | per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); | 954 | per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); |
958 | } | 955 | } |
959 | 956 | ||
960 | t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size); | 957 | t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size); |
961 | if (!t->mempools) | 958 | if (!t->mempools) |
962 | return -ENOMEM; | 959 | return -ENOMEM; |
963 | 960 | ||
@@ -1127,7 +1124,7 @@ int dm_table_complete(struct dm_table *t) | |||
1127 | return r; | 1124 | return r; |
1128 | } | 1125 | } |
1129 | 1126 | ||
1130 | r = dm_table_alloc_md_mempools(t); | 1127 | r = dm_table_alloc_md_mempools(t, t->md); |
1131 | if (r) | 1128 | if (r) |
1132 | DMERR("unable to allocate mempools"); | 1129 | DMERR("unable to allocate mempools"); |
1133 | 1130 | ||
@@ -1339,14 +1336,14 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) | |||
1339 | continue; | 1336 | continue; |
1340 | 1337 | ||
1341 | if (ti->flush_supported) | 1338 | if (ti->flush_supported) |
1342 | return 1; | 1339 | return true; |
1343 | 1340 | ||
1344 | if (ti->type->iterate_devices && | 1341 | if (ti->type->iterate_devices && |
1345 | ti->type->iterate_devices(ti, device_flush_capable, &flush)) | 1342 | ti->type->iterate_devices(ti, device_flush_capable, &flush)) |
1346 | return 1; | 1343 | return true; |
1347 | } | 1344 | } |
1348 | 1345 | ||
1349 | return 0; | 1346 | return false; |
1350 | } | 1347 | } |
1351 | 1348 | ||
1352 | static bool dm_table_discard_zeroes_data(struct dm_table *t) | 1349 | static bool dm_table_discard_zeroes_data(struct dm_table *t) |
@@ -1359,10 +1356,10 @@ static bool dm_table_discard_zeroes_data(struct dm_table *t) | |||
1359 | ti = dm_table_get_target(t, i++); | 1356 | ti = dm_table_get_target(t, i++); |
1360 | 1357 | ||
1361 | if (ti->discard_zeroes_data_unsupported) | 1358 | if (ti->discard_zeroes_data_unsupported) |
1362 | return 0; | 1359 | return false; |
1363 | } | 1360 | } |
1364 | 1361 | ||
1365 | return 1; | 1362 | return true; |
1366 | } | 1363 | } |
1367 | 1364 | ||
1368 | static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, | 1365 | static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, |
@@ -1408,10 +1405,10 @@ static bool dm_table_all_devices_attribute(struct dm_table *t, | |||
1408 | 1405 | ||
1409 | if (!ti->type->iterate_devices || | 1406 | if (!ti->type->iterate_devices || |
1410 | !ti->type->iterate_devices(ti, func, NULL)) | 1407 | !ti->type->iterate_devices(ti, func, NULL)) |
1411 | return 0; | 1408 | return false; |
1412 | } | 1409 | } |
1413 | 1410 | ||
1414 | return 1; | 1411 | return true; |
1415 | } | 1412 | } |
1416 | 1413 | ||
1417 | static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, | 1414 | static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, |
@@ -1468,14 +1465,14 @@ static bool dm_table_supports_discards(struct dm_table *t) | |||
1468 | continue; | 1465 | continue; |
1469 | 1466 | ||
1470 | if (ti->discards_supported) | 1467 | if (ti->discards_supported) |
1471 | return 1; | 1468 | return true; |
1472 | 1469 | ||
1473 | if (ti->type->iterate_devices && | 1470 | if (ti->type->iterate_devices && |
1474 | ti->type->iterate_devices(ti, device_discard_capable, NULL)) | 1471 | ti->type->iterate_devices(ti, device_discard_capable, NULL)) |
1475 | return 1; | 1472 | return true; |
1476 | } | 1473 | } |
1477 | 1474 | ||
1478 | return 0; | 1475 | return false; |
1479 | } | 1476 | } |
1480 | 1477 | ||
1481 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | 1478 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, |
@@ -1677,20 +1674,6 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) | |||
1677 | return r; | 1674 | return r; |
1678 | } | 1675 | } |
1679 | 1676 | ||
1680 | int dm_table_any_busy_target(struct dm_table *t) | ||
1681 | { | ||
1682 | unsigned i; | ||
1683 | struct dm_target *ti; | ||
1684 | |||
1685 | for (i = 0; i < t->num_targets; i++) { | ||
1686 | ti = t->targets + i; | ||
1687 | if (ti->type->busy && ti->type->busy(ti)) | ||
1688 | return 1; | ||
1689 | } | ||
1690 | |||
1691 | return 0; | ||
1692 | } | ||
1693 | |||
1694 | struct mapped_device *dm_table_get_md(struct dm_table *t) | 1677 | struct mapped_device *dm_table_get_md(struct dm_table *t) |
1695 | { | 1678 | { |
1696 | return t->md; | 1679 | return t->md; |
@@ -1709,9 +1692,13 @@ void dm_table_run_md_queue_async(struct dm_table *t) | |||
1709 | md = dm_table_get_md(t); | 1692 | md = dm_table_get_md(t); |
1710 | queue = dm_get_md_queue(md); | 1693 | queue = dm_get_md_queue(md); |
1711 | if (queue) { | 1694 | if (queue) { |
1712 | spin_lock_irqsave(queue->queue_lock, flags); | 1695 | if (queue->mq_ops) |
1713 | blk_run_queue_async(queue); | 1696 | blk_mq_run_hw_queues(queue, true); |
1714 | spin_unlock_irqrestore(queue->queue_lock, flags); | 1697 | else { |
1698 | spin_lock_irqsave(queue->queue_lock, flags); | ||
1699 | blk_run_queue_async(queue); | ||
1700 | spin_unlock_irqrestore(queue->queue_lock, flags); | ||
1701 | } | ||
1715 | } | 1702 | } |
1716 | } | 1703 | } |
1717 | EXPORT_SYMBOL(dm_table_run_md_queue_async); | 1704 | EXPORT_SYMBOL(dm_table_run_md_queue_async); |
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 7a7bab8947ae..66616db33e6f 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -18,20 +18,39 @@ | |||
18 | 18 | ||
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/device-mapper.h> | 20 | #include <linux/device-mapper.h> |
21 | #include <linux/reboot.h> | ||
21 | #include <crypto/hash.h> | 22 | #include <crypto/hash.h> |
22 | 23 | ||
23 | #define DM_MSG_PREFIX "verity" | 24 | #define DM_MSG_PREFIX "verity" |
24 | 25 | ||
26 | #define DM_VERITY_ENV_LENGTH 42 | ||
27 | #define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR" | ||
28 | |||
25 | #define DM_VERITY_IO_VEC_INLINE 16 | 29 | #define DM_VERITY_IO_VEC_INLINE 16 |
26 | #define DM_VERITY_MEMPOOL_SIZE 4 | 30 | #define DM_VERITY_MEMPOOL_SIZE 4 |
27 | #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 | 31 | #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 |
28 | 32 | ||
29 | #define DM_VERITY_MAX_LEVELS 63 | 33 | #define DM_VERITY_MAX_LEVELS 63 |
34 | #define DM_VERITY_MAX_CORRUPTED_ERRS 100 | ||
35 | |||
36 | #define DM_VERITY_OPT_LOGGING "ignore_corruption" | ||
37 | #define DM_VERITY_OPT_RESTART "restart_on_corruption" | ||
30 | 38 | ||
31 | static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; | 39 | static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; |
32 | 40 | ||
33 | module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); | 41 | module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); |
34 | 42 | ||
43 | enum verity_mode { | ||
44 | DM_VERITY_MODE_EIO, | ||
45 | DM_VERITY_MODE_LOGGING, | ||
46 | DM_VERITY_MODE_RESTART | ||
47 | }; | ||
48 | |||
49 | enum verity_block_type { | ||
50 | DM_VERITY_BLOCK_TYPE_DATA, | ||
51 | DM_VERITY_BLOCK_TYPE_METADATA | ||
52 | }; | ||
53 | |||
35 | struct dm_verity { | 54 | struct dm_verity { |
36 | struct dm_dev *data_dev; | 55 | struct dm_dev *data_dev; |
37 | struct dm_dev *hash_dev; | 56 | struct dm_dev *hash_dev; |
@@ -54,6 +73,8 @@ struct dm_verity { | |||
54 | unsigned digest_size; /* digest size for the current hash algorithm */ | 73 | unsigned digest_size; /* digest size for the current hash algorithm */ |
55 | unsigned shash_descsize;/* the size of temporary space for crypto */ | 74 | unsigned shash_descsize;/* the size of temporary space for crypto */ |
56 | int hash_failed; /* set to 1 if hash of any block failed */ | 75 | int hash_failed; /* set to 1 if hash of any block failed */ |
76 | enum verity_mode mode; /* mode for handling verification errors */ | ||
77 | unsigned corrupted_errs;/* Number of errors for corrupted blocks */ | ||
57 | 78 | ||
58 | mempool_t *vec_mempool; /* mempool of bio vector */ | 79 | mempool_t *vec_mempool; /* mempool of bio vector */ |
59 | 80 | ||
@@ -175,6 +196,57 @@ static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, | |||
175 | } | 196 | } |
176 | 197 | ||
177 | /* | 198 | /* |
199 | * Handle verification errors. | ||
200 | */ | ||
201 | static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, | ||
202 | unsigned long long block) | ||
203 | { | ||
204 | char verity_env[DM_VERITY_ENV_LENGTH]; | ||
205 | char *envp[] = { verity_env, NULL }; | ||
206 | const char *type_str = ""; | ||
207 | struct mapped_device *md = dm_table_get_md(v->ti->table); | ||
208 | |||
209 | /* Corruption should be visible in device status in all modes */ | ||
210 | v->hash_failed = 1; | ||
211 | |||
212 | if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS) | ||
213 | goto out; | ||
214 | |||
215 | v->corrupted_errs++; | ||
216 | |||
217 | switch (type) { | ||
218 | case DM_VERITY_BLOCK_TYPE_DATA: | ||
219 | type_str = "data"; | ||
220 | break; | ||
221 | case DM_VERITY_BLOCK_TYPE_METADATA: | ||
222 | type_str = "metadata"; | ||
223 | break; | ||
224 | default: | ||
225 | BUG(); | ||
226 | } | ||
227 | |||
228 | DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str, | ||
229 | block); | ||
230 | |||
231 | if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS) | ||
232 | DMERR("%s: reached maximum errors", v->data_dev->name); | ||
233 | |||
234 | snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu", | ||
235 | DM_VERITY_ENV_VAR_NAME, type, block); | ||
236 | |||
237 | kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp); | ||
238 | |||
239 | out: | ||
240 | if (v->mode == DM_VERITY_MODE_LOGGING) | ||
241 | return 0; | ||
242 | |||
243 | if (v->mode == DM_VERITY_MODE_RESTART) | ||
244 | kernel_restart("dm-verity device corrupted"); | ||
245 | |||
246 | return 1; | ||
247 | } | ||
248 | |||
249 | /* | ||
178 | * Verify hash of a metadata block pertaining to the specified data block | 250 | * Verify hash of a metadata block pertaining to the specified data block |
179 | * ("block" argument) at a specified level ("level" argument). | 251 | * ("block" argument) at a specified level ("level" argument). |
180 | * | 252 | * |
@@ -251,11 +323,11 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block, | |||
251 | goto release_ret_r; | 323 | goto release_ret_r; |
252 | } | 324 | } |
253 | if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { | 325 | if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { |
254 | DMERR_LIMIT("metadata block %llu is corrupted", | 326 | if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, |
255 | (unsigned long long)hash_block); | 327 | hash_block)) { |
256 | v->hash_failed = 1; | 328 | r = -EIO; |
257 | r = -EIO; | 329 | goto release_ret_r; |
258 | goto release_ret_r; | 330 | } |
259 | } else | 331 | } else |
260 | aux->hash_verified = 1; | 332 | aux->hash_verified = 1; |
261 | } | 333 | } |
@@ -367,10 +439,9 @@ test_block_hash: | |||
367 | return r; | 439 | return r; |
368 | } | 440 | } |
369 | if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { | 441 | if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { |
370 | DMERR_LIMIT("data block %llu is corrupted", | 442 | if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, |
371 | (unsigned long long)(io->block + b)); | 443 | io->block + b)) |
372 | v->hash_failed = 1; | 444 | return -EIO; |
373 | return -EIO; | ||
374 | } | 445 | } |
375 | } | 446 | } |
376 | 447 | ||
@@ -546,6 +617,19 @@ static void verity_status(struct dm_target *ti, status_type_t type, | |||
546 | else | 617 | else |
547 | for (x = 0; x < v->salt_size; x++) | 618 | for (x = 0; x < v->salt_size; x++) |
548 | DMEMIT("%02x", v->salt[x]); | 619 | DMEMIT("%02x", v->salt[x]); |
620 | if (v->mode != DM_VERITY_MODE_EIO) { | ||
621 | DMEMIT(" 1 "); | ||
622 | switch (v->mode) { | ||
623 | case DM_VERITY_MODE_LOGGING: | ||
624 | DMEMIT(DM_VERITY_OPT_LOGGING); | ||
625 | break; | ||
626 | case DM_VERITY_MODE_RESTART: | ||
627 | DMEMIT(DM_VERITY_OPT_RESTART); | ||
628 | break; | ||
629 | default: | ||
630 | BUG(); | ||
631 | } | ||
632 | } | ||
549 | break; | 633 | break; |
550 | } | 634 | } |
551 | } | 635 | } |
@@ -647,13 +731,19 @@ static void verity_dtr(struct dm_target *ti) | |||
647 | static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) | 731 | static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) |
648 | { | 732 | { |
649 | struct dm_verity *v; | 733 | struct dm_verity *v; |
650 | unsigned num; | 734 | struct dm_arg_set as; |
735 | const char *opt_string; | ||
736 | unsigned int num, opt_params; | ||
651 | unsigned long long num_ll; | 737 | unsigned long long num_ll; |
652 | int r; | 738 | int r; |
653 | int i; | 739 | int i; |
654 | sector_t hash_position; | 740 | sector_t hash_position; |
655 | char dummy; | 741 | char dummy; |
656 | 742 | ||
743 | static struct dm_arg _args[] = { | ||
744 | {0, 1, "Invalid number of feature args"}, | ||
745 | }; | ||
746 | |||
657 | v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL); | 747 | v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL); |
658 | if (!v) { | 748 | if (!v) { |
659 | ti->error = "Cannot allocate verity structure"; | 749 | ti->error = "Cannot allocate verity structure"; |
@@ -668,8 +758,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
668 | goto bad; | 758 | goto bad; |
669 | } | 759 | } |
670 | 760 | ||
671 | if (argc != 10) { | 761 | if (argc < 10) { |
672 | ti->error = "Invalid argument count: exactly 10 arguments required"; | 762 | ti->error = "Not enough arguments"; |
673 | r = -EINVAL; | 763 | r = -EINVAL; |
674 | goto bad; | 764 | goto bad; |
675 | } | 765 | } |
@@ -790,6 +880,39 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
790 | } | 880 | } |
791 | } | 881 | } |
792 | 882 | ||
883 | argv += 10; | ||
884 | argc -= 10; | ||
885 | |||
886 | /* Optional parameters */ | ||
887 | if (argc) { | ||
888 | as.argc = argc; | ||
889 | as.argv = argv; | ||
890 | |||
891 | r = dm_read_arg_group(_args, &as, &opt_params, &ti->error); | ||
892 | if (r) | ||
893 | goto bad; | ||
894 | |||
895 | while (opt_params) { | ||
896 | opt_params--; | ||
897 | opt_string = dm_shift_arg(&as); | ||
898 | if (!opt_string) { | ||
899 | ti->error = "Not enough feature arguments"; | ||
900 | r = -EINVAL; | ||
901 | goto bad; | ||
902 | } | ||
903 | |||
904 | if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING)) | ||
905 | v->mode = DM_VERITY_MODE_LOGGING; | ||
906 | else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART)) | ||
907 | v->mode = DM_VERITY_MODE_RESTART; | ||
908 | else { | ||
909 | ti->error = "Invalid feature arguments"; | ||
910 | r = -EINVAL; | ||
911 | goto bad; | ||
912 | } | ||
913 | } | ||
914 | } | ||
915 | |||
793 | v->hash_per_block_bits = | 916 | v->hash_per_block_bits = |
794 | __fls((1 << v->hash_dev_block_bits) / v->digest_size); | 917 | __fls((1 << v->hash_dev_block_bits) / v->digest_size); |
795 | 918 | ||
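In table terms, the new modes ride in a standard optional-argument block appended after the ten mandatory verity parameters, e.g. a trailing "1 ignore_corruption" or "1 restart_on_corruption" (placeholders for the mandatory arguments omitted here):

    ... <salt> 1 ignore_corruption

With no feature arguments the target keeps its old fail-with-EIO behaviour, which is also why verity_status() above only emits the extra fields for non-EIO modes. Each corrupted block additionally raises a KOBJ_CHANGE uevent carrying DM_VERITY_ERR_BLOCK_NR=<type>,<block>, capped at DM_VERITY_MAX_CORRUPTED_ERRS occurrences.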
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8001fe9e3434..f8c7ca3e8947 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -21,6 +21,9 @@ | |||
21 | #include <linux/delay.h> | 21 | #include <linux/delay.h> |
22 | #include <linux/wait.h> | 22 | #include <linux/wait.h> |
23 | #include <linux/kthread.h> | 23 | #include <linux/kthread.h> |
24 | #include <linux/ktime.h> | ||
25 | #include <linux/elevator.h> /* for rq_end_sector() */ | ||
26 | #include <linux/blk-mq.h> | ||
24 | 27 | ||
25 | #include <trace/events/block.h> | 28 | #include <trace/events/block.h> |
26 | 29 | ||
@@ -216,8 +219,29 @@ struct mapped_device { | |||
216 | 219 | ||
217 | struct kthread_worker kworker; | 220 | struct kthread_worker kworker; |
218 | struct task_struct *kworker_task; | 221 | struct task_struct *kworker_task; |
222 | |||
223 | /* for request-based merge heuristic in dm_request_fn() */ | ||
224 | unsigned seq_rq_merge_deadline_usecs; | ||
225 | int last_rq_rw; | ||
226 | sector_t last_rq_pos; | ||
227 | ktime_t last_rq_start_time; | ||
228 | |||
229 | /* for blk-mq request-based DM support */ | ||
230 | struct blk_mq_tag_set tag_set; | ||
231 | bool use_blk_mq; | ||
219 | }; | 232 | }; |
220 | 233 | ||
234 | #ifdef CONFIG_DM_MQ_DEFAULT | ||
235 | static bool use_blk_mq = true; | ||
236 | #else | ||
237 | static bool use_blk_mq = false; | ||
238 | #endif | ||
239 | |||
240 | bool dm_use_blk_mq(struct mapped_device *md) | ||
241 | { | ||
242 | return md->use_blk_mq; | ||
243 | } | ||
244 | |||
221 | /* | 245 | /* |
222 | * For mempools pre-allocation at the table loading time. | 246 | * For mempools pre-allocation at the table loading time. |
223 | */ | 247 | */ |
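Whether a request-based device picks the blk-mq path is therefore a compile-time default (CONFIG_DM_MQ_DEFAULT) seeded into the use_blk_mq static above; assuming the module_param registration later in dm.c uses that same variable name, it can also be flipped at load time, and the effective per-device value is what the read-only sysfs attribute added in dm-sysfs.c reports. For example (parameter name assumed, not shown in this hunk):

    dm_mod.use_blk_mq=Y    # kernel command line, or via modprobe dm_mod use_blk_mq=Y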
@@ -250,35 +274,35 @@ static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS; | |||
250 | */ | 274 | */ |
251 | static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; | 275 | static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; |
252 | 276 | ||
253 | static unsigned __dm_get_reserved_ios(unsigned *reserved_ios, | 277 | static unsigned __dm_get_module_param(unsigned *module_param, |
254 | unsigned def, unsigned max) | 278 | unsigned def, unsigned max) |
255 | { | 279 | { |
256 | unsigned ios = ACCESS_ONCE(*reserved_ios); | 280 | unsigned param = ACCESS_ONCE(*module_param); |
257 | unsigned modified_ios = 0; | 281 | unsigned modified_param = 0; |
258 | 282 | ||
259 | if (!ios) | 283 | if (!param) |
260 | modified_ios = def; | 284 | modified_param = def; |
261 | else if (ios > max) | 285 | else if (param > max) |
262 | modified_ios = max; | 286 | modified_param = max; |
263 | 287 | ||
264 | if (modified_ios) { | 288 | if (modified_param) { |
265 | (void)cmpxchg(reserved_ios, ios, modified_ios); | 289 | (void)cmpxchg(module_param, param, modified_param); |
266 | ios = modified_ios; | 290 | param = modified_param; |
267 | } | 291 | } |
268 | 292 | ||
269 | return ios; | 293 | return param; |
270 | } | 294 | } |
271 | 295 | ||
272 | unsigned dm_get_reserved_bio_based_ios(void) | 296 | unsigned dm_get_reserved_bio_based_ios(void) |
273 | { | 297 | { |
274 | return __dm_get_reserved_ios(&reserved_bio_based_ios, | 298 | return __dm_get_module_param(&reserved_bio_based_ios, |
275 | RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS); | 299 | RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS); |
276 | } | 300 | } |
277 | EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios); | 301 | EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios); |
278 | 302 | ||
279 | unsigned dm_get_reserved_rq_based_ios(void) | 303 | unsigned dm_get_reserved_rq_based_ios(void) |
280 | { | 304 | { |
281 | return __dm_get_reserved_ios(&reserved_rq_based_ios, | 305 | return __dm_get_module_param(&reserved_rq_based_ios, |
282 | RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS); | 306 | RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS); |
283 | } | 307 | } |
284 | EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); | 308 | EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios); |
@@ -1017,6 +1041,11 @@ static void end_clone_bio(struct bio *clone, int error) | |||
1017 | blk_update_request(tio->orig, 0, nr_bytes); | 1041 | blk_update_request(tio->orig, 0, nr_bytes); |
1018 | } | 1042 | } |
1019 | 1043 | ||
1044 | static struct dm_rq_target_io *tio_from_request(struct request *rq) | ||
1045 | { | ||
1046 | return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); | ||
1047 | } | ||
1048 | |||
1020 | /* | 1049 | /* |
1021 | * Don't touch any member of the md after calling this function because | 1050 | * Don't touch any member of the md after calling this function because |
1022 | * the md may be freed in dm_put() at the end of this function. | 1051 | * the md may be freed in dm_put() at the end of this function. |
@@ -1024,10 +1053,13 @@ static void end_clone_bio(struct bio *clone, int error) | |||
1024 | */ | 1053 | */ |
1025 | static void rq_completed(struct mapped_device *md, int rw, bool run_queue) | 1054 | static void rq_completed(struct mapped_device *md, int rw, bool run_queue) |
1026 | { | 1055 | { |
1056 | int nr_requests_pending; | ||
1057 | |||
1027 | atomic_dec(&md->pending[rw]); | 1058 | atomic_dec(&md->pending[rw]); |
1028 | 1059 | ||
1029 | /* nudge anyone waiting on suspend queue */ | 1060 | /* nudge anyone waiting on suspend queue */ |
1030 | if (!md_in_flight(md)) | 1061 | nr_requests_pending = md_in_flight(md); |
1062 | if (!nr_requests_pending) | ||
1031 | wake_up(&md->wait); | 1063 | wake_up(&md->wait); |
1032 | 1064 | ||
1033 | /* | 1065 | /* |
@@ -1036,8 +1068,13 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) | |||
1036 | * back into ->request_fn() could deadlock attempting to grab the | 1068 | * back into ->request_fn() could deadlock attempting to grab the |
1037 | * queue lock again. | 1069 | * queue lock again. |
1038 | */ | 1070 | */ |
1039 | if (run_queue) | 1071 | if (run_queue) { |
1040 | blk_run_queue_async(md->queue); | 1072 | if (md->queue->mq_ops) |
1073 | blk_mq_run_hw_queues(md->queue, true); | ||
1074 | else if (!nr_requests_pending || | ||
1075 | (nr_requests_pending >= md->queue->nr_congestion_on)) | ||
1076 | blk_run_queue_async(md->queue); | ||
1077 | } | ||
1041 | 1078 | ||
1042 | /* | 1079 | /* |
1043 | * dm_put() must be at the end of this function. See the comment above | 1080 | * dm_put() must be at the end of this function. See the comment above |
@@ -1048,13 +1085,18 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) | |||
1048 | static void free_rq_clone(struct request *clone) | 1085 | static void free_rq_clone(struct request *clone) |
1049 | { | 1086 | { |
1050 | struct dm_rq_target_io *tio = clone->end_io_data; | 1087 | struct dm_rq_target_io *tio = clone->end_io_data; |
1088 | struct mapped_device *md = tio->md; | ||
1051 | 1089 | ||
1052 | blk_rq_unprep_clone(clone); | 1090 | blk_rq_unprep_clone(clone); |
1053 | if (clone->q && clone->q->mq_ops) | 1091 | |
1092 | if (clone->q->mq_ops) | ||
1054 | tio->ti->type->release_clone_rq(clone); | 1093 | tio->ti->type->release_clone_rq(clone); |
1055 | else | 1094 | else if (!md->queue->mq_ops) |
1056 | free_clone_request(tio->md, clone); | 1095 | /* request_fn queue stacked on request_fn queue(s) */ |
1057 | free_rq_tio(tio); | 1096 | free_clone_request(md, clone); |
1097 | |||
1098 | if (!md->queue->mq_ops) | ||
1099 | free_rq_tio(tio); | ||
1058 | } | 1100 | } |
1059 | 1101 | ||
1060 | /* | 1102 | /* |
@@ -1083,17 +1125,22 @@ static void dm_end_request(struct request *clone, int error) | |||
1083 | } | 1125 | } |
1084 | 1126 | ||
1085 | free_rq_clone(clone); | 1127 | free_rq_clone(clone); |
1086 | blk_end_request_all(rq, error); | 1128 | if (!rq->q->mq_ops) |
1129 | blk_end_request_all(rq, error); | ||
1130 | else | ||
1131 | blk_mq_end_request(rq, error); | ||
1087 | rq_completed(md, rw, true); | 1132 | rq_completed(md, rw, true); |
1088 | } | 1133 | } |
1089 | 1134 | ||
1090 | static void dm_unprep_request(struct request *rq) | 1135 | static void dm_unprep_request(struct request *rq) |
1091 | { | 1136 | { |
1092 | struct dm_rq_target_io *tio = rq->special; | 1137 | struct dm_rq_target_io *tio = tio_from_request(rq); |
1093 | struct request *clone = tio->clone; | 1138 | struct request *clone = tio->clone; |
1094 | 1139 | ||
1095 | rq->special = NULL; | 1140 | if (!rq->q->mq_ops) { |
1096 | rq->cmd_flags &= ~REQ_DONTPREP; | 1141 | rq->special = NULL; |
1142 | rq->cmd_flags &= ~REQ_DONTPREP; | ||
1143 | } | ||
1097 | 1144 | ||
1098 | if (clone) | 1145 | if (clone) |
1099 | free_rq_clone(clone); | 1146 | free_rq_clone(clone); |
@@ -1102,18 +1149,29 @@ static void dm_unprep_request(struct request *rq) | |||
1102 | /* | 1149 | /* |
1103 | * Requeue the original request of a clone. | 1150 | * Requeue the original request of a clone. |
1104 | */ | 1151 | */ |
1105 | static void dm_requeue_unmapped_original_request(struct mapped_device *md, | 1152 | static void old_requeue_request(struct request *rq) |
1106 | struct request *rq) | ||
1107 | { | 1153 | { |
1108 | int rw = rq_data_dir(rq); | ||
1109 | struct request_queue *q = rq->q; | 1154 | struct request_queue *q = rq->q; |
1110 | unsigned long flags; | 1155 | unsigned long flags; |
1111 | 1156 | ||
1112 | dm_unprep_request(rq); | ||
1113 | |||
1114 | spin_lock_irqsave(q->queue_lock, flags); | 1157 | spin_lock_irqsave(q->queue_lock, flags); |
1115 | blk_requeue_request(q, rq); | 1158 | blk_requeue_request(q, rq); |
1116 | spin_unlock_irqrestore(q->queue_lock, flags); | 1159 | spin_unlock_irqrestore(q->queue_lock, flags); |
1160 | } | ||
1161 | |||
1162 | static void dm_requeue_unmapped_original_request(struct mapped_device *md, | ||
1163 | struct request *rq) | ||
1164 | { | ||
1165 | int rw = rq_data_dir(rq); | ||
1166 | |||
1167 | dm_unprep_request(rq); | ||
1168 | |||
1169 | if (!rq->q->mq_ops) | ||
1170 | old_requeue_request(rq); | ||
1171 | else { | ||
1172 | blk_mq_requeue_request(rq); | ||
1173 | blk_mq_kick_requeue_list(rq->q); | ||
1174 | } | ||
1117 | 1175 | ||
1118 | rq_completed(md, rw, false); | 1176 | rq_completed(md, rw, false); |
1119 | } | 1177 | } |
@@ -1125,35 +1183,44 @@ static void dm_requeue_unmapped_request(struct request *clone) | |||
1125 | dm_requeue_unmapped_original_request(tio->md, tio->orig); | 1183 | dm_requeue_unmapped_original_request(tio->md, tio->orig); |
1126 | } | 1184 | } |
1127 | 1185 | ||
1128 | static void __stop_queue(struct request_queue *q) | 1186 | static void old_stop_queue(struct request_queue *q) |
1129 | { | ||
1130 | blk_stop_queue(q); | ||
1131 | } | ||
1132 | |||
1133 | static void stop_queue(struct request_queue *q) | ||
1134 | { | 1187 | { |
1135 | unsigned long flags; | 1188 | unsigned long flags; |
1136 | 1189 | ||
1190 | if (blk_queue_stopped(q)) | ||
1191 | return; | ||
1192 | |||
1137 | spin_lock_irqsave(q->queue_lock, flags); | 1193 | spin_lock_irqsave(q->queue_lock, flags); |
1138 | __stop_queue(q); | 1194 | blk_stop_queue(q); |
1139 | spin_unlock_irqrestore(q->queue_lock, flags); | 1195 | spin_unlock_irqrestore(q->queue_lock, flags); |
1140 | } | 1196 | } |
1141 | 1197 | ||
1142 | static void __start_queue(struct request_queue *q) | 1198 | static void stop_queue(struct request_queue *q) |
1143 | { | 1199 | { |
1144 | if (blk_queue_stopped(q)) | 1200 | if (!q->mq_ops) |
1145 | blk_start_queue(q); | 1201 | old_stop_queue(q); |
1202 | else | ||
1203 | blk_mq_stop_hw_queues(q); | ||
1146 | } | 1204 | } |
1147 | 1205 | ||
1148 | static void start_queue(struct request_queue *q) | 1206 | static void old_start_queue(struct request_queue *q) |
1149 | { | 1207 | { |
1150 | unsigned long flags; | 1208 | unsigned long flags; |
1151 | 1209 | ||
1152 | spin_lock_irqsave(q->queue_lock, flags); | 1210 | spin_lock_irqsave(q->queue_lock, flags); |
1153 | __start_queue(q); | 1211 | if (blk_queue_stopped(q)) |
1212 | blk_start_queue(q); | ||
1154 | spin_unlock_irqrestore(q->queue_lock, flags); | 1213 | spin_unlock_irqrestore(q->queue_lock, flags); |
1155 | } | 1214 | } |
1156 | 1215 | ||
1216 | static void start_queue(struct request_queue *q) | ||
1217 | { | ||
1218 | if (!q->mq_ops) | ||
1219 | old_start_queue(q); | ||
1220 | else | ||
1221 | blk_mq_start_stopped_hw_queues(q, true); | ||
1222 | } | ||
1223 | |||
1157 | static void dm_done(struct request *clone, int error, bool mapped) | 1224 | static void dm_done(struct request *clone, int error, bool mapped) |
1158 | { | 1225 | { |
1159 | int r = error; | 1226 | int r = error; |
@@ -1192,13 +1259,20 @@ static void dm_done(struct request *clone, int error, bool mapped) | |||
1192 | static void dm_softirq_done(struct request *rq) | 1259 | static void dm_softirq_done(struct request *rq) |
1193 | { | 1260 | { |
1194 | bool mapped = true; | 1261 | bool mapped = true; |
1195 | struct dm_rq_target_io *tio = rq->special; | 1262 | struct dm_rq_target_io *tio = tio_from_request(rq); |
1196 | struct request *clone = tio->clone; | 1263 | struct request *clone = tio->clone; |
1264 | int rw; | ||
1197 | 1265 | ||
1198 | if (!clone) { | 1266 | if (!clone) { |
1199 | blk_end_request_all(rq, tio->error); | 1267 | rw = rq_data_dir(rq); |
1200 | rq_completed(tio->md, rq_data_dir(rq), false); | 1268 | if (!rq->q->mq_ops) { |
1201 | free_rq_tio(tio); | 1269 | blk_end_request_all(rq, tio->error); |
1270 | rq_completed(tio->md, rw, false); | ||
1271 | free_rq_tio(tio); | ||
1272 | } else { | ||
1273 | blk_mq_end_request(rq, tio->error); | ||
1274 | rq_completed(tio->md, rw, false); | ||
1275 | } | ||
1202 | return; | 1276 | return; |
1203 | } | 1277 | } |
1204 | 1278 | ||
@@ -1214,7 +1288,7 @@ static void dm_softirq_done(struct request *rq) | |||
1214 | */ | 1288 | */ |
1215 | static void dm_complete_request(struct request *rq, int error) | 1289 | static void dm_complete_request(struct request *rq, int error) |
1216 | { | 1290 | { |
1217 | struct dm_rq_target_io *tio = rq->special; | 1291 | struct dm_rq_target_io *tio = tio_from_request(rq); |
1218 | 1292 | ||
1219 | tio->error = error; | 1293 | tio->error = error; |
1220 | blk_complete_request(rq); | 1294 | blk_complete_request(rq); |
@@ -1233,7 +1307,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error) | |||
1233 | } | 1307 | } |
1234 | 1308 | ||
1235 | /* | 1309 | /* |
1236 | * Called with the clone's queue lock held | 1310 | * Called with the clone's queue lock held (for non-blk-mq) |
1237 | */ | 1311 | */ |
1238 | static void end_clone_request(struct request *clone, int error) | 1312 | static void end_clone_request(struct request *clone, int error) |
1239 | { | 1313 | { |
@@ -1693,7 +1767,7 @@ out: | |||
1693 | * The request function that just remaps the bio built up by | 1767 | * The request function that just remaps the bio built up by |
1694 | * dm_merge_bvec. | 1768 | * dm_merge_bvec. |
1695 | */ | 1769 | */ |
1696 | static void _dm_request(struct request_queue *q, struct bio *bio) | 1770 | static void dm_make_request(struct request_queue *q, struct bio *bio) |
1697 | { | 1771 | { |
1698 | int rw = bio_data_dir(bio); | 1772 | int rw = bio_data_dir(bio); |
1699 | struct mapped_device *md = q->queuedata; | 1773 | struct mapped_device *md = q->queuedata; |
@@ -1725,16 +1799,6 @@ int dm_request_based(struct mapped_device *md) | |||
1725 | return blk_queue_stackable(md->queue); | 1799 | return blk_queue_stackable(md->queue); |
1726 | } | 1800 | } |
1727 | 1801 | ||
1728 | static void dm_request(struct request_queue *q, struct bio *bio) | ||
1729 | { | ||
1730 | struct mapped_device *md = q->queuedata; | ||
1731 | |||
1732 | if (dm_request_based(md)) | ||
1733 | blk_queue_bio(q, bio); | ||
1734 | else | ||
1735 | _dm_request(q, bio); | ||
1736 | } | ||
1737 | |||
1738 | static void dm_dispatch_clone_request(struct request *clone, struct request *rq) | 1802 | static void dm_dispatch_clone_request(struct request *clone, struct request *rq) |
1739 | { | 1803 | { |
1740 | int r; | 1804 | int r; |
@@ -1787,15 +1851,25 @@ static int setup_clone(struct request *clone, struct request *rq, | |||
1787 | static struct request *clone_rq(struct request *rq, struct mapped_device *md, | 1851 | static struct request *clone_rq(struct request *rq, struct mapped_device *md, |
1788 | struct dm_rq_target_io *tio, gfp_t gfp_mask) | 1852 | struct dm_rq_target_io *tio, gfp_t gfp_mask) |
1789 | { | 1853 | { |
1790 | struct request *clone = alloc_clone_request(md, gfp_mask); | 1854 | /* |
1855 | * Do not allocate a clone if tio->clone was already set | ||
1856 | * (see: dm_mq_queue_rq). | ||
1857 | */ | ||
1858 | bool alloc_clone = !tio->clone; | ||
1859 | struct request *clone; | ||
1791 | 1860 | ||
1792 | if (!clone) | 1861 | if (alloc_clone) { |
1793 | return NULL; | 1862 | clone = alloc_clone_request(md, gfp_mask); |
1863 | if (!clone) | ||
1864 | return NULL; | ||
1865 | } else | ||
1866 | clone = tio->clone; | ||
1794 | 1867 | ||
1795 | blk_rq_init(NULL, clone); | 1868 | blk_rq_init(NULL, clone); |
1796 | if (setup_clone(clone, rq, tio, gfp_mask)) { | 1869 | if (setup_clone(clone, rq, tio, gfp_mask)) { |
1797 | /* -ENOMEM */ | 1870 | /* -ENOMEM */ |
1798 | free_clone_request(md, clone); | 1871 | if (alloc_clone) |
1872 | free_clone_request(md, clone); | ||
1799 | return NULL; | 1873 | return NULL; |
1800 | } | 1874 | } |
1801 | 1875 | ||
@@ -1804,6 +1878,19 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, | |||
1804 | 1878 | ||
1805 | static void map_tio_request(struct kthread_work *work); | 1879 | static void map_tio_request(struct kthread_work *work); |
1806 | 1880 | ||
1881 | static void init_tio(struct dm_rq_target_io *tio, struct request *rq, | ||
1882 | struct mapped_device *md) | ||
1883 | { | ||
1884 | tio->md = md; | ||
1885 | tio->ti = NULL; | ||
1886 | tio->clone = NULL; | ||
1887 | tio->orig = rq; | ||
1888 | tio->error = 0; | ||
1889 | memset(&tio->info, 0, sizeof(tio->info)); | ||
1890 | if (md->kworker_task) | ||
1891 | init_kthread_work(&tio->work, map_tio_request); | ||
1892 | } | ||
1893 | |||
1807 | static struct dm_rq_target_io *prep_tio(struct request *rq, | 1894 | static struct dm_rq_target_io *prep_tio(struct request *rq, |
1808 | struct mapped_device *md, gfp_t gfp_mask) | 1895 | struct mapped_device *md, gfp_t gfp_mask) |
1809 | { | 1896 | { |
@@ -1815,13 +1902,7 @@ static struct dm_rq_target_io *prep_tio(struct request *rq, | |||
1815 | if (!tio) | 1902 | if (!tio) |
1816 | return NULL; | 1903 | return NULL; |
1817 | 1904 | ||
1818 | tio->md = md; | 1905 | init_tio(tio, rq, md); |
1819 | tio->ti = NULL; | ||
1820 | tio->clone = NULL; | ||
1821 | tio->orig = rq; | ||
1822 | tio->error = 0; | ||
1823 | memset(&tio->info, 0, sizeof(tio->info)); | ||
1824 | init_kthread_work(&tio->work, map_tio_request); | ||
1825 | 1906 | ||
1826 | table = dm_get_live_table(md, &srcu_idx); | 1907 | table = dm_get_live_table(md, &srcu_idx); |
1827 | if (!dm_table_mq_request_based(table)) { | 1908 | if (!dm_table_mq_request_based(table)) { |
@@ -1865,11 +1946,11 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) | |||
1865 | * DM_MAPIO_REQUEUE : the original request needs to be requeued | 1946 | * DM_MAPIO_REQUEUE : the original request needs to be requeued |
1866 | * < 0 : the request was completed due to failure | 1947 | * < 0 : the request was completed due to failure |
1867 | */ | 1948 | */ |
1868 | static int map_request(struct dm_target *ti, struct request *rq, | 1949 | static int map_request(struct dm_rq_target_io *tio, struct request *rq, |
1869 | struct mapped_device *md) | 1950 | struct mapped_device *md) |
1870 | { | 1951 | { |
1871 | int r; | 1952 | int r; |
1872 | struct dm_rq_target_io *tio = rq->special; | 1953 | struct dm_target *ti = tio->ti; |
1873 | struct request *clone = NULL; | 1954 | struct request *clone = NULL; |
1874 | 1955 | ||
1875 | if (tio->clone) { | 1956 | if (tio->clone) { |
@@ -1884,7 +1965,7 @@ static int map_request(struct dm_target *ti, struct request *rq, | |||
1884 | } | 1965 | } |
1885 | if (IS_ERR(clone)) | 1966 | if (IS_ERR(clone)) |
1886 | return DM_MAPIO_REQUEUE; | 1967 | return DM_MAPIO_REQUEUE; |
1887 | if (setup_clone(clone, rq, tio, GFP_KERNEL)) { | 1968 | if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { |
1888 | /* -ENOMEM */ | 1969 | /* -ENOMEM */ |
1889 | ti->type->release_clone_rq(clone); | 1970 | ti->type->release_clone_rq(clone); |
1890 | return DM_MAPIO_REQUEUE; | 1971 | return DM_MAPIO_REQUEUE; |
@@ -1925,15 +2006,24 @@ static void map_tio_request(struct kthread_work *work) | |||
1925 | struct request *rq = tio->orig; | 2006 | struct request *rq = tio->orig; |
1926 | struct mapped_device *md = tio->md; | 2007 | struct mapped_device *md = tio->md; |
1927 | 2008 | ||
1928 | if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE) | 2009 | if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) |
1929 | dm_requeue_unmapped_original_request(md, rq); | 2010 | dm_requeue_unmapped_original_request(md, rq); |
1930 | } | 2011 | } |
1931 | 2012 | ||
1932 | static void dm_start_request(struct mapped_device *md, struct request *orig) | 2013 | static void dm_start_request(struct mapped_device *md, struct request *orig) |
1933 | { | 2014 | { |
1934 | blk_start_request(orig); | 2015 | if (!orig->q->mq_ops) |
2016 | blk_start_request(orig); | ||
2017 | else | ||
2018 | blk_mq_start_request(orig); | ||
1935 | atomic_inc(&md->pending[rq_data_dir(orig)]); | 2019 | atomic_inc(&md->pending[rq_data_dir(orig)]); |
1936 | 2020 | ||
2021 | if (md->seq_rq_merge_deadline_usecs) { | ||
2022 | md->last_rq_pos = rq_end_sector(orig); | ||
2023 | md->last_rq_rw = rq_data_dir(orig); | ||
2024 | md->last_rq_start_time = ktime_get(); | ||
2025 | } | ||
2026 | |||
1937 | /* | 2027 | /* |
1938 | * Hold the md reference here for the in-flight I/O. | 2028 | * Hold the md reference here for the in-flight I/O. |
1939 | * We can't rely on the reference count by device opener, | 2029 | * We can't rely on the reference count by device opener, |
@@ -1944,6 +2034,45 @@ static void dm_start_request(struct mapped_device *md, struct request *orig) | |||
1944 | dm_get(md); | 2034 | dm_get(md); |
1945 | } | 2035 | } |
1946 | 2036 | ||
2037 | #define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000 | ||
2038 | |||
2039 | ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf) | ||
2040 | { | ||
2041 | return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs); | ||
2042 | } | ||
2043 | |||
2044 | ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, | ||
2045 | const char *buf, size_t count) | ||
2046 | { | ||
2047 | unsigned deadline; | ||
2048 | |||
2049 | if (!dm_request_based(md) || md->use_blk_mq) | ||
2050 | return count; | ||
2051 | |||
2052 | if (kstrtouint(buf, 10, &deadline)) | ||
2053 | return -EINVAL; | ||
2054 | |||
2055 | if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS) | ||
2056 | deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS; | ||
2057 | |||
2058 | md->seq_rq_merge_deadline_usecs = deadline; | ||
2059 | |||
2060 | return count; | ||
2061 | } | ||
2062 | |||
2063 | static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md) | ||
2064 | { | ||
2065 | ktime_t kt_deadline; | ||
2066 | |||
2067 | if (!md->seq_rq_merge_deadline_usecs) | ||
2068 | return false; | ||
2069 | |||
2070 | kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC); | ||
2071 | kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline); | ||
2072 | |||
2073 | return !ktime_after(ktime_get(), kt_deadline); | ||
2074 | } | ||
2075 | |||
1947 | /* | 2076 | /* |
1948 | * q->request_fn for request-based dm. | 2077 | * q->request_fn for request-based dm. |
1949 | * Called with the queue lock held. | 2078 | * Called with the queue lock held. |
@@ -1967,7 +2096,7 @@ static void dm_request_fn(struct request_queue *q) | |||
1967 | while (!blk_queue_stopped(q)) { | 2096 | while (!blk_queue_stopped(q)) { |
1968 | rq = blk_peek_request(q); | 2097 | rq = blk_peek_request(q); |
1969 | if (!rq) | 2098 | if (!rq) |
1970 | goto delay_and_out; | 2099 | goto out; |
1971 | 2100 | ||
1972 | /* always use block 0 to find the target for flushes for now */ | 2101 | /* always use block 0 to find the target for flushes for now */ |
1973 | pos = 0; | 2102 | pos = 0; |
@@ -1986,12 +2115,17 @@ static void dm_request_fn(struct request_queue *q) | |||
1986 | continue; | 2115 | continue; |
1987 | } | 2116 | } |
1988 | 2117 | ||
2118 | if (dm_request_peeked_before_merge_deadline(md) && | ||
2119 | md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 && | ||
2120 | md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) | ||
2121 | goto delay_and_out; | ||
2122 | |||
1989 | if (ti->type->busy && ti->type->busy(ti)) | 2123 | if (ti->type->busy && ti->type->busy(ti)) |
1990 | goto delay_and_out; | 2124 | goto delay_and_out; |
1991 | 2125 | ||
1992 | dm_start_request(md, rq); | 2126 | dm_start_request(md, rq); |
1993 | 2127 | ||
1994 | tio = rq->special; | 2128 | tio = tio_from_request(rq); |
1995 | /* Establish tio->ti before queuing work (map_tio_request) */ | 2129 | /* Establish tio->ti before queuing work (map_tio_request) */ |
1996 | tio->ti = ti; | 2130 | tio->ti = ti; |
1997 | queue_kthread_work(&md->kworker, &tio->work); | 2131 | queue_kthread_work(&md->kworker, &tio->work); |
@@ -2001,33 +2135,11 @@ static void dm_request_fn(struct request_queue *q) | |||
2001 | goto out; | 2135 | goto out; |
2002 | 2136 | ||
2003 | delay_and_out: | 2137 | delay_and_out: |
2004 | blk_delay_queue(q, HZ / 10); | 2138 | blk_delay_queue(q, HZ / 100); |
2005 | out: | 2139 | out: |
2006 | dm_put_live_table(md, srcu_idx); | 2140 | dm_put_live_table(md, srcu_idx); |
2007 | } | 2141 | } |
2008 | 2142 | ||
2009 | int dm_underlying_device_busy(struct request_queue *q) | ||
2010 | { | ||
2011 | return blk_lld_busy(q); | ||
2012 | } | ||
2013 | EXPORT_SYMBOL_GPL(dm_underlying_device_busy); | ||
2014 | |||
2015 | static int dm_lld_busy(struct request_queue *q) | ||
2016 | { | ||
2017 | int r; | ||
2018 | struct mapped_device *md = q->queuedata; | ||
2019 | struct dm_table *map = dm_get_live_table_fast(md); | ||
2020 | |||
2021 | if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) | ||
2022 | r = 1; | ||
2023 | else | ||
2024 | r = dm_table_any_busy_target(map); | ||
2025 | |||
2026 | dm_put_live_table_fast(md); | ||
2027 | |||
2028 | return r; | ||
2029 | } | ||
2030 | |||
2031 | static int dm_any_congested(void *congested_data, int bdi_bits) | 2143 | static int dm_any_congested(void *congested_data, int bdi_bits) |
2032 | { | 2144 | { |
2033 | int r = bdi_bits; | 2145 | int r = bdi_bits; |
@@ -2110,7 +2222,7 @@ static void dm_init_md_queue(struct mapped_device *md) | |||
2110 | { | 2222 | { |
2111 | /* | 2223 | /* |
2112 | * Request-based dm devices cannot be stacked on top of bio-based dm | 2224 | * Request-based dm devices cannot be stacked on top of bio-based dm |
2113 | * devices. The type of this dm device has not been decided yet. | 2225 | * devices. The type of this dm device may not have been decided yet. |
2114 | * The type is decided at the first table loading time. | 2226 | * The type is decided at the first table loading time. |
2115 | * To prevent problematic device stacking, clear the queue flag | 2227 | * To prevent problematic device stacking, clear the queue flag |
2116 | * for request stacking support until then. | 2228 | * for request stacking support until then. |
@@ -2118,13 +2230,21 @@ static void dm_init_md_queue(struct mapped_device *md) | |||
2118 | * This queue is new, so no concurrency on the queue_flags. | 2230 | * This queue is new, so no concurrency on the queue_flags. |
2119 | */ | 2231 | */ |
2120 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); | 2232 | queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); |
2233 | } | ||
2234 | |||
2235 | static void dm_init_old_md_queue(struct mapped_device *md) | ||
2236 | { | ||
2237 | md->use_blk_mq = false; | ||
2238 | dm_init_md_queue(md); | ||
2121 | 2239 | ||
2240 | /* | ||
2241 | * Initialize aspects of queue that aren't relevant for blk-mq | ||
2242 | */ | ||
2122 | md->queue->queuedata = md; | 2243 | md->queue->queuedata = md; |
2123 | md->queue->backing_dev_info.congested_fn = dm_any_congested; | 2244 | md->queue->backing_dev_info.congested_fn = dm_any_congested; |
2124 | md->queue->backing_dev_info.congested_data = md; | 2245 | md->queue->backing_dev_info.congested_data = md; |
2125 | blk_queue_make_request(md->queue, dm_request); | 2246 | |
2126 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); | 2247 | blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); |
2127 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | ||
2128 | } | 2248 | } |
2129 | 2249 | ||
2130 | /* | 2250 | /* |
@@ -2156,6 +2276,7 @@ static struct mapped_device *alloc_dev(int minor) | |||
2156 | if (r < 0) | 2276 | if (r < 0) |
2157 | goto bad_io_barrier; | 2277 | goto bad_io_barrier; |
2158 | 2278 | ||
2279 | md->use_blk_mq = use_blk_mq; | ||
2159 | md->type = DM_TYPE_NONE; | 2280 | md->type = DM_TYPE_NONE; |
2160 | mutex_init(&md->suspend_lock); | 2281 | mutex_init(&md->suspend_lock); |
2161 | mutex_init(&md->type_lock); | 2282 | mutex_init(&md->type_lock); |
@@ -2267,6 +2388,8 @@ static void free_dev(struct mapped_device *md) | |||
2267 | del_gendisk(md->disk); | 2388 | del_gendisk(md->disk); |
2268 | put_disk(md->disk); | 2389 | put_disk(md->disk); |
2269 | blk_cleanup_queue(md->queue); | 2390 | blk_cleanup_queue(md->queue); |
2391 | if (md->use_blk_mq) | ||
2392 | blk_mq_free_tag_set(&md->tag_set); | ||
2270 | bdput(md->bdev); | 2393 | bdput(md->bdev); |
2271 | free_minor(minor); | 2394 | free_minor(minor); |
2272 | 2395 | ||
@@ -2278,7 +2401,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) | |||
2278 | { | 2401 | { |
2279 | struct dm_md_mempools *p = dm_table_get_md_mempools(t); | 2402 | struct dm_md_mempools *p = dm_table_get_md_mempools(t); |
2280 | 2403 | ||
2281 | if (md->io_pool && md->bs) { | 2404 | if (md->bs) { |
2282 | /* The md already has necessary mempools. */ | 2405 | /* The md already has necessary mempools. */ |
2283 | if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) { | 2406 | if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) { |
2284 | /* | 2407 | /* |
@@ -2310,7 +2433,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) | |||
2310 | p->bs = NULL; | 2433 | p->bs = NULL; |
2311 | 2434 | ||
2312 | out: | 2435 | out: |
2313 | /* mempool bind completed, now no need any mempools in the table */ | 2436 | /* mempool bind completed, no longer need any mempools in the table */ |
2314 | dm_table_free_md_mempools(t); | 2437 | dm_table_free_md_mempools(t); |
2315 | } | 2438 | } |
2316 | 2439 | ||
@@ -2357,7 +2480,7 @@ int dm_queue_merge_is_compulsory(struct request_queue *q) | |||
2357 | if (!q->merge_bvec_fn) | 2480 | if (!q->merge_bvec_fn) |
2358 | return 0; | 2481 | return 0; |
2359 | 2482 | ||
2360 | if (q->make_request_fn == dm_request) { | 2483 | if (q->make_request_fn == dm_make_request) { |
2361 | dev_md = q->queuedata; | 2484 | dev_md = q->queuedata; |
2362 | if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) | 2485 | if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) |
2363 | return 0; | 2486 | return 0; |
@@ -2426,7 +2549,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, | |||
2426 | * This must be done before setting the queue restrictions, | 2549 | * This must be done before setting the queue restrictions, |
2427 | * because request-based dm may be run just after the setting. | 2550 | * because request-based dm may be run just after the setting. |
2428 | */ | 2551 | */ |
2429 | if (dm_table_request_based(t) && !blk_queue_stopped(q)) | 2552 | if (dm_table_request_based(t)) |
2430 | stop_queue(q); | 2553 | stop_queue(q); |
2431 | 2554 | ||
2432 | __bind_mempools(md, t); | 2555 | __bind_mempools(md, t); |
@@ -2508,14 +2631,6 @@ unsigned dm_get_md_type(struct mapped_device *md) | |||
2508 | return md->type; | 2631 | return md->type; |
2509 | } | 2632 | } |
2510 | 2633 | ||
2511 | static bool dm_md_type_request_based(struct mapped_device *md) | ||
2512 | { | ||
2513 | unsigned table_type = dm_get_md_type(md); | ||
2514 | |||
2515 | return (table_type == DM_TYPE_REQUEST_BASED || | ||
2516 | table_type == DM_TYPE_MQ_REQUEST_BASED); | ||
2517 | } | ||
2518 | |||
2519 | struct target_type *dm_get_immutable_target_type(struct mapped_device *md) | 2634 | struct target_type *dm_get_immutable_target_type(struct mapped_device *md) |
2520 | { | 2635 | { |
2521 | return md->immutable_target_type; | 2636 | return md->immutable_target_type; |
@@ -2532,6 +2647,14 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md) | |||
2532 | } | 2647 | } |
2533 | EXPORT_SYMBOL_GPL(dm_get_queue_limits); | 2648 | EXPORT_SYMBOL_GPL(dm_get_queue_limits); |
2534 | 2649 | ||
2650 | static void init_rq_based_worker_thread(struct mapped_device *md) | ||
2651 | { | ||
2652 | /* Initialize the request-based DM worker thread */ | ||
2653 | init_kthread_worker(&md->kworker); | ||
2654 | md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, | ||
2655 | "kdmwork-%s", dm_device_name(md)); | ||
2656 | } | ||
2657 | |||
2535 | /* | 2658 | /* |
2536 | * Fully initialize a request-based queue (->elevator, ->request_fn, etc). | 2659 | * Fully initialize a request-based queue (->elevator, ->request_fn, etc). |
2537 | */ | 2660 | */ |
@@ -2540,27 +2663,160 @@ static int dm_init_request_based_queue(struct mapped_device *md) | |||
2540 | struct request_queue *q = NULL; | 2663 | struct request_queue *q = NULL; |
2541 | 2664 | ||
2542 | if (md->queue->elevator) | 2665 | if (md->queue->elevator) |
2543 | return 1; | 2666 | return 0; |
2544 | 2667 | ||
2545 | /* Fully initialize the queue */ | 2668 | /* Fully initialize the queue */ |
2546 | q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); | 2669 | q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); |
2547 | if (!q) | 2670 | if (!q) |
2548 | return 0; | 2671 | return -EINVAL; |
2672 | |||
2673 | /* disable dm_request_fn's merge heuristic by default */ | ||
2674 | md->seq_rq_merge_deadline_usecs = 0; | ||
2549 | 2675 | ||
2550 | md->queue = q; | 2676 | md->queue = q; |
2551 | dm_init_md_queue(md); | 2677 | dm_init_old_md_queue(md); |
2552 | blk_queue_softirq_done(md->queue, dm_softirq_done); | 2678 | blk_queue_softirq_done(md->queue, dm_softirq_done); |
2553 | blk_queue_prep_rq(md->queue, dm_prep_fn); | 2679 | blk_queue_prep_rq(md->queue, dm_prep_fn); |
2554 | blk_queue_lld_busy(md->queue, dm_lld_busy); | ||
2555 | 2680 | ||
2556 | /* Also initialize the request-based DM worker thread */ | 2681 | init_rq_based_worker_thread(md); |
2557 | init_kthread_worker(&md->kworker); | ||
2558 | md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, | ||
2559 | "kdmwork-%s", dm_device_name(md)); | ||
2560 | 2682 | ||
2561 | elv_register_queue(md->queue); | 2683 | elv_register_queue(md->queue); |
2562 | 2684 | ||
2563 | return 1; | 2685 | return 0; |
2686 | } | ||
2687 | |||
2688 | static int dm_mq_init_request(void *data, struct request *rq, | ||
2689 | unsigned int hctx_idx, unsigned int request_idx, | ||
2690 | unsigned int numa_node) | ||
2691 | { | ||
2692 | struct mapped_device *md = data; | ||
2693 | struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); | ||
2694 | |||
2695 | /* | ||
2696 | * Must initialize md member of tio, otherwise it won't | ||
2697 | * be available in dm_mq_queue_rq. | ||
2698 | */ | ||
2699 | tio->md = md; | ||
2700 | |||
2701 | return 0; | ||
2702 | } | ||
2703 | |||
2704 | static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, | ||
2705 | const struct blk_mq_queue_data *bd) | ||
2706 | { | ||
2707 | struct request *rq = bd->rq; | ||
2708 | struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); | ||
2709 | struct mapped_device *md = tio->md; | ||
2710 | int srcu_idx; | ||
2711 | struct dm_table *map = dm_get_live_table(md, &srcu_idx); | ||
2712 | struct dm_target *ti; | ||
2713 | sector_t pos; | ||
2714 | |||
2715 | /* always use block 0 to find the target for flushes for now */ | ||
2716 | pos = 0; | ||
2717 | if (!(rq->cmd_flags & REQ_FLUSH)) | ||
2718 | pos = blk_rq_pos(rq); | ||
2719 | |||
2720 | ti = dm_table_find_target(map, pos); | ||
2721 | if (!dm_target_is_valid(ti)) { | ||
2722 | dm_put_live_table(md, srcu_idx); | ||
2723 | DMERR_LIMIT("request attempted access beyond the end of device"); | ||
2724 | /* | ||
2725 | * Must perform setup, that rq_completed() requires, | ||
2726 | * before returning BLK_MQ_RQ_QUEUE_ERROR | ||
2727 | */ | ||
2728 | dm_start_request(md, rq); | ||
2729 | return BLK_MQ_RQ_QUEUE_ERROR; | ||
2730 | } | ||
2731 | dm_put_live_table(md, srcu_idx); | ||
2732 | |||
2733 | if (ti->type->busy && ti->type->busy(ti)) | ||
2734 | return BLK_MQ_RQ_QUEUE_BUSY; | ||
2735 | |||
2736 | dm_start_request(md, rq); | ||
2737 | |||
2738 | /* Init tio using md established in .init_request */ | ||
2739 | init_tio(tio, rq, md); | ||
2740 | |||
2741 | /* | ||
2742 | * Establish tio->ti before queuing work (map_tio_request) | ||
2743 | * or making direct call to map_request(). | ||
2744 | */ | ||
2745 | tio->ti = ti; | ||
2746 | |||
2747 | /* Clone the request if underlying devices aren't blk-mq */ | ||
2748 | if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) { | ||
2749 | /* clone request is allocated at the end of the pdu */ | ||
2750 | tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io); | ||
2751 | if (!clone_rq(rq, md, tio, GFP_ATOMIC)) | ||
2752 | return BLK_MQ_RQ_QUEUE_BUSY; | ||
2753 | queue_kthread_work(&md->kworker, &tio->work); | ||
2754 | } else { | ||
2755 | /* Direct call is fine since .queue_rq allows allocations */ | ||
2756 | if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) | ||
2757 | dm_requeue_unmapped_original_request(md, rq); | ||
2758 | } | ||
2759 | |||
2760 | return BLK_MQ_RQ_QUEUE_OK; | ||
2761 | } | ||
2762 | |||
2763 | static struct blk_mq_ops dm_mq_ops = { | ||
2764 | .queue_rq = dm_mq_queue_rq, | ||
2765 | .map_queue = blk_mq_map_queue, | ||
2766 | .complete = dm_softirq_done, | ||
2767 | .init_request = dm_mq_init_request, | ||
2768 | }; | ||
2769 | |||
2770 | static int dm_init_request_based_blk_mq_queue(struct mapped_device *md) | ||
2771 | { | ||
2772 | unsigned md_type = dm_get_md_type(md); | ||
2773 | struct request_queue *q; | ||
2774 | int err; | ||
2775 | |||
2776 | memset(&md->tag_set, 0, sizeof(md->tag_set)); | ||
2777 | md->tag_set.ops = &dm_mq_ops; | ||
2778 | md->tag_set.queue_depth = BLKDEV_MAX_RQ; | ||
2779 | md->tag_set.numa_node = NUMA_NO_NODE; | ||
2780 | md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; | ||
2781 | md->tag_set.nr_hw_queues = 1; | ||
2782 | if (md_type == DM_TYPE_REQUEST_BASED) { | ||
2783 | /* make the memory for non-blk-mq clone part of the pdu */ | ||
2784 | md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request); | ||
2785 | } else | ||
2786 | md->tag_set.cmd_size = sizeof(struct dm_rq_target_io); | ||
2787 | md->tag_set.driver_data = md; | ||
2788 | |||
2789 | err = blk_mq_alloc_tag_set(&md->tag_set); | ||
2790 | if (err) | ||
2791 | return err; | ||
2792 | |||
2793 | q = blk_mq_init_allocated_queue(&md->tag_set, md->queue); | ||
2794 | if (IS_ERR(q)) { | ||
2795 | err = PTR_ERR(q); | ||
2796 | goto out_tag_set; | ||
2797 | } | ||
2798 | md->queue = q; | ||
2799 | dm_init_md_queue(md); | ||
2800 | |||
2801 | /* backfill 'mq' sysfs registration normally done in blk_register_queue */ | ||
2802 | blk_mq_register_disk(md->disk); | ||
2803 | |||
2804 | if (md_type == DM_TYPE_REQUEST_BASED) | ||
2805 | init_rq_based_worker_thread(md); | ||
2806 | |||
2807 | return 0; | ||
2808 | |||
2809 | out_tag_set: | ||
2810 | blk_mq_free_tag_set(&md->tag_set); | ||
2811 | return err; | ||
2812 | } | ||
2813 | |||
2814 | static unsigned filter_md_type(unsigned type, struct mapped_device *md) | ||
2815 | { | ||
2816 | if (type == DM_TYPE_BIO_BASED) | ||
2817 | return type; | ||
2818 | |||
2819 | return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED; | ||
2564 | } | 2820 | } |
2565 | 2821 | ||
2566 | /* | 2822 | /* |
@@ -2568,9 +2824,29 @@ static int dm_init_request_based_queue(struct mapped_device *md) | |||
2568 | */ | 2824 | */ |
2569 | int dm_setup_md_queue(struct mapped_device *md) | 2825 | int dm_setup_md_queue(struct mapped_device *md) |
2570 | { | 2826 | { |
2571 | if (dm_md_type_request_based(md) && !dm_init_request_based_queue(md)) { | 2827 | int r; |
2572 | DMWARN("Cannot initialize queue for request-based mapped device"); | 2828 | unsigned md_type = filter_md_type(dm_get_md_type(md), md); |
2573 | return -EINVAL; | 2829 | |
2830 | switch (md_type) { | ||
2831 | case DM_TYPE_REQUEST_BASED: | ||
2832 | r = dm_init_request_based_queue(md); | ||
2833 | if (r) { | ||
2834 | DMWARN("Cannot initialize queue for request-based mapped device"); | ||
2835 | return r; | ||
2836 | } | ||
2837 | break; | ||
2838 | case DM_TYPE_MQ_REQUEST_BASED: | ||
2839 | r = dm_init_request_based_blk_mq_queue(md); | ||
2840 | if (r) { | ||
2841 | DMWARN("Cannot initialize queue for request-based blk-mq mapped device"); | ||
2842 | return r; | ||
2843 | } | ||
2844 | break; | ||
2845 | case DM_TYPE_BIO_BASED: | ||
2846 | dm_init_old_md_queue(md); | ||
2847 | blk_queue_make_request(md->queue, dm_make_request); | ||
2848 | blk_queue_merge_bvec(md->queue, dm_merge_bvec); | ||
2849 | break; | ||
2574 | } | 2850 | } |
2575 | 2851 | ||
2576 | return 0; | 2852 | return 0; |
@@ -2654,7 +2930,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait) | |||
2654 | set_bit(DMF_FREEING, &md->flags); | 2930 | set_bit(DMF_FREEING, &md->flags); |
2655 | spin_unlock(&_minor_lock); | 2931 | spin_unlock(&_minor_lock); |
2656 | 2932 | ||
2657 | if (dm_request_based(md)) | 2933 | if (dm_request_based(md) && md->kworker_task) |
2658 | flush_kthread_worker(&md->kworker); | 2934 | flush_kthread_worker(&md->kworker); |
2659 | 2935 | ||
2660 | /* | 2936 | /* |
@@ -2908,7 +3184,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, | |||
2908 | */ | 3184 | */ |
2909 | if (dm_request_based(md)) { | 3185 | if (dm_request_based(md)) { |
2910 | stop_queue(md->queue); | 3186 | stop_queue(md->queue); |
2911 | flush_kthread_worker(&md->kworker); | 3187 | if (md->kworker_task) |
3188 | flush_kthread_worker(&md->kworker); | ||
2912 | } | 3189 | } |
2913 | 3190 | ||
2914 | flush_workqueue(md->wq); | 3191 | flush_workqueue(md->wq); |
@@ -3206,6 +3483,7 @@ struct gendisk *dm_disk(struct mapped_device *md) | |||
3206 | { | 3483 | { |
3207 | return md->disk; | 3484 | return md->disk; |
3208 | } | 3485 | } |
3486 | EXPORT_SYMBOL_GPL(dm_disk); | ||
3209 | 3487 | ||
3210 | struct kobject *dm_kobject(struct mapped_device *md) | 3488 | struct kobject *dm_kobject(struct mapped_device *md) |
3211 | { | 3489 | { |
@@ -3253,16 +3531,19 @@ int dm_noflush_suspending(struct dm_target *ti) | |||
3253 | } | 3531 | } |
3254 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); | 3532 | EXPORT_SYMBOL_GPL(dm_noflush_suspending); |
3255 | 3533 | ||
3256 | struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) | 3534 | struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, |
3535 | unsigned integrity, unsigned per_bio_data_size) | ||
3257 | { | 3536 | { |
3258 | struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); | 3537 | struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); |
3259 | struct kmem_cache *cachep; | 3538 | struct kmem_cache *cachep = NULL; |
3260 | unsigned int pool_size = 0; | 3539 | unsigned int pool_size = 0; |
3261 | unsigned int front_pad; | 3540 | unsigned int front_pad; |
3262 | 3541 | ||
3263 | if (!pools) | 3542 | if (!pools) |
3264 | return NULL; | 3543 | return NULL; |
3265 | 3544 | ||
3545 | type = filter_md_type(type, md); | ||
3546 | |||
3266 | switch (type) { | 3547 | switch (type) { |
3267 | case DM_TYPE_BIO_BASED: | 3548 | case DM_TYPE_BIO_BASED: |
3268 | cachep = _io_cache; | 3549 | cachep = _io_cache; |
@@ -3270,13 +3551,13 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u | |||
3270 | front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); | 3551 | front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); |
3271 | break; | 3552 | break; |
3272 | case DM_TYPE_REQUEST_BASED: | 3553 | case DM_TYPE_REQUEST_BASED: |
3554 | cachep = _rq_tio_cache; | ||
3273 | pool_size = dm_get_reserved_rq_based_ios(); | 3555 | pool_size = dm_get_reserved_rq_based_ios(); |
3274 | pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); | 3556 | pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); |
3275 | if (!pools->rq_pool) | 3557 | if (!pools->rq_pool) |
3276 | goto out; | 3558 | goto out; |
3277 | /* fall through to setup remaining rq-based pools */ | 3559 | /* fall through to setup remaining rq-based pools */ |
3278 | case DM_TYPE_MQ_REQUEST_BASED: | 3560 | case DM_TYPE_MQ_REQUEST_BASED: |
3279 | cachep = _rq_tio_cache; | ||
3280 | if (!pool_size) | 3561 | if (!pool_size) |
3281 | pool_size = dm_get_reserved_rq_based_ios(); | 3562 | pool_size = dm_get_reserved_rq_based_ios(); |
3282 | front_pad = offsetof(struct dm_rq_clone_bio_info, clone); | 3563 | front_pad = offsetof(struct dm_rq_clone_bio_info, clone); |
@@ -3284,12 +3565,14 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u | |||
3284 | WARN_ON(per_bio_data_size != 0); | 3565 | WARN_ON(per_bio_data_size != 0); |
3285 | break; | 3566 | break; |
3286 | default: | 3567 | default: |
3287 | goto out; | 3568 | BUG(); |
3288 | } | 3569 | } |
3289 | 3570 | ||
3290 | pools->io_pool = mempool_create_slab_pool(pool_size, cachep); | 3571 | if (cachep) { |
3291 | if (!pools->io_pool) | 3572 | pools->io_pool = mempool_create_slab_pool(pool_size, cachep); |
3292 | goto out; | 3573 | if (!pools->io_pool) |
3574 | goto out; | ||
3575 | } | ||
3293 | 3576 | ||
3294 | pools->bs = bioset_create_nobvec(pool_size, front_pad); | 3577 | pools->bs = bioset_create_nobvec(pool_size, front_pad); |
3295 | if (!pools->bs) | 3578 | if (!pools->bs) |
@@ -3346,6 +3629,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); | |||
3346 | module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); | 3629 | module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); |
3347 | MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); | 3630 | MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); |
3348 | 3631 | ||
3632 | module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR); | ||
3633 | MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices"); | ||
3634 | |||
3349 | MODULE_DESCRIPTION(DM_NAME " driver"); | 3635 | MODULE_DESCRIPTION(DM_NAME " driver"); |
3350 | MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); | 3636 | MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); |
3351 | MODULE_LICENSE("GPL"); | 3637 | MODULE_LICENSE("GPL"); |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 59f53e79db82..6123c2bf9150 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -70,7 +70,6 @@ void dm_table_presuspend_undo_targets(struct dm_table *t); | |||
70 | void dm_table_postsuspend_targets(struct dm_table *t); | 70 | void dm_table_postsuspend_targets(struct dm_table *t); |
71 | int dm_table_resume_targets(struct dm_table *t); | 71 | int dm_table_resume_targets(struct dm_table *t); |
72 | int dm_table_any_congested(struct dm_table *t, int bdi_bits); | 72 | int dm_table_any_congested(struct dm_table *t, int bdi_bits); |
73 | int dm_table_any_busy_target(struct dm_table *t); | ||
74 | unsigned dm_table_get_type(struct dm_table *t); | 73 | unsigned dm_table_get_type(struct dm_table *t); |
75 | struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); | 74 | struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); |
76 | bool dm_table_request_based(struct dm_table *t); | 75 | bool dm_table_request_based(struct dm_table *t); |
@@ -212,6 +211,8 @@ int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, | |||
212 | void dm_internal_suspend(struct mapped_device *md); | 211 | void dm_internal_suspend(struct mapped_device *md); |
213 | void dm_internal_resume(struct mapped_device *md); | 212 | void dm_internal_resume(struct mapped_device *md); |
214 | 213 | ||
214 | bool dm_use_blk_mq(struct mapped_device *md); | ||
215 | |||
215 | int dm_io_init(void); | 216 | int dm_io_init(void); |
216 | void dm_io_exit(void); | 217 | void dm_io_exit(void); |
217 | 218 | ||
@@ -221,7 +222,8 @@ void dm_kcopyd_exit(void); | |||
221 | /* | 222 | /* |
222 | * Mempool operations | 223 | * Mempool operations |
223 | */ | 224 | */ |
224 | struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size); | 225 | struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, |
226 | unsigned integrity, unsigned per_bio_data_size); | ||
225 | void dm_free_md_mempools(struct dm_md_mempools *pools); | 227 | void dm_free_md_mempools(struct dm_md_mempools *pools); |
226 | 228 | ||
227 | /* | 229 | /* |
@@ -235,4 +237,8 @@ static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen | |||
235 | return !maxlen || strlen(result) + 1 >= maxlen; | 237 | return !maxlen || strlen(result) + 1 >= maxlen; |
236 | } | 238 | } |
237 | 239 | ||
240 | ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf); | ||
241 | ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, | ||
242 | const char *buf, size_t count); | ||
243 | |||
238 | #endif | 244 | #endif |
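The declarations just above export the new merge-deadline show/store pair to dm-sysfs, and dm.c registers a use_blk_mq module parameter; both are readable at runtime. Below is a small hypothetical userspace sketch for inspecting them. The sysfs paths (/sys/module/dm_mod/parameters/use_blk_mq and /sys/block/dm-<N>/dm/rq_based_seq_io_merge_deadline) are inferred from the parameter and attribute names in this diff and dm's usual sysfs layout, so treat them as assumptions. Per the store handler above, written values are capped at MAX_SEQ_RQ_MERGE_DEADLINE_USECS (100000) and are only honoured for request-based devices that are not using blk-mq.

/* Hypothetical userspace sketch, not part of the patch. */
#include <stdio.h>

static int read_sysfs(const char *path, char *buf, size_t len)
{
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;
	if (!fgets(buf, (int)len, f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return 0;
}

int main(int argc, char **argv)
{
	/* Device name is an argument, e.g. "dm-0"; defaults to dm-0. */
	const char *dev = argc > 1 ? argv[1] : "dm-0";
	char path[256], val[64];

	/* Module-wide blk-mq opt-in added by this series. */
	if (read_sysfs("/sys/module/dm_mod/parameters/use_blk_mq", val, sizeof(val)) == 0)
		printf("use_blk_mq: %s", val);

	/* Per-device merge deadline in usecs; 0 disables the heuristic. */
	snprintf(path, sizeof(path),
		 "/sys/block/%s/dm/rq_based_seq_io_merge_deadline", dev);
	if (read_sysfs(path, val, sizeof(val)) == 0)
		printf("%s rq_based_seq_io_merge_deadline: %s", dev, val);

	return 0;
}

Because a store handler is declared alongside the show handler, the attribute is writable as well: opening the same path for writing and writing a decimal microsecond count adjusts the deadline used by dm_request_peeked_before_merge_deadline() in dm.c.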