Diffstat (limited to 'drivers/md/dm-thin.c')

 drivers/md/dm-thin.c | 760 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 607 insertions(+), 153 deletions(-)

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 0f86d802b533..8735543eacdb 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -11,11 +11,13 @@
 #include <linux/device-mapper.h>
 #include <linux/dm-io.h>
 #include <linux/dm-kcopyd.h>
+#include <linux/log2.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
 #include <linux/rbtree.h>
 
 #define DM_MSG_PREFIX   "thin"
@@ -25,7 +27,6 @@
  */
 #define ENDIO_HOOK_POOL_SIZE 1024
 #define MAPPING_POOL_SIZE 1024
-#define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
 #define NO_SPACE_TIMEOUT_SECS 60
 
@@ -114,7 +115,8 @@ static void build_data_key(struct dm_thin_device *td,
 {
 	key->virtual = 0;
 	key->dev = dm_thin_dev_id(td);
-	key->block = b;
+	key->block_begin = b;
+	key->block_end = b + 1ULL;
 }
 
 static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
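The key change above is worth a note: a cell key now names the half-open block range [b, b + 1) rather than a single block. The two hunks here only ever build one-block ranges, so behaviour is unchanged, but the representation allows a single bio-prison cell to cover a run of blocks. A minimal userspace sketch of the overlap rule such range keys imply; the struct and helper below are illustrative stand-ins, not the kernel's dm-bio-prison types:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cell_key {			/* illustrative, not the kernel struct */
	int virtual;			/* virtual- vs data-block address space */
	uint64_t dev;			/* thin device id */
	uint64_t block_begin;		/* inclusive */
	uint64_t block_end;		/* exclusive */
};

static bool keys_overlap(const struct cell_key *a, const struct cell_key *b)
{
	if (a->virtual != b->virtual || a->dev != b->dev)
		return false;
	/* half-open intervals collide iff each begins before the other ends */
	return a->block_begin < b->block_end && b->block_begin < a->block_end;
}

int main(void)
{
	struct cell_key one = { 1, 0, 5, 6 };	/* the single-block case: [b, b + 1) */
	struct cell_key run = { 1, 0, 4, 8 };	/* a multi-block range [4, 8) */

	printf("%d\n", keys_overlap(&one, &run));	/* prints 1: they collide */
	return 0;
}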
@@ -122,7 +124,55 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
 {
 	key->virtual = 1;
 	key->dev = dm_thin_dev_id(td);
-	key->block = b;
+	key->block_begin = b;
+	key->block_end = b + 1ULL;
+}
+
+/*----------------------------------------------------------------*/
+
+#define THROTTLE_THRESHOLD (1 * HZ)
+
+struct throttle {
+	struct rw_semaphore lock;
+	unsigned long threshold;
+	bool throttle_applied;
+};
+
+static void throttle_init(struct throttle *t)
+{
+	init_rwsem(&t->lock);
+	t->throttle_applied = false;
+}
+
+static void throttle_work_start(struct throttle *t)
+{
+	t->threshold = jiffies + THROTTLE_THRESHOLD;
+}
+
+static void throttle_work_update(struct throttle *t)
+{
+	if (!t->throttle_applied && jiffies > t->threshold) {
+		down_write(&t->lock);
+		t->throttle_applied = true;
+	}
+}
+
+static void throttle_work_complete(struct throttle *t)
+{
+	if (t->throttle_applied) {
+		t->throttle_applied = false;
+		up_write(&t->lock);
+	}
+}
+
+static void throttle_lock(struct throttle *t)
+{
+	down_read(&t->lock);
+}
+
+static void throttle_unlock(struct throttle *t)
+{
+	up_read(&t->lock);
 }
 
 /*----------------------------------------------------------------*/
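The throttle added above gives the worker a way to apply back-pressure: submitters take the rw_semaphore for read, and once a worker pass has run longer than THROTTLE_THRESHOLD the worker takes it for write, stalling new submissions until the pass completes (do_worker and thin_defer_bio_with_throttle, later in this patch, are the two sides). A compileable userspace analogue, assuming POSIX threads and a monotonic clock in place of the kernel's rw_semaphore and jiffies:

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

#define THROTTLE_THRESHOLD_NS 1000000000ULL	/* ~1s, mirroring (1 * HZ) */

struct throttle {
	pthread_rwlock_t lock;
	unsigned long long threshold_ns;
	bool throttle_applied;
};

static unsigned long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Worker side: start a timed pass; escalate to the write lock only when
 * the pass outlives the threshold, which stalls new submitters. */
static void throttle_work_start(struct throttle *t)
{
	t->threshold_ns = now_ns() + THROTTLE_THRESHOLD_NS;
}

static void throttle_work_update(struct throttle *t)
{
	if (!t->throttle_applied && now_ns() > t->threshold_ns) {
		pthread_rwlock_wrlock(&t->lock);
		t->throttle_applied = true;
	}
}

static void throttle_work_complete(struct throttle *t)
{
	if (t->throttle_applied) {
		t->throttle_applied = false;
		pthread_rwlock_unlock(&t->lock);
	}
}

/* Submission side: a cheap shared lock that only blocks once the worker
 * has escalated -- that is the whole back-pressure mechanism. */
static void throttle_lock(struct throttle *t)
{
	pthread_rwlock_rdlock(&t->lock);
}

static void throttle_unlock(struct throttle *t)
{
	pthread_rwlock_unlock(&t->lock);
}

int main(void)
{
	struct throttle t = { PTHREAD_RWLOCK_INITIALIZER, 0, false };

	throttle_work_start(&t);	/* worker begins a pass */
	throttle_lock(&t);		/* a submitter passes straight through */
	throttle_unlock(&t);
	throttle_work_update(&t);	/* would escalate only after ~1s */
	throttle_work_complete(&t);
	return 0;
}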
@@ -155,8 +205,11 @@ struct pool_features {
 
 struct thin_c;
 typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio);
+typedef void (*process_cell_fn)(struct thin_c *tc, struct dm_bio_prison_cell *cell);
 typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m);
 
+#define CELL_SORT_ARRAY_SIZE 8192
+
 struct pool {
 	struct list_head list;
 	struct dm_target *ti;	/* Only set if a pool target is bound */
@@ -171,11 +224,13 @@ struct pool {
 
 	struct pool_features pf;
 	bool low_water_triggered:1;	/* A dm event has been sent */
+	bool suspended:1;
 
 	struct dm_bio_prison *prison;
 	struct dm_kcopyd_client *copier;
 
 	struct workqueue_struct *wq;
+	struct throttle throttle;
 	struct work_struct worker;
 	struct delayed_work waker;
 	struct delayed_work no_space_timeout;
@@ -198,8 +253,13 @@ struct pool {
 	process_bio_fn process_bio;
 	process_bio_fn process_discard;
 
+	process_cell_fn process_cell;
+	process_cell_fn process_discard_cell;
+
 	process_mapping_fn process_prepared_mapping;
 	process_mapping_fn process_prepared_discard;
+
+	struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE];
 };
 
 static enum pool_mode get_pool_mode(struct pool *pool);
@@ -232,8 +292,11 @@ struct thin_c {
 
 	struct pool *pool;
 	struct dm_thin_device *td;
+	struct mapped_device *thin_md;
+
 	bool requeue_mode:1;
 	spinlock_t lock;
+	struct list_head deferred_cells;
 	struct bio_list deferred_bio_list;
 	struct bio_list retry_on_resume_list;
 	struct rb_root sort_bio_list; /* sorted list of deferred bios */
@@ -290,6 +353,15 @@ static void cell_release(struct pool *pool,
 	dm_bio_prison_free_cell(pool->prison, cell);
 }
 
+static void cell_visit_release(struct pool *pool,
+			       void (*fn)(void *, struct dm_bio_prison_cell *),
+			       void *context,
+			       struct dm_bio_prison_cell *cell)
+{
+	dm_cell_visit_release(pool->prison, fn, context, cell);
+	dm_bio_prison_free_cell(pool->prison, cell);
+}
+
 static void cell_release_no_holder(struct pool *pool,
 				   struct dm_bio_prison_cell *cell,
 				   struct bio_list *bios)
@@ -298,19 +370,6 @@ static void cell_release_no_holder(struct pool *pool,
 	dm_bio_prison_free_cell(pool->prison, cell);
 }
 
-static void cell_defer_no_holder_no_free(struct thin_c *tc,
-					 struct dm_bio_prison_cell *cell)
-{
-	struct pool *pool = tc->pool;
-	unsigned long flags;
-
-	spin_lock_irqsave(&tc->lock, flags);
-	dm_cell_release_no_holder(pool->prison, cell, &tc->deferred_bio_list);
-	spin_unlock_irqrestore(&tc->lock, flags);
-
-	wake_worker(pool);
-}
-
 static void cell_error_with_code(struct pool *pool,
 				 struct dm_bio_prison_cell *cell, int error_code)
 {
@@ -323,6 +382,16 @@ static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
 	cell_error_with_code(pool, cell, -EIO);
 }
 
+static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
+{
+	cell_error_with_code(pool, cell, 0);
+}
+
+static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
+{
+	cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE);
+}
+
 /*----------------------------------------------------------------*/
 
 /*
@@ -393,44 +462,65 @@ struct dm_thin_endio_hook {
 	struct rb_node rb_node;
 };
 
-static void requeue_bio_list(struct thin_c *tc, struct bio_list *master)
+static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
+{
+	bio_list_merge(bios, master);
+	bio_list_init(master);
+}
+
+static void error_bio_list(struct bio_list *bios, int error)
 {
 	struct bio *bio;
+
+	while ((bio = bio_list_pop(bios)))
+		bio_endio(bio, error);
+}
+
+static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
+{
 	struct bio_list bios;
 	unsigned long flags;
 
 	bio_list_init(&bios);
 
 	spin_lock_irqsave(&tc->lock, flags);
-	bio_list_merge(&bios, master);
-	bio_list_init(master);
+	__merge_bio_list(&bios, master);
 	spin_unlock_irqrestore(&tc->lock, flags);
 
-	while ((bio = bio_list_pop(&bios)))
-		bio_endio(bio, DM_ENDIO_REQUEUE);
+	error_bio_list(&bios, error);
 }
 
-static void requeue_io(struct thin_c *tc)
+static void requeue_deferred_cells(struct thin_c *tc)
 {
-	requeue_bio_list(tc, &tc->deferred_bio_list);
-	requeue_bio_list(tc, &tc->retry_on_resume_list);
+	struct pool *pool = tc->pool;
+	unsigned long flags;
+	struct list_head cells;
+	struct dm_bio_prison_cell *cell, *tmp;
+
+	INIT_LIST_HEAD(&cells);
+
+	spin_lock_irqsave(&tc->lock, flags);
+	list_splice_init(&tc->deferred_cells, &cells);
+	spin_unlock_irqrestore(&tc->lock, flags);
+
+	list_for_each_entry_safe(cell, tmp, &cells, user_list)
+		cell_requeue(pool, cell);
 }
 
-static void error_thin_retry_list(struct thin_c *tc)
+static void requeue_io(struct thin_c *tc)
 {
-	struct bio *bio;
-	unsigned long flags;
 	struct bio_list bios;
+	unsigned long flags;
 
 	bio_list_init(&bios);
 
 	spin_lock_irqsave(&tc->lock, flags);
-	bio_list_merge(&bios, &tc->retry_on_resume_list);
-	bio_list_init(&tc->retry_on_resume_list);
+	__merge_bio_list(&bios, &tc->deferred_bio_list);
+	__merge_bio_list(&bios, &tc->retry_on_resume_list);
 	spin_unlock_irqrestore(&tc->lock, flags);
 
-	while ((bio = bio_list_pop(&bios)))
-		bio_io_error(bio);
+	error_bio_list(&bios, DM_ENDIO_REQUEUE);
+	requeue_deferred_cells(tc);
 }
 
 static void error_retry_list(struct pool *pool)
@@ -439,7 +529,7 @@ static void error_retry_list(struct pool *pool)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(tc, &pool->active_thins, list)
-		error_thin_retry_list(tc);
+		error_thin_bio_list(tc, &tc->retry_on_resume_list, -EIO);
 	rcu_read_unlock();
 }
 
@@ -629,33 +719,75 @@ static void overwrite_endio(struct bio *bio, int err)
  */
 
 /*
- * This sends the bios in the cell back to the deferred_bios list.
+ * This sends the bios in the cell, except the original holder, back
+ * to the deferred_bios list.
  */
-static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 {
 	struct pool *pool = tc->pool;
 	unsigned long flags;
 
 	spin_lock_irqsave(&tc->lock, flags);
-	cell_release(pool, cell, &tc->deferred_bio_list);
+	cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
 	spin_unlock_irqrestore(&tc->lock, flags);
 
 	wake_worker(pool);
 }
 
-/*
- * Same as cell_defer above, except it omits the original holder of the cell.
- */
-static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+static void thin_defer_bio(struct thin_c *tc, struct bio *bio);
+
+struct remap_info {
+	struct thin_c *tc;
+	struct bio_list defer_bios;
+	struct bio_list issue_bios;
+};
+
+static void __inc_remap_and_issue_cell(void *context,
+				       struct dm_bio_prison_cell *cell)
 {
-	struct pool *pool = tc->pool;
-	unsigned long flags;
+	struct remap_info *info = context;
+	struct bio *bio;
 
-	spin_lock_irqsave(&tc->lock, flags);
-	cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
-	spin_unlock_irqrestore(&tc->lock, flags);
+	while ((bio = bio_list_pop(&cell->bios))) {
+		if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA))
+			bio_list_add(&info->defer_bios, bio);
+		else {
+			inc_all_io_entry(info->tc->pool, bio);
 
-	wake_worker(pool);
+			/*
+			 * We can't issue the bios with the bio prison lock
+			 * held, so we add them to a list to issue on
+			 * return from this function.
+			 */
+			bio_list_add(&info->issue_bios, bio);
+		}
+	}
+}
+
+static void inc_remap_and_issue_cell(struct thin_c *tc,
+				     struct dm_bio_prison_cell *cell,
+				     dm_block_t block)
+{
+	struct bio *bio;
+	struct remap_info info;
+
+	info.tc = tc;
+	bio_list_init(&info.defer_bios);
+	bio_list_init(&info.issue_bios);
+
+	/*
+	 * We have to be careful to inc any bios we're about to issue
+	 * before the cell is released, and avoid a race with new bios
+	 * being added to the cell.
+	 */
+	cell_visit_release(tc->pool, __inc_remap_and_issue_cell,
+			   &info, cell);
+
+	while ((bio = bio_list_pop(&info.defer_bios)))
+		thin_defer_bio(tc, bio);
+
+	while ((bio = bio_list_pop(&info.issue_bios)))
+		remap_and_issue(info.tc, bio, block);
 }
 
 static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
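inc_remap_and_issue_cell() above leans on the new cell_visit_release(): the callback runs while the bio prison lock is held, so no new bio can join the cell mid-visit, and the callback only sorts bios into defer/issue lists; actual submission happens after the cell is released, since issuing I/O under the prison lock would be unsafe. A stripped-down, runnable sketch of that visit-then-release shape; all the types here are stand-ins, not the real dm-bio-prison API:

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct bio {				/* stand-in, not the block layer's bio */
	struct bio *next;
	int write;
	long sector;
};

struct cell {
	struct bio *bios;		/* waiters, guarded by the prison lock */
};

struct prison {
	pthread_mutex_t lock;
};

/* Run fn(context, cell) with the prison lock held -- no new bio can join
 * the cell during the visit -- then release the cell before returning. */
static void cell_visit_release(struct prison *p,
			       void (*fn)(void *, struct cell *),
			       void *context, struct cell *cell)
{
	pthread_mutex_lock(&p->lock);
	fn(context, cell);
	cell->bios = NULL;		/* cell released: nothing left to hold */
	pthread_mutex_unlock(&p->lock);
}

struct remap_info {
	struct bio *defer;
	struct bio *issue;
};

static void push(struct bio **list, struct bio *b)
{
	b->next = *list;
	*list = b;
}

/* The visitor only classifies; no I/O is issued while the lock is held. */
static void classify(void *context, struct cell *cell)
{
	struct remap_info *info = context;
	struct bio *b;

	while ((b = cell->bios)) {
		cell->bios = b->next;
		push(b->write ? &info->defer : &info->issue, b);
	}
}

int main(void)
{
	struct prison p = { PTHREAD_MUTEX_INITIALIZER };
	struct bio r = { NULL, 0, 8 }, w = { &r, 1, 8 };
	struct cell c = { &w };
	struct remap_info info = { NULL, NULL };
	struct bio *b;

	cell_visit_release(&p, classify, &info, &c);

	/* only after release is it safe to act on the collected backlog */
	for (b = info.issue; b; b = b->next)
		printf("issue read at sector %ld\n", b->sector);
	for (b = info.defer; b; b = b->next)
		printf("defer write at sector %ld\n", b->sector);
	return 0;
}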
@@ -706,10 +838,13 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 	 * the bios in the cell.
 	 */
 	if (bio) {
-		cell_defer_no_holder(tc, m->cell);
+		inc_remap_and_issue_cell(tc, m->cell, m->data_block);
 		bio_endio(bio, 0);
-	} else
-		cell_defer(tc, m->cell);
+	} else {
+		inc_all_io_entry(tc->pool, m->cell->holder);
+		remap_and_issue(tc, m->cell->holder, m->data_block);
+		inc_remap_and_issue_cell(tc, m->cell, m->data_block);
+	}
 
 out:
 	list_del(&m->list);
@@ -842,6 +977,20 @@ static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
 	}
 }
 
+static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
+				      dm_block_t data_block,
+				      struct dm_thin_new_mapping *m)
+{
+	struct pool *pool = tc->pool;
+	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
+
+	h->overwrite_mapping = m;
+	m->bio = bio;
+	save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
+	inc_all_io_entry(pool, bio);
+	remap_and_issue(tc, bio, data_block);
+}
+
 /*
  * A partial copy also needs to zero the uncopied region.
  */
@@ -876,15 +1025,9 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 	 * If the whole block of data is being overwritten, we can issue the
 	 * bio immediately. Otherwise we use kcopyd to clone the data first.
 	 */
-	if (io_overwrites_block(pool, bio)) {
-		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-
-		h->overwrite_mapping = m;
-		m->bio = bio;
-		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
-		inc_all_io_entry(pool, bio);
-		remap_and_issue(tc, bio, data_dest);
-	} else {
+	if (io_overwrites_block(pool, bio))
+		remap_and_issue_overwrite(tc, bio, data_dest, m);
+	else {
 		struct dm_io_region from, to;
 
 		from.bdev = origin->bdev;
@@ -953,16 +1096,10 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 	if (!pool->pf.zero_new_blocks)
 		process_prepared_mapping(m);
 
-	else if (io_overwrites_block(pool, bio)) {
-		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
-
-		h->overwrite_mapping = m;
-		m->bio = bio;
-		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
-		inc_all_io_entry(pool, bio);
-		remap_and_issue(tc, bio, data_block);
+	else if (io_overwrites_block(pool, bio))
+		remap_and_issue_overwrite(tc, bio, data_block, m);
 
-	} else
+	else
 		ll_zero(tc, m,
 			data_block * pool->sectors_per_block,
 			(data_block + 1) * pool->sectors_per_block);
@@ -1134,29 +1271,25 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
 	bio_list_init(&bios);
 	cell_release(pool, cell, &bios);
 
-	error = should_error_unserviceable_bio(pool);
-	if (error)
-		while ((bio = bio_list_pop(&bios)))
-			bio_endio(bio, error);
-	else
-		while ((bio = bio_list_pop(&bios)))
-			retry_on_resume(bio);
+	while ((bio = bio_list_pop(&bios)))
+		retry_on_resume(bio);
 }
 
-static void process_discard(struct thin_c *tc, struct bio *bio)
+static void process_discard_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 {
 	int r;
-	unsigned long flags;
+	struct bio *bio = cell->holder;
 	struct pool *pool = tc->pool;
-	struct dm_bio_prison_cell *cell, *cell2;
-	struct dm_cell_key key, key2;
+	struct dm_bio_prison_cell *cell2;
+	struct dm_cell_key key2;
 	dm_block_t block = get_bio_block(tc, bio);
 	struct dm_thin_lookup_result lookup_result;
 	struct dm_thin_new_mapping *m;
 
-	build_virtual_key(tc->td, block, &key);
-	if (bio_detain(tc->pool, &key, bio, &cell))
+	if (tc->requeue_mode) {
+		cell_requeue(pool, cell);
 		return;
+	}
 
 	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
 	switch (r) {
@@ -1187,12 +1320,9 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 		m->cell2 = cell2;
 		m->bio = bio;
 
-		if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list)) {
-			spin_lock_irqsave(&pool->lock, flags);
-			list_add_tail(&m->list, &pool->prepared_discards);
-			spin_unlock_irqrestore(&pool->lock, flags);
-			wake_worker(pool);
-		}
+		if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
+			pool->process_prepared_discard(m);
+
 	} else {
 		inc_all_io_entry(pool, bio);
 		cell_defer_no_holder(tc, cell);
@@ -1227,6 +1357,19 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 	}
 }
 
+static void process_discard_bio(struct thin_c *tc, struct bio *bio)
+{
+	struct dm_bio_prison_cell *cell;
+	struct dm_cell_key key;
+	dm_block_t block = get_bio_block(tc, bio);
+
+	build_virtual_key(tc->td, block, &key);
+	if (bio_detain(tc->pool, &key, bio, &cell))
+		return;
+
+	process_discard_cell(tc, cell);
+}
+
 static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
 			  struct dm_cell_key *key,
 			  struct dm_thin_lookup_result *lookup_result,
@@ -1255,11 +1398,53 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
 	}
 }
 
+static void __remap_and_issue_shared_cell(void *context,
+					  struct dm_bio_prison_cell *cell)
+{
+	struct remap_info *info = context;
+	struct bio *bio;
+
+	while ((bio = bio_list_pop(&cell->bios))) {
+		if ((bio_data_dir(bio) == WRITE) ||
+		    (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)))
+			bio_list_add(&info->defer_bios, bio);
+		else {
+			struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));;
+
+			h->shared_read_entry = dm_deferred_entry_inc(info->tc->pool->shared_read_ds);
+			inc_all_io_entry(info->tc->pool, bio);
+			bio_list_add(&info->issue_bios, bio);
+		}
+	}
+}
+
+static void remap_and_issue_shared_cell(struct thin_c *tc,
+					struct dm_bio_prison_cell *cell,
+					dm_block_t block)
+{
+	struct bio *bio;
+	struct remap_info info;
+
+	info.tc = tc;
+	bio_list_init(&info.defer_bios);
+	bio_list_init(&info.issue_bios);
+
+	cell_visit_release(tc->pool, __remap_and_issue_shared_cell,
+			   &info, cell);
+
+	while ((bio = bio_list_pop(&info.defer_bios)))
+		thin_defer_bio(tc, bio);
+
+	while ((bio = bio_list_pop(&info.issue_bios)))
+		remap_and_issue(tc, bio, block);
+}
+
 static void process_shared_bio(struct thin_c *tc, struct bio *bio,
 			       dm_block_t block,
-			       struct dm_thin_lookup_result *lookup_result)
+			       struct dm_thin_lookup_result *lookup_result,
+			       struct dm_bio_prison_cell *virt_cell)
 {
-	struct dm_bio_prison_cell *cell;
+	struct dm_bio_prison_cell *data_cell;
 	struct pool *pool = tc->pool;
 	struct dm_cell_key key;
 
@@ -1268,19 +1453,23 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
 	 * of being broken so we have nothing further to do here.
 	 */
 	build_data_key(tc->td, lookup_result->block, &key);
-	if (bio_detain(pool, &key, bio, &cell))
+	if (bio_detain(pool, &key, bio, &data_cell)) {
+		cell_defer_no_holder(tc, virt_cell);
 		return;
+	}
 
-	if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size)
-		break_sharing(tc, bio, block, &key, lookup_result, cell);
-	else {
+	if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size) {
+		break_sharing(tc, bio, block, &key, lookup_result, data_cell);
+		cell_defer_no_holder(tc, virt_cell);
+	} else {
 		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
 
 		h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
 		inc_all_io_entry(pool, bio);
-		cell_defer_no_holder(tc, cell);
-
 		remap_and_issue(tc, bio, lookup_result->block);
+
+		remap_and_issue_shared_cell(tc, data_cell, lookup_result->block);
+		remap_and_issue_shared_cell(tc, virt_cell, lookup_result->block);
 	}
 }
 
@@ -1333,34 +1522,28 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
 	}
 }
 
-static void process_bio(struct thin_c *tc, struct bio *bio)
+static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 {
 	int r;
 	struct pool *pool = tc->pool;
+	struct bio *bio = cell->holder;
 	dm_block_t block = get_bio_block(tc, bio);
-	struct dm_bio_prison_cell *cell;
-	struct dm_cell_key key;
 	struct dm_thin_lookup_result lookup_result;
 
-	/*
-	 * If cell is already occupied, then the block is already
-	 * being provisioned so we have nothing further to do here.
-	 */
-	build_virtual_key(tc->td, block, &key);
-	if (bio_detain(pool, &key, bio, &cell))
+	if (tc->requeue_mode) {
+		cell_requeue(pool, cell);
 		return;
+	}
 
 	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
 	switch (r) {
 	case 0:
-		if (lookup_result.shared) {
-			process_shared_bio(tc, bio, block, &lookup_result);
-			cell_defer_no_holder(tc, cell); /* FIXME: pass this cell into process_shared? */
-		} else {
+		if (lookup_result.shared)
+			process_shared_bio(tc, bio, block, &lookup_result, cell);
+		else {
 			inc_all_io_entry(pool, bio);
-			cell_defer_no_holder(tc, cell);
-
 			remap_and_issue(tc, bio, lookup_result.block);
+			inc_remap_and_issue_cell(tc, cell, lookup_result.block);
 		}
 		break;
 
@@ -1394,7 +1577,26 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
 	}
 }
 
-static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
+static void process_bio(struct thin_c *tc, struct bio *bio)
+{
+	struct pool *pool = tc->pool;
+	dm_block_t block = get_bio_block(tc, bio);
+	struct dm_bio_prison_cell *cell;
+	struct dm_cell_key key;
+
+	/*
+	 * If cell is already occupied, then the block is already
+	 * being provisioned so we have nothing further to do here.
+	 */
+	build_virtual_key(tc->td, block, &key);
+	if (bio_detain(pool, &key, bio, &cell))
+		return;
+
+	process_cell(tc, cell);
+}
+
+static void __process_bio_read_only(struct thin_c *tc, struct bio *bio,
+				    struct dm_bio_prison_cell *cell)
 {
 	int r;
 	int rw = bio_data_dir(bio);
@@ -1404,15 +1606,21 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
 	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
 	switch (r) {
 	case 0:
-		if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size)
+		if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size) {
 			handle_unserviceable_bio(tc->pool, bio);
-		else {
+			if (cell)
+				cell_defer_no_holder(tc, cell);
+		} else {
 			inc_all_io_entry(tc->pool, bio);
 			remap_and_issue(tc, bio, lookup_result.block);
+			if (cell)
+				inc_remap_and_issue_cell(tc, cell, lookup_result.block);
 		}
 		break;
 
 	case -ENODATA:
+		if (cell)
+			cell_defer_no_holder(tc, cell);
 		if (rw != READ) {
 			handle_unserviceable_bio(tc->pool, bio);
 			break;
@@ -1431,11 +1639,23 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
 	default:
 		DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
 			    __func__, r);
+		if (cell)
+			cell_defer_no_holder(tc, cell);
 		bio_io_error(bio);
 		break;
 	}
 }
 
+static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
+{
+	__process_bio_read_only(tc, bio, NULL);
+}
+
+static void process_cell_read_only(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+{
+	__process_bio_read_only(tc, cell->holder, cell);
+}
+
 static void process_bio_success(struct thin_c *tc, struct bio *bio)
 {
 	bio_endio(bio, 0);
@@ -1446,6 +1666,16 @@ static void process_bio_fail(struct thin_c *tc, struct bio *bio)
 	bio_io_error(bio);
 }
 
+static void process_cell_success(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+{
+	cell_success(tc->pool, cell);
+}
+
+static void process_cell_fail(struct thin_c *tc, struct dm_bio_prison_cell *cell)
+{
+	cell_error(tc->pool, cell);
+}
+
 /*
  * FIXME: should we also commit due to size of transaction, measured in
  * metadata blocks?
@@ -1527,9 +1757,10 @@ static void process_thin_deferred_bios(struct thin_c *tc)
 	struct bio *bio;
 	struct bio_list bios;
 	struct blk_plug plug;
+	unsigned count = 0;
 
 	if (tc->requeue_mode) {
-		requeue_bio_list(tc, &tc->deferred_bio_list);
+		error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE);
 		return;
 	}
 
@@ -1568,10 +1799,97 @@ static void process_thin_deferred_bios(struct thin_c *tc)
 			pool->process_discard(tc, bio);
 		else
 			pool->process_bio(tc, bio);
+
+		if ((count++ & 127) == 0) {
+			throttle_work_update(&pool->throttle);
+			dm_pool_issue_prefetches(pool->pmd);
+		}
 	}
 	blk_finish_plug(&plug);
 }
 
+static int cmp_cells(const void *lhs, const void *rhs)
+{
+	struct dm_bio_prison_cell *lhs_cell = *((struct dm_bio_prison_cell **) lhs);
+	struct dm_bio_prison_cell *rhs_cell = *((struct dm_bio_prison_cell **) rhs);
+
+	BUG_ON(!lhs_cell->holder);
+	BUG_ON(!rhs_cell->holder);
+
+	if (lhs_cell->holder->bi_iter.bi_sector < rhs_cell->holder->bi_iter.bi_sector)
+		return -1;
+
+	if (lhs_cell->holder->bi_iter.bi_sector > rhs_cell->holder->bi_iter.bi_sector)
+		return 1;
+
+	return 0;
+}
+
+static unsigned sort_cells(struct pool *pool, struct list_head *cells)
+{
+	unsigned count = 0;
+	struct dm_bio_prison_cell *cell, *tmp;
+
+	list_for_each_entry_safe(cell, tmp, cells, user_list) {
+		if (count >= CELL_SORT_ARRAY_SIZE)
+			break;
+
+		pool->cell_sort_array[count++] = cell;
+		list_del(&cell->user_list);
+	}
+
+	sort(pool->cell_sort_array, count, sizeof(cell), cmp_cells, NULL);
+
+	return count;
+}
+
+static void process_thin_deferred_cells(struct thin_c *tc)
+{
+	struct pool *pool = tc->pool;
+	unsigned long flags;
+	struct list_head cells;
+	struct dm_bio_prison_cell *cell;
+	unsigned i, j, count;
+
+	INIT_LIST_HEAD(&cells);
+
+	spin_lock_irqsave(&tc->lock, flags);
+	list_splice_init(&tc->deferred_cells, &cells);
+	spin_unlock_irqrestore(&tc->lock, flags);
+
+	if (list_empty(&cells))
+		return;
+
+	do {
+		count = sort_cells(tc->pool, &cells);
+
+		for (i = 0; i < count; i++) {
+			cell = pool->cell_sort_array[i];
+			BUG_ON(!cell->holder);
+
+			/*
+			 * If we've got no free new_mapping structs, and processing
+			 * this bio might require one, we pause until there are some
+			 * prepared mappings to process.
+			 */
+			if (ensure_next_mapping(pool)) {
+				for (j = i; j < count; j++)
+					list_add(&pool->cell_sort_array[j]->user_list, &cells);
+
+				spin_lock_irqsave(&tc->lock, flags);
+				list_splice(&cells, &tc->deferred_cells);
+				spin_unlock_irqrestore(&tc->lock, flags);
+				return;
+			}
+
+			if (cell->holder->bi_rw & REQ_DISCARD)
+				pool->process_discard_cell(tc, cell);
+			else
+				pool->process_cell(tc, cell);
+		}
+	} while (!list_empty(&cells));
+}
+
 static void thin_get(struct thin_c *tc);
 static void thin_put(struct thin_c *tc);
 
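sort_cells() above pulls up to CELL_SORT_ARRAY_SIZE deferred cells off the list and orders them by the holder bio's start sector, so each worker pass walks the data device in roughly ascending order instead of in arrival order. The same batching idea in plain C with qsort(3); a hypothetical request type stands in for dm_bio_prison_cell:

#include <stdio.h>
#include <stdlib.h>

struct request {
	unsigned long long sector;
};

static int cmp_requests(const void *lhs, const void *rhs)
{
	const struct request *l = *(const struct request * const *)lhs;
	const struct request *r = *(const struct request * const *)rhs;

	/* Ascending start sector: turns scattered deferred work into a
	 * mostly sequential pass across the device. */
	if (l->sector < r->sector)
		return -1;
	if (l->sector > r->sector)
		return 1;
	return 0;
}

int main(void)
{
	struct request a = { 4096 }, b = { 8 }, c = { 1024 };
	struct request *batch[] = { &a, &b, &c };	/* arrival order */
	size_t i;

	qsort(batch, 3, sizeof(batch[0]), cmp_requests);

	for (i = 0; i < 3; i++)
		printf("%llu\n", batch[i]->sector);	/* 8, 1024, 4096 */
	return 0;
}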
@@ -1620,6 +1938,7 @@ static void process_deferred_bios(struct pool *pool) | |||
1620 | 1938 | ||
1621 | tc = get_first_thin(pool); | 1939 | tc = get_first_thin(pool); |
1622 | while (tc) { | 1940 | while (tc) { |
1941 | process_thin_deferred_cells(tc); | ||
1623 | process_thin_deferred_bios(tc); | 1942 | process_thin_deferred_bios(tc); |
1624 | tc = get_next_thin(pool, tc); | 1943 | tc = get_next_thin(pool, tc); |
1625 | } | 1944 | } |
@@ -1653,9 +1972,15 @@ static void do_worker(struct work_struct *ws) | |||
1653 | { | 1972 | { |
1654 | struct pool *pool = container_of(ws, struct pool, worker); | 1973 | struct pool *pool = container_of(ws, struct pool, worker); |
1655 | 1974 | ||
1975 | throttle_work_start(&pool->throttle); | ||
1976 | dm_pool_issue_prefetches(pool->pmd); | ||
1977 | throttle_work_update(&pool->throttle); | ||
1656 | process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping); | 1978 | process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping); |
1979 | throttle_work_update(&pool->throttle); | ||
1657 | process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard); | 1980 | process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard); |
1981 | throttle_work_update(&pool->throttle); | ||
1658 | process_deferred_bios(pool); | 1982 | process_deferred_bios(pool); |
1983 | throttle_work_complete(&pool->throttle); | ||
1659 | } | 1984 | } |
1660 | 1985 | ||
1661 | /* | 1986 | /* |
@@ -1792,6 +2117,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) | |||
1792 | dm_pool_metadata_read_only(pool->pmd); | 2117 | dm_pool_metadata_read_only(pool->pmd); |
1793 | pool->process_bio = process_bio_fail; | 2118 | pool->process_bio = process_bio_fail; |
1794 | pool->process_discard = process_bio_fail; | 2119 | pool->process_discard = process_bio_fail; |
2120 | pool->process_cell = process_cell_fail; | ||
2121 | pool->process_discard_cell = process_cell_fail; | ||
1795 | pool->process_prepared_mapping = process_prepared_mapping_fail; | 2122 | pool->process_prepared_mapping = process_prepared_mapping_fail; |
1796 | pool->process_prepared_discard = process_prepared_discard_fail; | 2123 | pool->process_prepared_discard = process_prepared_discard_fail; |
1797 | 2124 | ||
@@ -1804,6 +2131,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) | |||
1804 | dm_pool_metadata_read_only(pool->pmd); | 2131 | dm_pool_metadata_read_only(pool->pmd); |
1805 | pool->process_bio = process_bio_read_only; | 2132 | pool->process_bio = process_bio_read_only; |
1806 | pool->process_discard = process_bio_success; | 2133 | pool->process_discard = process_bio_success; |
2134 | pool->process_cell = process_cell_read_only; | ||
2135 | pool->process_discard_cell = process_cell_success; | ||
1807 | pool->process_prepared_mapping = process_prepared_mapping_fail; | 2136 | pool->process_prepared_mapping = process_prepared_mapping_fail; |
1808 | pool->process_prepared_discard = process_prepared_discard_passdown; | 2137 | pool->process_prepared_discard = process_prepared_discard_passdown; |
1809 | 2138 | ||
@@ -1822,7 +2151,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) | |||
1822 | if (old_mode != new_mode) | 2151 | if (old_mode != new_mode) |
1823 | notify_of_pool_mode_change(pool, "out-of-data-space"); | 2152 | notify_of_pool_mode_change(pool, "out-of-data-space"); |
1824 | pool->process_bio = process_bio_read_only; | 2153 | pool->process_bio = process_bio_read_only; |
1825 | pool->process_discard = process_discard; | 2154 | pool->process_discard = process_discard_bio; |
2155 | pool->process_cell = process_cell_read_only; | ||
2156 | pool->process_discard_cell = process_discard_cell; | ||
1826 | pool->process_prepared_mapping = process_prepared_mapping; | 2157 | pool->process_prepared_mapping = process_prepared_mapping; |
1827 | pool->process_prepared_discard = process_prepared_discard_passdown; | 2158 | pool->process_prepared_discard = process_prepared_discard_passdown; |
1828 | 2159 | ||
@@ -1835,7 +2166,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) | |||
1835 | notify_of_pool_mode_change(pool, "write"); | 2166 | notify_of_pool_mode_change(pool, "write"); |
1836 | dm_pool_metadata_read_write(pool->pmd); | 2167 | dm_pool_metadata_read_write(pool->pmd); |
1837 | pool->process_bio = process_bio; | 2168 | pool->process_bio = process_bio; |
1838 | pool->process_discard = process_discard; | 2169 | pool->process_discard = process_discard_bio; |
2170 | pool->process_cell = process_cell; | ||
2171 | pool->process_discard_cell = process_discard_cell; | ||
1839 | pool->process_prepared_mapping = process_prepared_mapping; | 2172 | pool->process_prepared_mapping = process_prepared_mapping; |
1840 | pool->process_prepared_discard = process_prepared_discard; | 2173 | pool->process_prepared_discard = process_prepared_discard; |
1841 | break; | 2174 | break; |
@@ -1895,6 +2228,29 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio) | |||
1895 | wake_worker(pool); | 2228 | wake_worker(pool); |
1896 | } | 2229 | } |
1897 | 2230 | ||
2231 | static void thin_defer_bio_with_throttle(struct thin_c *tc, struct bio *bio) | ||
2232 | { | ||
2233 | struct pool *pool = tc->pool; | ||
2234 | |||
2235 | throttle_lock(&pool->throttle); | ||
2236 | thin_defer_bio(tc, bio); | ||
2237 | throttle_unlock(&pool->throttle); | ||
2238 | } | ||
2239 | |||
2240 | static void thin_defer_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell) | ||
2241 | { | ||
2242 | unsigned long flags; | ||
2243 | struct pool *pool = tc->pool; | ||
2244 | |||
2245 | throttle_lock(&pool->throttle); | ||
2246 | spin_lock_irqsave(&tc->lock, flags); | ||
2247 | list_add_tail(&cell->user_list, &tc->deferred_cells); | ||
2248 | spin_unlock_irqrestore(&tc->lock, flags); | ||
2249 | throttle_unlock(&pool->throttle); | ||
2250 | |||
2251 | wake_worker(pool); | ||
2252 | } | ||
2253 | |||
1898 | static void thin_hook_bio(struct thin_c *tc, struct bio *bio) | 2254 | static void thin_hook_bio(struct thin_c *tc, struct bio *bio) |
1899 | { | 2255 | { |
1900 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | 2256 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); |
@@ -1915,8 +2271,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1915 | dm_block_t block = get_bio_block(tc, bio); | 2271 | dm_block_t block = get_bio_block(tc, bio); |
1916 | struct dm_thin_device *td = tc->td; | 2272 | struct dm_thin_device *td = tc->td; |
1917 | struct dm_thin_lookup_result result; | 2273 | struct dm_thin_lookup_result result; |
1918 | struct dm_bio_prison_cell cell1, cell2; | 2274 | struct dm_bio_prison_cell *virt_cell, *data_cell; |
1919 | struct dm_bio_prison_cell *cell_result; | ||
1920 | struct dm_cell_key key; | 2275 | struct dm_cell_key key; |
1921 | 2276 | ||
1922 | thin_hook_bio(tc, bio); | 2277 | thin_hook_bio(tc, bio); |
@@ -1932,7 +2287,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1932 | } | 2287 | } |
1933 | 2288 | ||
1934 | if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { | 2289 | if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { |
1935 | thin_defer_bio(tc, bio); | 2290 | thin_defer_bio_with_throttle(tc, bio); |
1936 | return DM_MAPIO_SUBMITTED; | 2291 | return DM_MAPIO_SUBMITTED; |
1937 | } | 2292 | } |
1938 | 2293 | ||
@@ -1941,7 +2296,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1941 | * there's a race with discard. | 2296 | * there's a race with discard. |
1942 | */ | 2297 | */ |
1943 | build_virtual_key(tc->td, block, &key); | 2298 | build_virtual_key(tc->td, block, &key); |
1944 | if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result)) | 2299 | if (bio_detain(tc->pool, &key, bio, &virt_cell)) |
1945 | return DM_MAPIO_SUBMITTED; | 2300 | return DM_MAPIO_SUBMITTED; |
1946 | 2301 | ||
1947 | r = dm_thin_find_block(td, block, 0, &result); | 2302 | r = dm_thin_find_block(td, block, 0, &result); |
@@ -1966,20 +2321,19 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1966 | * More distant ancestors are irrelevant. The | 2321 | * More distant ancestors are irrelevant. The |
1967 | * shared flag will be set in their case. | 2322 | * shared flag will be set in their case. |
1968 | */ | 2323 | */ |
1969 | thin_defer_bio(tc, bio); | 2324 | thin_defer_cell(tc, virt_cell); |
1970 | cell_defer_no_holder_no_free(tc, &cell1); | ||
1971 | return DM_MAPIO_SUBMITTED; | 2325 | return DM_MAPIO_SUBMITTED; |
1972 | } | 2326 | } |
1973 | 2327 | ||
1974 | build_data_key(tc->td, result.block, &key); | 2328 | build_data_key(tc->td, result.block, &key); |
1975 | if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2, &cell_result)) { | 2329 | if (bio_detain(tc->pool, &key, bio, &data_cell)) { |
1976 | cell_defer_no_holder_no_free(tc, &cell1); | 2330 | cell_defer_no_holder(tc, virt_cell); |
1977 | return DM_MAPIO_SUBMITTED; | 2331 | return DM_MAPIO_SUBMITTED; |
1978 | } | 2332 | } |
1979 | 2333 | ||
1980 | inc_all_io_entry(tc->pool, bio); | 2334 | inc_all_io_entry(tc->pool, bio); |
1981 | cell_defer_no_holder_no_free(tc, &cell2); | 2335 | cell_defer_no_holder(tc, data_cell); |
1982 | cell_defer_no_holder_no_free(tc, &cell1); | 2336 | cell_defer_no_holder(tc, virt_cell); |
1983 | 2337 | ||
1984 | remap(tc, bio, result.block); | 2338 | remap(tc, bio, result.block); |
1985 | return DM_MAPIO_REMAPPED; | 2339 | return DM_MAPIO_REMAPPED; |
@@ -1991,18 +2345,13 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
1991 | * of doing so. | 2345 | * of doing so. |
1992 | */ | 2346 | */ |
1993 | handle_unserviceable_bio(tc->pool, bio); | 2347 | handle_unserviceable_bio(tc->pool, bio); |
1994 | cell_defer_no_holder_no_free(tc, &cell1); | 2348 | cell_defer_no_holder(tc, virt_cell); |
1995 | return DM_MAPIO_SUBMITTED; | 2349 | return DM_MAPIO_SUBMITTED; |
1996 | } | 2350 | } |
1997 | /* fall through */ | 2351 | /* fall through */ |
1998 | 2352 | ||
1999 | case -EWOULDBLOCK: | 2353 | case -EWOULDBLOCK: |
2000 | /* | 2354 | thin_defer_cell(tc, virt_cell); |
2001 | * In future, the failed dm_thin_find_block above could | ||
2002 | * provide the hint to load the metadata into cache. | ||
2003 | */ | ||
2004 | thin_defer_bio(tc, bio); | ||
2005 | cell_defer_no_holder_no_free(tc, &cell1); | ||
2006 | return DM_MAPIO_SUBMITTED; | 2355 | return DM_MAPIO_SUBMITTED; |
2007 | 2356 | ||
2008 | default: | 2357 | default: |
@@ -2012,7 +2361,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
2012 | * pool is switched to fail-io mode. | 2361 | * pool is switched to fail-io mode. |
2013 | */ | 2362 | */ |
2014 | bio_io_error(bio); | 2363 | bio_io_error(bio); |
2015 | cell_defer_no_holder_no_free(tc, &cell1); | 2364 | cell_defer_no_holder(tc, virt_cell); |
2016 | return DM_MAPIO_SUBMITTED; | 2365 | return DM_MAPIO_SUBMITTED; |
2017 | } | 2366 | } |
2018 | } | 2367 | } |
@@ -2193,7 +2542,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
2193 | pool->sectors_per_block_shift = __ffs(block_size); | 2542 | pool->sectors_per_block_shift = __ffs(block_size); |
2194 | pool->low_water_blocks = 0; | 2543 | pool->low_water_blocks = 0; |
2195 | pool_features_init(&pool->pf); | 2544 | pool_features_init(&pool->pf); |
2196 | pool->prison = dm_bio_prison_create(PRISON_CELLS); | 2545 | pool->prison = dm_bio_prison_create(); |
2197 | if (!pool->prison) { | 2546 | if (!pool->prison) { |
2198 | *error = "Error creating pool's bio prison"; | 2547 | *error = "Error creating pool's bio prison"; |
2199 | err_p = ERR_PTR(-ENOMEM); | 2548 | err_p = ERR_PTR(-ENOMEM); |
@@ -2219,6 +2568,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
2219 | goto bad_wq; | 2568 | goto bad_wq; |
2220 | } | 2569 | } |
2221 | 2570 | ||
2571 | throttle_init(&pool->throttle); | ||
2222 | INIT_WORK(&pool->worker, do_worker); | 2572 | INIT_WORK(&pool->worker, do_worker); |
2223 | INIT_DELAYED_WORK(&pool->waker, do_waker); | 2573 | INIT_DELAYED_WORK(&pool->waker, do_waker); |
2224 | INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); | 2574 | INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); |
@@ -2228,6 +2578,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
2228 | INIT_LIST_HEAD(&pool->prepared_discards); | 2578 | INIT_LIST_HEAD(&pool->prepared_discards); |
2229 | INIT_LIST_HEAD(&pool->active_thins); | 2579 | INIT_LIST_HEAD(&pool->active_thins); |
2230 | pool->low_water_triggered = false; | 2580 | pool->low_water_triggered = false; |
2581 | pool->suspended = true; | ||
2231 | 2582 | ||
2232 | pool->shared_read_ds = dm_deferred_set_create(); | 2583 | pool->shared_read_ds = dm_deferred_set_create(); |
2233 | if (!pool->shared_read_ds) { | 2584 | if (!pool->shared_read_ds) { |
@@ -2764,20 +3115,77 @@ static int pool_preresume(struct dm_target *ti) | |||
2764 | return 0; | 3115 | return 0; |
2765 | } | 3116 | } |
2766 | 3117 | ||
3118 | static void pool_suspend_active_thins(struct pool *pool) | ||
3119 | { | ||
3120 | struct thin_c *tc; | ||
3121 | |||
3122 | /* Suspend all active thin devices */ | ||
3123 | tc = get_first_thin(pool); | ||
3124 | while (tc) { | ||
3125 | dm_internal_suspend_noflush(tc->thin_md); | ||
3126 | tc = get_next_thin(pool, tc); | ||
3127 | } | ||
3128 | } | ||
3129 | |||
3130 | static void pool_resume_active_thins(struct pool *pool) | ||
3131 | { | ||
3132 | struct thin_c *tc; | ||
3133 | |||
3134 | /* Resume all active thin devices */ | ||
3135 | tc = get_first_thin(pool); | ||
3136 | while (tc) { | ||
3137 | dm_internal_resume(tc->thin_md); | ||
3138 | tc = get_next_thin(pool, tc); | ||
3139 | } | ||
3140 | } | ||
3141 | |||
2767 | static void pool_resume(struct dm_target *ti) | 3142 | static void pool_resume(struct dm_target *ti) |
2768 | { | 3143 | { |
2769 | struct pool_c *pt = ti->private; | 3144 | struct pool_c *pt = ti->private; |
2770 | struct pool *pool = pt->pool; | 3145 | struct pool *pool = pt->pool; |
2771 | unsigned long flags; | 3146 | unsigned long flags; |
2772 | 3147 | ||
3148 | /* | ||
3149 | * Must requeue active_thins' bios and then resume | ||
3150 | * active_thins _before_ clearing 'suspend' flag. | ||
3151 | */ | ||
3152 | requeue_bios(pool); | ||
3153 | pool_resume_active_thins(pool); | ||
3154 | |||
2773 | spin_lock_irqsave(&pool->lock, flags); | 3155 | spin_lock_irqsave(&pool->lock, flags); |
2774 | pool->low_water_triggered = false; | 3156 | pool->low_water_triggered = false; |
3157 | pool->suspended = false; | ||
2775 | spin_unlock_irqrestore(&pool->lock, flags); | 3158 | spin_unlock_irqrestore(&pool->lock, flags); |
2776 | requeue_bios(pool); | ||
2777 | 3159 | ||
2778 | do_waker(&pool->waker.work); | 3160 | do_waker(&pool->waker.work); |
2779 | } | 3161 | } |
2780 | 3162 | ||
3163 | static void pool_presuspend(struct dm_target *ti) | ||
3164 | { | ||
3165 | struct pool_c *pt = ti->private; | ||
3166 | struct pool *pool = pt->pool; | ||
3167 | unsigned long flags; | ||
3168 | |||
3169 | spin_lock_irqsave(&pool->lock, flags); | ||
3170 | pool->suspended = true; | ||
3171 | spin_unlock_irqrestore(&pool->lock, flags); | ||
3172 | |||
3173 | pool_suspend_active_thins(pool); | ||
3174 | } | ||
3175 | |||
3176 | static void pool_presuspend_undo(struct dm_target *ti) | ||
3177 | { | ||
3178 | struct pool_c *pt = ti->private; | ||
3179 | struct pool *pool = pt->pool; | ||
3180 | unsigned long flags; | ||
3181 | |||
3182 | pool_resume_active_thins(pool); | ||
3183 | |||
3184 | spin_lock_irqsave(&pool->lock, flags); | ||
3185 | pool->suspended = false; | ||
3186 | spin_unlock_irqrestore(&pool->lock, flags); | ||
3187 | } | ||
3188 | |||
2781 | static void pool_postsuspend(struct dm_target *ti) | 3189 | static void pool_postsuspend(struct dm_target *ti) |
2782 | { | 3190 | { |
2783 | struct pool_c *pt = ti->private; | 3191 | struct pool_c *pt = ti->private; |
@@ -2949,7 +3357,6 @@ static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct | |||
2949 | * create_thin <dev_id> | 3357 | * create_thin <dev_id> |
2950 | * create_snap <dev_id> <origin_id> | 3358 | * create_snap <dev_id> <origin_id> |
2951 | * delete <dev_id> | 3359 | * delete <dev_id> |
2952 | * trim <dev_id> <new_size_in_sectors> | ||
2953 | * set_transaction_id <current_trans_id> <new_trans_id> | 3360 | * set_transaction_id <current_trans_id> <new_trans_id> |
2954 | * reserve_metadata_snap | 3361 | * reserve_metadata_snap |
2955 | * release_metadata_snap | 3362 | * release_metadata_snap |
@@ -3177,15 +3584,35 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
3177 | { | 3584 | { |
3178 | struct pool_c *pt = ti->private; | 3585 | struct pool_c *pt = ti->private; |
3179 | struct pool *pool = pt->pool; | 3586 | struct pool *pool = pt->pool; |
3180 | uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; | 3587 | sector_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; |
3588 | |||
3589 | /* | ||
3590 | * If max_sectors is smaller than pool->sectors_per_block, adjust it | ||
3591 | * to the highest possible power-of-2 factor of pool->sectors_per_block. | ||
3592 | * This is especially beneficial when the pool's data device is a RAID | ||
3593 | * device whose full stripe width matches pool->sectors_per_block, | ||
3594 | * because even though partial RAID stripe-sized IOs will then be issued | ||
3595 | * to a single RAID stripe, once aggregated they end on a full RAID | ||
3596 | * stripe boundary, which avoids cascading further partial stripe writes. | ||
3597 | */ | ||
3598 | if (limits->max_sectors < pool->sectors_per_block) { | ||
3599 | while (!is_factor(pool->sectors_per_block, limits->max_sectors)) { | ||
3600 | if ((limits->max_sectors & (limits->max_sectors - 1)) == 0) | ||
3601 | limits->max_sectors--; | ||
3602 | limits->max_sectors = rounddown_pow_of_two(limits->max_sectors); | ||
3603 | } | ||
3604 | } | ||
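To see why the loop steps a power-of-2 max_sectors down by one before rounding, here is a hedged userspace re-creation with made-up values (both helpers are reimplemented for the sketch; the kernel's rounddown_pow_of_two() lives in <linux/log2.h>):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t rounddown_pow_of_two(uint32_t n)
    {
            uint32_t p = 1;

            while (p <= n / 2)
                    p <<= 1;
            return p;       /* returns n itself when n is a power of two */
    }

    static int is_factor(uint64_t block_size, uint64_t n)
    {
            return n && block_size % n == 0;
    }

    int main(void)
    {
            uint64_t sectors_per_block = 1536;  /* 768 KiB thinp block */
            uint64_t max_sectors = 1024;        /* stacked limit: 512 KiB */

            if (max_sectors < sectors_per_block) {
                    while (!is_factor(sectors_per_block, max_sectors)) {
                            /* A power of two rounds down to itself, so
                             * step just below it first. */
                            if ((max_sectors & (max_sectors - 1)) == 0)
                                    max_sectors--;
                            max_sectors = rounddown_pow_of_two(max_sectors);
                    }
            }

            printf("max_sectors -> %llu\n",
                   (unsigned long long)max_sectors);  /* 1024 -> 512 */
            assert(is_factor(sectors_per_block, max_sectors));
            return 0;
    }

Here 1024 does not divide 1536, so the decrement lets the loop fall to 512, the largest power of two that does.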
3181 | 3605 | ||
3182 | /* | 3606 | /* |
3183 | * If the system-determined stacked limits are compatible with the | 3607 | * If the system-determined stacked limits are compatible with the |
3184 | * pool's blocksize (the blocksize is a factor of io_opt), do not override them. | 3608 | * pool's blocksize (the blocksize is a factor of io_opt), do not override them. |
3185 | */ | 3609 | */ |
3186 | if (io_opt_sectors < pool->sectors_per_block || | 3610 | if (io_opt_sectors < pool->sectors_per_block || |
3187 | do_div(io_opt_sectors, pool->sectors_per_block)) { | 3611 | !is_factor(io_opt_sectors, pool->sectors_per_block)) { |
3188 | blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT); | 3612 | if (is_factor(pool->sectors_per_block, limits->max_sectors)) |
3613 | blk_limits_io_min(limits, limits->max_sectors << SECTOR_SHIFT); | ||
3614 | else | ||
3615 | blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT); | ||
3189 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); | 3616 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); |
3190 | } | 3617 | } |
3191 | 3618 | ||
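Worked through with made-up numbers, the hint logic above looks like this (a hedged sketch; is_factor() is modelled with plain 64-bit modulo rather than sector_div()):

    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_SHIFT 9

    static int is_factor(uint64_t block_size, uint64_t n)
    {
            return n && block_size % n == 0;
    }

    int main(void)
    {
            uint64_t sectors_per_block = 2048;  /* pool blocksize: 1 MiB */
            uint64_t io_opt_sectors = 768;      /* stacked io_opt: 384 KiB */
            uint64_t max_sectors = 1024;        /* stacked limit: 512 KiB */
            uint64_t io_min, io_opt;

            if (io_opt_sectors < sectors_per_block ||
                !is_factor(io_opt_sectors, sectors_per_block)) {
                    /* Stacked hints clash with the blocksize: override them.
                     * Cap io_min at max_sectors when that still divides the
                     * blocksize, since larger IOs are split at max_sectors
                     * anyway. */
                    if (is_factor(sectors_per_block, max_sectors))
                            io_min = max_sectors << SECTOR_SHIFT;
                    else
                            io_min = sectors_per_block << SECTOR_SHIFT;
                    io_opt = sectors_per_block << SECTOR_SHIFT;
                    printf("io_min=%llu io_opt=%llu\n", /* 524288 1048576 */
                           (unsigned long long)io_min,
                           (unsigned long long)io_opt);
            }
            return 0;
    }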
@@ -3214,11 +3641,13 @@ static struct target_type pool_target = { | |||
3214 | .name = "thin-pool", | 3641 | .name = "thin-pool", |
3215 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | | 3642 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | |
3216 | DM_TARGET_IMMUTABLE, | 3643 | DM_TARGET_IMMUTABLE, |
3217 | .version = {1, 13, 0}, | 3644 | .version = {1, 14, 0}, |
3218 | .module = THIS_MODULE, | 3645 | .module = THIS_MODULE, |
3219 | .ctr = pool_ctr, | 3646 | .ctr = pool_ctr, |
3220 | .dtr = pool_dtr, | 3647 | .dtr = pool_dtr, |
3221 | .map = pool_map, | 3648 | .map = pool_map, |
3649 | .presuspend = pool_presuspend, | ||
3650 | .presuspend_undo = pool_presuspend_undo, | ||
3222 | .postsuspend = pool_postsuspend, | 3651 | .postsuspend = pool_postsuspend, |
3223 | .preresume = pool_preresume, | 3652 | .preresume = pool_preresume, |
3224 | .resume = pool_resume, | 3653 | .resume = pool_resume, |
@@ -3248,14 +3677,14 @@ static void thin_dtr(struct dm_target *ti) | |||
3248 | struct thin_c *tc = ti->private; | 3677 | struct thin_c *tc = ti->private; |
3249 | unsigned long flags; | 3678 | unsigned long flags; |
3250 | 3679 | ||
3251 | thin_put(tc); | ||
3252 | wait_for_completion(&tc->can_destroy); | ||
3253 | |||
3254 | spin_lock_irqsave(&tc->pool->lock, flags); | 3680 | spin_lock_irqsave(&tc->pool->lock, flags); |
3255 | list_del_rcu(&tc->list); | 3681 | list_del_rcu(&tc->list); |
3256 | spin_unlock_irqrestore(&tc->pool->lock, flags); | 3682 | spin_unlock_irqrestore(&tc->pool->lock, flags); |
3257 | synchronize_rcu(); | 3683 | synchronize_rcu(); |
3258 | 3684 | ||
3685 | thin_put(tc); | ||
3686 | wait_for_completion(&tc->can_destroy); | ||
3687 | |||
3259 | mutex_lock(&dm_thin_pool_table.mutex); | 3688 | mutex_lock(&dm_thin_pool_table.mutex); |
3260 | 3689 | ||
3261 | __pool_dec(tc->pool); | 3690 | __pool_dec(tc->pool); |
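The reordering above unpublishes the thin from active_thins (list_del_rcu() plus synchronize_rcu()) before the final thin_put(), so the pool worker can no longer take new references while the destructor waits for outstanding ones to drain. A hedged userspace sketch of the put-then-wait half of that pattern (a mutex/condvar pair stands in for the kernel completion; all names invented):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct thin {
            atomic_int refcount;
            pthread_mutex_t lock;
            pthread_cond_t can_destroy;   /* models tc->can_destroy */
            int destroyed;
    };

    static void thin_put(struct thin *tc)
    {
            if (atomic_fetch_sub(&tc->refcount, 1) == 1) { /* last ref */
                    pthread_mutex_lock(&tc->lock);
                    tc->destroyed = 1;
                    pthread_cond_signal(&tc->can_destroy);
                    pthread_mutex_unlock(&tc->lock);
            }
    }

    static void *worker(void *arg)        /* models the pool worker's ref */
    {
            thin_put(arg);
            return NULL;
    }

    int main(void)
    {
            struct thin tc = { .refcount = 2, /* dtr's ref + worker's ref */
                               .lock = PTHREAD_MUTEX_INITIALIZER,
                               .can_destroy = PTHREAD_COND_INITIALIZER,
                               .destroyed = 0 };
            pthread_t t;

            pthread_create(&t, NULL, worker, &tc);

            thin_put(&tc);                 /* drop our own reference ... */
            pthread_mutex_lock(&tc.lock);  /* ... and wait for stragglers */
            while (!tc.destroyed)
                    pthread_cond_wait(&tc.can_destroy, &tc.lock);
            pthread_mutex_unlock(&tc.lock);

            pthread_join(t, NULL);
            puts("safe to tear down tc");
            return 0;
    }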
@@ -3302,7 +3731,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3302 | r = -ENOMEM; | 3731 | r = -ENOMEM; |
3303 | goto out_unlock; | 3732 | goto out_unlock; |
3304 | } | 3733 | } |
3734 | tc->thin_md = dm_table_get_md(ti->table); | ||
3305 | spin_lock_init(&tc->lock); | 3735 | spin_lock_init(&tc->lock); |
3736 | INIT_LIST_HEAD(&tc->deferred_cells); | ||
3306 | bio_list_init(&tc->deferred_bio_list); | 3737 | bio_list_init(&tc->deferred_bio_list); |
3307 | bio_list_init(&tc->retry_on_resume_list); | 3738 | bio_list_init(&tc->retry_on_resume_list); |
3308 | tc->sort_bio_list = RB_ROOT; | 3739 | tc->sort_bio_list = RB_ROOT; |
@@ -3347,18 +3778,18 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3347 | if (get_pool_mode(tc->pool) == PM_FAIL) { | 3778 | if (get_pool_mode(tc->pool) == PM_FAIL) { |
3348 | ti->error = "Couldn't open thin device, Pool is in fail mode"; | 3779 | ti->error = "Couldn't open thin device, Pool is in fail mode"; |
3349 | r = -EINVAL; | 3780 | r = -EINVAL; |
3350 | goto bad_thin_open; | 3781 | goto bad_pool; |
3351 | } | 3782 | } |
3352 | 3783 | ||
3353 | r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); | 3784 | r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); |
3354 | if (r) { | 3785 | if (r) { |
3355 | ti->error = "Couldn't open thin internal device"; | 3786 | ti->error = "Couldn't open thin internal device"; |
3356 | goto bad_thin_open; | 3787 | goto bad_pool; |
3357 | } | 3788 | } |
3358 | 3789 | ||
3359 | r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); | 3790 | r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); |
3360 | if (r) | 3791 | if (r) |
3361 | goto bad_target_max_io_len; | 3792 | goto bad; |
3362 | 3793 | ||
3363 | ti->num_flush_bios = 1; | 3794 | ti->num_flush_bios = 1; |
3364 | ti->flush_supported = true; | 3795 | ti->flush_supported = true; |
@@ -3373,14 +3804,16 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3373 | ti->split_discard_bios = true; | 3804 | ti->split_discard_bios = true; |
3374 | } | 3805 | } |
3375 | 3806 | ||
3376 | dm_put(pool_md); | ||
3377 | |||
3378 | mutex_unlock(&dm_thin_pool_table.mutex); | 3807 | mutex_unlock(&dm_thin_pool_table.mutex); |
3379 | 3808 | ||
3380 | atomic_set(&tc->refcount, 1); | ||
3381 | init_completion(&tc->can_destroy); | ||
3382 | |||
3383 | spin_lock_irqsave(&tc->pool->lock, flags); | 3809 | spin_lock_irqsave(&tc->pool->lock, flags); |
3810 | if (tc->pool->suspended) { | ||
3811 | spin_unlock_irqrestore(&tc->pool->lock, flags); | ||
3812 | mutex_lock(&dm_thin_pool_table.mutex); /* reacquire for __pool_dec */ | ||
3813 | ti->error = "Unable to activate thin device while pool is suspended"; | ||
3814 | r = -EINVAL; | ||
3815 | goto bad; | ||
3816 | } | ||
3384 | list_add_tail_rcu(&tc->list, &tc->pool->active_thins); | 3817 | list_add_tail_rcu(&tc->list, &tc->pool->active_thins); |
3385 | spin_unlock_irqrestore(&tc->pool->lock, flags); | 3818 | spin_unlock_irqrestore(&tc->pool->lock, flags); |
3386 | /* | 3819 | /* |
@@ -3391,11 +3824,16 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3391 | */ | 3824 | */ |
3392 | synchronize_rcu(); | 3825 | synchronize_rcu(); |
3393 | 3826 | ||
3827 | dm_put(pool_md); | ||
3828 | |||
3829 | atomic_set(&tc->refcount, 1); | ||
3830 | init_completion(&tc->can_destroy); | ||
3831 | |||
3394 | return 0; | 3832 | return 0; |
3395 | 3833 | ||
3396 | bad_target_max_io_len: | 3834 | bad: |
3397 | dm_pool_close_thin_device(tc->td); | 3835 | dm_pool_close_thin_device(tc->td); |
3398 | bad_thin_open: | 3836 | bad_pool: |
3399 | __pool_dec(tc->pool); | 3837 | __pool_dec(tc->pool); |
3400 | bad_pool_lookup: | 3838 | bad_pool_lookup: |
3401 | dm_put(pool_md); | 3839 | dm_put(pool_md); |
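The relabelled ladder (bad, bad_pool, bad_pool_lookup) follows the usual reverse-order unwind idiom: each label releases exactly what was acquired before the failing step. A leak-free toy model, with malloc() standing in for the real acquisitions (everything here is invented for illustration):

    #include <stdio.h>
    #include <stdlib.h>

    static int toy_thin_ctr(int fail_step)
    {
            char *pool, *td;
            int r;

            pool = malloc(16);              /* ~ pool lookup + __pool_inc */
            if (!pool) {
                    r = -1;
                    goto bad_pool_lookup;
            }

            td = fail_step == 1 ? NULL : malloc(16); /* ~ open thin device */
            if (!td) {
                    r = -1;
                    goto bad_pool;          /* only the pool to undo */
            }

            if (fail_step == 2) {           /* ~ a later setup step failed */
                    r = -1;
                    goto bad;               /* undo the device, then the pool */
            }

            free(td);     /* success; a real ctr keeps both, the toy frees */
            free(pool);
            return 0;

    bad:
            free(td);                       /* ~ dm_pool_close_thin_device */
    bad_pool:
            free(pool);                     /* ~ __pool_dec */
    bad_pool_lookup:
            return r;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   toy_thin_ctr(0), toy_thin_ctr(1), toy_thin_ctr(2));
            return 0;
    }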
@@ -3541,6 +3979,21 @@ err: | |||
3541 | DMEMIT("Error"); | 3979 | DMEMIT("Error"); |
3542 | } | 3980 | } |
3543 | 3981 | ||
3982 | static int thin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, | ||
3983 | struct bio_vec *biovec, int max_size) | ||
3984 | { | ||
3985 | struct thin_c *tc = ti->private; | ||
3986 | struct request_queue *q = bdev_get_queue(tc->pool_dev->bdev); | ||
3987 | |||
3988 | if (!q->merge_bvec_fn) | ||
3989 | return max_size; | ||
3990 | |||
3991 | bvm->bi_bdev = tc->pool_dev->bdev; | ||
3992 | bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector); | ||
3993 | |||
3994 | return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); | ||
3995 | } | ||
3996 | |||
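thin_merge() forwards merge queries to the pool data device's merge_bvec_fn, first converting the sector seen on the dm device into a target-relative one. A hedged illustration of that conversion (dm_target_offset() subtracts the target's start sector; the values are invented):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t sector_t;

    struct dm_target {
            sector_t begin;   /* first sector of this target on the dm device */
    };

    /* Mirrors the kernel macro: ((sector) - (ti)->begin) */
    static sector_t dm_target_offset(struct dm_target *ti, sector_t sector)
    {
            return sector - ti->begin;
    }

    int main(void)
    {
            struct dm_target ti = { .begin = 2048 }; /* target starts 1 MiB in */
            sector_t bi_sector = 4096;               /* sector on the dm device */

            printf("query sector %llu\n",
                   (unsigned long long)dm_target_offset(&ti, bi_sector)); /* 2048 */
            return 0;
    }

Note that the query is issued against the pool device without resolving the thin mapping; since thin blocks are remapped whole, the offset within a block is preserved, which appears to be what alignment-sensitive merge callbacks care about.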
3544 | static int thin_iterate_devices(struct dm_target *ti, | 3997 | static int thin_iterate_devices(struct dm_target *ti, |
3545 | iterate_devices_callout_fn fn, void *data) | 3998 | iterate_devices_callout_fn fn, void *data) |
3546 | { | 3999 | { |
@@ -3565,7 +4018,7 @@ static int thin_iterate_devices(struct dm_target *ti, | |||
3565 | 4018 | ||
3566 | static struct target_type thin_target = { | 4019 | static struct target_type thin_target = { |
3567 | .name = "thin", | 4020 | .name = "thin", |
3568 | .version = {1, 13, 0}, | 4021 | .version = {1, 14, 0}, |
3569 | .module = THIS_MODULE, | 4022 | .module = THIS_MODULE, |
3570 | .ctr = thin_ctr, | 4023 | .ctr = thin_ctr, |
3571 | .dtr = thin_dtr, | 4024 | .dtr = thin_dtr, |
@@ -3575,6 +4028,7 @@ static struct target_type thin_target = { | |||
3575 | .presuspend = thin_presuspend, | 4028 | .presuspend = thin_presuspend, |
3576 | .postsuspend = thin_postsuspend, | 4029 | .postsuspend = thin_postsuspend, |
3577 | .status = thin_status, | 4030 | .status = thin_status, |
4031 | .merge = thin_merge, | ||
3578 | .iterate_devices = thin_iterate_devices, | 4032 | .iterate_devices = thin_iterate_devices, |
3579 | }; | 4033 | }; |
3580 | 4034 | ||