author     Linus Torvalds <torvalds@linux-foundation.org>   2012-03-28 15:55:04 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-03-28 15:55:04 -0400
commit     89e5d6f0d979f6e7dc2bbb1ebd9e239217e2e952
tree       1126044004b73df905a6183430376f1d97c3b6c9 /drivers/md/dm-thin.c
parent     516e77977085c9c50703fabb5dc61bd57a8cc1d0
parent     a4ffc152198efba2ed9e6eac0eb97f17bfebce85
Merge tag 'dm-3.4-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm
Pull device-mapper changes for 3.4 from Alasdair Kergon:
- Update thin provisioning to support read-only external snapshot
origins and discards.
- A new target, dm verity, for device content validation.
- Mark dm uevent and dm raid as no-longer-experimental.
- Miscellaneous other fixes and clean-ups.
* tag 'dm-3.4-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (27 commits)
dm: add verity target
dm bufio: prefetch
dm thin: add pool target flags to control discard
dm thin: support discards
dm thin: prepare to support discard
dm thin: use dm_target_offset
dm thin: support read only external snapshot origins
dm thin: relax hard limit on the maximum size of a metadata device
dm persistent data: remove space map ref_count entries if redundant
dm thin: commit outstanding data every second
dm: reject trailing characters in sccanf input
dm raid: handle failed devices during start up
dm thin metadata: pass correct space map to dm_sm_root_size
dm persistent data: remove redundant value_size arg from value_ptr
dm mpath: detect invalid map_context
dm: clear bi_end_io on remapping failure
dm table: simplify call to free_devices
dm thin: correct comments
dm raid: no longer experimental
dm uevent: no longer experimental
...
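
The first summary item above covers discard support. In this series the pool target gains two optional feature arguments, ignore_discard and no_discard_passdown, alongside the existing skip_block_zeroing (see parse_pool_features() in the diff below). As a rough sketch only, not taken from this patch: a pool table line passing the new arguments might look like the line below, where the device names and sizes are invented and the fixed argument order (metadata dev, data dev, data block size in sectors, low water mark in blocks) follows the thin-provisioning documentation of this kernel series.

    0 2097152 thin-pool /dev/mapper/meta /dev/mapper/data 128 32768 2 skip_block_zeroing no_discard_passdown

With no feature arguments (a count of 0) the defaults set in pool_features_init() below apply: block zeroing on, discards enabled, and discard passdown enabled.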
Diffstat (limited to 'drivers/md/dm-thin.c')
 -rw-r--r--  drivers/md/dm-thin.c  | 680
 1 file changed, 508 insertions(+), 172 deletions(-)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c3087575fef0..213ae32a0fc4 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -23,6 +23,7 @@ | |||
23 | #define DEFERRED_SET_SIZE 64 | 23 | #define DEFERRED_SET_SIZE 64 |
24 | #define MAPPING_POOL_SIZE 1024 | 24 | #define MAPPING_POOL_SIZE 1024 |
25 | #define PRISON_CELLS 1024 | 25 | #define PRISON_CELLS 1024 |
26 | #define COMMIT_PERIOD HZ | ||
26 | 27 | ||
27 | /* | 28 | /* |
28 | * The block size of the device holding pool data must be | 29 | * The block size of the device holding pool data must be |
@@ -32,16 +33,6 @@ | |||
32 | #define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT) | 33 | #define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT) |
33 | 34 | ||
34 | /* | 35 | /* |
35 | * The metadata device is currently limited in size. The limitation is | ||
36 | * checked lower down in dm-space-map-metadata, but we also check it here | ||
37 | * so we can fail early. | ||
38 | * | ||
39 | * We have one block of index, which can hold 255 index entries. Each | ||
40 | * index entry contains allocation info about 16k metadata blocks. | ||
41 | */ | ||
42 | #define METADATA_DEV_MAX_SECTORS (255 * (1 << 14) * (THIN_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) | ||
43 | |||
44 | /* | ||
45 | * Device id is restricted to 24 bits. | 36 | * Device id is restricted to 24 bits. |
46 | */ | 37 | */ |
47 | #define MAX_DEV_ID ((1 << 24) - 1) | 38 | #define MAX_DEV_ID ((1 << 24) - 1) |
@@ -72,7 +63,7 @@ | |||
72 | * missed out if the io covers the block. (schedule_copy). | 63 | * missed out if the io covers the block. (schedule_copy). |
73 | * | 64 | * |
74 | * iv) insert the new mapping into the origin's btree | 65 | * iv) insert the new mapping into the origin's btree |
75 | * (process_prepared_mappings). This act of inserting breaks some | 66 | * (process_prepared_mapping). This act of inserting breaks some |
76 | * sharing of btree nodes between the two devices. Breaking sharing only | 67 | * sharing of btree nodes between the two devices. Breaking sharing only |
77 | * effects the btree of that specific device. Btrees for the other | 68 | * effects the btree of that specific device. Btrees for the other |
78 | * devices that share the block never change. The btree for the origin | 69 | * devices that share the block never change. The btree for the origin |
@@ -124,7 +115,7 @@ struct cell { | |||
124 | struct hlist_node list; | 115 | struct hlist_node list; |
125 | struct bio_prison *prison; | 116 | struct bio_prison *prison; |
126 | struct cell_key key; | 117 | struct cell_key key; |
127 | unsigned count; | 118 | struct bio *holder; |
128 | struct bio_list bios; | 119 | struct bio_list bios; |
129 | }; | 120 | }; |
130 | 121 | ||
@@ -220,54 +211,59 @@ static struct cell *__search_bucket(struct hlist_head *bucket, | |||
220 | * This may block if a new cell needs allocating. You must ensure that | 211 | * This may block if a new cell needs allocating. You must ensure that |
221 | * cells will be unlocked even if the calling thread is blocked. | 212 | * cells will be unlocked even if the calling thread is blocked. |
222 | * | 213 | * |
223 | * Returns the number of entries in the cell prior to the new addition | 214 | * Returns 1 if the cell was already held, 0 if @inmate is the new holder. |
224 | * or < 0 on failure. | ||
225 | */ | 215 | */ |
226 | static int bio_detain(struct bio_prison *prison, struct cell_key *key, | 216 | static int bio_detain(struct bio_prison *prison, struct cell_key *key, |
227 | struct bio *inmate, struct cell **ref) | 217 | struct bio *inmate, struct cell **ref) |
228 | { | 218 | { |
229 | int r; | 219 | int r = 1; |
230 | unsigned long flags; | 220 | unsigned long flags; |
231 | uint32_t hash = hash_key(prison, key); | 221 | uint32_t hash = hash_key(prison, key); |
232 | struct cell *uninitialized_var(cell), *cell2 = NULL; | 222 | struct cell *cell, *cell2; |
233 | 223 | ||
234 | BUG_ON(hash > prison->nr_buckets); | 224 | BUG_ON(hash > prison->nr_buckets); |
235 | 225 | ||
236 | spin_lock_irqsave(&prison->lock, flags); | 226 | spin_lock_irqsave(&prison->lock, flags); |
227 | |||
237 | cell = __search_bucket(prison->cells + hash, key); | 228 | cell = __search_bucket(prison->cells + hash, key); |
229 | if (cell) { | ||
230 | bio_list_add(&cell->bios, inmate); | ||
231 | goto out; | ||
232 | } | ||
238 | 233 | ||
239 | if (!cell) { | 234 | /* |
240 | /* | 235 | * Allocate a new cell |
241 | * Allocate a new cell | 236 | */ |
242 | */ | 237 | spin_unlock_irqrestore(&prison->lock, flags); |
243 | spin_unlock_irqrestore(&prison->lock, flags); | 238 | cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); |
244 | cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); | 239 | spin_lock_irqsave(&prison->lock, flags); |
245 | spin_lock_irqsave(&prison->lock, flags); | ||
246 | 240 | ||
247 | /* | 241 | /* |
248 | * We've been unlocked, so we have to double check that | 242 | * We've been unlocked, so we have to double check that |
249 | * nobody else has inserted this cell in the meantime. | 243 | * nobody else has inserted this cell in the meantime. |
250 | */ | 244 | */ |
251 | cell = __search_bucket(prison->cells + hash, key); | 245 | cell = __search_bucket(prison->cells + hash, key); |
246 | if (cell) { | ||
247 | mempool_free(cell2, prison->cell_pool); | ||
248 | bio_list_add(&cell->bios, inmate); | ||
249 | goto out; | ||
250 | } | ||
252 | 251 | ||
253 | if (!cell) { | 252 | /* |
254 | cell = cell2; | 253 | * Use new cell. |
255 | cell2 = NULL; | 254 | */ |
255 | cell = cell2; | ||
256 | 256 | ||
257 | cell->prison = prison; | 257 | cell->prison = prison; |
258 | memcpy(&cell->key, key, sizeof(cell->key)); | 258 | memcpy(&cell->key, key, sizeof(cell->key)); |
259 | cell->count = 0; | 259 | cell->holder = inmate; |
260 | bio_list_init(&cell->bios); | 260 | bio_list_init(&cell->bios); |
261 | hlist_add_head(&cell->list, prison->cells + hash); | 261 | hlist_add_head(&cell->list, prison->cells + hash); |
262 | } | ||
263 | } | ||
264 | 262 | ||
265 | r = cell->count++; | 263 | r = 0; |
266 | bio_list_add(&cell->bios, inmate); | ||
267 | spin_unlock_irqrestore(&prison->lock, flags); | ||
268 | 264 | ||
269 | if (cell2) | 265 | out: |
270 | mempool_free(cell2, prison->cell_pool); | 266 | spin_unlock_irqrestore(&prison->lock, flags); |
271 | 267 | ||
272 | *ref = cell; | 268 | *ref = cell; |
273 | 269 | ||
@@ -283,8 +279,8 @@ static void __cell_release(struct cell *cell, struct bio_list *inmates) | |||
283 | 279 | ||
284 | hlist_del(&cell->list); | 280 | hlist_del(&cell->list); |
285 | 281 | ||
286 | if (inmates) | 282 | bio_list_add(inmates, cell->holder); |
287 | bio_list_merge(inmates, &cell->bios); | 283 | bio_list_merge(inmates, &cell->bios); |
288 | 284 | ||
289 | mempool_free(cell, prison->cell_pool); | 285 | mempool_free(cell, prison->cell_pool); |
290 | } | 286 | } |
@@ -305,22 +301,44 @@ static void cell_release(struct cell *cell, struct bio_list *bios) | |||
305 | * bio may be in the cell. This function releases the cell, and also does | 301 | * bio may be in the cell. This function releases the cell, and also does |
306 | * a sanity check. | 302 | * a sanity check. |
307 | */ | 303 | */ |
304 | static void __cell_release_singleton(struct cell *cell, struct bio *bio) | ||
305 | { | ||
306 | hlist_del(&cell->list); | ||
307 | BUG_ON(cell->holder != bio); | ||
308 | BUG_ON(!bio_list_empty(&cell->bios)); | ||
309 | } | ||
310 | |||
308 | static void cell_release_singleton(struct cell *cell, struct bio *bio) | 311 | static void cell_release_singleton(struct cell *cell, struct bio *bio) |
309 | { | 312 | { |
310 | struct bio_prison *prison = cell->prison; | ||
311 | struct bio_list bios; | ||
312 | struct bio *b; | ||
313 | unsigned long flags; | 313 | unsigned long flags; |
314 | 314 | struct bio_prison *prison = cell->prison; | |
315 | bio_list_init(&bios); | ||
316 | 315 | ||
317 | spin_lock_irqsave(&prison->lock, flags); | 316 | spin_lock_irqsave(&prison->lock, flags); |
318 | __cell_release(cell, &bios); | 317 | __cell_release_singleton(cell, bio); |
319 | spin_unlock_irqrestore(&prison->lock, flags); | 318 | spin_unlock_irqrestore(&prison->lock, flags); |
319 | } | ||
320 | |||
321 | /* | ||
322 | * Sometimes we don't want the holder, just the additional bios. | ||
323 | */ | ||
324 | static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates) | ||
325 | { | ||
326 | struct bio_prison *prison = cell->prison; | ||
327 | |||
328 | hlist_del(&cell->list); | ||
329 | bio_list_merge(inmates, &cell->bios); | ||
320 | 330 | ||
321 | b = bio_list_pop(&bios); | 331 | mempool_free(cell, prison->cell_pool); |
322 | BUG_ON(b != bio); | 332 | } |
323 | BUG_ON(!bio_list_empty(&bios)); | 333 | |
334 | static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates) | ||
335 | { | ||
336 | unsigned long flags; | ||
337 | struct bio_prison *prison = cell->prison; | ||
338 | |||
339 | spin_lock_irqsave(&prison->lock, flags); | ||
340 | __cell_release_no_holder(cell, inmates); | ||
341 | spin_unlock_irqrestore(&prison->lock, flags); | ||
324 | } | 342 | } |
325 | 343 | ||
326 | static void cell_error(struct cell *cell) | 344 | static void cell_error(struct cell *cell) |
@@ -471,6 +489,13 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, | |||
471 | * devices. | 489 | * devices. |
472 | */ | 490 | */ |
473 | struct new_mapping; | 491 | struct new_mapping; |
492 | |||
493 | struct pool_features { | ||
494 | unsigned zero_new_blocks:1; | ||
495 | unsigned discard_enabled:1; | ||
496 | unsigned discard_passdown:1; | ||
497 | }; | ||
498 | |||
474 | struct pool { | 499 | struct pool { |
475 | struct list_head list; | 500 | struct list_head list; |
476 | struct dm_target *ti; /* Only set if a pool target is bound */ | 501 | struct dm_target *ti; /* Only set if a pool target is bound */ |
@@ -484,7 +509,7 @@ struct pool { | |||
484 | dm_block_t offset_mask; | 509 | dm_block_t offset_mask; |
485 | dm_block_t low_water_blocks; | 510 | dm_block_t low_water_blocks; |
486 | 511 | ||
487 | unsigned zero_new_blocks:1; | 512 | struct pool_features pf; |
488 | unsigned low_water_triggered:1; /* A dm event has been sent */ | 513 | unsigned low_water_triggered:1; /* A dm event has been sent */ |
489 | unsigned no_free_space:1; /* A -ENOSPC warning has been issued */ | 514 | unsigned no_free_space:1; /* A -ENOSPC warning has been issued */ |
490 | 515 | ||
@@ -493,17 +518,21 @@ struct pool { | |||
493 | 518 | ||
494 | struct workqueue_struct *wq; | 519 | struct workqueue_struct *wq; |
495 | struct work_struct worker; | 520 | struct work_struct worker; |
521 | struct delayed_work waker; | ||
496 | 522 | ||
497 | unsigned ref_count; | 523 | unsigned ref_count; |
524 | unsigned long last_commit_jiffies; | ||
498 | 525 | ||
499 | spinlock_t lock; | 526 | spinlock_t lock; |
500 | struct bio_list deferred_bios; | 527 | struct bio_list deferred_bios; |
501 | struct bio_list deferred_flush_bios; | 528 | struct bio_list deferred_flush_bios; |
502 | struct list_head prepared_mappings; | 529 | struct list_head prepared_mappings; |
530 | struct list_head prepared_discards; | ||
503 | 531 | ||
504 | struct bio_list retry_on_resume_list; | 532 | struct bio_list retry_on_resume_list; |
505 | 533 | ||
506 | struct deferred_set ds; /* FIXME: move to thin_c */ | 534 | struct deferred_set shared_read_ds; |
535 | struct deferred_set all_io_ds; | ||
507 | 536 | ||
508 | struct new_mapping *next_mapping; | 537 | struct new_mapping *next_mapping; |
509 | mempool_t *mapping_pool; | 538 | mempool_t *mapping_pool; |
@@ -521,7 +550,7 @@ struct pool_c { | |||
521 | struct dm_target_callbacks callbacks; | 550 | struct dm_target_callbacks callbacks; |
522 | 551 | ||
523 | dm_block_t low_water_blocks; | 552 | dm_block_t low_water_blocks; |
524 | unsigned zero_new_blocks:1; | 553 | struct pool_features pf; |
525 | }; | 554 | }; |
526 | 555 | ||
527 | /* | 556 | /* |
@@ -529,6 +558,7 @@ struct pool_c { | |||
529 | */ | 558 | */ |
530 | struct thin_c { | 559 | struct thin_c { |
531 | struct dm_dev *pool_dev; | 560 | struct dm_dev *pool_dev; |
561 | struct dm_dev *origin_dev; | ||
532 | dm_thin_id dev_id; | 562 | dm_thin_id dev_id; |
533 | 563 | ||
534 | struct pool *pool; | 564 | struct pool *pool; |
@@ -597,6 +627,13 @@ static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev | |||
597 | 627 | ||
598 | /*----------------------------------------------------------------*/ | 628 | /*----------------------------------------------------------------*/ |
599 | 629 | ||
630 | struct endio_hook { | ||
631 | struct thin_c *tc; | ||
632 | struct deferred_entry *shared_read_entry; | ||
633 | struct deferred_entry *all_io_entry; | ||
634 | struct new_mapping *overwrite_mapping; | ||
635 | }; | ||
636 | |||
600 | static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) | 637 | static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) |
601 | { | 638 | { |
602 | struct bio *bio; | 639 | struct bio *bio; |
@@ -607,7 +644,8 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) | |||
607 | bio_list_init(master); | 644 | bio_list_init(master); |
608 | 645 | ||
609 | while ((bio = bio_list_pop(&bios))) { | 646 | while ((bio = bio_list_pop(&bios))) { |
610 | if (dm_get_mapinfo(bio)->ptr == tc) | 647 | struct endio_hook *h = dm_get_mapinfo(bio)->ptr; |
648 | if (h->tc == tc) | ||
611 | bio_endio(bio, DM_ENDIO_REQUEUE); | 649 | bio_endio(bio, DM_ENDIO_REQUEUE); |
612 | else | 650 | else |
613 | bio_list_add(master, bio); | 651 | bio_list_add(master, bio); |
@@ -646,14 +684,16 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) | |||
646 | (bio->bi_sector & pool->offset_mask); | 684 | (bio->bi_sector & pool->offset_mask); |
647 | } | 685 | } |
648 | 686 | ||
649 | static void remap_and_issue(struct thin_c *tc, struct bio *bio, | 687 | static void remap_to_origin(struct thin_c *tc, struct bio *bio) |
650 | dm_block_t block) | 688 | { |
689 | bio->bi_bdev = tc->origin_dev->bdev; | ||
690 | } | ||
691 | |||
692 | static void issue(struct thin_c *tc, struct bio *bio) | ||
651 | { | 693 | { |
652 | struct pool *pool = tc->pool; | 694 | struct pool *pool = tc->pool; |
653 | unsigned long flags; | 695 | unsigned long flags; |
654 | 696 | ||
655 | remap(tc, bio, block); | ||
656 | |||
657 | /* | 697 | /* |
658 | * Batch together any FUA/FLUSH bios we find and then issue | 698 | * Batch together any FUA/FLUSH bios we find and then issue |
659 | * a single commit for them in process_deferred_bios(). | 699 | * a single commit for them in process_deferred_bios(). |
@@ -666,6 +706,19 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio, | |||
666 | generic_make_request(bio); | 706 | generic_make_request(bio); |
667 | } | 707 | } |
668 | 708 | ||
709 | static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) | ||
710 | { | ||
711 | remap_to_origin(tc, bio); | ||
712 | issue(tc, bio); | ||
713 | } | ||
714 | |||
715 | static void remap_and_issue(struct thin_c *tc, struct bio *bio, | ||
716 | dm_block_t block) | ||
717 | { | ||
718 | remap(tc, bio, block); | ||
719 | issue(tc, bio); | ||
720 | } | ||
721 | |||
669 | /* | 722 | /* |
670 | * wake_worker() is used when new work is queued and when pool_resume is | 723 | * wake_worker() is used when new work is queued and when pool_resume is |
671 | * ready to continue deferred IO processing. | 724 | * ready to continue deferred IO processing. |
@@ -680,21 +733,17 @@ static void wake_worker(struct pool *pool) | |||
680 | /* | 733 | /* |
681 | * Bio endio functions. | 734 | * Bio endio functions. |
682 | */ | 735 | */ |
683 | struct endio_hook { | ||
684 | struct thin_c *tc; | ||
685 | bio_end_io_t *saved_bi_end_io; | ||
686 | struct deferred_entry *entry; | ||
687 | }; | ||
688 | |||
689 | struct new_mapping { | 736 | struct new_mapping { |
690 | struct list_head list; | 737 | struct list_head list; |
691 | 738 | ||
692 | int prepared; | 739 | unsigned quiesced:1; |
740 | unsigned prepared:1; | ||
741 | unsigned pass_discard:1; | ||
693 | 742 | ||
694 | struct thin_c *tc; | 743 | struct thin_c *tc; |
695 | dm_block_t virt_block; | 744 | dm_block_t virt_block; |
696 | dm_block_t data_block; | 745 | dm_block_t data_block; |
697 | struct cell *cell; | 746 | struct cell *cell, *cell2; |
698 | int err; | 747 | int err; |
699 | 748 | ||
700 | /* | 749 | /* |
@@ -711,7 +760,7 @@ static void __maybe_add_mapping(struct new_mapping *m) | |||
711 | { | 760 | { |
712 | struct pool *pool = m->tc->pool; | 761 | struct pool *pool = m->tc->pool; |
713 | 762 | ||
714 | if (list_empty(&m->list) && m->prepared) { | 763 | if (m->quiesced && m->prepared) { |
715 | list_add(&m->list, &pool->prepared_mappings); | 764 | list_add(&m->list, &pool->prepared_mappings); |
716 | wake_worker(pool); | 765 | wake_worker(pool); |
717 | } | 766 | } |
@@ -734,7 +783,8 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) | |||
734 | static void overwrite_endio(struct bio *bio, int err) | 783 | static void overwrite_endio(struct bio *bio, int err) |
735 | { | 784 | { |
736 | unsigned long flags; | 785 | unsigned long flags; |
737 | struct new_mapping *m = dm_get_mapinfo(bio)->ptr; | 786 | struct endio_hook *h = dm_get_mapinfo(bio)->ptr; |
787 | struct new_mapping *m = h->overwrite_mapping; | ||
738 | struct pool *pool = m->tc->pool; | 788 | struct pool *pool = m->tc->pool; |
739 | 789 | ||
740 | m->err = err; | 790 | m->err = err; |
@@ -745,31 +795,6 @@ static void overwrite_endio(struct bio *bio, int err) | |||
745 | spin_unlock_irqrestore(&pool->lock, flags); | 795 | spin_unlock_irqrestore(&pool->lock, flags); |
746 | } | 796 | } |
747 | 797 | ||
748 | static void shared_read_endio(struct bio *bio, int err) | ||
749 | { | ||
750 | struct list_head mappings; | ||
751 | struct new_mapping *m, *tmp; | ||
752 | struct endio_hook *h = dm_get_mapinfo(bio)->ptr; | ||
753 | unsigned long flags; | ||
754 | struct pool *pool = h->tc->pool; | ||
755 | |||
756 | bio->bi_end_io = h->saved_bi_end_io; | ||
757 | bio_endio(bio, err); | ||
758 | |||
759 | INIT_LIST_HEAD(&mappings); | ||
760 | ds_dec(h->entry, &mappings); | ||
761 | |||
762 | spin_lock_irqsave(&pool->lock, flags); | ||
763 | list_for_each_entry_safe(m, tmp, &mappings, list) { | ||
764 | list_del(&m->list); | ||
765 | INIT_LIST_HEAD(&m->list); | ||
766 | __maybe_add_mapping(m); | ||
767 | } | ||
768 | spin_unlock_irqrestore(&pool->lock, flags); | ||
769 | |||
770 | mempool_free(h, pool->endio_hook_pool); | ||
771 | } | ||
772 | |||
773 | /*----------------------------------------------------------------*/ | 798 | /*----------------------------------------------------------------*/ |
774 | 799 | ||
775 | /* | 800 | /* |
@@ -800,21 +825,16 @@ static void cell_defer(struct thin_c *tc, struct cell *cell, | |||
800 | * Same as cell_defer above, except it omits one particular detainee, | 825 | * Same as cell_defer above, except it omits one particular detainee, |
801 | * a write bio that covers the block and has already been processed. | 826 | * a write bio that covers the block and has already been processed. |
802 | */ | 827 | */ |
803 | static void cell_defer_except(struct thin_c *tc, struct cell *cell, | 828 | static void cell_defer_except(struct thin_c *tc, struct cell *cell) |
804 | struct bio *exception) | ||
805 | { | 829 | { |
806 | struct bio_list bios; | 830 | struct bio_list bios; |
807 | struct bio *bio; | ||
808 | struct pool *pool = tc->pool; | 831 | struct pool *pool = tc->pool; |
809 | unsigned long flags; | 832 | unsigned long flags; |
810 | 833 | ||
811 | bio_list_init(&bios); | 834 | bio_list_init(&bios); |
812 | cell_release(cell, &bios); | ||
813 | 835 | ||
814 | spin_lock_irqsave(&pool->lock, flags); | 836 | spin_lock_irqsave(&pool->lock, flags); |
815 | while ((bio = bio_list_pop(&bios))) | 837 | cell_release_no_holder(cell, &pool->deferred_bios); |
816 | if (bio != exception) | ||
817 | bio_list_add(&pool->deferred_bios, bio); | ||
818 | spin_unlock_irqrestore(&pool->lock, flags); | 838 | spin_unlock_irqrestore(&pool->lock, flags); |
819 | 839 | ||
820 | wake_worker(pool); | 840 | wake_worker(pool); |
@@ -854,7 +874,7 @@ static void process_prepared_mapping(struct new_mapping *m) | |||
854 | * the bios in the cell. | 874 | * the bios in the cell. |
855 | */ | 875 | */ |
856 | if (bio) { | 876 | if (bio) { |
857 | cell_defer_except(tc, m->cell, bio); | 877 | cell_defer_except(tc, m->cell); |
858 | bio_endio(bio, 0); | 878 | bio_endio(bio, 0); |
859 | } else | 879 | } else |
860 | cell_defer(tc, m->cell, m->data_block); | 880 | cell_defer(tc, m->cell, m->data_block); |
@@ -863,7 +883,30 @@ static void process_prepared_mapping(struct new_mapping *m) | |||
863 | mempool_free(m, tc->pool->mapping_pool); | 883 | mempool_free(m, tc->pool->mapping_pool); |
864 | } | 884 | } |
865 | 885 | ||
866 | static void process_prepared_mappings(struct pool *pool) | 886 | static void process_prepared_discard(struct new_mapping *m) |
887 | { | ||
888 | int r; | ||
889 | struct thin_c *tc = m->tc; | ||
890 | |||
891 | r = dm_thin_remove_block(tc->td, m->virt_block); | ||
892 | if (r) | ||
893 | DMERR("dm_thin_remove_block() failed"); | ||
894 | |||
895 | /* | ||
896 | * Pass the discard down to the underlying device? | ||
897 | */ | ||
898 | if (m->pass_discard) | ||
899 | remap_and_issue(tc, m->bio, m->data_block); | ||
900 | else | ||
901 | bio_endio(m->bio, 0); | ||
902 | |||
903 | cell_defer_except(tc, m->cell); | ||
904 | cell_defer_except(tc, m->cell2); | ||
905 | mempool_free(m, tc->pool->mapping_pool); | ||
906 | } | ||
907 | |||
908 | static void process_prepared(struct pool *pool, struct list_head *head, | ||
909 | void (*fn)(struct new_mapping *)) | ||
867 | { | 910 | { |
868 | unsigned long flags; | 911 | unsigned long flags; |
869 | struct list_head maps; | 912 | struct list_head maps; |
@@ -871,21 +914,27 @@ static void process_prepared_mappings(struct pool *pool) | |||
871 | 914 | ||
872 | INIT_LIST_HEAD(&maps); | 915 | INIT_LIST_HEAD(&maps); |
873 | spin_lock_irqsave(&pool->lock, flags); | 916 | spin_lock_irqsave(&pool->lock, flags); |
874 | list_splice_init(&pool->prepared_mappings, &maps); | 917 | list_splice_init(head, &maps); |
875 | spin_unlock_irqrestore(&pool->lock, flags); | 918 | spin_unlock_irqrestore(&pool->lock, flags); |
876 | 919 | ||
877 | list_for_each_entry_safe(m, tmp, &maps, list) | 920 | list_for_each_entry_safe(m, tmp, &maps, list) |
878 | process_prepared_mapping(m); | 921 | fn(m); |
879 | } | 922 | } |
880 | 923 | ||
881 | /* | 924 | /* |
882 | * Deferred bio jobs. | 925 | * Deferred bio jobs. |
883 | */ | 926 | */ |
884 | static int io_overwrites_block(struct pool *pool, struct bio *bio) | 927 | static int io_overlaps_block(struct pool *pool, struct bio *bio) |
885 | { | 928 | { |
886 | return ((bio_data_dir(bio) == WRITE) && | 929 | return !(bio->bi_sector & pool->offset_mask) && |
887 | !(bio->bi_sector & pool->offset_mask)) && | ||
888 | (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); | 930 | (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); |
931 | |||
932 | } | ||
933 | |||
934 | static int io_overwrites_block(struct pool *pool, struct bio *bio) | ||
935 | { | ||
936 | return (bio_data_dir(bio) == WRITE) && | ||
937 | io_overlaps_block(pool, bio); | ||
889 | } | 938 | } |
890 | 939 | ||
891 | static void save_and_set_endio(struct bio *bio, bio_end_io_t **save, | 940 | static void save_and_set_endio(struct bio *bio, bio_end_io_t **save, |
@@ -917,7 +966,8 @@ static struct new_mapping *get_next_mapping(struct pool *pool) | |||
917 | } | 966 | } |
918 | 967 | ||
919 | static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | 968 | static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, |
920 | dm_block_t data_origin, dm_block_t data_dest, | 969 | struct dm_dev *origin, dm_block_t data_origin, |
970 | dm_block_t data_dest, | ||
921 | struct cell *cell, struct bio *bio) | 971 | struct cell *cell, struct bio *bio) |
922 | { | 972 | { |
923 | int r; | 973 | int r; |
@@ -925,6 +975,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | |||
925 | struct new_mapping *m = get_next_mapping(pool); | 975 | struct new_mapping *m = get_next_mapping(pool); |
926 | 976 | ||
927 | INIT_LIST_HEAD(&m->list); | 977 | INIT_LIST_HEAD(&m->list); |
978 | m->quiesced = 0; | ||
928 | m->prepared = 0; | 979 | m->prepared = 0; |
929 | m->tc = tc; | 980 | m->tc = tc; |
930 | m->virt_block = virt_block; | 981 | m->virt_block = virt_block; |
@@ -933,7 +984,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | |||
933 | m->err = 0; | 984 | m->err = 0; |
934 | m->bio = NULL; | 985 | m->bio = NULL; |
935 | 986 | ||
936 | ds_add_work(&pool->ds, &m->list); | 987 | if (!ds_add_work(&pool->shared_read_ds, &m->list)) |
988 | m->quiesced = 1; | ||
937 | 989 | ||
938 | /* | 990 | /* |
939 | * IO to pool_dev remaps to the pool target's data_dev. | 991 | * IO to pool_dev remaps to the pool target's data_dev. |
@@ -942,14 +994,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | |||
942 | * bio immediately. Otherwise we use kcopyd to clone the data first. | 994 | * bio immediately. Otherwise we use kcopyd to clone the data first. |
943 | */ | 995 | */ |
944 | if (io_overwrites_block(pool, bio)) { | 996 | if (io_overwrites_block(pool, bio)) { |
997 | struct endio_hook *h = dm_get_mapinfo(bio)->ptr; | ||
998 | h->overwrite_mapping = m; | ||
945 | m->bio = bio; | 999 | m->bio = bio; |
946 | save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); | 1000 | save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); |
947 | dm_get_mapinfo(bio)->ptr = m; | ||
948 | remap_and_issue(tc, bio, data_dest); | 1001 | remap_and_issue(tc, bio, data_dest); |
949 | } else { | 1002 | } else { |
950 | struct dm_io_region from, to; | 1003 | struct dm_io_region from, to; |
951 | 1004 | ||
952 | from.bdev = tc->pool_dev->bdev; | 1005 | from.bdev = origin->bdev; |
953 | from.sector = data_origin * pool->sectors_per_block; | 1006 | from.sector = data_origin * pool->sectors_per_block; |
954 | from.count = pool->sectors_per_block; | 1007 | from.count = pool->sectors_per_block; |
955 | 1008 | ||
@@ -967,6 +1020,22 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | |||
967 | } | 1020 | } |
968 | } | 1021 | } |
969 | 1022 | ||
1023 | static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, | ||
1024 | dm_block_t data_origin, dm_block_t data_dest, | ||
1025 | struct cell *cell, struct bio *bio) | ||
1026 | { | ||
1027 | schedule_copy(tc, virt_block, tc->pool_dev, | ||
1028 | data_origin, data_dest, cell, bio); | ||
1029 | } | ||
1030 | |||
1031 | static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, | ||
1032 | dm_block_t data_dest, | ||
1033 | struct cell *cell, struct bio *bio) | ||
1034 | { | ||
1035 | schedule_copy(tc, virt_block, tc->origin_dev, | ||
1036 | virt_block, data_dest, cell, bio); | ||
1037 | } | ||
1038 | |||
970 | static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, | 1039 | static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, |
971 | dm_block_t data_block, struct cell *cell, | 1040 | dm_block_t data_block, struct cell *cell, |
972 | struct bio *bio) | 1041 | struct bio *bio) |
@@ -975,6 +1044,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, | |||
975 | struct new_mapping *m = get_next_mapping(pool); | 1044 | struct new_mapping *m = get_next_mapping(pool); |
976 | 1045 | ||
977 | INIT_LIST_HEAD(&m->list); | 1046 | INIT_LIST_HEAD(&m->list); |
1047 | m->quiesced = 1; | ||
978 | m->prepared = 0; | 1048 | m->prepared = 0; |
979 | m->tc = tc; | 1049 | m->tc = tc; |
980 | m->virt_block = virt_block; | 1050 | m->virt_block = virt_block; |
@@ -988,13 +1058,14 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, | |||
988 | * zeroing pre-existing data, we can issue the bio immediately. | 1058 | * zeroing pre-existing data, we can issue the bio immediately. |
989 | * Otherwise we use kcopyd to zero the data first. | 1059 | * Otherwise we use kcopyd to zero the data first. |
990 | */ | 1060 | */ |
991 | if (!pool->zero_new_blocks) | 1061 | if (!pool->pf.zero_new_blocks) |
992 | process_prepared_mapping(m); | 1062 | process_prepared_mapping(m); |
993 | 1063 | ||
994 | else if (io_overwrites_block(pool, bio)) { | 1064 | else if (io_overwrites_block(pool, bio)) { |
1065 | struct endio_hook *h = dm_get_mapinfo(bio)->ptr; | ||
1066 | h->overwrite_mapping = m; | ||
995 | m->bio = bio; | 1067 | m->bio = bio; |
996 | save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); | 1068 | save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); |
997 | dm_get_mapinfo(bio)->ptr = m; | ||
998 | remap_and_issue(tc, bio, data_block); | 1069 | remap_and_issue(tc, bio, data_block); |
999 | 1070 | ||
1000 | } else { | 1071 | } else { |
@@ -1081,7 +1152,8 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) | |||
1081 | */ | 1152 | */ |
1082 | static void retry_on_resume(struct bio *bio) | 1153 | static void retry_on_resume(struct bio *bio) |
1083 | { | 1154 | { |
1084 | struct thin_c *tc = dm_get_mapinfo(bio)->ptr; | 1155 | struct endio_hook *h = dm_get_mapinfo(bio)->ptr; |
1156 | struct thin_c *tc = h->tc; | ||
1085 | struct pool *pool = tc->pool; | 1157 | struct pool *pool = tc->pool; |
1086 | unsigned long flags; | 1158 | unsigned long flags; |
1087 | 1159 | ||
@@ -1102,6 +1174,86 @@ static void no_space(struct cell *cell) | |||
1102 | retry_on_resume(bio); | 1174 | retry_on_resume(bio); |
1103 | } | 1175 | } |
1104 | 1176 | ||
1177 | static void process_discard(struct thin_c *tc, struct bio *bio) | ||
1178 | { | ||
1179 | int r; | ||
1180 | struct pool *pool = tc->pool; | ||
1181 | struct cell *cell, *cell2; | ||
1182 | struct cell_key key, key2; | ||
1183 | dm_block_t block = get_bio_block(tc, bio); | ||
1184 | struct dm_thin_lookup_result lookup_result; | ||
1185 | struct new_mapping *m; | ||
1186 | |||
1187 | build_virtual_key(tc->td, block, &key); | ||
1188 | if (bio_detain(tc->pool->prison, &key, bio, &cell)) | ||
1189 | return; | ||
1190 | |||
1191 | r = dm_thin_find_block(tc->td, block, 1, &lookup_result); | ||
1192 | switch (r) { | ||
1193 | case 0: | ||
1194 | /* | ||
1195 | * Check nobody is fiddling with this pool block. This can | ||
1196 | * happen if someone's in the process of breaking sharing | ||
1197 | * on this block. | ||
1198 | */ | ||
1199 | build_data_key(tc->td, lookup_result.block, &key2); | ||
1200 | if (bio_detain(tc->pool->prison, &key2, bio, &cell2)) { | ||
1201 | cell_release_singleton(cell, bio); | ||
1202 | break; | ||
1203 | } | ||
1204 | |||
1205 | if (io_overlaps_block(pool, bio)) { | ||
1206 | /* | ||
1207 | * IO may still be going to the destination block. We must | ||
1208 | * quiesce before we can do the removal. | ||
1209 | */ | ||
1210 | m = get_next_mapping(pool); | ||
1211 | m->tc = tc; | ||
1212 | m->pass_discard = (!lookup_result.shared) & pool->pf.discard_passdown; | ||
1213 | m->virt_block = block; | ||
1214 | m->data_block = lookup_result.block; | ||
1215 | m->cell = cell; | ||
1216 | m->cell2 = cell2; | ||
1217 | m->err = 0; | ||
1218 | m->bio = bio; | ||
1219 | |||
1220 | if (!ds_add_work(&pool->all_io_ds, &m->list)) { | ||
1221 | list_add(&m->list, &pool->prepared_discards); | ||
1222 | wake_worker(pool); | ||
1223 | } | ||
1224 | } else { | ||
1225 | /* | ||
1226 | * This path is hit if people are ignoring | ||
1227 | * limits->discard_granularity. It ignores any | ||
1228 | * part of the discard that is in a subsequent | ||
1229 | * block. | ||
1230 | */ | ||
1231 | sector_t offset = bio->bi_sector - (block << pool->block_shift); | ||
1232 | unsigned remaining = (pool->sectors_per_block - offset) << 9; | ||
1233 | bio->bi_size = min(bio->bi_size, remaining); | ||
1234 | |||
1235 | cell_release_singleton(cell, bio); | ||
1236 | cell_release_singleton(cell2, bio); | ||
1237 | remap_and_issue(tc, bio, lookup_result.block); | ||
1238 | } | ||
1239 | break; | ||
1240 | |||
1241 | case -ENODATA: | ||
1242 | /* | ||
1243 | * It isn't provisioned, just forget it. | ||
1244 | */ | ||
1245 | cell_release_singleton(cell, bio); | ||
1246 | bio_endio(bio, 0); | ||
1247 | break; | ||
1248 | |||
1249 | default: | ||
1250 | DMERR("discard: find block unexpectedly returned %d", r); | ||
1251 | cell_release_singleton(cell, bio); | ||
1252 | bio_io_error(bio); | ||
1253 | break; | ||
1254 | } | ||
1255 | } | ||
1256 | |||
1105 | static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, | 1257 | static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, |
1106 | struct cell_key *key, | 1258 | struct cell_key *key, |
1107 | struct dm_thin_lookup_result *lookup_result, | 1259 | struct dm_thin_lookup_result *lookup_result, |
@@ -1113,8 +1265,8 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, | |||
1113 | r = alloc_data_block(tc, &data_block); | 1265 | r = alloc_data_block(tc, &data_block); |
1114 | switch (r) { | 1266 | switch (r) { |
1115 | case 0: | 1267 | case 0: |
1116 | schedule_copy(tc, block, lookup_result->block, | 1268 | schedule_internal_copy(tc, block, lookup_result->block, |
1117 | data_block, cell, bio); | 1269 | data_block, cell, bio); |
1118 | break; | 1270 | break; |
1119 | 1271 | ||
1120 | case -ENOSPC: | 1272 | case -ENOSPC: |
@@ -1147,13 +1299,9 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, | |||
1147 | if (bio_data_dir(bio) == WRITE) | 1299 | if (bio_data_dir(bio) == WRITE) |
1148 | break_sharing(tc, bio, block, &key, lookup_result, cell); | 1300 | break_sharing(tc, bio, block, &key, lookup_result, cell); |
1149 | else { | 1301 | else { |
1150 | struct endio_hook *h; | 1302 | struct endio_hook *h = dm_get_mapinfo(bio)->ptr; |
1151 | h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); | ||
1152 | 1303 | ||
1153 | h->tc = tc; | 1304 | h->shared_read_entry = ds_inc(&pool->shared_read_ds); |
1154 | h->entry = ds_inc(&pool->ds); | ||
1155 | save_and_set_endio(bio, &h->saved_bi_end_io, shared_read_endio); | ||
1156 | dm_get_mapinfo(bio)->ptr = h; | ||
1157 | 1305 | ||
1158 | cell_release_singleton(cell, bio); | 1306 | cell_release_singleton(cell, bio); |
1159 | remap_and_issue(tc, bio, lookup_result->block); | 1307 | remap_and_issue(tc, bio, lookup_result->block); |
@@ -1188,7 +1336,10 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block | |||
1188 | r = alloc_data_block(tc, &data_block); | 1336 | r = alloc_data_block(tc, &data_block); |
1189 | switch (r) { | 1337 | switch (r) { |
1190 | case 0: | 1338 | case 0: |
1191 | schedule_zero(tc, block, data_block, cell, bio); | 1339 | if (tc->origin_dev) |
1340 | schedule_external_copy(tc, block, data_block, cell, bio); | ||
1341 | else | ||
1342 | schedule_zero(tc, block, data_block, cell, bio); | ||
1192 | break; | 1343 | break; |
1193 | 1344 | ||
1194 | case -ENOSPC: | 1345 | case -ENOSPC: |
@@ -1239,16 +1390,27 @@ static void process_bio(struct thin_c *tc, struct bio *bio) | |||
1239 | break; | 1390 | break; |
1240 | 1391 | ||
1241 | case -ENODATA: | 1392 | case -ENODATA: |
1242 | provision_block(tc, bio, block, cell); | 1393 | if (bio_data_dir(bio) == READ && tc->origin_dev) { |
1394 | cell_release_singleton(cell, bio); | ||
1395 | remap_to_origin_and_issue(tc, bio); | ||
1396 | } else | ||
1397 | provision_block(tc, bio, block, cell); | ||
1243 | break; | 1398 | break; |
1244 | 1399 | ||
1245 | default: | 1400 | default: |
1246 | DMERR("dm_thin_find_block() failed, error = %d", r); | 1401 | DMERR("dm_thin_find_block() failed, error = %d", r); |
1402 | cell_release_singleton(cell, bio); | ||
1247 | bio_io_error(bio); | 1403 | bio_io_error(bio); |
1248 | break; | 1404 | break; |
1249 | } | 1405 | } |
1250 | } | 1406 | } |
1251 | 1407 | ||
1408 | static int need_commit_due_to_time(struct pool *pool) | ||
1409 | { | ||
1410 | return jiffies < pool->last_commit_jiffies || | ||
1411 | jiffies > pool->last_commit_jiffies + COMMIT_PERIOD; | ||
1412 | } | ||
1413 | |||
1252 | static void process_deferred_bios(struct pool *pool) | 1414 | static void process_deferred_bios(struct pool *pool) |
1253 | { | 1415 | { |
1254 | unsigned long flags; | 1416 | unsigned long flags; |
@@ -1264,7 +1426,9 @@ static void process_deferred_bios(struct pool *pool) | |||
1264 | spin_unlock_irqrestore(&pool->lock, flags); | 1426 | spin_unlock_irqrestore(&pool->lock, flags); |
1265 | 1427 | ||
1266 | while ((bio = bio_list_pop(&bios))) { | 1428 | while ((bio = bio_list_pop(&bios))) { |
1267 | struct thin_c *tc = dm_get_mapinfo(bio)->ptr; | 1429 | struct endio_hook *h = dm_get_mapinfo(bio)->ptr; |
1430 | struct thin_c *tc = h->tc; | ||
1431 | |||
1268 | /* | 1432 | /* |
1269 | * If we've got no free new_mapping structs, and processing | 1433 | * If we've got no free new_mapping structs, and processing |
1270 | * this bio might require one, we pause until there are some | 1434 | * this bio might require one, we pause until there are some |
@@ -1277,7 +1441,11 @@ static void process_deferred_bios(struct pool *pool) | |||
1277 | 1441 | ||
1278 | break; | 1442 | break; |
1279 | } | 1443 | } |
1280 | process_bio(tc, bio); | 1444 | |
1445 | if (bio->bi_rw & REQ_DISCARD) | ||
1446 | process_discard(tc, bio); | ||
1447 | else | ||
1448 | process_bio(tc, bio); | ||
1281 | } | 1449 | } |
1282 | 1450 | ||
1283 | /* | 1451 | /* |
@@ -1290,7 +1458,7 @@ static void process_deferred_bios(struct pool *pool) | |||
1290 | bio_list_init(&pool->deferred_flush_bios); | 1458 | bio_list_init(&pool->deferred_flush_bios); |
1291 | spin_unlock_irqrestore(&pool->lock, flags); | 1459 | spin_unlock_irqrestore(&pool->lock, flags); |
1292 | 1460 | ||
1293 | if (bio_list_empty(&bios)) | 1461 | if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) |
1294 | return; | 1462 | return; |
1295 | 1463 | ||
1296 | r = dm_pool_commit_metadata(pool->pmd); | 1464 | r = dm_pool_commit_metadata(pool->pmd); |
@@ -1301,6 +1469,7 @@ static void process_deferred_bios(struct pool *pool) | |||
1301 | bio_io_error(bio); | 1469 | bio_io_error(bio); |
1302 | return; | 1470 | return; |
1303 | } | 1471 | } |
1472 | pool->last_commit_jiffies = jiffies; | ||
1304 | 1473 | ||
1305 | while ((bio = bio_list_pop(&bios))) | 1474 | while ((bio = bio_list_pop(&bios))) |
1306 | generic_make_request(bio); | 1475 | generic_make_request(bio); |
@@ -1310,10 +1479,22 @@ static void do_worker(struct work_struct *ws) | |||
1310 | { | 1479 | { |
1311 | struct pool *pool = container_of(ws, struct pool, worker); | 1480 | struct pool *pool = container_of(ws, struct pool, worker); |
1312 | 1481 | ||
1313 | process_prepared_mappings(pool); | 1482 | process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping); |
1483 | process_prepared(pool, &pool->prepared_discards, process_prepared_discard); | ||
1314 | process_deferred_bios(pool); | 1484 | process_deferred_bios(pool); |
1315 | } | 1485 | } |
1316 | 1486 | ||
1487 | /* | ||
1488 | * We want to commit periodically so that not too much | ||
1489 | * unwritten data builds up. | ||
1490 | */ | ||
1491 | static void do_waker(struct work_struct *ws) | ||
1492 | { | ||
1493 | struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker); | ||
1494 | wake_worker(pool); | ||
1495 | queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); | ||
1496 | } | ||
1497 | |||
1317 | /*----------------------------------------------------------------*/ | 1498 | /*----------------------------------------------------------------*/ |
1318 | 1499 | ||
1319 | /* | 1500 | /* |
@@ -1335,6 +1516,19 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio) | |||
1335 | wake_worker(pool); | 1516 | wake_worker(pool); |
1336 | } | 1517 | } |
1337 | 1518 | ||
1519 | static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) | ||
1520 | { | ||
1521 | struct pool *pool = tc->pool; | ||
1522 | struct endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); | ||
1523 | |||
1524 | h->tc = tc; | ||
1525 | h->shared_read_entry = NULL; | ||
1526 | h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : ds_inc(&pool->all_io_ds); | ||
1527 | h->overwrite_mapping = NULL; | ||
1528 | |||
1529 | return h; | ||
1530 | } | ||
1531 | |||
1338 | /* | 1532 | /* |
1339 | * Non-blocking function called from the thin target's map function. | 1533 | * Non-blocking function called from the thin target's map function. |
1340 | */ | 1534 | */ |
@@ -1347,12 +1541,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, | |||
1347 | struct dm_thin_device *td = tc->td; | 1541 | struct dm_thin_device *td = tc->td; |
1348 | struct dm_thin_lookup_result result; | 1542 | struct dm_thin_lookup_result result; |
1349 | 1543 | ||
1350 | /* | 1544 | map_context->ptr = thin_hook_bio(tc, bio); |
1351 | * Save the thin context for easy access from the deferred bio later. | 1545 | if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { |
1352 | */ | ||
1353 | map_context->ptr = tc; | ||
1354 | |||
1355 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { | ||
1356 | thin_defer_bio(tc, bio); | 1546 | thin_defer_bio(tc, bio); |
1357 | return DM_MAPIO_SUBMITTED; | 1547 | return DM_MAPIO_SUBMITTED; |
1358 | } | 1548 | } |
@@ -1434,7 +1624,7 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) | |||
1434 | 1624 | ||
1435 | pool->ti = ti; | 1625 | pool->ti = ti; |
1436 | pool->low_water_blocks = pt->low_water_blocks; | 1626 | pool->low_water_blocks = pt->low_water_blocks; |
1437 | pool->zero_new_blocks = pt->zero_new_blocks; | 1627 | pool->pf = pt->pf; |
1438 | 1628 | ||
1439 | return 0; | 1629 | return 0; |
1440 | } | 1630 | } |
@@ -1448,6 +1638,14 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) | |||
1448 | /*---------------------------------------------------------------- | 1638 | /*---------------------------------------------------------------- |
1449 | * Pool creation | 1639 | * Pool creation |
1450 | *--------------------------------------------------------------*/ | 1640 | *--------------------------------------------------------------*/ |
1641 | /* Initialize pool features. */ | ||
1642 | static void pool_features_init(struct pool_features *pf) | ||
1643 | { | ||
1644 | pf->zero_new_blocks = 1; | ||
1645 | pf->discard_enabled = 1; | ||
1646 | pf->discard_passdown = 1; | ||
1647 | } | ||
1648 | |||
1451 | static void __pool_destroy(struct pool *pool) | 1649 | static void __pool_destroy(struct pool *pool) |
1452 | { | 1650 | { |
1453 | __pool_table_remove(pool); | 1651 | __pool_table_remove(pool); |
@@ -1495,7 +1693,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
1495 | pool->block_shift = ffs(block_size) - 1; | 1693 | pool->block_shift = ffs(block_size) - 1; |
1496 | pool->offset_mask = block_size - 1; | 1694 | pool->offset_mask = block_size - 1; |
1497 | pool->low_water_blocks = 0; | 1695 | pool->low_water_blocks = 0; |
1498 | pool->zero_new_blocks = 1; | 1696 | pool_features_init(&pool->pf); |
1499 | pool->prison = prison_create(PRISON_CELLS); | 1697 | pool->prison = prison_create(PRISON_CELLS); |
1500 | if (!pool->prison) { | 1698 | if (!pool->prison) { |
1501 | *error = "Error creating pool's bio prison"; | 1699 | *error = "Error creating pool's bio prison"; |
@@ -1523,14 +1721,17 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
1523 | } | 1721 | } |
1524 | 1722 | ||
1525 | INIT_WORK(&pool->worker, do_worker); | 1723 | INIT_WORK(&pool->worker, do_worker); |
1724 | INIT_DELAYED_WORK(&pool->waker, do_waker); | ||
1526 | spin_lock_init(&pool->lock); | 1725 | spin_lock_init(&pool->lock); |
1527 | bio_list_init(&pool->deferred_bios); | 1726 | bio_list_init(&pool->deferred_bios); |
1528 | bio_list_init(&pool->deferred_flush_bios); | 1727 | bio_list_init(&pool->deferred_flush_bios); |
1529 | INIT_LIST_HEAD(&pool->prepared_mappings); | 1728 | INIT_LIST_HEAD(&pool->prepared_mappings); |
1729 | INIT_LIST_HEAD(&pool->prepared_discards); | ||
1530 | pool->low_water_triggered = 0; | 1730 | pool->low_water_triggered = 0; |
1531 | pool->no_free_space = 0; | 1731 | pool->no_free_space = 0; |
1532 | bio_list_init(&pool->retry_on_resume_list); | 1732 | bio_list_init(&pool->retry_on_resume_list); |
1533 | ds_init(&pool->ds); | 1733 | ds_init(&pool->shared_read_ds); |
1734 | ds_init(&pool->all_io_ds); | ||
1534 | 1735 | ||
1535 | pool->next_mapping = NULL; | 1736 | pool->next_mapping = NULL; |
1536 | pool->mapping_pool = | 1737 | pool->mapping_pool = |
@@ -1549,6 +1750,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
1549 | goto bad_endio_hook_pool; | 1750 | goto bad_endio_hook_pool; |
1550 | } | 1751 | } |
1551 | pool->ref_count = 1; | 1752 | pool->ref_count = 1; |
1753 | pool->last_commit_jiffies = jiffies; | ||
1552 | pool->pool_md = pool_md; | 1754 | pool->pool_md = pool_md; |
1553 | pool->md_dev = metadata_dev; | 1755 | pool->md_dev = metadata_dev; |
1554 | __pool_table_insert(pool); | 1756 | __pool_table_insert(pool); |
@@ -1588,7 +1790,8 @@ static void __pool_dec(struct pool *pool) | |||
1588 | 1790 | ||
1589 | static struct pool *__pool_find(struct mapped_device *pool_md, | 1791 | static struct pool *__pool_find(struct mapped_device *pool_md, |
1590 | struct block_device *metadata_dev, | 1792 | struct block_device *metadata_dev, |
1591 | unsigned long block_size, char **error) | 1793 | unsigned long block_size, char **error, |
1794 | int *created) | ||
1592 | { | 1795 | { |
1593 | struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); | 1796 | struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); |
1594 | 1797 | ||
@@ -1604,8 +1807,10 @@ static struct pool *__pool_find(struct mapped_device *pool_md, | |||
1604 | return ERR_PTR(-EINVAL); | 1807 | return ERR_PTR(-EINVAL); |
1605 | __pool_inc(pool); | 1808 | __pool_inc(pool); |
1606 | 1809 | ||
1607 | } else | 1810 | } else { |
1608 | pool = pool_create(pool_md, metadata_dev, block_size, error); | 1811 | pool = pool_create(pool_md, metadata_dev, block_size, error); |
1812 | *created = 1; | ||
1813 | } | ||
1609 | } | 1814 | } |
1610 | 1815 | ||
1611 | return pool; | 1816 | return pool; |
@@ -1629,10 +1834,6 @@ static void pool_dtr(struct dm_target *ti) | |||
1629 | mutex_unlock(&dm_thin_pool_table.mutex); | 1834 | mutex_unlock(&dm_thin_pool_table.mutex); |
1630 | } | 1835 | } |
1631 | 1836 | ||
1632 | struct pool_features { | ||
1633 | unsigned zero_new_blocks:1; | ||
1634 | }; | ||
1635 | |||
1636 | static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, | 1837 | static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, |
1637 | struct dm_target *ti) | 1838 | struct dm_target *ti) |
1638 | { | 1839 | { |
@@ -1641,7 +1842,7 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, | |||
1641 | const char *arg_name; | 1842 | const char *arg_name; |
1642 | 1843 | ||
1643 | static struct dm_arg _args[] = { | 1844 | static struct dm_arg _args[] = { |
1644 | {0, 1, "Invalid number of pool feature arguments"}, | 1845 | {0, 3, "Invalid number of pool feature arguments"}, |
1645 | }; | 1846 | }; |
1646 | 1847 | ||
1647 | /* | 1848 | /* |
@@ -1661,6 +1862,12 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, | |||
1661 | if (!strcasecmp(arg_name, "skip_block_zeroing")) { | 1862 | if (!strcasecmp(arg_name, "skip_block_zeroing")) { |
1662 | pf->zero_new_blocks = 0; | 1863 | pf->zero_new_blocks = 0; |
1663 | continue; | 1864 | continue; |
1865 | } else if (!strcasecmp(arg_name, "ignore_discard")) { | ||
1866 | pf->discard_enabled = 0; | ||
1867 | continue; | ||
1868 | } else if (!strcasecmp(arg_name, "no_discard_passdown")) { | ||
1869 | pf->discard_passdown = 0; | ||
1870 | continue; | ||
1664 | } | 1871 | } |
1665 | 1872 | ||
1666 | ti->error = "Unrecognised pool feature requested"; | 1873 | ti->error = "Unrecognised pool feature requested"; |
@@ -1678,10 +1885,12 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, | |||
1678 | * | 1885 | * |
1679 | * Optional feature arguments are: | 1886 | * Optional feature arguments are: |
1680 | * skip_block_zeroing: skips the zeroing of newly-provisioned blocks. | 1887 | * skip_block_zeroing: skips the zeroing of newly-provisioned blocks. |
1888 | * ignore_discard: disable discard | ||
1889 | * no_discard_passdown: don't pass discards down to the data device | ||
1681 | */ | 1890 | */ |
1682 | static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | 1891 | static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) |
1683 | { | 1892 | { |
1684 | int r; | 1893 | int r, pool_created = 0; |
1685 | struct pool_c *pt; | 1894 | struct pool_c *pt; |
1686 | struct pool *pool; | 1895 | struct pool *pool; |
1687 | struct pool_features pf; | 1896 | struct pool_features pf; |
@@ -1691,6 +1900,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1691 | dm_block_t low_water_blocks; | 1900 | dm_block_t low_water_blocks; |
1692 | struct dm_dev *metadata_dev; | 1901 | struct dm_dev *metadata_dev; |
1693 | sector_t metadata_dev_size; | 1902 | sector_t metadata_dev_size; |
1903 | char b[BDEVNAME_SIZE]; | ||
1694 | 1904 | ||
1695 | /* | 1905 | /* |
1696 | * FIXME Remove validation from scope of lock. | 1906 | * FIXME Remove validation from scope of lock. |
@@ -1712,11 +1922,9 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1712 | } | 1922 | } |
1713 | 1923 | ||
1714 | metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT; | 1924 | metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT; |
1715 | if (metadata_dev_size > METADATA_DEV_MAX_SECTORS) { | 1925 | if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) |
1716 | ti->error = "Metadata device is too large"; | 1926 | DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", |
1717 | r = -EINVAL; | 1927 | bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); |
1718 | goto out_metadata; | ||
1719 | } | ||
1720 | 1928 | ||
1721 | r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); | 1929 | r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); |
1722 | if (r) { | 1930 | if (r) { |
@@ -1742,8 +1950,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1742 | /* | 1950 | /* |
1743 | * Set default pool features. | 1951 | * Set default pool features. |
1744 | */ | 1952 | */ |
1745 | memset(&pf, 0, sizeof(pf)); | 1953 | pool_features_init(&pf); |
1746 | pf.zero_new_blocks = 1; | ||
1747 | 1954 | ||
1748 | dm_consume_args(&as, 4); | 1955 | dm_consume_args(&as, 4); |
1749 | r = parse_pool_features(&as, &pf, ti); | 1956 | r = parse_pool_features(&as, &pf, ti); |
@@ -1757,20 +1964,58 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1757 | } | 1964 | } |
1758 | 1965 | ||
1759 | pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, | 1966 | pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, |
1760 | block_size, &ti->error); | 1967 | block_size, &ti->error, &pool_created); |
1761 | if (IS_ERR(pool)) { | 1968 | if (IS_ERR(pool)) { |
1762 | r = PTR_ERR(pool); | 1969 | r = PTR_ERR(pool); |
1763 | goto out_free_pt; | 1970 | goto out_free_pt; |
1764 | } | 1971 | } |
1765 | 1972 | ||
1973 | /* | ||
1974 | * 'pool_created' reflects whether this is the first table load. | ||
1975 | * Top level discard support is not allowed to be changed after | ||
1976 | * initial load. This would require a pool reload to trigger thin | ||
1977 | * device changes. | ||
1978 | */ | ||
1979 | if (!pool_created && pf.discard_enabled != pool->pf.discard_enabled) { | ||
1980 | ti->error = "Discard support cannot be disabled once enabled"; | ||
1981 | r = -EINVAL; | ||
1982 | goto out_flags_changed; | ||
1983 | } | ||
1984 | |||
1985 | /* | ||
1986 | * If discard_passdown was enabled verify that the data device | ||
1987 | * supports discards. Disable discard_passdown if not; otherwise | ||
1988 | * -EOPNOTSUPP will be returned. | ||
1989 | */ | ||
1990 | if (pf.discard_passdown) { | ||
1991 | struct request_queue *q = bdev_get_queue(data_dev->bdev); | ||
1992 | if (!q || !blk_queue_discard(q)) { | ||
1993 | DMWARN("Discard unsupported by data device: Disabling discard passdown."); | ||
1994 | pf.discard_passdown = 0; | ||
1995 | } | ||
1996 | } | ||
1997 | |||
1766 | pt->pool = pool; | 1998 | pt->pool = pool; |
1767 | pt->ti = ti; | 1999 | pt->ti = ti; |
1768 | pt->metadata_dev = metadata_dev; | 2000 | pt->metadata_dev = metadata_dev; |
1769 | pt->data_dev = data_dev; | 2001 | pt->data_dev = data_dev; |
1770 | pt->low_water_blocks = low_water_blocks; | 2002 | pt->low_water_blocks = low_water_blocks; |
1771 | pt->zero_new_blocks = pf.zero_new_blocks; | 2003 | pt->pf = pf; |
1772 | ti->num_flush_requests = 1; | 2004 | ti->num_flush_requests = 1; |
1773 | ti->num_discard_requests = 0; | 2005 | /* |
2006 | * Only need to enable discards if the pool should pass | ||
2007 | * them down to the data device. The thin device's discard | ||
2008 | * processing will cause mappings to be removed from the btree. | ||
2009 | */ | ||
2010 | if (pf.discard_enabled && pf.discard_passdown) { | ||
2011 | ti->num_discard_requests = 1; | ||
2012 | /* | ||
2013 | * Setting 'discards_supported' circumvents the normal | ||
2014 | * stacking of discard limits (this keeps the pool and | ||
2015 | * thin devices' discard limits consistent). | ||
2016 | */ | ||
2017 | ti->discards_supported = 1; | ||
2018 | } | ||
1774 | ti->private = pt; | 2019 | ti->private = pt; |
1775 | 2020 | ||
1776 | pt->callbacks.congested_fn = pool_is_congested; | 2021 | pt->callbacks.congested_fn = pool_is_congested; |
@@ -1780,6 +2025,8 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1780 | 2025 | ||
1781 | return 0; | 2026 | return 0; |
1782 | 2027 | ||
2028 | out_flags_changed: | ||
2029 | __pool_dec(pool); | ||
1783 | out_free_pt: | 2030 | out_free_pt: |
1784 | kfree(pt); | 2031 | kfree(pt); |
1785 | out: | 2032 | out: |
@@ -1878,7 +2125,7 @@ static void pool_resume(struct dm_target *ti) | |||
1878 | __requeue_bios(pool); | 2125 | __requeue_bios(pool); |
1879 | spin_unlock_irqrestore(&pool->lock, flags); | 2126 | spin_unlock_irqrestore(&pool->lock, flags); |
1880 | 2127 | ||
1881 | wake_worker(pool); | 2128 | do_waker(&pool->waker.work); |
1882 | } | 2129 | } |
1883 | 2130 | ||
1884 | static void pool_postsuspend(struct dm_target *ti) | 2131 | static void pool_postsuspend(struct dm_target *ti) |
@@ -1887,6 +2134,7 @@ static void pool_postsuspend(struct dm_target *ti) | |||
1887 | struct pool_c *pt = ti->private; | 2134 | struct pool_c *pt = ti->private; |
1888 | struct pool *pool = pt->pool; | 2135 | struct pool *pool = pt->pool; |
1889 | 2136 | ||
2137 | cancel_delayed_work(&pool->waker); | ||
1890 | flush_workqueue(pool->wq); | 2138 | flush_workqueue(pool->wq); |
1891 | 2139 | ||
1892 | r = dm_pool_commit_metadata(pool->pmd); | 2140 | r = dm_pool_commit_metadata(pool->pmd); |
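
pool_resume() now calls do_waker() directly and pool_postsuspend() cancels the delayed work before flushing the workqueue and committing metadata, which is the usual shape for a self-rearming periodic work item. A minimal kernel-style sketch of that pattern follows, assuming the waker requeues itself; the struct, helper names and the one-second period are illustrative.

    #include <linux/kernel.h>
    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    struct my_pool {
        struct workqueue_struct *wq;
        struct delayed_work waker;
    };

    static void do_waker(struct work_struct *ws)
    {
        struct my_pool *pool = container_of(to_delayed_work(ws),
                                            struct my_pool, waker);

        /* ... periodic work, e.g. commit outstanding metadata ... */

        queue_delayed_work(pool->wq, &pool->waker, HZ);     /* re-arm */
    }

    static void my_pool_init(struct my_pool *pool)
    {
        pool->wq = alloc_ordered_workqueue("mypool", WQ_MEM_RECLAIM);
        INIT_DELAYED_WORK(&pool->waker, do_waker);
    }

    static void my_resume(struct my_pool *pool)
    {
        do_waker(&pool->waker.work);        /* run once now, then re-arm  */
    }

    static void my_postsuspend(struct my_pool *pool)
    {
        cancel_delayed_work(&pool->waker);  /* stop the periodic kick ... */
        flush_workqueue(pool->wq);          /* ... then drain what is queued */
    }
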
@@ -2067,7 +2315,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) | |||
2067 | static int pool_status(struct dm_target *ti, status_type_t type, | 2315 | static int pool_status(struct dm_target *ti, status_type_t type, |
2068 | char *result, unsigned maxlen) | 2316 | char *result, unsigned maxlen) |
2069 | { | 2317 | { |
2070 | int r; | 2318 | int r, count; |
2071 | unsigned sz = 0; | 2319 | unsigned sz = 0; |
2072 | uint64_t transaction_id; | 2320 | uint64_t transaction_id; |
2073 | dm_block_t nr_free_blocks_data; | 2321 | dm_block_t nr_free_blocks_data; |
@@ -2130,10 +2378,19 @@ static int pool_status(struct dm_target *ti, status_type_t type, | |||
2130 | (unsigned long)pool->sectors_per_block, | 2378 | (unsigned long)pool->sectors_per_block, |
2131 | (unsigned long long)pt->low_water_blocks); | 2379 | (unsigned long long)pt->low_water_blocks); |
2132 | 2380 | ||
2133 | DMEMIT("%u ", !pool->zero_new_blocks); | 2381 | count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled + |
2382 | !pool->pf.discard_passdown; | ||
2383 | DMEMIT("%u ", count); | ||
2134 | 2384 | ||
2135 | if (!pool->zero_new_blocks) | 2385 | if (!pool->pf.zero_new_blocks) |
2136 | DMEMIT("skip_block_zeroing "); | 2386 | DMEMIT("skip_block_zeroing "); |
2387 | |||
2388 | if (!pool->pf.discard_enabled) | ||
2389 | DMEMIT("ignore_discard "); | ||
2390 | |||
2391 | if (!pool->pf.discard_passdown) | ||
2392 | DMEMIT("no_discard_passdown "); | ||
2393 | |||
2137 | break; | 2394 | break; |
2138 | } | 2395 | } |
2139 | 2396 | ||
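
The status output now prefixes its optional feature arguments with a count built by summing the negated flags (!x is 1 when a feature is off). A small standalone check of that idiom and of the resulting status suffix; the flag values chosen are just an example.

    #include <stdio.h>

    int main(void)
    {
        unsigned zero_new_blocks = 1;   /* block zeroing on        */
        unsigned discard_enabled = 0;   /* discards ignored        */
        unsigned discard_passdown = 0;  /* and not passed down     */

        int count = !zero_new_blocks + !discard_enabled + !discard_passdown;

        printf("%u ", count);                   /* "2 " in this example */
        if (!zero_new_blocks)
            printf("skip_block_zeroing ");
        if (!discard_enabled)
            printf("ignore_discard ");          /* printed here */
        if (!discard_passdown)
            printf("no_discard_passdown ");     /* printed here */
        printf("\n");
        return 0;
    }
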
@@ -2162,6 +2419,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, | |||
2162 | return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); | 2419 | return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); |
2163 | } | 2420 | } |
2164 | 2421 | ||
2422 | static void set_discard_limits(struct pool *pool, struct queue_limits *limits) | ||
2423 | { | ||
2424 | /* | ||
2425 | * FIXME: these limits may be incompatible with the pool's data device | ||
2426 | */ | ||
2427 | limits->max_discard_sectors = pool->sectors_per_block; | ||
2428 | |||
2429 | /* | ||
2430 | * This is just a hint, and not enforced. We have to cope with | ||
2431 | * bios that overlap 2 blocks. | ||
2432 | */ | ||
2433 | limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; | ||
2434 | limits->discard_zeroes_data = pool->pf.zero_new_blocks; | ||
2435 | } | ||
2436 | |||
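
Note that set_discard_limits() mixes units: max_discard_sectors is counted in 512-byte sectors, while discard_granularity is a byte value, hence the << SECTOR_SHIFT. A quick standalone check of that conversion for an assumed 128-sector (64 KiB) pool block size; the block size is purely illustrative.

    #include <stdio.h>
    #include <stdint.h>

    #define SECTOR_SHIFT 9                  /* 512-byte sectors, as in the kernel */

    int main(void)
    {
        uint64_t sectors_per_block = 128;   /* assumed pool block size: 64 KiB */

        /* max_discard_sectors stays in sectors ... */
        uint64_t max_discard_sectors = sectors_per_block;

        /* ... while discard_granularity is a byte count, hence the shift. */
        uint64_t discard_granularity = sectors_per_block << SECTOR_SHIFT;

        printf("max_discard_sectors = %llu sectors\n",
               (unsigned long long)max_discard_sectors);     /* 128 */
        printf("discard_granularity = %llu bytes\n",
               (unsigned long long)discard_granularity);     /* 65536 */
        return 0;
    }
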
2165 | static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) | 2437 | static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) |
2166 | { | 2438 | { |
2167 | struct pool_c *pt = ti->private; | 2439 | struct pool_c *pt = ti->private; |
@@ -2169,13 +2441,15 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
2169 | 2441 | ||
2170 | blk_limits_io_min(limits, 0); | 2442 | blk_limits_io_min(limits, 0); |
2171 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); | 2443 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); |
2444 | if (pool->pf.discard_enabled) | ||
2445 | set_discard_limits(pool, limits); | ||
2172 | } | 2446 | } |
2173 | 2447 | ||
2174 | static struct target_type pool_target = { | 2448 | static struct target_type pool_target = { |
2175 | .name = "thin-pool", | 2449 | .name = "thin-pool", |
2176 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | | 2450 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | |
2177 | DM_TARGET_IMMUTABLE, | 2451 | DM_TARGET_IMMUTABLE, |
2178 | .version = {1, 0, 0}, | 2452 | .version = {1, 1, 0}, |
2179 | .module = THIS_MODULE, | 2453 | .module = THIS_MODULE, |
2180 | .ctr = pool_ctr, | 2454 | .ctr = pool_ctr, |
2181 | .dtr = pool_dtr, | 2455 | .dtr = pool_dtr, |
@@ -2202,6 +2476,8 @@ static void thin_dtr(struct dm_target *ti) | |||
2202 | __pool_dec(tc->pool); | 2476 | __pool_dec(tc->pool); |
2203 | dm_pool_close_thin_device(tc->td); | 2477 | dm_pool_close_thin_device(tc->td); |
2204 | dm_put_device(ti, tc->pool_dev); | 2478 | dm_put_device(ti, tc->pool_dev); |
2479 | if (tc->origin_dev) | ||
2480 | dm_put_device(ti, tc->origin_dev); | ||
2205 | kfree(tc); | 2481 | kfree(tc); |
2206 | 2482 | ||
2207 | mutex_unlock(&dm_thin_pool_table.mutex); | 2483 | mutex_unlock(&dm_thin_pool_table.mutex); |
@@ -2210,21 +2486,25 @@ static void thin_dtr(struct dm_target *ti) | |||
2210 | /* | 2486 | /* |
2211 | * Thin target parameters: | 2487 | * Thin target parameters: |
2212 | * | 2488 | * |
2213 | * <pool_dev> <dev_id> | 2489 | * <pool_dev> <dev_id> [origin_dev] |
2214 | * | 2490 | * |
2215 | * pool_dev: the path to the pool (eg, /dev/mapper/my_pool) | 2491 | * pool_dev: the path to the pool (eg, /dev/mapper/my_pool) |
2216 | * dev_id: the internal device identifier | 2492 | * dev_id: the internal device identifier |
2493 | * origin_dev: a device external to the pool that should act as the origin | ||
2494 | * | ||
2495 | * If the pool device has discards disabled, they get disabled for the thin | ||
2496 | * device as well. | ||
2217 | */ | 2497 | */ |
2218 | static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | 2498 | static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) |
2219 | { | 2499 | { |
2220 | int r; | 2500 | int r; |
2221 | struct thin_c *tc; | 2501 | struct thin_c *tc; |
2222 | struct dm_dev *pool_dev; | 2502 | struct dm_dev *pool_dev, *origin_dev; |
2223 | struct mapped_device *pool_md; | 2503 | struct mapped_device *pool_md; |
2224 | 2504 | ||
2225 | mutex_lock(&dm_thin_pool_table.mutex); | 2505 | mutex_lock(&dm_thin_pool_table.mutex); |
2226 | 2506 | ||
2227 | if (argc != 2) { | 2507 | if (argc != 2 && argc != 3) { |
2228 | ti->error = "Invalid argument count"; | 2508 | ti->error = "Invalid argument count"; |
2229 | r = -EINVAL; | 2509 | r = -EINVAL; |
2230 | goto out_unlock; | 2510 | goto out_unlock; |
@@ -2237,6 +2517,15 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
2237 | goto out_unlock; | 2517 | goto out_unlock; |
2238 | } | 2518 | } |
2239 | 2519 | ||
2520 | if (argc == 3) { | ||
2521 | r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); | ||
2522 | if (r) { | ||
2523 | ti->error = "Error opening origin device"; | ||
2524 | goto bad_origin_dev; | ||
2525 | } | ||
2526 | tc->origin_dev = origin_dev; | ||
2527 | } | ||
2528 | |||
2240 | r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev); | 2529 | r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev); |
2241 | if (r) { | 2530 | if (r) { |
2242 | ti->error = "Error opening pool device"; | 2531 | ti->error = "Error opening pool device"; |
@@ -2273,8 +2562,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
2273 | 2562 | ||
2274 | ti->split_io = tc->pool->sectors_per_block; | 2563 | ti->split_io = tc->pool->sectors_per_block; |
2275 | ti->num_flush_requests = 1; | 2564 | ti->num_flush_requests = 1; |
2276 | ti->num_discard_requests = 0; | 2565 | |
2277 | ti->discards_supported = 0; | 2566 | /* If the pool supports discards, pass them on. */ |
2567 | if (tc->pool->pf.discard_enabled) { | ||
2568 | ti->discards_supported = 1; | ||
2569 | ti->num_discard_requests = 1; | ||
2570 | } | ||
2278 | 2571 | ||
2279 | dm_put(pool_md); | 2572 | dm_put(pool_md); |
2280 | 2573 | ||
@@ -2289,6 +2582,9 @@ bad_pool_lookup: | |||
2289 | bad_common: | 2582 | bad_common: |
2290 | dm_put_device(ti, tc->pool_dev); | 2583 | dm_put_device(ti, tc->pool_dev); |
2291 | bad_pool_dev: | 2584 | bad_pool_dev: |
2585 | if (tc->origin_dev) | ||
2586 | dm_put_device(ti, tc->origin_dev); | ||
2587 | bad_origin_dev: | ||
2292 | kfree(tc); | 2588 | kfree(tc); |
2293 | out_unlock: | 2589 | out_unlock: |
2294 | mutex_unlock(&dm_thin_pool_table.mutex); | 2590 | mutex_unlock(&dm_thin_pool_table.mutex); |
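
With the optional third argument, thin_ctr() opens the external origin read-only (FMODE_READ) and records it alongside the pool device and device id. A rough sketch of the per-target context those assignments imply, inferred only from the fields this hunk touches; the struct layout and types are assumptions and the real structure carries more members.

    #include <linux/types.h>

    struct dm_dev;
    struct pool;
    struct dm_thin_device;

    /* Rough shape of what thin_ctr() fills in (illustrative, not the real struct). */
    struct thin_c_sketch {
        struct dm_dev *pool_dev;        /* argv[0]: the pool device              */
        struct dm_dev *origin_dev;      /* argv[2], optional: external read-only
                                         * origin, opened with FMODE_READ        */
        u64 dev_id;                     /* argv[1]: internal device identifier   */

        struct pool *pool;              /* shared pool object, ref-counted
                                         * (dropped via __pool_dec in thin_dtr)  */
        struct dm_thin_device *td;      /* metadata handle, closed again via
                                         * dm_pool_close_thin_device in thin_dtr */
    };

An illustrative table line for such an external-snapshot thin device would then take the form "start length thin <pool_dev> <dev_id> <origin_dev>", e.g. "0 2097152 thin /dev/mapper/pool 1 /dev/vg/origin" (values invented for the example).
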
@@ -2299,11 +2595,46 @@ out_unlock: | |||
2299 | static int thin_map(struct dm_target *ti, struct bio *bio, | 2595 | static int thin_map(struct dm_target *ti, struct bio *bio, |
2300 | union map_info *map_context) | 2596 | union map_info *map_context) |
2301 | { | 2597 | { |
2302 | bio->bi_sector -= ti->begin; | 2598 | bio->bi_sector = dm_target_offset(ti, bio->bi_sector); |
2303 | 2599 | ||
2304 | return thin_bio_map(ti, bio, map_context); | 2600 | return thin_bio_map(ti, bio, map_context); |
2305 | } | 2601 | } |
2306 | 2602 | ||
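
thin_map() now uses dm_target_offset() instead of subtracting ti->begin by hand; the removed line shows the two are the same calculation, just expressed through the helper. An open-coded sketch of that equivalence (the real helper lives in the device-mapper headers):

    /*
     * dm_target_offset() yields the sector relative to the start of this
     * target, which is exactly what "bio->bi_sector -= ti->begin" computed.
     */
    #define my_target_offset(ti, sector)    ((sector) - (ti)->begin)

    /* e.g. a bio at absolute sector (ti->begin + 100) maps to local sector 100 */
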
2603 | static int thin_endio(struct dm_target *ti, | ||
2604 | struct bio *bio, int err, | ||
2605 | union map_info *map_context) | ||
2606 | { | ||
2607 | unsigned long flags; | ||
2608 | struct endio_hook *h = map_context->ptr; | ||
2609 | struct list_head work; | ||
2610 | struct new_mapping *m, *tmp; | ||
2611 | struct pool *pool = h->tc->pool; | ||
2612 | |||
2613 | if (h->shared_read_entry) { | ||
2614 | INIT_LIST_HEAD(&work); | ||
2615 | ds_dec(h->shared_read_entry, &work); | ||
2616 | |||
2617 | spin_lock_irqsave(&pool->lock, flags); | ||
2618 | list_for_each_entry_safe(m, tmp, &work, list) { | ||
2619 | list_del(&m->list); | ||
2620 | m->quiesced = 1; | ||
2621 | __maybe_add_mapping(m); | ||
2622 | } | ||
2623 | spin_unlock_irqrestore(&pool->lock, flags); | ||
2624 | } | ||
2625 | |||
2626 | if (h->all_io_entry) { | ||
2627 | INIT_LIST_HEAD(&work); | ||
2628 | ds_dec(h->all_io_entry, &work); | ||
2629 | list_for_each_entry_safe(m, tmp, &work, list) | ||
2630 | list_add(&m->list, &pool->prepared_discards); | ||
2631 | } | ||
2632 | |||
2633 | mempool_free(h, pool->endio_hook_pool); | ||
2634 | |||
2635 | return 0; | ||
2636 | } | ||
2637 | |||
2307 | static void thin_postsuspend(struct dm_target *ti) | 2638 | static void thin_postsuspend(struct dm_target *ti) |
2308 | { | 2639 | { |
2309 | if (dm_noflush_suspending(ti)) | 2640 | if (dm_noflush_suspending(ti)) |
@@ -2347,6 +2678,8 @@ static int thin_status(struct dm_target *ti, status_type_t type, | |||
2347 | DMEMIT("%s %lu", | 2678 | DMEMIT("%s %lu", |
2348 | format_dev_t(buf, tc->pool_dev->bdev->bd_dev), | 2679 | format_dev_t(buf, tc->pool_dev->bdev->bd_dev), |
2349 | (unsigned long) tc->dev_id); | 2680 | (unsigned long) tc->dev_id); |
2681 | if (tc->origin_dev) | ||
2682 | DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev)); | ||
2350 | break; | 2683 | break; |
2351 | } | 2684 | } |
2352 | } | 2685 | } |
@@ -2377,18 +2710,21 @@ static int thin_iterate_devices(struct dm_target *ti, | |||
2377 | static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) | 2710 | static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) |
2378 | { | 2711 | { |
2379 | struct thin_c *tc = ti->private; | 2712 | struct thin_c *tc = ti->private; |
2713 | struct pool *pool = tc->pool; | ||
2380 | 2714 | ||
2381 | blk_limits_io_min(limits, 0); | 2715 | blk_limits_io_min(limits, 0); |
2382 | blk_limits_io_opt(limits, tc->pool->sectors_per_block << SECTOR_SHIFT); | 2716 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); |
2717 | set_discard_limits(pool, limits); | ||
2383 | } | 2718 | } |
2384 | 2719 | ||
2385 | static struct target_type thin_target = { | 2720 | static struct target_type thin_target = { |
2386 | .name = "thin", | 2721 | .name = "thin", |
2387 | .version = {1, 0, 0}, | 2722 | .version = {1, 1, 0}, |
2388 | .module = THIS_MODULE, | 2723 | .module = THIS_MODULE, |
2389 | .ctr = thin_ctr, | 2724 | .ctr = thin_ctr, |
2390 | .dtr = thin_dtr, | 2725 | .dtr = thin_dtr, |
2391 | .map = thin_map, | 2726 | .map = thin_map, |
2727 | .end_io = thin_endio, | ||
2392 | .postsuspend = thin_postsuspend, | 2728 | .postsuspend = thin_postsuspend, |
2393 | .status = thin_status, | 2729 | .status = thin_status, |
2394 | .iterate_devices = thin_iterate_devices, | 2730 | .iterate_devices = thin_iterate_devices, |