author     Linus Torvalds <torvalds@linux-foundation.org>    2014-08-14 11:17:56 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2014-08-14 11:17:56 -0400
commit     ba368991f63f020afe4ee9d5b647c5397cf3c7f2
tree       8d391c8921acab5bd70cd04edaeb3de4c38ed426
parent     a8e4def604a9affa04fdd4efa0692da1385ffa3f
parent     200612ec33e555a356eebc717630b866ae2b694f
Merge tag 'dm-3.17-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper changes from Mike Snitzer:
- Allow the thin target to be paired with any size external origin; also
allow thin snapshots to be larger than the external origin.
- Add support for quickly loading a repetitive pattern into the
dm-switch target.
- Use per-bio data in the dm-crypt target instead of always using a
mempool for each allocation. This required switching the bio slab to
kmalloc alignment.
- Fix DM core to properly stack the QUEUE_FLAG_NO_SG_MERGE flag.
- Fix the dm-cache and dm-thin targets' export of the minimum_io_size
to match the data block size -- this fixes an issue where mkfs.xfs
would improperly infer that RAID striping was in place on the
underlying storage (a short sketch of the io_hints logic follows this
list).
- Small cleanups in dm-io, dm-mpath and dm-cache.
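
As a quick illustration of the minimum_io_size fix: the affected targets'
io_hints callbacks now export io_min equal to the data block size rather than
0 whenever they override the stacked hints. A minimal sketch of that logic,
assuming a hypothetical target type with a sectors_per_block field
(blk_limits_io_min() and blk_limits_io_opt() are the real block-layer helpers
used in the dm-cache hunk below):

    static void example_io_hints(struct dm_target *ti, struct queue_limits *limits)
    {
            struct example_target *t = ti->private;   /* hypothetical target type */
            sector_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;

            /*
             * If the stacked hints don't line up with our data block size,
             * export io_min == io_opt == data block size so tools such as
             * mkfs.xfs don't mistake a lone io_opt for a RAID stripe.
             */
            if (io_opt_sectors < t->sectors_per_block ||
                do_div(io_opt_sectors, t->sectors_per_block)) {
                    blk_limits_io_min(limits, t->sectors_per_block << SECTOR_SHIFT);
                    blk_limits_io_opt(limits, t->sectors_per_block << SECTOR_SHIFT);
            }
    }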
* tag 'dm-3.17-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm table: propagate QUEUE_FLAG_NO_SG_MERGE
dm switch: efficiently support repetitive patterns
dm switch: factor out switch_region_table_read
dm cache: set minimum_io_size to cache's data block size
dm thin: set minimum_io_size to pool's data block size
dm crypt: use per-bio data
block: use kmalloc alignment for bio slab
dm table: make dm_table_supports_discards static
dm cache metadata: use dm-space-map-metadata.h defined size limits
dm cache: fail migrations in the do_worker error path
dm cache: simplify deferred set reference count increments
dm thin: relax external origin size constraints
dm thin: switch to an atomic_t for tracking pending new block preparations
dm mpath: eliminate pg_ready() wrapper
dm io: simplify dec_count and sync_io
 Documentation/device-mapper/switch.txt |  12
 block/bio.c                            |   3
 drivers/md/dm-cache-metadata.c         |   4
 drivers/md/dm-cache-metadata.h         |   8
 drivers/md/dm-cache-target.c           | 128
 drivers/md/dm-crypt.c                  |  41
 drivers/md/dm-io.c                     |  77
 drivers/md/dm-mpath.c                  |   6
 drivers/md/dm-switch.c                 |  67
 drivers/md/dm-table.c                  |  86
 drivers/md/dm-thin.c                   | 181
 drivers/md/dm.h                        |   1
 12 files changed, 408 insertions(+), 206 deletions(-)
diff --git a/Documentation/device-mapper/switch.txt b/Documentation/device-mapper/switch.txt
index 2fa749387be8..8897d0494838 100644
--- a/Documentation/device-mapper/switch.txt
+++ b/Documentation/device-mapper/switch.txt
@@ -106,6 +106,11 @@ which paths. | |||
106 | The path number in the range 0 ... (<num_paths> - 1). | 106 | The path number in the range 0 ... (<num_paths> - 1). |
107 | Expressed in hexadecimal (WITHOUT any prefix like 0x). | 107 | Expressed in hexadecimal (WITHOUT any prefix like 0x). |
108 | 108 | ||
109 | R<n>,<m> | ||
110 | This parameter allows repetitive patterns to be loaded quickly. <n> and <m> | ||
111 | are hexadecimal numbers. The last <n> mappings are repeated in the next <m> | ||
112 | slots. | ||
113 | |||
109 | Status | 114 | Status |
110 | ====== | 115 | ====== |
111 | 116 | ||
@@ -124,3 +129,10 @@ Create a switch device with 64kB region size: | |||
124 | Set mappings for the first 7 entries to point to devices switch0, switch1, | 129 | Set mappings for the first 7 entries to point to devices switch0, switch1, |
125 | switch2, switch0, switch1, switch2, switch1: | 130 | switch2, switch0, switch1, switch2, switch1: |
126 | dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1 | 131 | dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1 |
132 | |||
133 | Set repetitive mapping. This command: | ||
134 | dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10 | ||
135 | is equivalent to: | ||
136 | dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \ | ||
137 | :1 :2 :1 :2 :1 :2 :1 :2 :1 :2 | ||
138 | |||
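To make the new R<n>,<m> argument concrete: the last <n> mappings that were
loaded are replayed into the following <m> slots. Below is a minimal sketch of
that expansion over a plain array of path numbers (illustrative only; the
kernel operates on the packed region table through switch_region_table_read()
and switch_region_table_write(), as the dm-switch.c hunk further down shows):

    /*
     * Sketch: expand "R<cycle_length>,<num_write>".  region_index is the
     * slot written by the preceding argument, matching how
     * set_region_mappings advances through its arguments.
     */
    static void expand_repeat(unsigned *table, unsigned long region_index,
                              unsigned long cycle_length, unsigned long num_write)
    {
            while (num_write--) {
                    region_index++;
                    table[region_index] = table[region_index - cycle_length];
            }
    }

For the documentation example above, with table[0x1000] = 1 and
table[0x1001] = 2 already loaded, expand_repeat(table, 0x1001, 2, 0x10) fills
slots 0x1002..0x1011 with the alternating 1,2 pattern, matching the long-hand
dmsetup message.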
diff --git a/block/bio.c b/block/bio.c
index 0ec61c9e536c..3e6331d25d90 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -112,7 +112,8 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) | |||
112 | bslab = &bio_slabs[entry]; | 112 | bslab = &bio_slabs[entry]; |
113 | 113 | ||
114 | snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry); | 114 | snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry); |
115 | slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL); | 115 | slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN, |
116 | SLAB_HWCACHE_ALIGN, NULL); | ||
116 | if (!slab) | 117 | if (!slab) |
117 | goto out_unlock; | 118 | goto out_unlock; |
118 | 119 | ||
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index d2899e7eb3aa..06709257adde 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) | |||
330 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); | 330 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); |
331 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); | 331 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); |
332 | disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); | 332 | disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); |
333 | disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); | 333 | disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE); |
334 | disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); | 334 | disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); |
335 | disk_super->cache_blocks = cpu_to_le32(0); | 335 | disk_super->cache_blocks = cpu_to_le32(0); |
336 | 336 | ||
@@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, | |||
478 | bool may_format_device) | 478 | bool may_format_device) |
479 | { | 479 | { |
480 | int r; | 480 | int r; |
481 | cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE, | 481 | cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, |
482 | CACHE_METADATA_CACHE_SIZE, | 482 | CACHE_METADATA_CACHE_SIZE, |
483 | CACHE_MAX_CONCURRENT_LOCKS); | 483 | CACHE_MAX_CONCURRENT_LOCKS); |
484 | if (IS_ERR(cmd->bm)) { | 484 | if (IS_ERR(cmd->bm)) { |
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index cd70a78623a3..7383c90ccdb8 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -9,19 +9,17 @@ | |||
9 | 9 | ||
10 | #include "dm-cache-block-types.h" | 10 | #include "dm-cache-block-types.h" |
11 | #include "dm-cache-policy-internal.h" | 11 | #include "dm-cache-policy-internal.h" |
12 | #include "persistent-data/dm-space-map-metadata.h" | ||
12 | 13 | ||
13 | /*----------------------------------------------------------------*/ | 14 | /*----------------------------------------------------------------*/ |
14 | 15 | ||
15 | #define DM_CACHE_METADATA_BLOCK_SIZE 4096 | 16 | #define DM_CACHE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE |
16 | 17 | ||
17 | /* FIXME: remove this restriction */ | 18 | /* FIXME: remove this restriction */ |
18 | /* | 19 | /* |
19 | * The metadata device is currently limited in size. | 20 | * The metadata device is currently limited in size. |
20 | * | ||
21 | * We have one block of index, which can hold 255 index entries. Each | ||
22 | * index entry contains allocation info about 16k metadata blocks. | ||
23 | */ | 21 | */ |
24 | #define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) | 22 | #define DM_CACHE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS |
25 | 23 | ||
26 | /* | 24 | /* |
27 | * A metadata device larger than 16GB triggers a warning. | 25 | * A metadata device larger than 16GB triggers a warning. |
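The two dm-cache-metadata hunks above amount to a unit change rather than a
behavioural one: DM_CACHE_METADATA_BLOCK_SIZE used to be a byte count (4096)
and is now a sector count shared with dm-space-map-metadata.h, which is why
the superblock field loses its ">> SECTOR_SHIFT" and dm_block_manager_create()
gains a "<< SECTOR_SHIFT". A worked sketch of the arithmetic, assuming
512-byte sectors (SECTOR_SHIFT == 9) and DM_SM_METADATA_BLOCK_SIZE defined as
4096 >> SECTOR_SHIFT:

    /* Illustration only, not code from the patch. */
    #define EX_SECTOR_SHIFT           9                            /* 512-byte sectors */
    #define EX_METADATA_BLOCK_SECTORS (4096 >> EX_SECTOR_SHIFT)    /* 8 sectors        */

    unsigned old_superblock_field = 4096 >> EX_SECTOR_SHIFT;                      /* 8    */
    unsigned new_superblock_field = EX_METADATA_BLOCK_SECTORS;                    /* 8    */
    unsigned block_manager_bytes  = EX_METADATA_BLOCK_SECTORS << EX_SECTOR_SHIFT; /* 4096 */

Both forms write 8 to the superblock and hand 4096 bytes to the block manager,
so nothing changes on disk.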
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2c63326638b6..1af40ee209e2 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -718,6 +718,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio) | |||
718 | return bio->bi_rw & (REQ_FLUSH | REQ_FUA); | 718 | return bio->bi_rw & (REQ_FLUSH | REQ_FUA); |
719 | } | 719 | } |
720 | 720 | ||
721 | /* | ||
722 | * You must increment the deferred set whilst the prison cell is held. To | ||
723 | * encourage this, we ask for 'cell' to be passed in. | ||
724 | */ | ||
725 | static void inc_ds(struct cache *cache, struct bio *bio, | ||
726 | struct dm_bio_prison_cell *cell) | ||
727 | { | ||
728 | size_t pb_data_size = get_per_bio_data_size(cache); | ||
729 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | ||
730 | |||
731 | BUG_ON(!cell); | ||
732 | BUG_ON(pb->all_io_entry); | ||
733 | |||
734 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
735 | } | ||
736 | |||
721 | static void issue(struct cache *cache, struct bio *bio) | 737 | static void issue(struct cache *cache, struct bio *bio) |
722 | { | 738 | { |
723 | unsigned long flags; | 739 | unsigned long flags; |
@@ -737,6 +753,12 @@ static void issue(struct cache *cache, struct bio *bio) | |||
737 | spin_unlock_irqrestore(&cache->lock, flags); | 753 | spin_unlock_irqrestore(&cache->lock, flags); |
738 | } | 754 | } |
739 | 755 | ||
756 | static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell) | ||
757 | { | ||
758 | inc_ds(cache, bio, cell); | ||
759 | issue(cache, bio); | ||
760 | } | ||
761 | |||
740 | static void defer_writethrough_bio(struct cache *cache, struct bio *bio) | 762 | static void defer_writethrough_bio(struct cache *cache, struct bio *bio) |
741 | { | 763 | { |
742 | unsigned long flags; | 764 | unsigned long flags; |
@@ -1015,6 +1037,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio) | |||
1015 | 1037 | ||
1016 | dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); | 1038 | dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); |
1017 | remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock); | 1039 | remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock); |
1040 | |||
1041 | /* | ||
1042 | * No need to inc_ds() here, since the cell will be held for the | ||
1043 | * duration of the io. | ||
1044 | */ | ||
1018 | generic_make_request(bio); | 1045 | generic_make_request(bio); |
1019 | } | 1046 | } |
1020 | 1047 | ||
@@ -1115,8 +1142,7 @@ static void check_for_quiesced_migrations(struct cache *cache, | |||
1115 | return; | 1142 | return; |
1116 | 1143 | ||
1117 | INIT_LIST_HEAD(&work); | 1144 | INIT_LIST_HEAD(&work); |
1118 | if (pb->all_io_entry) | 1145 | dm_deferred_entry_dec(pb->all_io_entry, &work); |
1119 | dm_deferred_entry_dec(pb->all_io_entry, &work); | ||
1120 | 1146 | ||
1121 | if (!list_empty(&work)) | 1147 | if (!list_empty(&work)) |
1122 | queue_quiesced_migrations(cache, &work); | 1148 | queue_quiesced_migrations(cache, &work); |
@@ -1252,6 +1278,11 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) | |||
1252 | else | 1278 | else |
1253 | remap_to_cache(cache, bio, 0); | 1279 | remap_to_cache(cache, bio, 0); |
1254 | 1280 | ||
1281 | /* | ||
1282 | * REQ_FLUSH is not directed at any particular block so we don't | ||
1283 | * need to inc_ds(). REQ_FUA's are split into a write + REQ_FLUSH | ||
1284 | * by dm-core. | ||
1285 | */ | ||
1255 | issue(cache, bio); | 1286 | issue(cache, bio); |
1256 | } | 1287 | } |
1257 | 1288 | ||
@@ -1301,15 +1332,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) | |||
1301 | &cache->stats.read_miss : &cache->stats.write_miss); | 1332 | &cache->stats.read_miss : &cache->stats.write_miss); |
1302 | } | 1333 | } |
1303 | 1334 | ||
1304 | static void issue_cache_bio(struct cache *cache, struct bio *bio, | ||
1305 | struct per_bio_data *pb, | ||
1306 | dm_oblock_t oblock, dm_cblock_t cblock) | ||
1307 | { | ||
1308 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
1309 | remap_to_cache_dirty(cache, bio, oblock, cblock); | ||
1310 | issue(cache, bio); | ||
1311 | } | ||
1312 | |||
1313 | static void process_bio(struct cache *cache, struct prealloc *structs, | 1335 | static void process_bio(struct cache *cache, struct prealloc *structs, |
1314 | struct bio *bio) | 1336 | struct bio *bio) |
1315 | { | 1337 | { |
@@ -1318,8 +1340,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs, | |||
1318 | dm_oblock_t block = get_bio_block(cache, bio); | 1340 | dm_oblock_t block = get_bio_block(cache, bio); |
1319 | struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; | 1341 | struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; |
1320 | struct policy_result lookup_result; | 1342 | struct policy_result lookup_result; |
1321 | size_t pb_data_size = get_per_bio_data_size(cache); | ||
1322 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | ||
1323 | bool discarded_block = is_discarded_oblock(cache, block); | 1343 | bool discarded_block = is_discarded_oblock(cache, block); |
1324 | bool passthrough = passthrough_mode(&cache->features); | 1344 | bool passthrough = passthrough_mode(&cache->features); |
1325 | bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); | 1345 | bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); |
@@ -1359,9 +1379,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, | |||
1359 | 1379 | ||
1360 | } else { | 1380 | } else { |
1361 | /* FIXME: factor out issue_origin() */ | 1381 | /* FIXME: factor out issue_origin() */ |
1362 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
1363 | remap_to_origin_clear_discard(cache, bio, block); | 1382 | remap_to_origin_clear_discard(cache, bio, block); |
1364 | issue(cache, bio); | 1383 | inc_and_issue(cache, bio, new_ocell); |
1365 | } | 1384 | } |
1366 | } else { | 1385 | } else { |
1367 | inc_hit_counter(cache, bio); | 1386 | inc_hit_counter(cache, bio); |
@@ -1369,20 +1388,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs, | |||
1369 | if (bio_data_dir(bio) == WRITE && | 1388 | if (bio_data_dir(bio) == WRITE && |
1370 | writethrough_mode(&cache->features) && | 1389 | writethrough_mode(&cache->features) && |
1371 | !is_dirty(cache, lookup_result.cblock)) { | 1390 | !is_dirty(cache, lookup_result.cblock)) { |
1372 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
1373 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); | 1391 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); |
1374 | issue(cache, bio); | 1392 | inc_and_issue(cache, bio, new_ocell); |
1375 | } else | 1393 | |
1376 | issue_cache_bio(cache, bio, pb, block, lookup_result.cblock); | 1394 | } else { |
1395 | remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); | ||
1396 | inc_and_issue(cache, bio, new_ocell); | ||
1397 | } | ||
1377 | } | 1398 | } |
1378 | 1399 | ||
1379 | break; | 1400 | break; |
1380 | 1401 | ||
1381 | case POLICY_MISS: | 1402 | case POLICY_MISS: |
1382 | inc_miss_counter(cache, bio); | 1403 | inc_miss_counter(cache, bio); |
1383 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
1384 | remap_to_origin_clear_discard(cache, bio, block); | 1404 | remap_to_origin_clear_discard(cache, bio, block); |
1385 | issue(cache, bio); | 1405 | inc_and_issue(cache, bio, new_ocell); |
1386 | break; | 1406 | break; |
1387 | 1407 | ||
1388 | case POLICY_NEW: | 1408 | case POLICY_NEW: |
@@ -1501,6 +1521,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) | |||
1501 | bio_list_init(&cache->deferred_flush_bios); | 1521 | bio_list_init(&cache->deferred_flush_bios); |
1502 | spin_unlock_irqrestore(&cache->lock, flags); | 1522 | spin_unlock_irqrestore(&cache->lock, flags); |
1503 | 1523 | ||
1524 | /* | ||
1525 | * These bios have already been through inc_ds() | ||
1526 | */ | ||
1504 | while ((bio = bio_list_pop(&bios))) | 1527 | while ((bio = bio_list_pop(&bios))) |
1505 | submit_bios ? generic_make_request(bio) : bio_io_error(bio); | 1528 | submit_bios ? generic_make_request(bio) : bio_io_error(bio); |
1506 | } | 1529 | } |
@@ -1518,6 +1541,9 @@ static void process_deferred_writethrough_bios(struct cache *cache) | |||
1518 | bio_list_init(&cache->deferred_writethrough_bios); | 1541 | bio_list_init(&cache->deferred_writethrough_bios); |
1519 | spin_unlock_irqrestore(&cache->lock, flags); | 1542 | spin_unlock_irqrestore(&cache->lock, flags); |
1520 | 1543 | ||
1544 | /* | ||
1545 | * These bios have already been through inc_ds() | ||
1546 | */ | ||
1521 | while ((bio = bio_list_pop(&bios))) | 1547 | while ((bio = bio_list_pop(&bios))) |
1522 | generic_make_request(bio); | 1548 | generic_make_request(bio); |
1523 | } | 1549 | } |
@@ -1694,6 +1720,7 @@ static void do_worker(struct work_struct *ws) | |||
1694 | 1720 | ||
1695 | if (commit_if_needed(cache)) { | 1721 | if (commit_if_needed(cache)) { |
1696 | process_deferred_flush_bios(cache, false); | 1722 | process_deferred_flush_bios(cache, false); |
1723 | process_migrations(cache, &cache->need_commit_migrations, migration_failure); | ||
1697 | 1724 | ||
1698 | /* | 1725 | /* |
1699 | * FIXME: rollback metadata or just go into a | 1726 | * FIXME: rollback metadata or just go into a |
@@ -2406,16 +2433,13 @@ out: | |||
2406 | return r; | 2433 | return r; |
2407 | } | 2434 | } |
2408 | 2435 | ||
2409 | static int cache_map(struct dm_target *ti, struct bio *bio) | 2436 | static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell) |
2410 | { | 2437 | { |
2411 | struct cache *cache = ti->private; | ||
2412 | |||
2413 | int r; | 2438 | int r; |
2414 | dm_oblock_t block = get_bio_block(cache, bio); | 2439 | dm_oblock_t block = get_bio_block(cache, bio); |
2415 | size_t pb_data_size = get_per_bio_data_size(cache); | 2440 | size_t pb_data_size = get_per_bio_data_size(cache); |
2416 | bool can_migrate = false; | 2441 | bool can_migrate = false; |
2417 | bool discarded_block; | 2442 | bool discarded_block; |
2418 | struct dm_bio_prison_cell *cell; | ||
2419 | struct policy_result lookup_result; | 2443 | struct policy_result lookup_result; |
2420 | struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); | 2444 | struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); |
2421 | 2445 | ||
@@ -2437,15 +2461,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio) | |||
2437 | /* | 2461 | /* |
2438 | * Check to see if that block is currently migrating. | 2462 | * Check to see if that block is currently migrating. |
2439 | */ | 2463 | */ |
2440 | cell = alloc_prison_cell(cache); | 2464 | *cell = alloc_prison_cell(cache); |
2441 | if (!cell) { | 2465 | if (!*cell) { |
2442 | defer_bio(cache, bio); | 2466 | defer_bio(cache, bio); |
2443 | return DM_MAPIO_SUBMITTED; | 2467 | return DM_MAPIO_SUBMITTED; |
2444 | } | 2468 | } |
2445 | 2469 | ||
2446 | r = bio_detain(cache, block, bio, cell, | 2470 | r = bio_detain(cache, block, bio, *cell, |
2447 | (cell_free_fn) free_prison_cell, | 2471 | (cell_free_fn) free_prison_cell, |
2448 | cache, &cell); | 2472 | cache, cell); |
2449 | if (r) { | 2473 | if (r) { |
2450 | if (r < 0) | 2474 | if (r < 0) |
2451 | defer_bio(cache, bio); | 2475 | defer_bio(cache, bio); |
@@ -2458,11 +2482,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio) | |||
2458 | r = policy_map(cache->policy, block, false, can_migrate, discarded_block, | 2482 | r = policy_map(cache->policy, block, false, can_migrate, discarded_block, |
2459 | bio, &lookup_result); | 2483 | bio, &lookup_result); |
2460 | if (r == -EWOULDBLOCK) { | 2484 | if (r == -EWOULDBLOCK) { |
2461 | cell_defer(cache, cell, true); | 2485 | cell_defer(cache, *cell, true); |
2462 | return DM_MAPIO_SUBMITTED; | 2486 | return DM_MAPIO_SUBMITTED; |
2463 | 2487 | ||
2464 | } else if (r) { | 2488 | } else if (r) { |
2465 | DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); | 2489 | DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); |
2490 | cell_defer(cache, *cell, false); | ||
2466 | bio_io_error(bio); | 2491 | bio_io_error(bio); |
2467 | return DM_MAPIO_SUBMITTED; | 2492 | return DM_MAPIO_SUBMITTED; |
2468 | } | 2493 | } |
@@ -2476,52 +2501,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio) | |||
2476 | * We need to invalidate this block, so | 2501 | * We need to invalidate this block, so |
2477 | * defer for the worker thread. | 2502 | * defer for the worker thread. |
2478 | */ | 2503 | */ |
2479 | cell_defer(cache, cell, true); | 2504 | cell_defer(cache, *cell, true); |
2480 | r = DM_MAPIO_SUBMITTED; | 2505 | r = DM_MAPIO_SUBMITTED; |
2481 | 2506 | ||
2482 | } else { | 2507 | } else { |
2483 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
2484 | inc_miss_counter(cache, bio); | 2508 | inc_miss_counter(cache, bio); |
2485 | remap_to_origin_clear_discard(cache, bio, block); | 2509 | remap_to_origin_clear_discard(cache, bio, block); |
2486 | |||
2487 | cell_defer(cache, cell, false); | ||
2488 | } | 2510 | } |
2489 | 2511 | ||
2490 | } else { | 2512 | } else { |
2491 | inc_hit_counter(cache, bio); | 2513 | inc_hit_counter(cache, bio); |
2492 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
2493 | |||
2494 | if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && | 2514 | if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && |
2495 | !is_dirty(cache, lookup_result.cblock)) | 2515 | !is_dirty(cache, lookup_result.cblock)) |
2496 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); | 2516 | remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); |
2497 | else | 2517 | else |
2498 | remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); | 2518 | remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); |
2499 | |||
2500 | cell_defer(cache, cell, false); | ||
2501 | } | 2519 | } |
2502 | break; | 2520 | break; |
2503 | 2521 | ||
2504 | case POLICY_MISS: | 2522 | case POLICY_MISS: |
2505 | inc_miss_counter(cache, bio); | 2523 | inc_miss_counter(cache, bio); |
2506 | pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); | ||
2507 | |||
2508 | if (pb->req_nr != 0) { | 2524 | if (pb->req_nr != 0) { |
2509 | /* | 2525 | /* |
2510 | * This is a duplicate writethrough io that is no | 2526 | * This is a duplicate writethrough io that is no |
2511 | * longer needed because the block has been demoted. | 2527 | * longer needed because the block has been demoted. |
2512 | */ | 2528 | */ |
2513 | bio_endio(bio, 0); | 2529 | bio_endio(bio, 0); |
2514 | cell_defer(cache, cell, false); | 2530 | cell_defer(cache, *cell, false); |
2515 | return DM_MAPIO_SUBMITTED; | 2531 | r = DM_MAPIO_SUBMITTED; |
2516 | } else { | 2532 | |
2533 | } else | ||
2517 | remap_to_origin_clear_discard(cache, bio, block); | 2534 | remap_to_origin_clear_discard(cache, bio, block); |
2518 | cell_defer(cache, cell, false); | 2535 | |
2519 | } | ||
2520 | break; | 2536 | break; |
2521 | 2537 | ||
2522 | default: | 2538 | default: |
2523 | DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, | 2539 | DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, |
2524 | (unsigned) lookup_result.op); | 2540 | (unsigned) lookup_result.op); |
2541 | cell_defer(cache, *cell, false); | ||
2525 | bio_io_error(bio); | 2542 | bio_io_error(bio); |
2526 | r = DM_MAPIO_SUBMITTED; | 2543 | r = DM_MAPIO_SUBMITTED; |
2527 | } | 2544 | } |
@@ -2529,6 +2546,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio) | |||
2529 | return r; | 2546 | return r; |
2530 | } | 2547 | } |
2531 | 2548 | ||
2549 | static int cache_map(struct dm_target *ti, struct bio *bio) | ||
2550 | { | ||
2551 | int r; | ||
2552 | struct dm_bio_prison_cell *cell; | ||
2553 | struct cache *cache = ti->private; | ||
2554 | |||
2555 | r = __cache_map(cache, bio, &cell); | ||
2556 | if (r == DM_MAPIO_REMAPPED) { | ||
2557 | inc_ds(cache, bio, cell); | ||
2558 | cell_defer(cache, cell, false); | ||
2559 | } | ||
2560 | |||
2561 | return r; | ||
2562 | } | ||
2563 | |||
2532 | static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) | 2564 | static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) |
2533 | { | 2565 | { |
2534 | struct cache *cache = ti->private; | 2566 | struct cache *cache = ti->private; |
@@ -2808,7 +2840,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, | |||
2808 | residency = policy_residency(cache->policy); | 2840 | residency = policy_residency(cache->policy); |
2809 | 2841 | ||
2810 | DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", | 2842 | DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", |
2811 | (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), | 2843 | (unsigned)DM_CACHE_METADATA_BLOCK_SIZE, |
2812 | (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), | 2844 | (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), |
2813 | (unsigned long long)nr_blocks_metadata, | 2845 | (unsigned long long)nr_blocks_metadata, |
2814 | cache->sectors_per_block, | 2846 | cache->sectors_per_block, |
@@ -3062,7 +3094,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
3062 | */ | 3094 | */ |
3063 | if (io_opt_sectors < cache->sectors_per_block || | 3095 | if (io_opt_sectors < cache->sectors_per_block || |
3064 | do_div(io_opt_sectors, cache->sectors_per_block)) { | 3096 | do_div(io_opt_sectors, cache->sectors_per_block)) { |
3065 | blk_limits_io_min(limits, 0); | 3097 | blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT); |
3066 | blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); | 3098 | blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); |
3067 | } | 3099 | } |
3068 | set_discard_limits(cache, limits); | 3100 | set_discard_limits(cache, limits); |
@@ -3072,7 +3104,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
3072 | 3104 | ||
3073 | static struct target_type cache_target = { | 3105 | static struct target_type cache_target = { |
3074 | .name = "cache", | 3106 | .name = "cache", |
3075 | .version = {1, 4, 0}, | 3107 | .version = {1, 5, 0}, |
3076 | .module = THIS_MODULE, | 3108 | .module = THIS_MODULE, |
3077 | .ctr = cache_ctr, | 3109 | .ctr = cache_ctr, |
3078 | .dtr = cache_dtr, | 3110 | .dtr = cache_dtr, |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4cba2d808afb..2785007e0e46 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -59,7 +59,7 @@ struct dm_crypt_io { | |||
59 | int error; | 59 | int error; |
60 | sector_t sector; | 60 | sector_t sector; |
61 | struct dm_crypt_io *base_io; | 61 | struct dm_crypt_io *base_io; |
62 | }; | 62 | } CRYPTO_MINALIGN_ATTR; |
63 | 63 | ||
64 | struct dm_crypt_request { | 64 | struct dm_crypt_request { |
65 | struct convert_context *ctx; | 65 | struct convert_context *ctx; |
@@ -162,6 +162,8 @@ struct crypt_config { | |||
162 | */ | 162 | */ |
163 | unsigned int dmreq_start; | 163 | unsigned int dmreq_start; |
164 | 164 | ||
165 | unsigned int per_bio_data_size; | ||
166 | |||
165 | unsigned long flags; | 167 | unsigned long flags; |
166 | unsigned int key_size; | 168 | unsigned int key_size; |
167 | unsigned int key_parts; /* independent parts in key buffer */ | 169 | unsigned int key_parts; /* independent parts in key buffer */ |
@@ -895,6 +897,15 @@ static void crypt_alloc_req(struct crypt_config *cc, | |||
895 | kcryptd_async_done, dmreq_of_req(cc, ctx->req)); | 897 | kcryptd_async_done, dmreq_of_req(cc, ctx->req)); |
896 | } | 898 | } |
897 | 899 | ||
900 | static void crypt_free_req(struct crypt_config *cc, | ||
901 | struct ablkcipher_request *req, struct bio *base_bio) | ||
902 | { | ||
903 | struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size); | ||
904 | |||
905 | if ((struct ablkcipher_request *)(io + 1) != req) | ||
906 | mempool_free(req, cc->req_pool); | ||
907 | } | ||
908 | |||
898 | /* | 909 | /* |
899 | * Encrypt / decrypt data from one bio to another one (can be the same one) | 910 | * Encrypt / decrypt data from one bio to another one (can be the same one) |
900 | */ | 911 | */ |
@@ -1008,12 +1019,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) | |||
1008 | } | 1019 | } |
1009 | } | 1020 | } |
1010 | 1021 | ||
1011 | static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, | 1022 | static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, |
1012 | struct bio *bio, sector_t sector) | 1023 | struct bio *bio, sector_t sector) |
1013 | { | 1024 | { |
1014 | struct dm_crypt_io *io; | ||
1015 | |||
1016 | io = mempool_alloc(cc->io_pool, GFP_NOIO); | ||
1017 | io->cc = cc; | 1025 | io->cc = cc; |
1018 | io->base_bio = bio; | 1026 | io->base_bio = bio; |
1019 | io->sector = sector; | 1027 | io->sector = sector; |
@@ -1021,8 +1029,6 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, | |||
1021 | io->base_io = NULL; | 1029 | io->base_io = NULL; |
1022 | io->ctx.req = NULL; | 1030 | io->ctx.req = NULL; |
1023 | atomic_set(&io->io_pending, 0); | 1031 | atomic_set(&io->io_pending, 0); |
1024 | |||
1025 | return io; | ||
1026 | } | 1032 | } |
1027 | 1033 | ||
1028 | static void crypt_inc_pending(struct dm_crypt_io *io) | 1034 | static void crypt_inc_pending(struct dm_crypt_io *io) |
@@ -1046,8 +1052,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io) | |||
1046 | return; | 1052 | return; |
1047 | 1053 | ||
1048 | if (io->ctx.req) | 1054 | if (io->ctx.req) |
1049 | mempool_free(io->ctx.req, cc->req_pool); | 1055 | crypt_free_req(cc, io->ctx.req, base_bio); |
1050 | mempool_free(io, cc->io_pool); | 1056 | if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size)) |
1057 | mempool_free(io, cc->io_pool); | ||
1051 | 1058 | ||
1052 | if (likely(!base_io)) | 1059 | if (likely(!base_io)) |
1053 | bio_endio(base_bio, error); | 1060 | bio_endio(base_bio, error); |
@@ -1255,8 +1262,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) | |||
1255 | * between fragments, so switch to a new dm_crypt_io structure. | 1262 | * between fragments, so switch to a new dm_crypt_io structure. |
1256 | */ | 1263 | */ |
1257 | if (unlikely(!crypt_finished && remaining)) { | 1264 | if (unlikely(!crypt_finished && remaining)) { |
1258 | new_io = crypt_io_alloc(io->cc, io->base_bio, | 1265 | new_io = mempool_alloc(cc->io_pool, GFP_NOIO); |
1259 | sector); | 1266 | crypt_io_init(new_io, io->cc, io->base_bio, sector); |
1260 | crypt_inc_pending(new_io); | 1267 | crypt_inc_pending(new_io); |
1261 | crypt_convert_init(cc, &new_io->ctx, NULL, | 1268 | crypt_convert_init(cc, &new_io->ctx, NULL, |
1262 | io->base_bio, sector); | 1269 | io->base_bio, sector); |
@@ -1325,7 +1332,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, | |||
1325 | if (error < 0) | 1332 | if (error < 0) |
1326 | io->error = -EIO; | 1333 | io->error = -EIO; |
1327 | 1334 | ||
1328 | mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); | 1335 | crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); |
1329 | 1336 | ||
1330 | if (!atomic_dec_and_test(&ctx->cc_pending)) | 1337 | if (!atomic_dec_and_test(&ctx->cc_pending)) |
1331 | return; | 1338 | return; |
@@ -1728,6 +1735,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1728 | goto bad; | 1735 | goto bad; |
1729 | } | 1736 | } |
1730 | 1737 | ||
1738 | cc->per_bio_data_size = ti->per_bio_data_size = | ||
1739 | sizeof(struct dm_crypt_io) + cc->dmreq_start + | ||
1740 | sizeof(struct dm_crypt_request) + cc->iv_size; | ||
1741 | |||
1731 | cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); | 1742 | cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); |
1732 | if (!cc->page_pool) { | 1743 | if (!cc->page_pool) { |
1733 | ti->error = "Cannot allocate page mempool"; | 1744 | ti->error = "Cannot allocate page mempool"; |
@@ -1824,7 +1835,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) | |||
1824 | return DM_MAPIO_REMAPPED; | 1835 | return DM_MAPIO_REMAPPED; |
1825 | } | 1836 | } |
1826 | 1837 | ||
1827 | io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); | 1838 | io = dm_per_bio_data(bio, cc->per_bio_data_size); |
1839 | crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); | ||
1840 | io->ctx.req = (struct ablkcipher_request *)(io + 1); | ||
1828 | 1841 | ||
1829 | if (bio_data_dir(io->base_bio) == READ) { | 1842 | if (bio_data_dir(io->base_bio) == READ) { |
1830 | if (kcryptd_io_read(io, GFP_NOWAIT)) | 1843 | if (kcryptd_io_read(io, GFP_NOWAIT)) |
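The dm-crypt conversion above is an instance of the generic DM per-bio-data
pattern: the target declares its per-bio payload size at construction time and
later derives a pointer to that payload from the bio itself, instead of taking
a mempool allocation on every I/O. A minimal sketch of the pattern for a
hypothetical target (ti->per_bio_data_size and dm_per_bio_data() are the real
hooks used in the hunk above; the rest is illustrative):

    struct example_per_bio {                    /* hypothetical per-bio payload */
            sector_t sector;
            int error;
    };

    static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
    {
            /* Ask DM core to reserve this much space alongside each cloned bio. */
            ti->per_bio_data_size = sizeof(struct example_per_bio);
            return 0;
    }

    static int example_map(struct dm_target *ti, struct bio *bio)
    {
            /* No allocation here: the payload already travels with the bio. */
            struct example_per_bio *pb = dm_per_bio_data(bio, sizeof(struct example_per_bio));

            pb->error = 0;
            pb->sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
            /* ... remap bio to the underlying device ... */
            return DM_MAPIO_REMAPPED;
    }

dm-crypt additionally appends the ablkcipher request and IV to its payload and
sizes per_bio_data_size accordingly, which is why the bio slab change earlier
in this series switches to kmalloc alignment.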
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index db404a0f7e2c..c09359db3a90 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -33,7 +33,6 @@ struct dm_io_client { | |||
33 | struct io { | 33 | struct io { |
34 | unsigned long error_bits; | 34 | unsigned long error_bits; |
35 | atomic_t count; | 35 | atomic_t count; |
36 | struct completion *wait; | ||
37 | struct dm_io_client *client; | 36 | struct dm_io_client *client; |
38 | io_notify_fn callback; | 37 | io_notify_fn callback; |
39 | void *context; | 38 | void *context; |
@@ -112,28 +111,27 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, | |||
112 | * We need an io object to keep track of the number of bios that | 111 | * We need an io object to keep track of the number of bios that |
113 | * have been dispatched for a particular io. | 112 | * have been dispatched for a particular io. |
114 | *---------------------------------------------------------------*/ | 113 | *---------------------------------------------------------------*/ |
115 | static void dec_count(struct io *io, unsigned int region, int error) | 114 | static void complete_io(struct io *io) |
116 | { | 115 | { |
117 | if (error) | 116 | unsigned long error_bits = io->error_bits; |
118 | set_bit(region, &io->error_bits); | 117 | io_notify_fn fn = io->callback; |
118 | void *context = io->context; | ||
119 | 119 | ||
120 | if (atomic_dec_and_test(&io->count)) { | 120 | if (io->vma_invalidate_size) |
121 | if (io->vma_invalidate_size) | 121 | invalidate_kernel_vmap_range(io->vma_invalidate_address, |
122 | invalidate_kernel_vmap_range(io->vma_invalidate_address, | 122 | io->vma_invalidate_size); |
123 | io->vma_invalidate_size); | ||
124 | 123 | ||
125 | if (io->wait) | 124 | mempool_free(io, io->client->pool); |
126 | complete(io->wait); | 125 | fn(error_bits, context); |
126 | } | ||
127 | 127 | ||
128 | else { | 128 | static void dec_count(struct io *io, unsigned int region, int error) |
129 | unsigned long r = io->error_bits; | 129 | { |
130 | io_notify_fn fn = io->callback; | 130 | if (error) |
131 | void *context = io->context; | 131 | set_bit(region, &io->error_bits); |
132 | 132 | ||
133 | mempool_free(io, io->client->pool); | 133 | if (atomic_dec_and_test(&io->count)) |
134 | fn(r, context); | 134 | complete_io(io); |
135 | } | ||
136 | } | ||
137 | } | 135 | } |
138 | 136 | ||
139 | static void endio(struct bio *bio, int error) | 137 | static void endio(struct bio *bio, int error) |
@@ -376,41 +374,51 @@ static void dispatch_io(int rw, unsigned int num_regions, | |||
376 | dec_count(io, 0, 0); | 374 | dec_count(io, 0, 0); |
377 | } | 375 | } |
378 | 376 | ||
377 | struct sync_io { | ||
378 | unsigned long error_bits; | ||
379 | struct completion wait; | ||
380 | }; | ||
381 | |||
382 | static void sync_io_complete(unsigned long error, void *context) | ||
383 | { | ||
384 | struct sync_io *sio = context; | ||
385 | |||
386 | sio->error_bits = error; | ||
387 | complete(&sio->wait); | ||
388 | } | ||
389 | |||
379 | static int sync_io(struct dm_io_client *client, unsigned int num_regions, | 390 | static int sync_io(struct dm_io_client *client, unsigned int num_regions, |
380 | struct dm_io_region *where, int rw, struct dpages *dp, | 391 | struct dm_io_region *where, int rw, struct dpages *dp, |
381 | unsigned long *error_bits) | 392 | unsigned long *error_bits) |
382 | { | 393 | { |
383 | /* | 394 | struct io *io; |
384 | * gcc <= 4.3 can't do the alignment for stack variables, so we must | 395 | struct sync_io sio; |
385 | * align it on our own. | ||
386 | * volatile prevents the optimizer from removing or reusing | ||
387 | * "io_" field from the stack frame (allowed in ANSI C). | ||
388 | */ | ||
389 | volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1]; | ||
390 | struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io)); | ||
391 | DECLARE_COMPLETION_ONSTACK(wait); | ||
392 | 396 | ||
393 | if (num_regions > 1 && (rw & RW_MASK) != WRITE) { | 397 | if (num_regions > 1 && (rw & RW_MASK) != WRITE) { |
394 | WARN_ON(1); | 398 | WARN_ON(1); |
395 | return -EIO; | 399 | return -EIO; |
396 | } | 400 | } |
397 | 401 | ||
402 | init_completion(&sio.wait); | ||
403 | |||
404 | io = mempool_alloc(client->pool, GFP_NOIO); | ||
398 | io->error_bits = 0; | 405 | io->error_bits = 0; |
399 | atomic_set(&io->count, 1); /* see dispatch_io() */ | 406 | atomic_set(&io->count, 1); /* see dispatch_io() */ |
400 | io->wait = &wait; | ||
401 | io->client = client; | 407 | io->client = client; |
408 | io->callback = sync_io_complete; | ||
409 | io->context = &sio; | ||
402 | 410 | ||
403 | io->vma_invalidate_address = dp->vma_invalidate_address; | 411 | io->vma_invalidate_address = dp->vma_invalidate_address; |
404 | io->vma_invalidate_size = dp->vma_invalidate_size; | 412 | io->vma_invalidate_size = dp->vma_invalidate_size; |
405 | 413 | ||
406 | dispatch_io(rw, num_regions, where, dp, io, 1); | 414 | dispatch_io(rw, num_regions, where, dp, io, 1); |
407 | 415 | ||
408 | wait_for_completion_io(&wait); | 416 | wait_for_completion_io(&sio.wait); |
409 | 417 | ||
410 | if (error_bits) | 418 | if (error_bits) |
411 | *error_bits = io->error_bits; | 419 | *error_bits = sio.error_bits; |
412 | 420 | ||
413 | return io->error_bits ? -EIO : 0; | 421 | return sio.error_bits ? -EIO : 0; |
414 | } | 422 | } |
415 | 423 | ||
416 | static int async_io(struct dm_io_client *client, unsigned int num_regions, | 424 | static int async_io(struct dm_io_client *client, unsigned int num_regions, |
@@ -428,7 +436,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, | |||
428 | io = mempool_alloc(client->pool, GFP_NOIO); | 436 | io = mempool_alloc(client->pool, GFP_NOIO); |
429 | io->error_bits = 0; | 437 | io->error_bits = 0; |
430 | atomic_set(&io->count, 1); /* see dispatch_io() */ | 438 | atomic_set(&io->count, 1); /* see dispatch_io() */ |
431 | io->wait = NULL; | ||
432 | io->client = client; | 439 | io->client = client; |
433 | io->callback = fn; | 440 | io->callback = fn; |
434 | io->context = context; | 441 | io->context = context; |
@@ -481,9 +488,9 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, | |||
481 | * New collapsed (a)synchronous interface. | 488 | * New collapsed (a)synchronous interface. |
482 | * | 489 | * |
483 | * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug | 490 | * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug |
484 | * the queue with blk_unplug() some time later or set REQ_SYNC in | 491 | * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw. |
485 | io_req->bi_rw. If you fail to do one of these, the IO will be submitted to | 492 | * If you fail to do one of these, the IO will be submitted to the disk after |
486 | * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. | 493 | * q->unplug_delay, which defaults to 3ms in blk-settings.c. |
487 | */ | 494 | */ |
488 | int dm_io(struct dm_io_request *io_req, unsigned num_regions, | 495 | int dm_io(struct dm_io_request *io_req, unsigned num_regions, |
489 | struct dm_io_region *where, unsigned long *sync_error_bits) | 496 | struct dm_io_region *where, unsigned long *sync_error_bits) |
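The dm-io rework above reduces the synchronous path to a thin wrapper over the
asynchronous one: sync_io() now allocates the io object from the mempool
exactly as async_io() does, installs sync_io_complete() as the notification
callback, and sleeps on a completion that the callback fires. Note that the
callback copies error_bits into the caller's on-stack context before
completing, because complete_io() returns the io object to the mempool before
invoking the callback. The shape of that pattern, reduced to its essentials
(submit_async() is a hypothetical stand-in for dispatch_io(); nothing below is
dm-io API):

    /* Sketch: wait synchronously on an asynchronous, callback-based API. */
    struct sync_waiter {
            unsigned long error_bits;
            struct completion done;
    };

    static void sync_waiter_complete(unsigned long error, void *context)
    {
            struct sync_waiter *w = context;

            w->error_bits = error;      /* stash the result for the sleeper... */
            complete(&w->done);         /* ...then wake the submitter          */
    }

    static int do_sync_request(void)
    {
            struct sync_waiter w;

            init_completion(&w.done);
            submit_async(sync_waiter_complete, &w);     /* hypothetical async call */
            wait_for_completion_io(&w.done);

            return w.error_bits ? -EIO : 0;
    }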
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index f4167b013d99..833d7e752f06 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -373,8 +373,6 @@ static int __must_push_back(struct multipath *m) | |||
373 | dm_noflush_suspending(m->ti))); | 373 | dm_noflush_suspending(m->ti))); |
374 | } | 374 | } |
375 | 375 | ||
376 | #define pg_ready(m) (!(m)->queue_io && !(m)->pg_init_required) | ||
377 | |||
378 | /* | 376 | /* |
379 | * Map cloned requests | 377 | * Map cloned requests |
380 | */ | 378 | */ |
@@ -402,11 +400,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone, | |||
402 | if (!__must_push_back(m)) | 400 | if (!__must_push_back(m)) |
403 | r = -EIO; /* Failed */ | 401 | r = -EIO; /* Failed */ |
404 | goto out_unlock; | 402 | goto out_unlock; |
405 | } | 403 | } else if (m->queue_io || m->pg_init_required) { |
406 | if (!pg_ready(m)) { | ||
407 | __pg_init_all_paths(m); | 404 | __pg_init_all_paths(m); |
408 | goto out_unlock; | 405 | goto out_unlock; |
409 | } | 406 | } |
407 | |||
410 | if (set_mapinfo(m, map_context) < 0) | 408 | if (set_mapinfo(m, map_context) < 0) |
411 | /* ENOMEM, requeue */ | 409 | /* ENOMEM, requeue */ |
412 | goto out_unlock; | 410 | goto out_unlock; |
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 09a688b3d48c..50fca469cafd 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -137,13 +137,23 @@ static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr | |||
137 | *bit *= sctx->region_table_entry_bits; | 137 | *bit *= sctx->region_table_entry_bits; |
138 | } | 138 | } |
139 | 139 | ||
140 | static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr) | ||
141 | { | ||
142 | unsigned long region_index; | ||
143 | unsigned bit; | ||
144 | |||
145 | switch_get_position(sctx, region_nr, ®ion_index, &bit); | ||
146 | |||
147 | return (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) & | ||
148 | ((1 << sctx->region_table_entry_bits) - 1); | ||
149 | } | ||
150 | |||
140 | /* | 151 | /* |
141 | * Find which path to use at given offset. | 152 | * Find which path to use at given offset. |
142 | */ | 153 | */ |
143 | static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset) | 154 | static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset) |
144 | { | 155 | { |
145 | unsigned long region_index; | 156 | unsigned path_nr; |
146 | unsigned bit, path_nr; | ||
147 | sector_t p; | 157 | sector_t p; |
148 | 158 | ||
149 | p = offset; | 159 | p = offset; |
@@ -152,9 +162,7 @@ static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset) | |||
152 | else | 162 | else |
153 | sector_div(p, sctx->region_size); | 163 | sector_div(p, sctx->region_size); |
154 | 164 | ||
155 | switch_get_position(sctx, p, ®ion_index, &bit); | 165 | path_nr = switch_region_table_read(sctx, p); |
156 | path_nr = (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) & | ||
157 | ((1 << sctx->region_table_entry_bits) - 1); | ||
158 | 166 | ||
159 | /* This can only happen if the processor uses non-atomic stores. */ | 167 | /* This can only happen if the processor uses non-atomic stores. */ |
160 | if (unlikely(path_nr >= sctx->nr_paths)) | 168 | if (unlikely(path_nr >= sctx->nr_paths)) |
@@ -363,7 +371,7 @@ static __always_inline unsigned long parse_hex(const char **string) | |||
363 | } | 371 | } |
364 | 372 | ||
365 | static int process_set_region_mappings(struct switch_ctx *sctx, | 373 | static int process_set_region_mappings(struct switch_ctx *sctx, |
366 | unsigned argc, char **argv) | 374 | unsigned argc, char **argv) |
367 | { | 375 | { |
368 | unsigned i; | 376 | unsigned i; |
369 | unsigned long region_index = 0; | 377 | unsigned long region_index = 0; |
@@ -372,6 +380,51 @@ static int process_set_region_mappings(struct switch_ctx *sctx, | |||
372 | unsigned long path_nr; | 380 | unsigned long path_nr; |
373 | const char *string = argv[i]; | 381 | const char *string = argv[i]; |
374 | 382 | ||
383 | if ((*string & 0xdf) == 'R') { | ||
384 | unsigned long cycle_length, num_write; | ||
385 | |||
386 | string++; | ||
387 | if (unlikely(*string == ',')) { | ||
388 | DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); | ||
389 | return -EINVAL; | ||
390 | } | ||
391 | cycle_length = parse_hex(&string); | ||
392 | if (unlikely(*string != ',')) { | ||
393 | DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); | ||
394 | return -EINVAL; | ||
395 | } | ||
396 | string++; | ||
397 | if (unlikely(!*string)) { | ||
398 | DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); | ||
399 | return -EINVAL; | ||
400 | } | ||
401 | num_write = parse_hex(&string); | ||
402 | if (unlikely(*string)) { | ||
403 | DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); | ||
404 | return -EINVAL; | ||
405 | } | ||
406 | |||
407 | if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) { | ||
408 | DMWARN("invalid set_region_mappings cycle length: %lu > %lu", | ||
409 | cycle_length - 1, region_index); | ||
410 | return -EINVAL; | ||
411 | } | ||
412 | if (unlikely(region_index + num_write < region_index) || | ||
413 | unlikely(region_index + num_write >= sctx->nr_regions)) { | ||
414 | DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu", | ||
415 | region_index, num_write, sctx->nr_regions); | ||
416 | return -EINVAL; | ||
417 | } | ||
418 | |||
419 | while (num_write--) { | ||
420 | region_index++; | ||
421 | path_nr = switch_region_table_read(sctx, region_index - cycle_length); | ||
422 | switch_region_table_write(sctx, region_index, path_nr); | ||
423 | } | ||
424 | |||
425 | continue; | ||
426 | } | ||
427 | |||
375 | if (*string == ':') | 428 | if (*string == ':') |
376 | region_index++; | 429 | region_index++; |
377 | else { | 430 | else { |
@@ -500,7 +553,7 @@ static int switch_iterate_devices(struct dm_target *ti, | |||
500 | 553 | ||
501 | static struct target_type switch_target = { | 554 | static struct target_type switch_target = { |
502 | .name = "switch", | 555 | .name = "switch", |
503 | .version = {1, 0, 0}, | 556 | .version = {1, 1, 0}, |
504 | .module = THIS_MODULE, | 557 | .module = THIS_MODULE, |
505 | .ctr = switch_ctr, | 558 | .ctr = switch_ctr, |
506 | .dtr = switch_dtr, | 559 | .dtr = switch_dtr, |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5f59f1e3e5b1..f9c6cb8dbcf8 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1386,6 +1386,14 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, | |||
1386 | return q && !blk_queue_add_random(q); | 1386 | return q && !blk_queue_add_random(q); |
1387 | } | 1387 | } |
1388 | 1388 | ||
1389 | static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev, | ||
1390 | sector_t start, sector_t len, void *data) | ||
1391 | { | ||
1392 | struct request_queue *q = bdev_get_queue(dev->bdev); | ||
1393 | |||
1394 | return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); | ||
1395 | } | ||
1396 | |||
1389 | static bool dm_table_all_devices_attribute(struct dm_table *t, | 1397 | static bool dm_table_all_devices_attribute(struct dm_table *t, |
1390 | iterate_devices_callout_fn func) | 1398 | iterate_devices_callout_fn func) |
1391 | { | 1399 | { |
@@ -1430,6 +1438,43 @@ static bool dm_table_supports_write_same(struct dm_table *t) | |||
1430 | return true; | 1438 | return true; |
1431 | } | 1439 | } |
1432 | 1440 | ||
1441 | static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, | ||
1442 | sector_t start, sector_t len, void *data) | ||
1443 | { | ||
1444 | struct request_queue *q = bdev_get_queue(dev->bdev); | ||
1445 | |||
1446 | return q && blk_queue_discard(q); | ||
1447 | } | ||
1448 | |||
1449 | static bool dm_table_supports_discards(struct dm_table *t) | ||
1450 | { | ||
1451 | struct dm_target *ti; | ||
1452 | unsigned i = 0; | ||
1453 | |||
1454 | /* | ||
1455 | * Unless any target used by the table set discards_supported, | ||
1456 | * require at least one underlying device to support discards. | ||
1457 | * t->devices includes internal dm devices such as mirror logs | ||
1458 | * so we need to use iterate_devices here, which targets | ||
1459 | * supporting discard selectively must provide. | ||
1460 | */ | ||
1461 | while (i < dm_table_get_num_targets(t)) { | ||
1462 | ti = dm_table_get_target(t, i++); | ||
1463 | |||
1464 | if (!ti->num_discard_bios) | ||
1465 | continue; | ||
1466 | |||
1467 | if (ti->discards_supported) | ||
1468 | return 1; | ||
1469 | |||
1470 | if (ti->type->iterate_devices && | ||
1471 | ti->type->iterate_devices(ti, device_discard_capable, NULL)) | ||
1472 | return 1; | ||
1473 | } | ||
1474 | |||
1475 | return 0; | ||
1476 | } | ||
1477 | |||
1433 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | 1478 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, |
1434 | struct queue_limits *limits) | 1479 | struct queue_limits *limits) |
1435 | { | 1480 | { |
@@ -1464,6 +1509,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | |||
1464 | if (!dm_table_supports_write_same(t)) | 1509 | if (!dm_table_supports_write_same(t)) |
1465 | q->limits.max_write_same_sectors = 0; | 1510 | q->limits.max_write_same_sectors = 0; |
1466 | 1511 | ||
1512 | if (dm_table_all_devices_attribute(t, queue_supports_sg_merge)) | ||
1513 | queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); | ||
1514 | else | ||
1515 | queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); | ||
1516 | |||
1467 | dm_table_set_integrity(t); | 1517 | dm_table_set_integrity(t); |
1468 | 1518 | ||
1469 | /* | 1519 | /* |
@@ -1636,39 +1686,3 @@ void dm_table_run_md_queue_async(struct dm_table *t) | |||
1636 | } | 1686 | } |
1637 | EXPORT_SYMBOL(dm_table_run_md_queue_async); | 1687 | EXPORT_SYMBOL(dm_table_run_md_queue_async); |
1638 | 1688 | ||
1639 | static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, | ||
1640 | sector_t start, sector_t len, void *data) | ||
1641 | { | ||
1642 | struct request_queue *q = bdev_get_queue(dev->bdev); | ||
1643 | |||
1644 | return q && blk_queue_discard(q); | ||
1645 | } | ||
1646 | |||
1647 | bool dm_table_supports_discards(struct dm_table *t) | ||
1648 | { | ||
1649 | struct dm_target *ti; | ||
1650 | unsigned i = 0; | ||
1651 | |||
1652 | /* | ||
1653 | * Unless any target used by the table set discards_supported, | ||
1654 | * require at least one underlying device to support discards. | ||
1655 | * t->devices includes internal dm devices such as mirror logs | ||
1656 | * so we need to use iterate_devices here, which targets | ||
1657 | * supporting discard selectively must provide. | ||
1658 | */ | ||
1659 | while (i < dm_table_get_num_targets(t)) { | ||
1660 | ti = dm_table_get_target(t, i++); | ||
1661 | |||
1662 | if (!ti->num_discard_bios) | ||
1663 | continue; | ||
1664 | |||
1665 | if (ti->discards_supported) | ||
1666 | return 1; | ||
1667 | |||
1668 | if (ti->type->iterate_devices && | ||
1669 | ti->type->iterate_devices(ti, device_discard_capable, NULL)) | ||
1670 | return 1; | ||
1671 | } | ||
1672 | |||
1673 | return 0; | ||
1674 | } | ||
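The QUEUE_FLAG_NO_SG_MERGE handling above relies on
dm_table_all_devices_attribute(): the DM device keeps SG merging enabled (flag
cleared) only when every underlying device still allows it; if any component
device has the flag set, the whole table inherits it. A reduced sketch of that
"all devices must agree" rule (illustrative, not DM code):

    static bool table_allows_sg_merge(const bool *dev_allows_merge, unsigned n)
    {
            unsigned i;

            /* Merging stays enabled only if every underlying device allows it. */
            for (i = 0; i < n; i++)
                    if (!dev_allows_merge[i])
                            return false;
            return true;
    }

In dm_table_set_restrictions() terms: when the equivalent check (built from
queue_supports_sg_merge() and dm_table_all_devices_attribute()) passes,
QUEUE_FLAG_NO_SG_MERGE is cleared on the DM queue; otherwise it is set.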
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fc9c848a60c9..4843801173fe 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -227,6 +227,7 @@ struct thin_c { | |||
227 | struct list_head list; | 227 | struct list_head list; |
228 | struct dm_dev *pool_dev; | 228 | struct dm_dev *pool_dev; |
229 | struct dm_dev *origin_dev; | 229 | struct dm_dev *origin_dev; |
230 | sector_t origin_size; | ||
230 | dm_thin_id dev_id; | 231 | dm_thin_id dev_id; |
231 | 232 | ||
232 | struct pool *pool; | 233 | struct pool *pool; |
@@ -554,11 +555,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio, | |||
554 | struct dm_thin_new_mapping { | 555 | struct dm_thin_new_mapping { |
555 | struct list_head list; | 556 | struct list_head list; |
556 | 557 | ||
557 | bool quiesced:1; | ||
558 | bool prepared:1; | ||
559 | bool pass_discard:1; | 558 | bool pass_discard:1; |
560 | bool definitely_not_shared:1; | 559 | bool definitely_not_shared:1; |
561 | 560 | ||
561 | /* | ||
562 | * Track quiescing, copying and zeroing preparation actions. When this | ||
563 | * counter hits zero the block is prepared and can be inserted into the | ||
564 | * btree. | ||
565 | */ | ||
566 | atomic_t prepare_actions; | ||
567 | |||
562 | int err; | 568 | int err; |
563 | struct thin_c *tc; | 569 | struct thin_c *tc; |
564 | dm_block_t virt_block; | 570 | dm_block_t virt_block; |
@@ -575,43 +581,41 @@ struct dm_thin_new_mapping { | |||
575 | bio_end_io_t *saved_bi_end_io; | 581 | bio_end_io_t *saved_bi_end_io; |
576 | }; | 582 | }; |
577 | 583 | ||
578 | static void __maybe_add_mapping(struct dm_thin_new_mapping *m) | 584 | static void __complete_mapping_preparation(struct dm_thin_new_mapping *m) |
579 | { | 585 | { |
580 | struct pool *pool = m->tc->pool; | 586 | struct pool *pool = m->tc->pool; |
581 | 587 | ||
582 | if (m->quiesced && m->prepared) { | 588 | if (atomic_dec_and_test(&m->prepare_actions)) { |
583 | list_add_tail(&m->list, &pool->prepared_mappings); | 589 | list_add_tail(&m->list, &pool->prepared_mappings); |
584 | wake_worker(pool); | 590 | wake_worker(pool); |
585 | } | 591 | } |
586 | } | 592 | } |
587 | 593 | ||
588 | static void copy_complete(int read_err, unsigned long write_err, void *context) | 594 | static void complete_mapping_preparation(struct dm_thin_new_mapping *m) |
589 | { | 595 | { |
590 | unsigned long flags; | 596 | unsigned long flags; |
591 | struct dm_thin_new_mapping *m = context; | ||
592 | struct pool *pool = m->tc->pool; | 597 | struct pool *pool = m->tc->pool; |
593 | 598 | ||
594 | m->err = read_err || write_err ? -EIO : 0; | ||
595 | |||
596 | spin_lock_irqsave(&pool->lock, flags); | 599 | spin_lock_irqsave(&pool->lock, flags); |
597 | m->prepared = true; | 600 | __complete_mapping_preparation(m); |
598 | __maybe_add_mapping(m); | ||
599 | spin_unlock_irqrestore(&pool->lock, flags); | 601 | spin_unlock_irqrestore(&pool->lock, flags); |
600 | } | 602 | } |
601 | 603 | ||
604 | static void copy_complete(int read_err, unsigned long write_err, void *context) | ||
605 | { | ||
606 | struct dm_thin_new_mapping *m = context; | ||
607 | |||
608 | m->err = read_err || write_err ? -EIO : 0; | ||
609 | complete_mapping_preparation(m); | ||
610 | } | ||
611 | |||
602 | static void overwrite_endio(struct bio *bio, int err) | 612 | static void overwrite_endio(struct bio *bio, int err) |
603 | { | 613 | { |
604 | unsigned long flags; | ||
605 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | 614 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); |
606 | struct dm_thin_new_mapping *m = h->overwrite_mapping; | 615 | struct dm_thin_new_mapping *m = h->overwrite_mapping; |
607 | struct pool *pool = m->tc->pool; | ||
608 | 616 | ||
609 | m->err = err; | 617 | m->err = err; |
610 | 618 | complete_mapping_preparation(m); | |
611 | spin_lock_irqsave(&pool->lock, flags); | ||
612 | m->prepared = true; | ||
613 | __maybe_add_mapping(m); | ||
614 | spin_unlock_irqrestore(&pool->lock, flags); | ||
615 | } | 619 | } |
616 | 620 | ||
617 | /*----------------------------------------------------------------*/ | 621 | /*----------------------------------------------------------------*/ |
@@ -821,10 +825,31 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) | |||
821 | return m; | 825 | return m; |
822 | } | 826 | } |
823 | 827 | ||
828 | static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m, | ||
829 | sector_t begin, sector_t end) | ||
830 | { | ||
831 | int r; | ||
832 | struct dm_io_region to; | ||
833 | |||
834 | to.bdev = tc->pool_dev->bdev; | ||
835 | to.sector = begin; | ||
836 | to.count = end - begin; | ||
837 | |||
838 | r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m); | ||
839 | if (r < 0) { | ||
840 | DMERR_LIMIT("dm_kcopyd_zero() failed"); | ||
841 | copy_complete(1, 1, m); | ||
842 | } | ||
843 | } | ||
844 | |||
845 | /* | ||
846 | * A partial copy also needs to zero the uncopied region. | ||
847 | */ | ||
824 | static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | 848 | static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, |
825 | struct dm_dev *origin, dm_block_t data_origin, | 849 | struct dm_dev *origin, dm_block_t data_origin, |
826 | dm_block_t data_dest, | 850 | dm_block_t data_dest, |
827 | struct dm_bio_prison_cell *cell, struct bio *bio) | 851 | struct dm_bio_prison_cell *cell, struct bio *bio, |
852 | sector_t len) | ||
828 | { | 853 | { |
829 | int r; | 854 | int r; |
830 | struct pool *pool = tc->pool; | 855 | struct pool *pool = tc->pool; |
@@ -835,8 +860,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | |||
835 | m->data_block = data_dest; | 860 | m->data_block = data_dest; |
836 | m->cell = cell; | 861 | m->cell = cell; |
837 | 862 | ||
863 | /* | ||
864 | * quiesce action + copy action + an extra reference held for the | ||
865 | * duration of this function (we may need to inc later for a | ||
866 | * partial zero). | ||
867 | */ | ||
868 | atomic_set(&m->prepare_actions, 3); | ||
869 | |||
838 | if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) | 870 | if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) |
839 | m->quiesced = true; | 871 | complete_mapping_preparation(m); /* already quiesced */ |
840 | 872 | ||
841 | /* | 873 | /* |
842 | * IO to pool_dev remaps to the pool target's data_dev. | 874 | * IO to pool_dev remaps to the pool target's data_dev. |
@@ -857,20 +889,38 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, | |||
857 | 889 | ||
858 | from.bdev = origin->bdev; | 890 | from.bdev = origin->bdev; |
859 | from.sector = data_origin * pool->sectors_per_block; | 891 | from.sector = data_origin * pool->sectors_per_block; |
860 | from.count = pool->sectors_per_block; | 892 | from.count = len; |
861 | 893 | ||
862 | to.bdev = tc->pool_dev->bdev; | 894 | to.bdev = tc->pool_dev->bdev; |
863 | to.sector = data_dest * pool->sectors_per_block; | 895 | to.sector = data_dest * pool->sectors_per_block; |
864 | to.count = pool->sectors_per_block; | 896 | to.count = len; |
865 | 897 | ||
866 | r = dm_kcopyd_copy(pool->copier, &from, 1, &to, | 898 | r = dm_kcopyd_copy(pool->copier, &from, 1, &to, |
867 | 0, copy_complete, m); | 899 | 0, copy_complete, m); |
868 | if (r < 0) { | 900 | if (r < 0) { |
869 | mempool_free(m, pool->mapping_pool); | ||
870 | DMERR_LIMIT("dm_kcopyd_copy() failed"); | 901 | DMERR_LIMIT("dm_kcopyd_copy() failed"); |
871 | cell_error(pool, cell); | 902 | copy_complete(1, 1, m); |
903 | |||
904 | /* | ||
905 | * We allow the zero to be issued, to simplify the | ||
906 | * error path. Otherwise we'd need to start | ||
907 | * worrying about decrementing the prepare_actions | ||
908 | * counter. | ||
909 | */ | ||
910 | } | ||
911 | |||
912 | /* | ||
913 | * Do we need to zero a tail region? | ||
914 | */ | ||
915 | if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) { | ||
916 | atomic_inc(&m->prepare_actions); | ||
917 | ll_zero(tc, m, | ||
918 | data_dest * pool->sectors_per_block + len, | ||
919 | (data_dest + 1) * pool->sectors_per_block); | ||
872 | } | 920 | } |
873 | } | 921 | } |
922 | |||
923 | complete_mapping_preparation(m); /* drop our ref */ | ||
874 | } | 924 | } |
875 | 925 | ||
876 | static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, | 926 | static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, |
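A hedged illustration of the new partial-copy tail handling above: when schedule_copy() is asked to copy only len sectors of a block (because the external origin ends mid-block) and zero_new_blocks is enabled, the rest of the destination block is handed to ll_zero(). The helper below only reproduces that range computation; tail_to_zero is a made-up name and the values in main() are arbitrary.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

/* First and one-past-last sector of the uncopied tail of data block data_dest. */
static void tail_to_zero(sector_t data_dest, sector_t len,
			 sector_t sectors_per_block,
			 sector_t *begin, sector_t *end)
{
	*begin = data_dest * sectors_per_block + len;	/* first uncopied sector */
	*end = (data_dest + 1) * sectors_per_block;	/* end of the data block */
}

int main(void)
{
	sector_t begin, end;

	/* 128-sector blocks, only 40 sectors copied into block 5 */
	tail_to_zero(5, 40, 128, &begin, &end);
	printf("zero sectors [%llu, %llu)\n",
	       (unsigned long long)begin, (unsigned long long)end);
	return 0;
}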
@@ -878,15 +928,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, | |||
878 | struct dm_bio_prison_cell *cell, struct bio *bio) | 928 | struct dm_bio_prison_cell *cell, struct bio *bio) |
879 | { | 929 | { |
880 | schedule_copy(tc, virt_block, tc->pool_dev, | 930 | schedule_copy(tc, virt_block, tc->pool_dev, |
881 | data_origin, data_dest, cell, bio); | 931 | data_origin, data_dest, cell, bio, |
882 | } | 932 | tc->pool->sectors_per_block); |
883 | |||
884 | static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, | ||
885 | dm_block_t data_dest, | ||
886 | struct dm_bio_prison_cell *cell, struct bio *bio) | ||
887 | { | ||
888 | schedule_copy(tc, virt_block, tc->origin_dev, | ||
889 | virt_block, data_dest, cell, bio); | ||
890 | } | 933 | } |
891 | 934 | ||
892 | static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, | 935 | static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, |
@@ -896,8 +939,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, | |||
896 | struct pool *pool = tc->pool; | 939 | struct pool *pool = tc->pool; |
897 | struct dm_thin_new_mapping *m = get_next_mapping(pool); | 940 | struct dm_thin_new_mapping *m = get_next_mapping(pool); |
898 | 941 | ||
899 | m->quiesced = true; | 942 | atomic_set(&m->prepare_actions, 1); /* no need to quiesce */ |
900 | m->prepared = false; | ||
901 | m->tc = tc; | 943 | m->tc = tc; |
902 | m->virt_block = virt_block; | 944 | m->virt_block = virt_block; |
903 | m->data_block = data_block; | 945 | m->data_block = data_block; |
@@ -919,21 +961,33 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, | |||
919 | save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); | 961 | save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); |
920 | inc_all_io_entry(pool, bio); | 962 | inc_all_io_entry(pool, bio); |
921 | remap_and_issue(tc, bio, data_block); | 963 | remap_and_issue(tc, bio, data_block); |
922 | } else { | ||
923 | int r; | ||
924 | struct dm_io_region to; | ||
925 | 964 | ||
926 | to.bdev = tc->pool_dev->bdev; | 965 | } else |
927 | to.sector = data_block * pool->sectors_per_block; | 966 | ll_zero(tc, m, |
928 | to.count = pool->sectors_per_block; | 967 | data_block * pool->sectors_per_block, |
968 | (data_block + 1) * pool->sectors_per_block); | ||
969 | } | ||
929 | 970 | ||
930 | r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); | 971 | static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, |
931 | if (r < 0) { | 972 | dm_block_t data_dest, |
932 | mempool_free(m, pool->mapping_pool); | 973 | struct dm_bio_prison_cell *cell, struct bio *bio) |
933 | DMERR_LIMIT("dm_kcopyd_zero() failed"); | 974 | { |
934 | cell_error(pool, cell); | 975 | struct pool *pool = tc->pool; |
935 | } | 976 | sector_t virt_block_begin = virt_block * pool->sectors_per_block; |
936 | } | 977 | sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block; |
978 | |||
979 | if (virt_block_end <= tc->origin_size) | ||
980 | schedule_copy(tc, virt_block, tc->origin_dev, | ||
981 | virt_block, data_dest, cell, bio, | ||
982 | pool->sectors_per_block); | ||
983 | |||
984 | else if (virt_block_begin < tc->origin_size) | ||
985 | schedule_copy(tc, virt_block, tc->origin_dev, | ||
986 | virt_block, data_dest, cell, bio, | ||
987 | tc->origin_size - virt_block_begin); | ||
988 | |||
989 | else | ||
990 | schedule_zero(tc, virt_block, data_dest, cell, bio); | ||
937 | } | 991 | } |
938 | 992 | ||
939 | /* | 993 | /* |
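The rewritten schedule_external_copy() above now chooses between three behaviours by comparing the virtual block's sector range with tc->origin_size: copy the whole block, copy only the part the (smaller) external origin still covers, or skip the copy and zero the fresh block. A minimal sketch of that decision, with an invented enum and function name:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

enum external_copy_action {
	COPY_FULL_BLOCK,	/* block lies entirely inside the origin */
	COPY_PARTIAL_BLOCK,	/* block straddles the end of the origin */
	ZERO_BLOCK,		/* block lies entirely beyond the origin */
};

/* Mirrors the begin/end comparisons in schedule_external_copy(). */
static enum external_copy_action classify_block(sector_t virt_block,
						sector_t sectors_per_block,
						sector_t origin_size)
{
	sector_t begin = virt_block * sectors_per_block;
	sector_t end = begin + sectors_per_block;

	if (end <= origin_size)
		return COPY_FULL_BLOCK;
	if (begin < origin_size)
		return COPY_PARTIAL_BLOCK;
	return ZERO_BLOCK;
}

int main(void)
{
	/* 128-sector blocks against a 300-sector external origin */
	printf("block 1 -> %d\n", classify_block(1, 128, 300));	/* full    */
	printf("block 2 -> %d\n", classify_block(2, 128, 300));	/* partial */
	printf("block 3 -> %d\n", classify_block(3, 128, 300));	/* zero    */
	return 0;
}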
@@ -1315,7 +1369,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio) | |||
1315 | inc_all_io_entry(pool, bio); | 1369 | inc_all_io_entry(pool, bio); |
1316 | cell_defer_no_holder(tc, cell); | 1370 | cell_defer_no_holder(tc, cell); |
1317 | 1371 | ||
1318 | remap_to_origin_and_issue(tc, bio); | 1372 | if (bio_end_sector(bio) <= tc->origin_size) |
1373 | remap_to_origin_and_issue(tc, bio); | ||
1374 | |||
1375 | else if (bio->bi_iter.bi_sector < tc->origin_size) { | ||
1376 | zero_fill_bio(bio); | ||
1377 | bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT; | ||
1378 | remap_to_origin_and_issue(tc, bio); | ||
1379 | |||
1380 | } else { | ||
1381 | zero_fill_bio(bio); | ||
1382 | bio_endio(bio, 0); | ||
1383 | } | ||
1319 | } else | 1384 | } else |
1320 | provision_block(tc, bio, block, cell); | 1385 | provision_block(tc, bio, block, cell); |
1321 | break; | 1386 | break; |
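For reads to an unprovisioned block that is backed by the external origin, the new process_bio() code above zero-fills the bio first and, if the bio straddles the end of the origin, shrinks bi_size so only the leading part is re-read from the origin while the tail stays zeroed. The byte math below is just that clamp (SECTOR_SHIFT is 9, i.e. 512-byte sectors); clamp_bio_bytes is a name made up for this sketch.

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9	/* 512-byte sectors, as in the block layer */

typedef uint64_t sector_t;

/*
 * How many bytes of a bio starting at bi_sector (bi_size bytes long)
 * should still be read from an origin of origin_size sectors; the rest
 * of the bio has already been zero-filled.
 */
static uint64_t clamp_bio_bytes(sector_t bi_sector, uint64_t bi_size,
				sector_t origin_size)
{
	sector_t bio_end = bi_sector + (bi_size >> SECTOR_SHIFT);

	if (bio_end <= origin_size)
		return bi_size;			/* bio fully inside the origin */
	if (bi_sector < origin_size)
		return (origin_size - bi_sector) << SECTOR_SHIFT; /* straddles */
	return 0;				/* bio entirely past the origin */
}

int main(void)
{
	/* 32 KiB read at sector 1000 against a 1024-sector origin: 12 KiB left */
	printf("%llu bytes from origin\n",
	       (unsigned long long)clamp_bio_bytes(1000, 32 * 1024, 1024));
	return 0;
}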
@@ -3112,7 +3177,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
3112 | */ | 3177 | */ |
3113 | if (io_opt_sectors < pool->sectors_per_block || | 3178 | if (io_opt_sectors < pool->sectors_per_block || |
3114 | do_div(io_opt_sectors, pool->sectors_per_block)) { | 3179 | do_div(io_opt_sectors, pool->sectors_per_block)) { |
3115 | blk_limits_io_min(limits, 0); | 3180 | blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT); |
3116 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); | 3181 | blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); |
3117 | } | 3182 | } |
3118 | 3183 | ||
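Hedged sketch of the io-hints change above: when the stacked optimal I/O size is smaller than, or not a multiple of, the pool's data block size, the pool now advertises the data block size in bytes as both minimum and optimal I/O size (previously io_min was forced to 0). The struct below is a simplified stand-in for struct queue_limits, not the real block-layer API.

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9

struct fake_limits {
	unsigned int io_min;	/* bytes */
	unsigned int io_opt;	/* bytes */
};

static void set_pool_io_hints(struct fake_limits *limits,
			      uint64_t io_opt_sectors,
			      uint64_t sectors_per_block)
{
	/* The kernel uses do_div(); plain % is enough for this sketch. */
	if (io_opt_sectors < sectors_per_block ||
	    io_opt_sectors % sectors_per_block) {
		limits->io_min = (unsigned int)(sectors_per_block << SECTOR_SHIFT);
		limits->io_opt = (unsigned int)(sectors_per_block << SECTOR_SHIFT);
	}
}

int main(void)
{
	struct fake_limits limits = { 0, 0 };

	/* 64 KiB data blocks (128 sectors), underlying io_opt of 8 sectors */
	set_pool_io_hints(&limits, 8, 128);
	printf("io_min=%u io_opt=%u\n", limits.io_min, limits.io_opt);
	return 0;
}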
@@ -3141,7 +3206,7 @@ static struct target_type pool_target = { | |||
3141 | .name = "thin-pool", | 3206 | .name = "thin-pool", |
3142 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | | 3207 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | |
3143 | DM_TARGET_IMMUTABLE, | 3208 | DM_TARGET_IMMUTABLE, |
3144 | .version = {1, 12, 0}, | 3209 | .version = {1, 13, 0}, |
3145 | .module = THIS_MODULE, | 3210 | .module = THIS_MODULE, |
3146 | .ctr = pool_ctr, | 3211 | .ctr = pool_ctr, |
3147 | .dtr = pool_dtr, | 3212 | .dtr = pool_dtr, |
@@ -3361,8 +3426,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err) | |||
3361 | spin_lock_irqsave(&pool->lock, flags); | 3426 | spin_lock_irqsave(&pool->lock, flags); |
3362 | list_for_each_entry_safe(m, tmp, &work, list) { | 3427 | list_for_each_entry_safe(m, tmp, &work, list) { |
3363 | list_del(&m->list); | 3428 | list_del(&m->list); |
3364 | m->quiesced = true; | 3429 | __complete_mapping_preparation(m); |
3365 | __maybe_add_mapping(m); | ||
3366 | } | 3430 | } |
3367 | spin_unlock_irqrestore(&pool->lock, flags); | 3431 | spin_unlock_irqrestore(&pool->lock, flags); |
3368 | } | 3432 | } |
@@ -3401,6 +3465,16 @@ static void thin_postsuspend(struct dm_target *ti) | |||
3401 | noflush_work(tc, do_noflush_stop); | 3465 | noflush_work(tc, do_noflush_stop); |
3402 | } | 3466 | } |
3403 | 3467 | ||
3468 | static int thin_preresume(struct dm_target *ti) | ||
3469 | { | ||
3470 | struct thin_c *tc = ti->private; | ||
3471 | |||
3472 | if (tc->origin_dev) | ||
3473 | tc->origin_size = get_dev_size(tc->origin_dev->bdev); | ||
3474 | |||
3475 | return 0; | ||
3476 | } | ||
3477 | |||
3404 | /* | 3478 | /* |
3405 | * <nr mapped sectors> <highest mapped sector> | 3479 | * <nr mapped sectors> <highest mapped sector> |
3406 | */ | 3480 | */ |
@@ -3483,12 +3557,13 @@ static int thin_iterate_devices(struct dm_target *ti, | |||
3483 | 3557 | ||
3484 | static struct target_type thin_target = { | 3558 | static struct target_type thin_target = { |
3485 | .name = "thin", | 3559 | .name = "thin", |
3486 | .version = {1, 12, 0}, | 3560 | .version = {1, 13, 0}, |
3487 | .module = THIS_MODULE, | 3561 | .module = THIS_MODULE, |
3488 | .ctr = thin_ctr, | 3562 | .ctr = thin_ctr, |
3489 | .dtr = thin_dtr, | 3563 | .dtr = thin_dtr, |
3490 | .map = thin_map, | 3564 | .map = thin_map, |
3491 | .end_io = thin_endio, | 3565 | .end_io = thin_endio, |
3566 | .preresume = thin_preresume, | ||
3492 | .presuspend = thin_presuspend, | 3567 | .presuspend = thin_presuspend, |
3493 | .postsuspend = thin_postsuspend, | 3568 | .postsuspend = thin_postsuspend, |
3494 | .status = thin_status, | 3569 | .status = thin_status, |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index ed76126aac54..e81d2152fa68 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
@@ -72,7 +72,6 @@ int dm_table_any_busy_target(struct dm_table *t); | |||
72 | unsigned dm_table_get_type(struct dm_table *t); | 72 | unsigned dm_table_get_type(struct dm_table *t); |
73 | struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); | 73 | struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); |
74 | bool dm_table_request_based(struct dm_table *t); | 74 | bool dm_table_request_based(struct dm_table *t); |
75 | bool dm_table_supports_discards(struct dm_table *t); | ||
76 | void dm_table_free_md_mempools(struct dm_table *t); | 75 | void dm_table_free_md_mempools(struct dm_table *t); |
77 | struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); | 76 | struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); |
78 | 77 | ||