diff options
| -rw-r--r-- | Documentation/device-mapper/era.txt | 108 | ||||
| -rw-r--r-- | drivers/md/Kconfig | 11 | ||||
| -rw-r--r-- | drivers/md/Makefile | 2 | ||||
| -rw-r--r-- | drivers/md/dm-cache-block-types.h | 11 | ||||
| -rw-r--r-- | drivers/md/dm-cache-metadata.c | 132 | ||||
| -rw-r--r-- | drivers/md/dm-cache-metadata.h | 15 | ||||
| -rw-r--r-- | drivers/md/dm-cache-target.c | 131 | ||||
| -rw-r--r-- | drivers/md/dm-era-target.c | 1746 | ||||
| -rw-r--r-- | drivers/md/dm-mpath.c | 219 | ||||
| -rw-r--r-- | drivers/md/dm-table.c | 21 | ||||
| -rw-r--r-- | drivers/md/dm-thin-metadata.c | 80 | ||||
| -rw-r--r-- | drivers/md/dm-thin.c | 263 | ||||
| -rw-r--r-- | drivers/md/dm.c | 24 | ||||
| -rw-r--r-- | drivers/md/dm.h | 2 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-bitset.c | 10 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-bitset.h | 1 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 15 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.h | 3 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.c | 5 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.h | 17 | ||||
| -rw-r--r-- | include/linux/device-mapper.h | 8 |
21 files changed, 2346 insertions, 478 deletions
diff --git a/Documentation/device-mapper/era.txt b/Documentation/device-mapper/era.txt new file mode 100644 index 000000000000..3c6d01be3560 --- /dev/null +++ b/Documentation/device-mapper/era.txt | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | Introduction | ||
| 2 | ============ | ||
| 3 | |||
| 4 | dm-era is a target that behaves similarly to the linear target. In | ||
| 5 | addition it keeps track of which blocks were written within a user | ||
| 6 | defined period of time called an 'era'. Each era target instance | ||
| 7 | maintains the current era as a monotonically increasing 32-bit | ||
| 8 | counter. | ||
| 9 | |||
| 10 | Use cases include tracking changed blocks for backup software, and | ||
| 11 | partially invalidating the contents of a cache to restore cache | ||
| 12 | coherency after rolling back a vendor snapshot. | ||
| 13 | |||
| 14 | Constructor | ||
| 15 | =========== | ||
| 16 | |||
| 17 | era <metadata dev> <origin dev> <block size> | ||
| 18 | |||
| 19 | metadata dev : fast device holding the persistent metadata | ||
| 20 | origin dev : device holding data blocks that may change | ||
| 21 | block size : block size of origin data device, granularity that is | ||
| 22 | tracked by the target | ||
| 23 | |||
| 24 | Messages | ||
| 25 | ======== | ||
| 26 | |||
| 27 | None of the dm messages take any arguments. | ||
| 28 | |||
| 29 | checkpoint | ||
| 30 | ---------- | ||
| 31 | |||
| 32 | Possibly move to a new era. You shouldn't assume the era has | ||
| 33 | incremented. After sending this message, you should check the | ||
| 34 | current era via the status line. | ||
| 35 | |||
| 36 | take_metadata_snap | ||
| 37 | ------------------ | ||
| 38 | |||
| 39 | Create a clone of the metadata, to allow a userland process to read it. | ||
| 40 | |||
| 41 | drop_metadata_snap | ||
| 42 | ------------------ | ||
| 43 | |||
| 44 | Drop the metadata snapshot. | ||
| 45 | |||
| 46 | Status | ||
| 47 | ====== | ||
| 48 | |||
| 49 | <metadata block size> <#used metadata blocks>/<#total metadata blocks> | ||
| 50 | <current era> <held metadata root | '-'> | ||
| 51 | |||
| 52 | metadata block size : Fixed block size for each metadata block in | ||
| 53 | sectors | ||
| 54 | #used metadata blocks : Number of metadata blocks used | ||
| 55 | #total metadata blocks : Total number of metadata blocks | ||
| 56 | current era : The current era | ||
| 57 | held metadata root : The location, in blocks, of the metadata root | ||
| 58 | that has been 'held' for userspace read | ||
| 59 | access. '-' indicates there is no held root | ||
| 60 | |||
| 61 | Detailed use case | ||
| 62 | ================= | ||
| 63 | |||
| 64 | The scenario of invalidating a cache when rolling back a vendor | ||
| 65 | snapshot was the primary use case when developing this target: | ||
| 66 | |||
| 67 | Taking a vendor snapshot | ||
| 68 | ------------------------ | ||
| 69 | |||
| 70 | - Send a checkpoint message to the era target | ||
| 71 | - Make a note of the current era in its status line | ||
| 72 | - Take vendor snapshot (the era and snapshot should be forever | ||
| 73 | associated now). | ||
| 74 | |||
| 75 | Rolling back to a vendor snapshot | ||
| 76 | --------------------------------- | ||
| 77 | |||
| 78 | - Cache enters passthrough mode (see: dm-cache's docs in cache.txt) | ||
| 79 | - Rollback vendor storage | ||
| 80 | - Take metadata snapshot | ||
| 81 | - Ascertain which blocks have been written since the snapshot was taken | ||
| 82 | by checking each block's era | ||
| 83 | - Invalidate those blocks in the caching software | ||
| 84 | - Cache returns to writeback/writethrough mode | ||
| 85 | |||
| 86 | Memory usage | ||
| 87 | ============ | ||
| 88 | |||
| 89 | The target uses a bitset to record writes in the current era. It also | ||
| 90 | has a spare bitset ready for switching over to a new era. Other than | ||
| 91 | that it uses a few 4k blocks for updating metadata. | ||
| 92 | |||
| 93 | (4 * nr_blocks) bytes + buffers | ||
| 94 | |||
| 95 | Resilience | ||
| 96 | ========== | ||
| 97 | |||
| 98 | Metadata is updated on disk before a write to a previously unwritten | ||
| 99 | block is performed. As such dm-era should not be affected by a hard | ||
| 100 | crash such as power failure. | ||
| 101 | |||
| 102 | Userland tools | ||
| 103 | ============== | ||
| 104 | |||
| 105 | Userland tools are found in the increasingly poorly named | ||
| 106 | thin-provisioning-tools project: | ||
| 107 | |||
| 108 | https://github.com/jthornber/thin-provisioning-tools | ||
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 95ad936e6048..5bdedf6df153 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
| @@ -285,6 +285,17 @@ config DM_CACHE_CLEANER | |||
| 285 | A simple cache policy that writes back all data to the | 285 | A simple cache policy that writes back all data to the |
| 286 | origin. Used when decommissioning a dm-cache. | 286 | origin. Used when decommissioning a dm-cache. |
| 287 | 287 | ||
| 288 | config DM_ERA | ||
| 289 | tristate "Era target (EXPERIMENTAL)" | ||
| 290 | depends on BLK_DEV_DM | ||
| 291 | default n | ||
| 292 | select DM_PERSISTENT_DATA | ||
| 293 | select DM_BIO_PRISON | ||
| 294 | ---help--- | ||
| 295 | dm-era tracks which parts of a block device are written to | ||
| 296 | over time. Useful for maintaining cache coherency when using | ||
| 297 | vendor snapshots. | ||
| 298 | |||
| 288 | config DM_MIRROR | 299 | config DM_MIRROR |
| 289 | tristate "Mirror target" | 300 | tristate "Mirror target" |
| 290 | depends on BLK_DEV_DM | 301 | depends on BLK_DEV_DM |
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index f26d83292579..a2da532b1c2b 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile | |||
| @@ -14,6 +14,7 @@ dm-thin-pool-y += dm-thin.o dm-thin-metadata.o | |||
| 14 | dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o | 14 | dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o |
| 15 | dm-cache-mq-y += dm-cache-policy-mq.o | 15 | dm-cache-mq-y += dm-cache-policy-mq.o |
| 16 | dm-cache-cleaner-y += dm-cache-policy-cleaner.o | 16 | dm-cache-cleaner-y += dm-cache-policy-cleaner.o |
| 17 | dm-era-y += dm-era-target.o | ||
| 17 | md-mod-y += md.o bitmap.o | 18 | md-mod-y += md.o bitmap.o |
| 18 | raid456-y += raid5.o | 19 | raid456-y += raid5.o |
| 19 | 20 | ||
| @@ -53,6 +54,7 @@ obj-$(CONFIG_DM_VERITY) += dm-verity.o | |||
| 53 | obj-$(CONFIG_DM_CACHE) += dm-cache.o | 54 | obj-$(CONFIG_DM_CACHE) += dm-cache.o |
| 54 | obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o | 55 | obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o |
| 55 | obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o | 56 | obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o |
| 57 | obj-$(CONFIG_DM_ERA) += dm-era.o | ||
| 56 | 58 | ||
| 57 | ifeq ($(CONFIG_DM_UEVENT),y) | 59 | ifeq ($(CONFIG_DM_UEVENT),y) |
| 58 | dm-mod-objs += dm-uevent.o | 60 | dm-mod-objs += dm-uevent.o |
diff --git a/drivers/md/dm-cache-block-types.h b/drivers/md/dm-cache-block-types.h index bed4ad4e1b7c..aac0e2df06be 100644 --- a/drivers/md/dm-cache-block-types.h +++ b/drivers/md/dm-cache-block-types.h | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | 19 | ||
| 20 | typedef dm_block_t __bitwise__ dm_oblock_t; | 20 | typedef dm_block_t __bitwise__ dm_oblock_t; |
| 21 | typedef uint32_t __bitwise__ dm_cblock_t; | 21 | typedef uint32_t __bitwise__ dm_cblock_t; |
| 22 | typedef dm_block_t __bitwise__ dm_dblock_t; | ||
| 23 | 22 | ||
| 24 | static inline dm_oblock_t to_oblock(dm_block_t b) | 23 | static inline dm_oblock_t to_oblock(dm_block_t b) |
| 25 | { | 24 | { |
| @@ -41,14 +40,4 @@ static inline uint32_t from_cblock(dm_cblock_t b) | |||
| 41 | return (__force uint32_t) b; | 40 | return (__force uint32_t) b; |
| 42 | } | 41 | } |
| 43 | 42 | ||
| 44 | static inline dm_dblock_t to_dblock(dm_block_t b) | ||
| 45 | { | ||
| 46 | return (__force dm_dblock_t) b; | ||
| 47 | } | ||
| 48 | |||
| 49 | static inline dm_block_t from_dblock(dm_dblock_t b) | ||
| 50 | { | ||
| 51 | return (__force dm_block_t) b; | ||
| 52 | } | ||
| 53 | |||
| 54 | #endif /* DM_CACHE_BLOCK_TYPES_H */ | 43 | #endif /* DM_CACHE_BLOCK_TYPES_H */ |
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 9ef0752e8a08..4ead4ba60656 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c | |||
| @@ -109,7 +109,7 @@ struct dm_cache_metadata { | |||
| 109 | dm_block_t discard_root; | 109 | dm_block_t discard_root; |
| 110 | 110 | ||
| 111 | sector_t discard_block_size; | 111 | sector_t discard_block_size; |
| 112 | dm_dblock_t discard_nr_blocks; | 112 | dm_oblock_t discard_nr_blocks; |
| 113 | 113 | ||
| 114 | sector_t data_block_size; | 114 | sector_t data_block_size; |
| 115 | dm_cblock_t cache_blocks; | 115 | dm_cblock_t cache_blocks; |
| @@ -120,6 +120,12 @@ struct dm_cache_metadata { | |||
| 120 | unsigned policy_version[CACHE_POLICY_VERSION_SIZE]; | 120 | unsigned policy_version[CACHE_POLICY_VERSION_SIZE]; |
| 121 | size_t policy_hint_size; | 121 | size_t policy_hint_size; |
| 122 | struct dm_cache_statistics stats; | 122 | struct dm_cache_statistics stats; |
| 123 | |||
| 124 | /* | ||
| 125 | * Reading the space map root can fail, so we read it into this | ||
| 126 | * buffer before the superblock is locked and updated. | ||
| 127 | */ | ||
| 128 | __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; | ||
| 123 | }; | 129 | }; |
| 124 | 130 | ||
| 125 | /*------------------------------------------------------------------- | 131 | /*------------------------------------------------------------------- |
| @@ -260,11 +266,31 @@ static void __setup_mapping_info(struct dm_cache_metadata *cmd) | |||
| 260 | } | 266 | } |
| 261 | } | 267 | } |
| 262 | 268 | ||
| 269 | static int __save_sm_root(struct dm_cache_metadata *cmd) | ||
| 270 | { | ||
| 271 | int r; | ||
| 272 | size_t metadata_len; | ||
| 273 | |||
| 274 | r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); | ||
| 275 | if (r < 0) | ||
| 276 | return r; | ||
| 277 | |||
| 278 | return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root, | ||
| 279 | metadata_len); | ||
| 280 | } | ||
| 281 | |||
| 282 | static void __copy_sm_root(struct dm_cache_metadata *cmd, | ||
| 283 | struct cache_disk_superblock *disk_super) | ||
| 284 | { | ||
| 285 | memcpy(&disk_super->metadata_space_map_root, | ||
| 286 | &cmd->metadata_space_map_root, | ||
| 287 | sizeof(cmd->metadata_space_map_root)); | ||
| 288 | } | ||
| 289 | |||
| 263 | static int __write_initial_superblock(struct dm_cache_metadata *cmd) | 290 | static int __write_initial_superblock(struct dm_cache_metadata *cmd) |
| 264 | { | 291 | { |
| 265 | int r; | 292 | int r; |
| 266 | struct dm_block *sblock; | 293 | struct dm_block *sblock; |
| 267 | size_t metadata_len; | ||
| 268 | struct cache_disk_superblock *disk_super; | 294 | struct cache_disk_superblock *disk_super; |
| 269 | sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT; | 295 | sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT; |
| 270 | 296 | ||
| @@ -272,12 +298,16 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) | |||
| 272 | if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS) | 298 | if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS) |
| 273 | bdev_size = DM_CACHE_METADATA_MAX_SECTORS; | 299 | bdev_size = DM_CACHE_METADATA_MAX_SECTORS; |
| 274 | 300 | ||
| 275 | r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); | 301 | r = dm_tm_pre_commit(cmd->tm); |
| 276 | if (r < 0) | 302 | if (r < 0) |
| 277 | return r; | 303 | return r; |
| 278 | 304 | ||
| 279 | r = dm_tm_pre_commit(cmd->tm); | 305 | /* |
| 280 | if (r < 0) | 306 | * dm_sm_copy_root() can fail. So we need to do it before we start |
| 307 | * updating the superblock. | ||
| 308 | */ | ||
| 309 | r = __save_sm_root(cmd); | ||
| 310 | if (r) | ||
| 281 | return r; | 311 | return r; |
| 282 | 312 | ||
| 283 | r = superblock_lock_zero(cmd, &sblock); | 313 | r = superblock_lock_zero(cmd, &sblock); |
| @@ -293,16 +323,13 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) | |||
| 293 | memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); | 323 | memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); |
| 294 | disk_super->policy_hint_size = 0; | 324 | disk_super->policy_hint_size = 0; |
| 295 | 325 | ||
| 296 | r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, | 326 | __copy_sm_root(cmd, disk_super); |
| 297 | metadata_len); | ||
| 298 | if (r < 0) | ||
| 299 | goto bad_locked; | ||
| 300 | 327 | ||
| 301 | disk_super->mapping_root = cpu_to_le64(cmd->root); | 328 | disk_super->mapping_root = cpu_to_le64(cmd->root); |
| 302 | disk_super->hint_root = cpu_to_le64(cmd->hint_root); | 329 | disk_super->hint_root = cpu_to_le64(cmd->hint_root); |
| 303 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); | 330 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); |
| 304 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); | 331 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); |
| 305 | disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); | 332 | disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); |
| 306 | disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); | 333 | disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); |
| 307 | disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); | 334 | disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); |
| 308 | disk_super->cache_blocks = cpu_to_le32(0); | 335 | disk_super->cache_blocks = cpu_to_le32(0); |
| @@ -313,10 +340,6 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) | |||
| 313 | disk_super->write_misses = cpu_to_le32(0); | 340 | disk_super->write_misses = cpu_to_le32(0); |
| 314 | 341 | ||
| 315 | return dm_tm_commit(cmd->tm, sblock); | 342 | return dm_tm_commit(cmd->tm, sblock); |
| 316 | |||
| 317 | bad_locked: | ||
| 318 | dm_bm_unlock(sblock); | ||
| 319 | return r; | ||
| 320 | } | 343 | } |
| 321 | 344 | ||
| 322 | static int __format_metadata(struct dm_cache_metadata *cmd) | 345 | static int __format_metadata(struct dm_cache_metadata *cmd) |
| @@ -496,7 +519,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd, | |||
| 496 | cmd->hint_root = le64_to_cpu(disk_super->hint_root); | 519 | cmd->hint_root = le64_to_cpu(disk_super->hint_root); |
| 497 | cmd->discard_root = le64_to_cpu(disk_super->discard_root); | 520 | cmd->discard_root = le64_to_cpu(disk_super->discard_root); |
| 498 | cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size); | 521 | cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size); |
| 499 | cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks)); | 522 | cmd->discard_nr_blocks = to_oblock(le64_to_cpu(disk_super->discard_nr_blocks)); |
| 500 | cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); | 523 | cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); |
| 501 | cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); | 524 | cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); |
| 502 | strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); | 525 | strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); |
| @@ -530,8 +553,9 @@ static int __begin_transaction_flags(struct dm_cache_metadata *cmd, | |||
| 530 | disk_super = dm_block_data(sblock); | 553 | disk_super = dm_block_data(sblock); |
| 531 | update_flags(disk_super, mutator); | 554 | update_flags(disk_super, mutator); |
| 532 | read_superblock_fields(cmd, disk_super); | 555 | read_superblock_fields(cmd, disk_super); |
| 556 | dm_bm_unlock(sblock); | ||
| 533 | 557 | ||
| 534 | return dm_bm_flush_and_unlock(cmd->bm, sblock); | 558 | return dm_bm_flush(cmd->bm); |
| 535 | } | 559 | } |
| 536 | 560 | ||
| 537 | static int __begin_transaction(struct dm_cache_metadata *cmd) | 561 | static int __begin_transaction(struct dm_cache_metadata *cmd) |
| @@ -559,7 +583,6 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, | |||
| 559 | flags_mutator mutator) | 583 | flags_mutator mutator) |
| 560 | { | 584 | { |
| 561 | int r; | 585 | int r; |
| 562 | size_t metadata_len; | ||
| 563 | struct cache_disk_superblock *disk_super; | 586 | struct cache_disk_superblock *disk_super; |
| 564 | struct dm_block *sblock; | 587 | struct dm_block *sblock; |
| 565 | 588 | ||
| @@ -577,8 +600,8 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, | |||
| 577 | if (r < 0) | 600 | if (r < 0) |
| 578 | return r; | 601 | return r; |
| 579 | 602 | ||
| 580 | r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); | 603 | r = __save_sm_root(cmd); |
| 581 | if (r < 0) | 604 | if (r) |
| 582 | return r; | 605 | return r; |
| 583 | 606 | ||
| 584 | r = superblock_lock(cmd, &sblock); | 607 | r = superblock_lock(cmd, &sblock); |
| @@ -594,7 +617,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, | |||
| 594 | disk_super->hint_root = cpu_to_le64(cmd->hint_root); | 617 | disk_super->hint_root = cpu_to_le64(cmd->hint_root); |
| 595 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); | 618 | disk_super->discard_root = cpu_to_le64(cmd->discard_root); |
| 596 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); | 619 | disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); |
| 597 | disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); | 620 | disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); |
| 598 | disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); | 621 | disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); |
| 599 | strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); | 622 | strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); |
| 600 | disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); | 623 | disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); |
| @@ -605,13 +628,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, | |||
| 605 | disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); | 628 | disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); |
| 606 | disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits); | 629 | disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits); |
| 607 | disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses); | 630 | disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses); |
| 608 | 631 | __copy_sm_root(cmd, disk_super); | |
| 609 | r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, | ||
| 610 | metadata_len); | ||
| 611 | if (r < 0) { | ||
| 612 | dm_bm_unlock(sblock); | ||
| 613 | return r; | ||
| 614 | } | ||
| 615 | 632 | ||
| 616 | return dm_tm_commit(cmd->tm, sblock); | 633 | return dm_tm_commit(cmd->tm, sblock); |
| 617 | } | 634 | } |
| @@ -771,15 +788,15 @@ out: | |||
| 771 | 788 | ||
| 772 | int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, | 789 | int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, |
| 773 | sector_t discard_block_size, | 790 | sector_t discard_block_size, |
| 774 | dm_dblock_t new_nr_entries) | 791 | dm_oblock_t new_nr_entries) |
| 775 | { | 792 | { |
| 776 | int r; | 793 | int r; |
| 777 | 794 | ||
| 778 | down_write(&cmd->root_lock); | 795 | down_write(&cmd->root_lock); |
| 779 | r = dm_bitset_resize(&cmd->discard_info, | 796 | r = dm_bitset_resize(&cmd->discard_info, |
| 780 | cmd->discard_root, | 797 | cmd->discard_root, |
| 781 | from_dblock(cmd->discard_nr_blocks), | 798 | from_oblock(cmd->discard_nr_blocks), |
| 782 | from_dblock(new_nr_entries), | 799 | from_oblock(new_nr_entries), |
| 783 | false, &cmd->discard_root); | 800 | false, &cmd->discard_root); |
| 784 | if (!r) { | 801 | if (!r) { |
| 785 | cmd->discard_block_size = discard_block_size; | 802 | cmd->discard_block_size = discard_block_size; |
| @@ -792,28 +809,28 @@ int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, | |||
| 792 | return r; | 809 | return r; |
| 793 | } | 810 | } |
| 794 | 811 | ||
| 795 | static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) | 812 | static int __set_discard(struct dm_cache_metadata *cmd, dm_oblock_t b) |
| 796 | { | 813 | { |
| 797 | return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root, | 814 | return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root, |
| 798 | from_dblock(b), &cmd->discard_root); | 815 | from_oblock(b), &cmd->discard_root); |
| 799 | } | 816 | } |
| 800 | 817 | ||
| 801 | static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) | 818 | static int __clear_discard(struct dm_cache_metadata *cmd, dm_oblock_t b) |
| 802 | { | 819 | { |
| 803 | return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root, | 820 | return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root, |
| 804 | from_dblock(b), &cmd->discard_root); | 821 | from_oblock(b), &cmd->discard_root); |
| 805 | } | 822 | } |
| 806 | 823 | ||
| 807 | static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b, | 824 | static int __is_discarded(struct dm_cache_metadata *cmd, dm_oblock_t b, |
| 808 | bool *is_discarded) | 825 | bool *is_discarded) |
| 809 | { | 826 | { |
| 810 | return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root, | 827 | return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root, |
| 811 | from_dblock(b), &cmd->discard_root, | 828 | from_oblock(b), &cmd->discard_root, |
| 812 | is_discarded); | 829 | is_discarded); |
| 813 | } | 830 | } |
| 814 | 831 | ||
| 815 | static int __discard(struct dm_cache_metadata *cmd, | 832 | static int __discard(struct dm_cache_metadata *cmd, |
| 816 | dm_dblock_t dblock, bool discard) | 833 | dm_oblock_t dblock, bool discard) |
| 817 | { | 834 | { |
| 818 | int r; | 835 | int r; |
| 819 | 836 | ||
| @@ -826,7 +843,7 @@ static int __discard(struct dm_cache_metadata *cmd, | |||
| 826 | } | 843 | } |
| 827 | 844 | ||
| 828 | int dm_cache_set_discard(struct dm_cache_metadata *cmd, | 845 | int dm_cache_set_discard(struct dm_cache_metadata *cmd, |
| 829 | dm_dblock_t dblock, bool discard) | 846 | dm_oblock_t dblock, bool discard) |
| 830 | { | 847 | { |
| 831 | int r; | 848 | int r; |
| 832 | 849 | ||
| @@ -844,8 +861,8 @@ static int __load_discards(struct dm_cache_metadata *cmd, | |||
| 844 | dm_block_t b; | 861 | dm_block_t b; |
| 845 | bool discard; | 862 | bool discard; |
| 846 | 863 | ||
| 847 | for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) { | 864 | for (b = 0; b < from_oblock(cmd->discard_nr_blocks); b++) { |
| 848 | dm_dblock_t dblock = to_dblock(b); | 865 | dm_oblock_t dblock = to_oblock(b); |
| 849 | 866 | ||
| 850 | if (cmd->clean_when_opened) { | 867 | if (cmd->clean_when_opened) { |
| 851 | r = __is_discarded(cmd, dblock, &discard); | 868 | r = __is_discarded(cmd, dblock, &discard); |
| @@ -1228,22 +1245,12 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po | |||
| 1228 | return 0; | 1245 | return 0; |
| 1229 | } | 1246 | } |
| 1230 | 1247 | ||
| 1231 | int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) | 1248 | static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint) |
| 1232 | { | 1249 | { |
| 1250 | struct dm_cache_metadata *cmd = context; | ||
| 1251 | __le32 value = cpu_to_le32(hint); | ||
| 1233 | int r; | 1252 | int r; |
| 1234 | 1253 | ||
| 1235 | down_write(&cmd->root_lock); | ||
| 1236 | r = begin_hints(cmd, policy); | ||
| 1237 | up_write(&cmd->root_lock); | ||
| 1238 | |||
| 1239 | return r; | ||
| 1240 | } | ||
| 1241 | |||
| 1242 | static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, | ||
| 1243 | uint32_t hint) | ||
| 1244 | { | ||
| 1245 | int r; | ||
| 1246 | __le32 value = cpu_to_le32(hint); | ||
| 1247 | __dm_bless_for_disk(&value); | 1254 | __dm_bless_for_disk(&value); |
| 1248 | 1255 | ||
| 1249 | r = dm_array_set_value(&cmd->hint_info, cmd->hint_root, | 1256 | r = dm_array_set_value(&cmd->hint_info, cmd->hint_root, |
| @@ -1253,16 +1260,25 @@ static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, | |||
| 1253 | return r; | 1260 | return r; |
| 1254 | } | 1261 | } |
| 1255 | 1262 | ||
| 1256 | int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, | 1263 | static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) |
| 1257 | uint32_t hint) | ||
| 1258 | { | 1264 | { |
| 1259 | int r; | 1265 | int r; |
| 1260 | 1266 | ||
| 1261 | if (!hints_array_initialized(cmd)) | 1267 | r = begin_hints(cmd, policy); |
| 1262 | return 0; | 1268 | if (r) { |
| 1269 | DMERR("begin_hints failed"); | ||
| 1270 | return r; | ||
| 1271 | } | ||
| 1272 | |||
| 1273 | return policy_walk_mappings(policy, save_hint, cmd); | ||
| 1274 | } | ||
| 1275 | |||
| 1276 | int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) | ||
| 1277 | { | ||
| 1278 | int r; | ||
| 1263 | 1279 | ||
| 1264 | down_write(&cmd->root_lock); | 1280 | down_write(&cmd->root_lock); |
| 1265 | r = save_hint(cmd, cblock, hint); | 1281 | r = write_hints(cmd, policy); |
| 1266 | up_write(&cmd->root_lock); | 1282 | up_write(&cmd->root_lock); |
| 1267 | 1283 | ||
| 1268 | return r; | 1284 | return r; |
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index cd906f14f98d..cd70a78623a3 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h | |||
| @@ -72,14 +72,14 @@ dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd); | |||
| 72 | 72 | ||
| 73 | int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, | 73 | int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, |
| 74 | sector_t discard_block_size, | 74 | sector_t discard_block_size, |
| 75 | dm_dblock_t new_nr_entries); | 75 | dm_oblock_t new_nr_entries); |
| 76 | 76 | ||
| 77 | typedef int (*load_discard_fn)(void *context, sector_t discard_block_size, | 77 | typedef int (*load_discard_fn)(void *context, sector_t discard_block_size, |
| 78 | dm_dblock_t dblock, bool discarded); | 78 | dm_oblock_t dblock, bool discarded); |
| 79 | int dm_cache_load_discards(struct dm_cache_metadata *cmd, | 79 | int dm_cache_load_discards(struct dm_cache_metadata *cmd, |
| 80 | load_discard_fn fn, void *context); | 80 | load_discard_fn fn, void *context); |
| 81 | 81 | ||
| 82 | int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_dblock_t dblock, bool discard); | 82 | int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_oblock_t dblock, bool discard); |
| 83 | 83 | ||
| 84 | int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock); | 84 | int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock); |
| 85 | int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock, dm_oblock_t oblock); | 85 | int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock, dm_oblock_t oblock); |
| @@ -128,14 +128,7 @@ void dm_cache_dump(struct dm_cache_metadata *cmd); | |||
| 128 | * rather than querying the policy for each cblock, we let it walk its data | 128 | * rather than querying the policy for each cblock, we let it walk its data |
| 129 | * structures and fill in the hints in whatever order it wishes. | 129 | * structures and fill in the hints in whatever order it wishes. |
| 130 | */ | 130 | */ |
| 131 | 131 | int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *p); | |
| 132 | int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *p); | ||
| 133 | |||
| 134 | /* | ||
| 135 | * requests hints for every cblock and stores in the metadata device. | ||
| 136 | */ | ||
| 137 | int dm_cache_save_hint(struct dm_cache_metadata *cmd, | ||
| 138 | dm_cblock_t cblock, uint32_t hint); | ||
| 139 | 132 | ||
| 140 | /* | 133 | /* |
| 141 | * Query method. Are all the blocks in the cache clean? | 134 | * Query method. Are all the blocks in the cache clean? |
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 074b9c8e4cf0..1bf4a71919ec 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c | |||
| @@ -237,9 +237,8 @@ struct cache { | |||
| 237 | /* | 237 | /* |
| 238 | * origin_blocks entries, discarded if set. | 238 | * origin_blocks entries, discarded if set. |
| 239 | */ | 239 | */ |
| 240 | dm_dblock_t discard_nr_blocks; | 240 | dm_oblock_t discard_nr_blocks; |
| 241 | unsigned long *discard_bitset; | 241 | unsigned long *discard_bitset; |
| 242 | uint32_t discard_block_size; /* a power of 2 times sectors per block */ | ||
| 243 | 242 | ||
| 244 | /* | 243 | /* |
| 245 | * Rather than reconstructing the table line for the status we just | 244 | * Rather than reconstructing the table line for the status we just |
| @@ -526,48 +525,33 @@ static dm_block_t block_div(dm_block_t b, uint32_t n) | |||
| 526 | return b; | 525 | return b; |
| 527 | } | 526 | } |
| 528 | 527 | ||
| 529 | static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) | 528 | static void set_discard(struct cache *cache, dm_oblock_t b) |
| 530 | { | ||
| 531 | uint32_t discard_blocks = cache->discard_block_size; | ||
| 532 | dm_block_t b = from_oblock(oblock); | ||
| 533 | |||
| 534 | if (!block_size_is_power_of_two(cache)) | ||
| 535 | discard_blocks = discard_blocks / cache->sectors_per_block; | ||
| 536 | else | ||
| 537 | discard_blocks >>= cache->sectors_per_block_shift; | ||
| 538 | |||
| 539 | b = block_div(b, discard_blocks); | ||
| 540 | |||
| 541 | return to_dblock(b); | ||
| 542 | } | ||
| 543 | |||
| 544 | static void set_discard(struct cache *cache, dm_dblock_t b) | ||
| 545 | { | 529 | { |
| 546 | unsigned long flags; | 530 | unsigned long flags; |
| 547 | 531 | ||
| 548 | atomic_inc(&cache->stats.discard_count); | 532 | atomic_inc(&cache->stats.discard_count); |
| 549 | 533 | ||
| 550 | spin_lock_irqsave(&cache->lock, flags); | 534 | spin_lock_irqsave(&cache->lock, flags); |
| 551 | set_bit(from_dblock(b), cache->discard_bitset); | 535 | set_bit(from_oblock(b), cache->discard_bitset); |
| 552 | spin_unlock_irqrestore(&cache->lock, flags); | 536 | spin_unlock_irqrestore(&cache->lock, flags); |
| 553 | } | 537 | } |
| 554 | 538 | ||
| 555 | static void clear_discard(struct cache *cache, dm_dblock_t b) | 539 | static void clear_discard(struct cache *cache, dm_oblock_t b) |
| 556 | { | 540 | { |
| 557 | unsigned long flags; | 541 | unsigned long flags; |
| 558 | 542 | ||
| 559 | spin_lock_irqsave(&cache->lock, flags); | 543 | spin_lock_irqsave(&cache->lock, flags); |
| 560 | clear_bit(from_dblock(b), cache->discard_bitset); | 544 | clear_bit(from_oblock(b), cache->discard_bitset); |
| 561 | spin_unlock_irqrestore(&cache->lock, flags); | 545 | spin_unlock_irqrestore(&cache->lock, flags); |
| 562 | } | 546 | } |
| 563 | 547 | ||
| 564 | static bool is_discarded(struct cache *cache, dm_dblock_t b) | 548 | static bool is_discarded(struct cache *cache, dm_oblock_t b) |
| 565 | { | 549 | { |
| 566 | int r; | 550 | int r; |
| 567 | unsigned long flags; | 551 | unsigned long flags; |
| 568 | 552 | ||
| 569 | spin_lock_irqsave(&cache->lock, flags); | 553 | spin_lock_irqsave(&cache->lock, flags); |
| 570 | r = test_bit(from_dblock(b), cache->discard_bitset); | 554 | r = test_bit(from_oblock(b), cache->discard_bitset); |
| 571 | spin_unlock_irqrestore(&cache->lock, flags); | 555 | spin_unlock_irqrestore(&cache->lock, flags); |
| 572 | 556 | ||
| 573 | return r; | 557 | return r; |
| @@ -579,8 +563,7 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) | |||
| 579 | unsigned long flags; | 563 | unsigned long flags; |
| 580 | 564 | ||
| 581 | spin_lock_irqsave(&cache->lock, flags); | 565 | spin_lock_irqsave(&cache->lock, flags); |
| 582 | r = test_bit(from_dblock(oblock_to_dblock(cache, b)), | 566 | r = test_bit(from_oblock(b), cache->discard_bitset); |
| 583 | cache->discard_bitset); | ||
| 584 | spin_unlock_irqrestore(&cache->lock, flags); | 567 | spin_unlock_irqrestore(&cache->lock, flags); |
| 585 | 568 | ||
| 586 | return r; | 569 | return r; |
| @@ -705,7 +688,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, | |||
| 705 | check_if_tick_bio_needed(cache, bio); | 688 | check_if_tick_bio_needed(cache, bio); |
| 706 | remap_to_origin(cache, bio); | 689 | remap_to_origin(cache, bio); |
| 707 | if (bio_data_dir(bio) == WRITE) | 690 | if (bio_data_dir(bio) == WRITE) |
| 708 | clear_discard(cache, oblock_to_dblock(cache, oblock)); | 691 | clear_discard(cache, oblock); |
| 709 | } | 692 | } |
| 710 | 693 | ||
| 711 | static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, | 694 | static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, |
| @@ -715,7 +698,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, | |||
| 715 | remap_to_cache(cache, bio, cblock); | 698 | remap_to_cache(cache, bio, cblock); |
| 716 | if (bio_data_dir(bio) == WRITE) { | 699 | if (bio_data_dir(bio) == WRITE) { |
| 717 | set_dirty(cache, oblock, cblock); | 700 | set_dirty(cache, oblock, cblock); |
| 718 | clear_discard(cache, oblock_to_dblock(cache, oblock)); | 701 | clear_discard(cache, oblock); |
| 719 | } | 702 | } |
| 720 | } | 703 | } |
| 721 | 704 | ||
| @@ -1288,14 +1271,14 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) | |||
| 1288 | static void process_discard_bio(struct cache *cache, struct bio *bio) | 1271 | static void process_discard_bio(struct cache *cache, struct bio *bio) |
| 1289 | { | 1272 | { |
| 1290 | dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, | 1273 | dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, |
| 1291 | cache->discard_block_size); | 1274 | cache->sectors_per_block); |
| 1292 | dm_block_t end_block = bio_end_sector(bio); | 1275 | dm_block_t end_block = bio_end_sector(bio); |
| 1293 | dm_block_t b; | 1276 | dm_block_t b; |
| 1294 | 1277 | ||
| 1295 | end_block = block_div(end_block, cache->discard_block_size); | 1278 | end_block = block_div(end_block, cache->sectors_per_block); |
| 1296 | 1279 | ||
| 1297 | for (b = start_block; b < end_block; b++) | 1280 | for (b = start_block; b < end_block; b++) |
| 1298 | set_discard(cache, to_dblock(b)); | 1281 | set_discard(cache, to_oblock(b)); |
| 1299 | 1282 | ||
| 1300 | bio_endio(bio, 0); | 1283 | bio_endio(bio, 0); |
| 1301 | } | 1284 | } |
| @@ -2171,35 +2154,6 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca, | |||
| 2171 | return 0; | 2154 | return 0; |
| 2172 | } | 2155 | } |
| 2173 | 2156 | ||
| 2174 | /* | ||
| 2175 | * We want the discard block size to be a power of two, at least the size | ||
| 2176 | * of the cache block size, and have no more than 2^14 discard blocks | ||
| 2177 | * across the origin. | ||
| 2178 | */ | ||
| 2179 | #define MAX_DISCARD_BLOCKS (1 << 14) | ||
| 2180 | |||
| 2181 | static bool too_many_discard_blocks(sector_t discard_block_size, | ||
| 2182 | sector_t origin_size) | ||
| 2183 | { | ||
| 2184 | (void) sector_div(origin_size, discard_block_size); | ||
| 2185 | |||
| 2186 | return origin_size > MAX_DISCARD_BLOCKS; | ||
| 2187 | } | ||
| 2188 | |||
| 2189 | static sector_t calculate_discard_block_size(sector_t cache_block_size, | ||
| 2190 | sector_t origin_size) | ||
| 2191 | { | ||
| 2192 | sector_t discard_block_size; | ||
| 2193 | |||
| 2194 | discard_block_size = roundup_pow_of_two(cache_block_size); | ||
| 2195 | |||
| 2196 | if (origin_size) | ||
| 2197 | while (too_many_discard_blocks(discard_block_size, origin_size)) | ||
| 2198 | discard_block_size *= 2; | ||
| 2199 | |||
| 2200 | return discard_block_size; | ||
| 2201 | } | ||
| 2202 | |||
| 2203 | #define DEFAULT_MIGRATION_THRESHOLD 2048 | 2157 | #define DEFAULT_MIGRATION_THRESHOLD 2048 |
| 2204 | 2158 | ||
| 2205 | static int cache_create(struct cache_args *ca, struct cache **result) | 2159 | static int cache_create(struct cache_args *ca, struct cache **result) |
| @@ -2321,16 +2275,13 @@ static int cache_create(struct cache_args *ca, struct cache **result) | |||
| 2321 | } | 2275 | } |
| 2322 | clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); | 2276 | clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); |
| 2323 | 2277 | ||
| 2324 | cache->discard_block_size = | 2278 | cache->discard_nr_blocks = cache->origin_blocks; |
| 2325 | calculate_discard_block_size(cache->sectors_per_block, | 2279 | cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks)); |
| 2326 | cache->origin_sectors); | ||
| 2327 | cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks); | ||
| 2328 | cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); | ||
| 2329 | if (!cache->discard_bitset) { | 2280 | if (!cache->discard_bitset) { |
| 2330 | *error = "could not allocate discard bitset"; | 2281 | *error = "could not allocate discard bitset"; |
| 2331 | goto bad; | 2282 | goto bad; |
| 2332 | } | 2283 | } |
| 2333 | clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); | 2284 | clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks)); |
| 2334 | 2285 | ||
| 2335 | cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); | 2286 | cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); |
| 2336 | if (IS_ERR(cache->copier)) { | 2287 | if (IS_ERR(cache->copier)) { |
| @@ -2614,16 +2565,16 @@ static int write_discard_bitset(struct cache *cache) | |||
| 2614 | { | 2565 | { |
| 2615 | unsigned i, r; | 2566 | unsigned i, r; |
| 2616 | 2567 | ||
| 2617 | r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, | 2568 | r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block, |
| 2618 | cache->discard_nr_blocks); | 2569 | cache->origin_blocks); |
| 2619 | if (r) { | 2570 | if (r) { |
| 2620 | DMERR("could not resize on-disk discard bitset"); | 2571 | DMERR("could not resize on-disk discard bitset"); |
| 2621 | return r; | 2572 | return r; |
| 2622 | } | 2573 | } |
| 2623 | 2574 | ||
| 2624 | for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { | 2575 | for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) { |
| 2625 | r = dm_cache_set_discard(cache->cmd, to_dblock(i), | 2576 | r = dm_cache_set_discard(cache->cmd, to_oblock(i), |
| 2626 | is_discarded(cache, to_dblock(i))); | 2577 | is_discarded(cache, to_oblock(i))); |
| 2627 | if (r) | 2578 | if (r) |
| 2628 | return r; | 2579 | return r; |
| 2629 | } | 2580 | } |
| @@ -2631,30 +2582,6 @@ static int write_discard_bitset(struct cache *cache) | |||
| 2631 | return 0; | 2582 | return 0; |
| 2632 | } | 2583 | } |
| 2633 | 2584 | ||
| 2634 | static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, | ||
| 2635 | uint32_t hint) | ||
| 2636 | { | ||
| 2637 | struct cache *cache = context; | ||
| 2638 | return dm_cache_save_hint(cache->cmd, cblock, hint); | ||
| 2639 | } | ||
| 2640 | |||
| 2641 | static int write_hints(struct cache *cache) | ||
| 2642 | { | ||
| 2643 | int r; | ||
| 2644 | |||
| 2645 | r = dm_cache_begin_hints(cache->cmd, cache->policy); | ||
| 2646 | if (r) { | ||
| 2647 | DMERR("dm_cache_begin_hints failed"); | ||
| 2648 | return r; | ||
| 2649 | } | ||
| 2650 | |||
| 2651 | r = policy_walk_mappings(cache->policy, save_hint, cache); | ||
| 2652 | if (r) | ||
| 2653 | DMERR("policy_walk_mappings failed"); | ||
| 2654 | |||
| 2655 | return r; | ||
| 2656 | } | ||
| 2657 | |||
| 2658 | /* | 2585 | /* |
| 2659 | * returns true on success | 2586 | * returns true on success |
| 2660 | */ | 2587 | */ |
| @@ -2672,7 +2599,7 @@ static bool sync_metadata(struct cache *cache) | |||
| 2672 | 2599 | ||
| 2673 | save_stats(cache); | 2600 | save_stats(cache); |
| 2674 | 2601 | ||
| 2675 | r3 = write_hints(cache); | 2602 | r3 = dm_cache_write_hints(cache->cmd, cache->policy); |
| 2676 | if (r3) | 2603 | if (r3) |
| 2677 | DMERR("could not write hints"); | 2604 | DMERR("could not write hints"); |
| 2678 | 2605 | ||
| @@ -2720,16 +2647,14 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, | |||
| 2720 | } | 2647 | } |
| 2721 | 2648 | ||
| 2722 | static int load_discard(void *context, sector_t discard_block_size, | 2649 | static int load_discard(void *context, sector_t discard_block_size, |
| 2723 | dm_dblock_t dblock, bool discard) | 2650 | dm_oblock_t oblock, bool discard) |
| 2724 | { | 2651 | { |
| 2725 | struct cache *cache = context; | 2652 | struct cache *cache = context; |
| 2726 | 2653 | ||
| 2727 | /* FIXME: handle mis-matched block size */ | ||
| 2728 | |||
| 2729 | if (discard) | 2654 | if (discard) |
| 2730 | set_discard(cache, dblock); | 2655 | set_discard(cache, oblock); |
| 2731 | else | 2656 | else |
| 2732 | clear_discard(cache, dblock); | 2657 | clear_discard(cache, oblock); |
| 2733 | 2658 | ||
| 2734 | return 0; | 2659 | return 0; |
| 2735 | } | 2660 | } |
| @@ -3120,8 +3045,8 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits) | |||
| 3120 | /* | 3045 | /* |
| 3121 | * FIXME: these limits may be incompatible with the cache device | 3046 | * FIXME: these limits may be incompatible with the cache device |
| 3122 | */ | 3047 | */ |
| 3123 | limits->max_discard_sectors = cache->discard_block_size * 1024; | 3048 | limits->max_discard_sectors = cache->sectors_per_block; |
| 3124 | limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; | 3049 | limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT; |
| 3125 | } | 3050 | } |
| 3126 | 3051 | ||
| 3127 | static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) | 3052 | static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) |
| @@ -3145,7 +3070,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) | |||
| 3145 | 3070 | ||
| 3146 | static struct target_type cache_target = { | 3071 | static struct target_type cache_target = { |
| 3147 | .name = "cache", | 3072 | .name = "cache", |
| 3148 | .version = {1, 3, 0}, | 3073 | .version = {1, 4, 0}, |
| 3149 | .module = THIS_MODULE, | 3074 | .module = THIS_MODULE, |
| 3150 | .ctr = cache_ctr, | 3075 | .ctr = cache_ctr, |
| 3151 | .dtr = cache_dtr, | 3076 | .dtr = cache_dtr, |
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c new file mode 100644 index 000000000000..414dad4cb49b --- /dev/null +++ b/drivers/md/dm-era-target.c | |||
| @@ -0,0 +1,1746 @@ | |||
| 1 | #include "dm.h" | ||
| 2 | #include "persistent-data/dm-transaction-manager.h" | ||
| 3 | #include "persistent-data/dm-bitset.h" | ||
| 4 | #include "persistent-data/dm-space-map.h" | ||
| 5 | |||
| 6 | #include <linux/dm-io.h> | ||
| 7 | #include <linux/dm-kcopyd.h> | ||
| 8 | #include <linux/init.h> | ||
| 9 | #include <linux/mempool.h> | ||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/vmalloc.h> | ||
| 13 | |||
| 14 | #define DM_MSG_PREFIX "era" | ||
| 15 | |||
| 16 | #define SUPERBLOCK_LOCATION 0 | ||
| 17 | #define SUPERBLOCK_MAGIC 2126579579 | ||
| 18 | #define SUPERBLOCK_CSUM_XOR 146538381 | ||
| 19 | #define MIN_ERA_VERSION 1 | ||
| 20 | #define MAX_ERA_VERSION 1 | ||
| 21 | #define INVALID_WRITESET_ROOT SUPERBLOCK_LOCATION | ||
| 22 | #define MIN_BLOCK_SIZE 8 | ||
| 23 | |||
| 24 | /*---------------------------------------------------------------- | ||
| 25 | * Writeset | ||
| 26 | *--------------------------------------------------------------*/ | ||
| 27 | struct writeset_metadata { | ||
| 28 | uint32_t nr_bits; | ||
| 29 | dm_block_t root; | ||
| 30 | }; | ||
| 31 | |||
| 32 | struct writeset { | ||
| 33 | struct writeset_metadata md; | ||
| 34 | |||
| 35 | /* | ||
| 36 | * An in core copy of the bits to save constantly doing look ups on | ||
| 37 | * disk. | ||
| 38 | */ | ||
| 39 | unsigned long *bits; | ||
| 40 | }; | ||
| 41 | |||
| 42 | /* | ||
| 43 | * This does not free off the on disk bitset as this will normally be done | ||
| 44 | * after digesting into the era array. | ||
| 45 | */ | ||
| 46 | static void writeset_free(struct writeset *ws) | ||
| 47 | { | ||
| 48 | vfree(ws->bits); | ||
| 49 | } | ||
| 50 | |||
| 51 | static int setup_on_disk_bitset(struct dm_disk_bitset *info, | ||
| 52 | unsigned nr_bits, dm_block_t *root) | ||
| 53 | { | ||
| 54 | int r; | ||
| 55 | |||
| 56 | r = dm_bitset_empty(info, root); | ||
| 57 | if (r) | ||
| 58 | return r; | ||
| 59 | |||
| 60 | return dm_bitset_resize(info, *root, 0, nr_bits, false, root); | ||
| 61 | } | ||
| 62 | |||
| 63 | static size_t bitset_size(unsigned nr_bits) | ||
| 64 | { | ||
| 65 | return sizeof(unsigned long) * dm_div_up(nr_bits, BITS_PER_LONG); | ||
| 66 | } | ||
| 67 | |||
| 68 | /* | ||
| 69 | * Allocates memory for the in core bitset. | ||
| 70 | */ | ||
| 71 | static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) | ||
| 72 | { | ||
| 73 | ws->md.nr_bits = nr_blocks; | ||
| 74 | ws->md.root = INVALID_WRITESET_ROOT; | ||
| 75 | ws->bits = vzalloc(bitset_size(nr_blocks)); | ||
| 76 | if (!ws->bits) { | ||
| 77 | DMERR("%s: couldn't allocate in memory bitset", __func__); | ||
| 78 | return -ENOMEM; | ||
| 79 | } | ||
| 80 | |||
| 81 | return 0; | ||
| 82 | } | ||
| 83 | |||
| 84 | /* | ||
| 85 | * Wipes the in-core bitset, and creates a new on disk bitset. | ||
| 86 | */ | ||
| 87 | static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws) | ||
| 88 | { | ||
| 89 | int r; | ||
| 90 | |||
| 91 | memset(ws->bits, 0, bitset_size(ws->md.nr_bits)); | ||
| 92 | |||
| 93 | r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root); | ||
| 94 | if (r) { | ||
| 95 | DMERR("%s: setup_on_disk_bitset failed", __func__); | ||
| 96 | return r; | ||
| 97 | } | ||
| 98 | |||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 102 | static bool writeset_marked(struct writeset *ws, dm_block_t block) | ||
| 103 | { | ||
| 104 | return test_bit(block, ws->bits); | ||
| 105 | } | ||
| 106 | |||
| 107 | static int writeset_marked_on_disk(struct dm_disk_bitset *info, | ||
| 108 | struct writeset_metadata *m, dm_block_t block, | ||
| 109 | bool *result) | ||
| 110 | { | ||
| 111 | dm_block_t old = m->root; | ||
| 112 | |||
| 113 | /* | ||
| 114 | * The bitset was flushed when it was archived, so we know there'll | ||
| 115 | * be no change to the root. | ||
| 116 | */ | ||
| 117 | int r = dm_bitset_test_bit(info, m->root, block, &m->root, result); | ||
| 118 | if (r) { | ||
| 119 | DMERR("%s: dm_bitset_test_bit failed", __func__); | ||
| 120 | return r; | ||
| 121 | } | ||
| 122 | |||
| 123 | BUG_ON(m->root != old); | ||
| 124 | |||
| 125 | return r; | ||
| 126 | } | ||
| 127 | |||
| 128 | /* | ||
| 129 | * Returns < 0 on error, 0 if the bit wasn't previously set, 1 if it was. | ||
| 130 | */ | ||
| 131 | static int writeset_test_and_set(struct dm_disk_bitset *info, | ||
| 132 | struct writeset *ws, uint32_t block) | ||
| 133 | { | ||
| 134 | int r; | ||
| 135 | |||
| 136 | if (!test_and_set_bit(block, ws->bits)) { | ||
| 137 | r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root); | ||
| 138 | if (r) { | ||
| 139 | /* FIXME: fail mode */ | ||
| 140 | return r; | ||
| 141 | } | ||
| 142 | |||
| 143 | return 0; | ||
| 144 | } | ||
| 145 | |||
| 146 | return 1; | ||
| 147 | } | ||
| 148 | |||
| 149 | /*---------------------------------------------------------------- | ||
| 150 | * On disk metadata layout | ||
| 151 | *--------------------------------------------------------------*/ | ||
| 152 | #define SPACE_MAP_ROOT_SIZE 128 | ||
| 153 | #define UUID_LEN 16 | ||
| 154 | |||
| 155 | struct writeset_disk { | ||
| 156 | __le32 nr_bits; | ||
| 157 | __le64 root; | ||
| 158 | } __packed; | ||
| 159 | |||
| 160 | struct superblock_disk { | ||
| 161 | __le32 csum; | ||
| 162 | __le32 flags; | ||
| 163 | __le64 blocknr; | ||
| 164 | |||
| 165 | __u8 uuid[UUID_LEN]; | ||
| 166 | __le64 magic; | ||
| 167 | __le32 version; | ||
| 168 | |||
| 169 | __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; | ||
| 170 | |||
| 171 | __le32 data_block_size; | ||
| 172 | __le32 metadata_block_size; | ||
| 173 | __le32 nr_blocks; | ||
| 174 | |||
| 175 | __le32 current_era; | ||
| 176 | struct writeset_disk current_writeset; | ||
| 177 | |||
| 178 | /* | ||
| 179 | * Only these two fields are valid within the metadata snapshot. | ||
| 180 | */ | ||
| 181 | __le64 writeset_tree_root; | ||
| 182 | __le64 era_array_root; | ||
| 183 | |||
| 184 | __le64 metadata_snap; | ||
| 185 | } __packed; | ||
| 186 | |||
| 187 | /*---------------------------------------------------------------- | ||
| 188 | * Superblock validation | ||
| 189 | *--------------------------------------------------------------*/ | ||
| 190 | static void sb_prepare_for_write(struct dm_block_validator *v, | ||
| 191 | struct dm_block *b, | ||
| 192 | size_t sb_block_size) | ||
| 193 | { | ||
| 194 | struct superblock_disk *disk = dm_block_data(b); | ||
| 195 | |||
| 196 | disk->blocknr = cpu_to_le64(dm_block_location(b)); | ||
| 197 | disk->csum = cpu_to_le32(dm_bm_checksum(&disk->flags, | ||
| 198 | sb_block_size - sizeof(__le32), | ||
| 199 | SUPERBLOCK_CSUM_XOR)); | ||
| 200 | } | ||
| 201 | |||
| 202 | static int check_metadata_version(struct superblock_disk *disk) | ||
| 203 | { | ||
| 204 | uint32_t metadata_version = le32_to_cpu(disk->version); | ||
| 205 | if (metadata_version < MIN_ERA_VERSION || metadata_version > MAX_ERA_VERSION) { | ||
| 206 | DMERR("Era metadata version %u found, but only versions between %u and %u supported.", | ||
| 207 | metadata_version, MIN_ERA_VERSION, MAX_ERA_VERSION); | ||
| 208 | return -EINVAL; | ||
| 209 | } | ||
| 210 | |||
| 211 | return 0; | ||
| 212 | } | ||
| 213 | |||
| 214 | static int sb_check(struct dm_block_validator *v, | ||
| 215 | struct dm_block *b, | ||
| 216 | size_t sb_block_size) | ||
| 217 | { | ||
| 218 | struct superblock_disk *disk = dm_block_data(b); | ||
| 219 | __le32 csum_le; | ||
| 220 | |||
| 221 | if (dm_block_location(b) != le64_to_cpu(disk->blocknr)) { | ||
| 222 | DMERR("sb_check failed: blocknr %llu: wanted %llu", | ||
| 223 | le64_to_cpu(disk->blocknr), | ||
| 224 | (unsigned long long)dm_block_location(b)); | ||
| 225 | return -ENOTBLK; | ||
| 226 | } | ||
| 227 | |||
| 228 | if (le64_to_cpu(disk->magic) != SUPERBLOCK_MAGIC) { | ||
| 229 | DMERR("sb_check failed: magic %llu: wanted %llu", | ||
| 230 | le64_to_cpu(disk->magic), | ||
| 231 | (unsigned long long) SUPERBLOCK_MAGIC); | ||
| 232 | return -EILSEQ; | ||
| 233 | } | ||
| 234 | |||
| 235 | csum_le = cpu_to_le32(dm_bm_checksum(&disk->flags, | ||
| 236 | sb_block_size - sizeof(__le32), | ||
| 237 | SUPERBLOCK_CSUM_XOR)); | ||
| 238 | if (csum_le != disk->csum) { | ||
| 239 | DMERR("sb_check failed: csum %u: wanted %u", | ||
| 240 | le32_to_cpu(csum_le), le32_to_cpu(disk->csum)); | ||
| 241 | return -EILSEQ; | ||
| 242 | } | ||
| 243 | |||
| 244 | return check_metadata_version(disk); | ||
| 245 | } | ||
| 246 | |||
| 247 | static struct dm_block_validator sb_validator = { | ||
| 248 | .name = "superblock", | ||
| 249 | .prepare_for_write = sb_prepare_for_write, | ||
| 250 | .check = sb_check | ||
| 251 | }; | ||
| 252 | |||
| 253 | /*---------------------------------------------------------------- | ||
| 254 | * Low level metadata handling | ||
| 255 | *--------------------------------------------------------------*/ | ||
| 256 | #define DM_ERA_METADATA_BLOCK_SIZE 4096 | ||
| 257 | #define DM_ERA_METADATA_CACHE_SIZE 64 | ||
| 258 | #define ERA_MAX_CONCURRENT_LOCKS 5 | ||
| 259 | |||
| 260 | struct era_metadata { | ||
| 261 | struct block_device *bdev; | ||
| 262 | struct dm_block_manager *bm; | ||
| 263 | struct dm_space_map *sm; | ||
| 264 | struct dm_transaction_manager *tm; | ||
| 265 | |||
| 266 | dm_block_t block_size; | ||
| 267 | uint32_t nr_blocks; | ||
| 268 | |||
| 269 | uint32_t current_era; | ||
| 270 | |||
| 271 | /* | ||
| 272 | * We preallocate 2 writesets. When an era rolls over we | ||
| 273 | * switch between them. This means the allocation is done at | ||
| 274 | * preresume time, rather than on the io path. | ||
| 275 | */ | ||
| 276 | struct writeset writesets[2]; | ||
| 277 | struct writeset *current_writeset; | ||
| 278 | |||
| 279 | dm_block_t writeset_tree_root; | ||
| 280 | dm_block_t era_array_root; | ||
| 281 | |||
| 282 | struct dm_disk_bitset bitset_info; | ||
| 283 | struct dm_btree_info writeset_tree_info; | ||
| 284 | struct dm_array_info era_array_info; | ||
| 285 | |||
| 286 | dm_block_t metadata_snap; | ||
| 287 | |||
| 288 | /* | ||
| 289 | * A flag that is set whenever a writeset has been archived. | ||
| 290 | */ | ||
| 291 | bool archived_writesets; | ||
| 292 | |||
| 293 | /* | ||
| 294 | * Reading the space map root can fail, so we read it into this | ||
| 295 | * buffer before the superblock is locked and updated. | ||
| 296 | */ | ||
| 297 | __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; | ||
| 298 | }; | ||
| 299 | |||
| 300 | static int superblock_read_lock(struct era_metadata *md, | ||
| 301 | struct dm_block **sblock) | ||
| 302 | { | ||
| 303 | return dm_bm_read_lock(md->bm, SUPERBLOCK_LOCATION, | ||
| 304 | &sb_validator, sblock); | ||
| 305 | } | ||
| 306 | |||
| 307 | static int superblock_lock_zero(struct era_metadata *md, | ||
| 308 | struct dm_block **sblock) | ||
| 309 | { | ||
| 310 | return dm_bm_write_lock_zero(md->bm, SUPERBLOCK_LOCATION, | ||
| 311 | &sb_validator, sblock); | ||
| 312 | } | ||
| 313 | |||
| 314 | static int superblock_lock(struct era_metadata *md, | ||
| 315 | struct dm_block **sblock) | ||
| 316 | { | ||
| 317 | return dm_bm_write_lock(md->bm, SUPERBLOCK_LOCATION, | ||
| 318 | &sb_validator, sblock); | ||
| 319 | } | ||
| 320 | |||
| 321 | /* FIXME: duplication with cache and thin */ | ||
| 322 | static int superblock_all_zeroes(struct dm_block_manager *bm, bool *result) | ||
| 323 | { | ||
| 324 | int r; | ||
| 325 | unsigned i; | ||
| 326 | struct dm_block *b; | ||
| 327 | __le64 *data_le, zero = cpu_to_le64(0); | ||
| 328 | unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64); | ||
| 329 | |||
| 330 | /* | ||
| 331 | * We can't use a validator here - it may be all zeroes. | ||
| 332 | */ | ||
| 333 | r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &b); | ||
| 334 | if (r) | ||
| 335 | return r; | ||
| 336 | |||
| 337 | data_le = dm_block_data(b); | ||
| 338 | *result = true; | ||
| 339 | for (i = 0; i < sb_block_size; i++) { | ||
| 340 | if (data_le[i] != zero) { | ||
| 341 | *result = false; | ||
| 342 | break; | ||
| 343 | } | ||
| 344 | } | ||
| 345 | |||
| 346 | return dm_bm_unlock(b); | ||
| 347 | } | ||
| 348 | |||
| 349 | /*----------------------------------------------------------------*/ | ||
| 350 | |||
| 351 | static void ws_pack(const struct writeset_metadata *core, struct writeset_disk *disk) | ||
| 352 | { | ||
| 353 | disk->nr_bits = cpu_to_le32(core->nr_bits); | ||
| 354 | disk->root = cpu_to_le64(core->root); | ||
| 355 | } | ||
| 356 | |||
| 357 | static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata *core) | ||
| 358 | { | ||
| 359 | core->nr_bits = le32_to_cpu(disk->nr_bits); | ||
| 360 | core->root = le64_to_cpu(disk->root); | ||
| 361 | } | ||
| 362 | |||
| 363 | static void ws_inc(void *context, const void *value) | ||
| 364 | { | ||
| 365 | struct era_metadata *md = context; | ||
| 366 | struct writeset_disk ws_d; | ||
| 367 | dm_block_t b; | ||
| 368 | |||
| 369 | memcpy(&ws_d, value, sizeof(ws_d)); | ||
| 370 | b = le64_to_cpu(ws_d.root); | ||
| 371 | |||
| 372 | dm_tm_inc(md->tm, b); | ||
| 373 | } | ||
| 374 | |||
| 375 | static void ws_dec(void *context, const void *value) | ||
| 376 | { | ||
| 377 | struct era_metadata *md = context; | ||
| 378 | struct writeset_disk ws_d; | ||
| 379 | dm_block_t b; | ||
| 380 | |||
| 381 | memcpy(&ws_d, value, sizeof(ws_d)); | ||
| 382 | b = le64_to_cpu(ws_d.root); | ||
| 383 | |||
| 384 | dm_bitset_del(&md->bitset_info, b); | ||
| 385 | } | ||
| 386 | |||
| 387 | static int ws_eq(void *context, const void *value1, const void *value2) | ||
| 388 | { | ||
| 389 | return !memcmp(value1, value2, sizeof(struct writeset_metadata)); | ||
| 390 | } | ||
| 391 | |||
| 392 | /*----------------------------------------------------------------*/ | ||
| 393 | |||
| 394 | static void setup_writeset_tree_info(struct era_metadata *md) | ||
| 395 | { | ||
| 396 | struct dm_btree_value_type *vt = &md->writeset_tree_info.value_type; | ||
| 397 | md->writeset_tree_info.tm = md->tm; | ||
| 398 | md->writeset_tree_info.levels = 1; | ||
| 399 | vt->context = md; | ||
| 400 | vt->size = sizeof(struct writeset_disk); | ||
| 401 | vt->inc = ws_inc; | ||
| 402 | vt->dec = ws_dec; | ||
| 403 | vt->equal = ws_eq; | ||
| 404 | } | ||
| 405 | |||
| 406 | static void setup_era_array_info(struct era_metadata *md) | ||
| 407 | |||
| 408 | { | ||
| 409 | struct dm_btree_value_type vt; | ||
| 410 | vt.context = NULL; | ||
| 411 | vt.size = sizeof(__le32); | ||
| 412 | vt.inc = NULL; | ||
| 413 | vt.dec = NULL; | ||
| 414 | vt.equal = NULL; | ||
| 415 | |||
| 416 | dm_array_info_init(&md->era_array_info, md->tm, &vt); | ||
| 417 | } | ||
| 418 | |||
| 419 | static void setup_infos(struct era_metadata *md) | ||
| 420 | { | ||
| 421 | dm_disk_bitset_init(md->tm, &md->bitset_info); | ||
| 422 | setup_writeset_tree_info(md); | ||
| 423 | setup_era_array_info(md); | ||
| 424 | } | ||
| 425 | |||
| 426 | /*----------------------------------------------------------------*/ | ||
| 427 | |||
| 428 | static int create_fresh_metadata(struct era_metadata *md) | ||
| 429 | { | ||
| 430 | int r; | ||
| 431 | |||
| 432 | r = dm_tm_create_with_sm(md->bm, SUPERBLOCK_LOCATION, | ||
| 433 | &md->tm, &md->sm); | ||
| 434 | if (r < 0) { | ||
| 435 | DMERR("dm_tm_create_with_sm failed"); | ||
| 436 | return r; | ||
| 437 | } | ||
| 438 | |||
| 439 | setup_infos(md); | ||
| 440 | |||
| 441 | r = dm_btree_empty(&md->writeset_tree_info, &md->writeset_tree_root); | ||
| 442 | if (r) { | ||
| 443 | DMERR("couldn't create new writeset tree"); | ||
| 444 | goto bad; | ||
| 445 | } | ||
| 446 | |||
| 447 | r = dm_array_empty(&md->era_array_info, &md->era_array_root); | ||
| 448 | if (r) { | ||
| 449 | DMERR("couldn't create era array"); | ||
| 450 | goto bad; | ||
| 451 | } | ||
| 452 | |||
| 453 | return 0; | ||
| 454 | |||
| 455 | bad: | ||
| 456 | dm_sm_destroy(md->sm); | ||
| 457 | dm_tm_destroy(md->tm); | ||
| 458 | |||
| 459 | return r; | ||
| 460 | } | ||
| 461 | |||
| 462 | static int save_sm_root(struct era_metadata *md) | ||
| 463 | { | ||
| 464 | int r; | ||
| 465 | size_t metadata_len; | ||
| 466 | |||
| 467 | r = dm_sm_root_size(md->sm, &metadata_len); | ||
| 468 | if (r < 0) | ||
| 469 | return r; | ||
| 470 | |||
| 471 | return dm_sm_copy_root(md->sm, &md->metadata_space_map_root, | ||
| 472 | metadata_len); | ||
| 473 | } | ||
| 474 | |||
| 475 | static void copy_sm_root(struct era_metadata *md, struct superblock_disk *disk) | ||
| 476 | { | ||
| 477 | memcpy(&disk->metadata_space_map_root, | ||
| 478 | &md->metadata_space_map_root, | ||
| 479 | sizeof(md->metadata_space_map_root)); | ||
| 480 | } | ||
| 481 | |||
| 482 | /* | ||
| 483 | * Writes a superblock, including the static fields that don't get updated | ||
| 484 | * with every commit (possible optimisation here). 'md' should be fully | ||
| 485 | * constructed when this is called. | ||
| 486 | */ | ||
| 487 | static void prepare_superblock(struct era_metadata *md, struct superblock_disk *disk) | ||
| 488 | { | ||
| 489 | disk->magic = cpu_to_le64(SUPERBLOCK_MAGIC); | ||
| 490 | disk->flags = cpu_to_le32(0ul); | ||
| 491 | |||
| 492 | /* FIXME: can't keep blanking the uuid (uuid is currently unused though) */ | ||
| 493 | memset(disk->uuid, 0, sizeof(disk->uuid)); | ||
| 494 | disk->version = cpu_to_le32(MAX_ERA_VERSION); | ||
| 495 | |||
| 496 | copy_sm_root(md, disk); | ||
| 497 | |||
| 498 | disk->data_block_size = cpu_to_le32(md->block_size); | ||
| 499 | disk->metadata_block_size = cpu_to_le32(DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); | ||
| 500 | disk->nr_blocks = cpu_to_le32(md->nr_blocks); | ||
| 501 | disk->current_era = cpu_to_le32(md->current_era); | ||
| 502 | |||
| 503 | ws_pack(&md->current_writeset->md, &disk->current_writeset); | ||
| 504 | disk->writeset_tree_root = cpu_to_le64(md->writeset_tree_root); | ||
| 505 | disk->era_array_root = cpu_to_le64(md->era_array_root); | ||
| 506 | disk->metadata_snap = cpu_to_le64(md->metadata_snap); | ||
| 507 | } | ||
| 508 | |||
| 509 | static int write_superblock(struct era_metadata *md) | ||
| 510 | { | ||
| 511 | int r; | ||
| 512 | struct dm_block *sblock; | ||
| 513 | struct superblock_disk *disk; | ||
| 514 | |||
| 515 | r = save_sm_root(md); | ||
| 516 | if (r) { | ||
| 517 | DMERR("%s: save_sm_root failed", __func__); | ||
| 518 | return r; | ||
| 519 | } | ||
| 520 | |||
| 521 | r = superblock_lock_zero(md, &sblock); | ||
| 522 | if (r) | ||
| 523 | return r; | ||
| 524 | |||
| 525 | disk = dm_block_data(sblock); | ||
| 526 | prepare_superblock(md, disk); | ||
| 527 | |||
| 528 | return dm_tm_commit(md->tm, sblock); | ||
| 529 | } | ||
| 530 | |||
| 531 | /* | ||
| 532 | * Assumes block_size and the infos are set. | ||
| 533 | */ | ||
| 534 | static int format_metadata(struct era_metadata *md) | ||
| 535 | { | ||
| 536 | int r; | ||
| 537 | |||
| 538 | r = create_fresh_metadata(md); | ||
| 539 | if (r) | ||
| 540 | return r; | ||
| 541 | |||
| 542 | r = write_superblock(md); | ||
| 543 | if (r) { | ||
| 544 | dm_sm_destroy(md->sm); | ||
| 545 | dm_tm_destroy(md->tm); | ||
| 546 | return r; | ||
| 547 | } | ||
| 548 | |||
| 549 | return 0; | ||
| 550 | } | ||
| 551 | |||
/*
 * Open existing metadata: read the superblock and bring up the
 * transaction manager, space map and btree/array/bitset infos from the
 * roots recorded in it.
 */
static int open_metadata(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *disk;

	r = superblock_read_lock(md, &sblock);
	if (r) {
		DMERR("couldn't read_lock superblock");
		return r;
	}

	disk = dm_block_data(sblock);
	r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION,
			       disk->metadata_space_map_root,
			       sizeof(disk->metadata_space_map_root),
			       &md->tm, &md->sm);
	if (r) {
		DMERR("dm_tm_open_with_sm failed");
		goto bad;
	}

	setup_infos(md);

	md->block_size = le32_to_cpu(disk->data_block_size);
	md->nr_blocks = le32_to_cpu(disk->nr_blocks);
	md->current_era = le32_to_cpu(disk->current_era);

	md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root);
	md->era_array_root = le64_to_cpu(disk->era_array_root);
	md->metadata_snap = le64_to_cpu(disk->metadata_snap);
	/*
	 * Assume archived writesets exist so the worker kicks off a
	 * digest pass; it is harmless if the tree turns out empty.
	 */
	md->archived_writesets = true;

	return dm_bm_unlock(sblock);

bad:
	dm_bm_unlock(sblock);
	return r;
}
| 591 | |||
| 592 | static int open_or_format_metadata(struct era_metadata *md, | ||
| 593 | bool may_format) | ||
| 594 | { | ||
| 595 | int r; | ||
| 596 | bool unformatted = false; | ||
| 597 | |||
| 598 | r = superblock_all_zeroes(md->bm, &unformatted); | ||
| 599 | if (r) | ||
| 600 | return r; | ||
| 601 | |||
| 602 | if (unformatted) | ||
| 603 | return may_format ? format_metadata(md) : -EPERM; | ||
| 604 | |||
| 605 | return open_metadata(md); | ||
| 606 | } | ||
| 607 | |||
/*
 * Create the block manager for the metadata device, then open (or
 * optionally format) the persistent data structures on top of it.
 * On failure the block manager is destroyed again, so the caller owns
 * nothing on error.
 */
static int create_persistent_data_objects(struct era_metadata *md,
					  bool may_format)
{
	int r;

	md->bm = dm_block_manager_create(md->bdev, DM_ERA_METADATA_BLOCK_SIZE,
					 DM_ERA_METADATA_CACHE_SIZE,
					 ERA_MAX_CONCURRENT_LOCKS);
	if (IS_ERR(md->bm)) {
		DMERR("could not create block manager");
		return PTR_ERR(md->bm);
	}

	r = open_or_format_metadata(md, may_format);
	if (r)
		dm_block_manager_destroy(md->bm);

	return r;
}
| 627 | |||
/*
 * Tear down in reverse order of construction: space map, transaction
 * manager, then the block manager they are layered on.
 */
static void destroy_persistent_data_objects(struct era_metadata *md)
{
	dm_sm_destroy(md->sm);
	dm_tm_destroy(md->tm);
	dm_block_manager_destroy(md->bm);
}
| 634 | |||
/*
 * This waits until all era_map threads have picked up the new filter.
 *
 * Publish the new writeset with RCU semantics; readers access
 * md->current_writeset under rcu_read_lock() (see
 * metadata_current_marked()), and synchronize_rcu() guarantees no
 * reader still holds the old pointer when we return.
 */
static void swap_writeset(struct era_metadata *md, struct writeset *new_writeset)
{
	rcu_assign_pointer(md->current_writeset, new_writeset);
	synchronize_rcu();
}
| 643 | |||
/*----------------------------------------------------------------
 * Writesets get 'digested' into the main era array.
 *
 * We're using a coroutine here so the worker thread can do the digestion,
 * thus avoiding synchronisation of the metadata.  Digesting a whole
 * writeset in one go would cause too much latency.
 *--------------------------------------------------------------*/
struct digest {
	uint32_t era;			/* era of the writeset being digested */
	unsigned nr_bits, current_bit;	/* progress through the bitset */
	struct writeset_metadata writeset; /* on-disk writeset being consumed */
	__le32 value;			/* era value written to the era array */
	struct dm_disk_bitset info;	/* private bitset info (own cache) */

	/* Next coroutine step; NULL means the digest is idle/finished. */
	int (*step)(struct era_metadata *, struct digest *);
};
| 660 | |||
static int metadata_digest_lookup_writeset(struct era_metadata *md,
					   struct digest *d);

/*
 * Digest step: the writeset for d->era has been fully transcribed into
 * the era array, so remove it from the writeset tree and go back to
 * looking for the next archived writeset.
 */
static int metadata_digest_remove_writeset(struct era_metadata *md,
					   struct digest *d)
{
	int r;
	uint64_t key = d->era;

	r = dm_btree_remove(&md->writeset_tree_info, md->writeset_tree_root,
			    &key, &md->writeset_tree_root);
	if (r) {
		DMERR("%s: dm_btree_remove failed", __func__);
		return r;
	}

	d->step = metadata_digest_lookup_writeset;
	return 0;
}
| 680 | |||
/* Max era array updates per digest step, to bound worker latency. */
#define INSERTS_PER_STEP 100

/*
 * Digest step: copy up to INSERTS_PER_STEP marked bits from the
 * archived writeset into the era array (setting those blocks' era to
 * d->value).  Advances to the removal step once all bits are done.
 */
static int metadata_digest_transcribe_writeset(struct era_metadata *md,
					       struct digest *d)
{
	int r;
	bool marked;
	unsigned b, e = min(d->current_bit + INSERTS_PER_STEP, d->nr_bits);

	for (b = d->current_bit; b < e; b++) {
		r = writeset_marked_on_disk(&d->info, &d->writeset, b, &marked);
		if (r) {
			DMERR("%s: writeset_marked_on_disk failed", __func__);
			return r;
		}

		if (!marked)
			continue;

		__dm_bless_for_disk(&d->value);
		r = dm_array_set_value(&md->era_array_info, md->era_array_root,
				       b, &d->value, &md->era_array_root);
		if (r) {
			DMERR("%s: dm_array_set_value failed", __func__);
			return r;
		}
	}

	if (b == d->nr_bits)
		d->step = metadata_digest_remove_writeset;
	else
		d->current_bit = b;

	return 0;
}
| 716 | |||
/*
 * Digest step: find the oldest archived writeset (lowest era key) in
 * the writeset tree and set up the transcription state for it.  When
 * the tree is empty (-ENODATA from the lookup) the digest terminates
 * by clearing d->step.
 */
static int metadata_digest_lookup_writeset(struct era_metadata *md,
					   struct digest *d)
{
	int r;
	uint64_t key;
	struct writeset_disk disk;

	r = dm_btree_find_lowest_key(&md->writeset_tree_info,
				     md->writeset_tree_root, &key);
	if (r < 0)
		return r;

	d->era = key;

	r = dm_btree_lookup(&md->writeset_tree_info,
			    md->writeset_tree_root, &key, &disk);
	if (r) {
		if (r == -ENODATA) {
			/* no writesets left to digest */
			d->step = NULL;
			return 0;
		}

		DMERR("%s: dm_btree_lookup failed", __func__);
		return r;
	}

	ws_unpack(&disk, &d->writeset);
	d->value = cpu_to_le32(key);

	/* The device may have shrunk since the writeset was archived. */
	d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks);
	d->current_bit = 0;
	d->step = metadata_digest_transcribe_writeset;

	return 0;
}
| 752 | |||
/*
 * Begin digesting archived writesets.  A no-op if a digest is already
 * in progress (d->step set); otherwise resets the digest state and
 * points it at the lookup step.
 */
static int metadata_digest_start(struct era_metadata *md, struct digest *d)
{
	if (d->step)
		return 0;

	memset(d, 0, sizeof(*d));

	/*
	 * We initialise another bitset info to avoid any caching side
	 * effects with the previous one.
	 */
	dm_disk_bitset_init(md->tm, &d->info);
	d->step = metadata_digest_lookup_writeset;

	return 0;
}
| 769 | |||
| 770 | /*---------------------------------------------------------------- | ||
| 771 | * High level metadata interface. Target methods should use these, and not | ||
| 772 | * the lower level ones. | ||
| 773 | *--------------------------------------------------------------*/ | ||
| 774 | static struct era_metadata *metadata_open(struct block_device *bdev, | ||
| 775 | sector_t block_size, | ||
| 776 | bool may_format) | ||
| 777 | { | ||
| 778 | int r; | ||
| 779 | struct era_metadata *md = kzalloc(sizeof(*md), GFP_KERNEL); | ||
| 780 | |||
| 781 | if (!md) | ||
| 782 | return NULL; | ||
| 783 | |||
| 784 | md->bdev = bdev; | ||
| 785 | md->block_size = block_size; | ||
| 786 | |||
| 787 | md->writesets[0].md.root = INVALID_WRITESET_ROOT; | ||
| 788 | md->writesets[1].md.root = INVALID_WRITESET_ROOT; | ||
| 789 | md->current_writeset = &md->writesets[0]; | ||
| 790 | |||
| 791 | r = create_persistent_data_objects(md, may_format); | ||
| 792 | if (r) { | ||
| 793 | kfree(md); | ||
| 794 | return ERR_PTR(r); | ||
| 795 | } | ||
| 796 | |||
| 797 | return md; | ||
| 798 | } | ||
| 799 | |||
| 800 | static void metadata_close(struct era_metadata *md) | ||
| 801 | { | ||
| 802 | destroy_persistent_data_objects(md); | ||
| 803 | kfree(md); | ||
| 804 | } | ||
| 805 | |||
| 806 | static bool valid_nr_blocks(dm_block_t n) | ||
| 807 | { | ||
| 808 | /* | ||
| 809 | * dm_bitset restricts us to 2^32. test_bit & co. restrict us | ||
| 810 | * further to 2^31 - 1 | ||
| 811 | */ | ||
| 812 | return n < (1ull << 31); | ||
| 813 | } | ||
| 814 | |||
| 815 | static int metadata_resize(struct era_metadata *md, void *arg) | ||
| 816 | { | ||
| 817 | int r; | ||
| 818 | dm_block_t *new_size = arg; | ||
| 819 | __le32 value; | ||
| 820 | |||
| 821 | if (!valid_nr_blocks(*new_size)) { | ||
| 822 | DMERR("Invalid number of origin blocks %llu", | ||
| 823 | (unsigned long long) *new_size); | ||
| 824 | return -EINVAL; | ||
| 825 | } | ||
| 826 | |||
| 827 | writeset_free(&md->writesets[0]); | ||
| 828 | writeset_free(&md->writesets[1]); | ||
| 829 | |||
| 830 | r = writeset_alloc(&md->writesets[0], *new_size); | ||
| 831 | if (r) { | ||
| 832 | DMERR("%s: writeset_alloc failed for writeset 0", __func__); | ||
| 833 | return r; | ||
| 834 | } | ||
| 835 | |||
| 836 | r = writeset_alloc(&md->writesets[1], *new_size); | ||
| 837 | if (r) { | ||
| 838 | DMERR("%s: writeset_alloc failed for writeset 1", __func__); | ||
| 839 | return r; | ||
| 840 | } | ||
| 841 | |||
| 842 | value = cpu_to_le32(0u); | ||
| 843 | __dm_bless_for_disk(&value); | ||
| 844 | r = dm_array_resize(&md->era_array_info, md->era_array_root, | ||
| 845 | md->nr_blocks, *new_size, | ||
| 846 | &value, &md->era_array_root); | ||
| 847 | if (r) { | ||
| 848 | DMERR("%s: dm_array_resize failed", __func__); | ||
| 849 | return r; | ||
| 850 | } | ||
| 851 | |||
| 852 | md->nr_blocks = *new_size; | ||
| 853 | return 0; | ||
| 854 | } | ||
| 855 | |||
/*
 * Archive the current writeset: flush its on-disk bitset, insert it
 * into the writeset tree keyed by the current era, and invalidate the
 * in-core root so the writeset can be reused.  Sets
 * archived_writesets so the worker will digest it later.
 */
static int metadata_era_archive(struct era_metadata *md)
{
	int r;
	uint64_t keys[1];
	struct writeset_disk value;

	r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
			    &md->current_writeset->md.root);
	if (r) {
		DMERR("%s: dm_bitset_flush failed", __func__);
		return r;
	}

	/* Pack before invalidating the root: the packed copy keeps it. */
	ws_pack(&md->current_writeset->md, &value);
	md->current_writeset->md.root = INVALID_WRITESET_ROOT;

	keys[0] = md->current_era;
	__dm_bless_for_disk(&value);
	r = dm_btree_insert(&md->writeset_tree_info, md->writeset_tree_root,
			    keys, &value, &md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't insert writeset into btree", __func__);
		/* FIXME: fail mode */
		return r;
	}

	md->archived_writesets = true;

	return 0;
}
| 886 | |||
| 887 | static struct writeset *next_writeset(struct era_metadata *md) | ||
| 888 | { | ||
| 889 | return (md->current_writeset == &md->writesets[0]) ? | ||
| 890 | &md->writesets[1] : &md->writesets[0]; | ||
| 891 | } | ||
| 892 | |||
/*
 * Start a new era: initialise the inactive writeset, publish it (RCU,
 * waiting for in-flight mappers), then bump the era counter.
 */
static int metadata_new_era(struct era_metadata *md)
{
	int r;
	struct writeset *new_writeset = next_writeset(md);

	r = writeset_init(&md->bitset_info, new_writeset);
	if (r) {
		DMERR("%s: writeset_init failed", __func__);
		return r;
	}

	swap_writeset(md, new_writeset);
	md->current_era++;

	return 0;
}
| 909 | |||
/*
 * Roll the current era over: archive the active writeset (if it holds
 * a valid on-disk root) and begin a new era with a fresh writeset.
 */
static int metadata_era_rollover(struct era_metadata *md)
{
	int r;

	if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) {
		r = metadata_era_archive(md);
		if (r) {
			DMERR("%s: metadata_archive_era failed", __func__);
			/* FIXME: fail mode? */
			return r;
		}
	}

	r = metadata_new_era(md);
	if (r) {
		DMERR("%s: new era failed", __func__);
		/* FIXME: fail mode */
		return r;
	}

	return 0;
}
| 932 | |||
/*
 * Check whether @block is already marked in the current era's writeset.
 * RCU read side of swap_writeset(): the writeset pointer may be swapped
 * concurrently by the worker, so dereference it under rcu_read_lock().
 */
static bool metadata_current_marked(struct era_metadata *md, dm_block_t block)
{
	bool r;
	struct writeset *ws;

	rcu_read_lock();
	ws = rcu_dereference(md->current_writeset);
	r = writeset_marked(ws, block);
	rcu_read_unlock();

	return r;
}
| 945 | |||
/*
 * Commit all outstanding metadata changes: flush the active writeset's
 * bitset, save the space map root, pre-commit the transaction manager,
 * then rewrite the superblock and commit.  dm_tm_commit() releases the
 * superblock lock.
 */
static int metadata_commit(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;

	/*
	 * NOTE(review): SUPERBLOCK_LOCATION is used here as the
	 * "invalid root" sentinel — presumably it equals
	 * INVALID_WRITESET_ROOT; confirm against the definitions
	 * earlier in this file.
	 */
	if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) {
		r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
				    &md->current_writeset->md.root);
		if (r) {
			DMERR("%s: bitset flush failed", __func__);
			return r;
		}
	}

	r = save_sm_root(md);
	if (r) {
		DMERR("%s: save_sm_root failed", __func__);
		return r;
	}

	r = dm_tm_pre_commit(md->tm);
	if (r) {
		DMERR("%s: pre commit failed", __func__);
		return r;
	}

	r = superblock_lock(md, &sblock);
	if (r) {
		DMERR("%s: superblock lock failed", __func__);
		return r;
	}

	prepare_superblock(md, dm_block_data(sblock));

	return dm_tm_commit(md->tm, sblock);
}
| 982 | |||
/*
 * Checkpoint the metadata (currently just an era rollover).
 */
static int metadata_checkpoint(struct era_metadata *md)
{
	/*
	 * For now we just rollover, but later I want to put a check in to
	 * avoid this if the filter is still pretty fresh.
	 */
	return metadata_era_rollover(md);
}
| 991 | |||
/*
 * Metadata snapshots allow userland to access era data.
 *
 * Takes a snapshot by: rolling the era over, committing, then cloning
 * the superblock via a shadow operation and incrementing the refcounts
 * of the roots it references so they survive future commits.  The
 * clone's location is recorded in md->metadata_snap.  Only one snapshot
 * may exist at a time.
 */
static int metadata_take_snap(struct era_metadata *md)
{
	int r, inc;
	struct dm_block *clone;

	if (md->metadata_snap != SUPERBLOCK_LOCATION) {
		DMERR("%s: metadata snapshot already exists", __func__);
		return -EINVAL;
	}

	/* Archive the live writeset so the snap sees a consistent tree. */
	r = metadata_era_rollover(md);
	if (r) {
		DMERR("%s: era rollover failed", __func__);
		return r;
	}

	r = metadata_commit(md);
	if (r) {
		DMERR("%s: pre commit failed", __func__);
		return r;
	}

	/*
	 * Bump the superblock's refcount first so the shadow below is
	 * forced to copy rather than modify in place.
	 */
	r = dm_sm_inc_block(md->sm, SUPERBLOCK_LOCATION);
	if (r) {
		DMERR("%s: couldn't increment superblock", __func__);
		return r;
	}

	r = dm_tm_shadow_block(md->tm, SUPERBLOCK_LOCATION,
			       &sb_validator, &clone, &inc);
	if (r) {
		DMERR("%s: couldn't shadow superblock", __func__);
		dm_sm_dec_block(md->sm, SUPERBLOCK_LOCATION);
		return r;
	}
	BUG_ON(!inc);

	/* Pin the trees the snapshot references. */
	r = dm_sm_inc_block(md->sm, md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't inc writeset tree root", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_sm_inc_block(md->sm, md->era_array_root);
	if (r) {
		DMERR("%s: couldn't inc era tree root", __func__);
		dm_sm_dec_block(md->sm, md->writeset_tree_root);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	md->metadata_snap = dm_block_location(clone);

	r = dm_tm_unlock(md->tm, clone);
	if (r) {
		DMERR("%s: couldn't unlock clone", __func__);
		md->metadata_snap = SUPERBLOCK_LOCATION;
		return r;
	}

	return 0;
}
| 1058 | |||
/*
 * Drop the metadata snapshot: delete the cloned writeset tree and era
 * array, then release the superblock clone itself.  md->metadata_snap
 * is cleared up-front so that even on error the next commit records no
 * snapshot.
 */
static int metadata_drop_snap(struct era_metadata *md)
{
	int r;
	dm_block_t location;
	struct dm_block *clone;
	struct superblock_disk *disk;

	if (md->metadata_snap == SUPERBLOCK_LOCATION) {
		DMERR("%s: no snap to drop", __func__);
		return -EINVAL;
	}

	r = dm_tm_read_lock(md->tm, md->metadata_snap, &sb_validator, &clone);
	if (r) {
		DMERR("%s: couldn't read lock superblock clone", __func__);
		return r;
	}

	/*
	 * Whatever happens now we'll commit with no record of the metadata
	 * snap.
	 */
	md->metadata_snap = SUPERBLOCK_LOCATION;

	disk = dm_block_data(clone);
	r = dm_btree_del(&md->writeset_tree_info,
			 le64_to_cpu(disk->writeset_tree_root));
	if (r) {
		DMERR("%s: error deleting writeset tree clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_array_del(&md->era_array_info, le64_to_cpu(disk->era_array_root));
	if (r) {
		DMERR("%s: error deleting era array clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	/* Remember the location before unlocking invalidates 'clone'. */
	location = dm_block_location(clone);
	dm_tm_unlock(md->tm, clone);

	return dm_sm_dec_block(md->sm, location);
}
| 1104 | |||
/* Snapshot of metadata usage reported via the status line. */
struct metadata_stats {
	dm_block_t used;	/* metadata blocks in use */
	dm_block_t total;	/* total metadata blocks */
	dm_block_t snap;	/* metadata snap location (superblock if none) */
	uint32_t era;		/* current era counter */
};
| 1111 | |||
| 1112 | static int metadata_get_stats(struct era_metadata *md, void *ptr) | ||
| 1113 | { | ||
| 1114 | int r; | ||
| 1115 | struct metadata_stats *s = ptr; | ||
| 1116 | dm_block_t nr_free, nr_total; | ||
| 1117 | |||
| 1118 | r = dm_sm_get_nr_free(md->sm, &nr_free); | ||
| 1119 | if (r) { | ||
| 1120 | DMERR("dm_sm_get_nr_free returned %d", r); | ||
| 1121 | return r; | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | r = dm_sm_get_nr_blocks(md->sm, &nr_total); | ||
| 1125 | if (r) { | ||
| 1126 | DMERR("dm_pool_get_metadata_dev_size returned %d", r); | ||
| 1127 | return r; | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | s->used = nr_total - nr_free; | ||
| 1131 | s->total = nr_total; | ||
| 1132 | s->snap = md->metadata_snap; | ||
| 1133 | s->era = md->current_era; | ||
| 1134 | |||
| 1135 | return 0; | ||
| 1136 | } | ||
| 1137 | |||
| 1138 | /*----------------------------------------------------------------*/ | ||
| 1139 | |||
/* Per-target instance state. */
struct era {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;	/* congestion callback */

	struct dm_dev *metadata_dev;
	struct dm_dev *origin_dev;

	dm_block_t nr_blocks;		/* origin size in era blocks */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;	/* -1 if not a power of two */
	struct era_metadata *md;

	/* Single ordered worker that owns all metadata updates. */
	struct workqueue_struct *wq;
	struct work_struct worker;

	spinlock_t deferred_lock;	/* protects deferred_bios */
	struct bio_list deferred_bios;	/* writes awaiting marking/commit */

	spinlock_t rpc_lock;		/* protects rpc_calls */
	struct list_head rpc_calls;	/* pending metadata operations */

	struct digest digest;		/* writeset digestion coroutine */
	atomic_t suspended;		/* non-zero stops the worker */
};
| 1164 | |||
/*
 * A metadata operation handed to the worker thread; exactly one of
 * fn0/fn1 is non-NULL.  The caller blocks on 'complete'.
 */
struct rpc {
	struct list_head list;

	int (*fn0)(struct era_metadata *);		/* no-argument op */
	int (*fn1)(struct era_metadata *, void *);	/* op taking 'arg' */
	void *arg;
	int result;

	struct completion complete;
};
| 1175 | |||
/*----------------------------------------------------------------
 * Remapping.
 *---------------------------------------------------------------*/

/* The shift is precomputed in era_ctr(); -1 means not a power of two. */
static bool block_size_is_power_of_two(struct era *era)
{
	return era->sectors_per_block_shift >= 0;
}
| 1183 | |||
/*
 * Map a bio's start sector to its era block number, using a shift when
 * the block size is a power of two and 64-bit division otherwise.
 */
static dm_block_t get_block(struct era *era, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(era))
		(void) sector_div(block_nr, era->sectors_per_block);
	else
		block_nr >>= era->sectors_per_block_shift;

	return block_nr;
}
| 1195 | |||
/* Redirect the bio to the origin device; sector offset is unchanged. */
static void remap_to_origin(struct era *era, struct bio *bio)
{
	bio->bi_bdev = era->origin_dev->bdev;
}
| 1200 | |||
/*----------------------------------------------------------------
 * Worker thread
 *--------------------------------------------------------------*/

/* Kick the worker unless the target is suspended. */
static void wake_worker(struct era *era)
{
	if (!atomic_read(&era->suspended))
		queue_work(era->wq, &era->worker);
}
| 1209 | |||
/*
 * Run one step of the writeset digest coroutine.  On error the digest
 * is abandoned (step cleared); otherwise the worker re-queues itself
 * until the digest completes.
 */
static void process_old_eras(struct era *era)
{
	int r;

	if (!era->digest.step)
		return;

	r = era->digest.step(era->md, &era->digest);
	if (r < 0) {
		DMERR("%s: digest step failed, stopping digestion", __func__);
		era->digest.step = NULL;

	} else if (era->digest.step)
		wake_worker(era);
}
| 1225 | |||
/*
 * Mark the era blocks of all deferred write bios in the current
 * writeset, commit the metadata if any bit was newly set, then issue
 * (or on failure, error) the bios.  The commit must land before the
 * bios are issued so a crash never loses a mark for acknowledged data.
 */
static void process_deferred_bios(struct era *era)
{
	int r;
	struct bio_list deferred_bios, marked_bios;
	struct bio *bio;
	bool commit_needed = false;
	bool failed = false;

	bio_list_init(&deferred_bios);
	bio_list_init(&marked_bios);

	/* Take the whole deferred list under the lock. */
	spin_lock(&era->deferred_lock);
	bio_list_merge(&deferred_bios, &era->deferred_bios);
	bio_list_init(&era->deferred_bios);
	spin_unlock(&era->deferred_lock);

	while ((bio = bio_list_pop(&deferred_bios))) {
		r = writeset_test_and_set(&era->md->bitset_info,
					  era->md->current_writeset,
					  get_block(era, bio));
		if (r < 0) {
			/*
			 * This is bad news, we need to rollback.
			 * FIXME: finish.
			 */
			failed = true;

		} else if (r == 0)
			/* bit was newly set; must be committed before issue */
			commit_needed = true;

		bio_list_add(&marked_bios, bio);
	}

	if (commit_needed) {
		r = metadata_commit(era->md);
		if (r)
			failed = true;
	}

	if (failed)
		while ((bio = bio_list_pop(&marked_bios)))
			bio_io_error(bio);
	else
		while ((bio = bio_list_pop(&marked_bios)))
			generic_make_request(bio);
}
| 1272 | |||
/*
 * Execute all queued rpc calls, commit once afterwards, and wake the
 * callers.  A failed commit overwrites every call's result, since none
 * of their effects are durable.
 */
static void process_rpc_calls(struct era *era)
{
	int r;
	bool need_commit = false;
	struct list_head calls;
	struct rpc *rpc, *tmp;

	INIT_LIST_HEAD(&calls);
	spin_lock(&era->rpc_lock);
	list_splice_init(&era->rpc_calls, &calls);
	spin_unlock(&era->rpc_lock);

	list_for_each_entry_safe(rpc, tmp, &calls, list) {
		rpc->result = rpc->fn0 ? rpc->fn0(era->md) : rpc->fn1(era->md, rpc->arg);
		need_commit = true;
	}

	if (need_commit) {
		r = metadata_commit(era->md);
		if (r)
			list_for_each_entry_safe(rpc, tmp, &calls, list)
				rpc->result = r;
	}

	/* Callers may free their rpc as soon as complete() fires. */
	list_for_each_entry_safe(rpc, tmp, &calls, list)
		complete(&rpc->complete);
}
| 1300 | |||
/*
 * If any writesets have been archived since the last check, start
 * digesting them into the era array.
 */
static void kick_off_digest(struct era *era)
{
	if (era->md->archived_writesets) {
		era->md->archived_writesets = false;
		metadata_digest_start(era->md, &era->digest);
	}
}
| 1308 | |||
/*
 * Worker entry point.  All metadata mutation happens here, on a single
 * ordered workqueue, so these steps never race with each other.
 */
static void do_work(struct work_struct *ws)
{
	struct era *era = container_of(ws, struct era, worker);

	kick_off_digest(era);
	process_old_eras(era);
	process_deferred_bios(era);
	process_rpc_calls(era);
}
| 1318 | |||
/* Queue a write bio for marking/committing by the worker. */
static void defer_bio(struct era *era, struct bio *bio)
{
	spin_lock(&era->deferred_lock);
	bio_list_add(&era->deferred_bios, bio);
	spin_unlock(&era->deferred_lock);

	wake_worker(era);
}
| 1327 | |||
/*
 * Make an rpc call to the worker to change the metadata.
 *
 * Blocks the caller until the worker has executed the call (and its
 * commit).  Must not be called from the worker itself.
 */
static int perform_rpc(struct era *era, struct rpc *rpc)
{
	rpc->result = 0;
	init_completion(&rpc->complete);

	spin_lock(&era->rpc_lock);
	list_add(&rpc->list, &era->rpc_calls);
	spin_unlock(&era->rpc_lock);

	wake_worker(era);
	wait_for_completion(&rpc->complete);

	return rpc->result;
}
| 1345 | |||
| 1346 | static int in_worker0(struct era *era, int (*fn)(struct era_metadata *)) | ||
| 1347 | { | ||
| 1348 | struct rpc rpc; | ||
| 1349 | rpc.fn0 = fn; | ||
| 1350 | rpc.fn1 = NULL; | ||
| 1351 | |||
| 1352 | return perform_rpc(era, &rpc); | ||
| 1353 | } | ||
| 1354 | |||
| 1355 | static int in_worker1(struct era *era, | ||
| 1356 | int (*fn)(struct era_metadata *, void *), void *arg) | ||
| 1357 | { | ||
| 1358 | struct rpc rpc; | ||
| 1359 | rpc.fn0 = NULL; | ||
| 1360 | rpc.fn1 = fn; | ||
| 1361 | rpc.arg = arg; | ||
| 1362 | |||
| 1363 | return perform_rpc(era, &rpc); | ||
| 1364 | } | ||
| 1365 | |||
/* Allow wake_worker() to queue work again (resume path). */
static void start_worker(struct era *era)
{
	atomic_set(&era->suspended, 0);
}
| 1370 | |||
/*
 * Block further queueing and drain any work in flight (suspend path).
 */
static void stop_worker(struct era *era)
{
	atomic_set(&era->suspended, 1);
	flush_workqueue(era->wq);
}
| 1376 | |||
/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/* Report whether the backing device's queue is congested. */
static int dev_is_congested(struct dm_dev *dev, int bdi_bits)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	return bdi_congested(&q->backing_dev_info, bdi_bits);
}
| 1385 | |||
/* dm_target_callbacks hook: era is congested iff its origin is. */
static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
	struct era *era = container_of(cb, struct era, callbacks);
	return dev_is_congested(era->origin_dev, bdi_bits);
}
| 1391 | |||
| 1392 | static void era_destroy(struct era *era) | ||
| 1393 | { | ||
| 1394 | metadata_close(era->md); | ||
| 1395 | |||
| 1396 | if (era->wq) | ||
| 1397 | destroy_workqueue(era->wq); | ||
| 1398 | |||
| 1399 | if (era->origin_dev) | ||
| 1400 | dm_put_device(era->ti, era->origin_dev); | ||
| 1401 | |||
| 1402 | if (era->metadata_dev) | ||
| 1403 | dm_put_device(era->ti, era->metadata_dev); | ||
| 1404 | |||
| 1405 | kfree(era); | ||
| 1406 | } | ||
| 1407 | |||
/* Number of era blocks needed to cover the target, rounding up. */
static dm_block_t calc_nr_blocks(struct era *era)
{
	return dm_sector_div_up(era->ti->len, era->sectors_per_block);
}
| 1412 | |||
| 1413 | static bool valid_block_size(dm_block_t block_size) | ||
| 1414 | { | ||
| 1415 | bool greater_than_zero = block_size > 0; | ||
| 1416 | bool multiple_of_min_block_size = (block_size & (MIN_BLOCK_SIZE - 1)) == 0; | ||
| 1417 | |||
| 1418 | return greater_than_zero && multiple_of_min_block_size; | ||
| 1419 | } | ||
| 1420 | |||
/*
 * <metadata dev> <data dev> <data block size (sectors)>
 *
 * Constructor.  Opens both devices, validates the block size, opens or
 * formats the metadata, sizes the era array to the target length and
 * sets up the worker.  era_destroy() is used on all failure paths, so
 * it must cope with a partially built instance.
 */
static int era_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	int r;
	char dummy;
	struct era *era;
	struct era_metadata *md;

	if (argc != 3) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	era = kzalloc(sizeof(*era), GFP_KERNEL);
	if (!era) {
		ti->error = "Error allocating era structure";
		return -ENOMEM;
	}

	era->ti = ti;

	r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &era->metadata_dev);
	if (r) {
		ti->error = "Error opening metadata device";
		era_destroy(era);
		return -EINVAL;
	}

	r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &era->origin_dev);
	if (r) {
		ti->error = "Error opening data device";
		era_destroy(era);
		return -EINVAL;
	}

	r = sscanf(argv[2], "%u%c", &era->sectors_per_block, &dummy);
	if (r != 1) {
		ti->error = "Error parsing block size";
		era_destroy(era);
		return -EINVAL;
	}

	/*
	 * NOTE(review): max_io_len is set before valid_block_size() runs,
	 * so an invalid (e.g. zero) size reaches dm_set_target_max_io_len
	 * first — consider validating before using the value.
	 */
	r = dm_set_target_max_io_len(ti, era->sectors_per_block);
	if (r) {
		ti->error = "could not set max io len";
		era_destroy(era);
		return -EINVAL;
	}

	if (!valid_block_size(era->sectors_per_block)) {
		ti->error = "Invalid block size";
		era_destroy(era);
		return -EINVAL;
	}
	/* Precompute the shift; -1 flags "not a power of two" for get_block(). */
	if (era->sectors_per_block & (era->sectors_per_block - 1))
		era->sectors_per_block_shift = -1;
	else
		era->sectors_per_block_shift = __ffs(era->sectors_per_block);

	/*
	 * NOTE(review): metadata_open() can return NULL on allocation
	 * failure, which IS_ERR() does not catch — verify.
	 */
	md = metadata_open(era->metadata_dev->bdev, era->sectors_per_block, true);
	if (IS_ERR(md)) {
		ti->error = "Error reading metadata";
		era_destroy(era);
		return PTR_ERR(md);
	}
	era->md = md;

	era->nr_blocks = calc_nr_blocks(era);

	r = metadata_resize(era->md, &era->nr_blocks);
	if (r) {
		ti->error = "couldn't resize metadata";
		era_destroy(era);
		return -ENOMEM;
	}

	era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
	if (!era->wq) {
		ti->error = "could not create workqueue for metadata object";
		era_destroy(era);
		return -ENOMEM;
	}
	INIT_WORK(&era->worker, do_work);

	spin_lock_init(&era->deferred_lock);
	bio_list_init(&era->deferred_bios);

	spin_lock_init(&era->rpc_lock);
	INIT_LIST_HEAD(&era->rpc_calls);

	ti->private = era;
	ti->num_flush_bios = 1;
	ti->flush_supported = true;

	ti->num_discard_bios = 1;
	ti->discards_supported = true;
	era->callbacks.congested_fn = era_is_congested;
	dm_table_add_target_callbacks(ti->table, &era->callbacks);

	return 0;
}
| 1524 | |||
| 1525 | static void era_dtr(struct dm_target *ti) | ||
| 1526 | { | ||
| 1527 | era_destroy(ti->private); | ||
| 1528 | } | ||
| 1529 | |||
| 1530 | static int era_map(struct dm_target *ti, struct bio *bio) | ||
| 1531 | { | ||
| 1532 | struct era *era = ti->private; | ||
| 1533 | dm_block_t block = get_block(era, bio); | ||
| 1534 | |||
| 1535 | /* | ||
| 1536 | * All bios get remapped to the origin device. We do this now, but | ||
| 1537 | * it may not get issued until later. Depending on whether the | ||
| 1538 | * block is marked in this era. | ||
| 1539 | */ | ||
| 1540 | remap_to_origin(era, bio); | ||
| 1541 | |||
| 1542 | /* | ||
| 1543 | * REQ_FLUSH bios carry no data, so we're not interested in them. | ||
| 1544 | */ | ||
| 1545 | if (!(bio->bi_rw & REQ_FLUSH) && | ||
| 1546 | (bio_data_dir(bio) == WRITE) && | ||
| 1547 | !metadata_current_marked(era->md, block)) { | ||
| 1548 | defer_bio(era, bio); | ||
| 1549 | return DM_MAPIO_SUBMITTED; | ||
| 1550 | } | ||
| 1551 | |||
| 1552 | return DM_MAPIO_REMAPPED; | ||
| 1553 | } | ||
| 1554 | |||
| 1555 | static void era_postsuspend(struct dm_target *ti) | ||
| 1556 | { | ||
| 1557 | int r; | ||
| 1558 | struct era *era = ti->private; | ||
| 1559 | |||
| 1560 | r = in_worker0(era, metadata_era_archive); | ||
| 1561 | if (r) { | ||
| 1562 | DMERR("%s: couldn't archive current era", __func__); | ||
| 1563 | /* FIXME: fail mode */ | ||
| 1564 | } | ||
| 1565 | |||
| 1566 | stop_worker(era); | ||
| 1567 | } | ||
| 1568 | |||
| 1569 | static int era_preresume(struct dm_target *ti) | ||
| 1570 | { | ||
| 1571 | int r; | ||
| 1572 | struct era *era = ti->private; | ||
| 1573 | dm_block_t new_size = calc_nr_blocks(era); | ||
| 1574 | |||
| 1575 | if (era->nr_blocks != new_size) { | ||
| 1576 | r = in_worker1(era, metadata_resize, &new_size); | ||
| 1577 | if (r) | ||
| 1578 | return r; | ||
| 1579 | |||
| 1580 | era->nr_blocks = new_size; | ||
| 1581 | } | ||
| 1582 | |||
| 1583 | start_worker(era); | ||
| 1584 | |||
| 1585 | r = in_worker0(era, metadata_new_era); | ||
| 1586 | if (r) { | ||
| 1587 | DMERR("%s: metadata_era_rollover failed", __func__); | ||
| 1588 | return r; | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | return 0; | ||
| 1592 | } | ||
| 1593 | |||
/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <current era> <held metadata root | '-'>
 *
 * STATUSTYPE_INFO reports runtime statistics fetched via the worker;
 * STATUSTYPE_TABLE echoes back the constructor arguments.
 */
static void era_status(struct dm_target *ti, status_type_t type,
		       unsigned status_flags, char *result, unsigned maxlen)
{
	int r;
	struct era *era = ti->private;
	ssize_t sz = 0;	/* consumed implicitly by the DMEMIT macro */
	struct metadata_stats stats;
	char buf[BDEVNAME_SIZE];

	switch (type) {
	case STATUSTYPE_INFO:
		/* Stats are read under the worker to serialise with metadata ops. */
		r = in_worker1(era, metadata_get_stats, &stats);
		if (r)
			goto err;

		DMEMIT("%u %llu/%llu %u",
		       (unsigned) (DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
		       (unsigned long long) stats.used,
		       (unsigned long long) stats.total,
		       (unsigned) stats.era);

		/* A held metadata snapshot root, or '-' when none is held. */
		if (stats.snap != SUPERBLOCK_LOCATION)
			DMEMIT(" %llu", stats.snap);
		else
			DMEMIT(" -");
		break;

	case STATUSTYPE_TABLE:
		format_dev_t(buf, era->metadata_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, era->origin_dev->bdev->bd_dev);
		DMEMIT("%s %u", buf, era->sectors_per_block);
		break;
	}

	return;

err:
	DMEMIT("Error");
}
| 1640 | |||
| 1641 | static int era_message(struct dm_target *ti, unsigned argc, char **argv) | ||
| 1642 | { | ||
| 1643 | struct era *era = ti->private; | ||
| 1644 | |||
| 1645 | if (argc != 1) { | ||
| 1646 | DMERR("incorrect number of message arguments"); | ||
| 1647 | return -EINVAL; | ||
| 1648 | } | ||
| 1649 | |||
| 1650 | if (!strcasecmp(argv[0], "checkpoint")) | ||
| 1651 | return in_worker0(era, metadata_checkpoint); | ||
| 1652 | |||
| 1653 | if (!strcasecmp(argv[0], "take_metadata_snap")) | ||
| 1654 | return in_worker0(era, metadata_take_snap); | ||
| 1655 | |||
| 1656 | if (!strcasecmp(argv[0], "drop_metadata_snap")) | ||
| 1657 | return in_worker0(era, metadata_drop_snap); | ||
| 1658 | |||
| 1659 | DMERR("unsupported message '%s'", argv[0]); | ||
| 1660 | return -EINVAL; | ||
| 1661 | } | ||
| 1662 | |||
| 1663 | static sector_t get_dev_size(struct dm_dev *dev) | ||
| 1664 | { | ||
| 1665 | return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; | ||
| 1666 | } | ||
| 1667 | |||
| 1668 | static int era_iterate_devices(struct dm_target *ti, | ||
| 1669 | iterate_devices_callout_fn fn, void *data) | ||
| 1670 | { | ||
| 1671 | struct era *era = ti->private; | ||
| 1672 | return fn(ti, era->origin_dev, 0, get_dev_size(era->origin_dev), data); | ||
| 1673 | } | ||
| 1674 | |||
| 1675 | static int era_merge(struct dm_target *ti, struct bvec_merge_data *bvm, | ||
| 1676 | struct bio_vec *biovec, int max_size) | ||
| 1677 | { | ||
| 1678 | struct era *era = ti->private; | ||
| 1679 | struct request_queue *q = bdev_get_queue(era->origin_dev->bdev); | ||
| 1680 | |||
| 1681 | if (!q->merge_bvec_fn) | ||
| 1682 | return max_size; | ||
| 1683 | |||
| 1684 | bvm->bi_bdev = era->origin_dev->bdev; | ||
| 1685 | |||
| 1686 | return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); | ||
| 1687 | } | ||
| 1688 | |||
/*
 * Adjust the stacked queue limits so io_opt is a multiple of the era
 * block size (otherwise reset io_min/io_opt to the era block size).
 */
static void era_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct era *era = ti->private;
	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;

	/*
	 * If the system-determined stacked limits are compatible with the
	 * era device's blocksize (io_opt is a factor) do not override them.
	 *
	 * NOTE: do_div() divides io_opt_sectors in place and yields the
	 * remainder; short-circuit evaluation guarantees it only runs
	 * when io_opt_sectors >= sectors_per_block.
	 */
	if (io_opt_sectors < era->sectors_per_block ||
	    do_div(io_opt_sectors, era->sectors_per_block)) {
		blk_limits_io_min(limits, 0);
		blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT);
	}
}
| 1704 | |||
| 1705 | /*----------------------------------------------------------------*/ | ||
| 1706 | |||
/*
 * Registration table wiring the era callbacks into the device-mapper
 * core.  Version 1.0.0 is the initial release of this target.
 */
static struct target_type era_target = {
	.name = "era",
	.version = {1, 0, 0},
	.module = THIS_MODULE,
	.ctr = era_ctr,
	.dtr = era_dtr,
	.map = era_map,
	.postsuspend = era_postsuspend,
	.preresume = era_preresume,
	.status = era_status,
	.message = era_message,
	.iterate_devices = era_iterate_devices,
	.merge = era_merge,
	.io_hints = era_io_hints
};
| 1722 | |||
| 1723 | static int __init dm_era_init(void) | ||
| 1724 | { | ||
| 1725 | int r; | ||
| 1726 | |||
| 1727 | r = dm_register_target(&era_target); | ||
| 1728 | if (r) { | ||
| 1729 | DMERR("era target registration failed: %d", r); | ||
| 1730 | return r; | ||
| 1731 | } | ||
| 1732 | |||
| 1733 | return 0; | ||
| 1734 | } | ||
| 1735 | |||
/* Module exit: unregister the era target type. */
static void __exit dm_era_exit(void)
{
	dm_unregister_target(&era_target);
}
| 1740 | |||
| 1741 | module_init(dm_era_init); | ||
| 1742 | module_exit(dm_era_exit); | ||
| 1743 | |||
| 1744 | MODULE_DESCRIPTION(DM_NAME " era target"); | ||
| 1745 | MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); | ||
| 1746 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 422a9fdeb53e..aa009e865871 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
| @@ -93,10 +93,6 @@ struct multipath { | |||
| 93 | unsigned pg_init_count; /* Number of times pg_init called */ | 93 | unsigned pg_init_count; /* Number of times pg_init called */ |
| 94 | unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */ | 94 | unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */ |
| 95 | 95 | ||
| 96 | unsigned queue_size; | ||
| 97 | struct work_struct process_queued_ios; | ||
| 98 | struct list_head queued_ios; | ||
| 99 | |||
| 100 | struct work_struct trigger_event; | 96 | struct work_struct trigger_event; |
| 101 | 97 | ||
| 102 | /* | 98 | /* |
| @@ -121,9 +117,9 @@ typedef int (*action_fn) (struct pgpath *pgpath); | |||
| 121 | static struct kmem_cache *_mpio_cache; | 117 | static struct kmem_cache *_mpio_cache; |
| 122 | 118 | ||
| 123 | static struct workqueue_struct *kmultipathd, *kmpath_handlerd; | 119 | static struct workqueue_struct *kmultipathd, *kmpath_handlerd; |
| 124 | static void process_queued_ios(struct work_struct *work); | ||
| 125 | static void trigger_event(struct work_struct *work); | 120 | static void trigger_event(struct work_struct *work); |
| 126 | static void activate_path(struct work_struct *work); | 121 | static void activate_path(struct work_struct *work); |
| 122 | static int __pgpath_busy(struct pgpath *pgpath); | ||
| 127 | 123 | ||
| 128 | 124 | ||
| 129 | /*----------------------------------------------- | 125 | /*----------------------------------------------- |
| @@ -195,11 +191,9 @@ static struct multipath *alloc_multipath(struct dm_target *ti) | |||
| 195 | m = kzalloc(sizeof(*m), GFP_KERNEL); | 191 | m = kzalloc(sizeof(*m), GFP_KERNEL); |
| 196 | if (m) { | 192 | if (m) { |
| 197 | INIT_LIST_HEAD(&m->priority_groups); | 193 | INIT_LIST_HEAD(&m->priority_groups); |
| 198 | INIT_LIST_HEAD(&m->queued_ios); | ||
| 199 | spin_lock_init(&m->lock); | 194 | spin_lock_init(&m->lock); |
| 200 | m->queue_io = 1; | 195 | m->queue_io = 1; |
| 201 | m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; | 196 | m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; |
| 202 | INIT_WORK(&m->process_queued_ios, process_queued_ios); | ||
| 203 | INIT_WORK(&m->trigger_event, trigger_event); | 197 | INIT_WORK(&m->trigger_event, trigger_event); |
| 204 | init_waitqueue_head(&m->pg_init_wait); | 198 | init_waitqueue_head(&m->pg_init_wait); |
| 205 | mutex_init(&m->work_mutex); | 199 | mutex_init(&m->work_mutex); |
| @@ -256,13 +250,21 @@ static void clear_mapinfo(struct multipath *m, union map_info *info) | |||
| 256 | * Path selection | 250 | * Path selection |
| 257 | *-----------------------------------------------*/ | 251 | *-----------------------------------------------*/ |
| 258 | 252 | ||
| 259 | static void __pg_init_all_paths(struct multipath *m) | 253 | static int __pg_init_all_paths(struct multipath *m) |
| 260 | { | 254 | { |
| 261 | struct pgpath *pgpath; | 255 | struct pgpath *pgpath; |
| 262 | unsigned long pg_init_delay = 0; | 256 | unsigned long pg_init_delay = 0; |
| 263 | 257 | ||
| 258 | if (m->pg_init_in_progress || m->pg_init_disabled) | ||
| 259 | return 0; | ||
| 260 | |||
| 264 | m->pg_init_count++; | 261 | m->pg_init_count++; |
| 265 | m->pg_init_required = 0; | 262 | m->pg_init_required = 0; |
| 263 | |||
| 264 | /* Check here to reset pg_init_required */ | ||
| 265 | if (!m->current_pg) | ||
| 266 | return 0; | ||
| 267 | |||
| 266 | if (m->pg_init_delay_retry) | 268 | if (m->pg_init_delay_retry) |
| 267 | pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ? | 269 | pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ? |
| 268 | m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS); | 270 | m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS); |
| @@ -274,6 +276,7 @@ static void __pg_init_all_paths(struct multipath *m) | |||
| 274 | pg_init_delay)) | 276 | pg_init_delay)) |
| 275 | m->pg_init_in_progress++; | 277 | m->pg_init_in_progress++; |
| 276 | } | 278 | } |
| 279 | return m->pg_init_in_progress; | ||
| 277 | } | 280 | } |
| 278 | 281 | ||
| 279 | static void __switch_pg(struct multipath *m, struct pgpath *pgpath) | 282 | static void __switch_pg(struct multipath *m, struct pgpath *pgpath) |
| @@ -365,19 +368,26 @@ failed: | |||
| 365 | */ | 368 | */ |
| 366 | static int __must_push_back(struct multipath *m) | 369 | static int __must_push_back(struct multipath *m) |
| 367 | { | 370 | { |
| 368 | return (m->queue_if_no_path != m->saved_queue_if_no_path && | 371 | return (m->queue_if_no_path || |
| 369 | dm_noflush_suspending(m->ti)); | 372 | (m->queue_if_no_path != m->saved_queue_if_no_path && |
| 373 | dm_noflush_suspending(m->ti))); | ||
| 370 | } | 374 | } |
| 371 | 375 | ||
| 372 | static int map_io(struct multipath *m, struct request *clone, | 376 | #define pg_ready(m) (!(m)->queue_io && !(m)->pg_init_required) |
| 373 | union map_info *map_context, unsigned was_queued) | 377 | |
| 378 | /* | ||
| 379 | * Map cloned requests | ||
| 380 | */ | ||
| 381 | static int multipath_map(struct dm_target *ti, struct request *clone, | ||
| 382 | union map_info *map_context) | ||
| 374 | { | 383 | { |
| 375 | int r = DM_MAPIO_REMAPPED; | 384 | struct multipath *m = (struct multipath *) ti->private; |
| 385 | int r = DM_MAPIO_REQUEUE; | ||
| 376 | size_t nr_bytes = blk_rq_bytes(clone); | 386 | size_t nr_bytes = blk_rq_bytes(clone); |
| 377 | unsigned long flags; | 387 | unsigned long flags; |
| 378 | struct pgpath *pgpath; | 388 | struct pgpath *pgpath; |
| 379 | struct block_device *bdev; | 389 | struct block_device *bdev; |
| 380 | struct dm_mpath_io *mpio = map_context->ptr; | 390 | struct dm_mpath_io *mpio; |
| 381 | 391 | ||
| 382 | spin_lock_irqsave(&m->lock, flags); | 392 | spin_lock_irqsave(&m->lock, flags); |
| 383 | 393 | ||
| @@ -388,38 +398,33 @@ static int map_io(struct multipath *m, struct request *clone, | |||
| 388 | 398 | ||
| 389 | pgpath = m->current_pgpath; | 399 | pgpath = m->current_pgpath; |
| 390 | 400 | ||
| 391 | if (was_queued) | 401 | if (!pgpath) { |
| 392 | m->queue_size--; | 402 | if (!__must_push_back(m)) |
| 393 | 403 | r = -EIO; /* Failed */ | |
| 394 | if (m->pg_init_required) { | 404 | goto out_unlock; |
| 395 | if (!m->pg_init_in_progress) | 405 | } |
| 396 | queue_work(kmultipathd, &m->process_queued_ios); | 406 | if (!pg_ready(m)) { |
| 397 | r = DM_MAPIO_REQUEUE; | 407 | __pg_init_all_paths(m); |
| 398 | } else if ((pgpath && m->queue_io) || | 408 | goto out_unlock; |
| 399 | (!pgpath && m->queue_if_no_path)) { | 409 | } |
| 400 | /* Queue for the daemon to resubmit */ | 410 | if (set_mapinfo(m, map_context) < 0) |
| 401 | list_add_tail(&clone->queuelist, &m->queued_ios); | 411 | /* ENOMEM, requeue */ |
| 402 | m->queue_size++; | 412 | goto out_unlock; |
| 403 | if (!m->queue_io) | ||
| 404 | queue_work(kmultipathd, &m->process_queued_ios); | ||
| 405 | pgpath = NULL; | ||
| 406 | r = DM_MAPIO_SUBMITTED; | ||
| 407 | } else if (pgpath) { | ||
| 408 | bdev = pgpath->path.dev->bdev; | ||
| 409 | clone->q = bdev_get_queue(bdev); | ||
| 410 | clone->rq_disk = bdev->bd_disk; | ||
| 411 | } else if (__must_push_back(m)) | ||
| 412 | r = DM_MAPIO_REQUEUE; | ||
| 413 | else | ||
| 414 | r = -EIO; /* Failed */ | ||
| 415 | 413 | ||
| 414 | bdev = pgpath->path.dev->bdev; | ||
| 415 | clone->q = bdev_get_queue(bdev); | ||
| 416 | clone->rq_disk = bdev->bd_disk; | ||
| 417 | clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; | ||
| 418 | mpio = map_context->ptr; | ||
| 416 | mpio->pgpath = pgpath; | 419 | mpio->pgpath = pgpath; |
| 417 | mpio->nr_bytes = nr_bytes; | 420 | mpio->nr_bytes = nr_bytes; |
| 418 | 421 | if (pgpath->pg->ps.type->start_io) | |
| 419 | if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io) | 422 | pgpath->pg->ps.type->start_io(&pgpath->pg->ps, |
| 420 | pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path, | 423 | &pgpath->path, |
| 421 | nr_bytes); | 424 | nr_bytes); |
| 425 | r = DM_MAPIO_REMAPPED; | ||
| 422 | 426 | ||
| 427 | out_unlock: | ||
| 423 | spin_unlock_irqrestore(&m->lock, flags); | 428 | spin_unlock_irqrestore(&m->lock, flags); |
| 424 | 429 | ||
| 425 | return r; | 430 | return r; |
| @@ -440,76 +445,14 @@ static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path, | |||
| 440 | else | 445 | else |
| 441 | m->saved_queue_if_no_path = queue_if_no_path; | 446 | m->saved_queue_if_no_path = queue_if_no_path; |
| 442 | m->queue_if_no_path = queue_if_no_path; | 447 | m->queue_if_no_path = queue_if_no_path; |
| 443 | if (!m->queue_if_no_path && m->queue_size) | 448 | if (!m->queue_if_no_path) |
| 444 | queue_work(kmultipathd, &m->process_queued_ios); | 449 | dm_table_run_md_queue_async(m->ti->table); |
| 445 | 450 | ||
| 446 | spin_unlock_irqrestore(&m->lock, flags); | 451 | spin_unlock_irqrestore(&m->lock, flags); |
| 447 | 452 | ||
| 448 | return 0; | 453 | return 0; |
| 449 | } | 454 | } |
| 450 | 455 | ||
| 451 | /*----------------------------------------------------------------- | ||
| 452 | * The multipath daemon is responsible for resubmitting queued ios. | ||
| 453 | *---------------------------------------------------------------*/ | ||
| 454 | |||
| 455 | static void dispatch_queued_ios(struct multipath *m) | ||
| 456 | { | ||
| 457 | int r; | ||
| 458 | unsigned long flags; | ||
| 459 | union map_info *info; | ||
| 460 | struct request *clone, *n; | ||
| 461 | LIST_HEAD(cl); | ||
| 462 | |||
| 463 | spin_lock_irqsave(&m->lock, flags); | ||
| 464 | list_splice_init(&m->queued_ios, &cl); | ||
| 465 | spin_unlock_irqrestore(&m->lock, flags); | ||
| 466 | |||
| 467 | list_for_each_entry_safe(clone, n, &cl, queuelist) { | ||
| 468 | list_del_init(&clone->queuelist); | ||
| 469 | |||
| 470 | info = dm_get_rq_mapinfo(clone); | ||
| 471 | |||
| 472 | r = map_io(m, clone, info, 1); | ||
| 473 | if (r < 0) { | ||
| 474 | clear_mapinfo(m, info); | ||
| 475 | dm_kill_unmapped_request(clone, r); | ||
| 476 | } else if (r == DM_MAPIO_REMAPPED) | ||
| 477 | dm_dispatch_request(clone); | ||
| 478 | else if (r == DM_MAPIO_REQUEUE) { | ||
| 479 | clear_mapinfo(m, info); | ||
| 480 | dm_requeue_unmapped_request(clone); | ||
| 481 | } | ||
| 482 | } | ||
| 483 | } | ||
| 484 | |||
| 485 | static void process_queued_ios(struct work_struct *work) | ||
| 486 | { | ||
| 487 | struct multipath *m = | ||
| 488 | container_of(work, struct multipath, process_queued_ios); | ||
| 489 | struct pgpath *pgpath = NULL; | ||
| 490 | unsigned must_queue = 1; | ||
| 491 | unsigned long flags; | ||
| 492 | |||
| 493 | spin_lock_irqsave(&m->lock, flags); | ||
| 494 | |||
| 495 | if (!m->current_pgpath) | ||
| 496 | __choose_pgpath(m, 0); | ||
| 497 | |||
| 498 | pgpath = m->current_pgpath; | ||
| 499 | |||
| 500 | if ((pgpath && !m->queue_io) || | ||
| 501 | (!pgpath && !m->queue_if_no_path)) | ||
| 502 | must_queue = 0; | ||
| 503 | |||
| 504 | if (m->pg_init_required && !m->pg_init_in_progress && pgpath && | ||
| 505 | !m->pg_init_disabled) | ||
| 506 | __pg_init_all_paths(m); | ||
| 507 | |||
| 508 | spin_unlock_irqrestore(&m->lock, flags); | ||
| 509 | if (!must_queue) | ||
| 510 | dispatch_queued_ios(m); | ||
| 511 | } | ||
| 512 | |||
| 513 | /* | 456 | /* |
| 514 | * An event is triggered whenever a path is taken out of use. | 457 | * An event is triggered whenever a path is taken out of use. |
| 515 | * Includes path failure and PG bypass. | 458 | * Includes path failure and PG bypass. |
| @@ -972,27 +915,6 @@ static void multipath_dtr(struct dm_target *ti) | |||
| 972 | } | 915 | } |
| 973 | 916 | ||
| 974 | /* | 917 | /* |
| 975 | * Map cloned requests | ||
| 976 | */ | ||
| 977 | static int multipath_map(struct dm_target *ti, struct request *clone, | ||
| 978 | union map_info *map_context) | ||
| 979 | { | ||
| 980 | int r; | ||
| 981 | struct multipath *m = (struct multipath *) ti->private; | ||
| 982 | |||
| 983 | if (set_mapinfo(m, map_context) < 0) | ||
| 984 | /* ENOMEM, requeue */ | ||
| 985 | return DM_MAPIO_REQUEUE; | ||
| 986 | |||
| 987 | clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; | ||
| 988 | r = map_io(m, clone, map_context, 0); | ||
| 989 | if (r < 0 || r == DM_MAPIO_REQUEUE) | ||
| 990 | clear_mapinfo(m, map_context); | ||
| 991 | |||
| 992 | return r; | ||
| 993 | } | ||
| 994 | |||
| 995 | /* | ||
| 996 | * Take a path out of use. | 918 | * Take a path out of use. |
| 997 | */ | 919 | */ |
| 998 | static int fail_path(struct pgpath *pgpath) | 920 | static int fail_path(struct pgpath *pgpath) |
| @@ -1054,9 +976,9 @@ static int reinstate_path(struct pgpath *pgpath) | |||
| 1054 | 976 | ||
| 1055 | pgpath->is_active = 1; | 977 | pgpath->is_active = 1; |
| 1056 | 978 | ||
| 1057 | if (!m->nr_valid_paths++ && m->queue_size) { | 979 | if (!m->nr_valid_paths++) { |
| 1058 | m->current_pgpath = NULL; | 980 | m->current_pgpath = NULL; |
| 1059 | queue_work(kmultipathd, &m->process_queued_ios); | 981 | dm_table_run_md_queue_async(m->ti->table); |
| 1060 | } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) { | 982 | } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) { |
| 1061 | if (queue_work(kmpath_handlerd, &pgpath->activate_path.work)) | 983 | if (queue_work(kmpath_handlerd, &pgpath->activate_path.work)) |
| 1062 | m->pg_init_in_progress++; | 984 | m->pg_init_in_progress++; |
| @@ -1252,11 +1174,12 @@ static void pg_init_done(void *data, int errors) | |||
| 1252 | /* Activations of other paths are still on going */ | 1174 | /* Activations of other paths are still on going */ |
| 1253 | goto out; | 1175 | goto out; |
| 1254 | 1176 | ||
| 1255 | if (!m->pg_init_required) | 1177 | if (m->pg_init_required) { |
| 1256 | m->queue_io = 0; | 1178 | m->pg_init_delay_retry = delay_retry; |
| 1257 | 1179 | if (__pg_init_all_paths(m)) | |
| 1258 | m->pg_init_delay_retry = delay_retry; | 1180 | goto out; |
| 1259 | queue_work(kmultipathd, &m->process_queued_ios); | 1181 | } |
| 1182 | m->queue_io = 0; | ||
| 1260 | 1183 | ||
| 1261 | /* | 1184 | /* |
| 1262 | * Wake up any thread waiting to suspend. | 1185 | * Wake up any thread waiting to suspend. |
| @@ -1272,8 +1195,11 @@ static void activate_path(struct work_struct *work) | |||
| 1272 | struct pgpath *pgpath = | 1195 | struct pgpath *pgpath = |
| 1273 | container_of(work, struct pgpath, activate_path.work); | 1196 | container_of(work, struct pgpath, activate_path.work); |
| 1274 | 1197 | ||
| 1275 | scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), | 1198 | if (pgpath->is_active) |
| 1276 | pg_init_done, pgpath); | 1199 | scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), |
| 1200 | pg_init_done, pgpath); | ||
| 1201 | else | ||
| 1202 | pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED); | ||
| 1277 | } | 1203 | } |
| 1278 | 1204 | ||
| 1279 | static int noretry_error(int error) | 1205 | static int noretry_error(int error) |
| @@ -1433,7 +1359,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type, | |||
| 1433 | 1359 | ||
| 1434 | /* Features */ | 1360 | /* Features */ |
| 1435 | if (type == STATUSTYPE_INFO) | 1361 | if (type == STATUSTYPE_INFO) |
| 1436 | DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count); | 1362 | DMEMIT("2 %u %u ", m->queue_io, m->pg_init_count); |
| 1437 | else { | 1363 | else { |
| 1438 | DMEMIT("%u ", m->queue_if_no_path + | 1364 | DMEMIT("%u ", m->queue_if_no_path + |
| 1439 | (m->pg_init_retries > 0) * 2 + | 1365 | (m->pg_init_retries > 0) * 2 + |
| @@ -1552,7 +1478,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) | |||
| 1552 | } | 1478 | } |
| 1553 | 1479 | ||
| 1554 | if (argc != 2) { | 1480 | if (argc != 2) { |
| 1555 | DMWARN("Unrecognised multipath message received."); | 1481 | DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc); |
| 1556 | goto out; | 1482 | goto out; |
| 1557 | } | 1483 | } |
| 1558 | 1484 | ||
| @@ -1570,7 +1496,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) | |||
| 1570 | else if (!strcasecmp(argv[0], "fail_path")) | 1496 | else if (!strcasecmp(argv[0], "fail_path")) |
| 1571 | action = fail_path; | 1497 | action = fail_path; |
| 1572 | else { | 1498 | else { |
| 1573 | DMWARN("Unrecognised multipath message received."); | 1499 | DMWARN("Unrecognised multipath message received: %s", argv[0]); |
| 1574 | goto out; | 1500 | goto out; |
| 1575 | } | 1501 | } |
| 1576 | 1502 | ||
| @@ -1632,8 +1558,17 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, | |||
| 1632 | r = err; | 1558 | r = err; |
| 1633 | } | 1559 | } |
| 1634 | 1560 | ||
| 1635 | if (r == -ENOTCONN && !fatal_signal_pending(current)) | 1561 | if (r == -ENOTCONN && !fatal_signal_pending(current)) { |
| 1636 | queue_work(kmultipathd, &m->process_queued_ios); | 1562 | spin_lock_irqsave(&m->lock, flags); |
| 1563 | if (!m->current_pg) { | ||
| 1564 | /* Path status changed, redo selection */ | ||
| 1565 | __choose_pgpath(m, 0); | ||
| 1566 | } | ||
| 1567 | if (m->pg_init_required) | ||
| 1568 | __pg_init_all_paths(m); | ||
| 1569 | spin_unlock_irqrestore(&m->lock, flags); | ||
| 1570 | dm_table_run_md_queue_async(m->ti->table); | ||
| 1571 | } | ||
| 1637 | 1572 | ||
| 1638 | return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); | 1573 | return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); |
| 1639 | } | 1574 | } |
| @@ -1684,7 +1619,7 @@ static int multipath_busy(struct dm_target *ti) | |||
| 1684 | spin_lock_irqsave(&m->lock, flags); | 1619 | spin_lock_irqsave(&m->lock, flags); |
| 1685 | 1620 | ||
| 1686 | /* pg_init in progress, requeue until done */ | 1621 | /* pg_init in progress, requeue until done */ |
| 1687 | if (m->pg_init_in_progress) { | 1622 | if (!pg_ready(m)) { |
| 1688 | busy = 1; | 1623 | busy = 1; |
| 1689 | goto out; | 1624 | goto out; |
| 1690 | } | 1625 | } |
| @@ -1737,7 +1672,7 @@ out: | |||
| 1737 | *---------------------------------------------------------------*/ | 1672 | *---------------------------------------------------------------*/ |
| 1738 | static struct target_type multipath_target = { | 1673 | static struct target_type multipath_target = { |
| 1739 | .name = "multipath", | 1674 | .name = "multipath", |
| 1740 | .version = {1, 6, 0}, | 1675 | .version = {1, 7, 0}, |
| 1741 | .module = THIS_MODULE, | 1676 | .module = THIS_MODULE, |
| 1742 | .ctr = multipath_ctr, | 1677 | .ctr = multipath_ctr, |
| 1743 | .dtr = multipath_dtr, | 1678 | .dtr = multipath_dtr, |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 6a7f2b83a126..50601ec7017a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
| @@ -945,7 +945,7 @@ bool dm_table_request_based(struct dm_table *t) | |||
| 945 | return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; | 945 | return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; |
| 946 | } | 946 | } |
| 947 | 947 | ||
| 948 | int dm_table_alloc_md_mempools(struct dm_table *t) | 948 | static int dm_table_alloc_md_mempools(struct dm_table *t) |
| 949 | { | 949 | { |
| 950 | unsigned type = dm_table_get_type(t); | 950 | unsigned type = dm_table_get_type(t); |
| 951 | unsigned per_bio_data_size = 0; | 951 | unsigned per_bio_data_size = 0; |
| @@ -1618,6 +1618,25 @@ struct mapped_device *dm_table_get_md(struct dm_table *t) | |||
| 1618 | } | 1618 | } |
| 1619 | EXPORT_SYMBOL(dm_table_get_md); | 1619 | EXPORT_SYMBOL(dm_table_get_md); |
| 1620 | 1620 | ||
| 1621 | void dm_table_run_md_queue_async(struct dm_table *t) | ||
| 1622 | { | ||
| 1623 | struct mapped_device *md; | ||
| 1624 | struct request_queue *queue; | ||
| 1625 | unsigned long flags; | ||
| 1626 | |||
| 1627 | if (!dm_table_request_based(t)) | ||
| 1628 | return; | ||
| 1629 | |||
| 1630 | md = dm_table_get_md(t); | ||
| 1631 | queue = dm_get_md_queue(md); | ||
| 1632 | if (queue) { | ||
| 1633 | spin_lock_irqsave(queue->queue_lock, flags); | ||
| 1634 | blk_run_queue_async(queue); | ||
| 1635 | spin_unlock_irqrestore(queue->queue_lock, flags); | ||
| 1636 | } | ||
| 1637 | } | ||
| 1638 | EXPORT_SYMBOL(dm_table_run_md_queue_async); | ||
| 1639 | |||
| 1621 | static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, | 1640 | static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, |
| 1622 | sector_t start, sector_t len, void *data) | 1641 | sector_t start, sector_t len, void *data) |
| 1623 | { | 1642 | { |
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index fb9efc829182..b086a945edcb 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c | |||
| @@ -192,6 +192,13 @@ struct dm_pool_metadata { | |||
| 192 | * operation possible in this state is the closing of the device. | 192 | * operation possible in this state is the closing of the device. |
| 193 | */ | 193 | */ |
| 194 | bool fail_io:1; | 194 | bool fail_io:1; |
| 195 | |||
| 196 | /* | ||
| 197 | * Reading the space map roots can fail, so we read it into these | ||
| 198 | * buffers before the superblock is locked and updated. | ||
| 199 | */ | ||
| 200 | __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE]; | ||
| 201 | __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; | ||
| 195 | }; | 202 | }; |
| 196 | 203 | ||
| 197 | struct dm_thin_device { | 204 | struct dm_thin_device { |
| @@ -431,26 +438,53 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) | |||
| 431 | pmd->details_info.value_type.equal = NULL; | 438 | pmd->details_info.value_type.equal = NULL; |
| 432 | } | 439 | } |
| 433 | 440 | ||
| 441 | static int save_sm_roots(struct dm_pool_metadata *pmd) | ||
| 442 | { | ||
| 443 | int r; | ||
| 444 | size_t len; | ||
| 445 | |||
| 446 | r = dm_sm_root_size(pmd->metadata_sm, &len); | ||
| 447 | if (r < 0) | ||
| 448 | return r; | ||
| 449 | |||
| 450 | r = dm_sm_copy_root(pmd->metadata_sm, &pmd->metadata_space_map_root, len); | ||
| 451 | if (r < 0) | ||
| 452 | return r; | ||
| 453 | |||
| 454 | r = dm_sm_root_size(pmd->data_sm, &len); | ||
| 455 | if (r < 0) | ||
| 456 | return r; | ||
| 457 | |||
| 458 | return dm_sm_copy_root(pmd->data_sm, &pmd->data_space_map_root, len); | ||
| 459 | } | ||
| 460 | |||
| 461 | static void copy_sm_roots(struct dm_pool_metadata *pmd, | ||
| 462 | struct thin_disk_superblock *disk) | ||
| 463 | { | ||
| 464 | memcpy(&disk->metadata_space_map_root, | ||
| 465 | &pmd->metadata_space_map_root, | ||
| 466 | sizeof(pmd->metadata_space_map_root)); | ||
| 467 | |||
| 468 | memcpy(&disk->data_space_map_root, | ||
| 469 | &pmd->data_space_map_root, | ||
| 470 | sizeof(pmd->data_space_map_root)); | ||
| 471 | } | ||
| 472 | |||
| 434 | static int __write_initial_superblock(struct dm_pool_metadata *pmd) | 473 | static int __write_initial_superblock(struct dm_pool_metadata *pmd) |
| 435 | { | 474 | { |
| 436 | int r; | 475 | int r; |
| 437 | struct dm_block *sblock; | 476 | struct dm_block *sblock; |
| 438 | size_t metadata_len, data_len; | ||
| 439 | struct thin_disk_superblock *disk_super; | 477 | struct thin_disk_superblock *disk_super; |
| 440 | sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; | 478 | sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; |
| 441 | 479 | ||
| 442 | if (bdev_size > THIN_METADATA_MAX_SECTORS) | 480 | if (bdev_size > THIN_METADATA_MAX_SECTORS) |
| 443 | bdev_size = THIN_METADATA_MAX_SECTORS; | 481 | bdev_size = THIN_METADATA_MAX_SECTORS; |
| 444 | 482 | ||
| 445 | r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); | 483 | r = dm_sm_commit(pmd->data_sm); |
| 446 | if (r < 0) | ||
| 447 | return r; | ||
| 448 | |||
| 449 | r = dm_sm_root_size(pmd->data_sm, &data_len); | ||
| 450 | if (r < 0) | 484 | if (r < 0) |
| 451 | return r; | 485 | return r; |
| 452 | 486 | ||
| 453 | r = dm_sm_commit(pmd->data_sm); | 487 | r = save_sm_roots(pmd); |
| 454 | if (r < 0) | 488 | if (r < 0) |
| 455 | return r; | 489 | return r; |
| 456 | 490 | ||
| @@ -471,15 +505,7 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) | |||
| 471 | disk_super->trans_id = 0; | 505 | disk_super->trans_id = 0; |
| 472 | disk_super->held_root = 0; | 506 | disk_super->held_root = 0; |
| 473 | 507 | ||
| 474 | r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, | 508 | copy_sm_roots(pmd, disk_super); |
| 475 | metadata_len); | ||
| 476 | if (r < 0) | ||
| 477 | goto bad_locked; | ||
| 478 | |||
| 479 | r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, | ||
| 480 | data_len); | ||
| 481 | if (r < 0) | ||
| 482 | goto bad_locked; | ||
| 483 | 509 | ||
| 484 | disk_super->data_mapping_root = cpu_to_le64(pmd->root); | 510 | disk_super->data_mapping_root = cpu_to_le64(pmd->root); |
| 485 | disk_super->device_details_root = cpu_to_le64(pmd->details_root); | 511 | disk_super->device_details_root = cpu_to_le64(pmd->details_root); |
| @@ -488,10 +514,6 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) | |||
| 488 | disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); | 514 | disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); |
| 489 | 515 | ||
| 490 | return dm_tm_commit(pmd->tm, sblock); | 516 | return dm_tm_commit(pmd->tm, sblock); |
| 491 | |||
| 492 | bad_locked: | ||
| 493 | dm_bm_unlock(sblock); | ||
| 494 | return r; | ||
| 495 | } | 517 | } |
| 496 | 518 | ||
| 497 | static int __format_metadata(struct dm_pool_metadata *pmd) | 519 | static int __format_metadata(struct dm_pool_metadata *pmd) |
| @@ -769,6 +791,10 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) | |||
| 769 | if (r < 0) | 791 | if (r < 0) |
| 770 | return r; | 792 | return r; |
| 771 | 793 | ||
| 794 | r = save_sm_roots(pmd); | ||
| 795 | if (r < 0) | ||
| 796 | return r; | ||
| 797 | |||
| 772 | r = superblock_lock(pmd, &sblock); | 798 | r = superblock_lock(pmd, &sblock); |
| 773 | if (r) | 799 | if (r) |
| 774 | return r; | 800 | return r; |
| @@ -780,21 +806,9 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) | |||
| 780 | disk_super->trans_id = cpu_to_le64(pmd->trans_id); | 806 | disk_super->trans_id = cpu_to_le64(pmd->trans_id); |
| 781 | disk_super->flags = cpu_to_le32(pmd->flags); | 807 | disk_super->flags = cpu_to_le32(pmd->flags); |
| 782 | 808 | ||
| 783 | r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, | 809 | copy_sm_roots(pmd, disk_super); |
| 784 | metadata_len); | ||
| 785 | if (r < 0) | ||
| 786 | goto out_locked; | ||
| 787 | |||
| 788 | r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, | ||
| 789 | data_len); | ||
| 790 | if (r < 0) | ||
| 791 | goto out_locked; | ||
| 792 | 810 | ||
| 793 | return dm_tm_commit(pmd->tm, sblock); | 811 | return dm_tm_commit(pmd->tm, sblock); |
| 794 | |||
| 795 | out_locked: | ||
| 796 | dm_bm_unlock(sblock); | ||
| 797 | return r; | ||
| 798 | } | 812 | } |
| 799 | 813 | ||
| 800 | struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, | 814 | struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, |
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index be70d38745f7..53728be84dee 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c | |||
| @@ -12,9 +12,11 @@ | |||
| 12 | #include <linux/dm-io.h> | 12 | #include <linux/dm-io.h> |
| 13 | #include <linux/dm-kcopyd.h> | 13 | #include <linux/dm-kcopyd.h> |
| 14 | #include <linux/list.h> | 14 | #include <linux/list.h> |
| 15 | #include <linux/rculist.h> | ||
| 15 | #include <linux/init.h> | 16 | #include <linux/init.h> |
| 16 | #include <linux/module.h> | 17 | #include <linux/module.h> |
| 17 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
| 19 | #include <linux/rbtree.h> | ||
| 18 | 20 | ||
| 19 | #define DM_MSG_PREFIX "thin" | 21 | #define DM_MSG_PREFIX "thin" |
| 20 | 22 | ||
| @@ -178,12 +180,10 @@ struct pool { | |||
| 178 | unsigned ref_count; | 180 | unsigned ref_count; |
| 179 | 181 | ||
| 180 | spinlock_t lock; | 182 | spinlock_t lock; |
| 181 | struct bio_list deferred_bios; | ||
| 182 | struct bio_list deferred_flush_bios; | 183 | struct bio_list deferred_flush_bios; |
| 183 | struct list_head prepared_mappings; | 184 | struct list_head prepared_mappings; |
| 184 | struct list_head prepared_discards; | 185 | struct list_head prepared_discards; |
| 185 | 186 | struct list_head active_thins; | |
| 186 | struct bio_list retry_on_resume_list; | ||
| 187 | 187 | ||
| 188 | struct dm_deferred_set *shared_read_ds; | 188 | struct dm_deferred_set *shared_read_ds; |
| 189 | struct dm_deferred_set *all_io_ds; | 189 | struct dm_deferred_set *all_io_ds; |
| @@ -220,6 +220,7 @@ struct pool_c { | |||
| 220 | * Target context for a thin. | 220 | * Target context for a thin. |
| 221 | */ | 221 | */ |
| 222 | struct thin_c { | 222 | struct thin_c { |
| 223 | struct list_head list; | ||
| 223 | struct dm_dev *pool_dev; | 224 | struct dm_dev *pool_dev; |
| 224 | struct dm_dev *origin_dev; | 225 | struct dm_dev *origin_dev; |
| 225 | dm_thin_id dev_id; | 226 | dm_thin_id dev_id; |
| @@ -227,6 +228,10 @@ struct thin_c { | |||
| 227 | struct pool *pool; | 228 | struct pool *pool; |
| 228 | struct dm_thin_device *td; | 229 | struct dm_thin_device *td; |
| 229 | bool requeue_mode:1; | 230 | bool requeue_mode:1; |
| 231 | spinlock_t lock; | ||
| 232 | struct bio_list deferred_bio_list; | ||
| 233 | struct bio_list retry_on_resume_list; | ||
| 234 | struct rb_root sort_bio_list; /* sorted list of deferred bios */ | ||
| 230 | }; | 235 | }; |
| 231 | 236 | ||
| 232 | /*----------------------------------------------------------------*/ | 237 | /*----------------------------------------------------------------*/ |
| @@ -287,9 +292,9 @@ static void cell_defer_no_holder_no_free(struct thin_c *tc, | |||
| 287 | struct pool *pool = tc->pool; | 292 | struct pool *pool = tc->pool; |
| 288 | unsigned long flags; | 293 | unsigned long flags; |
| 289 | 294 | ||
| 290 | spin_lock_irqsave(&pool->lock, flags); | 295 | spin_lock_irqsave(&tc->lock, flags); |
| 291 | dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios); | 296 | dm_cell_release_no_holder(pool->prison, cell, &tc->deferred_bio_list); |
| 292 | spin_unlock_irqrestore(&pool->lock, flags); | 297 | spin_unlock_irqrestore(&tc->lock, flags); |
| 293 | 298 | ||
| 294 | wake_worker(pool); | 299 | wake_worker(pool); |
| 295 | } | 300 | } |
| @@ -368,6 +373,7 @@ struct dm_thin_endio_hook { | |||
| 368 | struct dm_deferred_entry *shared_read_entry; | 373 | struct dm_deferred_entry *shared_read_entry; |
| 369 | struct dm_deferred_entry *all_io_entry; | 374 | struct dm_deferred_entry *all_io_entry; |
| 370 | struct dm_thin_new_mapping *overwrite_mapping; | 375 | struct dm_thin_new_mapping *overwrite_mapping; |
| 376 | struct rb_node rb_node; | ||
| 371 | }; | 377 | }; |
| 372 | 378 | ||
| 373 | static void requeue_bio_list(struct thin_c *tc, struct bio_list *master) | 379 | static void requeue_bio_list(struct thin_c *tc, struct bio_list *master) |
| @@ -378,30 +384,22 @@ static void requeue_bio_list(struct thin_c *tc, struct bio_list *master) | |||
| 378 | 384 | ||
| 379 | bio_list_init(&bios); | 385 | bio_list_init(&bios); |
| 380 | 386 | ||
| 381 | spin_lock_irqsave(&tc->pool->lock, flags); | 387 | spin_lock_irqsave(&tc->lock, flags); |
| 382 | bio_list_merge(&bios, master); | 388 | bio_list_merge(&bios, master); |
| 383 | bio_list_init(master); | 389 | bio_list_init(master); |
| 384 | spin_unlock_irqrestore(&tc->pool->lock, flags); | 390 | spin_unlock_irqrestore(&tc->lock, flags); |
| 385 | 391 | ||
| 386 | while ((bio = bio_list_pop(&bios))) { | 392 | while ((bio = bio_list_pop(&bios))) |
| 387 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | 393 | bio_endio(bio, DM_ENDIO_REQUEUE); |
| 388 | |||
| 389 | if (h->tc == tc) | ||
| 390 | bio_endio(bio, DM_ENDIO_REQUEUE); | ||
| 391 | else | ||
| 392 | bio_list_add(master, bio); | ||
| 393 | } | ||
| 394 | } | 394 | } |
| 395 | 395 | ||
| 396 | static void requeue_io(struct thin_c *tc) | 396 | static void requeue_io(struct thin_c *tc) |
| 397 | { | 397 | { |
| 398 | struct pool *pool = tc->pool; | 398 | requeue_bio_list(tc, &tc->deferred_bio_list); |
| 399 | 399 | requeue_bio_list(tc, &tc->retry_on_resume_list); | |
| 400 | requeue_bio_list(tc, &pool->deferred_bios); | ||
| 401 | requeue_bio_list(tc, &pool->retry_on_resume_list); | ||
| 402 | } | 400 | } |
| 403 | 401 | ||
| 404 | static void error_retry_list(struct pool *pool) | 402 | static void error_thin_retry_list(struct thin_c *tc) |
| 405 | { | 403 | { |
| 406 | struct bio *bio; | 404 | struct bio *bio; |
| 407 | unsigned long flags; | 405 | unsigned long flags; |
| @@ -409,15 +407,25 @@ static void error_retry_list(struct pool *pool) | |||
| 409 | 407 | ||
| 410 | bio_list_init(&bios); | 408 | bio_list_init(&bios); |
| 411 | 409 | ||
| 412 | spin_lock_irqsave(&pool->lock, flags); | 410 | spin_lock_irqsave(&tc->lock, flags); |
| 413 | bio_list_merge(&bios, &pool->retry_on_resume_list); | 411 | bio_list_merge(&bios, &tc->retry_on_resume_list); |
| 414 | bio_list_init(&pool->retry_on_resume_list); | 412 | bio_list_init(&tc->retry_on_resume_list); |
| 415 | spin_unlock_irqrestore(&pool->lock, flags); | 413 | spin_unlock_irqrestore(&tc->lock, flags); |
| 416 | 414 | ||
| 417 | while ((bio = bio_list_pop(&bios))) | 415 | while ((bio = bio_list_pop(&bios))) |
| 418 | bio_io_error(bio); | 416 | bio_io_error(bio); |
| 419 | } | 417 | } |
| 420 | 418 | ||
| 419 | static void error_retry_list(struct pool *pool) | ||
| 420 | { | ||
| 421 | struct thin_c *tc; | ||
| 422 | |||
| 423 | rcu_read_lock(); | ||
| 424 | list_for_each_entry_rcu(tc, &pool->active_thins, list) | ||
| 425 | error_thin_retry_list(tc); | ||
| 426 | rcu_read_unlock(); | ||
| 427 | } | ||
| 428 | |||
| 421 | /* | 429 | /* |
| 422 | * This section of code contains the logic for processing a thin device's IO. | 430 | * This section of code contains the logic for processing a thin device's IO. |
| 423 | * Much of the code depends on pool object resources (lists, workqueues, etc) | 431 | * Much of the code depends on pool object resources (lists, workqueues, etc) |
| @@ -608,9 +616,9 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell) | |||
| 608 | struct pool *pool = tc->pool; | 616 | struct pool *pool = tc->pool; |
| 609 | unsigned long flags; | 617 | unsigned long flags; |
| 610 | 618 | ||
| 611 | spin_lock_irqsave(&pool->lock, flags); | 619 | spin_lock_irqsave(&tc->lock, flags); |
| 612 | cell_release(pool, cell, &pool->deferred_bios); | 620 | cell_release(pool, cell, &tc->deferred_bio_list); |
| 613 | spin_unlock_irqrestore(&tc->pool->lock, flags); | 621 | spin_unlock_irqrestore(&tc->lock, flags); |
| 614 | 622 | ||
| 615 | wake_worker(pool); | 623 | wake_worker(pool); |
| 616 | } | 624 | } |
| @@ -623,9 +631,9 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c | |||
| 623 | struct pool *pool = tc->pool; | 631 | struct pool *pool = tc->pool; |
| 624 | unsigned long flags; | 632 | unsigned long flags; |
| 625 | 633 | ||
| 626 | spin_lock_irqsave(&pool->lock, flags); | 634 | spin_lock_irqsave(&tc->lock, flags); |
| 627 | cell_release_no_holder(pool, cell, &pool->deferred_bios); | 635 | cell_release_no_holder(pool, cell, &tc->deferred_bio_list); |
| 628 | spin_unlock_irqrestore(&pool->lock, flags); | 636 | spin_unlock_irqrestore(&tc->lock, flags); |
| 629 | 637 | ||
| 630 | wake_worker(pool); | 638 | wake_worker(pool); |
| 631 | } | 639 | } |
| @@ -1001,12 +1009,11 @@ static void retry_on_resume(struct bio *bio) | |||
| 1001 | { | 1009 | { |
| 1002 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | 1010 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); |
| 1003 | struct thin_c *tc = h->tc; | 1011 | struct thin_c *tc = h->tc; |
| 1004 | struct pool *pool = tc->pool; | ||
| 1005 | unsigned long flags; | 1012 | unsigned long flags; |
| 1006 | 1013 | ||
| 1007 | spin_lock_irqsave(&pool->lock, flags); | 1014 | spin_lock_irqsave(&tc->lock, flags); |
| 1008 | bio_list_add(&pool->retry_on_resume_list, bio); | 1015 | bio_list_add(&tc->retry_on_resume_list, bio); |
| 1009 | spin_unlock_irqrestore(&pool->lock, flags); | 1016 | spin_unlock_irqrestore(&tc->lock, flags); |
| 1010 | } | 1017 | } |
| 1011 | 1018 | ||
| 1012 | static bool should_error_unserviceable_bio(struct pool *pool) | 1019 | static bool should_error_unserviceable_bio(struct pool *pool) |
| @@ -1363,38 +1370,111 @@ static int need_commit_due_to_time(struct pool *pool) | |||
| 1363 | jiffies > pool->last_commit_jiffies + COMMIT_PERIOD; | 1370 | jiffies > pool->last_commit_jiffies + COMMIT_PERIOD; |
| 1364 | } | 1371 | } |
| 1365 | 1372 | ||
| 1366 | static void process_deferred_bios(struct pool *pool) | 1373 | #define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node) |
| 1374 | #define thin_bio(pbd) dm_bio_from_per_bio_data((pbd), sizeof(struct dm_thin_endio_hook)) | ||
| 1375 | |||
| 1376 | static void __thin_bio_rb_add(struct thin_c *tc, struct bio *bio) | ||
| 1377 | { | ||
| 1378 | struct rb_node **rbp, *parent; | ||
| 1379 | struct dm_thin_endio_hook *pbd; | ||
| 1380 | sector_t bi_sector = bio->bi_iter.bi_sector; | ||
| 1381 | |||
| 1382 | rbp = &tc->sort_bio_list.rb_node; | ||
| 1383 | parent = NULL; | ||
| 1384 | while (*rbp) { | ||
| 1385 | parent = *rbp; | ||
| 1386 | pbd = thin_pbd(parent); | ||
| 1387 | |||
| 1388 | if (bi_sector < thin_bio(pbd)->bi_iter.bi_sector) | ||
| 1389 | rbp = &(*rbp)->rb_left; | ||
| 1390 | else | ||
| 1391 | rbp = &(*rbp)->rb_right; | ||
| 1392 | } | ||
| 1393 | |||
| 1394 | pbd = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | ||
| 1395 | rb_link_node(&pbd->rb_node, parent, rbp); | ||
| 1396 | rb_insert_color(&pbd->rb_node, &tc->sort_bio_list); | ||
| 1397 | } | ||
| 1398 | |||
| 1399 | static void __extract_sorted_bios(struct thin_c *tc) | ||
| 1400 | { | ||
| 1401 | struct rb_node *node; | ||
| 1402 | struct dm_thin_endio_hook *pbd; | ||
| 1403 | struct bio *bio; | ||
| 1404 | |||
| 1405 | for (node = rb_first(&tc->sort_bio_list); node; node = rb_next(node)) { | ||
| 1406 | pbd = thin_pbd(node); | ||
| 1407 | bio = thin_bio(pbd); | ||
| 1408 | |||
| 1409 | bio_list_add(&tc->deferred_bio_list, bio); | ||
| 1410 | rb_erase(&pbd->rb_node, &tc->sort_bio_list); | ||
| 1411 | } | ||
| 1412 | |||
| 1413 | WARN_ON(!RB_EMPTY_ROOT(&tc->sort_bio_list)); | ||
| 1414 | } | ||
| 1415 | |||
| 1416 | static void __sort_thin_deferred_bios(struct thin_c *tc) | ||
| 1417 | { | ||
| 1418 | struct bio *bio; | ||
| 1419 | struct bio_list bios; | ||
| 1420 | |||
| 1421 | bio_list_init(&bios); | ||
| 1422 | bio_list_merge(&bios, &tc->deferred_bio_list); | ||
| 1423 | bio_list_init(&tc->deferred_bio_list); | ||
| 1424 | |||
| 1425 | /* Sort deferred_bio_list using rb-tree */ | ||
| 1426 | while ((bio = bio_list_pop(&bios))) | ||
| 1427 | __thin_bio_rb_add(tc, bio); | ||
| 1428 | |||
| 1429 | /* | ||
| 1430 | * Transfer the sorted bios in sort_bio_list back to | ||
| 1431 | * deferred_bio_list to allow lockless submission of | ||
| 1432 | * all bios. | ||
| 1433 | */ | ||
| 1434 | __extract_sorted_bios(tc); | ||
| 1435 | } | ||
| 1436 | |||
| 1437 | static void process_thin_deferred_bios(struct thin_c *tc) | ||
| 1367 | { | 1438 | { |
| 1439 | struct pool *pool = tc->pool; | ||
| 1368 | unsigned long flags; | 1440 | unsigned long flags; |
| 1369 | struct bio *bio; | 1441 | struct bio *bio; |
| 1370 | struct bio_list bios; | 1442 | struct bio_list bios; |
| 1443 | struct blk_plug plug; | ||
| 1444 | |||
| 1445 | if (tc->requeue_mode) { | ||
| 1446 | requeue_bio_list(tc, &tc->deferred_bio_list); | ||
| 1447 | return; | ||
| 1448 | } | ||
| 1371 | 1449 | ||
| 1372 | bio_list_init(&bios); | 1450 | bio_list_init(&bios); |
| 1373 | 1451 | ||
| 1374 | spin_lock_irqsave(&pool->lock, flags); | 1452 | spin_lock_irqsave(&tc->lock, flags); |
| 1375 | bio_list_merge(&bios, &pool->deferred_bios); | ||
| 1376 | bio_list_init(&pool->deferred_bios); | ||
| 1377 | spin_unlock_irqrestore(&pool->lock, flags); | ||
| 1378 | 1453 | ||
| 1379 | while ((bio = bio_list_pop(&bios))) { | 1454 | if (bio_list_empty(&tc->deferred_bio_list)) { |
| 1380 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); | 1455 | spin_unlock_irqrestore(&tc->lock, flags); |
| 1381 | struct thin_c *tc = h->tc; | 1456 | return; |
| 1457 | } | ||
| 1382 | 1458 | ||
| 1383 | if (tc->requeue_mode) { | 1459 | __sort_thin_deferred_bios(tc); |
| 1384 | bio_endio(bio, DM_ENDIO_REQUEUE); | 1460 | |
| 1385 | continue; | 1461 | bio_list_merge(&bios, &tc->deferred_bio_list); |
| 1386 | } | 1462 | bio_list_init(&tc->deferred_bio_list); |
| 1387 | 1463 | ||
| 1464 | spin_unlock_irqrestore(&tc->lock, flags); | ||
| 1465 | |||
| 1466 | blk_start_plug(&plug); | ||
| 1467 | while ((bio = bio_list_pop(&bios))) { | ||
| 1388 | /* | 1468 | /* |
| 1389 | * If we've got no free new_mapping structs, and processing | 1469 | * If we've got no free new_mapping structs, and processing |
| 1390 | * this bio might require one, we pause until there are some | 1470 | * this bio might require one, we pause until there are some |
| 1391 | * prepared mappings to process. | 1471 | * prepared mappings to process. |
| 1392 | */ | 1472 | */ |
| 1393 | if (ensure_next_mapping(pool)) { | 1473 | if (ensure_next_mapping(pool)) { |
| 1394 | spin_lock_irqsave(&pool->lock, flags); | 1474 | spin_lock_irqsave(&tc->lock, flags); |
| 1395 | bio_list_merge(&pool->deferred_bios, &bios); | 1475 | bio_list_add(&tc->deferred_bio_list, bio); |
| 1396 | spin_unlock_irqrestore(&pool->lock, flags); | 1476 | bio_list_merge(&tc->deferred_bio_list, &bios); |
| 1397 | 1477 | spin_unlock_irqrestore(&tc->lock, flags); | |
| 1398 | break; | 1478 | break; |
| 1399 | } | 1479 | } |
| 1400 | 1480 | ||
| @@ -1403,6 +1483,20 @@ static void process_deferred_bios(struct pool *pool) | |||
| 1403 | else | 1483 | else |
| 1404 | pool->process_bio(tc, bio); | 1484 | pool->process_bio(tc, bio); |
| 1405 | } | 1485 | } |
| 1486 | blk_finish_plug(&plug); | ||
| 1487 | } | ||
| 1488 | |||
| 1489 | static void process_deferred_bios(struct pool *pool) | ||
| 1490 | { | ||
| 1491 | unsigned long flags; | ||
| 1492 | struct bio *bio; | ||
| 1493 | struct bio_list bios; | ||
| 1494 | struct thin_c *tc; | ||
| 1495 | |||
| 1496 | rcu_read_lock(); | ||
| 1497 | list_for_each_entry_rcu(tc, &pool->active_thins, list) | ||
| 1498 | process_thin_deferred_bios(tc); | ||
| 1499 | rcu_read_unlock(); | ||
| 1406 | 1500 | ||
| 1407 | /* | 1501 | /* |
| 1408 | * If there are any deferred flush bios, we must commit | 1502 | * If there are any deferred flush bios, we must commit |
| @@ -1634,9 +1728,9 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio) | |||
| 1634 | unsigned long flags; | 1728 | unsigned long flags; |
| 1635 | struct pool *pool = tc->pool; | 1729 | struct pool *pool = tc->pool; |
| 1636 | 1730 | ||
| 1637 | spin_lock_irqsave(&pool->lock, flags); | 1731 | spin_lock_irqsave(&tc->lock, flags); |
| 1638 | bio_list_add(&pool->deferred_bios, bio); | 1732 | bio_list_add(&tc->deferred_bio_list, bio); |
| 1639 | spin_unlock_irqrestore(&pool->lock, flags); | 1733 | spin_unlock_irqrestore(&tc->lock, flags); |
| 1640 | 1734 | ||
| 1641 | wake_worker(pool); | 1735 | wake_worker(pool); |
| 1642 | } | 1736 | } |
| @@ -1757,26 +1851,29 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
| 1757 | 1851 | ||
| 1758 | static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) | 1852 | static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) |
| 1759 | { | 1853 | { |
| 1760 | int r; | ||
| 1761 | unsigned long flags; | ||
| 1762 | struct pool_c *pt = container_of(cb, struct pool_c, callbacks); | 1854 | struct pool_c *pt = container_of(cb, struct pool_c, callbacks); |
| 1855 | struct request_queue *q; | ||
| 1763 | 1856 | ||
| 1764 | spin_lock_irqsave(&pt->pool->lock, flags); | 1857 | if (get_pool_mode(pt->pool) == PM_OUT_OF_DATA_SPACE) |
| 1765 | r = !bio_list_empty(&pt->pool->retry_on_resume_list); | 1858 | return 1; |
| 1766 | spin_unlock_irqrestore(&pt->pool->lock, flags); | ||
| 1767 | 1859 | ||
| 1768 | if (!r) { | 1860 | q = bdev_get_queue(pt->data_dev->bdev); |
| 1769 | struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); | 1861 | return bdi_congested(&q->backing_dev_info, bdi_bits); |
| 1770 | r = bdi_congested(&q->backing_dev_info, bdi_bits); | ||
| 1771 | } | ||
| 1772 | |||
| 1773 | return r; | ||
| 1774 | } | 1862 | } |
| 1775 | 1863 | ||
| 1776 | static void __requeue_bios(struct pool *pool) | 1864 | static void requeue_bios(struct pool *pool) |
| 1777 | { | 1865 | { |
| 1778 | bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list); | 1866 | unsigned long flags; |
| 1779 | bio_list_init(&pool->retry_on_resume_list); | 1867 | struct thin_c *tc; |
| 1868 | |||
| 1869 | rcu_read_lock(); | ||
| 1870 | list_for_each_entry_rcu(tc, &pool->active_thins, list) { | ||
| 1871 | spin_lock_irqsave(&tc->lock, flags); | ||
| 1872 | bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list); | ||
| 1873 | bio_list_init(&tc->retry_on_resume_list); | ||
| 1874 | spin_unlock_irqrestore(&tc->lock, flags); | ||
| 1875 | } | ||
| 1876 | rcu_read_unlock(); | ||
| 1780 | } | 1877 | } |
| 1781 | 1878 | ||
| 1782 | /*---------------------------------------------------------------- | 1879 | /*---------------------------------------------------------------- |
| @@ -1957,12 +2054,11 @@ static struct pool *pool_create(struct mapped_device *pool_md, | |||
| 1957 | INIT_WORK(&pool->worker, do_worker); | 2054 | INIT_WORK(&pool->worker, do_worker); |
| 1958 | INIT_DELAYED_WORK(&pool->waker, do_waker); | 2055 | INIT_DELAYED_WORK(&pool->waker, do_waker); |
| 1959 | spin_lock_init(&pool->lock); | 2056 | spin_lock_init(&pool->lock); |
| 1960 | bio_list_init(&pool->deferred_bios); | ||
| 1961 | bio_list_init(&pool->deferred_flush_bios); | 2057 | bio_list_init(&pool->deferred_flush_bios); |
| 1962 | INIT_LIST_HEAD(&pool->prepared_mappings); | 2058 | INIT_LIST_HEAD(&pool->prepared_mappings); |
| 1963 | INIT_LIST_HEAD(&pool->prepared_discards); | 2059 | INIT_LIST_HEAD(&pool->prepared_discards); |
| 2060 | INIT_LIST_HEAD(&pool->active_thins); | ||
| 1964 | pool->low_water_triggered = false; | 2061 | pool->low_water_triggered = false; |
| 1965 | bio_list_init(&pool->retry_on_resume_list); | ||
| 1966 | 2062 | ||
| 1967 | pool->shared_read_ds = dm_deferred_set_create(); | 2063 | pool->shared_read_ds = dm_deferred_set_create(); |
| 1968 | if (!pool->shared_read_ds) { | 2064 | if (!pool->shared_read_ds) { |
| @@ -2507,8 +2603,8 @@ static void pool_resume(struct dm_target *ti) | |||
| 2507 | 2603 | ||
| 2508 | spin_lock_irqsave(&pool->lock, flags); | 2604 | spin_lock_irqsave(&pool->lock, flags); |
| 2509 | pool->low_water_triggered = false; | 2605 | pool->low_water_triggered = false; |
| 2510 | __requeue_bios(pool); | ||
| 2511 | spin_unlock_irqrestore(&pool->lock, flags); | 2606 | spin_unlock_irqrestore(&pool->lock, flags); |
| 2607 | requeue_bios(pool); | ||
| 2512 | 2608 | ||
| 2513 | do_waker(&pool->waker.work); | 2609 | do_waker(&pool->waker.work); |
| 2514 | } | 2610 | } |
| @@ -2947,7 +3043,7 @@ static struct target_type pool_target = { | |||
| 2947 | .name = "thin-pool", | 3043 | .name = "thin-pool", |
| 2948 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | | 3044 | .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | |
| 2949 | DM_TARGET_IMMUTABLE, | 3045 | DM_TARGET_IMMUTABLE, |
| 2950 | .version = {1, 11, 0}, | 3046 | .version = {1, 12, 0}, |
| 2951 | .module = THIS_MODULE, | 3047 | .module = THIS_MODULE, |
| 2952 | .ctr = pool_ctr, | 3048 | .ctr = pool_ctr, |
| 2953 | .dtr = pool_dtr, | 3049 | .dtr = pool_dtr, |
| @@ -2968,6 +3064,12 @@ static struct target_type pool_target = { | |||
| 2968 | static void thin_dtr(struct dm_target *ti) | 3064 | static void thin_dtr(struct dm_target *ti) |
| 2969 | { | 3065 | { |
| 2970 | struct thin_c *tc = ti->private; | 3066 | struct thin_c *tc = ti->private; |
| 3067 | unsigned long flags; | ||
| 3068 | |||
| 3069 | spin_lock_irqsave(&tc->pool->lock, flags); | ||
| 3070 | list_del_rcu(&tc->list); | ||
| 3071 | spin_unlock_irqrestore(&tc->pool->lock, flags); | ||
| 3072 | synchronize_rcu(); | ||
| 2971 | 3073 | ||
| 2972 | mutex_lock(&dm_thin_pool_table.mutex); | 3074 | mutex_lock(&dm_thin_pool_table.mutex); |
| 2973 | 3075 | ||
| @@ -3014,6 +3116,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
| 3014 | r = -ENOMEM; | 3116 | r = -ENOMEM; |
| 3015 | goto out_unlock; | 3117 | goto out_unlock; |
| 3016 | } | 3118 | } |
| 3119 | spin_lock_init(&tc->lock); | ||
| 3120 | bio_list_init(&tc->deferred_bio_list); | ||
| 3121 | bio_list_init(&tc->retry_on_resume_list); | ||
| 3122 | tc->sort_bio_list = RB_ROOT; | ||
| 3017 | 3123 | ||
| 3018 | if (argc == 3) { | 3124 | if (argc == 3) { |
| 3019 | r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); | 3125 | r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); |
| @@ -3085,6 +3191,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
| 3085 | 3191 | ||
| 3086 | mutex_unlock(&dm_thin_pool_table.mutex); | 3192 | mutex_unlock(&dm_thin_pool_table.mutex); |
| 3087 | 3193 | ||
| 3194 | spin_lock(&tc->pool->lock); | ||
| 3195 | list_add_tail_rcu(&tc->list, &tc->pool->active_thins); | ||
| 3196 | spin_unlock(&tc->pool->lock); | ||
| 3197 | /* | ||
| 3198 | * This synchronize_rcu() call is needed here otherwise we risk a | ||
| 3199 | * wake_worker() call finding no bios to process (because the newly | ||
| 3200 | * added tc isn't yet visible). So this reduces latency since we | ||
| 3201 | * aren't then dependent on the periodic commit to wake_worker(). | ||
| 3202 | */ | ||
| 3203 | synchronize_rcu(); | ||
| 3204 | |||
| 3088 | return 0; | 3205 | return 0; |
| 3089 | 3206 | ||
| 3090 | bad_target_max_io_len: | 3207 | bad_target_max_io_len: |
| @@ -3250,7 +3367,7 @@ static int thin_iterate_devices(struct dm_target *ti, | |||
| 3250 | 3367 | ||
| 3251 | static struct target_type thin_target = { | 3368 | static struct target_type thin_target = { |
| 3252 | .name = "thin", | 3369 | .name = "thin", |
| 3253 | .version = {1, 11, 0}, | 3370 | .version = {1, 12, 0}, |
| 3254 | .module = THIS_MODULE, | 3371 | .module = THIS_MODULE, |
| 3255 | .ctr = thin_ctr, | 3372 | .ctr = thin_ctr, |
| 3256 | .dtr = thin_dtr, | 3373 | .dtr = thin_dtr, |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8c53b09b9a2c..455e64916498 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
| @@ -94,13 +94,6 @@ struct dm_rq_clone_bio_info { | |||
| 94 | struct bio clone; | 94 | struct bio clone; |
| 95 | }; | 95 | }; |
| 96 | 96 | ||
| 97 | union map_info *dm_get_mapinfo(struct bio *bio) | ||
| 98 | { | ||
| 99 | if (bio && bio->bi_private) | ||
| 100 | return &((struct dm_target_io *)bio->bi_private)->info; | ||
| 101 | return NULL; | ||
| 102 | } | ||
| 103 | |||
| 104 | union map_info *dm_get_rq_mapinfo(struct request *rq) | 97 | union map_info *dm_get_rq_mapinfo(struct request *rq) |
| 105 | { | 98 | { |
| 106 | if (rq && rq->end_io_data) | 99 | if (rq && rq->end_io_data) |
| @@ -475,6 +468,11 @@ sector_t dm_get_size(struct mapped_device *md) | |||
| 475 | return get_capacity(md->disk); | 468 | return get_capacity(md->disk); |
| 476 | } | 469 | } |
| 477 | 470 | ||
| 471 | struct request_queue *dm_get_md_queue(struct mapped_device *md) | ||
| 472 | { | ||
| 473 | return md->queue; | ||
| 474 | } | ||
| 475 | |||
| 478 | struct dm_stats *dm_get_stats(struct mapped_device *md) | 476 | struct dm_stats *dm_get_stats(struct mapped_device *md) |
| 479 | { | 477 | { |
| 480 | return &md->stats; | 478 | return &md->stats; |
| @@ -760,7 +758,7 @@ static void dec_pending(struct dm_io *io, int error) | |||
| 760 | static void clone_endio(struct bio *bio, int error) | 758 | static void clone_endio(struct bio *bio, int error) |
| 761 | { | 759 | { |
| 762 | int r = 0; | 760 | int r = 0; |
| 763 | struct dm_target_io *tio = bio->bi_private; | 761 | struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); |
| 764 | struct dm_io *io = tio->io; | 762 | struct dm_io *io = tio->io; |
| 765 | struct mapped_device *md = tio->io->md; | 763 | struct mapped_device *md = tio->io->md; |
| 766 | dm_endio_fn endio = tio->ti->type->end_io; | 764 | dm_endio_fn endio = tio->ti->type->end_io; |
| @@ -794,7 +792,8 @@ static void clone_endio(struct bio *bio, int error) | |||
| 794 | */ | 792 | */ |
| 795 | static void end_clone_bio(struct bio *clone, int error) | 793 | static void end_clone_bio(struct bio *clone, int error) |
| 796 | { | 794 | { |
| 797 | struct dm_rq_clone_bio_info *info = clone->bi_private; | 795 | struct dm_rq_clone_bio_info *info = |
| 796 | container_of(clone, struct dm_rq_clone_bio_info, clone); | ||
| 798 | struct dm_rq_target_io *tio = info->tio; | 797 | struct dm_rq_target_io *tio = info->tio; |
| 799 | struct bio *bio = info->orig; | 798 | struct bio *bio = info->orig; |
| 800 | unsigned int nr_bytes = info->orig->bi_iter.bi_size; | 799 | unsigned int nr_bytes = info->orig->bi_iter.bi_size; |
| @@ -1120,7 +1119,6 @@ static void __map_bio(struct dm_target_io *tio) | |||
| 1120 | struct dm_target *ti = tio->ti; | 1119 | struct dm_target *ti = tio->ti; |
| 1121 | 1120 | ||
| 1122 | clone->bi_end_io = clone_endio; | 1121 | clone->bi_end_io = clone_endio; |
| 1123 | clone->bi_private = tio; | ||
| 1124 | 1122 | ||
| 1125 | /* | 1123 | /* |
| 1126 | * Map the clone. If r == 0 we don't need to do | 1124 | * Map the clone. If r == 0 we don't need to do |
| @@ -1195,7 +1193,6 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, | |||
| 1195 | 1193 | ||
| 1196 | tio->io = ci->io; | 1194 | tio->io = ci->io; |
| 1197 | tio->ti = ti; | 1195 | tio->ti = ti; |
| 1198 | memset(&tio->info, 0, sizeof(tio->info)); | ||
| 1199 | tio->target_bio_nr = target_bio_nr; | 1196 | tio->target_bio_nr = target_bio_nr; |
| 1200 | 1197 | ||
| 1201 | return tio; | 1198 | return tio; |
| @@ -1530,7 +1527,6 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, | |||
| 1530 | info->orig = bio_orig; | 1527 | info->orig = bio_orig; |
| 1531 | info->tio = tio; | 1528 | info->tio = tio; |
| 1532 | bio->bi_end_io = end_clone_bio; | 1529 | bio->bi_end_io = end_clone_bio; |
| 1533 | bio->bi_private = info; | ||
| 1534 | 1530 | ||
| 1535 | return 0; | 1531 | return 0; |
| 1536 | } | 1532 | } |
| @@ -2172,7 +2168,7 @@ static struct dm_table *__unbind(struct mapped_device *md) | |||
| 2172 | return NULL; | 2168 | return NULL; |
| 2173 | 2169 | ||
| 2174 | dm_table_event_callback(map, NULL, NULL); | 2170 | dm_table_event_callback(map, NULL, NULL); |
| 2175 | rcu_assign_pointer(md->map, NULL); | 2171 | RCU_INIT_POINTER(md->map, NULL); |
| 2176 | dm_sync_table(md); | 2172 | dm_sync_table(md); |
| 2177 | 2173 | ||
| 2178 | return map; | 2174 | return map; |
| @@ -2873,8 +2869,6 @@ static const struct block_device_operations dm_blk_dops = { | |||
| 2873 | .owner = THIS_MODULE | 2869 | .owner = THIS_MODULE |
| 2874 | }; | 2870 | }; |
| 2875 | 2871 | ||
| 2876 | EXPORT_SYMBOL(dm_get_mapinfo); | ||
| 2877 | |||
| 2878 | /* | 2872 | /* |
| 2879 | * module hooks | 2873 | * module hooks |
| 2880 | */ | 2874 | */ |
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index c4569f02f50f..ed76126aac54 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
| @@ -73,7 +73,6 @@ unsigned dm_table_get_type(struct dm_table *t); | |||
| 73 | struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); | 73 | struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); |
| 74 | bool dm_table_request_based(struct dm_table *t); | 74 | bool dm_table_request_based(struct dm_table *t); |
| 75 | bool dm_table_supports_discards(struct dm_table *t); | 75 | bool dm_table_supports_discards(struct dm_table *t); |
| 76 | int dm_table_alloc_md_mempools(struct dm_table *t); | ||
| 77 | void dm_table_free_md_mempools(struct dm_table *t); | 76 | void dm_table_free_md_mempools(struct dm_table *t); |
| 78 | struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); | 77 | struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); |
| 79 | 78 | ||
| @@ -189,6 +188,7 @@ int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only | |||
| 189 | int dm_cancel_deferred_remove(struct mapped_device *md); | 188 | int dm_cancel_deferred_remove(struct mapped_device *md); |
| 190 | int dm_request_based(struct mapped_device *md); | 189 | int dm_request_based(struct mapped_device *md); |
| 191 | sector_t dm_get_size(struct mapped_device *md); | 190 | sector_t dm_get_size(struct mapped_device *md); |
| 191 | struct request_queue *dm_get_md_queue(struct mapped_device *md); | ||
| 192 | struct dm_stats *dm_get_stats(struct mapped_device *md); | 192 | struct dm_stats *dm_get_stats(struct mapped_device *md); |
| 193 | 193 | ||
| 194 | int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, | 194 | int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, |
diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c index cd9a86d4cdf0..36f7cc2c7109 100644 --- a/drivers/md/persistent-data/dm-bitset.c +++ b/drivers/md/persistent-data/dm-bitset.c | |||
| @@ -65,7 +65,7 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, | |||
| 65 | int r; | 65 | int r; |
| 66 | __le64 value; | 66 | __le64 value; |
| 67 | 67 | ||
| 68 | if (!info->current_index_set) | 68 | if (!info->current_index_set || !info->dirty) |
| 69 | return 0; | 69 | return 0; |
| 70 | 70 | ||
| 71 | value = cpu_to_le64(info->current_bits); | 71 | value = cpu_to_le64(info->current_bits); |
| @@ -77,6 +77,8 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, | |||
| 77 | return r; | 77 | return r; |
| 78 | 78 | ||
| 79 | info->current_index_set = false; | 79 | info->current_index_set = false; |
| 80 | info->dirty = false; | ||
| 81 | |||
| 80 | return 0; | 82 | return 0; |
| 81 | } | 83 | } |
| 82 | EXPORT_SYMBOL_GPL(dm_bitset_flush); | 84 | EXPORT_SYMBOL_GPL(dm_bitset_flush); |
| @@ -94,6 +96,8 @@ static int read_bits(struct dm_disk_bitset *info, dm_block_t root, | |||
| 94 | info->current_bits = le64_to_cpu(value); | 96 | info->current_bits = le64_to_cpu(value); |
| 95 | info->current_index_set = true; | 97 | info->current_index_set = true; |
| 96 | info->current_index = array_index; | 98 | info->current_index = array_index; |
| 99 | info->dirty = false; | ||
| 100 | |||
| 97 | return 0; | 101 | return 0; |
| 98 | } | 102 | } |
| 99 | 103 | ||
| @@ -126,6 +130,8 @@ int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root, | |||
| 126 | return r; | 130 | return r; |
| 127 | 131 | ||
| 128 | set_bit(b, (unsigned long *) &info->current_bits); | 132 | set_bit(b, (unsigned long *) &info->current_bits); |
| 133 | info->dirty = true; | ||
| 134 | |||
| 129 | return 0; | 135 | return 0; |
| 130 | } | 136 | } |
| 131 | EXPORT_SYMBOL_GPL(dm_bitset_set_bit); | 137 | EXPORT_SYMBOL_GPL(dm_bitset_set_bit); |
| @@ -141,6 +147,8 @@ int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root, | |||
| 141 | return r; | 147 | return r; |
| 142 | 148 | ||
| 143 | clear_bit(b, (unsigned long *) &info->current_bits); | 149 | clear_bit(b, (unsigned long *) &info->current_bits); |
| 150 | info->dirty = true; | ||
| 151 | |||
| 144 | return 0; | 152 | return 0; |
| 145 | } | 153 | } |
| 146 | EXPORT_SYMBOL_GPL(dm_bitset_clear_bit); | 154 | EXPORT_SYMBOL_GPL(dm_bitset_clear_bit); |
diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h index e1b9bea14aa1..c2287d672ef5 100644 --- a/drivers/md/persistent-data/dm-bitset.h +++ b/drivers/md/persistent-data/dm-bitset.h | |||
| @@ -71,6 +71,7 @@ struct dm_disk_bitset { | |||
| 71 | uint64_t current_bits; | 71 | uint64_t current_bits; |
| 72 | 72 | ||
| 73 | bool current_index_set:1; | 73 | bool current_index_set:1; |
| 74 | bool dirty:1; | ||
| 74 | }; | 75 | }; |
| 75 | 76 | ||
| 76 | /* | 77 | /* |
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 455f79279a16..087411c95ffc 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c | |||
| @@ -595,25 +595,14 @@ int dm_bm_unlock(struct dm_block *b) | |||
| 595 | } | 595 | } |
| 596 | EXPORT_SYMBOL_GPL(dm_bm_unlock); | 596 | EXPORT_SYMBOL_GPL(dm_bm_unlock); |
| 597 | 597 | ||
| 598 | int dm_bm_flush_and_unlock(struct dm_block_manager *bm, | 598 | int dm_bm_flush(struct dm_block_manager *bm) |
| 599 | struct dm_block *superblock) | ||
| 600 | { | 599 | { |
| 601 | int r; | ||
| 602 | |||
| 603 | if (bm->read_only) | 600 | if (bm->read_only) |
| 604 | return -EPERM; | 601 | return -EPERM; |
| 605 | 602 | ||
| 606 | r = dm_bufio_write_dirty_buffers(bm->bufio); | ||
| 607 | if (unlikely(r)) { | ||
| 608 | dm_bm_unlock(superblock); | ||
| 609 | return r; | ||
| 610 | } | ||
| 611 | |||
| 612 | dm_bm_unlock(superblock); | ||
| 613 | |||
| 614 | return dm_bufio_write_dirty_buffers(bm->bufio); | 603 | return dm_bufio_write_dirty_buffers(bm->bufio); |
| 615 | } | 604 | } |
| 616 | EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock); | 605 | EXPORT_SYMBOL_GPL(dm_bm_flush); |
| 617 | 606 | ||
| 618 | void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b) | 607 | void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b) |
| 619 | { | 608 | { |
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 13cd58e1fe69..1b95dfc17786 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h | |||
| @@ -105,8 +105,7 @@ int dm_bm_unlock(struct dm_block *b); | |||
| 105 | * | 105 | * |
| 106 | * This method always blocks. | 106 | * This method always blocks. |
| 107 | */ | 107 | */ |
| 108 | int dm_bm_flush_and_unlock(struct dm_block_manager *bm, | 108 | int dm_bm_flush(struct dm_block_manager *bm); |
| 109 | struct dm_block *superblock); | ||
| 110 | 109 | ||
| 111 | /* | 110 | /* |
| 112 | * Request data is prefetched into the cache. | 111 | * Request data is prefetched into the cache. |
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 81da1a26042e..3bc30a0ae3d6 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c | |||
| @@ -154,7 +154,7 @@ int dm_tm_pre_commit(struct dm_transaction_manager *tm) | |||
| 154 | if (r < 0) | 154 | if (r < 0) |
| 155 | return r; | 155 | return r; |
| 156 | 156 | ||
| 157 | return 0; | 157 | return dm_bm_flush(tm->bm); |
| 158 | } | 158 | } |
| 159 | EXPORT_SYMBOL_GPL(dm_tm_pre_commit); | 159 | EXPORT_SYMBOL_GPL(dm_tm_pre_commit); |
| 160 | 160 | ||
| @@ -164,8 +164,9 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root) | |||
| 164 | return -EWOULDBLOCK; | 164 | return -EWOULDBLOCK; |
| 165 | 165 | ||
| 166 | wipe_shadow_table(tm); | 166 | wipe_shadow_table(tm); |
| 167 | dm_bm_unlock(root); | ||
| 167 | 168 | ||
| 168 | return dm_bm_flush_and_unlock(tm->bm, root); | 169 | return dm_bm_flush(tm->bm); |
| 169 | } | 170 | } |
| 170 | EXPORT_SYMBOL_GPL(dm_tm_commit); | 171 | EXPORT_SYMBOL_GPL(dm_tm_commit); |
| 171 | 172 | ||
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index b5b139076ca5..2772ed2a781a 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h | |||
| @@ -38,18 +38,17 @@ struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transac | |||
| 38 | /* | 38 | /* |
| 39 | * We use a 2-phase commit here. | 39 | * We use a 2-phase commit here. |
| 40 | * | 40 | * |
| 41 | * i) In the first phase the block manager is told to start flushing, and | 41 | * i) Make all changes for the transaction *except* for the superblock. |
| 42 | * the changes to the space map are written to disk. You should interrogate | 42 | * Then call dm_tm_pre_commit() to flush them to disk. |
| 43 | * your particular space map to get detail of its root node etc. to be | ||
| 44 | * included in your superblock. | ||
| 45 | * | 43 | * |
| 46 | * ii) @root will be committed last. You shouldn't use more than the | 44 | * ii) Lock your superblock. Update. Then call dm_tm_commit() which will |
| 47 | * first 512 bytes of @root if you wish the transaction to survive a power | 45 | * unlock the superblock and flush it. No other blocks should be updated |
| 48 | * failure. You *must* have a write lock held on @root for both stage (i) | 46 | * during this period. Care should be taken to never unlock a partially |
| 49 | * and (ii). The commit will drop the write lock. | 47 | * updated superblock; perform any operations that could fail *before* you |
| 48 | * take the superblock lock. | ||
| 50 | */ | 49 | */ |
| 51 | int dm_tm_pre_commit(struct dm_transaction_manager *tm); | 50 | int dm_tm_pre_commit(struct dm_transaction_manager *tm); |
| 52 | int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root); | 51 | int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock); |
| 53 | 52 | ||
| 54 | /* | 53 | /* |
| 55 | * These methods are the only way to get hold of a writeable block. | 54 | * These methods are the only way to get hold of a writeable block. |
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ed419c62dde1..63da56ed9796 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h | |||
| @@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; | |||
| 23 | 23 | ||
| 24 | union map_info { | 24 | union map_info { |
| 25 | void *ptr; | 25 | void *ptr; |
| 26 | unsigned long long ll; | ||
| 27 | }; | 26 | }; |
| 28 | 27 | ||
| 29 | /* | 28 | /* |
| @@ -291,7 +290,6 @@ struct dm_target_callbacks { | |||
| 291 | struct dm_target_io { | 290 | struct dm_target_io { |
| 292 | struct dm_io *io; | 291 | struct dm_io *io; |
| 293 | struct dm_target *ti; | 292 | struct dm_target *ti; |
| 294 | union map_info info; | ||
| 295 | unsigned target_bio_nr; | 293 | unsigned target_bio_nr; |
| 296 | struct bio clone; | 294 | struct bio clone; |
| 297 | }; | 295 | }; |
| @@ -403,7 +401,6 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid); | |||
| 403 | struct gendisk *dm_disk(struct mapped_device *md); | 401 | struct gendisk *dm_disk(struct mapped_device *md); |
| 404 | int dm_suspended(struct dm_target *ti); | 402 | int dm_suspended(struct dm_target *ti); |
| 405 | int dm_noflush_suspending(struct dm_target *ti); | 403 | int dm_noflush_suspending(struct dm_target *ti); |
| 406 | union map_info *dm_get_mapinfo(struct bio *bio); | ||
| 407 | union map_info *dm_get_rq_mapinfo(struct request *rq); | 404 | union map_info *dm_get_rq_mapinfo(struct request *rq); |
| 408 | 405 | ||
| 409 | struct queue_limits *dm_get_queue_limits(struct mapped_device *md); | 406 | struct queue_limits *dm_get_queue_limits(struct mapped_device *md); |
| @@ -466,6 +463,11 @@ struct mapped_device *dm_table_get_md(struct dm_table *t); | |||
| 466 | void dm_table_event(struct dm_table *t); | 463 | void dm_table_event(struct dm_table *t); |
| 467 | 464 | ||
| 468 | /* | 465 | /* |
| 466 | * Run the queue for request-based targets. | ||
| 467 | */ | ||
| 468 | void dm_table_run_md_queue_async(struct dm_table *t); | ||
| 469 | |||
| 470 | /* | ||
| 469 | * The device must be suspended before calling this method. | 471 | * The device must be suspended before calling this method. |
| 470 | * Returns the previous table, which the caller must destroy. | 472 | * Returns the previous table, which the caller must destroy. |
| 471 | */ | 473 | */ |
