author    Linus Torvalds <torvalds@linux-foundation.org>  2017-02-21 15:11:41 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-02-21 15:11:41 -0500
commit    7a771ceac771d009f7203c40b256b0608d7ea2f8 (patch)
tree      940260bccb165f47669397515c00900629c01803 /drivers/md
parent    e67bd12d6036ae3de9eeb0ba52e43691264ec850 (diff)
parent    d67a5f4b5947aba4bfe9a80a2b86079c215ca755 (diff)
Merge tag 'dm-4.11-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Fix dm-raid transient device failure processing, along with other
  smaller dm-raid tweaks.
- Add journal support to the DM raid target to close the 'write hole'
on raid 4/5/6.
- Fix dm-cache corruption, due to a rounding bug, when the cache exceeds 2TB.
- Add 'metadata2' feature to dm-cache to separate the dirty bitset out
from other cache metadata. This improves speed of shutting down a
large cache device (which implies writing out dirty bits).
- Fix a memory leak during dm-stats data structure destruction.
- Fix a DM multipath round-robin path selector performance regression
that was caused by less precise balancing across all paths.
- Lastly, introduce a DM core fix for a long-standing DM snapshot
  deadlock that is rooted in the complexity of the device stack used in
  conjunction with block core maintaining bios on current->bio_list to
  manage recursion in generic_make_request().  A more comprehensive fix
  to block core (and its hook into the cpu scheduler) would be
  wonderful, but this DM-specific fix is pragmatic considering how
  difficult it has been to make progress on a generic fix; a sketch of
  the approach appears just after this list.
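The shape of that fix, condensed from the first commit in the list below
("dm: flush queued bios when process blocks to avoid deadlock") into a
sketch: while a target's map function runs, DM arms a block-plug callback so
that, should the task go to sleep, the bios it has queued on
current->bio_list are handed to each bio_set's rescuer workqueue instead of
being stranded behind the sleeping task.  Treat this as an illustration of
the idea in 4.10-era block-layer terms, not the verbatim patch:

    /*
     * Sketch: punt the current task's queued bios to their bio_sets'
     * rescue workqueues when the task is about to block.
     */
    struct dm_offload {
            struct blk_plug plug;
            struct blk_plug_cb cb;
    };

    static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
    {
            struct bio_list list;
            struct bio *bio;

            if (!current->bio_list)
                    return;

            /* Steal everything queued so far, then requeue selectively. */
            list = *current->bio_list;
            bio_list_init(current->bio_list);

            while ((bio = bio_list_pop(&list))) {
                    struct bio_set *bs = bio->bi_pool;

                    if (!bs || bs == fs_bio_set) {
                            /* Not from a private bio_set; leave it in place. */
                            bio_list_add(current->bio_list, bio);
                            continue;
                    }

                    /* Let the bio_set's rescuer thread resubmit it. */
                    spin_lock(&bs->rescue_lock);
                    bio_list_add(&bs->rescue_list, bio);
                    queue_work(bs->rescue_workqueue, &bs->rescue_work);
                    spin_unlock(&bs->rescue_lock);
            }
    }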
* tag 'dm-4.11-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (22 commits)
dm: flush queued bios when process blocks to avoid deadlock
dm round robin: revert "use percpu 'repeat_count' and 'current_path'"
dm stats: fix a leaked s->histogram_boundaries array
dm space map metadata: constify dm_space_map structures
dm cache metadata: use cursor api in blocks_are_clean_separate_dirty()
dm persistent data: add cursor skip functions to the cursor APIs
dm cache metadata: use dm_bitset_new() to create the dirty bitset in format 2
dm bitset: add dm_bitset_new()
dm cache metadata: name the cache block that couldn't be loaded
dm cache metadata: add "metadata2" feature
dm cache metadata: use bitset cursor api to load discard bitset
dm bitset: introduce cursor api
dm btree: use GFP_NOFS in dm_btree_del()
dm space map common: memcpy the disk root to ensure it's arch aligned
dm block manager: add unlikely() annotations on dm_bufio error paths
dm cache: fix corruption seen when using cache > 2TB
dm raid: cleanup awkward branching in raid_message() option processing
dm raid: use mddev rather than rdev->mddev
dm raid: use read_disk_sb() throughout
dm raid: add raid4/5/6 journaling support
...
Diffstat (limited to 'drivers/md')

 -rw-r--r--  drivers/md/dm-cache-metadata.c                     | 353
 -rw-r--r--  drivers/md/dm-cache-metadata.h                     |  11
 -rw-r--r--  drivers/md/dm-cache-target.c                       |  44
 -rw-r--r--  drivers/md/dm-raid.c                               | 296
 -rw-r--r--  drivers/md/dm-round-robin.c                        |  67
 -rw-r--r--  drivers/md/dm-stats.c                              |   1
 -rw-r--r--  drivers/md/dm.c                                    |  55
 -rw-r--r--  drivers/md/persistent-data/dm-array.c              |  21
 -rw-r--r--  drivers/md/persistent-data/dm-array.h              |   1
 -rw-r--r--  drivers/md/persistent-data/dm-bitset.c             | 146
 -rw-r--r--  drivers/md/persistent-data/dm-bitset.h             |  39
 -rw-r--r--  drivers/md/persistent-data/dm-block-manager.c      |   8
 -rw-r--r--  drivers/md/persistent-data/dm-btree.c              |  18
 -rw-r--r--  drivers/md/persistent-data/dm-btree.h              |   1
 -rw-r--r--  drivers/md/persistent-data/dm-space-map-common.c   |  16
 -rw-r--r--  drivers/md/persistent-data/dm-space-map-metadata.c |   4

16 files changed, 854 insertions(+), 227 deletions(-)
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 624fe4319b24..e4c2c1a1e993 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -25,7 +25,7 @@
  * defines a range of metadata versions that this module can handle.
  */
 #define MIN_CACHE_VERSION 1
-#define MAX_CACHE_VERSION 1
+#define MAX_CACHE_VERSION 2
 
 #define CACHE_METADATA_CACHE_SIZE 64
 
@@ -55,6 +55,7 @@ enum mapping_bits {
 
	/*
	 * The data on the cache is different from that on the origin.
+	 * This flag is only used by metadata format 1.
	 */
	M_DIRTY = 2
 };
@@ -93,12 +94,18 @@ struct cache_disk_superblock {
	__le32 write_misses;
 
	__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
+
+	/*
+	 * Metadata format 2 fields.
+	 */
+	__le64 dirty_root;
 } __packed;
 
 struct dm_cache_metadata {
	atomic_t ref_count;
	struct list_head list;
 
+	unsigned version;
	struct block_device *bdev;
	struct dm_block_manager *bm;
	struct dm_space_map *metadata_sm;
@@ -142,11 +149,18 @@ struct dm_cache_metadata {
	bool fail_io:1;
 
	/*
+	 * Metadata format 2 fields.
+	 */
+	dm_block_t dirty_root;
+	struct dm_disk_bitset dirty_info;
+
+	/*
	 * These structures are used when loading metadata.  They're too
	 * big to put on the stack.
	 */
	struct dm_array_cursor mapping_cursor;
	struct dm_array_cursor hint_cursor;
+	struct dm_bitset_cursor dirty_cursor;
 };
 
 /*-------------------------------------------------------------------
@@ -170,6 +184,7 @@ static void sb_prepare_for_write(struct dm_block_validator *v,
 static int check_metadata_version(struct cache_disk_superblock *disk_super)
 {
	uint32_t metadata_version = le32_to_cpu(disk_super->version);
+
	if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
		DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
		      metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
@@ -310,6 +325,11 @@ static void __copy_sm_root(struct dm_cache_metadata *cmd,
	       sizeof(cmd->metadata_space_map_root));
 }
 
+static bool separate_dirty_bits(struct dm_cache_metadata *cmd)
+{
+	return cmd->version >= 2;
+}
+
 static int __write_initial_superblock(struct dm_cache_metadata *cmd)
 {
	int r;
@@ -341,7 +361,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
	disk_super->flags = 0;
	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
-	disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
+	disk_super->version = cpu_to_le32(cmd->version);
	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
	memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
	disk_super->policy_hint_size = 0;
@@ -362,6 +382,9 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
	disk_super->write_hits = cpu_to_le32(0);
	disk_super->write_misses = cpu_to_le32(0);
 
+	if (separate_dirty_bits(cmd))
+		disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
+
	return dm_tm_commit(cmd->tm, sblock);
 }
 
@@ -382,6 +405,13 @@ static int __format_metadata(struct dm_cache_metadata *cmd)
	if (r < 0)
		goto bad;
 
+	if (separate_dirty_bits(cmd)) {
+		dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
+		r = dm_bitset_empty(&cmd->dirty_info, &cmd->dirty_root);
+		if (r < 0)
+			goto bad;
+	}
+
	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
	r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
	if (r < 0)
@@ -407,9 +437,10 @@ bad:
 static int __check_incompat_features(struct cache_disk_superblock *disk_super,
				     struct dm_cache_metadata *cmd)
 {
-	uint32_t features;
+	uint32_t incompat_flags, features;
 
-	features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
+	incompat_flags = le32_to_cpu(disk_super->incompat_flags);
+	features = incompat_flags & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
	if (features) {
		DMERR("could not access metadata due to unsupported optional features (%lx).",
		      (unsigned long)features);
@@ -470,6 +501,7 @@ static int __open_metadata(struct dm_cache_metadata *cmd)
	}
 
	__setup_mapping_info(cmd);
+	dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
	dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
	sb_flags = le32_to_cpu(disk_super->flags);
	cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
@@ -548,6 +580,7 @@ static unsigned long clear_clean_shutdown(unsigned long flags)
 static void read_superblock_fields(struct dm_cache_metadata *cmd,
				   struct cache_disk_superblock *disk_super)
 {
+	cmd->version = le32_to_cpu(disk_super->version);
	cmd->flags = le32_to_cpu(disk_super->flags);
	cmd->root = le64_to_cpu(disk_super->mapping_root);
	cmd->hint_root = le64_to_cpu(disk_super->hint_root);
@@ -567,6 +600,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
	cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
	cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
 
+	if (separate_dirty_bits(cmd))
+		cmd->dirty_root = le64_to_cpu(disk_super->dirty_root);
+
	cmd->changed = false;
 }
 
@@ -625,6 +661,13 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
	 */
	BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
 
+	if (separate_dirty_bits(cmd)) {
+		r = dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root,
+				    &cmd->dirty_root);
+		if (r)
+			return r;
+	}
+
	r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
			    &cmd->discard_root);
	if (r)
@@ -649,6 +692,8 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
	update_flags(disk_super, mutator);
 
	disk_super->mapping_root = cpu_to_le64(cmd->root);
+	if (separate_dirty_bits(cmd))
+		disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
	disk_super->hint_root = cpu_to_le64(cmd->hint_root);
	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
@@ -698,7 +743,8 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
 static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
					       sector_t data_block_size,
					       bool may_format_device,
-					       size_t policy_hint_size)
+					       size_t policy_hint_size,
+					       unsigned metadata_version)
 {
	int r;
	struct dm_cache_metadata *cmd;
@@ -709,6 +755,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
		return ERR_PTR(-ENOMEM);
	}
 
+	cmd->version = metadata_version;
	atomic_set(&cmd->ref_count, 1);
	init_rwsem(&cmd->root_lock);
	cmd->bdev = bdev;
@@ -757,7 +804,8 @@ static struct dm_cache_metadata *lookup(struct block_device *bdev)
 static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
						sector_t data_block_size,
						bool may_format_device,
-						size_t policy_hint_size)
+						size_t policy_hint_size,
+						unsigned metadata_version)
 {
	struct dm_cache_metadata *cmd, *cmd2;
 
@@ -768,7 +816,8 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
	if (cmd)
		return cmd;
 
-	cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
+	cmd = metadata_open(bdev, data_block_size, may_format_device,
+			    policy_hint_size, metadata_version);
	if (!IS_ERR(cmd)) {
		mutex_lock(&table_lock);
		cmd2 = lookup(bdev);
@@ -800,10 +849,11 @@ static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
						 sector_t data_block_size,
						 bool may_format_device,
-						 size_t policy_hint_size)
+						 size_t policy_hint_size,
+						 unsigned metadata_version)
 {
-	struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
-						       may_format_device, policy_hint_size);
+	struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, may_format_device,
+						       policy_hint_size, metadata_version);
 
	if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
		dm_cache_metadata_close(cmd);
@@ -829,8 +879,8 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
 /*
  * Checks that the given cache block is either unmapped or clean.
  */
-static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
-				   bool *result)
+static int block_clean_combined_dirty(struct dm_cache_metadata *cmd, dm_cblock_t b,
+				      bool *result)
 {
	int r;
	__le64 value;
@@ -838,10 +888,8 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
	unsigned flags;
 
	r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
-	if (r) {
-		DMERR("block_unmapped_or_clean failed");
+	if (r)
		return r;
-	}
 
	unpack_value(value, &ob, &flags);
	*result = !((flags & M_VALID) && (flags & M_DIRTY));
@@ -849,17 +897,19 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
	return 0;
 }
 
-static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
-					dm_cblock_t begin, dm_cblock_t end,
-					bool *result)
+static int blocks_are_clean_combined_dirty(struct dm_cache_metadata *cmd,
+					   dm_cblock_t begin, dm_cblock_t end,
+					   bool *result)
 {
	int r;
	*result = true;
 
	while (begin != end) {
-		r = block_unmapped_or_clean(cmd, begin, result);
-		if (r)
+		r = block_clean_combined_dirty(cmd, begin, result);
+		if (r) {
+			DMERR("block_clean_combined_dirty failed");
			return r;
+		}
 
		if (!*result) {
			DMERR("cache block %llu is dirty",
@@ -873,6 +923,67 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
	return 0;
 }
 
+static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
+					   dm_cblock_t begin, dm_cblock_t end,
+					   bool *result)
+{
+	int r;
+	bool dirty_flag;
+	*result = true;
+
+	r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
+				   from_cblock(begin), &cmd->dirty_cursor);
+	if (r) {
+		DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__);
+		return r;
+	}
+
+	r = dm_bitset_cursor_skip(&cmd->dirty_cursor, from_cblock(begin));
+	if (r) {
+		DMERR("%s: dm_bitset_cursor_skip for dirty failed", __func__);
+		dm_bitset_cursor_end(&cmd->dirty_cursor);
+		return r;
+	}
+
+	while (begin != end) {
+		/*
+		 * We assume that unmapped blocks have their dirty bit
+		 * cleared.
+		 */
+		dirty_flag = dm_bitset_cursor_get_value(&cmd->dirty_cursor);
+		if (dirty_flag) {
+			DMERR("%s: cache block %llu is dirty", __func__,
+			      (unsigned long long) from_cblock(begin));
+			dm_bitset_cursor_end(&cmd->dirty_cursor);
+			*result = false;
+			return 0;
+		}
+
+		r = dm_bitset_cursor_next(&cmd->dirty_cursor);
+		if (r) {
+			DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__);
+			dm_bitset_cursor_end(&cmd->dirty_cursor);
+			return r;
+		}
+
+		begin = to_cblock(from_cblock(begin) + 1);
+	}
+
+	dm_bitset_cursor_end(&cmd->dirty_cursor);
+
+	return 0;
+}
+
+static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
+					dm_cblock_t begin, dm_cblock_t end,
+					bool *result)
+{
+	if (separate_dirty_bits(cmd))
+		return blocks_are_clean_separate_dirty(cmd, begin, end, result);
+	else
+		return blocks_are_clean_combined_dirty(cmd, begin, end, result);
+}
+
 static bool cmd_write_lock(struct dm_cache_metadata *cmd)
 {
	down_write(&cmd->root_lock);
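The dm_bitset cursor calls above follow a fixed begin / get_value / next /
end discipline, with dm_bitset_cursor_skip available for starting
mid-bitset.  As a minimal illustration (count_set_bits is hypothetical, not
a function in the tree; the dm_bitset_cursor_* signatures are the ones this
patch uses):

    /* Hypothetical example: count the set bits in an on-disk dm_bitset. */
    static int count_set_bits(struct dm_disk_bitset *info, dm_block_t root,
                              uint32_t nr_entries, uint32_t *result)
    {
            int r;
            uint32_t i, count = 0;
            struct dm_bitset_cursor c;

            r = dm_bitset_cursor_begin(info, root, nr_entries, &c);
            if (r)
                    return r;

            for (i = 0; i < nr_entries; i++) {
                    if (dm_bitset_cursor_get_value(&c))
                            count++;

                    /* Don't step past the final entry. */
                    if (i + 1 < nr_entries) {
                            r = dm_bitset_cursor_next(&c);
                            if (r)
                                    break;
                    }
            }

            dm_bitset_cursor_end(&c);

            if (!r)
                    *result = count;
            return r;
    }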
@@ -950,8 +1061,18 @@ int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
	r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
			    from_cblock(new_cache_size),
			    &null_mapping, &cmd->root);
-	if (!r)
-		cmd->cache_blocks = new_cache_size;
+	if (r)
+		goto out;
+
+	if (separate_dirty_bits(cmd)) {
+		r = dm_bitset_resize(&cmd->dirty_info, cmd->dirty_root,
+				     from_cblock(cmd->cache_blocks), from_cblock(new_cache_size),
+				     false, &cmd->dirty_root);
+		if (r)
+			goto out;
+	}
+
+	cmd->cache_blocks = new_cache_size;
	cmd->changed = true;
 
 out:
@@ -995,14 +1116,6 @@ static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
				 from_dblock(b), &cmd->discard_root);
 }
 
-static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
-			  bool *is_discarded)
-{
-	return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
-				  from_dblock(b), &cmd->discard_root,
-				  is_discarded);
-}
-
 static int __discard(struct dm_cache_metadata *cmd,
		     dm_dblock_t dblock, bool discard)
 {
@@ -1032,22 +1145,38 @@ static int __load_discards(struct dm_cache_metadata *cmd,
			   load_discard_fn fn, void *context)
 {
	int r = 0;
-	dm_block_t b;
-	bool discard;
+	uint32_t b;
+	struct dm_bitset_cursor c;
 
-	for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
-		dm_dblock_t dblock = to_dblock(b);
+	if (from_dblock(cmd->discard_nr_blocks) == 0)
+		/* nothing to do */
+		return 0;
 
	if (cmd->clean_when_opened) {
-			r = __is_discarded(cmd, dblock, &discard);
+		r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, &cmd->discard_root);
		if (r)
			return r;
-	} else
-		discard = false;
 
-		r = fn(context, cmd->discard_block_size, dblock, discard);
+		r = dm_bitset_cursor_begin(&cmd->discard_info, cmd->discard_root,
+					   from_dblock(cmd->discard_nr_blocks), &c);
		if (r)
-			break;
+			return r;
+
+		for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
+			r = fn(context, cmd->discard_block_size, to_dblock(b),
+			       dm_bitset_cursor_get_value(&c));
+			if (r)
+				break;
+		}
+
+		dm_bitset_cursor_end(&c);
+
+	} else {
+		for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
+			r = fn(context, cmd->discard_block_size, to_dblock(b), false);
+			if (r)
+				return r;
+		}
	}
 
	return r;
@@ -1177,11 +1306,11 @@ static bool hints_array_available(struct dm_cache_metadata *cmd,
	       hints_array_initialized(cmd);
 }
 
-static int __load_mapping(struct dm_cache_metadata *cmd,
-			  uint64_t cb, bool hints_valid,
-			  struct dm_array_cursor *mapping_cursor,
-			  struct dm_array_cursor *hint_cursor,
-			  load_mapping_fn fn, void *context)
+static int __load_mapping_v1(struct dm_cache_metadata *cmd,
+			     uint64_t cb, bool hints_valid,
+			     struct dm_array_cursor *mapping_cursor,
+			     struct dm_array_cursor *hint_cursor,
+			     load_mapping_fn fn, void *context)
 {
	int r = 0;
 
@@ -1206,8 +1335,51 @@ static int __load_mapping(struct dm_cache_metadata *cmd,
 
		r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY,
		       le32_to_cpu(hint), hints_valid);
-		if (r)
-			DMERR("policy couldn't load cblock");
+		if (r) {
+			DMERR("policy couldn't load cache block %llu",
+			      (unsigned long long) from_cblock(to_cblock(cb)));
+		}
+	}
+
+	return r;
+}
+
+static int __load_mapping_v2(struct dm_cache_metadata *cmd,
+			     uint64_t cb, bool hints_valid,
+			     struct dm_array_cursor *mapping_cursor,
+			     struct dm_array_cursor *hint_cursor,
+			     struct dm_bitset_cursor *dirty_cursor,
+			     load_mapping_fn fn, void *context)
+{
+	int r = 0;
+
+	__le64 mapping;
+	__le32 hint = 0;
+
+	__le64 *mapping_value_le;
+	__le32 *hint_value_le;
+
+	dm_oblock_t oblock;
+	unsigned flags;
+	bool dirty;
+
+	dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
+	memcpy(&mapping, mapping_value_le, sizeof(mapping));
+	unpack_value(mapping, &oblock, &flags);
+
+	if (flags & M_VALID) {
+		if (hints_valid) {
+			dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
+			memcpy(&hint, hint_value_le, sizeof(hint));
+		}
+
+		dirty = dm_bitset_cursor_get_value(dirty_cursor);
+		r = fn(context, oblock, to_cblock(cb), dirty,
+		       le32_to_cpu(hint), hints_valid);
+		if (r) {
+			DMERR("policy couldn't load cache block %llu",
+			      (unsigned long long) from_cblock(to_cblock(cb)));
+		}
	}
 
	return r;
@@ -1238,10 +1410,28 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
		}
	}
 
+	if (separate_dirty_bits(cmd)) {
+		r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
+					   from_cblock(cmd->cache_blocks),
+					   &cmd->dirty_cursor);
+		if (r) {
+			dm_array_cursor_end(&cmd->hint_cursor);
+			dm_array_cursor_end(&cmd->mapping_cursor);
+			return r;
+		}
+	}
+
	for (cb = 0; ; cb++) {
-		r = __load_mapping(cmd, cb, hints_valid,
-				   &cmd->mapping_cursor, &cmd->hint_cursor,
-				   fn, context);
+		if (separate_dirty_bits(cmd))
+			r = __load_mapping_v2(cmd, cb, hints_valid,
+					      &cmd->mapping_cursor,
+					      &cmd->hint_cursor,
+					      &cmd->dirty_cursor,
+					      fn, context);
+		else
+			r = __load_mapping_v1(cmd, cb, hints_valid,
+					      &cmd->mapping_cursor, &cmd->hint_cursor,
+					      fn, context);
		if (r)
			goto out;
 
@@ -1264,12 +1454,23 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
			goto out;
		}
	}
+
+		if (separate_dirty_bits(cmd)) {
+			r = dm_bitset_cursor_next(&cmd->dirty_cursor);
+			if (r) {
+				DMERR("dm_bitset_cursor_next for dirty failed");
+				goto out;
+			}
+		}
	}
 out:
	dm_array_cursor_end(&cmd->mapping_cursor);
	if (hints_valid)
		dm_array_cursor_end(&cmd->hint_cursor);
 
+	if (separate_dirty_bits(cmd))
+		dm_bitset_cursor_end(&cmd->dirty_cursor);
+
	return r;
 }
 
@@ -1352,13 +1553,55 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty
 
 }
 
-int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
-		       dm_cblock_t cblock, bool dirty)
+static int __set_dirty_bits_v1(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
+{
+	int r;
+	unsigned i;
+	for (i = 0; i < nr_bits; i++) {
+		r = __dirty(cmd, to_cblock(i), test_bit(i, bits));
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int is_dirty_callback(uint32_t index, bool *value, void *context)
+{
+	unsigned long *bits = context;
+	*value = test_bit(index, bits);
+	return 0;
+}
+
+static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
+{
+	int r = 0;
+
+	/* nr_bits is really just a sanity check */
+	if (nr_bits != from_cblock(cmd->cache_blocks)) {
+		DMERR("dirty bitset is wrong size");
+		return -EINVAL;
+	}
+
+	r = dm_bitset_del(&cmd->dirty_info, cmd->dirty_root);
+	if (r)
+		return r;
+
+	cmd->changed = true;
+	return dm_bitset_new(&cmd->dirty_info, &cmd->dirty_root, nr_bits, is_dirty_callback, bits);
+}
+
+int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd,
+			    unsigned nr_bits,
+			    unsigned long *bits)
 {
	int r;
 
	WRITE_LOCK(cmd);
-	r = __dirty(cmd, cblock, dirty);
+	if (separate_dirty_bits(cmd))
+		r = __set_dirty_bits_v2(cmd, nr_bits, bits);
+	else
+		r = __set_dirty_bits_v1(cmd, nr_bits, bits);
	WRITE_UNLOCK(cmd);
 
	return r;
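Taken together, the caller-visible contract of the metadata changes is the
extended open call plus the new bulk dirty-bit writer.  A hypothetical
format-2 consumer (shutdown_example is illustrative; only the dm_cache_*
calls and their signatures come from this patch):

    /* Hypothetical caller of the new format-2 interface. */
    static int shutdown_example(struct block_device *bdev, sector_t block_size,
                                size_t hint_size, unsigned nr_cblocks,
                                unsigned long *dirty_bitmap)
    {
            int r;
            struct dm_cache_metadata *cmd;

            /* The new final argument selects metadata format 2. */
            cmd = dm_cache_metadata_open(bdev, block_size, true, hint_size, 2);
            if (IS_ERR(cmd))
                    return PTR_ERR(cmd);

            /*
             * One call replaces the old per-cblock dm_cache_set_dirty() loop.
             * nr_cblocks must equal the cache size (the v2 path checks this);
             * the whole bitmap is then written in a single dm_bitset_new() pass.
             */
            r = dm_cache_set_dirty_bits(cmd, nr_cblocks, dirty_bitmap);

            dm_cache_metadata_close(cmd);
            return r;
    }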
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 8528744195e5..4f07c08cf107 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -45,18 +45,20 @@
  * As these various flags are defined they should be added to the
  * following masks.
  */
+
 #define DM_CACHE_FEATURE_COMPAT_SUPP	  0UL
 #define DM_CACHE_FEATURE_COMPAT_RO_SUPP	  0UL
 #define DM_CACHE_FEATURE_INCOMPAT_SUPP	  0UL
 
 /*
- * Reopens or creates a new, empty metadata volume.
- * Returns an ERR_PTR on failure.
+ * Reopens or creates a new, empty metadata volume.  Returns an ERR_PTR on
+ * failure.  If reopening then features must match.
  */
 struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
						 sector_t data_block_size,
						 bool may_format_device,
-						 size_t policy_hint_size);
+						 size_t policy_hint_size,
+						 unsigned metadata_version);
 
 void dm_cache_metadata_close(struct dm_cache_metadata *cmd);
 
@@ -91,7 +93,8 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
			   load_mapping_fn fn,
			   void *context);
 
-int dm_cache_set_dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty);
+int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd,
+			    unsigned nr_bits, unsigned long *bits);
 
 struct dm_cache_statistics {
	uint32_t read_hits;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 894bc14469c8..9c689b34e6e7 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -179,6 +179,7 @@ enum cache_io_mode {
 struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
+	unsigned metadata_version;
 };
 
 struct cache_stats {
@@ -248,7 +249,7 @@ struct cache {
	/*
	 * Fields for converting from sectors to blocks.
	 */
-	uint32_t sectors_per_block;
+	sector_t sectors_per_block;
	int sectors_per_block_shift;
 
	spinlock_t lock;
@@ -2534,13 +2535,14 @@ static void init_features(struct cache_features *cf)
 {
	cf->mode = CM_WRITE;
	cf->io_mode = CM_IO_WRITEBACK;
+	cf->metadata_version = 1;
 }
 
 static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
			  char **error)
 {
	static struct dm_arg _args[] = {
-		{0, 1, "Invalid number of cache feature arguments"},
+		{0, 2, "Invalid number of cache feature arguments"},
	};
 
	int r;
@@ -2566,6 +2568,9 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
		else if (!strcasecmp(arg, "passthrough"))
			cf->io_mode = CM_IO_PASSTHROUGH;
 
+		else if (!strcasecmp(arg, "metadata2"))
+			cf->metadata_version = 2;
+
		else {
			*error = "Unrecognised cache feature requested";
			return -EINVAL;
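On the userspace side, the new keyword rides in the cache target's optional
feature arguments.  An illustrative table line (device names and sizes are
hypothetical; the fields are metadata dev, cache dev, origin dev, block size
in sectors, feature-arg count plus args, then policy and its arg count):

    0 409600 cache /dev/mapper/cmeta /dev/mapper/cdata /dev/mapper/corig 512 1 metadata2 smq 0

Omitting the keyword leaves cf->metadata_version at 1, so existing caches
keep the old on-disk format until metadata2 is explicitly requested.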
@@ -2820,7 +2825,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
	cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
				     ca->block_size, may_format,
-				     dm_cache_policy_get_hint_size(cache->policy));
+				     dm_cache_policy_get_hint_size(cache->policy),
+				     ca->features.metadata_version);
	if (IS_ERR(cmd)) {
		*error = "Error creating metadata object";
		r = PTR_ERR(cmd);
@@ -3165,21 +3171,16 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
 
 static int write_dirty_bitset(struct cache *cache)
 {
-	unsigned i, r;
+	int r;
 
	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;
 
-	for (i = 0; i < from_cblock(cache->cache_size); i++) {
-		r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
-				       is_dirty(cache, to_cblock(i)));
-		if (r) {
-			metadata_operation_failed(cache, "dm_cache_set_dirty", r);
-			return r;
-		}
-	}
+	r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
+	if (r)
+		metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
 
-	return 0;
+	return r;
 }
 
 static int write_discard_bitset(struct cache *cache)
@@ -3540,11 +3541,11 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 
	residency = policy_residency(cache->policy);
 
-	DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
+	DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
	       (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
	       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
	       (unsigned long long)nr_blocks_metadata,
-	       cache->sectors_per_block,
+	       (unsigned long long)cache->sectors_per_block,
	       (unsigned long long) from_cblock(residency),
	       (unsigned long long) from_cblock(cache->cache_size),
	       (unsigned) atomic_read(&cache->stats.read_hit),
@@ -3555,14 +3556,19 @@ static void cache_status(struct dm_target *ti, status_type_t type,
	       (unsigned) atomic_read(&cache->stats.promotion),
	       (unsigned long) atomic_read(&cache->nr_dirty));
 
+	if (cache->features.metadata_version == 2)
+		DMEMIT("2 metadata2 ");
+	else
+		DMEMIT("1 ");
+
	if (writethrough_mode(&cache->features))
-		DMEMIT("1 writethrough ");
+		DMEMIT("writethrough ");
 
	else if (passthrough_mode(&cache->features))
-		DMEMIT("1 passthrough ");
+		DMEMIT("passthrough ");
 
	else if (writeback_mode(&cache->features))
-		DMEMIT("1 writeback ");
+		DMEMIT("writeback ");
 
	else {
		DMERR("%s: internal error: unknown io mode: %d",
@@ -3810,7 +3816,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
	.name = "cache",
-	.version = {1, 9, 0},
+	.version = {1, 10, 0},
	.module = THIS_MODULE,
	.ctr = cache_ctr,
	.dtr = cache_dtr,
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b8f978e551d7..5c9e95d66f3b 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -24,6 +24,11 @@
  */
 #define MIN_FREE_RESHAPE_SPACE to_sector(4*4096)
 
+/*
+ * Minimum journal space 4 MiB in sectors.
+ */
+#define MIN_RAID456_JOURNAL_SPACE (4*2048)
+
 static bool devices_handle_discard_safely = false;
 
 /*
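(Arithmetic for the constant: with 512-byte sectors, 4 MiB = 4 * 1024 * 1024
/ 512 = 8192 sectors, which the macro spells as 4*2048.)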
@@ -73,6 +78,9 @@ struct raid_dev {
 #define __CTR_FLAG_DATA_OFFSET		13 /* 2 */ /* Only with reshapable raid4/5/6/10! */
 #define __CTR_FLAG_RAID10_USE_NEAR_SETS 14 /* 2 */ /* Only with raid10! */
 
+/* New for v1.10.0 */
+#define __CTR_FLAG_JOURNAL_DEV		15 /* 2 */ /* Only with raid4/5/6! */
+
 /*
  * Flags for rs->ctr_flags field.
  */
@@ -91,6 +99,7 @@ struct raid_dev {
 #define CTR_FLAG_DELTA_DISKS		(1 << __CTR_FLAG_DELTA_DISKS)
 #define CTR_FLAG_DATA_OFFSET		(1 << __CTR_FLAG_DATA_OFFSET)
 #define CTR_FLAG_RAID10_USE_NEAR_SETS	(1 << __CTR_FLAG_RAID10_USE_NEAR_SETS)
+#define CTR_FLAG_JOURNAL_DEV		(1 << __CTR_FLAG_JOURNAL_DEV)
 
 /*
  * Definitions of various constructor flags to
@@ -163,7 +172,8 @@ struct raid_dev {
				 CTR_FLAG_STRIPE_CACHE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
-				 CTR_FLAG_DATA_OFFSET)
+				 CTR_FLAG_DATA_OFFSET | \
+				 CTR_FLAG_JOURNAL_DEV)
 
 #define RAID6_VALID_FLAGS	(CTR_FLAG_SYNC | \
				 CTR_FLAG_REBUILD | \
@@ -173,7 +183,8 @@ struct raid_dev {
				 CTR_FLAG_STRIPE_CACHE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
-				 CTR_FLAG_DATA_OFFSET)
+				 CTR_FLAG_DATA_OFFSET | \
+				 CTR_FLAG_JOURNAL_DEV)
 /* ...valid options definitions per raid level */
 
 /*
@@ -222,6 +233,12 @@ struct raid_set {
	struct raid_type *raid_type;
	struct dm_target_callbacks callbacks;
 
+	/* Optional raid4/5/6 journal device */
+	struct journal_dev {
+		struct dm_dev *dev;
+		struct md_rdev rdev;
+	} journal_dev;
+
	struct raid_dev dev[0];
 };
 
@@ -306,6 +323,7 @@ static struct arg_name_flag {
	{ CTR_FLAG_DATA_OFFSET, "data_offset"},
	{ CTR_FLAG_DELTA_DISKS, "delta_disks"},
	{ CTR_FLAG_RAID10_USE_NEAR_SETS, "raid10_use_near_sets"},
+	{ CTR_FLAG_JOURNAL_DEV, "journal_dev" },
 };
 
 /* Return argument name string for given @flag */
@@ -370,7 +388,7 @@ static bool rs_is_reshapable(struct raid_set *rs)
 /* Return true, if raid set in @rs is recovering */
 static bool rs_is_recovering(struct raid_set *rs)
 {
-	return rs->md.recovery_cp < rs->dev[0].rdev.sectors;
+	return rs->md.recovery_cp < rs->md.dev_sectors;
 }
 
 /* Return true, if raid set in @rs is reshaping */
@@ -627,7 +645,8 @@ static void rs_set_capacity(struct raid_set *rs)
	 * is unintended in case of out-of-place reshaping
	 */
	rdev_for_each(rdev, mddev)
-		rdev->sectors = mddev->dev_sectors;
+		if (!test_bit(Journal, &rdev->flags))
+			rdev->sectors = mddev->dev_sectors;
 
	set_capacity(gendisk, mddev->array_sectors);
	revalidate_disk(gendisk);
@@ -713,6 +732,11 @@ static void raid_set_free(struct raid_set *rs)
 {
	int i;
 
+	if (rs->journal_dev.dev) {
+		md_rdev_clear(&rs->journal_dev.rdev);
+		dm_put_device(rs->ti, rs->journal_dev.dev);
+	}
+
	for (i = 0; i < rs->raid_disks; i++) {
		if (rs->dev[i].meta_dev)
			dm_put_device(rs->ti, rs->dev[i].meta_dev);
@@ -760,10 +784,11 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
		rs->dev[i].data_dev = NULL;
 
		/*
-		 * There are no offsets, since there is a separate device
-		 * for data and metadata.
+		 * There are no offsets initially.
+		 * Out of place reshape will set them accordingly.
		 */
		rs->dev[i].rdev.data_offset = 0;
+		rs->dev[i].rdev.new_data_offset = 0;
		rs->dev[i].rdev.mddev = &rs->md;
 
		arg = dm_shift_arg(as);
@@ -821,6 +846,9 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
			rebuild++;
		}
 
+	if (rs->journal_dev.dev)
+		list_add_tail(&rs->journal_dev.rdev.same_set, &rs->md.disks);
+
	if (metadata_available) {
		rs->md.external = 0;
		rs->md.persistent = 1;
@@ -1026,6 +1054,8 @@ too_many:
 *    [max_write_behind <sectors>]	See '-write-behind=' (man mdadm)
 *    [stripe_cache <sectors>]		Stripe cache size for higher RAIDs
 *    [region_size <sectors>]		Defines granularity of bitmap
+ *    [journal_dev <dev>]		raid4/5/6 journaling device
+ *					(i.e. write hole closing log)
 *
 * RAID10-only options:
 *    [raid10_copies <# copies>]	Number of copies.  (Default: 2)
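An illustrative dm-raid table line using the new parameter documented above
(hypothetical devices; '-' means a raid device has no separate metadata
device; the five raid params here are the chunk size in sectors plus the two
keyword/value pairs):

    0 125829120 raid raid5_ls 5 64 region_size 8192 journal_dev /dev/sdj 4 - /dev/sda - /dev/sdb - /dev/sdc - /dev/sdd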
@@ -1133,7 +1163,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
		/*
		 * Parameters that take a string value are checked here.
		 */
-
+		/* "raid10_format {near|offset|far}" */
		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT))) {
			if (test_and_set_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) {
				rs->ti->error = "Only one 'raid10_format' argument pair allowed";
@@ -1151,6 +1181,41 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
			continue;
		}
 
+		/* "journal_dev dev" */
+		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV))) {
+			int r;
+			struct md_rdev *jdev;
+
+			if (test_and_set_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
+				rs->ti->error = "Only one raid4/5/6 set journaling device allowed";
+				return -EINVAL;
+			}
+			if (!rt_is_raid456(rt)) {
+				rs->ti->error = "'journal_dev' is an invalid parameter for this RAID type";
+				return -EINVAL;
+			}
+			r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table),
+					  &rs->journal_dev.dev);
+			if (r) {
+				rs->ti->error = "raid4/5/6 journal device lookup failure";
+				return r;
+			}
+			jdev = &rs->journal_dev.rdev;
+			md_rdev_init(jdev);
+			jdev->mddev = &rs->md;
+			jdev->bdev = rs->journal_dev.dev->bdev;
+			jdev->sectors = to_sector(i_size_read(jdev->bdev->bd_inode));
+			if (jdev->sectors < MIN_RAID456_JOURNAL_SPACE) {
+				rs->ti->error = "No space for raid4/5/6 journal";
+				return -ENOSPC;
+			}
+			set_bit(Journal, &jdev->flags);
+			continue;
+		}
+
+		/*
+		 * Parameters with number values from here on.
+		 */
		if (kstrtoint(arg, 10, &value) < 0) {
			rs->ti->error = "Bad numerical argument given in raid params";
			return -EINVAL;
@@ -1425,6 +1490,25 @@ static unsigned int rs_data_stripes(struct raid_set *rs)
	return rs->raid_disks - rs->raid_type->parity_devs;
 }
 
+/*
+ * Retrieve rdev->sectors from any valid raid device of @rs
+ * to allow userspace to pass in arbitrary "- -" device tuples.
+ */
+static sector_t __rdev_sectors(struct raid_set *rs)
+{
+	int i;
+
+	for (i = 0; i < rs->md.raid_disks; i++) {
+		struct md_rdev *rdev = &rs->dev[i].rdev;
+
+		if (!test_bit(Journal, &rdev->flags) &&
+		    rdev->bdev && rdev->sectors)
+			return rdev->sectors;
+	}
+
+	BUG(); /* Constructor ensures we got some. */
+}
+
 /* Calculate the sectors per device and per array used for @rs */
 static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
 {
@@ -1468,7 +1552,8 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
		array_sectors = (data_stripes + delta_disks) * dev_sectors;
 
	rdev_for_each(rdev, mddev)
-		rdev->sectors = dev_sectors;
+		if (!test_bit(Journal, &rdev->flags))
+			rdev->sectors = dev_sectors;
 
	mddev->array_sectors = array_sectors;
	mddev->dev_sectors = dev_sectors;
@@ -1510,9 +1595,9 @@ static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
	else if (dev_sectors == MaxSector)
		/* Prevent recovery */
		__rs_setup_recovery(rs, MaxSector);
-	else if (rs->dev[0].rdev.sectors < dev_sectors)
+	else if (__rdev_sectors(rs) < dev_sectors)
		/* Grown raid set */
-		__rs_setup_recovery(rs, rs->dev[0].rdev.sectors);
+		__rs_setup_recovery(rs, __rdev_sectors(rs));
	else
		__rs_setup_recovery(rs, MaxSector);
 }
@@ -1851,18 +1936,21 @@ static int rs_check_reshape(struct raid_set *rs) | |||
1851 | return -EPERM; | 1936 | return -EPERM; |
1852 | } | 1937 | } |
1853 | 1938 | ||
1854 | static int read_disk_sb(struct md_rdev *rdev, int size) | 1939 | static int read_disk_sb(struct md_rdev *rdev, int size, bool force_reload) |
1855 | { | 1940 | { |
1856 | BUG_ON(!rdev->sb_page); | 1941 | BUG_ON(!rdev->sb_page); |
1857 | 1942 | ||
1858 | if (rdev->sb_loaded) | 1943 | if (rdev->sb_loaded && !force_reload) |
1859 | return 0; | 1944 | return 0; |
1860 | 1945 | ||
1946 | rdev->sb_loaded = 0; | ||
1947 | |||
1861 | if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) { | 1948 | if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) { |
1862 | DMERR("Failed to read superblock of device at position %d", | 1949 | DMERR("Failed to read superblock of device at position %d", |
1863 | rdev->raid_disk); | 1950 | rdev->raid_disk); |
1864 | md_error(rdev->mddev, rdev); | 1951 | md_error(rdev->mddev, rdev); |
1865 | return -EINVAL; | 1952 | set_bit(Faulty, &rdev->flags); |
1953 | return -EIO; | ||
1866 | } | 1954 | } |
1867 | 1955 | ||
1868 | rdev->sb_loaded = 1; | 1956 | rdev->sb_loaded = 1; |
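The new force_reload argument exists for the faulty-device revival path further down: rdev->sb_loaded is still set from the device's last, pre-failure load, and without the flag the stale in-core superblock would be trusted. A hedged sketch of the intended call pattern, mirroring the attempt_restore_of_faulty_devices() hunk below (revive() is a hypothetical placeholder):

/* Sketch only: force a re-read even though rdev->sb_loaded is set, so a
 * transiently failed device can prove its superblock is readable again. */
if (test_bit(Faulty, &r->flags) &&
    r->meta_bdev && !read_disk_sb(r, r->sb_size, true))
        revive(r);      /* hypothetical: clear Faulty, hot-add it back */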
@@ -1990,7 +2078,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) | |||
1990 | return -EINVAL; | 2078 | return -EINVAL; |
1991 | } | 2079 | } |
1992 | 2080 | ||
1993 | r = read_disk_sb(rdev, rdev->sb_size); | 2081 | r = read_disk_sb(rdev, rdev->sb_size, false); |
1994 | if (r) | 2082 | if (r) |
1995 | return r; | 2083 | return r; |
1996 | 2084 | ||
@@ -2146,6 +2234,9 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) | |||
2146 | */ | 2234 | */ |
2147 | d = 0; | 2235 | d = 0; |
2148 | rdev_for_each(r, mddev) { | 2236 | rdev_for_each(r, mddev) { |
2237 | if (test_bit(Journal, &rdev->flags)) | ||
2238 | continue; | ||
2239 | |||
2149 | if (test_bit(FirstUse, &r->flags)) | 2240 | if (test_bit(FirstUse, &r->flags)) |
2150 | new_devs++; | 2241 | new_devs++; |
2151 | 2242 | ||
@@ -2201,7 +2292,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) | |||
2201 | */ | 2292 | */ |
2202 | sb_retrieve_failed_devices(sb, failed_devices); | 2293 | sb_retrieve_failed_devices(sb, failed_devices); |
2203 | rdev_for_each(r, mddev) { | 2294 | rdev_for_each(r, mddev) { |
2204 | if (!r->sb_page) | 2295 | if (test_bit(Journal, &rdev->flags) || |
2296 | !r->sb_page) | ||
2205 | continue; | 2297 | continue; |
2206 | sb2 = page_address(r->sb_page); | 2298 | sb2 = page_address(r->sb_page); |
2207 | sb2->failed_devices = 0; | 2299 | sb2->failed_devices = 0; |
@@ -2253,7 +2345,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) | |||
2253 | struct mddev *mddev = &rs->md; | 2345 | struct mddev *mddev = &rs->md; |
2254 | struct dm_raid_superblock *sb; | 2346 | struct dm_raid_superblock *sb; |
2255 | 2347 | ||
2256 | if (rs_is_raid0(rs) || !rdev->sb_page) | 2348 | if (rs_is_raid0(rs) || !rdev->sb_page || rdev->raid_disk < 0) |
2257 | return 0; | 2349 | return 0; |
2258 | 2350 | ||
2259 | sb = page_address(rdev->sb_page); | 2351 | sb = page_address(rdev->sb_page); |
@@ -2278,7 +2370,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) | |||
2278 | 2370 | ||
2279 | /* Enable bitmap creation for RAID levels != 0 */ | 2371 | /* Enable bitmap creation for RAID levels != 0 */ |
2280 | mddev->bitmap_info.offset = rt_is_raid0(rs->raid_type) ? 0 : to_sector(4096); | 2372 | mddev->bitmap_info.offset = rt_is_raid0(rs->raid_type) ? 0 : to_sector(4096); |
2281 | rdev->mddev->bitmap_info.default_offset = mddev->bitmap_info.offset; | 2373 | mddev->bitmap_info.default_offset = mddev->bitmap_info.offset; |
2282 | 2374 | ||
2283 | if (!test_and_clear_bit(FirstUse, &rdev->flags)) { | 2375 | if (!test_and_clear_bit(FirstUse, &rdev->flags)) { |
2284 | /* Retrieve device size stored in superblock to be prepared for shrink */ | 2376 | /* Retrieve device size stored in superblock to be prepared for shrink */ |
@@ -2316,21 +2408,22 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) | |||
2316 | static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | 2408 | static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) |
2317 | { | 2409 | { |
2318 | int r; | 2410 | int r; |
2319 | struct raid_dev *dev; | 2411 | struct md_rdev *rdev, *freshest; |
2320 | struct md_rdev *rdev, *tmp, *freshest; | ||
2321 | struct mddev *mddev = &rs->md; | 2412 | struct mddev *mddev = &rs->md; |
2322 | 2413 | ||
2323 | freshest = NULL; | 2414 | freshest = NULL; |
2324 | rdev_for_each_safe(rdev, tmp, mddev) { | 2415 | rdev_for_each(rdev, mddev) { |
2416 | if (test_bit(Journal, &rdev->flags)) | ||
2417 | continue; | ||
2418 | |||
2325 | /* | 2419 | /* |
2326 | * Skipping super_load due to CTR_FLAG_SYNC will cause | 2420 | * Skipping super_load due to CTR_FLAG_SYNC will cause |
2327 | * the array to undergo initialization again as | 2421 | * the array to undergo initialization again as |
2328 | * though it were new. This is the intended effect | 2422 | * though it were new. This is the intended effect |
2329 | * of the "sync" directive. | 2423 | * of the "sync" directive. |
2330 | * | 2424 | * |
2331 | * When reshaping capability is added, we must ensure | 2425 | * With reshaping capability added, we must ensure that |
2332 | * that the "sync" directive is disallowed during the | 2426 | * the "sync" directive is disallowed during the reshape. |
2333 | * reshape. | ||
2334 | */ | 2427 | */ |
2335 | if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) | 2428 | if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) |
2336 | continue; | 2429 | continue; |
@@ -2347,6 +2440,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | |||
2347 | case 0: | 2440 | case 0: |
2348 | break; | 2441 | break; |
2349 | default: | 2442 | default: |
2443 | /* This is a failure to read the superblock from the metadata device. */ | ||
2350 | /* | 2444 | /* |
2351 | * We have to keep any raid0 data/metadata device pairs or | 2445 | * We have to keep any raid0 data/metadata device pairs or |
2352 | * the MD raid0 personality will fail to start the array. | 2446 | * the MD raid0 personality will fail to start the array. |
@@ -2354,33 +2448,16 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | |||
2354 | if (rs_is_raid0(rs)) | 2448 | if (rs_is_raid0(rs)) |
2355 | continue; | 2449 | continue; |
2356 | 2450 | ||
2357 | dev = container_of(rdev, struct raid_dev, rdev); | ||
2358 | if (dev->meta_dev) | ||
2359 | dm_put_device(ti, dev->meta_dev); | ||
2360 | |||
2361 | dev->meta_dev = NULL; | ||
2362 | rdev->meta_bdev = NULL; | ||
2363 | |||
2364 | if (rdev->sb_page) | ||
2365 | put_page(rdev->sb_page); | ||
2366 | |||
2367 | rdev->sb_page = NULL; | ||
2368 | |||
2369 | rdev->sb_loaded = 0; | ||
2370 | |||
2371 | /* | 2451 | /* |
2372 | * We might be able to salvage the data device | 2452 | * We keep the dm_devs to be able to emit the device tuple |
2373 | * even though the meta device has failed. For | 2453 | * properly on the table line in raid_status() (rather than |
2374 | * now, we behave as though '- -' had been | 2454 | * mistakenly acting as if '- -' got passed into the constructor). |
2375 | * set for this device in the table. | 2455 | * |
2456 | * The rdev has to stay on the same_set list to allow for | ||
2457 | * the attempt to restore faulty devices on second resume. | ||
2376 | */ | 2458 | */ |
2377 | if (dev->data_dev) | 2459 | rdev->raid_disk = rdev->saved_raid_disk = -1; |
2378 | dm_put_device(ti, dev->data_dev); | 2460 | break; |
2379 | |||
2380 | dev->data_dev = NULL; | ||
2381 | rdev->bdev = NULL; | ||
2382 | |||
2383 | list_del(&rdev->same_set); | ||
2384 | } | 2461 | } |
2385 | } | 2462 | } |
2386 | 2463 | ||
@@ -2401,7 +2478,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | |||
2401 | return -EINVAL; | 2478 | return -EINVAL; |
2402 | 2479 | ||
2403 | rdev_for_each(rdev, mddev) | 2480 | rdev_for_each(rdev, mddev) |
2404 | if ((rdev != freshest) && super_validate(rs, rdev)) | 2481 | if (!test_bit(Journal, &rdev->flags) && |
2482 | rdev != freshest && | ||
2483 | super_validate(rs, rdev)) | ||
2405 | return -EINVAL; | 2484 | return -EINVAL; |
2406 | return 0; | 2485 | return 0; |
2407 | } | 2486 | } |
@@ -2488,10 +2567,12 @@ static int rs_adjust_data_offsets(struct raid_set *rs) | |||
2488 | return -ENOSPC; | 2567 | return -ENOSPC; |
2489 | } | 2568 | } |
2490 | out: | 2569 | out: |
2491 | /* Adjust data offsets on all rdevs */ | 2570 | /* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */ |
2492 | rdev_for_each(rdev, &rs->md) { | 2571 | rdev_for_each(rdev, &rs->md) { |
2493 | rdev->data_offset = data_offset; | 2572 | if (!test_bit(Journal, &rdev->flags)) { |
2494 | rdev->new_data_offset = new_data_offset; | 2573 | rdev->data_offset = data_offset; |
2574 | rdev->new_data_offset = new_data_offset; | ||
2575 | } | ||
2495 | } | 2576 | } |
2496 | 2577 | ||
2497 | return 0; | 2578 | return 0; |
@@ -2504,8 +2585,10 @@ static void __reorder_raid_disk_indexes(struct raid_set *rs) | |||
2504 | struct md_rdev *rdev; | 2585 | struct md_rdev *rdev; |
2505 | 2586 | ||
2506 | rdev_for_each(rdev, &rs->md) { | 2587 | rdev_for_each(rdev, &rs->md) { |
2507 | rdev->raid_disk = i++; | 2588 | if (!test_bit(Journal, &rdev->flags)) { |
2508 | rdev->saved_raid_disk = rdev->new_raid_disk = -1; | 2589 | rdev->raid_disk = i++; |
2590 | rdev->saved_raid_disk = rdev->new_raid_disk = -1; | ||
2591 | } | ||
2509 | } | 2592 | } |
2510 | } | 2593 | } |
2511 | 2594 | ||
@@ -2845,7 +2928,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
2845 | if (r) | 2928 | if (r) |
2846 | goto bad; | 2929 | goto bad; |
2847 | 2930 | ||
2848 | calculated_dev_sectors = rs->dev[0].rdev.sectors; | 2931 | calculated_dev_sectors = rs->md.dev_sectors; |
2849 | 2932 | ||
2850 | /* | 2933 | /* |
2851 | * Backup any new raid set level, layout, ... | 2934 | * Backup any new raid set level, layout, ... |
@@ -2858,7 +2941,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
2858 | if (r) | 2941 | if (r) |
2859 | goto bad; | 2942 | goto bad; |
2860 | 2943 | ||
2861 | resize = calculated_dev_sectors != rs->dev[0].rdev.sectors; | 2944 | resize = calculated_dev_sectors != __rdev_sectors(rs); |
2862 | 2945 | ||
2863 | INIT_WORK(&rs->md.event_work, do_table_event); | 2946 | INIT_WORK(&rs->md.event_work, do_table_event); |
2864 | ti->private = rs; | 2947 | ti->private = rs; |
@@ -2902,6 +2985,13 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
2902 | goto bad; | 2985 | goto bad; |
2903 | } | 2986 | } |
2904 | 2987 | ||
2988 | /* We can't take over a journaled raid4/5/6 */ | ||
2989 | if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) { | ||
2990 | ti->error = "Can't takeover a journaled raid4/5/6 set"; | ||
2991 | r = -EPERM; | ||
2992 | goto bad; | ||
2993 | } | ||
2994 | |||
2905 | /* | 2995 | /* |
2906 | * If a takeover is needed, userspace sets any additional | 2996 | * If a takeover is needed, userspace sets any additional |
2907 | * devices to rebuild and we can check for a valid request here. | 2997 | * devices to rebuild and we can check for a valid request here. |
@@ -2924,6 +3014,18 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
2924 | rs_set_new(rs); | 3014 | rs_set_new(rs); |
2925 | } else if (rs_reshape_requested(rs)) { | 3015 | } else if (rs_reshape_requested(rs)) { |
2926 | /* | 3016 | /* |
3017 | * No need to check for 'ongoing' takeover here, because takeover | ||
3018 | * is an instant operation as opposed to an ongoing reshape. | ||
3019 | */ | ||
3020 | |||
3021 | /* We can't reshape a journaled raid4/5/6 */ | ||
3022 | if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) { | ||
3023 | ti->error = "Can't reshape a journaled raid4/5/6 set"; | ||
3024 | r = -EPERM; | ||
3025 | goto bad; | ||
3026 | } | ||
3027 | |||
3028 | /* | ||
2927 | * We can only prepare for a reshape here, because the | 3029 | * We can only prepare for a reshape here, because the |
2928 | * raid set needs to run to provide the respective reshape | 3030 | * raid set needs to run to provide the respective reshape |
2929 | * check functions via its MD personality instance. | 3031 | * check functions via its MD personality instance. |
@@ -3071,18 +3173,23 @@ static const char *decipher_sync_action(struct mddev *mddev) | |||
3071 | } | 3173 | } |
3072 | 3174 | ||
3073 | /* | 3175 | /* |
3074 | * Return status string @rdev | 3176 | * Return status string for @rdev |
3075 | * | 3177 | * |
3076 | * Status characters: | 3178 | * Status characters: |
3077 | * | 3179 | * |
3078 | * 'D' = Dead/Failed device | 3180 | * 'D' = Dead/Failed raid set component or raid4/5/6 journal device |
3079 | * 'a' = Alive but not in-sync | 3181 | * 'a' = Alive but not in-sync |
3080 | * 'A' = Alive and in-sync | 3182 | * 'A' = Alive and in-sync raid set component or alive raid4/5/6 journal device |
3183 | * '-' = Non-existing device (i.e. userspace passed '- -' into the ctr) | ||
3081 | */ | 3184 | */ |
3082 | static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync) | 3185 | static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync) |
3083 | { | 3186 | { |
3084 | if (test_bit(Faulty, &rdev->flags)) | 3187 | if (!rdev->bdev) |
3188 | return "-"; | ||
3189 | else if (test_bit(Faulty, &rdev->flags)) | ||
3085 | return "D"; | 3190 | return "D"; |
3191 | else if (test_bit(Journal, &rdev->flags)) | ||
3192 | return "A"; | ||
3086 | else if (!array_in_sync || !test_bit(In_sync, &rdev->flags)) | 3193 | else if (!array_in_sync || !test_bit(In_sync, &rdev->flags)) |
3087 | return "a"; | 3194 | return "a"; |
3088 | else | 3195 | else |
@@ -3151,7 +3258,8 @@ static sector_t rs_get_progress(struct raid_set *rs, | |||
3151 | * being initialized. | 3258 | * being initialized. |
3152 | */ | 3259 | */ |
3153 | rdev_for_each(rdev, mddev) | 3260 | rdev_for_each(rdev, mddev) |
3154 | if (!test_bit(In_sync, &rdev->flags)) | 3261 | if (!test_bit(Journal, &rdev->flags) && |
3262 | !test_bit(In_sync, &rdev->flags)) | ||
3155 | *array_in_sync = true; | 3263 | *array_in_sync = true; |
3156 | #if 0 | 3264 | #if 0 |
3157 | r = 0; /* HM FIXME: TESTME: https://bugzilla.redhat.com/show_bug.cgi?id=1210637 ? */ | 3265 | r = 0; /* HM FIXME: TESTME: https://bugzilla.redhat.com/show_bug.cgi?id=1210637 ? */ |
@@ -3183,7 +3291,6 @@ static void raid_status(struct dm_target *ti, status_type_t type, | |||
3183 | sector_t progress, resync_max_sectors, resync_mismatches; | 3291 | sector_t progress, resync_max_sectors, resync_mismatches; |
3184 | const char *sync_action; | 3292 | const char *sync_action; |
3185 | struct raid_type *rt; | 3293 | struct raid_type *rt; |
3186 | struct md_rdev *rdev; | ||
3187 | 3294 | ||
3188 | switch (type) { | 3295 | switch (type) { |
3189 | case STATUSTYPE_INFO: | 3296 | case STATUSTYPE_INFO: |
@@ -3204,9 +3311,9 @@ static void raid_status(struct dm_target *ti, status_type_t type, | |||
3204 | atomic64_read(&mddev->resync_mismatches) : 0; | 3311 | atomic64_read(&mddev->resync_mismatches) : 0; |
3205 | sync_action = decipher_sync_action(&rs->md); | 3312 | sync_action = decipher_sync_action(&rs->md); |
3206 | 3313 | ||
3207 | /* HM FIXME: do we want another state char for raid0? It shows 'D' or 'A' now */ | 3314 | /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */ |
3208 | rdev_for_each(rdev, mddev) | 3315 | for (i = 0; i < rs->raid_disks; i++) |
3209 | DMEMIT(__raid_dev_status(rdev, array_in_sync)); | 3316 | DMEMIT(__raid_dev_status(&rs->dev[i].rdev, array_in_sync)); |
3210 | 3317 | ||
3211 | /* | 3318 | /* |
3212 | * In-sync/Reshape ratio: | 3319 | * In-sync/Reshape ratio: |
@@ -3252,6 +3359,12 @@ static void raid_status(struct dm_target *ti, status_type_t type, | |||
3252 | * so retrieving it from the first raid disk is sufficient. | 3359 | * so retrieving it from the first raid disk is sufficient. |
3253 | */ | 3360 | */ |
3254 | DMEMIT(" %llu", (unsigned long long) rs->dev[0].rdev.data_offset); | 3361 | DMEMIT(" %llu", (unsigned long long) rs->dev[0].rdev.data_offset); |
3362 | |||
3363 | /* | ||
3364 | * v1.10.0+: | ||
3365 | */ | ||
3366 | DMEMIT(" %s", test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? | ||
3367 | __raid_dev_status(&rs->journal_dev.rdev, 0) : "-"); | ||
3255 | break; | 3368 | break; |
3256 | 3369 | ||
3257 | case STATUSTYPE_TABLE: | 3370 | case STATUSTYPE_TABLE: |
@@ -3265,7 +3378,8 @@ static void raid_status(struct dm_target *ti, status_type_t type, | |||
3265 | raid_param_cnt += rebuild_disks * 2 + | 3378 | raid_param_cnt += rebuild_disks * 2 + |
3266 | write_mostly_params + | 3379 | write_mostly_params + |
3267 | hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) + | 3380 | hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) + |
3268 | hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2; | 3381 | hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 + |
3382 | (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0); | ||
3269 | /* Emit table line */ | 3383 | /* Emit table line */ |
3270 | DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors); | 3384 | DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors); |
3271 | if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) | 3385 | if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) |
@@ -3312,6 +3426,9 @@ static void raid_status(struct dm_target *ti, status_type_t type, | |||
3312 | if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) | 3426 | if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags)) |
3313 | DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE), | 3427 | DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE), |
3314 | mddev->sync_speed_min); | 3428 | mddev->sync_speed_min); |
3429 | if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) | ||
3430 | DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV), | ||
3431 | __get_dev_name(rs->journal_dev.dev)); | ||
3315 | DMEMIT(" %d", rs->raid_disks); | 3432 | DMEMIT(" %d", rs->raid_disks); |
3316 | for (i = 0; i < rs->raid_disks; i++) | 3433 | for (i = 0; i < rs->raid_disks; i++) |
3317 | DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev), | 3434 | DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev), |
@@ -3347,10 +3464,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv) | |||
3347 | else { | 3464 | else { |
3348 | if (!strcasecmp(argv[0], "check")) | 3465 | if (!strcasecmp(argv[0], "check")) |
3349 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 3466 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
3350 | else if (!!strcasecmp(argv[0], "repair")) | 3467 | else if (!strcasecmp(argv[0], "repair")) { |
3468 | set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); | ||
3469 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
3470 | } else | ||
3351 | return -EINVAL; | 3471 | return -EINVAL; |
3352 | set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); | ||
3353 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
3354 | } | 3472 | } |
3355 | if (mddev->ro == 2) { | 3473 | if (mddev->ro == 2) { |
3356 | /* A write to sync_action is enough to justify | 3474 | /* A write to sync_action is enough to justify |
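The old guard worked only because strcasecmp() returns 0 on a match, so '!!strcasecmp(argv[0], "repair")' was a double-negated way of saying "not repair"; the rewrite spells the intent out and scopes the MD_RECOVERY_REQUESTED/MD_RECOVERY_SYNC bits to the "repair" branch. A tiny runnable reminder of the return-value convention (userspace, illustrative only):

#include <assert.h>
#include <strings.h>

int main(void)
{
        /* strcasecmp() returns 0 on a case-insensitive match ... */
        assert(strcasecmp("RePair", "repair") == 0);
        /* ... so '!!strcasecmp(x, "repair")' is 1 for anything else. */
        assert(!!strcasecmp("check", "repair") == 1);
        return 0;
}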
@@ -3427,11 +3545,14 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) | |||
3427 | 3545 | ||
3428 | memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices)); | 3546 | memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices)); |
3429 | 3547 | ||
3430 | for (i = 0; i < rs->md.raid_disks; i++) { | 3548 | for (i = 0; i < mddev->raid_disks; i++) { |
3431 | r = &rs->dev[i].rdev; | 3549 | r = &rs->dev[i].rdev; |
3432 | if (test_bit(Faulty, &r->flags) && r->sb_page && | 3550 | /* HM FIXME: enhance journal device recovery processing */ |
3433 | sync_page_io(r, 0, r->sb_size, r->sb_page, | 3551 | if (test_bit(Journal, &r->flags)) |
3434 | REQ_OP_READ, 0, true)) { | 3552 | continue; |
3553 | |||
3554 | if (test_bit(Faulty, &r->flags) && | ||
3555 | r->meta_bdev && !read_disk_sb(r, r->sb_size, true)) { | ||
3435 | DMINFO("Faulty %s device #%d has readable super block." | 3556 | DMINFO("Faulty %s device #%d has readable super block." |
3436 | " Attempting to revive it.", | 3557 | " Attempting to revive it.", |
3437 | rs->raid_type->name, i); | 3558 | rs->raid_type->name, i); |
@@ -3445,22 +3566,26 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) | |||
3445 | * '>= 0' - meaning we must call this function | 3566 | * '>= 0' - meaning we must call this function |
3446 | * ourselves. | 3567 | * ourselves. |
3447 | */ | 3568 | */ |
3448 | if ((r->raid_disk >= 0) && | ||
3449 | (mddev->pers->hot_remove_disk(mddev, r) != 0)) | ||
3450 | /* Failed to revive this device, try next */ | ||
3451 | continue; | ||
3452 | |||
3453 | r->raid_disk = i; | ||
3454 | r->saved_raid_disk = i; | ||
3455 | flags = r->flags; | 3569 | flags = r->flags; |
3570 | clear_bit(In_sync, &r->flags); /* Mandatory for hot remove. */ | ||
3571 | if (r->raid_disk >= 0) { | ||
3572 | if (mddev->pers->hot_remove_disk(mddev, r)) { | ||
3573 | /* Failed to revive this device, try next */ | ||
3574 | r->flags = flags; | ||
3575 | continue; | ||
3576 | } | ||
3577 | } else | ||
3578 | r->raid_disk = r->saved_raid_disk = i; | ||
3579 | |||
3456 | clear_bit(Faulty, &r->flags); | 3580 | clear_bit(Faulty, &r->flags); |
3457 | clear_bit(WriteErrorSeen, &r->flags); | 3581 | clear_bit(WriteErrorSeen, &r->flags); |
3458 | clear_bit(In_sync, &r->flags); | 3582 | |
3459 | if (mddev->pers->hot_add_disk(mddev, r)) { | 3583 | if (mddev->pers->hot_add_disk(mddev, r)) { |
3460 | r->raid_disk = -1; | 3584 | /* Failed to revive this device, try next */ |
3461 | r->saved_raid_disk = -1; | 3585 | r->raid_disk = r->saved_raid_disk = -1; |
3462 | r->flags = flags; | 3586 | r->flags = flags; |
3463 | } else { | 3587 | } else { |
3588 | clear_bit(In_sync, &r->flags); | ||
3464 | r->recovery_offset = 0; | 3589 | r->recovery_offset = 0; |
3465 | set_bit(i, (void *) cleared_failed_devices); | 3590 | set_bit(i, (void *) cleared_failed_devices); |
3466 | cleared = true; | 3591 | cleared = true; |
@@ -3473,6 +3598,9 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) | |||
3473 | uint64_t failed_devices[DISKS_ARRAY_ELEMS]; | 3598 | uint64_t failed_devices[DISKS_ARRAY_ELEMS]; |
3474 | 3599 | ||
3475 | rdev_for_each(r, &rs->md) { | 3600 | rdev_for_each(r, &rs->md) { |
3601 | if (test_bit(Journal, &r->flags)) | ||
3602 | continue; | ||
3603 | |||
3476 | sb = page_address(r->sb_page); | 3604 | sb = page_address(r->sb_page); |
3477 | sb_retrieve_failed_devices(sb, failed_devices); | 3605 | sb_retrieve_failed_devices(sb, failed_devices); |
3478 | 3606 | ||
@@ -3651,7 +3779,7 @@ static void raid_resume(struct dm_target *ti) | |||
3651 | 3779 | ||
3652 | static struct target_type raid_target = { | 3780 | static struct target_type raid_target = { |
3653 | .name = "raid", | 3781 | .name = "raid", |
3654 | .version = {1, 9, 1}, | 3782 | .version = {1, 10, 0}, |
3655 | .module = THIS_MODULE, | 3783 | .module = THIS_MODULE, |
3656 | .ctr = raid_ctr, | 3784 | .ctr = raid_ctr, |
3657 | .dtr = raid_dtr, | 3785 | .dtr = raid_dtr, |
diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c index 6c25213ab38c..bdbb7e6e8212 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-round-robin.c | |||
@@ -17,8 +17,8 @@ | |||
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | 18 | ||
19 | #define DM_MSG_PREFIX "multipath round-robin" | 19 | #define DM_MSG_PREFIX "multipath round-robin" |
20 | #define RR_MIN_IO 1000 | 20 | #define RR_MIN_IO 1 |
21 | #define RR_VERSION "1.1.0" | 21 | #define RR_VERSION "1.2.0" |
22 | 22 | ||
23 | /*----------------------------------------------------------------- | 23 | /*----------------------------------------------------------------- |
24 | * Path-handling code, paths are held in lists | 24 | * Path-handling code, paths are held in lists |
@@ -47,44 +47,19 @@ struct selector { | |||
47 | struct list_head valid_paths; | 47 | struct list_head valid_paths; |
48 | struct list_head invalid_paths; | 48 | struct list_head invalid_paths; |
49 | spinlock_t lock; | 49 | spinlock_t lock; |
50 | struct dm_path * __percpu *current_path; | ||
51 | struct percpu_counter repeat_count; | ||
52 | }; | 50 | }; |
53 | 51 | ||
54 | static void set_percpu_current_path(struct selector *s, struct dm_path *path) | ||
55 | { | ||
56 | int cpu; | ||
57 | |||
58 | for_each_possible_cpu(cpu) | ||
59 | *per_cpu_ptr(s->current_path, cpu) = path; | ||
60 | } | ||
61 | |||
62 | static struct selector *alloc_selector(void) | 52 | static struct selector *alloc_selector(void) |
63 | { | 53 | { |
64 | struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); | 54 | struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); |
65 | 55 | ||
66 | if (!s) | 56 | if (s) { |
67 | return NULL; | 57 | INIT_LIST_HEAD(&s->valid_paths); |
68 | 58 | INIT_LIST_HEAD(&s->invalid_paths); | |
69 | INIT_LIST_HEAD(&s->valid_paths); | 59 | spin_lock_init(&s->lock); |
70 | INIT_LIST_HEAD(&s->invalid_paths); | 60 | } |
71 | spin_lock_init(&s->lock); | ||
72 | |||
73 | s->current_path = alloc_percpu(struct dm_path *); | ||
74 | if (!s->current_path) | ||
75 | goto out_current_path; | ||
76 | set_percpu_current_path(s, NULL); | ||
77 | |||
78 | if (percpu_counter_init(&s->repeat_count, 0, GFP_KERNEL)) | ||
79 | goto out_repeat_count; | ||
80 | 61 | ||
81 | return s; | 62 | return s; |
82 | |||
83 | out_repeat_count: | ||
84 | free_percpu(s->current_path); | ||
85 | out_current_path: | ||
86 | kfree(s); | ||
87 | return NULL;; | ||
88 | } | 63 | } |
89 | 64 | ||
90 | static int rr_create(struct path_selector *ps, unsigned argc, char **argv) | 65 | static int rr_create(struct path_selector *ps, unsigned argc, char **argv) |
@@ -105,8 +80,6 @@ static void rr_destroy(struct path_selector *ps) | |||
105 | 80 | ||
106 | free_paths(&s->valid_paths); | 81 | free_paths(&s->valid_paths); |
107 | free_paths(&s->invalid_paths); | 82 | free_paths(&s->invalid_paths); |
108 | free_percpu(s->current_path); | ||
109 | percpu_counter_destroy(&s->repeat_count); | ||
110 | kfree(s); | 83 | kfree(s); |
111 | ps->context = NULL; | 84 | ps->context = NULL; |
112 | } | 85 | } |
@@ -157,6 +130,11 @@ static int rr_add_path(struct path_selector *ps, struct dm_path *path, | |||
157 | return -EINVAL; | 130 | return -EINVAL; |
158 | } | 131 | } |
159 | 132 | ||
133 | if (repeat_count > 1) { | ||
134 | DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead"); | ||
135 | repeat_count = 1; | ||
136 | } | ||
137 | |||
160 | /* allocate the path */ | 138 | /* allocate the path */ |
161 | pi = kmalloc(sizeof(*pi), GFP_KERNEL); | 139 | pi = kmalloc(sizeof(*pi), GFP_KERNEL); |
162 | if (!pi) { | 140 | if (!pi) { |
@@ -183,9 +161,6 @@ static void rr_fail_path(struct path_selector *ps, struct dm_path *p) | |||
183 | struct path_info *pi = p->pscontext; | 161 | struct path_info *pi = p->pscontext; |
184 | 162 | ||
185 | spin_lock_irqsave(&s->lock, flags); | 163 | spin_lock_irqsave(&s->lock, flags); |
186 | if (p == *this_cpu_ptr(s->current_path)) | ||
187 | set_percpu_current_path(s, NULL); | ||
188 | |||
189 | list_move(&pi->list, &s->invalid_paths); | 164 | list_move(&pi->list, &s->invalid_paths); |
190 | spin_unlock_irqrestore(&s->lock, flags); | 165 | spin_unlock_irqrestore(&s->lock, flags); |
191 | } | 166 | } |
@@ -208,29 +183,15 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes) | |||
208 | unsigned long flags; | 183 | unsigned long flags; |
209 | struct selector *s = ps->context; | 184 | struct selector *s = ps->context; |
210 | struct path_info *pi = NULL; | 185 | struct path_info *pi = NULL; |
211 | struct dm_path *current_path = NULL; | ||
212 | |||
213 | local_irq_save(flags); | ||
214 | current_path = *this_cpu_ptr(s->current_path); | ||
215 | if (current_path) { | ||
216 | percpu_counter_dec(&s->repeat_count); | ||
217 | if (percpu_counter_read_positive(&s->repeat_count) > 0) { | ||
218 | local_irq_restore(flags); | ||
219 | return current_path; | ||
220 | } | ||
221 | } | ||
222 | 186 | ||
223 | spin_lock(&s->lock); | 187 | spin_lock_irqsave(&s->lock, flags); |
224 | if (!list_empty(&s->valid_paths)) { | 188 | if (!list_empty(&s->valid_paths)) { |
225 | pi = list_entry(s->valid_paths.next, struct path_info, list); | 189 | pi = list_entry(s->valid_paths.next, struct path_info, list); |
226 | list_move_tail(&pi->list, &s->valid_paths); | 190 | list_move_tail(&pi->list, &s->valid_paths); |
227 | percpu_counter_set(&s->repeat_count, pi->repeat_count); | ||
228 | set_percpu_current_path(s, pi->path); | ||
229 | current_path = pi->path; | ||
230 | } | 191 | } |
231 | spin_unlock_irqrestore(&s->lock, flags); | 192 | spin_unlock_irqrestore(&s->lock, flags); |
232 | 193 | ||
233 | return current_path; | 194 | return pi ? pi->path : NULL; |
234 | } | 195 | } |
235 | 196 | ||
236 | static struct path_selector_type rr_ps = { | 197 | static struct path_selector_type rr_ps = { |
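With the percpu current_path caching reverted, rr_select_path() is once again a plain head-to-tail rotation under the selector lock, which is what restores even balancing across all paths. A userspace analog of the rotation (a hand-rolled ring rather than the kernel's list API):

#include <stdio.h>

struct path { const char *name; struct path *next; };

/* Return the current head and rotate the ring by one entry, as
 * list_move_tail() does for the valid_paths list above. */
static struct path *rr_select(struct path **head)
{
        struct path *p = *head;

        if (p)
                *head = p->next;
        return p;
}

int main(void)
{
        struct path c = { "sdc", 0 }, b = { "sdb", &c }, a = { "sda", &b };
        struct path *head = &a;
        int i;

        c.next = &a;            /* close the ring */
        for (i = 0; i < 6; i++)
                printf("%s ", rr_select(&head)->name);
        printf("\n");           /* sda sdb sdc sda sdb sdc */
        return 0;
}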
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 38b05f23b96c..0250e7e521ab 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c | |||
@@ -175,6 +175,7 @@ static void dm_stat_free(struct rcu_head *head) | |||
175 | int cpu; | 175 | int cpu; |
176 | struct dm_stat *s = container_of(head, struct dm_stat, rcu_head); | 176 | struct dm_stat *s = container_of(head, struct dm_stat, rcu_head); |
177 | 177 | ||
178 | kfree(s->histogram_boundaries); | ||
178 | kfree(s->program_id); | 179 | kfree(s->program_id); |
179 | kfree(s->aux_data); | 180 | kfree(s->aux_data); |
180 | for_each_possible_cpu(cpu) { | 181 | for_each_possible_cpu(cpu) { |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5bd9ab06a562..9f37d7fc2786 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -974,10 +974,61 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) | |||
974 | } | 974 | } |
975 | EXPORT_SYMBOL_GPL(dm_accept_partial_bio); | 975 | EXPORT_SYMBOL_GPL(dm_accept_partial_bio); |
976 | 976 | ||
977 | /* | ||
978 | * Flush current->bio_list when the target map method blocks. | ||
979 | * This fixes deadlocks in snapshot and possibly in other targets. | ||
980 | */ | ||
981 | struct dm_offload { | ||
982 | struct blk_plug plug; | ||
983 | struct blk_plug_cb cb; | ||
984 | }; | ||
985 | |||
986 | static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) | ||
987 | { | ||
988 | struct dm_offload *o = container_of(cb, struct dm_offload, cb); | ||
989 | struct bio_list list; | ||
990 | struct bio *bio; | ||
991 | |||
992 | INIT_LIST_HEAD(&o->cb.list); | ||
993 | |||
994 | if (unlikely(!current->bio_list)) | ||
995 | return; | ||
996 | |||
997 | list = *current->bio_list; | ||
998 | bio_list_init(current->bio_list); | ||
999 | |||
1000 | while ((bio = bio_list_pop(&list))) { | ||
1001 | struct bio_set *bs = bio->bi_pool; | ||
1002 | if (unlikely(!bs) || bs == fs_bio_set) { | ||
1003 | bio_list_add(current->bio_list, bio); | ||
1004 | continue; | ||
1005 | } | ||
1006 | |||
1007 | spin_lock(&bs->rescue_lock); | ||
1008 | bio_list_add(&bs->rescue_list, bio); | ||
1009 | queue_work(bs->rescue_workqueue, &bs->rescue_work); | ||
1010 | spin_unlock(&bs->rescue_lock); | ||
1011 | } | ||
1012 | } | ||
1013 | |||
1014 | static void dm_offload_start(struct dm_offload *o) | ||
1015 | { | ||
1016 | blk_start_plug(&o->plug); | ||
1017 | o->cb.callback = flush_current_bio_list; | ||
1018 | list_add(&o->cb.list, ¤t->plug->cb_list); | ||
1019 | } | ||
1020 | |||
1021 | static void dm_offload_end(struct dm_offload *o) | ||
1022 | { | ||
1023 | list_del(&o->cb.list); | ||
1024 | blk_finish_plug(&o->plug); | ||
1025 | } | ||
1026 | |||
977 | static void __map_bio(struct dm_target_io *tio) | 1027 | static void __map_bio(struct dm_target_io *tio) |
978 | { | 1028 | { |
979 | int r; | 1029 | int r; |
980 | sector_t sector; | 1030 | sector_t sector; |
1031 | struct dm_offload o; | ||
981 | struct bio *clone = &tio->clone; | 1032 | struct bio *clone = &tio->clone; |
982 | struct dm_target *ti = tio->ti; | 1033 | struct dm_target *ti = tio->ti; |
983 | 1034 | ||
@@ -990,7 +1041,11 @@ static void __map_bio(struct dm_target_io *tio) | |||
990 | */ | 1041 | */ |
991 | atomic_inc(&tio->io->io_count); | 1042 | atomic_inc(&tio->io->io_count); |
992 | sector = clone->bi_iter.bi_sector; | 1043 | sector = clone->bi_iter.bi_sector; |
1044 | |||
1045 | dm_offload_start(&o); | ||
993 | r = ti->type->map(ti, clone); | 1046 | r = ti->type->map(ti, clone); |
1047 | dm_offload_end(&o); | ||
1048 | |||
994 | if (r == DM_MAPIO_REMAPPED) { | 1049 | if (r == DM_MAPIO_REMAPPED) { |
995 | /* the bio has been remapped so dispatch it */ | 1050 | /* the bio has been remapped so dispatch it */ |
996 | 1051 | ||
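The dm_offload hook piggybacks on the block plug: dm_offload_start() registers flush_current_bio_list() as a plug callback, so if ti->type->map() blocks, the scheduler fires the callback and every bio parked on current->bio_list is handed to its bio_set's rescue workqueue (bios without a private bio_set have no rescuer and are simply requeued). A worker thread, not the blocked one, then submits them, breaking the circular wait behind the snapshot deadlock. A userspace analog of "drain the deferred queue before blocking" (illustrative only, not the kernel mechanism):

#include <stdio.h>

#define MAX 8

/* Thread-local deferred queue standing in for current->bio_list. */
static int deferred[MAX], rescued[MAX];
static int nr_deferred, nr_rescued;

/* Analog of flush_current_bio_list(): hand everything queued so far
 * to a "rescuer" before doing anything that may block on it. */
static void flush_before_blocking(void)
{
        while (nr_deferred)
                rescued[nr_rescued++] = deferred[--nr_deferred];
}

int main(void)
{
        deferred[nr_deferred++] = 1;
        deferred[nr_deferred++] = 2;

        flush_before_blocking();        /* what dm_offload triggers */
        /* ... now it is safe to block: nothing we depend on is
         * stuck behind us on the deferred queue ... */
        printf("rescued %d bios\n", nr_rescued);
        return 0;
}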
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 7938cd21fa4c..185dc60360b5 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c | |||
@@ -976,6 +976,27 @@ int dm_array_cursor_next(struct dm_array_cursor *c) | |||
976 | } | 976 | } |
977 | EXPORT_SYMBOL_GPL(dm_array_cursor_next); | 977 | EXPORT_SYMBOL_GPL(dm_array_cursor_next); |
978 | 978 | ||
979 | int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count) | ||
980 | { | ||
981 | int r; | ||
982 | |||
983 | do { | ||
984 | uint32_t remaining = le32_to_cpu(c->ab->nr_entries) - c->index; | ||
985 | |||
986 | if (count < remaining) { | ||
987 | c->index += count; | ||
988 | return 0; | ||
989 | } | ||
990 | |||
991 | count -= remaining; | ||
992 | r = dm_array_cursor_next(c); | ||
993 | |||
994 | } while (!r); | ||
995 | |||
996 | return r; | ||
997 | } | ||
998 | EXPORT_SYMBOL_GPL(dm_array_cursor_skip); | ||
999 | |||
979 | void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le) | 1000 | void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le) |
980 | { | 1001 | { |
981 | *value_le = element_at(c->info, c->ab, c->index); | 1002 | *value_le = element_at(c->info, c->ab, c->index); |
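dm_array_cursor_skip() advances block-at-a-time: it consumes whatever remains of the current array block, then steps the cursor to the next block, so a large skip costs one block load per block rather than one iteration per entry. A userspace analog over fixed-size blocks (the block size and names are illustrative):

#include <assert.h>
#include <stdint.h>

#define ENTRIES_PER_BLOCK 4

struct cursor { uint32_t block, index; };

/* Analog of dm_array_cursor_skip(): burn the rest of the current
 * block, then hop whole blocks until 'count' fits inside one. */
static void cursor_skip(struct cursor *c, uint32_t count)
{
        for (;;) {
                uint32_t remaining = ENTRIES_PER_BLOCK - c->index;

                if (count < remaining) {
                        c->index += count;
                        return;
                }
                count -= remaining;
                c->block++;             /* dm_array_cursor_next() */
                c->index = 0;
        }
}

int main(void)
{
        struct cursor c = { 0, 1 };

        cursor_skip(&c, 9);     /* 3 left in block 0, all of block 1, 2 into block 2 */
        assert(c.block == 2 && c.index == 2);
        return 0;
}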
diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h index 27ee49a55473..d7d2d579c662 100644 --- a/drivers/md/persistent-data/dm-array.h +++ b/drivers/md/persistent-data/dm-array.h | |||
@@ -207,6 +207,7 @@ void dm_array_cursor_end(struct dm_array_cursor *c); | |||
207 | 207 | ||
208 | uint32_t dm_array_cursor_index(struct dm_array_cursor *c); | 208 | uint32_t dm_array_cursor_index(struct dm_array_cursor *c); |
209 | int dm_array_cursor_next(struct dm_array_cursor *c); | 209 | int dm_array_cursor_next(struct dm_array_cursor *c); |
210 | int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count); | ||
210 | 211 | ||
211 | /* | 212 | /* |
212 | * value_le is only valid while the cursor points at the current value. | 213 | * value_le is only valid while the cursor points at the current value. |
diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c index 36f7cc2c7109..b7208d82e748 100644 --- a/drivers/md/persistent-data/dm-bitset.c +++ b/drivers/md/persistent-data/dm-bitset.c | |||
@@ -39,6 +39,48 @@ int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root) | |||
39 | } | 39 | } |
40 | EXPORT_SYMBOL_GPL(dm_bitset_empty); | 40 | EXPORT_SYMBOL_GPL(dm_bitset_empty); |
41 | 41 | ||
42 | struct packer_context { | ||
43 | bit_value_fn fn; | ||
44 | unsigned nr_bits; | ||
45 | void *context; | ||
46 | }; | ||
47 | |||
48 | static int pack_bits(uint32_t index, void *value, void *context) | ||
49 | { | ||
50 | int r; | ||
51 | struct packer_context *p = context; | ||
52 | unsigned bit, nr = min(64u, p->nr_bits - (index * 64)); | ||
53 | uint64_t word = 0; | ||
54 | bool bv; | ||
55 | |||
56 | for (bit = 0; bit < nr; bit++) { | ||
57 | r = p->fn(index * 64 + bit, &bv, p->context); | ||
58 | if (r) | ||
59 | return r; | ||
60 | |||
61 | if (bv) | ||
62 | set_bit(bit, (unsigned long *) &word); | ||
63 | else | ||
64 | clear_bit(bit, (unsigned long *) &word); | ||
65 | } | ||
66 | |||
67 | *((__le64 *) value) = cpu_to_le64(word); | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | int dm_bitset_new(struct dm_disk_bitset *info, dm_block_t *root, | ||
73 | uint32_t size, bit_value_fn fn, void *context) | ||
74 | { | ||
75 | struct packer_context p; | ||
76 | p.fn = fn; | ||
77 | p.nr_bits = size; | ||
78 | p.context = context; | ||
79 | |||
80 | return dm_array_new(&info->array_info, root, dm_div_up(size, 64), pack_bits, &p); | ||
81 | } | ||
82 | EXPORT_SYMBOL_GPL(dm_bitset_new); | ||
83 | |||
42 | int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root, | 84 | int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root, |
43 | uint32_t old_nr_entries, uint32_t new_nr_entries, | 85 | uint32_t old_nr_entries, uint32_t new_nr_entries, |
44 | bool default_value, dm_block_t *new_root) | 86 | bool default_value, dm_block_t *new_root) |
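pack_bits() is the glue between the per-bit callback and dm_array_new(): for array entry 'index' it asks the callback for bits index*64 through index*64 + nr - 1 and packs them into one little-endian word, so the bitset is written out a word at a time instead of bit by bit. A runnable userspace analog (the callback here just reads a bool array; an assumption for illustration, not kernel code):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef int (*bit_fn)(uint32_t index, bool *value, void *context);

/* Analog of pack_bits(): gather up to 64 callback-supplied bits
 * into the word for array entry 'index'. */
static int pack_word(uint32_t index, uint32_t nr_bits, uint64_t *word,
                     bit_fn fn, void *context)
{
        uint32_t bit, nr = 64;
        bool bv;

        if (nr_bits - index * 64 < nr)
                nr = nr_bits - index * 64;

        *word = 0;
        for (bit = 0; bit < nr; bit++) {
                int r = fn(index * 64 + bit, &bv, context);

                if (r)
                        return r;
                if (bv)
                        *word |= 1ULL << bit;
        }
        return 0;
}

static int from_bool_array(uint32_t index, bool *value, void *context)
{
        *value = ((bool *)context)[index];
        return 0;
}

int main(void)
{
        bool bits[70] = { false };
        uint64_t w;

        bits[0] = bits[65] = true;
        assert(!pack_word(0, 70, &w, from_bool_array, bits) && w == 1);
        assert(!pack_word(1, 70, &w, from_bool_array, bits) && w == 2);
        return 0;
}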
@@ -168,4 +210,108 @@ int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, | |||
168 | } | 210 | } |
169 | EXPORT_SYMBOL_GPL(dm_bitset_test_bit); | 211 | EXPORT_SYMBOL_GPL(dm_bitset_test_bit); |
170 | 212 | ||
213 | static int cursor_next_array_entry(struct dm_bitset_cursor *c) | ||
214 | { | ||
215 | int r; | ||
216 | __le64 *value; | ||
217 | |||
218 | r = dm_array_cursor_next(&c->cursor); | ||
219 | if (r) | ||
220 | return r; | ||
221 | |||
222 | dm_array_cursor_get_value(&c->cursor, (void **) &value); | ||
223 | c->array_index++; | ||
224 | c->bit_index = 0; | ||
225 | c->current_bits = le64_to_cpu(*value); | ||
226 | return 0; | ||
227 | } | ||
228 | |||
229 | int dm_bitset_cursor_begin(struct dm_disk_bitset *info, | ||
230 | dm_block_t root, uint32_t nr_entries, | ||
231 | struct dm_bitset_cursor *c) | ||
232 | { | ||
233 | int r; | ||
234 | __le64 *value; | ||
235 | |||
236 | if (!nr_entries) | ||
237 | return -ENODATA; | ||
238 | |||
239 | c->info = info; | ||
240 | c->entries_remaining = nr_entries; | ||
241 | |||
242 | r = dm_array_cursor_begin(&info->array_info, root, &c->cursor); | ||
243 | if (r) | ||
244 | return r; | ||
245 | |||
246 | dm_array_cursor_get_value(&c->cursor, (void **) &value); | ||
247 | c->array_index = 0; | ||
248 | c->bit_index = 0; | ||
249 | c->current_bits = le64_to_cpu(*value); | ||
250 | |||
251 | return r; | ||
252 | } | ||
253 | EXPORT_SYMBOL_GPL(dm_bitset_cursor_begin); | ||
254 | |||
255 | void dm_bitset_cursor_end(struct dm_bitset_cursor *c) | ||
256 | { | ||
257 | return dm_array_cursor_end(&c->cursor); | ||
258 | } | ||
259 | EXPORT_SYMBOL_GPL(dm_bitset_cursor_end); | ||
260 | |||
261 | int dm_bitset_cursor_next(struct dm_bitset_cursor *c) | ||
262 | { | ||
263 | int r = 0; | ||
264 | |||
265 | if (!c->entries_remaining) | ||
266 | return -ENODATA; | ||
267 | |||
268 | c->entries_remaining--; | ||
269 | if (++c->bit_index > 63) | ||
270 | r = cursor_next_array_entry(c); | ||
271 | |||
272 | return r; | ||
273 | } | ||
274 | EXPORT_SYMBOL_GPL(dm_bitset_cursor_next); | ||
275 | |||
276 | int dm_bitset_cursor_skip(struct dm_bitset_cursor *c, uint32_t count) | ||
277 | { | ||
278 | int r; | ||
279 | __le64 *value; | ||
280 | uint32_t nr_array_skip; | ||
281 | uint32_t remaining_in_word = 64 - c->bit_index; | ||
282 | |||
283 | if (c->entries_remaining < count) | ||
284 | return -ENODATA; | ||
285 | |||
286 | if (count < remaining_in_word) { | ||
287 | c->bit_index += count; | ||
288 | c->entries_remaining -= count; | ||
289 | return 0; | ||
290 | |||
291 | } else { | ||
292 | c->entries_remaining -= remaining_in_word; | ||
293 | count -= remaining_in_word; | ||
294 | } | ||
295 | |||
296 | nr_array_skip = (count / 64) + 1; | ||
297 | r = dm_array_cursor_skip(&c->cursor, nr_array_skip); | ||
298 | if (r) | ||
299 | return r; | ||
300 | |||
301 | dm_array_cursor_get_value(&c->cursor, (void **) &value); | ||
302 | c->entries_remaining -= count; | ||
303 | c->array_index += nr_array_skip; | ||
304 | c->bit_index = count & 63; | ||
305 | c->current_bits = le64_to_cpu(*value); | ||
306 | |||
307 | return 0; | ||
308 | } | ||
309 | EXPORT_SYMBOL_GPL(dm_bitset_cursor_skip); | ||
310 | |||
311 | bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c) | ||
312 | { | ||
313 | return test_bit(c->bit_index, (unsigned long *) &c->current_bits); | ||
314 | } | ||
315 | EXPORT_SYMBOL_GPL(dm_bitset_cursor_get_value); | ||
316 | |||
171 | /*----------------------------------------------------------------*/ | 317 | /*----------------------------------------------------------------*/ |
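The arithmetic in dm_bitset_cursor_skip() deserves a worked example: the skip first consumes the bits left in the current 64-bit word, then advances 'count / 64 + 1' array words (the +1 accounts for leaving the partially consumed word behind) and lands on bit 'count & 63' of the new word. Checked in userspace:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint32_t bit_index = 10, count = 70;
        uint32_t remaining_in_word = 64 - bit_index;    /* 54 */
        uint32_t nr_array_skip;

        assert(count >= remaining_in_word);
        count -= remaining_in_word;                     /* 16 still to skip */

        nr_array_skip = (count / 64) + 1;               /* advance 1 word */
        bit_index = count & 63;                         /* land on bit 16 */

        assert(nr_array_skip == 1 && bit_index == 16);  /* bit 80 overall */
        return 0;
}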
diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h index c2287d672ef5..df888da04ee1 100644 --- a/drivers/md/persistent-data/dm-bitset.h +++ b/drivers/md/persistent-data/dm-bitset.h | |||
@@ -93,6 +93,22 @@ void dm_disk_bitset_init(struct dm_transaction_manager *tm, | |||
93 | int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root); | 93 | int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root); |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * Creates a new bitset populated with values provided by a callback | ||
97 | * function. This is more efficient than creating an empty bitset, | ||
98 | * resizing, and then setting values since that process incurs a lot of | ||
99 | * copying. | ||
100 | * | ||
101 | * info - describes the bitset | ||
102 | * root - the root block of the bitset on disk | ||
103 | * size - the number of entries in the bitset | ||
104 | * fn - the callback | ||
105 | * context - passed to the callback | ||
106 | */ | ||
107 | typedef int (*bit_value_fn)(uint32_t index, bool *value, void *context); | ||
108 | int dm_bitset_new(struct dm_disk_bitset *info, dm_block_t *root, | ||
109 | uint32_t size, bit_value_fn fn, void *context); | ||
110 | |||
111 | /* | ||
96 | * Resize the bitset. | 112 | * Resize the bitset. |
97 | * | 113 | * |
98 | * info - describes the bitset | 114 | * info - describes the bitset |
@@ -161,6 +177,29 @@ int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, | |||
161 | int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, | 177 | int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, |
162 | dm_block_t *new_root); | 178 | dm_block_t *new_root); |
163 | 179 | ||
180 | struct dm_bitset_cursor { | ||
181 | struct dm_disk_bitset *info; | ||
182 | struct dm_array_cursor cursor; | ||
183 | |||
184 | uint32_t entries_remaining; | ||
185 | uint32_t array_index; | ||
186 | uint32_t bit_index; | ||
187 | uint64_t current_bits; | ||
188 | }; | ||
189 | |||
190 | /* | ||
191 | * Make sure you've flush any dm_disk_bitset and updated the root before | ||
192 | * using this. | ||
193 | */ | ||
194 | int dm_bitset_cursor_begin(struct dm_disk_bitset *info, | ||
195 | dm_block_t root, uint32_t nr_entries, | ||
196 | struct dm_bitset_cursor *c); | ||
197 | void dm_bitset_cursor_end(struct dm_bitset_cursor *c); | ||
198 | |||
199 | int dm_bitset_cursor_next(struct dm_bitset_cursor *c); | ||
200 | int dm_bitset_cursor_skip(struct dm_bitset_cursor *c, uint32_t count); | ||
201 | bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c); | ||
202 | |||
164 | /*----------------------------------------------------------------*/ | 203 | /*----------------------------------------------------------------*/ |
165 | 204 | ||
166 | #endif /* _LINUX_DM_BITSET_H */ | 205 | #endif /* _LINUX_DM_BITSET_H */ |
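A hedged usage sketch of the new cursor interface, roughly how dm-cache-metadata walks its dirty and discard bitsets (the caller name and the empty consume step are illustrative):

static int walk_bits(struct dm_disk_bitset *info, dm_block_t root,
                     uint32_t nr_bits)
{
        struct dm_bitset_cursor c;
        uint32_t i;
        int r = dm_bitset_cursor_begin(info, root, nr_bits, &c);

        if (r)
                return r;

        for (i = 0; i < nr_bits; i++) {
                bool bit = dm_bitset_cursor_get_value(&c);

                /* ... consume 'bit' for index i ... */

                if (i < nr_bits - 1) {
                        r = dm_bitset_cursor_next(&c);
                        if (r)
                                break;
                }
        }

        dm_bitset_cursor_end(&c);
        return r;
}

Note the cursor must only be opened on a flushed bitset, per the comment above, and dm_bitset_cursor_next() is not called after the final bit because stepping past the end fails.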
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 758d90cc2733..0863905dee02 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c | |||
@@ -462,7 +462,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, | |||
462 | int r; | 462 | int r; |
463 | 463 | ||
464 | p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); | 464 | p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); |
465 | if (IS_ERR(p)) | 465 | if (unlikely(IS_ERR(p))) |
466 | return PTR_ERR(p); | 466 | return PTR_ERR(p); |
467 | 467 | ||
468 | aux = dm_bufio_get_aux_data(to_buffer(*result)); | 468 | aux = dm_bufio_get_aux_data(to_buffer(*result)); |
@@ -498,7 +498,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, | |||
498 | return -EPERM; | 498 | return -EPERM; |
499 | 499 | ||
500 | p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); | 500 | p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); |
501 | if (IS_ERR(p)) | 501 | if (unlikely(IS_ERR(p))) |
502 | return PTR_ERR(p); | 502 | return PTR_ERR(p); |
503 | 503 | ||
504 | aux = dm_bufio_get_aux_data(to_buffer(*result)); | 504 | aux = dm_bufio_get_aux_data(to_buffer(*result)); |
@@ -531,7 +531,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, | |||
531 | int r; | 531 | int r; |
532 | 532 | ||
533 | p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); | 533 | p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); |
534 | if (IS_ERR(p)) | 534 | if (unlikely(IS_ERR(p))) |
535 | return PTR_ERR(p); | 535 | return PTR_ERR(p); |
536 | if (unlikely(!p)) | 536 | if (unlikely(!p)) |
537 | return -EWOULDBLOCK; | 537 | return -EWOULDBLOCK; |
@@ -567,7 +567,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, | |||
567 | return -EPERM; | 567 | return -EPERM; |
568 | 568 | ||
569 | p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); | 569 | p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); |
570 | if (IS_ERR(p)) | 570 | if (unlikely(IS_ERR(p))) |
571 | return PTR_ERR(p); | 571 | return PTR_ERR(p); |
572 | 572 | ||
573 | memset(p, 0, dm_bm_block_size(bm)); | 573 | memset(p, 0, dm_bm_block_size(bm)); |
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 20a40329d84a..02e2ee0d8a00 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c | |||
@@ -272,7 +272,12 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) | |||
272 | int r; | 272 | int r; |
273 | struct del_stack *s; | 273 | struct del_stack *s; |
274 | 274 | ||
275 | s = kmalloc(sizeof(*s), GFP_NOIO); | 275 | /* |
276 | * dm_btree_del() is called via an ioctl, so it should be | ||
277 | * considered an FS op. We can't recurse back into the FS, so we | ||
278 | * allocate GFP_NOFS. | ||
279 | */ | ||
280 | s = kmalloc(sizeof(*s), GFP_NOFS); | ||
276 | if (!s) | 281 | if (!s) |
277 | return -ENOMEM; | 282 | return -ENOMEM; |
278 | s->info = info; | 283 | s->info = info; |
@@ -1139,6 +1144,17 @@ int dm_btree_cursor_next(struct dm_btree_cursor *c) | |||
1139 | } | 1144 | } |
1140 | EXPORT_SYMBOL_GPL(dm_btree_cursor_next); | 1145 | EXPORT_SYMBOL_GPL(dm_btree_cursor_next); |
1141 | 1146 | ||
1147 | int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count) | ||
1148 | { | ||
1149 | int r = 0; | ||
1150 | |||
1151 | while (count-- && !r) | ||
1152 | r = dm_btree_cursor_next(c); | ||
1153 | |||
1154 | return r; | ||
1155 | } | ||
1156 | EXPORT_SYMBOL_GPL(dm_btree_cursor_skip); | ||
1157 | |||
1142 | int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le) | 1158 | int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le) |
1143 | { | 1159 | { |
1144 | if (c->depth) { | 1160 | if (c->depth) { |
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index db9bd26adf31..3dc5bb1a4748 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h | |||
@@ -209,6 +209,7 @@ int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, | |||
209 | bool prefetch_leaves, struct dm_btree_cursor *c); | 209 | bool prefetch_leaves, struct dm_btree_cursor *c); |
210 | void dm_btree_cursor_end(struct dm_btree_cursor *c); | 210 | void dm_btree_cursor_end(struct dm_btree_cursor *c); |
211 | int dm_btree_cursor_next(struct dm_btree_cursor *c); | 211 | int dm_btree_cursor_next(struct dm_btree_cursor *c); |
212 | int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count); | ||
212 | int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); | 213 | int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); |
213 | 214 | ||
214 | #endif /* _LINUX_DM_BTREE_H */ | 215 | #endif /* _LINUX_DM_BTREE_H */ |
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 4c28608a0c94..829b4ce057d8 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c | |||
@@ -626,13 +626,19 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, | |||
626 | void *root_le, size_t len) | 626 | void *root_le, size_t len) |
627 | { | 627 | { |
628 | int r; | 628 | int r; |
629 | struct disk_sm_root *smr = root_le; | 629 | struct disk_sm_root smr; |
630 | 630 | ||
631 | if (len < sizeof(struct disk_sm_root)) { | 631 | if (len < sizeof(struct disk_sm_root)) { |
632 | DMERR("sm_metadata root too small"); | 632 | DMERR("sm_metadata root too small"); |
633 | return -ENOMEM; | 633 | return -ENOMEM; |
634 | } | 634 | } |
635 | 635 | ||
636 | /* | ||
637 | * We don't know the alignment of the root_le buffer, so need to | ||
638 | * copy into a new structure. | ||
639 | */ | ||
640 | memcpy(&smr, root_le, sizeof(smr)); | ||
641 | |||
636 | r = sm_ll_init(ll, tm); | 642 | r = sm_ll_init(ll, tm); |
637 | if (r < 0) | 643 | if (r < 0) |
638 | return r; | 644 | return r; |
@@ -644,10 +650,10 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, | |||
644 | ll->max_entries = metadata_ll_max_entries; | 650 | ll->max_entries = metadata_ll_max_entries; |
645 | ll->commit = metadata_ll_commit; | 651 | ll->commit = metadata_ll_commit; |
646 | 652 | ||
647 | ll->nr_blocks = le64_to_cpu(smr->nr_blocks); | 653 | ll->nr_blocks = le64_to_cpu(smr.nr_blocks); |
648 | ll->nr_allocated = le64_to_cpu(smr->nr_allocated); | 654 | ll->nr_allocated = le64_to_cpu(smr.nr_allocated); |
649 | ll->bitmap_root = le64_to_cpu(smr->bitmap_root); | 655 | ll->bitmap_root = le64_to_cpu(smr.bitmap_root); |
650 | ll->ref_count_root = le64_to_cpu(smr->ref_count_root); | 656 | ll->ref_count_root = le64_to_cpu(smr.ref_count_root); |
651 | 657 | ||
652 | return ll->open_index(ll); | 658 | return ll->open_index(ll); |
653 | } | 659 | } |
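The point of the memcpy() is that root_le points into a metadata block at an arbitrary byte offset, so dereferencing it as a struct with 64-bit members is an unaligned access, which traps or is costly on strict architectures. Copying into a stack variable is always aligned. A userspace demonstration of the pattern (the struct layout is illustrative, not the real disk_sm_root):

#include <assert.h>
#include <stdint.h>
#include <string.h>

struct disk_root { uint64_t nr_blocks, nr_allocated; };

/* Analog of the fix: never dereference a possibly unaligned buffer
 * as a struct; memcpy() into an aligned local copy instead. */
static struct disk_root read_root(const void *root_le)
{
        struct disk_root smr;

        memcpy(&smr, root_le, sizeof(smr));
        return smr;
}

int main(void)
{
        unsigned char buf[1 + sizeof(struct disk_root)];
        struct disk_root on_disk = { 1024, 7 }, r;

        memcpy(buf + 1, &on_disk, sizeof(on_disk));     /* misaligned on purpose */
        r = read_root(buf + 1);
        assert(r.nr_blocks == 1024 && r.nr_allocated == 7);
        return 0;
}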
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 20557e2c60c6..4aed69d9dd17 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c | |||
@@ -544,7 +544,7 @@ static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t | |||
544 | 544 | ||
545 | static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); | 545 | static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); |
546 | 546 | ||
547 | static struct dm_space_map ops = { | 547 | static const struct dm_space_map ops = { |
548 | .destroy = sm_metadata_destroy, | 548 | .destroy = sm_metadata_destroy, |
549 | .extend = sm_metadata_extend, | 549 | .extend = sm_metadata_extend, |
550 | .get_nr_blocks = sm_metadata_get_nr_blocks, | 550 | .get_nr_blocks = sm_metadata_get_nr_blocks, |
@@ -671,7 +671,7 @@ static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, | |||
671 | return -EINVAL; | 671 | return -EINVAL; |
672 | } | 672 | } |
673 | 673 | ||
674 | static struct dm_space_map bootstrap_ops = { | 674 | static const struct dm_space_map bootstrap_ops = { |
675 | .destroy = sm_bootstrap_destroy, | 675 | .destroy = sm_bootstrap_destroy, |
676 | .extend = sm_bootstrap_extend, | 676 | .extend = sm_bootstrap_extend, |
677 | .get_nr_blocks = sm_bootstrap_get_nr_blocks, | 677 | .get_nr_blocks = sm_bootstrap_get_nr_blocks, |