Diffstat (limited to 'drivers/md')

 -rw-r--r--  drivers/md/Kconfig                            |  11
 -rw-r--r--  drivers/md/dm-bufio.c                         |   2
 -rw-r--r--  drivers/md/dm-cache-metadata.c                |  64
 -rw-r--r--  drivers/md/dm-cache-metadata.h                |   2
 -rw-r--r--  drivers/md/dm-cache-policy-cleaner.c          |   7
 -rw-r--r--  drivers/md/dm-cache-policy-internal.h         |   2
 -rw-r--r--  drivers/md/dm-cache-policy-mq.c               |   8
 -rw-r--r--  drivers/md/dm-cache-policy.c                  |   8
 -rw-r--r--  drivers/md/dm-cache-policy.h                  |   2
 -rw-r--r--  drivers/md/dm-cache-target.c                  | 169
 -rw-r--r--  drivers/md/dm-raid.c                          | 123
 -rw-r--r--  drivers/md/dm-thin.c                          |  11
 -rw-r--r--  drivers/md/dm-verity.c                        |  39
 -rw-r--r--  drivers/md/md.c                               |  25
 -rw-r--r--  drivers/md/md.h                               |   4
 -rw-r--r--  drivers/md/persistent-data/dm-btree-remove.c  |  46
 -rw-r--r--  drivers/md/raid0.c                            |  13
 -rw-r--r--  drivers/md/raid1.c                            |   8
 -rw-r--r--  drivers/md/raid10.c                           |  97
 -rw-r--r--  drivers/md/raid10.h                           |   5
 -rw-r--r--  drivers/md/raid5.c                            | 154
 -rw-r--r--  drivers/md/raid5.h                            |   5

 22 files changed, 557 insertions, 248 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index e30b490055aa..4d8d90b4fe78 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -154,17 +154,6 @@ config MD_RAID456
 
 	  If unsure, say Y.
 
-config MULTICORE_RAID456
-	bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
-	depends on MD_RAID456
-	depends on SMP
-	depends on EXPERIMENTAL
-	---help---
-	  Enable the raid456 module to dispatch per-stripe raid operations to a
-	  thread pool.
-
-	  If unsure, say N.
-
 config MD_MULTIPATH
 	tristate "Multipath I/O support"
 	depends on BLK_DEV_MD
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 3c955e10a618..c6083132c4b8 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1025,6 +1025,8 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
 {
 	struct blk_plug plug;
 
+	BUG_ON(dm_bufio_in_request());
+
 	blk_start_plug(&plug);
 	dm_bufio_lock(c);
 
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index fbd3625f2748..83e995fece88 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -83,6 +83,8 @@ struct cache_disk_superblock {
 	__le32 read_misses;
 	__le32 write_hits;
 	__le32 write_misses;
+
+	__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
 } __packed;
 
 struct dm_cache_metadata {
@@ -109,6 +111,7 @@ struct dm_cache_metadata {
 	bool clean_when_opened:1;
 
 	char policy_name[CACHE_POLICY_NAME_SIZE];
+	unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
 	size_t policy_hint_size;
 	struct dm_cache_statistics stats;
 };
@@ -268,7 +271,8 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
 	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
 	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
 	disk_super->version = cpu_to_le32(CACHE_VERSION);
-	memset(disk_super->policy_name, 0, CACHE_POLICY_NAME_SIZE);
+	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
+	memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
 	disk_super->policy_hint_size = 0;
 
 	r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
@@ -284,7 +288,6 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
 	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
 	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
 	disk_super->cache_blocks = cpu_to_le32(0);
-	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
 
 	disk_super->read_hits = cpu_to_le32(0);
 	disk_super->read_misses = cpu_to_le32(0);
@@ -478,6 +481,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
 	cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
 	cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
 	strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
+	cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
+	cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
+	cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
 	cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
 
 	cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
@@ -572,6 +578,9 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
 	disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
 	strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
+	disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
+	disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
+	disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
 
 	disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
 	disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
@@ -854,18 +863,43 @@ struct thunk {
 	bool hints_valid;
 };
 
+static bool policy_unchanged(struct dm_cache_metadata *cmd,
+			     struct dm_cache_policy *policy)
+{
+	const char *policy_name = dm_cache_policy_get_name(policy);
+	const unsigned *policy_version = dm_cache_policy_get_version(policy);
+	size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
+
+	/*
+	 * Ensure policy names match.
+	 */
+	if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
+		return false;
+
+	/*
+	 * Ensure policy major versions match.
+	 */
+	if (cmd->policy_version[0] != policy_version[0])
+		return false;
+
+	/*
+	 * Ensure policy hint sizes match.
+	 */
+	if (cmd->policy_hint_size != policy_hint_size)
+		return false;
+
+	return true;
+}
+
 static bool hints_array_initialized(struct dm_cache_metadata *cmd)
 {
 	return cmd->hint_root && cmd->policy_hint_size;
 }
 
 static bool hints_array_available(struct dm_cache_metadata *cmd,
-				  const char *policy_name)
+				  struct dm_cache_policy *policy)
 {
-	bool policy_names_match = !strncmp(cmd->policy_name, policy_name,
-					   sizeof(cmd->policy_name));
-
-	return cmd->clean_when_opened && policy_names_match &&
+	return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
 	       hints_array_initialized(cmd);
 }
 
@@ -899,7 +933,8 @@ static int __load_mapping(void *context, uint64_t cblock, void *leaf)
 		return r;
 }
 
-static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_name,
+static int __load_mappings(struct dm_cache_metadata *cmd,
+			   struct dm_cache_policy *policy,
 			   load_mapping_fn fn, void *context)
 {
 	struct thunk thunk;
@@ -909,18 +944,19 @@ static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_nam
 
 	thunk.cmd = cmd;
 	thunk.respect_dirty_flags = cmd->clean_when_opened;
-	thunk.hints_valid = hints_array_available(cmd, policy_name);
+	thunk.hints_valid = hints_array_available(cmd, policy);
 
 	return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
 }
 
-int dm_cache_load_mappings(struct dm_cache_metadata *cmd, const char *policy_name,
+int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
+			   struct dm_cache_policy *policy,
 			   load_mapping_fn fn, void *context)
 {
 	int r;
 
 	down_read(&cmd->root_lock);
-	r = __load_mappings(cmd, policy_name, fn, context);
+	r = __load_mappings(cmd, policy, fn, context);
 	up_read(&cmd->root_lock);
 
 	return r;
@@ -979,7 +1015,7 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty
 		/* nothing to be done */
 		return 0;
 
-	value = pack_value(oblock, flags | (dirty ? M_DIRTY : 0));
+	value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
 	__dm_bless_for_disk(&value);
 
 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
@@ -1070,13 +1106,15 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
 	__le32 value;
 	size_t hint_size;
 	const char *policy_name = dm_cache_policy_get_name(policy);
+	const unsigned *policy_version = dm_cache_policy_get_version(policy);
 
 	if (!policy_name[0] ||
 	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
 		return -EINVAL;
 
-	if (strcmp(cmd->policy_name, policy_name)) {
+	if (!policy_unchanged(cmd, policy)) {
 		strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
+		memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
 
 		hint_size = dm_cache_policy_get_hint_size(policy);
 		if (!hint_size)
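
The rule the metadata changes above implement is worth stating plainly: persisted per-block hints are only trusted on reload when the policy name, the policy's major version, and the hint size all match what the superblock recorded; minor and patch versions may differ. A standalone C model of that check follows (struct layout and values are invented stand-ins for illustration, not kernel code):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NAME_SIZE 16
#define VERSION_SIZE 3

struct recorded_policy {		/* what the superblock remembers */
	char name[NAME_SIZE];
	unsigned version[VERSION_SIZE];
	size_t hint_size;
};

static bool policy_unchanged(const struct recorded_policy *old,
			     const struct recorded_policy *new)
{
	if (strncmp(old->name, new->name, NAME_SIZE))
		return false;
	/* only the major version gates compatibility */
	if (old->version[0] != new->version[0])
		return false;
	return old->hint_size == new->hint_size;
}

int main(void)
{
	struct recorded_policy on_disk    = { "mq", {1, 0, 0}, 4 };
	struct recorded_policy minor_bump = { "mq", {1, 2, 0}, 4 };
	struct recorded_policy major_bump = { "mq", {2, 0, 0}, 4 };

	printf("minor bump, hints reusable: %s\n",
	       policy_unchanged(&on_disk, &minor_bump) ? "yes" : "no");
	printf("major bump, hints reusable: %s\n",
	       policy_unchanged(&on_disk, &major_bump) ? "yes" : "no");
	return 0;
}
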
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 135864ea0eee..f45cef21f3d0 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -89,7 +89,7 @@ typedef int (*load_mapping_fn)(void *context, dm_oblock_t oblock,
 			       dm_cblock_t cblock, bool dirty,
 			       uint32_t hint, bool hint_valid);
 int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
-			   const char *policy_name,
+			   struct dm_cache_policy *policy,
 			   load_mapping_fn fn,
 			   void *context);
 
diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c
index cc05d70b3cb8..b04d1f904d07 100644
--- a/drivers/md/dm-cache-policy-cleaner.c
+++ b/drivers/md/dm-cache-policy-cleaner.c
@@ -17,7 +17,6 @@
 /*----------------------------------------------------------------*/
 
 #define DM_MSG_PREFIX "cache cleaner"
-#define CLEANER_VERSION "1.0.0"
 
 /* Cache entry struct. */
 struct wb_cache_entry {
@@ -434,6 +433,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size,
 
 static struct dm_cache_policy_type wb_policy_type = {
 	.name = "cleaner",
+	.version = {1, 0, 0},
 	.hint_size = 0,
 	.owner = THIS_MODULE,
 	.create = wb_create
@@ -446,7 +446,10 @@ static int __init wb_init(void)
 	if (r < 0)
 		DMERR("register failed %d", r);
 	else
-		DMINFO("version " CLEANER_VERSION " loaded");
+		DMINFO("version %u.%u.%u loaded",
+		       wb_policy_type.version[0],
+		       wb_policy_type.version[1],
+		       wb_policy_type.version[2]);
 
 	return r;
 }
diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h
index 52a75beeced5..0928abdc49f0 100644
--- a/drivers/md/dm-cache-policy-internal.h
+++ b/drivers/md/dm-cache-policy-internal.h
@@ -117,6 +117,8 @@ void dm_cache_policy_destroy(struct dm_cache_policy *p);
  */
 const char *dm_cache_policy_get_name(struct dm_cache_policy *p);
 
+const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p);
+
 size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p);
 
 /*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 964153255076..dc112a7137fe 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -14,7 +14,6 @@
 #include <linux/vmalloc.h>
 
 #define DM_MSG_PREFIX "cache-policy-mq"
-#define MQ_VERSION "1.0.0"
 
 static struct kmem_cache *mq_entry_cache;
 
@@ -1133,6 +1132,7 @@ bad_cache_alloc:
 
 static struct dm_cache_policy_type mq_policy_type = {
 	.name = "mq",
+	.version = {1, 0, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create
@@ -1140,6 +1140,7 @@ static struct dm_cache_policy_type mq_policy_type = {
 
 static struct dm_cache_policy_type default_policy_type = {
 	.name = "default",
+	.version = {1, 0, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create
@@ -1164,7 +1165,10 @@ static int __init mq_init(void)
 
 	r = dm_cache_policy_register(&default_policy_type);
 	if (!r) {
-		DMINFO("version " MQ_VERSION " loaded");
+		DMINFO("version %u.%u.%u loaded",
+		       mq_policy_type.version[0],
+		       mq_policy_type.version[1],
+		       mq_policy_type.version[2]);
 		return 0;
 	}
 
diff --git a/drivers/md/dm-cache-policy.c b/drivers/md/dm-cache-policy.c
index 2cbf5fdaac52..21c03c570c06 100644
--- a/drivers/md/dm-cache-policy.c
+++ b/drivers/md/dm-cache-policy.c
@@ -150,6 +150,14 @@ const char *dm_cache_policy_get_name(struct dm_cache_policy *p)
 }
 EXPORT_SYMBOL_GPL(dm_cache_policy_get_name);
 
+const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p)
+{
+	struct dm_cache_policy_type *t = p->private;
+
+	return t->version;
+}
+EXPORT_SYMBOL_GPL(dm_cache_policy_get_version);
+
 size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p)
 {
 	struct dm_cache_policy_type *t = p->private;
diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h
index f0f51b260544..558bdfdabf5f 100644
--- a/drivers/md/dm-cache-policy.h
+++ b/drivers/md/dm-cache-policy.h
@@ -196,6 +196,7 @@ struct dm_cache_policy {
  * We maintain a little register of the different policy types.
  */
 #define CACHE_POLICY_NAME_SIZE 16
+#define CACHE_POLICY_VERSION_SIZE 3
 
 struct dm_cache_policy_type {
 	/* For use by the register code only. */
@@ -206,6 +207,7 @@ struct dm_cache_policy_type {
 	 * what gets passed on the target line to select your policy.
 	 */
 	char name[CACHE_POLICY_NAME_SIZE];
+	unsigned version[CACHE_POLICY_VERSION_SIZE];
 
 	/*
 	 * Policies may store a hint for each each cache block.
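
With the version triple stored on dm_cache_policy_type itself, the module banners (see the wb_init/mq_init changes above) and the superblock fields are both derived from one definition. A minimal standalone sketch of that pattern, using a cut-down stand-in for the kernel struct:

#include <stdio.h>

#define CACHE_POLICY_VERSION_SIZE 3

struct policy_type {
	const char *name;
	unsigned version[CACHE_POLICY_VERSION_SIZE];
};

static struct policy_type mq_policy_type = {
	.name = "mq",
	.version = {1, 0, 0},		/* {major, minor, patch} */
};

int main(void)
{
	/* mirrors: DMINFO("version %u.%u.%u loaded", ...) */
	printf("%s: version %u.%u.%u loaded\n",
	       mq_policy_type.name,
	       mq_policy_type.version[0],
	       mq_policy_type.version[1],
	       mq_policy_type.version[2]);
	return 0;
}
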
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 0f4e84b15c30..66120bd46d15 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -142,6 +142,7 @@ struct cache {
 	spinlock_t lock;
 	struct bio_list deferred_bios;
 	struct bio_list deferred_flush_bios;
+	struct bio_list deferred_writethrough_bios;
 	struct list_head quiesced_migrations;
 	struct list_head completed_migrations;
 	struct list_head need_commit_migrations;
@@ -158,7 +159,7 @@ struct cache {
 	/*
 	 * origin_blocks entries, discarded if set.
 	 */
-	sector_t discard_block_size; /* a power of 2 times sectors per block */
+	uint32_t discard_block_size; /* a power of 2 times sectors per block */
 	dm_dblock_t discard_nr_blocks;
 	unsigned long *discard_bitset;
 
@@ -199,6 +200,11 @@ struct per_bio_data {
 	bool tick:1;
 	unsigned req_nr:2;
 	struct dm_deferred_entry *all_io_entry;
+
+	/* writethrough fields */
+	struct cache *cache;
+	dm_cblock_t cblock;
+	bio_end_io_t *saved_bi_end_io;
 };
 
 struct dm_cache_migration {
@@ -412,17 +418,24 @@ static bool block_size_is_power_of_two(struct cache *cache)
 	return cache->sectors_per_block_shift >= 0;
 }
 
+static dm_block_t block_div(dm_block_t b, uint32_t n)
+{
+	do_div(b, n);
+
+	return b;
+}
+
 static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
 {
-	sector_t discard_blocks = cache->discard_block_size;
+	uint32_t discard_blocks = cache->discard_block_size;
 	dm_block_t b = from_oblock(oblock);
 
 	if (!block_size_is_power_of_two(cache))
-		(void) sector_div(discard_blocks, cache->sectors_per_block);
+		discard_blocks = discard_blocks / cache->sectors_per_block;
 	else
 		discard_blocks >>= cache->sectors_per_block_shift;
 
-	(void) sector_div(b, discard_blocks);
+	b = block_div(b, discard_blocks);
 
 	return to_dblock(b);
 }
@@ -609,6 +622,56 @@ static void issue(struct cache *cache, struct bio *bio)
 	spin_unlock_irqrestore(&cache->lock, flags);
 }
 
+static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	bio_list_add(&cache->deferred_writethrough_bios, bio);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	wake_worker(cache);
+}
+
+static void writethrough_endio(struct bio *bio, int err)
+{
+	struct per_bio_data *pb = get_per_bio_data(bio);
+	bio->bi_end_io = pb->saved_bi_end_io;
+
+	if (err) {
+		bio_endio(bio, err);
+		return;
+	}
+
+	remap_to_cache(pb->cache, bio, pb->cblock);
+
+	/*
+	 * We can't issue this bio directly, since we're in interrupt
+	 * context.  So it gets put on a bio list for processing by the
+	 * worker thread.
+	 */
+	defer_writethrough_bio(pb->cache, bio);
+}
+
+/*
+ * When running in writethrough mode we need to send writes to clean blocks
+ * to both the cache and origin devices.  In future we'd like to clone the
+ * bio and send them in parallel, but for now we're doing them in
+ * series as this is easier.
+ */
+static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
+				       dm_oblock_t oblock, dm_cblock_t cblock)
+{
+	struct per_bio_data *pb = get_per_bio_data(bio);
+
+	pb->cache = cache;
+	pb->cblock = cblock;
+	pb->saved_bi_end_io = bio->bi_end_io;
+	bio->bi_end_io = writethrough_endio;
+
+	remap_to_origin_clear_discard(pb->cache, bio, oblock);
+}
+
 /*----------------------------------------------------------------
  * Migration processing
  *
@@ -1002,7 +1065,7 @@ static void process_discard_bio(struct cache *cache, struct bio *bio)
 	dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
 	dm_block_t b;
 
-	(void) sector_div(end_block, cache->discard_block_size);
+	end_block = block_div(end_block, cache->discard_block_size);
 
 	for (b = start_block; b < end_block; b++)
 		set_discard(cache, to_dblock(b));
@@ -1070,14 +1133,9 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 		inc_hit_counter(cache, bio);
 		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
 
-		if (is_writethrough_io(cache, bio, lookup_result.cblock)) {
-			/*
-			 * No need to mark anything dirty in write through mode.
-			 */
-			pb->req_nr == 0 ?
-				remap_to_cache(cache, bio, lookup_result.cblock) :
-				remap_to_origin_clear_discard(cache, bio, block);
-		} else
+		if (is_writethrough_io(cache, bio, lookup_result.cblock))
+			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+		else
 			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
 
 		issue(cache, bio);
@@ -1086,17 +1144,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 	case POLICY_MISS:
 		inc_miss_counter(cache, bio);
 		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
-		if (pb->req_nr != 0) {
-			/*
-			 * This is a duplicate writethrough io that is no
-			 * longer needed because the block has been demoted.
-			 */
-			bio_endio(bio, 0);
-		} else {
-			remap_to_origin_clear_discard(cache, bio, block);
-			issue(cache, bio);
-		}
+		remap_to_origin_clear_discard(cache, bio, block);
+		issue(cache, bio);
 		break;
 
 	case POLICY_NEW:
@@ -1217,6 +1266,23 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
 		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
 }
 
+static void process_deferred_writethrough_bios(struct cache *cache)
+{
+	unsigned long flags;
+	struct bio_list bios;
+	struct bio *bio;
+
+	bio_list_init(&bios);
+
+	spin_lock_irqsave(&cache->lock, flags);
+	bio_list_merge(&bios, &cache->deferred_writethrough_bios);
+	bio_list_init(&cache->deferred_writethrough_bios);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	while ((bio = bio_list_pop(&bios)))
+		generic_make_request(bio);
+}
+
 static void writeback_some_dirty_blocks(struct cache *cache)
 {
 	int r = 0;
@@ -1313,6 +1379,7 @@ static int more_work(struct cache *cache)
 	else
 		return !bio_list_empty(&cache->deferred_bios) ||
 			!bio_list_empty(&cache->deferred_flush_bios) ||
+			!bio_list_empty(&cache->deferred_writethrough_bios) ||
 			!list_empty(&cache->quiesced_migrations) ||
 			!list_empty(&cache->completed_migrations) ||
 			!list_empty(&cache->need_commit_migrations);
@@ -1331,6 +1398,8 @@ static void do_worker(struct work_struct *ws)
 
 		writeback_some_dirty_blocks(cache);
 
+		process_deferred_writethrough_bios(cache);
+
 		if (commit_if_needed(cache)) {
 			process_deferred_flush_bios(cache, false);
 
@@ -1756,8 +1825,11 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca,
 	}
 
 	r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv);
-	if (r)
+	if (r) {
+		*error = "Error setting cache policy's config values";
 		dm_cache_policy_destroy(cache->policy);
+		cache->policy = NULL;
+	}
 
 	return r;
 }
@@ -1793,8 +1865,6 @@ static sector_t calculate_discard_block_size(sector_t cache_block_size,
 
 #define DEFAULT_MIGRATION_THRESHOLD (2048 * 100)
 
-static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio);
-
 static int cache_create(struct cache_args *ca, struct cache **result)
 {
 	int r = 0;
@@ -1821,9 +1891,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	memcpy(&cache->features, &ca->features, sizeof(cache->features));
 
-	if (cache->features.write_through)
-		ti->num_write_bios = cache_num_write_bios;
-
 	cache->callbacks.congested_fn = cache_is_congested;
 	dm_table_add_target_callbacks(ti->table, &cache->callbacks);
 
@@ -1835,7 +1902,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	/* FIXME: factor out this whole section */
 	origin_blocks = cache->origin_sectors = ca->origin_sectors;
-	(void) sector_div(origin_blocks, ca->block_size);
+	origin_blocks = block_div(origin_blocks, ca->block_size);
 	cache->origin_blocks = to_oblock(origin_blocks);
 
 	cache->sectors_per_block = ca->block_size;
@@ -1848,7 +1915,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 		dm_block_t cache_size = ca->cache_sectors;
 
 		cache->sectors_per_block_shift = -1;
-		(void) sector_div(cache_size, ca->block_size);
+		cache_size = block_div(cache_size, ca->block_size);
 		cache->cache_size = to_cblock(cache_size);
 	} else {
 		cache->sectors_per_block_shift = __ffs(ca->block_size);
@@ -1873,6 +1940,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 	spin_lock_init(&cache->lock);
 	bio_list_init(&cache->deferred_bios);
 	bio_list_init(&cache->deferred_flush_bios);
+	bio_list_init(&cache->deferred_writethrough_bios);
 	INIT_LIST_HEAD(&cache->quiesced_migrations);
 	INIT_LIST_HEAD(&cache->completed_migrations);
 	INIT_LIST_HEAD(&cache->need_commit_migrations);
@@ -2002,6 +2070,8 @@ static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto out;
 
 	r = cache_create(ca, &cache);
+	if (r)
+		goto out;
 
 	r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
 	if (r) {
@@ -2016,20 +2086,6 @@ out:
 	return r;
 }
 
-static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio)
-{
-	int r;
-	struct cache *cache = ti->private;
-	dm_oblock_t block = get_bio_block(cache, bio);
-	dm_cblock_t cblock;
-
-	r = policy_lookup(cache->policy, block, &cblock);
-	if (r < 0)
-		return 2; /* assume the worst */
-
-	return (!r && !is_dirty(cache, cblock)) ? 2 : 1;
-}
-
 static int cache_map(struct dm_target *ti, struct bio *bio)
 {
 	struct cache *cache = ti->private;
@@ -2097,18 +2153,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 		inc_hit_counter(cache, bio);
 		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
 
-		if (is_writethrough_io(cache, bio, lookup_result.cblock)) {
-			/*
-			 * No need to mark anything dirty in write through mode.
-			 */
-			pb->req_nr == 0 ?
-				remap_to_cache(cache, bio, lookup_result.cblock) :
-				remap_to_origin_clear_discard(cache, bio, block);
-			cell_defer(cache, cell, false);
-		} else {
+		if (is_writethrough_io(cache, bio, lookup_result.cblock))
+			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+		else
 			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
-			cell_defer(cache, cell, false);
-		}
+
+		cell_defer(cache, cell, false);
 		break;
 
 	case POLICY_MISS:
@@ -2319,8 +2369,7 @@ static int cache_preresume(struct dm_target *ti)
 	}
 
 	if (!cache->loaded_mappings) {
-		r = dm_cache_load_mappings(cache->cmd,
-					   dm_cache_policy_get_name(cache->policy),
+		r = dm_cache_load_mappings(cache->cmd, cache->policy,
 					   load_mapping, cache);
 		if (r) {
 			DMERR("could not load cache mappings");
@@ -2535,7 +2584,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
 	.name = "cache",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.module = THIS_MODULE,
 	.ctr = cache_ctr,
 	.dtr = cache_dtr,
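
The writethrough rework above is the bulk of this target's changes: instead of asking the core to duplicate writes (the removed cache_num_write_bios path), a single bio is issued twice in series, origin first, then re-targeted at the cache from its completion handler via the worker thread. A rough userspace model of the endio save/restore chaining, with invented types standing in for struct bio and the remap helpers:

#include <stdio.h>

struct fake_bio {
	const char *target;			/* "origin" or "cache" */
	void (*end_io)(struct fake_bio *, int);
	void (*saved_end_io)(struct fake_bio *, int);
};

static void submit(struct fake_bio *b)		/* pretend I/O always succeeds */
{
	printf("write hits %s\n", b->target);
	b->end_io(b, 0);
}

static void caller_end_io(struct fake_bio *b, int err)
{
	printf("io complete, err=%d\n", err);
}

static void writethrough_endio(struct fake_bio *b, int err)
{
	b->end_io = b->saved_end_io;		/* restore before second pass */
	if (err) {
		b->end_io(b, err);
		return;
	}
	b->target = "cache";			/* remap_to_cache() equivalent */
	submit(b);				/* kernel defers this to the worker */
}

int main(void)
{
	struct fake_bio b = { "origin", writethrough_endio, caller_end_io };
	submit(&b);
	return 0;
}

The restore must happen before the second submission so the final completion reaches the original caller rather than looping back into the writethrough handler.
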
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 9a01d1e4c783..311e3d35b272 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -91,15 +91,44 @@ static struct raid_type {
 	{"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}
 };
 
+static char *raid10_md_layout_to_format(int layout)
+{
+	/*
+	 * Bit 16 and 17 stand for "offset" and "use_far_sets"
+	 * Refer to MD's raid10.c for details
+	 */
+	if ((layout & 0x10000) && (layout & 0x20000))
+		return "offset";
+
+	if ((layout & 0xFF) > 1)
+		return "near";
+
+	return "far";
+}
+
 static unsigned raid10_md_layout_to_copies(int layout)
 {
-	return layout & 0xFF;
+	if ((layout & 0xFF) > 1)
+		return layout & 0xFF;
+	return (layout >> 8) & 0xFF;
 }
 
 static int raid10_format_to_md_layout(char *format, unsigned copies)
 {
-	/* 1 "far" copy, and 'copies' "near" copies */
-	return (1 << 8) | (copies & 0xFF);
+	unsigned n = 1, f = 1;
+
+	if (!strcmp("near", format))
+		n = copies;
+	else
+		f = copies;
+
+	if (!strcmp("offset", format))
+		return 0x30000 | (f << 8) | n;
+
+	if (!strcmp("far", format))
+		return 0x20000 | (f << 8) | n;
+
+	return (f << 8) | n;
 }
 
 static struct raid_type *get_raid_type(char *name)
@@ -352,6 +381,7 @@ static int validate_raid_redundancy(struct raid_set *rs)
 {
 	unsigned i, rebuild_cnt = 0;
 	unsigned rebuilds_per_group, copies, d;
+	unsigned group_size, last_group_start;
 
 	for (i = 0; i < rs->md.raid_disks; i++)
 		if (!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
@@ -379,9 +409,6 @@ static int validate_raid_redundancy(struct raid_set *rs)
 		 * as long as the failed devices occur in different mirror
 		 * groups (i.e. different stripes).
 		 *
-		 * Right now, we only allow for "near" copies.  When other
-		 * formats are added, we will have to check those too.
-		 *
 		 * When checking "near" format, make sure no adjacent devices
 		 * have failed beyond what can be handled.  In addition to the
 		 * simple case where the number of devices is a multiple of the
@@ -391,14 +418,41 @@ static int validate_raid_redundancy(struct raid_set *rs)
 		 *          A    A    B    B    C
 		 *          C    D    D    E    E
 		 */
-		for (i = 0; i < rs->md.raid_disks * copies; i++) {
-			if (!(i % copies))
+		if (!strcmp("near", raid10_md_layout_to_format(rs->md.layout))) {
+			for (i = 0; i < rs->md.raid_disks * copies; i++) {
+				if (!(i % copies))
+					rebuilds_per_group = 0;
+				d = i % rs->md.raid_disks;
+				if ((!rs->dev[d].rdev.sb_page ||
+				     !test_bit(In_sync, &rs->dev[d].rdev.flags)) &&
+				    (++rebuilds_per_group >= copies))
+					goto too_many;
+			}
+			break;
+		}
+
+		/*
+		 * When checking "far" and "offset" formats, we need to ensure
+		 * that the device that holds its copy is not also dead or
+		 * being rebuilt.  (Note that "far" and "offset" formats only
+		 * support two copies right now.  These formats also only ever
+		 * use the 'use_far_sets' variant.)
+		 *
+		 * This check is somewhat complicated by the need to account
+		 * for arrays that are not a multiple of (far) copies.  This
+		 * results in the need to treat the last (potentially larger)
+		 * set differently.
+		 */
+		group_size = (rs->md.raid_disks / copies);
+		last_group_start = (rs->md.raid_disks / group_size) - 1;
+		last_group_start *= group_size;
+		for (i = 0; i < rs->md.raid_disks; i++) {
+			if (!(i % copies) && !(i > last_group_start))
 				rebuilds_per_group = 0;
-			d = i % rs->md.raid_disks;
-			if ((!rs->dev[d].rdev.sb_page ||
-			     !test_bit(In_sync, &rs->dev[d].rdev.flags)) &&
+			if ((!rs->dev[i].rdev.sb_page ||
+			     !test_bit(In_sync, &rs->dev[i].rdev.flags)) &&
 			    (++rebuilds_per_group >= copies))
 				goto too_many;
 		}
 		break;
 	default:
@@ -433,7 +487,7 @@ too_many:
  *
  * RAID10-only options:
  *  [raid10_copies <# copies>]           Number of copies.  (Default: 2)
- *  [raid10_format <near>]               Layout algorithm.  (Default: near)
+ *  [raid10_format <near|far|offset>]    Layout algorithm.  (Default: near)
  */
 static int parse_raid_params(struct raid_set *rs, char **argv,
 			     unsigned num_raid_params)
@@ -520,7 +574,9 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 				rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
 				return -EINVAL;
 			}
-			if (strcmp("near", argv[i])) {
+			if (strcmp("near", argv[i]) &&
+			    strcmp("far", argv[i]) &&
+			    strcmp("offset", argv[i])) {
 				rs->ti->error = "Invalid 'raid10_format' value given";
 				return -EINVAL;
 			}
@@ -644,6 +700,15 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 			return -EINVAL;
 		}
 
+		/*
+		 * If the format is not "near", we only support
+		 * two copies at the moment.
+		 */
+		if (strcmp("near", raid10_format) && (raid10_copies > 2)) {
+			rs->ti->error = "Too many copies for given RAID10 format.";
+			return -EINVAL;
+		}
+
 		/* (Len * #mirrors) / #devices */
 		sectors_per_dev = rs->ti->len * raid10_copies;
 		sector_div(sectors_per_dev, rs->md.raid_disks);
@@ -854,17 +919,30 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev)
 	/*
 	 * Reshaping is not currently allowed
 	 */
-	if ((le32_to_cpu(sb->level) != mddev->level) ||
-	    (le32_to_cpu(sb->layout) != mddev->layout) ||
-	    (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) {
-		DMERR("Reshaping arrays not yet supported.");
+	if (le32_to_cpu(sb->level) != mddev->level) {
+		DMERR("Reshaping arrays not yet supported. (RAID level change)");
+		return -EINVAL;
+	}
+	if (le32_to_cpu(sb->layout) != mddev->layout) {
+		DMERR("Reshaping arrays not yet supported. (RAID layout change)");
+		DMERR("  0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout);
+		DMERR("  Old layout: %s w/ %d copies",
+		      raid10_md_layout_to_format(le32_to_cpu(sb->layout)),
+		      raid10_md_layout_to_copies(le32_to_cpu(sb->layout)));
+		DMERR("  New layout: %s w/ %d copies",
+		      raid10_md_layout_to_format(mddev->layout),
+		      raid10_md_layout_to_copies(mddev->layout));
+		return -EINVAL;
+	}
+	if (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors) {
+		DMERR("Reshaping arrays not yet supported. (stripe sectors change)");
 		return -EINVAL;
 	}
 
 	/* We can only change the number of devices in RAID1 right now */
 	if ((rs->raid_type->level != 1) &&
 	    (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) {
-		DMERR("Reshaping arrays not yet supported.");
+		DMERR("Reshaping arrays not yet supported. (device count change)");
 		return -EINVAL;
 	}
 
@@ -1329,7 +1407,8 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 			       raid10_md_layout_to_copies(rs->md.layout));
 
 		if (rs->print_flags & DMPF_RAID10_FORMAT)
-			DMEMIT(" raid10_format near");
+			DMEMIT(" raid10_format %s",
+			       raid10_md_layout_to_format(rs->md.layout));
 
 		DMEMIT(" %d", rs->md.raid_disks);
 		for (i = 0; i < rs->md.raid_disks; i++) {
@@ -1418,6 +1497,10 @@ static struct target_type raid_target = {
 
 static int __init dm_raid_init(void)
 {
+	DMINFO("Loading target version %u.%u.%u",
+	       raid_target.version[0],
+	       raid_target.version[1],
+	       raid_target.version[2]);
 	return dm_register_target(&raid_target);
 }
 
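
For reference, the raid10 layout word decoded by the new helpers packs the near-copy count in bits 0-7, the far-copy count in bits 8-15, and the "offset" and "use_far_sets" flags in bits 16 and 17. A standalone round-trip demo, a userspace re-implementation of the helpers above for illustration only:

#include <stdio.h>
#include <string.h>

static int format_to_layout(const char *format, unsigned copies)
{
	unsigned n = 1, f = 1;

	if (!strcmp("near", format))
		n = copies;
	else
		f = copies;

	if (!strcmp("offset", format))
		return 0x30000 | (f << 8) | n;
	if (!strcmp("far", format))
		return 0x20000 | (f << 8) | n;
	return (f << 8) | n;
}

static const char *layout_to_format(int layout)
{
	if ((layout & 0x10000) && (layout & 0x20000))
		return "offset";
	if ((layout & 0xFF) > 1)
		return "near";
	return "far";
}

static unsigned layout_to_copies(int layout)
{
	if ((layout & 0xFF) > 1)
		return layout & 0xFF;		/* near copies dominate */
	return (layout >> 8) & 0xFF;		/* otherwise count far copies */
}

int main(void)
{
	const char *formats[] = { "near", "far", "offset" };

	for (int i = 0; i < 3; i++) {
		int layout = format_to_layout(formats[i], 2);
		printf("%-6s copies=2 -> layout 0x%05X -> %s w/ %u copies\n",
		       formats[i], layout,
		       layout_to_format(layout), layout_to_copies(layout));
	}
	return 0;
}
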
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 009339d62828..004ad1652b73 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1577,6 +1577,11 @@ static bool data_dev_supports_discard(struct pool_c *pt)
 	return q && blk_queue_discard(q);
 }
 
+static bool is_factor(sector_t block_size, uint32_t n)
+{
+	return !sector_div(block_size, n);
+}
+
 /*
  * If discard_passdown was enabled verify that the data device
  * supports discards.  Disable discard_passdown if not.
@@ -1602,7 +1607,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
 	else if (data_limits->discard_granularity > block_size)
 		reason = "discard granularity larger than a block";
 
-	else if (block_size & (data_limits->discard_granularity - 1))
+	else if (!is_factor(block_size, data_limits->discard_granularity))
 		reason = "discard granularity not a factor of block size";
 
 	if (reason) {
@@ -2544,7 +2549,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 6, 1},
+	.version = {1, 7, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -2831,7 +2836,7 @@ static int thin_iterate_devices(struct dm_target *ti,
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 7, 1},
+	.version = {1, 8, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
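
The is_factor() helper fixes a subtle limitation: the bitmask test it replaces is only correct when discard_granularity is a power of two. A small standalone demonstration, with sector_div() modelled by the C modulo operator:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool old_check(uint64_t block_size, uint32_t granularity)
{
	return !(block_size & (granularity - 1));	/* power-of-two only */
}

static bool is_factor(uint64_t block_size, uint32_t granularity)
{
	return !(block_size % granularity);		/* works for any value */
}

int main(void)
{
	/* granularity 24 is not a power of two; the bitmask test lies */
	uint64_t block_size = 128;
	uint32_t granularity = 24;

	printf("old_check: %d, is_factor: %d\n",
	       old_check(block_size, granularity),
	       is_factor(block_size, granularity));
	return 0;
}

Here old_check() wrongly reports 24 as a factor of 128 (128 & 23 == 0), while the division remainder correctly rejects it.
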
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 6ad538375c3c..a746f1d21c66 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -93,6 +93,13 @@ struct dm_verity_io {
 	 */
 };
 
+struct dm_verity_prefetch_work {
+	struct work_struct work;
+	struct dm_verity *v;
+	sector_t block;
+	unsigned n_blocks;
+};
+
 static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
 {
 	return (struct shash_desc *)(io + 1);
@@ -424,15 +431,18 @@ static void verity_end_io(struct bio *bio, int error)
 * The root buffer is not prefetched, it is assumed that it will be cached
 * all the time.
 */
-static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
+static void verity_prefetch_io(struct work_struct *work)
 {
+	struct dm_verity_prefetch_work *pw =
+		container_of(work, struct dm_verity_prefetch_work, work);
+	struct dm_verity *v = pw->v;
 	int i;
 
 	for (i = v->levels - 2; i >= 0; i--) {
 		sector_t hash_block_start;
 		sector_t hash_block_end;
-		verity_hash_at_level(v, io->block, i, &hash_block_start, NULL);
-		verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL);
+		verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
+		verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
 		if (!i) {
 			unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
 
@@ -452,6 +462,25 @@ no_prefetch_cluster:
 		dm_bufio_prefetch(v->bufio, hash_block_start,
 				  hash_block_end - hash_block_start + 1);
 	}
+
+	kfree(pw);
+}
+
+static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
+{
+	struct dm_verity_prefetch_work *pw;
+
+	pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
+		GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+	if (!pw)
+		return;
+
+	INIT_WORK(&pw->work, verity_prefetch_io);
+	pw->v = v;
+	pw->block = io->block;
+	pw->n_blocks = io->n_blocks;
+	queue_work(v->verify_wq, &pw->work);
 }
 
 /*
@@ -498,7 +527,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 	memcpy(io->io_vec, bio_iovec(bio),
 	       io->io_vec_size * sizeof(struct bio_vec));
 
-	verity_prefetch_io(v, io);
+	verity_submit_prefetch(v, io);
 
 	generic_make_request(bio);
 
@@ -858,7 +887,7 @@ bad:
 
 static struct target_type verity_target = {
 	.name = "verity",
-	.version = {1, 1, 1},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr = verity_ctr,
 	.dtr = verity_dtr,
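
The verity change turns prefetch into a fire-and-forget work item allocated with GFP_NOIO; if the allocation fails the prefetch is simply skipped, since it is only an optimisation. A userspace sketch of the same pattern, with pthreads standing in for the kernel workqueue (all names here are illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct prefetch_work {
	unsigned long block;
	unsigned n_blocks;
};

static void *prefetch_fn(void *arg)
{
	struct prefetch_work *pw = arg;

	printf("prefetching %u blocks from %lu\n", pw->n_blocks, pw->block);
	free(pw);			/* the worker owns and frees the item */
	return NULL;
}

static void submit_prefetch(unsigned long block, unsigned n_blocks)
{
	struct prefetch_work *pw = malloc(sizeof(*pw));
	pthread_t t;

	if (!pw)
		return;			/* best effort: drop the hint, not the io */

	pw->block = block;
	pw->n_blocks = n_blocks;
	if (pthread_create(&t, NULL, prefetch_fn, pw))
		free(pw);
	else
		pthread_detach(t);
}

int main(void)
{
	submit_prefetch(1024, 8);
	pthread_exit(NULL);		/* let the detached worker finish */
}
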
diff --git a/drivers/md/md.c b/drivers/md/md.c index 3db3d1b271f7..aeceedfc530b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -307,6 +307,10 @@ static void md_make_request(struct request_queue *q, struct bio *bio) | |||
307 | bio_io_error(bio); | 307 | bio_io_error(bio); |
308 | return; | 308 | return; |
309 | } | 309 | } |
310 | if (mddev->ro == 1 && unlikely(rw == WRITE)) { | ||
311 | bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS); | ||
312 | return; | ||
313 | } | ||
310 | smp_rmb(); /* Ensure implications of 'active' are visible */ | 314 | smp_rmb(); /* Ensure implications of 'active' are visible */ |
311 | rcu_read_lock(); | 315 | rcu_read_lock(); |
312 | if (mddev->suspended) { | 316 | if (mddev->suspended) { |
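[Annotation] Note the ordering in the md_make_request() hunk above: a zero-sector bio (a bare flush) still completes successfully on a read-only array, while any write carrying data fails with -EROFS. A tiny sketch of that decision (illustrative model, not kernel code):

#include <errno.h>
#include <stdio.h>

/* Illustrative model of the new check: the status md completes a bio
 * with when the array is read-only. */
static int ro_bio_status(int array_ro, int is_write, unsigned int sectors)
{
        if (array_ro == 1 && is_write)
                return sectors == 0 ? 0 : -EROFS;  /* bare flush is fine */
        return 0;  /* everything else proceeds as before */
}

int main(void)
{
        printf("flush on ro array: %d\n", ro_bio_status(1, 1, 0));
        printf("write on ro array: %d\n", ro_bio_status(1, 1, 8));
        printf("read on ro array:  %d\n", ro_bio_status(1, 0, 8));
        return 0;
}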
@@ -2994,6 +2998,9 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2994 | } else if (!sectors) | 2998 | } else if (!sectors) |
2995 | sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - | 2999 | sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - |
2996 | rdev->data_offset; | 3000 | rdev->data_offset; |
3001 | if (!my_mddev->pers->resize) | ||
3002 | /* Cannot change size for RAID0 or Linear etc */ | ||
3003 | return -EINVAL; | ||
2997 | } | 3004 | } |
2998 | if (sectors < my_mddev->dev_sectors) | 3005 | if (sectors < my_mddev->dev_sectors) |
2999 | return -EINVAL; /* component must fit device */ | 3006 | return -EINVAL; /* component must fit device */ |
@@ -6525,7 +6532,17 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
6525 | mddev->ro = 0; | 6532 | mddev->ro = 0; |
6526 | sysfs_notify_dirent_safe(mddev->sysfs_state); | 6533 | sysfs_notify_dirent_safe(mddev->sysfs_state); |
6527 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 6534 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
6528 | md_wakeup_thread(mddev->thread); | 6535 | /* mddev_unlock will wake thread */ |
6536 | /* If a device failed while we were read-only, we | ||
6537 | * need to make sure the metadata is updated now. | ||
6538 | */ | ||
6539 | if (test_bit(MD_CHANGE_DEVS, &mddev->flags)) { | ||
6540 | mddev_unlock(mddev); | ||
6541 | wait_event(mddev->sb_wait, | ||
6542 | !test_bit(MD_CHANGE_DEVS, &mddev->flags) && | ||
6543 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); | ||
6544 | mddev_lock(mddev); | ||
6545 | } | ||
6529 | } else { | 6546 | } else { |
6530 | err = -EROFS; | 6547 | err = -EROFS; |
6531 | goto abort_unlock; | 6548 | goto abort_unlock; |
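[Annotation] The ioctl hunk above waits for pending superblock updates before letting the array go read-write, and it must drop the mddev lock first: sleeping while still holding it could block the thread that clears MD_CHANGE_DEVS/MD_CHANGE_PENDING. A hedged pthread analogue of the unlock/wait/relock pattern (names are illustrative; build with -lpthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t mddev_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t flag_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t sb_wait = PTHREAD_COND_INITIALIZER;
static bool change_devs = true, change_pending;

/* Analogue of "mddev_unlock(); wait_event(sb_wait, ...); mddev_lock()":
 * drop the big lock before sleeping so whoever clears the flags can run. */
static void wait_for_metadata_idle(void)
{
        pthread_mutex_unlock(&mddev_lock);

        pthread_mutex_lock(&flag_lock);
        while (change_devs || change_pending)
                pthread_cond_wait(&sb_wait, &flag_lock);
        pthread_mutex_unlock(&flag_lock);

        pthread_mutex_lock(&mddev_lock);
}

/* Stands in for the md thread finishing a superblock write. */
static void *metadata_writer(void *arg)
{
        pthread_mutex_lock(&flag_lock);
        change_devs = change_pending = false;
        pthread_cond_broadcast(&sb_wait);
        pthread_mutex_unlock(&flag_lock);
        return arg;
}

int main(void)
{
        pthread_t t;

        pthread_mutex_lock(&mddev_lock);
        pthread_create(&t, NULL, metadata_writer, NULL);
        wait_for_metadata_idle();
        printf("metadata clean, switching to read-write\n");
        pthread_mutex_unlock(&mddev_lock);
        pthread_join(t, NULL);
        return 0;
}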
@@ -7646,10 +7663,8 @@ static int remove_and_add_spares(struct mddev *mddev) | |||
7646 | removed++; | 7663 | removed++; |
7647 | } | 7664 | } |
7648 | } | 7665 | } |
7649 | if (removed) | 7666 | if (removed && mddev->kobj.sd) |
7650 | sysfs_notify(&mddev->kobj, NULL, | 7667 | sysfs_notify(&mddev->kobj, NULL, "degraded"); |
7651 | "degraded"); | ||
7652 | |||
7653 | 7668 | ||
7654 | rdev_for_each(rdev, mddev) { | 7669 | rdev_for_each(rdev, mddev) { |
7655 | if (rdev->raid_disk >= 0 && | 7670 | if (rdev->raid_disk >= 0 && |
diff --git a/drivers/md/md.h b/drivers/md/md.h index eca59c3074ef..d90fb1a879e1 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -506,7 +506,7 @@ static inline char * mdname (struct mddev * mddev) | |||
506 | static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) | 506 | static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) |
507 | { | 507 | { |
508 | char nm[20]; | 508 | char nm[20]; |
509 | if (!test_bit(Replacement, &rdev->flags)) { | 509 | if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) { |
510 | sprintf(nm, "rd%d", rdev->raid_disk); | 510 | sprintf(nm, "rd%d", rdev->raid_disk); |
511 | return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); | 511 | return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); |
512 | } else | 512 | } else |
@@ -516,7 +516,7 @@ static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) | |||
516 | static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) | 516 | static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) |
517 | { | 517 | { |
518 | char nm[20]; | 518 | char nm[20]; |
519 | if (!test_bit(Replacement, &rdev->flags)) { | 519 | if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) { |
520 | sprintf(nm, "rd%d", rdev->raid_disk); | 520 | sprintf(nm, "rd%d", rdev->raid_disk); |
521 | sysfs_remove_link(&mddev->kobj, nm); | 521 | sysfs_remove_link(&mddev->kobj, nm); |
522 | } | 522 | } |
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index c4f28133ef82..b88757cd0d1d 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c | |||
@@ -139,15 +139,8 @@ struct child { | |||
139 | struct btree_node *n; | 139 | struct btree_node *n; |
140 | }; | 140 | }; |
141 | 141 | ||
142 | static struct dm_btree_value_type le64_type = { | 142 | static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt, |
143 | .context = NULL, | 143 | struct btree_node *parent, |
144 | .size = sizeof(__le64), | ||
145 | .inc = NULL, | ||
146 | .dec = NULL, | ||
147 | .equal = NULL | ||
148 | }; | ||
149 | |||
150 | static int init_child(struct dm_btree_info *info, struct btree_node *parent, | ||
151 | unsigned index, struct child *result) | 144 | unsigned index, struct child *result) |
152 | { | 145 | { |
153 | int r, inc; | 146 | int r, inc; |
@@ -164,7 +157,7 @@ static int init_child(struct dm_btree_info *info, struct btree_node *parent, | |||
164 | result->n = dm_block_data(result->block); | 157 | result->n = dm_block_data(result->block); |
165 | 158 | ||
166 | if (inc) | 159 | if (inc) |
167 | inc_children(info->tm, result->n, &le64_type); | 160 | inc_children(info->tm, result->n, vt); |
168 | 161 | ||
169 | *((__le64 *) value_ptr(parent, index)) = | 162 | *((__le64 *) value_ptr(parent, index)) = |
170 | cpu_to_le64(dm_block_location(result->block)); | 163 | cpu_to_le64(dm_block_location(result->block)); |
@@ -236,7 +229,7 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, | |||
236 | } | 229 | } |
237 | 230 | ||
238 | static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, | 231 | static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, |
239 | unsigned left_index) | 232 | struct dm_btree_value_type *vt, unsigned left_index) |
240 | { | 233 | { |
241 | int r; | 234 | int r; |
242 | struct btree_node *parent; | 235 | struct btree_node *parent; |
@@ -244,11 +237,11 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, | |||
244 | 237 | ||
245 | parent = dm_block_data(shadow_current(s)); | 238 | parent = dm_block_data(shadow_current(s)); |
246 | 239 | ||
247 | r = init_child(info, parent, left_index, &left); | 240 | r = init_child(info, vt, parent, left_index, &left); |
248 | if (r) | 241 | if (r) |
249 | return r; | 242 | return r; |
250 | 243 | ||
251 | r = init_child(info, parent, left_index + 1, &right); | 244 | r = init_child(info, vt, parent, left_index + 1, &right); |
252 | if (r) { | 245 | if (r) { |
253 | exit_child(info, &left); | 246 | exit_child(info, &left); |
254 | return r; | 247 | return r; |
@@ -368,7 +361,7 @@ static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, | |||
368 | } | 361 | } |
369 | 362 | ||
370 | static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, | 363 | static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, |
371 | unsigned left_index) | 364 | struct dm_btree_value_type *vt, unsigned left_index) |
372 | { | 365 | { |
373 | int r; | 366 | int r; |
374 | struct btree_node *parent = dm_block_data(shadow_current(s)); | 367 | struct btree_node *parent = dm_block_data(shadow_current(s)); |
@@ -377,17 +370,17 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, | |||
377 | /* | 370 | /* |
378 | * FIXME: fill out an array? | 371 | * FIXME: fill out an array? |
379 | */ | 372 | */ |
380 | r = init_child(info, parent, left_index, &left); | 373 | r = init_child(info, vt, parent, left_index, &left); |
381 | if (r) | 374 | if (r) |
382 | return r; | 375 | return r; |
383 | 376 | ||
384 | r = init_child(info, parent, left_index + 1, &center); | 377 | r = init_child(info, vt, parent, left_index + 1, &center);
385 | if (r) { | 378 | if (r) { |
386 | exit_child(info, &left); | 379 | exit_child(info, &left); |
387 | return r; | 380 | return r; |
388 | } | 381 | } |
389 | 382 | ||
390 | r = init_child(info, parent, left_index + 2, &right); | 383 | r = init_child(info, vt, parent, left_index + 2, &right); |
391 | if (r) { | 384 | if (r) { |
392 | exit_child(info, &left); | 385 | exit_child(info, &left); |
393 | exit_child(info, &center); | 386 | exit_child(info, &center);
@@ -434,7 +427,8 @@ static int get_nr_entries(struct dm_transaction_manager *tm, | |||
434 | } | 427 | } |
435 | 428 | ||
436 | static int rebalance_children(struct shadow_spine *s, | 429 | static int rebalance_children(struct shadow_spine *s, |
437 | struct dm_btree_info *info, uint64_t key) | 430 | struct dm_btree_info *info, |
431 | struct dm_btree_value_type *vt, uint64_t key) | ||
438 | { | 432 | { |
439 | int i, r, has_left_sibling, has_right_sibling; | 433 | int i, r, has_left_sibling, has_right_sibling; |
440 | uint32_t child_entries; | 434 | uint32_t child_entries; |
@@ -472,13 +466,13 @@ static int rebalance_children(struct shadow_spine *s, | |||
472 | has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); | 466 | has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); |
473 | 467 | ||
474 | if (!has_left_sibling) | 468 | if (!has_left_sibling) |
475 | r = rebalance2(s, info, i); | 469 | r = rebalance2(s, info, vt, i); |
476 | 470 | ||
477 | else if (!has_right_sibling) | 471 | else if (!has_right_sibling) |
478 | r = rebalance2(s, info, i - 1); | 472 | r = rebalance2(s, info, vt, i - 1); |
479 | 473 | ||
480 | else | 474 | else |
481 | r = rebalance3(s, info, i - 1); | 475 | r = rebalance3(s, info, vt, i - 1); |
482 | 476 | ||
483 | return r; | 477 | return r; |
484 | } | 478 | } |
@@ -529,7 +523,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, | |||
529 | if (le32_to_cpu(n->header.flags) & LEAF_NODE) | 523 | if (le32_to_cpu(n->header.flags) & LEAF_NODE) |
530 | return do_leaf(n, key, index); | 524 | return do_leaf(n, key, index); |
531 | 525 | ||
532 | r = rebalance_children(s, info, key); | 526 | r = rebalance_children(s, info, vt, key); |
533 | if (r) | 527 | if (r) |
534 | break; | 528 | break; |
535 | 529 | ||
@@ -550,6 +544,14 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, | |||
550 | return r; | 544 | return r; |
551 | } | 545 | } |
552 | 546 | ||
547 | static struct dm_btree_value_type le64_type = { | ||
548 | .context = NULL, | ||
549 | .size = sizeof(__le64), | ||
550 | .inc = NULL, | ||
551 | .dec = NULL, | ||
552 | .equal = NULL | ||
553 | }; | ||
554 | |||
553 | int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, | 555 | int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, |
554 | uint64_t *keys, dm_block_t *new_root) | 556 | uint64_t *keys, dm_block_t *new_root) |
555 | { | 557 | { |
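[Annotation] The point of threading a struct dm_btree_value_type through init_child() and the rebalance helpers above is reference counting: when a node is shadowed during removal, its children must be inc'd with the caller's value type, not the internal le64_type (whose inc/dec hooks are NULL), or shared leaf values lose their extra references. A small userspace sketch of the idea (illustrative types, not the dm-btree API):

#include <stdint.h>
#include <stdio.h>

/* Illustrative analogue of dm_btree_value_type: values may carry
 * reference-counting hooks; the internal le64 type has none. */
struct value_type {
        void *context;
        void (*inc)(void *context, const void *value);  /* may be NULL */
};

static void block_ref_inc(void *context, const void *value)
{
        int *refs = context;

        ++*refs;
        printf("inc ref on value %llu\n",
               (unsigned long long)*(const uint64_t *)value);
}

/* Like inc_children(): shadowing a node gains a reference on every
 * value it holds - but only if the value type has an inc hook.
 * Passing the caller's vt, not a fixed le64_type, is the fix above. */
static void inc_children(const struct value_type *vt,
                         const uint64_t *values, unsigned int nr)
{
        unsigned int i;

        if (!vt->inc)
                return;
        for (i = 0; i < nr; i++)
                vt->inc(vt->context, &values[i]);
}

int main(void)
{
        uint64_t leaf_values[3] = { 10, 20, 30 };
        int refs = 0;
        struct value_type le64_type = { NULL, NULL };
        struct value_type counted = { &refs, block_ref_inc };

        inc_children(&le64_type, leaf_values, 3);  /* silently a no-op */
        inc_children(&counted, leaf_values, 3);    /* refs becomes 3 */
        printf("refs = %d\n", refs);
        return 0;
}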
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 24b359717a7e..0505452de8d6 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -175,7 +175,13 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) | |||
175 | rdev1->new_raid_disk = j; | 175 | rdev1->new_raid_disk = j; |
176 | } | 176 | } |
177 | 177 | ||
178 | if (j < 0 || j >= mddev->raid_disks) { | 178 | if (j < 0) { |
179 | printk(KERN_ERR | ||
180 | "md/raid0:%s: remove inactive devices before converting to RAID0\n", | ||
181 | mdname(mddev)); | ||
182 | goto abort; | ||
183 | } | ||
184 | if (j >= mddev->raid_disks) { | ||
179 | printk(KERN_ERR "md/raid0:%s: bad disk number %d - " | 185 | printk(KERN_ERR "md/raid0:%s: bad disk number %d - " |
180 | "aborting!\n", mdname(mddev), j); | 186 | "aborting!\n", mdname(mddev), j); |
181 | goto abort; | 187 | goto abort; |
@@ -289,7 +295,7 @@ abort: | |||
289 | kfree(conf->strip_zone); | 295 | kfree(conf->strip_zone); |
290 | kfree(conf->devlist); | 296 | kfree(conf->devlist); |
291 | kfree(conf); | 297 | kfree(conf); |
292 | *private_conf = NULL; | 298 | *private_conf = ERR_PTR(err); |
293 | return err; | 299 | return err; |
294 | } | 300 | } |
295 | 301 | ||
@@ -411,7 +417,8 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks | |||
411 | "%s does not support generic reshape\n", __func__); | 417 | "%s does not support generic reshape\n", __func__); |
412 | 418 | ||
413 | rdev_for_each(rdev, mddev) | 419 | rdev_for_each(rdev, mddev) |
414 | array_sectors += rdev->sectors; | 420 | array_sectors += (rdev->sectors & |
421 | ~(sector_t)(mddev->chunk_sectors-1)); | ||
415 | 422 | ||
416 | return array_sectors; | 423 | return array_sectors; |
417 | } | 424 | } |
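[Annotation] The raid0_size() hunk rounds each member device's contribution down to a whole number of chunks before summing, since a trailing partial chunk can never be striped. The mask arithmetic, with made-up sector counts:

#include <stdio.h>

typedef unsigned long long sector_t;

int main(void)
{
        sector_t chunk_sectors = 128;   /* 64KiB chunks, 512-byte sectors */
        sector_t rdev_sectors[3] = { 1000000, 999999, 1048576 };
        sector_t array_sectors = 0;
        int i;

        for (i = 0; i < 3; i++) {
                /* round each member down to a whole number of chunks */
                sector_t usable = rdev_sectors[i] &
                                  ~(sector_t)(chunk_sectors - 1);

                printf("rdev %d: %llu -> %llu usable\n",
                       i, rdev_sectors[i], usable);
                array_sectors += usable;
        }
        printf("array size: %llu sectors\n", array_sectors);
        return 0;
}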
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index d5bddfc4010e..fd86b372692d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -967,6 +967,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
967 | bio_list_merge(&conf->pending_bio_list, &plug->pending); | 967 | bio_list_merge(&conf->pending_bio_list, &plug->pending); |
968 | conf->pending_count += plug->pending_cnt; | 968 | conf->pending_count += plug->pending_cnt; |
969 | spin_unlock_irq(&conf->device_lock); | 969 | spin_unlock_irq(&conf->device_lock); |
970 | wake_up(&conf->wait_barrier); | ||
970 | md_wakeup_thread(mddev->thread); | 971 | md_wakeup_thread(mddev->thread); |
971 | kfree(plug); | 972 | kfree(plug); |
972 | return; | 973 | return; |
@@ -1000,6 +1001,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1000 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); | 1001 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); |
1001 | const unsigned long do_discard = (bio->bi_rw | 1002 | const unsigned long do_discard = (bio->bi_rw |
1002 | & (REQ_DISCARD | REQ_SECURE)); | 1003 | & (REQ_DISCARD | REQ_SECURE)); |
1004 | const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME); | ||
1003 | struct md_rdev *blocked_rdev; | 1005 | struct md_rdev *blocked_rdev; |
1004 | struct blk_plug_cb *cb; | 1006 | struct blk_plug_cb *cb; |
1005 | struct raid1_plug_cb *plug = NULL; | 1007 | struct raid1_plug_cb *plug = NULL; |
@@ -1301,7 +1303,8 @@ read_again: | |||
1301 | conf->mirrors[i].rdev->data_offset); | 1303 | conf->mirrors[i].rdev->data_offset); |
1302 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; | 1304 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; |
1303 | mbio->bi_end_io = raid1_end_write_request; | 1305 | mbio->bi_end_io = raid1_end_write_request; |
1304 | mbio->bi_rw = WRITE | do_flush_fua | do_sync | do_discard; | 1306 | mbio->bi_rw = |
1307 | WRITE | do_flush_fua | do_sync | do_discard | do_same; | ||
1305 | mbio->bi_private = r1_bio; | 1308 | mbio->bi_private = r1_bio; |
1306 | 1309 | ||
1307 | atomic_inc(&r1_bio->remaining); | 1310 | atomic_inc(&r1_bio->remaining); |
@@ -2818,6 +2821,9 @@ static int run(struct mddev *mddev) | |||
2818 | if (IS_ERR(conf)) | 2821 | if (IS_ERR(conf)) |
2819 | return PTR_ERR(conf); | 2822 | return PTR_ERR(conf); |
2820 | 2823 | ||
2824 | if (mddev->queue) | ||
2825 | blk_queue_max_write_same_sectors(mddev->queue, | ||
2826 | mddev->chunk_sectors); | ||
2821 | rdev_for_each(rdev, mddev) { | 2827 | rdev_for_each(rdev, mddev) { |
2822 | if (!mddev->gendisk) | 2828 | if (!mddev->gendisk) |
2823 | continue; | 2829 | continue; |
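[Annotation] The raid1 hunks plumb REQ_WRITE_SAME through the same way the earlier discard support did: the bit is masked off the incoming bio, OR-ed into every mirrored write, and the request size is capped with blk_queue_max_write_same_sectors(). A sketch of the flag composition (the flag values below are illustrative, not the kernel's real definitions):

#include <stdio.h>

/* Illustrative request-flag bits, not the real kernel values. */
#define WRITE           (1u << 0)
#define REQ_SYNC        (1u << 1)
#define REQ_FLUSH       (1u << 2)
#define REQ_FUA         (1u << 3)
#define REQ_DISCARD     (1u << 4)
#define REQ_SECURE      (1u << 5)
#define REQ_WRITE_SAME  (1u << 6)

/* Compose the rw bits for one mirrored bio from the incoming request,
 * as make_request() now does for each mbio. */
static unsigned int mirror_rw(unsigned int bi_rw)
{
        unsigned int do_sync = bi_rw & REQ_SYNC;
        unsigned int do_flush_fua = bi_rw & (REQ_FLUSH | REQ_FUA);
        unsigned int do_discard = bi_rw & (REQ_DISCARD | REQ_SECURE);
        unsigned int do_same = bi_rw & REQ_WRITE_SAME;  /* the new bit */

        return WRITE | do_flush_fua | do_sync | do_discard | do_same;
}

int main(void)
{
        printf("%#x\n", mirror_rw(WRITE | REQ_FUA | REQ_WRITE_SAME));
        return 0;
}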
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 64d48249c03b..77b562d18a90 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -38,21 +38,36 @@ | |||
38 | * near_copies (stored in low byte of layout) | 38 | * near_copies (stored in low byte of layout) |
39 | * far_copies (stored in second byte of layout) | 39 | * far_copies (stored in second byte of layout) |
40 | * far_offset (stored in bit 16 of layout ) | 40 | * far_offset (stored in bit 16 of layout ) |
41 | * use_far_sets (stored in bit 17 of layout ) | ||
41 | * | 42 | * |
42 | * The data to be stored is divided into chunks using chunksize. | 43 | * The data to be stored is divided into chunks using chunksize. Each device |
43 | * Each device is divided into far_copies sections. | 44 | * is divided into far_copies sections. In each section, chunks are laid out |
44 | * In each section, chunks are laid out in a style similar to raid0, but | 45 | * in a style similar to raid0, but near_copies copies of each chunk is stored |
45 | * near_copies copies of each chunk is stored (each on a different drive). | 46 | * (each on a different drive). The starting device for each section is offset |
46 | * The starting device for each section is offset near_copies from the starting | 47 | * near_copies from the starting device of the previous section. Thus there |
47 | * device of the previous section. | 48 | * are (near_copies * far_copies) of each chunk, and each is on a different |
48 | * Thus they are (near_copies*far_copies) of each chunk, and each is on a different | 49 | * drive. near_copies and far_copies must be at least one, and their product |
49 | * drive. | 50 | * is at most raid_disks. |
50 | * near_copies and far_copies must be at least one, and their product is at most | ||
51 | * raid_disks. | ||
52 | * | 51 | * |
53 | * If far_offset is true, then the far_copies are handled a bit differently. | 52 | * If far_offset is true, then the far_copies are handled a bit differently. |
54 | * The copies are still in different stripes, but instead of be very far apart | 53 | * The copies are still in different stripes, but instead of being very far |
55 | * on disk, there are adjacent stripes. | 54 | * apart on disk, there are adjacent stripes. |
55 | * | ||
56 | * The far and offset algorithms are handled slightly differently if | ||
57 | * 'use_far_sets' is true. In this case, the array's devices are grouped into | ||
58 | * sets that are (near_copies * far_copies) in size. The far copied stripes | ||
59 | * are still shifted by 'near_copies' devices, but this shifting stays confined | ||
60 | * to the set rather than the entire array. This is done to improve the number | ||
61 | * of device combinations that can fail without causing the array to fail. | ||
62 | * Example 'far' algorithm w/o 'use_far_sets' (each letter represents a chunk | ||
63 | * on a device): | ||
64 | * A B C D A B C D E | ||
65 | * ... ... | ||
66 | * D A B C E A B C D | ||
67 | * Example 'far' algorithm w/ 'use_far_sets' enabled (sets illustrated w/ []'s): | ||
68 | * [A B] [C D] [A B] [C D E] | ||
69 | * |...| |...| |...| | ... | | ||
70 | * [B A] [D C] [B A] [E C D] | ||
56 | */ | 71 | */ |
57 | 72 | ||
58 | /* | 73 | /* |
@@ -535,6 +550,13 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) | |||
535 | sector_t stripe; | 550 | sector_t stripe; |
536 | int dev; | 551 | int dev; |
537 | int slot = 0; | 552 | int slot = 0; |
553 | int last_far_set_start, last_far_set_size; | ||
554 | |||
555 | last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1; | ||
556 | last_far_set_start *= geo->far_set_size; | ||
557 | |||
558 | last_far_set_size = geo->far_set_size; | ||
559 | last_far_set_size += (geo->raid_disks % geo->far_set_size); | ||
538 | 560 | ||
539 | /* now calculate first sector/dev */ | 561 | /* now calculate first sector/dev */ |
540 | chunk = r10bio->sector >> geo->chunk_shift; | 562 | chunk = r10bio->sector >> geo->chunk_shift; |
@@ -551,15 +573,25 @@ static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) | |||
551 | /* and calculate all the others */ | 573 | /* and calculate all the others */ |
552 | for (n = 0; n < geo->near_copies; n++) { | 574 | for (n = 0; n < geo->near_copies; n++) { |
553 | int d = dev; | 575 | int d = dev; |
576 | int set; | ||
554 | sector_t s = sector; | 577 | sector_t s = sector; |
555 | r10bio->devs[slot].addr = sector; | ||
556 | r10bio->devs[slot].devnum = d; | 578 | r10bio->devs[slot].devnum = d; |
579 | r10bio->devs[slot].addr = s; | ||
557 | slot++; | 580 | slot++; |
558 | 581 | ||
559 | for (f = 1; f < geo->far_copies; f++) { | 582 | for (f = 1; f < geo->far_copies; f++) { |
583 | set = d / geo->far_set_size; | ||
560 | d += geo->near_copies; | 584 | d += geo->near_copies; |
561 | if (d >= geo->raid_disks) | 585 | |
562 | d -= geo->raid_disks; | 586 | if ((geo->raid_disks % geo->far_set_size) && |
587 | (d > last_far_set_start)) { | ||
588 | d -= last_far_set_start; | ||
589 | d %= last_far_set_size; | ||
590 | d += last_far_set_start; | ||
591 | } else { | ||
592 | d %= geo->far_set_size; | ||
593 | d += geo->far_set_size * set; | ||
594 | } | ||
563 | s += geo->stride; | 595 | s += geo->stride; |
564 | r10bio->devs[slot].devnum = d; | 596 | r10bio->devs[slot].devnum = d; |
565 | r10bio->devs[slot].addr = s; | 597 | r10bio->devs[slot].addr = s; |
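[Annotation] The far-set arithmetic above is easiest to check by computing where a chunk's far copy lands. A hedged sketch of the new device step in __raid10_find_phys(), reduced to a standalone helper (variable names mirror the kernel's; the function itself is illustrative):

#include <stdio.h>

/* Device holding the next far copy of a chunk whose previous copy is on
 * device d, following the far-set arithmetic added above. */
static int next_far_dev(int d, int raid_disks, int near_copies,
                        int far_set_size)
{
        int last_far_set_start =
                ((raid_disks / far_set_size) - 1) * far_set_size;
        int last_far_set_size =
                far_set_size + (raid_disks % far_set_size);
        int set = d / far_set_size;

        d += near_copies;
        if ((raid_disks % far_set_size) && d > last_far_set_start) {
                /* the final, over-sized set wraps within itself */
                d -= last_far_set_start;
                d %= last_far_set_size;
                d += last_far_set_start;
        } else {
                /* every other set wraps within its own boundaries */
                d %= far_set_size;
                d += far_set_size * set;
        }
        return d;
}

int main(void)
{
        int d;

        /* 4 disks in sets of 2: copies stay inside [A B] [C D], matching
         * the 'use_far_sets' diagram in the comment block above. */
        for (d = 0; d < 4; d++)
                printf("disk %d -> far copy on disk %d\n",
                       d, next_far_dev(d, 4, 1, 2));
        return 0;
}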
@@ -595,6 +627,20 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) | |||
595 | * or recovery, so reshape isn't happening | 627 | * or recovery, so reshape isn't happening |
596 | */ | 628 | */ |
597 | struct geom *geo = &conf->geo; | 629 | struct geom *geo = &conf->geo; |
630 | int far_set_start = (dev / geo->far_set_size) * geo->far_set_size; | ||
631 | int far_set_size = geo->far_set_size; | ||
632 | int last_far_set_start; | ||
633 | |||
634 | if (geo->raid_disks % geo->far_set_size) { | ||
635 | last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1; | ||
636 | last_far_set_start *= geo->far_set_size; | ||
637 | |||
638 | if (dev >= last_far_set_start) { | ||
639 | far_set_size = geo->far_set_size; | ||
640 | far_set_size += (geo->raid_disks % geo->far_set_size); | ||
641 | far_set_start = last_far_set_start; | ||
642 | } | ||
643 | } | ||
598 | 644 | ||
599 | offset = sector & geo->chunk_mask; | 645 | offset = sector & geo->chunk_mask; |
600 | if (geo->far_offset) { | 646 | if (geo->far_offset) { |
@@ -602,13 +648,13 @@ static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) | |||
602 | chunk = sector >> geo->chunk_shift; | 648 | chunk = sector >> geo->chunk_shift; |
603 | fc = sector_div(chunk, geo->far_copies); | 649 | fc = sector_div(chunk, geo->far_copies); |
604 | dev -= fc * geo->near_copies; | 650 | dev -= fc * geo->near_copies; |
605 | if (dev < 0) | 651 | if (dev < far_set_start) |
606 | dev += geo->raid_disks; | 652 | dev += far_set_size; |
607 | } else { | 653 | } else { |
608 | while (sector >= geo->stride) { | 654 | while (sector >= geo->stride) { |
609 | sector -= geo->stride; | 655 | sector -= geo->stride; |
610 | if (dev < geo->near_copies) | 656 | if (dev < (geo->near_copies + far_set_start)) |
611 | dev += geo->raid_disks - geo->near_copies; | 657 | dev += far_set_size - geo->near_copies; |
612 | else | 658 | else |
613 | dev -= geo->near_copies; | 659 | dev -= geo->near_copies; |
614 | } | 660 | } |
@@ -1073,6 +1119,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
1073 | bio_list_merge(&conf->pending_bio_list, &plug->pending); | 1119 | bio_list_merge(&conf->pending_bio_list, &plug->pending); |
1074 | conf->pending_count += plug->pending_cnt; | 1120 | conf->pending_count += plug->pending_cnt; |
1075 | spin_unlock_irq(&conf->device_lock); | 1121 | spin_unlock_irq(&conf->device_lock); |
1122 | wake_up(&conf->wait_barrier); | ||
1076 | md_wakeup_thread(mddev->thread); | 1123 | md_wakeup_thread(mddev->thread); |
1077 | kfree(plug); | 1124 | kfree(plug); |
1078 | return; | 1125 | return; |
@@ -1105,6 +1152,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1105 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); | 1152 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); |
1106 | const unsigned long do_discard = (bio->bi_rw | 1153 | const unsigned long do_discard = (bio->bi_rw |
1107 | & (REQ_DISCARD | REQ_SECURE)); | 1154 | & (REQ_DISCARD | REQ_SECURE)); |
1155 | const unsigned long do_same = (bio->bi_rw & REQ_WRITE_SAME); | ||
1108 | unsigned long flags; | 1156 | unsigned long flags; |
1109 | struct md_rdev *blocked_rdev; | 1157 | struct md_rdev *blocked_rdev; |
1110 | struct blk_plug_cb *cb; | 1158 | struct blk_plug_cb *cb; |
@@ -1460,7 +1508,8 @@ retry_write: | |||
1460 | rdev)); | 1508 | rdev)); |
1461 | mbio->bi_bdev = rdev->bdev; | 1509 | mbio->bi_bdev = rdev->bdev; |
1462 | mbio->bi_end_io = raid10_end_write_request; | 1510 | mbio->bi_end_io = raid10_end_write_request; |
1463 | mbio->bi_rw = WRITE | do_sync | do_fua | do_discard; | 1511 | mbio->bi_rw = |
1512 | WRITE | do_sync | do_fua | do_discard | do_same; | ||
1464 | mbio->bi_private = r10_bio; | 1513 | mbio->bi_private = r10_bio; |
1465 | 1514 | ||
1466 | atomic_inc(&r10_bio->remaining); | 1515 | atomic_inc(&r10_bio->remaining); |
@@ -1502,7 +1551,8 @@ retry_write: | |||
1502 | r10_bio, rdev)); | 1551 | r10_bio, rdev)); |
1503 | mbio->bi_bdev = rdev->bdev; | 1552 | mbio->bi_bdev = rdev->bdev; |
1504 | mbio->bi_end_io = raid10_end_write_request; | 1553 | mbio->bi_end_io = raid10_end_write_request; |
1505 | mbio->bi_rw = WRITE | do_sync | do_fua | do_discard; | 1554 | mbio->bi_rw = |
1555 | WRITE | do_sync | do_fua | do_discard | do_same; | ||
1506 | mbio->bi_private = r10_bio; | 1556 | mbio->bi_private = r10_bio; |
1507 | 1557 | ||
1508 | atomic_inc(&r10_bio->remaining); | 1558 | atomic_inc(&r10_bio->remaining); |
@@ -3436,7 +3486,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) | |||
3436 | disks = mddev->raid_disks + mddev->delta_disks; | 3486 | disks = mddev->raid_disks + mddev->delta_disks; |
3437 | break; | 3487 | break; |
3438 | } | 3488 | } |
3439 | if (layout >> 17) | 3489 | if (layout >> 18) |
3440 | return -1; | 3490 | return -1; |
3441 | if (chunk < (PAGE_SIZE >> 9) || | 3491 | if (chunk < (PAGE_SIZE >> 9) || |
3442 | !is_power_of_2(chunk)) | 3492 | !is_power_of_2(chunk)) |
@@ -3448,6 +3498,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new) | |||
3448 | geo->near_copies = nc; | 3498 | geo->near_copies = nc; |
3449 | geo->far_copies = fc; | 3499 | geo->far_copies = fc; |
3450 | geo->far_offset = fo; | 3500 | geo->far_offset = fo; |
3501 | geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks; | ||
3451 | geo->chunk_mask = chunk - 1; | 3502 | geo->chunk_mask = chunk - 1; |
3452 | geo->chunk_shift = ffz(~chunk); | 3503 | geo->chunk_shift = ffz(~chunk); |
3453 | return nc*fc; | 3504 | return nc*fc; |
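[Annotation] setup_geo() now accepts layout bit 17 (use_far_sets) and rejects anything at bit 18 or above; when the bit is set, a far set spans disks / far_copies devices, otherwise the whole array is one set. A small decoder mirroring those checks (hedged sketch, not the kernel function):

#include <stdio.h>

struct geom {
        int near_copies, far_copies, far_offset, far_set_size;
};

/* Decode a raid10 layout word the way the setup_geo() hunk does.
 * Returns the number of copies, or -1 for bits we do not understand. */
static int decode_layout(unsigned int layout, int disks, struct geom *geo)
{
        if (layout >> 18)
                return -1;                        /* reject unknown bits */
        geo->near_copies = layout & 0xff;         /* low byte */
        geo->far_copies = (layout >> 8) & 0xff;   /* second byte */
        geo->far_offset = (layout >> 16) & 1;     /* bit 16 */
        geo->far_set_size = (layout & (1 << 17)) ?
                disks / geo->far_copies : disks;  /* bit 17: use_far_sets */
        return geo->near_copies * geo->far_copies;
}

int main(void)
{
        struct geom g;
        /* near=1, far=2, use_far_sets set: (1 << 17) | (2 << 8) | 1 */
        int copies = decode_layout(0x20201, 8, &g);

        printf("copies=%d near=%d far=%d offset=%d set_size=%d\n", copies,
               g.near_copies, g.far_copies, g.far_offset, g.far_set_size);
        return 0;
}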
@@ -3569,6 +3620,8 @@ static int run(struct mddev *mddev) | |||
3569 | if (mddev->queue) { | 3620 | if (mddev->queue) { |
3570 | blk_queue_max_discard_sectors(mddev->queue, | 3621 | blk_queue_max_discard_sectors(mddev->queue, |
3571 | mddev->chunk_sectors); | 3622 | mddev->chunk_sectors); |
3623 | blk_queue_max_write_same_sectors(mddev->queue, | ||
3624 | mddev->chunk_sectors); | ||
3572 | blk_queue_io_min(mddev->queue, chunk_size); | 3625 | blk_queue_io_min(mddev->queue, chunk_size); |
3573 | if (conf->geo.raid_disks % conf->geo.near_copies) | 3626 | if (conf->geo.raid_disks % conf->geo.near_copies) |
3574 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); | 3627 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); |
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 1054cf602345..157d69e83ff4 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -33,6 +33,11 @@ struct r10conf { | |||
33 | * far_offset, in which case it is | 33 | * far_offset, in which case it is |
34 | * 1 stripe. | 34 | * 1 stripe. |
35 | */ | 35 | */ |
36 | int far_set_size; /* The number of devices in a set, | ||
37 | * where a 'set' are devices that | ||
38 | * contain far/offset copies of | ||
39 | * each other. | ||
40 | */ | ||
36 | int chunk_shift; /* shift from chunks to sectors */ | 41 | int chunk_shift; /* shift from chunks to sectors */ |
37 | sector_t chunk_mask; | 42 | sector_t chunk_mask; |
38 | } prev, geo; | 43 | } prev, geo; |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5af2d2709081..24909eb13fec 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -671,9 +671,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
671 | bi->bi_next = NULL; | 671 | bi->bi_next = NULL; |
672 | if (rrdev) | 672 | if (rrdev) |
673 | set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); | 673 | set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); |
674 | trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), | 674 | |
675 | bi, disk_devt(conf->mddev->gendisk), | 675 | if (conf->mddev->gendisk) |
676 | sh->dev[i].sector); | 676 | trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), |
677 | bi, disk_devt(conf->mddev->gendisk), | ||
678 | sh->dev[i].sector); | ||
677 | generic_make_request(bi); | 679 | generic_make_request(bi); |
678 | } | 680 | } |
679 | if (rrdev) { | 681 | if (rrdev) { |
@@ -701,9 +703,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
701 | rbi->bi_io_vec[0].bv_offset = 0; | 703 | rbi->bi_io_vec[0].bv_offset = 0; |
702 | rbi->bi_size = STRIPE_SIZE; | 704 | rbi->bi_size = STRIPE_SIZE; |
703 | rbi->bi_next = NULL; | 705 | rbi->bi_next = NULL; |
704 | trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), | 706 | if (conf->mddev->gendisk) |
705 | rbi, disk_devt(conf->mddev->gendisk), | 707 | trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), |
706 | sh->dev[i].sector); | 708 | rbi, disk_devt(conf->mddev->gendisk), |
709 | sh->dev[i].sector); | ||
707 | generic_make_request(rbi); | 710 | generic_make_request(rbi); |
708 | } | 711 | } |
709 | if (!rdev && !rrdev) { | 712 | if (!rdev && !rrdev) { |
@@ -1403,7 +1406,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu | |||
1403 | &sh->ops.zero_sum_result, percpu->spare_page, &submit); | 1406 | &sh->ops.zero_sum_result, percpu->spare_page, &submit); |
1404 | } | 1407 | } |
1405 | 1408 | ||
1406 | static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | 1409 | static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) |
1407 | { | 1410 | { |
1408 | int overlap_clear = 0, i, disks = sh->disks; | 1411 | int overlap_clear = 0, i, disks = sh->disks; |
1409 | struct dma_async_tx_descriptor *tx = NULL; | 1412 | struct dma_async_tx_descriptor *tx = NULL; |
@@ -1468,36 +1471,6 @@ static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |||
1468 | put_cpu(); | 1471 | put_cpu(); |
1469 | } | 1472 | } |
1470 | 1473 | ||
1471 | #ifdef CONFIG_MULTICORE_RAID456 | ||
1472 | static void async_run_ops(void *param, async_cookie_t cookie) | ||
1473 | { | ||
1474 | struct stripe_head *sh = param; | ||
1475 | unsigned long ops_request = sh->ops.request; | ||
1476 | |||
1477 | clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state); | ||
1478 | wake_up(&sh->ops.wait_for_ops); | ||
1479 | |||
1480 | __raid_run_ops(sh, ops_request); | ||
1481 | release_stripe(sh); | ||
1482 | } | ||
1483 | |||
1484 | static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | ||
1485 | { | ||
1486 | /* since handle_stripe can be called outside of raid5d context | ||
1487 | * we need to ensure sh->ops.request is de-staged before another | ||
1488 | * request arrives | ||
1489 | */ | ||
1490 | wait_event(sh->ops.wait_for_ops, | ||
1491 | !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state)); | ||
1492 | sh->ops.request = ops_request; | ||
1493 | |||
1494 | atomic_inc(&sh->count); | ||
1495 | async_schedule(async_run_ops, sh); | ||
1496 | } | ||
1497 | #else | ||
1498 | #define raid_run_ops __raid_run_ops | ||
1499 | #endif | ||
1500 | |||
1501 | static int grow_one_stripe(struct r5conf *conf) | 1474 | static int grow_one_stripe(struct r5conf *conf) |
1502 | { | 1475 | { |
1503 | struct stripe_head *sh; | 1476 | struct stripe_head *sh; |
@@ -1506,9 +1479,6 @@ static int grow_one_stripe(struct r5conf *conf) | |||
1506 | return 0; | 1479 | return 0; |
1507 | 1480 | ||
1508 | sh->raid_conf = conf; | 1481 | sh->raid_conf = conf; |
1509 | #ifdef CONFIG_MULTICORE_RAID456 | ||
1510 | init_waitqueue_head(&sh->ops.wait_for_ops); | ||
1511 | #endif | ||
1512 | 1482 | ||
1513 | spin_lock_init(&sh->stripe_lock); | 1483 | spin_lock_init(&sh->stripe_lock); |
1514 | 1484 | ||
@@ -1627,9 +1597,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) | |||
1627 | break; | 1597 | break; |
1628 | 1598 | ||
1629 | nsh->raid_conf = conf; | 1599 | nsh->raid_conf = conf; |
1630 | #ifdef CONFIG_MULTICORE_RAID456 | ||
1631 | init_waitqueue_head(&nsh->ops.wait_for_ops); | ||
1632 | #endif | ||
1633 | spin_lock_init(&nsh->stripe_lock); | 1600 | spin_lock_init(&nsh->stripe_lock); |
1634 | 1601 | ||
1635 | list_add(&nsh->lru, &newstripes); | 1602 | list_add(&nsh->lru, &newstripes); |
@@ -2316,17 +2283,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, | |||
2316 | int level = conf->level; | 2283 | int level = conf->level; |
2317 | 2284 | ||
2318 | if (rcw) { | 2285 | if (rcw) { |
2319 | /* if we are not expanding this is a proper write request, and | ||
2320 | * there will be bios with new data to be drained into the | ||
2321 | * stripe cache | ||
2322 | */ | ||
2323 | if (!expand) { | ||
2324 | sh->reconstruct_state = reconstruct_state_drain_run; | ||
2325 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | ||
2326 | } else | ||
2327 | sh->reconstruct_state = reconstruct_state_run; | ||
2328 | |||
2329 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); | ||
2330 | 2286 | ||
2331 | for (i = disks; i--; ) { | 2287 | for (i = disks; i--; ) { |
2332 | struct r5dev *dev = &sh->dev[i]; | 2288 | struct r5dev *dev = &sh->dev[i]; |
@@ -2339,6 +2295,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, | |||
2339 | s->locked++; | 2295 | s->locked++; |
2340 | } | 2296 | } |
2341 | } | 2297 | } |
2298 | /* if we are not expanding this is a proper write request, and | ||
2299 | * there will be bios with new data to be drained into the | ||
2300 | * stripe cache | ||
2301 | */ | ||
2302 | if (!expand) { | ||
2303 | if (!s->locked) | ||
2304 | /* False alarm, nothing to do */ | ||
2305 | return; | ||
2306 | sh->reconstruct_state = reconstruct_state_drain_run; | ||
2307 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | ||
2308 | } else | ||
2309 | sh->reconstruct_state = reconstruct_state_run; | ||
2310 | |||
2311 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); | ||
2312 | |||
2342 | if (s->locked + conf->max_degraded == disks) | 2313 | if (s->locked + conf->max_degraded == disks) |
2343 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) | 2314 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) |
2344 | atomic_inc(&conf->pending_full_writes); | 2315 | atomic_inc(&conf->pending_full_writes); |
@@ -2347,11 +2318,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, | |||
2347 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || | 2318 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || |
2348 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); | 2319 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); |
2349 | 2320 | ||
2350 | sh->reconstruct_state = reconstruct_state_prexor_drain_run; | ||
2351 | set_bit(STRIPE_OP_PREXOR, &s->ops_request); | ||
2352 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | ||
2353 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); | ||
2354 | |||
2355 | for (i = disks; i--; ) { | 2321 | for (i = disks; i--; ) { |
2356 | struct r5dev *dev = &sh->dev[i]; | 2322 | struct r5dev *dev = &sh->dev[i]; |
2357 | if (i == pd_idx) | 2323 | if (i == pd_idx) |
@@ -2366,6 +2332,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, | |||
2366 | s->locked++; | 2332 | s->locked++; |
2367 | } | 2333 | } |
2368 | } | 2334 | } |
2335 | if (!s->locked) | ||
2336 | /* False alarm - nothing to do */ | ||
2337 | return; | ||
2338 | sh->reconstruct_state = reconstruct_state_prexor_drain_run; | ||
2339 | set_bit(STRIPE_OP_PREXOR, &s->ops_request); | ||
2340 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | ||
2341 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); | ||
2369 | } | 2342 | } |
2370 | 2343 | ||
2371 | /* keep the parity disk(s) locked while asynchronous operations | 2344 | /* keep the parity disk(s) locked while asynchronous operations |
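[Annotation] Both schedule_reconstruction() hunks make the same fix: walk the devices and count what will actually be locked before committing reconstruct_state and the op-request bits, returning early on a "false alarm". Setting the state first and then finding no work left the stripe in a state it could never leave. A generic sketch of the reordering (illustrative structure, not raid5's):

#include <stdio.h>

enum recon_state { recon_idle, recon_drain_run };

struct stripe {
        int towrite[5];                /* pending write per device, 0/1 */
        enum recon_state state;
};

/* Count the work first; commit state only when some exists.  Mirrors
 * the "if (!s->locked) return" added above. */
static int schedule_reconstruction(struct stripe *sh)
{
        int i, locked = 0;

        for (i = 0; i < 5; i++)
                if (sh->towrite[i])
                        locked++;

        if (!locked)
                return 0;              /* false alarm - nothing to do */

        sh->state = recon_drain_run;   /* safe: work definitely exists */
        return locked;
}

int main(void)
{
        struct stripe sh = { { 0, 0, 0, 0, 0 }, recon_idle };

        printf("locked=%d state=%d\n", schedule_reconstruction(&sh), sh.state);
        sh.towrite[2] = 1;
        printf("locked=%d state=%d\n", schedule_reconstruction(&sh), sh.state);
        return 0;
}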
@@ -2600,6 +2573,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, | |||
2600 | int i; | 2573 | int i; |
2601 | 2574 | ||
2602 | clear_bit(STRIPE_SYNCING, &sh->state); | 2575 | clear_bit(STRIPE_SYNCING, &sh->state); |
2576 | if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) | ||
2577 | wake_up(&conf->wait_for_overlap); | ||
2603 | s->syncing = 0; | 2578 | s->syncing = 0; |
2604 | s->replacing = 0; | 2579 | s->replacing = 0; |
2605 | /* There is nothing more to do for sync/check/repair. | 2580 | /* There is nothing more to do for sync/check/repair. |
@@ -2773,6 +2748,7 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
2773 | { | 2748 | { |
2774 | int i; | 2749 | int i; |
2775 | struct r5dev *dev; | 2750 | struct r5dev *dev; |
2751 | int discard_pending = 0; | ||
2776 | 2752 | ||
2777 | for (i = disks; i--; ) | 2753 | for (i = disks; i--; ) |
2778 | if (sh->dev[i].written) { | 2754 | if (sh->dev[i].written) { |
@@ -2801,9 +2777,23 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
2801 | STRIPE_SECTORS, | 2777 | STRIPE_SECTORS, |
2802 | !test_bit(STRIPE_DEGRADED, &sh->state), | 2778 | !test_bit(STRIPE_DEGRADED, &sh->state), |
2803 | 0); | 2779 | 0); |
2804 | } | 2780 | } else if (test_bit(R5_Discard, &dev->flags)) |
2805 | } else if (test_bit(R5_Discard, &sh->dev[i].flags)) | 2781 | discard_pending = 1; |
2806 | clear_bit(R5_Discard, &sh->dev[i].flags); | 2782 | } |
2783 | if (!discard_pending && | ||
2784 | test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { | ||
2785 | clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); | ||
2786 | clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); | ||
2787 | if (sh->qd_idx >= 0) { | ||
2788 | clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); | ||
2789 | clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); | ||
2790 | } | ||
2791 | /* now that discard is done we can proceed with any sync */ | ||
2792 | clear_bit(STRIPE_DISCARD, &sh->state); | ||
2793 | if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) | ||
2794 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2795 | |||
2796 | } | ||
2807 | 2797 | ||
2808 | if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) | 2798 | if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) |
2809 | if (atomic_dec_and_test(&conf->pending_full_writes)) | 2799 | if (atomic_dec_and_test(&conf->pending_full_writes)) |
@@ -2862,8 +2852,10 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
2862 | set_bit(STRIPE_HANDLE, &sh->state); | 2852 | set_bit(STRIPE_HANDLE, &sh->state); |
2863 | if (rmw < rcw && rmw > 0) { | 2853 | if (rmw < rcw && rmw > 0) { |
2864 | /* prefer read-modify-write, but need to get some data */ | 2854 | /* prefer read-modify-write, but need to get some data */ |
2865 | blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d", | 2855 | if (conf->mddev->queue) |
2866 | (unsigned long long)sh->sector, rmw); | 2856 | blk_add_trace_msg(conf->mddev->queue, |
2857 | "raid5 rmw %llu %d", | ||
2858 | (unsigned long long)sh->sector, rmw); | ||
2867 | for (i = disks; i--; ) { | 2859 | for (i = disks; i--; ) { |
2868 | struct r5dev *dev = &sh->dev[i]; | 2860 | struct r5dev *dev = &sh->dev[i]; |
2869 | if ((dev->towrite || i == sh->pd_idx) && | 2861 | if ((dev->towrite || i == sh->pd_idx) && |
@@ -2913,7 +2905,7 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
2913 | } | 2905 | } |
2914 | } | 2906 | } |
2915 | } | 2907 | } |
2916 | if (rcw) | 2908 | if (rcw && conf->mddev->queue) |
2917 | blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", | 2909 | blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", |
2918 | (unsigned long long)sh->sector, | 2910 | (unsigned long long)sh->sector, |
2919 | rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); | 2911 | rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); |
@@ -3453,9 +3445,15 @@ static void handle_stripe(struct stripe_head *sh) | |||
3453 | return; | 3445 | return; |
3454 | } | 3446 | } |
3455 | 3447 | ||
3456 | if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { | 3448 | if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { |
3457 | set_bit(STRIPE_SYNCING, &sh->state); | 3449 | spin_lock(&sh->stripe_lock); |
3458 | clear_bit(STRIPE_INSYNC, &sh->state); | 3450 | /* Cannot process 'sync' concurrently with 'discard' */ |
3451 | if (!test_bit(STRIPE_DISCARD, &sh->state) && | ||
3452 | test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { | ||
3453 | set_bit(STRIPE_SYNCING, &sh->state); | ||
3454 | clear_bit(STRIPE_INSYNC, &sh->state); | ||
3455 | } | ||
3456 | spin_unlock(&sh->stripe_lock); | ||
3459 | } | 3457 | } |
3460 | clear_bit(STRIPE_DELAYED, &sh->state); | 3458 | clear_bit(STRIPE_DELAYED, &sh->state); |
3461 | 3459 | ||
@@ -3615,6 +3613,8 @@ static void handle_stripe(struct stripe_head *sh) | |||
3615 | test_bit(STRIPE_INSYNC, &sh->state)) { | 3613 | test_bit(STRIPE_INSYNC, &sh->state)) { |
3616 | md_done_sync(conf->mddev, STRIPE_SECTORS, 1); | 3614 | md_done_sync(conf->mddev, STRIPE_SECTORS, 1); |
3617 | clear_bit(STRIPE_SYNCING, &sh->state); | 3615 | clear_bit(STRIPE_SYNCING, &sh->state); |
3616 | if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) | ||
3617 | wake_up(&conf->wait_for_overlap); | ||
3618 | } | 3618 | } |
3619 | 3619 | ||
3620 | /* If the failed drives are just a ReadError, then we might need | 3620 | /* If the failed drives are just a ReadError, then we might need |
@@ -4018,9 +4018,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
4018 | atomic_inc(&conf->active_aligned_reads); | 4018 | atomic_inc(&conf->active_aligned_reads); |
4019 | spin_unlock_irq(&conf->device_lock); | 4019 | spin_unlock_irq(&conf->device_lock); |
4020 | 4020 | ||
4021 | trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), | 4021 | if (mddev->gendisk) |
4022 | align_bi, disk_devt(mddev->gendisk), | 4022 | trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), |
4023 | raid_bio->bi_sector); | 4023 | align_bi, disk_devt(mddev->gendisk), |
4024 | raid_bio->bi_sector); | ||
4024 | generic_make_request(align_bi); | 4025 | generic_make_request(align_bi); |
4025 | return 1; | 4026 | return 1; |
4026 | } else { | 4027 | } else { |
@@ -4114,7 +4115,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) | |||
4114 | } | 4115 | } |
4115 | spin_unlock_irq(&conf->device_lock); | 4116 | spin_unlock_irq(&conf->device_lock); |
4116 | } | 4117 | } |
4117 | trace_block_unplug(mddev->queue, cnt, !from_schedule); | 4118 | if (mddev->queue) |
4119 | trace_block_unplug(mddev->queue, cnt, !from_schedule); | ||
4118 | kfree(cb); | 4120 | kfree(cb); |
4119 | } | 4121 | } |
4120 | 4122 | ||
@@ -4177,6 +4179,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) | |||
4177 | sh = get_active_stripe(conf, logical_sector, 0, 0, 0); | 4179 | sh = get_active_stripe(conf, logical_sector, 0, 0, 0); |
4178 | prepare_to_wait(&conf->wait_for_overlap, &w, | 4180 | prepare_to_wait(&conf->wait_for_overlap, &w, |
4179 | TASK_UNINTERRUPTIBLE); | 4181 | TASK_UNINTERRUPTIBLE); |
4182 | set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); | ||
4183 | if (test_bit(STRIPE_SYNCING, &sh->state)) { | ||
4184 | release_stripe(sh); | ||
4185 | schedule(); | ||
4186 | goto again; | ||
4187 | } | ||
4188 | clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); | ||
4180 | spin_lock_irq(&sh->stripe_lock); | 4189 | spin_lock_irq(&sh->stripe_lock); |
4181 | for (d = 0; d < conf->raid_disks; d++) { | 4190 | for (d = 0; d < conf->raid_disks; d++) { |
4182 | if (d == sh->pd_idx || d == sh->qd_idx) | 4191 | if (d == sh->pd_idx || d == sh->qd_idx) |
@@ -4189,6 +4198,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) | |||
4189 | goto again; | 4198 | goto again; |
4190 | } | 4199 | } |
4191 | } | 4200 | } |
4201 | set_bit(STRIPE_DISCARD, &sh->state); | ||
4192 | finish_wait(&conf->wait_for_overlap, &w); | 4202 | finish_wait(&conf->wait_for_overlap, &w); |
4193 | for (d = 0; d < conf->raid_disks; d++) { | 4203 | for (d = 0; d < conf->raid_disks; d++) { |
4194 | if (d == sh->pd_idx || d == sh->qd_idx) | 4204 | if (d == sh->pd_idx || d == sh->qd_idx) |
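[Annotation] The remaining raid5 hunks serialize discard against resync on a stripe: handle_stripe() only enters the syncing state while STRIPE_DISCARD is clear, make_discard_request() backs off and retries while a sync holds the stripe, and handle_stripe_clean_event() drops STRIPE_DISCARD once the discard completes. A userspace analogue of excluding the two operations with state bits under one lock (hedged sketch; build with -lpthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t stripe_lock = PTHREAD_MUTEX_INITIALIZER;
static bool discarding, syncing;  /* model STRIPE_DISCARD / STRIPE_SYNCING */

/* handle_stripe() side: enter the syncing state only if no discard is
 * in flight on this stripe. */
static bool try_start_sync(void)
{
        bool ok;

        pthread_mutex_lock(&stripe_lock);
        ok = !discarding;
        if (ok)
                syncing = true;
        pthread_mutex_unlock(&stripe_lock);
        return ok;        /* on false, the sync request stays pending */
}

/* make_discard_request() side: back off and retry while a sync holds
 * the stripe. */
static bool try_start_discard(void)
{
        bool ok;

        pthread_mutex_lock(&stripe_lock);
        ok = !syncing;
        if (ok)
                discarding = true;
        pthread_mutex_unlock(&stripe_lock);
        return ok;        /* on false, the caller schedules and retries */
}

int main(void)
{
        printf("discard granted: %d\n", try_start_discard());  /* 1 */
        printf("sync granted:    %d\n", try_start_sync());     /* 0 */
        return 0;
}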
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 18b2c4a8a1fd..b0b663b119a8 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -221,10 +221,6 @@ struct stripe_head { | |||
221 | struct stripe_operations { | 221 | struct stripe_operations { |
222 | int target, target2; | 222 | int target, target2; |
223 | enum sum_check_flags zero_sum_result; | 223 | enum sum_check_flags zero_sum_result; |
224 | #ifdef CONFIG_MULTICORE_RAID456 | ||
225 | unsigned long request; | ||
226 | wait_queue_head_t wait_for_ops; | ||
227 | #endif | ||
228 | } ops; | 224 | } ops; |
229 | struct r5dev { | 225 | struct r5dev { |
230 | /* rreq and rvec are used for the replacement device when | 226 | /* rreq and rvec are used for the replacement device when |
@@ -323,6 +319,7 @@ enum { | |||
323 | STRIPE_COMPUTE_RUN, | 319 | STRIPE_COMPUTE_RUN, |
324 | STRIPE_OPS_REQ_PENDING, | 320 | STRIPE_OPS_REQ_PENDING, |
325 | STRIPE_ON_UNPLUG_LIST, | 321 | STRIPE_ON_UNPLUG_LIST, |
322 | STRIPE_DISCARD, | ||
326 | }; | 323 | }; |
327 | 324 | ||
328 | /* | 325 | /* |