author		Linus Torvalds <torvalds@linux-foundation.org>	2018-08-17 12:52:15 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-08-17 12:52:15 -0400
commit		b0e5c29426940bd6f137b6a3222fe87766323ae5 (patch)
tree		f005473e83be269f7a69a43af5c304a3fe4c3e0c
parent		2645b9d1a49c2c2cf23895657bdf9a56e07a4da8 (diff)
parent		1e1132ea21da6d7be92a72195204379c819cb70b (diff)
Merge tag 'for-4.19/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - A couple stable fixes for the DM writecache target.

 - A stable fix for the DM cache target that fixes the potential for
   data corruption after an unclean shutdown of a cache device using
   writeback mode.

 - Update DM integrity target to allow the metadata to be stored on a
   separate device from data.

 - Fix DM kcopyd and the snapshot target to cond_resched() where
   appropriate and be more efficient with processing completed work.

 - A few fixes and improvements for DM crypt.

 - Add DM delay target feature to configure delay of flushes
   independent of writes.

 - Update DM thin-provisioning target to include metadata_low_watermark
   threshold in pool status.

 - Fix stale DM thin-provisioning Documentation.

* tag 'for-4.19/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (26 commits)
  dm writecache: fix a crash due to reading past end of dirty_bitmap
  dm crypt: don't decrease device limits
  dm cache metadata: set dirty on all cache blocks after a crash
  dm snapshot: remove stale FIXME in snapshot_map()
  dm snapshot: improve performance by switching out_of_order_list to rbtree
  dm kcopyd: avoid softlockup in run_complete_job
  dm cache metadata: save in-core policy_hint_size to on-disk superblock
  dm thin: stop no_space_timeout worker when switching to write-mode
  dm kcopyd: return void from dm_kcopyd_copy()
  dm thin: include metadata_low_watermark threshold in pool status
  dm writecache: report start_sector in status line
  dm crypt: convert essiv from ahash to shash
  dm crypt: use wake_up_process() instead of a wait queue
  dm integrity: recalculate checksums on creation
  dm integrity: flush journal on suspend when using separate metadata device
  dm integrity: use version 2 for separate metadata
  dm integrity: allow separate metadata device
  dm integrity: add ic->start in get_data_sector()
  dm integrity: report provided data sectors in the status
  dm integrity: implement fair range locks
  ...
-rw-r--r--  Documentation/device-mapper/delay.txt               3
-rw-r--r--  Documentation/device-mapper/dm-integrity.txt        4
-rw-r--r--  Documentation/device-mapper/thin-provisioning.txt  20
-rw-r--r--  drivers/md/dm-cache-metadata.c                     13
-rw-r--r--  drivers/md/dm-cache-target.c                       35
-rw-r--r--  drivers/md/dm-crypt.c                              66
-rw-r--r--  drivers/md/dm-delay.c                             249
-rw-r--r--  drivers/md/dm-integrity.c                         501
-rw-r--r--  drivers/md/dm-kcopyd.c                             18
-rw-r--r--  drivers/md/dm-raid1.c                              17
-rw-r--r--  drivers/md/dm-snap.c                               41
-rw-r--r--  drivers/md/dm-thin.c                               31
-rw-r--r--  drivers/md/dm-writecache.c                          8
-rw-r--r--  drivers/md/dm-zoned-reclaim.c                       6
-rw-r--r--  include/linux/dm-kcopyd.h                          12
15 files changed, 690 insertions, 334 deletions
diff --git a/Documentation/device-mapper/delay.txt b/Documentation/device-mapper/delay.txt
index 4b1d22a44ce4..6426c45273cb 100644
--- a/Documentation/device-mapper/delay.txt
+++ b/Documentation/device-mapper/delay.txt
@@ -5,7 +5,8 @@ Device-Mapper's "delay" target delays reads and/or writes
 and maps them to different devices.
 
 Parameters:
-    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
+    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>
+	       [<flush_device> <flush_offset> <flush_delay>]]
 
 With separate write parameters, the first set is only used for reads.
 Offsets are specified in sectors.
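
As a quick illustration of the extended syntax documented above (not part of the patch; the device name and sizes are invented), a 9-argument table line that leaves reads undelayed, delays writes by 50 ms and flushes by 500 ms could look like:

	# <dev> <offset> <delay>  <wr_dev> <wr_offset> <wr_delay>  <flush_dev> <flush_offset> <flush_delay>
	0 2097152 delay /dev/sdc1 0 0 /dev/sdc1 0 50 /dev/sdc1 0 500

Delays are given in milliseconds; the target converts them with msecs_to_jiffies(), as seen further down in the dm-delay.c changes.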
diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt
index f33e3ade7a09..297251b0d2d5 100644
--- a/Documentation/device-mapper/dm-integrity.txt
+++ b/Documentation/device-mapper/dm-integrity.txt
@@ -113,6 +113,10 @@ internal_hash:algorithm(:key)	(the key is optional)
 	from an upper layer target, such as dm-crypt. The upper layer
 	target should check the validity of the integrity tags.
 
+recalculate
+	Recalculate the integrity tags automatically. It is only valid
+	when using internal hash.
+
 journal_crypt:algorithm(:key)	(the key is optional)
 	Encrypt the journal using given algorithm to make sure that the
 	attacker can't read the journal. You can use a block cipher here
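
For context, a table line that enables the new automatic recalculation alongside an internal hash might look like the sketch below. It is illustrative only: the device path, sizes and tag size are made up, and the surrounding constructor syntax (device, offset, tag size, mode, number of optional arguments) is assumed from the unchanged parts of dm-integrity.txt rather than shown in this hunk.

	0 1953792 integrity /dev/sdd 0 4 J 2 internal_hash:crc32c recalculate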
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 3d01948ea061..883e7ca5f745 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -28,17 +28,18 @@ administrator some freedom, for example to:
 Status
 ======
 
-These targets are very much still in the EXPERIMENTAL state. Please
-do not yet rely on them in production. But do experiment and offer us
-feedback. Different use cases will have different performance
-characteristics, for example due to fragmentation of the data volume.
+These targets are considered safe for production use. But different use
+cases will have different performance characteristics, for example due
+to fragmentation of the data volume.
 
 If you find this software is not performing as expected please mail
 dm-devel@redhat.com with details and we'll try our best to improve
 things for you.
 
-Userspace tools for checking and repairing the metadata are under
-development.
+Userspace tools for checking and repairing the metadata have been fully
+developed and are available as 'thin_check' and 'thin_repair'. The name
+of the package that provides these utilities varies by distribution (on
+a Red Hat distribution it is named 'device-mapper-persistent-data').
 
 Cookbook
 ========
@@ -280,7 +281,7 @@ ii) Status
     <transaction id> <used metadata blocks>/<total metadata blocks>
     <used data blocks>/<total data blocks> <held metadata root>
     ro|rw|out_of_data_space [no_]discard_passdown [error|queue]_if_no_space
-    needs_check|-
+    needs_check|- metadata_low_watermark
 
     transaction id:
	A 64-bit number used by userspace to help synchronise with metadata
@@ -327,6 +328,11 @@ ii) Status
	thin-pool can be made fully operational again. '-' indicates
	needs_check is not set.
 
+    metadata_low_watermark:
+	Value of metadata low watermark in blocks. The kernel sets this
+	value internally but userspace needs to know this value to
+	determine if an event was caused by crossing this threshold.
+
 iii) Messages
 
     create_thin <dev id>
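
With the new field, a hypothetical "dmsetup status" line for a pool (all numbers invented for illustration) ends with the watermark value, in the documented order:

	0 419430400 thin-pool 1 887/4096 1076/409600 - rw discard_passdown queue_if_no_space - 1024

Here the trailing 1024 is metadata_low_watermark, in metadata blocks.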
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 0d7212410e21..69dddeab124c 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -363,7 +363,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
363 disk_super->version = cpu_to_le32(cmd->version); 363 disk_super->version = cpu_to_le32(cmd->version);
364 memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); 364 memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
365 memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); 365 memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
366 disk_super->policy_hint_size = 0; 366 disk_super->policy_hint_size = cpu_to_le32(0);
367 367
368 __copy_sm_root(cmd, disk_super); 368 __copy_sm_root(cmd, disk_super);
369 369
@@ -701,6 +701,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
701 disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); 701 disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
702 disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); 702 disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
703 disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); 703 disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
704 disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size);
704 705
705 disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); 706 disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
706 disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); 707 disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
@@ -1322,6 +1323,7 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd,
1322 1323
1323 dm_oblock_t oblock; 1324 dm_oblock_t oblock;
1324 unsigned flags; 1325 unsigned flags;
1326 bool dirty = true;
1325 1327
1326 dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); 1328 dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
1327 memcpy(&mapping, mapping_value_le, sizeof(mapping)); 1329 memcpy(&mapping, mapping_value_le, sizeof(mapping));
@@ -1332,8 +1334,10 @@ static int __load_mapping_v1(struct dm_cache_metadata *cmd,
1332 dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); 1334 dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
1333 memcpy(&hint, hint_value_le, sizeof(hint)); 1335 memcpy(&hint, hint_value_le, sizeof(hint));
1334 } 1336 }
1337 if (cmd->clean_when_opened)
1338 dirty = flags & M_DIRTY;
1335 1339
1336 r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY, 1340 r = fn(context, oblock, to_cblock(cb), dirty,
1337 le32_to_cpu(hint), hints_valid); 1341 le32_to_cpu(hint), hints_valid);
1338 if (r) { 1342 if (r) {
1339 DMERR("policy couldn't load cache block %llu", 1343 DMERR("policy couldn't load cache block %llu",
@@ -1361,7 +1365,7 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd,
1361 1365
1362 dm_oblock_t oblock; 1366 dm_oblock_t oblock;
1363 unsigned flags; 1367 unsigned flags;
1364 bool dirty; 1368 bool dirty = true;
1365 1369
1366 dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le); 1370 dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
1367 memcpy(&mapping, mapping_value_le, sizeof(mapping)); 1371 memcpy(&mapping, mapping_value_le, sizeof(mapping));
@@ -1372,8 +1376,9 @@ static int __load_mapping_v2(struct dm_cache_metadata *cmd,
1372 dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le); 1376 dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
1373 memcpy(&hint, hint_value_le, sizeof(hint)); 1377 memcpy(&hint, hint_value_le, sizeof(hint));
1374 } 1378 }
1379 if (cmd->clean_when_opened)
1380 dirty = dm_bitset_cursor_get_value(dirty_cursor);
1375 1381
1376 dirty = dm_bitset_cursor_get_value(dirty_cursor);
1377 r = fn(context, oblock, to_cblock(cb), dirty, 1382 r = fn(context, oblock, to_cblock(cb), dirty,
1378 le32_to_cpu(hint), hints_valid); 1383 le32_to_cpu(hint), hints_valid);
1379 if (r) { 1384 if (r) {
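
The policy_hint_size assignment above is an endianness-annotation fix: on-disk superblock fields are declared __le32, so even a constant 0 should go through cpu_to_le32() to keep sparse's byte-order checking clean and the intent explicit. A minimal sketch of that pattern (the structure and functions below are made up for illustration, not dm-cache's real superblock layout):

	#include <linux/types.h>

	/* illustrative only: a fake on-disk structure with a little-endian field */
	struct example_disk_super {
		__le32 policy_hint_size;	/* stored little-endian on disk */
	};

	static void example_set_hint_size(struct example_disk_super *ds, u32 size)
	{
		ds->policy_hint_size = cpu_to_le32(size);	/* correct even for 0 */
	}

	static u32 example_get_hint_size(const struct example_disk_super *ds)
	{
		return le32_to_cpu(ds->policy_hint_size);
	}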
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index ce14a3d1f609..a53413371725 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1188,9 +1188,8 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
1188 queue_continuation(mg->cache->wq, &mg->k); 1188 queue_continuation(mg->cache->wq, &mg->k);
1189} 1189}
1190 1190
1191static int copy(struct dm_cache_migration *mg, bool promote) 1191static void copy(struct dm_cache_migration *mg, bool promote)
1192{ 1192{
1193 int r;
1194 struct dm_io_region o_region, c_region; 1193 struct dm_io_region o_region, c_region;
1195 struct cache *cache = mg->cache; 1194 struct cache *cache = mg->cache;
1196 1195
@@ -1203,11 +1202,9 @@ static int copy(struct dm_cache_migration *mg, bool promote)
1203 c_region.count = cache->sectors_per_block; 1202 c_region.count = cache->sectors_per_block;
1204 1203
1205 if (promote) 1204 if (promote)
1206 r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k); 1205 dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
1207 else 1206 else
1208 r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k); 1207 dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
1209
1210 return r;
1211} 1208}
1212 1209
1213static void bio_drop_shared_lock(struct cache *cache, struct bio *bio) 1210static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
@@ -1449,12 +1446,7 @@ static void mg_full_copy(struct work_struct *ws)
1449 } 1446 }
1450 1447
1451 init_continuation(&mg->k, mg_upgrade_lock); 1448 init_continuation(&mg->k, mg_upgrade_lock);
1452 1449 copy(mg, is_policy_promote);
1453 if (copy(mg, is_policy_promote)) {
1454 DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
1455 mg->k.input = BLK_STS_IOERR;
1456 mg_complete(mg, false);
1457 }
1458} 1450}
1459 1451
1460static void mg_copy(struct work_struct *ws) 1452static void mg_copy(struct work_struct *ws)
@@ -2250,7 +2242,7 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2250 {0, 2, "Invalid number of cache feature arguments"}, 2242 {0, 2, "Invalid number of cache feature arguments"},
2251 }; 2243 };
2252 2244
2253 int r; 2245 int r, mode_ctr = 0;
2254 unsigned argc; 2246 unsigned argc;
2255 const char *arg; 2247 const char *arg;
2256 struct cache_features *cf = &ca->features; 2248 struct cache_features *cf = &ca->features;
@@ -2264,14 +2256,20 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2264 while (argc--) { 2256 while (argc--) {
2265 arg = dm_shift_arg(as); 2257 arg = dm_shift_arg(as);
2266 2258
2267 if (!strcasecmp(arg, "writeback")) 2259 if (!strcasecmp(arg, "writeback")) {
2268 cf->io_mode = CM_IO_WRITEBACK; 2260 cf->io_mode = CM_IO_WRITEBACK;
2261 mode_ctr++;
2262 }
2269 2263
2270 else if (!strcasecmp(arg, "writethrough")) 2264 else if (!strcasecmp(arg, "writethrough")) {
2271 cf->io_mode = CM_IO_WRITETHROUGH; 2265 cf->io_mode = CM_IO_WRITETHROUGH;
2266 mode_ctr++;
2267 }
2272 2268
2273 else if (!strcasecmp(arg, "passthrough")) 2269 else if (!strcasecmp(arg, "passthrough")) {
2274 cf->io_mode = CM_IO_PASSTHROUGH; 2270 cf->io_mode = CM_IO_PASSTHROUGH;
2271 mode_ctr++;
2272 }
2275 2273
2276 else if (!strcasecmp(arg, "metadata2")) 2274 else if (!strcasecmp(arg, "metadata2"))
2277 cf->metadata_version = 2; 2275 cf->metadata_version = 2;
@@ -2282,6 +2280,11 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2282 } 2280 }
2283 } 2281 }
2284 2282
2283 if (mode_ctr > 1) {
2284 *error = "Duplicate cache io_mode features requested";
2285 return -EINVAL;
2286 }
2287
2285 return 0; 2288 return 0;
2286} 2289}
2287 2290
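
To make the new check concrete, consider a hypothetical cache table (devices, sizes and policy are invented) whose feature arguments name two io_modes:

	0 409600 cache /dev/vg/meta /dev/vg/ssd /dev/vg/origin 512 2 writeback passthrough smq 0

Such a line is now rejected at parse time with "Duplicate cache io_mode features requested" instead of silently using whichever mode was listed last.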
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index b61b069c33af..f266c81f396f 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -99,7 +99,7 @@ struct crypt_iv_operations {
99}; 99};
100 100
101struct iv_essiv_private { 101struct iv_essiv_private {
102 struct crypto_ahash *hash_tfm; 102 struct crypto_shash *hash_tfm;
103 u8 *salt; 103 u8 *salt;
104}; 104};
105 105
@@ -144,7 +144,7 @@ struct crypt_config {
144 struct workqueue_struct *io_queue; 144 struct workqueue_struct *io_queue;
145 struct workqueue_struct *crypt_queue; 145 struct workqueue_struct *crypt_queue;
146 146
147 wait_queue_head_t write_thread_wait; 147 spinlock_t write_thread_lock;
148 struct task_struct *write_thread; 148 struct task_struct *write_thread;
149 struct rb_root write_tree; 149 struct rb_root write_tree;
150 150
@@ -327,25 +327,22 @@ static int crypt_iv_plain64be_gen(struct crypt_config *cc, u8 *iv,
327static int crypt_iv_essiv_init(struct crypt_config *cc) 327static int crypt_iv_essiv_init(struct crypt_config *cc)
328{ 328{
329 struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; 329 struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
330 AHASH_REQUEST_ON_STACK(req, essiv->hash_tfm); 330 SHASH_DESC_ON_STACK(desc, essiv->hash_tfm);
331 struct scatterlist sg;
332 struct crypto_cipher *essiv_tfm; 331 struct crypto_cipher *essiv_tfm;
333 int err; 332 int err;
334 333
335 sg_init_one(&sg, cc->key, cc->key_size); 334 desc->tfm = essiv->hash_tfm;
336 ahash_request_set_tfm(req, essiv->hash_tfm); 335 desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
337 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
338 ahash_request_set_crypt(req, &sg, essiv->salt, cc->key_size);
339 336
340 err = crypto_ahash_digest(req); 337 err = crypto_shash_digest(desc, cc->key, cc->key_size, essiv->salt);
341 ahash_request_zero(req); 338 shash_desc_zero(desc);
342 if (err) 339 if (err)
343 return err; 340 return err;
344 341
345 essiv_tfm = cc->iv_private; 342 essiv_tfm = cc->iv_private;
346 343
347 err = crypto_cipher_setkey(essiv_tfm, essiv->salt, 344 err = crypto_cipher_setkey(essiv_tfm, essiv->salt,
348 crypto_ahash_digestsize(essiv->hash_tfm)); 345 crypto_shash_digestsize(essiv->hash_tfm));
349 if (err) 346 if (err)
350 return err; 347 return err;
351 348
@@ -356,7 +353,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc)
356static int crypt_iv_essiv_wipe(struct crypt_config *cc) 353static int crypt_iv_essiv_wipe(struct crypt_config *cc)
357{ 354{
358 struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; 355 struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
359 unsigned salt_size = crypto_ahash_digestsize(essiv->hash_tfm); 356 unsigned salt_size = crypto_shash_digestsize(essiv->hash_tfm);
360 struct crypto_cipher *essiv_tfm; 357 struct crypto_cipher *essiv_tfm;
361 int r, err = 0; 358 int r, err = 0;
362 359
@@ -408,7 +405,7 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc)
408 struct crypto_cipher *essiv_tfm; 405 struct crypto_cipher *essiv_tfm;
409 struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; 406 struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
410 407
411 crypto_free_ahash(essiv->hash_tfm); 408 crypto_free_shash(essiv->hash_tfm);
412 essiv->hash_tfm = NULL; 409 essiv->hash_tfm = NULL;
413 410
414 kzfree(essiv->salt); 411 kzfree(essiv->salt);
@@ -426,7 +423,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
426 const char *opts) 423 const char *opts)
427{ 424{
428 struct crypto_cipher *essiv_tfm = NULL; 425 struct crypto_cipher *essiv_tfm = NULL;
429 struct crypto_ahash *hash_tfm = NULL; 426 struct crypto_shash *hash_tfm = NULL;
430 u8 *salt = NULL; 427 u8 *salt = NULL;
431 int err; 428 int err;
432 429
@@ -436,14 +433,14 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
436 } 433 }
437 434
438 /* Allocate hash algorithm */ 435 /* Allocate hash algorithm */
439 hash_tfm = crypto_alloc_ahash(opts, 0, CRYPTO_ALG_ASYNC); 436 hash_tfm = crypto_alloc_shash(opts, 0, 0);
440 if (IS_ERR(hash_tfm)) { 437 if (IS_ERR(hash_tfm)) {
441 ti->error = "Error initializing ESSIV hash"; 438 ti->error = "Error initializing ESSIV hash";
442 err = PTR_ERR(hash_tfm); 439 err = PTR_ERR(hash_tfm);
443 goto bad; 440 goto bad;
444 } 441 }
445 442
446 salt = kzalloc(crypto_ahash_digestsize(hash_tfm), GFP_KERNEL); 443 salt = kzalloc(crypto_shash_digestsize(hash_tfm), GFP_KERNEL);
447 if (!salt) { 444 if (!salt) {
448 ti->error = "Error kmallocing salt storage in ESSIV"; 445 ti->error = "Error kmallocing salt storage in ESSIV";
449 err = -ENOMEM; 446 err = -ENOMEM;
@@ -454,7 +451,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
454 cc->iv_gen_private.essiv.hash_tfm = hash_tfm; 451 cc->iv_gen_private.essiv.hash_tfm = hash_tfm;
455 452
456 essiv_tfm = alloc_essiv_cipher(cc, ti, salt, 453 essiv_tfm = alloc_essiv_cipher(cc, ti, salt,
457 crypto_ahash_digestsize(hash_tfm)); 454 crypto_shash_digestsize(hash_tfm));
458 if (IS_ERR(essiv_tfm)) { 455 if (IS_ERR(essiv_tfm)) {
459 crypt_iv_essiv_dtr(cc); 456 crypt_iv_essiv_dtr(cc);
460 return PTR_ERR(essiv_tfm); 457 return PTR_ERR(essiv_tfm);
@@ -465,7 +462,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
465 462
466bad: 463bad:
467 if (hash_tfm && !IS_ERR(hash_tfm)) 464 if (hash_tfm && !IS_ERR(hash_tfm))
468 crypto_free_ahash(hash_tfm); 465 crypto_free_shash(hash_tfm);
469 kfree(salt); 466 kfree(salt);
470 return err; 467 return err;
471} 468}
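
The ahash-to-shash conversion above boils down to calling the synchronous hash API directly instead of building a one-entry scatterlist and an async request. A minimal, self-contained sketch of that pattern (the algorithm name, function and buffer handling are illustrative, not dm-crypt's actual key/salt logic):

	#include <crypto/hash.h>
	#include <linux/err.h>

	/* Digest @len bytes of @data into @out with a synchronous hash transform. */
	static int example_shash_digest(const char *alg, const void *data,
					unsigned int len, u8 *out)
	{
		struct crypto_shash *tfm;
		int err;

		tfm = crypto_alloc_shash(alg, 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; /* field later removed from shash_desc */
			err = crypto_shash_digest(desc, data, len, out);
			shash_desc_zero(desc);
		}

		crypto_free_shash(tfm);
		return err;
	}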
@@ -1620,36 +1617,31 @@ static int dmcrypt_write(void *data)
1620 struct rb_root write_tree; 1617 struct rb_root write_tree;
1621 struct blk_plug plug; 1618 struct blk_plug plug;
1622 1619
1623 DECLARE_WAITQUEUE(wait, current); 1620 spin_lock_irq(&cc->write_thread_lock);
1624
1625 spin_lock_irq(&cc->write_thread_wait.lock);
1626continue_locked: 1621continue_locked:
1627 1622
1628 if (!RB_EMPTY_ROOT(&cc->write_tree)) 1623 if (!RB_EMPTY_ROOT(&cc->write_tree))
1629 goto pop_from_list; 1624 goto pop_from_list;
1630 1625
1631 set_current_state(TASK_INTERRUPTIBLE); 1626 set_current_state(TASK_INTERRUPTIBLE);
1632 __add_wait_queue(&cc->write_thread_wait, &wait);
1633 1627
1634 spin_unlock_irq(&cc->write_thread_wait.lock); 1628 spin_unlock_irq(&cc->write_thread_lock);
1635 1629
1636 if (unlikely(kthread_should_stop())) { 1630 if (unlikely(kthread_should_stop())) {
1637 set_current_state(TASK_RUNNING); 1631 set_current_state(TASK_RUNNING);
1638 remove_wait_queue(&cc->write_thread_wait, &wait);
1639 break; 1632 break;
1640 } 1633 }
1641 1634
1642 schedule(); 1635 schedule();
1643 1636
1644 set_current_state(TASK_RUNNING); 1637 set_current_state(TASK_RUNNING);
1645 spin_lock_irq(&cc->write_thread_wait.lock); 1638 spin_lock_irq(&cc->write_thread_lock);
1646 __remove_wait_queue(&cc->write_thread_wait, &wait);
1647 goto continue_locked; 1639 goto continue_locked;
1648 1640
1649pop_from_list: 1641pop_from_list:
1650 write_tree = cc->write_tree; 1642 write_tree = cc->write_tree;
1651 cc->write_tree = RB_ROOT; 1643 cc->write_tree = RB_ROOT;
1652 spin_unlock_irq(&cc->write_thread_wait.lock); 1644 spin_unlock_irq(&cc->write_thread_lock);
1653 1645
1654 BUG_ON(rb_parent(write_tree.rb_node)); 1646 BUG_ON(rb_parent(write_tree.rb_node));
1655 1647
@@ -1693,7 +1685,9 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
1693 return; 1685 return;
1694 } 1686 }
1695 1687
1696 spin_lock_irqsave(&cc->write_thread_wait.lock, flags); 1688 spin_lock_irqsave(&cc->write_thread_lock, flags);
1689 if (RB_EMPTY_ROOT(&cc->write_tree))
1690 wake_up_process(cc->write_thread);
1697 rbp = &cc->write_tree.rb_node; 1691 rbp = &cc->write_tree.rb_node;
1698 parent = NULL; 1692 parent = NULL;
1699 sector = io->sector; 1693 sector = io->sector;
@@ -1706,9 +1700,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
1706 } 1700 }
1707 rb_link_node(&io->rb_node, parent, rbp); 1701 rb_link_node(&io->rb_node, parent, rbp);
1708 rb_insert_color(&io->rb_node, &cc->write_tree); 1702 rb_insert_color(&io->rb_node, &cc->write_tree);
1709 1703 spin_unlock_irqrestore(&cc->write_thread_lock, flags);
1710 wake_up_locked(&cc->write_thread_wait);
1711 spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
1712} 1704}
1713 1705
1714static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) 1706static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
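
The wait-queue removal in dmcrypt_write()/kcryptd_crypt_write_io_submit() reduces to a small hand-rolled producer/consumer handshake: submitters take an ordinary spinlock and wake the writer thread only on an empty-to-non-empty transition, while the thread parks itself with set_current_state() under the same lock so no wakeup can be lost. A stripped-down sketch of the idiom (the structure and plain list below are illustrative, not dm-crypt's rbtree of pending writes):

	#include <linux/kthread.h>
	#include <linux/list.h>
	#include <linux/sched.h>
	#include <linux/spinlock.h>

	struct example_queue {
		spinlock_t lock;
		struct list_head items;
		struct task_struct *worker;
	};

	/* Producer: wake the worker only when the queue goes non-empty,
	 * mirroring the RB_EMPTY_ROOT() check added to dm-crypt. */
	static void example_submit(struct example_queue *q, struct list_head *item)
	{
		unsigned long flags;

		spin_lock_irqsave(&q->lock, flags);
		if (list_empty(&q->items))
			wake_up_process(q->worker);
		list_add_tail(item, &q->items);
		spin_unlock_irqrestore(&q->lock, flags);
	}

	/* Consumer thread: sleep while the queue is empty, then drain it. */
	static int example_worker(void *data)
	{
		struct example_queue *q = data;

		spin_lock_irq(&q->lock);
		for (;;) {
			while (list_empty(&q->items)) {
				set_current_state(TASK_INTERRUPTIBLE);
				spin_unlock_irq(&q->lock);
				if (kthread_should_stop()) {
					set_current_state(TASK_RUNNING);
					return 0;
				}
				schedule();
				spin_lock_irq(&q->lock);
			}
			__set_current_state(TASK_RUNNING);
			while (!list_empty(&q->items)) {
				struct list_head *item = q->items.next;

				list_del(item);
				spin_unlock_irq(&q->lock);
				/* ... process the item without holding the lock ... */
				spin_lock_irq(&q->lock);
			}
		}
	}

Because the producer sets the worker back to TASK_RUNNING while still holding the lock, the worker's subsequent schedule() returns immediately and the wakeup cannot be missed, which is exactly what the old wait-queue machinery provided at higher cost.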
@@ -2831,7 +2823,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2831 goto bad; 2823 goto bad;
2832 } 2824 }
2833 2825
2834 init_waitqueue_head(&cc->write_thread_wait); 2826 spin_lock_init(&cc->write_thread_lock);
2835 cc->write_tree = RB_ROOT; 2827 cc->write_tree = RB_ROOT;
2836 2828
2837 cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); 2829 cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
@@ -3069,11 +3061,11 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
3069 */ 3061 */
3070 limits->max_segment_size = PAGE_SIZE; 3062 limits->max_segment_size = PAGE_SIZE;
3071 3063
3072 if (cc->sector_size != (1 << SECTOR_SHIFT)) { 3064 limits->logical_block_size =
3073 limits->logical_block_size = cc->sector_size; 3065 max_t(unsigned short, limits->logical_block_size, cc->sector_size);
3074 limits->physical_block_size = cc->sector_size; 3066 limits->physical_block_size =
3075 blk_limits_io_min(limits, cc->sector_size); 3067 max_t(unsigned, limits->physical_block_size, cc->sector_size);
3076 } 3068 limits->io_min = max_t(unsigned, limits->io_min, cc->sector_size);
3077} 3069}
3078 3070
3079static struct target_type crypt_target = { 3071static struct target_type crypt_target = {
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 1783d80c9cad..2fb7bb4304ad 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -17,6 +17,13 @@
17 17
18#define DM_MSG_PREFIX "delay" 18#define DM_MSG_PREFIX "delay"
19 19
20struct delay_class {
21 struct dm_dev *dev;
22 sector_t start;
23 unsigned delay;
24 unsigned ops;
25};
26
20struct delay_c { 27struct delay_c {
21 struct timer_list delay_timer; 28 struct timer_list delay_timer;
22 struct mutex timer_lock; 29 struct mutex timer_lock;
@@ -25,19 +32,16 @@ struct delay_c {
25 struct list_head delayed_bios; 32 struct list_head delayed_bios;
26 atomic_t may_delay; 33 atomic_t may_delay;
27 34
28 struct dm_dev *dev_read; 35 struct delay_class read;
29 sector_t start_read; 36 struct delay_class write;
30 unsigned read_delay; 37 struct delay_class flush;
31 unsigned reads;
32 38
33 struct dm_dev *dev_write; 39 int argc;
34 sector_t start_write;
35 unsigned write_delay;
36 unsigned writes;
37}; 40};
38 41
39struct dm_delay_info { 42struct dm_delay_info {
40 struct delay_c *context; 43 struct delay_c *context;
44 struct delay_class *class;
41 struct list_head list; 45 struct list_head list;
42 unsigned long expires; 46 unsigned long expires;
43}; 47};
@@ -77,7 +81,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
77{ 81{
78 struct dm_delay_info *delayed, *next; 82 struct dm_delay_info *delayed, *next;
79 unsigned long next_expires = 0; 83 unsigned long next_expires = 0;
80 int start_timer = 0; 84 unsigned long start_timer = 0;
81 struct bio_list flush_bios = { }; 85 struct bio_list flush_bios = { };
82 86
83 mutex_lock(&delayed_bios_lock); 87 mutex_lock(&delayed_bios_lock);
@@ -87,10 +91,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
87 sizeof(struct dm_delay_info)); 91 sizeof(struct dm_delay_info));
88 list_del(&delayed->list); 92 list_del(&delayed->list);
89 bio_list_add(&flush_bios, bio); 93 bio_list_add(&flush_bios, bio);
90 if ((bio_data_dir(bio) == WRITE)) 94 delayed->class->ops--;
91 delayed->context->writes--;
92 else
93 delayed->context->reads--;
94 continue; 95 continue;
95 } 96 }
96 97
@@ -100,7 +101,6 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
100 } else 101 } else
101 next_expires = min(next_expires, delayed->expires); 102 next_expires = min(next_expires, delayed->expires);
102 } 103 }
103
104 mutex_unlock(&delayed_bios_lock); 104 mutex_unlock(&delayed_bios_lock);
105 105
106 if (start_timer) 106 if (start_timer)
@@ -117,6 +117,50 @@ static void flush_expired_bios(struct work_struct *work)
117 flush_bios(flush_delayed_bios(dc, 0)); 117 flush_bios(flush_delayed_bios(dc, 0));
118} 118}
119 119
120static void delay_dtr(struct dm_target *ti)
121{
122 struct delay_c *dc = ti->private;
123
124 destroy_workqueue(dc->kdelayd_wq);
125
126 if (dc->read.dev)
127 dm_put_device(ti, dc->read.dev);
128 if (dc->write.dev)
129 dm_put_device(ti, dc->write.dev);
130 if (dc->flush.dev)
131 dm_put_device(ti, dc->flush.dev);
132
133 mutex_destroy(&dc->timer_lock);
134
135 kfree(dc);
136}
137
138static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv)
139{
140 int ret;
141 unsigned long long tmpll;
142 char dummy;
143
144 if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) {
145 ti->error = "Invalid device sector";
146 return -EINVAL;
147 }
148 c->start = tmpll;
149
150 if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) {
151 ti->error = "Invalid delay";
152 return -EINVAL;
153 }
154
155 ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev);
156 if (ret) {
157 ti->error = "Device lookup failed";
158 return ret;
159 }
160
161 return 0;
162}
163
120/* 164/*
121 * Mapping parameters: 165 * Mapping parameters:
122 * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] 166 * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
@@ -128,134 +172,89 @@ static void flush_expired_bios(struct work_struct *work)
128static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) 172static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
129{ 173{
130 struct delay_c *dc; 174 struct delay_c *dc;
131 unsigned long long tmpll;
132 char dummy;
133 int ret; 175 int ret;
134 176
135 if (argc != 3 && argc != 6) { 177 if (argc != 3 && argc != 6 && argc != 9) {
136 ti->error = "Requires exactly 3 or 6 arguments"; 178 ti->error = "Requires exactly 3, 6 or 9 arguments";
137 return -EINVAL; 179 return -EINVAL;
138 } 180 }
139 181
140 dc = kmalloc(sizeof(*dc), GFP_KERNEL); 182 dc = kzalloc(sizeof(*dc), GFP_KERNEL);
141 if (!dc) { 183 if (!dc) {
142 ti->error = "Cannot allocate context"; 184 ti->error = "Cannot allocate context";
143 return -ENOMEM; 185 return -ENOMEM;
144 } 186 }
145 187
146 dc->reads = dc->writes = 0; 188 ti->private = dc;
189 timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
190 INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
191 INIT_LIST_HEAD(&dc->delayed_bios);
192 mutex_init(&dc->timer_lock);
193 atomic_set(&dc->may_delay, 1);
194 dc->argc = argc;
147 195
148 ret = -EINVAL; 196 ret = delay_class_ctr(ti, &dc->read, argv);
149 if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { 197 if (ret)
150 ti->error = "Invalid device sector";
151 goto bad; 198 goto bad;
152 }
153 dc->start_read = tmpll;
154 199
155 if (sscanf(argv[2], "%u%c", &dc->read_delay, &dummy) != 1) { 200 if (argc == 3) {
156 ti->error = "Invalid delay"; 201 ret = delay_class_ctr(ti, &dc->write, argv);
157 goto bad; 202 if (ret)
203 goto bad;
204 ret = delay_class_ctr(ti, &dc->flush, argv);
205 if (ret)
206 goto bad;
207 goto out;
158 } 208 }
159 209
160 ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), 210 ret = delay_class_ctr(ti, &dc->write, argv + 3);
161 &dc->dev_read); 211 if (ret)
162 if (ret) {
163 ti->error = "Device lookup failed";
164 goto bad; 212 goto bad;
165 } 213 if (argc == 6) {
166 214 ret = delay_class_ctr(ti, &dc->flush, argv + 3);
167 ret = -EINVAL; 215 if (ret)
168 dc->dev_write = NULL; 216 goto bad;
169 if (argc == 3)
170 goto out; 217 goto out;
171
172 if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) {
173 ti->error = "Invalid write device sector";
174 goto bad_dev_read;
175 } 218 }
176 dc->start_write = tmpll;
177 219
178 if (sscanf(argv[5], "%u%c", &dc->write_delay, &dummy) != 1) { 220 ret = delay_class_ctr(ti, &dc->flush, argv + 6);
179 ti->error = "Invalid write delay"; 221 if (ret)
180 goto bad_dev_read; 222 goto bad;
181 }
182
183 ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table),
184 &dc->dev_write);
185 if (ret) {
186 ti->error = "Write device lookup failed";
187 goto bad_dev_read;
188 }
189 223
190out: 224out:
191 ret = -EINVAL;
192 dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); 225 dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
193 if (!dc->kdelayd_wq) { 226 if (!dc->kdelayd_wq) {
227 ret = -EINVAL;
194 DMERR("Couldn't start kdelayd"); 228 DMERR("Couldn't start kdelayd");
195 goto bad_queue; 229 goto bad;
196 } 230 }
197 231
198 timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
199
200 INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
201 INIT_LIST_HEAD(&dc->delayed_bios);
202 mutex_init(&dc->timer_lock);
203 atomic_set(&dc->may_delay, 1);
204
205 ti->num_flush_bios = 1; 232 ti->num_flush_bios = 1;
206 ti->num_discard_bios = 1; 233 ti->num_discard_bios = 1;
207 ti->per_io_data_size = sizeof(struct dm_delay_info); 234 ti->per_io_data_size = sizeof(struct dm_delay_info);
208 ti->private = dc;
209 return 0; 235 return 0;
210 236
211bad_queue:
212 if (dc->dev_write)
213 dm_put_device(ti, dc->dev_write);
214bad_dev_read:
215 dm_put_device(ti, dc->dev_read);
216bad: 237bad:
217 kfree(dc); 238 delay_dtr(ti);
218 return ret; 239 return ret;
219} 240}
220 241
221static void delay_dtr(struct dm_target *ti) 242static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
222{
223 struct delay_c *dc = ti->private;
224
225 destroy_workqueue(dc->kdelayd_wq);
226
227 dm_put_device(ti, dc->dev_read);
228
229 if (dc->dev_write)
230 dm_put_device(ti, dc->dev_write);
231
232 mutex_destroy(&dc->timer_lock);
233
234 kfree(dc);
235}
236
237static int delay_bio(struct delay_c *dc, int delay, struct bio *bio)
238{ 243{
239 struct dm_delay_info *delayed; 244 struct dm_delay_info *delayed;
240 unsigned long expires = 0; 245 unsigned long expires = 0;
241 246
242 if (!delay || !atomic_read(&dc->may_delay)) 247 if (!c->delay || !atomic_read(&dc->may_delay))
243 return DM_MAPIO_REMAPPED; 248 return DM_MAPIO_REMAPPED;
244 249
245 delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); 250 delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
246 251
247 delayed->context = dc; 252 delayed->context = dc;
248 delayed->expires = expires = jiffies + msecs_to_jiffies(delay); 253 delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
249 254
250 mutex_lock(&delayed_bios_lock); 255 mutex_lock(&delayed_bios_lock);
251 256 c->ops++;
252 if (bio_data_dir(bio) == WRITE)
253 dc->writes++;
254 else
255 dc->reads++;
256
257 list_add_tail(&delayed->list, &dc->delayed_bios); 257 list_add_tail(&delayed->list, &dc->delayed_bios);
258
259 mutex_unlock(&delayed_bios_lock); 258 mutex_unlock(&delayed_bios_lock);
260 259
261 queue_timeout(dc, expires); 260 queue_timeout(dc, expires);
@@ -282,23 +281,28 @@ static void delay_resume(struct dm_target *ti)
282static int delay_map(struct dm_target *ti, struct bio *bio) 281static int delay_map(struct dm_target *ti, struct bio *bio)
283{ 282{
284 struct delay_c *dc = ti->private; 283 struct delay_c *dc = ti->private;
285 284 struct delay_class *c;
286 if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { 285 struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
287 bio_set_dev(bio, dc->dev_write->bdev); 286
288 if (bio_sectors(bio)) 287 if (bio_data_dir(bio) == WRITE) {
289 bio->bi_iter.bi_sector = dc->start_write + 288 if (unlikely(bio->bi_opf & REQ_PREFLUSH))
290 dm_target_offset(ti, bio->bi_iter.bi_sector); 289 c = &dc->flush;
291 290 else
292 return delay_bio(dc, dc->write_delay, bio); 291 c = &dc->write;
292 } else {
293 c = &dc->read;
293 } 294 }
295 delayed->class = c;
296 bio_set_dev(bio, c->dev->bdev);
297 if (bio_sectors(bio))
298 bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
294 299
295 bio_set_dev(bio, dc->dev_read->bdev); 300 return delay_bio(dc, c, bio);
296 bio->bi_iter.bi_sector = dc->start_read +
297 dm_target_offset(ti, bio->bi_iter.bi_sector);
298
299 return delay_bio(dc, dc->read_delay, bio);
300} 301}
301 302
303#define DMEMIT_DELAY_CLASS(c) \
304 DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)
305
302static void delay_status(struct dm_target *ti, status_type_t type, 306static void delay_status(struct dm_target *ti, status_type_t type,
303 unsigned status_flags, char *result, unsigned maxlen) 307 unsigned status_flags, char *result, unsigned maxlen)
304{ 308{
@@ -307,17 +311,19 @@ static void delay_status(struct dm_target *ti, status_type_t type,
307 311
308 switch (type) { 312 switch (type) {
309 case STATUSTYPE_INFO: 313 case STATUSTYPE_INFO:
310 DMEMIT("%u %u", dc->reads, dc->writes); 314 DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops);
311 break; 315 break;
312 316
313 case STATUSTYPE_TABLE: 317 case STATUSTYPE_TABLE:
314 DMEMIT("%s %llu %u", dc->dev_read->name, 318 DMEMIT_DELAY_CLASS(&dc->read);
315 (unsigned long long) dc->start_read, 319 if (dc->argc >= 6) {
316 dc->read_delay); 320 DMEMIT(" ");
317 if (dc->dev_write) 321 DMEMIT_DELAY_CLASS(&dc->write);
318 DMEMIT(" %s %llu %u", dc->dev_write->name, 322 }
319 (unsigned long long) dc->start_write, 323 if (dc->argc >= 9) {
320 dc->write_delay); 324 DMEMIT(" ");
325 DMEMIT_DELAY_CLASS(&dc->flush);
326 }
321 break; 327 break;
322 } 328 }
323} 329}
@@ -328,12 +334,15 @@ static int delay_iterate_devices(struct dm_target *ti,
328 struct delay_c *dc = ti->private; 334 struct delay_c *dc = ti->private;
329 int ret = 0; 335 int ret = 0;
330 336
331 ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data); 337 ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data);
338 if (ret)
339 goto out;
340 ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data);
341 if (ret)
342 goto out;
343 ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data);
332 if (ret) 344 if (ret)
333 goto out; 345 goto out;
334
335 if (dc->dev_write)
336 ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data);
337 346
338out: 347out:
339 return ret; 348 return ret;
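
One small idiom worth calling out from delay_class_ctr() above: sscanf() is given an extra "%c" conversion purely to detect trailing garbage. A second successful conversion means something followed the number, so requiring a return value of exactly 1 accepts "123" but rejects "123abc". A tiny standalone sketch (the function name and error code are illustrative):

	#include <linux/errno.h>
	#include <linux/kernel.h>	/* sscanf() */

	static int example_parse_sector(const char *str, unsigned long long *out)
	{
		unsigned long long tmp;
		char dummy;

		/* "%llu%c" returns 2 if anything trails the number, 1 if not */
		if (sscanf(str, "%llu%c", &tmp, &dummy) != 1)
			return -EINVAL;
		*out = tmp;
		return 0;
	}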
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 86438b2f10dd..378878599466 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -31,6 +31,8 @@
31#define MIN_LOG2_INTERLEAVE_SECTORS 3 31#define MIN_LOG2_INTERLEAVE_SECTORS 3
32#define MAX_LOG2_INTERLEAVE_SECTORS 31 32#define MAX_LOG2_INTERLEAVE_SECTORS 31
33#define METADATA_WORKQUEUE_MAX_ACTIVE 16 33#define METADATA_WORKQUEUE_MAX_ACTIVE 16
34#define RECALC_SECTORS 8192
35#define RECALC_WRITE_SUPER 16
34 36
35/* 37/*
36 * Warning - DEBUG_PRINT prints security-sensitive data to the log, 38 * Warning - DEBUG_PRINT prints security-sensitive data to the log,
@@ -44,7 +46,8 @@
44 */ 46 */
45 47
46#define SB_MAGIC "integrt" 48#define SB_MAGIC "integrt"
47#define SB_VERSION 1 49#define SB_VERSION_1 1
50#define SB_VERSION_2 2
48#define SB_SECTORS 8 51#define SB_SECTORS 8
49#define MAX_SECTORS_PER_BLOCK 8 52#define MAX_SECTORS_PER_BLOCK 8
50 53
@@ -57,9 +60,12 @@ struct superblock {
57 __u64 provided_data_sectors; /* userspace uses this value */ 60 __u64 provided_data_sectors; /* userspace uses this value */
58 __u32 flags; 61 __u32 flags;
59 __u8 log2_sectors_per_block; 62 __u8 log2_sectors_per_block;
63 __u8 pad[3];
64 __u64 recalc_sector;
60}; 65};
61 66
62#define SB_FLAG_HAVE_JOURNAL_MAC 0x1 67#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
68#define SB_FLAG_RECALCULATING 0x2
63 69
64#define JOURNAL_ENTRY_ROUNDUP 8 70#define JOURNAL_ENTRY_ROUNDUP 8
65 71
@@ -139,6 +145,7 @@ struct alg_spec {
139 145
140struct dm_integrity_c { 146struct dm_integrity_c {
141 struct dm_dev *dev; 147 struct dm_dev *dev;
148 struct dm_dev *meta_dev;
142 unsigned tag_size; 149 unsigned tag_size;
143 __s8 log2_tag_size; 150 __s8 log2_tag_size;
144 sector_t start; 151 sector_t start;
@@ -170,7 +177,8 @@ struct dm_integrity_c {
170 unsigned short journal_section_sectors; 177 unsigned short journal_section_sectors;
171 unsigned journal_sections; 178 unsigned journal_sections;
172 unsigned journal_entries; 179 unsigned journal_entries;
173 sector_t device_sectors; 180 sector_t data_device_sectors;
181 sector_t meta_device_sectors;
174 unsigned initial_sectors; 182 unsigned initial_sectors;
175 unsigned metadata_run; 183 unsigned metadata_run;
176 __s8 log2_metadata_run; 184 __s8 log2_metadata_run;
@@ -178,7 +186,7 @@ struct dm_integrity_c {
178 __u8 sectors_per_block; 186 __u8 sectors_per_block;
179 187
180 unsigned char mode; 188 unsigned char mode;
181 bool suspending; 189 int suspending;
182 190
183 int failed; 191 int failed;
184 192
@@ -186,6 +194,7 @@ struct dm_integrity_c {
186 194
187 /* these variables are locked with endio_wait.lock */ 195 /* these variables are locked with endio_wait.lock */
188 struct rb_root in_progress; 196 struct rb_root in_progress;
197 struct list_head wait_list;
189 wait_queue_head_t endio_wait; 198 wait_queue_head_t endio_wait;
190 struct workqueue_struct *wait_wq; 199 struct workqueue_struct *wait_wq;
191 200
@@ -210,6 +219,11 @@ struct dm_integrity_c {
210 struct workqueue_struct *writer_wq; 219 struct workqueue_struct *writer_wq;
211 struct work_struct writer_work; 220 struct work_struct writer_work;
212 221
222 struct workqueue_struct *recalc_wq;
223 struct work_struct recalc_work;
224 u8 *recalc_buffer;
225 u8 *recalc_tags;
226
213 struct bio_list flush_bio_list; 227 struct bio_list flush_bio_list;
214 228
215 unsigned long autocommit_jiffies; 229 unsigned long autocommit_jiffies;
@@ -233,7 +247,14 @@ struct dm_integrity_c {
233struct dm_integrity_range { 247struct dm_integrity_range {
234 sector_t logical_sector; 248 sector_t logical_sector;
235 unsigned n_sectors; 249 unsigned n_sectors;
236 struct rb_node node; 250 bool waiting;
251 union {
252 struct rb_node node;
253 struct {
254 struct task_struct *task;
255 struct list_head wait_entry;
256 };
257 };
237}; 258};
238 259
239struct dm_integrity_io { 260struct dm_integrity_io {
@@ -337,10 +358,14 @@ static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
337static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector, 358static void get_area_and_offset(struct dm_integrity_c *ic, sector_t data_sector,
338 sector_t *area, sector_t *offset) 359 sector_t *area, sector_t *offset)
339{ 360{
340 __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors; 361 if (!ic->meta_dev) {
341 362 __u8 log2_interleave_sectors = ic->sb->log2_interleave_sectors;
342 *area = data_sector >> log2_interleave_sectors; 363 *area = data_sector >> log2_interleave_sectors;
343 *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1); 364 *offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1);
365 } else {
366 *area = 0;
367 *offset = data_sector;
368 }
344} 369}
345 370
346#define sector_to_block(ic, n) \ 371#define sector_to_block(ic, n) \
@@ -379,6 +404,9 @@ static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector
379{ 404{
380 sector_t result; 405 sector_t result;
381 406
407 if (ic->meta_dev)
408 return offset;
409
382 result = area << ic->sb->log2_interleave_sectors; 410 result = area << ic->sb->log2_interleave_sectors;
383 if (likely(ic->log2_metadata_run >= 0)) 411 if (likely(ic->log2_metadata_run >= 0))
384 result += (area + 1) << ic->log2_metadata_run; 412 result += (area + 1) << ic->log2_metadata_run;
@@ -386,6 +414,8 @@ static sector_t get_data_sector(struct dm_integrity_c *ic, sector_t area, sector
386 result += (area + 1) * ic->metadata_run; 414 result += (area + 1) * ic->metadata_run;
387 415
388 result += (sector_t)ic->initial_sectors + offset; 416 result += (sector_t)ic->initial_sectors + offset;
417 result += ic->start;
418
389 return result; 419 return result;
390} 420}
391 421
@@ -395,6 +425,14 @@ static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr)
395 *sec_ptr -= ic->journal_sections; 425 *sec_ptr -= ic->journal_sections;
396} 426}
397 427
428static void sb_set_version(struct dm_integrity_c *ic)
429{
430 if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
431 ic->sb->version = SB_VERSION_2;
432 else
433 ic->sb->version = SB_VERSION_1;
434}
435
398static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) 436static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
399{ 437{
400 struct dm_io_request io_req; 438 struct dm_io_request io_req;
@@ -406,7 +444,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
406 io_req.mem.ptr.addr = ic->sb; 444 io_req.mem.ptr.addr = ic->sb;
407 io_req.notify.fn = NULL; 445 io_req.notify.fn = NULL;
408 io_req.client = ic->io; 446 io_req.client = ic->io;
409 io_loc.bdev = ic->dev->bdev; 447 io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
410 io_loc.sector = ic->start; 448 io_loc.sector = ic->start;
411 io_loc.count = SB_SECTORS; 449 io_loc.count = SB_SECTORS;
412 450
@@ -753,7 +791,7 @@ static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned
753 io_req.notify.fn = NULL; 791 io_req.notify.fn = NULL;
754 } 792 }
755 io_req.client = ic->io; 793 io_req.client = ic->io;
756 io_loc.bdev = ic->dev->bdev; 794 io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev;
757 io_loc.sector = ic->start + SB_SECTORS + sector; 795 io_loc.sector = ic->start + SB_SECTORS + sector;
758 io_loc.count = n_sectors; 796 io_loc.count = n_sectors;
759 797
@@ -857,7 +895,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig
857 io_req.notify.context = data; 895 io_req.notify.context = data;
858 io_req.client = ic->io; 896 io_req.client = ic->io;
859 io_loc.bdev = ic->dev->bdev; 897 io_loc.bdev = ic->dev->bdev;
860 io_loc.sector = ic->start + target; 898 io_loc.sector = target;
861 io_loc.count = n_sectors; 899 io_loc.count = n_sectors;
862 900
863 r = dm_io(&io_req, 1, &io_loc, NULL); 901 r = dm_io(&io_req, 1, &io_loc, NULL);
@@ -867,13 +905,27 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig
867 } 905 }
868} 906}
869 907
870static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range) 908static bool ranges_overlap(struct dm_integrity_range *range1, struct dm_integrity_range *range2)
909{
910 return range1->logical_sector < range2->logical_sector + range2->n_sectors &&
911 range1->logical_sector + range1->n_sectors > range2->logical_sector;
912}
913
914static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range, bool check_waiting)
871{ 915{
872 struct rb_node **n = &ic->in_progress.rb_node; 916 struct rb_node **n = &ic->in_progress.rb_node;
873 struct rb_node *parent; 917 struct rb_node *parent;
874 918
875 BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1)); 919 BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1));
876 920
921 if (likely(check_waiting)) {
922 struct dm_integrity_range *range;
923 list_for_each_entry(range, &ic->wait_list, wait_entry) {
924 if (unlikely(ranges_overlap(range, new_range)))
925 return false;
926 }
927 }
928
877 parent = NULL; 929 parent = NULL;
878 930
879 while (*n) { 931 while (*n) {
@@ -898,7 +950,22 @@ static bool add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *
898static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range) 950static void remove_range_unlocked(struct dm_integrity_c *ic, struct dm_integrity_range *range)
899{ 951{
900 rb_erase(&range->node, &ic->in_progress); 952 rb_erase(&range->node, &ic->in_progress);
901 wake_up_locked(&ic->endio_wait); 953 while (unlikely(!list_empty(&ic->wait_list))) {
954 struct dm_integrity_range *last_range =
955 list_first_entry(&ic->wait_list, struct dm_integrity_range, wait_entry);
956 struct task_struct *last_range_task;
957 if (!ranges_overlap(range, last_range))
958 break;
959 last_range_task = last_range->task;
960 list_del(&last_range->wait_entry);
961 if (!add_new_range(ic, last_range, false)) {
962 last_range->task = last_range_task;
963 list_add(&last_range->wait_entry, &ic->wait_list);
964 break;
965 }
966 last_range->waiting = false;
967 wake_up_process(last_range_task);
968 }
902} 969}
903 970
904static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range) 971static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *range)
@@ -910,6 +977,19 @@ static void remove_range(struct dm_integrity_c *ic, struct dm_integrity_range *r
910 spin_unlock_irqrestore(&ic->endio_wait.lock, flags); 977 spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
911} 978}
912 979
980static void wait_and_add_new_range(struct dm_integrity_c *ic, struct dm_integrity_range *new_range)
981{
982 new_range->waiting = true;
983 list_add_tail(&new_range->wait_entry, &ic->wait_list);
984 new_range->task = current;
985 do {
986 __set_current_state(TASK_UNINTERRUPTIBLE);
987 spin_unlock_irq(&ic->endio_wait.lock);
988 io_schedule();
989 spin_lock_irq(&ic->endio_wait.lock);
990 } while (unlikely(new_range->waiting));
991}
992
913static void init_journal_node(struct journal_node *node) 993static void init_journal_node(struct journal_node *node)
914{ 994{
915 RB_CLEAR_NODE(&node->node); 995 RB_CLEAR_NODE(&node->node);
@@ -1599,8 +1679,12 @@ retry:
1599 1679
1600 dio->range.n_sectors = min(dio->range.n_sectors, 1680 dio->range.n_sectors = min(dio->range.n_sectors,
1601 ic->free_sectors << ic->sb->log2_sectors_per_block); 1681 ic->free_sectors << ic->sb->log2_sectors_per_block);
1602 if (unlikely(!dio->range.n_sectors)) 1682 if (unlikely(!dio->range.n_sectors)) {
1603 goto sleep; 1683 if (from_map)
1684 goto offload_to_thread;
1685 sleep_on_endio_wait(ic);
1686 goto retry;
1687 }
1604 range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block; 1688 range_sectors = dio->range.n_sectors >> ic->sb->log2_sectors_per_block;
1605 ic->free_sectors -= range_sectors; 1689 ic->free_sectors -= range_sectors;
1606 journal_section = ic->free_section; 1690 journal_section = ic->free_section;
@@ -1654,22 +1738,20 @@ retry:
1654 } 1738 }
1655 } 1739 }
1656 } 1740 }
1657 if (unlikely(!add_new_range(ic, &dio->range))) { 1741 if (unlikely(!add_new_range(ic, &dio->range, true))) {
1658 /* 1742 /*
1659 * We must not sleep in the request routine because it could 1743 * We must not sleep in the request routine because it could
1660 * stall bios on current->bio_list. 1744 * stall bios on current->bio_list.
1661 * So, we offload the bio to a workqueue if we have to sleep. 1745 * So, we offload the bio to a workqueue if we have to sleep.
1662 */ 1746 */
1663sleep:
1664 if (from_map) { 1747 if (from_map) {
1748offload_to_thread:
1665 spin_unlock_irq(&ic->endio_wait.lock); 1749 spin_unlock_irq(&ic->endio_wait.lock);
1666 INIT_WORK(&dio->work, integrity_bio_wait); 1750 INIT_WORK(&dio->work, integrity_bio_wait);
1667 queue_work(ic->wait_wq, &dio->work); 1751 queue_work(ic->wait_wq, &dio->work);
1668 return; 1752 return;
1669 } else {
1670 sleep_on_endio_wait(ic);
1671 goto retry;
1672 } 1753 }
1754 wait_and_add_new_range(ic, &dio->range);
1673 } 1755 }
1674 spin_unlock_irq(&ic->endio_wait.lock); 1756 spin_unlock_irq(&ic->endio_wait.lock);
1675 1757
@@ -1701,14 +1783,18 @@ sleep:
1701 bio->bi_end_io = integrity_end_io; 1783 bio->bi_end_io = integrity_end_io;
1702 1784
1703 bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; 1785 bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT;
1704 bio->bi_iter.bi_sector += ic->start;
1705 generic_make_request(bio); 1786 generic_make_request(bio);
1706 1787
1707 if (need_sync_io) { 1788 if (need_sync_io) {
1708 wait_for_completion_io(&read_comp); 1789 wait_for_completion_io(&read_comp);
1790 if (unlikely(ic->recalc_wq != NULL) &&
1791 ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
1792 dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector))
1793 goto skip_check;
1709 if (likely(!bio->bi_status)) 1794 if (likely(!bio->bi_status))
1710 integrity_metadata(&dio->work); 1795 integrity_metadata(&dio->work);
1711 else 1796 else
1797skip_check:
1712 dec_in_flight(dio); 1798 dec_in_flight(dio);
1713 1799
1714 } else { 1800 } else {
@@ -1892,8 +1978,8 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
1892 io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block; 1978 io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
1893 1979
1894 spin_lock_irq(&ic->endio_wait.lock); 1980 spin_lock_irq(&ic->endio_wait.lock);
1895 while (unlikely(!add_new_range(ic, &io->range))) 1981 if (unlikely(!add_new_range(ic, &io->range, true)))
1896 sleep_on_endio_wait(ic); 1982 wait_and_add_new_range(ic, &io->range);
1897 1983
1898 if (likely(!from_replay)) { 1984 if (likely(!from_replay)) {
1899 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries]; 1985 struct journal_node *section_node = &ic->journal_tree[i * ic->journal_section_entries];
@@ -1981,7 +2067,7 @@ static void integrity_writer(struct work_struct *w)
1981 unsigned prev_free_sectors; 2067 unsigned prev_free_sectors;
1982 2068
1983 /* the following test is not needed, but it tests the replay code */ 2069 /* the following test is not needed, but it tests the replay code */
1984 if (READ_ONCE(ic->suspending)) 2070 if (READ_ONCE(ic->suspending) && !ic->meta_dev)
1985 return; 2071 return;
1986 2072
1987 spin_lock_irq(&ic->endio_wait.lock); 2073 spin_lock_irq(&ic->endio_wait.lock);
@@ -2008,6 +2094,108 @@ static void integrity_writer(struct work_struct *w)
2008 spin_unlock_irq(&ic->endio_wait.lock); 2094 spin_unlock_irq(&ic->endio_wait.lock);
2009} 2095}
2010 2096
2097static void recalc_write_super(struct dm_integrity_c *ic)
2098{
2099 int r;
2100
2101 dm_integrity_flush_buffers(ic);
2102 if (dm_integrity_failed(ic))
2103 return;
2104
2105 sb_set_version(ic);
2106 r = sync_rw_sb(ic, REQ_OP_WRITE, 0);
2107 if (unlikely(r))
2108 dm_integrity_io_error(ic, "writing superblock", r);
2109}
2110
2111static void integrity_recalc(struct work_struct *w)
2112{
2113 struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work);
2114 struct dm_integrity_range range;
2115 struct dm_io_request io_req;
2116 struct dm_io_region io_loc;
2117 sector_t area, offset;
2118 sector_t metadata_block;
2119 unsigned metadata_offset;
2120 __u8 *t;
2121 unsigned i;
2122 int r;
2123 unsigned super_counter = 0;
2124
2125 spin_lock_irq(&ic->endio_wait.lock);
2126
2127next_chunk:
2128
2129 if (unlikely(READ_ONCE(ic->suspending)))
2130 goto unlock_ret;
2131
2132 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
2133 if (unlikely(range.logical_sector >= ic->provided_data_sectors))
2134 goto unlock_ret;
2135
2136 get_area_and_offset(ic, range.logical_sector, &area, &offset);
2137 range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector);
2138 if (!ic->meta_dev)
2139 range.n_sectors = min(range.n_sectors, (1U << ic->sb->log2_interleave_sectors) - (unsigned)offset);
2140
2141 if (unlikely(!add_new_range(ic, &range, true)))
2142 wait_and_add_new_range(ic, &range);
2143
2144 spin_unlock_irq(&ic->endio_wait.lock);
2145
2146 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
2147 recalc_write_super(ic);
2148 super_counter = 0;
2149 }
2150
2151 if (unlikely(dm_integrity_failed(ic)))
2152 goto err;
2153
2154 io_req.bi_op = REQ_OP_READ;
2155 io_req.bi_op_flags = 0;
2156 io_req.mem.type = DM_IO_VMA;
2157 io_req.mem.ptr.addr = ic->recalc_buffer;
2158 io_req.notify.fn = NULL;
2159 io_req.client = ic->io;
2160 io_loc.bdev = ic->dev->bdev;
2161 io_loc.sector = get_data_sector(ic, area, offset);
2162 io_loc.count = range.n_sectors;
2163
2164 r = dm_io(&io_req, 1, &io_loc, NULL);
2165 if (unlikely(r)) {
2166 dm_integrity_io_error(ic, "reading data", r);
2167 goto err;
2168 }
2169
2170 t = ic->recalc_tags;
2171 for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) {
2172 integrity_sector_checksum(ic, range.logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
2173 t += ic->tag_size;
2174 }
2175
2176 metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
2177
2178 r = dm_integrity_rw_tag(ic, ic->recalc_tags, &metadata_block, &metadata_offset, t - ic->recalc_tags, TAG_WRITE);
2179 if (unlikely(r)) {
2180 dm_integrity_io_error(ic, "writing tags", r);
2181 goto err;
2182 }
2183
2184 spin_lock_irq(&ic->endio_wait.lock);
2185 remove_range_unlocked(ic, &range);
2186 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
2187 goto next_chunk;
2188
2189err:
2190 remove_range(ic, &range);
2191 return;
2192
2193unlock_ret:
2194 spin_unlock_irq(&ic->endio_wait.lock);
2195
2196 recalc_write_super(ic);
2197}
2198
2011static void init_journal(struct dm_integrity_c *ic, unsigned start_section, 2199static void init_journal(struct dm_integrity_c *ic, unsigned start_section,
2012 unsigned n_sections, unsigned char commit_seq) 2200 unsigned n_sections, unsigned char commit_seq)
2013{ 2201{
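The integrity_recalc() worker above boils down to a resumable loop: take a range lock, read a chunk of data, recompute the tag for every block, write the tags, advance sb->recalc_sector, and every RECALC_WRITE_SUPER chunks flush and rewrite the superblock so progress survives a suspend or a crash. Below is a minimal user-space sketch of that checkpointed-chunk pattern; the file names, chunk size and the toy checksum are illustrative assumptions, not the kernel implementation (which uses the configured internal hash and dm-io/dm-bufio).

    /* Illustrative sketch of chunked recalculation with checkpointing.
     * Assumptions (not from the kernel code): data lives in "data.img",
     * tags in "tags.img", the checkpoint (next sector to process) in
     * "checkpoint"; a trivial checksum stands in for the internal hash. */
    #include <stdio.h>
    #include <stdint.h>

    #define SECTOR_SIZE      512
    #define RECALC_SECTORS   1024        /* sectors handled per chunk */
    #define CHECKPOINT_EVERY 16          /* chunks between checkpoint writes */

    static uint32_t sector_checksum(const unsigned char *buf)
    {
        uint32_t sum = 0;
        for (int i = 0; i < SECTOR_SIZE; i++)
            sum = sum * 31 + buf[i];
        return sum;
    }

    static uint64_t load_checkpoint(void)
    {
        uint64_t pos = 0;
        FILE *f = fopen("checkpoint", "rb");
        if (f) {
            if (fread(&pos, sizeof(pos), 1, f) != 1)
                pos = 0;
            fclose(f);
        }
        return pos;
    }

    static void store_checkpoint(uint64_t pos)
    {
        FILE *f = fopen("checkpoint", "wb");
        if (!f)
            return;
        fwrite(&pos, sizeof(pos), 1, f);
        fclose(f);
    }

    int main(void)
    {
        static unsigned char buf[RECALC_SECTORS * SECTOR_SIZE];
        FILE *data = fopen("data.img", "rb");
        FILE *tags = fopen("tags.img", "r+b");
        uint64_t sector = load_checkpoint();
        unsigned chunks = 0;

        if (!data || !tags)
            return 1;

        for (;;) {
            size_t got;

            fseek(data, (long)(sector * SECTOR_SIZE), SEEK_SET);
            got = fread(buf, SECTOR_SIZE, RECALC_SECTORS, data);
            if (!got)
                break;                 /* reached the end of the data */

            /* one tag per sector, written contiguously to the tag area */
            fseek(tags, (long)(sector * sizeof(uint32_t)), SEEK_SET);
            for (size_t i = 0; i < got; i++) {
                uint32_t tag = sector_checksum(buf + i * SECTOR_SIZE);
                fwrite(&tag, sizeof(tag), 1, tags);
            }

            sector += got;
            if (++chunks == CHECKPOINT_EVERY) {
                fflush(tags);
                store_checkpoint(sector);  /* analogous to recalc_write_super() */
                chunks = 0;
            }
        }

        fflush(tags);
        store_checkpoint(sector);
        fclose(tags);
        fclose(data);
        return 0;
    }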
@@ -2210,17 +2398,22 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
2210 2398
2211 del_timer_sync(&ic->autocommit_timer); 2399 del_timer_sync(&ic->autocommit_timer);
2212 2400
2213 ic->suspending = true; 2401 WRITE_ONCE(ic->suspending, 1);
2402
2403 if (ic->recalc_wq)
2404 drain_workqueue(ic->recalc_wq);
2214 2405
2215 queue_work(ic->commit_wq, &ic->commit_work); 2406 queue_work(ic->commit_wq, &ic->commit_work);
2216 drain_workqueue(ic->commit_wq); 2407 drain_workqueue(ic->commit_wq);
2217 2408
2218 if (ic->mode == 'J') { 2409 if (ic->mode == 'J') {
2410 if (ic->meta_dev)
2411 queue_work(ic->writer_wq, &ic->writer_work);
2219 drain_workqueue(ic->writer_wq); 2412 drain_workqueue(ic->writer_wq);
2220 dm_integrity_flush_buffers(ic); 2413 dm_integrity_flush_buffers(ic);
2221 } 2414 }
2222 2415
2223 ic->suspending = false; 2416 WRITE_ONCE(ic->suspending, 0);
2224 2417
2225 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 2418 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
2226 2419
@@ -2232,6 +2425,16 @@ static void dm_integrity_resume(struct dm_target *ti)
2232 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; 2425 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
2233 2426
2234 replay_journal(ic); 2427 replay_journal(ic);
2428
2429 if (ic->recalc_wq && ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
2430 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
2431 if (recalc_pos < ic->provided_data_sectors) {
2432 queue_work(ic->recalc_wq, &ic->recalc_work);
2433 } else if (recalc_pos > ic->provided_data_sectors) {
2434 ic->sb->recalc_sector = cpu_to_le64(ic->provided_data_sectors);
2435 recalc_write_super(ic);
2436 }
2437 }
2235} 2438}
2236 2439
2237static void dm_integrity_status(struct dm_target *ti, status_type_t type, 2440static void dm_integrity_status(struct dm_target *ti, status_type_t type,
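Taken together, the postsuspend/resume hunks hand the background recalculation across a suspend cycle: postsuspend now raises the suspending flag with WRITE_ONCE() and drains recalc_wq first, so integrity_recalc() hits its suspending check at the next chunk, checkpoints through recalc_write_super() and returns; only then is the commit queue kicked and drained, and in journal mode the writer queue is kicked as well when a separate metadata device is in use. On resume, if the superblock still carries SB_FLAG_RECALCULATING, a recalc_sector below provided_data_sectors simply re-queues the work where it left off, while a value beyond it is clamped back to provided_data_sectors and the superblock rewritten.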
@@ -2243,7 +2446,13 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
2243 2446
2244 switch (type) { 2447 switch (type) {
2245 case STATUSTYPE_INFO: 2448 case STATUSTYPE_INFO:
2246 DMEMIT("%llu", (unsigned long long)atomic64_read(&ic->number_of_mismatches)); 2449 DMEMIT("%llu %llu",
2450 (unsigned long long)atomic64_read(&ic->number_of_mismatches),
2451 (unsigned long long)ic->provided_data_sectors);
2452 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
2453 DMEMIT(" %llu", (unsigned long long)le64_to_cpu(ic->sb->recalc_sector));
2454 else
2455 DMEMIT(" -");
2247 break; 2456 break;
2248 2457
2249 case STATUSTYPE_TABLE: { 2458 case STATUSTYPE_TABLE: {
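With the INFO branch extended as above, a 'dmsetup status' line for an integrity target now reports three fields instead of one, for example '0 2097152 -' (the numbers are only an illustration): the mismatch count, provided_data_sectors, and either the current recalc_sector or '-' when SB_FLAG_RECALCULATING is not set.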
@@ -2251,19 +2460,25 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
2251 watermark_percentage += ic->journal_entries / 2; 2460 watermark_percentage += ic->journal_entries / 2;
2252 do_div(watermark_percentage, ic->journal_entries); 2461 do_div(watermark_percentage, ic->journal_entries);
2253 arg_count = 5; 2462 arg_count = 5;
2463 arg_count += !!ic->meta_dev;
2254 arg_count += ic->sectors_per_block != 1; 2464 arg_count += ic->sectors_per_block != 1;
2465 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
2255 arg_count += !!ic->internal_hash_alg.alg_string; 2466 arg_count += !!ic->internal_hash_alg.alg_string;
2256 arg_count += !!ic->journal_crypt_alg.alg_string; 2467 arg_count += !!ic->journal_crypt_alg.alg_string;
2257 arg_count += !!ic->journal_mac_alg.alg_string; 2468 arg_count += !!ic->journal_mac_alg.alg_string;
2258 DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start, 2469 DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
2259 ic->tag_size, ic->mode, arg_count); 2470 ic->tag_size, ic->mode, arg_count);
2471 if (ic->meta_dev)
2472 DMEMIT(" meta_device:%s", ic->meta_dev->name);
2473 if (ic->sectors_per_block != 1)
2474 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
2475 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
2476 DMEMIT(" recalculate");
2260 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); 2477 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
2261 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); 2478 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
2262 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); 2479 DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
2263 DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); 2480 DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
2264 DMEMIT(" commit_time:%u", ic->autocommit_msec); 2481 DMEMIT(" commit_time:%u", ic->autocommit_msec);
2265 if (ic->sectors_per_block != 1)
2266 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
2267 2482
2268#define EMIT_ALG(a, n) \ 2483#define EMIT_ALG(a, n) \
2269 do { \ 2484 do { \
@@ -2286,7 +2501,10 @@ static int dm_integrity_iterate_devices(struct dm_target *ti,
2286{ 2501{
2287 struct dm_integrity_c *ic = ti->private; 2502 struct dm_integrity_c *ic = ti->private;
2288 2503
2289 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data); 2504 if (!ic->meta_dev)
2505 return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
2506 else
2507 return fn(ti, ic->dev, 0, ti->len, data);
2290} 2508}
2291 2509
2292static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits) 2510static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -2319,26 +2537,38 @@ static void calculate_journal_section_size(struct dm_integrity_c *ic)
2319static int calculate_device_limits(struct dm_integrity_c *ic) 2537static int calculate_device_limits(struct dm_integrity_c *ic)
2320{ 2538{
2321 __u64 initial_sectors; 2539 __u64 initial_sectors;
2322 sector_t last_sector, last_area, last_offset;
2323 2540
2324 calculate_journal_section_size(ic); 2541 calculate_journal_section_size(ic);
2325 initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections; 2542 initial_sectors = SB_SECTORS + (__u64)ic->journal_section_sectors * ic->journal_sections;
2326 if (initial_sectors + METADATA_PADDING_SECTORS >= ic->device_sectors || initial_sectors > UINT_MAX) 2543 if (initial_sectors + METADATA_PADDING_SECTORS >= ic->meta_device_sectors || initial_sectors > UINT_MAX)
2327 return -EINVAL; 2544 return -EINVAL;
2328 ic->initial_sectors = initial_sectors; 2545 ic->initial_sectors = initial_sectors;
2329 2546
2330 ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block), 2547 if (!ic->meta_dev) {
2331 (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT; 2548 sector_t last_sector, last_area, last_offset;
2332 if (!(ic->metadata_run & (ic->metadata_run - 1)))
2333 ic->log2_metadata_run = __ffs(ic->metadata_run);
2334 else
2335 ic->log2_metadata_run = -1;
2336 2549
2337 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset); 2550 ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
2338 last_sector = get_data_sector(ic, last_area, last_offset); 2551 (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
2552 if (!(ic->metadata_run & (ic->metadata_run - 1)))
2553 ic->log2_metadata_run = __ffs(ic->metadata_run);
2554 else
2555 ic->log2_metadata_run = -1;
2339 2556
2340 if (ic->start + last_sector < last_sector || ic->start + last_sector >= ic->device_sectors) 2557 get_area_and_offset(ic, ic->provided_data_sectors - 1, &last_area, &last_offset);
2341 return -EINVAL; 2558 last_sector = get_data_sector(ic, last_area, last_offset);
2559 if (last_sector < ic->start || last_sector >= ic->meta_device_sectors)
2560 return -EINVAL;
2561 } else {
2562 __u64 meta_size = ic->provided_data_sectors * ic->tag_size;
2563 meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1))
2564 >> (ic->log2_buffer_sectors + SECTOR_SHIFT);
2565 meta_size <<= ic->log2_buffer_sectors;
2566 if (ic->initial_sectors + meta_size < ic->initial_sectors ||
2567 ic->initial_sectors + meta_size > ic->meta_device_sectors)
2568 return -EINVAL;
2569 ic->metadata_run = 1;
2570 ic->log2_metadata_run = 0;
2571 }
2342 2572
2343 return 0; 2573 return 0;
2344} 2574}
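A quick worked example of the new meta_dev branch, assuming 512-byte blocks, a 4-byte crc32c tag and 128-sector metadata buffers (all assumptions, chosen for round numbers): 2,097,152 provided data sectors need 2,097,152 x 4 = 8,388,608 bytes of tags, which rounds to exactly 128 buffers of 65,536 bytes, i.e. 128 x 128 = 16,384 sectors of metadata; calculate_device_limits() then only has to verify that initial_sectors (superblock plus journal) plus those 16,384 sectors fit on the metadata device, with metadata_run pinned to 1 because interleaving does not apply there.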
@@ -2350,7 +2580,6 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec
2350 2580
2351 memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT); 2581 memset(ic->sb, 0, SB_SECTORS << SECTOR_SHIFT);
2352 memcpy(ic->sb->magic, SB_MAGIC, 8); 2582 memcpy(ic->sb->magic, SB_MAGIC, 8);
2353 ic->sb->version = SB_VERSION;
2354 ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size); 2583 ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size);
2355 ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block); 2584 ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block);
2356 if (ic->journal_mac_alg.alg_string) 2585 if (ic->journal_mac_alg.alg_string)
@@ -2360,28 +2589,55 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec
2360 journal_sections = journal_sectors / ic->journal_section_sectors; 2589 journal_sections = journal_sectors / ic->journal_section_sectors;
2361 if (!journal_sections) 2590 if (!journal_sections)
2362 journal_sections = 1; 2591 journal_sections = 1;
2363 ic->sb->journal_sections = cpu_to_le32(journal_sections);
2364 2592
2365 if (!interleave_sectors) 2593 if (!ic->meta_dev) {
2366 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 2594 ic->sb->journal_sections = cpu_to_le32(journal_sections);
2367 ic->sb->log2_interleave_sectors = __fls(interleave_sectors); 2595 if (!interleave_sectors)
2368 ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 2596 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
2369 ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors); 2597 ic->sb->log2_interleave_sectors = __fls(interleave_sectors);
2370 2598 ic->sb->log2_interleave_sectors = max((__u8)MIN_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
2371 ic->provided_data_sectors = 0; 2599 ic->sb->log2_interleave_sectors = min((__u8)MAX_LOG2_INTERLEAVE_SECTORS, ic->sb->log2_interleave_sectors);
2372 for (test_bit = fls64(ic->device_sectors) - 1; test_bit >= 3; test_bit--) { 2600
2373 __u64 prev_data_sectors = ic->provided_data_sectors; 2601 ic->provided_data_sectors = 0;
2602 for (test_bit = fls64(ic->meta_device_sectors) - 1; test_bit >= 3; test_bit--) {
2603 __u64 prev_data_sectors = ic->provided_data_sectors;
2604
2605 ic->provided_data_sectors |= (sector_t)1 << test_bit;
2606 if (calculate_device_limits(ic))
2607 ic->provided_data_sectors = prev_data_sectors;
2608 }
2609 if (!ic->provided_data_sectors)
2610 return -EINVAL;
2611 } else {
2612 ic->sb->log2_interleave_sectors = 0;
2613 ic->provided_data_sectors = ic->data_device_sectors;
2614 ic->provided_data_sectors &= ~(sector_t)(ic->sectors_per_block - 1);
2615
2616try_smaller_buffer:
2617 ic->sb->journal_sections = cpu_to_le32(0);
2618 for (test_bit = fls(journal_sections) - 1; test_bit >= 0; test_bit--) {
2619 __u32 prev_journal_sections = le32_to_cpu(ic->sb->journal_sections);
2620 __u32 test_journal_sections = prev_journal_sections | (1U << test_bit);
2621 if (test_journal_sections > journal_sections)
2622 continue;
2623 ic->sb->journal_sections = cpu_to_le32(test_journal_sections);
2624 if (calculate_device_limits(ic))
2625 ic->sb->journal_sections = cpu_to_le32(prev_journal_sections);
2374 2626
2375 ic->provided_data_sectors |= (sector_t)1 << test_bit; 2627 }
2376 if (calculate_device_limits(ic)) 2628 if (!le32_to_cpu(ic->sb->journal_sections)) {
2377 ic->provided_data_sectors = prev_data_sectors; 2629 if (ic->log2_buffer_sectors > 3) {
2630 ic->log2_buffer_sectors--;
2631 goto try_smaller_buffer;
2632 }
2633 return -EINVAL;
2634 }
2378 } 2635 }
2379 2636
2380 if (!ic->provided_data_sectors)
2381 return -EINVAL;
2382
2383 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); 2637 ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors);
2384 2638
2639 sb_set_version(ic);
2640
2385 return 0; 2641 return 0;
2386} 2642}
2387 2643
@@ -2828,6 +3084,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
2828 {0, 9, "Invalid number of feature args"}, 3084 {0, 9, "Invalid number of feature args"},
2829 }; 3085 };
2830 unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; 3086 unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
3087 bool recalculate;
2831 bool should_write_sb; 3088 bool should_write_sb;
2832 __u64 threshold; 3089 __u64 threshold;
2833 unsigned long long start; 3090 unsigned long long start;
@@ -2848,6 +3105,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
2848 ti->per_io_data_size = sizeof(struct dm_integrity_io); 3105 ti->per_io_data_size = sizeof(struct dm_integrity_io);
2849 3106
2850 ic->in_progress = RB_ROOT; 3107 ic->in_progress = RB_ROOT;
3108 INIT_LIST_HEAD(&ic->wait_list);
2851 init_waitqueue_head(&ic->endio_wait); 3109 init_waitqueue_head(&ic->endio_wait);
2852 bio_list_init(&ic->flush_bio_list); 3110 bio_list_init(&ic->flush_bio_list);
2853 init_waitqueue_head(&ic->copy_to_journal_wait); 3111 init_waitqueue_head(&ic->copy_to_journal_wait);
@@ -2883,13 +3141,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
2883 goto bad; 3141 goto bad;
2884 } 3142 }
2885 3143
2886 ic->device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT; 3144 journal_sectors = 0;
2887 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
2888 ic->device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
2889 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS; 3145 interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
2890 buffer_sectors = DEFAULT_BUFFER_SECTORS; 3146 buffer_sectors = DEFAULT_BUFFER_SECTORS;
2891 journal_watermark = DEFAULT_JOURNAL_WATERMARK; 3147 journal_watermark = DEFAULT_JOURNAL_WATERMARK;
2892 sync_msec = DEFAULT_SYNC_MSEC; 3148 sync_msec = DEFAULT_SYNC_MSEC;
3149 recalculate = false;
2893 ic->sectors_per_block = 1; 3150 ic->sectors_per_block = 1;
2894 3151
2895 as.argc = argc - DIRECT_ARGUMENTS; 3152 as.argc = argc - DIRECT_ARGUMENTS;
@@ -2908,7 +3165,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
2908 goto bad; 3165 goto bad;
2909 } 3166 }
2910 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1) 3167 if (sscanf(opt_string, "journal_sectors:%u%c", &val, &dummy) == 1)
2911 journal_sectors = val; 3168 journal_sectors = val ? val : 1;
2912 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1) 3169 else if (sscanf(opt_string, "interleave_sectors:%u%c", &val, &dummy) == 1)
2913 interleave_sectors = val; 3170 interleave_sectors = val;
2914 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1) 3171 else if (sscanf(opt_string, "buffer_sectors:%u%c", &val, &dummy) == 1)
@@ -2917,7 +3174,17 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
2917 journal_watermark = val; 3174 journal_watermark = val;
2918 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1) 3175 else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
2919 sync_msec = val; 3176 sync_msec = val;
2920 else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) { 3177 else if (!memcmp(opt_string, "meta_device:", strlen("meta_device:"))) {
3178 if (ic->meta_dev) {
3179 dm_put_device(ti, ic->meta_dev);
3180 ic->meta_dev = NULL;
3181 }
3182 r = dm_get_device(ti, strchr(opt_string, ':') + 1, dm_table_get_mode(ti->table), &ic->meta_dev);
3183 if (r) {
3184 ti->error = "Device lookup failed";
3185 goto bad;
3186 }
3187 } else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
2921 if (val < 1 << SECTOR_SHIFT || 3188 if (val < 1 << SECTOR_SHIFT ||
2922 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT || 3189 val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT ||
2923 (val & (val -1))) { 3190 (val & (val -1))) {
@@ -2941,6 +3208,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
2941 "Invalid journal_mac argument"); 3208 "Invalid journal_mac argument");
2942 if (r) 3209 if (r)
2943 goto bad; 3210 goto bad;
3211 } else if (!strcmp(opt_string, "recalculate")) {
3212 recalculate = true;
2944 } else { 3213 } else {
2945 r = -EINVAL; 3214 r = -EINVAL;
2946 ti->error = "Invalid argument"; 3215 ti->error = "Invalid argument";
@@ -2948,6 +3217,21 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
2948 } 3217 }
2949 } 3218 }
2950 3219
3220 ic->data_device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT;
3221 if (!ic->meta_dev)
3222 ic->meta_device_sectors = ic->data_device_sectors;
3223 else
3224 ic->meta_device_sectors = i_size_read(ic->meta_dev->bdev->bd_inode) >> SECTOR_SHIFT;
3225
3226 if (!journal_sectors) {
3227 journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS,
3228 ic->data_device_sectors >> DEFAULT_JOURNAL_SIZE_FACTOR);
3229 }
3230
3231 if (!buffer_sectors)
3232 buffer_sectors = 1;
3233 ic->log2_buffer_sectors = min((int)__fls(buffer_sectors), 31 - SECTOR_SHIFT);
3234
2951 r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error, 3235 r = get_mac(&ic->internal_hash, &ic->internal_hash_alg, &ti->error,
2952 "Invalid internal hash", "Error setting internal hash key"); 3236 "Invalid internal hash", "Error setting internal hash key");
2953 if (r) 3237 if (r)
@@ -3062,7 +3346,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3062 should_write_sb = true; 3346 should_write_sb = true;
3063 } 3347 }
3064 3348
3065 if (ic->sb->version != SB_VERSION) { 3349 if (!ic->sb->version || ic->sb->version > SB_VERSION_2) {
3066 r = -EINVAL; 3350 r = -EINVAL;
3067 ti->error = "Unknown version"; 3351 ti->error = "Unknown version";
3068 goto bad; 3352 goto bad;
@@ -3083,11 +3367,19 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3083 goto bad; 3367 goto bad;
3084 } 3368 }
3085 /* make sure that ti->max_io_len doesn't overflow */ 3369 /* make sure that ti->max_io_len doesn't overflow */
3086 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS || 3370 if (!ic->meta_dev) {
3087 ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) { 3371 if (ic->sb->log2_interleave_sectors < MIN_LOG2_INTERLEAVE_SECTORS ||
3088 r = -EINVAL; 3372 ic->sb->log2_interleave_sectors > MAX_LOG2_INTERLEAVE_SECTORS) {
3089 ti->error = "Invalid interleave_sectors in the superblock"; 3373 r = -EINVAL;
3090 goto bad; 3374 ti->error = "Invalid interleave_sectors in the superblock";
3375 goto bad;
3376 }
3377 } else {
3378 if (ic->sb->log2_interleave_sectors) {
3379 r = -EINVAL;
3380 ti->error = "Invalid interleave_sectors in the superblock";
3381 goto bad;
3382 }
3091 } 3383 }
3092 ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors); 3384 ic->provided_data_sectors = le64_to_cpu(ic->sb->provided_data_sectors);
3093 if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) { 3385 if (ic->provided_data_sectors != le64_to_cpu(ic->sb->provided_data_sectors)) {
@@ -3101,20 +3393,28 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3101 ti->error = "Journal mac mismatch"; 3393 ti->error = "Journal mac mismatch";
3102 goto bad; 3394 goto bad;
3103 } 3395 }
3396
3397try_smaller_buffer:
3104 r = calculate_device_limits(ic); 3398 r = calculate_device_limits(ic);
3105 if (r) { 3399 if (r) {
3400 if (ic->meta_dev) {
3401 if (ic->log2_buffer_sectors > 3) {
3402 ic->log2_buffer_sectors--;
3403 goto try_smaller_buffer;
3404 }
3405 }
3106 ti->error = "The device is too small"; 3406 ti->error = "The device is too small";
3107 goto bad; 3407 goto bad;
3108 } 3408 }
3409 if (!ic->meta_dev)
3410 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
3411
3109 if (ti->len > ic->provided_data_sectors) { 3412 if (ti->len > ic->provided_data_sectors) {
3110 r = -EINVAL; 3413 r = -EINVAL;
3111 ti->error = "Not enough provided sectors for requested mapping size"; 3414 ti->error = "Not enough provided sectors for requested mapping size";
3112 goto bad; 3415 goto bad;
3113 } 3416 }
3114 3417
3115 if (!buffer_sectors)
3116 buffer_sectors = 1;
3117 ic->log2_buffer_sectors = min3((int)__fls(buffer_sectors), (int)__ffs(ic->metadata_run), 31 - SECTOR_SHIFT);
3118 3418
3119 threshold = (__u64)ic->journal_entries * (100 - journal_watermark); 3419 threshold = (__u64)ic->journal_entries * (100 - journal_watermark);
3120 threshold += 50; 3420 threshold += 50;
@@ -3138,8 +3438,40 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3138 (unsigned long long)ic->provided_data_sectors); 3438 (unsigned long long)ic->provided_data_sectors);
3139 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); 3439 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
3140 3440
3141 ic->bufio = dm_bufio_client_create(ic->dev->bdev, 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 3441 if (recalculate && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
3142 1, 0, NULL, NULL); 3442 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3443 ic->sb->recalc_sector = cpu_to_le64(0);
3444 }
3445
3446 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
3447 if (!ic->internal_hash) {
3448 r = -EINVAL;
3449 ti->error = "Recalculate is only valid with internal hash";
3450 goto bad;
3451 }
3452	ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
3453 if (!ic->recalc_wq ) {
3454 ti->error = "Cannot allocate workqueue";
3455 r = -ENOMEM;
3456 goto bad;
3457 }
3458 INIT_WORK(&ic->recalc_work, integrity_recalc);
3459 ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT);
3460 if (!ic->recalc_buffer) {
3461 ti->error = "Cannot allocate buffer for recalculating";
3462 r = -ENOMEM;
3463 goto bad;
3464 }
3465 ic->recalc_tags = kvmalloc((RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size, GFP_KERNEL);
3466 if (!ic->recalc_tags) {
3467 ti->error = "Cannot allocate tags for recalculating";
3468 r = -ENOMEM;
3469 goto bad;
3470 }
3471 }
3472
3473 ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
3474 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL);
3143 if (IS_ERR(ic->bufio)) { 3475 if (IS_ERR(ic->bufio)) {
3144 r = PTR_ERR(ic->bufio); 3476 r = PTR_ERR(ic->bufio);
3145 ti->error = "Cannot initialize dm-bufio"; 3477 ti->error = "Cannot initialize dm-bufio";
@@ -3171,9 +3503,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3171 ic->just_formatted = true; 3503 ic->just_formatted = true;
3172 } 3504 }
3173 3505
3174 r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors); 3506 if (!ic->meta_dev) {
3175 if (r) 3507 r = dm_set_target_max_io_len(ti, 1U << ic->sb->log2_interleave_sectors);
3176 goto bad; 3508 if (r)
3509 goto bad;
3510 }
3177 3511
3178 if (!ic->internal_hash) 3512 if (!ic->internal_hash)
3179 dm_integrity_set(ti, ic); 3513 dm_integrity_set(ti, ic);
@@ -3192,6 +3526,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
3192 struct dm_integrity_c *ic = ti->private; 3526 struct dm_integrity_c *ic = ti->private;
3193 3527
3194 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 3528 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
3529 BUG_ON(!list_empty(&ic->wait_list));
3195 3530
3196 if (ic->metadata_wq) 3531 if (ic->metadata_wq)
3197 destroy_workqueue(ic->metadata_wq); 3532 destroy_workqueue(ic->metadata_wq);
@@ -3201,6 +3536,12 @@ static void dm_integrity_dtr(struct dm_target *ti)
3201 destroy_workqueue(ic->commit_wq); 3536 destroy_workqueue(ic->commit_wq);
3202 if (ic->writer_wq) 3537 if (ic->writer_wq)
3203 destroy_workqueue(ic->writer_wq); 3538 destroy_workqueue(ic->writer_wq);
3539 if (ic->recalc_wq)
3540 destroy_workqueue(ic->recalc_wq);
3541 if (ic->recalc_buffer)
3542 vfree(ic->recalc_buffer);
3543 if (ic->recalc_tags)
3544 kvfree(ic->recalc_tags);
3204 if (ic->bufio) 3545 if (ic->bufio)
3205 dm_bufio_client_destroy(ic->bufio); 3546 dm_bufio_client_destroy(ic->bufio);
3206 mempool_exit(&ic->journal_io_mempool); 3547 mempool_exit(&ic->journal_io_mempool);
@@ -3208,6 +3549,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
3208 dm_io_client_destroy(ic->io); 3549 dm_io_client_destroy(ic->io);
3209 if (ic->dev) 3550 if (ic->dev)
3210 dm_put_device(ti, ic->dev); 3551 dm_put_device(ti, ic->dev);
3552 if (ic->meta_dev)
3553 dm_put_device(ti, ic->meta_dev);
3211 dm_integrity_free_page_list(ic, ic->journal); 3554 dm_integrity_free_page_list(ic, ic->journal);
3212 dm_integrity_free_page_list(ic, ic->journal_io); 3555 dm_integrity_free_page_list(ic, ic->journal_io);
3213 dm_integrity_free_page_list(ic, ic->journal_xor); 3556 dm_integrity_free_page_list(ic, ic->journal_xor);
@@ -3248,7 +3591,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
3248 3591
3249static struct target_type integrity_target = { 3592static struct target_type integrity_target = {
3250 .name = "integrity", 3593 .name = "integrity",
3251 .version = {1, 1, 0}, 3594 .version = {1, 2, 0},
3252 .module = THIS_MODULE, 3595 .module = THIS_MODULE,
3253 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, 3596 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
3254 .ctr = dm_integrity_ctr, 3597 .ctr = dm_integrity_ctr,
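Putting the new constructor options together, a table line for a target that keeps its tags on a second device and recalculates them in the background might look like this (the devices, length and the 32-byte sha256 internal hash are illustrative choices, not defaults):

    0 2097152 integrity /dev/sdb 0 32 J 3 meta_device:/dev/sdc internal_hash:sha256 recalculate

The trailing count of optional arguments (3 here) has to match, exactly as the STATUSTYPE_TABLE code recomputes arg_count above, and the constructor rejects 'recalculate' when no internal hash is configured.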
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 3c7547a3c371..2fc4213e02b5 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -487,6 +487,8 @@ static int run_complete_job(struct kcopyd_job *job)
487 if (atomic_dec_and_test(&kc->nr_jobs)) 487 if (atomic_dec_and_test(&kc->nr_jobs))
488 wake_up(&kc->destroyq); 488 wake_up(&kc->destroyq);
489 489
490 cond_resched();
491
490 return 0; 492 return 0;
491} 493}
492 494
@@ -741,9 +743,9 @@ static void split_job(struct kcopyd_job *master_job)
741 } 743 }
742} 744}
743 745
744int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, 746void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
745 unsigned int num_dests, struct dm_io_region *dests, 747 unsigned int num_dests, struct dm_io_region *dests,
746 unsigned int flags, dm_kcopyd_notify_fn fn, void *context) 748 unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
747{ 749{
748 struct kcopyd_job *job; 750 struct kcopyd_job *job;
749 int i; 751 int i;
@@ -818,16 +820,14 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
818 job->progress = 0; 820 job->progress = 0;
819 split_job(job); 821 split_job(job);
820 } 822 }
821
822 return 0;
823} 823}
824EXPORT_SYMBOL(dm_kcopyd_copy); 824EXPORT_SYMBOL(dm_kcopyd_copy);
825 825
826int dm_kcopyd_zero(struct dm_kcopyd_client *kc, 826void dm_kcopyd_zero(struct dm_kcopyd_client *kc,
827 unsigned num_dests, struct dm_io_region *dests, 827 unsigned num_dests, struct dm_io_region *dests,
828 unsigned flags, dm_kcopyd_notify_fn fn, void *context) 828 unsigned flags, dm_kcopyd_notify_fn fn, void *context)
829{ 829{
830 return dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context); 830 dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
831} 831}
832EXPORT_SYMBOL(dm_kcopyd_zero); 832EXPORT_SYMBOL(dm_kcopyd_zero);
833 833
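The point of turning dm_kcopyd_copy() and dm_kcopyd_zero() into void functions shows up in the callers that follow (dm-raid1, dm-thin, dm-zoned): submission can no longer fail, so the only error path left is the notify callback's read_err/write_err pair and the 'if (r) ...' fallbacks disappear. A self-contained user-space sketch of that contract, with made-up names (async_copy, copy_done) standing in for the kcopyd API:

    /* Sketch of a submit-never-fails async copy API: the submitter returns
     * void and all errors are delivered to the completion callback, mirroring
     * the dm_kcopyd_copy() conversion.  Names and types are illustrative. */
    #include <stdio.h>

    typedef void (*copy_notify_fn)(int read_err, unsigned long write_err,
                                   void *context);

    struct copy_job {
        const char *what;
        copy_notify_fn fn;
        void *context;
    };

    /* "Submission" cannot fail: any problem is reported via the callback. */
    static void async_copy(struct copy_job *job, copy_notify_fn fn, void *context)
    {
        job->fn = fn;
        job->context = context;

        /* ... queue the job; later, when it finishes (or fails): */
        job->fn(0 /* read_err */, 0 /* write_err */, job->context);
    }

    static void copy_done(int read_err, unsigned long write_err, void *context)
    {
        if (read_err || write_err)
            fprintf(stderr, "copy of %s failed\n", (const char *)context);
        else
            printf("copy of %s done\n", (const char *)context);
    }

    int main(void)
    {
        struct copy_job job = { .what = "region 0" };

        /* No return value to check at the call site any more. */
        async_copy(&job, copy_done, (void *)job.what);
        return 0;
    }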
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 5903e492bb34..79eab1071ec2 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -326,9 +326,8 @@ static void recovery_complete(int read_err, unsigned long write_err,
326 dm_rh_recovery_end(reg, !(read_err || write_err)); 326 dm_rh_recovery_end(reg, !(read_err || write_err));
327} 327}
328 328
329static int recover(struct mirror_set *ms, struct dm_region *reg) 329static void recover(struct mirror_set *ms, struct dm_region *reg)
330{ 330{
331 int r;
332 unsigned i; 331 unsigned i;
333 struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; 332 struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest;
334 struct mirror *m; 333 struct mirror *m;
@@ -367,10 +366,8 @@ static int recover(struct mirror_set *ms, struct dm_region *reg)
367 if (!errors_handled(ms)) 366 if (!errors_handled(ms))
368 set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); 367 set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
369 368
370 r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, 369 dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
371 flags, recovery_complete, reg); 370 flags, recovery_complete, reg);
372
373 return r;
374} 371}
375 372
376static void reset_ms_flags(struct mirror_set *ms) 373static void reset_ms_flags(struct mirror_set *ms)
@@ -388,7 +385,6 @@ static void do_recovery(struct mirror_set *ms)
388{ 385{
389 struct dm_region *reg; 386 struct dm_region *reg;
390 struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); 387 struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
391 int r;
392 388
393 /* 389 /*
394 * Start quiescing some regions. 390 * Start quiescing some regions.
@@ -398,11 +394,8 @@ static void do_recovery(struct mirror_set *ms)
398 /* 394 /*
399 * Copy any already quiesced regions. 395 * Copy any already quiesced regions.
400 */ 396 */
401 while ((reg = dm_rh_recovery_start(ms->rh))) { 397 while ((reg = dm_rh_recovery_start(ms->rh)))
402 r = recover(ms, reg); 398 recover(ms, reg);
403 if (r)
404 dm_rh_recovery_end(reg, 0);
405 }
406 399
407 /* 400 /*
408 * Update the in sync flag. 401 * Update the in sync flag.
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 97de7a7334d4..ae4b33d10924 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -85,7 +85,7 @@ struct dm_snapshot {
85 * A list of pending exceptions that completed out of order. 85 * A list of pending exceptions that completed out of order.
86 * Protected by kcopyd single-threaded callback. 86 * Protected by kcopyd single-threaded callback.
87 */ 87 */
88 struct list_head out_of_order_list; 88 struct rb_root out_of_order_tree;
89 89
90 mempool_t pending_pool; 90 mempool_t pending_pool;
91 91
@@ -200,7 +200,7 @@ struct dm_snap_pending_exception {
200 /* A sequence number, it is used for in-order completion. */ 200 /* A sequence number, it is used for in-order completion. */
201 sector_t exception_sequence; 201 sector_t exception_sequence;
202 202
203 struct list_head out_of_order_entry; 203 struct rb_node out_of_order_node;
204 204
205 /* 205 /*
206 * For writing a complete chunk, bypassing the copy. 206 * For writing a complete chunk, bypassing the copy.
@@ -1173,7 +1173,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1173 atomic_set(&s->pending_exceptions_count, 0); 1173 atomic_set(&s->pending_exceptions_count, 0);
1174 s->exception_start_sequence = 0; 1174 s->exception_start_sequence = 0;
1175 s->exception_complete_sequence = 0; 1175 s->exception_complete_sequence = 0;
1176 INIT_LIST_HEAD(&s->out_of_order_list); 1176 s->out_of_order_tree = RB_ROOT;
1177 mutex_init(&s->lock); 1177 mutex_init(&s->lock);
1178 INIT_LIST_HEAD(&s->list); 1178 INIT_LIST_HEAD(&s->list);
1179 spin_lock_init(&s->pe_lock); 1179 spin_lock_init(&s->pe_lock);
@@ -1539,28 +1539,41 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
1539 pe->copy_error = read_err || write_err; 1539 pe->copy_error = read_err || write_err;
1540 1540
1541 if (pe->exception_sequence == s->exception_complete_sequence) { 1541 if (pe->exception_sequence == s->exception_complete_sequence) {
1542 struct rb_node *next;
1543
1542 s->exception_complete_sequence++; 1544 s->exception_complete_sequence++;
1543 complete_exception(pe); 1545 complete_exception(pe);
1544 1546
1545 while (!list_empty(&s->out_of_order_list)) { 1547 next = rb_first(&s->out_of_order_tree);
1546 pe = list_entry(s->out_of_order_list.next, 1548 while (next) {
1547 struct dm_snap_pending_exception, out_of_order_entry); 1549 pe = rb_entry(next, struct dm_snap_pending_exception,
1550 out_of_order_node);
1548 if (pe->exception_sequence != s->exception_complete_sequence) 1551 if (pe->exception_sequence != s->exception_complete_sequence)
1549 break; 1552 break;
1553 next = rb_next(next);
1550 s->exception_complete_sequence++; 1554 s->exception_complete_sequence++;
1551 list_del(&pe->out_of_order_entry); 1555 rb_erase(&pe->out_of_order_node, &s->out_of_order_tree);
1552 complete_exception(pe); 1556 complete_exception(pe);
1557 cond_resched();
1553 } 1558 }
1554 } else { 1559 } else {
1555 struct list_head *lh; 1560 struct rb_node *parent = NULL;
1561 struct rb_node **p = &s->out_of_order_tree.rb_node;
1556 struct dm_snap_pending_exception *pe2; 1562 struct dm_snap_pending_exception *pe2;
1557 1563
1558 list_for_each_prev(lh, &s->out_of_order_list) { 1564 while (*p) {
1559 pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry); 1565 pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node);
1560 if (pe2->exception_sequence < pe->exception_sequence) 1566 parent = *p;
1561 break; 1567
1568 BUG_ON(pe->exception_sequence == pe2->exception_sequence);
1569 if (pe->exception_sequence < pe2->exception_sequence)
1570 p = &((*p)->rb_left);
1571 else
1572 p = &((*p)->rb_right);
1562 } 1573 }
1563 list_add(&pe->out_of_order_entry, lh); 1574
1575 rb_link_node(&pe->out_of_order_node, parent, p);
1576 rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
1564 } 1577 }
1565} 1578}
1566 1579
@@ -1694,8 +1707,6 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1694 if (!s->valid) 1707 if (!s->valid)
1695 return DM_MAPIO_KILL; 1708 return DM_MAPIO_KILL;
1696 1709
1697 /* FIXME: should only take write lock if we need
1698 * to copy an exception */
1699 mutex_lock(&s->lock); 1710 mutex_lock(&s->lock);
1700 1711
1701 if (!s->valid || (unlikely(s->snapshot_overflowed) && 1712 if (!s->valid || (unlikely(s->snapshot_overflowed) &&
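The copy_callback() rework is the classic complete-in-submission-order pattern: a completion that arrives early is parked in a structure ordered by its sequence number, and whenever the expected sequence finally completes, the structure is drained for as long as the next parked entry is consecutive; switching the parking structure from a list walked backwards to an rbtree makes each insertion O(log n). A self-contained user-space sketch of the same drain logic, using a plain binary search tree instead of the kernel rb-tree API (the names and the hard-coded arrival order are illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    /* Pending completion parked until its turn comes. */
    struct pending {
        unsigned long seq;
        struct pending *left, *right;
    };

    static struct pending *root;
    static unsigned long complete_seq;   /* next sequence we may complete */

    static void park(struct pending *pe)
    {
        struct pending **p = &root;

        while (*p)
            p = pe->seq < (*p)->seq ? &(*p)->left : &(*p)->right;
        pe->left = pe->right = NULL;
        *p = pe;
    }

    /* Detach the smallest-sequence parked entry, or NULL if none. */
    static struct pending *pop_min(void)
    {
        struct pending **p = &root, *min;

        if (!root)
            return NULL;
        while ((*p)->left)
            p = &(*p)->left;
        min = *p;
        *p = min->right;
        return min;
    }

    static void complete(unsigned long seq)
    {
        printf("completing %lu\n", seq);
    }

    /* Called as each copy finishes, possibly out of order. */
    static void copy_finished(unsigned long seq)
    {
        struct pending *pe;

        if (seq == complete_seq) {
            complete(complete_seq++);
            /* drain every consecutive parked completion */
            while ((pe = pop_min()) && pe->seq == complete_seq) {
                complete(complete_seq++);
                free(pe);
            }
            if (pe)      /* smallest entry was not consecutive: re-park it */
                park(pe);
        } else {
            pe = malloc(sizeof(*pe));
            pe->seq = seq;
            park(pe);
        }
    }

    int main(void)
    {
        unsigned long arrival[] = { 0, 3, 2, 1, 5, 4 };

        for (unsigned i = 0; i < sizeof(arrival) / sizeof(arrival[0]); i++)
            copy_finished(arrival[i]);
        return 0;
    }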
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index b900723bbd0f..7bd60a150f8f 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1220,18 +1220,13 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
1220static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m, 1220static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
1221 sector_t begin, sector_t end) 1221 sector_t begin, sector_t end)
1222{ 1222{
1223 int r;
1224 struct dm_io_region to; 1223 struct dm_io_region to;
1225 1224
1226 to.bdev = tc->pool_dev->bdev; 1225 to.bdev = tc->pool_dev->bdev;
1227 to.sector = begin; 1226 to.sector = begin;
1228 to.count = end - begin; 1227 to.count = end - begin;
1229 1228
1230 r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m); 1229 dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
1231 if (r < 0) {
1232 DMERR_LIMIT("dm_kcopyd_zero() failed");
1233 copy_complete(1, 1, m);
1234 }
1235} 1230}
1236 1231
1237static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio, 1232static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
@@ -1257,7 +1252,6 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
1257 struct dm_bio_prison_cell *cell, struct bio *bio, 1252 struct dm_bio_prison_cell *cell, struct bio *bio,
1258 sector_t len) 1253 sector_t len)
1259{ 1254{
1260 int r;
1261 struct pool *pool = tc->pool; 1255 struct pool *pool = tc->pool;
1262 struct dm_thin_new_mapping *m = get_next_mapping(pool); 1256 struct dm_thin_new_mapping *m = get_next_mapping(pool);
1263 1257
@@ -1296,19 +1290,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
1296 to.sector = data_dest * pool->sectors_per_block; 1290 to.sector = data_dest * pool->sectors_per_block;
1297 to.count = len; 1291 to.count = len;
1298 1292
1299 r = dm_kcopyd_copy(pool->copier, &from, 1, &to, 1293 dm_kcopyd_copy(pool->copier, &from, 1, &to,
1300 0, copy_complete, m); 1294 0, copy_complete, m);
1301 if (r < 0) {
1302 DMERR_LIMIT("dm_kcopyd_copy() failed");
1303 copy_complete(1, 1, m);
1304
1305 /*
1306 * We allow the zero to be issued, to simplify the
1307 * error path. Otherwise we'd need to start
1308 * worrying about decrementing the prepare_actions
1309 * counter.
1310 */
1311 }
1312 1295
1313 /* 1296 /*
1314 * Do we need to zero a tail region? 1297 * Do we need to zero a tail region?
@@ -2520,6 +2503,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
2520 case PM_WRITE: 2503 case PM_WRITE:
2521 if (old_mode != new_mode) 2504 if (old_mode != new_mode)
2522 notify_of_pool_mode_change(pool, "write"); 2505 notify_of_pool_mode_change(pool, "write");
2506 if (old_mode == PM_OUT_OF_DATA_SPACE)
2507 cancel_delayed_work_sync(&pool->no_space_timeout);
2523 pool->out_of_data_space = false; 2508 pool->out_of_data_space = false;
2524 pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space; 2509 pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
2525 dm_pool_metadata_read_write(pool->pmd); 2510 dm_pool_metadata_read_write(pool->pmd);
@@ -3890,6 +3875,8 @@ static void pool_status(struct dm_target *ti, status_type_t type,
3890 else 3875 else
3891 DMEMIT("- "); 3876 DMEMIT("- ");
3892 3877
3878 DMEMIT("%llu ", (unsigned long long)calc_metadata_threshold(pt));
3879
3893 break; 3880 break;
3894 3881
3895 case STATUSTYPE_TABLE: 3882 case STATUSTYPE_TABLE:
@@ -3979,7 +3966,7 @@ static struct target_type pool_target = {
3979 .name = "thin-pool", 3966 .name = "thin-pool",
3980 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 3967 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
3981 DM_TARGET_IMMUTABLE, 3968 DM_TARGET_IMMUTABLE,
3982 .version = {1, 19, 0}, 3969 .version = {1, 20, 0},
3983 .module = THIS_MODULE, 3970 .module = THIS_MODULE,
3984 .ctr = pool_ctr, 3971 .ctr = pool_ctr,
3985 .dtr = pool_dtr, 3972 .dtr = pool_dtr,
@@ -4353,7 +4340,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
4353 4340
4354static struct target_type thin_target = { 4341static struct target_type thin_target = {
4355 .name = "thin", 4342 .name = "thin",
4356 .version = {1, 19, 0}, 4343 .version = {1, 20, 0},
4357 .module = THIS_MODULE, 4344 .module = THIS_MODULE,
4358 .ctr = thin_ctr, 4345 .ctr = thin_ctr,
4359 .dtr = thin_dtr, 4346 .dtr = thin_dtr,
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 87107c995cb5..3a28a68f184c 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -457,7 +457,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc)
457 COMPLETION_INITIALIZER_ONSTACK(endio.c), 457 COMPLETION_INITIALIZER_ONSTACK(endio.c),
458 ATOMIC_INIT(1), 458 ATOMIC_INIT(1),
459 }; 459 };
460 unsigned bitmap_bits = wc->dirty_bitmap_size * BITS_PER_LONG; 460 unsigned bitmap_bits = wc->dirty_bitmap_size * 8;
461 unsigned i = 0; 461 unsigned i = 0;
462 462
463 while (1) { 463 while (1) {
@@ -2240,6 +2240,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
2240 DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's', 2240 DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
2241 wc->dev->name, wc->ssd_dev->name, wc->block_size); 2241 wc->dev->name, wc->ssd_dev->name, wc->block_size);
2242 extra_args = 0; 2242 extra_args = 0;
2243 if (wc->start_sector)
2244 extra_args += 2;
2243 if (wc->high_wm_percent_set) 2245 if (wc->high_wm_percent_set)
2244 extra_args += 2; 2246 extra_args += 2;
2245 if (wc->low_wm_percent_set) 2247 if (wc->low_wm_percent_set)
@@ -2254,6 +2256,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
2254 extra_args++; 2256 extra_args++;
2255 2257
2256 DMEMIT("%u", extra_args); 2258 DMEMIT("%u", extra_args);
2259 if (wc->start_sector)
2260 DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector);
2257 if (wc->high_wm_percent_set) { 2261 if (wc->high_wm_percent_set) {
2258 x = (uint64_t)wc->freelist_high_watermark * 100; 2262 x = (uint64_t)wc->freelist_high_watermark * 100;
2259 x += wc->n_blocks / 2; 2263 x += wc->n_blocks / 2;
@@ -2280,7 +2284,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
2280 2284
2281static struct target_type writecache_target = { 2285static struct target_type writecache_target = {
2282 .name = "writecache", 2286 .name = "writecache",
2283 .version = {1, 1, 0}, 2287 .version = {1, 1, 1},
2284 .module = THIS_MODULE, 2288 .module = THIS_MODULE,
2285 .ctr = writecache_ctr, 2289 .ctr = writecache_ctr,
2286 .dtr = writecache_dtr, 2290 .dtr = writecache_dtr,
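Two small things are going on in the writecache hunks: wc->dirty_bitmap_size is a byte count, so the number of valid bits for the flush loop is size x 8, and multiplying by BITS_PER_LONG (64 on 64-bit builds) had it scan eight times as many bits as the bitmap actually holds, walking past the end of the allocation; and the status output now accounts for and prints the start_sector option when it is set. Both are reflected in the version bump to 1.1.1.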
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index 44a119e12f1a..edf4b95eb075 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -161,10 +161,8 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
161 161
162 /* Copy the valid region */ 162 /* Copy the valid region */
163 set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags); 163 set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags);
164 ret = dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags, 164 dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags,
165 dmz_reclaim_kcopy_end, zrc); 165 dmz_reclaim_kcopy_end, zrc);
166 if (ret)
167 return ret;
168 166
169 /* Wait for copy to complete */ 167 /* Wait for copy to complete */
170 wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY, 168 wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY,
diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h
index cfac8588ed56..e42de7750c88 100644
--- a/include/linux/dm-kcopyd.h
+++ b/include/linux/dm-kcopyd.h
@@ -62,9 +62,9 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc);
62typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err, 62typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err,
63 void *context); 63 void *context);
64 64
65int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, 65void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
66 unsigned num_dests, struct dm_io_region *dests, 66 unsigned num_dests, struct dm_io_region *dests,
67 unsigned flags, dm_kcopyd_notify_fn fn, void *context); 67 unsigned flags, dm_kcopyd_notify_fn fn, void *context);
68 68
69/* 69/*
70 * Prepare a callback and submit it via the kcopyd thread. 70 * Prepare a callback and submit it via the kcopyd thread.
@@ -81,9 +81,9 @@ void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
81 dm_kcopyd_notify_fn fn, void *context); 81 dm_kcopyd_notify_fn fn, void *context);
82void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err); 82void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err);
83 83
84int dm_kcopyd_zero(struct dm_kcopyd_client *kc, 84void dm_kcopyd_zero(struct dm_kcopyd_client *kc,
85 unsigned num_dests, struct dm_io_region *dests, 85 unsigned num_dests, struct dm_io_region *dests,
86 unsigned flags, dm_kcopyd_notify_fn fn, void *context); 86 unsigned flags, dm_kcopyd_notify_fn fn, void *context);
87 87
88#endif /* __KERNEL__ */ 88#endif /* __KERNEL__ */
89#endif /* _LINUX_DM_KCOPYD_H */ 89#endif /* _LINUX_DM_KCOPYD_H */