aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2018-12-28 18:02:26 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2018-12-28 18:02:26 -0500
commit 4ed7bdc1eb4c82cf4bfdf6a94dd36fd695f6f387 (patch)
tree c3a74a9825876fd3ffd78eb04b4035c29d6028f9 /drivers
parent 5d24ae67a961c51beb255a28c9c417d9710247c2 (diff)
parent c6d6e9b0f6b4201c77f2cea3964dd122697e3543 (diff)
Merge tag 'for-4.21/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - Eliminate a couple of indirect calls from bio-based DM core.

 - Fix DM to allow reads that exceed readahead limits by setting io_pages in the backing_dev_info.

 - A couple of code cleanups in request-based DM.

 - Fix various DM targets to check for device sector overflow if CONFIG_LBDAF is not set.

 - Use u64 instead of sector_t to store iv_offset in DM crypt; sector_t isn't large enough on 32-bit when CONFIG_LBDAF is not set.

 - Performance fixes to DM's kcopyd and the snapshot target focused on limiting memory use and workqueue stalls.

 - Fix typos in the integrity and writecache targets.

 - Log which algorithm is used for dm-crypt's encryption and dm-integrity's hashing.

 - Fix false -EBUSY errors in DM raid target's handling of check/repair messages.

 - Fix DM flakey target's corrupt_bio_byte feature to reliably corrupt the Nth byte in a bio's payload.

* tag 'for-4.21/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm: do not allow readahead to limit IO size
  dm raid: fix false -EBUSY when handling check/repair message
  dm rq: cleanup leftover code from recently removed q->mq_ops branching
  dm verity: log the hash algorithm implementation
  dm crypt: log the encryption algorithm implementation
  dm integrity: fix spelling mistake in workqueue name
  dm flakey: Properly corrupt multi-page bios.
  dm: Check for device sector overflow if CONFIG_LBDAF is not set
  dm crypt: use u64 instead of sector_t to store iv_offset
  dm kcopyd: Fix bug causing workqueue stalls
  dm snapshot: Fix excessive memory usage and workqueue stalls
  dm bufio: update comment in dm-bufio.c
  dm writecache: fix typo in error msg for creating writecache_flush_thread
  dm: remove indirect calls from __send_changing_extent_only()
  dm mpath: only flush workqueue when needed
  dm rq: remove unused arguments from rq_completed()
  dm: avoid indirect call in __dm_make_request
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/md/dm-bufio.c 12
-rw-r--r-- drivers/md/dm-crypt.c 17
-rw-r--r-- drivers/md/dm-delay.c 2
-rw-r--r-- drivers/md/dm-flakey.c 35
-rw-r--r-- drivers/md/dm-integrity.c 2
-rw-r--r-- drivers/md/dm-kcopyd.c 19
-rw-r--r-- drivers/md/dm-linear.c 2
-rw-r--r-- drivers/md/dm-mpath.c 6
-rw-r--r-- drivers/md/dm-raid.c 3
-rw-r--r-- drivers/md/dm-raid1.c 3
-rw-r--r-- drivers/md/dm-rq.c 18
-rw-r--r-- drivers/md/dm-snap.c 22
-rw-r--r-- drivers/md/dm-table.c 3
-rw-r--r-- drivers/md/dm-unstripe.c 2
-rw-r--r-- drivers/md/dm-verity-target.c 9
-rw-r--r-- drivers/md/dm-writecache.c 2
-rw-r--r-- drivers/md/dm.c 46
17 files changed, 121 insertions, 82 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index dc385b70e4c3..0e9fcceaefd2 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -65,7 +65,7 @@
65 65
66/* 66/*
67 * Linking of buffers: 67 * Linking of buffers:
68 * All buffers are linked to cache_hash with their hash_list field. 68 * All buffers are linked to buffer_tree with their node field.
69 * 69 *
70 * Clean buffers that are not being written (B_WRITING not set) 70 * Clean buffers that are not being written (B_WRITING not set)
71 * are linked to lru[LIST_CLEAN] with their lru_list field. 71 * are linked to lru[LIST_CLEAN] with their lru_list field.
@@ -457,7 +457,7 @@ static void free_buffer(struct dm_buffer *b)
457} 457}
458 458
459/* 459/*
460 * Link buffer to the hash list and clean or dirty queue. 460 * Link buffer to the buffer tree and clean or dirty queue.
461 */ 461 */
462static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) 462static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
463{ 463{
@@ -472,7 +472,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
472} 472}
473 473
474/* 474/*
475 * Unlink buffer from the hash list and dirty or clean queue. 475 * Unlink buffer from the buffer tree and dirty or clean queue.
476 */ 476 */
477static void __unlink_buffer(struct dm_buffer *b) 477static void __unlink_buffer(struct dm_buffer *b)
478{ 478{
@@ -993,7 +993,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
993 993
994 /* 994 /*
995 * We've had a period where the mutex was unlocked, so need to 995 * We've had a period where the mutex was unlocked, so need to
996 * recheck the hash table. 996 * recheck the buffer tree.
997 */ 997 */
998 b = __find(c, block); 998 b = __find(c, block);
999 if (b) { 999 if (b) {
@@ -1327,7 +1327,7 @@ again:
1327EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); 1327EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
1328 1328
1329/* 1329/*
1330 * Use dm-io to send and empty barrier flush the device. 1330 * Use dm-io to send an empty barrier to flush the device.
1331 */ 1331 */
1332int dm_bufio_issue_flush(struct dm_bufio_client *c) 1332int dm_bufio_issue_flush(struct dm_bufio_client *c)
1333{ 1333{
@@ -1356,7 +1356,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
1356 * Then, we write the buffer to the original location if it was dirty. 1356 * Then, we write the buffer to the original location if it was dirty.
1357 * 1357 *
1358 * Then, if we are the only one who is holding the buffer, relink the buffer 1358 * Then, if we are the only one who is holding the buffer, relink the buffer
1359 * in the hash queue for the new location. 1359 * in the buffer tree for the new location.
1360 * 1360 *
1361 * If there was someone else holding the buffer, we write it to the new 1361 * If there was someone else holding the buffer, we write it to the new
1362 * location but not relink it, because that other user needs to have the buffer 1362 * location but not relink it, because that other user needs to have the buffer
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index a7195eb5b8d8..1ea73ace9b9e 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -49,7 +49,7 @@ struct convert_context {
49 struct bio *bio_out; 49 struct bio *bio_out;
50 struct bvec_iter iter_in; 50 struct bvec_iter iter_in;
51 struct bvec_iter iter_out; 51 struct bvec_iter iter_out;
52 sector_t cc_sector; 52 u64 cc_sector;
53 atomic_t cc_pending; 53 atomic_t cc_pending;
54 union { 54 union {
55 struct skcipher_request *req; 55 struct skcipher_request *req;
@@ -81,7 +81,7 @@ struct dm_crypt_request {
81 struct convert_context *ctx; 81 struct convert_context *ctx;
82 struct scatterlist sg_in[4]; 82 struct scatterlist sg_in[4];
83 struct scatterlist sg_out[4]; 83 struct scatterlist sg_out[4];
84 sector_t iv_sector; 84 u64 iv_sector;
85}; 85};
86 86
87struct crypt_config; 87struct crypt_config;
@@ -160,7 +160,7 @@ struct crypt_config {
160 struct iv_lmk_private lmk; 160 struct iv_lmk_private lmk;
161 struct iv_tcw_private tcw; 161 struct iv_tcw_private tcw;
162 } iv_gen_private; 162 } iv_gen_private;
163 sector_t iv_offset; 163 u64 iv_offset;
164 unsigned int iv_size; 164 unsigned int iv_size;
165 unsigned short int sector_size; 165 unsigned short int sector_size;
166 unsigned char sector_shift; 166 unsigned char sector_shift;
@@ -1885,6 +1885,13 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode)
1885 } 1885 }
1886 } 1886 }
1887 1887
1888 /*
1889 * dm-crypt performance can vary greatly depending on which crypto
1890 * algorithm implementation is used. Help people debug performance
1891 * problems by logging the ->cra_driver_name.
1892 */
1893 DMINFO("%s using implementation \"%s\"", ciphermode,
1894 crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name);
1888 return 0; 1895 return 0;
1889} 1896}
1890 1897
@@ -1903,6 +1910,8 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode)
1903 return err; 1910 return err;
1904 } 1911 }
1905 1912
1913 DMINFO("%s using implementation \"%s\"", ciphermode,
1914 crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name);
1906 return 0; 1915 return 0;
1907} 1916}
1908 1917
@@ -2781,7 +2790,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2781 } 2790 }
2782 2791
2783 ret = -EINVAL; 2792 ret = -EINVAL;
2784 if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { 2793 if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
2785 ti->error = "Invalid device sector"; 2794 ti->error = "Invalid device sector";
2786 goto bad; 2795 goto bad;
2787 } 2796 }
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 2fb7bb4304ad..fddffe251bf6 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -141,7 +141,7 @@ static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **a
141 unsigned long long tmpll; 141 unsigned long long tmpll;
142 char dummy; 142 char dummy;
143 143
144 if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { 144 if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
145 ti->error = "Invalid device sector"; 145 ti->error = "Invalid device sector";
146 return -EINVAL; 146 return -EINVAL;
147 } 147 }
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 3cb97fa4c11d..a9bc518156f2 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -213,7 +213,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
213 devname = dm_shift_arg(&as); 213 devname = dm_shift_arg(&as);
214 214
215 r = -EINVAL; 215 r = -EINVAL;
216 if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) { 216 if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
217 ti->error = "Invalid device sector"; 217 ti->error = "Invalid device sector";
218 goto bad; 218 goto bad;
219 } 219 }
@@ -287,20 +287,31 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
287 287
288static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) 288static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
289{ 289{
290 unsigned bio_bytes = bio_cur_bytes(bio); 290 unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1;
291 char *data = bio_data(bio); 291
292 struct bvec_iter iter;
293 struct bio_vec bvec;
294
295 if (!bio_has_data(bio))
296 return;
292 297
293 /* 298 /*
294 * Overwrite the Nth byte of the data returned. 299 * Overwrite the Nth byte of the bio's data, on whichever page
300 * it falls.
295 */ 301 */
296 if (data && bio_bytes >= fc->corrupt_bio_byte) { 302 bio_for_each_segment(bvec, bio, iter) {
297 data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; 303 if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
298 304 char *segment = (page_address(bio_iter_page(bio, iter))
299 DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " 305 + bio_iter_offset(bio, iter));
300 "(rw=%c bi_opf=%u bi_sector=%llu cur_bytes=%u)\n", 306 segment[corrupt_bio_byte] = fc->corrupt_bio_value;
301 bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, 307 DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
302 (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, 308 "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
303 (unsigned long long)bio->bi_iter.bi_sector, bio_bytes); 309 bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
310 (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf,
311 (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size);
312 break;
313 }
314 corrupt_bio_byte -= bio_iter_len(bio, iter);
304 } 315 }
305} 316}
306 317
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index d4ad0bfee251..2b27abfa428d 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -3460,7 +3460,7 @@ try_smaller_buffer:
3460 ti->error = "Recalculate is only valid with internal hash"; 3460 ti->error = "Recalculate is only valid with internal hash";
3461 goto bad; 3461 goto bad;
3462 } 3462 }
3463 ic->recalc_wq = alloc_workqueue("dm-intergrity-recalc", WQ_MEM_RECLAIM, 1); 3463 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
3464 if (!ic->recalc_wq ) { 3464 if (!ic->recalc_wq ) {
3465 ti->error = "Cannot allocate workqueue"; 3465 ti->error = "Cannot allocate workqueue";
3466 r = -ENOMEM; 3466 r = -ENOMEM;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 2fc4213e02b5..671c24332802 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -56,15 +56,17 @@ struct dm_kcopyd_client {
56 atomic_t nr_jobs; 56 atomic_t nr_jobs;
57 57
58/* 58/*
59 * We maintain three lists of jobs: 59 * We maintain four lists of jobs:
60 * 60 *
61 * i) jobs waiting for pages 61 * i) jobs waiting for pages
62 * ii) jobs that have pages, and are waiting for the io to be issued. 62 * ii) jobs that have pages, and are waiting for the io to be issued.
63 * iii) jobs that have completed. 63 * iii) jobs that don't need to do any IO and just run a callback
64 * iv) jobs that have completed.
64 * 65 *
65 * All three of these are protected by job_lock. 66 * All four of these are protected by job_lock.
66 */ 67 */
67 spinlock_t job_lock; 68 spinlock_t job_lock;
69 struct list_head callback_jobs;
68 struct list_head complete_jobs; 70 struct list_head complete_jobs;
69 struct list_head io_jobs; 71 struct list_head io_jobs;
70 struct list_head pages_jobs; 72 struct list_head pages_jobs;
@@ -625,6 +627,7 @@ static void do_work(struct work_struct *work)
625 struct dm_kcopyd_client *kc = container_of(work, 627 struct dm_kcopyd_client *kc = container_of(work,
626 struct dm_kcopyd_client, kcopyd_work); 628 struct dm_kcopyd_client, kcopyd_work);
627 struct blk_plug plug; 629 struct blk_plug plug;
630 unsigned long flags;
628 631
629 /* 632 /*
630 * The order that these are called is *very* important. 633 * The order that these are called is *very* important.
@@ -633,6 +636,10 @@ static void do_work(struct work_struct *work)
633 * list. io jobs call wake when they complete and it all 636 * list. io jobs call wake when they complete and it all
634 * starts again. 637 * starts again.
635 */ 638 */
639 spin_lock_irqsave(&kc->job_lock, flags);
640 list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs);
641 spin_unlock_irqrestore(&kc->job_lock, flags);
642
636 blk_start_plug(&plug); 643 blk_start_plug(&plug);
637 process_jobs(&kc->complete_jobs, kc, run_complete_job); 644 process_jobs(&kc->complete_jobs, kc, run_complete_job);
638 process_jobs(&kc->pages_jobs, kc, run_pages_job); 645 process_jobs(&kc->pages_jobs, kc, run_pages_job);
@@ -650,7 +657,7 @@ static void dispatch_job(struct kcopyd_job *job)
650 struct dm_kcopyd_client *kc = job->kc; 657 struct dm_kcopyd_client *kc = job->kc;
651 atomic_inc(&kc->nr_jobs); 658 atomic_inc(&kc->nr_jobs);
652 if (unlikely(!job->source.count)) 659 if (unlikely(!job->source.count))
653 push(&kc->complete_jobs, job); 660 push(&kc->callback_jobs, job);
654 else if (job->pages == &zero_page_list) 661 else if (job->pages == &zero_page_list)
655 push(&kc->io_jobs, job); 662 push(&kc->io_jobs, job);
656 else 663 else
@@ -858,7 +865,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
858 job->read_err = read_err; 865 job->read_err = read_err;
859 job->write_err = write_err; 866 job->write_err = write_err;
860 867
861 push(&kc->complete_jobs, job); 868 push(&kc->callback_jobs, job);
862 wake(kc); 869 wake(kc);
863} 870}
864EXPORT_SYMBOL(dm_kcopyd_do_callback); 871EXPORT_SYMBOL(dm_kcopyd_do_callback);
@@ -888,6 +895,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro
888 return ERR_PTR(-ENOMEM); 895 return ERR_PTR(-ENOMEM);
889 896
890 spin_lock_init(&kc->job_lock); 897 spin_lock_init(&kc->job_lock);
898 INIT_LIST_HEAD(&kc->callback_jobs);
891 INIT_LIST_HEAD(&kc->complete_jobs); 899 INIT_LIST_HEAD(&kc->complete_jobs);
892 INIT_LIST_HEAD(&kc->io_jobs); 900 INIT_LIST_HEAD(&kc->io_jobs);
893 INIT_LIST_HEAD(&kc->pages_jobs); 901 INIT_LIST_HEAD(&kc->pages_jobs);
@@ -939,6 +947,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
939 /* Wait for completion of all jobs submitted by this client. */ 947 /* Wait for completion of all jobs submitted by this client. */
940 wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); 948 wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
941 949
950 BUG_ON(!list_empty(&kc->callback_jobs));
942 BUG_ON(!list_empty(&kc->complete_jobs)); 951 BUG_ON(!list_empty(&kc->complete_jobs));
943 BUG_ON(!list_empty(&kc->io_jobs)); 952 BUG_ON(!list_empty(&kc->io_jobs));
944 BUG_ON(!list_empty(&kc->pages_jobs)); 953 BUG_ON(!list_empty(&kc->pages_jobs));
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 8d7ddee6ac4d..ad980a38fb1e 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -45,7 +45,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
45 } 45 }
46 46
47 ret = -EINVAL; 47 ret = -EINVAL;
48 if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) { 48 if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) {
49 ti->error = "Invalid device sector"; 49 ti->error = "Invalid device sector";
50 goto bad; 50 goto bad;
51 } 51 }
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d6a66921daf4..2ee5e357a0a7 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1211,14 +1211,16 @@ static void flush_multipath_work(struct multipath *m)
1211 set_bit(MPATHF_PG_INIT_DISABLED, &m->flags); 1211 set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
1212 smp_mb__after_atomic(); 1212 smp_mb__after_atomic();
1213 1213
1214 flush_workqueue(kmpath_handlerd); 1214 if (atomic_read(&m->pg_init_in_progress))
1215 flush_workqueue(kmpath_handlerd);
1215 multipath_wait_for_pg_init_completion(m); 1216 multipath_wait_for_pg_init_completion(m);
1216 1217
1217 clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags); 1218 clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
1218 smp_mb__after_atomic(); 1219 smp_mb__after_atomic();
1219 } 1220 }
1220 1221
1221 flush_workqueue(kmultipathd); 1222 if (m->queue_mode == DM_TYPE_BIO_BASED)
1223 flush_work(&m->process_queued_bios);
1222 flush_work(&m->trigger_event); 1224 flush_work(&m->trigger_event);
1223} 1225}
1224 1226
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index e1dd1622a290..adcfe8ae10aa 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3690,8 +3690,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
3690 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 3690 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3691 md_reap_sync_thread(mddev); 3691 md_reap_sync_thread(mddev);
3692 } 3692 }
3693 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || 3693 } else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
3694 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
3695 return -EBUSY; 3694 return -EBUSY;
3696 else if (!strcasecmp(argv[0], "resync")) 3695 else if (!strcasecmp(argv[0], "resync"))
3697 ; /* MD_RECOVERY_NEEDED set below */ 3696 ; /* MD_RECOVERY_NEEDED set below */
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 79eab1071ec2..5a51151f680d 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -943,7 +943,8 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
943 char dummy; 943 char dummy;
944 int ret; 944 int ret;
945 945
946 if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) { 946 if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1 ||
947 offset != (sector_t)offset) {
947 ti->error = "Invalid offset"; 948 ti->error = "Invalid offset";
948 return -EINVAL; 949 return -EINVAL;
949 } 950 }
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 4e06be4f0a62..4eb5f8c56535 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -128,7 +128,7 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
128 * the md may be freed in dm_put() at the end of this function. 128 * the md may be freed in dm_put() at the end of this function.
129 * Or do dm_get() before calling this function and dm_put() later. 129 * Or do dm_get() before calling this function and dm_put() later.
130 */ 130 */
131static void rq_completed(struct mapped_device *md, int rw, bool run_queue) 131static void rq_completed(struct mapped_device *md)
132{ 132{
133 /* nudge anyone waiting on suspend queue */ 133 /* nudge anyone waiting on suspend queue */
134 if (unlikely(waitqueue_active(&md->wait))) 134 if (unlikely(waitqueue_active(&md->wait)))
@@ -147,7 +147,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
147 */ 147 */
148static void dm_end_request(struct request *clone, blk_status_t error) 148static void dm_end_request(struct request *clone, blk_status_t error)
149{ 149{
150 int rw = rq_data_dir(clone);
151 struct dm_rq_target_io *tio = clone->end_io_data; 150 struct dm_rq_target_io *tio = clone->end_io_data;
152 struct mapped_device *md = tio->md; 151 struct mapped_device *md = tio->md;
153 struct request *rq = tio->orig; 152 struct request *rq = tio->orig;
@@ -157,7 +156,7 @@ static void dm_end_request(struct request *clone, blk_status_t error)
157 156
158 rq_end_stats(md, rq); 157 rq_end_stats(md, rq);
159 blk_mq_end_request(rq, error); 158 blk_mq_end_request(rq, error);
160 rq_completed(md, rw, true); 159 rq_completed(md);
161} 160}
162 161
163static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs) 162static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
@@ -181,7 +180,6 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
181{ 180{
182 struct mapped_device *md = tio->md; 181 struct mapped_device *md = tio->md;
183 struct request *rq = tio->orig; 182 struct request *rq = tio->orig;
184 int rw = rq_data_dir(rq);
185 unsigned long delay_ms = delay_requeue ? 100 : 0; 183 unsigned long delay_ms = delay_requeue ? 100 : 0;
186 184
187 rq_end_stats(md, rq); 185 rq_end_stats(md, rq);
@@ -191,7 +189,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
191 } 189 }
192 190
193 dm_mq_delay_requeue_request(rq, delay_ms); 191 dm_mq_delay_requeue_request(rq, delay_ms);
194 rq_completed(md, rw, false); 192 rq_completed(md);
195} 193}
196 194
197static void dm_done(struct request *clone, blk_status_t error, bool mapped) 195static void dm_done(struct request *clone, blk_status_t error, bool mapped)
@@ -246,15 +244,13 @@ static void dm_softirq_done(struct request *rq)
246 bool mapped = true; 244 bool mapped = true;
247 struct dm_rq_target_io *tio = tio_from_request(rq); 245 struct dm_rq_target_io *tio = tio_from_request(rq);
248 struct request *clone = tio->clone; 246 struct request *clone = tio->clone;
249 int rw;
250 247
251 if (!clone) { 248 if (!clone) {
252 struct mapped_device *md = tio->md; 249 struct mapped_device *md = tio->md;
253 250
254 rq_end_stats(md, rq); 251 rq_end_stats(md, rq);
255 rw = rq_data_dir(rq);
256 blk_mq_end_request(rq, tio->error); 252 blk_mq_end_request(rq, tio->error);
257 rq_completed(md, rw, false); 253 rq_completed(md);
258 return; 254 return;
259 } 255 }
260 256
@@ -376,7 +372,6 @@ static int map_request(struct dm_rq_target_io *tio)
376 blk_status_t ret; 372 blk_status_t ret;
377 373
378 r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); 374 r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
379check_again:
380 switch (r) { 375 switch (r) {
381 case DM_MAPIO_SUBMITTED: 376 case DM_MAPIO_SUBMITTED:
382 /* The target has taken the I/O to submit by itself later */ 377 /* The target has taken the I/O to submit by itself later */
@@ -396,8 +391,7 @@ check_again:
396 blk_rq_unprep_clone(clone); 391 blk_rq_unprep_clone(clone);
397 tio->ti->type->release_clone_rq(clone); 392 tio->ti->type->release_clone_rq(clone);
398 tio->clone = NULL; 393 tio->clone = NULL;
399 r = DM_MAPIO_REQUEUE; 394 return DM_MAPIO_REQUEUE;
400 goto check_again;
401 } 395 }
402 break; 396 break;
403 case DM_MAPIO_REQUEUE: 397 case DM_MAPIO_REQUEUE:
@@ -507,7 +501,7 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
507 if (map_request(tio) == DM_MAPIO_REQUEUE) { 501 if (map_request(tio) == DM_MAPIO_REQUEUE) {
508 /* Undo dm_start_request() before requeuing */ 502 /* Undo dm_start_request() before requeuing */
509 rq_end_stats(md, rq); 503 rq_end_stats(md, rq);
510 rq_completed(md, rq_data_dir(rq), false); 504 rq_completed(md);
511 return BLK_STS_RESOURCE; 505 return BLK_STS_RESOURCE;
512 } 506 }
513 507
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ae4b33d10924..36805b12661e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -19,6 +19,7 @@
19#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
20#include <linux/log2.h> 20#include <linux/log2.h>
21#include <linux/dm-kcopyd.h> 21#include <linux/dm-kcopyd.h>
22#include <linux/semaphore.h>
22 23
23#include "dm.h" 24#include "dm.h"
24 25
@@ -105,6 +106,9 @@ struct dm_snapshot {
105 /* The on disk metadata handler */ 106 /* The on disk metadata handler */
106 struct dm_exception_store *store; 107 struct dm_exception_store *store;
107 108
109 /* Maximum number of in-flight COW jobs. */
110 struct semaphore cow_count;
111
108 struct dm_kcopyd_client *kcopyd_client; 112 struct dm_kcopyd_client *kcopyd_client;
109 113
110 /* Wait for events based on state_bits */ 114 /* Wait for events based on state_bits */
@@ -145,6 +149,19 @@ struct dm_snapshot {
145#define RUNNING_MERGE 0 149#define RUNNING_MERGE 0
146#define SHUTDOWN_MERGE 1 150#define SHUTDOWN_MERGE 1
147 151
152/*
153 * Maximum number of chunks being copied on write.
154 *
155 * The value was decided experimentally as a trade-off between memory
156 * consumption, stalling the kernel's workqueues and maintaining a high enough
157 * throughput.
158 */
159#define DEFAULT_COW_THRESHOLD 2048
160
161static int cow_threshold = DEFAULT_COW_THRESHOLD;
162module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
163MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
164
148DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, 165DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
149 "A percentage of time allocated for copy on write"); 166 "A percentage of time allocated for copy on write");
150 167
@@ -1190,6 +1207,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1190 goto bad_hash_tables; 1207 goto bad_hash_tables;
1191 } 1208 }
1192 1209
1210 sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
1211
1193 s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); 1212 s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
1194 if (IS_ERR(s->kcopyd_client)) { 1213 if (IS_ERR(s->kcopyd_client)) {
1195 r = PTR_ERR(s->kcopyd_client); 1214 r = PTR_ERR(s->kcopyd_client);
@@ -1575,6 +1594,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
1575 rb_link_node(&pe->out_of_order_node, parent, p); 1594 rb_link_node(&pe->out_of_order_node, parent, p);
1576 rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); 1595 rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
1577 } 1596 }
1597 up(&s->cow_count);
1578} 1598}
1579 1599
1580/* 1600/*
@@ -1598,6 +1618,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
1598 dest.count = src.count; 1618 dest.count = src.count;
1599 1619
1600 /* Hand over to kcopyd */ 1620 /* Hand over to kcopyd */
1621 down(&s->cow_count);
1601 dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); 1622 dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
1602} 1623}
1603 1624
@@ -1617,6 +1638,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
1617 pe->full_bio = bio; 1638 pe->full_bio = bio;
1618 pe->full_bio_end_io = bio->bi_end_io; 1639 pe->full_bio_end_io = bio->bi_end_io;
1619 1640
1641 down(&s->cow_count);
1620 callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, 1642 callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
1621 copy_callback, pe); 1643 copy_callback, pe);
1622 1644
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 844f7d0f2ef8..4b1be754cc41 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1927,6 +1927,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1927 */ 1927 */
1928 if (blk_queue_is_zoned(q)) 1928 if (blk_queue_is_zoned(q))
1929 blk_revalidate_disk_zones(t->md->disk); 1929 blk_revalidate_disk_zones(t->md->disk);
1930
1931 /* Allow reads to exceed readahead limits */
1932 q->backing_dev_info->io_pages = limits->max_sectors >> (PAGE_SHIFT - 9);
1930} 1933}
1931 1934
1932unsigned int dm_table_get_num_targets(struct dm_table *t) 1935unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
index 954b7ab4e684..e673dacf6418 100644
--- a/drivers/md/dm-unstripe.c
+++ b/drivers/md/dm-unstripe.c
@@ -78,7 +78,7 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
78 goto err; 78 goto err;
79 } 79 }
80 80
81 if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1) { 81 if (sscanf(argv[4], "%llu%c", &start, &dummy) != 1 || start != (sector_t)start) {
82 ti->error = "Invalid striped device offset"; 82 ti->error = "Invalid striped device offset";
83 goto err; 83 goto err;
84 } 84 }
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index fc65f0dedf7f..f4c31ffaa88e 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -1040,6 +1040,15 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
1040 v->tfm = NULL; 1040 v->tfm = NULL;
1041 goto bad; 1041 goto bad;
1042 } 1042 }
1043
1044 /*
1045 * dm-verity performance can vary greatly depending on which hash
1046 * algorithm implementation is used. Help people debug performance
1047 * problems by logging the ->cra_driver_name.
1048 */
1049 DMINFO("%s using implementation \"%s\"", v->alg_name,
1050 crypto_hash_alg_common(v->tfm)->base.cra_driver_name);
1051
1043 v->digest_size = crypto_ahash_digestsize(v->tfm); 1052 v->digest_size = crypto_ahash_digestsize(v->tfm);
1044 if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { 1053 if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
1045 ti->error = "Digest size too big"; 1054 ti->error = "Digest size too big";
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2d50eec94cd7..2b8cee35e4d5 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -2061,7 +2061,7 @@ invalid_optional:
2061 if (IS_ERR(wc->flush_thread)) { 2061 if (IS_ERR(wc->flush_thread)) {
2062 r = PTR_ERR(wc->flush_thread); 2062 r = PTR_ERR(wc->flush_thread);
2063 wc->flush_thread = NULL; 2063 wc->flush_thread = NULL;
2064 ti->error = "Couldn't spawn endio thread"; 2064 ti->error = "Couldn't spawn flush thread";
2065 goto bad; 2065 goto bad;
2066 } 2066 }
2067 wake_up_process(wc->flush_thread); 2067 wake_up_process(wc->flush_thread);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a4a06982ed91..d67c95ef8d7e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1486,11 +1486,9 @@ static bool is_split_required_for_discard(struct dm_target *ti)
1486} 1486}
1487 1487
1488static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, 1488static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
1489 get_num_bios_fn get_num_bios, 1489 unsigned num_bios, bool is_split_required)
1490 is_split_required_fn is_split_required)
1491{ 1490{
1492 unsigned len; 1491 unsigned len;
1493 unsigned num_bios;
1494 1492
1495 /* 1493 /*
1496 * Even though the device advertised support for this type of 1494 * Even though the device advertised support for this type of
@@ -1498,11 +1496,10 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
1498 * reconfiguration might also have changed that since the 1496 * reconfiguration might also have changed that since the
1499 * check was performed. 1497 * check was performed.
1500 */ 1498 */
1501 num_bios = get_num_bios ? get_num_bios(ti) : 0;
1502 if (!num_bios) 1499 if (!num_bios)
1503 return -EOPNOTSUPP; 1500 return -EOPNOTSUPP;
1504 1501
1505 if (is_split_required && !is_split_required(ti)) 1502 if (!is_split_required)
1506 len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); 1503 len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
1507 else 1504 else
1508 len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); 1505 len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
@@ -1517,23 +1514,23 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
1517 1514
1518static int __send_discard(struct clone_info *ci, struct dm_target *ti) 1515static int __send_discard(struct clone_info *ci, struct dm_target *ti)
1519{ 1516{
1520 return __send_changing_extent_only(ci, ti, get_num_discard_bios, 1517 return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti),
1521 is_split_required_for_discard); 1518 is_split_required_for_discard(ti));
1522} 1519}
1523 1520
1524static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti) 1521static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
1525{ 1522{
1526 return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios, NULL); 1523 return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false);
1527} 1524}
1528 1525
1529static int __send_write_same(struct clone_info *ci, struct dm_target *ti) 1526static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
1530{ 1527{
1531 return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL); 1528 return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false);
1532} 1529}
1533 1530
1534static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti) 1531static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
1535{ 1532{
1536 return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL); 1533 return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
1537} 1534}
1538 1535
1539static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, 1536static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
@@ -1716,10 +1713,7 @@ out:
1716 return ret; 1713 return ret;
1717} 1714}
1718 1715
1719typedef blk_qc_t (process_bio_fn)(struct mapped_device *, struct dm_table *, struct bio *); 1716static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
1720
1721static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio,
1722 process_bio_fn process_bio)
1723{ 1717{
1724 struct mapped_device *md = q->queuedata; 1718 struct mapped_device *md = q->queuedata;
1725 blk_qc_t ret = BLK_QC_T_NONE; 1719 blk_qc_t ret = BLK_QC_T_NONE;
@@ -1739,26 +1733,15 @@ static blk_qc_t __dm_make_request(struct request_queue *q, struct bio *bio,
1739 return ret; 1733 return ret;
1740 } 1734 }
1741 1735
1742 ret = process_bio(md, map, bio); 1736 if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
1737 ret = __process_bio(md, map, bio);
1738 else
1739 ret = __split_and_process_bio(md, map, bio);
1743 1740
1744 dm_put_live_table(md, srcu_idx); 1741 dm_put_live_table(md, srcu_idx);
1745 return ret; 1742 return ret;
1746} 1743}
1747 1744
1748/*
1749 * The request function that remaps the bio to one target and
1750 * splits off any remainder.
1751 */
1752static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
1753{
1754 return __dm_make_request(q, bio, __split_and_process_bio);
1755}
1756
1757static blk_qc_t dm_make_request_nvme(struct request_queue *q, struct bio *bio)
1758{
1759 return __dm_make_request(q, bio, __process_bio);
1760}
1761
1762static int dm_any_congested(void *congested_data, int bdi_bits) 1745static int dm_any_congested(void *congested_data, int bdi_bits)
1763{ 1746{
1764 int r = bdi_bits; 1747 int r = bdi_bits;
@@ -2246,12 +2229,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
2246 break; 2229 break;
2247 case DM_TYPE_BIO_BASED: 2230 case DM_TYPE_BIO_BASED:
2248 case DM_TYPE_DAX_BIO_BASED: 2231 case DM_TYPE_DAX_BIO_BASED:
2249 dm_init_normal_md_queue(md);
2250 blk_queue_make_request(md->queue, dm_make_request);
2251 break;
2252 case DM_TYPE_NVME_BIO_BASED: 2232 case DM_TYPE_NVME_BIO_BASED:
2253 dm_init_normal_md_queue(md); 2233 dm_init_normal_md_queue(md);
2254 blk_queue_make_request(md->queue, dm_make_request_nvme); 2234 blk_queue_make_request(md->queue, dm_make_request);
2255 break; 2235 break;
2256 case DM_TYPE_NONE: 2236 case DM_TYPE_NONE:
2257 WARN_ON_ONCE(true); 2237 WARN_ON_ONCE(true);