aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-12-14 14:01:00 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-12-14 14:01:00 -0500
commit 775a2e29c3bbcf853432f47d3caa9ff8808807ad (patch)
tree 614a7481b68dc9b0b628f392ab9bbdc53bbfe447
parent 2a4c32edd39b7de166e723b1991abcde4db3a701 (diff)
parent ef548c551e72dbbdcc6d9ed7c7b3b01083fea8e2 (diff)
Merge tag 'dm-4.10-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - various fixes and improvements to request-based DM and DM multipath

 - some locking improvements in DM bufio

 - add Kconfig option to disable the DM block manager's extra locking
   which mainly serves as a developer tool

 - a few bug fixes to DM's persistent-data

 - a couple changes to prepare for multipage biovec support in the block
   layer

 - various improvements and cleanups in the DM core, DM cache, DM raid
   and DM crypt

 - add ability to have DM crypt use keys from the kernel key retention
   service

 - add a new "error_writes" feature to the DM flakey target, reads are
   left unchanged in this mode

* tag 'dm-4.10-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (40 commits)
  dm flakey: introduce "error_writes" feature
  dm cache policy smq: use hash_32() instead of hash_32_generic()
  dm crypt: reject key strings containing whitespace chars
  dm space map: always set ev if sm_ll_mutate() succeeds
  dm space map metadata: skip useless memcpy in metadata_ll_init_index()
  dm space map metadata: fix 'struct sm_metadata' leak on failed create
  Documentation: dm raid: define data_offset status field
  dm raid: fix discard support regression
  dm raid: don't allow "write behind" with raid4/5/6
  dm mpath: use hw_handler_params if attached hw_handler is same as requested
  dm crypt: add ability to use keys from the kernel key retention service
  dm array: remove a dead assignment in populate_ablock_with_values()
  dm ioctl: use offsetof() instead of open-coding it
  dm rq: simplify use_blk_mq initialization
  dm: use blk_set_queue_dying() in __dm_destroy()
  dm bufio: drop the lock when doing GFP_NOIO allocation
  dm bufio: don't take the lock in dm_bufio_shrink_count
  dm bufio: avoid sleeping while holding the dm_bufio lock
  dm table: simplify dm_table_determine_type()
  dm table: an 'all_blk_mq' table must be loaded for a blk-mq DM device
  ...
-rw-r--r-- Documentation/device-mapper/dm-crypt.txt | 25
-rw-r--r-- Documentation/device-mapper/dm-raid.txt | 4
-rw-r--r-- drivers/md/Kconfig | 10
-rw-r--r-- drivers/md/dm-bufio.c | 28
-rw-r--r-- drivers/md/dm-cache-metadata.c | 3
-rw-r--r-- drivers/md/dm-cache-policy-smq.c | 2
-rw-r--r-- drivers/md/dm-cache-target.c | 3
-rw-r--r-- drivers/md/dm-crypt.c | 214
-rw-r--r-- drivers/md/dm-flakey.c | 53
-rw-r--r-- drivers/md/dm-io.c | 34
-rw-r--r-- drivers/md/dm-ioctl.c | 2
-rw-r--r-- drivers/md/dm-mpath.c | 42
-rw-r--r-- drivers/md/dm-raid.c | 82
-rw-r--r-- drivers/md/dm-rq.c | 18
-rw-r--r-- drivers/md/dm-table.c | 43
-rw-r--r-- drivers/md/dm-verity-target.c | 2
-rw-r--r-- drivers/md/dm.c | 4
-rw-r--r-- drivers/md/persistent-data/dm-array.c | 2
-rw-r--r-- drivers/md/persistent-data/dm-block-manager.c | 19
-rw-r--r-- drivers/md/persistent-data/dm-space-map-common.c | 4
-rw-r--r-- drivers/md/persistent-data/dm-space-map-metadata.c | 14
-rw-r--r-- include/uapi/linux/dm-log-userspace.h | 53
22 files changed, 461 insertions, 200 deletions
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
index 692171fe9da0..6f15fcea9566 100644
--- a/Documentation/device-mapper/dm-crypt.txt
+++ b/Documentation/device-mapper/dm-crypt.txt
@@ -21,13 +21,30 @@ Parameters: <cipher> <key> <iv_offset> <device path> \
21 /proc/crypto contains supported crypto modes 21 /proc/crypto contains supported crypto modes
22 22
23<key> 23<key>
24 Key used for encryption. It is encoded as a hexadecimal number. 24 Key used for encryption. It is encoded either as a hexadecimal number
25 or it can be passed as <key_string> prefixed with single colon
26 character (':') for keys residing in kernel keyring service.
25 You can only use key sizes that are valid for the selected cipher 27 You can only use key sizes that are valid for the selected cipher
26 in combination with the selected iv mode. 28 in combination with the selected iv mode.
27 Note that for some iv modes the key string can contain additional 29 Note that for some iv modes the key string can contain additional
28 keys (for example IV seed) so the key contains more parts concatenated 30 keys (for example IV seed) so the key contains more parts concatenated
29 into a single string. 31 into a single string.
30 32
33<key_string>
34 The kernel keyring key is identified by string in following format:
35 <key_size>:<key_type>:<key_description>.
36
37<key_size>
38 The encryption key size in bytes. The kernel key payload size must match
39 the value passed in <key_size>.
40
41<key_type>
42 Either 'logon' or 'user' kernel key type.
43
44<key_description>
45 The kernel keyring key description crypt target should look for
46 when loading key of <key_type>.
47
31<keycount> 48<keycount>
32 Multi-key compatibility mode. You can define <keycount> keys and 49 Multi-key compatibility mode. You can define <keycount> keys and
33 then sectors are encrypted according to their offsets (sector 0 uses key0; 50 then sectors are encrypted according to their offsets (sector 0 uses key0;
@@ -90,6 +107,12 @@ dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha
90 107
91[[ 108[[
92#!/bin/sh 109#!/bin/sh
110# Create a crypt device using dmsetup when encryption key is stored in keyring service
111dmsetup create crypt2 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 :32:logon:my_prefix:my_key 0 $1 0"
112]]
113
114[[
115#!/bin/sh
93# Create a crypt device using cryptsetup and LUKS header with default cipher 116# Create a crypt device using cryptsetup and LUKS header with default cipher
94cryptsetup luksFormat $1 117cryptsetup luksFormat $1
95cryptsetup luksOpen $1 crypt1 118cryptsetup luksOpen $1 crypt1
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index 9bd531aa2279..5e3786fd9ea7 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -242,6 +242,10 @@ recovery. Here is a fuller description of the individual fields:
242 in RAID1/10 or wrong parity values found in RAID4/5/6. 242 in RAID1/10 or wrong parity values found in RAID4/5/6.
243 This value is valid only after a "check" of the array 243 This value is valid only after a "check" of the array
244 is performed. A healthy array has a 'mismatch_cnt' of 0. 244 is performed. A healthy array has a 'mismatch_cnt' of 0.
245 <data_offset> The current data offset to the start of the user data on
246 each component device of a raid set (see the respective
247 raid parameter to support out-of-place reshaping).
248
245 249
246Message Interface 250Message Interface
247----------------- 251-----------------
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 02a5345a44a6..b7767da50c26 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -240,9 +240,17 @@ config DM_BUFIO
240 as a cache, holding recently-read blocks in memory and performing 240 as a cache, holding recently-read blocks in memory and performing
241 delayed writes. 241 delayed writes.
242 242
243config DM_DEBUG_BLOCK_MANAGER_LOCKING
244 bool "Block manager locking"
245 depends on DM_BUFIO
246 ---help---
247 Block manager locking can catch various metadata corruption issues.
248
249 If unsure, say N.
250
243config DM_DEBUG_BLOCK_STACK_TRACING 251config DM_DEBUG_BLOCK_STACK_TRACING
244 bool "Keep stack trace of persistent data block lock holders" 252 bool "Keep stack trace of persistent data block lock holders"
245 depends on STACKTRACE_SUPPORT && DM_BUFIO 253 depends on STACKTRACE_SUPPORT && DM_DEBUG_BLOCK_MANAGER_LOCKING
246 select STACKTRACE 254 select STACKTRACE
247 ---help--- 255 ---help---
248 Enable this for messages that may help debug problems with the 256 Enable this for messages that may help debug problems with the
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 262e75365cc0..84d2f0e4c754 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -820,12 +820,14 @@ enum new_flag {
820static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf) 820static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf)
821{ 821{
822 struct dm_buffer *b; 822 struct dm_buffer *b;
823 bool tried_noio_alloc = false;
823 824
824 /* 825 /*
825 * dm-bufio is resistant to allocation failures (it just keeps 826 * dm-bufio is resistant to allocation failures (it just keeps
826 * one buffer reserved in cases all the allocations fail). 827 * one buffer reserved in cases all the allocations fail).
827 * So set flags to not try too hard: 828 * So set flags to not try too hard:
828 * GFP_NOIO: don't recurse into the I/O layer 829 * GFP_NOWAIT: don't wait; if we need to sleep we'll release our
830 * mutex and wait ourselves.
829 * __GFP_NORETRY: don't retry and rather return failure 831 * __GFP_NORETRY: don't retry and rather return failure
830 * __GFP_NOMEMALLOC: don't use emergency reserves 832 * __GFP_NOMEMALLOC: don't use emergency reserves
831 * __GFP_NOWARN: don't print a warning in case of failure 833 * __GFP_NOWARN: don't print a warning in case of failure
@@ -835,7 +837,7 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
835 */ 837 */
836 while (1) { 838 while (1) {
837 if (dm_bufio_cache_size_latch != 1) { 839 if (dm_bufio_cache_size_latch != 1) {
838 b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); 840 b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
839 if (b) 841 if (b)
840 return b; 842 return b;
841 } 843 }
@@ -843,6 +845,15 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
843 if (nf == NF_PREFETCH) 845 if (nf == NF_PREFETCH)
844 return NULL; 846 return NULL;
845 847
848 if (dm_bufio_cache_size_latch != 1 && !tried_noio_alloc) {
849 dm_bufio_unlock(c);
850 b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
851 dm_bufio_lock(c);
852 if (b)
853 return b;
854 tried_noio_alloc = true;
855 }
856
846 if (!list_empty(&c->reserved_buffers)) { 857 if (!list_empty(&c->reserved_buffers)) {
847 b = list_entry(c->reserved_buffers.next, 858 b = list_entry(c->reserved_buffers.next,
848 struct dm_buffer, lru_list); 859 struct dm_buffer, lru_list);
@@ -1585,18 +1596,9 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1585static unsigned long 1596static unsigned long
1586dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) 1597dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
1587{ 1598{
1588 struct dm_bufio_client *c; 1599 struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
1589 unsigned long count;
1590
1591 c = container_of(shrink, struct dm_bufio_client, shrinker);
1592 if (sc->gfp_mask & __GFP_FS)
1593 dm_bufio_lock(c);
1594 else if (!dm_bufio_trylock(c))
1595 return 0;
1596 1600
1597 count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; 1601 return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]);
1598 dm_bufio_unlock(c);
1599 return count;
1600} 1602}
1601 1603
1602/* 1604/*
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 695577812cf6..624fe4319b24 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -383,7 +383,6 @@ static int __format_metadata(struct dm_cache_metadata *cmd)
383 goto bad; 383 goto bad;
384 384
385 dm_disk_bitset_init(cmd->tm, &cmd->discard_info); 385 dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
386
387 r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root); 386 r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
388 if (r < 0) 387 if (r < 0)
389 goto bad; 388 goto bad;
@@ -789,7 +788,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
789static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) 788static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
790{ 789{
791 if (cmd->data_block_size != data_block_size) { 790 if (cmd->data_block_size != data_block_size) {
792 DMERR("data_block_size (%llu) different from that in metadata (%llu)\n", 791 DMERR("data_block_size (%llu) different from that in metadata (%llu)",
793 (unsigned long long) data_block_size, 792 (unsigned long long) data_block_size,
794 (unsigned long long) cmd->data_block_size); 793 (unsigned long long) cmd->data_block_size);
795 return false; 794 return false;
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index c33f4a6e1d7d..f19c6930a67c 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1361,7 +1361,7 @@ static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
1361 1361
1362static unsigned random_level(dm_cblock_t cblock) 1362static unsigned random_level(dm_cblock_t cblock)
1363{ 1363{
1364 return hash_32_generic(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1); 1364 return hash_32(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
1365} 1365}
1366 1366
1367static int smq_load_mapping(struct dm_cache_policy *p, 1367static int smq_load_mapping(struct dm_cache_policy *p,
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 59b2c50562e4..e04c61e0839e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -989,7 +989,8 @@ static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mod
989 enum cache_metadata_mode old_mode = get_cache_mode(cache); 989 enum cache_metadata_mode old_mode = get_cache_mode(cache);
990 990
991 if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) { 991 if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
992 DMERR("unable to read needs_check flag, setting failure mode"); 992 DMERR("%s: unable to read needs_check flag, setting failure mode.",
993 cache_device_name(cache));
993 new_mode = CM_FAIL; 994 new_mode = CM_FAIL;
994 } 995 }
995 996
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 68a9eb4f3f36..7c6c57216bf2 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -12,6 +12,7 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/key.h>
15#include <linux/bio.h> 16#include <linux/bio.h>
16#include <linux/blkdev.h> 17#include <linux/blkdev.h>
17#include <linux/mempool.h> 18#include <linux/mempool.h>
@@ -23,12 +24,14 @@
23#include <linux/atomic.h> 24#include <linux/atomic.h>
24#include <linux/scatterlist.h> 25#include <linux/scatterlist.h>
25#include <linux/rbtree.h> 26#include <linux/rbtree.h>
27#include <linux/ctype.h>
26#include <asm/page.h> 28#include <asm/page.h>
27#include <asm/unaligned.h> 29#include <asm/unaligned.h>
28#include <crypto/hash.h> 30#include <crypto/hash.h>
29#include <crypto/md5.h> 31#include <crypto/md5.h>
30#include <crypto/algapi.h> 32#include <crypto/algapi.h>
31#include <crypto/skcipher.h> 33#include <crypto/skcipher.h>
34#include <keys/user-type.h>
32 35
33#include <linux/device-mapper.h> 36#include <linux/device-mapper.h>
34 37
@@ -140,8 +143,9 @@ struct crypt_config {
140 143
141 char *cipher; 144 char *cipher;
142 char *cipher_string; 145 char *cipher_string;
146 char *key_string;
143 147
144 struct crypt_iv_operations *iv_gen_ops; 148 const struct crypt_iv_operations *iv_gen_ops;
145 union { 149 union {
146 struct iv_essiv_private essiv; 150 struct iv_essiv_private essiv;
147 struct iv_benbi_private benbi; 151 struct iv_benbi_private benbi;
@@ -758,15 +762,15 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv,
758 return r; 762 return r;
759} 763}
760 764
761static struct crypt_iv_operations crypt_iv_plain_ops = { 765static const struct crypt_iv_operations crypt_iv_plain_ops = {
762 .generator = crypt_iv_plain_gen 766 .generator = crypt_iv_plain_gen
763}; 767};
764 768
765static struct crypt_iv_operations crypt_iv_plain64_ops = { 769static const struct crypt_iv_operations crypt_iv_plain64_ops = {
766 .generator = crypt_iv_plain64_gen 770 .generator = crypt_iv_plain64_gen
767}; 771};
768 772
769static struct crypt_iv_operations crypt_iv_essiv_ops = { 773static const struct crypt_iv_operations crypt_iv_essiv_ops = {
770 .ctr = crypt_iv_essiv_ctr, 774 .ctr = crypt_iv_essiv_ctr,
771 .dtr = crypt_iv_essiv_dtr, 775 .dtr = crypt_iv_essiv_dtr,
772 .init = crypt_iv_essiv_init, 776 .init = crypt_iv_essiv_init,
@@ -774,17 +778,17 @@ static struct crypt_iv_operations crypt_iv_essiv_ops = {
774 .generator = crypt_iv_essiv_gen 778 .generator = crypt_iv_essiv_gen
775}; 779};
776 780
777static struct crypt_iv_operations crypt_iv_benbi_ops = { 781static const struct crypt_iv_operations crypt_iv_benbi_ops = {
778 .ctr = crypt_iv_benbi_ctr, 782 .ctr = crypt_iv_benbi_ctr,
779 .dtr = crypt_iv_benbi_dtr, 783 .dtr = crypt_iv_benbi_dtr,
780 .generator = crypt_iv_benbi_gen 784 .generator = crypt_iv_benbi_gen
781}; 785};
782 786
783static struct crypt_iv_operations crypt_iv_null_ops = { 787static const struct crypt_iv_operations crypt_iv_null_ops = {
784 .generator = crypt_iv_null_gen 788 .generator = crypt_iv_null_gen
785}; 789};
786 790
787static struct crypt_iv_operations crypt_iv_lmk_ops = { 791static const struct crypt_iv_operations crypt_iv_lmk_ops = {
788 .ctr = crypt_iv_lmk_ctr, 792 .ctr = crypt_iv_lmk_ctr,
789 .dtr = crypt_iv_lmk_dtr, 793 .dtr = crypt_iv_lmk_dtr,
790 .init = crypt_iv_lmk_init, 794 .init = crypt_iv_lmk_init,
@@ -793,7 +797,7 @@ static struct crypt_iv_operations crypt_iv_lmk_ops = {
793 .post = crypt_iv_lmk_post 797 .post = crypt_iv_lmk_post
794}; 798};
795 799
796static struct crypt_iv_operations crypt_iv_tcw_ops = { 800static const struct crypt_iv_operations crypt_iv_tcw_ops = {
797 .ctr = crypt_iv_tcw_ctr, 801 .ctr = crypt_iv_tcw_ctr,
798 .dtr = crypt_iv_tcw_dtr, 802 .dtr = crypt_iv_tcw_dtr,
799 .init = crypt_iv_tcw_init, 803 .init = crypt_iv_tcw_init,
@@ -994,7 +998,6 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
994 gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; 998 gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
995 unsigned i, len, remaining_size; 999 unsigned i, len, remaining_size;
996 struct page *page; 1000 struct page *page;
997 struct bio_vec *bvec;
998 1001
999retry: 1002retry:
1000 if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) 1003 if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
@@ -1019,12 +1022,7 @@ retry:
1019 1022
1020 len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size; 1023 len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
1021 1024
1022 bvec = &clone->bi_io_vec[clone->bi_vcnt++]; 1025 bio_add_page(clone, page, len, 0);
1023 bvec->bv_page = page;
1024 bvec->bv_len = len;
1025 bvec->bv_offset = 0;
1026
1027 clone->bi_iter.bi_size += len;
1028 1026
1029 remaining_size -= len; 1027 remaining_size -= len;
1030 } 1028 }
@@ -1471,7 +1469,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode)
1471 return 0; 1469 return 0;
1472} 1470}
1473 1471
1474static int crypt_setkey_allcpus(struct crypt_config *cc) 1472static int crypt_setkey(struct crypt_config *cc)
1475{ 1473{
1476 unsigned subkey_size; 1474 unsigned subkey_size;
1477 int err = 0, i, r; 1475 int err = 0, i, r;
@@ -1490,25 +1488,157 @@ static int crypt_setkey_allcpus(struct crypt_config *cc)
1490 return err; 1488 return err;
1491} 1489}
1492 1490
1491#ifdef CONFIG_KEYS
1492
1493static bool contains_whitespace(const char *str)
1494{
1495 while (*str)
1496 if (isspace(*str++))
1497 return true;
1498 return false;
1499}
1500
1501static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string)
1502{
1503 char *new_key_string, *key_desc;
1504 int ret;
1505 struct key *key;
1506 const struct user_key_payload *ukp;
1507
1508 /*
1509 * Reject key_string with whitespace. dm core currently lacks code for
1510 * proper whitespace escaping in arguments on DM_TABLE_STATUS path.
1511 */
1512 if (contains_whitespace(key_string)) {
1513 DMERR("whitespace chars not allowed in key string");
1514 return -EINVAL;
1515 }
1516
1517 /* look for next ':' separating key_type from key_description */
1518 key_desc = strpbrk(key_string, ":");
1519 if (!key_desc || key_desc == key_string || !strlen(key_desc + 1))
1520 return -EINVAL;
1521
1522 if (strncmp(key_string, "logon:", key_desc - key_string + 1) &&
1523 strncmp(key_string, "user:", key_desc - key_string + 1))
1524 return -EINVAL;
1525
1526 new_key_string = kstrdup(key_string, GFP_KERNEL);
1527 if (!new_key_string)
1528 return -ENOMEM;
1529
1530 key = request_key(key_string[0] == 'l' ? &key_type_logon : &key_type_user,
1531 key_desc + 1, NULL);
1532 if (IS_ERR(key)) {
1533 kzfree(new_key_string);
1534 return PTR_ERR(key);
1535 }
1536
1537 rcu_read_lock();
1538
1539 ukp = user_key_payload(key);
1540 if (!ukp) {
1541 rcu_read_unlock();
1542 key_put(key);
1543 kzfree(new_key_string);
1544 return -EKEYREVOKED;
1545 }
1546
1547 if (cc->key_size != ukp->datalen) {
1548 rcu_read_unlock();
1549 key_put(key);
1550 kzfree(new_key_string);
1551 return -EINVAL;
1552 }
1553
1554 memcpy(cc->key, ukp->data, cc->key_size);
1555
1556 rcu_read_unlock();
1557 key_put(key);
1558
1559 /* clear the flag since following operations may invalidate previously valid key */
1560 clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
1561
1562 ret = crypt_setkey(cc);
1563
1564 /* wipe the kernel key payload copy in each case */
1565 memset(cc->key, 0, cc->key_size * sizeof(u8));
1566
1567 if (!ret) {
1568 set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
1569 kzfree(cc->key_string);
1570 cc->key_string = new_key_string;
1571 } else
1572 kzfree(new_key_string);
1573
1574 return ret;
1575}
1576
1577static int get_key_size(char **key_string)
1578{
1579 char *colon, dummy;
1580 int ret;
1581
1582 if (*key_string[0] != ':')
1583 return strlen(*key_string) >> 1;
1584
1585 /* look for next ':' in key string */
1586 colon = strpbrk(*key_string + 1, ":");
1587 if (!colon)
1588 return -EINVAL;
1589
1590 if (sscanf(*key_string + 1, "%u%c", &ret, &dummy) != 2 || dummy != ':')
1591 return -EINVAL;
1592
1593 *key_string = colon;
1594
1595 /* remaining key string should be :<logon|user>:<key_desc> */
1596
1597 return ret;
1598}
1599
1600#else
1601
1602static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string)
1603{
1604 return -EINVAL;
1605}
1606
1607static int get_key_size(char **key_string)
1608{
1609 return (*key_string[0] == ':') ? -EINVAL : strlen(*key_string) >> 1;
1610}
1611
1612#endif
1613
1493static int crypt_set_key(struct crypt_config *cc, char *key) 1614static int crypt_set_key(struct crypt_config *cc, char *key)
1494{ 1615{
1495 int r = -EINVAL; 1616 int r = -EINVAL;
1496 int key_string_len = strlen(key); 1617 int key_string_len = strlen(key);
1497 1618
1498 /* The key size may not be changed. */
1499 if (cc->key_size != (key_string_len >> 1))
1500 goto out;
1501
1502 /* Hyphen (which gives a key_size of zero) means there is no key. */ 1619 /* Hyphen (which gives a key_size of zero) means there is no key. */
1503 if (!cc->key_size && strcmp(key, "-")) 1620 if (!cc->key_size && strcmp(key, "-"))
1504 goto out; 1621 goto out;
1505 1622
1506 if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) 1623 /* ':' means the key is in kernel keyring, short-circuit normal key processing */
1624 if (key[0] == ':') {
1625 r = crypt_set_keyring_key(cc, key + 1);
1507 goto out; 1626 goto out;
1627 }
1508 1628
1509 set_bit(DM_CRYPT_KEY_VALID, &cc->flags); 1629 /* clear the flag since following operations may invalidate previously valid key */
1630 clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
1510 1631
1511 r = crypt_setkey_allcpus(cc); 1632 /* wipe references to any kernel keyring key */
1633 kzfree(cc->key_string);
1634 cc->key_string = NULL;
1635
1636 if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0)
1637 goto out;
1638
1639 r = crypt_setkey(cc);
1640 if (!r)
1641 set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
1512 1642
1513out: 1643out:
1514 /* Hex key string not needed after here, so wipe it. */ 1644 /* Hex key string not needed after here, so wipe it. */
@@ -1521,8 +1651,10 @@ static int crypt_wipe_key(struct crypt_config *cc)
1521{ 1651{
1522 clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); 1652 clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
1523 memset(&cc->key, 0, cc->key_size * sizeof(u8)); 1653 memset(&cc->key, 0, cc->key_size * sizeof(u8));
1654 kzfree(cc->key_string);
1655 cc->key_string = NULL;
1524 1656
1525 return crypt_setkey_allcpus(cc); 1657 return crypt_setkey(cc);
1526} 1658}
1527 1659
1528static void crypt_dtr(struct dm_target *ti) 1660static void crypt_dtr(struct dm_target *ti)
@@ -1558,6 +1690,7 @@ static void crypt_dtr(struct dm_target *ti)
1558 1690
1559 kzfree(cc->cipher); 1691 kzfree(cc->cipher);
1560 kzfree(cc->cipher_string); 1692 kzfree(cc->cipher_string);
1693 kzfree(cc->key_string);
1561 1694
1562 /* Must zero key material before freeing */ 1695 /* Must zero key material before freeing */
1563 kzfree(cc); 1696 kzfree(cc);
@@ -1726,12 +1859,13 @@ bad_mem:
1726 1859
1727/* 1860/*
1728 * Construct an encryption mapping: 1861 * Construct an encryption mapping:
1729 * <cipher> <key> <iv_offset> <dev_path> <start> 1862 * <cipher> [<key>|:<key_size>:<user|logon>:<key_description>] <iv_offset> <dev_path> <start>
1730 */ 1863 */
1731static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) 1864static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1732{ 1865{
1733 struct crypt_config *cc; 1866 struct crypt_config *cc;
1734 unsigned int key_size, opt_params; 1867 int key_size;
1868 unsigned int opt_params;
1735 unsigned long long tmpll; 1869 unsigned long long tmpll;
1736 int ret; 1870 int ret;
1737 size_t iv_size_padding; 1871 size_t iv_size_padding;
@@ -1748,7 +1882,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1748 return -EINVAL; 1882 return -EINVAL;
1749 } 1883 }
1750 1884
1751 key_size = strlen(argv[1]) >> 1; 1885 key_size = get_key_size(&argv[1]);
1886 if (key_size < 0) {
1887 ti->error = "Cannot parse key size";
1888 return -EINVAL;
1889 }
1752 1890
1753 cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL); 1891 cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
1754 if (!cc) { 1892 if (!cc) {
@@ -1955,10 +2093,13 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
1955 case STATUSTYPE_TABLE: 2093 case STATUSTYPE_TABLE:
1956 DMEMIT("%s ", cc->cipher_string); 2094 DMEMIT("%s ", cc->cipher_string);
1957 2095
1958 if (cc->key_size > 0) 2096 if (cc->key_size > 0) {
1959 for (i = 0; i < cc->key_size; i++) 2097 if (cc->key_string)
1960 DMEMIT("%02x", cc->key[i]); 2098 DMEMIT(":%u:%s", cc->key_size, cc->key_string);
1961 else 2099 else
2100 for (i = 0; i < cc->key_size; i++)
2101 DMEMIT("%02x", cc->key[i]);
2102 } else
1962 DMEMIT("-"); 2103 DMEMIT("-");
1963 2104
1964 DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, 2105 DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
@@ -2014,7 +2155,7 @@ static void crypt_resume(struct dm_target *ti)
2014static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) 2155static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
2015{ 2156{
2016 struct crypt_config *cc = ti->private; 2157 struct crypt_config *cc = ti->private;
2017 int ret = -EINVAL; 2158 int key_size, ret = -EINVAL;
2018 2159
2019 if (argc < 2) 2160 if (argc < 2)
2020 goto error; 2161 goto error;
@@ -2025,6 +2166,13 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
2025 return -EINVAL; 2166 return -EINVAL;
2026 } 2167 }
2027 if (argc == 3 && !strcasecmp(argv[1], "set")) { 2168 if (argc == 3 && !strcasecmp(argv[1], "set")) {
2169 /* The key size may not be changed. */
2170 key_size = get_key_size(&argv[2]);
2171 if (key_size < 0 || cc->key_size != key_size) {
2172 memset(argv[2], '0', strlen(argv[2]));
2173 return -EINVAL;
2174 }
2175
2028 ret = crypt_set_key(cc, argv[2]); 2176 ret = crypt_set_key(cc, argv[2]);
2029 if (ret) 2177 if (ret)
2030 return ret; 2178 return ret;
@@ -2068,7 +2216,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
2068 2216
2069static struct target_type crypt_target = { 2217static struct target_type crypt_target = {
2070 .name = "crypt", 2218 .name = "crypt",
2071 .version = {1, 14, 1}, 2219 .version = {1, 15, 0},
2072 .module = THIS_MODULE, 2220 .module = THIS_MODULE,
2073 .ctr = crypt_ctr, 2221 .ctr = crypt_ctr,
2074 .dtr = crypt_dtr, 2222 .dtr = crypt_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 6a2e8dd44a1b..13305a182611 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -36,7 +36,8 @@ struct flakey_c {
36}; 36};
37 37
38enum feature_flag_bits { 38enum feature_flag_bits {
39 DROP_WRITES 39 DROP_WRITES,
40 ERROR_WRITES
40}; 41};
41 42
42struct per_bio_data { 43struct per_bio_data {
@@ -76,6 +77,25 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
76 if (test_and_set_bit(DROP_WRITES, &fc->flags)) { 77 if (test_and_set_bit(DROP_WRITES, &fc->flags)) {
77 ti->error = "Feature drop_writes duplicated"; 78 ti->error = "Feature drop_writes duplicated";
78 return -EINVAL; 79 return -EINVAL;
80 } else if (test_bit(ERROR_WRITES, &fc->flags)) {
81 ti->error = "Feature drop_writes conflicts with feature error_writes";
82 return -EINVAL;
83 }
84
85 continue;
86 }
87
88 /*
89 * error_writes
90 */
91 if (!strcasecmp(arg_name, "error_writes")) {
92 if (test_and_set_bit(ERROR_WRITES, &fc->flags)) {
93 ti->error = "Feature error_writes duplicated";
94 return -EINVAL;
95
96 } else if (test_bit(DROP_WRITES, &fc->flags)) {
97 ti->error = "Feature error_writes conflicts with feature drop_writes";
98 return -EINVAL;
79 } 99 }
80 100
81 continue; 101 continue;
@@ -135,6 +155,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
135 if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { 155 if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
136 ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; 156 ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
137 return -EINVAL; 157 return -EINVAL;
158
159 } else if (test_bit(ERROR_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
160 ti->error = "error_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
161 return -EINVAL;
138 } 162 }
139 163
140 return 0; 164 return 0;
@@ -200,11 +224,13 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
200 224
201 if (!(fc->up_interval + fc->down_interval)) { 225 if (!(fc->up_interval + fc->down_interval)) {
202 ti->error = "Total (up + down) interval is zero"; 226 ti->error = "Total (up + down) interval is zero";
227 r = -EINVAL;
203 goto bad; 228 goto bad;
204 } 229 }
205 230
206 if (fc->up_interval + fc->down_interval < fc->up_interval) { 231 if (fc->up_interval + fc->down_interval < fc->up_interval) {
207 ti->error = "Interval overflow"; 232 ti->error = "Interval overflow";
233 r = -EINVAL;
208 goto bad; 234 goto bad;
209 } 235 }
210 236
@@ -289,22 +315,27 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
289 pb->bio_submitted = true; 315 pb->bio_submitted = true;
290 316
291 /* 317 /*
292 * Error reads if neither corrupt_bio_byte or drop_writes are set. 318 * Error reads if neither corrupt_bio_byte or drop_writes or error_writes are set.
293 * Otherwise, flakey_end_io() will decide if the reads should be modified. 319 * Otherwise, flakey_end_io() will decide if the reads should be modified.
294 */ 320 */
295 if (bio_data_dir(bio) == READ) { 321 if (bio_data_dir(bio) == READ) {
296 if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags)) 322 if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
323 !test_bit(ERROR_WRITES, &fc->flags))
297 return -EIO; 324 return -EIO;
298 goto map_bio; 325 goto map_bio;
299 } 326 }
300 327
301 /* 328 /*
302 * Drop writes? 329 * Drop or error writes?
303 */ 330 */
304 if (test_bit(DROP_WRITES, &fc->flags)) { 331 if (test_bit(DROP_WRITES, &fc->flags)) {
305 bio_endio(bio); 332 bio_endio(bio);
306 return DM_MAPIO_SUBMITTED; 333 return DM_MAPIO_SUBMITTED;
307 } 334 }
335 else if (test_bit(ERROR_WRITES, &fc->flags)) {
336 bio_io_error(bio);
337 return DM_MAPIO_SUBMITTED;
338 }
308 339
309 /* 340 /*
310 * Corrupt matching writes. 341 * Corrupt matching writes.
@@ -340,10 +371,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
340 */ 371 */
341 corrupt_bio_data(bio, fc); 372 corrupt_bio_data(bio, fc);
342 373
343 } else if (!test_bit(DROP_WRITES, &fc->flags)) { 374 } else if (!test_bit(DROP_WRITES, &fc->flags) &&
375 !test_bit(ERROR_WRITES, &fc->flags)) {
344 /* 376 /*
345 * Error read during the down_interval if drop_writes 377 * Error read during the down_interval if drop_writes
346 * wasn't configured. 378 * and error_writes were not configured.
347 */ 379 */
348 return -EIO; 380 return -EIO;
349 } 381 }
@@ -357,7 +389,7 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
357{ 389{
358 unsigned sz = 0; 390 unsigned sz = 0;
359 struct flakey_c *fc = ti->private; 391 struct flakey_c *fc = ti->private;
360 unsigned drop_writes; 392 unsigned drop_writes, error_writes;
361 393
362 switch (type) { 394 switch (type) {
363 case STATUSTYPE_INFO: 395 case STATUSTYPE_INFO:
@@ -370,10 +402,13 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
370 fc->down_interval); 402 fc->down_interval);
371 403
372 drop_writes = test_bit(DROP_WRITES, &fc->flags); 404 drop_writes = test_bit(DROP_WRITES, &fc->flags);
373 DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5); 405 error_writes = test_bit(ERROR_WRITES, &fc->flags);
406 DMEMIT("%u ", drop_writes + error_writes + (fc->corrupt_bio_byte > 0) * 5);
374 407
375 if (drop_writes) 408 if (drop_writes)
376 DMEMIT("drop_writes "); 409 DMEMIT("drop_writes ");
410 else if (error_writes)
411 DMEMIT("error_writes ");
377 412
378 if (fc->corrupt_bio_byte) 413 if (fc->corrupt_bio_byte)
379 DMEMIT("corrupt_bio_byte %u %c %u %u ", 414 DMEMIT("corrupt_bio_byte %u %c %u %u ",
@@ -410,7 +445,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
410 445
411static struct target_type flakey_target = { 446static struct target_type flakey_target = {
412 .name = "flakey", 447 .name = "flakey",
413 .version = {1, 3, 1}, 448 .version = {1, 4, 0},
414 .module = THIS_MODULE, 449 .module = THIS_MODULE,
415 .ctr = flakey_ctr, 450 .ctr = flakey_ctr,
416 .dtr = flakey_dtr, 451 .dtr = flakey_dtr,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 0bf1a12e35fe..03940bf36f6c 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -162,7 +162,10 @@ struct dpages {
162 struct page **p, unsigned long *len, unsigned *offset); 162 struct page **p, unsigned long *len, unsigned *offset);
163 void (*next_page)(struct dpages *dp); 163 void (*next_page)(struct dpages *dp);
164 164
165 unsigned context_u; 165 union {
166 unsigned context_u;
167 struct bvec_iter context_bi;
168 };
166 void *context_ptr; 169 void *context_ptr;
167 170
168 void *vma_invalidate_address; 171 void *vma_invalidate_address;
@@ -204,25 +207,36 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse
204static void bio_get_page(struct dpages *dp, struct page **p, 207static void bio_get_page(struct dpages *dp, struct page **p,
205 unsigned long *len, unsigned *offset) 208 unsigned long *len, unsigned *offset)
206{ 209{
207 struct bio_vec *bvec = dp->context_ptr; 210 struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr,
208 *p = bvec->bv_page; 211 dp->context_bi);
209 *len = bvec->bv_len - dp->context_u; 212
210 *offset = bvec->bv_offset + dp->context_u; 213 *p = bvec.bv_page;
214 *len = bvec.bv_len;
215 *offset = bvec.bv_offset;
216
217 /* avoid figuring it out again in bio_next_page() */
218 dp->context_bi.bi_sector = (sector_t)bvec.bv_len;
211} 219}
212 220
213static void bio_next_page(struct dpages *dp) 221static void bio_next_page(struct dpages *dp)
214{ 222{
215 struct bio_vec *bvec = dp->context_ptr; 223 unsigned int len = (unsigned int)dp->context_bi.bi_sector;
216 dp->context_ptr = bvec + 1; 224
217 dp->context_u = 0; 225 bvec_iter_advance((struct bio_vec *)dp->context_ptr,
226 &dp->context_bi, len);
218} 227}
219 228
220static void bio_dp_init(struct dpages *dp, struct bio *bio) 229static void bio_dp_init(struct dpages *dp, struct bio *bio)
221{ 230{
222 dp->get_page = bio_get_page; 231 dp->get_page = bio_get_page;
223 dp->next_page = bio_next_page; 232 dp->next_page = bio_next_page;
224 dp->context_ptr = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); 233
225 dp->context_u = bio->bi_iter.bi_bvec_done; 234 /*
235 * We just use bvec iterator to retrieve pages, so it is ok to
236 * access the bvec table directly here
237 */
238 dp->context_ptr = bio->bi_io_vec;
239 dp->context_bi = bio->bi_iter;
226} 240}
227 241
228/* 242/*
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 966eb4b61aed..c72a77048b73 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1697,7 +1697,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
1697{ 1697{
1698 struct dm_ioctl *dmi; 1698 struct dm_ioctl *dmi;
1699 int secure_data; 1699 int secure_data;
1700 const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data); 1700 const size_t minimum_data_size = offsetof(struct dm_ioctl, data);
1701 1701
1702 if (copy_from_user(param_kernel, user, minimum_data_size)) 1702 if (copy_from_user(param_kernel, user, minimum_data_size))
1703 return -EFAULT; 1703 return -EFAULT;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index e477af8596e2..6400cffb986d 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -372,16 +372,13 @@ static int __pg_init_all_paths(struct multipath *m)
372 return atomic_read(&m->pg_init_in_progress); 372 return atomic_read(&m->pg_init_in_progress);
373} 373}
374 374
375static int pg_init_all_paths(struct multipath *m) 375static void pg_init_all_paths(struct multipath *m)
376{ 376{
377 int r;
378 unsigned long flags; 377 unsigned long flags;
379 378
380 spin_lock_irqsave(&m->lock, flags); 379 spin_lock_irqsave(&m->lock, flags);
381 r = __pg_init_all_paths(m); 380 __pg_init_all_paths(m);
382 spin_unlock_irqrestore(&m->lock, flags); 381 spin_unlock_irqrestore(&m->lock, flags);
383
384 return r;
385} 382}
386 383
387static void __switch_pg(struct multipath *m, struct priority_group *pg) 384static void __switch_pg(struct multipath *m, struct priority_group *pg)
@@ -583,16 +580,17 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
583 * .request_fn stacked on blk-mq path(s) and 580 * .request_fn stacked on blk-mq path(s) and
584 * blk-mq stacked on blk-mq path(s). 581 * blk-mq stacked on blk-mq path(s).
585 */ 582 */
586 *__clone = blk_mq_alloc_request(bdev_get_queue(bdev), 583 clone = blk_mq_alloc_request(bdev_get_queue(bdev),
587 rq_data_dir(rq), BLK_MQ_REQ_NOWAIT); 584 rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
588 if (IS_ERR(*__clone)) { 585 if (IS_ERR(clone)) {
589 /* ENOMEM, requeue */ 586 /* EBUSY, ENODEV or EWOULDBLOCK: requeue */
590 clear_request_fn_mpio(m, map_context); 587 clear_request_fn_mpio(m, map_context);
591 return r; 588 return r;
592 } 589 }
593 (*__clone)->bio = (*__clone)->biotail = NULL; 590 clone->bio = clone->biotail = NULL;
594 (*__clone)->rq_disk = bdev->bd_disk; 591 clone->rq_disk = bdev->bd_disk;
595 (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT; 592 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
593 *__clone = clone;
596 } 594 }
597 595
598 if (pgpath->pg->ps.type->start_io) 596 if (pgpath->pg->ps.type->start_io)
@@ -852,18 +850,22 @@ retain:
852 attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); 850 attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
853 if (attached_handler_name) { 851 if (attached_handler_name) {
854 /* 852 /*
853 * Clear any hw_handler_params associated with a
854 * handler that isn't already attached.
855 */
856 if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) {
857 kfree(m->hw_handler_params);
858 m->hw_handler_params = NULL;
859 }
860
861 /*
855 * Reset hw_handler_name to match the attached handler 862 * Reset hw_handler_name to match the attached handler
856 * and clear any hw_handler_params associated with the
857 * ignored handler.
858 * 863 *
859 * NB. This modifies the table line to show the actual 864 * NB. This modifies the table line to show the actual
860 * handler instead of the original table passed in. 865 * handler instead of the original table passed in.
861 */ 866 */
862 kfree(m->hw_handler_name); 867 kfree(m->hw_handler_name);
863 m->hw_handler_name = attached_handler_name; 868 m->hw_handler_name = attached_handler_name;
864
865 kfree(m->hw_handler_params);
866 m->hw_handler_params = NULL;
867 } 869 }
868 } 870 }
869 871
@@ -1002,6 +1004,8 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
1002 } 1004 }
1003 1005
1004 m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); 1006 m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
1007 if (!m->hw_handler_name)
1008 return -EINVAL;
1005 1009
1006 if (hw_argc > 1) { 1010 if (hw_argc > 1) {
1007 char *p; 1011 char *p;
@@ -1362,7 +1366,7 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
1362 char dummy; 1366 char dummy;
1363 1367
1364 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || 1368 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
1365 (pgnum > m->nr_priority_groups)) { 1369 !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
1366 DMWARN("invalid PG number supplied to switch_pg_num"); 1370 DMWARN("invalid PG number supplied to switch_pg_num");
1367 return -EINVAL; 1371 return -EINVAL;
1368 } 1372 }
@@ -1394,7 +1398,7 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
1394 char dummy; 1398 char dummy;
1395 1399
1396 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || 1400 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
1397 (pgnum > m->nr_priority_groups)) { 1401 !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
1398 DMWARN("invalid PG number supplied to bypass_pg"); 1402 DMWARN("invalid PG number supplied to bypass_pg");
1399 return -EINVAL; 1403 return -EINVAL;
1400 } 1404 }
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 953159d9a825..b8f978e551d7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -160,7 +160,6 @@ struct raid_dev {
160 CTR_FLAG_DAEMON_SLEEP | \ 160 CTR_FLAG_DAEMON_SLEEP | \
161 CTR_FLAG_MIN_RECOVERY_RATE | \ 161 CTR_FLAG_MIN_RECOVERY_RATE | \
162 CTR_FLAG_MAX_RECOVERY_RATE | \ 162 CTR_FLAG_MAX_RECOVERY_RATE | \
163 CTR_FLAG_MAX_WRITE_BEHIND | \
164 CTR_FLAG_STRIPE_CACHE | \ 163 CTR_FLAG_STRIPE_CACHE | \
165 CTR_FLAG_REGION_SIZE | \ 164 CTR_FLAG_REGION_SIZE | \
166 CTR_FLAG_DELTA_DISKS | \ 165 CTR_FLAG_DELTA_DISKS | \
@@ -171,7 +170,6 @@ struct raid_dev {
171 CTR_FLAG_DAEMON_SLEEP | \ 170 CTR_FLAG_DAEMON_SLEEP | \
172 CTR_FLAG_MIN_RECOVERY_RATE | \ 171 CTR_FLAG_MIN_RECOVERY_RATE | \
173 CTR_FLAG_MAX_RECOVERY_RATE | \ 172 CTR_FLAG_MAX_RECOVERY_RATE | \
174 CTR_FLAG_MAX_WRITE_BEHIND | \
175 CTR_FLAG_STRIPE_CACHE | \ 173 CTR_FLAG_STRIPE_CACHE | \
176 CTR_FLAG_REGION_SIZE | \ 174 CTR_FLAG_REGION_SIZE | \
177 CTR_FLAG_DELTA_DISKS | \ 175 CTR_FLAG_DELTA_DISKS | \
@@ -2050,16 +2048,17 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
2050 2048
2051 mddev->reshape_position = MaxSector; 2049 mddev->reshape_position = MaxSector;
2052 2050
2051 mddev->raid_disks = le32_to_cpu(sb->num_devices);
2052 mddev->level = le32_to_cpu(sb->level);
2053 mddev->layout = le32_to_cpu(sb->layout);
2054 mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors);
2055
2053 /* 2056 /*
2054 * Reshaping is supported, e.g. reshape_position is valid 2057 * Reshaping is supported, e.g. reshape_position is valid
2055 * in superblock and superblock content is authoritative. 2058 * in superblock and superblock content is authoritative.
2056 */ 2059 */
2057 if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) { 2060 if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190) {
2058 /* Superblock is authoritative wrt given raid set layout! */ 2061 /* Superblock is authoritative wrt given raid set layout! */
2059 mddev->raid_disks = le32_to_cpu(sb->num_devices);
2060 mddev->level = le32_to_cpu(sb->level);
2061 mddev->layout = le32_to_cpu(sb->layout);
2062 mddev->chunk_sectors = le32_to_cpu(sb->stripe_sectors);
2063 mddev->new_level = le32_to_cpu(sb->new_level); 2062 mddev->new_level = le32_to_cpu(sb->new_level);
2064 mddev->new_layout = le32_to_cpu(sb->new_layout); 2063 mddev->new_layout = le32_to_cpu(sb->new_layout);
2065 mddev->new_chunk_sectors = le32_to_cpu(sb->new_stripe_sectors); 2064 mddev->new_chunk_sectors = le32_to_cpu(sb->new_stripe_sectors);
@@ -2087,38 +2086,44 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
2087 /* 2086 /*
2088 * No takeover/reshaping, because we don't have the extended v1.9.0 metadata 2087 * No takeover/reshaping, because we don't have the extended v1.9.0 metadata
2089 */ 2088 */
2090 if (le32_to_cpu(sb->level) != mddev->new_level) { 2089 struct raid_type *rt_cur = get_raid_type_by_ll(mddev->level, mddev->layout);
2091 DMERR("Reshaping/takeover raid sets not yet supported. (raid level/stripes/size change)"); 2090 struct raid_type *rt_new = get_raid_type_by_ll(mddev->new_level, mddev->new_layout);
2092 return -EINVAL;
2093 }
2094 if (le32_to_cpu(sb->layout) != mddev->new_layout) {
2095 DMERR("Reshaping raid sets not yet supported. (raid layout change)");
2096 DMERR(" 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout);
2097 DMERR(" Old layout: %s w/ %d copies",
2098 raid10_md_layout_to_format(le32_to_cpu(sb->layout)),
2099 raid10_md_layout_to_copies(le32_to_cpu(sb->layout)));
2100 DMERR(" New layout: %s w/ %d copies",
2101 raid10_md_layout_to_format(mddev->layout),
2102 raid10_md_layout_to_copies(mddev->layout));
2103 return -EINVAL;
2104 }
2105 if (le32_to_cpu(sb->stripe_sectors) != mddev->new_chunk_sectors) {
2106 DMERR("Reshaping raid sets not yet supported. (stripe sectors change)");
2107 return -EINVAL;
2108 }
2109 2091
2110 /* We can only change the number of devices in raid1 with old (i.e. pre 1.0.7) metadata */ 2092 if (rs_takeover_requested(rs)) {
2111 if (!rt_is_raid1(rs->raid_type) && 2093 if (rt_cur && rt_new)
2112 (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { 2094 DMERR("Takeover raid sets from %s to %s not yet supported by metadata. (raid level change)",
2113 DMERR("Reshaping raid sets not yet supported. (device count change from %u to %u)", 2095 rt_cur->name, rt_new->name);
2114 sb->num_devices, mddev->raid_disks); 2096 else
2097 DMERR("Takeover raid sets not yet supported by metadata. (raid level change)");
2098 return -EINVAL;
2099 } else if (rs_reshape_requested(rs)) {
2100 DMERR("Reshaping raid sets not yet supported by metadata. (raid layout change keeping level)");
2101 if (mddev->layout != mddev->new_layout) {
2102 if (rt_cur && rt_new)
2103 DMERR(" current layout %s vs new layout %s",
2104 rt_cur->name, rt_new->name);
2105 else
2106 DMERR(" current layout 0x%X vs new layout 0x%X",
2107 le32_to_cpu(sb->layout), mddev->new_layout);
2108 }
2109 if (mddev->chunk_sectors != mddev->new_chunk_sectors)
2110 DMERR(" current stripe sectors %u vs new stripe sectors %u",
2111 mddev->chunk_sectors, mddev->new_chunk_sectors);
2112 if (rs->delta_disks)
2113 DMERR(" current %u disks vs new %u disks",
2114 mddev->raid_disks, mddev->raid_disks + rs->delta_disks);
2115 if (rs_is_raid10(rs)) {
2116 DMERR(" Old layout: %s w/ %u copies",
2117 raid10_md_layout_to_format(mddev->layout),
2118 raid10_md_layout_to_copies(mddev->layout));
2119 DMERR(" New layout: %s w/ %u copies",
2120 raid10_md_layout_to_format(mddev->new_layout),
2121 raid10_md_layout_to_copies(mddev->new_layout));
2122 }
2115 return -EINVAL; 2123 return -EINVAL;
2116 } 2124 }
2117 2125
2118 DMINFO("Discovered old metadata format; upgrading to extended metadata format"); 2126 DMINFO("Discovered old metadata format; upgrading to extended metadata format");
2119
2120 /* Table line is checked vs. authoritative superblock */
2121 rs_set_new(rs);
2122 } 2127 }
2123 2128
2124 if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) 2129 if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
@@ -2211,7 +2216,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
2211 continue; 2216 continue;
2212 2217
2213 if (role != r->raid_disk) { 2218 if (role != r->raid_disk) {
2214 if (__is_raid10_near(mddev->layout)) { 2219 if (rs_is_raid10(rs) && __is_raid10_near(mddev->layout)) {
2215 if (mddev->raid_disks % __raid10_near_copies(mddev->layout) || 2220 if (mddev->raid_disks % __raid10_near_copies(mddev->layout) ||
2216 rs->raid_disks % rs->raid10_copies) { 2221 rs->raid_disks % rs->raid10_copies) {
2217 rs->ti->error = 2222 rs->ti->error =
@@ -2994,6 +2999,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2994 } 2999 }
2995 } 3000 }
2996 3001
3002 /* Disable/enable discard support on raid set. */
3003 configure_discard_support(rs);
3004
2997 mddev_unlock(&rs->md); 3005 mddev_unlock(&rs->md);
2998 return 0; 3006 return 0;
2999 3007
@@ -3580,12 +3588,6 @@ static int raid_preresume(struct dm_target *ti)
3580 if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) 3588 if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags))
3581 rs_update_sbs(rs); 3589 rs_update_sbs(rs);
3582 3590
3583 /*
3584 * Disable/enable discard support on raid set after any
3585 * conversion, because devices can have been added
3586 */
3587 configure_discard_support(rs);
3588
3589 /* Load the bitmap from disk unless raid0 */ 3591 /* Load the bitmap from disk unless raid0 */
3590 r = __load_dirty_region_bitmap(rs); 3592 r = __load_dirty_region_bitmap(rs);
3591 if (r) 3593 if (r)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b2a9e2d161e4..9d7275fb541a 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -23,11 +23,7 @@ static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
23#define RESERVED_REQUEST_BASED_IOS 256 23#define RESERVED_REQUEST_BASED_IOS 256
24static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; 24static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
25 25
26#ifdef CONFIG_DM_MQ_DEFAULT 26static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT);
27static bool use_blk_mq = true;
28#else
29static bool use_blk_mq = false;
30#endif
31 27
32bool dm_use_blk_mq_default(void) 28bool dm_use_blk_mq_default(void)
33{ 29{
@@ -210,6 +206,9 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
210 */ 206 */
211static void rq_completed(struct mapped_device *md, int rw, bool run_queue) 207static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
212{ 208{
209 struct request_queue *q = md->queue;
210 unsigned long flags;
211
213 atomic_dec(&md->pending[rw]); 212 atomic_dec(&md->pending[rw]);
214 213
215 /* nudge anyone waiting on suspend queue */ 214 /* nudge anyone waiting on suspend queue */
@@ -222,8 +221,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
222 * back into ->request_fn() could deadlock attempting to grab the 221 * back into ->request_fn() could deadlock attempting to grab the
223 * queue lock again. 222 * queue lock again.
224 */ 223 */
225 if (!md->queue->mq_ops && run_queue) 224 if (!q->mq_ops && run_queue) {
226 blk_run_queue_async(md->queue); 225 spin_lock_irqsave(q->queue_lock, flags);
226 blk_run_queue_async(q);
227 spin_unlock_irqrestore(q->queue_lock, flags);
228 }
227 229
228 /* 230 /*
229 * dm_put() must be at the end of this function. See the comment above 231 * dm_put() must be at the end of this function. See the comment above
@@ -798,7 +800,7 @@ static void dm_old_request_fn(struct request_queue *q)
798 pos = blk_rq_pos(rq); 800 pos = blk_rq_pos(rq);
799 801
800 if ((dm_old_request_peeked_before_merge_deadline(md) && 802 if ((dm_old_request_peeked_before_merge_deadline(md) &&
801 md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 && 803 md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
802 md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) || 804 md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
803 (ti->type->busy && ti->type->busy(ti))) { 805 (ti->type->busy && ti->type->busy(ti))) {
804 blk_delay_queue(q, 10); 806 blk_delay_queue(q, 10);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index c4b53b332607..0a427de23ed2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -871,7 +871,7 @@ static int dm_table_determine_type(struct dm_table *t)
871{ 871{
872 unsigned i; 872 unsigned i;
873 unsigned bio_based = 0, request_based = 0, hybrid = 0; 873 unsigned bio_based = 0, request_based = 0, hybrid = 0;
874 bool verify_blk_mq = false; 874 unsigned sq_count = 0, mq_count = 0;
875 struct dm_target *tgt; 875 struct dm_target *tgt;
876 struct dm_dev_internal *dd; 876 struct dm_dev_internal *dd;
877 struct list_head *devices = dm_table_get_devices(t); 877 struct list_head *devices = dm_table_get_devices(t);
@@ -924,12 +924,6 @@ static int dm_table_determine_type(struct dm_table *t)
924 924
925 BUG_ON(!request_based); /* No targets in this table */ 925 BUG_ON(!request_based); /* No targets in this table */
926 926
927 if (list_empty(devices) && __table_type_request_based(live_md_type)) {
928 /* inherit live MD type */
929 t->type = live_md_type;
930 return 0;
931 }
932
933 /* 927 /*
934 * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by 928 * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
935 * having a compatible target use dm_table_set_type. 929 * having a compatible target use dm_table_set_type.
@@ -948,6 +942,19 @@ verify_rq_based:
948 return -EINVAL; 942 return -EINVAL;
949 } 943 }
950 944
945 if (list_empty(devices)) {
946 int srcu_idx;
947 struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
948
949 /* inherit live table's type and all_blk_mq */
950 if (live_table) {
951 t->type = live_table->type;
952 t->all_blk_mq = live_table->all_blk_mq;
953 }
954 dm_put_live_table(t->md, srcu_idx);
955 return 0;
956 }
957
951 /* Non-request-stackable devices can't be used for request-based dm */ 958 /* Non-request-stackable devices can't be used for request-based dm */
952 list_for_each_entry(dd, devices, list) { 959 list_for_each_entry(dd, devices, list) {
953 struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); 960 struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
@@ -959,19 +966,19 @@ verify_rq_based:
959 } 966 }
960 967
961 if (q->mq_ops) 968 if (q->mq_ops)
962 verify_blk_mq = true; 969 mq_count++;
970 else
971 sq_count++;
963 } 972 }
973 if (sq_count && mq_count) {
974 DMERR("table load rejected: not all devices are blk-mq request-stackable");
975 return -EINVAL;
976 }
977 t->all_blk_mq = mq_count > 0;
964 978
965 if (verify_blk_mq) { 979 if (t->type == DM_TYPE_MQ_REQUEST_BASED && !t->all_blk_mq) {
966 /* verify _all_ devices in the table are blk-mq devices */ 980 DMERR("table load rejected: all devices are not blk-mq request-stackable");
967 list_for_each_entry(dd, devices, list) 981 return -EINVAL;
968 if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) {
969 DMERR("table load rejected: not all devices"
970 " are blk-mq request-stackable");
971 return -EINVAL;
972 }
973
974 t->all_blk_mq = true;
975 } 982 }
976 983
977 return 0; 984 return 0;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 0aba34a7b3b3..7335d8a3fc47 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -868,7 +868,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
868 868
869 r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev); 869 r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
870 if (r) { 870 if (r) {
871 ti->error = "Data device lookup failed"; 871 ti->error = "Hash device lookup failed";
872 goto bad; 872 goto bad;
873 } 873 }
874 874
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ffa97b742a68..3086da5664f3 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1886,9 +1886,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
1886 set_bit(DMF_FREEING, &md->flags); 1886 set_bit(DMF_FREEING, &md->flags);
1887 spin_unlock(&_minor_lock); 1887 spin_unlock(&_minor_lock);
1888 1888
1889 spin_lock_irq(q->queue_lock); 1889 blk_set_queue_dying(q);
1890 queue_flag_set(QUEUE_FLAG_DYING, q);
1891 spin_unlock_irq(q->queue_lock);
1892 1890
1893 if (dm_request_based(md) && md->kworker_task) 1891 if (dm_request_based(md) && md->kworker_task)
1894 kthread_flush_worker(&md->kworker); 1892 kthread_flush_worker(&md->kworker);
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index e83047cbb2da..7938cd21fa4c 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -700,13 +700,11 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_
700{ 700{
701 int r; 701 int r;
702 unsigned i; 702 unsigned i;
703 uint32_t nr_entries;
704 struct dm_btree_value_type *vt = &info->value_type; 703 struct dm_btree_value_type *vt = &info->value_type;
705 704
706 BUG_ON(le32_to_cpu(ab->nr_entries)); 705 BUG_ON(le32_to_cpu(ab->nr_entries));
707 BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); 706 BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
708 707
709 nr_entries = le32_to_cpu(ab->nr_entries);
710 for (i = 0; i < new_nr; i++) { 708 for (i = 0; i < new_nr; i++) {
711 r = fn(base + i, element_at(info, ab, i), context); 709 r = fn(base + i, element_at(info, ab, i), context);
712 if (r) 710 if (r)
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 1e33dd51c21f..a6dde7cab458 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -18,6 +18,8 @@
18 18
19/*----------------------------------------------------------------*/ 19/*----------------------------------------------------------------*/
20 20
21#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
22
21/* 23/*
22 * This is a read/write semaphore with a couple of differences. 24 * This is a read/write semaphore with a couple of differences.
23 * 25 *
@@ -302,6 +304,18 @@ static void report_recursive_bug(dm_block_t b, int r)
302 (unsigned long long) b); 304 (unsigned long long) b);
303} 305}
304 306
307#else /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */
308
309#define bl_init(x) do { } while (0)
310#define bl_down_read(x) 0
311#define bl_down_read_nonblock(x) 0
312#define bl_up_read(x) do { } while (0)
313#define bl_down_write(x) 0
314#define bl_up_write(x) do { } while (0)
315#define report_recursive_bug(x, y) do { } while (0)
316
317#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */
318
305/*----------------------------------------------------------------*/ 319/*----------------------------------------------------------------*/
306 320
307/* 321/*
@@ -330,8 +344,11 @@ EXPORT_SYMBOL_GPL(dm_block_data);
330 344
331struct buffer_aux { 345struct buffer_aux {
332 struct dm_block_validator *validator; 346 struct dm_block_validator *validator;
333 struct block_lock lock;
334 int write_locked; 347 int write_locked;
348
349#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
350 struct block_lock lock;
351#endif
335}; 352};
336 353
337static void dm_block_manager_alloc_callback(struct dm_buffer *buf) 354static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 306d2e4502c4..4c28608a0c94 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -464,7 +464,8 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
464 ll->nr_allocated--; 464 ll->nr_allocated--;
465 le32_add_cpu(&ie_disk.nr_free, 1); 465 le32_add_cpu(&ie_disk.nr_free, 1);
466 ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit)); 466 ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
467 } 467 } else
468 *ev = SM_NONE;
468 469
469 return ll->save_ie(ll, index, &ie_disk); 470 return ll->save_ie(ll, index, &ie_disk);
470} 471}
@@ -547,7 +548,6 @@ static int metadata_ll_init_index(struct ll_disk *ll)
547 if (r < 0) 548 if (r < 0)
548 return r; 549 return r;
549 550
550 memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
551 ll->bitmap_root = dm_block_location(b); 551 ll->bitmap_root = dm_block_location(b);
552 552
553 dm_tm_unlock(ll->tm, b); 553 dm_tm_unlock(ll->tm, b);
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 7e44005595c1..20557e2c60c6 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -775,17 +775,15 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
775 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); 775 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
776 776
777 r = sm_ll_new_metadata(&smm->ll, tm); 777 r = sm_ll_new_metadata(&smm->ll, tm);
778 if (!r) {
779 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
780 nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
781 r = sm_ll_extend(&smm->ll, nr_blocks);
782 }
783 memcpy(&smm->sm, &ops, sizeof(smm->sm));
778 if (r) 784 if (r)
779 return r; 785 return r;
780 786
781 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
782 nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
783 r = sm_ll_extend(&smm->ll, nr_blocks);
784 if (r)
785 return r;
786
787 memcpy(&smm->sm, &ops, sizeof(smm->sm));
788
789 /* 787 /*
790 * Now we need to update the newly created data structures with the 788 * Now we need to update the newly created data structures with the
791 * allocated blocks that they were built from. 789 * allocated blocks that they were built from.
diff --git a/include/uapi/linux/dm-log-userspace.h b/include/uapi/linux/dm-log-userspace.h
index 0fa0d9ef06a5..05e91e14c501 100644
--- a/include/uapi/linux/dm-log-userspace.h
+++ b/include/uapi/linux/dm-log-userspace.h
@@ -7,6 +7,7 @@
7#ifndef __DM_LOG_USERSPACE_H__ 7#ifndef __DM_LOG_USERSPACE_H__
8#define __DM_LOG_USERSPACE_H__ 8#define __DM_LOG_USERSPACE_H__
9 9
10#include <linux/types.h>
10#include <linux/dm-ioctl.h> /* For DM_UUID_LEN */ 11#include <linux/dm-ioctl.h> /* For DM_UUID_LEN */
11 12
12/* 13/*
@@ -147,12 +148,12 @@
147 148
148/* 149/*
149 * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h): 150 * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h):
150 * uint32_t (*get_region_size)(struct dm_dirty_log *log); 151 * __u32 (*get_region_size)(struct dm_dirty_log *log);
151 * 152 *
152 * Payload-to-userspace: 153 * Payload-to-userspace:
153 * None. 154 * None.
154 * Payload-to-kernel: 155 * Payload-to-kernel:
155 * uint64_t - contains the region size 156 * __u64 - contains the region size
156 * 157 *
157 * The region size is something that was determined at constructor time. 158 * The region size is something that was determined at constructor time.
158 * It is returned in the payload area and 'data_size' is set to 159 * It is returned in the payload area and 'data_size' is set to
@@ -168,11 +169,11 @@
168 * int (*is_clean)(struct dm_dirty_log *log, region_t region); 169 * int (*is_clean)(struct dm_dirty_log *log, region_t region);
169 * 170 *
170 * Payload-to-userspace: 171 * Payload-to-userspace:
171 * uint64_t - the region to get clean status on 172 * __u64 - the region to get clean status on
172 * Payload-to-kernel: 173 * Payload-to-kernel:
173 * int64_t - 1 if clean, 0 otherwise 174 * __s64 - 1 if clean, 0 otherwise
174 * 175 *
175 * Payload is sizeof(uint64_t) and contains the region for which the clean 176 * Payload is sizeof(__u64) and contains the region for which the clean
176 * status is being made. 177 * status is being made.
177 * 178 *
178 * When the request has been processed, user-space must return the 179 * When the request has been processed, user-space must return the
@@ -187,9 +188,9 @@
187 * int can_block); 188 * int can_block);
188 * 189 *
189 * Payload-to-userspace: 190 * Payload-to-userspace:
190 * uint64_t - the region to get sync status on 191 * __u64 - the region to get sync status on
191 * Payload-to-kernel: 192 * Payload-to-kernel:
192 * int64_t - 1 if in-sync, 0 otherwise 193 * __s64 - 1 if in-sync, 0 otherwise
193 * 194 *
194 * Exactly the same as 'is_clean' above, except this time asking "has the 195 * Exactly the same as 'is_clean' above, except this time asking "has the
195 * region been recovered?" vs. "is the region not being modified?" 196 * region been recovered?" vs. "is the region not being modified?"
@@ -203,7 +204,7 @@
203 * Payload-to-userspace: 204 * Payload-to-userspace:
204 * If the 'integrated_flush' directive is present in the constructor 205 * If the 'integrated_flush' directive is present in the constructor
205 * table, the payload is as same as DM_ULOG_MARK_REGION: 206 * table, the payload is as same as DM_ULOG_MARK_REGION:
206 * uint64_t [] - region(s) to mark 207 * __u64 [] - region(s) to mark
207 * else 208 * else
208 * None 209 * None
209 * Payload-to-kernel: 210 * Payload-to-kernel:
@@ -225,13 +226,13 @@
225 * void (*mark_region)(struct dm_dirty_log *log, region_t region); 226 * void (*mark_region)(struct dm_dirty_log *log, region_t region);
226 * 227 *
227 * Payload-to-userspace: 228 * Payload-to-userspace:
228 * uint64_t [] - region(s) to mark 229 * __u64 [] - region(s) to mark
229 * Payload-to-kernel: 230 * Payload-to-kernel:
230 * None. 231 * None.
231 * 232 *
232 * Incoming payload contains the one or more regions to mark dirty. 233 * Incoming payload contains the one or more regions to mark dirty.
233 * The number of regions contained in the payload can be determined from 234 * The number of regions contained in the payload can be determined from
234 * 'data_size/sizeof(uint64_t)'. 235 * 'data_size/sizeof(__u64)'.
235 * 236 *
236 * When the request has been processed, user-space must return the 237 * When the request has been processed, user-space must return the
237 * dm_ulog_request to the kernel - setting the 'error' field and clearing 238 * dm_ulog_request to the kernel - setting the 'error' field and clearing
@@ -244,13 +245,13 @@
244 * void (*clear_region)(struct dm_dirty_log *log, region_t region); 245 * void (*clear_region)(struct dm_dirty_log *log, region_t region);
245 * 246 *
246 * Payload-to-userspace: 247 * Payload-to-userspace:
247 * uint64_t [] - region(s) to clear 248 * __u64 [] - region(s) to clear
248 * Payload-to-kernel: 249 * Payload-to-kernel:
249 * None. 250 * None.
250 * 251 *
251 * Incoming payload contains the one or more regions to mark clean. 252 * Incoming payload contains the one or more regions to mark clean.
252 * The number of regions contained in the payload can be determined from 253 * The number of regions contained in the payload can be determined from
253 * 'data_size/sizeof(uint64_t)'. 254 * 'data_size/sizeof(__u64)'.
254 * 255 *
255 * When the request has been processed, user-space must return the 256 * When the request has been processed, user-space must return the
256 * dm_ulog_request to the kernel - setting the 'error' field and clearing 257 * dm_ulog_request to the kernel - setting the 'error' field and clearing
@@ -266,8 +267,8 @@
266 * None. 267 * None.
267 * Payload-to-kernel: 268 * Payload-to-kernel:
268 * { 269 * {
269 * int64_t i; -- 1 if recovery necessary, 0 otherwise 270 * __s64 i; -- 1 if recovery necessary, 0 otherwise
270 * uint64_t r; -- The region to recover if i=1 271 * __u64 r; -- The region to recover if i=1
271 * } 272 * }
272 * 'data_size' should be set appropriately. 273 * 'data_size' should be set appropriately.
273 * 274 *
@@ -283,8 +284,8 @@
283 * 284 *
284 * Payload-to-userspace: 285 * Payload-to-userspace:
285 * { 286 * {
286 * uint64_t - region to set sync state on 287 * __u64 - region to set sync state on
287 * int64_t - 0 if not-in-sync, 1 if in-sync 288 * __s64 - 0 if not-in-sync, 1 if in-sync
288 * } 289 * }
289 * Payload-to-kernel: 290 * Payload-to-kernel:
290 * None. 291 * None.
@@ -302,7 +303,7 @@
302 * Payload-to-userspace: 303 * Payload-to-userspace:
303 * None. 304 * None.
304 * Payload-to-kernel: 305 * Payload-to-kernel:
305 * uint64_t - the number of in-sync regions 306 * __u64 - the number of in-sync regions
306 * 307 *
307 * No incoming payload. Kernel-bound payload contains the number of 308 * No incoming payload. Kernel-bound payload contains the number of
308 * regions that are in-sync (in a size_t). 309 * regions that are in-sync (in a size_t).
@@ -350,11 +351,11 @@
350 * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region); 351 * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
351 * 352 *
352 * Payload-to-userspace: 353 * Payload-to-userspace:
353 * uint64_t - region to determine recovery status on 354 * __u64 - region to determine recovery status on
354 * Payload-to-kernel: 355 * Payload-to-kernel:
355 * { 356 * {
356 * int64_t is_recovering; -- 0 if no, 1 if yes 357 * __s64 is_recovering; -- 0 if no, 1 if yes
357 * uint64_t in_sync_hint; -- lowest region still needing resync 358 * __u64 in_sync_hint; -- lowest region still needing resync
358 * } 359 * }
359 * 360 *
360 * When the request has been processed, user-space must return the 361 * When the request has been processed, user-space must return the
@@ -413,16 +414,16 @@ struct dm_ulog_request {
413 * differentiate between logs that are being swapped and have the 414 * differentiate between logs that are being swapped and have the
414 * same 'uuid'. (Think "live" and "inactive" device-mapper tables.) 415 * same 'uuid'. (Think "live" and "inactive" device-mapper tables.)
415 */ 416 */
416 uint64_t luid; 417 __u64 luid;
417 char uuid[DM_UUID_LEN]; 418 char uuid[DM_UUID_LEN];
418 char padding[3]; /* Padding because DM_UUID_LEN = 129 */ 419 char padding[3]; /* Padding because DM_UUID_LEN = 129 */
419 420
420 uint32_t version; /* See DM_ULOG_REQUEST_VERSION */ 421 __u32 version; /* See DM_ULOG_REQUEST_VERSION */
421 int32_t error; /* Used to report back processing errors */ 422 __s32 error; /* Used to report back processing errors */
422 423
423 uint32_t seq; /* Sequence number for request */ 424 __u32 seq; /* Sequence number for request */
424 uint32_t request_type; /* DM_ULOG_* defined above */ 425 __u32 request_type; /* DM_ULOG_* defined above */
425 uint32_t data_size; /* How much data (not including this struct) */ 426 __u32 data_size; /* How much data (not including this struct) */
426 427
427 char data[0]; 428 char data[0];
428}; 429};