author    Linus Torvalds <torvalds@linux-foundation.org> 2015-02-21 16:28:45 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-02-21 16:28:45 -0500
commit    a911dcdba190ddf77e9199b9917156f879f42d4b
tree      c94d84a9fa48520cd993670a0b4f434c4dcb48db
parent    e20d3ef5406d3a28b76a63905b2a6bd3fb95c377
parent    22aa66a3ee5b61e0f4a0bfeabcaa567861109ec3
Merge tag 'dm-3.20-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull more device mapper changes from Mike Snitzer:

 - Significant dm-crypt CPU scalability performance improvements thanks
   to changes that enable effective use of an unbound workqueue across
   all available CPUs.  A large battery of tests was performed to
   validate these changes; a summary of the results is available here:
   https://www.redhat.com/archives/dm-devel/2015-February/msg00106.html

 - A few additional stable fixes (to DM core, dm-snapshot and dm-mirror)
   and a small fix to dm-space-map-disk.

* tag 'dm-3.20-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm snapshot: fix a possible invalid memory access on unload
  dm: fix a race condition in dm_get_md
  dm crypt: sort writes
  dm crypt: add 'submit_from_crypt_cpus' option
  dm crypt: offload writes to thread
  dm crypt: remove unused io_pool and _crypt_io_pool
  dm crypt: avoid deadlock in mempools
  dm crypt: don't allocate pages for a partial request
  dm crypt: use unbound workqueue for request processing
  dm io: reject unsupported DISCARD requests with EOPNOTSUPP
  dm mirror: do not degrade the mirror on discard error
  dm space map disk: fix sm_disk_count_is_more_than_one()
-rw-r--r--  Documentation/device-mapper/dm-crypt.txt        |  15
-rw-r--r--  drivers/md/dm-crypt.c                           | 392
-rw-r--r--  drivers/md/dm-io.c                              |   6
-rw-r--r--  drivers/md/dm-raid1.c                           |   9
-rw-r--r--  drivers/md/dm-snap.c                            |   4
-rw-r--r--  drivers/md/dm.c                                 |  27
-rw-r--r--  drivers/md/persistent-data/dm-space-map-disk.c  |   4
7 files changed, 269 insertions(+), 188 deletions(-)
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
index c81839b52c4d..ad697781f9ac 100644
--- a/Documentation/device-mapper/dm-crypt.txt
+++ b/Documentation/device-mapper/dm-crypt.txt
@@ -51,7 +51,7 @@ Parameters: <cipher> <key> <iv_offset> <device path> \
     Otherwise #opt_params is the number of following arguments.
 
     Example of optional parameters section:
-        1 allow_discards
+        3 allow_discards same_cpu_crypt submit_from_crypt_cpus
 
 allow_discards
     Block discard requests (a.k.a. TRIM) are passed through the crypt device.
@@ -63,6 +63,19 @@ allow_discards
     used space etc.) if the discarded blocks can be located easily on the
     device later.
 
+same_cpu_crypt
+    Perform encryption using the same cpu that IO was submitted on.
+    The default is to use an unbound workqueue so that encryption work
+    is automatically balanced between available CPUs.
+
+submit_from_crypt_cpus
+    Disable offloading writes to a separate thread after encryption.
+    There are some situations where offloading write bios from the
+    encryption threads to a single thread degrades performance
+    significantly.  The default is to offload write bios to the same
+    thread because it benefits CFQ to have writes submitted using the
+    same context.
+
 Example scripts
 ===============
 LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
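For context, both new options are selected through the optional parameters
section of the crypt table line documented above.  A hypothetical dmsetup
invocation enabling all three features (the device, size and key below are
placeholders for illustration, not values taken from this patch):

  dmsetup create cryptdev --table "0 417792 crypt aes-xts-plain64 \
      <64-hex-digit-key> 0 /dev/sdb 0 \
      3 allow_discards same_cpu_crypt submit_from_crypt_cpus"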
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 08981be7baa1..713a96237a80 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -18,9 +18,11 @@
 #include <linux/slab.h>
 #include <linux/crypto.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <linux/backing-dev.h>
 #include <linux/atomic.h>
 #include <linux/scatterlist.h>
+#include <linux/rbtree.h>
 #include <asm/page.h>
 #include <asm/unaligned.h>
 #include <crypto/hash.h>
@@ -58,7 +60,8 @@ struct dm_crypt_io {
 	atomic_t io_pending;
 	int error;
 	sector_t sector;
-	struct dm_crypt_io *base_io;
+
+	struct rb_node rb_node;
 } CRYPTO_MINALIGN_ATTR;
 
 struct dm_crypt_request {
@@ -108,7 +111,8 @@ struct iv_tcw_private {
  * Crypt: maps a linear range of a block device
  * and encrypts / decrypts at the same time.
  */
-enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
+enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
+	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
 
 /*
  * The fields in here must be read only after initialization.
@@ -121,14 +125,18 @@ struct crypt_config {
 	 * pool for per bio private data, crypto requests and
 	 * encryption requeusts/buffer pages
 	 */
-	mempool_t *io_pool;
 	mempool_t *req_pool;
 	mempool_t *page_pool;
 	struct bio_set *bs;
+	struct mutex bio_alloc_lock;
 
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
 
+	struct task_struct *write_thread;
+	wait_queue_head_t write_thread_wait;
+	struct rb_root write_tree;
+
 	char *cipher;
 	char *cipher_string;
 
@@ -172,9 +180,6 @@ struct crypt_config {
 };
 
 #define MIN_IOS 16
-#define MIN_POOL_PAGES 32
-
-static struct kmem_cache *_crypt_io_pool;
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -946,57 +951,70 @@ static int crypt_convert(struct crypt_config *cc,
 	return 0;
 }
 
+static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
+
 /*
  * Generate a new unfragmented bio with the given size
  * This should never violate the device limitations
- * May return a smaller bio when running out of pages, indicated by
- * *out_of_pages set to 1.
+ *
+ * This function may be called concurrently. If we allocate from the mempool
+ * concurrently, there is a possibility of deadlock. For example, if we have
+ * mempool of 256 pages, two processes, each wanting 256, pages allocate from
+ * the mempool concurrently, it may deadlock in a situation where both processes
+ * have allocated 128 pages and the mempool is exhausted.
+ *
+ * In order to avoid this scenario we allocate the pages under a mutex.
+ *
+ * In order to not degrade performance with excessive locking, we try
+ * non-blocking allocations without a mutex first but on failure we fallback
+ * to blocking allocations with a mutex.
  */
-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
-				      unsigned *out_of_pages)
+static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *clone;
 	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
-	unsigned i, len;
+	gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
+	unsigned i, len, remaining_size;
 	struct page *page;
+	struct bio_vec *bvec;
+
+retry:
+	if (unlikely(gfp_mask & __GFP_WAIT))
+		mutex_lock(&cc->bio_alloc_lock);
 
 	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
 	if (!clone)
-		return NULL;
+		goto return_clone;
 
 	clone_init(io, clone);
-	*out_of_pages = 0;
+
+	remaining_size = size;
 
 	for (i = 0; i < nr_iovecs; i++) {
 		page = mempool_alloc(cc->page_pool, gfp_mask);
 		if (!page) {
-			*out_of_pages = 1;
-			break;
+			crypt_free_buffer_pages(cc, clone);
+			bio_put(clone);
+			gfp_mask |= __GFP_WAIT;
+			goto retry;
 		}
 
-		/*
-		 * If additional pages cannot be allocated without waiting,
-		 * return a partially-allocated bio.  The caller will then try
-		 * to allocate more bios while submitting this partial bio.
-		 */
-		gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+		len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
 
-		len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+		bvec = &clone->bi_io_vec[clone->bi_vcnt++];
+		bvec->bv_page = page;
+		bvec->bv_len = len;
+		bvec->bv_offset = 0;
 
-		if (!bio_add_page(clone, page, len, 0)) {
-			mempool_free(page, cc->page_pool);
-			break;
-		}
+		clone->bi_iter.bi_size += len;
 
-		size -= len;
+		remaining_size -= len;
 	}
 
-	if (!clone->bi_iter.bi_size) {
-		bio_put(clone);
-		return NULL;
-	}
+return_clone:
+	if (unlikely(gfp_mask & __GFP_WAIT))
+		mutex_unlock(&cc->bio_alloc_lock);
 
 	return clone;
 }
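The comment block above is the heart of the "dm crypt: avoid deadlock in
mempools" change: make an opportunistic non-blocking pass first, and only on
failure return everything to the pool and retry with blocking allocations
under a mutex, so at most one caller at a time can sleep on an exhausted
pool.  A minimal userspace sketch of the same pattern, with a toy pool
standing in for the kernel mempool (pool, pool_alloc and pool_free are
illustrative inventions, not kernel APIs):

#include <pthread.h>
#include <stddef.h>

#define POOL_SIZE 256

/* Toy fixed-size pool; the caller pre-fills it with pool_free(). */
struct pool {
	void *items[POOL_SIZE];
	int count;
	pthread_mutex_t lock;
	pthread_cond_t nonempty;
};

/* Non-blocking when !can_wait (may return NULL); otherwise waits. */
static void *pool_alloc(struct pool *p, int can_wait)
{
	void *it = NULL;

	pthread_mutex_lock(&p->lock);
	while (p->count == 0 && can_wait)
		pthread_cond_wait(&p->nonempty, &p->lock);
	if (p->count > 0)
		it = p->items[--p->count];
	pthread_mutex_unlock(&p->lock);
	return it;
}

static void pool_free(struct pool *p, void *it)
{
	pthread_mutex_lock(&p->lock);
	p->items[p->count++] = it;
	pthread_cond_signal(&p->nonempty);
	pthread_mutex_unlock(&p->lock);
}

/* Serializes the blocking pass, like cc->bio_alloc_lock above. */
static pthread_mutex_t alloc_serialize = PTHREAD_MUTEX_INITIALIZER;

static void alloc_n(struct pool *p, void **out, int n)
{
	int can_wait = 0, i;

retry:
	if (can_wait)
		pthread_mutex_lock(&alloc_serialize);

	for (i = 0; i < n; i++) {
		out[i] = pool_alloc(p, can_wait);
		if (!out[i]) {
			/* Only reachable on the non-blocking pass: give back
			 * everything taken so far, then retry exclusively
			 * with blocking allocations. */
			while (i--)
				pool_free(p, out[i]);
			can_wait = 1;
			goto retry;
		}
	}

	if (can_wait)
		pthread_mutex_unlock(&alloc_serialize);
}

As in the kernel code, the retry path runs at most once: a blocking
pool_alloc() waits until an item is freed instead of failing, just as
mempool_alloc() does once __GFP_WAIT is set.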
@@ -1020,7 +1038,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
 	io->base_bio = bio;
 	io->sector = sector;
 	io->error = 0;
-	io->base_io = NULL;
 	io->ctx.req = NULL;
 	atomic_set(&io->io_pending, 0);
 }
@@ -1033,13 +1050,11 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
- * If base_io is set, wait for the last fragment to complete.
  */
 static void crypt_dec_pending(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *base_bio = io->base_bio;
-	struct dm_crypt_io *base_io = io->base_io;
 	int error = io->error;
 
 	if (!atomic_dec_and_test(&io->io_pending))
@@ -1047,16 +1062,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 
 	if (io->ctx.req)
 		crypt_free_req(cc, io->ctx.req, base_bio);
-	if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
-		mempool_free(io, cc->io_pool);
 
-	if (likely(!base_io))
-		bio_endio(base_bio, error);
-	else {
-		if (error && !base_io->error)
-			base_io->error = error;
-		crypt_dec_pending(base_io);
-	}
+	bio_endio(base_bio, error);
 }
 
 /*
@@ -1138,37 +1145,97 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 	return 0;
 }
 
+static void kcryptd_io_read_work(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+	crypt_inc_pending(io);
+	if (kcryptd_io_read(io, GFP_NOIO))
+		io->error = -ENOMEM;
+	crypt_dec_pending(io);
+}
+
+static void kcryptd_queue_read(struct dm_crypt_io *io)
+{
+	struct crypt_config *cc = io->cc;
+
+	INIT_WORK(&io->work, kcryptd_io_read_work);
+	queue_work(cc->io_queue, &io->work);
+}
+
 static void kcryptd_io_write(struct dm_crypt_io *io)
 {
 	struct bio *clone = io->ctx.bio_out;
+
 	generic_make_request(clone);
 }
 
-static void kcryptd_io(struct work_struct *work)
+#define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node)
+
+static int dmcrypt_write(void *data)
 {
-	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+	struct crypt_config *cc = data;
+	struct dm_crypt_io *io;
 
-	if (bio_data_dir(io->base_bio) == READ) {
-		crypt_inc_pending(io);
-		if (kcryptd_io_read(io, GFP_NOIO))
-			io->error = -ENOMEM;
-		crypt_dec_pending(io);
-	} else
-		kcryptd_io_write(io);
-}
+	while (1) {
+		struct rb_root write_tree;
+		struct blk_plug plug;
 
-static void kcryptd_queue_io(struct dm_crypt_io *io)
-{
-	struct crypt_config *cc = io->cc;
+		DECLARE_WAITQUEUE(wait, current);
 
-	INIT_WORK(&io->work, kcryptd_io);
-	queue_work(cc->io_queue, &io->work);
+		spin_lock_irq(&cc->write_thread_wait.lock);
+continue_locked:
+
+		if (!RB_EMPTY_ROOT(&cc->write_tree))
+			goto pop_from_list;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		__add_wait_queue(&cc->write_thread_wait, &wait);
+
+		spin_unlock_irq(&cc->write_thread_wait.lock);
+
+		if (unlikely(kthread_should_stop())) {
+			set_task_state(current, TASK_RUNNING);
+			remove_wait_queue(&cc->write_thread_wait, &wait);
+			break;
+		}
+
+		schedule();
+
+		set_task_state(current, TASK_RUNNING);
+		spin_lock_irq(&cc->write_thread_wait.lock);
+		__remove_wait_queue(&cc->write_thread_wait, &wait);
+		goto continue_locked;
+
+pop_from_list:
+		write_tree = cc->write_tree;
+		cc->write_tree = RB_ROOT;
+		spin_unlock_irq(&cc->write_thread_wait.lock);
+
+		BUG_ON(rb_parent(write_tree.rb_node));
+
+		/*
+		 * Note: we cannot walk the tree here with rb_next because
+		 * the structures may be freed when kcryptd_io_write is called.
+		 */
+		blk_start_plug(&plug);
+		do {
+			io = crypt_io_from_node(rb_first(&write_tree));
+			rb_erase(&io->rb_node, &write_tree);
+			kcryptd_io_write(io);
+		} while (!RB_EMPTY_ROOT(&write_tree));
+		blk_finish_plug(&plug);
+	}
+	return 0;
 }
 
 static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 {
 	struct bio *clone = io->ctx.bio_out;
 	struct crypt_config *cc = io->cc;
+	unsigned long flags;
+	sector_t sector;
+	struct rb_node **rbp, *parent;
 
 	if (unlikely(io->error < 0)) {
 		crypt_free_buffer_pages(cc, clone);
@@ -1182,20 +1249,34 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 
 	clone->bi_iter.bi_sector = cc->start + io->sector;
 
-	if (async)
-		kcryptd_queue_io(io);
-	else
+	if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
 		generic_make_request(clone);
+		return;
+	}
+
+	spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
+	rbp = &cc->write_tree.rb_node;
+	parent = NULL;
+	sector = io->sector;
+	while (*rbp) {
+		parent = *rbp;
+		if (sector < crypt_io_from_node(parent)->sector)
+			rbp = &(*rbp)->rb_left;
+		else
+			rbp = &(*rbp)->rb_right;
+	}
+	rb_link_node(&io->rb_node, parent, rbp);
+	rb_insert_color(&io->rb_node, &cc->write_tree);
+
+	wake_up_locked(&cc->write_thread_wait);
+	spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
 }
 
 static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *clone;
-	struct dm_crypt_io *new_io;
 	int crypt_finished;
-	unsigned out_of_pages = 0;
-	unsigned remaining = io->base_bio->bi_iter.bi_size;
 	sector_t sector = io->sector;
 	int r;
 
@@ -1205,80 +1286,30 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	crypt_inc_pending(io);
 	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
 
-	/*
-	 * The allocated buffers can be smaller than the whole bio,
-	 * so repeat the whole process until all the data can be handled.
-	 */
-	while (remaining) {
-		clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
-		if (unlikely(!clone)) {
-			io->error = -ENOMEM;
-			break;
-		}
-
-		io->ctx.bio_out = clone;
-		io->ctx.iter_out = clone->bi_iter;
-
-		remaining -= clone->bi_iter.bi_size;
-		sector += bio_sectors(clone);
-
-		crypt_inc_pending(io);
-
-		r = crypt_convert(cc, &io->ctx);
-		if (r < 0)
-			io->error = -EIO;
-
-		crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
-
-		/* Encryption was already finished, submit io now */
-		if (crypt_finished) {
-			kcryptd_crypt_write_io_submit(io, 0);
-
-			/*
-			 * If there was an error, do not try next fragments.
-			 * For async, error is processed in async handler.
-			 */
-			if (unlikely(r < 0))
-				break;
+	clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
+	if (unlikely(!clone)) {
+		io->error = -EIO;
+		goto dec;
+	}
 
-			io->sector = sector;
-		}
+	io->ctx.bio_out = clone;
+	io->ctx.iter_out = clone->bi_iter;
 
-		/*
-		 * Out of memory -> run queues
-		 * But don't wait if split was due to the io size restriction
-		 */
-		if (unlikely(out_of_pages))
-			congestion_wait(BLK_RW_ASYNC, HZ/100);
+	sector += bio_sectors(clone);
 
-		/*
-		 * With async crypto it is unsafe to share the crypto context
-		 * between fragments, so switch to a new dm_crypt_io structure.
-		 */
-		if (unlikely(!crypt_finished && remaining)) {
-			new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
-			crypt_io_init(new_io, io->cc, io->base_bio, sector);
-			crypt_inc_pending(new_io);
-			crypt_convert_init(cc, &new_io->ctx, NULL,
-					   io->base_bio, sector);
-			new_io->ctx.iter_in = io->ctx.iter_in;
-
-			/*
-			 * Fragments after the first use the base_io
-			 * pending count.
-			 */
-			if (!io->base_io)
-				new_io->base_io = io;
-			else {
-				new_io->base_io = io->base_io;
-				crypt_inc_pending(io->base_io);
-				crypt_dec_pending(io);
-			}
+	crypt_inc_pending(io);
+	r = crypt_convert(cc, &io->ctx);
+	if (r)
+		io->error = -EIO;
+	crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
 
-			io = new_io;
-		}
+	/* Encryption was already finished, submit io now */
+	if (crypt_finished) {
+		kcryptd_crypt_write_io_submit(io, 0);
+		io->sector = sector;
 	}
 
+dec:
 	crypt_dec_pending(io);
 }
 
@@ -1481,6 +1512,9 @@ static void crypt_dtr(struct dm_target *ti)
 	if (!cc)
 		return;
 
+	if (cc->write_thread)
+		kthread_stop(cc->write_thread);
+
 	if (cc->io_queue)
 		destroy_workqueue(cc->io_queue);
 	if (cc->crypt_queue)
@@ -1495,8 +1529,6 @@ static void crypt_dtr(struct dm_target *ti)
 	mempool_destroy(cc->page_pool);
 	if (cc->req_pool)
 		mempool_destroy(cc->req_pool);
-	if (cc->io_pool)
-		mempool_destroy(cc->io_pool);
 
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
@@ -1688,7 +1720,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	char dummy;
 
 	static struct dm_arg _args[] = {
-		{0, 1, "Invalid number of feature args"},
+		{0, 3, "Invalid number of feature args"},
 	};
 
 	if (argc < 5) {
@@ -1710,13 +1742,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (ret < 0)
 		goto bad;
 
-	ret = -ENOMEM;
-	cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
-	if (!cc->io_pool) {
-		ti->error = "Cannot allocate crypt io mempool";
-		goto bad;
-	}
-
 	cc->dmreq_start = sizeof(struct ablkcipher_request);
 	cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc));
 	cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request));
@@ -1734,6 +1759,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc));
 	}
 
+	ret = -ENOMEM;
 	cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
 		sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size);
 	if (!cc->req_pool) {
@@ -1746,7 +1772,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size,
 		ARCH_KMALLOC_MINALIGN);
 
-	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
+	cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
 	if (!cc->page_pool) {
 		ti->error = "Cannot allocate page mempool";
 		goto bad;
@@ -1758,6 +1784,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
+	mutex_init(&cc->bio_alloc_lock);
+
 	ret = -EINVAL;
 	if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) {
 		ti->error = "Invalid iv_offset sector";
@@ -1788,15 +1816,26 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		if (ret)
 			goto bad;
 
-		opt_string = dm_shift_arg(&as);
+		while (opt_params--) {
+			opt_string = dm_shift_arg(&as);
+			if (!opt_string) {
+				ti->error = "Not enough feature arguments";
+				goto bad;
+			}
 
-		if (opt_params == 1 && opt_string &&
-		    !strcasecmp(opt_string, "allow_discards"))
-			ti->num_discard_bios = 1;
-		else if (opt_params) {
-			ret = -EINVAL;
-			ti->error = "Invalid feature arguments";
-			goto bad;
+			if (!strcasecmp(opt_string, "allow_discards"))
+				ti->num_discard_bios = 1;
+
+			else if (!strcasecmp(opt_string, "same_cpu_crypt"))
+				set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+
+			else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
+				set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+
+			else {
+				ti->error = "Invalid feature arguments";
+				goto bad;
+			}
 		}
 	}
 
@@ -1807,13 +1846,28 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	cc->crypt_queue = alloc_workqueue("kcryptd",
-					  WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+	if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
+		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+	else
+		cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+						  num_online_cpus());
 	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";
 		goto bad;
 	}
 
+	init_waitqueue_head(&cc->write_thread_wait);
+	cc->write_tree = RB_ROOT;
+
+	cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+	if (IS_ERR(cc->write_thread)) {
+		ret = PTR_ERR(cc->write_thread);
+		cc->write_thread = NULL;
+		ti->error = "Couldn't spawn write thread";
+		goto bad;
+	}
+	wake_up_process(cc->write_thread);
+
 	ti->num_flush_bios = 1;
 	ti->discard_zeroes_data_unsupported = true;
 
@@ -1848,7 +1902,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 
 	if (bio_data_dir(io->base_bio) == READ) {
 		if (kcryptd_io_read(io, GFP_NOWAIT))
-			kcryptd_queue_io(io);
+			kcryptd_queue_read(io);
 	} else
 		kcryptd_queue_crypt(io);
 
@@ -1860,6 +1914,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 {
 	struct crypt_config *cc = ti->private;
 	unsigned i, sz = 0;
+	int num_feature_args = 0;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -1878,8 +1933,18 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 		DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
 				cc->dev->name, (unsigned long long)cc->start);
 
-		if (ti->num_discard_bios)
-			DMEMIT(" 1 allow_discards");
+		num_feature_args += !!ti->num_discard_bios;
+		num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+		num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+		if (num_feature_args) {
+			DMEMIT(" %d", num_feature_args);
+			if (ti->num_discard_bios)
+				DMEMIT(" allow_discards");
+			if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
+				DMEMIT(" same_cpu_crypt");
+			if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
+				DMEMIT(" submit_from_crypt_cpus");
+		}
 
 		break;
 	}
@@ -1976,7 +2041,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 13, 0},
+	.version = {1, 14, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
@@ -1994,15 +2059,9 @@ static int __init dm_crypt_init(void)
 {
 	int r;
 
-	_crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0);
-	if (!_crypt_io_pool)
-		return -ENOMEM;
-
 	r = dm_register_target(&crypt_target);
-	if (r < 0) {
+	if (r < 0)
 		DMERR("register failed %d", r);
-		kmem_cache_destroy(_crypt_io_pool);
-	}
 
 	return r;
 }
@@ -2010,7 +2069,6 @@ static int __init dm_crypt_init(void)
 static void __exit dm_crypt_exit(void)
 {
 	dm_unregister_target(&crypt_target);
-	kmem_cache_destroy(_crypt_io_pool);
 }
 
 module_init(dm_crypt_init);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index c09359db3a90..37de0173b6d2 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -290,6 +290,12 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 	unsigned short logical_block_size = queue_logical_block_size(q);
 	sector_t num_sectors;
 
+	/* Reject unsupported discard requests */
+	if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) {
+		dec_count(io, region, -EOPNOTSUPP);
+		return;
+	}
+
 	/*
 	 * where->count may be zero if rw holds a flush and we need to
 	 * send a zero-sized flush.
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 7dfdb5c746d6..089d62751f7f 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -604,6 +604,15 @@ static void write_callback(unsigned long error, void *context)
 		return;
 	}
 
+	/*
+	 * If the bio is discard, return an error, but do not
+	 * degrade the array.
+	 */
+	if (bio->bi_rw & REQ_DISCARD) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return;
+	}
+
 	for (i = 0; i < ms->nr_mirrors; i++)
 		if (test_bit(i, &error))
 			fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 864b03f47727..8b204ae216ab 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1432,8 +1432,6 @@ out:
 		full_bio->bi_private = pe->full_bio_private;
 		atomic_inc(&full_bio->bi_remaining);
 	}
-	free_pending_exception(pe);
-
 	increment_pending_exceptions_done_count();
 
 	up_write(&s->lock);
@@ -1450,6 +1448,8 @@ out:
 	}
 
 	retry_origin_bios(s, origin_bios);
+
+	free_pending_exception(pe);
 }
 
 static void commit_callback(void *context, int success)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ec1444f49de1..73f28802dc7a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2571,7 +2571,7 @@ int dm_setup_md_queue(struct mapped_device *md)
 	return 0;
 }
 
-static struct mapped_device *dm_find_md(dev_t dev)
+struct mapped_device *dm_get_md(dev_t dev)
 {
 	struct mapped_device *md;
 	unsigned minor = MINOR(dev);
@@ -2582,12 +2582,15 @@ static struct mapped_device *dm_find_md(dev_t dev)
 	spin_lock(&_minor_lock);
 
 	md = idr_find(&_minor_idr, minor);
-	if (md && (md == MINOR_ALLOCED ||
-		   (MINOR(disk_devt(dm_disk(md))) != minor) ||
-		   dm_deleting_md(md) ||
-		   test_bit(DMF_FREEING, &md->flags))) {
-		md = NULL;
-		goto out;
+	if (md) {
+		if ((md == MINOR_ALLOCED ||
+		     (MINOR(disk_devt(dm_disk(md))) != minor) ||
+		     dm_deleting_md(md) ||
+		     test_bit(DMF_FREEING, &md->flags))) {
+			md = NULL;
+			goto out;
+		}
+		dm_get(md);
 	}
 
 out:
@@ -2595,16 +2598,6 @@ out:
 
 	return md;
 }
-
-struct mapped_device *dm_get_md(dev_t dev)
-{
-	struct mapped_device *md = dm_find_md(dev);
-
-	if (md)
-		dm_get(md);
-
-	return md;
-}
 EXPORT_SYMBOL_GPL(dm_get_md);
 
 void *dm_get_mdptr(struct mapped_device *md)
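The dm_get_md() change above closes a classic lookup/refcount race: the old
dm_find_md() dropped _minor_lock before dm_get_md() took its reference, so
the mapped_device could be freed in that window.  The fix takes the
reference while the lock still pins the object.  A distilled userspace
sketch of that rule (struct obj, registry_find() and the dying flag are
illustrative stand-ins, not DM APIs):

#include <pthread.h>
#include <stddef.h>

struct obj {
	int refcount;		/* protected by registry_lock */
	int dying;		/* set once teardown has begun */
};

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

/* Assumed helper: id -> object lookup; caller holds registry_lock. */
struct obj *registry_find(unsigned id);

struct obj *obj_get(unsigned id)
{
	struct obj *o;

	pthread_mutex_lock(&registry_lock);
	o = registry_find(id);
	if (o) {
		if (o->dying)		/* mirrors the DMF_FREEING check */
			o = NULL;
		else
			o->refcount++;	/* mirrors dm_get() under _minor_lock */
	}
	pthread_mutex_unlock(&registry_lock);
	return o;
}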
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index cfbf9617e465..ebb280a14325 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -78,7 +78,9 @@ static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b,
 	if (r)
 		return r;
 
-	return count > 1;
+	*result = count > 1;
+
+	return 0;
 }
 
 static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,