path: root/drivers/md
author    Linus Torvalds <torvalds@linux-foundation.org>    2015-02-21 16:28:45 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2015-02-21 16:28:45 -0500
commit    a911dcdba190ddf77e9199b9917156f879f42d4b (patch)
tree      c94d84a9fa48520cd993670a0b4f434c4dcb48db /drivers/md
parent    e20d3ef5406d3a28b76a63905b2a6bd3fb95c377 (diff)
parent    22aa66a3ee5b61e0f4a0bfeabcaa567861109ec3 (diff)
Merge tag 'dm-3.20-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull more device mapper changes from Mike Snitzer:

 - Significant dm-crypt CPU scalability performance improvements thanks
   to changes that enable effective use of an unbound workqueue across
   all available CPUs. A large battery of tests was performed to
   validate these changes; a summary of the results is available here:
   https://www.redhat.com/archives/dm-devel/2015-February/msg00106.html

 - A few additional stable fixes (to DM core, dm-snapshot and dm-mirror)
   and a small fix to dm-space-map-disk.

* tag 'dm-3.20-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm snapshot: fix a possible invalid memory access on unload
  dm: fix a race condition in dm_get_md
  dm crypt: sort writes
  dm crypt: add 'submit_from_crypt_cpus' option
  dm crypt: offload writes to thread
  dm crypt: remove unused io_pool and _crypt_io_pool
  dm crypt: avoid deadlock in mempools
  dm crypt: don't allocate pages for a partial request
  dm crypt: use unbound workqueue for request processing
  dm io: reject unsupported DISCARD requests with EOPNOTSUPP
  dm mirror: do not degrade the mirror on discard error
  dm space map disk: fix sm_disk_count_is_more_than_one()
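For reference, the two new dm-crypt options ('same_cpu_crypt' comes from the unbound-workqueue change, 'submit_from_crypt_cpus' from the write-offload change) are passed as optional feature arguments at the end of a crypt target line, preceded by a count of how many follow. A purely illustrative table line (device, sizes and the key placeholder are made up for this example) could look like:

    0 2097152 crypt aes-xts-plain64 <hex_key> 0 /dev/sdb 0 2 same_cpu_crypt submit_from_crypt_cpus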
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-crypt.c                            392
-rw-r--r--  drivers/md/dm-io.c                                 6
-rw-r--r--  drivers/md/dm-raid1.c                              9
-rw-r--r--  drivers/md/dm-snap.c                               4
-rw-r--r--  drivers/md/dm.c                                   27
-rw-r--r--  drivers/md/persistent-data/dm-space-map-disk.c     4
6 files changed, 255 insertions, 187 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 08981be7baa1..713a96237a80 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -18,9 +18,11 @@
 #include <linux/slab.h>
 #include <linux/crypto.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <linux/backing-dev.h>
 #include <linux/atomic.h>
 #include <linux/scatterlist.h>
+#include <linux/rbtree.h>
 #include <asm/page.h>
 #include <asm/unaligned.h>
 #include <crypto/hash.h>
@@ -58,7 +60,8 @@ struct dm_crypt_io {
         atomic_t io_pending;
         int error;
         sector_t sector;
-        struct dm_crypt_io *base_io;
+
+        struct rb_node rb_node;
 } CRYPTO_MINALIGN_ATTR;
 
 struct dm_crypt_request {
@@ -108,7 +111,8 @@ struct iv_tcw_private {
  * Crypt: maps a linear range of a block device
  * and encrypts / decrypts at the same time.
  */
-enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
+enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
+             DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
 
 /*
  * The fields in here must be read only after initialization.
@@ -121,14 +125,18 @@ struct crypt_config {
          * pool for per bio private data, crypto requests and
          * encryption requeusts/buffer pages
          */
-        mempool_t *io_pool;
         mempool_t *req_pool;
         mempool_t *page_pool;
         struct bio_set *bs;
+        struct mutex bio_alloc_lock;
 
         struct workqueue_struct *io_queue;
         struct workqueue_struct *crypt_queue;
 
+        struct task_struct *write_thread;
+        wait_queue_head_t write_thread_wait;
+        struct rb_root write_tree;
+
         char *cipher;
         char *cipher_string;
 
@@ -172,9 +180,6 @@ struct crypt_config {
 };
 
 #define MIN_IOS        16
-#define MIN_POOL_PAGES 32
-
-static struct kmem_cache *_crypt_io_pool;
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -946,57 +951,70 @@ static int crypt_convert(struct crypt_config *cc,
         return 0;
 }
 
+static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
+
 /*
  * Generate a new unfragmented bio with the given size
  * This should never violate the device limitations
- * May return a smaller bio when running out of pages, indicated by
- * *out_of_pages set to 1.
+ *
+ * This function may be called concurrently. If we allocate from the mempool
+ * concurrently, there is a possibility of deadlock. For example, if we have
+ * mempool of 256 pages, two processes, each wanting 256, pages allocate from
+ * the mempool concurrently, it may deadlock in a situation where both processes
+ * have allocated 128 pages and the mempool is exhausted.
+ *
+ * In order to avoid this scenario we allocate the pages under a mutex.
+ *
+ * In order to not degrade performance with excessive locking, we try
+ * non-blocking allocations without a mutex first but on failure we fallback
+ * to blocking allocations with a mutex.
  */
-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
-                                      unsigned *out_of_pages)
+static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 {
         struct crypt_config *cc = io->cc;
         struct bio *clone;
         unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-        gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
-        unsigned i, len;
+        gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
+        unsigned i, len, remaining_size;
         struct page *page;
+        struct bio_vec *bvec;
+
+retry:
+        if (unlikely(gfp_mask & __GFP_WAIT))
+                mutex_lock(&cc->bio_alloc_lock);
 
         clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
         if (!clone)
-                return NULL;
+                goto return_clone;
 
         clone_init(io, clone);
-        *out_of_pages = 0;
+
+        remaining_size = size;
 
         for (i = 0; i < nr_iovecs; i++) {
                 page = mempool_alloc(cc->page_pool, gfp_mask);
                 if (!page) {
-                        *out_of_pages = 1;
-                        break;
+                        crypt_free_buffer_pages(cc, clone);
+                        bio_put(clone);
+                        gfp_mask |= __GFP_WAIT;
+                        goto retry;
                 }
 
-                /*
-                 * If additional pages cannot be allocated without waiting,
-                 * return a partially-allocated bio. The caller will then try
-                 * to allocate more bios while submitting this partial bio.
-                 */
-                gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+                len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
 
-                len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+                bvec = &clone->bi_io_vec[clone->bi_vcnt++];
+                bvec->bv_page = page;
+                bvec->bv_len = len;
+                bvec->bv_offset = 0;
 
-                if (!bio_add_page(clone, page, len, 0)) {
-                        mempool_free(page, cc->page_pool);
-                        break;
-                }
+                clone->bi_iter.bi_size += len;
 
-                size -= len;
+                remaining_size -= len;
         }
 
-        if (!clone->bi_iter.bi_size) {
-                bio_put(clone);
-                return NULL;
-        }
+return_clone:
+        if (unlikely(gfp_mask & __GFP_WAIT))
+                mutex_unlock(&cc->bio_alloc_lock);
 
         return clone;
 }
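As a side note, the allocation strategy the new comment describes is not dm-crypt specific. The following is a minimal user-space sketch of the same idea, with a counting semaphore standing in for the page mempool and all names invented for illustration: each caller first tries non-blocking allocations, and only when the pool appears exhausted does it give everything back and retry with blocking allocations serialized by a mutex, so two callers can never each hold half of the pool and wait on each other forever.

/*
 * Sketch only (not the dm-crypt code). Call pool_init() once before use.
 */
#include <pthread.h>
#include <semaphore.h>
#include <stdbool.h>
#include <stddef.h>

#define POOL_PAGES 256

static sem_t pool_sem;                          /* free "pages" in the pool */
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;

static void pool_init(void) { sem_init(&pool_sem, 0, POOL_PAGES); }
static void put_page(void)  { sem_post(&pool_sem); }

/* Take n pages without risking mutual starvation between callers. */
static void get_pages(size_t n)
{
        bool blocking = false;

retry:
        if (blocking)
                pthread_mutex_lock(&alloc_lock);

        for (size_t got = 0; got < n; got++) {
                /* First pass: sem_trywait() fails instead of sleeping. */
                int r = blocking ? sem_wait(&pool_sem) : sem_trywait(&pool_sem);
                if (r != 0) {
                        while (got--)           /* give everything back ...  */
                                put_page();
                        if (blocking)
                                pthread_mutex_unlock(&alloc_lock);
                        blocking = true;        /* ... and retry, serialized */
                        goto retry;
                }
        }

        if (blocking)
                pthread_mutex_unlock(&alloc_lock);
}

The kernel function above has the same shape: gfp_mask starts as GFP_NOWAIT, and only after a failed pass are __GFP_WAIT set and bio_alloc_lock taken.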
@@ -1020,7 +1038,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
         io->base_bio = bio;
         io->sector = sector;
         io->error = 0;
-        io->base_io = NULL;
         io->ctx.req = NULL;
         atomic_set(&io->io_pending, 0);
 }
@@ -1033,13 +1050,11 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
- * If base_io is set, wait for the last fragment to complete.
  */
 static void crypt_dec_pending(struct dm_crypt_io *io)
 {
         struct crypt_config *cc = io->cc;
         struct bio *base_bio = io->base_bio;
-        struct dm_crypt_io *base_io = io->base_io;
         int error = io->error;
 
         if (!atomic_dec_and_test(&io->io_pending))
@@ -1047,16 +1062,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 
         if (io->ctx.req)
                 crypt_free_req(cc, io->ctx.req, base_bio);
-        if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
-                mempool_free(io, cc->io_pool);
-
-        if (likely(!base_io))
-                bio_endio(base_bio, error);
-        else {
-                if (error && !base_io->error)
-                        base_io->error = error;
-                crypt_dec_pending(base_io);
-        }
+
+        bio_endio(base_bio, error);
 }
 
 /*
@@ -1138,37 +1145,97 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
         return 0;
 }
 
+static void kcryptd_io_read_work(struct work_struct *work)
+{
+        struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+        crypt_inc_pending(io);
+        if (kcryptd_io_read(io, GFP_NOIO))
+                io->error = -ENOMEM;
+        crypt_dec_pending(io);
+}
+
+static void kcryptd_queue_read(struct dm_crypt_io *io)
+{
+        struct crypt_config *cc = io->cc;
+
+        INIT_WORK(&io->work, kcryptd_io_read_work);
+        queue_work(cc->io_queue, &io->work);
+}
+
 static void kcryptd_io_write(struct dm_crypt_io *io)
 {
         struct bio *clone = io->ctx.bio_out;
+
         generic_make_request(clone);
 }
 
-static void kcryptd_io(struct work_struct *work)
+#define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node)
+
+static int dmcrypt_write(void *data)
 {
-        struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+        struct crypt_config *cc = data;
+        struct dm_crypt_io *io;
 
-        if (bio_data_dir(io->base_bio) == READ) {
-                crypt_inc_pending(io);
-                if (kcryptd_io_read(io, GFP_NOIO))
-                        io->error = -ENOMEM;
-                crypt_dec_pending(io);
-        } else
-                kcryptd_io_write(io);
-}
+        while (1) {
+                struct rb_root write_tree;
+                struct blk_plug plug;
 
-static void kcryptd_queue_io(struct dm_crypt_io *io)
-{
-        struct crypt_config *cc = io->cc;
+                DECLARE_WAITQUEUE(wait, current);
 
-        INIT_WORK(&io->work, kcryptd_io);
-        queue_work(cc->io_queue, &io->work);
+                spin_lock_irq(&cc->write_thread_wait.lock);
+continue_locked:
+
+                if (!RB_EMPTY_ROOT(&cc->write_tree))
+                        goto pop_from_list;
+
+                __set_current_state(TASK_INTERRUPTIBLE);
+                __add_wait_queue(&cc->write_thread_wait, &wait);
+
+                spin_unlock_irq(&cc->write_thread_wait.lock);
+
+                if (unlikely(kthread_should_stop())) {
+                        set_task_state(current, TASK_RUNNING);
+                        remove_wait_queue(&cc->write_thread_wait, &wait);
+                        break;
+                }
+
+                schedule();
+
+                set_task_state(current, TASK_RUNNING);
+                spin_lock_irq(&cc->write_thread_wait.lock);
+                __remove_wait_queue(&cc->write_thread_wait, &wait);
+                goto continue_locked;
+
+pop_from_list:
+                write_tree = cc->write_tree;
+                cc->write_tree = RB_ROOT;
+                spin_unlock_irq(&cc->write_thread_wait.lock);
+
+                BUG_ON(rb_parent(write_tree.rb_node));
+
+                /*
+                 * Note: we cannot walk the tree here with rb_next because
+                 * the structures may be freed when kcryptd_io_write is called.
+                 */
+                blk_start_plug(&plug);
+                do {
+                        io = crypt_io_from_node(rb_first(&write_tree));
+                        rb_erase(&io->rb_node, &write_tree);
+                        kcryptd_io_write(io);
+                } while (!RB_EMPTY_ROOT(&write_tree));
+                blk_finish_plug(&plug);
+        }
+        return 0;
 }
 
 static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 {
         struct bio *clone = io->ctx.bio_out;
         struct crypt_config *cc = io->cc;
+        unsigned long flags;
+        sector_t sector;
+        struct rb_node **rbp, *parent;
 
         if (unlikely(io->error < 0)) {
                 crypt_free_buffer_pages(cc, clone);
@@ -1182,20 +1249,34 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 
         clone->bi_iter.bi_sector = cc->start + io->sector;
 
-        if (async)
-                kcryptd_queue_io(io);
-        else
+        if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) {
                 generic_make_request(clone);
+                return;
+        }
+
+        spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
+        rbp = &cc->write_tree.rb_node;
+        parent = NULL;
+        sector = io->sector;
+        while (*rbp) {
+                parent = *rbp;
+                if (sector < crypt_io_from_node(parent)->sector)
+                        rbp = &(*rbp)->rb_left;
+                else
+                        rbp = &(*rbp)->rb_right;
+        }
+        rb_link_node(&io->rb_node, parent, rbp);
+        rb_insert_color(&io->rb_node, &cc->write_tree);
+
+        wake_up_locked(&cc->write_thread_wait);
+        spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
 }
 
 static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
         struct crypt_config *cc = io->cc;
         struct bio *clone;
-        struct dm_crypt_io *new_io;
         int crypt_finished;
-        unsigned out_of_pages = 0;
-        unsigned remaining = io->base_bio->bi_iter.bi_size;
         sector_t sector = io->sector;
         int r;
 
@@ -1205,80 +1286,30 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
         crypt_inc_pending(io);
         crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
 
-        /*
-         * The allocated buffers can be smaller than the whole bio,
-         * so repeat the whole process until all the data can be handled.
-         */
-        while (remaining) {
-                clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
-                if (unlikely(!clone)) {
-                        io->error = -ENOMEM;
-                        break;
-                }
-
-                io->ctx.bio_out = clone;
-                io->ctx.iter_out = clone->bi_iter;
-
-                remaining -= clone->bi_iter.bi_size;
-                sector += bio_sectors(clone);
-
-                crypt_inc_pending(io);
-
-                r = crypt_convert(cc, &io->ctx);
-                if (r < 0)
-                        io->error = -EIO;
-
-                crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
-
-                /* Encryption was already finished, submit io now */
-                if (crypt_finished) {
-                        kcryptd_crypt_write_io_submit(io, 0);
-
-                        /*
-                         * If there was an error, do not try next fragments.
-                         * For async, error is processed in async handler.
-                         */
-                        if (unlikely(r < 0))
-                                break;
+        clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
+        if (unlikely(!clone)) {
+                io->error = -EIO;
+                goto dec;
+        }
 
-                        io->sector = sector;
-                }
+        io->ctx.bio_out = clone;
+        io->ctx.iter_out = clone->bi_iter;
 
-                /*
-                 * Out of memory -> run queues
-                 * But don't wait if split was due to the io size restriction
-                 */
-                if (unlikely(out_of_pages))
-                        congestion_wait(BLK_RW_ASYNC, HZ/100);
+        sector += bio_sectors(clone);
 
-                /*
-                 * With async crypto it is unsafe to share the crypto context
-                 * between fragments, so switch to a new dm_crypt_io structure.
-                 */
-                if (unlikely(!crypt_finished && remaining)) {
-                        new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
-                        crypt_io_init(new_io, io->cc, io->base_bio, sector);
-                        crypt_inc_pending(new_io);
-                        crypt_convert_init(cc, &new_io->ctx, NULL,
-                                           io->base_bio, sector);
-                        new_io->ctx.iter_in = io->ctx.iter_in;
-
-                        /*
-                         * Fragments after the first use the base_io
-                         * pending count.
-                         */
-                        if (!io->base_io)
-                                new_io->base_io = io;
-                        else {
-                                new_io->base_io = io->base_io;
-                                crypt_inc_pending(io->base_io);
-                                crypt_dec_pending(io);
-                        }
+        crypt_inc_pending(io);
+        r = crypt_convert(cc, &io->ctx);
+        if (r)
+                io->error = -EIO;
+        crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
 
-                        io = new_io;
-                }
+        /* Encryption was already finished, submit io now */
+        if (crypt_finished) {
+                kcryptd_crypt_write_io_submit(io, 0);
+                io->sector = sector;
         }
 
+dec:
         crypt_dec_pending(io);
 }
 
@@ -1481,6 +1512,9 @@ static void crypt_dtr(struct dm_target *ti)
         if (!cc)
                 return;
 
+        if (cc->write_thread)
+                kthread_stop(cc->write_thread);
+
         if (cc->io_queue)
                 destroy_workqueue(cc->io_queue);
         if (cc->crypt_queue)
@@ -1495,8 +1529,6 @@ static void crypt_dtr(struct dm_target *ti)
         mempool_destroy(cc->page_pool);
         if (cc->req_pool)
                 mempool_destroy(cc->req_pool);
-        if (cc->io_pool)
-                mempool_destroy(cc->io_pool);
 
         if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
                 cc->iv_gen_ops->dtr(cc);
@@ -1688,7 +1720,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         char dummy;
 
         static struct dm_arg _args[] = {
-                {0, 1, "Invalid number of feature args"},
+                {0, 3, "Invalid number of feature args"},
         };
 
         if (argc < 5) {
@@ -1710,13 +1742,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         if (ret < 0)
                 goto bad;
 
-        ret = -ENOMEM;
-        cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
-        if (!cc->io_pool) {
-                ti->error = "Cannot allocate crypt io mempool";
-                goto bad;
-        }
-
         cc->dmreq_start = sizeof(struct ablkcipher_request);
         cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc));
         cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request));
@@ -1734,6 +1759,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                 iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc));
         }
 
+        ret = -ENOMEM;
         cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
                 sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size);
         if (!cc->req_pool) {
@@ -1746,7 +1772,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                 sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size,
                 ARCH_KMALLOC_MINALIGN);
 
-        cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
+        cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
         if (!cc->page_pool) {
                 ti->error = "Cannot allocate page mempool";
                 goto bad;
@@ -1758,6 +1784,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                 goto bad;
         }
 
+        mutex_init(&cc->bio_alloc_lock);
+
         ret = -EINVAL;
         if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) {
                 ti->error = "Invalid iv_offset sector";
@@ -1788,15 +1816,26 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                 if (ret)
                         goto bad;
 
-                opt_string = dm_shift_arg(&as);
+                while (opt_params--) {
+                        opt_string = dm_shift_arg(&as);
+                        if (!opt_string) {
+                                ti->error = "Not enough feature arguments";
+                                goto bad;
+                        }
 
-                if (opt_params == 1 && opt_string &&
-                    !strcasecmp(opt_string, "allow_discards"))
-                        ti->num_discard_bios = 1;
-                else if (opt_params) {
-                        ret = -EINVAL;
-                        ti->error = "Invalid feature arguments";
-                        goto bad;
+                        if (!strcasecmp(opt_string, "allow_discards"))
+                                ti->num_discard_bios = 1;
+
+                        else if (!strcasecmp(opt_string, "same_cpu_crypt"))
+                                set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+
+                        else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
+                                set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+
+                        else {
+                                ti->error = "Invalid feature arguments";
+                                goto bad;
+                        }
                 }
         }
 
@@ -1807,13 +1846,28 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                 goto bad;
         }
 
-        cc->crypt_queue = alloc_workqueue("kcryptd",
-                                          WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+        if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
+                cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+        else
+                cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+                                                  num_online_cpus());
         if (!cc->crypt_queue) {
                 ti->error = "Couldn't create kcryptd queue";
                 goto bad;
         }
 
+        init_waitqueue_head(&cc->write_thread_wait);
+        cc->write_tree = RB_ROOT;
+
+        cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+        if (IS_ERR(cc->write_thread)) {
+                ret = PTR_ERR(cc->write_thread);
+                cc->write_thread = NULL;
+                ti->error = "Couldn't spawn write thread";
+                goto bad;
+        }
+        wake_up_process(cc->write_thread);
+
         ti->num_flush_bios = 1;
         ti->discard_zeroes_data_unsupported = true;
 
@@ -1848,7 +1902,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 
         if (bio_data_dir(io->base_bio) == READ) {
                 if (kcryptd_io_read(io, GFP_NOWAIT))
-                        kcryptd_queue_io(io);
+                        kcryptd_queue_read(io);
         } else
                 kcryptd_queue_crypt(io);
 
@@ -1860,6 +1914,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 {
         struct crypt_config *cc = ti->private;
         unsigned i, sz = 0;
+        int num_feature_args = 0;
 
         switch (type) {
         case STATUSTYPE_INFO:
@@ -1878,8 +1933,18 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
                 DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
                        cc->dev->name, (unsigned long long)cc->start);
 
-                if (ti->num_discard_bios)
-                        DMEMIT(" 1 allow_discards");
+                num_feature_args += !!ti->num_discard_bios;
+                num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
+                num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
+                if (num_feature_args) {
+                        DMEMIT(" %d", num_feature_args);
+                        if (ti->num_discard_bios)
+                                DMEMIT(" allow_discards");
+                        if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
+                                DMEMIT(" same_cpu_crypt");
+                        if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
+                                DMEMIT(" submit_from_crypt_cpus");
+                }
 
                 break;
         }
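With this change the table status output reports a count of feature arguments followed by the active option names, instead of the old fixed " 1 allow_discards" suffix. For a hypothetical device created with all three options enabled, the line emitted by the code above would end in something like (placeholders, not real values):

    ... <iv_offset> <device> <offset> 3 allow_discards same_cpu_crypt submit_from_crypt_cpus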
@@ -1976,7 +2041,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
         .name   = "crypt",
-        .version = {1, 13, 0},
+        .version = {1, 14, 0},
         .module = THIS_MODULE,
         .ctr    = crypt_ctr,
         .dtr    = crypt_dtr,
@@ -1994,15 +2059,9 @@ static int __init dm_crypt_init(void)
 {
         int r;
 
-        _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0);
-        if (!_crypt_io_pool)
-                return -ENOMEM;
-
         r = dm_register_target(&crypt_target);
-        if (r < 0) {
+        if (r < 0)
                 DMERR("register failed %d", r);
-                kmem_cache_destroy(_crypt_io_pool);
-        }
 
         return r;
 }
@@ -2010,7 +2069,6 @@ static int __init dm_crypt_init(void)
 static void __exit dm_crypt_exit(void)
 {
         dm_unregister_target(&crypt_target);
-        kmem_cache_destroy(_crypt_io_pool);
 }
 
 module_init(dm_crypt_init);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index c09359db3a90..37de0173b6d2 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -290,6 +290,12 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
         unsigned short logical_block_size = queue_logical_block_size(q);
         sector_t num_sectors;
 
+        /* Reject unsupported discard requests */
+        if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) {
+                dec_count(io, region, -EOPNOTSUPP);
+                return;
+        }
+
         /*
          * where->count may be zero if rw holds a flush and we need to
          * send a zero-sized flush.
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 7dfdb5c746d6..089d62751f7f 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -604,6 +604,15 @@ static void write_callback(unsigned long error, void *context)
                 return;
         }
 
+        /*
+         * If the bio is discard, return an error, but do not
+         * degrade the array.
+         */
+        if (bio->bi_rw & REQ_DISCARD) {
+                bio_endio(bio, -EOPNOTSUPP);
+                return;
+        }
+
         for (i = 0; i < ms->nr_mirrors; i++)
                 if (test_bit(i, &error))
                         fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 864b03f47727..8b204ae216ab 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1432,8 +1432,6 @@ out:
                 full_bio->bi_private = pe->full_bio_private;
                 atomic_inc(&full_bio->bi_remaining);
         }
-        free_pending_exception(pe);
-
         increment_pending_exceptions_done_count();
 
         up_write(&s->lock);
@@ -1450,6 +1448,8 @@ out:
         }
 
         retry_origin_bios(s, origin_bios);
+
+        free_pending_exception(pe);
 }
 
 static void commit_callback(void *context, int success)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ec1444f49de1..73f28802dc7a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2571,7 +2571,7 @@ int dm_setup_md_queue(struct mapped_device *md)
         return 0;
 }
 
-static struct mapped_device *dm_find_md(dev_t dev)
+struct mapped_device *dm_get_md(dev_t dev)
 {
         struct mapped_device *md;
         unsigned minor = MINOR(dev);
@@ -2582,12 +2582,15 @@ static struct mapped_device *dm_find_md(dev_t dev)
         spin_lock(&_minor_lock);
 
         md = idr_find(&_minor_idr, minor);
-        if (md && (md == MINOR_ALLOCED ||
-                   (MINOR(disk_devt(dm_disk(md))) != minor) ||
-                   dm_deleting_md(md) ||
-                   test_bit(DMF_FREEING, &md->flags))) {
-                md = NULL;
-                goto out;
+        if (md) {
+                if ((md == MINOR_ALLOCED ||
+                     (MINOR(disk_devt(dm_disk(md))) != minor) ||
+                     dm_deleting_md(md) ||
+                     test_bit(DMF_FREEING, &md->flags))) {
+                        md = NULL;
+                        goto out;
+                }
+                dm_get(md);
         }
 
 out:
@@ -2595,16 +2598,6 @@ out:
 
         return md;
 }
-
-struct mapped_device *dm_get_md(dev_t dev)
-{
-        struct mapped_device *md = dm_find_md(dev);
-
-        if (md)
-                dm_get(md);
-
-        return md;
-}
 EXPORT_SYMBOL_GPL(dm_get_md);
 
 void *dm_get_mdptr(struct mapped_device *md)
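The race fixed here is the classic lookup-then-get pattern: the old dm_find_md() returned the device under _minor_lock, but the reference was only taken by dm_get_md() after the lock had been dropped, so the device could be freed in the window between the two steps. A minimal user-space sketch of the corrected pattern, with a hypothetical object table and a pthread mutex standing in for the idr and spinlock, looks like this:

/* Sketch only: look up an object and take the reference while still
 * holding the lock that protects the table, so the object cannot be
 * freed between the lookup and the refcount increment. */
#include <pthread.h>
#include <stddef.h>

#define TABLE_SIZE 16

struct obj {
        int id;
        int refcount;
};

static struct obj *table[TABLE_SIZE];
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static struct obj *obj_get(int id)
{
        struct obj *o = NULL;

        pthread_mutex_lock(&table_lock);
        for (size_t i = 0; i < TABLE_SIZE; i++) {
                if (table[i] && table[i]->id == id) {
                        o = table[i];
                        o->refcount++;  /* taken before the lock is dropped */
                        break;
                }
        }
        pthread_mutex_unlock(&table_lock);

        return o;
}

In the kernel fix above, dm_get() plays the role of the refcount increment and is now called before _minor_lock is released.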
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index cfbf9617e465..ebb280a14325 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -78,7 +78,9 @@ static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b,
         if (r)
                 return r;
 
-        return count > 1;
+        *result = count > 1;
+
+        return 0;
 }
 
 static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,