author     Linus Torvalds <torvalds@linux-foundation.org>   2015-02-21 16:28:45 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-02-21 16:28:45 -0500
commit     a911dcdba190ddf77e9199b9917156f879f42d4b (patch)
tree       c94d84a9fa48520cd993670a0b4f434c4dcb48db /drivers/md
parent     e20d3ef5406d3a28b76a63905b2a6bd3fb95c377 (diff)
parent     22aa66a3ee5b61e0f4a0bfeabcaa567861109ec3 (diff)
Merge tag 'dm-3.20-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull more device mapper changes from Mike Snitzer:
- Significant dm-crypt CPU scalability performance improvements thanks
to changes that enable effective use of an unbound workqueue across
all available CPUs. A large battery of tests was performed to
validate these changes; a summary of the results is available here:
https://www.redhat.com/archives/dm-devel/2015-February/msg00106.html
(an example crypt table line using the new optional parameters is
shown after the shortlog below)
- A few additional stable fixes (to DM core, dm-snapshot and dm-mirror)
and a small fix to dm-space-map-disk.
* tag 'dm-3.20-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm snapshot: fix a possible invalid memory access on unload
dm: fix a race condition in dm_get_md
dm crypt: sort writes
dm crypt: add 'submit_from_crypt_cpus' option
dm crypt: offload writes to thread
dm crypt: remove unused io_pool and _crypt_io_pool
dm crypt: avoid deadlock in mempools
dm crypt: don't allocate pages for a partial request
dm crypt: use unbound workqueue for request processing
dm io: reject unsupported DISCARD requests with EOPNOTSUPP
dm mirror: do not degrade the mirror on discard error
dm space map disk: fix sm_disk_count_is_more_than_one()
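The two new optional parameters are passed on the crypt target line after a
leading count of feature arguments, in the same slot already used by
allow_discards. As a hypothetical illustration (the key, device and sector
count below are placeholders, not values taken from this commit):

    # hypothetical 1 GiB mapping; substitute a real key and device
    dmsetup create cryptdisk --table "0 2097152 crypt aes-xts-plain64 \
        <64-hex-digit-key> 0 /dev/sdb 0 2 same_cpu_crypt submit_from_crypt_cpus"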
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-crypt.c                           | 392
-rw-r--r--  drivers/md/dm-io.c                              |   6
-rw-r--r--  drivers/md/dm-raid1.c                           |   9
-rw-r--r--  drivers/md/dm-snap.c                            |   4
-rw-r--r--  drivers/md/dm.c                                 |  27
-rw-r--r--  drivers/md/persistent-data/dm-space-map-disk.c  |   4
6 files changed, 255 insertions(+), 187 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 08981be7baa1..713a96237a80 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -18,9 +18,11 @@ | |||
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/crypto.h> | 19 | #include <linux/crypto.h> |
20 | #include <linux/workqueue.h> | 20 | #include <linux/workqueue.h> |
21 | #include <linux/kthread.h> | ||
21 | #include <linux/backing-dev.h> | 22 | #include <linux/backing-dev.h> |
22 | #include <linux/atomic.h> | 23 | #include <linux/atomic.h> |
23 | #include <linux/scatterlist.h> | 24 | #include <linux/scatterlist.h> |
25 | #include <linux/rbtree.h> | ||
24 | #include <asm/page.h> | 26 | #include <asm/page.h> |
25 | #include <asm/unaligned.h> | 27 | #include <asm/unaligned.h> |
26 | #include <crypto/hash.h> | 28 | #include <crypto/hash.h> |
@@ -58,7 +60,8 @@ struct dm_crypt_io { | |||
58 | atomic_t io_pending; | 60 | atomic_t io_pending; |
59 | int error; | 61 | int error; |
60 | sector_t sector; | 62 | sector_t sector; |
61 | struct dm_crypt_io *base_io; | 63 | |
64 | struct rb_node rb_node; | ||
62 | } CRYPTO_MINALIGN_ATTR; | 65 | } CRYPTO_MINALIGN_ATTR; |
63 | 66 | ||
64 | struct dm_crypt_request { | 67 | struct dm_crypt_request { |
@@ -108,7 +111,8 @@ struct iv_tcw_private { | |||
108 | * Crypt: maps a linear range of a block device | 111 | * Crypt: maps a linear range of a block device |
109 | * and encrypts / decrypts at the same time. | 112 | * and encrypts / decrypts at the same time. |
110 | */ | 113 | */ |
111 | enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; | 114 | enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, |
115 | DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD }; | ||
112 | 116 | ||
113 | /* | 117 | /* |
114 | * The fields in here must be read only after initialization. | 118 | * The fields in here must be read only after initialization. |
@@ -121,14 +125,18 @@ struct crypt_config { | |||
121 | * pool for per bio private data, crypto requests and | 125 | * pool for per bio private data, crypto requests and |
122 | * encryption requeusts/buffer pages | 126 | * encryption requeusts/buffer pages |
123 | */ | 127 | */ |
124 | mempool_t *io_pool; | ||
125 | mempool_t *req_pool; | 128 | mempool_t *req_pool; |
126 | mempool_t *page_pool; | 129 | mempool_t *page_pool; |
127 | struct bio_set *bs; | 130 | struct bio_set *bs; |
131 | struct mutex bio_alloc_lock; | ||
128 | 132 | ||
129 | struct workqueue_struct *io_queue; | 133 | struct workqueue_struct *io_queue; |
130 | struct workqueue_struct *crypt_queue; | 134 | struct workqueue_struct *crypt_queue; |
131 | 135 | ||
136 | struct task_struct *write_thread; | ||
137 | wait_queue_head_t write_thread_wait; | ||
138 | struct rb_root write_tree; | ||
139 | |||
132 | char *cipher; | 140 | char *cipher; |
133 | char *cipher_string; | 141 | char *cipher_string; |
134 | 142 | ||
@@ -172,9 +180,6 @@ struct crypt_config { | |||
172 | }; | 180 | }; |
173 | 181 | ||
174 | #define MIN_IOS 16 | 182 | #define MIN_IOS 16 |
175 | #define MIN_POOL_PAGES 32 | ||
176 | |||
177 | static struct kmem_cache *_crypt_io_pool; | ||
178 | 183 | ||
179 | static void clone_init(struct dm_crypt_io *, struct bio *); | 184 | static void clone_init(struct dm_crypt_io *, struct bio *); |
180 | static void kcryptd_queue_crypt(struct dm_crypt_io *io); | 185 | static void kcryptd_queue_crypt(struct dm_crypt_io *io); |
@@ -946,57 +951,70 @@ static int crypt_convert(struct crypt_config *cc, | |||
946 | return 0; | 951 | return 0; |
947 | } | 952 | } |
948 | 953 | ||
954 | static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); | ||
955 | |||
949 | /* | 956 | /* |
950 | * Generate a new unfragmented bio with the given size | 957 | * Generate a new unfragmented bio with the given size |
951 | * This should never violate the device limitations | 958 | * This should never violate the device limitations |
952 | * May return a smaller bio when running out of pages, indicated by | 959 | * |
953 | * *out_of_pages set to 1. | 960 | * This function may be called concurrently. If we allocate from the mempool |
961 | * concurrently, there is a possibility of deadlock. For example, if we have | ||
962 | * mempool of 256 pages, two processes, each wanting 256, pages allocate from | ||
963 | * the mempool concurrently, it may deadlock in a situation where both processes | ||
964 | * have allocated 128 pages and the mempool is exhausted. | ||
965 | * | ||
966 | * In order to avoid this scenario we allocate the pages under a mutex. | ||
967 | * | ||
968 | * In order to not degrade performance with excessive locking, we try | ||
969 | * non-blocking allocations without a mutex first but on failure we fallback | ||
970 | * to blocking allocations with a mutex. | ||
954 | */ | 971 | */ |
955 | static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, | 972 | static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) |
956 | unsigned *out_of_pages) | ||
957 | { | 973 | { |
958 | struct crypt_config *cc = io->cc; | 974 | struct crypt_config *cc = io->cc; |
959 | struct bio *clone; | 975 | struct bio *clone; |
960 | unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; | 976 | unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; |
961 | gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; | 977 | gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; |
962 | unsigned i, len; | 978 | unsigned i, len, remaining_size; |
963 | struct page *page; | 979 | struct page *page; |
980 | struct bio_vec *bvec; | ||
981 | |||
982 | retry: | ||
983 | if (unlikely(gfp_mask & __GFP_WAIT)) | ||
984 | mutex_lock(&cc->bio_alloc_lock); | ||
964 | 985 | ||
965 | clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); | 986 | clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); |
966 | if (!clone) | 987 | if (!clone) |
967 | return NULL; | 988 | goto return_clone; |
968 | 989 | ||
969 | clone_init(io, clone); | 990 | clone_init(io, clone); |
970 | *out_of_pages = 0; | 991 | |
992 | remaining_size = size; | ||
971 | 993 | ||
972 | for (i = 0; i < nr_iovecs; i++) { | 994 | for (i = 0; i < nr_iovecs; i++) { |
973 | page = mempool_alloc(cc->page_pool, gfp_mask); | 995 | page = mempool_alloc(cc->page_pool, gfp_mask); |
974 | if (!page) { | 996 | if (!page) { |
975 | *out_of_pages = 1; | 997 | crypt_free_buffer_pages(cc, clone); |
976 | break; | 998 | bio_put(clone); |
999 | gfp_mask |= __GFP_WAIT; | ||
1000 | goto retry; | ||
977 | } | 1001 | } |
978 | 1002 | ||
979 | /* | 1003 | len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size; |
980 | * If additional pages cannot be allocated without waiting, | ||
981 | * return a partially-allocated bio. The caller will then try | ||
982 | * to allocate more bios while submitting this partial bio. | ||
983 | */ | ||
984 | gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; | ||
985 | 1004 | ||
986 | len = (size > PAGE_SIZE) ? PAGE_SIZE : size; | 1005 | bvec = &clone->bi_io_vec[clone->bi_vcnt++]; |
1006 | bvec->bv_page = page; | ||
1007 | bvec->bv_len = len; | ||
1008 | bvec->bv_offset = 0; | ||
987 | 1009 | ||
988 | if (!bio_add_page(clone, page, len, 0)) { | 1010 | clone->bi_iter.bi_size += len; |
989 | mempool_free(page, cc->page_pool); | ||
990 | break; | ||
991 | } | ||
992 | 1011 | ||
993 | size -= len; | 1012 | remaining_size -= len; |
994 | } | 1013 | } |
995 | 1014 | ||
996 | if (!clone->bi_iter.bi_size) { | 1015 | return_clone: |
997 | bio_put(clone); | 1016 | if (unlikely(gfp_mask & __GFP_WAIT)) |
998 | return NULL; | 1017 | mutex_unlock(&cc->bio_alloc_lock); |
999 | } | ||
1000 | 1018 | ||
1001 | return clone; | 1019 | return clone; |
1002 | } | 1020 | } |
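The comment block above spells out the allocation strategy: try a
non-blocking allocation first, and only fall back to blocking allocations
serialized by a mutex, so two callers can never each hold half of an
exhausted pool while waiting for the other half. A minimal userspace sketch
of the same pattern, with malloc standing in for the mempool (the pool_*
names are illustrative, not kernel APIs):

    #include <pthread.h>
    #include <stdlib.h>

    /* Toy stand-ins for mempool_alloc() with and without __GFP_WAIT. */
    static void *pool_alloc_nowait(size_t size) { return malloc(size); }
    static void *pool_alloc_wait(size_t size)   { return malloc(size); }

    static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;

    /*
     * Allocate 'count' pages for one request.  Non-blocking attempts are
     * made without any lock; if one fails, everything gathered so far is
     * returned to the pool and the whole request is retried with blocking
     * allocations under a mutex, so at most one caller at a time can sleep
     * on the pool.  This mirrors the retry/__GFP_WAIT logic above.
     */
    static int alloc_pages_for_request(void **pages, unsigned count)
    {
        int blocking = 0;
        unsigned i;

    retry:
        if (blocking)
            pthread_mutex_lock(&alloc_lock);

        for (i = 0; i < count; i++) {
            pages[i] = blocking ? pool_alloc_wait(4096)
                                : pool_alloc_nowait(4096);
            if (!pages[i]) {
                while (i--)
                    free(pages[i]);       /* give the pages back */
                if (blocking) {
                    pthread_mutex_unlock(&alloc_lock);
                    return -1;            /* even blocking failed */
                }
                blocking = 1;
                goto retry;
            }
        }

        if (blocking)
            pthread_mutex_unlock(&alloc_lock);
        return 0;
    }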
@@ -1020,7 +1038,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, | |||
1020 | io->base_bio = bio; | 1038 | io->base_bio = bio; |
1021 | io->sector = sector; | 1039 | io->sector = sector; |
1022 | io->error = 0; | 1040 | io->error = 0; |
1023 | io->base_io = NULL; | ||
1024 | io->ctx.req = NULL; | 1041 | io->ctx.req = NULL; |
1025 | atomic_set(&io->io_pending, 0); | 1042 | atomic_set(&io->io_pending, 0); |
1026 | } | 1043 | } |
@@ -1033,13 +1050,11 @@ static void crypt_inc_pending(struct dm_crypt_io *io) | |||
1033 | /* | 1050 | /* |
1034 | * One of the bios was finished. Check for completion of | 1051 | * One of the bios was finished. Check for completion of |
1035 | * the whole request and correctly clean up the buffer. | 1052 | * the whole request and correctly clean up the buffer. |
1036 | * If base_io is set, wait for the last fragment to complete. | ||
1037 | */ | 1053 | */ |
1038 | static void crypt_dec_pending(struct dm_crypt_io *io) | 1054 | static void crypt_dec_pending(struct dm_crypt_io *io) |
1039 | { | 1055 | { |
1040 | struct crypt_config *cc = io->cc; | 1056 | struct crypt_config *cc = io->cc; |
1041 | struct bio *base_bio = io->base_bio; | 1057 | struct bio *base_bio = io->base_bio; |
1042 | struct dm_crypt_io *base_io = io->base_io; | ||
1043 | int error = io->error; | 1058 | int error = io->error; |
1044 | 1059 | ||
1045 | if (!atomic_dec_and_test(&io->io_pending)) | 1060 | if (!atomic_dec_and_test(&io->io_pending)) |
@@ -1047,16 +1062,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io) | |||
1047 | 1062 | ||
1048 | if (io->ctx.req) | 1063 | if (io->ctx.req) |
1049 | crypt_free_req(cc, io->ctx.req, base_bio); | 1064 | crypt_free_req(cc, io->ctx.req, base_bio); |
1050 | if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size)) | 1065 | |
1051 | mempool_free(io, cc->io_pool); | 1066 | bio_endio(base_bio, error); |
1052 | |||
1053 | if (likely(!base_io)) | ||
1054 | bio_endio(base_bio, error); | ||
1055 | else { | ||
1056 | if (error && !base_io->error) | ||
1057 | base_io->error = error; | ||
1058 | crypt_dec_pending(base_io); | ||
1059 | } | ||
1060 | } | 1067 | } |
1061 | 1068 | ||
1062 | /* | 1069 | /* |
@@ -1138,37 +1145,97 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) | |||
1138 | return 0; | 1145 | return 0; |
1139 | } | 1146 | } |
1140 | 1147 | ||
1148 | static void kcryptd_io_read_work(struct work_struct *work) | ||
1149 | { | ||
1150 | struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); | ||
1151 | |||
1152 | crypt_inc_pending(io); | ||
1153 | if (kcryptd_io_read(io, GFP_NOIO)) | ||
1154 | io->error = -ENOMEM; | ||
1155 | crypt_dec_pending(io); | ||
1156 | } | ||
1157 | |||
1158 | static void kcryptd_queue_read(struct dm_crypt_io *io) | ||
1159 | { | ||
1160 | struct crypt_config *cc = io->cc; | ||
1161 | |||
1162 | INIT_WORK(&io->work, kcryptd_io_read_work); | ||
1163 | queue_work(cc->io_queue, &io->work); | ||
1164 | } | ||
1165 | |||
1141 | static void kcryptd_io_write(struct dm_crypt_io *io) | 1166 | static void kcryptd_io_write(struct dm_crypt_io *io) |
1142 | { | 1167 | { |
1143 | struct bio *clone = io->ctx.bio_out; | 1168 | struct bio *clone = io->ctx.bio_out; |
1169 | |||
1144 | generic_make_request(clone); | 1170 | generic_make_request(clone); |
1145 | } | 1171 | } |
1146 | 1172 | ||
1147 | static void kcryptd_io(struct work_struct *work) | 1173 | #define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node) |
1174 | |||
1175 | static int dmcrypt_write(void *data) | ||
1148 | { | 1176 | { |
1149 | struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); | 1177 | struct crypt_config *cc = data; |
1178 | struct dm_crypt_io *io; | ||
1150 | 1179 | ||
1151 | if (bio_data_dir(io->base_bio) == READ) { | 1180 | while (1) { |
1152 | crypt_inc_pending(io); | 1181 | struct rb_root write_tree; |
1153 | if (kcryptd_io_read(io, GFP_NOIO)) | 1182 | struct blk_plug plug; |
1154 | io->error = -ENOMEM; | ||
1155 | crypt_dec_pending(io); | ||
1156 | } else | ||
1157 | kcryptd_io_write(io); | ||
1158 | } | ||
1159 | 1183 | ||
1160 | static void kcryptd_queue_io(struct dm_crypt_io *io) | 1184 | DECLARE_WAITQUEUE(wait, current); |
1161 | { | ||
1162 | struct crypt_config *cc = io->cc; | ||
1163 | 1185 | ||
1164 | INIT_WORK(&io->work, kcryptd_io); | 1186 | spin_lock_irq(&cc->write_thread_wait.lock); |
1165 | queue_work(cc->io_queue, &io->work); | 1187 | continue_locked: |
1188 | |||
1189 | if (!RB_EMPTY_ROOT(&cc->write_tree)) | ||
1190 | goto pop_from_list; | ||
1191 | |||
1192 | __set_current_state(TASK_INTERRUPTIBLE); | ||
1193 | __add_wait_queue(&cc->write_thread_wait, &wait); | ||
1194 | |||
1195 | spin_unlock_irq(&cc->write_thread_wait.lock); | ||
1196 | |||
1197 | if (unlikely(kthread_should_stop())) { | ||
1198 | set_task_state(current, TASK_RUNNING); | ||
1199 | remove_wait_queue(&cc->write_thread_wait, &wait); | ||
1200 | break; | ||
1201 | } | ||
1202 | |||
1203 | schedule(); | ||
1204 | |||
1205 | set_task_state(current, TASK_RUNNING); | ||
1206 | spin_lock_irq(&cc->write_thread_wait.lock); | ||
1207 | __remove_wait_queue(&cc->write_thread_wait, &wait); | ||
1208 | goto continue_locked; | ||
1209 | |||
1210 | pop_from_list: | ||
1211 | write_tree = cc->write_tree; | ||
1212 | cc->write_tree = RB_ROOT; | ||
1213 | spin_unlock_irq(&cc->write_thread_wait.lock); | ||
1214 | |||
1215 | BUG_ON(rb_parent(write_tree.rb_node)); | ||
1216 | |||
1217 | /* | ||
1218 | * Note: we cannot walk the tree here with rb_next because | ||
1219 | * the structures may be freed when kcryptd_io_write is called. | ||
1220 | */ | ||
1221 | blk_start_plug(&plug); | ||
1222 | do { | ||
1223 | io = crypt_io_from_node(rb_first(&write_tree)); | ||
1224 | rb_erase(&io->rb_node, &write_tree); | ||
1225 | kcryptd_io_write(io); | ||
1226 | } while (!RB_EMPTY_ROOT(&write_tree)); | ||
1227 | blk_finish_plug(&plug); | ||
1228 | } | ||
1229 | return 0; | ||
1166 | } | 1230 | } |
1167 | 1231 | ||
1168 | static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) | 1232 | static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) |
1169 | { | 1233 | { |
1170 | struct bio *clone = io->ctx.bio_out; | 1234 | struct bio *clone = io->ctx.bio_out; |
1171 | struct crypt_config *cc = io->cc; | 1235 | struct crypt_config *cc = io->cc; |
1236 | unsigned long flags; | ||
1237 | sector_t sector; | ||
1238 | struct rb_node **rbp, *parent; | ||
1172 | 1239 | ||
1173 | if (unlikely(io->error < 0)) { | 1240 | if (unlikely(io->error < 0)) { |
1174 | crypt_free_buffer_pages(cc, clone); | 1241 | crypt_free_buffer_pages(cc, clone); |
@@ -1182,20 +1249,34 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) | |||
1182 | 1249 | ||
1183 | clone->bi_iter.bi_sector = cc->start + io->sector; | 1250 | clone->bi_iter.bi_sector = cc->start + io->sector; |
1184 | 1251 | ||
1185 | if (async) | 1252 | if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) { |
1186 | kcryptd_queue_io(io); | ||
1187 | else | ||
1188 | generic_make_request(clone); | 1253 | generic_make_request(clone); |
1254 | return; | ||
1255 | } | ||
1256 | |||
1257 | spin_lock_irqsave(&cc->write_thread_wait.lock, flags); | ||
1258 | rbp = &cc->write_tree.rb_node; | ||
1259 | parent = NULL; | ||
1260 | sector = io->sector; | ||
1261 | while (*rbp) { | ||
1262 | parent = *rbp; | ||
1263 | if (sector < crypt_io_from_node(parent)->sector) | ||
1264 | rbp = &(*rbp)->rb_left; | ||
1265 | else | ||
1266 | rbp = &(*rbp)->rb_right; | ||
1267 | } | ||
1268 | rb_link_node(&io->rb_node, parent, rbp); | ||
1269 | rb_insert_color(&io->rb_node, &cc->write_tree); | ||
1270 | |||
1271 | wake_up_locked(&cc->write_thread_wait); | ||
1272 | spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags); | ||
1189 | } | 1273 | } |
1190 | 1274 | ||
1191 | static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) | 1275 | static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) |
1192 | { | 1276 | { |
1193 | struct crypt_config *cc = io->cc; | 1277 | struct crypt_config *cc = io->cc; |
1194 | struct bio *clone; | 1278 | struct bio *clone; |
1195 | struct dm_crypt_io *new_io; | ||
1196 | int crypt_finished; | 1279 | int crypt_finished; |
1197 | unsigned out_of_pages = 0; | ||
1198 | unsigned remaining = io->base_bio->bi_iter.bi_size; | ||
1199 | sector_t sector = io->sector; | 1280 | sector_t sector = io->sector; |
1200 | int r; | 1281 | int r; |
1201 | 1282 | ||
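Taken together, the previous two hunks implement the write sorting:
kcryptd_crypt_write_io_submit inserts each encrypted bio into an rb-tree
keyed by sector and wakes the writer, while dmcrypt_write steals the whole
tree under the wait-queue lock and issues the bios outside it, in ascending
sector order. A rough userspace analogue of that hand-off, using a pthread
condition variable and a sorted batch in place of the rb-tree (all names
below are illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    struct pending_write {
        unsigned long long sector;
        /* encrypted payload would live here */
    };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;
    static struct pending_write *queue[1024];
    static int queued;

    static int by_sector(const void *a, const void *b)
    {
        const struct pending_write *x = *(struct pending_write *const *)a;
        const struct pending_write *y = *(struct pending_write *const *)b;
        return (x->sector > y->sector) - (x->sector < y->sector);
    }

    static void submit(struct pending_write *w)
    {
        /* issue the write here */
        free(w);
    }

    /* Producer (kcryptd_crypt_write_io_submit): queue and wake the thread. */
    static void queue_write(struct pending_write *w)
    {
        pthread_mutex_lock(&lock);
        queue[queued++] = w;
        pthread_cond_signal(&kick);
        pthread_mutex_unlock(&lock);
    }

    /* Consumer (dmcrypt_write): steal the whole batch, submit it in order. */
    static void *write_thread(void *arg)
    {
        struct pending_write *batch[1024];
        int n, i;

        (void)arg;
        for (;;) {
            pthread_mutex_lock(&lock);
            while (!queued)
                pthread_cond_wait(&kick, &lock);
            n = queued;
            for (i = 0; i < n; i++)
                batch[i] = queue[i];
            queued = 0;
            pthread_mutex_unlock(&lock);

            /* The kernel keeps an rb-tree; sorting the stolen batch has the
             * same effect of issuing writes in ascending sector order. */
            qsort(batch, n, sizeof(batch[0]), by_sector);
            for (i = 0; i < n; i++)
                submit(batch[i]);
        }
        return NULL;
    }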
@@ -1205,80 +1286,30 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) | |||
1205 | crypt_inc_pending(io); | 1286 | crypt_inc_pending(io); |
1206 | crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); | 1287 | crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); |
1207 | 1288 | ||
1208 | /* | 1289 | clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); |
1209 | * The allocated buffers can be smaller than the whole bio, | 1290 | if (unlikely(!clone)) { |
1210 | * so repeat the whole process until all the data can be handled. | 1291 | io->error = -EIO; |
1211 | */ | 1292 | goto dec; |
1212 | while (remaining) { | 1293 | } |
1213 | clone = crypt_alloc_buffer(io, remaining, &out_of_pages); | ||
1214 | if (unlikely(!clone)) { | ||
1215 | io->error = -ENOMEM; | ||
1216 | break; | ||
1217 | } | ||
1218 | |||
1219 | io->ctx.bio_out = clone; | ||
1220 | io->ctx.iter_out = clone->bi_iter; | ||
1221 | |||
1222 | remaining -= clone->bi_iter.bi_size; | ||
1223 | sector += bio_sectors(clone); | ||
1224 | |||
1225 | crypt_inc_pending(io); | ||
1226 | |||
1227 | r = crypt_convert(cc, &io->ctx); | ||
1228 | if (r < 0) | ||
1229 | io->error = -EIO; | ||
1230 | |||
1231 | crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); | ||
1232 | |||
1233 | /* Encryption was already finished, submit io now */ | ||
1234 | if (crypt_finished) { | ||
1235 | kcryptd_crypt_write_io_submit(io, 0); | ||
1236 | |||
1237 | /* | ||
1238 | * If there was an error, do not try next fragments. | ||
1239 | * For async, error is processed in async handler. | ||
1240 | */ | ||
1241 | if (unlikely(r < 0)) | ||
1242 | break; | ||
1243 | 1294 | ||
1244 | io->sector = sector; | 1295 | io->ctx.bio_out = clone; |
1245 | } | 1296 | io->ctx.iter_out = clone->bi_iter; |
1246 | 1297 | ||
1247 | /* | 1298 | sector += bio_sectors(clone); |
1248 | * Out of memory -> run queues | ||
1249 | * But don't wait if split was due to the io size restriction | ||
1250 | */ | ||
1251 | if (unlikely(out_of_pages)) | ||
1252 | congestion_wait(BLK_RW_ASYNC, HZ/100); | ||
1253 | 1299 | ||
1254 | /* | 1300 | crypt_inc_pending(io); |
1255 | * With async crypto it is unsafe to share the crypto context | 1301 | r = crypt_convert(cc, &io->ctx); |
1256 | * between fragments, so switch to a new dm_crypt_io structure. | 1302 | if (r) |
1257 | */ | 1303 | io->error = -EIO; |
1258 | if (unlikely(!crypt_finished && remaining)) { | 1304 | crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); |
1259 | new_io = mempool_alloc(cc->io_pool, GFP_NOIO); | ||
1260 | crypt_io_init(new_io, io->cc, io->base_bio, sector); | ||
1261 | crypt_inc_pending(new_io); | ||
1262 | crypt_convert_init(cc, &new_io->ctx, NULL, | ||
1263 | io->base_bio, sector); | ||
1264 | new_io->ctx.iter_in = io->ctx.iter_in; | ||
1265 | |||
1266 | /* | ||
1267 | * Fragments after the first use the base_io | ||
1268 | * pending count. | ||
1269 | */ | ||
1270 | if (!io->base_io) | ||
1271 | new_io->base_io = io; | ||
1272 | else { | ||
1273 | new_io->base_io = io->base_io; | ||
1274 | crypt_inc_pending(io->base_io); | ||
1275 | crypt_dec_pending(io); | ||
1276 | } | ||
1277 | 1305 | ||
1278 | io = new_io; | 1306 | /* Encryption was already finished, submit io now */ |
1279 | } | 1307 | if (crypt_finished) { |
1308 | kcryptd_crypt_write_io_submit(io, 0); | ||
1309 | io->sector = sector; | ||
1280 | } | 1310 | } |
1281 | 1311 | ||
1312 | dec: | ||
1282 | crypt_dec_pending(io); | 1313 | crypt_dec_pending(io); |
1283 | } | 1314 | } |
1284 | 1315 | ||
@@ -1481,6 +1512,9 @@ static void crypt_dtr(struct dm_target *ti) | |||
1481 | if (!cc) | 1512 | if (!cc) |
1482 | return; | 1513 | return; |
1483 | 1514 | ||
1515 | if (cc->write_thread) | ||
1516 | kthread_stop(cc->write_thread); | ||
1517 | |||
1484 | if (cc->io_queue) | 1518 | if (cc->io_queue) |
1485 | destroy_workqueue(cc->io_queue); | 1519 | destroy_workqueue(cc->io_queue); |
1486 | if (cc->crypt_queue) | 1520 | if (cc->crypt_queue) |
@@ -1495,8 +1529,6 @@ static void crypt_dtr(struct dm_target *ti) | |||
1495 | mempool_destroy(cc->page_pool); | 1529 | mempool_destroy(cc->page_pool); |
1496 | if (cc->req_pool) | 1530 | if (cc->req_pool) |
1497 | mempool_destroy(cc->req_pool); | 1531 | mempool_destroy(cc->req_pool); |
1498 | if (cc->io_pool) | ||
1499 | mempool_destroy(cc->io_pool); | ||
1500 | 1532 | ||
1501 | if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) | 1533 | if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) |
1502 | cc->iv_gen_ops->dtr(cc); | 1534 | cc->iv_gen_ops->dtr(cc); |
@@ -1688,7 +1720,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1688 | char dummy; | 1720 | char dummy; |
1689 | 1721 | ||
1690 | static struct dm_arg _args[] = { | 1722 | static struct dm_arg _args[] = { |
1691 | {0, 1, "Invalid number of feature args"}, | 1723 | {0, 3, "Invalid number of feature args"}, |
1692 | }; | 1724 | }; |
1693 | 1725 | ||
1694 | if (argc < 5) { | 1726 | if (argc < 5) { |
@@ -1710,13 +1742,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1710 | if (ret < 0) | 1742 | if (ret < 0) |
1711 | goto bad; | 1743 | goto bad; |
1712 | 1744 | ||
1713 | ret = -ENOMEM; | ||
1714 | cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); | ||
1715 | if (!cc->io_pool) { | ||
1716 | ti->error = "Cannot allocate crypt io mempool"; | ||
1717 | goto bad; | ||
1718 | } | ||
1719 | |||
1720 | cc->dmreq_start = sizeof(struct ablkcipher_request); | 1745 | cc->dmreq_start = sizeof(struct ablkcipher_request); |
1721 | cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc)); | 1746 | cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc)); |
1722 | cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request)); | 1747 | cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request)); |
@@ -1734,6 +1759,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1734 | iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc)); | 1759 | iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc)); |
1735 | } | 1760 | } |
1736 | 1761 | ||
1762 | ret = -ENOMEM; | ||
1737 | cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + | 1763 | cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + |
1738 | sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size); | 1764 | sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size); |
1739 | if (!cc->req_pool) { | 1765 | if (!cc->req_pool) { |
@@ -1746,7 +1772,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1746 | sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size, | 1772 | sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size, |
1747 | ARCH_KMALLOC_MINALIGN); | 1773 | ARCH_KMALLOC_MINALIGN); |
1748 | 1774 | ||
1749 | cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); | 1775 | cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0); |
1750 | if (!cc->page_pool) { | 1776 | if (!cc->page_pool) { |
1751 | ti->error = "Cannot allocate page mempool"; | 1777 | ti->error = "Cannot allocate page mempool"; |
1752 | goto bad; | 1778 | goto bad; |
@@ -1758,6 +1784,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1758 | goto bad; | 1784 | goto bad; |
1759 | } | 1785 | } |
1760 | 1786 | ||
1787 | mutex_init(&cc->bio_alloc_lock); | ||
1788 | |||
1761 | ret = -EINVAL; | 1789 | ret = -EINVAL; |
1762 | if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) { | 1790 | if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) { |
1763 | ti->error = "Invalid iv_offset sector"; | 1791 | ti->error = "Invalid iv_offset sector"; |
@@ -1788,15 +1816,26 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1788 | if (ret) | 1816 | if (ret) |
1789 | goto bad; | 1817 | goto bad; |
1790 | 1818 | ||
1791 | opt_string = dm_shift_arg(&as); | 1819 | while (opt_params--) { |
1820 | opt_string = dm_shift_arg(&as); | ||
1821 | if (!opt_string) { | ||
1822 | ti->error = "Not enough feature arguments"; | ||
1823 | goto bad; | ||
1824 | } | ||
1792 | 1825 | ||
1793 | if (opt_params == 1 && opt_string && | 1826 | if (!strcasecmp(opt_string, "allow_discards")) |
1794 | !strcasecmp(opt_string, "allow_discards")) | 1827 | ti->num_discard_bios = 1; |
1795 | ti->num_discard_bios = 1; | 1828 | |
1796 | else if (opt_params) { | 1829 | else if (!strcasecmp(opt_string, "same_cpu_crypt")) |
1797 | ret = -EINVAL; | 1830 | set_bit(DM_CRYPT_SAME_CPU, &cc->flags); |
1798 | ti->error = "Invalid feature arguments"; | 1831 | |
1799 | goto bad; | 1832 | else if (!strcasecmp(opt_string, "submit_from_crypt_cpus")) |
1833 | set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); | ||
1834 | |||
1835 | else { | ||
1836 | ti->error = "Invalid feature arguments"; | ||
1837 | goto bad; | ||
1838 | } | ||
1800 | } | 1839 | } |
1801 | } | 1840 | } |
1802 | 1841 | ||
@@ -1807,13 +1846,28 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
1807 | goto bad; | 1846 | goto bad; |
1808 | } | 1847 | } |
1809 | 1848 | ||
1810 | cc->crypt_queue = alloc_workqueue("kcryptd", | 1849 | if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) |
1811 | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1); | 1850 | cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1); |
1851 | else | ||
1852 | cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, | ||
1853 | num_online_cpus()); | ||
1812 | if (!cc->crypt_queue) { | 1854 | if (!cc->crypt_queue) { |
1813 | ti->error = "Couldn't create kcryptd queue"; | 1855 | ti->error = "Couldn't create kcryptd queue"; |
1814 | goto bad; | 1856 | goto bad; |
1815 | } | 1857 | } |
1816 | 1858 | ||
1859 | init_waitqueue_head(&cc->write_thread_wait); | ||
1860 | cc->write_tree = RB_ROOT; | ||
1861 | |||
1862 | cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); | ||
1863 | if (IS_ERR(cc->write_thread)) { | ||
1864 | ret = PTR_ERR(cc->write_thread); | ||
1865 | cc->write_thread = NULL; | ||
1866 | ti->error = "Couldn't spawn write thread"; | ||
1867 | goto bad; | ||
1868 | } | ||
1869 | wake_up_process(cc->write_thread); | ||
1870 | |||
1817 | ti->num_flush_bios = 1; | 1871 | ti->num_flush_bios = 1; |
1818 | ti->discard_zeroes_data_unsupported = true; | 1872 | ti->discard_zeroes_data_unsupported = true; |
1819 | 1873 | ||
@@ -1848,7 +1902,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) | |||
1848 | 1902 | ||
1849 | if (bio_data_dir(io->base_bio) == READ) { | 1903 | if (bio_data_dir(io->base_bio) == READ) { |
1850 | if (kcryptd_io_read(io, GFP_NOWAIT)) | 1904 | if (kcryptd_io_read(io, GFP_NOWAIT)) |
1851 | kcryptd_queue_io(io); | 1905 | kcryptd_queue_read(io); |
1852 | } else | 1906 | } else |
1853 | kcryptd_queue_crypt(io); | 1907 | kcryptd_queue_crypt(io); |
1854 | 1908 | ||
@@ -1860,6 +1914,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type, | |||
1860 | { | 1914 | { |
1861 | struct crypt_config *cc = ti->private; | 1915 | struct crypt_config *cc = ti->private; |
1862 | unsigned i, sz = 0; | 1916 | unsigned i, sz = 0; |
1917 | int num_feature_args = 0; | ||
1863 | 1918 | ||
1864 | switch (type) { | 1919 | switch (type) { |
1865 | case STATUSTYPE_INFO: | 1920 | case STATUSTYPE_INFO: |
@@ -1878,8 +1933,18 @@ static void crypt_status(struct dm_target *ti, status_type_t type, | |||
1878 | DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, | 1933 | DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, |
1879 | cc->dev->name, (unsigned long long)cc->start); | 1934 | cc->dev->name, (unsigned long long)cc->start); |
1880 | 1935 | ||
1881 | if (ti->num_discard_bios) | 1936 | num_feature_args += !!ti->num_discard_bios; |
1882 | DMEMIT(" 1 allow_discards"); | 1937 | num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags); |
1938 | num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); | ||
1939 | if (num_feature_args) { | ||
1940 | DMEMIT(" %d", num_feature_args); | ||
1941 | if (ti->num_discard_bios) | ||
1942 | DMEMIT(" allow_discards"); | ||
1943 | if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) | ||
1944 | DMEMIT(" same_cpu_crypt"); | ||
1945 | if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) | ||
1946 | DMEMIT(" submit_from_crypt_cpus"); | ||
1947 | } | ||
1883 | 1948 | ||
1884 | break; | 1949 | break; |
1885 | } | 1950 | } |
@@ -1976,7 +2041,7 @@ static int crypt_iterate_devices(struct dm_target *ti, | |||
1976 | 2041 | ||
1977 | static struct target_type crypt_target = { | 2042 | static struct target_type crypt_target = { |
1978 | .name = "crypt", | 2043 | .name = "crypt", |
1979 | .version = {1, 13, 0}, | 2044 | .version = {1, 14, 0}, |
1980 | .module = THIS_MODULE, | 2045 | .module = THIS_MODULE, |
1981 | .ctr = crypt_ctr, | 2046 | .ctr = crypt_ctr, |
1982 | .dtr = crypt_dtr, | 2047 | .dtr = crypt_dtr, |
@@ -1994,15 +2059,9 @@ static int __init dm_crypt_init(void) | |||
1994 | { | 2059 | { |
1995 | int r; | 2060 | int r; |
1996 | 2061 | ||
1997 | _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0); | ||
1998 | if (!_crypt_io_pool) | ||
1999 | return -ENOMEM; | ||
2000 | |||
2001 | r = dm_register_target(&crypt_target); | 2062 | r = dm_register_target(&crypt_target); |
2002 | if (r < 0) { | 2063 | if (r < 0) |
2003 | DMERR("register failed %d", r); | 2064 | DMERR("register failed %d", r); |
2004 | kmem_cache_destroy(_crypt_io_pool); | ||
2005 | } | ||
2006 | 2065 | ||
2007 | return r; | 2066 | return r; |
2008 | } | 2067 | } |
@@ -2010,7 +2069,6 @@ static int __init dm_crypt_init(void) | |||
2010 | static void __exit dm_crypt_exit(void) | 2069 | static void __exit dm_crypt_exit(void) |
2011 | { | 2070 | { |
2012 | dm_unregister_target(&crypt_target); | 2071 | dm_unregister_target(&crypt_target); |
2013 | kmem_cache_destroy(_crypt_io_pool); | ||
2014 | } | 2072 | } |
2015 | 2073 | ||
2016 | module_init(dm_crypt_init); | 2074 | module_init(dm_crypt_init); |
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index c09359db3a90..37de0173b6d2 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -290,6 +290,12 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, | |||
290 | unsigned short logical_block_size = queue_logical_block_size(q); | 290 | unsigned short logical_block_size = queue_logical_block_size(q); |
291 | sector_t num_sectors; | 291 | sector_t num_sectors; |
292 | 292 | ||
293 | /* Reject unsupported discard requests */ | ||
294 | if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) { | ||
295 | dec_count(io, region, -EOPNOTSUPP); | ||
296 | return; | ||
297 | } | ||
298 | |||
293 | /* | 299 | /* |
294 | * where->count may be zero if rw holds a flush and we need to | 300 | * where->count may be zero if rw holds a flush and we need to |
295 | * send a zero-sized flush. | 301 | * send a zero-sized flush. |
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 7dfdb5c746d6..089d62751f7f 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -604,6 +604,15 @@ static void write_callback(unsigned long error, void *context) | |||
604 | return; | 604 | return; |
605 | } | 605 | } |
606 | 606 | ||
607 | /* | ||
608 | * If the bio is discard, return an error, but do not | ||
609 | * degrade the array. | ||
610 | */ | ||
611 | if (bio->bi_rw & REQ_DISCARD) { | ||
612 | bio_endio(bio, -EOPNOTSUPP); | ||
613 | return; | ||
614 | } | ||
615 | |||
607 | for (i = 0; i < ms->nr_mirrors; i++) | 616 | for (i = 0; i < ms->nr_mirrors; i++) |
608 | if (test_bit(i, &error)) | 617 | if (test_bit(i, &error)) |
609 | fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR); | 618 | fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR); |
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 864b03f47727..8b204ae216ab 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1432,8 +1432,6 @@ out: | |||
1432 | full_bio->bi_private = pe->full_bio_private; | 1432 | full_bio->bi_private = pe->full_bio_private; |
1433 | atomic_inc(&full_bio->bi_remaining); | 1433 | atomic_inc(&full_bio->bi_remaining); |
1434 | } | 1434 | } |
1435 | free_pending_exception(pe); | ||
1436 | |||
1437 | increment_pending_exceptions_done_count(); | 1435 | increment_pending_exceptions_done_count(); |
1438 | 1436 | ||
1439 | up_write(&s->lock); | 1437 | up_write(&s->lock); |
@@ -1450,6 +1448,8 @@ out: | |||
1450 | } | 1448 | } |
1451 | 1449 | ||
1452 | retry_origin_bios(s, origin_bios); | 1450 | retry_origin_bios(s, origin_bios); |
1451 | |||
1452 | free_pending_exception(pe); | ||
1453 | } | 1453 | } |
1454 | 1454 | ||
1455 | static void commit_callback(void *context, int success) | 1455 | static void commit_callback(void *context, int success) |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ec1444f49de1..73f28802dc7a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2571,7 +2571,7 @@ int dm_setup_md_queue(struct mapped_device *md) | |||
2571 | return 0; | 2571 | return 0; |
2572 | } | 2572 | } |
2573 | 2573 | ||
2574 | static struct mapped_device *dm_find_md(dev_t dev) | 2574 | struct mapped_device *dm_get_md(dev_t dev) |
2575 | { | 2575 | { |
2576 | struct mapped_device *md; | 2576 | struct mapped_device *md; |
2577 | unsigned minor = MINOR(dev); | 2577 | unsigned minor = MINOR(dev); |
@@ -2582,12 +2582,15 @@ static struct mapped_device *dm_find_md(dev_t dev) | |||
2582 | spin_lock(&_minor_lock); | 2582 | spin_lock(&_minor_lock); |
2583 | 2583 | ||
2584 | md = idr_find(&_minor_idr, minor); | 2584 | md = idr_find(&_minor_idr, minor); |
2585 | if (md && (md == MINOR_ALLOCED || | 2585 | if (md) { |
2586 | (MINOR(disk_devt(dm_disk(md))) != minor) || | 2586 | if ((md == MINOR_ALLOCED || |
2587 | dm_deleting_md(md) || | 2587 | (MINOR(disk_devt(dm_disk(md))) != minor) || |
2588 | test_bit(DMF_FREEING, &md->flags))) { | 2588 | dm_deleting_md(md) || |
2589 | md = NULL; | 2589 | test_bit(DMF_FREEING, &md->flags))) { |
2590 | goto out; | 2590 | md = NULL; |
2591 | goto out; | ||
2592 | } | ||
2593 | dm_get(md); | ||
2591 | } | 2594 | } |
2592 | 2595 | ||
2593 | out: | 2596 | out: |
@@ -2595,16 +2598,6 @@ out: | |||
2595 | 2598 | ||
2596 | return md; | 2599 | return md; |
2597 | } | 2600 | } |
2598 | |||
2599 | struct mapped_device *dm_get_md(dev_t dev) | ||
2600 | { | ||
2601 | struct mapped_device *md = dm_find_md(dev); | ||
2602 | |||
2603 | if (md) | ||
2604 | dm_get(md); | ||
2605 | |||
2606 | return md; | ||
2607 | } | ||
2608 | EXPORT_SYMBOL_GPL(dm_get_md); | 2601 | EXPORT_SYMBOL_GPL(dm_get_md); |
2609 | 2602 | ||
2610 | void *dm_get_mdptr(struct mapped_device *md) | 2603 | void *dm_get_mdptr(struct mapped_device *md) |
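The race fixed here is the classic lookup-then-get window: the old
dm_find_md() looked the device up under _minor_lock, but dm_get_md() only
took the reference after the lock had been dropped, so the mapped_device
could be freed in between. Folding dm_get() into the locked section closes
the window. A minimal userspace sketch of the same rule (illustrative names,
not the DM API):

    #include <pthread.h>
    #include <stddef.h>

    struct object {
        int refcount;              /* protected by table_lock in this sketch */
        int id;
    };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct object *table[16];  /* toy id -> object table */

    /*
     * Correct: the reference is taken under the same lock the removal path
     * holds, so a found object cannot be freed before its refcount is raised.
     */
    struct object *object_get(int id)
    {
        struct object *obj;

        pthread_mutex_lock(&table_lock);
        obj = (id >= 0 && id < 16) ? table[id] : NULL;
        if (obj)
            obj->refcount++;           /* grab the reference while locked */
        pthread_mutex_unlock(&table_lock);

        return obj;
    }

    /*
     * Racy (the old dm_find_md/dm_get_md split): returning the pointer and
     * taking the reference later leaves a window in which another thread
     * can remove and free the object.
     */
    struct object *object_get_racy(int id)
    {
        struct object *obj;

        pthread_mutex_lock(&table_lock);
        obj = (id >= 0 && id < 16) ? table[id] : NULL;
        pthread_mutex_unlock(&table_lock);

        if (obj)
            obj->refcount++;           /* too late: obj may already be gone */
        return obj;
    }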
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index cfbf9617e465..ebb280a14325 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -78,7 +78,9 @@ static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b, | |||
78 | if (r) | 78 | if (r) |
79 | return r; | 79 | return r; |
80 | 80 | ||
81 | return count > 1; | 81 | *result = count > 1; |
82 | |||
83 | return 0; | ||
82 | } | 84 | } |
83 | 85 | ||
84 | static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b, | 86 | static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b, |
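The bug fixed in this last hunk is an out-parameter mix-up:
sm_disk_count_is_more_than_one() returned the boolean 'count > 1' as its
status code, so callers treating any non-zero return as an error saw a
failure whenever a block was shared. The fix stores the boolean in *result
and returns 0 for success. A tiny standalone illustration of the convention
(function names are illustrative):

    #include <stdio.h>

    /* Broken: the boolean answer leaks out through the status code. */
    static int count_is_more_than_one_buggy(unsigned count, int *result)
    {
        (void)result;
        return count > 1;          /* caller reads this as an error */
    }

    /* Fixed: the answer goes through *result, the return value is status. */
    static int count_is_more_than_one(unsigned count, int *result)
    {
        *result = count > 1;
        return 0;                  /* 0 == success */
    }

    int main(void)
    {
        int shared;

        if (count_is_more_than_one(2, &shared))
            fprintf(stderr, "lookup failed\n");            /* not reached */
        else
            printf("shared: %s\n", shared ? "yes" : "no"); /* shared: yes */

        if (count_is_more_than_one_buggy(2, &shared))
            fprintf(stderr, "reported as an error even though it worked\n");

        return 0;
    }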