author		Mikulas Patocka <mpatocka@redhat.com>	2015-02-13 08:23:52 -0500
committer	Mike Snitzer <snitzer@redhat.com>	2015-02-16 11:11:12 -0500
commit		cf2f1abfbd0dba701f7f16ef619e4d2485de3366 (patch)
tree		77faadd11b83b4faa5460be407572167e798b7a9 /drivers/md
parent		f3396c58fd8442850e759843457d78b6ec3a9589 (diff)
dm crypt: don't allocate pages for a partial request
Change crypt_alloc_buffer so that it only ever allocates pages for a
full request. This is a prerequisite for the commit "dm crypt: offload
writes to thread".
This change simplifies the dm-crypt code at the expense of reduced
throughput in low memory conditions (where allocation for a partial
request is most useful).
Note: the next commit ("dm crypt: avoid deadlock in mempools") is needed
to fix a theoretical deadlock.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/dm-crypt.c	139
1 file changed, 30 insertions(+), 109 deletions(-)
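
Before the hunks, a condensed sketch of crypt_alloc_buffer() as it looks after this patch may help. It is assembled from the hunks below together with the surrounding dm-crypt source; the nr_iovecs computation is not part of the visible diff and is reproduced here as an assumption, so treat the sketch as illustrative rather than authoritative.

/*
 * Sketch of crypt_alloc_buffer() after this patch (see the hunks below
 * for the authoritative version).  Pages are always allocated for the
 * whole request; there is no partial-bio path any more.
 */
static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
{
	struct crypt_config *cc = io->cc;
	struct bio *clone;
	/* Assumed from the surrounding source, not shown in the hunks below: */
	unsigned nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
	unsigned i, len;
	struct page *page;
	struct bio_vec *bvec;

	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
	if (!clone)
		return NULL;

	clone_init(io, clone);

	for (i = 0; i < nr_iovecs; i++) {
		/*
		 * mempool_alloc() with GFP_NOIO waits for a free page
		 * instead of failing, so no partial-allocation fallback
		 * is needed here.
		 */
		page = mempool_alloc(cc->page_pool, gfp_mask);

		len = (size > PAGE_SIZE) ? PAGE_SIZE : size;

		/*
		 * Fill the bio_vec directly rather than via bio_add_page(),
		 * since the bio was sized for exactly nr_iovecs pages.
		 */
		bvec = &clone->bi_io_vec[clone->bi_vcnt++];
		bvec->bv_page = page;
		bvec->bv_len = len;
		bvec->bv_offset = 0;

		clone->bi_iter.bi_size += len;

		size -= len;
	}

	return clone;
}

Because the page pool is sized to BIO_MAX_PAGES in crypt_ctr() and the allocation always waits, the loop no longer needs the partial-bio fallback; that is what lets kcryptd_crypt_write_convert() drop its fragment bookkeeping, and why the follow-up mempool commit is needed to close the theoretical deadlock.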
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 5063c901c0f5..6199245ea6a6 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -58,7 +58,6 @@ struct dm_crypt_io {
 	atomic_t io_pending;
 	int error;
 	sector_t sector;
-	struct dm_crypt_io *base_io;
 } CRYPTO_MINALIGN_ATTR;
 
 struct dm_crypt_request {
@@ -172,7 +171,6 @@ struct crypt_config {
 };
 
 #define MIN_IOS 16
-#define MIN_POOL_PAGES 32
 
 static struct kmem_cache *_crypt_io_pool;
 
@@ -946,14 +944,13 @@ static int crypt_convert(struct crypt_config *cc,
 	return 0;
 }
 
+static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
+
 /*
  * Generate a new unfragmented bio with the given size
  * This should never violate the device limitations
- * May return a smaller bio when running out of pages, indicated by
- * *out_of_pages set to 1.
  */
-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
-				      unsigned *out_of_pages)
+static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *clone;
@@ -961,41 +958,27 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
 	gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
 	unsigned i, len;
 	struct page *page;
+	struct bio_vec *bvec;
 
 	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
 	if (!clone)
 		return NULL;
 
 	clone_init(io, clone);
-	*out_of_pages = 0;
 
 	for (i = 0; i < nr_iovecs; i++) {
 		page = mempool_alloc(cc->page_pool, gfp_mask);
-		if (!page) {
-			*out_of_pages = 1;
-			break;
-		}
-
-		/*
-		 * If additional pages cannot be allocated without waiting,
-		 * return a partially-allocated bio. The caller will then try
-		 * to allocate more bios while submitting this partial bio.
-		 */
-		gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
 
 		len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
 
-		if (!bio_add_page(clone, page, len, 0)) {
-			mempool_free(page, cc->page_pool);
-			break;
-		}
+		bvec = &clone->bi_io_vec[clone->bi_vcnt++];
+		bvec->bv_page = page;
+		bvec->bv_len = len;
+		bvec->bv_offset = 0;
 
-		size -= len;
-	}
+		clone->bi_iter.bi_size += len;
 
-	if (!clone->bi_iter.bi_size) {
-		bio_put(clone);
-		return NULL;
+		size -= len;
 	}
 
 	return clone;
@@ -1020,7 +1003,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
 	io->base_bio = bio;
 	io->sector = sector;
 	io->error = 0;
-	io->base_io = NULL;
 	io->ctx.req = NULL;
 	atomic_set(&io->io_pending, 0);
 }
@@ -1033,13 +1015,11 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
- * If base_io is set, wait for the last fragment to complete.
  */
 static void crypt_dec_pending(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *base_bio = io->base_bio;
-	struct dm_crypt_io *base_io = io->base_io;
 	int error = io->error;
 
 	if (!atomic_dec_and_test(&io->io_pending))
@@ -1050,13 +1030,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 	if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
 		mempool_free(io, cc->io_pool);
 
-	if (likely(!base_io))
-		bio_endio(base_bio, error);
-	else {
-		if (error && !base_io->error)
-			base_io->error = error;
-		crypt_dec_pending(base_io);
-	}
+	bio_endio(base_bio, error);
 }
 
 /*
@@ -1192,10 +1166,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
 	struct crypt_config *cc = io->cc;
 	struct bio *clone;
-	struct dm_crypt_io *new_io;
 	int crypt_finished;
-	unsigned out_of_pages = 0;
-	unsigned remaining = io->base_bio->bi_iter.bi_size;
 	sector_t sector = io->sector;
 	int r;
 
@@ -1205,80 +1176,30 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	crypt_inc_pending(io);
 	crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
 
-	/*
-	 * The allocated buffers can be smaller than the whole bio,
-	 * so repeat the whole process until all the data can be handled.
-	 */
-	while (remaining) {
-		clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
-		if (unlikely(!clone)) {
-			io->error = -ENOMEM;
-			break;
-		}
-
-		io->ctx.bio_out = clone;
-		io->ctx.iter_out = clone->bi_iter;
-
-		remaining -= clone->bi_iter.bi_size;
-		sector += bio_sectors(clone);
-
-		crypt_inc_pending(io);
-
-		r = crypt_convert(cc, &io->ctx);
-		if (r < 0)
-			io->error = -EIO;
-
-		crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
-
-		/* Encryption was already finished, submit io now */
-		if (crypt_finished) {
-			kcryptd_crypt_write_io_submit(io, 0);
-
-			/*
-			 * If there was an error, do not try next fragments.
-			 * For async, error is processed in async handler.
-			 */
-			if (unlikely(r < 0))
-				break;
+	clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
+	if (unlikely(!clone)) {
+		io->error = -EIO;
+		goto dec;
+	}
 
-			io->sector = sector;
-		}
+	io->ctx.bio_out = clone;
+	io->ctx.iter_out = clone->bi_iter;
 
-		/*
-		 * Out of memory -> run queues
-		 * But don't wait if split was due to the io size restriction
-		 */
-		if (unlikely(out_of_pages))
-			congestion_wait(BLK_RW_ASYNC, HZ/100);
+	sector += bio_sectors(clone);
 
-		/*
-		 * With async crypto it is unsafe to share the crypto context
-		 * between fragments, so switch to a new dm_crypt_io structure.
-		 */
-		if (unlikely(!crypt_finished && remaining)) {
-			new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
-			crypt_io_init(new_io, io->cc, io->base_bio, sector);
-			crypt_inc_pending(new_io);
-			crypt_convert_init(cc, &new_io->ctx, NULL,
-					   io->base_bio, sector);
-			new_io->ctx.iter_in = io->ctx.iter_in;
-
-			/*
-			 * Fragments after the first use the base_io
-			 * pending count.
-			 */
-			if (!io->base_io)
-				new_io->base_io = io;
-			else {
-				new_io->base_io = io->base_io;
-				crypt_inc_pending(io->base_io);
-				crypt_dec_pending(io);
-			}
+	crypt_inc_pending(io);
 
-			io = new_io;
-		}
+	r = crypt_convert(cc, &io->ctx);
+	if (r)
+		io->error = -EIO;
+	crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
 
+	/* Encryption was already finished, submit io now */
+	if (crypt_finished) {
+		kcryptd_crypt_write_io_submit(io, 0);
+		io->sector = sector;
 	}
 
+dec:
 	crypt_dec_pending(io);
 }
 
@@ -1746,7 +1667,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		      sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size,
 		      ARCH_KMALLOC_MINALIGN);
 
-	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
+	cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
 	if (!cc->page_pool) {
 		ti->error = "Cannot allocate page mempool";
 		goto bad;