author    | Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 2012-06-18 07:07:14 -0400
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2012-06-27 02:42:01 -0400
commit    | e81792fbc2a6fa4969f724b959829667fb2d4f01 (patch)
tree      | 20e435e725e8ffca406ffb262c91552efd9dd952 /arch
parent    | a9629d7142ea22567eaa999232d8a31a7493665a (diff)
crypto: serpent-sse2 - prepare serpent-sse2 glue code into generic x86 glue code for 128bit block ciphers
Block cipher implementations in arch/x86/crypto/ contain common glue code that
is currently duplicated in each module (camellia-x86_64, twofish-x86_64-3way,
twofish-avx, serpent-sse2 and serpent-avx). This patch prepares serpent-sse2
glue into generic glue code for all 128bit block ciphers to use in
arch/x86/crypto.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
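
To make the intent concrete before the diff itself, here is a minimal sketch of how a 128-bit block cipher module would use the glue interface this patch factors out. The types and helpers (`struct common_glue_ctx`, `GLUE_FUNC_CAST()`, `glue_ecb_crypt_128bit()`) are the names introduced in the diff below; `mycipher_enc_blk_8way()`, `__mycipher_encrypt()` and `MYCIPHER_PARALLEL_BLOCKS` are hypothetical stand-ins for a cipher's own assembler and C routines, not part of the patch.

```c
/*
 * Sketch only: wiring a hypothetical cipher into the generic glue code.
 * The funcs[] table must be ordered from the widest batch down to a
 * single-block entry, as required by the glue dispatch loop.
 */
static const struct common_glue_ctx mycipher_enc = {
	.num_funcs = 2,
	.fpu_blocks_limit = MYCIPHER_PARALLEL_BLOCKS,

	.funcs = { {
		/* widest batch first: the parallel SSE2/AVX routine */
		.num_blocks = MYCIPHER_PARALLEL_BLOCKS,
		.fn_u = { .ecb = GLUE_FUNC_CAST(mycipher_enc_blk_8way) }
	}, {
		/* last entry must process exactly one block (plain C fallback) */
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(__mycipher_encrypt) }
	} }
};

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	/* blkcipher walking, FPU handling and leftovers are all done by the glue */
	return glue_ecb_crypt_128bit(&mycipher_enc, desc, dst, src, nbytes);
}
```

The serpent-specific `serpent_enc`, `serpent_dec`, `serpent_ctr` and `serpent_dec_cbc` tables added at the end of the diff follow exactly this pattern.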
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/crypto/serpent_sse2_glue.c | 466
1 file changed, 303 insertions(+), 163 deletions(-)
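
The central policy in the refactored code is `glue_fpu_begin()`: the kernel FPU is only taken once the chunk being walked covers at least `fpu_blocks_limit` blocks, and a negative limit means the cipher never needs the FPU. The helper below is an illustration only, a simplified standalone restatement of that threshold check (the `fpu_enabled` bookkeeping and `CRYPTO_TFM_REQ_MAY_SLEEP` handling in the real function are omitted), not code from the patch.

```c
/*
 * Illustration only: the FPU-enable threshold implemented by glue_fpu_begin()
 * in the diff below, restated as a standalone helper.
 */
static bool want_fpu(unsigned int bsize, int fpu_blocks_limit,
		     unsigned int nbytes)
{
	if (fpu_blocks_limit < 0)	/* cipher never needs the FPU */
		return false;
	return nbytes >= bsize * (unsigned int)fpu_blocks_limit;
}

/*
 * serpent-sse2 uses a 16-byte block and SERPENT_PARALLEL_BLOCKS == 8,
 * so the threshold works out to 128 bytes:
 *
 *   want_fpu(16, 8,  64) -> false  (handled block-by-block in plain C)
 *   want_fpu(16, 8, 256) -> true   (kernel_fpu_begin() + 8-way routine)
 */
```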
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 805c91fda7a2..8c86239010ae 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -48,105 +48,129 @@
48 | #include <linux/workqueue.h> | 48 | #include <linux/workqueue.h> |
49 | #include <linux/spinlock.h> | 49 | #include <linux/spinlock.h> |
50 | 50 | ||
51 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | 51 | typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); |
52 | typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); | ||
53 | typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, | ||
54 | u128 *iv); | ||
55 | |||
56 | #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) | ||
57 | #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) | ||
58 | #define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) | ||
59 | |||
60 | struct common_glue_func_entry { | ||
61 | unsigned int num_blocks; /* number of blocks that @fn will process */ | ||
62 | union { | ||
63 | common_glue_func_t ecb; | ||
64 | common_glue_cbc_func_t cbc; | ||
65 | common_glue_ctr_func_t ctr; | ||
66 | } fn_u; | ||
67 | }; | ||
68 | |||
69 | struct common_glue_ctx { | ||
70 | unsigned int num_funcs; | ||
71 | int fpu_blocks_limit; /* -1 means fpu not needed at all */ | ||
72 | |||
73 | /* | ||
74 | * First funcs entry must have largest num_blocks and last funcs entry | ||
75 | * must have num_blocks == 1! | ||
76 | */ | ||
77 | struct common_glue_func_entry funcs[]; | ||
78 | }; | ||
79 | |||
80 | static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, | ||
81 | struct blkcipher_desc *desc, | ||
82 | bool fpu_enabled, unsigned int nbytes) | ||
52 | { | 83 | { |
84 | if (likely(fpu_blocks_limit < 0)) | ||
85 | return false; | ||
86 | |||
53 | if (fpu_enabled) | 87 | if (fpu_enabled) |
54 | return true; | 88 | return true; |
55 | 89 | ||
56 | /* SSE2 is only used when chunk to be processed is large enough, so | 90 | /* |
57 | * do not enable FPU until it is necessary. | 91 | * Vector-registers are only used when chunk to be processed is large |
92 | * enough, so do not enable FPU until it is necessary. | ||
58 | */ | 93 | */ |
59 | if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) | 94 | if (nbytes < bsize * (unsigned int)fpu_blocks_limit) |
60 | return false; | 95 | return false; |
61 | 96 | ||
97 | if (desc) { | ||
98 | /* prevent sleeping if FPU is in use */ | ||
99 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
100 | } | ||
101 | |||
62 | kernel_fpu_begin(); | 102 | kernel_fpu_begin(); |
63 | return true; | 103 | return true; |
64 | } | 104 | } |
65 | 105 | ||
66 | static inline void serpent_fpu_end(bool fpu_enabled) | 106 | static inline void glue_fpu_end(bool fpu_enabled) |
67 | { | 107 | { |
68 | if (fpu_enabled) | 108 | if (fpu_enabled) |
69 | kernel_fpu_end(); | 109 | kernel_fpu_end(); |
70 | } | 110 | } |
71 | 111 | ||
72 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | 112 | static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, |
73 | bool enc) | 113 | struct blkcipher_desc *desc, |
114 | struct blkcipher_walk *walk) | ||
74 | { | 115 | { |
116 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
117 | const unsigned int bsize = 128 / 8; | ||
118 | unsigned int nbytes, i, func_bytes; | ||
75 | bool fpu_enabled = false; | 119 | bool fpu_enabled = false; |
76 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
77 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
78 | unsigned int nbytes; | ||
79 | int err; | 120 | int err; |
80 | 121 | ||
81 | err = blkcipher_walk_virt(desc, walk); | 122 | err = blkcipher_walk_virt(desc, walk); |
82 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
83 | 123 | ||
84 | while ((nbytes = walk->nbytes)) { | 124 | while ((nbytes = walk->nbytes)) { |
85 | u8 *wsrc = walk->src.virt.addr; | 125 | u8 *wsrc = walk->src.virt.addr; |
86 | u8 *wdst = walk->dst.virt.addr; | 126 | u8 *wdst = walk->dst.virt.addr; |
87 | 127 | ||
88 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | 128 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, |
129 | desc, fpu_enabled, nbytes); | ||
89 | 130 | ||
90 | /* Process multi-block batch */ | 131 | for (i = 0; i < gctx->num_funcs; i++) { |
91 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | 132 | func_bytes = bsize * gctx->funcs[i].num_blocks; |
92 | do { | ||
93 | if (enc) | ||
94 | serpent_enc_blk_xway(ctx, wdst, wsrc); | ||
95 | else | ||
96 | serpent_dec_blk_xway(ctx, wdst, wsrc); | ||
97 | 133 | ||
98 | wsrc += bsize * SERPENT_PARALLEL_BLOCKS; | 134 | /* Process multi-block batch */ |
99 | wdst += bsize * SERPENT_PARALLEL_BLOCKS; | 135 | if (nbytes >= func_bytes) { |
100 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | 136 | do { |
101 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | 137 | gctx->funcs[i].fn_u.ecb(ctx, wdst, |
138 | wsrc); | ||
102 | 139 | ||
103 | if (nbytes < bsize) | 140 | wsrc += func_bytes; |
104 | goto done; | 141 | wdst += func_bytes; |
105 | } | 142 | nbytes -= func_bytes; |
106 | 143 | } while (nbytes >= func_bytes); | |
107 | /* Handle leftovers */ | ||
108 | do { | ||
109 | if (enc) | ||
110 | __serpent_encrypt(ctx, wdst, wsrc); | ||
111 | else | ||
112 | __serpent_decrypt(ctx, wdst, wsrc); | ||
113 | 144 | ||
114 | wsrc += bsize; | 145 | if (nbytes < bsize) |
115 | wdst += bsize; | 146 | goto done; |
116 | nbytes -= bsize; | 147 | } |
117 | } while (nbytes >= bsize); | 148 | } |
118 | 149 | ||
119 | done: | 150 | done: |
120 | err = blkcipher_walk_done(desc, walk, nbytes); | 151 | err = blkcipher_walk_done(desc, walk, nbytes); |
121 | } | 152 | } |
122 | 153 | ||
123 | serpent_fpu_end(fpu_enabled); | 154 | glue_fpu_end(fpu_enabled); |
124 | return err; | 155 | return err; |
125 | } | 156 | } |
126 | 157 | ||
127 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 158 | int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, |
128 | struct scatterlist *src, unsigned int nbytes) | 159 | struct blkcipher_desc *desc, struct scatterlist *dst, |
160 | struct scatterlist *src, unsigned int nbytes) | ||
129 | { | 161 | { |
130 | struct blkcipher_walk walk; | 162 | struct blkcipher_walk walk; |
131 | 163 | ||
132 | blkcipher_walk_init(&walk, dst, src, nbytes); | 164 | blkcipher_walk_init(&walk, dst, src, nbytes); |
133 | return ecb_crypt(desc, &walk, true); | 165 | return __glue_ecb_crypt_128bit(gctx, desc, &walk); |
134 | } | 166 | } |
135 | 167 | ||
136 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 168 | static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, |
137 | struct scatterlist *src, unsigned int nbytes) | 169 | struct blkcipher_desc *desc, |
170 | struct blkcipher_walk *walk) | ||
138 | { | 171 | { |
139 | struct blkcipher_walk walk; | 172 | void *ctx = crypto_blkcipher_ctx(desc->tfm); |
140 | 173 | const unsigned int bsize = 128 / 8; | |
141 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
142 | return ecb_crypt(desc, &walk, false); | ||
143 | } | ||
144 | |||
145 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | ||
146 | struct blkcipher_walk *walk) | ||
147 | { | ||
148 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
149 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
150 | unsigned int nbytes = walk->nbytes; | 174 | unsigned int nbytes = walk->nbytes; |
151 | u128 *src = (u128 *)walk->src.virt.addr; | 175 | u128 *src = (u128 *)walk->src.virt.addr; |
152 | u128 *dst = (u128 *)walk->dst.virt.addr; | 176 | u128 *dst = (u128 *)walk->dst.virt.addr; |
@@ -154,7 +178,7 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
154 | 178 | ||
155 | do { | 179 | do { |
156 | u128_xor(dst, src, iv); | 180 | u128_xor(dst, src, iv); |
157 | __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); | 181 | fn(ctx, (u8 *)dst, (u8 *)dst); |
158 | iv = dst; | 182 | iv = dst; |
159 | 183 | ||
160 | src += 1; | 184 | src += 1; |
@@ -166,8 +190,10 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
166 | return nbytes; | 190 | return nbytes; |
167 | } | 191 | } |
168 | 192 | ||
169 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 193 | int glue_cbc_encrypt_128bit(const common_glue_func_t fn, |
170 | struct scatterlist *src, unsigned int nbytes) | 194 | struct blkcipher_desc *desc, |
195 | struct scatterlist *dst, | ||
196 | struct scatterlist *src, unsigned int nbytes) | ||
171 | { | 197 | { |
172 | struct blkcipher_walk walk; | 198 | struct blkcipher_walk walk; |
173 | int err; | 199 | int err; |
@@ -176,24 +202,26 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
176 | err = blkcipher_walk_virt(desc, &walk); | 202 | err = blkcipher_walk_virt(desc, &walk); |
177 | 203 | ||
178 | while ((nbytes = walk.nbytes)) { | 204 | while ((nbytes = walk.nbytes)) { |
179 | nbytes = __cbc_encrypt(desc, &walk); | 205 | nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); |
180 | err = blkcipher_walk_done(desc, &walk, nbytes); | 206 | err = blkcipher_walk_done(desc, &walk, nbytes); |
181 | } | 207 | } |
182 | 208 | ||
183 | return err; | 209 | return err; |
184 | } | 210 | } |
185 | 211 | ||
186 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 212 | static unsigned int |
187 | struct blkcipher_walk *walk) | 213 | __glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, |
214 | struct blkcipher_desc *desc, | ||
215 | struct blkcipher_walk *walk) | ||
188 | { | 216 | { |
189 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 217 | void *ctx = crypto_blkcipher_ctx(desc->tfm); |
190 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | 218 | const unsigned int bsize = 128 / 8; |
191 | unsigned int nbytes = walk->nbytes; | 219 | unsigned int nbytes = walk->nbytes; |
192 | u128 *src = (u128 *)walk->src.virt.addr; | 220 | u128 *src = (u128 *)walk->src.virt.addr; |
193 | u128 *dst = (u128 *)walk->dst.virt.addr; | 221 | u128 *dst = (u128 *)walk->dst.virt.addr; |
194 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
195 | u128 last_iv; | 222 | u128 last_iv; |
196 | int i; | 223 | unsigned int num_blocks, func_bytes; |
224 | unsigned int i; | ||
197 | 225 | ||
198 | /* Start of the last block. */ | 226 | /* Start of the last block. */ |
199 | src += nbytes / bsize - 1; | 227 | src += nbytes / bsize - 1; |
@@ -201,45 +229,31 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
201 | 229 | ||
202 | last_iv = *src; | 230 | last_iv = *src; |
203 | 231 | ||
204 | /* Process multi-block batch */ | 232 | for (i = 0; i < gctx->num_funcs; i++) { |
205 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | 233 | num_blocks = gctx->funcs[i].num_blocks; |
206 | do { | 234 | func_bytes = bsize * num_blocks; |
207 | nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); | ||
208 | src -= SERPENT_PARALLEL_BLOCKS - 1; | ||
209 | dst -= SERPENT_PARALLEL_BLOCKS - 1; | ||
210 | 235 | ||
211 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | 236 | /* Process multi-block batch */ |
212 | ivs[i] = src[i]; | 237 | if (nbytes >= func_bytes) { |
238 | do { | ||
239 | nbytes -= func_bytes - bsize; | ||
240 | src -= num_blocks - 1; | ||
241 | dst -= num_blocks - 1; | ||
242 | |||
243 | gctx->funcs[i].fn_u.cbc(ctx, dst, src); | ||
213 | 244 | ||
214 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | 245 | nbytes -= bsize; |
246 | if (nbytes < bsize) | ||
247 | goto done; | ||
215 | 248 | ||
216 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | 249 | u128_xor(dst, dst, src - 1); |
217 | u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); | 250 | src -= 1; |
251 | dst -= 1; | ||
252 | } while (nbytes >= func_bytes); | ||
218 | 253 | ||
219 | nbytes -= bsize; | ||
220 | if (nbytes < bsize) | 254 | if (nbytes < bsize) |
221 | goto done; | 255 | goto done; |
222 | 256 | } | |
223 | u128_xor(dst, dst, src - 1); | ||
224 | src -= 1; | ||
225 | dst -= 1; | ||
226 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
227 | |||
228 | if (nbytes < bsize) | ||
229 | goto done; | ||
230 | } | ||
231 | |||
232 | /* Handle leftovers */ | ||
233 | for (;;) { | ||
234 | __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); | ||
235 | |||
236 | nbytes -= bsize; | ||
237 | if (nbytes < bsize) | ||
238 | break; | ||
239 | |||
240 | u128_xor(dst, dst, src - 1); | ||
241 | src -= 1; | ||
242 | dst -= 1; | ||
243 | } | 257 | } |
244 | 258 | ||
245 | done: | 259 | done: |
@@ -249,24 +263,27 @@ done:
249 | return nbytes; | 263 | return nbytes; |
250 | } | 264 | } |
251 | 265 | ||
252 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 266 | int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, |
253 | struct scatterlist *src, unsigned int nbytes) | 267 | struct blkcipher_desc *desc, |
268 | struct scatterlist *dst, | ||
269 | struct scatterlist *src, unsigned int nbytes) | ||
254 | { | 270 | { |
271 | const unsigned int bsize = 128 / 8; | ||
255 | bool fpu_enabled = false; | 272 | bool fpu_enabled = false; |
256 | struct blkcipher_walk walk; | 273 | struct blkcipher_walk walk; |
257 | int err; | 274 | int err; |
258 | 275 | ||
259 | blkcipher_walk_init(&walk, dst, src, nbytes); | 276 | blkcipher_walk_init(&walk, dst, src, nbytes); |
260 | err = blkcipher_walk_virt(desc, &walk); | 277 | err = blkcipher_walk_virt(desc, &walk); |
261 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
262 | 278 | ||
263 | while ((nbytes = walk.nbytes)) { | 279 | while ((nbytes = walk.nbytes)) { |
264 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | 280 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, |
265 | nbytes = __cbc_decrypt(desc, &walk); | 281 | desc, fpu_enabled, nbytes); |
282 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | ||
266 | err = blkcipher_walk_done(desc, &walk, nbytes); | 283 | err = blkcipher_walk_done(desc, &walk, nbytes); |
267 | } | 284 | } |
268 | 285 | ||
269 | serpent_fpu_end(fpu_enabled); | 286 | glue_fpu_end(fpu_enabled); |
270 | return err; | 287 | return err; |
271 | } | 288 | } |
272 | 289 | ||
@@ -289,109 +306,232 @@ static inline void u128_inc(u128 *i)
289 | i->a++; | 306 | i->a++; |
290 | } | 307 | } |
291 | 308 | ||
292 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 309 | static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, |
293 | struct blkcipher_walk *walk) | 310 | struct blkcipher_desc *desc, |
311 | struct blkcipher_walk *walk) | ||
294 | { | 312 | { |
295 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 313 | void *ctx = crypto_blkcipher_ctx(desc->tfm); |
296 | u8 *ctrblk = walk->iv; | 314 | u8 *src = (u8 *)walk->src.virt.addr; |
297 | u8 keystream[SERPENT_BLOCK_SIZE]; | 315 | u8 *dst = (u8 *)walk->dst.virt.addr; |
298 | u8 *src = walk->src.virt.addr; | ||
299 | u8 *dst = walk->dst.virt.addr; | ||
300 | unsigned int nbytes = walk->nbytes; | 316 | unsigned int nbytes = walk->nbytes; |
317 | u128 ctrblk; | ||
318 | u128 tmp; | ||
301 | 319 | ||
302 | __serpent_encrypt(ctx, keystream, ctrblk); | 320 | be128_to_u128(&ctrblk, (be128 *)walk->iv); |
303 | crypto_xor(keystream, src, nbytes); | ||
304 | memcpy(dst, keystream, nbytes); | ||
305 | 321 | ||
306 | crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); | 322 | memcpy(&tmp, src, nbytes); |
323 | fn_ctr(ctx, &tmp, &tmp, &ctrblk); | ||
324 | memcpy(dst, &tmp, nbytes); | ||
325 | |||
326 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
307 | } | 327 | } |
308 | 328 | ||
309 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | 329 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, |
310 | struct blkcipher_walk *walk) | 330 | struct blkcipher_desc *desc, |
331 | struct blkcipher_walk *walk) | ||
311 | { | 332 | { |
312 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 333 | const unsigned int bsize = 128 / 8; |
313 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | 334 | void *ctx = crypto_blkcipher_ctx(desc->tfm); |
314 | unsigned int nbytes = walk->nbytes; | 335 | unsigned int nbytes = walk->nbytes; |
315 | u128 *src = (u128 *)walk->src.virt.addr; | 336 | u128 *src = (u128 *)walk->src.virt.addr; |
316 | u128 *dst = (u128 *)walk->dst.virt.addr; | 337 | u128 *dst = (u128 *)walk->dst.virt.addr; |
317 | u128 ctrblk; | 338 | u128 ctrblk; |
318 | be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; | 339 | unsigned int num_blocks, func_bytes; |
319 | int i; | 340 | unsigned int i; |
320 | 341 | ||
321 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | 342 | be128_to_u128(&ctrblk, (be128 *)walk->iv); |
322 | 343 | ||
323 | /* Process multi-block batch */ | 344 | /* Process multi-block batch */ |
324 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | 345 | for (i = 0; i < gctx->num_funcs; i++) { |
325 | do { | 346 | num_blocks = gctx->funcs[i].num_blocks; |
326 | /* create ctrblks for parallel encrypt */ | 347 | func_bytes = bsize * num_blocks; |
327 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
328 | if (dst != src) | ||
329 | dst[i] = src[i]; | ||
330 | |||
331 | u128_to_be128(&ctrblocks[i], &ctrblk); | ||
332 | u128_inc(&ctrblk); | ||
333 | } | ||
334 | 348 | ||
335 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, | 349 | if (nbytes >= func_bytes) { |
336 | (u8 *)ctrblocks); | 350 | do { |
351 | gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); | ||
337 | 352 | ||
338 | src += SERPENT_PARALLEL_BLOCKS; | 353 | src += num_blocks; |
339 | dst += SERPENT_PARALLEL_BLOCKS; | 354 | dst += num_blocks; |
340 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | 355 | nbytes -= func_bytes; |
341 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | 356 | } while (nbytes >= func_bytes); |
342 | 357 | ||
343 | if (nbytes < bsize) | 358 | if (nbytes < bsize) |
344 | goto done; | 359 | goto done; |
360 | } | ||
345 | } | 361 | } |
346 | 362 | ||
347 | /* Handle leftovers */ | ||
348 | do { | ||
349 | if (dst != src) | ||
350 | *dst = *src; | ||
351 | |||
352 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
353 | u128_inc(&ctrblk); | ||
354 | |||
355 | __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
356 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
357 | |||
358 | src += 1; | ||
359 | dst += 1; | ||
360 | nbytes -= bsize; | ||
361 | } while (nbytes >= bsize); | ||
362 | |||
363 | done: | 363 | done: |
364 | u128_to_be128((be128 *)walk->iv, &ctrblk); | 364 | u128_to_be128((be128 *)walk->iv, &ctrblk); |
365 | return nbytes; | 365 | return nbytes; |
366 | } | 366 | } |
367 | 367 | ||
368 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 368 | int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, |
369 | struct scatterlist *src, unsigned int nbytes) | 369 | struct blkcipher_desc *desc, struct scatterlist *dst, |
370 | struct scatterlist *src, unsigned int nbytes) | ||
370 | { | 371 | { |
372 | const unsigned int bsize = 128 / 8; | ||
371 | bool fpu_enabled = false; | 373 | bool fpu_enabled = false; |
372 | struct blkcipher_walk walk; | 374 | struct blkcipher_walk walk; |
373 | int err; | 375 | int err; |
374 | 376 | ||
375 | blkcipher_walk_init(&walk, dst, src, nbytes); | 377 | blkcipher_walk_init(&walk, dst, src, nbytes); |
376 | err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); | 378 | err = blkcipher_walk_virt_block(desc, &walk, bsize); |
377 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
378 | 379 | ||
379 | while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { | 380 | while ((nbytes = walk.nbytes) >= bsize) { |
380 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | 381 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, |
381 | nbytes = __ctr_crypt(desc, &walk); | 382 | desc, fpu_enabled, nbytes); |
383 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | ||
382 | err = blkcipher_walk_done(desc, &walk, nbytes); | 384 | err = blkcipher_walk_done(desc, &walk, nbytes); |
383 | } | 385 | } |
384 | 386 | ||
385 | serpent_fpu_end(fpu_enabled); | 387 | glue_fpu_end(fpu_enabled); |
386 | 388 | ||
387 | if (walk.nbytes) { | 389 | if (walk.nbytes) { |
388 | ctr_crypt_final(desc, &walk); | 390 | glue_ctr_crypt_final_128bit( |
391 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | ||
389 | err = blkcipher_walk_done(desc, &walk, 0); | 392 | err = blkcipher_walk_done(desc, &walk, 0); |
390 | } | 393 | } |
391 | 394 | ||
392 | return err; | 395 | return err; |
393 | } | 396 | } |
394 | 397 | ||
398 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
399 | { | ||
400 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
401 | unsigned int j; | ||
402 | |||
403 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
404 | ivs[j] = src[j]; | ||
405 | |||
406 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
407 | |||
408 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
409 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
410 | } | ||
411 | |||
412 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) | ||
413 | { | ||
414 | be128 ctrblk; | ||
415 | |||
416 | u128_to_be128(&ctrblk, iv); | ||
417 | u128_inc(iv); | ||
418 | |||
419 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
420 | u128_xor(dst, src, (u128 *)&ctrblk); | ||
421 | } | ||
422 | |||
423 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
424 | u128 *iv) | ||
425 | { | ||
426 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; | ||
427 | unsigned int i; | ||
428 | |||
429 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
430 | if (dst != src) | ||
431 | dst[i] = src[i]; | ||
432 | |||
433 | u128_to_be128(&ctrblks[i], iv); | ||
434 | u128_inc(iv); | ||
435 | } | ||
436 | |||
437 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
438 | } | ||
439 | |||
440 | static const struct common_glue_ctx serpent_enc = { | ||
441 | .num_funcs = 2, | ||
442 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
443 | |||
444 | .funcs = { { | ||
445 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
446 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } | ||
447 | }, { | ||
448 | .num_blocks = 1, | ||
449 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } | ||
450 | } } | ||
451 | }; | ||
452 | |||
453 | static const struct common_glue_ctx serpent_ctr = { | ||
454 | .num_funcs = 2, | ||
455 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
456 | |||
457 | .funcs = { { | ||
458 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
459 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } | ||
460 | }, { | ||
461 | .num_blocks = 1, | ||
462 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
463 | } } | ||
464 | }; | ||
465 | |||
466 | static const struct common_glue_ctx serpent_dec = { | ||
467 | .num_funcs = 2, | ||
468 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
469 | |||
470 | .funcs = { { | ||
471 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
472 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } | ||
473 | }, { | ||
474 | .num_blocks = 1, | ||
475 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
476 | } } | ||
477 | }; | ||
478 | |||
479 | static const struct common_glue_ctx serpent_dec_cbc = { | ||
480 | .num_funcs = 2, | ||
481 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
482 | |||
483 | .funcs = { { | ||
484 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
485 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } | ||
486 | }, { | ||
487 | .num_blocks = 1, | ||
488 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
489 | } } | ||
490 | }; | ||
491 | |||
492 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
493 | struct scatterlist *src, unsigned int nbytes) | ||
494 | { | ||
495 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); | ||
496 | } | ||
497 | |||
498 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
499 | struct scatterlist *src, unsigned int nbytes) | ||
500 | { | ||
501 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); | ||
502 | } | ||
503 | |||
504 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
505 | struct scatterlist *src, unsigned int nbytes) | ||
506 | { | ||
507 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, | ||
508 | dst, src, nbytes); | ||
509 | } | ||
510 | |||
511 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
512 | struct scatterlist *src, unsigned int nbytes) | ||
513 | { | ||
514 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, | ||
515 | nbytes); | ||
516 | } | ||
517 | |||
518 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
519 | struct scatterlist *src, unsigned int nbytes) | ||
520 | { | ||
521 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); | ||
522 | } | ||
523 | |||
524 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
525 | { | ||
526 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, | ||
527 | NULL, fpu_enabled, nbytes); | ||
528 | } | ||
529 | |||
530 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
531 | { | ||
532 | glue_fpu_end(fpu_enabled); | ||
533 | } | ||
534 | |||
395 | struct crypt_priv { | 535 | struct crypt_priv { |
396 | struct serpent_ctx *ctx; | 536 | struct serpent_ctx *ctx; |
397 | bool fpu_enabled; | 537 | bool fpu_enabled; |