diff options
-rw-r--r-- | arch/x86/crypto/aesni-intel_asm.S | 5 | ||||
-rw-r--r-- | arch/x86/crypto/aesni-intel_glue.c | 4 | ||||
-rw-r--r-- | arch/x86/crypto/fpu.c | 4 | ||||
-rw-r--r-- | drivers/crypto/padlock-aes.c | 138 |
4 files changed, 107 insertions, 44 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index caba99601703..eb0566e83319 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -845,7 +845,7 @@ ENTRY(aesni_cbc_enc) | |||
845 | */ | 845 | */ |
846 | ENTRY(aesni_cbc_dec) | 846 | ENTRY(aesni_cbc_dec) |
847 | cmp $16, LEN | 847 | cmp $16, LEN |
848 | jb .Lcbc_dec_ret | 848 | jb .Lcbc_dec_just_ret |
849 | mov 480(KEYP), KLEN | 849 | mov 480(KEYP), KLEN |
850 | add $240, KEYP | 850 | add $240, KEYP |
851 | movups (IVP), IV | 851 | movups (IVP), IV |
@@ -891,6 +891,7 @@ ENTRY(aesni_cbc_dec) | |||
891 | add $16, OUTP | 891 | add $16, OUTP |
892 | cmp $16, LEN | 892 | cmp $16, LEN |
893 | jge .Lcbc_dec_loop1 | 893 | jge .Lcbc_dec_loop1 |
894 | movups IV, (IVP) | ||
895 | .Lcbc_dec_ret: | 894 | .Lcbc_dec_ret: |
895 | movups IV, (IVP) | ||
896 | .Lcbc_dec_just_ret: | ||
896 | ret | 897 | ret |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 4e663398f77f..c580c5ec1cad 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -198,6 +198,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, | |||
198 | 198 | ||
199 | blkcipher_walk_init(&walk, dst, src, nbytes); | 199 | blkcipher_walk_init(&walk, dst, src, nbytes); |
200 | err = blkcipher_walk_virt(desc, &walk); | 200 | err = blkcipher_walk_virt(desc, &walk); |
201 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
201 | 202 | ||
202 | kernel_fpu_begin(); | 203 | kernel_fpu_begin(); |
203 | while ((nbytes = walk.nbytes)) { | 204 | while ((nbytes = walk.nbytes)) { |
@@ -221,6 +222,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, | |||
221 | 222 | ||
222 | blkcipher_walk_init(&walk, dst, src, nbytes); | 223 | blkcipher_walk_init(&walk, dst, src, nbytes); |
223 | err = blkcipher_walk_virt(desc, &walk); | 224 | err = blkcipher_walk_virt(desc, &walk); |
225 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
224 | 226 | ||
225 | kernel_fpu_begin(); | 227 | kernel_fpu_begin(); |
226 | while ((nbytes = walk.nbytes)) { | 228 | while ((nbytes = walk.nbytes)) { |
@@ -266,6 +268,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, | |||
266 | 268 | ||
267 | blkcipher_walk_init(&walk, dst, src, nbytes); | 269 | blkcipher_walk_init(&walk, dst, src, nbytes); |
268 | err = blkcipher_walk_virt(desc, &walk); | 270 | err = blkcipher_walk_virt(desc, &walk); |
271 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
269 | 272 | ||
270 | kernel_fpu_begin(); | 273 | kernel_fpu_begin(); |
271 | while ((nbytes = walk.nbytes)) { | 274 | while ((nbytes = walk.nbytes)) { |
@@ -289,6 +292,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, | |||
289 | 292 | ||
290 | blkcipher_walk_init(&walk, dst, src, nbytes); | 293 | blkcipher_walk_init(&walk, dst, src, nbytes); |
291 | err = blkcipher_walk_virt(desc, &walk); | 294 | err = blkcipher_walk_virt(desc, &walk); |
295 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
292 | 296 | ||
293 | kernel_fpu_begin(); | 297 | kernel_fpu_begin(); |
294 | while ((nbytes = walk.nbytes)) { | 298 | while ((nbytes = walk.nbytes)) { |
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index 5f9781a3815f..daef6cd2b45d 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c | |||
@@ -48,7 +48,7 @@ static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in, | |||
48 | struct blkcipher_desc desc = { | 48 | struct blkcipher_desc desc = { |
49 | .tfm = child, | 49 | .tfm = child, |
50 | .info = desc_in->info, | 50 | .info = desc_in->info, |
51 | .flags = desc_in->flags, | 51 | .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, |
52 | }; | 52 | }; |
53 | 53 | ||
54 | kernel_fpu_begin(); | 54 | kernel_fpu_begin(); |
@@ -67,7 +67,7 @@ static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in, | |||
67 | struct blkcipher_desc desc = { | 67 | struct blkcipher_desc desc = { |
68 | .tfm = child, | 68 | .tfm = child, |
69 | .info = desc_in->info, | 69 | .info = desc_in->info, |
70 | .flags = desc_in->flags, | 70 | .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, |
71 | }; | 71 | }; |
72 | 72 | ||
73 | kernel_fpu_begin(); | 73 | kernel_fpu_begin(); |
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 87f92c39b5f0..a9952b1236b0 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c | |||
@@ -18,9 +18,22 @@ | |||
18 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <asm/byteorder.h> | 20 | #include <asm/byteorder.h> |
21 | #include <asm/processor.h> | ||
21 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
22 | #include "padlock.h" | 23 | #include "padlock.h" |
23 | 24 | ||
25 | /* | ||
26 | * Number of data blocks actually fetched for each xcrypt insn. | ||
27 | * Processors with prefetch errata will fetch extra blocks. | ||
28 | */ | ||
29 | static unsigned int ecb_fetch_blocks = 2; | ||
30 | #define MAX_ECB_FETCH_BLOCKS (8) | ||
31 | #define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE) | ||
32 | |||
33 | static unsigned int cbc_fetch_blocks = 1; | ||
34 | #define MAX_CBC_FETCH_BLOCKS (4) | ||
35 | #define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE) | ||
36 | |||
24 | /* Control word. */ | 37 | /* Control word. */ |
25 | struct cword { | 38 | struct cword { |
26 | unsigned int __attribute__ ((__packed__)) | 39 | unsigned int __attribute__ ((__packed__)) |
@@ -172,73 +185,111 @@ static inline void padlock_store_cword(struct cword *cword) | |||
172 | * should be used only inside the irq_ts_save/restore() context | 185 | * should be used only inside the irq_ts_save/restore() context |
173 | */ | 186 | */ |
174 | 187 | ||
175 | static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key, | 188 | static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key, |
176 | struct cword *control_word) | 189 | struct cword *control_word, int count) |
177 | { | 190 | { |
178 | asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ | 191 | asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ |
179 | : "+S"(input), "+D"(output) | 192 | : "+S"(input), "+D"(output) |
180 | : "d"(control_word), "b"(key), "c"(1)); | 193 | : "d"(control_word), "b"(key), "c"(count)); |
194 | } | ||
195 | |||
196 | static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key, | ||
197 | u8 *iv, struct cword *control_word, int count) | ||
198 | { | ||
199 | asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ | ||
200 | : "+S" (input), "+D" (output), "+a" (iv) | ||
201 | : "d" (control_word), "b" (key), "c" (count)); | ||
202 | return iv; | ||
181 | } | 203 | } |
182 | 204 | ||
183 | static void aes_crypt_copy(const u8 *in, u8 *out, u32 *key, struct cword *cword) | 205 | static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key, |
206 | struct cword *cword, int count) | ||
184 | { | 207 | { |
185 | u8 buf[AES_BLOCK_SIZE * 2 + PADLOCK_ALIGNMENT - 1]; | 208 | /* |
209 | * Padlock prefetches extra data so we must provide mapped input buffers. | ||
210 | * Assume there are at least 16 bytes of stack already in use. | ||
211 | */ | ||
212 | u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1]; | ||
213 | u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); | ||
214 | |||
215 | memcpy(tmp, in, count * AES_BLOCK_SIZE); | ||
216 | rep_xcrypt_ecb(tmp, out, key, cword, count); | ||
217 | } | ||
218 | |||
219 | static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key, | ||
220 | u8 *iv, struct cword *cword, int count) | ||
221 | { | ||
222 | /* | ||
223 | * Padlock prefetches extra data so we must provide mapped input buffers. | ||
224 | * Assume there are at least 16 bytes of stack already in use. | ||
225 | */ | ||
226 | u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1]; | ||
186 | u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); | 227 | u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); |
187 | 228 | ||
188 | memcpy(tmp, in, AES_BLOCK_SIZE); | 229 | memcpy(tmp, in, count * AES_BLOCK_SIZE); |
189 | padlock_xcrypt(tmp, out, key, cword); | 230 | return rep_xcrypt_cbc(tmp, out, key, iv, cword, count); |
190 | } | 231 | } |
191 | 232 | ||
192 | static inline void aes_crypt(const u8 *in, u8 *out, u32 *key, | 233 | static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key, |
193 | struct cword *cword) | 234 | struct cword *cword, int count) |
194 | { | 235 | { |
195 | /* padlock_xcrypt requires at least two blocks of data. */ | 236 | /* Padlock in ECB mode fetches at least ecb_fetch_bytes of data. |
196 | if (unlikely(!(((unsigned long)in ^ (PAGE_SIZE - AES_BLOCK_SIZE)) & | 237 | * We could avoid some copying here but it's probably not worth it. |
197 | (PAGE_SIZE - 1)))) { | 238 | */ |
198 | aes_crypt_copy(in, out, key, cword); | 239 | if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) { |
240 | ecb_crypt_copy(in, out, key, cword, count); | ||
199 | return; | 241 | return; |
200 | } | 242 | } |
201 | 243 | ||
202 | padlock_xcrypt(in, out, key, cword); | 244 | rep_xcrypt_ecb(in, out, key, cword, count); |
245 | } | ||
246 | |||
247 | static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key, | ||
248 | u8 *iv, struct cword *cword, int count) | ||
249 | { | ||
250 | /* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */ | ||
251 | if (unlikely(((unsigned long)in & ~PAGE_MASK) + cbc_fetch_bytes > PAGE_SIZE)) | ||
252 | return cbc_crypt_copy(in, out, key, iv, cword, count); | ||
253 | |||
254 | return rep_xcrypt_cbc(in, out, key, iv, cword, count); | ||
203 | } | 255 | } |
204 | 256 | ||
205 | static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, | 257 | static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, |
206 | void *control_word, u32 count) | 258 | void *control_word, u32 count) |
207 | { | 259 | { |
208 | if (count == 1) { | 260 | u32 initial = count & (ecb_fetch_blocks - 1); |
209 | aes_crypt(input, output, key, control_word); | 261 | |
262 | if (count < ecb_fetch_blocks) { | ||
263 | ecb_crypt(input, output, key, control_word, count); | ||
210 | return; | 264 | return; |
211 | } | 265 | } |
212 | 266 | ||
213 | asm volatile ("test $1, %%cl;" | 267 | if (initial) |
214 | "je 1f;" | 268 | asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ |
215 | #ifndef CONFIG_X86_64 | 269 | : "+S"(input), "+D"(output) |
216 | "lea -1(%%ecx), %%eax;" | 270 | : "d"(control_word), "b"(key), "c"(initial)); |
217 | "mov $1, %%ecx;" | 271 | |
218 | #else | 272 | asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ |
219 | "lea -1(%%rcx), %%rax;" | ||
220 | "mov $1, %%rcx;" | ||
221 | #endif | ||
222 | ".byte 0xf3,0x0f,0xa7,0xc8;" /* rep xcryptecb */ | ||
223 | #ifndef CONFIG_X86_64 | ||
224 | "mov %%eax, %%ecx;" | ||
225 | #else | ||
226 | "mov %%rax, %%rcx;" | ||
227 | #endif | ||
228 | "1:" | ||
229 | ".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ | ||
230 | : "+S"(input), "+D"(output) | 273 | : "+S"(input), "+D"(output) |
231 | : "d"(control_word), "b"(key), "c"(count) | 274 | : "d"(control_word), "b"(key), "c"(count - initial)); |
232 | : "ax"); | ||
233 | } | 275 | } |
234 | 276 | ||
235 | static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, | 277 | static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, |
236 | u8 *iv, void *control_word, u32 count) | 278 | u8 *iv, void *control_word, u32 count) |
237 | { | 279 | { |
238 | /* rep xcryptcbc */ | 280 | u32 initial = count & (cbc_fetch_blocks - 1); |
239 | asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" | 281 | |
282 | if (count < cbc_fetch_blocks) | ||
283 | return cbc_crypt(input, output, key, iv, control_word, count); | ||
284 | |||
285 | if (initial) | ||
286 | asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ | ||
287 | : "+S" (input), "+D" (output), "+a" (iv) | ||
288 | : "d" (control_word), "b" (key), "c" (initial)); | ||
289 | |||
290 | asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ | ||
240 | : "+S" (input), "+D" (output), "+a" (iv) | 291 | : "+S" (input), "+D" (output), "+a" (iv) |
241 | : "d" (control_word), "b" (key), "c" (count)); | 292 | : "d" (control_word), "b" (key), "c" (count-initial)); |
242 | return iv; | 293 | return iv; |
243 | } | 294 | } |
244 | 295 | ||
@@ -249,7 +300,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | |||
249 | 300 | ||
250 | padlock_reset_key(&ctx->cword.encrypt); | 301 | padlock_reset_key(&ctx->cword.encrypt); |
251 | ts_state = irq_ts_save(); | 302 | ts_state = irq_ts_save(); |
252 | aes_crypt(in, out, ctx->E, &ctx->cword.encrypt); | 303 | ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1); |
253 | irq_ts_restore(ts_state); | 304 | irq_ts_restore(ts_state); |
254 | padlock_store_cword(&ctx->cword.encrypt); | 305 | padlock_store_cword(&ctx->cword.encrypt); |
255 | } | 306 | } |
@@ -261,7 +312,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | |||
261 | 312 | ||
262 | padlock_reset_key(&ctx->cword.encrypt); | 313 | padlock_reset_key(&ctx->cword.encrypt); |
263 | ts_state = irq_ts_save(); | 314 | ts_state = irq_ts_save(); |
264 | aes_crypt(in, out, ctx->D, &ctx->cword.decrypt); | 315 | ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1); |
265 | irq_ts_restore(ts_state); | 316 | irq_ts_restore(ts_state); |
266 | padlock_store_cword(&ctx->cword.encrypt); | 317 | padlock_store_cword(&ctx->cword.encrypt); |
267 | } | 318 | } |
@@ -454,6 +505,7 @@ static struct crypto_alg cbc_aes_alg = { | |||
454 | static int __init padlock_init(void) | 505 | static int __init padlock_init(void) |
455 | { | 506 | { |
456 | int ret; | 507 | int ret; |
508 | struct cpuinfo_x86 *c = &cpu_data(0); | ||
457 | 509 | ||
458 | if (!cpu_has_xcrypt) { | 510 | if (!cpu_has_xcrypt) { |
459 | printk(KERN_NOTICE PFX "VIA PadLock not detected.\n"); | 511 | printk(KERN_NOTICE PFX "VIA PadLock not detected.\n"); |
@@ -476,6 +528,12 @@ static int __init padlock_init(void) | |||
476 | 528 | ||
477 | printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); | 529 | printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); |
478 | 530 | ||
531 | if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) { | ||
532 | ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS; | ||
533 | cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS; | ||
534 | printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n"); | ||
535 | } | ||
536 | |||
479 | out: | 537 | out: |
480 | return ret; | 538 | return ret; |
481 | 539 | ||