aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-06-21 16:14:07 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-06-21 16:14:07 -0400
commit 00d94a6a5e3d6a44818e2911a4d606e28e29fecb (patch)
tree 15a524318349cb4075f6dd69d87e4414ba54ed31
parent 8b12e2505ad8c5010922e45f896d908fd1436709 (diff)
parent b6f34d44cb341ad32f08717d1a2c418e6053a031 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: aes-ni - Remove CRYPTO_TFM_REQ_MAY_SLEEP from fpu template
  crypto: aes-ni - Do not sleep when using the FPU
  crypto: aes-ni - Fix cbc mode IV saving
  crypto: padlock-aes - work around Nano CPU errata in CBC mode
  crypto: padlock-aes - work around Nano CPU errata in ECB mode
-rw-r--r-- arch/x86/crypto/aesni-intel_asm.S 5
-rw-r--r-- arch/x86/crypto/aesni-intel_glue.c 4
-rw-r--r-- arch/x86/crypto/fpu.c 4
-rw-r--r-- drivers/crypto/padlock-aes.c 138
4 files changed, 107 insertions, 44 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index caba99601703..eb0566e83319 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -845,7 +845,7 @@ ENTRY(aesni_cbc_enc)
845 */ 845 */
846ENTRY(aesni_cbc_dec) 846ENTRY(aesni_cbc_dec)
847 cmp $16, LEN 847 cmp $16, LEN
848 jb .Lcbc_dec_ret 848 jb .Lcbc_dec_just_ret
849 mov 480(KEYP), KLEN 849 mov 480(KEYP), KLEN
850 add $240, KEYP 850 add $240, KEYP
851 movups (IVP), IV 851 movups (IVP), IV
@@ -891,6 +891,7 @@ ENTRY(aesni_cbc_dec)
891 add $16, OUTP 891 add $16, OUTP
892 cmp $16, LEN 892 cmp $16, LEN
893 jge .Lcbc_dec_loop1 893 jge .Lcbc_dec_loop1
894 movups IV, (IVP)
895.Lcbc_dec_ret: 894.Lcbc_dec_ret:
895 movups IV, (IVP)
896.Lcbc_dec_just_ret:
896 ret 897 ret
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 4e663398f77f..c580c5ec1cad 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -198,6 +198,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
198 198
199 blkcipher_walk_init(&walk, dst, src, nbytes); 199 blkcipher_walk_init(&walk, dst, src, nbytes);
200 err = blkcipher_walk_virt(desc, &walk); 200 err = blkcipher_walk_virt(desc, &walk);
201 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
201 202
202 kernel_fpu_begin(); 203 kernel_fpu_begin();
203 while ((nbytes = walk.nbytes)) { 204 while ((nbytes = walk.nbytes)) {
@@ -221,6 +222,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
221 222
222 blkcipher_walk_init(&walk, dst, src, nbytes); 223 blkcipher_walk_init(&walk, dst, src, nbytes);
223 err = blkcipher_walk_virt(desc, &walk); 224 err = blkcipher_walk_virt(desc, &walk);
225 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
224 226
225 kernel_fpu_begin(); 227 kernel_fpu_begin();
226 while ((nbytes = walk.nbytes)) { 228 while ((nbytes = walk.nbytes)) {
@@ -266,6 +268,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
266 268
267 blkcipher_walk_init(&walk, dst, src, nbytes); 269 blkcipher_walk_init(&walk, dst, src, nbytes);
268 err = blkcipher_walk_virt(desc, &walk); 270 err = blkcipher_walk_virt(desc, &walk);
271 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
269 272
270 kernel_fpu_begin(); 273 kernel_fpu_begin();
271 while ((nbytes = walk.nbytes)) { 274 while ((nbytes = walk.nbytes)) {
@@ -289,6 +292,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
289 292
290 blkcipher_walk_init(&walk, dst, src, nbytes); 293 blkcipher_walk_init(&walk, dst, src, nbytes);
291 err = blkcipher_walk_virt(desc, &walk); 294 err = blkcipher_walk_virt(desc, &walk);
295 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
292 296
293 kernel_fpu_begin(); 297 kernel_fpu_begin();
294 while ((nbytes = walk.nbytes)) { 298 while ((nbytes = walk.nbytes)) {
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index 5f9781a3815f..daef6cd2b45d 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -48,7 +48,7 @@ static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
48 struct blkcipher_desc desc = { 48 struct blkcipher_desc desc = {
49 .tfm = child, 49 .tfm = child,
50 .info = desc_in->info, 50 .info = desc_in->info,
51 .flags = desc_in->flags, 51 .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
52 }; 52 };
53 53
54 kernel_fpu_begin(); 54 kernel_fpu_begin();
@@ -67,7 +67,7 @@ static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
67 struct blkcipher_desc desc = { 67 struct blkcipher_desc desc = {
68 .tfm = child, 68 .tfm = child,
69 .info = desc_in->info, 69 .info = desc_in->info,
70 .flags = desc_in->flags, 70 .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
71 }; 71 };
72 72
73 kernel_fpu_begin(); 73 kernel_fpu_begin();
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 87f92c39b5f0..a9952b1236b0 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -18,9 +18,22 @@
18#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <asm/byteorder.h> 20#include <asm/byteorder.h>
21#include <asm/processor.h>
21#include <asm/i387.h> 22#include <asm/i387.h>
22#include "padlock.h" 23#include "padlock.h"
23 24
25/*
26 * Number of data blocks actually fetched for each xcrypt insn.
27 * Processors with prefetch errata will fetch extra blocks.
28 */
29static unsigned int ecb_fetch_blocks = 2;
30#define MAX_ECB_FETCH_BLOCKS (8)
31#define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE)
32
33static unsigned int cbc_fetch_blocks = 1;
34#define MAX_CBC_FETCH_BLOCKS (4)
35#define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE)
36
24/* Control word. */ 37/* Control word. */
25struct cword { 38struct cword {
26 unsigned int __attribute__ ((__packed__)) 39 unsigned int __attribute__ ((__packed__))
@@ -172,73 +185,111 @@ static inline void padlock_store_cword(struct cword *cword)
172 * should be used only inside the irq_ts_save/restore() context 185 * should be used only inside the irq_ts_save/restore() context
173 */ 186 */
174 187
175static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key, 188static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key,
176 struct cword *control_word) 189 struct cword *control_word, int count)
177{ 190{
178 asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ 191 asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
179 : "+S"(input), "+D"(output) 192 : "+S"(input), "+D"(output)
180 : "d"(control_word), "b"(key), "c"(1)); 193 : "d"(control_word), "b"(key), "c"(count));
194}
195
196static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key,
197 u8 *iv, struct cword *control_word, int count)
198{
199 asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */
200 : "+S" (input), "+D" (output), "+a" (iv)
201 : "d" (control_word), "b" (key), "c" (count));
202 return iv;
181} 203}
182 204
183static void aes_crypt_copy(const u8 *in, u8 *out, u32 *key, struct cword *cword) 205static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key,
206 struct cword *cword, int count)
184{ 207{
185 u8 buf[AES_BLOCK_SIZE * 2 + PADLOCK_ALIGNMENT - 1]; 208 /*
209 * Padlock prefetches extra data so we must provide mapped input buffers.
210 * Assume there are at least 16 bytes of stack already in use.
211 */
212 u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
213 u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
214
215 memcpy(tmp, in, count * AES_BLOCK_SIZE);
216 rep_xcrypt_ecb(tmp, out, key, cword, count);
217}
218
219static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key,
220 u8 *iv, struct cword *cword, int count)
221{
222 /*
223 * Padlock prefetches extra data so we must provide mapped input buffers.
224 * Assume there are at least 16 bytes of stack already in use.
225 */
226 u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
186 u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); 227 u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
187 228
188 memcpy(tmp, in, AES_BLOCK_SIZE); 229 memcpy(tmp, in, count * AES_BLOCK_SIZE);
189 padlock_xcrypt(tmp, out, key, cword); 230 return rep_xcrypt_cbc(tmp, out, key, iv, cword, count);
190} 231}
191 232
192static inline void aes_crypt(const u8 *in, u8 *out, u32 *key, 233static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key,
193 struct cword *cword) 234 struct cword *cword, int count)
194{ 235{
195 /* padlock_xcrypt requires at least two blocks of data. */ 236 /* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
196 if (unlikely(!(((unsigned long)in ^ (PAGE_SIZE - AES_BLOCK_SIZE)) & 237 * We could avoid some copying here but it's probably not worth it.
197 (PAGE_SIZE - 1)))) { 238 */
198 aes_crypt_copy(in, out, key, cword); 239 if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) {
240 ecb_crypt_copy(in, out, key, cword, count);
199 return; 241 return;
200 } 242 }
201 243
202 padlock_xcrypt(in, out, key, cword); 244 rep_xcrypt_ecb(in, out, key, cword, count);
245}
246
247static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key,
248 u8 *iv, struct cword *cword, int count)
249{
250 /* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */
251 if (unlikely(((unsigned long)in & PAGE_SIZE) + cbc_fetch_bytes > PAGE_SIZE))
252 return cbc_crypt_copy(in, out, key, iv, cword, count);
253
254 return rep_xcrypt_cbc(in, out, key, iv, cword, count);
203} 255}
204 256
205static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, 257static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
206 void *control_word, u32 count) 258 void *control_word, u32 count)
207{ 259{
208 if (count == 1) { 260 u32 initial = count & (ecb_fetch_blocks - 1);
209 aes_crypt(input, output, key, control_word); 261
262 if (count < ecb_fetch_blocks) {
263 ecb_crypt(input, output, key, control_word, count);
210 return; 264 return;
211 } 265 }
212 266
213 asm volatile ("test $1, %%cl;" 267 if (initial)
214 "je 1f;" 268 asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
215#ifndef CONFIG_X86_64 269 : "+S"(input), "+D"(output)
216 "lea -1(%%ecx), %%eax;" 270 : "d"(control_word), "b"(key), "c"(initial));
217 "mov $1, %%ecx;" 271
218#else 272 asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
219 "lea -1(%%rcx), %%rax;"
220 "mov $1, %%rcx;"
221#endif
222 ".byte 0xf3,0x0f,0xa7,0xc8;" /* rep xcryptecb */
223#ifndef CONFIG_X86_64
224 "mov %%eax, %%ecx;"
225#else
226 "mov %%rax, %%rcx;"
227#endif
228 "1:"
229 ".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */
230 : "+S"(input), "+D"(output) 273 : "+S"(input), "+D"(output)
231 : "d"(control_word), "b"(key), "c"(count) 274 : "d"(control_word), "b"(key), "c"(count - initial));
232 : "ax");
233} 275}
234 276
235static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, 277static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
236 u8 *iv, void *control_word, u32 count) 278 u8 *iv, void *control_word, u32 count)
237{ 279{
238 /* rep xcryptcbc */ 280 u32 initial = count & (cbc_fetch_blocks - 1);
239 asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" 281
282 if (count < cbc_fetch_blocks)
283 return cbc_crypt(input, output, key, iv, control_word, count);
284
285 if (initial)
286 asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */
287 : "+S" (input), "+D" (output), "+a" (iv)
288 : "d" (control_word), "b" (key), "c" (count));
289
290 asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */
240 : "+S" (input), "+D" (output), "+a" (iv) 291 : "+S" (input), "+D" (output), "+a" (iv)
241 : "d" (control_word), "b" (key), "c" (count)); 292 : "d" (control_word), "b" (key), "c" (count-initial));
242 return iv; 293 return iv;
243} 294}
244 295
@@ -249,7 +300,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
249 300
250 padlock_reset_key(&ctx->cword.encrypt); 301 padlock_reset_key(&ctx->cword.encrypt);
251 ts_state = irq_ts_save(); 302 ts_state = irq_ts_save();
252 aes_crypt(in, out, ctx->E, &ctx->cword.encrypt); 303 ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
253 irq_ts_restore(ts_state); 304 irq_ts_restore(ts_state);
254 padlock_store_cword(&ctx->cword.encrypt); 305 padlock_store_cword(&ctx->cword.encrypt);
255} 306}
@@ -261,7 +312,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
261 312
262 padlock_reset_key(&ctx->cword.encrypt); 313 padlock_reset_key(&ctx->cword.encrypt);
263 ts_state = irq_ts_save(); 314 ts_state = irq_ts_save();
264 aes_crypt(in, out, ctx->D, &ctx->cword.decrypt); 315 ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
265 irq_ts_restore(ts_state); 316 irq_ts_restore(ts_state);
266 padlock_store_cword(&ctx->cword.encrypt); 317 padlock_store_cword(&ctx->cword.encrypt);
267} 318}
@@ -454,6 +505,7 @@ static struct crypto_alg cbc_aes_alg = {
454static int __init padlock_init(void) 505static int __init padlock_init(void)
455{ 506{
456 int ret; 507 int ret;
508 struct cpuinfo_x86 *c = &cpu_data(0);
457 509
458 if (!cpu_has_xcrypt) { 510 if (!cpu_has_xcrypt) {
459 printk(KERN_NOTICE PFX "VIA PadLock not detected.\n"); 511 printk(KERN_NOTICE PFX "VIA PadLock not detected.\n");
@@ -476,6 +528,12 @@ static int __init padlock_init(void)
476 528
477 printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); 529 printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
478 530
531 if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
532 ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
533 cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
534 printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
535 }
536
479out: 537out:
480 return ret; 538 return ret;
481 539