diff options
author | Chuck Ebbert <cebbert@redhat.com> | 2009-06-18 07:31:09 -0400 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2009-06-18 07:31:09 -0400 |
commit | 8d8409f773af2cfd52e23e4b138a7d55a31182cd (patch) | |
tree | c2bd862c4099acfe0453d049e7e65ae231e3ae9f | |
parent | a76c1c23d0c33d98f2d9b36e76e7f71289fc8391 (diff) |
crypto: padlock-aes - work around Nano CPU errata in CBC mode
Extend the previous workarounds for the prefetch bug to cover CBC mode,
and clean up the code a bit.
Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Acked-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- | drivers/crypto/padlock-aes.c | 83 |
1 file changed, 65 insertions, 18 deletions
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index e1d8776c6972..a9952b1236b0 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c | |||
@@ -22,11 +22,16 @@ | |||
22 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
23 | #include "padlock.h" | 23 | #include "padlock.h" |
24 | 24 | ||
25 | /* number of data blocks actually fetched for each xcrypt insn */ | 25 | /* |
26 | * Number of data blocks actually fetched for each xcrypt insn. | ||
27 | * Processors with prefetch errata will fetch extra blocks. | ||
28 | */ | ||
26 | static unsigned int ecb_fetch_blocks = 2; | 29 | static unsigned int ecb_fetch_blocks = 2; |
27 | static unsigned int cbc_fetch_blocks = 1; | 30 | #define MAX_ECB_FETCH_BLOCKS (8) |
28 | |||
29 | #define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE) | 31 | #define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE) |
32 | |||
33 | static unsigned int cbc_fetch_blocks = 1; | ||
34 | #define MAX_CBC_FETCH_BLOCKS (4) | ||
30 | #define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE) | 35 | #define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE) |
31 | 36 | ||
32 | /* Control word. */ | 37 | /* Control word. */ |
@@ -180,7 +185,7 @@ static inline void padlock_store_cword(struct cword *cword) | |||
180 | * should be used only inside the irq_ts_save/restore() context | 185 | * should be used only inside the irq_ts_save/restore() context |
181 | */ | 186 | */ |
182 | 187 | ||
183 | static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key, | 188 | static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key, |
184 | struct cword *control_word, int count) | 189 | struct cword *control_word, int count) |
185 | { | 190 | { |
186 | asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ | 191 | asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ |
@@ -188,32 +193,65 @@ static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key, | |||
188 | : "d"(control_word), "b"(key), "c"(count)); | 193 | : "d"(control_word), "b"(key), "c"(count)); |
189 | } | 194 | } |
190 | 195 | ||
191 | static void aes_crypt_copy(const u8 *in, u8 *out, u32 *key, | 196 | static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key, |
197 | u8 *iv, struct cword *control_word, int count) | ||
198 | { | ||
199 | asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ | ||
200 | : "+S" (input), "+D" (output), "+a" (iv) | ||
201 | : "d" (control_word), "b" (key), "c" (count)); | ||
202 | return iv; | ||
203 | } | ||
204 | |||
205 | static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key, | ||
192 | struct cword *cword, int count) | 206 | struct cword *cword, int count) |
193 | { | 207 | { |
194 | /* | 208 | /* |
195 | * Padlock prefetches extra data so we must provide mapped input buffers. | 209 | * Padlock prefetches extra data so we must provide mapped input buffers. |
196 | * Assume there are at least 16 bytes of stack already in use. | 210 | * Assume there are at least 16 bytes of stack already in use. |
197 | */ | 211 | */ |
198 | u8 buf[AES_BLOCK_SIZE * 7 + PADLOCK_ALIGNMENT - 1]; | 212 | u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1]; |
199 | u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); | 213 | u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); |
200 | 214 | ||
201 | memcpy(tmp, in, count * AES_BLOCK_SIZE); | 215 | memcpy(tmp, in, count * AES_BLOCK_SIZE); |
202 | padlock_xcrypt(tmp, out, key, cword, count); | 216 | rep_xcrypt_ecb(tmp, out, key, cword, count); |
203 | } | 217 | } |
204 | 218 | ||
205 | static inline void aes_crypt(const u8 *in, u8 *out, u32 *key, | 219 | static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key, |
220 | u8 *iv, struct cword *cword, int count) | ||
221 | { | ||
222 | /* | ||
223 | * Padlock prefetches extra data so we must provide mapped input buffers. | ||
224 | * Assume there are at least 16 bytes of stack already in use. | ||
225 | */ | ||
226 | u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1]; | ||
227 | u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); | ||
228 | |||
229 | memcpy(tmp, in, count * AES_BLOCK_SIZE); | ||
230 | return rep_xcrypt_cbc(tmp, out, key, iv, cword, count); | ||
231 | } | ||
232 | |||
233 | static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key, | ||
206 | struct cword *cword, int count) | 234 | struct cword *cword, int count) |
207 | { | 235 | { |
208 | /* Padlock in ECB mode fetches at least ecb_fetch_bytes of data. | 236 | /* Padlock in ECB mode fetches at least ecb_fetch_bytes of data. |
209 | * We could avoid some copying here but it's probably not worth it. | 237 | * We could avoid some copying here but it's probably not worth it. |
210 | */ | 238 | */ |
211 | if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) { | 239 | if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) { |
212 | aes_crypt_copy(in, out, key, cword, count); | 240 | ecb_crypt_copy(in, out, key, cword, count); |
213 | return; | 241 | return; |
214 | } | 242 | } |
215 | 243 | ||
216 | padlock_xcrypt(in, out, key, cword, count); | 244 | rep_xcrypt_ecb(in, out, key, cword, count); |
245 | } | ||
246 | |||
247 | static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key, | ||
248 | u8 *iv, struct cword *cword, int count) | ||
249 | { | ||
250 | /* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */ | ||
251 | if (unlikely(((unsigned long)in & PAGE_SIZE) + cbc_fetch_bytes > PAGE_SIZE)) | ||
252 | return cbc_crypt_copy(in, out, key, iv, cword, count); | ||
253 | |||
254 | return rep_xcrypt_cbc(in, out, key, iv, cword, count); | ||
217 | } | 255 | } |
218 | 256 | ||
219 | static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, | 257 | static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, |
@@ -222,7 +260,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, | |||
222 | u32 initial = count & (ecb_fetch_blocks - 1); | 260 | u32 initial = count & (ecb_fetch_blocks - 1); |
223 | 261 | ||
224 | if (count < ecb_fetch_blocks) { | 262 | if (count < ecb_fetch_blocks) { |
225 | aes_crypt(input, output, key, control_word, count); | 263 | ecb_crypt(input, output, key, control_word, count); |
226 | return; | 264 | return; |
227 | } | 265 | } |
228 | 266 | ||
@@ -239,10 +277,19 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, | |||
239 | static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, | 277 | static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, |
240 | u8 *iv, void *control_word, u32 count) | 278 | u8 *iv, void *control_word, u32 count) |
241 | { | 279 | { |
242 | /* rep xcryptcbc */ | 280 | u32 initial = count & (cbc_fetch_blocks - 1); |
243 | asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" | 281 | |
282 | if (count < cbc_fetch_blocks) | ||
283 | return cbc_crypt(input, output, key, iv, control_word, count); | ||
284 | |||
285 | if (initial) | ||
286 | asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ | ||
287 | : "+S" (input), "+D" (output), "+a" (iv) | ||
288 | : "d" (control_word), "b" (key), "c" (count)); | ||
289 | |||
290 | asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" /* rep xcryptcbc */ | ||
244 | : "+S" (input), "+D" (output), "+a" (iv) | 291 | : "+S" (input), "+D" (output), "+a" (iv) |
245 | : "d" (control_word), "b" (key), "c" (count)); | 292 | : "d" (control_word), "b" (key), "c" (count-initial)); |
246 | return iv; | 293 | return iv; |
247 | } | 294 | } |
248 | 295 | ||
@@ -253,7 +300,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | |||
253 | 300 | ||
254 | padlock_reset_key(&ctx->cword.encrypt); | 301 | padlock_reset_key(&ctx->cword.encrypt); |
255 | ts_state = irq_ts_save(); | 302 | ts_state = irq_ts_save(); |
256 | aes_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1); | 303 | ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1); |
257 | irq_ts_restore(ts_state); | 304 | irq_ts_restore(ts_state); |
258 | padlock_store_cword(&ctx->cword.encrypt); | 305 | padlock_store_cword(&ctx->cword.encrypt); |
259 | } | 306 | } |
@@ -265,7 +312,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | |||
265 | 312 | ||
266 | padlock_reset_key(&ctx->cword.encrypt); | 313 | padlock_reset_key(&ctx->cword.encrypt); |
267 | ts_state = irq_ts_save(); | 314 | ts_state = irq_ts_save(); |
268 | aes_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1); | 315 | ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1); |
269 | irq_ts_restore(ts_state); | 316 | irq_ts_restore(ts_state); |
270 | padlock_store_cword(&ctx->cword.encrypt); | 317 | padlock_store_cword(&ctx->cword.encrypt); |
271 | } | 318 | } |
@@ -482,8 +529,8 @@ static int __init padlock_init(void) | |||
482 | printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); | 529 | printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n"); |
483 | 530 | ||
484 | if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) { | 531 | if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) { |
485 | ecb_fetch_blocks = 8; | 532 | ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS; |
486 | cbc_fetch_blocks = 4; /* NOTE: notused */ | 533 | cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS; |
487 | printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n"); | 534 | printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n"); |
488 | } | 535 | } |
489 | 536 | ||