author     Chuck Ebbert <cebbert@redhat.com>          2009-06-18 07:31:09 -0400
committer  Herbert Xu <herbert@gondor.apana.org.au>   2009-06-18 07:31:09 -0400
commit     8d8409f773af2cfd52e23e4b138a7d55a31182cd (patch)
tree       c2bd862c4099acfe0453d049e7e65ae231e3ae9f /drivers
parent     a76c1c23d0c33d98f2d9b36e76e7f71289fc8391 (diff)
crypto: padlock-aes - work around Nano CPU errata in CBC mode
Extend previous workarounds for the prefetch bug to cover CBC mode, clean up the code a bit.

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Acked-by: Harald Welte <HaraldWelte@viatech.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
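
For context: the workaround this patch extends boils down to one predicate. If the xcrypt prefetch (up to MAX_ECB_FETCH_BLOCKS or MAX_CBC_FETCH_BLOCKS blocks on affected parts) could read past the end of the page holding the caller's input, the input is first copied into an aligned on-stack buffer that is known to be mapped, and the instruction runs on the copy. A minimal stand-alone sketch of that predicate follows; would_cross_page() and its constants are illustrative names for this note, not identifiers from the driver.

#include <stdbool.h>
#include <stdint.h>

#define AES_BLOCK_SIZE	16
#define PAGE_SIZE	4096UL

/* Sketch only: true when a prefetching xcrypt starting at 'in' could touch
 * bytes beyond the page that holds the supplied blocks, i.e. when the
 * driver has to bounce the input through a stack buffer first. */
static bool would_cross_page(const uint8_t *in, unsigned int fetch_blocks)
{
	unsigned long offset = (unsigned long)in & (PAGE_SIZE - 1);

	return offset + fetch_blocks * AES_BLOCK_SIZE > PAGE_SIZE;
}

With cbc_fetch_blocks raised on Nano stepping 2, even a one-block CBC request sitting near the end of a page takes the copy path instead of letting the engine prefetch into a possibly unmapped page.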
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/crypto/padlock-aes.c   83
1 file changed, 65 insertions(+), 18 deletions(-)
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index e1d8776c697..a9952b1236b 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -22,11 +22,16 @@
 #include <asm/i387.h>
 #include "padlock.h"
 
-/* number of data blocks actually fetched for each xcrypt insn */
+/*
+ * Number of data blocks actually fetched for each xcrypt insn.
+ * Processors with prefetch errata will fetch extra blocks.
+ */
 static unsigned int ecb_fetch_blocks = 2;
-static unsigned int cbc_fetch_blocks = 1;
-
+#define MAX_ECB_FETCH_BLOCKS (8)
 #define ecb_fetch_bytes (ecb_fetch_blocks * AES_BLOCK_SIZE)
+
+static unsigned int cbc_fetch_blocks = 1;
+#define MAX_CBC_FETCH_BLOCKS (4)
 #define cbc_fetch_bytes (cbc_fetch_blocks * AES_BLOCK_SIZE)
 
 /* Control word. */
@@ -180,7 +185,7 @@ static inline void padlock_store_cword(struct cword *cword)
  * should be used only inside the irq_ts_save/restore() context
  */
 
-static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key,
+static inline void rep_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 				  struct cword *control_word, int count)
 {
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
@@ -188,32 +193,65 @@ static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key,
 		      : "d"(control_word), "b"(key), "c"(count));
 }
 
-static void aes_crypt_copy(const u8 *in, u8 *out, u32 *key,
+static inline u8 *rep_xcrypt_cbc(const u8 *input, u8 *output, void *key,
+				 u8 *iv, struct cword *control_word, int count)
+{
+	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
+		      : "+S" (input), "+D" (output), "+a" (iv)
+		      : "d" (control_word), "b" (key), "c" (count));
+	return iv;
+}
+
+static void ecb_crypt_copy(const u8 *in, u8 *out, u32 *key,
 			   struct cword *cword, int count)
 {
 	/*
 	 * Padlock prefetches extra data so we must provide mapped input buffers.
 	 * Assume there are at least 16 bytes of stack already in use.
 	 */
-	u8 buf[AES_BLOCK_SIZE * 7 + PADLOCK_ALIGNMENT - 1];
+	u8 buf[AES_BLOCK_SIZE * (MAX_ECB_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
 	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
 
 	memcpy(tmp, in, count * AES_BLOCK_SIZE);
-	padlock_xcrypt(tmp, out, key, cword, count);
+	rep_xcrypt_ecb(tmp, out, key, cword, count);
 }
 
-static inline void aes_crypt(const u8 *in, u8 *out, u32 *key,
+static u8 *cbc_crypt_copy(const u8 *in, u8 *out, u32 *key,
+			  u8 *iv, struct cword *cword, int count)
+{
+	/*
+	 * Padlock prefetches extra data so we must provide mapped input buffers.
+	 * Assume there are at least 16 bytes of stack already in use.
+	 */
+	u8 buf[AES_BLOCK_SIZE * (MAX_CBC_FETCH_BLOCKS - 1) + PADLOCK_ALIGNMENT - 1];
+	u8 *tmp = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
+
+	memcpy(tmp, in, count * AES_BLOCK_SIZE);
+	return rep_xcrypt_cbc(tmp, out, key, iv, cword, count);
+}
+
+static inline void ecb_crypt(const u8 *in, u8 *out, u32 *key,
 			     struct cword *cword, int count)
 {
 	/* Padlock in ECB mode fetches at least ecb_fetch_bytes of data.
 	 * We could avoid some copying here but it's probably not worth it.
 	 */
 	if (unlikely(((unsigned long)in & PAGE_SIZE) + ecb_fetch_bytes > PAGE_SIZE)) {
-		aes_crypt_copy(in, out, key, cword, count);
+		ecb_crypt_copy(in, out, key, cword, count);
 		return;
 	}
 
-	padlock_xcrypt(in, out, key, cword, count);
+	rep_xcrypt_ecb(in, out, key, cword, count);
+}
+
+static inline u8 *cbc_crypt(const u8 *in, u8 *out, u32 *key,
+			    u8 *iv, struct cword *cword, int count)
+{
+	/* Padlock in CBC mode fetches at least cbc_fetch_bytes of data. */
+	if (unlikely(((unsigned long)in & PAGE_SIZE) + cbc_fetch_bytes > PAGE_SIZE))
+		return cbc_crypt_copy(in, out, key, iv, cword, count);
+
+	return rep_xcrypt_cbc(in, out, key, iv, cword, count);
 }
 
 static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
@@ -222,7 +260,7 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 	u32 initial = count & (ecb_fetch_blocks - 1);
 
 	if (count < ecb_fetch_blocks) {
-		aes_crypt(input, output, key, control_word, count);
+		ecb_crypt(input, output, key, control_word, count);
 		return;
 	}
 
@@ -239,10 +277,19 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
 static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 				     u8 *iv, void *control_word, u32 count)
 {
-	/* rep xcryptcbc */
-	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"
+	u32 initial = count & (cbc_fetch_blocks - 1);
+
+	if (count < cbc_fetch_blocks)
+		return cbc_crypt(input, output, key, iv, control_word, count);
+
+	if (initial)
+		asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
+			      : "+S" (input), "+D" (output), "+a" (iv)
+			      : "d" (control_word), "b" (key), "c" (initial));
+
+	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"	/* rep xcryptcbc */
 		      : "+S" (input), "+D" (output), "+a" (iv)
-		      : "d" (control_word), "b" (key), "c" (count));
+		      : "d" (control_word), "b" (key), "c" (count-initial));
 	return iv;
 }
 
@@ -253,7 +300,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 
 	padlock_reset_key(&ctx->cword.encrypt);
 	ts_state = irq_ts_save();
-	aes_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
+	ecb_crypt(in, out, ctx->E, &ctx->cword.encrypt, 1);
 	irq_ts_restore(ts_state);
 	padlock_store_cword(&ctx->cword.encrypt);
 }
@@ -265,7 +312,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 
 	padlock_reset_key(&ctx->cword.encrypt);
 	ts_state = irq_ts_save();
-	aes_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
+	ecb_crypt(in, out, ctx->D, &ctx->cword.decrypt, 1);
 	irq_ts_restore(ts_state);
 	padlock_store_cword(&ctx->cword.encrypt);
 }
@@ -482,8 +529,8 @@ static int __init padlock_init(void)
 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");
 
 	if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
-		ecb_fetch_blocks = 8;
-		cbc_fetch_blocks = 4; /* NOTE: notused */
+		ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
+		cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
 		printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
 	}
 
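
A note on the new padlock_xcrypt_cbc chunking above: as in the earlier ECB workaround, the request is split into a short leading run of initial = count & (cbc_fetch_blocks - 1) blocks followed by a remainder that is a multiple of cbc_fetch_blocks; CBC chaining carries across the two rep xcryptcbc invocations because the second one picks up the IV pointer left in the register bound by "+a" (iv) by the first. A tiny stand-alone sketch of the split arithmetic, with made-up numbers:

#include <stdio.h>

int main(void)
{
	/* cbc_fetch_blocks as set by the Nano stepping 2 workaround. */
	unsigned int cbc_fetch_blocks = 4;
	unsigned int count = 13;	/* example request: 13 AES blocks */
	unsigned int initial = count & (cbc_fetch_blocks - 1);

	/* Prints "initial=1 remainder=12": one short run, then 12 blocks. */
	printf("initial=%u remainder=%u\n", initial, count - initial);
	return 0;
}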