 drivers/char/hw_random/via-rng.c |  8 ++++++++
 drivers/crypto/padlock-aes.c     | 28 +++++++++++++++++++++++++++-
 drivers/crypto/padlock-sha.c     |  9 +++++++++
 include/asm-x86/i387.h           | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index f7feae4ebb5e..128202e18fc9 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -31,6 +31,7 @@
 #include <asm/io.h>
 #include <asm/msr.h>
 #include <asm/cpufeature.h>
+#include <asm/i387.h>
 
 
 #define PFX	KBUILD_MODNAME ": "
@@ -67,16 +68,23 @@ enum {
  * Another possible performance boost may come from simply buffering
  * until we have 4 bytes, thus returning a u32 at a time,
  * instead of the current u8-at-a-time.
+ *
+ * Padlock instructions can generate a spurious DNA fault, so
+ * we have to call them in the context of irq_ts_save/restore()
  */
 
 static inline u32 xstore(u32 *addr, u32 edx_in)
 {
 	u32 eax_out;
+	int ts_state;
+
+	ts_state = irq_ts_save();
 
 	asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */"
 		:"=m"(*addr), "=a"(eax_out)
 		:"D"(addr), "d"(edx_in));
 
+	irq_ts_restore(ts_state);
 	return eax_out;
 }
 
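The hunk above establishes the pattern the rest of the patch repeats: bracket each PadLock instruction with irq_ts_save()/irq_ts_restore() so the instruction cannot take a device-not-available (DNA) fault while CR0.TS is set. A minimal sketch of that calling convention follows; it is illustrative only and not part of the patch, and padlock_op() is a hypothetical stand-in for the real xstore/xcrypt/xsha wrappers:

	static inline u32 padlock_op_wrapped(u32 *addr, u32 edx_in)
	{
		int ts_state;
		u32 ret;

		/* Clears CR0.TS, but only when called from interrupt context. */
		ts_state = irq_ts_save();

		/* With TS clear, the PadLock instruction cannot raise a DNA fault. */
		ret = padlock_op(addr, edx_in);

		/* Sets TS again only if irq_ts_save() actually cleared it. */
		irq_ts_restore(ts_state);

		return ret;
	}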
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 54a2a166e566..bf2917d197a0 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <asm/byteorder.h>
+#include <asm/i387.h>
 #include "padlock.h"
 
 /* Control word. */
@@ -141,6 +142,12 @@ static inline void padlock_reset_key(void)
 	asm volatile ("pushfl; popfl");
 }
 
+/*
+ * While the padlock instructions don't use FP/SSE registers, they
+ * generate a spurious DNA fault when cr0.ts is '1'. These instructions
+ * should be used only inside the irq_ts_save/restore() context
+ */
+
 static inline void padlock_xcrypt(const u8 *input, u8 *output, void *key,
 				  void *control_word)
 {
@@ -205,15 +212,23 @@ static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct aes_ctx *ctx = aes_ctx(tfm);
+	int ts_state;
 	padlock_reset_key();
+
+	ts_state = irq_ts_save();
 	aes_crypt(in, out, ctx->E, &ctx->cword.encrypt);
+	irq_ts_restore(ts_state);
 }
 
 static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct aes_ctx *ctx = aes_ctx(tfm);
+	int ts_state;
 	padlock_reset_key();
+
+	ts_state = irq_ts_save();
 	aes_crypt(in, out, ctx->D, &ctx->cword.decrypt);
+	irq_ts_restore(ts_state);
 }
 
 static struct crypto_alg aes_alg = {
@@ -244,12 +259,14 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
 	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 	int err;
+	int ts_state;
 
 	padlock_reset_key();
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
+	ts_state = irq_ts_save();
 	while ((nbytes = walk.nbytes)) {
 		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
 				   ctx->E, &ctx->cword.encrypt,
@@ -257,6 +274,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
+	irq_ts_restore(ts_state);
 
 	return err;
 }
@@ -268,12 +286,14 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
 	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 	int err;
+	int ts_state;
 
 	padlock_reset_key();
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
+	ts_state = irq_ts_save();
 	while ((nbytes = walk.nbytes)) {
 		padlock_xcrypt_ecb(walk.src.virt.addr, walk.dst.virt.addr,
 				   ctx->D, &ctx->cword.decrypt,
@@ -281,7 +301,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
-
+	irq_ts_restore(ts_state);
 	return err;
 }
 
@@ -314,12 +334,14 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
 	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 	int err;
+	int ts_state;
 
 	padlock_reset_key();
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
+	ts_state = irq_ts_save();
 	while ((nbytes = walk.nbytes)) {
 		u8 *iv = padlock_xcrypt_cbc(walk.src.virt.addr,
 					    walk.dst.virt.addr, ctx->E,
@@ -329,6 +351,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
+	irq_ts_restore(ts_state);
 
 	return err;
 }
@@ -340,12 +363,14 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 	struct aes_ctx *ctx = blk_aes_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 	int err;
+	int ts_state;
 
 	padlock_reset_key();
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	err = blkcipher_walk_virt(desc, &walk);
 
+	ts_state = irq_ts_save();
 	while ((nbytes = walk.nbytes)) {
 		padlock_xcrypt_cbc(walk.src.virt.addr, walk.dst.virt.addr,
 				   ctx->D, walk.iv, &ctx->cword.decrypt,
@@ -354,6 +379,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
 
+	irq_ts_restore(ts_state);
 	return err;
 }
 
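Note where the save/restore lands in the blkcipher paths above: rather than wrapping each padlock_xcrypt_ecb()/padlock_xcrypt_cbc() call, the pair brackets the whole walk loop, so CR0 is inspected once per request instead of once per chunk. Schematically (a sketch of the placement only, assuming the helpers this patch introduces):

	ts_state = irq_ts_save();
	while ((nbytes = walk.nbytes)) {
		/* per-chunk PadLock xcrypt work on walk.src/walk.dst ... */
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	irq_ts_restore(ts_state);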
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 40d5680fa013..a7fbadebf623 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/scatterlist.h>
+#include <asm/i387.h>
 #include "padlock.h"
 
 #define SHA1_DEFAULT_FALLBACK	"sha1-generic"
@@ -102,6 +103,7 @@ static void padlock_do_sha1(const char *in, char *out, int count)
 	 * PadLock microcode needs it that big. */
 	char buf[128+16];
 	char *result = NEAREST_ALIGNED(buf);
+	int ts_state;
 
 	((uint32_t *)result)[0] = SHA1_H0;
 	((uint32_t *)result)[1] = SHA1_H1;
@@ -109,9 +111,12 @@ static void padlock_do_sha1(const char *in, char *out, int count)
 	((uint32_t *)result)[3] = SHA1_H3;
 	((uint32_t *)result)[4] = SHA1_H4;
 
+	/* prevent taking the spurious DNA fault with padlock. */
+	ts_state = irq_ts_save();
 	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
 		      : "+S"(in), "+D"(result)
 		      : "c"(count), "a"(0));
+	irq_ts_restore(ts_state);
 
 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
 }
@@ -123,6 +128,7 @@ static void padlock_do_sha256(const char *in, char *out, int count)
 	 * PadLock microcode needs it that big. */
 	char buf[128+16];
 	char *result = NEAREST_ALIGNED(buf);
+	int ts_state;
 
 	((uint32_t *)result)[0] = SHA256_H0;
 	((uint32_t *)result)[1] = SHA256_H1;
@@ -133,9 +139,12 @@ static void padlock_do_sha256(const char *in, char *out, int count)
 	((uint32_t *)result)[6] = SHA256_H6;
 	((uint32_t *)result)[7] = SHA256_H7;
 
+	/* prevent taking the spurious DNA fault with padlock. */
+	ts_state = irq_ts_save();
 	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
 		      : "+S"(in), "+D"(result)
 		      : "c"(count), "a"(0));
+	irq_ts_restore(ts_state);
 
 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
 }
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 96fa8449ff11..6d3b21063419 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/regset.h>
+#include <linux/hardirq.h>
 #include <asm/asm.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
@@ -236,6 +237,37 @@ static inline void kernel_fpu_end(void)
 	preempt_enable();
 }
 
+/*
+ * Some instructions like VIA's padlock instructions generate a spurious
+ * DNA fault but don't modify SSE registers. And these instructions get
+ * used from interrupt context as well. To prevent these kernel instructions
+ * in interrupt context from interacting wrongly with other user/kernel fpu
+ * usage, we should use them only in the context of irq_ts_save/restore()
+ */
+static inline int irq_ts_save(void)
+{
+	/*
+	 * If we are in process context, we are ok to take a spurious DNA fault.
+	 * Otherwise, doing clts() in process context requires pre-emption to
+	 * be disabled or some heavy lifting like kernel_fpu_begin()
+	 */
+	if (!in_interrupt())
+		return 0;
+
+	if (read_cr0() & X86_CR0_TS) {
+		clts();
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline void irq_ts_restore(int TS_state)
+{
+	if (TS_state)
+		stts();
+}
+
 #ifdef CONFIG_X86_64
 
 static inline void save_init_fpu(struct task_struct *tsk)
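The contract of the new helpers is easy to miss: TS is only ever cleared when the caller is in interrupt context, and only a save that actually cleared TS arms the matching restore; in process context the spurious DNA fault is simply tolerated. The following small user-space model illustrates that contract (an illustration only, not kernel code; cr0_ts and in_irq stand in for the real read_cr0()/clts()/stts() and in_interrupt()):

	#include <assert.h>
	#include <stdbool.h>

	static bool cr0_ts;	/* models the CR0.TS bit */
	static bool in_irq;	/* models in_interrupt() */

	static int model_irq_ts_save(void)
	{
		if (!in_irq)
			return 0;	/* process context: tolerate the DNA fault */
		if (cr0_ts) {
			cr0_ts = false;	/* clts() */
			return 1;
		}
		return 0;
	}

	static void model_irq_ts_restore(int ts_state)
	{
		if (ts_state)
			cr0_ts = true;	/* stts() */
	}

	int main(void)
	{
		int s;

		/* Interrupt context with TS set: save clears it, restore re-sets it. */
		in_irq = true;
		cr0_ts = true;
		s = model_irq_ts_save();
		assert(s == 1 && !cr0_ts);	/* a PadLock insn would not fault here */
		model_irq_ts_restore(s);
		assert(cr0_ts);			/* lazy-FPU state left as we found it */

		/* Process context: the pair is a no-op. */
		in_irq = false;
		s = model_irq_ts_save();
		assert(s == 0 && cr0_ts);
		return 0;
	}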
