diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-22 12:15:27 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-22 12:15:27 -0400 |
commit | 7a6d0071d821965349ff853041f1c1aab496f2d9 (patch) | |
tree | cd0b109b9f3bcb6bdf8993293ca417a7ff2d2208 | |
parent | 6e80ecdddf4ea6f3cd84e83720f3d852e6624a68 (diff) | |
parent | e117765a117da3ece15689cb8a759d16c415b08c (diff) |
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto fixes from Herbert Xu:
- Fix compiler warnings in inside-secure
- Fix LS1021A support in caam
- Avoid using RBP in x86 crypto code
- Fix bug in talitos that prevents hashing with algif
- Fix bugs in talitos hashing code that cause incorrect hash results
- Fix memory freeing path bug in drbg
- Fix af_alg crash when two SG lists are chained
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
crypto: af_alg - update correct dst SGL entry
crypto: caam - fix LS1021A support on ARMv7 multiplatform kernel
crypto: inside-secure - fix gcc-4.9 warnings
crypto: talitos - Don't provide setkey for non hmac hashing algs
crypto: talitos - fix hashing
crypto: talitos - fix sha224
crypto: x86/twofish - Fix RBP usage
crypto: sha512-avx2 - Fix RBP usage
crypto: x86/sha256-ssse3 - Fix RBP usage
crypto: x86/sha256-avx2 - Fix RBP usage
crypto: x86/sha256-avx - Fix RBP usage
crypto: x86/sha1-ssse3 - Fix RBP usage
crypto: x86/sha1-avx2 - Fix RBP usage
crypto: x86/des3_ede - Fix RBP usage
crypto: x86/cast6 - Fix RBP usage
crypto: x86/cast5 - Fix RBP usage
crypto: x86/camellia - Fix RBP usage
crypto: x86/blowfish - Fix RBP usage
crypto: drbg - fix freeing of resources
-rw-r--r-- | arch/x86/crypto/blowfish-x86_64-asm_64.S | 48 | ||||
-rw-r--r-- | arch/x86/crypto/camellia-x86_64-asm_64.S | 26 | ||||
-rw-r--r-- | arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 47 | ||||
-rw-r--r-- | arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 50 | ||||
-rw-r--r-- | arch/x86/crypto/des3_ede-asm_64.S | 15 | ||||
-rw-r--r-- | arch/x86/crypto/sha1_avx2_x86_64_asm.S | 4 | ||||
-rw-r--r-- | arch/x86/crypto/sha1_ssse3_asm.S | 11 | ||||
-rw-r--r-- | arch/x86/crypto/sha256-avx-asm.S | 15 | ||||
-rw-r--r-- | arch/x86/crypto/sha256-avx2-asm.S | 22 | ||||
-rw-r--r-- | arch/x86/crypto/sha256-ssse3-asm.S | 15 | ||||
-rw-r--r-- | arch/x86/crypto/sha512-avx2-asm.S | 75 | ||||
-rw-r--r-- | arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 12 | ||||
-rw-r--r-- | crypto/af_alg.c | 4 | ||||
-rw-r--r-- | crypto/drbg.c | 8 | ||||
-rw-r--r-- | drivers/crypto/caam/Kconfig | 5 | ||||
-rw-r--r-- | drivers/crypto/caam/ctrl.c | 19 | ||||
-rw-r--r-- | drivers/crypto/caam/regs.h | 59 | ||||
-rw-r--r-- | drivers/crypto/inside-secure/safexcel_cipher.c | 2 | ||||
-rw-r--r-- | drivers/crypto/inside-secure/safexcel_hash.c | 2 | ||||
-rw-r--r-- | drivers/crypto/talitos.c | 9 |
20 files changed, 236 insertions, 212 deletions
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 246c67006ed0..8c1fcb6bad21 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S | |||
@@ -33,7 +33,7 @@ | |||
33 | #define s3 ((16 + 2 + (3 * 256)) * 4) | 33 | #define s3 ((16 + 2 + (3 * 256)) * 4) |
34 | 34 | ||
35 | /* register macros */ | 35 | /* register macros */ |
36 | #define CTX %rdi | 36 | #define CTX %r12 |
37 | #define RIO %rsi | 37 | #define RIO %rsi |
38 | 38 | ||
39 | #define RX0 %rax | 39 | #define RX0 %rax |
@@ -56,12 +56,12 @@ | |||
56 | #define RX2bh %ch | 56 | #define RX2bh %ch |
57 | #define RX3bh %dh | 57 | #define RX3bh %dh |
58 | 58 | ||
59 | #define RT0 %rbp | 59 | #define RT0 %rdi |
60 | #define RT1 %rsi | 60 | #define RT1 %rsi |
61 | #define RT2 %r8 | 61 | #define RT2 %r8 |
62 | #define RT3 %r9 | 62 | #define RT3 %r9 |
63 | 63 | ||
64 | #define RT0d %ebp | 64 | #define RT0d %edi |
65 | #define RT1d %esi | 65 | #define RT1d %esi |
66 | #define RT2d %r8d | 66 | #define RT2d %r8d |
67 | #define RT3d %r9d | 67 | #define RT3d %r9d |
@@ -120,13 +120,14 @@ | |||
120 | 120 | ||
121 | ENTRY(__blowfish_enc_blk) | 121 | ENTRY(__blowfish_enc_blk) |
122 | /* input: | 122 | /* input: |
123 | * %rdi: ctx, CTX | 123 | * %rdi: ctx |
124 | * %rsi: dst | 124 | * %rsi: dst |
125 | * %rdx: src | 125 | * %rdx: src |
126 | * %rcx: bool, if true: xor output | 126 | * %rcx: bool, if true: xor output |
127 | */ | 127 | */ |
128 | movq %rbp, %r11; | 128 | movq %r12, %r11; |
129 | 129 | ||
130 | movq %rdi, CTX; | ||
130 | movq %rsi, %r10; | 131 | movq %rsi, %r10; |
131 | movq %rdx, RIO; | 132 | movq %rdx, RIO; |
132 | 133 | ||
@@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk) | |||
142 | round_enc(14); | 143 | round_enc(14); |
143 | add_roundkey_enc(16); | 144 | add_roundkey_enc(16); |
144 | 145 | ||
145 | movq %r11, %rbp; | 146 | movq %r11, %r12; |
146 | 147 | ||
147 | movq %r10, RIO; | 148 | movq %r10, RIO; |
148 | test %cl, %cl; | 149 | test %cl, %cl; |
@@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk) | |||
157 | 158 | ||
158 | ENTRY(blowfish_dec_blk) | 159 | ENTRY(blowfish_dec_blk) |
159 | /* input: | 160 | /* input: |
160 | * %rdi: ctx, CTX | 161 | * %rdi: ctx |
161 | * %rsi: dst | 162 | * %rsi: dst |
162 | * %rdx: src | 163 | * %rdx: src |
163 | */ | 164 | */ |
164 | movq %rbp, %r11; | 165 | movq %r12, %r11; |
165 | 166 | ||
167 | movq %rdi, CTX; | ||
166 | movq %rsi, %r10; | 168 | movq %rsi, %r10; |
167 | movq %rdx, RIO; | 169 | movq %rdx, RIO; |
168 | 170 | ||
@@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk) | |||
181 | movq %r10, RIO; | 183 | movq %r10, RIO; |
182 | write_block(); | 184 | write_block(); |
183 | 185 | ||
184 | movq %r11, %rbp; | 186 | movq %r11, %r12; |
185 | 187 | ||
186 | ret; | 188 | ret; |
187 | ENDPROC(blowfish_dec_blk) | 189 | ENDPROC(blowfish_dec_blk) |
@@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk) | |||
298 | 300 | ||
299 | ENTRY(__blowfish_enc_blk_4way) | 301 | ENTRY(__blowfish_enc_blk_4way) |
300 | /* input: | 302 | /* input: |
301 | * %rdi: ctx, CTX | 303 | * %rdi: ctx |
302 | * %rsi: dst | 304 | * %rsi: dst |
303 | * %rdx: src | 305 | * %rdx: src |
304 | * %rcx: bool, if true: xor output | 306 | * %rcx: bool, if true: xor output |
305 | */ | 307 | */ |
306 | pushq %rbp; | 308 | pushq %r12; |
307 | pushq %rbx; | 309 | pushq %rbx; |
308 | pushq %rcx; | 310 | pushq %rcx; |
309 | 311 | ||
310 | preload_roundkey_enc(0); | 312 | movq %rdi, CTX |
311 | |||
312 | movq %rsi, %r11; | 313 | movq %rsi, %r11; |
313 | movq %rdx, RIO; | 314 | movq %rdx, RIO; |
314 | 315 | ||
316 | preload_roundkey_enc(0); | ||
317 | |||
315 | read_block4(); | 318 | read_block4(); |
316 | 319 | ||
317 | round_enc4(0); | 320 | round_enc4(0); |
@@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way) | |||
324 | round_enc4(14); | 327 | round_enc4(14); |
325 | add_preloaded_roundkey4(); | 328 | add_preloaded_roundkey4(); |
326 | 329 | ||
327 | popq %rbp; | 330 | popq %r12; |
328 | movq %r11, RIO; | 331 | movq %r11, RIO; |
329 | 332 | ||
330 | test %bpl, %bpl; | 333 | test %r12b, %r12b; |
331 | jnz .L__enc_xor4; | 334 | jnz .L__enc_xor4; |
332 | 335 | ||
333 | write_block4(); | 336 | write_block4(); |
334 | 337 | ||
335 | popq %rbx; | 338 | popq %rbx; |
336 | popq %rbp; | 339 | popq %r12; |
337 | ret; | 340 | ret; |
338 | 341 | ||
339 | .L__enc_xor4: | 342 | .L__enc_xor4: |
340 | xor_block4(); | 343 | xor_block4(); |
341 | 344 | ||
342 | popq %rbx; | 345 | popq %rbx; |
343 | popq %rbp; | 346 | popq %r12; |
344 | ret; | 347 | ret; |
345 | ENDPROC(__blowfish_enc_blk_4way) | 348 | ENDPROC(__blowfish_enc_blk_4way) |
346 | 349 | ||
347 | ENTRY(blowfish_dec_blk_4way) | 350 | ENTRY(blowfish_dec_blk_4way) |
348 | /* input: | 351 | /* input: |
349 | * %rdi: ctx, CTX | 352 | * %rdi: ctx |
350 | * %rsi: dst | 353 | * %rsi: dst |
351 | * %rdx: src | 354 | * %rdx: src |
352 | */ | 355 | */ |
353 | pushq %rbp; | 356 | pushq %r12; |
354 | pushq %rbx; | 357 | pushq %rbx; |
355 | preload_roundkey_dec(17); | ||
356 | 358 | ||
357 | movq %rsi, %r11; | 359 | movq %rdi, CTX; |
360 | movq %rsi, %r11 | ||
358 | movq %rdx, RIO; | 361 | movq %rdx, RIO; |
359 | 362 | ||
363 | preload_roundkey_dec(17); | ||
360 | read_block4(); | 364 | read_block4(); |
361 | 365 | ||
362 | round_dec4(17); | 366 | round_dec4(17); |
@@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way) | |||
373 | write_block4(); | 377 | write_block4(); |
374 | 378 | ||
375 | popq %rbx; | 379 | popq %rbx; |
376 | popq %rbp; | 380 | popq %r12; |
377 | 381 | ||
378 | ret; | 382 | ret; |
379 | ENDPROC(blowfish_dec_blk_4way) | 383 | ENDPROC(blowfish_dec_blk_4way) |
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 310319c601ed..95ba6956a7f6 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S | |||
@@ -75,17 +75,17 @@ | |||
75 | #define RCD1bh %dh | 75 | #define RCD1bh %dh |
76 | 76 | ||
77 | #define RT0 %rsi | 77 | #define RT0 %rsi |
78 | #define RT1 %rbp | 78 | #define RT1 %r12 |
79 | #define RT2 %r8 | 79 | #define RT2 %r8 |
80 | 80 | ||
81 | #define RT0d %esi | 81 | #define RT0d %esi |
82 | #define RT1d %ebp | 82 | #define RT1d %r12d |
83 | #define RT2d %r8d | 83 | #define RT2d %r8d |
84 | 84 | ||
85 | #define RT2bl %r8b | 85 | #define RT2bl %r8b |
86 | 86 | ||
87 | #define RXOR %r9 | 87 | #define RXOR %r9 |
88 | #define RRBP %r10 | 88 | #define RR12 %r10 |
89 | #define RDST %r11 | 89 | #define RDST %r11 |
90 | 90 | ||
91 | #define RXORd %r9d | 91 | #define RXORd %r9d |
@@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk) | |||
197 | * %rdx: src | 197 | * %rdx: src |
198 | * %rcx: bool xor | 198 | * %rcx: bool xor |
199 | */ | 199 | */ |
200 | movq %rbp, RRBP; | 200 | movq %r12, RR12; |
201 | 201 | ||
202 | movq %rcx, RXOR; | 202 | movq %rcx, RXOR; |
203 | movq %rsi, RDST; | 203 | movq %rsi, RDST; |
@@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk) | |||
227 | 227 | ||
228 | enc_outunpack(mov, RT1); | 228 | enc_outunpack(mov, RT1); |
229 | 229 | ||
230 | movq RRBP, %rbp; | 230 | movq RR12, %r12; |
231 | ret; | 231 | ret; |
232 | 232 | ||
233 | .L__enc_xor: | 233 | .L__enc_xor: |
234 | enc_outunpack(xor, RT1); | 234 | enc_outunpack(xor, RT1); |
235 | 235 | ||
236 | movq RRBP, %rbp; | 236 | movq RR12, %r12; |
237 | ret; | 237 | ret; |
238 | ENDPROC(__camellia_enc_blk) | 238 | ENDPROC(__camellia_enc_blk) |
239 | 239 | ||
@@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk) | |||
248 | movl $24, RXORd; | 248 | movl $24, RXORd; |
249 | cmovel RXORd, RT2d; /* max */ | 249 | cmovel RXORd, RT2d; /* max */ |
250 | 250 | ||
251 | movq %rbp, RRBP; | 251 | movq %r12, RR12; |
252 | movq %rsi, RDST; | 252 | movq %rsi, RDST; |
253 | movq %rdx, RIO; | 253 | movq %rdx, RIO; |
254 | 254 | ||
@@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk) | |||
271 | 271 | ||
272 | dec_outunpack(); | 272 | dec_outunpack(); |
273 | 273 | ||
274 | movq RRBP, %rbp; | 274 | movq RR12, %r12; |
275 | ret; | 275 | ret; |
276 | ENDPROC(camellia_dec_blk) | 276 | ENDPROC(camellia_dec_blk) |
277 | 277 | ||
@@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way) | |||
433 | */ | 433 | */ |
434 | pushq %rbx; | 434 | pushq %rbx; |
435 | 435 | ||
436 | movq %rbp, RRBP; | 436 | movq %r12, RR12; |
437 | movq %rcx, RXOR; | 437 | movq %rcx, RXOR; |
438 | movq %rsi, RDST; | 438 | movq %rsi, RDST; |
439 | movq %rdx, RIO; | 439 | movq %rdx, RIO; |
@@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way) | |||
461 | 461 | ||
462 | enc_outunpack2(mov, RT2); | 462 | enc_outunpack2(mov, RT2); |
463 | 463 | ||
464 | movq RRBP, %rbp; | 464 | movq RR12, %r12; |
465 | popq %rbx; | 465 | popq %rbx; |
466 | ret; | 466 | ret; |
467 | 467 | ||
468 | .L__enc2_xor: | 468 | .L__enc2_xor: |
469 | enc_outunpack2(xor, RT2); | 469 | enc_outunpack2(xor, RT2); |
470 | 470 | ||
471 | movq RRBP, %rbp; | 471 | movq RR12, %r12; |
472 | popq %rbx; | 472 | popq %rbx; |
473 | ret; | 473 | ret; |
474 | ENDPROC(__camellia_enc_blk_2way) | 474 | ENDPROC(__camellia_enc_blk_2way) |
@@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way) | |||
485 | cmovel RXORd, RT2d; /* max */ | 485 | cmovel RXORd, RT2d; /* max */ |
486 | 486 | ||
487 | movq %rbx, RXOR; | 487 | movq %rbx, RXOR; |
488 | movq %rbp, RRBP; | 488 | movq %r12, RR12; |
489 | movq %rsi, RDST; | 489 | movq %rsi, RDST; |
490 | movq %rdx, RIO; | 490 | movq %rdx, RIO; |
491 | 491 | ||
@@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way) | |||
508 | 508 | ||
509 | dec_outunpack2(); | 509 | dec_outunpack2(); |
510 | 510 | ||
511 | movq RRBP, %rbp; | 511 | movq RR12, %r12; |
512 | movq RXOR, %rbx; | 512 | movq RXOR, %rbx; |
513 | ret; | 513 | ret; |
514 | ENDPROC(camellia_dec_blk_2way) | 514 | ENDPROC(camellia_dec_blk_2way) |
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index b4a8806234ea..86107c961bb4 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S | |||
@@ -47,7 +47,7 @@ | |||
47 | /********************************************************************** | 47 | /********************************************************************** |
48 | 16-way AVX cast5 | 48 | 16-way AVX cast5 |
49 | **********************************************************************/ | 49 | **********************************************************************/ |
50 | #define CTX %rdi | 50 | #define CTX %r15 |
51 | 51 | ||
52 | #define RL1 %xmm0 | 52 | #define RL1 %xmm0 |
53 | #define RR1 %xmm1 | 53 | #define RR1 %xmm1 |
@@ -70,8 +70,8 @@ | |||
70 | 70 | ||
71 | #define RTMP %xmm15 | 71 | #define RTMP %xmm15 |
72 | 72 | ||
73 | #define RID1 %rbp | 73 | #define RID1 %rdi |
74 | #define RID1d %ebp | 74 | #define RID1d %edi |
75 | #define RID2 %rsi | 75 | #define RID2 %rsi |
76 | #define RID2d %esi | 76 | #define RID2d %esi |
77 | 77 | ||
@@ -226,7 +226,7 @@ | |||
226 | .align 16 | 226 | .align 16 |
227 | __cast5_enc_blk16: | 227 | __cast5_enc_blk16: |
228 | /* input: | 228 | /* input: |
229 | * %rdi: ctx, CTX | 229 | * %rdi: ctx |
230 | * RL1: blocks 1 and 2 | 230 | * RL1: blocks 1 and 2 |
231 | * RR1: blocks 3 and 4 | 231 | * RR1: blocks 3 and 4 |
232 | * RL2: blocks 5 and 6 | 232 | * RL2: blocks 5 and 6 |
@@ -246,9 +246,11 @@ __cast5_enc_blk16: | |||
246 | * RR4: encrypted blocks 15 and 16 | 246 | * RR4: encrypted blocks 15 and 16 |
247 | */ | 247 | */ |
248 | 248 | ||
249 | pushq %rbp; | 249 | pushq %r15; |
250 | pushq %rbx; | 250 | pushq %rbx; |
251 | 251 | ||
252 | movq %rdi, CTX; | ||
253 | |||
252 | vmovdqa .Lbswap_mask, RKM; | 254 | vmovdqa .Lbswap_mask, RKM; |
253 | vmovd .Lfirst_mask, R1ST; | 255 | vmovd .Lfirst_mask, R1ST; |
254 | vmovd .L32_mask, R32; | 256 | vmovd .L32_mask, R32; |
@@ -283,7 +285,7 @@ __cast5_enc_blk16: | |||
283 | 285 | ||
284 | .L__skip_enc: | 286 | .L__skip_enc: |
285 | popq %rbx; | 287 | popq %rbx; |
286 | popq %rbp; | 288 | popq %r15; |
287 | 289 | ||
288 | vmovdqa .Lbswap_mask, RKM; | 290 | vmovdqa .Lbswap_mask, RKM; |
289 | 291 | ||
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16) | |||
298 | .align 16 | 300 | .align 16 |
299 | __cast5_dec_blk16: | 301 | __cast5_dec_blk16: |
300 | /* input: | 302 | /* input: |
301 | * %rdi: ctx, CTX | 303 | * %rdi: ctx |
302 | * RL1: encrypted blocks 1 and 2 | 304 | * RL1: encrypted blocks 1 and 2 |
303 | * RR1: encrypted blocks 3 and 4 | 305 | * RR1: encrypted blocks 3 and 4 |
304 | * RL2: encrypted blocks 5 and 6 | 306 | * RL2: encrypted blocks 5 and 6 |
@@ -318,9 +320,11 @@ __cast5_dec_blk16: | |||
318 | * RR4: decrypted blocks 15 and 16 | 320 | * RR4: decrypted blocks 15 and 16 |
319 | */ | 321 | */ |
320 | 322 | ||
321 | pushq %rbp; | 323 | pushq %r15; |
322 | pushq %rbx; | 324 | pushq %rbx; |
323 | 325 | ||
326 | movq %rdi, CTX; | ||
327 | |||
324 | vmovdqa .Lbswap_mask, RKM; | 328 | vmovdqa .Lbswap_mask, RKM; |
325 | vmovd .Lfirst_mask, R1ST; | 329 | vmovd .Lfirst_mask, R1ST; |
326 | vmovd .L32_mask, R32; | 330 | vmovd .L32_mask, R32; |
@@ -356,7 +360,7 @@ __cast5_dec_blk16: | |||
356 | 360 | ||
357 | vmovdqa .Lbswap_mask, RKM; | 361 | vmovdqa .Lbswap_mask, RKM; |
358 | popq %rbx; | 362 | popq %rbx; |
359 | popq %rbp; | 363 | popq %r15; |
360 | 364 | ||
361 | outunpack_blocks(RR1, RL1, RTMP, RX, RKM); | 365 | outunpack_blocks(RR1, RL1, RTMP, RX, RKM); |
362 | outunpack_blocks(RR2, RL2, RTMP, RX, RKM); | 366 | outunpack_blocks(RR2, RL2, RTMP, RX, RKM); |
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16) | |||
372 | 376 | ||
373 | ENTRY(cast5_ecb_enc_16way) | 377 | ENTRY(cast5_ecb_enc_16way) |
374 | /* input: | 378 | /* input: |
375 | * %rdi: ctx, CTX | 379 | * %rdi: ctx |
376 | * %rsi: dst | 380 | * %rsi: dst |
377 | * %rdx: src | 381 | * %rdx: src |
378 | */ | 382 | */ |
379 | FRAME_BEGIN | 383 | FRAME_BEGIN |
384 | pushq %r15; | ||
380 | 385 | ||
386 | movq %rdi, CTX; | ||
381 | movq %rsi, %r11; | 387 | movq %rsi, %r11; |
382 | 388 | ||
383 | vmovdqu (0*4*4)(%rdx), RL1; | 389 | vmovdqu (0*4*4)(%rdx), RL1; |
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way) | |||
400 | vmovdqu RR4, (6*4*4)(%r11); | 406 | vmovdqu RR4, (6*4*4)(%r11); |
401 | vmovdqu RL4, (7*4*4)(%r11); | 407 | vmovdqu RL4, (7*4*4)(%r11); |
402 | 408 | ||
409 | popq %r15; | ||
403 | FRAME_END | 410 | FRAME_END |
404 | ret; | 411 | ret; |
405 | ENDPROC(cast5_ecb_enc_16way) | 412 | ENDPROC(cast5_ecb_enc_16way) |
406 | 413 | ||
407 | ENTRY(cast5_ecb_dec_16way) | 414 | ENTRY(cast5_ecb_dec_16way) |
408 | /* input: | 415 | /* input: |
409 | * %rdi: ctx, CTX | 416 | * %rdi: ctx |
410 | * %rsi: dst | 417 | * %rsi: dst |
411 | * %rdx: src | 418 | * %rdx: src |
412 | */ | 419 | */ |
413 | 420 | ||
414 | FRAME_BEGIN | 421 | FRAME_BEGIN |
422 | pushq %r15; | ||
423 | |||
424 | movq %rdi, CTX; | ||
415 | movq %rsi, %r11; | 425 | movq %rsi, %r11; |
416 | 426 | ||
417 | vmovdqu (0*4*4)(%rdx), RL1; | 427 | vmovdqu (0*4*4)(%rdx), RL1; |
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way) | |||
434 | vmovdqu RR4, (6*4*4)(%r11); | 444 | vmovdqu RR4, (6*4*4)(%r11); |
435 | vmovdqu RL4, (7*4*4)(%r11); | 445 | vmovdqu RL4, (7*4*4)(%r11); |
436 | 446 | ||
447 | popq %r15; | ||
437 | FRAME_END | 448 | FRAME_END |
438 | ret; | 449 | ret; |
439 | ENDPROC(cast5_ecb_dec_16way) | 450 | ENDPROC(cast5_ecb_dec_16way) |
440 | 451 | ||
441 | ENTRY(cast5_cbc_dec_16way) | 452 | ENTRY(cast5_cbc_dec_16way) |
442 | /* input: | 453 | /* input: |
443 | * %rdi: ctx, CTX | 454 | * %rdi: ctx |
444 | * %rsi: dst | 455 | * %rsi: dst |
445 | * %rdx: src | 456 | * %rdx: src |
446 | */ | 457 | */ |
447 | FRAME_BEGIN | 458 | FRAME_BEGIN |
448 | |||
449 | pushq %r12; | 459 | pushq %r12; |
460 | pushq %r15; | ||
450 | 461 | ||
462 | movq %rdi, CTX; | ||
451 | movq %rsi, %r11; | 463 | movq %rsi, %r11; |
452 | movq %rdx, %r12; | 464 | movq %rdx, %r12; |
453 | 465 | ||
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way) | |||
483 | vmovdqu RR4, (6*16)(%r11); | 495 | vmovdqu RR4, (6*16)(%r11); |
484 | vmovdqu RL4, (7*16)(%r11); | 496 | vmovdqu RL4, (7*16)(%r11); |
485 | 497 | ||
498 | popq %r15; | ||
486 | popq %r12; | 499 | popq %r12; |
487 | |||
488 | FRAME_END | 500 | FRAME_END |
489 | ret; | 501 | ret; |
490 | ENDPROC(cast5_cbc_dec_16way) | 502 | ENDPROC(cast5_cbc_dec_16way) |
491 | 503 | ||
492 | ENTRY(cast5_ctr_16way) | 504 | ENTRY(cast5_ctr_16way) |
493 | /* input: | 505 | /* input: |
494 | * %rdi: ctx, CTX | 506 | * %rdi: ctx |
495 | * %rsi: dst | 507 | * %rsi: dst |
496 | * %rdx: src | 508 | * %rdx: src |
497 | * %rcx: iv (big endian, 64bit) | 509 | * %rcx: iv (big endian, 64bit) |
498 | */ | 510 | */ |
499 | FRAME_BEGIN | 511 | FRAME_BEGIN |
500 | |||
501 | pushq %r12; | 512 | pushq %r12; |
513 | pushq %r15; | ||
502 | 514 | ||
515 | movq %rdi, CTX; | ||
503 | movq %rsi, %r11; | 516 | movq %rsi, %r11; |
504 | movq %rdx, %r12; | 517 | movq %rdx, %r12; |
505 | 518 | ||
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way) | |||
558 | vmovdqu RR4, (6*16)(%r11); | 571 | vmovdqu RR4, (6*16)(%r11); |
559 | vmovdqu RL4, (7*16)(%r11); | 572 | vmovdqu RL4, (7*16)(%r11); |
560 | 573 | ||
574 | popq %r15; | ||
561 | popq %r12; | 575 | popq %r12; |
562 | |||
563 | FRAME_END | 576 | FRAME_END |
564 | ret; | 577 | ret; |
565 | ENDPROC(cast5_ctr_16way) | 578 | ENDPROC(cast5_ctr_16way) |
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 952d3156a933..7f30b6f0d72c 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S | |||
@@ -47,7 +47,7 @@ | |||
47 | /********************************************************************** | 47 | /********************************************************************** |
48 | 8-way AVX cast6 | 48 | 8-way AVX cast6 |
49 | **********************************************************************/ | 49 | **********************************************************************/ |
50 | #define CTX %rdi | 50 | #define CTX %r15 |
51 | 51 | ||
52 | #define RA1 %xmm0 | 52 | #define RA1 %xmm0 |
53 | #define RB1 %xmm1 | 53 | #define RB1 %xmm1 |
@@ -70,8 +70,8 @@ | |||
70 | 70 | ||
71 | #define RTMP %xmm15 | 71 | #define RTMP %xmm15 |
72 | 72 | ||
73 | #define RID1 %rbp | 73 | #define RID1 %rdi |
74 | #define RID1d %ebp | 74 | #define RID1d %edi |
75 | #define RID2 %rsi | 75 | #define RID2 %rsi |
76 | #define RID2d %esi | 76 | #define RID2d %esi |
77 | 77 | ||
@@ -264,15 +264,17 @@ | |||
264 | .align 8 | 264 | .align 8 |
265 | __cast6_enc_blk8: | 265 | __cast6_enc_blk8: |
266 | /* input: | 266 | /* input: |
267 | * %rdi: ctx, CTX | 267 | * %rdi: ctx |
268 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks | 268 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks |
269 | * output: | 269 | * output: |
270 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks | 270 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks |
271 | */ | 271 | */ |
272 | 272 | ||
273 | pushq %rbp; | 273 | pushq %r15; |
274 | pushq %rbx; | 274 | pushq %rbx; |
275 | 275 | ||
276 | movq %rdi, CTX; | ||
277 | |||
276 | vmovdqa .Lbswap_mask, RKM; | 278 | vmovdqa .Lbswap_mask, RKM; |
277 | vmovd .Lfirst_mask, R1ST; | 279 | vmovd .Lfirst_mask, R1ST; |
278 | vmovd .L32_mask, R32; | 280 | vmovd .L32_mask, R32; |
@@ -297,7 +299,7 @@ __cast6_enc_blk8: | |||
297 | QBAR(11); | 299 | QBAR(11); |
298 | 300 | ||
299 | popq %rbx; | 301 | popq %rbx; |
300 | popq %rbp; | 302 | popq %r15; |
301 | 303 | ||
302 | vmovdqa .Lbswap_mask, RKM; | 304 | vmovdqa .Lbswap_mask, RKM; |
303 | 305 | ||
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8) | |||
310 | .align 8 | 312 | .align 8 |
311 | __cast6_dec_blk8: | 313 | __cast6_dec_blk8: |
312 | /* input: | 314 | /* input: |
313 | * %rdi: ctx, CTX | 315 | * %rdi: ctx |
314 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks | 316 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks |
315 | * output: | 317 | * output: |
316 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks | 318 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks |
317 | */ | 319 | */ |
318 | 320 | ||
319 | pushq %rbp; | 321 | pushq %r15; |
320 | pushq %rbx; | 322 | pushq %rbx; |
321 | 323 | ||
324 | movq %rdi, CTX; | ||
325 | |||
322 | vmovdqa .Lbswap_mask, RKM; | 326 | vmovdqa .Lbswap_mask, RKM; |
323 | vmovd .Lfirst_mask, R1ST; | 327 | vmovd .Lfirst_mask, R1ST; |
324 | vmovd .L32_mask, R32; | 328 | vmovd .L32_mask, R32; |
@@ -343,7 +347,7 @@ __cast6_dec_blk8: | |||
343 | QBAR(0); | 347 | QBAR(0); |
344 | 348 | ||
345 | popq %rbx; | 349 | popq %rbx; |
346 | popq %rbp; | 350 | popq %r15; |
347 | 351 | ||
348 | vmovdqa .Lbswap_mask, RKM; | 352 | vmovdqa .Lbswap_mask, RKM; |
349 | outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); | 353 | outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); |
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8) | |||
354 | 358 | ||
355 | ENTRY(cast6_ecb_enc_8way) | 359 | ENTRY(cast6_ecb_enc_8way) |
356 | /* input: | 360 | /* input: |
357 | * %rdi: ctx, CTX | 361 | * %rdi: ctx |
358 | * %rsi: dst | 362 | * %rsi: dst |
359 | * %rdx: src | 363 | * %rdx: src |
360 | */ | 364 | */ |
361 | FRAME_BEGIN | 365 | FRAME_BEGIN |
366 | pushq %r15; | ||
362 | 367 | ||
368 | movq %rdi, CTX; | ||
363 | movq %rsi, %r11; | 369 | movq %rsi, %r11; |
364 | 370 | ||
365 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 371 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way) | |||
368 | 374 | ||
369 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 375 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
370 | 376 | ||
377 | popq %r15; | ||
371 | FRAME_END | 378 | FRAME_END |
372 | ret; | 379 | ret; |
373 | ENDPROC(cast6_ecb_enc_8way) | 380 | ENDPROC(cast6_ecb_enc_8way) |
374 | 381 | ||
375 | ENTRY(cast6_ecb_dec_8way) | 382 | ENTRY(cast6_ecb_dec_8way) |
376 | /* input: | 383 | /* input: |
377 | * %rdi: ctx, CTX | 384 | * %rdi: ctx |
378 | * %rsi: dst | 385 | * %rsi: dst |
379 | * %rdx: src | 386 | * %rdx: src |
380 | */ | 387 | */ |
381 | FRAME_BEGIN | 388 | FRAME_BEGIN |
389 | pushq %r15; | ||
382 | 390 | ||
391 | movq %rdi, CTX; | ||
383 | movq %rsi, %r11; | 392 | movq %rsi, %r11; |
384 | 393 | ||
385 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 394 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way) | |||
388 | 397 | ||
389 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 398 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
390 | 399 | ||
400 | popq %r15; | ||
391 | FRAME_END | 401 | FRAME_END |
392 | ret; | 402 | ret; |
393 | ENDPROC(cast6_ecb_dec_8way) | 403 | ENDPROC(cast6_ecb_dec_8way) |
394 | 404 | ||
395 | ENTRY(cast6_cbc_dec_8way) | 405 | ENTRY(cast6_cbc_dec_8way) |
396 | /* input: | 406 | /* input: |
397 | * %rdi: ctx, CTX | 407 | * %rdi: ctx |
398 | * %rsi: dst | 408 | * %rsi: dst |
399 | * %rdx: src | 409 | * %rdx: src |
400 | */ | 410 | */ |
401 | FRAME_BEGIN | 411 | FRAME_BEGIN |
402 | |||
403 | pushq %r12; | 412 | pushq %r12; |
413 | pushq %r15; | ||
404 | 414 | ||
415 | movq %rdi, CTX; | ||
405 | movq %rsi, %r11; | 416 | movq %rsi, %r11; |
406 | movq %rdx, %r12; | 417 | movq %rdx, %r12; |
407 | 418 | ||
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way) | |||
411 | 422 | ||
412 | store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 423 | store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
413 | 424 | ||
425 | popq %r15; | ||
414 | popq %r12; | 426 | popq %r12; |
415 | |||
416 | FRAME_END | 427 | FRAME_END |
417 | ret; | 428 | ret; |
418 | ENDPROC(cast6_cbc_dec_8way) | 429 | ENDPROC(cast6_cbc_dec_8way) |
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way) | |||
425 | * %rcx: iv (little endian, 128bit) | 436 | * %rcx: iv (little endian, 128bit) |
426 | */ | 437 | */ |
427 | FRAME_BEGIN | 438 | FRAME_BEGIN |
428 | |||
429 | pushq %r12; | 439 | pushq %r12; |
440 | pushq %r15 | ||
430 | 441 | ||
442 | movq %rdi, CTX; | ||
431 | movq %rsi, %r11; | 443 | movq %rsi, %r11; |
432 | movq %rdx, %r12; | 444 | movq %rdx, %r12; |
433 | 445 | ||
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way) | |||
438 | 450 | ||
439 | store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 451 | store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
440 | 452 | ||
453 | popq %r15; | ||
441 | popq %r12; | 454 | popq %r12; |
442 | |||
443 | FRAME_END | 455 | FRAME_END |
444 | ret; | 456 | ret; |
445 | ENDPROC(cast6_ctr_8way) | 457 | ENDPROC(cast6_ctr_8way) |
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way) | |||
452 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | 464 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) |
453 | */ | 465 | */ |
454 | FRAME_BEGIN | 466 | FRAME_BEGIN |
467 | pushq %r15; | ||
455 | 468 | ||
469 | movq %rdi, CTX | ||
456 | movq %rsi, %r11; | 470 | movq %rsi, %r11; |
457 | 471 | ||
458 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | 472 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ |
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way) | |||
464 | /* dst <= regs xor IVs(in dst) */ | 478 | /* dst <= regs xor IVs(in dst) */ |
465 | store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 479 | store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
466 | 480 | ||
481 | popq %r15; | ||
467 | FRAME_END | 482 | FRAME_END |
468 | ret; | 483 | ret; |
469 | ENDPROC(cast6_xts_enc_8way) | 484 | ENDPROC(cast6_xts_enc_8way) |
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way) | |||
476 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | 491 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) |
477 | */ | 492 | */ |
478 | FRAME_BEGIN | 493 | FRAME_BEGIN |
494 | pushq %r15; | ||
479 | 495 | ||
496 | movq %rdi, CTX | ||
480 | movq %rsi, %r11; | 497 | movq %rsi, %r11; |
481 | 498 | ||
482 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | 499 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ |
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way) | |||
488 | /* dst <= regs xor IVs(in dst) */ | 505 | /* dst <= regs xor IVs(in dst) */ |
489 | store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | 506 | store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); |
490 | 507 | ||
508 | popq %r15; | ||
491 | FRAME_END | 509 | FRAME_END |
492 | ret; | 510 | ret; |
493 | ENDPROC(cast6_xts_dec_8way) | 511 | ENDPROC(cast6_xts_dec_8way) |
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index f3e91647ca27..8e49ce117494 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S | |||
@@ -64,12 +64,12 @@ | |||
64 | #define RW2bh %ch | 64 | #define RW2bh %ch |
65 | 65 | ||
66 | #define RT0 %r15 | 66 | #define RT0 %r15 |
67 | #define RT1 %rbp | 67 | #define RT1 %rsi |
68 | #define RT2 %r14 | 68 | #define RT2 %r14 |
69 | #define RT3 %rdx | 69 | #define RT3 %rdx |
70 | 70 | ||
71 | #define RT0d %r15d | 71 | #define RT0d %r15d |
72 | #define RT1d %ebp | 72 | #define RT1d %esi |
73 | #define RT2d %r14d | 73 | #define RT2d %r14d |
74 | #define RT3d %edx | 74 | #define RT3d %edx |
75 | 75 | ||
@@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk) | |||
177 | * %rsi: dst | 177 | * %rsi: dst |
178 | * %rdx: src | 178 | * %rdx: src |
179 | */ | 179 | */ |
180 | pushq %rbp; | ||
181 | pushq %rbx; | 180 | pushq %rbx; |
182 | pushq %r12; | 181 | pushq %r12; |
183 | pushq %r13; | 182 | pushq %r13; |
184 | pushq %r14; | 183 | pushq %r14; |
185 | pushq %r15; | 184 | pushq %r15; |
186 | 185 | ||
186 | pushq %rsi; /* dst */ | ||
187 | |||
187 | read_block(%rdx, RL0, RR0); | 188 | read_block(%rdx, RL0, RR0); |
188 | initial_permutation(RL0, RR0); | 189 | initial_permutation(RL0, RR0); |
189 | 190 | ||
@@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk) | |||
241 | round1(32+15, RL0, RR0, dummy2); | 242 | round1(32+15, RL0, RR0, dummy2); |
242 | 243 | ||
243 | final_permutation(RR0, RL0); | 244 | final_permutation(RR0, RL0); |
245 | |||
246 | popq %rsi /* dst */ | ||
244 | write_block(%rsi, RR0, RL0); | 247 | write_block(%rsi, RR0, RL0); |
245 | 248 | ||
246 | popq %r15; | 249 | popq %r15; |
@@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk) | |||
248 | popq %r13; | 251 | popq %r13; |
249 | popq %r12; | 252 | popq %r12; |
250 | popq %rbx; | 253 | popq %rbx; |
251 | popq %rbp; | ||
252 | 254 | ||
253 | ret; | 255 | ret; |
254 | ENDPROC(des3_ede_x86_64_crypt_blk) | 256 | ENDPROC(des3_ede_x86_64_crypt_blk) |
@@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) | |||
432 | * %rdx: src (3 blocks) | 434 | * %rdx: src (3 blocks) |
433 | */ | 435 | */ |
434 | 436 | ||
435 | pushq %rbp; | ||
436 | pushq %rbx; | 437 | pushq %rbx; |
437 | pushq %r12; | 438 | pushq %r12; |
438 | pushq %r13; | 439 | pushq %r13; |
439 | pushq %r14; | 440 | pushq %r14; |
440 | pushq %r15; | 441 | pushq %r15; |
441 | 442 | ||
443 | pushq %rsi /* dst */ | ||
444 | |||
442 | /* load input */ | 445 | /* load input */ |
443 | movl 0 * 4(%rdx), RL0d; | 446 | movl 0 * 4(%rdx), RL0d; |
444 | movl 1 * 4(%rdx), RR0d; | 447 | movl 1 * 4(%rdx), RR0d; |
@@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) | |||
520 | bswapl RR2d; | 523 | bswapl RR2d; |
521 | bswapl RL2d; | 524 | bswapl RL2d; |
522 | 525 | ||
526 | popq %rsi /* dst */ | ||
523 | movl RR0d, 0 * 4(%rsi); | 527 | movl RR0d, 0 * 4(%rsi); |
524 | movl RL0d, 1 * 4(%rsi); | 528 | movl RL0d, 1 * 4(%rsi); |
525 | movl RR1d, 2 * 4(%rsi); | 529 | movl RR1d, 2 * 4(%rsi); |
@@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) | |||
532 | popq %r13; | 536 | popq %r13; |
533 | popq %r12; | 537 | popq %r12; |
534 | popq %rbx; | 538 | popq %rbx; |
535 | popq %rbp; | ||
536 | 539 | ||
537 | ret; | 540 | ret; |
538 | ENDPROC(des3_ede_x86_64_crypt_blk_3way) | 541 | ENDPROC(des3_ede_x86_64_crypt_blk_3way) |
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 1eab79c9ac48..9f712a7dfd79 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S | |||
@@ -89,7 +89,7 @@ | |||
89 | #define REG_RE %rdx | 89 | #define REG_RE %rdx |
90 | #define REG_RTA %r12 | 90 | #define REG_RTA %r12 |
91 | #define REG_RTB %rbx | 91 | #define REG_RTB %rbx |
92 | #define REG_T1 %ebp | 92 | #define REG_T1 %r11d |
93 | #define xmm_mov vmovups | 93 | #define xmm_mov vmovups |
94 | #define avx2_zeroupper vzeroupper | 94 | #define avx2_zeroupper vzeroupper |
95 | #define RND_F1 1 | 95 | #define RND_F1 1 |
@@ -637,7 +637,6 @@ _loop3: | |||
637 | ENTRY(\name) | 637 | ENTRY(\name) |
638 | 638 | ||
639 | push %rbx | 639 | push %rbx |
640 | push %rbp | ||
641 | push %r12 | 640 | push %r12 |
642 | push %r13 | 641 | push %r13 |
643 | push %r14 | 642 | push %r14 |
@@ -673,7 +672,6 @@ _loop3: | |||
673 | pop %r14 | 672 | pop %r14 |
674 | pop %r13 | 673 | pop %r13 |
675 | pop %r12 | 674 | pop %r12 |
676 | pop %rbp | ||
677 | pop %rbx | 675 | pop %rbx |
678 | 676 | ||
679 | ret | 677 | ret |
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index a4109506a5e8..6204bd53528c 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S | |||
@@ -37,7 +37,7 @@ | |||
37 | #define REG_A %ecx | 37 | #define REG_A %ecx |
38 | #define REG_B %esi | 38 | #define REG_B %esi |
39 | #define REG_C %edi | 39 | #define REG_C %edi |
40 | #define REG_D %ebp | 40 | #define REG_D %r12d |
41 | #define REG_E %edx | 41 | #define REG_E %edx |
42 | 42 | ||
43 | #define REG_T1 %eax | 43 | #define REG_T1 %eax |
@@ -74,10 +74,10 @@ | |||
74 | ENTRY(\name) | 74 | ENTRY(\name) |
75 | 75 | ||
76 | push %rbx | 76 | push %rbx |
77 | push %rbp | ||
78 | push %r12 | 77 | push %r12 |
78 | push %rbp | ||
79 | mov %rsp, %rbp | ||
79 | 80 | ||
80 | mov %rsp, %r12 | ||
81 | sub $64, %rsp # allocate workspace | 81 | sub $64, %rsp # allocate workspace |
82 | and $~15, %rsp # align stack | 82 | and $~15, %rsp # align stack |
83 | 83 | ||
@@ -99,10 +99,9 @@ | |||
99 | xor %rax, %rax | 99 | xor %rax, %rax |
100 | rep stosq | 100 | rep stosq |
101 | 101 | ||
102 | mov %r12, %rsp # deallocate workspace | 102 | mov %rbp, %rsp # deallocate workspace |
103 | |||
104 | pop %r12 | ||
105 | pop %rbp | 103 | pop %rbp |
104 | pop %r12 | ||
106 | pop %rbx | 105 | pop %rbx |
107 | ret | 106 | ret |
108 | 107 | ||
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index e08888a1a5f2..001bbcf93c79 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S | |||
@@ -103,7 +103,7 @@ SRND = %rsi # clobbers INP | |||
103 | c = %ecx | 103 | c = %ecx |
104 | d = %r8d | 104 | d = %r8d |
105 | e = %edx | 105 | e = %edx |
106 | TBL = %rbp | 106 | TBL = %r12 |
107 | a = %eax | 107 | a = %eax |
108 | b = %ebx | 108 | b = %ebx |
109 | 109 | ||
@@ -350,13 +350,13 @@ a = TMP_ | |||
350 | ENTRY(sha256_transform_avx) | 350 | ENTRY(sha256_transform_avx) |
351 | .align 32 | 351 | .align 32 |
352 | pushq %rbx | 352 | pushq %rbx |
353 | pushq %rbp | 353 | pushq %r12 |
354 | pushq %r13 | 354 | pushq %r13 |
355 | pushq %r14 | 355 | pushq %r14 |
356 | pushq %r15 | 356 | pushq %r15 |
357 | pushq %r12 | 357 | pushq %rbp |
358 | movq %rsp, %rbp | ||
358 | 359 | ||
359 | mov %rsp, %r12 | ||
360 | subq $STACK_SIZE, %rsp # allocate stack space | 360 | subq $STACK_SIZE, %rsp # allocate stack space |
361 | and $~15, %rsp # align stack pointer | 361 | and $~15, %rsp # align stack pointer |
362 | 362 | ||
@@ -452,13 +452,12 @@ loop2: | |||
452 | 452 | ||
453 | done_hash: | 453 | done_hash: |
454 | 454 | ||
455 | mov %r12, %rsp | 455 | mov %rbp, %rsp |
456 | 456 | popq %rbp | |
457 | popq %r12 | ||
458 | popq %r15 | 457 | popq %r15 |
459 | popq %r14 | 458 | popq %r14 |
460 | popq %r13 | 459 | popq %r13 |
461 | popq %rbp | 460 | popq %r12 |
462 | popq %rbx | 461 | popq %rbx |
463 | ret | 462 | ret |
464 | ENDPROC(sha256_transform_avx) | 463 | ENDPROC(sha256_transform_avx) |
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 89c8f09787d2..1420db15dcdd 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S | |||
@@ -98,8 +98,6 @@ d = %r8d | |||
98 | e = %edx # clobbers NUM_BLKS | 98 | e = %edx # clobbers NUM_BLKS |
99 | y3 = %esi # clobbers INP | 99 | y3 = %esi # clobbers INP |
100 | 100 | ||
101 | |||
102 | TBL = %rbp | ||
103 | SRND = CTX # SRND is same register as CTX | 101 | SRND = CTX # SRND is same register as CTX |
104 | 102 | ||
105 | a = %eax | 103 | a = %eax |
@@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE | |||
531 | ENTRY(sha256_transform_rorx) | 529 | ENTRY(sha256_transform_rorx) |
532 | .align 32 | 530 | .align 32 |
533 | pushq %rbx | 531 | pushq %rbx |
534 | pushq %rbp | ||
535 | pushq %r12 | 532 | pushq %r12 |
536 | pushq %r13 | 533 | pushq %r13 |
537 | pushq %r14 | 534 | pushq %r14 |
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx) | |||
568 | mov CTX, _CTX(%rsp) | 565 | mov CTX, _CTX(%rsp) |
569 | 566 | ||
570 | loop0: | 567 | loop0: |
571 | lea K256(%rip), TBL | ||
572 | |||
573 | ## Load first 16 dwords from two blocks | 568 | ## Load first 16 dwords from two blocks |
574 | VMOVDQ 0*32(INP),XTMP0 | 569 | VMOVDQ 0*32(INP),XTMP0 |
575 | VMOVDQ 1*32(INP),XTMP1 | 570 | VMOVDQ 1*32(INP),XTMP1 |
@@ -597,19 +592,19 @@ last_block_enter: | |||
597 | 592 | ||
598 | .align 16 | 593 | .align 16 |
599 | loop1: | 594 | loop1: |
600 | vpaddd 0*32(TBL, SRND), X0, XFER | 595 | vpaddd K256+0*32(SRND), X0, XFER |
601 | vmovdqa XFER, 0*32+_XFER(%rsp, SRND) | 596 | vmovdqa XFER, 0*32+_XFER(%rsp, SRND) |
602 | FOUR_ROUNDS_AND_SCHED _XFER + 0*32 | 597 | FOUR_ROUNDS_AND_SCHED _XFER + 0*32 |
603 | 598 | ||
604 | vpaddd 1*32(TBL, SRND), X0, XFER | 599 | vpaddd K256+1*32(SRND), X0, XFER |
605 | vmovdqa XFER, 1*32+_XFER(%rsp, SRND) | 600 | vmovdqa XFER, 1*32+_XFER(%rsp, SRND) |
606 | FOUR_ROUNDS_AND_SCHED _XFER + 1*32 | 601 | FOUR_ROUNDS_AND_SCHED _XFER + 1*32 |
607 | 602 | ||
608 | vpaddd 2*32(TBL, SRND), X0, XFER | 603 | vpaddd K256+2*32(SRND), X0, XFER |
609 | vmovdqa XFER, 2*32+_XFER(%rsp, SRND) | 604 | vmovdqa XFER, 2*32+_XFER(%rsp, SRND) |
610 | FOUR_ROUNDS_AND_SCHED _XFER + 2*32 | 605 | FOUR_ROUNDS_AND_SCHED _XFER + 2*32 |
611 | 606 | ||
612 | vpaddd 3*32(TBL, SRND), X0, XFER | 607 | vpaddd K256+3*32(SRND), X0, XFER |
613 | vmovdqa XFER, 3*32+_XFER(%rsp, SRND) | 608 | vmovdqa XFER, 3*32+_XFER(%rsp, SRND) |
614 | FOUR_ROUNDS_AND_SCHED _XFER + 3*32 | 609 | FOUR_ROUNDS_AND_SCHED _XFER + 3*32 |
615 | 610 | ||
@@ -619,10 +614,11 @@ loop1: | |||
619 | 614 | ||
620 | loop2: | 615 | loop2: |
621 | ## Do last 16 rounds with no scheduling | 616 | ## Do last 16 rounds with no scheduling |
622 | vpaddd 0*32(TBL, SRND), X0, XFER | 617 | vpaddd K256+0*32(SRND), X0, XFER |
623 | vmovdqa XFER, 0*32+_XFER(%rsp, SRND) | 618 | vmovdqa XFER, 0*32+_XFER(%rsp, SRND) |
624 | DO_4ROUNDS _XFER + 0*32 | 619 | DO_4ROUNDS _XFER + 0*32 |
625 | vpaddd 1*32(TBL, SRND), X1, XFER | 620 | |
621 | vpaddd K256+1*32(SRND), X1, XFER | ||
626 | vmovdqa XFER, 1*32+_XFER(%rsp, SRND) | 622 | vmovdqa XFER, 1*32+_XFER(%rsp, SRND) |
627 | DO_4ROUNDS _XFER + 1*32 | 623 | DO_4ROUNDS _XFER + 1*32 |
628 | add $2*32, SRND | 624 | add $2*32, SRND |
@@ -676,9 +672,6 @@ loop3: | |||
676 | ja done_hash | 672 | ja done_hash |
677 | 673 | ||
678 | do_last_block: | 674 | do_last_block: |
679 | #### do last block | ||
680 | lea K256(%rip), TBL | ||
681 | |||
682 | VMOVDQ 0*16(INP),XWORD0 | 675 | VMOVDQ 0*16(INP),XWORD0 |
683 | VMOVDQ 1*16(INP),XWORD1 | 676 | VMOVDQ 1*16(INP),XWORD1 |
684 | VMOVDQ 2*16(INP),XWORD2 | 677 | VMOVDQ 2*16(INP),XWORD2 |
@@ -718,7 +711,6 @@ done_hash: | |||
718 | popq %r14 | 711 | popq %r14 |
719 | popq %r13 | 712 | popq %r13 |
720 | popq %r12 | 713 | popq %r12 |
721 | popq %rbp | ||
722 | popq %rbx | 714 | popq %rbx |
723 | ret | 715 | ret |
724 | ENDPROC(sha256_transform_rorx) | 716 | ENDPROC(sha256_transform_rorx) |
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 39b83c93e7fd..c6c05ed2c16a 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S | |||
@@ -95,7 +95,7 @@ SRND = %rsi # clobbers INP | |||
95 | c = %ecx | 95 | c = %ecx |
96 | d = %r8d | 96 | d = %r8d |
97 | e = %edx | 97 | e = %edx |
98 | TBL = %rbp | 98 | TBL = %r12 |
99 | a = %eax | 99 | a = %eax |
100 | b = %ebx | 100 | b = %ebx |
101 | 101 | ||
@@ -356,13 +356,13 @@ a = TMP_ | |||
356 | ENTRY(sha256_transform_ssse3) | 356 | ENTRY(sha256_transform_ssse3) |
357 | .align 32 | 357 | .align 32 |
358 | pushq %rbx | 358 | pushq %rbx |
359 | pushq %rbp | 359 | pushq %r12 |
360 | pushq %r13 | 360 | pushq %r13 |
361 | pushq %r14 | 361 | pushq %r14 |
362 | pushq %r15 | 362 | pushq %r15 |
363 | pushq %r12 | 363 | pushq %rbp |
364 | mov %rsp, %rbp | ||
364 | 365 | ||
365 | mov %rsp, %r12 | ||
366 | subq $STACK_SIZE, %rsp | 366 | subq $STACK_SIZE, %rsp |
367 | and $~15, %rsp | 367 | and $~15, %rsp |
368 | 368 | ||
@@ -462,13 +462,12 @@ loop2: | |||
462 | 462 | ||
463 | done_hash: | 463 | done_hash: |
464 | 464 | ||
465 | mov %r12, %rsp | 465 | mov %rbp, %rsp |
466 | 466 | popq %rbp | |
467 | popq %r12 | ||
468 | popq %r15 | 467 | popq %r15 |
469 | popq %r14 | 468 | popq %r14 |
470 | popq %r13 | 469 | popq %r13 |
471 | popq %rbp | 470 | popq %r12 |
472 | popq %rbx | 471 | popq %rbx |
473 | 472 | ||
474 | ret | 473 | ret |
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 7f5f6c6ec72e..b16d56005162 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S | |||
@@ -69,8 +69,9 @@ XFER = YTMP0 | |||
69 | 69 | ||
70 | BYTE_FLIP_MASK = %ymm9 | 70 | BYTE_FLIP_MASK = %ymm9 |
71 | 71 | ||
72 | # 1st arg | 72 | # 1st arg is %rdi, which is saved to the stack and accessed later via %r12 |
73 | CTX = %rdi | 73 | CTX1 = %rdi |
74 | CTX2 = %r12 | ||
74 | # 2nd arg | 75 | # 2nd arg |
75 | INP = %rsi | 76 | INP = %rsi |
76 | # 3rd arg | 77 | # 3rd arg |
@@ -81,7 +82,7 @@ d = %r8 | |||
81 | e = %rdx | 82 | e = %rdx |
82 | y3 = %rsi | 83 | y3 = %rsi |
83 | 84 | ||
84 | TBL = %rbp | 85 | TBL = %rdi # clobbers CTX1 |
85 | 86 | ||
86 | a = %rax | 87 | a = %rax |
87 | b = %rbx | 88 | b = %rbx |
@@ -91,26 +92,26 @@ g = %r10 | |||
91 | h = %r11 | 92 | h = %r11 |
92 | old_h = %r11 | 93 | old_h = %r11 |
93 | 94 | ||
94 | T1 = %r12 | 95 | T1 = %r12 # clobbers CTX2 |
95 | y0 = %r13 | 96 | y0 = %r13 |
96 | y1 = %r14 | 97 | y1 = %r14 |
97 | y2 = %r15 | 98 | y2 = %r15 |
98 | 99 | ||
99 | y4 = %r12 | ||
100 | |||
101 | # Local variables (stack frame) | 100 | # Local variables (stack frame) |
102 | XFER_SIZE = 4*8 | 101 | XFER_SIZE = 4*8 |
103 | SRND_SIZE = 1*8 | 102 | SRND_SIZE = 1*8 |
104 | INP_SIZE = 1*8 | 103 | INP_SIZE = 1*8 |
105 | INPEND_SIZE = 1*8 | 104 | INPEND_SIZE = 1*8 |
105 | CTX_SIZE = 1*8 | ||
106 | RSPSAVE_SIZE = 1*8 | 106 | RSPSAVE_SIZE = 1*8 |
107 | GPRSAVE_SIZE = 6*8 | 107 | GPRSAVE_SIZE = 5*8 |
108 | 108 | ||
109 | frame_XFER = 0 | 109 | frame_XFER = 0 |
110 | frame_SRND = frame_XFER + XFER_SIZE | 110 | frame_SRND = frame_XFER + XFER_SIZE |
111 | frame_INP = frame_SRND + SRND_SIZE | 111 | frame_INP = frame_SRND + SRND_SIZE |
112 | frame_INPEND = frame_INP + INP_SIZE | 112 | frame_INPEND = frame_INP + INP_SIZE |
113 | frame_RSPSAVE = frame_INPEND + INPEND_SIZE | 113 | frame_CTX = frame_INPEND + INPEND_SIZE |
114 | frame_RSPSAVE = frame_CTX + CTX_SIZE | ||
114 | frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE | 115 | frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE |
115 | frame_size = frame_GPRSAVE + GPRSAVE_SIZE | 116 | frame_size = frame_GPRSAVE + GPRSAVE_SIZE |
116 | 117 | ||
@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx) | |||
576 | mov %rax, frame_RSPSAVE(%rsp) | 577 | mov %rax, frame_RSPSAVE(%rsp) |
577 | 578 | ||
578 | # Save GPRs | 579 | # Save GPRs |
579 | mov %rbp, frame_GPRSAVE(%rsp) | 580 | mov %rbx, 8*0+frame_GPRSAVE(%rsp) |
580 | mov %rbx, 8*1+frame_GPRSAVE(%rsp) | 581 | mov %r12, 8*1+frame_GPRSAVE(%rsp) |
581 | mov %r12, 8*2+frame_GPRSAVE(%rsp) | 582 | mov %r13, 8*2+frame_GPRSAVE(%rsp) |
582 | mov %r13, 8*3+frame_GPRSAVE(%rsp) | 583 | mov %r14, 8*3+frame_GPRSAVE(%rsp) |
583 | mov %r14, 8*4+frame_GPRSAVE(%rsp) | 584 | mov %r15, 8*4+frame_GPRSAVE(%rsp) |
584 | mov %r15, 8*5+frame_GPRSAVE(%rsp) | ||
585 | 585 | ||
586 | shl $7, NUM_BLKS # convert to bytes | 586 | shl $7, NUM_BLKS # convert to bytes |
587 | jz done_hash | 587 | jz done_hash |
@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx) | |||
589 | mov NUM_BLKS, frame_INPEND(%rsp) | 589 | mov NUM_BLKS, frame_INPEND(%rsp) |
590 | 590 | ||
591 | ## load initial digest | 591 | ## load initial digest |
592 | mov 8*0(CTX),a | 592 | mov 8*0(CTX1), a |
593 | mov 8*1(CTX),b | 593 | mov 8*1(CTX1), b |
594 | mov 8*2(CTX),c | 594 | mov 8*2(CTX1), c |
595 | mov 8*3(CTX),d | 595 | mov 8*3(CTX1), d |
596 | mov 8*4(CTX),e | 596 | mov 8*4(CTX1), e |
597 | mov 8*5(CTX),f | 597 | mov 8*5(CTX1), f |
598 | mov 8*6(CTX),g | 598 | mov 8*6(CTX1), g |
599 | mov 8*7(CTX),h | 599 | mov 8*7(CTX1), h |
600 | |||
601 | # save %rdi (CTX) before it gets clobbered | ||
602 | mov %rdi, frame_CTX(%rsp) | ||
600 | 603 | ||
601 | vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK | 604 | vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK |
602 | 605 | ||
@@ -652,14 +655,15 @@ loop2: | |||
652 | subq $1, frame_SRND(%rsp) | 655 | subq $1, frame_SRND(%rsp) |
653 | jne loop2 | 656 | jne loop2 |
654 | 657 | ||
655 | addm 8*0(CTX),a | 658 | mov frame_CTX(%rsp), CTX2 |
656 | addm 8*1(CTX),b | 659 | addm 8*0(CTX2), a |
657 | addm 8*2(CTX),c | 660 | addm 8*1(CTX2), b |
658 | addm 8*3(CTX),d | 661 | addm 8*2(CTX2), c |
659 | addm 8*4(CTX),e | 662 | addm 8*3(CTX2), d |
660 | addm 8*5(CTX),f | 663 | addm 8*4(CTX2), e |
661 | addm 8*6(CTX),g | 664 | addm 8*5(CTX2), f |
662 | addm 8*7(CTX),h | 665 | addm 8*6(CTX2), g |
666 | addm 8*7(CTX2), h | ||
663 | 667 | ||
664 | mov frame_INP(%rsp), INP | 668 | mov frame_INP(%rsp), INP |
665 | add $128, INP | 669 | add $128, INP |
@@ -669,12 +673,11 @@ loop2: | |||
669 | done_hash: | 673 | done_hash: |
670 | 674 | ||
671 | # Restore GPRs | 675 | # Restore GPRs |
672 | mov frame_GPRSAVE(%rsp) ,%rbp | 676 | mov 8*0+frame_GPRSAVE(%rsp), %rbx |
673 | mov 8*1+frame_GPRSAVE(%rsp) ,%rbx | 677 | mov 8*1+frame_GPRSAVE(%rsp), %r12 |
674 | mov 8*2+frame_GPRSAVE(%rsp) ,%r12 | 678 | mov 8*2+frame_GPRSAVE(%rsp), %r13 |
675 | mov 8*3+frame_GPRSAVE(%rsp) ,%r13 | 679 | mov 8*3+frame_GPRSAVE(%rsp), %r14 |
676 | mov 8*4+frame_GPRSAVE(%rsp) ,%r14 | 680 | mov 8*4+frame_GPRSAVE(%rsp), %r15 |
677 | mov 8*5+frame_GPRSAVE(%rsp) ,%r15 | ||
678 | 681 | ||
679 | # Restore Stack Pointer | 682 | # Restore Stack Pointer |
680 | mov frame_RSPSAVE(%rsp), %rsp | 683 | mov frame_RSPSAVE(%rsp), %rsp |
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index b3f49d286348..73b471da3622 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S | |||
@@ -76,8 +76,8 @@ | |||
76 | #define RT %xmm14 | 76 | #define RT %xmm14 |
77 | #define RR %xmm15 | 77 | #define RR %xmm15 |
78 | 78 | ||
79 | #define RID1 %rbp | 79 | #define RID1 %r13 |
80 | #define RID1d %ebp | 80 | #define RID1d %r13d |
81 | #define RID2 %rsi | 81 | #define RID2 %rsi |
82 | #define RID2d %esi | 82 | #define RID2d %esi |
83 | 83 | ||
@@ -259,7 +259,7 @@ __twofish_enc_blk8: | |||
259 | 259 | ||
260 | vmovdqu w(CTX), RK1; | 260 | vmovdqu w(CTX), RK1; |
261 | 261 | ||
262 | pushq %rbp; | 262 | pushq %r13; |
263 | pushq %rbx; | 263 | pushq %rbx; |
264 | pushq %rcx; | 264 | pushq %rcx; |
265 | 265 | ||
@@ -282,7 +282,7 @@ __twofish_enc_blk8: | |||
282 | 282 | ||
283 | popq %rcx; | 283 | popq %rcx; |
284 | popq %rbx; | 284 | popq %rbx; |
285 | popq %rbp; | 285 | popq %r13; |
286 | 286 | ||
287 | outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); | 287 | outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); |
288 | outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); | 288 | outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); |
@@ -301,7 +301,7 @@ __twofish_dec_blk8: | |||
301 | 301 | ||
302 | vmovdqu (w+4*4)(CTX), RK1; | 302 | vmovdqu (w+4*4)(CTX), RK1; |
303 | 303 | ||
304 | pushq %rbp; | 304 | pushq %r13; |
305 | pushq %rbx; | 305 | pushq %rbx; |
306 | 306 | ||
307 | inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); | 307 | inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); |
@@ -322,7 +322,7 @@ __twofish_dec_blk8: | |||
322 | vmovdqu (w)(CTX), RK1; | 322 | vmovdqu (w)(CTX), RK1; |
323 | 323 | ||
324 | popq %rbx; | 324 | popq %rbx; |
325 | popq %rbp; | 325 | popq %r13; |
326 | 326 | ||
327 | outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); | 327 | outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); |
328 | outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); | 328 | outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); |
diff --git a/crypto/af_alg.c b/crypto/af_alg.c index ffa9f4ccd9b4..337cf382718e 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c | |||
@@ -619,14 +619,14 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, | |||
619 | struct af_alg_ctx *ctx = ask->private; | 619 | struct af_alg_ctx *ctx = ask->private; |
620 | struct af_alg_tsgl *sgl; | 620 | struct af_alg_tsgl *sgl; |
621 | struct scatterlist *sg; | 621 | struct scatterlist *sg; |
622 | unsigned int i, j; | 622 | unsigned int i, j = 0; |
623 | 623 | ||
624 | while (!list_empty(&ctx->tsgl_list)) { | 624 | while (!list_empty(&ctx->tsgl_list)) { |
625 | sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, | 625 | sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, |
626 | list); | 626 | list); |
627 | sg = sgl->sg; | 627 | sg = sgl->sg; |
628 | 628 | ||
629 | for (i = 0, j = 0; i < sgl->cur; i++) { | 629 | for (i = 0; i < sgl->cur; i++) { |
630 | size_t plen = min_t(size_t, used, sg[i].length); | 630 | size_t plen = min_t(size_t, used, sg[i].length); |
631 | struct page *page = sg_page(sg + i); | 631 | struct page *page = sg_page(sg + i); |
632 | 632 | ||
diff --git a/crypto/drbg.c b/crypto/drbg.c index 633a88e93ab0..70018397e59a 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c | |||
@@ -1133,10 +1133,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) | |||
1133 | { | 1133 | { |
1134 | if (!drbg) | 1134 | if (!drbg) |
1135 | return; | 1135 | return; |
1136 | kzfree(drbg->V); | 1136 | kzfree(drbg->Vbuf); |
1137 | drbg->Vbuf = NULL; | 1137 | drbg->V = NULL; |
1138 | kzfree(drbg->C); | 1138 | kzfree(drbg->Cbuf); |
1139 | drbg->Cbuf = NULL; | 1139 | drbg->C = NULL; |
1140 | kzfree(drbg->scratchpadbuf); | 1140 | kzfree(drbg->scratchpadbuf); |
1141 | drbg->scratchpadbuf = NULL; | 1141 | drbg->scratchpadbuf = NULL; |
1142 | drbg->reseed_ctr = 0; | 1142 | drbg->reseed_ctr = 0; |
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index e36aeacd7635..1eb852765469 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config CRYPTO_DEV_FSL_CAAM | 1 | config CRYPTO_DEV_FSL_CAAM |
2 | tristate "Freescale CAAM-Multicore driver backend" | 2 | tristate "Freescale CAAM-Multicore driver backend" |
3 | depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE | 3 | depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE |
4 | select SOC_BUS | ||
4 | help | 5 | help |
5 | Enables the driver module for Freescale's Cryptographic Accelerator | 6 | Enables the driver module for Freescale's Cryptographic Accelerator |
6 | and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). | 7 | and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). |
@@ -141,10 +142,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API | |||
141 | To compile this as a module, choose M here: the module | 142 | To compile this as a module, choose M here: the module |
142 | will be called caamrng. | 143 | will be called caamrng. |
143 | 144 | ||
144 | config CRYPTO_DEV_FSL_CAAM_IMX | ||
145 | def_bool SOC_IMX6 || SOC_IMX7D | ||
146 | depends on CRYPTO_DEV_FSL_CAAM | ||
147 | |||
148 | config CRYPTO_DEV_FSL_CAAM_DEBUG | 145 | config CRYPTO_DEV_FSL_CAAM_DEBUG |
149 | bool "Enable debug output in CAAM driver" | 146 | bool "Enable debug output in CAAM driver" |
150 | depends on CRYPTO_DEV_FSL_CAAM | 147 | depends on CRYPTO_DEV_FSL_CAAM |
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index dacb53fb690e..027e121c6f70 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/device.h> | 7 | #include <linux/device.h> |
8 | #include <linux/of_address.h> | 8 | #include <linux/of_address.h> |
9 | #include <linux/of_irq.h> | 9 | #include <linux/of_irq.h> |
10 | #include <linux/sys_soc.h> | ||
10 | 11 | ||
11 | #include "compat.h" | 12 | #include "compat.h" |
12 | #include "regs.h" | 13 | #include "regs.h" |
@@ -19,6 +20,8 @@ bool caam_little_end; | |||
19 | EXPORT_SYMBOL(caam_little_end); | 20 | EXPORT_SYMBOL(caam_little_end); |
20 | bool caam_dpaa2; | 21 | bool caam_dpaa2; |
21 | EXPORT_SYMBOL(caam_dpaa2); | 22 | EXPORT_SYMBOL(caam_dpaa2); |
23 | bool caam_imx; | ||
24 | EXPORT_SYMBOL(caam_imx); | ||
22 | 25 | ||
23 | #ifdef CONFIG_CAAM_QI | 26 | #ifdef CONFIG_CAAM_QI |
24 | #include "qi.h" | 27 | #include "qi.h" |
@@ -28,19 +31,11 @@ EXPORT_SYMBOL(caam_dpaa2); | |||
28 | * i.MX targets tend to have clock control subsystems that can | 31 | * i.MX targets tend to have clock control subsystems that can |
29 | * enable/disable clocking to our device. | 32 | * enable/disable clocking to our device. |
30 | */ | 33 | */ |
31 | #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX | ||
32 | static inline struct clk *caam_drv_identify_clk(struct device *dev, | 34 | static inline struct clk *caam_drv_identify_clk(struct device *dev, |
33 | char *clk_name) | 35 | char *clk_name) |
34 | { | 36 | { |
35 | return devm_clk_get(dev, clk_name); | 37 | return caam_imx ? devm_clk_get(dev, clk_name) : NULL; |
36 | } | 38 | } |
37 | #else | ||
38 | static inline struct clk *caam_drv_identify_clk(struct device *dev, | ||
39 | char *clk_name) | ||
40 | { | ||
41 | return NULL; | ||
42 | } | ||
43 | #endif | ||
44 | 39 | ||
45 | /* | 40 | /* |
46 | * Descriptor to instantiate RNG State Handle 0 in normal mode and | 41 | * Descriptor to instantiate RNG State Handle 0 in normal mode and |
@@ -430,6 +425,10 @@ static int caam_probe(struct platform_device *pdev) | |||
430 | { | 425 | { |
431 | int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; | 426 | int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; |
432 | u64 caam_id; | 427 | u64 caam_id; |
428 | static const struct soc_device_attribute imx_soc[] = { | ||
429 | {.family = "Freescale i.MX"}, | ||
430 | {}, | ||
431 | }; | ||
433 | struct device *dev; | 432 | struct device *dev; |
434 | struct device_node *nprop, *np; | 433 | struct device_node *nprop, *np; |
435 | struct caam_ctrl __iomem *ctrl; | 434 | struct caam_ctrl __iomem *ctrl; |
@@ -451,6 +450,8 @@ static int caam_probe(struct platform_device *pdev) | |||
451 | dev_set_drvdata(dev, ctrlpriv); | 450 | dev_set_drvdata(dev, ctrlpriv); |
452 | nprop = pdev->dev.of_node; | 451 | nprop = pdev->dev.of_node; |
453 | 452 | ||
453 | caam_imx = (bool)soc_device_match(imx_soc); | ||
454 | |||
454 | /* Enable clocking */ | 455 | /* Enable clocking */ |
455 | clk = caam_drv_identify_clk(&pdev->dev, "ipg"); | 456 | clk = caam_drv_identify_clk(&pdev->dev, "ipg"); |
456 | if (IS_ERR(clk)) { | 457 | if (IS_ERR(clk)) { |
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 2b5efff9ec3c..17cfd23a38fa 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h | |||
@@ -67,6 +67,7 @@ | |||
67 | */ | 67 | */ |
68 | 68 | ||
69 | extern bool caam_little_end; | 69 | extern bool caam_little_end; |
70 | extern bool caam_imx; | ||
70 | 71 | ||
71 | #define caam_to_cpu(len) \ | 72 | #define caam_to_cpu(len) \ |
72 | static inline u##len caam##len ## _to_cpu(u##len val) \ | 73 | static inline u##len caam##len ## _to_cpu(u##len val) \ |
@@ -154,13 +155,10 @@ static inline u64 rd_reg64(void __iomem *reg) | |||
154 | #else /* CONFIG_64BIT */ | 155 | #else /* CONFIG_64BIT */ |
155 | static inline void wr_reg64(void __iomem *reg, u64 data) | 156 | static inline void wr_reg64(void __iomem *reg, u64 data) |
156 | { | 157 | { |
157 | #ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX | 158 | if (!caam_imx && caam_little_end) { |
158 | if (caam_little_end) { | ||
159 | wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); | 159 | wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); |
160 | wr_reg32((u32 __iomem *)(reg), data); | 160 | wr_reg32((u32 __iomem *)(reg), data); |
161 | } else | 161 | } else { |
162 | #endif | ||
163 | { | ||
164 | wr_reg32((u32 __iomem *)(reg), data >> 32); | 162 | wr_reg32((u32 __iomem *)(reg), data >> 32); |
165 | wr_reg32((u32 __iomem *)(reg) + 1, data); | 163 | wr_reg32((u32 __iomem *)(reg) + 1, data); |
166 | } | 164 | } |
@@ -168,41 +166,40 @@ static inline void wr_reg64(void __iomem *reg, u64 data) | |||
168 | 166 | ||
169 | static inline u64 rd_reg64(void __iomem *reg) | 167 | static inline u64 rd_reg64(void __iomem *reg) |
170 | { | 168 | { |
171 | #ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX | 169 | if (!caam_imx && caam_little_end) |
172 | if (caam_little_end) | ||
173 | return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | | 170 | return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | |
174 | (u64)rd_reg32((u32 __iomem *)(reg))); | 171 | (u64)rd_reg32((u32 __iomem *)(reg))); |
175 | else | 172 | |
176 | #endif | 173 | return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | |
177 | return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | | 174 | (u64)rd_reg32((u32 __iomem *)(reg) + 1)); |
178 | (u64)rd_reg32((u32 __iomem *)(reg) + 1)); | ||
179 | } | 175 | } |
180 | #endif /* CONFIG_64BIT */ | 176 | #endif /* CONFIG_64BIT */ |
181 | 177 | ||
178 | static inline u64 cpu_to_caam_dma64(dma_addr_t value) | ||
179 | { | ||
180 | if (caam_imx) | ||
181 | return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | | ||
182 | (u64)cpu_to_caam32(upper_32_bits(value))); | ||
183 | |||
184 | return cpu_to_caam64(value); | ||
185 | } | ||
186 | |||
187 | static inline u64 caam_dma64_to_cpu(u64 value) | ||
188 | { | ||
189 | if (caam_imx) | ||
190 | return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | | ||
191 | (u64)caam32_to_cpu(upper_32_bits(value))); | ||
192 | |||
193 | return caam64_to_cpu(value); | ||
194 | } | ||
195 | |||
182 | #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT | 196 | #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT |
183 | #ifdef CONFIG_SOC_IMX7D | 197 | #define cpu_to_caam_dma(value) cpu_to_caam_dma64(value) |
184 | #define cpu_to_caam_dma(value) \ | 198 | #define caam_dma_to_cpu(value) caam_dma64_to_cpu(value) |
185 | (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ | ||
186 | (u64)cpu_to_caam32(upper_32_bits(value))) | ||
187 | #define caam_dma_to_cpu(value) \ | ||
188 | (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ | ||
189 | (u64)caam32_to_cpu(upper_32_bits(value))) | ||
190 | #else | ||
191 | #define cpu_to_caam_dma(value) cpu_to_caam64(value) | ||
192 | #define caam_dma_to_cpu(value) caam64_to_cpu(value) | ||
193 | #endif /* CONFIG_SOC_IMX7D */ | ||
194 | #else | 199 | #else |
195 | #define cpu_to_caam_dma(value) cpu_to_caam32(value) | 200 | #define cpu_to_caam_dma(value) cpu_to_caam32(value) |
196 | #define caam_dma_to_cpu(value) caam32_to_cpu(value) | 201 | #define caam_dma_to_cpu(value) caam32_to_cpu(value) |
197 | #endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ | 202 | #endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ |
198 | |||
199 | #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX | ||
200 | #define cpu_to_caam_dma64(value) \ | ||
201 | (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ | ||
202 | (u64)cpu_to_caam32(upper_32_bits(value))) | ||
203 | #else | ||
204 | #define cpu_to_caam_dma64(value) cpu_to_caam64(value) | ||
205 | #endif | ||
206 | 203 | ||
207 | /* | 204 | /* |
208 | * jr_outentry | 205 | * jr_outentry |
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index d2207ac5ba19..5438552bc6d7 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c | |||
@@ -386,7 +386,7 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm) | |||
386 | struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); | 386 | struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); |
387 | struct safexcel_crypto_priv *priv = ctx->priv; | 387 | struct safexcel_crypto_priv *priv = ctx->priv; |
388 | struct skcipher_request req; | 388 | struct skcipher_request req; |
389 | struct safexcel_inv_result result = { 0 }; | 389 | struct safexcel_inv_result result = {}; |
390 | int ring = ctx->base.ring; | 390 | int ring = ctx->base.ring; |
391 | 391 | ||
392 | memset(&req, 0, sizeof(struct skcipher_request)); | 392 | memset(&req, 0, sizeof(struct skcipher_request)); |
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 3f819399cd95..3980f946874f 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c | |||
@@ -419,7 +419,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) | |||
419 | struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); | 419 | struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); |
420 | struct safexcel_crypto_priv *priv = ctx->priv; | 420 | struct safexcel_crypto_priv *priv = ctx->priv; |
421 | struct ahash_request req; | 421 | struct ahash_request req; |
422 | struct safexcel_inv_result result = { 0 }; | 422 | struct safexcel_inv_result result = {}; |
423 | int ring = ctx->base.ring; | 423 | int ring = ctx->base.ring; |
424 | 424 | ||
425 | memset(&req, 0, sizeof(struct ahash_request)); | 425 | memset(&req, 0, sizeof(struct ahash_request)); |
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 79791c690858..dff88838dce7 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c | |||
@@ -1756,9 +1756,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, | |||
1756 | req_ctx->swinit = 0; | 1756 | req_ctx->swinit = 0; |
1757 | } else { | 1757 | } else { |
1758 | desc->ptr[1] = zero_entry; | 1758 | desc->ptr[1] = zero_entry; |
1759 | /* Indicate next op is not the first. */ | ||
1760 | req_ctx->first = 0; | ||
1761 | } | 1759 | } |
1760 | /* Indicate next op is not the first. */ | ||
1761 | req_ctx->first = 0; | ||
1762 | 1762 | ||
1763 | /* HMAC key */ | 1763 | /* HMAC key */ |
1764 | if (ctx->keylen) | 1764 | if (ctx->keylen) |
@@ -1769,7 +1769,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, | |||
1769 | 1769 | ||
1770 | sg_count = edesc->src_nents ?: 1; | 1770 | sg_count = edesc->src_nents ?: 1; |
1771 | if (is_sec1 && sg_count > 1) | 1771 | if (is_sec1 && sg_count > 1) |
1772 | sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length); | 1772 | sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length); |
1773 | else | 1773 | else |
1774 | sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, | 1774 | sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, |
1775 | DMA_TO_DEVICE); | 1775 | DMA_TO_DEVICE); |
@@ -3057,7 +3057,8 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, | |||
3057 | t_alg->algt.alg.hash.final = ahash_final; | 3057 | t_alg->algt.alg.hash.final = ahash_final; |
3058 | t_alg->algt.alg.hash.finup = ahash_finup; | 3058 | t_alg->algt.alg.hash.finup = ahash_finup; |
3059 | t_alg->algt.alg.hash.digest = ahash_digest; | 3059 | t_alg->algt.alg.hash.digest = ahash_digest; |
3060 | t_alg->algt.alg.hash.setkey = ahash_setkey; | 3060 | if (!strncmp(alg->cra_name, "hmac", 4)) |
3061 | t_alg->algt.alg.hash.setkey = ahash_setkey; | ||
3061 | t_alg->algt.alg.hash.import = ahash_import; | 3062 | t_alg->algt.alg.hash.import = ahash_import; |
3062 | t_alg->algt.alg.hash.export = ahash_export; | 3063 | t_alg->algt.alg.hash.export = ahash_export; |
3063 | 3064 | ||