author	Linus Torvalds <torvalds@linux-foundation.org>	2017-09-22 12:15:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-09-22 12:15:27 -0400
commit	7a6d0071d821965349ff853041f1c1aab496f2d9 (patch)
tree	cd0b109b9f3bcb6bdf8993293ca417a7ff2d2208
parent	6e80ecdddf4ea6f3cd84e83720f3d852e6624a68 (diff)
parent	e117765a117da3ece15689cb8a759d16c415b08c (diff)
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto fixes from Herbert Xu:

 - Fix compiler warnings in inside-secure

 - Fix LS1021A support in caam

 - Avoid using RBP in x86 crypto code

 - Fix bug in talitos that prevents hashing with algif

 - Fix bugs in talitos hashing code that cause incorrect hash results

 - Fix memory freeing path bug in drbg

 - Fix af_alg crash when two SG lists are chained

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: af_alg - update correct dst SGL entry
  crypto: caam - fix LS1021A support on ARMv7 multiplatform kernel
  crypto: inside-secure - fix gcc-4.9 warnings
  crypto: talitos - Don't provide setkey for non hmac hashing algs
  crypto: talitos - fix hashing
  crypto: talitos - fix sha224
  crypto: x86/twofish - Fix RBP usage
  crypto: sha512-avx2 - Fix RBP usage
  crypto: x86/sha256-ssse3 - Fix RBP usage
  crypto: x86/sha256-avx2 - Fix RBP usage
  crypto: x86/sha256-avx - Fix RBP usage
  crypto: x86/sha1-ssse3 - Fix RBP usage
  crypto: x86/sha1-avx2 - Fix RBP usage
  crypto: x86/des3_ede - Fix RBP usage
  crypto: x86/cast6 - Fix RBP usage
  crypto: x86/cast5 - Fix RBP usage
  crypto: x86/camellia - Fix RBP usage
  crypto: x86/blowfish - Fix RBP usage
  crypto: drbg - fix freeing of resources
 arch/x86/crypto/blowfish-x86_64-asm_64.S       | 48
 arch/x86/crypto/camellia-x86_64-asm_64.S       | 26
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S      | 47
 arch/x86/crypto/cast6-avx-x86_64-asm_64.S      | 50
 arch/x86/crypto/des3_ede-asm_64.S              | 15
 arch/x86/crypto/sha1_avx2_x86_64_asm.S         |  4
 arch/x86/crypto/sha1_ssse3_asm.S               | 11
 arch/x86/crypto/sha256-avx-asm.S               | 15
 arch/x86/crypto/sha256-avx2-asm.S              | 22
 arch/x86/crypto/sha256-ssse3-asm.S             | 15
 arch/x86/crypto/sha512-avx2-asm.S              | 75
 arch/x86/crypto/twofish-avx-x86_64-asm_64.S    | 12
 crypto/af_alg.c                                |  4
 crypto/drbg.c                                  |  8
 drivers/crypto/caam/Kconfig                    |  5
 drivers/crypto/caam/ctrl.c                     | 19
 drivers/crypto/caam/regs.h                     | 59
 drivers/crypto/inside-secure/safexcel_cipher.c |  2
 drivers/crypto/inside-secure/safexcel_hash.c   |  2
 drivers/crypto/talitos.c                       |  9
 20 files changed, 236 insertions(+), 212 deletions(-)
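Most of the x86 patches below follow one pattern: asm code that used %rbp as a scratch register is rewritten to use another register (or a stack slot), because with frame pointers enabled the kernel unwinds the stack by following the saved-%rbp chain. A hedged, simplified C sketch of that walk (the struct frame layout reflects the x86-64 frame-pointer convention; walk_stack is a hypothetical helper, not a kernel API):

#include <linux/printk.h>

/* With CONFIG_FRAME_POINTER, each function prologue pushes %rbp and
 * copies %rsp into %rbp, so every frame starts with the caller's saved
 * %rbp followed by the return address. A stack walker just follows
 * that chain; if asm code repurposes %rbp to hold cipher data, the
 * walker dereferences garbage. */
struct frame {
	struct frame *next;	/* saved %rbp of the caller */
	unsigned long ret;	/* return address pushed by call */
};

static void walk_stack(struct frame *fp)
{
	while (fp) {
		printk("ret=%lx\n", fp->ret);
		fp = fp->next;	/* bogus if %rbp held scratch data */
	}
}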
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 246c67006ed0..8c1fcb6bad21 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -33,7 +33,7 @@
 #define s3 ((16 + 2 + (3 * 256)) * 4)
 
 /* register macros */
-#define CTX %rdi
+#define CTX %r12
 #define RIO %rsi
 
 #define RX0 %rax
@@ -56,12 +56,12 @@
 #define RX2bh %ch
 #define RX3bh %dh
 
-#define RT0 %rbp
+#define RT0 %rdi
 #define RT1 %rsi
 #define RT2 %r8
 #define RT3 %r9
 
-#define RT0d %ebp
+#define RT0d %edi
 #define RT1d %esi
 #define RT2d %r8d
 #define RT3d %r9d
@@ -120,13 +120,14 @@
 
 ENTRY(__blowfish_enc_blk)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 * %rcx: bool, if true: xor output
 	 */
-	movq %rbp, %r11;
+	movq %r12, %r11;
 
+	movq %rdi, CTX;
 	movq %rsi, %r10;
 	movq %rdx, RIO;
 
@@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk)
 	round_enc(14);
 	add_roundkey_enc(16);
 
-	movq %r11, %rbp;
+	movq %r11, %r12;
 
 	movq %r10, RIO;
 	test %cl, %cl;
@@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk)
 
 ENTRY(blowfish_dec_blk)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
-	movq %rbp, %r11;
+	movq %r12, %r11;
 
+	movq %rdi, CTX;
 	movq %rsi, %r10;
 	movq %rdx, RIO;
 
@@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk)
 	movq %r10, RIO;
 	write_block();
 
-	movq %r11, %rbp;
+	movq %r11, %r12;
 
 	ret;
 ENDPROC(blowfish_dec_blk)
@@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk)
 
 ENTRY(__blowfish_enc_blk_4way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 * %rcx: bool, if true: xor output
 	 */
-	pushq %rbp;
+	pushq %r12;
 	pushq %rbx;
 	pushq %rcx;
 
-	preload_roundkey_enc(0);
-
+	movq %rdi, CTX
 	movq %rsi, %r11;
 	movq %rdx, RIO;
 
+	preload_roundkey_enc(0);
+
 	read_block4();
 
 	round_enc4(0);
@@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way)
 	round_enc4(14);
 	add_preloaded_roundkey4();
 
-	popq %rbp;
+	popq %r12;
 	movq %r11, RIO;
 
-	test %bpl, %bpl;
+	test %r12b, %r12b;
 	jnz .L__enc_xor4;
 
 	write_block4();
 
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 	ret;
 
 .L__enc_xor4:
 	xor_block4();
 
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 	ret;
 ENDPROC(__blowfish_enc_blk_4way)
 
 ENTRY(blowfish_dec_blk_4way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
-	pushq %rbp;
+	pushq %r12;
 	pushq %rbx;
-	preload_roundkey_dec(17);
 
-	movq %rsi, %r11;
+	movq %rdi, CTX;
+	movq %rsi, %r11
 	movq %rdx, RIO;
 
+	preload_roundkey_dec(17);
 	read_block4();
 
 	round_dec4(17);
@@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way)
 	write_block4();
 
 	popq %rbx;
-	popq %rbp;
+	popq %r12;
 
 	ret;
 ENDPROC(blowfish_dec_blk_4way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 310319c601ed..95ba6956a7f6 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -75,17 +75,17 @@
 #define RCD1bh %dh
 
 #define RT0 %rsi
-#define RT1 %rbp
+#define RT1 %r12
 #define RT2 %r8
 
 #define RT0d %esi
-#define RT1d %ebp
+#define RT1d %r12d
 #define RT2d %r8d
 
 #define RT2bl %r8b
 
 #define RXOR %r9
-#define RRBP %r10
+#define RR12 %r10
 #define RDST %r11
 
 #define RXORd %r9d
@@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk)
 	 * %rdx: src
 	 * %rcx: bool xor
 	 */
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 
 	movq %rcx, RXOR;
 	movq %rsi, RDST;
@@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk)
 
 	enc_outunpack(mov, RT1);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 
 .L__enc_xor:
 	enc_outunpack(xor, RT1);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 ENDPROC(__camellia_enc_blk)
 
@@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk)
 	movl $24, RXORd;
 	cmovel RXORd, RT2d; /* max */
 
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
 
@@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk)
 
 	dec_outunpack();
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	ret;
 ENDPROC(camellia_dec_blk)
 
@@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way)
 	 */
 	pushq %rbx;
 
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rcx, RXOR;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
@@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way)
 
 	enc_outunpack2(mov, RT2);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	popq %rbx;
 	ret;
 
 .L__enc2_xor:
 	enc_outunpack2(xor, RT2);
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	popq %rbx;
 	ret;
 ENDPROC(__camellia_enc_blk_2way)
@@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way)
 	cmovel RXORd, RT2d; /* max */
 
 	movq %rbx, RXOR;
-	movq %rbp, RRBP;
+	movq %r12, RR12;
 	movq %rsi, RDST;
 	movq %rdx, RIO;
 
@@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way)
 
 	dec_outunpack2();
 
-	movq RRBP, %rbp;
+	movq RR12, %r12;
 	movq RXOR, %rbx;
 	ret;
 ENDPROC(camellia_dec_blk_2way)
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b4a8806234ea..86107c961bb4 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
 /**********************************************************************
  16-way AVX cast5
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 
 #define RL1 %xmm0
 #define RR1 %xmm1
@@ -70,8 +70,8 @@
 
 #define RTMP %xmm15
 
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
 #define RID2 %rsi
 #define RID2d %esi
 
@@ -226,7 +226,7 @@
 .align 16
 __cast5_enc_blk16:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RL1: blocks 1 and 2
 	 * RR1: blocks 3 and 4
 	 * RL2: blocks 5 and 6
@@ -246,9 +246,11 @@ __cast5_enc_blk16:
 	 * RR4: encrypted blocks 15 and 16
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -283,7 +285,7 @@ __cast5_enc_blk16:
 
 .L__skip_enc:
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16)
 .align 16
 __cast5_dec_blk16:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RL1: encrypted blocks 1 and 2
 	 * RR1: encrypted blocks 3 and 4
 	 * RL2: encrypted blocks 5 and 6
@@ -318,9 +320,11 @@ __cast5_dec_blk16:
 	 * RR4: decrypted blocks 15 and 16
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -356,7 +360,7 @@ __cast5_dec_blk16:
 
 	vmovdqa .Lbswap_mask, RKM;
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
 	outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16)
 
 ENTRY(cast5_ecb_enc_16way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_enc_16way)
 
 ENTRY(cast5_ecb_dec_16way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 
 	FRAME_BEGIN
+	pushq %r15;
+
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	vmovdqu (0*4*4)(%rdx), RL1;
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way)
 	vmovdqu RR4, (6*4*4)(%r11);
 	vmovdqu RL4, (7*4*4)(%r11);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast5_ecb_dec_16way)
 
 ENTRY(cast5_cbc_dec_16way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast5_cbc_dec_16way)
 
 ENTRY(cast5_ctr_16way)
	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 * %rcx: iv (big endian, 64bit)
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way)
 	vmovdqu RR4, (6*16)(%r11);
 	vmovdqu RL4, (7*16)(%r11);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast5_ctr_16way)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 952d3156a933..7f30b6f0d72c 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
 /**********************************************************************
  8-way AVX cast6
  **********************************************************************/
-#define CTX %rdi
+#define CTX %r15
 
 #define RA1 %xmm0
 #define RB1 %xmm1
@@ -70,8 +70,8 @@
 
 #define RTMP %xmm15
 
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
 #define RID2 %rsi
 #define RID2d %esi
 
@@ -264,15 +264,17 @@
 .align 8
 __cast6_enc_blk8:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
 	 * output:
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -297,7 +299,7 @@ __cast6_enc_blk8:
 	QBAR(11);
 
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8)
 .align 8
 __cast6_dec_blk8:
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
 	 * output:
 	 * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
 	 */
 
-	pushq %rbp;
+	pushq %r15;
 	pushq %rbx;
 
+	movq %rdi, CTX;
+
 	vmovdqa .Lbswap_mask, RKM;
 	vmovd .Lfirst_mask, R1ST;
 	vmovd .L32_mask, R32;
@@ -343,7 +347,7 @@ __cast6_dec_blk8:
 	QBAR(0);
 
 	popq %rbx;
-	popq %rbp;
+	popq %r15;
 
 	vmovdqa .Lbswap_mask, RKM;
 	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8)
 
 ENTRY(cast6_ecb_enc_8way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way)
 
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_enc_8way)
 
 ENTRY(cast6_ecb_dec_8way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 
 	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way)
 
 	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_ecb_dec_8way)
 
 ENTRY(cast6_cbc_dec_8way)
 	/* input:
-	 * %rdi: ctx, CTX
+	 * %rdi: ctx
 	 * %rsi: dst
 	 * %rdx: src
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15;
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way)
 
 	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast6_cbc_dec_8way)
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way)
 	 * %rcx: iv (little endian, 128bit)
 	 */
 	FRAME_BEGIN
-
 	pushq %r12;
+	pushq %r15
 
+	movq %rdi, CTX;
 	movq %rsi, %r11;
 	movq %rdx, %r12;
 
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way)
 
 	store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	popq %r12;
-
 	FRAME_END
 	ret;
 ENDPROC(cast6_ctr_8way)
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way)
 	 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX
 	movq %rsi, %r11;
 
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_enc_8way)
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way)
 	 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
 	 */
 	FRAME_BEGIN
+	pushq %r15;
 
+	movq %rdi, CTX
 	movq %rsi, %r11;
 
 	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way)
 	/* dst <= regs xor IVs(in dst) */
 	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
+	popq %r15;
 	FRAME_END
 	ret;
 ENDPROC(cast6_xts_dec_8way)
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index f3e91647ca27..8e49ce117494 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -64,12 +64,12 @@
 #define RW2bh %ch
 
 #define RT0 %r15
-#define RT1 %rbp
+#define RT1 %rsi
 #define RT2 %r14
 #define RT3 %rdx
 
 #define RT0d %r15d
-#define RT1d %ebp
+#define RT1d %esi
 #define RT2d %r14d
 #define RT3d %edx
 
@@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	 * %rsi: dst
 	 * %rdx: src
 	 */
-	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
 	pushq %r13;
 	pushq %r14;
 	pushq %r15;
 
+	pushq %rsi; /* dst */
+
 	read_block(%rdx, RL0, RR0);
 	initial_permutation(RL0, RR0);
 
@@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	round1(32+15, RL0, RR0, dummy2);
 
 	final_permutation(RR0, RL0);
+
+	popq %rsi /* dst */
 	write_block(%rsi, RR0, RL0);
 
 	popq %r15;
@@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk)
 	popq %r13;
 	popq %r12;
 	popq %rbx;
-	popq %rbp;
 
 	ret;
 ENDPROC(des3_ede_x86_64_crypt_blk)
@@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	 * %rdx: src (3 blocks)
 	 */
 
-	pushq %rbp;
 	pushq %rbx;
 	pushq %r12;
 	pushq %r13;
 	pushq %r14;
 	pushq %r15;
 
+	pushq %rsi /* dst */
+
 	/* load input */
 	movl 0 * 4(%rdx), RL0d;
 	movl 1 * 4(%rdx), RR0d;
@@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	bswapl RR2d;
 	bswapl RL2d;
 
+	popq %rsi /* dst */
 	movl RR0d, 0 * 4(%rsi);
 	movl RL0d, 1 * 4(%rsi);
 	movl RR1d, 2 * 4(%rsi);
@@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	popq %r13;
 	popq %r12;
 	popq %rbx;
-	popq %rbp;
 
 	ret;
 ENDPROC(des3_ede_x86_64_crypt_blk_3way)
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1eab79c9ac48..9f712a7dfd79 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -89,7 +89,7 @@
 #define REG_RE %rdx
 #define REG_RTA %r12
 #define REG_RTB %rbx
-#define REG_T1 %ebp
+#define REG_T1 %r11d
 #define xmm_mov vmovups
 #define avx2_zeroupper vzeroupper
 #define RND_F1 1
@@ -637,7 +637,6 @@ _loop3:
 ENTRY(\name)
 
 	push %rbx
-	push %rbp
 	push %r12
 	push %r13
 	push %r14
@@ -673,7 +672,6 @@ _loop3:
 	pop %r14
 	pop %r13
 	pop %r12
-	pop %rbp
 	pop %rbx
 
 	ret
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index a4109506a5e8..6204bd53528c 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -37,7 +37,7 @@
 #define REG_A %ecx
 #define REG_B %esi
 #define REG_C %edi
-#define REG_D %ebp
+#define REG_D %r12d
 #define REG_E %edx
 
 #define REG_T1 %eax
@@ -74,10 +74,10 @@
 ENTRY(\name)
 
 	push %rbx
-	push %rbp
 	push %r12
+	push %rbp
+	mov %rsp, %rbp
 
-	mov %rsp, %r12
 	sub $64, %rsp		# allocate workspace
 	and $~15, %rsp		# align stack
 
@@ -99,10 +99,9 @@
 	xor %rax, %rax
 	rep stosq
 
-	mov %r12, %rsp		# deallocate workspace
-
-	pop %r12
+	mov %rbp, %rsp		# deallocate workspace
 	pop %rbp
+	pop %r12
 	pop %rbx
 	ret
 
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index e08888a1a5f2..001bbcf93c79 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -103,7 +103,7 @@ SRND = %rsi # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
-TBL = %rbp
+TBL = %r12
 a = %eax
 b = %ebx
 
@@ -350,13 +350,13 @@ a = TMP_
 ENTRY(sha256_transform_avx)
 .align 32
 	pushq %rbx
-	pushq %rbp
+	pushq %r12
 	pushq %r13
 	pushq %r14
 	pushq %r15
-	pushq %r12
+	pushq %rbp
+	movq %rsp, %rbp
 
-	mov %rsp, %r12
 	subq $STACK_SIZE, %rsp	# allocate stack space
 	and $~15, %rsp		# align stack pointer
 
@@ -452,13 +452,12 @@ loop2:
 
 done_hash:
 
-	mov %r12, %rsp
-
-	popq %r12
+	mov %rbp, %rsp
+	popq %rbp
 	popq %r15
 	popq %r14
 	popq %r13
-	popq %rbp
+	popq %r12
 	popq %rbx
 	ret
 ENDPROC(sha256_transform_avx)
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 89c8f09787d2..1420db15dcdd 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -98,8 +98,6 @@ d = %r8d
 e = %edx	# clobbers NUM_BLKS
 y3 = %esi	# clobbers INP
 
-
-TBL = %rbp
 SRND = CTX	# SRND is same register as CTX
 
 a = %eax
@@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE
 ENTRY(sha256_transform_rorx)
 .align 32
 	pushq %rbx
-	pushq %rbp
 	pushq %r12
 	pushq %r13
 	pushq %r14
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
 	mov CTX, _CTX(%rsp)
 
 loop0:
-	lea K256(%rip), TBL
-
 	## Load first 16 dwords from two blocks
 	VMOVDQ 0*32(INP),XTMP0
 	VMOVDQ 1*32(INP),XTMP1
@@ -597,19 +592,19 @@ last_block_enter:
 
 .align 16
 loop1:
-	vpaddd 0*32(TBL, SRND), X0, XFER
+	vpaddd K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED _XFER + 0*32
 
-	vpaddd 1*32(TBL, SRND), X0, XFER
+	vpaddd K256+1*32(SRND), X0, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED _XFER + 1*32
 
-	vpaddd 2*32(TBL, SRND), X0, XFER
+	vpaddd K256+2*32(SRND), X0, XFER
 	vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED _XFER + 2*32
 
-	vpaddd 3*32(TBL, SRND), X0, XFER
+	vpaddd K256+3*32(SRND), X0, XFER
 	vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
 	FOUR_ROUNDS_AND_SCHED _XFER + 3*32
 
@@ -619,10 +614,11 @@ loop1:
 
 loop2:
 	## Do last 16 rounds with no scheduling
-	vpaddd 0*32(TBL, SRND), X0, XFER
+	vpaddd K256+0*32(SRND), X0, XFER
 	vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS _XFER + 0*32
-	vpaddd 1*32(TBL, SRND), X1, XFER
+
+	vpaddd K256+1*32(SRND), X1, XFER
 	vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
 	DO_4ROUNDS _XFER + 1*32
 	add $2*32, SRND
@@ -676,9 +672,6 @@ loop3:
 	ja done_hash
 
 do_last_block:
-	#### do last block
-	lea K256(%rip), TBL
-
 	VMOVDQ 0*16(INP),XWORD0
 	VMOVDQ 1*16(INP),XWORD1
 	VMOVDQ 2*16(INP),XWORD2
@@ -718,7 +711,6 @@ done_hash:
 	popq %r14
 	popq %r13
 	popq %r12
-	popq %rbp
 	popq %rbx
 	ret
 ENDPROC(sha256_transform_rorx)
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index 39b83c93e7fd..c6c05ed2c16a 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -95,7 +95,7 @@ SRND = %rsi # clobbers INP
 c = %ecx
 d = %r8d
 e = %edx
-TBL = %rbp
+TBL = %r12
 a = %eax
 b = %ebx
 
@@ -356,13 +356,13 @@ a = TMP_
 ENTRY(sha256_transform_ssse3)
 .align 32
 	pushq %rbx
-	pushq %rbp
+	pushq %r12
 	pushq %r13
 	pushq %r14
 	pushq %r15
-	pushq %r12
+	pushq %rbp
+	mov %rsp, %rbp
 
-	mov %rsp, %r12
 	subq $STACK_SIZE, %rsp
 	and $~15, %rsp
 
@@ -462,13 +462,12 @@ loop2:
 
 done_hash:
 
-	mov %r12, %rsp
-
-	popq %r12
+	mov %rbp, %rsp
+	popq %rbp
 	popq %r15
 	popq %r14
 	popq %r13
-	popq %rbp
+	popq %r12
 	popq %rbx
 
 	ret
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 7f5f6c6ec72e..b16d56005162 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -69,8 +69,9 @@ XFER = YTMP0
 
 BYTE_FLIP_MASK = %ymm9
 
-# 1st arg
-CTX = %rdi
+# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
+CTX1 = %rdi
+CTX2 = %r12
 # 2nd arg
 INP = %rsi
 # 3rd arg
@@ -81,7 +82,7 @@ d = %r8
 e = %rdx
 y3 = %rsi
 
-TBL = %rbp
+TBL = %rdi # clobbers CTX1
 
 a = %rax
 b = %rbx
@@ -91,26 +92,26 @@ g = %r10
 h = %r11
 old_h = %r11
 
-T1 = %r12
+T1 = %r12 # clobbers CTX2
 y0 = %r13
 y1 = %r14
 y2 = %r15
 
-y4 = %r12
-
 # Local variables (stack frame)
 XFER_SIZE = 4*8
 SRND_SIZE = 1*8
 INP_SIZE = 1*8
 INPEND_SIZE = 1*8
+CTX_SIZE = 1*8
 RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 6*8
+GPRSAVE_SIZE = 5*8
 
 frame_XFER = 0
 frame_SRND = frame_XFER + XFER_SIZE
 frame_INP = frame_SRND + SRND_SIZE
 frame_INPEND = frame_INP + INP_SIZE
-frame_RSPSAVE = frame_INPEND + INPEND_SIZE
+frame_CTX = frame_INPEND + INPEND_SIZE
+frame_RSPSAVE = frame_CTX + CTX_SIZE
 frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
 frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 
@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
 	mov %rax, frame_RSPSAVE(%rsp)
 
 	# Save GPRs
-	mov %rbp, frame_GPRSAVE(%rsp)
-	mov %rbx, 8*1+frame_GPRSAVE(%rsp)
-	mov %r12, 8*2+frame_GPRSAVE(%rsp)
-	mov %r13, 8*3+frame_GPRSAVE(%rsp)
-	mov %r14, 8*4+frame_GPRSAVE(%rsp)
-	mov %r15, 8*5+frame_GPRSAVE(%rsp)
+	mov %rbx, 8*0+frame_GPRSAVE(%rsp)
+	mov %r12, 8*1+frame_GPRSAVE(%rsp)
+	mov %r13, 8*2+frame_GPRSAVE(%rsp)
+	mov %r14, 8*3+frame_GPRSAVE(%rsp)
+	mov %r15, 8*4+frame_GPRSAVE(%rsp)
 
 	shl $7, NUM_BLKS	# convert to bytes
 	jz done_hash
@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
 	mov NUM_BLKS, frame_INPEND(%rsp)
 
 	## load initial digest
-	mov 8*0(CTX),a
-	mov 8*1(CTX),b
-	mov 8*2(CTX),c
-	mov 8*3(CTX),d
-	mov 8*4(CTX),e
-	mov 8*5(CTX),f
-	mov 8*6(CTX),g
-	mov 8*7(CTX),h
+	mov 8*0(CTX1), a
+	mov 8*1(CTX1), b
+	mov 8*2(CTX1), c
+	mov 8*3(CTX1), d
+	mov 8*4(CTX1), e
+	mov 8*5(CTX1), f
+	mov 8*6(CTX1), g
+	mov 8*7(CTX1), h
+
+	# save %rdi (CTX) before it gets clobbered
+	mov %rdi, frame_CTX(%rsp)
 
 	vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
 
@@ -652,14 +655,15 @@ loop2:
 	subq $1, frame_SRND(%rsp)
 	jne loop2
 
-	addm 8*0(CTX),a
-	addm 8*1(CTX),b
-	addm 8*2(CTX),c
-	addm 8*3(CTX),d
-	addm 8*4(CTX),e
-	addm 8*5(CTX),f
-	addm 8*6(CTX),g
-	addm 8*7(CTX),h
+	mov frame_CTX(%rsp), CTX2
+	addm 8*0(CTX2), a
+	addm 8*1(CTX2), b
+	addm 8*2(CTX2), c
+	addm 8*3(CTX2), d
+	addm 8*4(CTX2), e
+	addm 8*5(CTX2), f
+	addm 8*6(CTX2), g
+	addm 8*7(CTX2), h
 
 	mov frame_INP(%rsp), INP
 	add $128, INP
@@ -669,12 +673,11 @@ loop2:
 done_hash:
 
 # Restore GPRs
-	mov frame_GPRSAVE(%rsp) ,%rbp
-	mov 8*1+frame_GPRSAVE(%rsp) ,%rbx
-	mov 8*2+frame_GPRSAVE(%rsp) ,%r12
-	mov 8*3+frame_GPRSAVE(%rsp) ,%r13
-	mov 8*4+frame_GPRSAVE(%rsp) ,%r14
-	mov 8*5+frame_GPRSAVE(%rsp) ,%r15
+	mov 8*0+frame_GPRSAVE(%rsp), %rbx
+	mov 8*1+frame_GPRSAVE(%rsp), %r12
+	mov 8*2+frame_GPRSAVE(%rsp), %r13
+	mov 8*3+frame_GPRSAVE(%rsp), %r14
+	mov 8*4+frame_GPRSAVE(%rsp), %r15
 
 	# Restore Stack Pointer
 	mov frame_RSPSAVE(%rsp), %rsp
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index b3f49d286348..73b471da3622 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -76,8 +76,8 @@
 #define RT %xmm14
 #define RR %xmm15
 
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %r13
+#define RID1d %r13d
 #define RID2 %rsi
 #define RID2d %esi
 
@@ -259,7 +259,7 @@ __twofish_enc_blk8:
 
 	vmovdqu w(CTX), RK1;
 
-	pushq %rbp;
+	pushq %r13;
 	pushq %rbx;
 	pushq %rcx;
 
@@ -282,7 +282,7 @@ __twofish_enc_blk8:
 
 	popq %rcx;
 	popq %rbx;
-	popq %rbp;
+	popq %r13;
 
 	outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
@@ -301,7 +301,7 @@ __twofish_dec_blk8:
 
 	vmovdqu (w+4*4)(CTX), RK1;
 
-	pushq %rbp;
+	pushq %r13;
 	pushq %rbx;
 
 	inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
@@ -322,7 +322,7 @@ __twofish_dec_blk8:
 	vmovdqu (w)(CTX), RK1;
 
 	popq %rbx;
-	popq %rbp;
+	popq %r13;
 
 	outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
 	outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index ffa9f4ccd9b4..337cf382718e 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -619,14 +619,14 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
 	struct af_alg_ctx *ctx = ask->private;
 	struct af_alg_tsgl *sgl;
 	struct scatterlist *sg;
-	unsigned int i, j;
+	unsigned int i, j = 0;
 
 	while (!list_empty(&ctx->tsgl_list)) {
 		sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
 				       list);
 		sg = sgl->sg;
 
-		for (i = 0, j = 0; i < sgl->cur; i++) {
+		for (i = 0; i < sgl->cur; i++) {
 			size_t plen = min_t(size_t, used, sg[i].length);
 			struct page *page = sg_page(sg + i);
 
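The af_alg fix above works because j indexes the destination SGL and must survive across the tsgl_list walk; resetting it for every source list made each chained list overwrite the same leading dst entries. A minimal stand-alone sketch of the same indexing pattern (illustrative only, not the kernel code; flatten and its parameters are hypothetical):

#include <stddef.h>

/* Flatten several source arrays into one destination array. As in the
 * af_alg fix, the destination index j is initialized once and persists
 * across source lists; re-initializing it inside the outer loop would
 * overwrite dst[0..] on every pass. */
static size_t flatten(int *const *srcs, const size_t *lens, size_t nsrcs,
		      int *dst)
{
	size_t i, j = 0;	/* j indexes dst across ALL source lists */

	for (size_t s = 0; s < nsrcs; s++)
		for (i = 0; i < lens[s]; i++)
			dst[j++] = srcs[s][i];

	return j;		/* total entries written */
}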
diff --git a/crypto/drbg.c b/crypto/drbg.c
index 633a88e93ab0..70018397e59a 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1133,10 +1133,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg)
 {
 	if (!drbg)
 		return;
-	kzfree(drbg->V);
-	drbg->Vbuf = NULL;
-	kzfree(drbg->C);
-	drbg->Cbuf = NULL;
+	kzfree(drbg->Vbuf);
+	drbg->V = NULL;
+	kzfree(drbg->Cbuf);
+	drbg->C = NULL;
 	kzfree(drbg->scratchpadbuf);
 	drbg->scratchpadbuf = NULL;
 	drbg->reseed_ctr = 0;
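The drbg hunk frees the backing allocations (Vbuf/Cbuf) rather than the aligned aliases (V/C) that point into them. A hedged sketch of that alloc/free pairing, assuming the usual kernel helpers; struct state, state_alloc, and state_free are hypothetical names, not the DRBG's:

#include <linux/kernel.h>
#include <linux/slab.h>

struct state {
	unsigned char *Vbuf;	/* raw pointer returned by kmalloc() */
	unsigned char *V;	/* aligned alias into Vbuf */
};

static int state_alloc(struct state *s, size_t len, size_t align)
{
	s->Vbuf = kmalloc(len + align, GFP_KERNEL);
	if (!s->Vbuf)
		return -ENOMEM;
	s->V = PTR_ALIGN(s->Vbuf, align);	/* may differ from Vbuf */
	return 0;
}

static void state_free(struct state *s)
{
	kzfree(s->Vbuf);	/* free what was allocated, not the alias */
	s->V = NULL;
	s->Vbuf = NULL;
}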
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index e36aeacd7635..1eb852765469 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -1,6 +1,7 @@
1config CRYPTO_DEV_FSL_CAAM 1config CRYPTO_DEV_FSL_CAAM
2 tristate "Freescale CAAM-Multicore driver backend" 2 tristate "Freescale CAAM-Multicore driver backend"
3 depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE 3 depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
4 select SOC_BUS
4 help 5 help
5 Enables the driver module for Freescale's Cryptographic Accelerator 6 Enables the driver module for Freescale's Cryptographic Accelerator
6 and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). 7 and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
@@ -141,10 +142,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
141 To compile this as a module, choose M here: the module 142 To compile this as a module, choose M here: the module
142 will be called caamrng. 143 will be called caamrng.
143 144
144config CRYPTO_DEV_FSL_CAAM_IMX
145 def_bool SOC_IMX6 || SOC_IMX7D
146 depends on CRYPTO_DEV_FSL_CAAM
147
148config CRYPTO_DEV_FSL_CAAM_DEBUG 145config CRYPTO_DEV_FSL_CAAM_DEBUG
149 bool "Enable debug output in CAAM driver" 146 bool "Enable debug output in CAAM driver"
150 depends on CRYPTO_DEV_FSL_CAAM 147 depends on CRYPTO_DEV_FSL_CAAM
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index dacb53fb690e..027e121c6f70 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -7,6 +7,7 @@
7#include <linux/device.h> 7#include <linux/device.h>
8#include <linux/of_address.h> 8#include <linux/of_address.h>
9#include <linux/of_irq.h> 9#include <linux/of_irq.h>
10#include <linux/sys_soc.h>
10 11
11#include "compat.h" 12#include "compat.h"
12#include "regs.h" 13#include "regs.h"
@@ -19,6 +20,8 @@ bool caam_little_end;
19EXPORT_SYMBOL(caam_little_end); 20EXPORT_SYMBOL(caam_little_end);
20bool caam_dpaa2; 21bool caam_dpaa2;
21EXPORT_SYMBOL(caam_dpaa2); 22EXPORT_SYMBOL(caam_dpaa2);
23bool caam_imx;
24EXPORT_SYMBOL(caam_imx);
22 25
23#ifdef CONFIG_CAAM_QI 26#ifdef CONFIG_CAAM_QI
24#include "qi.h" 27#include "qi.h"
@@ -28,19 +31,11 @@ EXPORT_SYMBOL(caam_dpaa2);
28 * i.MX targets tend to have clock control subsystems that can 31 * i.MX targets tend to have clock control subsystems that can
29 * enable/disable clocking to our device. 32 * enable/disable clocking to our device.
30 */ 33 */
31#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
32static inline struct clk *caam_drv_identify_clk(struct device *dev, 34static inline struct clk *caam_drv_identify_clk(struct device *dev,
33 char *clk_name) 35 char *clk_name)
34{ 36{
35 return devm_clk_get(dev, clk_name); 37 return caam_imx ? devm_clk_get(dev, clk_name) : NULL;
36} 38}
37#else
38static inline struct clk *caam_drv_identify_clk(struct device *dev,
39 char *clk_name)
40{
41 return NULL;
42}
43#endif
44 39
45/* 40/*
46 * Descriptor to instantiate RNG State Handle 0 in normal mode and 41 * Descriptor to instantiate RNG State Handle 0 in normal mode and
@@ -430,6 +425,10 @@ static int caam_probe(struct platform_device *pdev)
430{ 425{
431 int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; 426 int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
432 u64 caam_id; 427 u64 caam_id;
428 static const struct soc_device_attribute imx_soc[] = {
429 {.family = "Freescale i.MX"},
430 {},
431 };
433 struct device *dev; 432 struct device *dev;
434 struct device_node *nprop, *np; 433 struct device_node *nprop, *np;
435 struct caam_ctrl __iomem *ctrl; 434 struct caam_ctrl __iomem *ctrl;
@@ -451,6 +450,8 @@ static int caam_probe(struct platform_device *pdev)
451 dev_set_drvdata(dev, ctrlpriv); 450 dev_set_drvdata(dev, ctrlpriv);
452 nprop = pdev->dev.of_node; 451 nprop = pdev->dev.of_node;
453 452
453 caam_imx = (bool)soc_device_match(imx_soc);
454
454 /* Enable clocking */ 455 /* Enable clocking */
455 clk = caam_drv_identify_clk(&pdev->dev, "ipg"); 456 clk = caam_drv_identify_clk(&pdev->dev, "ipg");
456 if (IS_ERR(clk)) { 457 if (IS_ERR(clk)) {
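The caam change replaces the compile-time CONFIG_CRYPTO_DEV_FSL_CAAM_IMX switch with runtime SoC detection, so a single ARMv7 multiplatform kernel can take the i.MX clock paths on i.MX parts and the generic paths on LS1021A. A minimal sketch of the soc_device_match() idiom the patch adopts (running_on_imx is a hypothetical helper, not part of the driver):

#include <linux/sys_soc.h>

/* Match table: any SoC whose family string is "Freescale i.MX". */
static const struct soc_device_attribute imx_soc[] = {
	{ .family = "Freescale i.MX" },
	{ /* sentinel */ }
};

static bool running_on_imx(void)
{
	/* soc_device_match() returns the matching entry or NULL */
	return soc_device_match(imx_soc) != NULL;
}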
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 2b5efff9ec3c..17cfd23a38fa 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -67,6 +67,7 @@
67 */ 67 */
68 68
69extern bool caam_little_end; 69extern bool caam_little_end;
70extern bool caam_imx;
70 71
71#define caam_to_cpu(len) \ 72#define caam_to_cpu(len) \
72static inline u##len caam##len ## _to_cpu(u##len val) \ 73static inline u##len caam##len ## _to_cpu(u##len val) \
@@ -154,13 +155,10 @@ static inline u64 rd_reg64(void __iomem *reg)
154#else /* CONFIG_64BIT */ 155#else /* CONFIG_64BIT */
155static inline void wr_reg64(void __iomem *reg, u64 data) 156static inline void wr_reg64(void __iomem *reg, u64 data)
156{ 157{
157#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX 158 if (!caam_imx && caam_little_end) {
158 if (caam_little_end) {
159 wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); 159 wr_reg32((u32 __iomem *)(reg) + 1, data >> 32);
160 wr_reg32((u32 __iomem *)(reg), data); 160 wr_reg32((u32 __iomem *)(reg), data);
161 } else 161 } else {
162#endif
163 {
164 wr_reg32((u32 __iomem *)(reg), data >> 32); 162 wr_reg32((u32 __iomem *)(reg), data >> 32);
165 wr_reg32((u32 __iomem *)(reg) + 1, data); 163 wr_reg32((u32 __iomem *)(reg) + 1, data);
166 } 164 }
@@ -168,41 +166,40 @@ static inline void wr_reg64(void __iomem *reg, u64 data)
168 166
169static inline u64 rd_reg64(void __iomem *reg) 167static inline u64 rd_reg64(void __iomem *reg)
170{ 168{
171#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX 169 if (!caam_imx && caam_little_end)
172 if (caam_little_end)
173 return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | 170 return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 |
174 (u64)rd_reg32((u32 __iomem *)(reg))); 171 (u64)rd_reg32((u32 __iomem *)(reg)));
175 else 172
176#endif 173 return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
177 return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | 174 (u64)rd_reg32((u32 __iomem *)(reg) + 1));
178 (u64)rd_reg32((u32 __iomem *)(reg) + 1));
179} 175}
180#endif /* CONFIG_64BIT */ 176#endif /* CONFIG_64BIT */
181 177
178static inline u64 cpu_to_caam_dma64(dma_addr_t value)
179{
180 if (caam_imx)
181 return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) |
182 (u64)cpu_to_caam32(upper_32_bits(value)));
183
184 return cpu_to_caam64(value);
185}
186
187static inline u64 caam_dma64_to_cpu(u64 value)
188{
189 if (caam_imx)
190 return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) |
191 (u64)caam32_to_cpu(upper_32_bits(value)));
192
193 return caam64_to_cpu(value);
194}
195
182#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT 196#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
183#ifdef CONFIG_SOC_IMX7D 197#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value)
184#define cpu_to_caam_dma(value) \ 198#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value)
185 (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
186 (u64)cpu_to_caam32(upper_32_bits(value)))
187#define caam_dma_to_cpu(value) \
188 (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \
189 (u64)caam32_to_cpu(upper_32_bits(value)))
190#else
191#define cpu_to_caam_dma(value) cpu_to_caam64(value)
192#define caam_dma_to_cpu(value) caam64_to_cpu(value)
193#endif /* CONFIG_SOC_IMX7D */
194#else 199#else
195#define cpu_to_caam_dma(value) cpu_to_caam32(value) 200#define cpu_to_caam_dma(value) cpu_to_caam32(value)
196#define caam_dma_to_cpu(value) caam32_to_cpu(value) 201#define caam_dma_to_cpu(value) caam32_to_cpu(value)
197#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ 202#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
198
199#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
200#define cpu_to_caam_dma64(value) \
201 (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
202 (u64)cpu_to_caam32(upper_32_bits(value)))
203#else
204#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
205#endif
206 203
207/* 204/*
208 * jr_outentry 205 * jr_outentry
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index d2207ac5ba19..5438552bc6d7 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -386,7 +386,7 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
386 struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); 386 struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
387 struct safexcel_crypto_priv *priv = ctx->priv; 387 struct safexcel_crypto_priv *priv = ctx->priv;
388 struct skcipher_request req; 388 struct skcipher_request req;
389 struct safexcel_inv_result result = { 0 }; 389 struct safexcel_inv_result result = {};
390 int ring = ctx->base.ring; 390 int ring = ctx->base.ring;
391 391
392 memset(&req, 0, sizeof(struct skcipher_request)); 392 memset(&req, 0, sizeof(struct skcipher_request));
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 3f819399cd95..3980f946874f 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -419,7 +419,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
419 struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); 419 struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
420 struct safexcel_crypto_priv *priv = ctx->priv; 420 struct safexcel_crypto_priv *priv = ctx->priv;
421 struct ahash_request req; 421 struct ahash_request req;
422 struct safexcel_inv_result result = { 0 }; 422 struct safexcel_inv_result result = {};
423 int ring = ctx->base.ring; 423 int ring = ctx->base.ring;
424 424
425 memset(&req, 0, sizeof(struct ahash_request)); 425 memset(&req, 0, sizeof(struct ahash_request));
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 79791c690858..dff88838dce7 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1756,9 +1756,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
1756 req_ctx->swinit = 0; 1756 req_ctx->swinit = 0;
1757 } else { 1757 } else {
1758 desc->ptr[1] = zero_entry; 1758 desc->ptr[1] = zero_entry;
1759 /* Indicate next op is not the first. */
1760 req_ctx->first = 0;
1761 } 1759 }
1760 /* Indicate next op is not the first. */
1761 req_ctx->first = 0;
1762 1762
1763 /* HMAC key */ 1763 /* HMAC key */
1764 if (ctx->keylen) 1764 if (ctx->keylen)
@@ -1769,7 +1769,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
1769 1769
1770 sg_count = edesc->src_nents ?: 1; 1770 sg_count = edesc->src_nents ?: 1;
1771 if (is_sec1 && sg_count > 1) 1771 if (is_sec1 && sg_count > 1)
1772 sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length); 1772 sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length);
1773 else 1773 else
1774 sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, 1774 sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count,
1775 DMA_TO_DEVICE); 1775 DMA_TO_DEVICE);
@@ -3057,7 +3057,8 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
3057 t_alg->algt.alg.hash.final = ahash_final; 3057 t_alg->algt.alg.hash.final = ahash_final;
3058 t_alg->algt.alg.hash.finup = ahash_finup; 3058 t_alg->algt.alg.hash.finup = ahash_finup;
3059 t_alg->algt.alg.hash.digest = ahash_digest; 3059 t_alg->algt.alg.hash.digest = ahash_digest;
3060 t_alg->algt.alg.hash.setkey = ahash_setkey; 3060 if (!strncmp(alg->cra_name, "hmac", 4))
3061 t_alg->algt.alg.hash.setkey = ahash_setkey;
3061 t_alg->algt.alg.hash.import = ahash_import; 3062 t_alg->algt.alg.hash.import = ahash_import;
3062 t_alg->algt.alg.hash.export = ahash_export; 3063 t_alg->algt.alg.hash.export = ahash_export;
3063 3064
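The last talitos hunk wires up ->setkey only for the "hmac(...)" algorithm names, so the ahash core treats the plain digests as unkeyed and algif_hash no longer sees a bogus setkey on them. A hedged, generic sketch of that registration choice (init_hash_ops and the my_setkey parameter are hypothetical, not talitos code):

#include <crypto/hash.h>

/* Advertise ->setkey only for keyed (hmac) algorithms; leaving it NULL
 * tells the ahash core the algorithm takes no key. */
static void init_hash_ops(struct ahash_alg *alg, bool is_hmac,
			  int (*my_setkey)(struct crypto_ahash *tfm,
					   const u8 *key,
					   unsigned int keylen))
{
	if (is_hmac)
		alg->setkey = my_setkey;	/* keyed: hmac(shaX), hmac(md5) */
	else
		alg->setkey = NULL;		/* unkeyed: plain digest */
}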