Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/Kconfig                                   |    3
-rw-r--r--  arch/arm/crypto/Kconfig                            |  130
-rw-r--r--  arch/arm/crypto/Makefile                           |   27
-rw-r--r--  arch/arm/crypto/aes-ce-core.S                      |  518
-rw-r--r--  arch/arm/crypto/aes-ce-glue.c                      |  524
-rw-r--r--  arch/arm/crypto/aesbs-glue.c                       |    9
-rw-r--r--  arch/arm/crypto/ghash-ce-core.S                    |   94
-rw-r--r--  arch/arm/crypto/ghash-ce-glue.c                    |  320
-rw-r--r--  arch/arm/crypto/sha1-ce-core.S                     |  125
-rw-r--r--  arch/arm/crypto/sha1-ce-glue.c                     |   96
-rw-r--r--  arch/arm/crypto/sha1.h (renamed from arch/arm/include/asm/crypto/sha1.h) | 3
-rw-r--r--  arch/arm/crypto/sha1_glue.c                        |  112
-rw-r--r--  arch/arm/crypto/sha1_neon_glue.c                   |  137
-rw-r--r--  arch/arm/crypto/sha2-ce-core.S                     |  125
-rw-r--r--  arch/arm/crypto/sha2-ce-glue.c                     |  114
-rw-r--r--  arch/arm/crypto/sha256-armv4.pl                    |  716
-rw-r--r--  arch/arm/crypto/sha256-core.S_shipped              | 2808
-rw-r--r--  arch/arm/crypto/sha256_glue.c                      |  128
-rw-r--r--  arch/arm/crypto/sha256_glue.h                      |   14
-rw-r--r--  arch/arm/crypto/sha256_neon_glue.c                 |  101
-rw-r--r--  arch/arm64/crypto/aes-glue.c                       |   12
-rw-r--r--  arch/arm64/crypto/sha1-ce-core.S                   |   33
-rw-r--r--  arch/arm64/crypto/sha1-ce-glue.c                   |  151
-rw-r--r--  arch/arm64/crypto/sha2-ce-core.S                   |   29
-rw-r--r--  arch/arm64/crypto/sha2-ce-glue.c                   |  227
-rw-r--r--  arch/mips/cavium-octeon/crypto/Makefile            |    5
-rw-r--r--  arch/mips/cavium-octeon/crypto/octeon-crypto.c     |    4
-rw-r--r--  arch/mips/cavium-octeon/crypto/octeon-crypto.h     |   83
-rw-r--r--  arch/mips/cavium-octeon/crypto/octeon-md5.c        |    8
-rw-r--r--  arch/mips/cavium-octeon/crypto/octeon-sha1.c       |  241
-rw-r--r--  arch/mips/cavium-octeon/crypto/octeon-sha256.c     |  280
-rw-r--r--  arch/mips/cavium-octeon/crypto/octeon-sha512.c     |  277
-rw-r--r--  arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h  |   14
-rw-r--r--  arch/powerpc/crypto/Makefile                       |    8
-rw-r--r--  arch/powerpc/crypto/aes-spe-core.S                 |  351
-rw-r--r--  arch/powerpc/crypto/aes-spe-glue.c                 |  512
-rw-r--r--  arch/powerpc/crypto/aes-spe-keys.S                 |  283
-rw-r--r--  arch/powerpc/crypto/aes-spe-modes.S                |  630
-rw-r--r--  arch/powerpc/crypto/aes-spe-regs.h                 |   42
-rw-r--r--  arch/powerpc/crypto/aes-tab-4k.S                   |  331
-rw-r--r--  arch/powerpc/crypto/md5-asm.S                      |  243
-rw-r--r--  arch/powerpc/crypto/md5-glue.c                     |  165
-rw-r--r--  arch/powerpc/crypto/sha1-spe-asm.S                 |  299
-rw-r--r--  arch/powerpc/crypto/sha1-spe-glue.c                |  210
-rw-r--r--  arch/powerpc/crypto/sha256-spe-asm.S               |  323
-rw-r--r--  arch/powerpc/crypto/sha256-spe-glue.c              |  275
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c                 |  187
-rw-r--r--  arch/x86/crypto/camellia_aesni_avx2_glue.c         |   15
-rw-r--r--  arch/x86/crypto/camellia_aesni_avx_glue.c          |   15
-rw-r--r--  arch/x86/crypto/cast5_avx_glue.c                   |    9
-rw-r--r--  arch/x86/crypto/cast6_avx_glue.c                   |   15
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_glue.c         |    7
-rw-r--r--  arch/x86/crypto/glue_helper.c                      |    1
-rw-r--r--  arch/x86/crypto/serpent_avx2_glue.c                |   15
-rw-r--r--  arch/x86/crypto/serpent_avx_glue.c                 |   15
-rw-r--r--  arch/x86/crypto/serpent_sse2_glue.c                |   15
-rw-r--r--  arch/x86/crypto/sha-mb/sha1_mb.c                   |    9
-rw-r--r--  arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c     |    2
-rw-r--r--  arch/x86/crypto/sha1_ssse3_glue.c                  |  139
-rw-r--r--  arch/x86/crypto/sha256-avx-asm.S                   |   10
-rw-r--r--  arch/x86/crypto/sha256-avx2-asm.S                  |   10
-rw-r--r--  arch/x86/crypto/sha256-ssse3-asm.S                 |   10
-rw-r--r--  arch/x86/crypto/sha256_ssse3_glue.c                |  193
-rw-r--r--  arch/x86/crypto/sha512-avx-asm.S                   |    6
-rw-r--r--  arch/x86/crypto/sha512-avx2-asm.S                  |    6
-rw-r--r--  arch/x86/crypto/sha512-ssse3-asm.S                 |    6
-rw-r--r--  arch/x86/crypto/sha512_ssse3_glue.c                |  202
-rw-r--r--  arch/x86/crypto/twofish_avx_glue.c                 |   15
68 files changed, 10913 insertions(+), 1129 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index da1266c53c13..7cbf4ef5c6fd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2175,6 +2175,9 @@ source "arch/arm/Kconfig.debug"
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm/crypto/Kconfig"
+endif
 
 source "lib/Kconfig"
 
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
new file mode 100644
index 000000000000..8da2207b0072
--- /dev/null
+++ b/arch/arm/crypto/Kconfig
@@ -0,0 +1,130 @@
1
2menuconfig ARM_CRYPTO
3 bool "ARM Accelerated Cryptographic Algorithms"
4 depends on ARM
5 help
6 Say Y here to choose from a selection of cryptographic algorithms
7 implemented using ARM specific CPU features or instructions.
8
9if ARM_CRYPTO
10
11config CRYPTO_SHA1_ARM
12 tristate "SHA1 digest algorithm (ARM-asm)"
13 select CRYPTO_SHA1
14 select CRYPTO_HASH
15 help
16 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
17 using optimized ARM assembler.
18
19config CRYPTO_SHA1_ARM_NEON
20 tristate "SHA1 digest algorithm (ARM NEON)"
21 depends on KERNEL_MODE_NEON
22 select CRYPTO_SHA1_ARM
23 select CRYPTO_SHA1
24 select CRYPTO_HASH
25 help
26 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
27 using optimized ARM NEON assembly, when NEON instructions are
28 available.
29
30config CRYPTO_SHA1_ARM_CE
31 tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
32 depends on KERNEL_MODE_NEON
33 select CRYPTO_SHA1_ARM
34 select CRYPTO_HASH
35 help
36 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
37 using special ARMv8 Crypto Extensions.
38
39config CRYPTO_SHA2_ARM_CE
40 tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
41 depends on KERNEL_MODE_NEON
42 select CRYPTO_SHA256_ARM
43 select CRYPTO_HASH
44 help
45 SHA-256 secure hash standard (DFIPS 180-2) implemented
46 using special ARMv8 Crypto Extensions.
47
48config CRYPTO_SHA256_ARM
49 tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
50 select CRYPTO_HASH
51 depends on !CPU_V7M
52 help
53 SHA-256 secure hash standard (DFIPS 180-2) implemented
54 using optimized ARM assembler and NEON, when available.
55
56config CRYPTO_SHA512_ARM_NEON
57 tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
58 depends on KERNEL_MODE_NEON
59 select CRYPTO_SHA512
60 select CRYPTO_HASH
61 help
62 SHA-512 secure hash standard (DFIPS 180-2) implemented
63 using ARM NEON instructions, when available.
64
65 This version of SHA implements a 512 bit hash with 256 bits of
66 security against collision attacks.
67
68 This code also includes SHA-384, a 384 bit hash with 192 bits
69 of security against collision attacks.
70
71config CRYPTO_AES_ARM
72 tristate "AES cipher algorithms (ARM-asm)"
73 depends on ARM
74 select CRYPTO_ALGAPI
75 select CRYPTO_AES
76 help
77 Use optimized AES assembler routines for ARM platforms.
78
79 AES cipher algorithms (FIPS-197). AES uses the Rijndael
80 algorithm.
81
82 Rijndael appears to be consistently a very good performer in
83 both hardware and software across a wide range of computing
84 environments regardless of its use in feedback or non-feedback
85 modes. Its key setup time is excellent, and its key agility is
86 good. Rijndael's very low memory requirements make it very well
87 suited for restricted-space environments, in which it also
88 demonstrates excellent performance. Rijndael's operations are
89 among the easiest to defend against power and timing attacks.
90
91 The AES specifies three key sizes: 128, 192 and 256 bits
92
93 See <http://csrc.nist.gov/encryption/aes/> for more information.
94
95config CRYPTO_AES_ARM_BS
96 tristate "Bit sliced AES using NEON instructions"
97 depends on KERNEL_MODE_NEON
98 select CRYPTO_ALGAPI
99 select CRYPTO_AES_ARM
100 select CRYPTO_ABLK_HELPER
101 help
102 Use a faster and more secure NEON based implementation of AES in CBC,
103 CTR and XTS modes
104
105 Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
106 and for XTS mode encryption, CBC and XTS mode decryption speedup is
107 around 25%. (CBC encryption speed is not affected by this driver.)
108 This implementation does not rely on any lookup tables so it is
109 believed to be invulnerable to cache timing attacks.
110
111config CRYPTO_AES_ARM_CE
112 tristate "Accelerated AES using ARMv8 Crypto Extensions"
113 depends on KERNEL_MODE_NEON
114 select CRYPTO_ALGAPI
115 select CRYPTO_ABLK_HELPER
116 help
117 Use an implementation of AES in CBC, CTR and XTS modes that uses
118 ARMv8 Crypto Extensions
119
120config CRYPTO_GHASH_ARM_CE
121 tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
122 depends on KERNEL_MODE_NEON
123 select CRYPTO_HASH
124 select CRYPTO_CRYPTD
125 help
126 Use an implementation of GHASH (used by the GCM AEAD chaining mode)
127 that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
128 that is part of the ARMv8 Crypto Extensions
129
130endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index b48fa341648d..6ea828241fcb 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -6,13 +6,35 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
 
+ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+
+ifneq ($(ce-obj-y)$(ce-obj-m),)
+ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
+obj-y += $(ce-obj-y)
+obj-m += $(ce-obj-m)
+else
+$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
+$(warning $(ce-obj-y) $(ce-obj-m))
+endif
+endif
+
 aes-arm-y := aes-armv4.o aes_glue.o
 aes-arm-bs-y := aesbs-core.o aesbs-glue.o
 sha1-arm-y := sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
+sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
 sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
+sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
+sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
+aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
+ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
 
 quiet_cmd_perl = PERL $@
       cmd_perl = $(PERL) $(<) > $(@)
@@ -20,4 +42,7 @@ quiet_cmd_perl = PERL $@
 $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
 	$(call cmd,perl)
 
-.PRECIOUS: $(obj)/aesbs-core.S
+$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
+	$(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
new file mode 100644
index 000000000000..8cfa468ee570
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -0,0 +1,518 @@
1/*
2 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13
14 .text
15 .fpu crypto-neon-fp-armv8
16 .align 3
17
18 .macro enc_round, state, key
19 aese.8 \state, \key
20 aesmc.8 \state, \state
21 .endm
22
23 .macro dec_round, state, key
24 aesd.8 \state, \key
25 aesimc.8 \state, \state
26 .endm
27
28 .macro enc_dround, key1, key2
29 enc_round q0, \key1
30 enc_round q0, \key2
31 .endm
32
33 .macro dec_dround, key1, key2
34 dec_round q0, \key1
35 dec_round q0, \key2
36 .endm
37
38 .macro enc_fround, key1, key2, key3
39 enc_round q0, \key1
40 aese.8 q0, \key2
41 veor q0, q0, \key3
42 .endm
43
44 .macro dec_fround, key1, key2, key3
45 dec_round q0, \key1
46 aesd.8 q0, \key2
47 veor q0, q0, \key3
48 .endm
49
50 .macro enc_dround_3x, key1, key2
51 enc_round q0, \key1
52 enc_round q1, \key1
53 enc_round q2, \key1
54 enc_round q0, \key2
55 enc_round q1, \key2
56 enc_round q2, \key2
57 .endm
58
59 .macro dec_dround_3x, key1, key2
60 dec_round q0, \key1
61 dec_round q1, \key1
62 dec_round q2, \key1
63 dec_round q0, \key2
64 dec_round q1, \key2
65 dec_round q2, \key2
66 .endm
67
68 .macro enc_fround_3x, key1, key2, key3
69 enc_round q0, \key1
70 enc_round q1, \key1
71 enc_round q2, \key1
72 aese.8 q0, \key2
73 aese.8 q1, \key2
74 aese.8 q2, \key2
75 veor q0, q0, \key3
76 veor q1, q1, \key3
77 veor q2, q2, \key3
78 .endm
79
80 .macro dec_fround_3x, key1, key2, key3
81 dec_round q0, \key1
82 dec_round q1, \key1
83 dec_round q2, \key1
84 aesd.8 q0, \key2
85 aesd.8 q1, \key2
86 aesd.8 q2, \key2
87 veor q0, q0, \key3
88 veor q1, q1, \key3
89 veor q2, q2, \key3
90 .endm
91
92 .macro do_block, dround, fround
93 cmp r3, #12 @ which key size?
94 vld1.8 {q10-q11}, [ip]!
95 \dround q8, q9
96 vld1.8 {q12-q13}, [ip]!
97 \dround q10, q11
98 vld1.8 {q10-q11}, [ip]!
99 \dround q12, q13
100 vld1.8 {q12-q13}, [ip]!
101 \dround q10, q11
102 blo 0f @ AES-128: 10 rounds
103 vld1.8 {q10-q11}, [ip]!
104 beq 1f @ AES-192: 12 rounds
105 \dround q12, q13
106 vld1.8 {q12-q13}, [ip]
107 \dround q10, q11
1080: \fround q12, q13, q14
109 bx lr
110
1111: \dround q12, q13
112 \fround q10, q11, q14
113 bx lr
114 .endm
115
116 /*
117 * Internal, non-AAPCS compliant functions that implement the core AES
118 * transforms. These should preserve all registers except q0 - q2 and ip
119 * Arguments:
120 * q0 : first in/output block
121 * q1 : second in/output block (_3x version only)
122 * q2 : third in/output block (_3x version only)
123 * q8 : first round key
 124 * q9 : second round key
125 * ip : address of 3rd round key
126 * q14 : final round key
127 * r3 : number of rounds
128 */
129 .align 6
130aes_encrypt:
131 add ip, r2, #32 @ 3rd round key
132.Laes_encrypt_tweak:
133 do_block enc_dround, enc_fround
134ENDPROC(aes_encrypt)
135
136 .align 6
137aes_decrypt:
138 add ip, r2, #32 @ 3rd round key
139 do_block dec_dround, dec_fround
140ENDPROC(aes_decrypt)
141
142 .align 6
143aes_encrypt_3x:
144 add ip, r2, #32 @ 3rd round key
145 do_block enc_dround_3x, enc_fround_3x
146ENDPROC(aes_encrypt_3x)
147
148 .align 6
149aes_decrypt_3x:
150 add ip, r2, #32 @ 3rd round key
151 do_block dec_dround_3x, dec_fround_3x
152ENDPROC(aes_decrypt_3x)
153
154 .macro prepare_key, rk, rounds
155 add ip, \rk, \rounds, lsl #4
156 vld1.8 {q8-q9}, [\rk] @ load first 2 round keys
157 vld1.8 {q14}, [ip] @ load last round key
158 .endm
159
160 /*
161 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
162 * int blocks)
163 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
164 * int blocks)
165 */
166ENTRY(ce_aes_ecb_encrypt)
167 push {r4, lr}
168 ldr r4, [sp, #8]
169 prepare_key r2, r3
170.Lecbencloop3x:
171 subs r4, r4, #3
172 bmi .Lecbenc1x
173 vld1.8 {q0-q1}, [r1, :64]!
174 vld1.8 {q2}, [r1, :64]!
175 bl aes_encrypt_3x
176 vst1.8 {q0-q1}, [r0, :64]!
177 vst1.8 {q2}, [r0, :64]!
178 b .Lecbencloop3x
179.Lecbenc1x:
180 adds r4, r4, #3
181 beq .Lecbencout
182.Lecbencloop:
183 vld1.8 {q0}, [r1, :64]!
184 bl aes_encrypt
185 vst1.8 {q0}, [r0, :64]!
186 subs r4, r4, #1
187 bne .Lecbencloop
188.Lecbencout:
189 pop {r4, pc}
190ENDPROC(ce_aes_ecb_encrypt)
191
192ENTRY(ce_aes_ecb_decrypt)
193 push {r4, lr}
194 ldr r4, [sp, #8]
195 prepare_key r2, r3
196.Lecbdecloop3x:
197 subs r4, r4, #3
198 bmi .Lecbdec1x
199 vld1.8 {q0-q1}, [r1, :64]!
200 vld1.8 {q2}, [r1, :64]!
201 bl aes_decrypt_3x
202 vst1.8 {q0-q1}, [r0, :64]!
203 vst1.8 {q2}, [r0, :64]!
204 b .Lecbdecloop3x
205.Lecbdec1x:
206 adds r4, r4, #3
207 beq .Lecbdecout
208.Lecbdecloop:
209 vld1.8 {q0}, [r1, :64]!
210 bl aes_decrypt
211 vst1.8 {q0}, [r0, :64]!
212 subs r4, r4, #1
213 bne .Lecbdecloop
214.Lecbdecout:
215 pop {r4, pc}
216ENDPROC(ce_aes_ecb_decrypt)
217
218 /*
219 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
220 * int blocks, u8 iv[])
221 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
222 * int blocks, u8 iv[])
223 */
224ENTRY(ce_aes_cbc_encrypt)
225 push {r4-r6, lr}
226 ldrd r4, r5, [sp, #16]
227 vld1.8 {q0}, [r5]
228 prepare_key r2, r3
229.Lcbcencloop:
230 vld1.8 {q1}, [r1, :64]! @ get next pt block
231 veor q0, q0, q1 @ ..and xor with iv
232 bl aes_encrypt
233 vst1.8 {q0}, [r0, :64]!
234 subs r4, r4, #1
235 bne .Lcbcencloop
236 vst1.8 {q0}, [r5]
237 pop {r4-r6, pc}
238ENDPROC(ce_aes_cbc_encrypt)
239
240ENTRY(ce_aes_cbc_decrypt)
241 push {r4-r6, lr}
242 ldrd r4, r5, [sp, #16]
243 vld1.8 {q6}, [r5] @ keep iv in q6
244 prepare_key r2, r3
245.Lcbcdecloop3x:
246 subs r4, r4, #3
247 bmi .Lcbcdec1x
248 vld1.8 {q0-q1}, [r1, :64]!
249 vld1.8 {q2}, [r1, :64]!
250 vmov q3, q0
251 vmov q4, q1
252 vmov q5, q2
253 bl aes_decrypt_3x
254 veor q0, q0, q6
255 veor q1, q1, q3
256 veor q2, q2, q4
257 vmov q6, q5
258 vst1.8 {q0-q1}, [r0, :64]!
259 vst1.8 {q2}, [r0, :64]!
260 b .Lcbcdecloop3x
261.Lcbcdec1x:
262 adds r4, r4, #3
263 beq .Lcbcdecout
264 vmov q15, q14 @ preserve last round key
265.Lcbcdecloop:
266 vld1.8 {q0}, [r1, :64]! @ get next ct block
267 veor q14, q15, q6 @ combine prev ct with last key
268 vmov q6, q0
269 bl aes_decrypt
270 vst1.8 {q0}, [r0, :64]!
271 subs r4, r4, #1
272 bne .Lcbcdecloop
273.Lcbcdecout:
274 vst1.8 {q6}, [r5] @ keep iv in q6
275 pop {r4-r6, pc}
276ENDPROC(ce_aes_cbc_decrypt)
277
278 /*
279 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
280 * int blocks, u8 ctr[])
281 */
282ENTRY(ce_aes_ctr_encrypt)
283 push {r4-r6, lr}
284 ldrd r4, r5, [sp, #16]
285 vld1.8 {q6}, [r5] @ load ctr
286 prepare_key r2, r3
287 vmov r6, s27 @ keep swabbed ctr in r6
288 rev r6, r6
289 cmn r6, r4 @ 32 bit overflow?
290 bcs .Lctrloop
291.Lctrloop3x:
292 subs r4, r4, #3
293 bmi .Lctr1x
294 add r6, r6, #1
295 vmov q0, q6
296 vmov q1, q6
297 rev ip, r6
298 add r6, r6, #1
299 vmov q2, q6
300 vmov s7, ip
301 rev ip, r6
302 add r6, r6, #1
303 vmov s11, ip
304 vld1.8 {q3-q4}, [r1, :64]!
305 vld1.8 {q5}, [r1, :64]!
306 bl aes_encrypt_3x
307 veor q0, q0, q3
308 veor q1, q1, q4
309 veor q2, q2, q5
310 rev ip, r6
311 vst1.8 {q0-q1}, [r0, :64]!
312 vst1.8 {q2}, [r0, :64]!
313 vmov s27, ip
314 b .Lctrloop3x
315.Lctr1x:
316 adds r4, r4, #3
317 beq .Lctrout
318.Lctrloop:
319 vmov q0, q6
320 bl aes_encrypt
321 subs r4, r4, #1
322 bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
323 vld1.8 {q3}, [r1, :64]!
324 veor q3, q0, q3
325 vst1.8 {q3}, [r0, :64]!
326
327 adds r6, r6, #1 @ increment BE ctr
328 rev ip, r6
329 vmov s27, ip
330 bcs .Lctrcarry
331 teq r4, #0
332 bne .Lctrloop
333.Lctrout:
334 vst1.8 {q6}, [r5]
335 pop {r4-r6, pc}
336
337.Lctrhalfblock:
338 vld1.8 {d1}, [r1, :64]
339 veor d0, d0, d1
340 vst1.8 {d0}, [r0, :64]
341 pop {r4-r6, pc}
342
343.Lctrcarry:
344 .irp sreg, s26, s25, s24
345 vmov ip, \sreg @ load next word of ctr
346 rev ip, ip @ ... to handle the carry
347 adds ip, ip, #1
348 rev ip, ip
349 vmov \sreg, ip
350 bcc 0f
351 .endr
3520: teq r4, #0
353 beq .Lctrout
354 b .Lctrloop
355ENDPROC(ce_aes_ctr_encrypt)
356
357 /*
358 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
359 * int blocks, u8 iv[], u8 const rk2[], int first)
360 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
361 * int blocks, u8 iv[], u8 const rk2[], int first)
362 */
363
364 .macro next_tweak, out, in, const, tmp
365 vshr.s64 \tmp, \in, #63
366 vand \tmp, \tmp, \const
367 vadd.u64 \out, \in, \in
368 vext.8 \tmp, \tmp, \tmp, #8
369 veor \out, \out, \tmp
370 .endm
371
372 .align 3
373.Lxts_mul_x:
374 .quad 1, 0x87
375
376ce_aes_xts_init:
377 vldr d14, .Lxts_mul_x
378 vldr d15, .Lxts_mul_x + 8
379
380 ldrd r4, r5, [sp, #16] @ load args
381 ldr r6, [sp, #28]
382 vld1.8 {q0}, [r5] @ load iv
383 teq r6, #1 @ start of a block?
384 bxne lr
385
386 @ Encrypt the IV in q0 with the second AES key. This should only
387 @ be done at the start of a block.
388 ldr r6, [sp, #24] @ load AES key 2
389 prepare_key r6, r3
390 add ip, r6, #32 @ 3rd round key of key 2
391 b .Laes_encrypt_tweak @ tail call
392ENDPROC(ce_aes_xts_init)
393
394ENTRY(ce_aes_xts_encrypt)
395 push {r4-r6, lr}
396
397 bl ce_aes_xts_init @ run shared prologue
398 prepare_key r2, r3
399 vmov q3, q0
400
401 teq r6, #0 @ start of a block?
402 bne .Lxtsenc3x
403
404.Lxtsencloop3x:
405 next_tweak q3, q3, q7, q6
406.Lxtsenc3x:
407 subs r4, r4, #3
408 bmi .Lxtsenc1x
409 vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks
410 vld1.8 {q2}, [r1, :64]!
411 next_tweak q4, q3, q7, q6
412 veor q0, q0, q3
413 next_tweak q5, q4, q7, q6
414 veor q1, q1, q4
415 veor q2, q2, q5
416 bl aes_encrypt_3x
417 veor q0, q0, q3
418 veor q1, q1, q4
419 veor q2, q2, q5
420 vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks
421 vst1.8 {q2}, [r0, :64]!
422 vmov q3, q5
423 teq r4, #0
424 beq .Lxtsencout
425 b .Lxtsencloop3x
426.Lxtsenc1x:
427 adds r4, r4, #3
428 beq .Lxtsencout
429.Lxtsencloop:
430 vld1.8 {q0}, [r1, :64]!
431 veor q0, q0, q3
432 bl aes_encrypt
433 veor q0, q0, q3
434 vst1.8 {q0}, [r0, :64]!
435 subs r4, r4, #1
436 beq .Lxtsencout
437 next_tweak q3, q3, q7, q6
438 b .Lxtsencloop
439.Lxtsencout:
440 vst1.8 {q3}, [r5]
441 pop {r4-r6, pc}
442ENDPROC(ce_aes_xts_encrypt)
443
444
445ENTRY(ce_aes_xts_decrypt)
446 push {r4-r6, lr}
447
448 bl ce_aes_xts_init @ run shared prologue
449 prepare_key r2, r3
450 vmov q3, q0
451
452 teq r6, #0 @ start of a block?
453 bne .Lxtsdec3x
454
455.Lxtsdecloop3x:
456 next_tweak q3, q3, q7, q6
457.Lxtsdec3x:
458 subs r4, r4, #3
459 bmi .Lxtsdec1x
460 vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks
461 vld1.8 {q2}, [r1, :64]!
462 next_tweak q4, q3, q7, q6
463 veor q0, q0, q3
464 next_tweak q5, q4, q7, q6
465 veor q1, q1, q4
466 veor q2, q2, q5
467 bl aes_decrypt_3x
468 veor q0, q0, q3
469 veor q1, q1, q4
470 veor q2, q2, q5
471 vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks
472 vst1.8 {q2}, [r0, :64]!
473 vmov q3, q5
474 teq r4, #0
475 beq .Lxtsdecout
476 b .Lxtsdecloop3x
477.Lxtsdec1x:
478 adds r4, r4, #3
479 beq .Lxtsdecout
480.Lxtsdecloop:
481 vld1.8 {q0}, [r1, :64]!
482 veor q0, q0, q3
483 add ip, r2, #32 @ 3rd round key
484 bl aes_decrypt
485 veor q0, q0, q3
486 vst1.8 {q0}, [r0, :64]!
487 subs r4, r4, #1
488 beq .Lxtsdecout
489 next_tweak q3, q3, q7, q6
490 b .Lxtsdecloop
491.Lxtsdecout:
492 vst1.8 {q3}, [r5]
493 pop {r4-r6, pc}
494ENDPROC(ce_aes_xts_decrypt)
495
496 /*
497 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
498 * AES sbox substitution on each byte in
499 * 'input'
500 */
501ENTRY(ce_aes_sub)
502 vdup.32 q1, r0
503 veor q0, q0, q0
504 aese.8 q0, q1
505 vmov r0, s0
506 bx lr
507ENDPROC(ce_aes_sub)
508
509 /*
510 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
511 * operation on round key *src
512 */
513ENTRY(ce_aes_invert)
514 vld1.8 {q0}, [r1]
515 aesimc.8 q0, q0
516 vst1.8 {q0}, [r0]
517 bx lr
518ENDPROC(ce_aes_invert)
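For reference, the tweak schedule implemented by the next_tweak macro and the .Lxts_mul_x constant above is the usual XTS doubling in GF(2^128): shift the 128-bit tweak left by one bit and, if a bit falls off the top, fold the reduction constant 0x87 back into the low byte. Below is a minimal standalone C sketch of that operation (not part of the patch); it assumes the tweak is held as two little-endian 64-bit words, matching the NEON register layout used above.

#include <stdint.h>

/*
 * GF(2^128) doubling of an XTS tweak held as two little-endian 64-bit
 * words: t[0] = low half, t[1] = high half.  Mirrors the effect of the
 * vshr/vand/vadd/vext/veor sequence in the next_tweak macro.
 */
static void xts_next_tweak(uint64_t t[2])
{
	uint64_t carry_out = t[1] >> 63;	/* bit shifted out of the 128-bit value */
	uint64_t carry_mid = t[0] >> 63;	/* carry from the low word into the high word */

	t[1] = (t[1] << 1) | carry_mid;
	t[0] = (t[0] << 1) ^ (carry_out ? 0x87 : 0);
}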
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
new file mode 100644
index 000000000000..b445a5d56f43
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -0,0 +1,524 @@
1/*
2 * aes-ce-glue.c - wrapper code for ARMv8 AES
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <asm/hwcap.h>
12#include <asm/neon.h>
13#include <asm/hwcap.h>
14#include <crypto/aes.h>
15#include <crypto/ablk_helper.h>
16#include <crypto/algapi.h>
17#include <linux/module.h>
18
19MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
20MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
21MODULE_LICENSE("GPL v2");
22
23/* defined in aes-ce-core.S */
24asmlinkage u32 ce_aes_sub(u32 input);
25asmlinkage void ce_aes_invert(void *dst, void *src);
26
27asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
28 int rounds, int blocks);
29asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
30 int rounds, int blocks);
31
32asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
33 int rounds, int blocks, u8 iv[]);
34asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
35 int rounds, int blocks, u8 iv[]);
36
37asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
38 int rounds, int blocks, u8 ctr[]);
39
40asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
41 int rounds, int blocks, u8 iv[],
42 u8 const rk2[], int first);
43asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
44 int rounds, int blocks, u8 iv[],
45 u8 const rk2[], int first);
46
47struct aes_block {
48 u8 b[AES_BLOCK_SIZE];
49};
50
51static int num_rounds(struct crypto_aes_ctx *ctx)
52{
53 /*
54 * # of rounds specified by AES:
55 * 128 bit key 10 rounds
56 * 192 bit key 12 rounds
57 * 256 bit key 14 rounds
58 * => n byte key => 6 + (n/4) rounds
59 */
60 return 6 + ctx->key_length / 4;
61}
62
63static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
64 unsigned int key_len)
65{
66 /*
67 * The AES key schedule round constants
68 */
69 static u8 const rcon[] = {
70 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
71 };
72
73 u32 kwords = key_len / sizeof(u32);
74 struct aes_block *key_enc, *key_dec;
75 int i, j;
76
77 if (key_len != AES_KEYSIZE_128 &&
78 key_len != AES_KEYSIZE_192 &&
79 key_len != AES_KEYSIZE_256)
80 return -EINVAL;
81
82 memcpy(ctx->key_enc, in_key, key_len);
83 ctx->key_length = key_len;
84
85 kernel_neon_begin();
86 for (i = 0; i < sizeof(rcon); i++) {
87 u32 *rki = ctx->key_enc + (i * kwords);
88 u32 *rko = rki + kwords;
89
90 rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
91 rko[0] = rko[0] ^ rki[0] ^ rcon[i];
92 rko[1] = rko[0] ^ rki[1];
93 rko[2] = rko[1] ^ rki[2];
94 rko[3] = rko[2] ^ rki[3];
95
96 if (key_len == AES_KEYSIZE_192) {
97 if (i >= 7)
98 break;
99 rko[4] = rko[3] ^ rki[4];
100 rko[5] = rko[4] ^ rki[5];
101 } else if (key_len == AES_KEYSIZE_256) {
102 if (i >= 6)
103 break;
104 rko[4] = ce_aes_sub(rko[3]) ^ rki[4];
105 rko[5] = rko[4] ^ rki[5];
106 rko[6] = rko[5] ^ rki[6];
107 rko[7] = rko[6] ^ rki[7];
108 }
109 }
110
111 /*
112 * Generate the decryption keys for the Equivalent Inverse Cipher.
113 * This involves reversing the order of the round keys, and applying
114 * the Inverse Mix Columns transformation on all but the first and
115 * the last one.
116 */
117 key_enc = (struct aes_block *)ctx->key_enc;
118 key_dec = (struct aes_block *)ctx->key_dec;
119 j = num_rounds(ctx);
120
121 key_dec[0] = key_enc[j];
122 for (i = 1, j--; j > 0; i++, j--)
123 ce_aes_invert(key_dec + i, key_enc + j);
124 key_dec[i] = key_enc[0];
125
126 kernel_neon_end();
127 return 0;
128}
129
130static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
131 unsigned int key_len)
132{
133 struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
134 int ret;
135
136 ret = ce_aes_expandkey(ctx, in_key, key_len);
137 if (!ret)
138 return 0;
139
140 tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
141 return -EINVAL;
142}
143
144struct crypto_aes_xts_ctx {
145 struct crypto_aes_ctx key1;
146 struct crypto_aes_ctx __aligned(8) key2;
147};
148
149static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
150 unsigned int key_len)
151{
152 struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
153 int ret;
154
155 ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2);
156 if (!ret)
157 ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2],
158 key_len / 2);
159 if (!ret)
160 return 0;
161
162 tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
163 return -EINVAL;
164}
165
166static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167 struct scatterlist *src, unsigned int nbytes)
168{
169 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
170 struct blkcipher_walk walk;
171 unsigned int blocks;
172 int err;
173
174 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
175 blkcipher_walk_init(&walk, dst, src, nbytes);
176 err = blkcipher_walk_virt(desc, &walk);
177
178 kernel_neon_begin();
179 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
180 ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
181 (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
182 err = blkcipher_walk_done(desc, &walk,
183 walk.nbytes % AES_BLOCK_SIZE);
184 }
185 kernel_neon_end();
186 return err;
187}
188
189static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
190 struct scatterlist *src, unsigned int nbytes)
191{
192 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
193 struct blkcipher_walk walk;
194 unsigned int blocks;
195 int err;
196
197 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
198 blkcipher_walk_init(&walk, dst, src, nbytes);
199 err = blkcipher_walk_virt(desc, &walk);
200
201 kernel_neon_begin();
202 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
203 ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
204 (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
205 err = blkcipher_walk_done(desc, &walk,
206 walk.nbytes % AES_BLOCK_SIZE);
207 }
208 kernel_neon_end();
209 return err;
210}
211
212static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
213 struct scatterlist *src, unsigned int nbytes)
214{
215 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
216 struct blkcipher_walk walk;
217 unsigned int blocks;
218 int err;
219
220 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
221 blkcipher_walk_init(&walk, dst, src, nbytes);
222 err = blkcipher_walk_virt(desc, &walk);
223
224 kernel_neon_begin();
225 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
226 ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
227 (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
228 walk.iv);
229 err = blkcipher_walk_done(desc, &walk,
230 walk.nbytes % AES_BLOCK_SIZE);
231 }
232 kernel_neon_end();
233 return err;
234}
235
236static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
237 struct scatterlist *src, unsigned int nbytes)
238{
239 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
240 struct blkcipher_walk walk;
241 unsigned int blocks;
242 int err;
243
244 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
245 blkcipher_walk_init(&walk, dst, src, nbytes);
246 err = blkcipher_walk_virt(desc, &walk);
247
248 kernel_neon_begin();
249 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
250 ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
251 (u8 *)ctx->key_dec, num_rounds(ctx), blocks,
252 walk.iv);
253 err = blkcipher_walk_done(desc, &walk,
254 walk.nbytes % AES_BLOCK_SIZE);
255 }
256 kernel_neon_end();
257 return err;
258}
259
260static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
261 struct scatterlist *src, unsigned int nbytes)
262{
263 struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
264 struct blkcipher_walk walk;
265 int err, blocks;
266
267 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
268 blkcipher_walk_init(&walk, dst, src, nbytes);
269 err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
270
271 kernel_neon_begin();
272 while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
273 ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
274 (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
275 walk.iv);
276 nbytes -= blocks * AES_BLOCK_SIZE;
277 if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
278 break;
279 err = blkcipher_walk_done(desc, &walk,
280 walk.nbytes % AES_BLOCK_SIZE);
281 }
282 if (nbytes) {
283 u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
284 u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
285 u8 __aligned(8) tail[AES_BLOCK_SIZE];
286
287 /*
288 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
289 * to tell aes_ctr_encrypt() to only read half a block.
290 */
291 blocks = (nbytes <= 8) ? -1 : 1;
292
293 ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
294 num_rounds(ctx), blocks, walk.iv);
295 memcpy(tdst, tail, nbytes);
296 err = blkcipher_walk_done(desc, &walk, 0);
297 }
298 kernel_neon_end();
299
300 return err;
301}
302
303static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
304 struct scatterlist *src, unsigned int nbytes)
305{
306 struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
307 int err, first, rounds = num_rounds(&ctx->key1);
308 struct blkcipher_walk walk;
309 unsigned int blocks;
310
311 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
312 blkcipher_walk_init(&walk, dst, src, nbytes);
313 err = blkcipher_walk_virt(desc, &walk);
314
315 kernel_neon_begin();
316 for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
317 ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
318 (u8 *)ctx->key1.key_enc, rounds, blocks,
319 walk.iv, (u8 *)ctx->key2.key_enc, first);
320 err = blkcipher_walk_done(desc, &walk,
321 walk.nbytes % AES_BLOCK_SIZE);
322 }
323 kernel_neon_end();
324
325 return err;
326}
327
328static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
329 struct scatterlist *src, unsigned int nbytes)
330{
331 struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
332 int err, first, rounds = num_rounds(&ctx->key1);
333 struct blkcipher_walk walk;
334 unsigned int blocks;
335
336 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
337 blkcipher_walk_init(&walk, dst, src, nbytes);
338 err = blkcipher_walk_virt(desc, &walk);
339
340 kernel_neon_begin();
341 for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
342 ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
343 (u8 *)ctx->key1.key_dec, rounds, blocks,
344 walk.iv, (u8 *)ctx->key2.key_enc, first);
345 err = blkcipher_walk_done(desc, &walk,
346 walk.nbytes % AES_BLOCK_SIZE);
347 }
348 kernel_neon_end();
349
350 return err;
351}
352
353static struct crypto_alg aes_algs[] = { {
354 .cra_name = "__ecb-aes-ce",
355 .cra_driver_name = "__driver-ecb-aes-ce",
356 .cra_priority = 0,
357 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
358 CRYPTO_ALG_INTERNAL,
359 .cra_blocksize = AES_BLOCK_SIZE,
360 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
361 .cra_alignmask = 7,
362 .cra_type = &crypto_blkcipher_type,
363 .cra_module = THIS_MODULE,
364 .cra_blkcipher = {
365 .min_keysize = AES_MIN_KEY_SIZE,
366 .max_keysize = AES_MAX_KEY_SIZE,
367 .ivsize = AES_BLOCK_SIZE,
368 .setkey = ce_aes_setkey,
369 .encrypt = ecb_encrypt,
370 .decrypt = ecb_decrypt,
371 },
372}, {
373 .cra_name = "__cbc-aes-ce",
374 .cra_driver_name = "__driver-cbc-aes-ce",
375 .cra_priority = 0,
376 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
377 CRYPTO_ALG_INTERNAL,
378 .cra_blocksize = AES_BLOCK_SIZE,
379 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
380 .cra_alignmask = 7,
381 .cra_type = &crypto_blkcipher_type,
382 .cra_module = THIS_MODULE,
383 .cra_blkcipher = {
384 .min_keysize = AES_MIN_KEY_SIZE,
385 .max_keysize = AES_MAX_KEY_SIZE,
386 .ivsize = AES_BLOCK_SIZE,
387 .setkey = ce_aes_setkey,
388 .encrypt = cbc_encrypt,
389 .decrypt = cbc_decrypt,
390 },
391}, {
392 .cra_name = "__ctr-aes-ce",
393 .cra_driver_name = "__driver-ctr-aes-ce",
394 .cra_priority = 0,
395 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
396 CRYPTO_ALG_INTERNAL,
397 .cra_blocksize = 1,
398 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
399 .cra_alignmask = 7,
400 .cra_type = &crypto_blkcipher_type,
401 .cra_module = THIS_MODULE,
402 .cra_blkcipher = {
403 .min_keysize = AES_MIN_KEY_SIZE,
404 .max_keysize = AES_MAX_KEY_SIZE,
405 .ivsize = AES_BLOCK_SIZE,
406 .setkey = ce_aes_setkey,
407 .encrypt = ctr_encrypt,
408 .decrypt = ctr_encrypt,
409 },
410}, {
411 .cra_name = "__xts-aes-ce",
412 .cra_driver_name = "__driver-xts-aes-ce",
413 .cra_priority = 0,
414 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
415 CRYPTO_ALG_INTERNAL,
416 .cra_blocksize = AES_BLOCK_SIZE,
417 .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
418 .cra_alignmask = 7,
419 .cra_type = &crypto_blkcipher_type,
420 .cra_module = THIS_MODULE,
421 .cra_blkcipher = {
422 .min_keysize = 2 * AES_MIN_KEY_SIZE,
423 .max_keysize = 2 * AES_MAX_KEY_SIZE,
424 .ivsize = AES_BLOCK_SIZE,
425 .setkey = xts_set_key,
426 .encrypt = xts_encrypt,
427 .decrypt = xts_decrypt,
428 },
429}, {
430 .cra_name = "ecb(aes)",
431 .cra_driver_name = "ecb-aes-ce",
432 .cra_priority = 300,
433 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
434 .cra_blocksize = AES_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct async_helper_ctx),
436 .cra_alignmask = 7,
437 .cra_type = &crypto_ablkcipher_type,
438 .cra_module = THIS_MODULE,
439 .cra_init = ablk_init,
440 .cra_exit = ablk_exit,
441 .cra_ablkcipher = {
442 .min_keysize = AES_MIN_KEY_SIZE,
443 .max_keysize = AES_MAX_KEY_SIZE,
444 .ivsize = AES_BLOCK_SIZE,
445 .setkey = ablk_set_key,
446 .encrypt = ablk_encrypt,
447 .decrypt = ablk_decrypt,
448 }
449}, {
450 .cra_name = "cbc(aes)",
451 .cra_driver_name = "cbc-aes-ce",
452 .cra_priority = 300,
453 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
454 .cra_blocksize = AES_BLOCK_SIZE,
455 .cra_ctxsize = sizeof(struct async_helper_ctx),
456 .cra_alignmask = 7,
457 .cra_type = &crypto_ablkcipher_type,
458 .cra_module = THIS_MODULE,
459 .cra_init = ablk_init,
460 .cra_exit = ablk_exit,
461 .cra_ablkcipher = {
462 .min_keysize = AES_MIN_KEY_SIZE,
463 .max_keysize = AES_MAX_KEY_SIZE,
464 .ivsize = AES_BLOCK_SIZE,
465 .setkey = ablk_set_key,
466 .encrypt = ablk_encrypt,
467 .decrypt = ablk_decrypt,
468 }
469}, {
470 .cra_name = "ctr(aes)",
471 .cra_driver_name = "ctr-aes-ce",
472 .cra_priority = 300,
473 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
474 .cra_blocksize = 1,
475 .cra_ctxsize = sizeof(struct async_helper_ctx),
476 .cra_alignmask = 7,
477 .cra_type = &crypto_ablkcipher_type,
478 .cra_module = THIS_MODULE,
479 .cra_init = ablk_init,
480 .cra_exit = ablk_exit,
481 .cra_ablkcipher = {
482 .min_keysize = AES_MIN_KEY_SIZE,
483 .max_keysize = AES_MAX_KEY_SIZE,
484 .ivsize = AES_BLOCK_SIZE,
485 .setkey = ablk_set_key,
486 .encrypt = ablk_encrypt,
487 .decrypt = ablk_decrypt,
488 }
489}, {
490 .cra_name = "xts(aes)",
491 .cra_driver_name = "xts-aes-ce",
492 .cra_priority = 300,
493 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
494 .cra_blocksize = AES_BLOCK_SIZE,
495 .cra_ctxsize = sizeof(struct async_helper_ctx),
496 .cra_alignmask = 7,
497 .cra_type = &crypto_ablkcipher_type,
498 .cra_module = THIS_MODULE,
499 .cra_init = ablk_init,
500 .cra_exit = ablk_exit,
501 .cra_ablkcipher = {
502 .min_keysize = 2 * AES_MIN_KEY_SIZE,
503 .max_keysize = 2 * AES_MAX_KEY_SIZE,
504 .ivsize = AES_BLOCK_SIZE,
505 .setkey = ablk_set_key,
506 .encrypt = ablk_encrypt,
507 .decrypt = ablk_decrypt,
508 }
509} };
510
511static int __init aes_init(void)
512{
513 if (!(elf_hwcap2 & HWCAP2_AES))
514 return -ENODEV;
515 return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
516}
517
518static void __exit aes_exit(void)
519{
520 crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
521}
522
523module_init(aes_init);
524module_exit(aes_exit);
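As a quick check of the round-count rule quoted in num_rounds() above (an n-byte key uses 6 + n/4 rounds), here is a standalone C snippet; the helper name is made up for illustration and is not part of the patch.

#include <assert.h>

static int aes_rounds(unsigned int key_len_bytes)
{
	return 6 + key_len_bytes / 4;	/* same formula as num_rounds() */
}

int main(void)
{
	assert(aes_rounds(16) == 10);	/* AES-128 */
	assert(aes_rounds(24) == 12);	/* AES-192 */
	assert(aes_rounds(32) == 14);	/* AES-256 */
	return 0;
}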
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
index 15468fbbdea3..6d685298690e 100644
--- a/arch/arm/crypto/aesbs-glue.c
+++ b/arch/arm/crypto/aesbs-glue.c
@@ -301,7 +301,8 @@ static struct crypto_alg aesbs_algs[] = { {
 .cra_name = "__cbc-aes-neonbs",
 .cra_driver_name = "__driver-cbc-aes-neonbs",
 .cra_priority = 0,
-.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+             CRYPTO_ALG_INTERNAL,
 .cra_blocksize = AES_BLOCK_SIZE,
 .cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
 .cra_alignmask = 7,
@@ -319,7 +320,8 @@ static struct crypto_alg aesbs_algs[] = { {
 .cra_name = "__ctr-aes-neonbs",
 .cra_driver_name = "__driver-ctr-aes-neonbs",
 .cra_priority = 0,
-.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+             CRYPTO_ALG_INTERNAL,
 .cra_blocksize = 1,
 .cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
 .cra_alignmask = 7,
@@ -337,7 +339,8 @@ static struct crypto_alg aesbs_algs[] = { {
 .cra_name = "__xts-aes-neonbs",
 .cra_driver_name = "__driver-xts-aes-neonbs",
 .cra_priority = 0,
-.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+             CRYPTO_ALG_INTERNAL,
 .cra_blocksize = AES_BLOCK_SIZE,
 .cra_ctxsize = sizeof(struct aesbs_xts_ctx),
 .cra_alignmask = 7,
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..f6ab8bcc9efe
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -0,0 +1,94 @@
1/*
2 * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
3 *
4 * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13
14 SHASH .req q0
15 SHASH2 .req q1
16 T1 .req q2
17 T2 .req q3
18 MASK .req q4
19 XL .req q5
20 XM .req q6
21 XH .req q7
22 IN1 .req q7
23
24 SHASH_L .req d0
25 SHASH_H .req d1
26 SHASH2_L .req d2
27 T1_L .req d4
28 MASK_L .req d8
29 XL_L .req d10
30 XL_H .req d11
31 XM_L .req d12
32 XM_H .req d13
33 XH_L .req d14
34
35 .text
36 .fpu crypto-neon-fp-armv8
37
38 /*
39 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
40 * struct ghash_key const *k, const char *head)
41 */
42ENTRY(pmull_ghash_update)
43 vld1.64 {SHASH}, [r3]
44 vld1.64 {XL}, [r1]
45 vmov.i8 MASK, #0xe1
46 vext.8 SHASH2, SHASH, SHASH, #8
47 vshl.u64 MASK, MASK, #57
48 veor SHASH2, SHASH2, SHASH
49
50 /* do the head block first, if supplied */
51 ldr ip, [sp]
52 teq ip, #0
53 beq 0f
54 vld1.64 {T1}, [ip]
55 teq r0, #0
56 b 1f
57
580: vld1.64 {T1}, [r2]!
59 subs r0, r0, #1
60
611: /* multiply XL by SHASH in GF(2^128) */
62#ifndef CONFIG_CPU_BIG_ENDIAN
63 vrev64.8 T1, T1
64#endif
65 vext.8 T2, XL, XL, #8
66 vext.8 IN1, T1, T1, #8
67 veor T1, T1, T2
68 veor XL, XL, IN1
69
70 vmull.p64 XH, SHASH_H, XL_H @ a1 * b1
71 veor T1, T1, XL
72 vmull.p64 XL, SHASH_L, XL_L @ a0 * b0
73 vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0)
74
75 vext.8 T1, XL, XH, #8
76 veor T2, XL, XH
77 veor XM, XM, T1
78 veor XM, XM, T2
79 vmull.p64 T2, XL_L, MASK_L
80
81 vmov XH_L, XM_H
82 vmov XM_H, XL_L
83
84 veor XL, XM, T2
85 vext.8 T2, XL, XL, #8
86 vmull.p64 XL, XL_L, MASK_L
87 veor T2, T2, XH
88 veor XL, XL, T2
89
90 bne 0b
91
92 vst1.64 {XL}, [r1]
93 bx lr
94ENDPROC(pmull_ghash_update)
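The three vmull.p64 instructions above compute one 128x128-bit carry-less product using a Karatsuba-style split: with a = a1*x^64 + a0 and b = b1*x^64 + b0, the product is a1*b1*x^128 + ((a1+a0)(b1+b0) + a1*b1 + a0*b0)*x^64 + a0*b0, where addition is XOR. The sketch below (not part of the patch) writes that decomposition out in standalone C, using a bit-serial clmul64() as a stand-in for vmull.p64; the final reduction step that the assembly performs with the 0xe1 mask is omitted.

#include <stdint.h>

/* carry-less 64x64 -> 128 bit multiply (bit-serial stand-in for vmull.p64) */
static void clmul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
	uint64_t rh = 0, rl = 0;
	int i;

	for (i = 0; i < 64; i++)
		if (b & ((uint64_t)1 << i)) {
			rl ^= a << i;
			rh ^= i ? a >> (64 - i) : 0;
		}
	*hi = rh;
	*lo = rl;
}

/* 128x128 -> 256 bit carry-less multiply via the Karatsuba split above;
 * operands and result are little-endian arrays of 64-bit words */
static void clmul128(const uint64_t a[2], const uint64_t b[2], uint64_t r[4])
{
	uint64_t hh_hi, hh_lo, ll_hi, ll_lo, mm_hi, mm_lo;

	clmul64(a[1], b[1], &hh_hi, &hh_lo);			/* a1 * b1           */
	clmul64(a[0], b[0], &ll_hi, &ll_lo);			/* a0 * b0           */
	clmul64(a[1] ^ a[0], b[1] ^ b[0], &mm_hi, &mm_lo);	/* (a1+a0)(b1+b0)    */

	mm_hi ^= hh_hi ^ ll_hi;		/* fold the outer products into the middle term */
	mm_lo ^= hh_lo ^ ll_lo;

	r[0] = ll_lo;
	r[1] = ll_hi ^ mm_lo;
	r[2] = hh_lo ^ mm_hi;
	r[3] = hh_hi;
}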
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..03a39fe29246
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -0,0 +1,320 @@
1/*
2 * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
3 *
4 * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 */
10
11#include <asm/hwcap.h>
12#include <asm/neon.h>
13#include <asm/simd.h>
14#include <asm/unaligned.h>
15#include <crypto/cryptd.h>
16#include <crypto/internal/hash.h>
17#include <crypto/gf128mul.h>
18#include <linux/crypto.h>
19#include <linux/module.h>
20
21MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
22MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
23MODULE_LICENSE("GPL v2");
24
25#define GHASH_BLOCK_SIZE 16
26#define GHASH_DIGEST_SIZE 16
27
28struct ghash_key {
29 u64 a;
30 u64 b;
31};
32
33struct ghash_desc_ctx {
34 u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
35 u8 buf[GHASH_BLOCK_SIZE];
36 u32 count;
37};
38
39struct ghash_async_ctx {
40 struct cryptd_ahash *cryptd_tfm;
41};
42
43asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
44 struct ghash_key const *k, const char *head);
45
46static int ghash_init(struct shash_desc *desc)
47{
48 struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
49
50 *ctx = (struct ghash_desc_ctx){};
51 return 0;
52}
53
54static int ghash_update(struct shash_desc *desc, const u8 *src,
55 unsigned int len)
56{
57 struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
58 unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
59
60 ctx->count += len;
61
62 if ((partial + len) >= GHASH_BLOCK_SIZE) {
63 struct ghash_key *key = crypto_shash_ctx(desc->tfm);
64 int blocks;
65
66 if (partial) {
67 int p = GHASH_BLOCK_SIZE - partial;
68
69 memcpy(ctx->buf + partial, src, p);
70 src += p;
71 len -= p;
72 }
73
74 blocks = len / GHASH_BLOCK_SIZE;
75 len %= GHASH_BLOCK_SIZE;
76
77 kernel_neon_begin();
78 pmull_ghash_update(blocks, ctx->digest, src, key,
79 partial ? ctx->buf : NULL);
80 kernel_neon_end();
81 src += blocks * GHASH_BLOCK_SIZE;
82 partial = 0;
83 }
84 if (len)
85 memcpy(ctx->buf + partial, src, len);
86 return 0;
87}
88
89static int ghash_final(struct shash_desc *desc, u8 *dst)
90{
91 struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
92 unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
93
94 if (partial) {
95 struct ghash_key *key = crypto_shash_ctx(desc->tfm);
96
97 memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
98 kernel_neon_begin();
99 pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
100 kernel_neon_end();
101 }
102 put_unaligned_be64(ctx->digest[1], dst);
103 put_unaligned_be64(ctx->digest[0], dst + 8);
104
105 *ctx = (struct ghash_desc_ctx){};
106 return 0;
107}
108
109static int ghash_setkey(struct crypto_shash *tfm,
110 const u8 *inkey, unsigned int keylen)
111{
112 struct ghash_key *key = crypto_shash_ctx(tfm);
113 u64 a, b;
114
115 if (keylen != GHASH_BLOCK_SIZE) {
116 crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
117 return -EINVAL;
118 }
119
120 /* perform multiplication by 'x' in GF(2^128) */
121 b = get_unaligned_be64(inkey);
122 a = get_unaligned_be64(inkey + 8);
123
124 key->a = (a << 1) | (b >> 63);
125 key->b = (b << 1) | (a >> 63);
126
127 if (b >> 63)
128 key->b ^= 0xc200000000000000UL;
129
130 return 0;
131}
132
133static struct shash_alg ghash_alg = {
134 .digestsize = GHASH_DIGEST_SIZE,
135 .init = ghash_init,
136 .update = ghash_update,
137 .final = ghash_final,
138 .setkey = ghash_setkey,
139 .descsize = sizeof(struct ghash_desc_ctx),
140 .base = {
141 .cra_name = "ghash",
142 .cra_driver_name = "__driver-ghash-ce",
143 .cra_priority = 0,
144 .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
145 .cra_blocksize = GHASH_BLOCK_SIZE,
146 .cra_ctxsize = sizeof(struct ghash_key),
147 .cra_module = THIS_MODULE,
148 },
149};
150
151static int ghash_async_init(struct ahash_request *req)
152{
153 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
154 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
155 struct ahash_request *cryptd_req = ahash_request_ctx(req);
156 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
157
158 if (!may_use_simd()) {
159 memcpy(cryptd_req, req, sizeof(*req));
160 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
161 return crypto_ahash_init(cryptd_req);
162 } else {
163 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
164 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
165
166 desc->tfm = child;
167 desc->flags = req->base.flags;
168 return crypto_shash_init(desc);
169 }
170}
171
172static int ghash_async_update(struct ahash_request *req)
173{
174 struct ahash_request *cryptd_req = ahash_request_ctx(req);
175
176 if (!may_use_simd()) {
177 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
178 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
179 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
180
181 memcpy(cryptd_req, req, sizeof(*req));
182 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
183 return crypto_ahash_update(cryptd_req);
184 } else {
185 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
186 return shash_ahash_update(req, desc);
187 }
188}
189
190static int ghash_async_final(struct ahash_request *req)
191{
192 struct ahash_request *cryptd_req = ahash_request_ctx(req);
193
194 if (!may_use_simd()) {
195 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
196 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
197 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
198
199 memcpy(cryptd_req, req, sizeof(*req));
200 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
201 return crypto_ahash_final(cryptd_req);
202 } else {
203 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
204 return crypto_shash_final(desc, req->result);
205 }
206}
207
208static int ghash_async_digest(struct ahash_request *req)
209{
210 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
211 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
212 struct ahash_request *cryptd_req = ahash_request_ctx(req);
213 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
214
215 if (!may_use_simd()) {
216 memcpy(cryptd_req, req, sizeof(*req));
217 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
218 return crypto_ahash_digest(cryptd_req);
219 } else {
220 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
221 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
222
223 desc->tfm = child;
224 desc->flags = req->base.flags;
225 return shash_ahash_digest(req, desc);
226 }
227}
228
229static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
230 unsigned int keylen)
231{
232 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
233 struct crypto_ahash *child = &ctx->cryptd_tfm->base;
234 int err;
235
236 crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
237 crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
238 & CRYPTO_TFM_REQ_MASK);
239 err = crypto_ahash_setkey(child, key, keylen);
240 crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
241 & CRYPTO_TFM_RES_MASK);
242
243 return err;
244}
245
246static int ghash_async_init_tfm(struct crypto_tfm *tfm)
247{
248 struct cryptd_ahash *cryptd_tfm;
249 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
250
251 cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce",
252 CRYPTO_ALG_INTERNAL,
253 CRYPTO_ALG_INTERNAL);
254 if (IS_ERR(cryptd_tfm))
255 return PTR_ERR(cryptd_tfm);
256 ctx->cryptd_tfm = cryptd_tfm;
257 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
258 sizeof(struct ahash_request) +
259 crypto_ahash_reqsize(&cryptd_tfm->base));
260
261 return 0;
262}
263
264static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
265{
266 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
267
268 cryptd_free_ahash(ctx->cryptd_tfm);
269}
270
271static struct ahash_alg ghash_async_alg = {
272 .init = ghash_async_init,
273 .update = ghash_async_update,
274 .final = ghash_async_final,
275 .setkey = ghash_async_setkey,
276 .digest = ghash_async_digest,
277 .halg.digestsize = GHASH_DIGEST_SIZE,
278 .halg.base = {
279 .cra_name = "ghash",
280 .cra_driver_name = "ghash-ce",
281 .cra_priority = 300,
282 .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
283 .cra_blocksize = GHASH_BLOCK_SIZE,
284 .cra_type = &crypto_ahash_type,
285 .cra_ctxsize = sizeof(struct ghash_async_ctx),
286 .cra_module = THIS_MODULE,
287 .cra_init = ghash_async_init_tfm,
288 .cra_exit = ghash_async_exit_tfm,
289 },
290};
291
292static int __init ghash_ce_mod_init(void)
293{
294 int err;
295
296 if (!(elf_hwcap2 & HWCAP2_PMULL))
297 return -ENODEV;
298
299 err = crypto_register_shash(&ghash_alg);
300 if (err)
301 return err;
302 err = crypto_register_ahash(&ghash_async_alg);
303 if (err)
304 goto err_shash;
305
306 return 0;
307
308err_shash:
309 crypto_unregister_shash(&ghash_alg);
310 return err;
311}
312
313static void __exit ghash_ce_mod_exit(void)
314{
315 crypto_unregister_ahash(&ghash_async_alg);
316 crypto_unregister_shash(&ghash_alg);
317}
318
319module_init(ghash_ce_mod_init);
320module_exit(ghash_ce_mod_exit);
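The update path above follows the usual partial-block buffering pattern: bytes left over from a previous call sit in ctx->buf, and the NEON routine is only ever handed whole 16-byte blocks (in the real code the buffered head is passed via the 'head' argument and processed in the same call). A simplified standalone C sketch of that pattern follows (not part of the patch); process_blocks() is a no-op stand-in for pmull_ghash_update(), and the head is handled as a separate call for clarity.

#include <stddef.h>
#include <string.h>

#define BLK 16

struct buf_ctx {
	unsigned char buf[BLK];
	unsigned int count;
};

/* stand-in for pmull_ghash_update(); a real implementation would fold
 * each 16-byte block into the running GHASH digest */
static void process_blocks(const unsigned char *src, size_t nblocks)
{
	(void)src;
	(void)nblocks;
}

static void buffered_update(struct buf_ctx *ctx, const unsigned char *src,
			    size_t len)
{
	unsigned int partial = ctx->count % BLK;

	ctx->count += len;

	if (partial + len >= BLK) {
		if (partial) {
			size_t p = BLK - partial;	/* complete the buffered block */

			memcpy(ctx->buf + partial, src, p);
			process_blocks(ctx->buf, 1);
			src += p;
			len -= p;
		}
		process_blocks(src, len / BLK);		/* whole blocks straight from src */
		src += (len / BLK) * BLK;
		len %= BLK;
		partial = 0;
	}
	if (len)
		memcpy(ctx->buf + partial, src, len);	/* stash the tail for next time */
}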
diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S
new file mode 100644
index 000000000000..b623f51ccbcf
--- /dev/null
+++ b/arch/arm/crypto/sha1-ce-core.S
@@ -0,0 +1,125 @@
1/*
2 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd.
5 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/linkage.h>
13#include <asm/assembler.h>
14
15 .text
16 .fpu crypto-neon-fp-armv8
17
18 k0 .req q0
19 k1 .req q1
20 k2 .req q2
21 k3 .req q3
22
23 ta0 .req q4
24 ta1 .req q5
25 tb0 .req q5
26 tb1 .req q4
27
28 dga .req q6
29 dgb .req q7
30 dgbs .req s28
31
32 dg0 .req q12
33 dg1a0 .req q13
34 dg1a1 .req q14
35 dg1b0 .req q14
36 dg1b1 .req q13
37
38 .macro add_only, op, ev, rc, s0, dg1
39 .ifnb \s0
40 vadd.u32 tb\ev, q\s0, \rc
41 .endif
42 sha1h.32 dg1b\ev, dg0
43 .ifb \dg1
44 sha1\op\().32 dg0, dg1a\ev, ta\ev
45 .else
46 sha1\op\().32 dg0, \dg1, ta\ev
47 .endif
48 .endm
49
50 .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
51 sha1su0.32 q\s0, q\s1, q\s2
52 add_only \op, \ev, \rc, \s1, \dg1
53 sha1su1.32 q\s0, q\s3
54 .endm
55
56 .align 6
57.Lsha1_rcon:
58 .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999
59 .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1
60 .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc
61 .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6
62
63 /*
64 * void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
65 * int blocks);
66 */
67ENTRY(sha1_ce_transform)
68 /* load round constants */
69 adr ip, .Lsha1_rcon
70 vld1.32 {k0-k1}, [ip, :128]!
71 vld1.32 {k2-k3}, [ip, :128]
72
73 /* load state */
74 vld1.32 {dga}, [r0]
75 vldr dgbs, [r0, #16]
76
77 /* load input */
780: vld1.32 {q8-q9}, [r1]!
79 vld1.32 {q10-q11}, [r1]!
80 subs r2, r2, #1
81
82#ifndef CONFIG_CPU_BIG_ENDIAN
83 vrev32.8 q8, q8
84 vrev32.8 q9, q9
85 vrev32.8 q10, q10
86 vrev32.8 q11, q11
87#endif
88
89 vadd.u32 ta0, q8, k0
90 vmov dg0, dga
91
92 add_update c, 0, k0, 8, 9, 10, 11, dgb
93 add_update c, 1, k0, 9, 10, 11, 8
94 add_update c, 0, k0, 10, 11, 8, 9
95 add_update c, 1, k0, 11, 8, 9, 10
96 add_update c, 0, k1, 8, 9, 10, 11
97
98 add_update p, 1, k1, 9, 10, 11, 8
99 add_update p, 0, k1, 10, 11, 8, 9
100 add_update p, 1, k1, 11, 8, 9, 10
101 add_update p, 0, k1, 8, 9, 10, 11
102 add_update p, 1, k2, 9, 10, 11, 8
103
104 add_update m, 0, k2, 10, 11, 8, 9
105 add_update m, 1, k2, 11, 8, 9, 10
106 add_update m, 0, k2, 8, 9, 10, 11
107 add_update m, 1, k2, 9, 10, 11, 8
108 add_update m, 0, k3, 10, 11, 8, 9
109
110 add_update p, 1, k3, 11, 8, 9, 10
111 add_only p, 0, k3, 9
112 add_only p, 1, k3, 10
113 add_only p, 0, k3, 11
114 add_only p, 1
115
116 /* update state */
117 vadd.u32 dga, dga, dg0
118 vadd.u32 dgb, dgb, dg1a0
119 bne 0b
120
121 /* store new state */
122 vst1.32 {dga}, [r0]
123 vstr dgbs, [r0, #16]
124 bx lr
125ENDPROC(sha1_ce_transform)
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000000..80bc2fcd241a
--- /dev/null
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -0,0 +1,96 @@
1/*
2 * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <crypto/internal/hash.h>
12#include <crypto/sha.h>
13#include <crypto/sha1_base.h>
14#include <linux/crypto.h>
15#include <linux/module.h>
16
17#include <asm/hwcap.h>
18#include <asm/neon.h>
19#include <asm/simd.h>
20
21#include "sha1.h"
22
23MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
24MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
25MODULE_LICENSE("GPL v2");
26
27asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
28 int blocks);
29
30static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
31 unsigned int len)
32{
33 struct sha1_state *sctx = shash_desc_ctx(desc);
34
35 if (!may_use_simd() ||
36 (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
37 return sha1_update_arm(desc, data, len);
38
39 kernel_neon_begin();
40 sha1_base_do_update(desc, data, len, sha1_ce_transform);
41 kernel_neon_end();
42
43 return 0;
44}
45
46static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
47 unsigned int len, u8 *out)
48{
49 if (!may_use_simd())
50 return sha1_finup_arm(desc, data, len, out);
51
52 kernel_neon_begin();
53 if (len)
54 sha1_base_do_update(desc, data, len, sha1_ce_transform);
55 sha1_base_do_finalize(desc, sha1_ce_transform);
56 kernel_neon_end();
57
58 return sha1_base_finish(desc, out);
59}
60
61static int sha1_ce_final(struct shash_desc *desc, u8 *out)
62{
63 return sha1_ce_finup(desc, NULL, 0, out);
64}
65
66static struct shash_alg alg = {
67 .init = sha1_base_init,
68 .update = sha1_ce_update,
69 .final = sha1_ce_final,
70 .finup = sha1_ce_finup,
71 .descsize = sizeof(struct sha1_state),
72 .digestsize = SHA1_DIGEST_SIZE,
73 .base = {
74 .cra_name = "sha1",
75 .cra_driver_name = "sha1-ce",
76 .cra_priority = 200,
77 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
78 .cra_blocksize = SHA1_BLOCK_SIZE,
79 .cra_module = THIS_MODULE,
80 }
81};
82
83static int __init sha1_ce_mod_init(void)
84{
85 if (!(elf_hwcap2 & HWCAP2_SHA1))
86 return -ENODEV;
87 return crypto_register_shash(&alg);
88}
89
90static void __exit sha1_ce_mod_fini(void)
91{
92 crypto_unregister_shash(&alg);
93}
94
95module_init(sha1_ce_mod_init);
96module_exit(sha1_ce_mod_fini);
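
As a usage aside (not part of the patch): once a driver like the one above is registered, callers reach it through the generic shash API by algorithm name, and the crypto core binds the highest-priority "sha1" implementation available. The function below is a minimal, hypothetical sketch of that call pattern, assuming kernel context and the in-tree <crypto/hash.h> interface; the helper name is made up for illustration.

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>

/* Hypothetical helper: one-shot SHA-1 over a buffer via whichever "sha1"
 * shash the crypto API selects (e.g. "sha1-ce" when the CE driver wins). */
static int sha1_digest_example(const u8 *data, unsigned int len,
			       u8 out[SHA1_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("sha1", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		err = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return err;
}

SHASH_DESC_ON_STACK() sizes the descriptor from crypto_shash_descsize(), so the same call works regardless of which sha1 driver ends up behind the handle.
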
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h
index 75e6a417416b..ffd8bd08b1a7 100644
--- a/arch/arm/include/asm/crypto/sha1.h
+++ b/arch/arm/crypto/sha1.h
@@ -7,4 +7,7 @@
 extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
 			   unsigned int len);
 
+extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+			  unsigned int len, u8 *out);
+
 #endif
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index e31b0440c613..6fc73bf8766d 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -22,127 +22,47 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
+#include <crypto/sha1_base.h>
 #include <asm/byteorder.h>
-#include <asm/crypto/sha1.h>
 
+#include "sha1.h"
 
 asmlinkage void sha1_block_data_order(u32 *digest,
 		const unsigned char *data, unsigned int rounds);
 
-
-static int sha1_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-
-	return 0;
-}
-
-
-static int __sha1_update(struct sha1_state *sctx, const u8 *data,
-			 unsigned int len, unsigned int partial)
-{
-	unsigned int done = 0;
-
-	sctx->count += len;
-
-	if (partial) {
-		done = SHA1_BLOCK_SIZE - partial;
-		memcpy(sctx->buffer + partial, data, done);
-		sha1_block_data_order(sctx->state, sctx->buffer, 1);
-	}
-
-	if (len - done >= SHA1_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-		sha1_block_data_order(sctx->state, data + done, rounds);
-		done += rounds * SHA1_BLOCK_SIZE;
-	}
-
-	memcpy(sctx->buffer, data + done, len - done);
-	return 0;
-}
-
-
 int sha1_update_arm(struct shash_desc *desc, const u8 *data,
 		    unsigned int len)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-	int res;
+	/* make sure casting to sha1_block_fn() is safe */
+	BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
 
-	/* Handle the fast case right here */
-	if (partial + len < SHA1_BLOCK_SIZE) {
-		sctx->count += len;
-		memcpy(sctx->buffer + partial, data, len);
-		return 0;
-	}
-	res = __sha1_update(sctx, data, len, partial);
-	return res;
+	return sha1_base_do_update(desc, data, len,
+				   (sha1_block_fn *)sha1_block_data_order);
 }
 EXPORT_SYMBOL_GPL(sha1_update_arm);
 
-
-/* Add padding and return the message digest. */
 static int sha1_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be32 *dst = (__be32 *)out;
-	__be64 bits;
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-	bits = cpu_to_be64(sctx->count << 3);
-
-	/* Pad out to 56 mod 64 and append length */
-	index = sctx->count % SHA1_BLOCK_SIZE;
-	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-	/* We need to fill a whole block for __sha1_update() */
-	if (padlen <= 56) {
-		sctx->count += padlen;
-		memcpy(sctx->buffer + index, padding, padlen);
-	} else {
-		__sha1_update(sctx, padding, padlen, index);
-	}
-	__sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
-
-	/* Store state in digest */
-	for (i = 0; i < 5; i++)
-		dst[i] = cpu_to_be32(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
-	return 0;
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order);
+	return sha1_base_finish(desc, out);
 }
 
-
-static int sha1_export(struct shash_desc *desc, void *out)
+int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+		   unsigned int len, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	memcpy(out, sctx, sizeof(*sctx));
-	return 0;
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_block_data_order);
+	return sha1_final(desc, out);
 }
-
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	memcpy(sctx, in, sizeof(*sctx));
-	return 0;
-}
-
+EXPORT_SYMBOL_GPL(sha1_finup_arm);
 
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
-	.init		=	sha1_init,
+	.init		=	sha1_base_init,
 	.update		=	sha1_update_arm,
 	.final		=	sha1_final,
-	.export		=	sha1_export,
-	.import		=	sha1_import,
+	.finup		=	sha1_finup_arm,
 	.descsize	=	sizeof(struct sha1_state),
-	.statesize	=	sizeof(struct sha1_state),
 	.base		=	{
 		.cra_name	=	"sha1",
 		.cra_driver_name=	"sha1-asm",
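
One note on the conversion above: sha1_block_data_order() takes the bare digest words as its first argument, while the sha1_base helpers expect a sha1_block_fn that receives the whole struct sha1_state. Casting one to the other is only well-defined because state[] sits at offset 0 of struct sha1_state, which is exactly what the BUILD_BUG_ON() asserts. For illustration, the two prototypes involved (the typedef as provided by <crypto/sha1_base.h>):

/* Prototype expected by the sha1_base helpers. */
typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks);

/* Prototype of the ARM asm routine being cast above: it is handed
 * sctx->state directly, so struct sha1_state must start with state[]. */
asmlinkage void sha1_block_data_order(u32 *digest, const unsigned char *data,
				      unsigned int rounds);
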
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index 0b0083757d47..4e22f122f966 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -25,147 +25,60 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha1_base.h>
 #include <asm/neon.h>
 #include <asm/simd.h>
-#include <asm/crypto/sha1.h>
 
+#include "sha1.h"
 
 asmlinkage void sha1_transform_neon(void *state_h, const char *data,
 				    unsigned int rounds);
 
-
-static int sha1_neon_init(struct shash_desc *desc)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	*sctx = (struct sha1_state){
-		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-	};
-
-	return 0;
-}
-
-static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
-			       unsigned int len, unsigned int partial)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int done = 0;
-
-	sctx->count += len;
-
-	if (partial) {
-		done = SHA1_BLOCK_SIZE - partial;
-		memcpy(sctx->buffer + partial, data, done);
-		sha1_transform_neon(sctx->state, sctx->buffer, 1);
-	}
-
-	if (len - done >= SHA1_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
-		sha1_transform_neon(sctx->state, data + done, rounds);
-		done += rounds * SHA1_BLOCK_SIZE;
-	}
-
-	memcpy(sctx->buffer, data + done, len - done);
-
-	return 0;
-}
-
 static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
 			     unsigned int len)
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-	int res;
 
-	/* Handle the fast case right here */
-	if (partial + len < SHA1_BLOCK_SIZE) {
-		sctx->count += len;
-		memcpy(sctx->buffer + partial, data, len);
+	if (!may_use_simd() ||
+	    (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+		return sha1_update_arm(desc, data, len);
 
-		return 0;
-	}
-
-	if (!may_use_simd()) {
-		res = sha1_update_arm(desc, data, len);
-	} else {
-		kernel_neon_begin();
-		res = __sha1_neon_update(desc, data, len, partial);
-		kernel_neon_end();
-	}
-
-	return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_neon_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be32 *dst = (__be32 *)out;
-	__be64 bits;
-	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
-	bits = cpu_to_be64(sctx->count << 3);
-
-	/* Pad out to 56 mod 64 and append length */
-	index = sctx->count % SHA1_BLOCK_SIZE;
-	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
-	if (!may_use_simd()) {
-		sha1_update_arm(desc, padding, padlen);
-		sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
-	} else {
-		kernel_neon_begin();
-		/* We need to fill a whole block for __sha1_neon_update() */
-		if (padlen <= 56) {
-			sctx->count += padlen;
-			memcpy(sctx->buffer + index, padding, padlen);
-		} else {
-			__sha1_neon_update(desc, padding, padlen, index);
-		}
-		__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
-		kernel_neon_end();
-	}
-
-	/* Store state in digest */
-	for (i = 0; i < 5; i++)
-		dst[i] = cpu_to_be32(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
+	kernel_neon_begin();
+	sha1_base_do_update(desc, data, len,
+			    (sha1_block_fn *)sha1_transform_neon);
+	kernel_neon_end();
 
 	return 0;
 }
 
-static int sha1_neon_export(struct shash_desc *desc, void *out)
+static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
+			   unsigned int len, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
+	if (!may_use_simd())
+		return sha1_finup_arm(desc, data, len, out);
 
-	memcpy(out, sctx, sizeof(*sctx));
+	kernel_neon_begin();
+	if (len)
+		sha1_base_do_update(desc, data, len,
+				    (sha1_block_fn *)sha1_transform_neon);
+	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon);
+	kernel_neon_end();
 
-	return 0;
+	return sha1_base_finish(desc, out);
 }
 
-static int sha1_neon_import(struct shash_desc *desc, const void *in)
+static int sha1_neon_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha1_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
+	return sha1_neon_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
-	.init		=	sha1_neon_init,
+	.init		=	sha1_base_init,
 	.update		=	sha1_neon_update,
 	.final		=	sha1_neon_final,
-	.export		=	sha1_neon_export,
-	.import		=	sha1_neon_import,
+	.finup		=	sha1_neon_finup,
 	.descsize	=	sizeof(struct sha1_state),
-	.statesize	=	sizeof(struct sha1_state),
 	.base		=	{
 		.cra_name		= "sha1",
 		.cra_driver_name	= "sha1-neon",
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S
new file mode 100644
index 000000000000..87ec11a5f405
--- /dev/null
+++ b/arch/arm/crypto/sha2-ce-core.S
@@ -0,0 +1,125 @@
1/*
2 * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd.
5 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/linkage.h>
13#include <asm/assembler.h>
14
15 .text
16 .fpu crypto-neon-fp-armv8
17
18 k0 .req q7
19 k1 .req q8
20 rk .req r3
21
22 ta0 .req q9
23 ta1 .req q10
24 tb0 .req q10
25 tb1 .req q9
26
27 dga .req q11
28 dgb .req q12
29
30 dg0 .req q13
31 dg1 .req q14
32 dg2 .req q15
33
34 .macro add_only, ev, s0
35 vmov dg2, dg0
36 .ifnb \s0
37 vld1.32 {k\ev}, [rk, :128]!
38 .endif
39 sha256h.32 dg0, dg1, tb\ev
40 sha256h2.32 dg1, dg2, tb\ev
41 .ifnb \s0
42 vadd.u32 ta\ev, q\s0, k\ev
43 .endif
44 .endm
45
46 .macro add_update, ev, s0, s1, s2, s3
47 sha256su0.32 q\s0, q\s1
48 add_only \ev, \s1
49 sha256su1.32 q\s0, q\s2, q\s3
50 .endm
51
52 .align 6
53.Lsha256_rcon:
54 .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
55 .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
56 .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
57 .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
58 .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
59 .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
60 .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
61 .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
62 .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
63 .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
64 .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
65 .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
66 .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
67 .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
68 .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
69 .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
70
71 /*
72 * void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
73 *			  int blocks);
74 */
75ENTRY(sha2_ce_transform)
76 /* load state */
77 vld1.32 {dga-dgb}, [r0]
78
79 /* load input */
800: vld1.32 {q0-q1}, [r1]!
81 vld1.32 {q2-q3}, [r1]!
82 subs r2, r2, #1
83
84#ifndef CONFIG_CPU_BIG_ENDIAN
85 vrev32.8 q0, q0
86 vrev32.8 q1, q1
87 vrev32.8 q2, q2
88 vrev32.8 q3, q3
89#endif
90
91 /* load first round constant */
92 adr rk, .Lsha256_rcon
93 vld1.32 {k0}, [rk, :128]!
94
95 vadd.u32 ta0, q0, k0
96 vmov dg0, dga
97 vmov dg1, dgb
98
99 add_update 1, 0, 1, 2, 3
100 add_update 0, 1, 2, 3, 0
101 add_update 1, 2, 3, 0, 1
102 add_update 0, 3, 0, 1, 2
103 add_update 1, 0, 1, 2, 3
104 add_update 0, 1, 2, 3, 0
105 add_update 1, 2, 3, 0, 1
106 add_update 0, 3, 0, 1, 2
107 add_update 1, 0, 1, 2, 3
108 add_update 0, 1, 2, 3, 0
109 add_update 1, 2, 3, 0, 1
110 add_update 0, 3, 0, 1, 2
111
112 add_only 1, 1
113 add_only 0, 2
114 add_only 1, 3
115 add_only 0
116
117 /* update state */
118 vadd.u32 dga, dga, dg0
119 vadd.u32 dgb, dgb, dg1
120 bne 0b
121
122 /* store new state */
123 vst1.32 {dga-dgb}, [r0]
124 bx lr
125ENDPROC(sha2_ce_transform)
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000000..0755b2d657f3
--- /dev/null
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -0,0 +1,114 @@
1/*
2 * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <crypto/internal/hash.h>
12#include <crypto/sha.h>
13#include <crypto/sha256_base.h>
14#include <linux/crypto.h>
15#include <linux/module.h>
16
17#include <asm/hwcap.h>
18#include <asm/simd.h>
19#include <asm/neon.h>
20#include <asm/unaligned.h>
21
22#include "sha256_glue.h"
23
24MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
25MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
26MODULE_LICENSE("GPL v2");
27
28asmlinkage void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
29 int blocks);
30
31static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
32 unsigned int len)
33{
34 struct sha256_state *sctx = shash_desc_ctx(desc);
35
36 if (!may_use_simd() ||
37 (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
38 return crypto_sha256_arm_update(desc, data, len);
39
40 kernel_neon_begin();
41 sha256_base_do_update(desc, data, len,
42 (sha256_block_fn *)sha2_ce_transform);
43 kernel_neon_end();
44
45 return 0;
46}
47
48static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
49 unsigned int len, u8 *out)
50{
51 if (!may_use_simd())
52 return crypto_sha256_arm_finup(desc, data, len, out);
53
54 kernel_neon_begin();
55 if (len)
56 sha256_base_do_update(desc, data, len,
57 (sha256_block_fn *)sha2_ce_transform);
58 sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
59 kernel_neon_end();
60
61 return sha256_base_finish(desc, out);
62}
63
64static int sha2_ce_final(struct shash_desc *desc, u8 *out)
65{
66 return sha2_ce_finup(desc, NULL, 0, out);
67}
68
69static struct shash_alg algs[] = { {
70 .init = sha224_base_init,
71 .update = sha2_ce_update,
72 .final = sha2_ce_final,
73 .finup = sha2_ce_finup,
74 .descsize = sizeof(struct sha256_state),
75 .digestsize = SHA224_DIGEST_SIZE,
76 .base = {
77 .cra_name = "sha224",
78 .cra_driver_name = "sha224-ce",
79 .cra_priority = 300,
80 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
81 .cra_blocksize = SHA256_BLOCK_SIZE,
82 .cra_module = THIS_MODULE,
83 }
84}, {
85 .init = sha256_base_init,
86 .update = sha2_ce_update,
87 .final = sha2_ce_final,
88 .finup = sha2_ce_finup,
89 .descsize = sizeof(struct sha256_state),
90 .digestsize = SHA256_DIGEST_SIZE,
91 .base = {
92 .cra_name = "sha256",
93 .cra_driver_name = "sha256-ce",
94 .cra_priority = 300,
95 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
96 .cra_blocksize = SHA256_BLOCK_SIZE,
97 .cra_module = THIS_MODULE,
98 }
99} };
100
101static int __init sha2_ce_mod_init(void)
102{
103 if (!(elf_hwcap2 & HWCAP2_SHA2))
104 return -ENODEV;
105 return crypto_register_shashes(algs, ARRAY_SIZE(algs));
106}
107
108static void __exit sha2_ce_mod_fini(void)
109{
110 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
111}
112
113module_init(sha2_ce_mod_init);
114module_exit(sha2_ce_mod_fini);
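
A quick way to observe the effect of the cra_priority value above (300 here, so these entries outrank lower-priority implementations registered under the same cra_name) is to allocate "sha256" generically and print which driver backs it. This is a minimal sketch, assuming kernel context; the helper name is made up for illustration.

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/printk.h>

/* Hypothetical helper: report which driver the crypto API bound to "sha256".
 * With HWCAP2_SHA2 present this is expected to be "sha256-ce". */
static void sha256_report_driver(void)
{
	struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);

	if (IS_ERR(tfm)) {
		pr_err("sha256 unavailable: %ld\n", PTR_ERR(tfm));
		return;
	}

	pr_info("sha256 backed by %s\n",
		crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)));
	crypto_free_shash(tfm);
}
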
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl
new file mode 100644
index 000000000000..fac0533ea633
--- /dev/null
+++ b/arch/arm/crypto/sha256-armv4.pl
@@ -0,0 +1,716 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8#
9# Permission to use under GPL terms is granted.
10# ====================================================================
11
12# SHA256 block procedure for ARMv4. May 2007.
13
14# Performance is ~2x better than gcc 3.4 generated code and in "abso-
15# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
16# byte [on single-issue Xscale PXA250 core].
17
18# July 2010.
19#
20# Rescheduling for dual-issue pipeline resulted in 22% improvement on
21# Cortex A8 core and ~20 cycles per processed byte.
22
23# February 2011.
24#
25# Profiler-assisted and platform-specific optimization resulted in 16%
26# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
27
28# September 2013.
29#
30# Add NEON implementation. On Cortex A8 it was measured to process one
31# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
32# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
33# code (meaning that latter performs sub-optimally, nothing was done
34# about it).
35
36# May 2014.
37#
38# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
39
40while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
41open STDOUT,">$output";
42
43$ctx="r0"; $t0="r0";
44$inp="r1"; $t4="r1";
45$len="r2"; $t1="r2";
46$T1="r3"; $t3="r3";
47$A="r4";
48$B="r5";
49$C="r6";
50$D="r7";
51$E="r8";
52$F="r9";
53$G="r10";
54$H="r11";
55@V=($A,$B,$C,$D,$E,$F,$G,$H);
56$t2="r12";
57$Ktbl="r14";
58
59@Sigma0=( 2,13,22);
60@Sigma1=( 6,11,25);
61@sigma0=( 7,18, 3);
62@sigma1=(17,19,10);
63
64sub BODY_00_15 {
65my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
66
67$code.=<<___ if ($i<16);
68#if __ARM_ARCH__>=7
69 @ ldr $t1,[$inp],#4 @ $i
70# if $i==15
71 str $inp,[sp,#17*4] @ make room for $t4
72# endif
73 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
74 add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
75 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
76# ifndef __ARMEB__
77 rev $t1,$t1
78# endif
79#else
80 @ ldrb $t1,[$inp,#3] @ $i
81 add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
82 ldrb $t2,[$inp,#2]
83 ldrb $t0,[$inp,#1]
84 orr $t1,$t1,$t2,lsl#8
85 ldrb $t2,[$inp],#4
86 orr $t1,$t1,$t0,lsl#16
87# if $i==15
88 str $inp,[sp,#17*4] @ make room for $t4
89# endif
90 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
91 orr $t1,$t1,$t2,lsl#24
92 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
93#endif
94___
95$code.=<<___;
96 ldr $t2,[$Ktbl],#4 @ *K256++
97 add $h,$h,$t1 @ h+=X[i]
98 str $t1,[sp,#`$i%16`*4]
99 eor $t1,$f,$g
100 add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
101 and $t1,$t1,$e
102 add $h,$h,$t2 @ h+=K256[i]
103 eor $t1,$t1,$g @ Ch(e,f,g)
104 eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
105 add $h,$h,$t1 @ h+=Ch(e,f,g)
106#if $i==31
107 and $t2,$t2,#0xff
108 cmp $t2,#0xf2 @ done?
109#endif
110#if $i<15
111# if __ARM_ARCH__>=7
112 ldr $t1,[$inp],#4 @ prefetch
113# else
114 ldrb $t1,[$inp,#3]
115# endif
116 eor $t2,$a,$b @ a^b, b^c in next round
117#else
118 ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
119 eor $t2,$a,$b @ a^b, b^c in next round
120 ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
121#endif
122 eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
123 and $t3,$t3,$t2 @ (b^c)&=(a^b)
124 add $d,$d,$h @ d+=h
125 eor $t3,$t3,$b @ Maj(a,b,c)
126 add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
127 @ add $h,$h,$t3 @ h+=Maj(a,b,c)
128___
129 ($t2,$t3)=($t3,$t2);
130}
131
132sub BODY_16_XX {
133my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
134
135$code.=<<___;
136 @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
137 @ ldr $t4,[sp,#`($i+14)%16`*4]
138 mov $t0,$t1,ror#$sigma0[0]
139 add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
140 mov $t2,$t4,ror#$sigma1[0]
141 eor $t0,$t0,$t1,ror#$sigma0[1]
142 eor $t2,$t2,$t4,ror#$sigma1[1]
143 eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
144 ldr $t1,[sp,#`($i+0)%16`*4]
145 eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
146 ldr $t4,[sp,#`($i+9)%16`*4]
147
148 add $t2,$t2,$t0
149 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
150 add $t1,$t1,$t2
151 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
152 add $t1,$t1,$t4 @ X[i]
153___
154 &BODY_00_15(@_);
155}
156
157$code=<<___;
158#ifndef __KERNEL__
159# include "arm_arch.h"
160#else
161# define __ARM_ARCH__ __LINUX_ARM_ARCH__
162# define __ARM_MAX_ARCH__ 7
163#endif
164
165.text
166#if __ARM_ARCH__<7
167.code 32
168#else
169.syntax unified
170# ifdef __thumb2__
171# define adrl adr
172.thumb
173# else
174.code 32
175# endif
176#endif
177
178.type K256,%object
179.align 5
180K256:
181.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
182.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
183.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
184.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
185.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
186.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
187.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
188.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
189.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
190.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
191.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
192.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
193.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
194.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
195.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
196.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
197.size K256,.-K256
198.word 0 @ terminator
199#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
200.LOPENSSL_armcap:
201.word OPENSSL_armcap_P-sha256_block_data_order
202#endif
203.align 5
204
205.global sha256_block_data_order
206.type sha256_block_data_order,%function
207sha256_block_data_order:
208#if __ARM_ARCH__<7
209 sub r3,pc,#8 @ sha256_block_data_order
210#else
211 adr r3,sha256_block_data_order
212#endif
213#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
214 ldr r12,.LOPENSSL_armcap
215 ldr r12,[r3,r12] @ OPENSSL_armcap_P
216 tst r12,#ARMV8_SHA256
217 bne .LARMv8
218 tst r12,#ARMV7_NEON
219 bne .LNEON
220#endif
221 add $len,$inp,$len,lsl#6 @ len to point at the end of inp
222 stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
223 ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
224 sub $Ktbl,r3,#256+32 @ K256
225 sub sp,sp,#16*4 @ alloca(X[16])
226.Loop:
227# if __ARM_ARCH__>=7
228 ldr $t1,[$inp],#4
229# else
230 ldrb $t1,[$inp,#3]
231# endif
232 eor $t3,$B,$C @ magic
233 eor $t2,$t2,$t2
234___
235for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
236$code.=".Lrounds_16_xx:\n";
237for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
238$code.=<<___;
239#if __ARM_ARCH__>=7
240 ite eq @ Thumb2 thing, sanity check in ARM
241#endif
242 ldreq $t3,[sp,#16*4] @ pull ctx
243 bne .Lrounds_16_xx
244
245 add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
246 ldr $t0,[$t3,#0]
247 ldr $t1,[$t3,#4]
248 ldr $t2,[$t3,#8]
249 add $A,$A,$t0
250 ldr $t0,[$t3,#12]
251 add $B,$B,$t1
252 ldr $t1,[$t3,#16]
253 add $C,$C,$t2
254 ldr $t2,[$t3,#20]
255 add $D,$D,$t0
256 ldr $t0,[$t3,#24]
257 add $E,$E,$t1
258 ldr $t1,[$t3,#28]
259 add $F,$F,$t2
260 ldr $inp,[sp,#17*4] @ pull inp
261 ldr $t2,[sp,#18*4] @ pull inp+len
262 add $G,$G,$t0
263 add $H,$H,$t1
264 stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
265 cmp $inp,$t2
266 sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
267 bne .Loop
268
269 add sp,sp,#`16+3`*4 @ destroy frame
270#if __ARM_ARCH__>=5
271 ldmia sp!,{r4-r11,pc}
272#else
273 ldmia sp!,{r4-r11,lr}
274 tst lr,#1
275 moveq pc,lr @ be binary compatible with V4, yet
276 bx lr @ interoperable with Thumb ISA:-)
277#endif
278.size sha256_block_data_order,.-sha256_block_data_order
279___
280######################################################################
281# NEON stuff
282#
283{{{
284my @X=map("q$_",(0..3));
285my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
286my $Xfer=$t4;
287my $j=0;
288
289sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
290sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
291
292sub AUTOLOAD() # thunk [simplified] x86-style perlasm
293{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
294 my $arg = pop;
295 $arg = "#$arg" if ($arg*1 eq $arg);
296 $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
297}
298
299sub Xupdate()
300{ use integer;
301 my $body = shift;
302 my @insns = (&$body,&$body,&$body,&$body);
303 my ($a,$b,$c,$d,$e,$f,$g,$h);
304
305 &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
306 eval(shift(@insns));
307 eval(shift(@insns));
308 eval(shift(@insns));
309 &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
310 eval(shift(@insns));
311 eval(shift(@insns));
312 eval(shift(@insns));
313 &vshr_u32 ($T2,$T0,$sigma0[0]);
314 eval(shift(@insns));
315 eval(shift(@insns));
316 &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
317 eval(shift(@insns));
318 eval(shift(@insns));
319 &vshr_u32 ($T1,$T0,$sigma0[2]);
320 eval(shift(@insns));
321 eval(shift(@insns));
322 &vsli_32 ($T2,$T0,32-$sigma0[0]);
323 eval(shift(@insns));
324 eval(shift(@insns));
325 &vshr_u32 ($T3,$T0,$sigma0[1]);
326 eval(shift(@insns));
327 eval(shift(@insns));
328 &veor ($T1,$T1,$T2);
329 eval(shift(@insns));
330 eval(shift(@insns));
331 &vsli_32 ($T3,$T0,32-$sigma0[1]);
332 eval(shift(@insns));
333 eval(shift(@insns));
334 &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
335 eval(shift(@insns));
336 eval(shift(@insns));
337 &veor ($T1,$T1,$T3); # sigma0(X[1..4])
338 eval(shift(@insns));
339 eval(shift(@insns));
340 &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
341 eval(shift(@insns));
342 eval(shift(@insns));
343 &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
344 eval(shift(@insns));
345 eval(shift(@insns));
346 &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
347 eval(shift(@insns));
348 eval(shift(@insns));
349 &veor ($T5,$T5,$T4);
350 eval(shift(@insns));
351 eval(shift(@insns));
352 &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
353 eval(shift(@insns));
354 eval(shift(@insns));
355 &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
356 eval(shift(@insns));
357 eval(shift(@insns));
358 &veor ($T5,$T5,$T4); # sigma1(X[14..15])
359 eval(shift(@insns));
360 eval(shift(@insns));
361 &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
362 eval(shift(@insns));
363 eval(shift(@insns));
364 &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
365 eval(shift(@insns));
366 eval(shift(@insns));
367 &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
368 eval(shift(@insns));
369 eval(shift(@insns));
370 &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
371 eval(shift(@insns));
372 eval(shift(@insns));
373 &veor ($T5,$T5,$T4);
374 eval(shift(@insns));
375 eval(shift(@insns));
376 &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
377 eval(shift(@insns));
378 eval(shift(@insns));
379 &vld1_32 ("{$T0}","[$Ktbl,:128]!");
380 eval(shift(@insns));
381 eval(shift(@insns));
382 &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
383 eval(shift(@insns));
384 eval(shift(@insns));
385 &veor ($T5,$T5,$T4); # sigma1(X[16..17])
386 eval(shift(@insns));
387 eval(shift(@insns));
388 &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
389 eval(shift(@insns));
390 eval(shift(@insns));
391 &vadd_i32 ($T0,$T0,@X[0]);
392 while($#insns>=2) { eval(shift(@insns)); }
393 &vst1_32 ("{$T0}","[$Xfer,:128]!");
394 eval(shift(@insns));
395 eval(shift(@insns));
396
397 push(@X,shift(@X)); # "rotate" X[]
398}
399
400sub Xpreload()
401{ use integer;
402 my $body = shift;
403 my @insns = (&$body,&$body,&$body,&$body);
404 my ($a,$b,$c,$d,$e,$f,$g,$h);
405
406 eval(shift(@insns));
407 eval(shift(@insns));
408 eval(shift(@insns));
409 eval(shift(@insns));
410 &vld1_32 ("{$T0}","[$Ktbl,:128]!");
411 eval(shift(@insns));
412 eval(shift(@insns));
413 eval(shift(@insns));
414 eval(shift(@insns));
415 &vrev32_8 (@X[0],@X[0]);
416 eval(shift(@insns));
417 eval(shift(@insns));
418 eval(shift(@insns));
419 eval(shift(@insns));
420 &vadd_i32 ($T0,$T0,@X[0]);
421 foreach (@insns) { eval; } # remaining instructions
422 &vst1_32 ("{$T0}","[$Xfer,:128]!");
423
424 push(@X,shift(@X)); # "rotate" X[]
425}
426
427sub body_00_15 () {
428 (
429 '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
430 '&add ($h,$h,$t1)', # h+=X[i]+K[i]
431 '&eor ($t1,$f,$g)',
432 '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
433 '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
434 '&and ($t1,$t1,$e)',
435 '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
436 '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
437 '&eor ($t1,$t1,$g)', # Ch(e,f,g)
438 '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
439 '&eor ($t2,$a,$b)', # a^b, b^c in next round
440 '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
441 '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
442 '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
443 '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
444 '&ldr ($t1,"[sp,#64]") if ($j==31)',
445 '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
446 '&add ($d,$d,$h)', # d+=h
447 '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
448 '&eor ($t3,$t3,$b)', # Maj(a,b,c)
449 '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
450 )
451}
452
453$code.=<<___;
454#if __ARM_MAX_ARCH__>=7
455.arch armv7-a
456.fpu neon
457
458.global sha256_block_data_order_neon
459.type sha256_block_data_order_neon,%function
460.align 4
461sha256_block_data_order_neon:
462.LNEON:
463 stmdb sp!,{r4-r12,lr}
464
465 sub $H,sp,#16*4+16
466 adrl $Ktbl,K256
467 bic $H,$H,#15 @ align for 128-bit stores
468 mov $t2,sp
469 mov sp,$H @ alloca
470 add $len,$inp,$len,lsl#6 @ len to point at the end of inp
471
472 vld1.8 {@X[0]},[$inp]!
473 vld1.8 {@X[1]},[$inp]!
474 vld1.8 {@X[2]},[$inp]!
475 vld1.8 {@X[3]},[$inp]!
476 vld1.32 {$T0},[$Ktbl,:128]!
477 vld1.32 {$T1},[$Ktbl,:128]!
478 vld1.32 {$T2},[$Ktbl,:128]!
479 vld1.32 {$T3},[$Ktbl,:128]!
480 vrev32.8 @X[0],@X[0] @ yes, even on
481 str $ctx,[sp,#64]
482 vrev32.8 @X[1],@X[1] @ big-endian
483 str $inp,[sp,#68]
484 mov $Xfer,sp
485 vrev32.8 @X[2],@X[2]
486 str $len,[sp,#72]
487 vrev32.8 @X[3],@X[3]
488 str $t2,[sp,#76] @ save original sp
489 vadd.i32 $T0,$T0,@X[0]
490 vadd.i32 $T1,$T1,@X[1]
491 vst1.32 {$T0},[$Xfer,:128]!
492 vadd.i32 $T2,$T2,@X[2]
493 vst1.32 {$T1},[$Xfer,:128]!
494 vadd.i32 $T3,$T3,@X[3]
495 vst1.32 {$T2},[$Xfer,:128]!
496 vst1.32 {$T3},[$Xfer,:128]!
497
498 ldmia $ctx,{$A-$H}
499 sub $Xfer,$Xfer,#64
500 ldr $t1,[sp,#0]
501 eor $t2,$t2,$t2
502 eor $t3,$B,$C
503 b .L_00_48
504
505.align 4
506.L_00_48:
507___
508 &Xupdate(\&body_00_15);
509 &Xupdate(\&body_00_15);
510 &Xupdate(\&body_00_15);
511 &Xupdate(\&body_00_15);
512$code.=<<___;
513 teq $t1,#0 @ check for K256 terminator
514 ldr $t1,[sp,#0]
515 sub $Xfer,$Xfer,#64
516 bne .L_00_48
517
518 ldr $inp,[sp,#68]
519 ldr $t0,[sp,#72]
520 sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
521 teq $inp,$t0
522 it eq
523 subeq $inp,$inp,#64 @ avoid SEGV
524 vld1.8 {@X[0]},[$inp]! @ load next input block
525 vld1.8 {@X[1]},[$inp]!
526 vld1.8 {@X[2]},[$inp]!
527 vld1.8 {@X[3]},[$inp]!
528 it ne
529 strne $inp,[sp,#68]
530 mov $Xfer,sp
531___
532 &Xpreload(\&body_00_15);
533 &Xpreload(\&body_00_15);
534 &Xpreload(\&body_00_15);
535 &Xpreload(\&body_00_15);
536$code.=<<___;
537 ldr $t0,[$t1,#0]
538 add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
539 ldr $t2,[$t1,#4]
540 ldr $t3,[$t1,#8]
541 ldr $t4,[$t1,#12]
542 add $A,$A,$t0 @ accumulate
543 ldr $t0,[$t1,#16]
544 add $B,$B,$t2
545 ldr $t2,[$t1,#20]
546 add $C,$C,$t3
547 ldr $t3,[$t1,#24]
548 add $D,$D,$t4
549 ldr $t4,[$t1,#28]
550 add $E,$E,$t0
551 str $A,[$t1],#4
552 add $F,$F,$t2
553 str $B,[$t1],#4
554 add $G,$G,$t3
555 str $C,[$t1],#4
556 add $H,$H,$t4
557 str $D,[$t1],#4
558 stmia $t1,{$E-$H}
559
560 ittte ne
561 movne $Xfer,sp
562 ldrne $t1,[sp,#0]
563 eorne $t2,$t2,$t2
564 ldreq sp,[sp,#76] @ restore original sp
565 itt ne
566 eorne $t3,$B,$C
567 bne .L_00_48
568
569 ldmia sp!,{r4-r12,pc}
570.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
571#endif
572___
573}}}
574######################################################################
575# ARMv8 stuff
576#
577{{{
578my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
579my @MSG=map("q$_",(8..11));
580my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
581my $Ktbl="r3";
582
583$code.=<<___;
584#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
585
586# ifdef __thumb2__
587# define INST(a,b,c,d) .byte c,d|0xc,a,b
588# else
589# define INST(a,b,c,d) .byte a,b,c,d
590# endif
591
592.type sha256_block_data_order_armv8,%function
593.align 5
594sha256_block_data_order_armv8:
595.LARMv8:
596 vld1.32 {$ABCD,$EFGH},[$ctx]
597# ifdef __thumb2__
598 adr $Ktbl,.LARMv8
599 sub $Ktbl,$Ktbl,#.LARMv8-K256
600# else
601 adrl $Ktbl,K256
602# endif
603 add $len,$inp,$len,lsl#6 @ len to point at the end of inp
604
605.Loop_v8:
606 vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
607 vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
608 vld1.32 {$W0},[$Ktbl]!
609 vrev32.8 @MSG[0],@MSG[0]
610 vrev32.8 @MSG[1],@MSG[1]
611 vrev32.8 @MSG[2],@MSG[2]
612 vrev32.8 @MSG[3],@MSG[3]
613 vmov $ABCD_SAVE,$ABCD @ offload
614 vmov $EFGH_SAVE,$EFGH
615 teq $inp,$len
616___
617for($i=0;$i<12;$i++) {
618$code.=<<___;
619 vld1.32 {$W1},[$Ktbl]!
620 vadd.i32 $W0,$W0,@MSG[0]
621 sha256su0 @MSG[0],@MSG[1]
622 vmov $abcd,$ABCD
623 sha256h $ABCD,$EFGH,$W0
624 sha256h2 $EFGH,$abcd,$W0
625 sha256su1 @MSG[0],@MSG[2],@MSG[3]
626___
627 ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
628}
629$code.=<<___;
630 vld1.32 {$W1},[$Ktbl]!
631 vadd.i32 $W0,$W0,@MSG[0]
632 vmov $abcd,$ABCD
633 sha256h $ABCD,$EFGH,$W0
634 sha256h2 $EFGH,$abcd,$W0
635
636 vld1.32 {$W0},[$Ktbl]!
637 vadd.i32 $W1,$W1,@MSG[1]
638 vmov $abcd,$ABCD
639 sha256h $ABCD,$EFGH,$W1
640 sha256h2 $EFGH,$abcd,$W1
641
642 vld1.32 {$W1},[$Ktbl]
643 vadd.i32 $W0,$W0,@MSG[2]
644 sub $Ktbl,$Ktbl,#256-16 @ rewind
645 vmov $abcd,$ABCD
646 sha256h $ABCD,$EFGH,$W0
647 sha256h2 $EFGH,$abcd,$W0
648
649 vadd.i32 $W1,$W1,@MSG[3]
650 vmov $abcd,$ABCD
651 sha256h $ABCD,$EFGH,$W1
652 sha256h2 $EFGH,$abcd,$W1
653
654 vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
655 vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
656 it ne
657 bne .Loop_v8
658
659 vst1.32 {$ABCD,$EFGH},[$ctx]
660
661 ret @ bx lr
662.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
663#endif
664___
665}}}
666$code.=<<___;
667.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
668.align 2
669#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
670.comm OPENSSL_armcap_P,4,4
671#endif
672___
673
674open SELF,$0;
675while(<SELF>) {
676 next if (/^#!/);
677 last if (!s/^#/@/ and !/^$/);
678 print;
679}
680close SELF;
681
682{ my %opcode = (
683 "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
684 "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
685
686 sub unsha256 {
687 my ($mnemonic,$arg)=@_;
688
689 if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
690 my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
691 |(($2&7)<<17)|(($2&8)<<4)
692 |(($3&7)<<1) |(($3&8)<<2);
693 # since ARMv7 instructions are always encoded little-endian.
694 # correct solution is to use .inst directive, but older
695 # assemblers don't implement it:-(
696 sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
697 $word&0xff,($word>>8)&0xff,
698 ($word>>16)&0xff,($word>>24)&0xff,
699 $mnemonic,$arg;
700 }
701 }
702}
703
704foreach (split($/,$code)) {
705
706 s/\`([^\`]*)\`/eval $1/geo;
707
708 s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
709
710 s/\bret\b/bx lr/go or
711 s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
712
713 print $_,"\n";
714}
715
716close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
new file mode 100644
index 000000000000..555a1a8eec90
--- /dev/null
+++ b/arch/arm/crypto/sha256-core.S_shipped
@@ -0,0 +1,2808 @@
1
2@ ====================================================================
3@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
4@ project. The module is, however, dual licensed under OpenSSL and
5@ CRYPTOGAMS licenses depending on where you obtain it. For further
6@ details see http://www.openssl.org/~appro/cryptogams/.
7@
8@ Permission to use under GPL terms is granted.
9@ ====================================================================
10
11@ SHA256 block procedure for ARMv4. May 2007.
12
13@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
14@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
15@ byte [on single-issue Xscale PXA250 core].
16
17@ July 2010.
18@
19@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
20@ Cortex A8 core and ~20 cycles per processed byte.
21
22@ February 2011.
23@
24@ Profiler-assisted and platform-specific optimization resulted in 16%
25@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
26
27@ September 2013.
28@
29@ Add NEON implementation. On Cortex A8 it was measured to process one
30@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
31@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
32@ code (meaning that latter performs sub-optimally, nothing was done
33@ about it).
34
35@ May 2014.
36@
37@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
38
39#ifndef __KERNEL__
40# include "arm_arch.h"
41#else
42# define __ARM_ARCH__ __LINUX_ARM_ARCH__
43# define __ARM_MAX_ARCH__ 7
44#endif
45
46.text
47#if __ARM_ARCH__<7
48.code 32
49#else
50.syntax unified
51# ifdef __thumb2__
52# define adrl adr
53.thumb
54# else
55.code 32
56# endif
57#endif
58
59.type K256,%object
60.align 5
61K256:
62.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
63.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
64.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
65.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
66.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
67.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
68.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
69.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
70.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
71.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
72.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
73.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
74.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
75.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
76.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
77.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
78.size K256,.-K256
79.word 0 @ terminator
80#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
81.LOPENSSL_armcap:
82.word OPENSSL_armcap_P-sha256_block_data_order
83#endif
84.align 5
85
86.global sha256_block_data_order
87.type sha256_block_data_order,%function
88sha256_block_data_order:
89#if __ARM_ARCH__<7
90 sub r3,pc,#8 @ sha256_block_data_order
91#else
92 adr r3,sha256_block_data_order
93#endif
94#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
95 ldr r12,.LOPENSSL_armcap
96 ldr r12,[r3,r12] @ OPENSSL_armcap_P
97 tst r12,#ARMV8_SHA256
98 bne .LARMv8
99 tst r12,#ARMV7_NEON
100 bne .LNEON
101#endif
102 add r2,r1,r2,lsl#6 @ len to point at the end of inp
103 stmdb sp!,{r0,r1,r2,r4-r11,lr}
104 ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
105 sub r14,r3,#256+32 @ K256
106 sub sp,sp,#16*4 @ alloca(X[16])
107.Loop:
108# if __ARM_ARCH__>=7
109 ldr r2,[r1],#4
110# else
111 ldrb r2,[r1,#3]
112# endif
113 eor r3,r5,r6 @ magic
114 eor r12,r12,r12
115#if __ARM_ARCH__>=7
116 @ ldr r2,[r1],#4 @ 0
117# if 0==15
118 str r1,[sp,#17*4] @ make room for r1
119# endif
120 eor r0,r8,r8,ror#5
121 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
122 eor r0,r0,r8,ror#19 @ Sigma1(e)
123# ifndef __ARMEB__
124 rev r2,r2
125# endif
126#else
127 @ ldrb r2,[r1,#3] @ 0
128 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
129 ldrb r12,[r1,#2]
130 ldrb r0,[r1,#1]
131 orr r2,r2,r12,lsl#8
132 ldrb r12,[r1],#4
133 orr r2,r2,r0,lsl#16
134# if 0==15
135 str r1,[sp,#17*4] @ make room for r1
136# endif
137 eor r0,r8,r8,ror#5
138 orr r2,r2,r12,lsl#24
139 eor r0,r0,r8,ror#19 @ Sigma1(e)
140#endif
141 ldr r12,[r14],#4 @ *K256++
142 add r11,r11,r2 @ h+=X[i]
143 str r2,[sp,#0*4]
144 eor r2,r9,r10
145 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
146 and r2,r2,r8
147 add r11,r11,r12 @ h+=K256[i]
148 eor r2,r2,r10 @ Ch(e,f,g)
149 eor r0,r4,r4,ror#11
150 add r11,r11,r2 @ h+=Ch(e,f,g)
151#if 0==31
152 and r12,r12,#0xff
153 cmp r12,#0xf2 @ done?
154#endif
155#if 0<15
156# if __ARM_ARCH__>=7
157 ldr r2,[r1],#4 @ prefetch
158# else
159 ldrb r2,[r1,#3]
160# endif
161 eor r12,r4,r5 @ a^b, b^c in next round
162#else
163 ldr r2,[sp,#2*4] @ from future BODY_16_xx
164 eor r12,r4,r5 @ a^b, b^c in next round
165 ldr r1,[sp,#15*4] @ from future BODY_16_xx
166#endif
167 eor r0,r0,r4,ror#20 @ Sigma0(a)
168 and r3,r3,r12 @ (b^c)&=(a^b)
169 add r7,r7,r11 @ d+=h
170 eor r3,r3,r5 @ Maj(a,b,c)
171 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
172 @ add r11,r11,r3 @ h+=Maj(a,b,c)
173#if __ARM_ARCH__>=7
174 @ ldr r2,[r1],#4 @ 1
175# if 1==15
176 str r1,[sp,#17*4] @ make room for r1
177# endif
178 eor r0,r7,r7,ror#5
179 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
180 eor r0,r0,r7,ror#19 @ Sigma1(e)
181# ifndef __ARMEB__
182 rev r2,r2
183# endif
184#else
185 @ ldrb r2,[r1,#3] @ 1
186 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
187 ldrb r3,[r1,#2]
188 ldrb r0,[r1,#1]
189 orr r2,r2,r3,lsl#8
190 ldrb r3,[r1],#4
191 orr r2,r2,r0,lsl#16
192# if 1==15
193 str r1,[sp,#17*4] @ make room for r1
194# endif
195 eor r0,r7,r7,ror#5
196 orr r2,r2,r3,lsl#24
197 eor r0,r0,r7,ror#19 @ Sigma1(e)
198#endif
199 ldr r3,[r14],#4 @ *K256++
200 add r10,r10,r2 @ h+=X[i]
201 str r2,[sp,#1*4]
202 eor r2,r8,r9
203 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
204 and r2,r2,r7
205 add r10,r10,r3 @ h+=K256[i]
206 eor r2,r2,r9 @ Ch(e,f,g)
207 eor r0,r11,r11,ror#11
208 add r10,r10,r2 @ h+=Ch(e,f,g)
209#if 1==31
210 and r3,r3,#0xff
211 cmp r3,#0xf2 @ done?
212#endif
213#if 1<15
214# if __ARM_ARCH__>=7
215 ldr r2,[r1],#4 @ prefetch
216# else
217 ldrb r2,[r1,#3]
218# endif
219 eor r3,r11,r4 @ a^b, b^c in next round
220#else
221 ldr r2,[sp,#3*4] @ from future BODY_16_xx
222 eor r3,r11,r4 @ a^b, b^c in next round
223 ldr r1,[sp,#0*4] @ from future BODY_16_xx
224#endif
225 eor r0,r0,r11,ror#20 @ Sigma0(a)
226 and r12,r12,r3 @ (b^c)&=(a^b)
227 add r6,r6,r10 @ d+=h
228 eor r12,r12,r4 @ Maj(a,b,c)
229 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
230 @ add r10,r10,r12 @ h+=Maj(a,b,c)
231#if __ARM_ARCH__>=7
232 @ ldr r2,[r1],#4 @ 2
233# if 2==15
234 str r1,[sp,#17*4] @ make room for r1
235# endif
236 eor r0,r6,r6,ror#5
237 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
238 eor r0,r0,r6,ror#19 @ Sigma1(e)
239# ifndef __ARMEB__
240 rev r2,r2
241# endif
242#else
243 @ ldrb r2,[r1,#3] @ 2
244 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
245 ldrb r12,[r1,#2]
246 ldrb r0,[r1,#1]
247 orr r2,r2,r12,lsl#8
248 ldrb r12,[r1],#4
249 orr r2,r2,r0,lsl#16
250# if 2==15
251 str r1,[sp,#17*4] @ make room for r1
252# endif
253 eor r0,r6,r6,ror#5
254 orr r2,r2,r12,lsl#24
255 eor r0,r0,r6,ror#19 @ Sigma1(e)
256#endif
257 ldr r12,[r14],#4 @ *K256++
258 add r9,r9,r2 @ h+=X[i]
259 str r2,[sp,#2*4]
260 eor r2,r7,r8
261 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
262 and r2,r2,r6
263 add r9,r9,r12 @ h+=K256[i]
264 eor r2,r2,r8 @ Ch(e,f,g)
265 eor r0,r10,r10,ror#11
266 add r9,r9,r2 @ h+=Ch(e,f,g)
267#if 2==31
268 and r12,r12,#0xff
269 cmp r12,#0xf2 @ done?
270#endif
271#if 2<15
272# if __ARM_ARCH__>=7
273 ldr r2,[r1],#4 @ prefetch
274# else
275 ldrb r2,[r1,#3]
276# endif
277 eor r12,r10,r11 @ a^b, b^c in next round
278#else
279 ldr r2,[sp,#4*4] @ from future BODY_16_xx
280 eor r12,r10,r11 @ a^b, b^c in next round
281 ldr r1,[sp,#1*4] @ from future BODY_16_xx
282#endif
283 eor r0,r0,r10,ror#20 @ Sigma0(a)
284 and r3,r3,r12 @ (b^c)&=(a^b)
285 add r5,r5,r9 @ d+=h
286 eor r3,r3,r11 @ Maj(a,b,c)
287 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
288 @ add r9,r9,r3 @ h+=Maj(a,b,c)
289#if __ARM_ARCH__>=7
290 @ ldr r2,[r1],#4 @ 3
291# if 3==15
292 str r1,[sp,#17*4] @ make room for r1
293# endif
294 eor r0,r5,r5,ror#5
295 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
296 eor r0,r0,r5,ror#19 @ Sigma1(e)
297# ifndef __ARMEB__
298 rev r2,r2
299# endif
300#else
301 @ ldrb r2,[r1,#3] @ 3
302 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
303 ldrb r3,[r1,#2]
304 ldrb r0,[r1,#1]
305 orr r2,r2,r3,lsl#8
306 ldrb r3,[r1],#4
307 orr r2,r2,r0,lsl#16
308# if 3==15
309 str r1,[sp,#17*4] @ make room for r1
310# endif
311 eor r0,r5,r5,ror#5
312 orr r2,r2,r3,lsl#24
313 eor r0,r0,r5,ror#19 @ Sigma1(e)
314#endif
315 ldr r3,[r14],#4 @ *K256++
316 add r8,r8,r2 @ h+=X[i]
317 str r2,[sp,#3*4]
318 eor r2,r6,r7
319 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
320 and r2,r2,r5
321 add r8,r8,r3 @ h+=K256[i]
322 eor r2,r2,r7 @ Ch(e,f,g)
323 eor r0,r9,r9,ror#11
324 add r8,r8,r2 @ h+=Ch(e,f,g)
325#if 3==31
326 and r3,r3,#0xff
327 cmp r3,#0xf2 @ done?
328#endif
329#if 3<15
330# if __ARM_ARCH__>=7
331 ldr r2,[r1],#4 @ prefetch
332# else
333 ldrb r2,[r1,#3]
334# endif
335 eor r3,r9,r10 @ a^b, b^c in next round
336#else
337 ldr r2,[sp,#5*4] @ from future BODY_16_xx
338 eor r3,r9,r10 @ a^b, b^c in next round
339 ldr r1,[sp,#2*4] @ from future BODY_16_xx
340#endif
341 eor r0,r0,r9,ror#20 @ Sigma0(a)
342 and r12,r12,r3 @ (b^c)&=(a^b)
343 add r4,r4,r8 @ d+=h
344 eor r12,r12,r10 @ Maj(a,b,c)
345 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
346 @ add r8,r8,r12 @ h+=Maj(a,b,c)
347#if __ARM_ARCH__>=7
348 @ ldr r2,[r1],#4 @ 4
349# if 4==15
350 str r1,[sp,#17*4] @ make room for r1
351# endif
352 eor r0,r4,r4,ror#5
353 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
354 eor r0,r0,r4,ror#19 @ Sigma1(e)
355# ifndef __ARMEB__
356 rev r2,r2
357# endif
358#else
359 @ ldrb r2,[r1,#3] @ 4
360 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
361 ldrb r12,[r1,#2]
362 ldrb r0,[r1,#1]
363 orr r2,r2,r12,lsl#8
364 ldrb r12,[r1],#4
365 orr r2,r2,r0,lsl#16
366# if 4==15
367 str r1,[sp,#17*4] @ make room for r1
368# endif
369 eor r0,r4,r4,ror#5
370 orr r2,r2,r12,lsl#24
371 eor r0,r0,r4,ror#19 @ Sigma1(e)
372#endif
373 ldr r12,[r14],#4 @ *K256++
374 add r7,r7,r2 @ h+=X[i]
375 str r2,[sp,#4*4]
376 eor r2,r5,r6
377 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
378 and r2,r2,r4
379 add r7,r7,r12 @ h+=K256[i]
380 eor r2,r2,r6 @ Ch(e,f,g)
381 eor r0,r8,r8,ror#11
382 add r7,r7,r2 @ h+=Ch(e,f,g)
383#if 4==31
384 and r12,r12,#0xff
385 cmp r12,#0xf2 @ done?
386#endif
387#if 4<15
388# if __ARM_ARCH__>=7
389 ldr r2,[r1],#4 @ prefetch
390# else
391 ldrb r2,[r1,#3]
392# endif
393 eor r12,r8,r9 @ a^b, b^c in next round
394#else
395 ldr r2,[sp,#6*4] @ from future BODY_16_xx
396 eor r12,r8,r9 @ a^b, b^c in next round
397 ldr r1,[sp,#3*4] @ from future BODY_16_xx
398#endif
399 eor r0,r0,r8,ror#20 @ Sigma0(a)
400 and r3,r3,r12 @ (b^c)&=(a^b)
401 add r11,r11,r7 @ d+=h
402 eor r3,r3,r9 @ Maj(a,b,c)
403 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
404 @ add r7,r7,r3 @ h+=Maj(a,b,c)
405#if __ARM_ARCH__>=7
406 @ ldr r2,[r1],#4 @ 5
407# if 5==15
408 str r1,[sp,#17*4] @ make room for r1
409# endif
410 eor r0,r11,r11,ror#5
411 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
412 eor r0,r0,r11,ror#19 @ Sigma1(e)
413# ifndef __ARMEB__
414 rev r2,r2
415# endif
416#else
417 @ ldrb r2,[r1,#3] @ 5
418 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
419 ldrb r3,[r1,#2]
420 ldrb r0,[r1,#1]
421 orr r2,r2,r3,lsl#8
422 ldrb r3,[r1],#4
423 orr r2,r2,r0,lsl#16
424# if 5==15
425 str r1,[sp,#17*4] @ make room for r1
426# endif
427 eor r0,r11,r11,ror#5
428 orr r2,r2,r3,lsl#24
429 eor r0,r0,r11,ror#19 @ Sigma1(e)
430#endif
431 ldr r3,[r14],#4 @ *K256++
432 add r6,r6,r2 @ h+=X[i]
433 str r2,[sp,#5*4]
434 eor r2,r4,r5
435 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
436 and r2,r2,r11
437 add r6,r6,r3 @ h+=K256[i]
438 eor r2,r2,r5 @ Ch(e,f,g)
439 eor r0,r7,r7,ror#11
440 add r6,r6,r2 @ h+=Ch(e,f,g)
441#if 5==31
442 and r3,r3,#0xff
443 cmp r3,#0xf2 @ done?
444#endif
445#if 5<15
446# if __ARM_ARCH__>=7
447 ldr r2,[r1],#4 @ prefetch
448# else
449 ldrb r2,[r1,#3]
450# endif
451 eor r3,r7,r8 @ a^b, b^c in next round
452#else
453 ldr r2,[sp,#7*4] @ from future BODY_16_xx
454 eor r3,r7,r8 @ a^b, b^c in next round
455 ldr r1,[sp,#4*4] @ from future BODY_16_xx
456#endif
457 eor r0,r0,r7,ror#20 @ Sigma0(a)
458 and r12,r12,r3 @ (b^c)&=(a^b)
459 add r10,r10,r6 @ d+=h
460 eor r12,r12,r8 @ Maj(a,b,c)
461 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
462 @ add r6,r6,r12 @ h+=Maj(a,b,c)
463#if __ARM_ARCH__>=7
464 @ ldr r2,[r1],#4 @ 6
465# if 6==15
466 str r1,[sp,#17*4] @ make room for r1
467# endif
468 eor r0,r10,r10,ror#5
469 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
470 eor r0,r0,r10,ror#19 @ Sigma1(e)
471# ifndef __ARMEB__
472 rev r2,r2
473# endif
474#else
475 @ ldrb r2,[r1,#3] @ 6
476 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
477 ldrb r12,[r1,#2]
478 ldrb r0,[r1,#1]
479 orr r2,r2,r12,lsl#8
480 ldrb r12,[r1],#4
481 orr r2,r2,r0,lsl#16
482# if 6==15
483 str r1,[sp,#17*4] @ make room for r1
484# endif
485 eor r0,r10,r10,ror#5
486 orr r2,r2,r12,lsl#24
487 eor r0,r0,r10,ror#19 @ Sigma1(e)
488#endif
489 ldr r12,[r14],#4 @ *K256++
490 add r5,r5,r2 @ h+=X[i]
491 str r2,[sp,#6*4]
492 eor r2,r11,r4
493 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
494 and r2,r2,r10
495 add r5,r5,r12 @ h+=K256[i]
496 eor r2,r2,r4 @ Ch(e,f,g)
497 eor r0,r6,r6,ror#11
498 add r5,r5,r2 @ h+=Ch(e,f,g)
499#if 6==31
500 and r12,r12,#0xff
501 cmp r12,#0xf2 @ done?
502#endif
503#if 6<15
504# if __ARM_ARCH__>=7
505 ldr r2,[r1],#4 @ prefetch
506# else
507 ldrb r2,[r1,#3]
508# endif
509 eor r12,r6,r7 @ a^b, b^c in next round
510#else
511 ldr r2,[sp,#8*4] @ from future BODY_16_xx
512 eor r12,r6,r7 @ a^b, b^c in next round
513 ldr r1,[sp,#5*4] @ from future BODY_16_xx
514#endif
515 eor r0,r0,r6,ror#20 @ Sigma0(a)
516 and r3,r3,r12 @ (b^c)&=(a^b)
517 add r9,r9,r5 @ d+=h
518 eor r3,r3,r7 @ Maj(a,b,c)
519 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
520 @ add r5,r5,r3 @ h+=Maj(a,b,c)
521#if __ARM_ARCH__>=7
522 @ ldr r2,[r1],#4 @ 7
523# if 7==15
524 str r1,[sp,#17*4] @ make room for r1
525# endif
526 eor r0,r9,r9,ror#5
527 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
528 eor r0,r0,r9,ror#19 @ Sigma1(e)
529# ifndef __ARMEB__
530 rev r2,r2
531# endif
532#else
533 @ ldrb r2,[r1,#3] @ 7
534 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
535 ldrb r3,[r1,#2]
536 ldrb r0,[r1,#1]
537 orr r2,r2,r3,lsl#8
538 ldrb r3,[r1],#4
539 orr r2,r2,r0,lsl#16
540# if 7==15
541 str r1,[sp,#17*4] @ make room for r1
542# endif
543 eor r0,r9,r9,ror#5
544 orr r2,r2,r3,lsl#24
545 eor r0,r0,r9,ror#19 @ Sigma1(e)
546#endif
547 ldr r3,[r14],#4 @ *K256++
548 add r4,r4,r2 @ h+=X[i]
549 str r2,[sp,#7*4]
550 eor r2,r10,r11
551 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
552 and r2,r2,r9
553 add r4,r4,r3 @ h+=K256[i]
554 eor r2,r2,r11 @ Ch(e,f,g)
555 eor r0,r5,r5,ror#11
556 add r4,r4,r2 @ h+=Ch(e,f,g)
557#if 7==31
558 and r3,r3,#0xff
559 cmp r3,#0xf2 @ done?
560#endif
561#if 7<15
562# if __ARM_ARCH__>=7
563 ldr r2,[r1],#4 @ prefetch
564# else
565 ldrb r2,[r1,#3]
566# endif
567 eor r3,r5,r6 @ a^b, b^c in next round
568#else
569 ldr r2,[sp,#9*4] @ from future BODY_16_xx
570 eor r3,r5,r6 @ a^b, b^c in next round
571 ldr r1,[sp,#6*4] @ from future BODY_16_xx
572#endif
573 eor r0,r0,r5,ror#20 @ Sigma0(a)
574 and r12,r12,r3 @ (b^c)&=(a^b)
575 add r8,r8,r4 @ d+=h
576 eor r12,r12,r6 @ Maj(a,b,c)
577 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
578 @ add r4,r4,r12 @ h+=Maj(a,b,c)
579#if __ARM_ARCH__>=7
580 @ ldr r2,[r1],#4 @ 8
581# if 8==15
582 str r1,[sp,#17*4] @ make room for r1
583# endif
584 eor r0,r8,r8,ror#5
585 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
586 eor r0,r0,r8,ror#19 @ Sigma1(e)
587# ifndef __ARMEB__
588 rev r2,r2
589# endif
590#else
591 @ ldrb r2,[r1,#3] @ 8
592 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
593 ldrb r12,[r1,#2]
594 ldrb r0,[r1,#1]
595 orr r2,r2,r12,lsl#8
596 ldrb r12,[r1],#4
597 orr r2,r2,r0,lsl#16
598# if 8==15
599 str r1,[sp,#17*4] @ make room for r1
600# endif
601 eor r0,r8,r8,ror#5
602 orr r2,r2,r12,lsl#24
603 eor r0,r0,r8,ror#19 @ Sigma1(e)
604#endif
605 ldr r12,[r14],#4 @ *K256++
606 add r11,r11,r2 @ h+=X[i]
607 str r2,[sp,#8*4]
608 eor r2,r9,r10
609 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
610 and r2,r2,r8
611 add r11,r11,r12 @ h+=K256[i]
612 eor r2,r2,r10 @ Ch(e,f,g)
613 eor r0,r4,r4,ror#11
614 add r11,r11,r2 @ h+=Ch(e,f,g)
615#if 8==31
616 and r12,r12,#0xff
617 cmp r12,#0xf2 @ done?
618#endif
619#if 8<15
620# if __ARM_ARCH__>=7
621 ldr r2,[r1],#4 @ prefetch
622# else
623 ldrb r2,[r1,#3]
624# endif
625 eor r12,r4,r5 @ a^b, b^c in next round
626#else
627 ldr r2,[sp,#10*4] @ from future BODY_16_xx
628 eor r12,r4,r5 @ a^b, b^c in next round
629 ldr r1,[sp,#7*4] @ from future BODY_16_xx
630#endif
631 eor r0,r0,r4,ror#20 @ Sigma0(a)
632 and r3,r3,r12 @ (b^c)&=(a^b)
633 add r7,r7,r11 @ d+=h
634 eor r3,r3,r5 @ Maj(a,b,c)
635 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
636 @ add r11,r11,r3 @ h+=Maj(a,b,c)
637#if __ARM_ARCH__>=7
638 @ ldr r2,[r1],#4 @ 9
639# if 9==15
640 str r1,[sp,#17*4] @ make room for r1
641# endif
642 eor r0,r7,r7,ror#5
643 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
644 eor r0,r0,r7,ror#19 @ Sigma1(e)
645# ifndef __ARMEB__
646 rev r2,r2
647# endif
648#else
649 @ ldrb r2,[r1,#3] @ 9
650 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
651 ldrb r3,[r1,#2]
652 ldrb r0,[r1,#1]
653 orr r2,r2,r3,lsl#8
654 ldrb r3,[r1],#4
655 orr r2,r2,r0,lsl#16
656# if 9==15
657 str r1,[sp,#17*4] @ make room for r1
658# endif
659 eor r0,r7,r7,ror#5
660 orr r2,r2,r3,lsl#24
661 eor r0,r0,r7,ror#19 @ Sigma1(e)
662#endif
663 ldr r3,[r14],#4 @ *K256++
664 add r10,r10,r2 @ h+=X[i]
665 str r2,[sp,#9*4]
666 eor r2,r8,r9
667 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
668 and r2,r2,r7
669 add r10,r10,r3 @ h+=K256[i]
670 eor r2,r2,r9 @ Ch(e,f,g)
671 eor r0,r11,r11,ror#11
672 add r10,r10,r2 @ h+=Ch(e,f,g)
673#if 9==31
674 and r3,r3,#0xff
675 cmp r3,#0xf2 @ done?
676#endif
677#if 9<15
678# if __ARM_ARCH__>=7
679 ldr r2,[r1],#4 @ prefetch
680# else
681 ldrb r2,[r1,#3]
682# endif
683 eor r3,r11,r4 @ a^b, b^c in next round
684#else
685 ldr r2,[sp,#11*4] @ from future BODY_16_xx
686 eor r3,r11,r4 @ a^b, b^c in next round
687 ldr r1,[sp,#8*4] @ from future BODY_16_xx
688#endif
689 eor r0,r0,r11,ror#20 @ Sigma0(a)
690 and r12,r12,r3 @ (b^c)&=(a^b)
691 add r6,r6,r10 @ d+=h
692 eor r12,r12,r4 @ Maj(a,b,c)
693 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
694 @ add r10,r10,r12 @ h+=Maj(a,b,c)
695#if __ARM_ARCH__>=7
696 @ ldr r2,[r1],#4 @ 10
697# if 10==15
698 str r1,[sp,#17*4] @ make room for r1
699# endif
700 eor r0,r6,r6,ror#5
701 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
702 eor r0,r0,r6,ror#19 @ Sigma1(e)
703# ifndef __ARMEB__
704 rev r2,r2
705# endif
706#else
707 @ ldrb r2,[r1,#3] @ 10
708 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
709 ldrb r12,[r1,#2]
710 ldrb r0,[r1,#1]
711 orr r2,r2,r12,lsl#8
712 ldrb r12,[r1],#4
713 orr r2,r2,r0,lsl#16
714# if 10==15
715 str r1,[sp,#17*4] @ make room for r1
716# endif
717 eor r0,r6,r6,ror#5
718 orr r2,r2,r12,lsl#24
719 eor r0,r0,r6,ror#19 @ Sigma1(e)
720#endif
721 ldr r12,[r14],#4 @ *K256++
722 add r9,r9,r2 @ h+=X[i]
723 str r2,[sp,#10*4]
724 eor r2,r7,r8
725 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
726 and r2,r2,r6
727 add r9,r9,r12 @ h+=K256[i]
728 eor r2,r2,r8 @ Ch(e,f,g)
729 eor r0,r10,r10,ror#11
730 add r9,r9,r2 @ h+=Ch(e,f,g)
731#if 10==31
732 and r12,r12,#0xff
733 cmp r12,#0xf2 @ done?
734#endif
735#if 10<15
736# if __ARM_ARCH__>=7
737 ldr r2,[r1],#4 @ prefetch
738# else
739 ldrb r2,[r1,#3]
740# endif
741 eor r12,r10,r11 @ a^b, b^c in next round
742#else
743 ldr r2,[sp,#12*4] @ from future BODY_16_xx
744 eor r12,r10,r11 @ a^b, b^c in next round
745 ldr r1,[sp,#9*4] @ from future BODY_16_xx
746#endif
747 eor r0,r0,r10,ror#20 @ Sigma0(a)
748 and r3,r3,r12 @ (b^c)&=(a^b)
749 add r5,r5,r9 @ d+=h
750 eor r3,r3,r11 @ Maj(a,b,c)
751 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
752 @ add r9,r9,r3 @ h+=Maj(a,b,c)
753#if __ARM_ARCH__>=7
754 @ ldr r2,[r1],#4 @ 11
755# if 11==15
756 str r1,[sp,#17*4] @ make room for r1
757# endif
758 eor r0,r5,r5,ror#5
759 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
760 eor r0,r0,r5,ror#19 @ Sigma1(e)
761# ifndef __ARMEB__
762 rev r2,r2
763# endif
764#else
765 @ ldrb r2,[r1,#3] @ 11
766 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
767 ldrb r3,[r1,#2]
768 ldrb r0,[r1,#1]
769 orr r2,r2,r3,lsl#8
770 ldrb r3,[r1],#4
771 orr r2,r2,r0,lsl#16
772# if 11==15
773 str r1,[sp,#17*4] @ make room for r1
774# endif
775 eor r0,r5,r5,ror#5
776 orr r2,r2,r3,lsl#24
777 eor r0,r0,r5,ror#19 @ Sigma1(e)
778#endif
779 ldr r3,[r14],#4 @ *K256++
780 add r8,r8,r2 @ h+=X[i]
781 str r2,[sp,#11*4]
782 eor r2,r6,r7
783 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
784 and r2,r2,r5
785 add r8,r8,r3 @ h+=K256[i]
786 eor r2,r2,r7 @ Ch(e,f,g)
787 eor r0,r9,r9,ror#11
788 add r8,r8,r2 @ h+=Ch(e,f,g)
789#if 11==31
790 and r3,r3,#0xff
791 cmp r3,#0xf2 @ done?
792#endif
793#if 11<15
794# if __ARM_ARCH__>=7
795 ldr r2,[r1],#4 @ prefetch
796# else
797 ldrb r2,[r1,#3]
798# endif
799 eor r3,r9,r10 @ a^b, b^c in next round
800#else
801 ldr r2,[sp,#13*4] @ from future BODY_16_xx
802 eor r3,r9,r10 @ a^b, b^c in next round
803 ldr r1,[sp,#10*4] @ from future BODY_16_xx
804#endif
805 eor r0,r0,r9,ror#20 @ Sigma0(a)
806 and r12,r12,r3 @ (b^c)&=(a^b)
807 add r4,r4,r8 @ d+=h
808 eor r12,r12,r10 @ Maj(a,b,c)
809 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
810 @ add r8,r8,r12 @ h+=Maj(a,b,c)
811#if __ARM_ARCH__>=7
812 @ ldr r2,[r1],#4 @ 12
813# if 12==15
814 str r1,[sp,#17*4] @ make room for r1
815# endif
816 eor r0,r4,r4,ror#5
817 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
818 eor r0,r0,r4,ror#19 @ Sigma1(e)
819# ifndef __ARMEB__
820 rev r2,r2
821# endif
822#else
823 @ ldrb r2,[r1,#3] @ 12
824 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
825 ldrb r12,[r1,#2]
826 ldrb r0,[r1,#1]
827 orr r2,r2,r12,lsl#8
828 ldrb r12,[r1],#4
829 orr r2,r2,r0,lsl#16
830# if 12==15
831 str r1,[sp,#17*4] @ make room for r1
832# endif
833 eor r0,r4,r4,ror#5
834 orr r2,r2,r12,lsl#24
835 eor r0,r0,r4,ror#19 @ Sigma1(e)
836#endif
837 ldr r12,[r14],#4 @ *K256++
838 add r7,r7,r2 @ h+=X[i]
839 str r2,[sp,#12*4]
840 eor r2,r5,r6
841 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
842 and r2,r2,r4
843 add r7,r7,r12 @ h+=K256[i]
844 eor r2,r2,r6 @ Ch(e,f,g)
845 eor r0,r8,r8,ror#11
846 add r7,r7,r2 @ h+=Ch(e,f,g)
847#if 12==31
848 and r12,r12,#0xff
849 cmp r12,#0xf2 @ done?
850#endif
851#if 12<15
852# if __ARM_ARCH__>=7
853 ldr r2,[r1],#4 @ prefetch
854# else
855 ldrb r2,[r1,#3]
856# endif
857 eor r12,r8,r9 @ a^b, b^c in next round
858#else
859 ldr r2,[sp,#14*4] @ from future BODY_16_xx
860 eor r12,r8,r9 @ a^b, b^c in next round
861 ldr r1,[sp,#11*4] @ from future BODY_16_xx
862#endif
863 eor r0,r0,r8,ror#20 @ Sigma0(a)
864 and r3,r3,r12 @ (b^c)&=(a^b)
865 add r11,r11,r7 @ d+=h
866 eor r3,r3,r9 @ Maj(a,b,c)
867 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
868 @ add r7,r7,r3 @ h+=Maj(a,b,c)
869#if __ARM_ARCH__>=7
870 @ ldr r2,[r1],#4 @ 13
871# if 13==15
872 str r1,[sp,#17*4] @ make room for r1
873# endif
874 eor r0,r11,r11,ror#5
875 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
876 eor r0,r0,r11,ror#19 @ Sigma1(e)
877# ifndef __ARMEB__
878 rev r2,r2
879# endif
880#else
881 @ ldrb r2,[r1,#3] @ 13
882 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
883 ldrb r3,[r1,#2]
884 ldrb r0,[r1,#1]
885 orr r2,r2,r3,lsl#8
886 ldrb r3,[r1],#4
887 orr r2,r2,r0,lsl#16
888# if 13==15
889 str r1,[sp,#17*4] @ make room for r1
890# endif
891 eor r0,r11,r11,ror#5
892 orr r2,r2,r3,lsl#24
893 eor r0,r0,r11,ror#19 @ Sigma1(e)
894#endif
895 ldr r3,[r14],#4 @ *K256++
896 add r6,r6,r2 @ h+=X[i]
897 str r2,[sp,#13*4]
898 eor r2,r4,r5
899 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
900 and r2,r2,r11
901 add r6,r6,r3 @ h+=K256[i]
902 eor r2,r2,r5 @ Ch(e,f,g)
903 eor r0,r7,r7,ror#11
904 add r6,r6,r2 @ h+=Ch(e,f,g)
905#if 13==31
906 and r3,r3,#0xff
907 cmp r3,#0xf2 @ done?
908#endif
909#if 13<15
910# if __ARM_ARCH__>=7
911 ldr r2,[r1],#4 @ prefetch
912# else
913 ldrb r2,[r1,#3]
914# endif
915 eor r3,r7,r8 @ a^b, b^c in next round
916#else
917 ldr r2,[sp,#15*4] @ from future BODY_16_xx
918 eor r3,r7,r8 @ a^b, b^c in next round
919 ldr r1,[sp,#12*4] @ from future BODY_16_xx
920#endif
921 eor r0,r0,r7,ror#20 @ Sigma0(a)
922 and r12,r12,r3 @ (b^c)&=(a^b)
923 add r10,r10,r6 @ d+=h
924 eor r12,r12,r8 @ Maj(a,b,c)
925 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
926 @ add r6,r6,r12 @ h+=Maj(a,b,c)
927#if __ARM_ARCH__>=7
928 @ ldr r2,[r1],#4 @ 14
929# if 14==15
930 str r1,[sp,#17*4] @ make room for r1
931# endif
932 eor r0,r10,r10,ror#5
933 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
934 eor r0,r0,r10,ror#19 @ Sigma1(e)
935# ifndef __ARMEB__
936 rev r2,r2
937# endif
938#else
939 @ ldrb r2,[r1,#3] @ 14
940 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
941 ldrb r12,[r1,#2]
942 ldrb r0,[r1,#1]
943 orr r2,r2,r12,lsl#8
944 ldrb r12,[r1],#4
945 orr r2,r2,r0,lsl#16
946# if 14==15
947 str r1,[sp,#17*4] @ make room for r1
948# endif
949 eor r0,r10,r10,ror#5
950 orr r2,r2,r12,lsl#24
951 eor r0,r0,r10,ror#19 @ Sigma1(e)
952#endif
953 ldr r12,[r14],#4 @ *K256++
954 add r5,r5,r2 @ h+=X[i]
955 str r2,[sp,#14*4]
956 eor r2,r11,r4
957 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
958 and r2,r2,r10
959 add r5,r5,r12 @ h+=K256[i]
960 eor r2,r2,r4 @ Ch(e,f,g)
961 eor r0,r6,r6,ror#11
962 add r5,r5,r2 @ h+=Ch(e,f,g)
963#if 14==31
964 and r12,r12,#0xff
965 cmp r12,#0xf2 @ done?
966#endif
967#if 14<15
968# if __ARM_ARCH__>=7
969 ldr r2,[r1],#4 @ prefetch
970# else
971 ldrb r2,[r1,#3]
972# endif
973 eor r12,r6,r7 @ a^b, b^c in next round
974#else
975 ldr r2,[sp,#0*4] @ from future BODY_16_xx
976 eor r12,r6,r7 @ a^b, b^c in next round
977 ldr r1,[sp,#13*4] @ from future BODY_16_xx
978#endif
979 eor r0,r0,r6,ror#20 @ Sigma0(a)
980 and r3,r3,r12 @ (b^c)&=(a^b)
981 add r9,r9,r5 @ d+=h
982 eor r3,r3,r7 @ Maj(a,b,c)
983 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
984 @ add r5,r5,r3 @ h+=Maj(a,b,c)
985#if __ARM_ARCH__>=7
986 @ ldr r2,[r1],#4 @ 15
987# if 15==15
988 str r1,[sp,#17*4] @ make room for r1
989# endif
990 eor r0,r9,r9,ror#5
991 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
992 eor r0,r0,r9,ror#19 @ Sigma1(e)
993# ifndef __ARMEB__
994 rev r2,r2
995# endif
996#else
997 @ ldrb r2,[r1,#3] @ 15
998 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
999 ldrb r3,[r1,#2]
1000 ldrb r0,[r1,#1]
1001 orr r2,r2,r3,lsl#8
1002 ldrb r3,[r1],#4
1003 orr r2,r2,r0,lsl#16
1004# if 15==15
1005 str r1,[sp,#17*4] @ make room for r1
1006# endif
1007 eor r0,r9,r9,ror#5
1008 orr r2,r2,r3,lsl#24
1009 eor r0,r0,r9,ror#19 @ Sigma1(e)
1010#endif
1011 ldr r3,[r14],#4 @ *K256++
1012 add r4,r4,r2 @ h+=X[i]
1013 str r2,[sp,#15*4]
1014 eor r2,r10,r11
1015 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1016 and r2,r2,r9
1017 add r4,r4,r3 @ h+=K256[i]
1018 eor r2,r2,r11 @ Ch(e,f,g)
1019 eor r0,r5,r5,ror#11
1020 add r4,r4,r2 @ h+=Ch(e,f,g)
1021#if 15==31
1022 and r3,r3,#0xff
1023 cmp r3,#0xf2 @ done?
1024#endif
1025#if 15<15
1026# if __ARM_ARCH__>=7
1027 ldr r2,[r1],#4 @ prefetch
1028# else
1029 ldrb r2,[r1,#3]
1030# endif
1031 eor r3,r5,r6 @ a^b, b^c in next round
1032#else
1033 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1034 eor r3,r5,r6 @ a^b, b^c in next round
1035 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1036#endif
1037 eor r0,r0,r5,ror#20 @ Sigma0(a)
1038 and r12,r12,r3 @ (b^c)&=(a^b)
1039 add r8,r8,r4 @ d+=h
1040 eor r12,r12,r6 @ Maj(a,b,c)
1041 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1042 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1043.Lrounds_16_xx:
1044 @ ldr r2,[sp,#1*4] @ 16
1045 @ ldr r1,[sp,#14*4]
1046 mov r0,r2,ror#7
1047 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1048 mov r12,r1,ror#17
1049 eor r0,r0,r2,ror#18
1050 eor r12,r12,r1,ror#19
1051 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1052 ldr r2,[sp,#0*4]
1053 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1054 ldr r1,[sp,#9*4]
1055
1056 add r12,r12,r0
1057 eor r0,r8,r8,ror#5 @ from BODY_00_15
1058 add r2,r2,r12
1059 eor r0,r0,r8,ror#19 @ Sigma1(e)
1060 add r2,r2,r1 @ X[i]
1061 ldr r12,[r14],#4 @ *K256++
1062 add r11,r11,r2 @ h+=X[i]
1063 str r2,[sp,#0*4]
1064 eor r2,r9,r10
1065 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1066 and r2,r2,r8
1067 add r11,r11,r12 @ h+=K256[i]
1068 eor r2,r2,r10 @ Ch(e,f,g)
1069 eor r0,r4,r4,ror#11
1070 add r11,r11,r2 @ h+=Ch(e,f,g)
1071#if 16==31
1072 and r12,r12,#0xff
1073 cmp r12,#0xf2 @ done?
1074#endif
1075#if 16<15
1076# if __ARM_ARCH__>=7
1077 ldr r2,[r1],#4 @ prefetch
1078# else
1079 ldrb r2,[r1,#3]
1080# endif
1081 eor r12,r4,r5 @ a^b, b^c in next round
1082#else
1083 ldr r2,[sp,#2*4] @ from future BODY_16_xx
1084 eor r12,r4,r5 @ a^b, b^c in next round
1085 ldr r1,[sp,#15*4] @ from future BODY_16_xx
1086#endif
1087 eor r0,r0,r4,ror#20 @ Sigma0(a)
1088 and r3,r3,r12 @ (b^c)&=(a^b)
1089 add r7,r7,r11 @ d+=h
1090 eor r3,r3,r5 @ Maj(a,b,c)
1091 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1092 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1093 @ ldr r2,[sp,#2*4] @ 17
1094 @ ldr r1,[sp,#15*4]
1095 mov r0,r2,ror#7
1096 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1097 mov r3,r1,ror#17
1098 eor r0,r0,r2,ror#18
1099 eor r3,r3,r1,ror#19
1100 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1101 ldr r2,[sp,#1*4]
1102 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1103 ldr r1,[sp,#10*4]
1104
1105 add r3,r3,r0
1106 eor r0,r7,r7,ror#5 @ from BODY_00_15
1107 add r2,r2,r3
1108 eor r0,r0,r7,ror#19 @ Sigma1(e)
1109 add r2,r2,r1 @ X[i]
1110 ldr r3,[r14],#4 @ *K256++
1111 add r10,r10,r2 @ h+=X[i]
1112 str r2,[sp,#1*4]
1113 eor r2,r8,r9
1114 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1115 and r2,r2,r7
1116 add r10,r10,r3 @ h+=K256[i]
1117 eor r2,r2,r9 @ Ch(e,f,g)
1118 eor r0,r11,r11,ror#11
1119 add r10,r10,r2 @ h+=Ch(e,f,g)
1120#if 17==31
1121 and r3,r3,#0xff
1122 cmp r3,#0xf2 @ done?
1123#endif
1124#if 17<15
1125# if __ARM_ARCH__>=7
1126 ldr r2,[r1],#4 @ prefetch
1127# else
1128 ldrb r2,[r1,#3]
1129# endif
1130 eor r3,r11,r4 @ a^b, b^c in next round
1131#else
1132 ldr r2,[sp,#3*4] @ from future BODY_16_xx
1133 eor r3,r11,r4 @ a^b, b^c in next round
1134 ldr r1,[sp,#0*4] @ from future BODY_16_xx
1135#endif
1136 eor r0,r0,r11,ror#20 @ Sigma0(a)
1137 and r12,r12,r3 @ (b^c)&=(a^b)
1138 add r6,r6,r10 @ d+=h
1139 eor r12,r12,r4 @ Maj(a,b,c)
1140 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1141 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1142 @ ldr r2,[sp,#3*4] @ 18
1143 @ ldr r1,[sp,#0*4]
1144 mov r0,r2,ror#7
1145 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1146 mov r12,r1,ror#17
1147 eor r0,r0,r2,ror#18
1148 eor r12,r12,r1,ror#19
1149 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1150 ldr r2,[sp,#2*4]
1151 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1152 ldr r1,[sp,#11*4]
1153
1154 add r12,r12,r0
1155 eor r0,r6,r6,ror#5 @ from BODY_00_15
1156 add r2,r2,r12
1157 eor r0,r0,r6,ror#19 @ Sigma1(e)
1158 add r2,r2,r1 @ X[i]
1159 ldr r12,[r14],#4 @ *K256++
1160 add r9,r9,r2 @ h+=X[i]
1161 str r2,[sp,#2*4]
1162 eor r2,r7,r8
1163 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1164 and r2,r2,r6
1165 add r9,r9,r12 @ h+=K256[i]
1166 eor r2,r2,r8 @ Ch(e,f,g)
1167 eor r0,r10,r10,ror#11
1168 add r9,r9,r2 @ h+=Ch(e,f,g)
1169#if 18==31
1170 and r12,r12,#0xff
1171 cmp r12,#0xf2 @ done?
1172#endif
1173#if 18<15
1174# if __ARM_ARCH__>=7
1175 ldr r2,[r1],#4 @ prefetch
1176# else
1177 ldrb r2,[r1,#3]
1178# endif
1179 eor r12,r10,r11 @ a^b, b^c in next round
1180#else
1181 ldr r2,[sp,#4*4] @ from future BODY_16_xx
1182 eor r12,r10,r11 @ a^b, b^c in next round
1183 ldr r1,[sp,#1*4] @ from future BODY_16_xx
1184#endif
1185 eor r0,r0,r10,ror#20 @ Sigma0(a)
1186 and r3,r3,r12 @ (b^c)&=(a^b)
1187 add r5,r5,r9 @ d+=h
1188 eor r3,r3,r11 @ Maj(a,b,c)
1189 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1190 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1191 @ ldr r2,[sp,#4*4] @ 19
1192 @ ldr r1,[sp,#1*4]
1193 mov r0,r2,ror#7
1194 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1195 mov r3,r1,ror#17
1196 eor r0,r0,r2,ror#18
1197 eor r3,r3,r1,ror#19
1198 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1199 ldr r2,[sp,#3*4]
1200 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1201 ldr r1,[sp,#12*4]
1202
1203 add r3,r3,r0
1204 eor r0,r5,r5,ror#5 @ from BODY_00_15
1205 add r2,r2,r3
1206 eor r0,r0,r5,ror#19 @ Sigma1(e)
1207 add r2,r2,r1 @ X[i]
1208 ldr r3,[r14],#4 @ *K256++
1209 add r8,r8,r2 @ h+=X[i]
1210 str r2,[sp,#3*4]
1211 eor r2,r6,r7
1212 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1213 and r2,r2,r5
1214 add r8,r8,r3 @ h+=K256[i]
1215 eor r2,r2,r7 @ Ch(e,f,g)
1216 eor r0,r9,r9,ror#11
1217 add r8,r8,r2 @ h+=Ch(e,f,g)
1218#if 19==31
1219 and r3,r3,#0xff
1220 cmp r3,#0xf2 @ done?
1221#endif
1222#if 19<15
1223# if __ARM_ARCH__>=7
1224 ldr r2,[r1],#4 @ prefetch
1225# else
1226 ldrb r2,[r1,#3]
1227# endif
1228 eor r3,r9,r10 @ a^b, b^c in next round
1229#else
1230 ldr r2,[sp,#5*4] @ from future BODY_16_xx
1231 eor r3,r9,r10 @ a^b, b^c in next round
1232 ldr r1,[sp,#2*4] @ from future BODY_16_xx
1233#endif
1234 eor r0,r0,r9,ror#20 @ Sigma0(a)
1235 and r12,r12,r3 @ (b^c)&=(a^b)
1236 add r4,r4,r8 @ d+=h
1237 eor r12,r12,r10 @ Maj(a,b,c)
1238 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1239 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1240 @ ldr r2,[sp,#5*4] @ 20
1241 @ ldr r1,[sp,#2*4]
1242 mov r0,r2,ror#7
1243 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1244 mov r12,r1,ror#17
1245 eor r0,r0,r2,ror#18
1246 eor r12,r12,r1,ror#19
1247 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1248 ldr r2,[sp,#4*4]
1249 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1250 ldr r1,[sp,#13*4]
1251
1252 add r12,r12,r0
1253 eor r0,r4,r4,ror#5 @ from BODY_00_15
1254 add r2,r2,r12
1255 eor r0,r0,r4,ror#19 @ Sigma1(e)
1256 add r2,r2,r1 @ X[i]
1257 ldr r12,[r14],#4 @ *K256++
1258 add r7,r7,r2 @ h+=X[i]
1259 str r2,[sp,#4*4]
1260 eor r2,r5,r6
1261 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1262 and r2,r2,r4
1263 add r7,r7,r12 @ h+=K256[i]
1264 eor r2,r2,r6 @ Ch(e,f,g)
1265 eor r0,r8,r8,ror#11
1266 add r7,r7,r2 @ h+=Ch(e,f,g)
1267#if 20==31
1268 and r12,r12,#0xff
1269 cmp r12,#0xf2 @ done?
1270#endif
1271#if 20<15
1272# if __ARM_ARCH__>=7
1273 ldr r2,[r1],#4 @ prefetch
1274# else
1275 ldrb r2,[r1,#3]
1276# endif
1277 eor r12,r8,r9 @ a^b, b^c in next round
1278#else
1279 ldr r2,[sp,#6*4] @ from future BODY_16_xx
1280 eor r12,r8,r9 @ a^b, b^c in next round
1281 ldr r1,[sp,#3*4] @ from future BODY_16_xx
1282#endif
1283 eor r0,r0,r8,ror#20 @ Sigma0(a)
1284 and r3,r3,r12 @ (b^c)&=(a^b)
1285 add r11,r11,r7 @ d+=h
1286 eor r3,r3,r9 @ Maj(a,b,c)
1287 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1288 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1289 @ ldr r2,[sp,#6*4] @ 21
1290 @ ldr r1,[sp,#3*4]
1291 mov r0,r2,ror#7
1292 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1293 mov r3,r1,ror#17
1294 eor r0,r0,r2,ror#18
1295 eor r3,r3,r1,ror#19
1296 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1297 ldr r2,[sp,#5*4]
1298 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1299 ldr r1,[sp,#14*4]
1300
1301 add r3,r3,r0
1302 eor r0,r11,r11,ror#5 @ from BODY_00_15
1303 add r2,r2,r3
1304 eor r0,r0,r11,ror#19 @ Sigma1(e)
1305 add r2,r2,r1 @ X[i]
1306 ldr r3,[r14],#4 @ *K256++
1307 add r6,r6,r2 @ h+=X[i]
1308 str r2,[sp,#5*4]
1309 eor r2,r4,r5
1310 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1311 and r2,r2,r11
1312 add r6,r6,r3 @ h+=K256[i]
1313 eor r2,r2,r5 @ Ch(e,f,g)
1314 eor r0,r7,r7,ror#11
1315 add r6,r6,r2 @ h+=Ch(e,f,g)
1316#if 21==31
1317 and r3,r3,#0xff
1318 cmp r3,#0xf2 @ done?
1319#endif
1320#if 21<15
1321# if __ARM_ARCH__>=7
1322 ldr r2,[r1],#4 @ prefetch
1323# else
1324 ldrb r2,[r1,#3]
1325# endif
1326 eor r3,r7,r8 @ a^b, b^c in next round
1327#else
1328 ldr r2,[sp,#7*4] @ from future BODY_16_xx
1329 eor r3,r7,r8 @ a^b, b^c in next round
1330 ldr r1,[sp,#4*4] @ from future BODY_16_xx
1331#endif
1332 eor r0,r0,r7,ror#20 @ Sigma0(a)
1333 and r12,r12,r3 @ (b^c)&=(a^b)
1334 add r10,r10,r6 @ d+=h
1335 eor r12,r12,r8 @ Maj(a,b,c)
1336 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1337 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1338 @ ldr r2,[sp,#7*4] @ 22
1339 @ ldr r1,[sp,#4*4]
1340 mov r0,r2,ror#7
1341 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1342 mov r12,r1,ror#17
1343 eor r0,r0,r2,ror#18
1344 eor r12,r12,r1,ror#19
1345 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1346 ldr r2,[sp,#6*4]
1347 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1348 ldr r1,[sp,#15*4]
1349
1350 add r12,r12,r0
1351 eor r0,r10,r10,ror#5 @ from BODY_00_15
1352 add r2,r2,r12
1353 eor r0,r0,r10,ror#19 @ Sigma1(e)
1354 add r2,r2,r1 @ X[i]
1355 ldr r12,[r14],#4 @ *K256++
1356 add r5,r5,r2 @ h+=X[i]
1357 str r2,[sp,#6*4]
1358 eor r2,r11,r4
1359 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1360 and r2,r2,r10
1361 add r5,r5,r12 @ h+=K256[i]
1362 eor r2,r2,r4 @ Ch(e,f,g)
1363 eor r0,r6,r6,ror#11
1364 add r5,r5,r2 @ h+=Ch(e,f,g)
1365#if 22==31
1366 and r12,r12,#0xff
1367 cmp r12,#0xf2 @ done?
1368#endif
1369#if 22<15
1370# if __ARM_ARCH__>=7
1371 ldr r2,[r1],#4 @ prefetch
1372# else
1373 ldrb r2,[r1,#3]
1374# endif
1375 eor r12,r6,r7 @ a^b, b^c in next round
1376#else
1377 ldr r2,[sp,#8*4] @ from future BODY_16_xx
1378 eor r12,r6,r7 @ a^b, b^c in next round
1379 ldr r1,[sp,#5*4] @ from future BODY_16_xx
1380#endif
1381 eor r0,r0,r6,ror#20 @ Sigma0(a)
1382 and r3,r3,r12 @ (b^c)&=(a^b)
1383 add r9,r9,r5 @ d+=h
1384 eor r3,r3,r7 @ Maj(a,b,c)
1385 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1386 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1387 @ ldr r2,[sp,#8*4] @ 23
1388 @ ldr r1,[sp,#5*4]
1389 mov r0,r2,ror#7
1390 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1391 mov r3,r1,ror#17
1392 eor r0,r0,r2,ror#18
1393 eor r3,r3,r1,ror#19
1394 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1395 ldr r2,[sp,#7*4]
1396 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1397 ldr r1,[sp,#0*4]
1398
1399 add r3,r3,r0
1400 eor r0,r9,r9,ror#5 @ from BODY_00_15
1401 add r2,r2,r3
1402 eor r0,r0,r9,ror#19 @ Sigma1(e)
1403 add r2,r2,r1 @ X[i]
1404 ldr r3,[r14],#4 @ *K256++
1405 add r4,r4,r2 @ h+=X[i]
1406 str r2,[sp,#7*4]
1407 eor r2,r10,r11
1408 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1409 and r2,r2,r9
1410 add r4,r4,r3 @ h+=K256[i]
1411 eor r2,r2,r11 @ Ch(e,f,g)
1412 eor r0,r5,r5,ror#11
1413 add r4,r4,r2 @ h+=Ch(e,f,g)
1414#if 23==31
1415 and r3,r3,#0xff
1416 cmp r3,#0xf2 @ done?
1417#endif
1418#if 23<15
1419# if __ARM_ARCH__>=7
1420 ldr r2,[r1],#4 @ prefetch
1421# else
1422 ldrb r2,[r1,#3]
1423# endif
1424 eor r3,r5,r6 @ a^b, b^c in next round
1425#else
1426 ldr r2,[sp,#9*4] @ from future BODY_16_xx
1427 eor r3,r5,r6 @ a^b, b^c in next round
1428 ldr r1,[sp,#6*4] @ from future BODY_16_xx
1429#endif
1430 eor r0,r0,r5,ror#20 @ Sigma0(a)
1431 and r12,r12,r3 @ (b^c)&=(a^b)
1432 add r8,r8,r4 @ d+=h
1433 eor r12,r12,r6 @ Maj(a,b,c)
1434 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1435 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1436 @ ldr r2,[sp,#9*4] @ 24
1437 @ ldr r1,[sp,#6*4]
1438 mov r0,r2,ror#7
1439 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1440 mov r12,r1,ror#17
1441 eor r0,r0,r2,ror#18
1442 eor r12,r12,r1,ror#19
1443 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1444 ldr r2,[sp,#8*4]
1445 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1446 ldr r1,[sp,#1*4]
1447
1448 add r12,r12,r0
1449 eor r0,r8,r8,ror#5 @ from BODY_00_15
1450 add r2,r2,r12
1451 eor r0,r0,r8,ror#19 @ Sigma1(e)
1452 add r2,r2,r1 @ X[i]
1453 ldr r12,[r14],#4 @ *K256++
1454 add r11,r11,r2 @ h+=X[i]
1455 str r2,[sp,#8*4]
1456 eor r2,r9,r10
1457 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1458 and r2,r2,r8
1459 add r11,r11,r12 @ h+=K256[i]
1460 eor r2,r2,r10 @ Ch(e,f,g)
1461 eor r0,r4,r4,ror#11
1462 add r11,r11,r2 @ h+=Ch(e,f,g)
1463#if 24==31
1464 and r12,r12,#0xff
1465 cmp r12,#0xf2 @ done?
1466#endif
1467#if 24<15
1468# if __ARM_ARCH__>=7
1469 ldr r2,[r1],#4 @ prefetch
1470# else
1471 ldrb r2,[r1,#3]
1472# endif
1473 eor r12,r4,r5 @ a^b, b^c in next round
1474#else
1475 ldr r2,[sp,#10*4] @ from future BODY_16_xx
1476 eor r12,r4,r5 @ a^b, b^c in next round
1477 ldr r1,[sp,#7*4] @ from future BODY_16_xx
1478#endif
1479 eor r0,r0,r4,ror#20 @ Sigma0(a)
1480 and r3,r3,r12 @ (b^c)&=(a^b)
1481 add r7,r7,r11 @ d+=h
1482 eor r3,r3,r5 @ Maj(a,b,c)
1483 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1484 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1485 @ ldr r2,[sp,#10*4] @ 25
1486 @ ldr r1,[sp,#7*4]
1487 mov r0,r2,ror#7
1488 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1489 mov r3,r1,ror#17
1490 eor r0,r0,r2,ror#18
1491 eor r3,r3,r1,ror#19
1492 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1493 ldr r2,[sp,#9*4]
1494 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1495 ldr r1,[sp,#2*4]
1496
1497 add r3,r3,r0
1498 eor r0,r7,r7,ror#5 @ from BODY_00_15
1499 add r2,r2,r3
1500 eor r0,r0,r7,ror#19 @ Sigma1(e)
1501 add r2,r2,r1 @ X[i]
1502 ldr r3,[r14],#4 @ *K256++
1503 add r10,r10,r2 @ h+=X[i]
1504 str r2,[sp,#9*4]
1505 eor r2,r8,r9
1506 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1507 and r2,r2,r7
1508 add r10,r10,r3 @ h+=K256[i]
1509 eor r2,r2,r9 @ Ch(e,f,g)
1510 eor r0,r11,r11,ror#11
1511 add r10,r10,r2 @ h+=Ch(e,f,g)
1512#if 25==31
1513 and r3,r3,#0xff
1514 cmp r3,#0xf2 @ done?
1515#endif
1516#if 25<15
1517# if __ARM_ARCH__>=7
1518 ldr r2,[r1],#4 @ prefetch
1519# else
1520 ldrb r2,[r1,#3]
1521# endif
1522 eor r3,r11,r4 @ a^b, b^c in next round
1523#else
1524 ldr r2,[sp,#11*4] @ from future BODY_16_xx
1525 eor r3,r11,r4 @ a^b, b^c in next round
1526 ldr r1,[sp,#8*4] @ from future BODY_16_xx
1527#endif
1528 eor r0,r0,r11,ror#20 @ Sigma0(a)
1529 and r12,r12,r3 @ (b^c)&=(a^b)
1530 add r6,r6,r10 @ d+=h
1531 eor r12,r12,r4 @ Maj(a,b,c)
1532 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1533 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1534 @ ldr r2,[sp,#11*4] @ 26
1535 @ ldr r1,[sp,#8*4]
1536 mov r0,r2,ror#7
1537 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1538 mov r12,r1,ror#17
1539 eor r0,r0,r2,ror#18
1540 eor r12,r12,r1,ror#19
1541 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1542 ldr r2,[sp,#10*4]
1543 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1544 ldr r1,[sp,#3*4]
1545
1546 add r12,r12,r0
1547 eor r0,r6,r6,ror#5 @ from BODY_00_15
1548 add r2,r2,r12
1549 eor r0,r0,r6,ror#19 @ Sigma1(e)
1550 add r2,r2,r1 @ X[i]
1551 ldr r12,[r14],#4 @ *K256++
1552 add r9,r9,r2 @ h+=X[i]
1553 str r2,[sp,#10*4]
1554 eor r2,r7,r8
1555 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1556 and r2,r2,r6
1557 add r9,r9,r12 @ h+=K256[i]
1558 eor r2,r2,r8 @ Ch(e,f,g)
1559 eor r0,r10,r10,ror#11
1560 add r9,r9,r2 @ h+=Ch(e,f,g)
1561#if 26==31
1562 and r12,r12,#0xff
1563 cmp r12,#0xf2 @ done?
1564#endif
1565#if 26<15
1566# if __ARM_ARCH__>=7
1567 ldr r2,[r1],#4 @ prefetch
1568# else
1569 ldrb r2,[r1,#3]
1570# endif
1571 eor r12,r10,r11 @ a^b, b^c in next round
1572#else
1573 ldr r2,[sp,#12*4] @ from future BODY_16_xx
1574 eor r12,r10,r11 @ a^b, b^c in next round
1575 ldr r1,[sp,#9*4] @ from future BODY_16_xx
1576#endif
1577 eor r0,r0,r10,ror#20 @ Sigma0(a)
1578 and r3,r3,r12 @ (b^c)&=(a^b)
1579 add r5,r5,r9 @ d+=h
1580 eor r3,r3,r11 @ Maj(a,b,c)
1581 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1582 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1583 @ ldr r2,[sp,#12*4] @ 27
1584 @ ldr r1,[sp,#9*4]
1585 mov r0,r2,ror#7
1586 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1587 mov r3,r1,ror#17
1588 eor r0,r0,r2,ror#18
1589 eor r3,r3,r1,ror#19
1590 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1591 ldr r2,[sp,#11*4]
1592 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1593 ldr r1,[sp,#4*4]
1594
1595 add r3,r3,r0
1596 eor r0,r5,r5,ror#5 @ from BODY_00_15
1597 add r2,r2,r3
1598 eor r0,r0,r5,ror#19 @ Sigma1(e)
1599 add r2,r2,r1 @ X[i]
1600 ldr r3,[r14],#4 @ *K256++
1601 add r8,r8,r2 @ h+=X[i]
1602 str r2,[sp,#11*4]
1603 eor r2,r6,r7
1604 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1605 and r2,r2,r5
1606 add r8,r8,r3 @ h+=K256[i]
1607 eor r2,r2,r7 @ Ch(e,f,g)
1608 eor r0,r9,r9,ror#11
1609 add r8,r8,r2 @ h+=Ch(e,f,g)
1610#if 27==31
1611 and r3,r3,#0xff
1612 cmp r3,#0xf2 @ done?
1613#endif
1614#if 27<15
1615# if __ARM_ARCH__>=7
1616 ldr r2,[r1],#4 @ prefetch
1617# else
1618 ldrb r2,[r1,#3]
1619# endif
1620 eor r3,r9,r10 @ a^b, b^c in next round
1621#else
1622 ldr r2,[sp,#13*4] @ from future BODY_16_xx
1623 eor r3,r9,r10 @ a^b, b^c in next round
1624 ldr r1,[sp,#10*4] @ from future BODY_16_xx
1625#endif
1626 eor r0,r0,r9,ror#20 @ Sigma0(a)
1627 and r12,r12,r3 @ (b^c)&=(a^b)
1628 add r4,r4,r8 @ d+=h
1629 eor r12,r12,r10 @ Maj(a,b,c)
1630 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1631 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1632 @ ldr r2,[sp,#13*4] @ 28
1633 @ ldr r1,[sp,#10*4]
1634 mov r0,r2,ror#7
1635 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1636 mov r12,r1,ror#17
1637 eor r0,r0,r2,ror#18
1638 eor r12,r12,r1,ror#19
1639 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1640 ldr r2,[sp,#12*4]
1641 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1642 ldr r1,[sp,#5*4]
1643
1644 add r12,r12,r0
1645 eor r0,r4,r4,ror#5 @ from BODY_00_15
1646 add r2,r2,r12
1647 eor r0,r0,r4,ror#19 @ Sigma1(e)
1648 add r2,r2,r1 @ X[i]
1649 ldr r12,[r14],#4 @ *K256++
1650 add r7,r7,r2 @ h+=X[i]
1651 str r2,[sp,#12*4]
1652 eor r2,r5,r6
1653 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1654 and r2,r2,r4
1655 add r7,r7,r12 @ h+=K256[i]
1656 eor r2,r2,r6 @ Ch(e,f,g)
1657 eor r0,r8,r8,ror#11
1658 add r7,r7,r2 @ h+=Ch(e,f,g)
1659#if 28==31
1660 and r12,r12,#0xff
1661 cmp r12,#0xf2 @ done?
1662#endif
1663#if 28<15
1664# if __ARM_ARCH__>=7
1665 ldr r2,[r1],#4 @ prefetch
1666# else
1667 ldrb r2,[r1,#3]
1668# endif
1669 eor r12,r8,r9 @ a^b, b^c in next round
1670#else
1671 ldr r2,[sp,#14*4] @ from future BODY_16_xx
1672 eor r12,r8,r9 @ a^b, b^c in next round
1673 ldr r1,[sp,#11*4] @ from future BODY_16_xx
1674#endif
1675 eor r0,r0,r8,ror#20 @ Sigma0(a)
1676 and r3,r3,r12 @ (b^c)&=(a^b)
1677 add r11,r11,r7 @ d+=h
1678 eor r3,r3,r9 @ Maj(a,b,c)
1679 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1680 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1681 @ ldr r2,[sp,#14*4] @ 29
1682 @ ldr r1,[sp,#11*4]
1683 mov r0,r2,ror#7
1684 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1685 mov r3,r1,ror#17
1686 eor r0,r0,r2,ror#18
1687 eor r3,r3,r1,ror#19
1688 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1689 ldr r2,[sp,#13*4]
1690 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1691 ldr r1,[sp,#6*4]
1692
1693 add r3,r3,r0
1694 eor r0,r11,r11,ror#5 @ from BODY_00_15
1695 add r2,r2,r3
1696 eor r0,r0,r11,ror#19 @ Sigma1(e)
1697 add r2,r2,r1 @ X[i]
1698 ldr r3,[r14],#4 @ *K256++
1699 add r6,r6,r2 @ h+=X[i]
1700 str r2,[sp,#13*4]
1701 eor r2,r4,r5
1702 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1703 and r2,r2,r11
1704 add r6,r6,r3 @ h+=K256[i]
1705 eor r2,r2,r5 @ Ch(e,f,g)
1706 eor r0,r7,r7,ror#11
1707 add r6,r6,r2 @ h+=Ch(e,f,g)
1708#if 29==31
1709 and r3,r3,#0xff
1710 cmp r3,#0xf2 @ done?
1711#endif
1712#if 29<15
1713# if __ARM_ARCH__>=7
1714 ldr r2,[r1],#4 @ prefetch
1715# else
1716 ldrb r2,[r1,#3]
1717# endif
1718 eor r3,r7,r8 @ a^b, b^c in next round
1719#else
1720 ldr r2,[sp,#15*4] @ from future BODY_16_xx
1721 eor r3,r7,r8 @ a^b, b^c in next round
1722 ldr r1,[sp,#12*4] @ from future BODY_16_xx
1723#endif
1724 eor r0,r0,r7,ror#20 @ Sigma0(a)
1725 and r12,r12,r3 @ (b^c)&=(a^b)
1726 add r10,r10,r6 @ d+=h
1727 eor r12,r12,r8 @ Maj(a,b,c)
1728 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1729 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1730 @ ldr r2,[sp,#15*4] @ 30
1731 @ ldr r1,[sp,#12*4]
1732 mov r0,r2,ror#7
1733 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1734 mov r12,r1,ror#17
1735 eor r0,r0,r2,ror#18
1736 eor r12,r12,r1,ror#19
1737 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1738 ldr r2,[sp,#14*4]
1739 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1740 ldr r1,[sp,#7*4]
1741
1742 add r12,r12,r0
1743 eor r0,r10,r10,ror#5 @ from BODY_00_15
1744 add r2,r2,r12
1745 eor r0,r0,r10,ror#19 @ Sigma1(e)
1746 add r2,r2,r1 @ X[i]
1747 ldr r12,[r14],#4 @ *K256++
1748 add r5,r5,r2 @ h+=X[i]
1749 str r2,[sp,#14*4]
1750 eor r2,r11,r4
1751 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1752 and r2,r2,r10
1753 add r5,r5,r12 @ h+=K256[i]
1754 eor r2,r2,r4 @ Ch(e,f,g)
1755 eor r0,r6,r6,ror#11
1756 add r5,r5,r2 @ h+=Ch(e,f,g)
1757#if 30==31
1758 and r12,r12,#0xff
1759 cmp r12,#0xf2 @ done?
1760#endif
1761#if 30<15
1762# if __ARM_ARCH__>=7
1763 ldr r2,[r1],#4 @ prefetch
1764# else
1765 ldrb r2,[r1,#3]
1766# endif
1767 eor r12,r6,r7 @ a^b, b^c in next round
1768#else
1769 ldr r2,[sp,#0*4] @ from future BODY_16_xx
1770 eor r12,r6,r7 @ a^b, b^c in next round
1771 ldr r1,[sp,#13*4] @ from future BODY_16_xx
1772#endif
1773 eor r0,r0,r6,ror#20 @ Sigma0(a)
1774 and r3,r3,r12 @ (b^c)&=(a^b)
1775 add r9,r9,r5 @ d+=h
1776 eor r3,r3,r7 @ Maj(a,b,c)
1777 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1778 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1779 @ ldr r2,[sp,#0*4] @ 31
1780 @ ldr r1,[sp,#13*4]
1781 mov r0,r2,ror#7
1782 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1783 mov r3,r1,ror#17
1784 eor r0,r0,r2,ror#18
1785 eor r3,r3,r1,ror#19
1786 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1787 ldr r2,[sp,#15*4]
1788 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1789 ldr r1,[sp,#8*4]
1790
1791 add r3,r3,r0
1792 eor r0,r9,r9,ror#5 @ from BODY_00_15
1793 add r2,r2,r3
1794 eor r0,r0,r9,ror#19 @ Sigma1(e)
1795 add r2,r2,r1 @ X[i]
1796 ldr r3,[r14],#4 @ *K256++
1797 add r4,r4,r2 @ h+=X[i]
1798 str r2,[sp,#15*4]
1799 eor r2,r10,r11
1800 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1801 and r2,r2,r9
1802 add r4,r4,r3 @ h+=K256[i]
1803 eor r2,r2,r11 @ Ch(e,f,g)
1804 eor r0,r5,r5,ror#11
1805 add r4,r4,r2 @ h+=Ch(e,f,g)
1806#if 31==31
1807 and r3,r3,#0xff
1808 cmp r3,#0xf2 @ done?
1809#endif
1810#if 31<15
1811# if __ARM_ARCH__>=7
1812 ldr r2,[r1],#4 @ prefetch
1813# else
1814 ldrb r2,[r1,#3]
1815# endif
1816 eor r3,r5,r6 @ a^b, b^c in next round
1817#else
1818 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1819 eor r3,r5,r6 @ a^b, b^c in next round
1820 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1821#endif
1822 eor r0,r0,r5,ror#20 @ Sigma0(a)
1823 and r12,r12,r3 @ (b^c)&=(a^b)
1824 add r8,r8,r4 @ d+=h
1825 eor r12,r12,r6 @ Maj(a,b,c)
1826 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1827 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1828#if __ARM_ARCH__>=7
1829 ite eq @ Thumb2 thing, sanity check in ARM
1830#endif
1831 ldreq r3,[sp,#16*4] @ pull ctx
1832 bne .Lrounds_16_xx
1833
1834 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1835 ldr r0,[r3,#0]
1836 ldr r2,[r3,#4]
1837 ldr r12,[r3,#8]
1838 add r4,r4,r0
1839 ldr r0,[r3,#12]
1840 add r5,r5,r2
1841 ldr r2,[r3,#16]
1842 add r6,r6,r12
1843 ldr r12,[r3,#20]
1844 add r7,r7,r0
1845 ldr r0,[r3,#24]
1846 add r8,r8,r2
1847 ldr r2,[r3,#28]
1848 add r9,r9,r12
1849 ldr r1,[sp,#17*4] @ pull inp
1850 ldr r12,[sp,#18*4] @ pull inp+len
1851 add r10,r10,r0
1852 add r11,r11,r2
1853 stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1854 cmp r1,r12
1855 sub r14,r14,#256 @ rewind Ktbl
1856 bne .Loop
1857
1858 add sp,sp,#19*4 @ destroy frame
1859#if __ARM_ARCH__>=5
1860 ldmia sp!,{r4-r11,pc}
1861#else
1862 ldmia sp!,{r4-r11,lr}
1863 tst lr,#1
1864 moveq pc,lr @ be binary compatible with V4, yet
1865 .word 0xe12fff1e @ interoperable with Thumb ISA:-)
1866#endif
1867.size sha256_block_data_order,.-sha256_block_data_order
1868#if __ARM_MAX_ARCH__>=7
1869.arch armv7-a
1870.fpu neon
1871
1872.global sha256_block_data_order_neon
1873.type sha256_block_data_order_neon,%function
1874.align 4
1875sha256_block_data_order_neon:
1876.LNEON:
1877 stmdb sp!,{r4-r12,lr}
1878
1879 sub r11,sp,#16*4+16
1880 adrl r14,K256
1881 bic r11,r11,#15 @ align for 128-bit stores
1882 mov r12,sp
1883 mov sp,r11 @ alloca
1884 add r2,r1,r2,lsl#6 @ len to point at the end of inp
1885
1886 vld1.8 {q0},[r1]!
1887 vld1.8 {q1},[r1]!
1888 vld1.8 {q2},[r1]!
1889 vld1.8 {q3},[r1]!
1890 vld1.32 {q8},[r14,:128]!
1891 vld1.32 {q9},[r14,:128]!
1892 vld1.32 {q10},[r14,:128]!
1893 vld1.32 {q11},[r14,:128]!
1894 vrev32.8 q0,q0 @ yes, even on
1895 str r0,[sp,#64]
1896 vrev32.8 q1,q1 @ big-endian
1897 str r1,[sp,#68]
1898 mov r1,sp
1899 vrev32.8 q2,q2
1900 str r2,[sp,#72]
1901 vrev32.8 q3,q3
1902 str r12,[sp,#76] @ save original sp
1903 vadd.i32 q8,q8,q0
1904 vadd.i32 q9,q9,q1
1905 vst1.32 {q8},[r1,:128]!
1906 vadd.i32 q10,q10,q2
1907 vst1.32 {q9},[r1,:128]!
1908 vadd.i32 q11,q11,q3
1909 vst1.32 {q10},[r1,:128]!
1910 vst1.32 {q11},[r1,:128]!
1911
1912 ldmia r0,{r4-r11}
1913 sub r1,r1,#64
1914 ldr r2,[sp,#0]
1915 eor r12,r12,r12
1916 eor r3,r5,r6
1917 b .L_00_48
1918
1919.align 4
1920.L_00_48:
1921 vext.8 q8,q0,q1,#4
1922 add r11,r11,r2
1923 eor r2,r9,r10
1924 eor r0,r8,r8,ror#5
1925 vext.8 q9,q2,q3,#4
1926 add r4,r4,r12
1927 and r2,r2,r8
1928 eor r12,r0,r8,ror#19
1929 vshr.u32 q10,q8,#7
1930 eor r0,r4,r4,ror#11
1931 eor r2,r2,r10
1932 vadd.i32 q0,q0,q9
1933 add r11,r11,r12,ror#6
1934 eor r12,r4,r5
1935 vshr.u32 q9,q8,#3
1936 eor r0,r0,r4,ror#20
1937 add r11,r11,r2
1938 vsli.32 q10,q8,#25
1939 ldr r2,[sp,#4]
1940 and r3,r3,r12
1941 vshr.u32 q11,q8,#18
1942 add r7,r7,r11
1943 add r11,r11,r0,ror#2
1944 eor r3,r3,r5
1945 veor q9,q9,q10
1946 add r10,r10,r2
1947 vsli.32 q11,q8,#14
1948 eor r2,r8,r9
1949 eor r0,r7,r7,ror#5
1950 vshr.u32 d24,d7,#17
1951 add r11,r11,r3
1952 and r2,r2,r7
1953 veor q9,q9,q11
1954 eor r3,r0,r7,ror#19
1955 eor r0,r11,r11,ror#11
1956 vsli.32 d24,d7,#15
1957 eor r2,r2,r9
1958 add r10,r10,r3,ror#6
1959 vshr.u32 d25,d7,#10
1960 eor r3,r11,r4
1961 eor r0,r0,r11,ror#20
1962 vadd.i32 q0,q0,q9
1963 add r10,r10,r2
1964 ldr r2,[sp,#8]
1965 veor d25,d25,d24
1966 and r12,r12,r3
1967 add r6,r6,r10
1968 vshr.u32 d24,d7,#19
1969 add r10,r10,r0,ror#2
1970 eor r12,r12,r4
1971 vsli.32 d24,d7,#13
1972 add r9,r9,r2
1973 eor r2,r7,r8
1974 veor d25,d25,d24
1975 eor r0,r6,r6,ror#5
1976 add r10,r10,r12
1977 vadd.i32 d0,d0,d25
1978 and r2,r2,r6
1979 eor r12,r0,r6,ror#19
1980 vshr.u32 d24,d0,#17
1981 eor r0,r10,r10,ror#11
1982 eor r2,r2,r8
1983 vsli.32 d24,d0,#15
1984 add r9,r9,r12,ror#6
1985 eor r12,r10,r11
1986 vshr.u32 d25,d0,#10
1987 eor r0,r0,r10,ror#20
1988 add r9,r9,r2
1989 veor d25,d25,d24
1990 ldr r2,[sp,#12]
1991 and r3,r3,r12
1992 vshr.u32 d24,d0,#19
1993 add r5,r5,r9
1994 add r9,r9,r0,ror#2
1995 eor r3,r3,r11
1996 vld1.32 {q8},[r14,:128]!
1997 add r8,r8,r2
1998 vsli.32 d24,d0,#13
1999 eor r2,r6,r7
2000 eor r0,r5,r5,ror#5
2001 veor d25,d25,d24
2002 add r9,r9,r3
2003 and r2,r2,r5
2004 vadd.i32 d1,d1,d25
2005 eor r3,r0,r5,ror#19
2006 eor r0,r9,r9,ror#11
2007 vadd.i32 q8,q8,q0
2008 eor r2,r2,r7
2009 add r8,r8,r3,ror#6
2010 eor r3,r9,r10
2011 eor r0,r0,r9,ror#20
2012 add r8,r8,r2
2013 ldr r2,[sp,#16]
2014 and r12,r12,r3
2015 add r4,r4,r8
2016 vst1.32 {q8},[r1,:128]!
2017 add r8,r8,r0,ror#2
2018 eor r12,r12,r10
2019 vext.8 q8,q1,q2,#4
2020 add r7,r7,r2
2021 eor r2,r5,r6
2022 eor r0,r4,r4,ror#5
2023 vext.8 q9,q3,q0,#4
2024 add r8,r8,r12
2025 and r2,r2,r4
2026 eor r12,r0,r4,ror#19
2027 vshr.u32 q10,q8,#7
2028 eor r0,r8,r8,ror#11
2029 eor r2,r2,r6
2030 vadd.i32 q1,q1,q9
2031 add r7,r7,r12,ror#6
2032 eor r12,r8,r9
2033 vshr.u32 q9,q8,#3
2034 eor r0,r0,r8,ror#20
2035 add r7,r7,r2
2036 vsli.32 q10,q8,#25
2037 ldr r2,[sp,#20]
2038 and r3,r3,r12
2039 vshr.u32 q11,q8,#18
2040 add r11,r11,r7
2041 add r7,r7,r0,ror#2
2042 eor r3,r3,r9
2043 veor q9,q9,q10
2044 add r6,r6,r2
2045 vsli.32 q11,q8,#14
2046 eor r2,r4,r5
2047 eor r0,r11,r11,ror#5
2048 vshr.u32 d24,d1,#17
2049 add r7,r7,r3
2050 and r2,r2,r11
2051 veor q9,q9,q11
2052 eor r3,r0,r11,ror#19
2053 eor r0,r7,r7,ror#11
2054 vsli.32 d24,d1,#15
2055 eor r2,r2,r5
2056 add r6,r6,r3,ror#6
2057 vshr.u32 d25,d1,#10
2058 eor r3,r7,r8
2059 eor r0,r0,r7,ror#20
2060 vadd.i32 q1,q1,q9
2061 add r6,r6,r2
2062 ldr r2,[sp,#24]
2063 veor d25,d25,d24
2064 and r12,r12,r3
2065 add r10,r10,r6
2066 vshr.u32 d24,d1,#19
2067 add r6,r6,r0,ror#2
2068 eor r12,r12,r8
2069 vsli.32 d24,d1,#13
2070 add r5,r5,r2
2071 eor r2,r11,r4
2072 veor d25,d25,d24
2073 eor r0,r10,r10,ror#5
2074 add r6,r6,r12
2075 vadd.i32 d2,d2,d25
2076 and r2,r2,r10
2077 eor r12,r0,r10,ror#19
2078 vshr.u32 d24,d2,#17
2079 eor r0,r6,r6,ror#11
2080 eor r2,r2,r4
2081 vsli.32 d24,d2,#15
2082 add r5,r5,r12,ror#6
2083 eor r12,r6,r7
2084 vshr.u32 d25,d2,#10
2085 eor r0,r0,r6,ror#20
2086 add r5,r5,r2
2087 veor d25,d25,d24
2088 ldr r2,[sp,#28]
2089 and r3,r3,r12
2090 vshr.u32 d24,d2,#19
2091 add r9,r9,r5
2092 add r5,r5,r0,ror#2
2093 eor r3,r3,r7
2094 vld1.32 {q8},[r14,:128]!
2095 add r4,r4,r2
2096 vsli.32 d24,d2,#13
2097 eor r2,r10,r11
2098 eor r0,r9,r9,ror#5
2099 veor d25,d25,d24
2100 add r5,r5,r3
2101 and r2,r2,r9
2102 vadd.i32 d3,d3,d25
2103 eor r3,r0,r9,ror#19
2104 eor r0,r5,r5,ror#11
2105 vadd.i32 q8,q8,q1
2106 eor r2,r2,r11
2107 add r4,r4,r3,ror#6
2108 eor r3,r5,r6
2109 eor r0,r0,r5,ror#20
2110 add r4,r4,r2
2111 ldr r2,[sp,#32]
2112 and r12,r12,r3
2113 add r8,r8,r4
2114 vst1.32 {q8},[r1,:128]!
2115 add r4,r4,r0,ror#2
2116 eor r12,r12,r6
2117 vext.8 q8,q2,q3,#4
2118 add r11,r11,r2
2119 eor r2,r9,r10
2120 eor r0,r8,r8,ror#5
2121 vext.8 q9,q0,q1,#4
2122 add r4,r4,r12
2123 and r2,r2,r8
2124 eor r12,r0,r8,ror#19
2125 vshr.u32 q10,q8,#7
2126 eor r0,r4,r4,ror#11
2127 eor r2,r2,r10
2128 vadd.i32 q2,q2,q9
2129 add r11,r11,r12,ror#6
2130 eor r12,r4,r5
2131 vshr.u32 q9,q8,#3
2132 eor r0,r0,r4,ror#20
2133 add r11,r11,r2
2134 vsli.32 q10,q8,#25
2135 ldr r2,[sp,#36]
2136 and r3,r3,r12
2137 vshr.u32 q11,q8,#18
2138 add r7,r7,r11
2139 add r11,r11,r0,ror#2
2140 eor r3,r3,r5
2141 veor q9,q9,q10
2142 add r10,r10,r2
2143 vsli.32 q11,q8,#14
2144 eor r2,r8,r9
2145 eor r0,r7,r7,ror#5
2146 vshr.u32 d24,d3,#17
2147 add r11,r11,r3
2148 and r2,r2,r7
2149 veor q9,q9,q11
2150 eor r3,r0,r7,ror#19
2151 eor r0,r11,r11,ror#11
2152 vsli.32 d24,d3,#15
2153 eor r2,r2,r9
2154 add r10,r10,r3,ror#6
2155 vshr.u32 d25,d3,#10
2156 eor r3,r11,r4
2157 eor r0,r0,r11,ror#20
2158 vadd.i32 q2,q2,q9
2159 add r10,r10,r2
2160 ldr r2,[sp,#40]
2161 veor d25,d25,d24
2162 and r12,r12,r3
2163 add r6,r6,r10
2164 vshr.u32 d24,d3,#19
2165 add r10,r10,r0,ror#2
2166 eor r12,r12,r4
2167 vsli.32 d24,d3,#13
2168 add r9,r9,r2
2169 eor r2,r7,r8
2170 veor d25,d25,d24
2171 eor r0,r6,r6,ror#5
2172 add r10,r10,r12
2173 vadd.i32 d4,d4,d25
2174 and r2,r2,r6
2175 eor r12,r0,r6,ror#19
2176 vshr.u32 d24,d4,#17
2177 eor r0,r10,r10,ror#11
2178 eor r2,r2,r8
2179 vsli.32 d24,d4,#15
2180 add r9,r9,r12,ror#6
2181 eor r12,r10,r11
2182 vshr.u32 d25,d4,#10
2183 eor r0,r0,r10,ror#20
2184 add r9,r9,r2
2185 veor d25,d25,d24
2186 ldr r2,[sp,#44]
2187 and r3,r3,r12
2188 vshr.u32 d24,d4,#19
2189 add r5,r5,r9
2190 add r9,r9,r0,ror#2
2191 eor r3,r3,r11
2192 vld1.32 {q8},[r14,:128]!
2193 add r8,r8,r2
2194 vsli.32 d24,d4,#13
2195 eor r2,r6,r7
2196 eor r0,r5,r5,ror#5
2197 veor d25,d25,d24
2198 add r9,r9,r3
2199 and r2,r2,r5
2200 vadd.i32 d5,d5,d25
2201 eor r3,r0,r5,ror#19
2202 eor r0,r9,r9,ror#11
2203 vadd.i32 q8,q8,q2
2204 eor r2,r2,r7
2205 add r8,r8,r3,ror#6
2206 eor r3,r9,r10
2207 eor r0,r0,r9,ror#20
2208 add r8,r8,r2
2209 ldr r2,[sp,#48]
2210 and r12,r12,r3
2211 add r4,r4,r8
2212 vst1.32 {q8},[r1,:128]!
2213 add r8,r8,r0,ror#2
2214 eor r12,r12,r10
2215 vext.8 q8,q3,q0,#4
2216 add r7,r7,r2
2217 eor r2,r5,r6
2218 eor r0,r4,r4,ror#5
2219 vext.8 q9,q1,q2,#4
2220 add r8,r8,r12
2221 and r2,r2,r4
2222 eor r12,r0,r4,ror#19
2223 vshr.u32 q10,q8,#7
2224 eor r0,r8,r8,ror#11
2225 eor r2,r2,r6
2226 vadd.i32 q3,q3,q9
2227 add r7,r7,r12,ror#6
2228 eor r12,r8,r9
2229 vshr.u32 q9,q8,#3
2230 eor r0,r0,r8,ror#20
2231 add r7,r7,r2
2232 vsli.32 q10,q8,#25
2233 ldr r2,[sp,#52]
2234 and r3,r3,r12
2235 vshr.u32 q11,q8,#18
2236 add r11,r11,r7
2237 add r7,r7,r0,ror#2
2238 eor r3,r3,r9
2239 veor q9,q9,q10
2240 add r6,r6,r2
2241 vsli.32 q11,q8,#14
2242 eor r2,r4,r5
2243 eor r0,r11,r11,ror#5
2244 vshr.u32 d24,d5,#17
2245 add r7,r7,r3
2246 and r2,r2,r11
2247 veor q9,q9,q11
2248 eor r3,r0,r11,ror#19
2249 eor r0,r7,r7,ror#11
2250 vsli.32 d24,d5,#15
2251 eor r2,r2,r5
2252 add r6,r6,r3,ror#6
2253 vshr.u32 d25,d5,#10
2254 eor r3,r7,r8
2255 eor r0,r0,r7,ror#20
2256 vadd.i32 q3,q3,q9
2257 add r6,r6,r2
2258 ldr r2,[sp,#56]
2259 veor d25,d25,d24
2260 and r12,r12,r3
2261 add r10,r10,r6
2262 vshr.u32 d24,d5,#19
2263 add r6,r6,r0,ror#2
2264 eor r12,r12,r8
2265 vsli.32 d24,d5,#13
2266 add r5,r5,r2
2267 eor r2,r11,r4
2268 veor d25,d25,d24
2269 eor r0,r10,r10,ror#5
2270 add r6,r6,r12
2271 vadd.i32 d6,d6,d25
2272 and r2,r2,r10
2273 eor r12,r0,r10,ror#19
2274 vshr.u32 d24,d6,#17
2275 eor r0,r6,r6,ror#11
2276 eor r2,r2,r4
2277 vsli.32 d24,d6,#15
2278 add r5,r5,r12,ror#6
2279 eor r12,r6,r7
2280 vshr.u32 d25,d6,#10
2281 eor r0,r0,r6,ror#20
2282 add r5,r5,r2
2283 veor d25,d25,d24
2284 ldr r2,[sp,#60]
2285 and r3,r3,r12
2286 vshr.u32 d24,d6,#19
2287 add r9,r9,r5
2288 add r5,r5,r0,ror#2
2289 eor r3,r3,r7
2290 vld1.32 {q8},[r14,:128]!
2291 add r4,r4,r2
2292 vsli.32 d24,d6,#13
2293 eor r2,r10,r11
2294 eor r0,r9,r9,ror#5
2295 veor d25,d25,d24
2296 add r5,r5,r3
2297 and r2,r2,r9
2298 vadd.i32 d7,d7,d25
2299 eor r3,r0,r9,ror#19
2300 eor r0,r5,r5,ror#11
2301 vadd.i32 q8,q8,q3
2302 eor r2,r2,r11
2303 add r4,r4,r3,ror#6
2304 eor r3,r5,r6
2305 eor r0,r0,r5,ror#20
2306 add r4,r4,r2
2307 ldr r2,[r14]
2308 and r12,r12,r3
2309 add r8,r8,r4
2310 vst1.32 {q8},[r1,:128]!
2311 add r4,r4,r0,ror#2
2312 eor r12,r12,r6
2313 teq r2,#0 @ check for K256 terminator
2314 ldr r2,[sp,#0]
2315 sub r1,r1,#64
2316 bne .L_00_48
2317
2318 ldr r1,[sp,#68]
2319 ldr r0,[sp,#72]
2320 sub r14,r14,#256 @ rewind r14
2321 teq r1,r0
2322 it eq
2323 subeq r1,r1,#64 @ avoid SEGV
2324 vld1.8 {q0},[r1]! @ load next input block
2325 vld1.8 {q1},[r1]!
2326 vld1.8 {q2},[r1]!
2327 vld1.8 {q3},[r1]!
2328 it ne
2329 strne r1,[sp,#68]
2330 mov r1,sp
2331 add r11,r11,r2
2332 eor r2,r9,r10
2333 eor r0,r8,r8,ror#5
2334 add r4,r4,r12
2335 vld1.32 {q8},[r14,:128]!
2336 and r2,r2,r8
2337 eor r12,r0,r8,ror#19
2338 eor r0,r4,r4,ror#11
2339 eor r2,r2,r10
2340 vrev32.8 q0,q0
2341 add r11,r11,r12,ror#6
2342 eor r12,r4,r5
2343 eor r0,r0,r4,ror#20
2344 add r11,r11,r2
2345 vadd.i32 q8,q8,q0
2346 ldr r2,[sp,#4]
2347 and r3,r3,r12
2348 add r7,r7,r11
2349 add r11,r11,r0,ror#2
2350 eor r3,r3,r5
2351 add r10,r10,r2
2352 eor r2,r8,r9
2353 eor r0,r7,r7,ror#5
2354 add r11,r11,r3
2355 and r2,r2,r7
2356 eor r3,r0,r7,ror#19
2357 eor r0,r11,r11,ror#11
2358 eor r2,r2,r9
2359 add r10,r10,r3,ror#6
2360 eor r3,r11,r4
2361 eor r0,r0,r11,ror#20
2362 add r10,r10,r2
2363 ldr r2,[sp,#8]
2364 and r12,r12,r3
2365 add r6,r6,r10
2366 add r10,r10,r0,ror#2
2367 eor r12,r12,r4
2368 add r9,r9,r2
2369 eor r2,r7,r8
2370 eor r0,r6,r6,ror#5
2371 add r10,r10,r12
2372 and r2,r2,r6
2373 eor r12,r0,r6,ror#19
2374 eor r0,r10,r10,ror#11
2375 eor r2,r2,r8
2376 add r9,r9,r12,ror#6
2377 eor r12,r10,r11
2378 eor r0,r0,r10,ror#20
2379 add r9,r9,r2
2380 ldr r2,[sp,#12]
2381 and r3,r3,r12
2382 add r5,r5,r9
2383 add r9,r9,r0,ror#2
2384 eor r3,r3,r11
2385 add r8,r8,r2
2386 eor r2,r6,r7
2387 eor r0,r5,r5,ror#5
2388 add r9,r9,r3
2389 and r2,r2,r5
2390 eor r3,r0,r5,ror#19
2391 eor r0,r9,r9,ror#11
2392 eor r2,r2,r7
2393 add r8,r8,r3,ror#6
2394 eor r3,r9,r10
2395 eor r0,r0,r9,ror#20
2396 add r8,r8,r2
2397 ldr r2,[sp,#16]
2398 and r12,r12,r3
2399 add r4,r4,r8
2400 add r8,r8,r0,ror#2
2401 eor r12,r12,r10
2402 vst1.32 {q8},[r1,:128]!
2403 add r7,r7,r2
2404 eor r2,r5,r6
2405 eor r0,r4,r4,ror#5
2406 add r8,r8,r12
2407 vld1.32 {q8},[r14,:128]!
2408 and r2,r2,r4
2409 eor r12,r0,r4,ror#19
2410 eor r0,r8,r8,ror#11
2411 eor r2,r2,r6
2412 vrev32.8 q1,q1
2413 add r7,r7,r12,ror#6
2414 eor r12,r8,r9
2415 eor r0,r0,r8,ror#20
2416 add r7,r7,r2
2417 vadd.i32 q8,q8,q1
2418 ldr r2,[sp,#20]
2419 and r3,r3,r12
2420 add r11,r11,r7
2421 add r7,r7,r0,ror#2
2422 eor r3,r3,r9
2423 add r6,r6,r2
2424 eor r2,r4,r5
2425 eor r0,r11,r11,ror#5
2426 add r7,r7,r3
2427 and r2,r2,r11
2428 eor r3,r0,r11,ror#19
2429 eor r0,r7,r7,ror#11
2430 eor r2,r2,r5
2431 add r6,r6,r3,ror#6
2432 eor r3,r7,r8
2433 eor r0,r0,r7,ror#20
2434 add r6,r6,r2
2435 ldr r2,[sp,#24]
2436 and r12,r12,r3
2437 add r10,r10,r6
2438 add r6,r6,r0,ror#2
2439 eor r12,r12,r8
2440 add r5,r5,r2
2441 eor r2,r11,r4
2442 eor r0,r10,r10,ror#5
2443 add r6,r6,r12
2444 and r2,r2,r10
2445 eor r12,r0,r10,ror#19
2446 eor r0,r6,r6,ror#11
2447 eor r2,r2,r4
2448 add r5,r5,r12,ror#6
2449 eor r12,r6,r7
2450 eor r0,r0,r6,ror#20
2451 add r5,r5,r2
2452 ldr r2,[sp,#28]
2453 and r3,r3,r12
2454 add r9,r9,r5
2455 add r5,r5,r0,ror#2
2456 eor r3,r3,r7
2457 add r4,r4,r2
2458 eor r2,r10,r11
2459 eor r0,r9,r9,ror#5
2460 add r5,r5,r3
2461 and r2,r2,r9
2462 eor r3,r0,r9,ror#19
2463 eor r0,r5,r5,ror#11
2464 eor r2,r2,r11
2465 add r4,r4,r3,ror#6
2466 eor r3,r5,r6
2467 eor r0,r0,r5,ror#20
2468 add r4,r4,r2
2469 ldr r2,[sp,#32]
2470 and r12,r12,r3
2471 add r8,r8,r4
2472 add r4,r4,r0,ror#2
2473 eor r12,r12,r6
2474 vst1.32 {q8},[r1,:128]!
2475 add r11,r11,r2
2476 eor r2,r9,r10
2477 eor r0,r8,r8,ror#5
2478 add r4,r4,r12
2479 vld1.32 {q8},[r14,:128]!
2480 and r2,r2,r8
2481 eor r12,r0,r8,ror#19
2482 eor r0,r4,r4,ror#11
2483 eor r2,r2,r10
2484 vrev32.8 q2,q2
2485 add r11,r11,r12,ror#6
2486 eor r12,r4,r5
2487 eor r0,r0,r4,ror#20
2488 add r11,r11,r2
2489 vadd.i32 q8,q8,q2
2490 ldr r2,[sp,#36]
2491 and r3,r3,r12
2492 add r7,r7,r11
2493 add r11,r11,r0,ror#2
2494 eor r3,r3,r5
2495 add r10,r10,r2
2496 eor r2,r8,r9
2497 eor r0,r7,r7,ror#5
2498 add r11,r11,r3
2499 and r2,r2,r7
2500 eor r3,r0,r7,ror#19
2501 eor r0,r11,r11,ror#11
2502 eor r2,r2,r9
2503 add r10,r10,r3,ror#6
2504 eor r3,r11,r4
2505 eor r0,r0,r11,ror#20
2506 add r10,r10,r2
2507 ldr r2,[sp,#40]
2508 and r12,r12,r3
2509 add r6,r6,r10
2510 add r10,r10,r0,ror#2
2511 eor r12,r12,r4
2512 add r9,r9,r2
2513 eor r2,r7,r8
2514 eor r0,r6,r6,ror#5
2515 add r10,r10,r12
2516 and r2,r2,r6
2517 eor r12,r0,r6,ror#19
2518 eor r0,r10,r10,ror#11
2519 eor r2,r2,r8
2520 add r9,r9,r12,ror#6
2521 eor r12,r10,r11
2522 eor r0,r0,r10,ror#20
2523 add r9,r9,r2
2524 ldr r2,[sp,#44]
2525 and r3,r3,r12
2526 add r5,r5,r9
2527 add r9,r9,r0,ror#2
2528 eor r3,r3,r11
2529 add r8,r8,r2
2530 eor r2,r6,r7
2531 eor r0,r5,r5,ror#5
2532 add r9,r9,r3
2533 and r2,r2,r5
2534 eor r3,r0,r5,ror#19
2535 eor r0,r9,r9,ror#11
2536 eor r2,r2,r7
2537 add r8,r8,r3,ror#6
2538 eor r3,r9,r10
2539 eor r0,r0,r9,ror#20
2540 add r8,r8,r2
2541 ldr r2,[sp,#48]
2542 and r12,r12,r3
2543 add r4,r4,r8
2544 add r8,r8,r0,ror#2
2545 eor r12,r12,r10
2546 vst1.32 {q8},[r1,:128]!
2547 add r7,r7,r2
2548 eor r2,r5,r6
2549 eor r0,r4,r4,ror#5
2550 add r8,r8,r12
2551 vld1.32 {q8},[r14,:128]!
2552 and r2,r2,r4
2553 eor r12,r0,r4,ror#19
2554 eor r0,r8,r8,ror#11
2555 eor r2,r2,r6
2556 vrev32.8 q3,q3
2557 add r7,r7,r12,ror#6
2558 eor r12,r8,r9
2559 eor r0,r0,r8,ror#20
2560 add r7,r7,r2
2561 vadd.i32 q8,q8,q3
2562 ldr r2,[sp,#52]
2563 and r3,r3,r12
2564 add r11,r11,r7
2565 add r7,r7,r0,ror#2
2566 eor r3,r3,r9
2567 add r6,r6,r2
2568 eor r2,r4,r5
2569 eor r0,r11,r11,ror#5
2570 add r7,r7,r3
2571 and r2,r2,r11
2572 eor r3,r0,r11,ror#19
2573 eor r0,r7,r7,ror#11
2574 eor r2,r2,r5
2575 add r6,r6,r3,ror#6
2576 eor r3,r7,r8
2577 eor r0,r0,r7,ror#20
2578 add r6,r6,r2
2579 ldr r2,[sp,#56]
2580 and r12,r12,r3
2581 add r10,r10,r6
2582 add r6,r6,r0,ror#2
2583 eor r12,r12,r8
2584 add r5,r5,r2
2585 eor r2,r11,r4
2586 eor r0,r10,r10,ror#5
2587 add r6,r6,r12
2588 and r2,r2,r10
2589 eor r12,r0,r10,ror#19
2590 eor r0,r6,r6,ror#11
2591 eor r2,r2,r4
2592 add r5,r5,r12,ror#6
2593 eor r12,r6,r7
2594 eor r0,r0,r6,ror#20
2595 add r5,r5,r2
2596 ldr r2,[sp,#60]
2597 and r3,r3,r12
2598 add r9,r9,r5
2599 add r5,r5,r0,ror#2
2600 eor r3,r3,r7
2601 add r4,r4,r2
2602 eor r2,r10,r11
2603 eor r0,r9,r9,ror#5
2604 add r5,r5,r3
2605 and r2,r2,r9
2606 eor r3,r0,r9,ror#19
2607 eor r0,r5,r5,ror#11
2608 eor r2,r2,r11
2609 add r4,r4,r3,ror#6
2610 eor r3,r5,r6
2611 eor r0,r0,r5,ror#20
2612 add r4,r4,r2
2613 ldr r2,[sp,#64]
2614 and r12,r12,r3
2615 add r8,r8,r4
2616 add r4,r4,r0,ror#2
2617 eor r12,r12,r6
2618 vst1.32 {q8},[r1,:128]!
2619 ldr r0,[r2,#0]
2620 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
2621 ldr r12,[r2,#4]
2622 ldr r3,[r2,#8]
2623 ldr r1,[r2,#12]
2624 add r4,r4,r0 @ accumulate
2625 ldr r0,[r2,#16]
2626 add r5,r5,r12
2627 ldr r12,[r2,#20]
2628 add r6,r6,r3
2629 ldr r3,[r2,#24]
2630 add r7,r7,r1
2631 ldr r1,[r2,#28]
2632 add r8,r8,r0
2633 str r4,[r2],#4
2634 add r9,r9,r12
2635 str r5,[r2],#4
2636 add r10,r10,r3
2637 str r6,[r2],#4
2638 add r11,r11,r1
2639 str r7,[r2],#4
2640 stmia r2,{r8-r11}
2641
2642 ittte ne
2643 movne r1,sp
2644 ldrne r2,[sp,#0]
2645 eorne r12,r12,r12
2646 ldreq sp,[sp,#76] @ restore original sp
2647 itt ne
2648 eorne r3,r5,r6
2649 bne .L_00_48
2650
2651 ldmia sp!,{r4-r12,pc}
2652.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
2653#endif
2654#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2655
2656# ifdef __thumb2__
2657# define INST(a,b,c,d) .byte c,d|0xc,a,b
2658# else
2659# define INST(a,b,c,d) .byte a,b,c,d
2660# endif
2661
2662.type sha256_block_data_order_armv8,%function
2663.align 5
2664sha256_block_data_order_armv8:
2665.LARMv8:
2666 vld1.32 {q0,q1},[r0]
2667# ifdef __thumb2__
2668 adr r3,.LARMv8
2669 sub r3,r3,#.LARMv8-K256
2670# else
2671 adrl r3,K256
2672# endif
2673 add r2,r1,r2,lsl#6 @ len to point at the end of inp
2674
2675.Loop_v8:
2676 vld1.8 {q8-q9},[r1]!
2677 vld1.8 {q10-q11},[r1]!
2678 vld1.32 {q12},[r3]!
2679 vrev32.8 q8,q8
2680 vrev32.8 q9,q9
2681 vrev32.8 q10,q10
2682 vrev32.8 q11,q11
2683 vmov q14,q0 @ offload
2684 vmov q15,q1
2685 teq r1,r2
2686 vld1.32 {q13},[r3]!
2687 vadd.i32 q12,q12,q8
2688 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2689 vmov q2,q0
2690 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2691 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2692 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2693 vld1.32 {q12},[r3]!
2694 vadd.i32 q13,q13,q9
2695 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2696 vmov q2,q0
2697 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2698 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2699 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2700 vld1.32 {q13},[r3]!
2701 vadd.i32 q12,q12,q10
2702 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2703 vmov q2,q0
2704 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2705 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2706 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2707 vld1.32 {q12},[r3]!
2708 vadd.i32 q13,q13,q11
2709 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2710 vmov q2,q0
2711 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2712 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2713 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2714 vld1.32 {q13},[r3]!
2715 vadd.i32 q12,q12,q8
2716 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2717 vmov q2,q0
2718 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2719 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2720 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2721 vld1.32 {q12},[r3]!
2722 vadd.i32 q13,q13,q9
2723 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2724 vmov q2,q0
2725 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2726 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2727 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2728 vld1.32 {q13},[r3]!
2729 vadd.i32 q12,q12,q10
2730 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2731 vmov q2,q0
2732 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2733 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2734 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2735 vld1.32 {q12},[r3]!
2736 vadd.i32 q13,q13,q11
2737 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2738 vmov q2,q0
2739 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2740 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2741 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2742 vld1.32 {q13},[r3]!
2743 vadd.i32 q12,q12,q8
2744 INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
2745 vmov q2,q0
2746 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2747 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2748 INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
2749 vld1.32 {q12},[r3]!
2750 vadd.i32 q13,q13,q9
2751 INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
2752 vmov q2,q0
2753 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2754 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2755 INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
2756 vld1.32 {q13},[r3]!
2757 vadd.i32 q12,q12,q10
2758 INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
2759 vmov q2,q0
2760 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2761 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2762 INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
2763 vld1.32 {q12},[r3]!
2764 vadd.i32 q13,q13,q11
2765 INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
2766 vmov q2,q0
2767 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2768 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2769 INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
2770 vld1.32 {q13},[r3]!
2771 vadd.i32 q12,q12,q8
2772 vmov q2,q0
2773 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2774 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2775
2776 vld1.32 {q12},[r3]!
2777 vadd.i32 q13,q13,q9
2778 vmov q2,q0
2779 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2780 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2781
2782 vld1.32 {q13},[r3]
2783 vadd.i32 q12,q12,q10
2784 sub r3,r3,#256-16 @ rewind
2785 vmov q2,q0
2786 INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
2787 INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
2788
2789 vadd.i32 q13,q13,q11
2790 vmov q2,q0
2791 INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
2792 INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
2793
2794 vadd.i32 q0,q0,q14
2795 vadd.i32 q1,q1,q15
2796 it ne
2797 bne .Loop_v8
2798
2799 vst1.32 {q0,q1},[r0]
2800
2801 bx lr @ bx lr
2802.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2803#endif
2804.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
2805.align 2
2806#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2807.comm OPENSSL_armcap_P,4,4
2808#endif
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
new file mode 100644
index 000000000000..a84e869ef900
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.c
@@ -0,0 +1,128 @@
1/*
2 * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
3 * using optimized ARM assembler and NEON instructions.
4 *
5 * Copyright © 2015 Google Inc.
6 *
7 * This file is based on sha256_ssse3_glue.c:
8 * Copyright (C) 2013 Intel Corporation
9 * Author: Tim Chen <tim.c.chen@linux.intel.com>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 */
17
18#include <crypto/internal/hash.h>
19#include <linux/crypto.h>
20#include <linux/init.h>
21#include <linux/module.h>
22#include <linux/mm.h>
23#include <linux/cryptohash.h>
24#include <linux/types.h>
25#include <linux/string.h>
26#include <crypto/sha.h>
27#include <crypto/sha256_base.h>
28#include <asm/simd.h>
29#include <asm/neon.h>
30
31#include "sha256_glue.h"
32
33asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
34 unsigned int num_blks);
35
36int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
37 unsigned int len)
38{
39 /* make sure casting to sha256_block_fn() is safe */
40 BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
41
42 return sha256_base_do_update(desc, data, len,
43 (sha256_block_fn *)sha256_block_data_order);
44}
45EXPORT_SYMBOL(crypto_sha256_arm_update);
46
47static int sha256_final(struct shash_desc *desc, u8 *out)
48{
49 sha256_base_do_finalize(desc,
50 (sha256_block_fn *)sha256_block_data_order);
51 return sha256_base_finish(desc, out);
52}
53
54int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
55 unsigned int len, u8 *out)
56{
57 sha256_base_do_update(desc, data, len,
58 (sha256_block_fn *)sha256_block_data_order);
59 return sha256_final(desc, out);
60}
61EXPORT_SYMBOL(crypto_sha256_arm_finup);
62
63static struct shash_alg algs[] = { {
64 .digestsize = SHA256_DIGEST_SIZE,
65 .init = sha256_base_init,
66 .update = crypto_sha256_arm_update,
67 .final = sha256_final,
68 .finup = crypto_sha256_arm_finup,
69 .descsize = sizeof(struct sha256_state),
70 .base = {
71 .cra_name = "sha256",
72 .cra_driver_name = "sha256-asm",
73 .cra_priority = 150,
74 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
75 .cra_blocksize = SHA256_BLOCK_SIZE,
76 .cra_module = THIS_MODULE,
77 }
78}, {
79 .digestsize = SHA224_DIGEST_SIZE,
80 .init = sha224_base_init,
81 .update = crypto_sha256_arm_update,
82 .final = sha256_final,
83 .finup = crypto_sha256_arm_finup,
84 .descsize = sizeof(struct sha256_state),
85 .base = {
86 .cra_name = "sha224",
87 .cra_driver_name = "sha224-asm",
88 .cra_priority = 150,
89 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
90 .cra_blocksize = SHA224_BLOCK_SIZE,
91 .cra_module = THIS_MODULE,
92 }
93} };
94
95static int __init sha256_mod_init(void)
96{
97 int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
98
99 if (res < 0)
100 return res;
101
102 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
103 res = crypto_register_shashes(sha256_neon_algs,
104 ARRAY_SIZE(sha256_neon_algs));
105
106 if (res < 0)
107 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
108 }
109
110 return res;
111}
112
113static void __exit sha256_mod_fini(void)
114{
115 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
116
117 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
118 crypto_unregister_shashes(sha256_neon_algs,
119 ARRAY_SIZE(sha256_neon_algs));
120}
121
122module_init(sha256_mod_init);
123module_exit(sha256_mod_fini);
124
125MODULE_LICENSE("GPL");
126MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
127
128MODULE_ALIAS_CRYPTO("sha256");
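[Editor's aside — illustrative only, not part of this patch: a minimal sketch of how the "sha256" shash registered above could be driven from other kernel code through the generic crypto API. The caller-side function name is hypothetical and error handling is abbreviated.]

	#include <crypto/hash.h>
	#include <crypto/sha.h>
	#include <linux/err.h>

	static int sha256_digest_example(const u8 *data, unsigned int len,
					 u8 out[SHA256_DIGEST_SIZE])
	{
		struct crypto_shash *tfm;
		int err;

		/* Resolves to the highest-priority registered "sha256". */
		tfm = crypto_alloc_shash("sha256", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;
			err = crypto_shash_digest(desc, data, len, out);
		}

		crypto_free_shash(tfm);
		return err;
	}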
diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h
new file mode 100644
index 000000000000..7cf0bf786ada
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.h
@@ -0,0 +1,14 @@
1#ifndef _CRYPTO_SHA256_GLUE_H
2#define _CRYPTO_SHA256_GLUE_H
3
4#include <linux/crypto.h>
5
6extern struct shash_alg sha256_neon_algs[2];
7
8int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
9 unsigned int len);
10
11int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
12 unsigned int len, u8 *hash);
13
14#endif /* _CRYPTO_SHA256_GLUE_H */
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
new file mode 100644
index 000000000000..39ccd658817e
--- /dev/null
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -0,0 +1,101 @@
1/*
2 * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
3 * using NEON instructions.
4 *
5 * Copyright © 2015 Google Inc.
6 *
7 * This file is based on sha512_neon_glue.c:
8 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 *
15 */
16
17#include <crypto/internal/hash.h>
18#include <linux/cryptohash.h>
19#include <linux/types.h>
20#include <linux/string.h>
21#include <crypto/sha.h>
22#include <crypto/sha256_base.h>
23#include <asm/byteorder.h>
24#include <asm/simd.h>
25#include <asm/neon.h>
26
27#include "sha256_glue.h"
28
29asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
30 unsigned int num_blks);
31
32static int sha256_update(struct shash_desc *desc, const u8 *data,
33 unsigned int len)
34{
35 struct sha256_state *sctx = shash_desc_ctx(desc);
36
37 if (!may_use_simd() ||
38 (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
39 return crypto_sha256_arm_update(desc, data, len);
40
41 kernel_neon_begin();
42 sha256_base_do_update(desc, data, len,
43 (sha256_block_fn *)sha256_block_data_order_neon);
44 kernel_neon_end();
45
46 return 0;
47}
48
49static int sha256_finup(struct shash_desc *desc, const u8 *data,
50 unsigned int len, u8 *out)
51{
52 if (!may_use_simd())
53 return crypto_sha256_arm_finup(desc, data, len, out);
54
55 kernel_neon_begin();
56 if (len)
57 sha256_base_do_update(desc, data, len,
58 (sha256_block_fn *)sha256_block_data_order_neon);
59 sha256_base_do_finalize(desc,
60 (sha256_block_fn *)sha256_block_data_order_neon);
61 kernel_neon_end();
62
63 return sha256_base_finish(desc, out);
64}
65
66static int sha256_final(struct shash_desc *desc, u8 *out)
67{
68 return sha256_finup(desc, NULL, 0, out);
69}
70
71struct shash_alg sha256_neon_algs[] = { {
72 .digestsize = SHA256_DIGEST_SIZE,
73 .init = sha256_base_init,
74 .update = sha256_update,
75 .final = sha256_final,
76 .finup = sha256_finup,
77 .descsize = sizeof(struct sha256_state),
78 .base = {
79 .cra_name = "sha256",
80 .cra_driver_name = "sha256-neon",
81 .cra_priority = 250,
82 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
83 .cra_blocksize = SHA256_BLOCK_SIZE,
84 .cra_module = THIS_MODULE,
85 }
86}, {
87 .digestsize = SHA224_DIGEST_SIZE,
88 .init = sha224_base_init,
89 .update = sha256_update,
90 .final = sha256_final,
91 .finup = sha256_finup,
92 .descsize = sizeof(struct sha256_state),
93 .base = {
94 .cra_name = "sha224",
95 .cra_driver_name = "sha224-neon",
96 .cra_priority = 250,
97 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
98 .cra_blocksize = SHA224_BLOCK_SIZE,
99 .cra_module = THIS_MODULE,
100 }
101} };
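[Editor's aside — not part of this patch: both glue files register the generic names "sha256"/"sha224"; the scalar assembler variant advertises cra_priority 150 and the NEON variant 250, so the crypto API resolves the generic name to the NEON implementation whenever both are loaded and NEON is usable. To the best of my understanding a specific implementation can also be requested by its driver name, e.g.:]

	/* Illustrative only: bind to the NEON driver explicitly. */
	struct crypto_shash *tfm = crypto_alloc_shash("sha256-neon", 0, 0);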
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index b1b5b893eb20..05d9e16c0dfd 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { {
284 .cra_name = "__ecb-aes-" MODE, 284 .cra_name = "__ecb-aes-" MODE,
285 .cra_driver_name = "__driver-ecb-aes-" MODE, 285 .cra_driver_name = "__driver-ecb-aes-" MODE,
286 .cra_priority = 0, 286 .cra_priority = 0,
287 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 287 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
288 CRYPTO_ALG_INTERNAL,
288 .cra_blocksize = AES_BLOCK_SIZE, 289 .cra_blocksize = AES_BLOCK_SIZE,
289 .cra_ctxsize = sizeof(struct crypto_aes_ctx), 290 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
290 .cra_alignmask = 7, 291 .cra_alignmask = 7,
@@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { {
302 .cra_name = "__cbc-aes-" MODE, 303 .cra_name = "__cbc-aes-" MODE,
303 .cra_driver_name = "__driver-cbc-aes-" MODE, 304 .cra_driver_name = "__driver-cbc-aes-" MODE,
304 .cra_priority = 0, 305 .cra_priority = 0,
305 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 306 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
307 CRYPTO_ALG_INTERNAL,
306 .cra_blocksize = AES_BLOCK_SIZE, 308 .cra_blocksize = AES_BLOCK_SIZE,
307 .cra_ctxsize = sizeof(struct crypto_aes_ctx), 309 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
308 .cra_alignmask = 7, 310 .cra_alignmask = 7,
@@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { {
320 .cra_name = "__ctr-aes-" MODE, 322 .cra_name = "__ctr-aes-" MODE,
321 .cra_driver_name = "__driver-ctr-aes-" MODE, 323 .cra_driver_name = "__driver-ctr-aes-" MODE,
322 .cra_priority = 0, 324 .cra_priority = 0,
323 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 325 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
326 CRYPTO_ALG_INTERNAL,
324 .cra_blocksize = 1, 327 .cra_blocksize = 1,
325 .cra_ctxsize = sizeof(struct crypto_aes_ctx), 328 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
326 .cra_alignmask = 7, 329 .cra_alignmask = 7,
@@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { {
338 .cra_name = "__xts-aes-" MODE, 341 .cra_name = "__xts-aes-" MODE,
339 .cra_driver_name = "__driver-xts-aes-" MODE, 342 .cra_driver_name = "__driver-xts-aes-" MODE,
340 .cra_priority = 0, 343 .cra_priority = 0,
341 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
345 CRYPTO_ALG_INTERNAL,
342 .cra_blocksize = AES_BLOCK_SIZE, 346 .cra_blocksize = AES_BLOCK_SIZE,
343 .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), 347 .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
344 .cra_alignmask = 7, 348 .cra_alignmask = 7,
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..033aae6d732a 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -66,8 +66,8 @@
66 .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 66 .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
67 67
68 /* 68 /*
69 * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, 69 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
70 * u8 *head, long bytes) 70 * int blocks)
71 */ 71 */
72ENTRY(sha1_ce_transform) 72ENTRY(sha1_ce_transform)
73 /* load round constants */ 73 /* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
78 ld1r {k3.4s}, [x6] 78 ld1r {k3.4s}, [x6]
79 79
80 /* load state */ 80 /* load state */
81 ldr dga, [x2] 81 ldr dga, [x0]
82 ldr dgb, [x2, #16] 82 ldr dgb, [x0, #16]
83 83
84 /* load partial state (if supplied) */ 84 /* load sha1_ce_state::finalize */
85 cbz x3, 0f 85 ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
86 ld1 {v8.4s-v11.4s}, [x3]
87 b 1f
88 86
89 /* load input */ 87 /* load input */
900: ld1 {v8.4s-v11.4s}, [x1], #64 880: ld1 {v8.4s-v11.4s}, [x1], #64
91 sub w0, w0, #1 89 sub w2, w2, #1
92 90
931:
94CPU_LE( rev32 v8.16b, v8.16b ) 91CPU_LE( rev32 v8.16b, v8.16b )
95CPU_LE( rev32 v9.16b, v9.16b ) 92CPU_LE( rev32 v9.16b, v9.16b )
96CPU_LE( rev32 v10.16b, v10.16b ) 93CPU_LE( rev32 v10.16b, v10.16b )
97CPU_LE( rev32 v11.16b, v11.16b ) 94CPU_LE( rev32 v11.16b, v11.16b )
98 95
992: add t0.4s, v8.4s, k0.4s 961: add t0.4s, v8.4s, k0.4s
100 mov dg0v.16b, dgav.16b 97 mov dg0v.16b, dgav.16b
101 98
102 add_update c, ev, k0, 8, 9, 10, 11, dgb 99 add_update c, ev, k0, 8, 9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE( rev32 v11.16b, v11.16b )
127 add dgbv.2s, dgbv.2s, dg1v.2s 124 add dgbv.2s, dgbv.2s, dg1v.2s
128 add dgav.4s, dgav.4s, dg0v.4s 125 add dgav.4s, dgav.4s, dg0v.4s
129 126
130 cbnz w0, 0b 127 cbnz w2, 0b
131 128
132 /* 129 /*
133 * Final block: add padding and total bit count. 130 * Final block: add padding and total bit count.
134 * Skip if we have no total byte count in x4. In that case, the input 131 * Skip if the input size was not a round multiple of the block size,
135 * size was not a round multiple of the block size, and the padding is 132 * the padding is handled by the C code in that case.
136 * handled by the C code.
137 */ 133 */
138 cbz x4, 3f 134 cbz x4, 3f
135 ldr x4, [x0, #:lo12:sha1_ce_offsetof_count]
139 movi v9.2d, #0 136 movi v9.2d, #0
140 mov x8, #0x80000000 137 mov x8, #0x80000000
141 movi v10.2d, #0 138 movi v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE( rev32 v11.16b, v11.16b )
144 mov x4, #0 141 mov x4, #0
145 mov v11.d[0], xzr 142 mov v11.d[0], xzr
146 mov v11.d[1], x7 143 mov v11.d[1], x7
147 b 2b 144 b 1b
148 145
149 /* store new state */ 146 /* store new state */
1503: str dga, [x2] 1473: str dga, [x0]
151 str dgb, [x2, #16] 148 str dgb, [x0, #16]
152 ret 149 ret
153ENDPROC(sha1_ce_transform) 150ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..114e7cc5de8c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,144 +12,81 @@
12#include <asm/unaligned.h> 12#include <asm/unaligned.h>
13#include <crypto/internal/hash.h> 13#include <crypto/internal/hash.h>
14#include <crypto/sha.h> 14#include <crypto/sha.h>
15#include <crypto/sha1_base.h>
15#include <linux/cpufeature.h> 16#include <linux/cpufeature.h>
16#include <linux/crypto.h> 17#include <linux/crypto.h>
17#include <linux/module.h> 18#include <linux/module.h>
18 19
20#define ASM_EXPORT(sym, val) \
21 asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
22
19MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); 23MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
20MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 24MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
21MODULE_LICENSE("GPL v2"); 25MODULE_LICENSE("GPL v2");
22 26
23asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, 27struct sha1_ce_state {
24 u8 *head, long bytes); 28 struct sha1_state sst;
29 u32 finalize;
30};
25 31
26static int sha1_init(struct shash_desc *desc) 32asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
27{ 33 int blocks);
28 struct sha1_state *sctx = shash_desc_ctx(desc);
29 34
30 *sctx = (struct sha1_state){ 35static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
31 .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, 36 unsigned int len)
32 };
33 return 0;
34}
35
36static int sha1_update(struct shash_desc *desc, const u8 *data,
37 unsigned int len)
38{ 37{
39 struct sha1_state *sctx = shash_desc_ctx(desc); 38 struct sha1_ce_state *sctx = shash_desc_ctx(desc);
40 unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
41
42 sctx->count += len;
43
44 if ((partial + len) >= SHA1_BLOCK_SIZE) {
45 int blocks;
46
47 if (partial) {
48 int p = SHA1_BLOCK_SIZE - partial;
49 39
50 memcpy(sctx->buffer + partial, data, p); 40 sctx->finalize = 0;
51 data += p; 41 kernel_neon_begin_partial(16);
52 len -= p; 42 sha1_base_do_update(desc, data, len,
53 } 43 (sha1_block_fn *)sha1_ce_transform);
54 44 kernel_neon_end();
55 blocks = len / SHA1_BLOCK_SIZE;
56 len %= SHA1_BLOCK_SIZE;
57
58 kernel_neon_begin_partial(16);
59 sha1_ce_transform(blocks, data, sctx->state,
60 partial ? sctx->buffer : NULL, 0);
61 kernel_neon_end();
62 45
63 data += blocks * SHA1_BLOCK_SIZE;
64 partial = 0;
65 }
66 if (len)
67 memcpy(sctx->buffer + partial, data, len);
68 return 0; 46 return 0;
69} 47}
70 48
71static int sha1_final(struct shash_desc *desc, u8 *out) 49static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
50 unsigned int len, u8 *out)
72{ 51{
73 static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; 52 struct sha1_ce_state *sctx = shash_desc_ctx(desc);
53 bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
74 54
75 struct sha1_state *sctx = shash_desc_ctx(desc); 55 ASM_EXPORT(sha1_ce_offsetof_count,
76 __be64 bits = cpu_to_be64(sctx->count << 3); 56 offsetof(struct sha1_ce_state, sst.count));
77 __be32 *dst = (__be32 *)out; 57 ASM_EXPORT(sha1_ce_offsetof_finalize,
78 int i; 58 offsetof(struct sha1_ce_state, finalize));
79
80 u32 padlen = SHA1_BLOCK_SIZE
81 - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
82
83 sha1_update(desc, padding, padlen);
84 sha1_update(desc, (const u8 *)&bits, sizeof(bits));
85
86 for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
87 put_unaligned_be32(sctx->state[i], dst++);
88
89 *sctx = (struct sha1_state){};
90 return 0;
91}
92
93static int sha1_finup(struct shash_desc *desc, const u8 *data,
94 unsigned int len, u8 *out)
95{
96 struct sha1_state *sctx = shash_desc_ctx(desc);
97 __be32 *dst = (__be32 *)out;
98 int blocks;
99 int i;
100
101 if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
102 sha1_update(desc, data, len);
103 return sha1_final(desc, out);
104 }
105 59
106 /* 60 /*
107 * Use a fast path if the input is a multiple of 64 bytes. In 61 * Allow the asm code to perform the finalization if there is no
108 * this case, there is no need to copy data around, and we can 62 * partial data and the input is a round multiple of the block size.
109 * perform the entire digest calculation in a single invocation
110 * of sha1_ce_transform()
111 */ 63 */
112 blocks = len / SHA1_BLOCK_SIZE; 64 sctx->finalize = finalize;
113 65
114 kernel_neon_begin_partial(16); 66 kernel_neon_begin_partial(16);
115 sha1_ce_transform(blocks, data, sctx->state, NULL, len); 67 sha1_base_do_update(desc, data, len,
68 (sha1_block_fn *)sha1_ce_transform);
69 if (!finalize)
70 sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
116 kernel_neon_end(); 71 kernel_neon_end();
117 72 return sha1_base_finish(desc, out);
118 for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
119 put_unaligned_be32(sctx->state[i], dst++);
120
121 *sctx = (struct sha1_state){};
122 return 0;
123} 73}
124 74
125static int sha1_export(struct shash_desc *desc, void *out) 75static int sha1_ce_final(struct shash_desc *desc, u8 *out)
126{ 76{
127 struct sha1_state *sctx = shash_desc_ctx(desc); 77 kernel_neon_begin_partial(16);
128 struct sha1_state *dst = out; 78 sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
129 79 kernel_neon_end();
130 *dst = *sctx; 80 return sha1_base_finish(desc, out);
131 return 0;
132}
133
134static int sha1_import(struct shash_desc *desc, const void *in)
135{
136 struct sha1_state *sctx = shash_desc_ctx(desc);
137 struct sha1_state const *src = in;
138
139 *sctx = *src;
140 return 0;
141} 81}
142 82
143static struct shash_alg alg = { 83static struct shash_alg alg = {
144 .init = sha1_init, 84 .init = sha1_base_init,
145 .update = sha1_update, 85 .update = sha1_ce_update,
146 .final = sha1_final, 86 .final = sha1_ce_final,
147 .finup = sha1_finup, 87 .finup = sha1_ce_finup,
148 .export = sha1_export, 88 .descsize = sizeof(struct sha1_ce_state),
149 .import = sha1_import,
150 .descsize = sizeof(struct sha1_state),
151 .digestsize = SHA1_DIGEST_SIZE, 89 .digestsize = SHA1_DIGEST_SIZE,
152 .statesize = sizeof(struct sha1_state),
153 .base = { 90 .base = {
154 .cra_name = "sha1", 91 .cra_name = "sha1",
155 .cra_driver_name = "sha1-ce", 92 .cra_driver_name = "sha1-ce",
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 7f29fc031ea8..5df9d9d470ad 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -73,8 +73,8 @@
73 .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 73 .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
74 74
75 /* 75 /*
76 * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, 76 * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
77 * u8 *head, long bytes) 77 * int blocks)
78 */ 78 */
79ENTRY(sha2_ce_transform) 79ENTRY(sha2_ce_transform)
80 /* load round constants */ 80 /* load round constants */
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform)
85 ld1 {v12.4s-v15.4s}, [x8] 85 ld1 {v12.4s-v15.4s}, [x8]
86 86
87 /* load state */ 87 /* load state */
88 ldp dga, dgb, [x2] 88 ldp dga, dgb, [x0]
89 89
90 /* load partial input (if supplied) */ 90 /* load sha256_ce_state::finalize */
91 cbz x3, 0f 91 ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
92 ld1 {v16.4s-v19.4s}, [x3]
93 b 1f
94 92
95 /* load input */ 93 /* load input */
960: ld1 {v16.4s-v19.4s}, [x1], #64 940: ld1 {v16.4s-v19.4s}, [x1], #64
97 sub w0, w0, #1 95 sub w2, w2, #1
98 96
991:
100CPU_LE( rev32 v16.16b, v16.16b ) 97CPU_LE( rev32 v16.16b, v16.16b )
101CPU_LE( rev32 v17.16b, v17.16b ) 98CPU_LE( rev32 v17.16b, v17.16b )
102CPU_LE( rev32 v18.16b, v18.16b ) 99CPU_LE( rev32 v18.16b, v18.16b )
103CPU_LE( rev32 v19.16b, v19.16b ) 100CPU_LE( rev32 v19.16b, v19.16b )
104 101
1052: add t0.4s, v16.4s, v0.4s 1021: add t0.4s, v16.4s, v0.4s
106 mov dg0v.16b, dgav.16b 103 mov dg0v.16b, dgav.16b
107 mov dg1v.16b, dgbv.16b 104 mov dg1v.16b, dgbv.16b
108 105
@@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b )
131 add dgbv.4s, dgbv.4s, dg1v.4s 128 add dgbv.4s, dgbv.4s, dg1v.4s
132 129
133 /* handled all input blocks? */ 130 /* handled all input blocks? */
134 cbnz w0, 0b 131 cbnz w2, 0b
135 132
136 /* 133 /*
137 * Final block: add padding and total bit count. 134 * Final block: add padding and total bit count.
138 * Skip if we have no total byte count in x4. In that case, the input 135 * Skip if the input size was not a round multiple of the block size,
139 * size was not a round multiple of the block size, and the padding is 136 * the padding is handled by the C code in that case.
140 * handled by the C code.
141 */ 137 */
142 cbz x4, 3f 138 cbz x4, 3f
139 ldr x4, [x0, #:lo12:sha256_ce_offsetof_count]
143 movi v17.2d, #0 140 movi v17.2d, #0
144 mov x8, #0x80000000 141 mov x8, #0x80000000
145 movi v18.2d, #0 142 movi v18.2d, #0
@@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b )
148 mov x4, #0 145 mov x4, #0
149 mov v19.d[0], xzr 146 mov v19.d[0], xzr
150 mov v19.d[1], x7 147 mov v19.d[1], x7
151 b 2b 148 b 1b
152 149
153 /* store new state */ 150 /* store new state */
1543: stp dga, dgb, [x2] 1513: stp dga, dgb, [x0]
155 ret 152 ret
156ENDPROC(sha2_ce_transform) 153ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index ae67e88c28b9..1340e44c048b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -12,206 +12,82 @@
12#include <asm/unaligned.h> 12#include <asm/unaligned.h>
13#include <crypto/internal/hash.h> 13#include <crypto/internal/hash.h>
14#include <crypto/sha.h> 14#include <crypto/sha.h>
15#include <crypto/sha256_base.h>
15#include <linux/cpufeature.h> 16#include <linux/cpufeature.h>
16#include <linux/crypto.h> 17#include <linux/crypto.h>
17#include <linux/module.h> 18#include <linux/module.h>
18 19
20#define ASM_EXPORT(sym, val) \
21 asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
22
19MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); 23MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
20MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 24MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
21MODULE_LICENSE("GPL v2"); 25MODULE_LICENSE("GPL v2");
22 26
23asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, 27struct sha256_ce_state {
24 u8 *head, long bytes); 28 struct sha256_state sst;
25 29 u32 finalize;
26static int sha224_init(struct shash_desc *desc) 30};
27{
28 struct sha256_state *sctx = shash_desc_ctx(desc);
29
30 *sctx = (struct sha256_state){
31 .state = {
32 SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
33 SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
34 }
35 };
36 return 0;
37}
38
39static int sha256_init(struct shash_desc *desc)
40{
41 struct sha256_state *sctx = shash_desc_ctx(desc);
42
43 *sctx = (struct sha256_state){
44 .state = {
45 SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
46 SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
47 }
48 };
49 return 0;
50}
51
52static int sha2_update(struct shash_desc *desc, const u8 *data,
53 unsigned int len)
54{
55 struct sha256_state *sctx = shash_desc_ctx(desc);
56 unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
57
58 sctx->count += len;
59
60 if ((partial + len) >= SHA256_BLOCK_SIZE) {
61 int blocks;
62
63 if (partial) {
64 int p = SHA256_BLOCK_SIZE - partial;
65
66 memcpy(sctx->buf + partial, data, p);
67 data += p;
68 len -= p;
69 }
70 31
71 blocks = len / SHA256_BLOCK_SIZE; 32asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
72 len %= SHA256_BLOCK_SIZE; 33 int blocks);
73 34
74 kernel_neon_begin_partial(28); 35static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
75 sha2_ce_transform(blocks, data, sctx->state, 36 unsigned int len)
76 partial ? sctx->buf : NULL, 0);
77 kernel_neon_end();
78
79 data += blocks * SHA256_BLOCK_SIZE;
80 partial = 0;
81 }
82 if (len)
83 memcpy(sctx->buf + partial, data, len);
84 return 0;
85}
86
87static void sha2_final(struct shash_desc *desc)
88{ 37{
89 static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; 38 struct sha256_ce_state *sctx = shash_desc_ctx(desc);
90
91 struct sha256_state *sctx = shash_desc_ctx(desc);
92 __be64 bits = cpu_to_be64(sctx->count << 3);
93 u32 padlen = SHA256_BLOCK_SIZE
94 - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
95
96 sha2_update(desc, padding, padlen);
97 sha2_update(desc, (const u8 *)&bits, sizeof(bits));
98}
99
100static int sha224_final(struct shash_desc *desc, u8 *out)
101{
102 struct sha256_state *sctx = shash_desc_ctx(desc);
103 __be32 *dst = (__be32 *)out;
104 int i;
105
106 sha2_final(desc);
107
108 for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
109 put_unaligned_be32(sctx->state[i], dst++);
110
111 *sctx = (struct sha256_state){};
112 return 0;
113}
114 39
115static int sha256_final(struct shash_desc *desc, u8 *out) 40 sctx->finalize = 0;
116{ 41 kernel_neon_begin_partial(28);
117 struct sha256_state *sctx = shash_desc_ctx(desc); 42 sha256_base_do_update(desc, data, len,
118 __be32 *dst = (__be32 *)out; 43 (sha256_block_fn *)sha2_ce_transform);
119 int i; 44 kernel_neon_end();
120
121 sha2_final(desc);
122
123 for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
124 put_unaligned_be32(sctx->state[i], dst++);
125 45
126 *sctx = (struct sha256_state){};
127 return 0; 46 return 0;
128} 47}
129 48
130static void sha2_finup(struct shash_desc *desc, const u8 *data, 49static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
131 unsigned int len) 50 unsigned int len, u8 *out)
132{ 51{
133 struct sha256_state *sctx = shash_desc_ctx(desc); 52 struct sha256_ce_state *sctx = shash_desc_ctx(desc);
134 int blocks; 53 bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
135 54
136 if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { 55 ASM_EXPORT(sha256_ce_offsetof_count,
137 sha2_update(desc, data, len); 56 offsetof(struct sha256_ce_state, sst.count));
138 sha2_final(desc); 57 ASM_EXPORT(sha256_ce_offsetof_finalize,
139 return; 58 offsetof(struct sha256_ce_state, finalize));
140 }
141 59
142 /* 60 /*
143 * Use a fast path if the input is a multiple of 64 bytes. In 61 * Allow the asm code to perform the finalization if there is no
144 * this case, there is no need to copy data around, and we can 62 * partial data and the input is a round multiple of the block size.
145 * perform the entire digest calculation in a single invocation
146 * of sha2_ce_transform()
147 */ 63 */
148 blocks = len / SHA256_BLOCK_SIZE; 64 sctx->finalize = finalize;
149 65
150 kernel_neon_begin_partial(28); 66 kernel_neon_begin_partial(28);
151 sha2_ce_transform(blocks, data, sctx->state, NULL, len); 67 sha256_base_do_update(desc, data, len,
68 (sha256_block_fn *)sha2_ce_transform);
69 if (!finalize)
70 sha256_base_do_finalize(desc,
71 (sha256_block_fn *)sha2_ce_transform);
152 kernel_neon_end(); 72 kernel_neon_end();
73 return sha256_base_finish(desc, out);
153} 74}
154 75
155static int sha224_finup(struct shash_desc *desc, const u8 *data, 76static int sha256_ce_final(struct shash_desc *desc, u8 *out)
156 unsigned int len, u8 *out)
157{ 77{
158 struct sha256_state *sctx = shash_desc_ctx(desc); 78 kernel_neon_begin_partial(28);
159 __be32 *dst = (__be32 *)out; 79 sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
160 int i; 80 kernel_neon_end();
161 81 return sha256_base_finish(desc, out);
162 sha2_finup(desc, data, len);
163
164 for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
165 put_unaligned_be32(sctx->state[i], dst++);
166
167 *sctx = (struct sha256_state){};
168 return 0;
169}
170
171static int sha256_finup(struct shash_desc *desc, const u8 *data,
172 unsigned int len, u8 *out)
173{
174 struct sha256_state *sctx = shash_desc_ctx(desc);
175 __be32 *dst = (__be32 *)out;
176 int i;
177
178 sha2_finup(desc, data, len);
179
180 for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
181 put_unaligned_be32(sctx->state[i], dst++);
182
183 *sctx = (struct sha256_state){};
184 return 0;
185}
186
187static int sha2_export(struct shash_desc *desc, void *out)
188{
189 struct sha256_state *sctx = shash_desc_ctx(desc);
190 struct sha256_state *dst = out;
191
192 *dst = *sctx;
193 return 0;
194}
195
196static int sha2_import(struct shash_desc *desc, const void *in)
197{
198 struct sha256_state *sctx = shash_desc_ctx(desc);
199 struct sha256_state const *src = in;
200
201 *sctx = *src;
202 return 0;
203} 82}
204 83
205static struct shash_alg algs[] = { { 84static struct shash_alg algs[] = { {
206 .init = sha224_init, 85 .init = sha224_base_init,
207 .update = sha2_update, 86 .update = sha256_ce_update,
208 .final = sha224_final, 87 .final = sha256_ce_final,
209 .finup = sha224_finup, 88 .finup = sha256_ce_finup,
210 .export = sha2_export, 89 .descsize = sizeof(struct sha256_ce_state),
211 .import = sha2_import,
212 .descsize = sizeof(struct sha256_state),
213 .digestsize = SHA224_DIGEST_SIZE, 90 .digestsize = SHA224_DIGEST_SIZE,
214 .statesize = sizeof(struct sha256_state),
215 .base = { 91 .base = {
216 .cra_name = "sha224", 92 .cra_name = "sha224",
217 .cra_driver_name = "sha224-ce", 93 .cra_driver_name = "sha224-ce",
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { {
221 .cra_module = THIS_MODULE, 97 .cra_module = THIS_MODULE,
222 } 98 }
223}, { 99}, {
224 .init = sha256_init, 100 .init = sha256_base_init,
225 .update = sha2_update, 101 .update = sha256_ce_update,
226 .final = sha256_final, 102 .final = sha256_ce_final,
227 .finup = sha256_finup, 103 .finup = sha256_ce_finup,
228 .export = sha2_export, 104 .descsize = sizeof(struct sha256_ce_state),
229 .import = sha2_import,
230 .descsize = sizeof(struct sha256_state),
231 .digestsize = SHA256_DIGEST_SIZE, 105 .digestsize = SHA256_DIGEST_SIZE,
232 .statesize = sizeof(struct sha256_state),
233 .base = { 106 .base = {
234 .cra_name = "sha256", 107 .cra_name = "sha256",
235 .cra_driver_name = "sha256-ce", 108 .cra_driver_name = "sha256-ce",
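[Editor's aside — not part of this patch: the finalize fast path in sha256_ce_finup() above applies only when no partial block has been buffered and the finup() length is a whole number of 64-byte blocks; restated as a standalone predicate for clarity (the helper name is illustrative):]

	static bool sha256_ce_may_finalize_in_asm(const struct sha256_ce_state *sctx,
						  unsigned int len)
	{
		/* No buffered partial data and a round multiple of the block size. */
		return sctx->sst.count == 0 && len % SHA256_BLOCK_SIZE == 0;
	}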
diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile
index a74f76d85a2f..f7aa9d5d3b87 100644
--- a/arch/mips/cavium-octeon/crypto/Makefile
+++ b/arch/mips/cavium-octeon/crypto/Makefile
@@ -4,4 +4,7 @@
4 4
5obj-y += octeon-crypto.o 5obj-y += octeon-crypto.o
6 6
7obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o 7obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o
8obj-$(CONFIG_CRYPTO_SHA1_OCTEON) += octeon-sha1.o
9obj-$(CONFIG_CRYPTO_SHA256_OCTEON) += octeon-sha256.o
10obj-$(CONFIG_CRYPTO_SHA512_OCTEON) += octeon-sha512.o
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
index 7c82ff463b65..f66bd1adc7ff 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
@@ -17,7 +17,7 @@
17 * crypto operations in calls to octeon_crypto_enable/disable in order to make 17 * crypto operations in calls to octeon_crypto_enable/disable in order to make
18 * sure the state of COP2 isn't corrupted if userspace is also performing 18 * sure the state of COP2 isn't corrupted if userspace is also performing
19 * hardware crypto operations. Allocate the state parameter on the stack. 19 * hardware crypto operations. Allocate the state parameter on the stack.
20 * Preemption must be disabled to prevent context switches. 20 * Returns with preemption disabled.
21 * 21 *
22 * @state: Pointer to state structure to store current COP2 state in. 22 * @state: Pointer to state structure to store current COP2 state in.
23 * 23 *
@@ -28,6 +28,7 @@ unsigned long octeon_crypto_enable(struct octeon_cop2_state *state)
28 int status; 28 int status;
29 unsigned long flags; 29 unsigned long flags;
30 30
31 preempt_disable();
31 local_irq_save(flags); 32 local_irq_save(flags);
32 status = read_c0_status(); 33 status = read_c0_status();
33 write_c0_status(status | ST0_CU2); 34 write_c0_status(status | ST0_CU2);
@@ -62,5 +63,6 @@ void octeon_crypto_disable(struct octeon_cop2_state *state,
62 else 63 else
63 write_c0_status(read_c0_status() & ~ST0_CU2); 64 write_c0_status(read_c0_status() & ~ST0_CU2);
64 local_irq_restore(flags); 65 local_irq_restore(flags);
66 preempt_enable();
65} 67}
66EXPORT_SYMBOL_GPL(octeon_crypto_disable); 68EXPORT_SYMBOL_GPL(octeon_crypto_disable);
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
index e2a4aece9c24..355072535110 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
@@ -5,7 +5,8 @@
5 * 5 *
6 * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved. 6 * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved.
7 * 7 *
8 * MD5 instruction definitions added by Aaro Koskinen <aaro.koskinen@iki.fi>. 8 * MD5/SHA1/SHA256/SHA512 instruction definitions added by
9 * Aaro Koskinen <aaro.koskinen@iki.fi>.
9 * 10 *
10 */ 11 */
11#ifndef __LINUX_OCTEON_CRYPTO_H 12#ifndef __LINUX_OCTEON_CRYPTO_H
@@ -21,11 +22,11 @@ extern void octeon_crypto_disable(struct octeon_cop2_state *state,
21 unsigned long flags); 22 unsigned long flags);
22 23
23/* 24/*
24 * Macros needed to implement MD5: 25 * Macros needed to implement MD5/SHA1/SHA256:
25 */ 26 */
26 27
27/* 28/*
28 * The index can be 0-1. 29 * The index can be 0-1 (MD5) or 0-2 (SHA1), 0-3 (SHA256).
29 */ 30 */
30#define write_octeon_64bit_hash_dword(value, index) \ 31#define write_octeon_64bit_hash_dword(value, index) \
31do { \ 32do { \
@@ -36,7 +37,7 @@ do { \
36} while (0) 37} while (0)
37 38
38/* 39/*
39 * The index can be 0-1. 40 * The index can be 0-1 (MD5) or 0-2 (SHA1), 0-3 (SHA256).
40 */ 41 */
41#define read_octeon_64bit_hash_dword(index) \ 42#define read_octeon_64bit_hash_dword(index) \
42({ \ 43({ \
@@ -72,4 +73,78 @@ do { \
72 : [rt] "d" (value)); \ 73 : [rt] "d" (value)); \
73} while (0) 74} while (0)
74 75
76/*
77 * The value is the final block dword (64-bit).
78 */
79#define octeon_sha1_start(value) \
80do { \
81 __asm__ __volatile__ ( \
82 "dmtc2 %[rt],0x4057" \
83 : \
84 : [rt] "d" (value)); \
85} while (0)
86
87/*
88 * The value is the final block dword (64-bit).
89 */
90#define octeon_sha256_start(value) \
91do { \
92 __asm__ __volatile__ ( \
93 "dmtc2 %[rt],0x404f" \
94 : \
95 : [rt] "d" (value)); \
96} while (0)
97
98/*
99 * Macros needed to implement SHA512:
100 */
101
102/*
103 * The index can be 0-7.
104 */
105#define write_octeon_64bit_hash_sha512(value, index) \
106do { \
107 __asm__ __volatile__ ( \
108 "dmtc2 %[rt],0x0250+" STR(index) \
109 : \
110 : [rt] "d" (value)); \
111} while (0)
112
113/*
114 * The index can be 0-7.
115 */
116#define read_octeon_64bit_hash_sha512(index) \
117({ \
118 u64 __value; \
119 \
120 __asm__ __volatile__ ( \
121 "dmfc2 %[rt],0x0250+" STR(index) \
122 : [rt] "=d" (__value) \
123 : ); \
124 \
125 __value; \
126})
127
128/*
129 * The index can be 0-14.
130 */
131#define write_octeon_64bit_block_sha512(value, index) \
132do { \
133 __asm__ __volatile__ ( \
134 "dmtc2 %[rt],0x0240+" STR(index) \
135 : \
136 : [rt] "d" (value)); \
137} while (0)
138
139/*
140 * The value is the final block word (64-bit).
141 */
142#define octeon_sha512_start(value) \
143do { \
144 __asm__ __volatile__ ( \
145 "dmtc2 %[rt],0x424f" \
146 : \
147 : [rt] "d" (value)); \
148} while (0)
149
75#endif /* __LINUX_OCTEON_CRYPTO_H */ 150#endif /* __LINUX_OCTEON_CRYPTO_H */
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
index b909881ba6c1..12dccdb38286 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-md5.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c
@@ -97,8 +97,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
97 memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data, 97 memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data,
98 avail); 98 avail);
99 99
100 local_bh_disable();
101 preempt_disable();
102 flags = octeon_crypto_enable(&state); 100 flags = octeon_crypto_enable(&state);
103 octeon_md5_store_hash(mctx); 101 octeon_md5_store_hash(mctx);
104 102
@@ -114,8 +112,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
114 112
115 octeon_md5_read_hash(mctx); 113 octeon_md5_read_hash(mctx);
116 octeon_crypto_disable(&state, flags); 114 octeon_crypto_disable(&state, flags);
117 preempt_enable();
118 local_bh_enable();
119 115
120 memcpy(mctx->block, data, len); 116 memcpy(mctx->block, data, len);
121 117
@@ -133,8 +129,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
133 129
134 *p++ = 0x80; 130 *p++ = 0x80;
135 131
136 local_bh_disable();
137 preempt_disable();
138 flags = octeon_crypto_enable(&state); 132 flags = octeon_crypto_enable(&state);
139 octeon_md5_store_hash(mctx); 133 octeon_md5_store_hash(mctx);
140 134
@@ -152,8 +146,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
152 146
153 octeon_md5_read_hash(mctx); 147 octeon_md5_read_hash(mctx);
154 octeon_crypto_disable(&state, flags); 148 octeon_crypto_disable(&state, flags);
155 preempt_enable();
156 local_bh_enable();
157 149
158 memcpy(out, mctx->hash, sizeof(mctx->hash)); 150 memcpy(out, mctx->hash, sizeof(mctx->hash));
159 memset(mctx, 0, sizeof(*mctx)); 151 memset(mctx, 0, sizeof(*mctx));
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
new file mode 100644
index 000000000000..2b74b5b67cae
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
@@ -0,0 +1,241 @@
1/*
2 * Cryptographic API.
3 *
4 * SHA1 Secure Hash Algorithm.
5 *
6 * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
7 *
8 * Based on crypto/sha1_generic.c, which is:
9 *
10 * Copyright (c) Alan Smithee.
11 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
12 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 2 of the License, or (at your option)
17 * any later version.
18 */
19
20#include <linux/mm.h>
21#include <crypto/sha.h>
22#include <linux/init.h>
23#include <linux/types.h>
24#include <linux/module.h>
25#include <asm/byteorder.h>
26#include <asm/octeon/octeon.h>
27#include <crypto/internal/hash.h>
28
29#include "octeon-crypto.h"
30
31/*
32 * We pass everything as 64-bit. OCTEON can handle misaligned data.
33 */
34
35static void octeon_sha1_store_hash(struct sha1_state *sctx)
36{
37 u64 *hash = (u64 *)sctx->state;
38 union {
39 u32 word[2];
40 u64 dword;
41 } hash_tail = { { sctx->state[4], } };
42
43 write_octeon_64bit_hash_dword(hash[0], 0);
44 write_octeon_64bit_hash_dword(hash[1], 1);
45 write_octeon_64bit_hash_dword(hash_tail.dword, 2);
46 memzero_explicit(&hash_tail.word[0], sizeof(hash_tail.word[0]));
47}
48
49static void octeon_sha1_read_hash(struct sha1_state *sctx)
50{
51 u64 *hash = (u64 *)sctx->state;
52 union {
53 u32 word[2];
54 u64 dword;
55 } hash_tail;
56
57 hash[0] = read_octeon_64bit_hash_dword(0);
58 hash[1] = read_octeon_64bit_hash_dword(1);
59 hash_tail.dword = read_octeon_64bit_hash_dword(2);
60 sctx->state[4] = hash_tail.word[0];
61 memzero_explicit(&hash_tail.dword, sizeof(hash_tail.dword));
62}
63
64static void octeon_sha1_transform(const void *_block)
65{
66 const u64 *block = _block;
67
68 write_octeon_64bit_block_dword(block[0], 0);
69 write_octeon_64bit_block_dword(block[1], 1);
70 write_octeon_64bit_block_dword(block[2], 2);
71 write_octeon_64bit_block_dword(block[3], 3);
72 write_octeon_64bit_block_dword(block[4], 4);
73 write_octeon_64bit_block_dword(block[5], 5);
74 write_octeon_64bit_block_dword(block[6], 6);
75 octeon_sha1_start(block[7]);
76}
77
78static int octeon_sha1_init(struct shash_desc *desc)
79{
80 struct sha1_state *sctx = shash_desc_ctx(desc);
81
82 sctx->state[0] = SHA1_H0;
83 sctx->state[1] = SHA1_H1;
84 sctx->state[2] = SHA1_H2;
85 sctx->state[3] = SHA1_H3;
86 sctx->state[4] = SHA1_H4;
87 sctx->count = 0;
88
89 return 0;
90}
91
92static void __octeon_sha1_update(struct sha1_state *sctx, const u8 *data,
93 unsigned int len)
94{
95 unsigned int partial;
96 unsigned int done;
97 const u8 *src;
98
99 partial = sctx->count % SHA1_BLOCK_SIZE;
100 sctx->count += len;
101 done = 0;
102 src = data;
103
104 if ((partial + len) >= SHA1_BLOCK_SIZE) {
105 if (partial) {
106 done = -partial;
107 memcpy(sctx->buffer + partial, data,
108 done + SHA1_BLOCK_SIZE);
109 src = sctx->buffer;
110 }
111
112 do {
113 octeon_sha1_transform(src);
114 done += SHA1_BLOCK_SIZE;
115 src = data + done;
116 } while (done + SHA1_BLOCK_SIZE <= len);
117
118 partial = 0;
119 }
120 memcpy(sctx->buffer + partial, src, len - done);
121}
122
123static int octeon_sha1_update(struct shash_desc *desc, const u8 *data,
124 unsigned int len)
125{
126 struct sha1_state *sctx = shash_desc_ctx(desc);
127 struct octeon_cop2_state state;
128 unsigned long flags;
129
130 /*
131 * Small updates never reach the crypto engine, so the generic sha1 is
132 * faster because of the heavyweight octeon_crypto_enable() /
133 * octeon_crypto_disable().
134 */
135 if ((sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
136 return crypto_sha1_update(desc, data, len);
137
138 flags = octeon_crypto_enable(&state);
139 octeon_sha1_store_hash(sctx);
140
141 __octeon_sha1_update(sctx, data, len);
142
143 octeon_sha1_read_hash(sctx);
144 octeon_crypto_disable(&state, flags);
145
146 return 0;
147}
148
149static int octeon_sha1_final(struct shash_desc *desc, u8 *out)
150{
151 struct sha1_state *sctx = shash_desc_ctx(desc);
152 static const u8 padding[64] = { 0x80, };
153 struct octeon_cop2_state state;
154 __be32 *dst = (__be32 *)out;
155 unsigned int pad_len;
156 unsigned long flags;
157 unsigned int index;
158 __be64 bits;
159 int i;
160
161 /* Save number of bits. */
162 bits = cpu_to_be64(sctx->count << 3);
163
164 /* Pad out to 56 mod 64. */
165 index = sctx->count & 0x3f;
166 pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
167
168 flags = octeon_crypto_enable(&state);
169 octeon_sha1_store_hash(sctx);
170
171 __octeon_sha1_update(sctx, padding, pad_len);
172
173 /* Append length (before padding). */
174 __octeon_sha1_update(sctx, (const u8 *)&bits, sizeof(bits));
175
176 octeon_sha1_read_hash(sctx);
177 octeon_crypto_disable(&state, flags);
178
179 /* Store state in digest */
180 for (i = 0; i < 5; i++)
181 dst[i] = cpu_to_be32(sctx->state[i]);
182
183 /* Zeroize sensitive information. */
184 memset(sctx, 0, sizeof(*sctx));
185
186 return 0;
187}
188
189static int octeon_sha1_export(struct shash_desc *desc, void *out)
190{
191 struct sha1_state *sctx = shash_desc_ctx(desc);
192
193 memcpy(out, sctx, sizeof(*sctx));
194 return 0;
195}
196
197static int octeon_sha1_import(struct shash_desc *desc, const void *in)
198{
199 struct sha1_state *sctx = shash_desc_ctx(desc);
200
201 memcpy(sctx, in, sizeof(*sctx));
202 return 0;
203}
204
205static struct shash_alg octeon_sha1_alg = {
206 .digestsize = SHA1_DIGEST_SIZE,
207 .init = octeon_sha1_init,
208 .update = octeon_sha1_update,
209 .final = octeon_sha1_final,
210 .export = octeon_sha1_export,
211 .import = octeon_sha1_import,
212 .descsize = sizeof(struct sha1_state),
213 .statesize = sizeof(struct sha1_state),
214 .base = {
215 .cra_name = "sha1",
216 .cra_driver_name= "octeon-sha1",
217 .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
218 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
219 .cra_blocksize = SHA1_BLOCK_SIZE,
220 .cra_module = THIS_MODULE,
221 }
222};
223
224static int __init octeon_sha1_mod_init(void)
225{
226 if (!octeon_has_crypto())
227 return -ENOTSUPP;
228 return crypto_register_shash(&octeon_sha1_alg);
229}
230
231static void __exit octeon_sha1_mod_fini(void)
232{
233 crypto_unregister_shash(&octeon_sha1_alg);
234}
235
236module_init(octeon_sha1_mod_init);
237module_exit(octeon_sha1_mod_fini);
238
239MODULE_LICENSE("GPL");
240MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (OCTEON)");
241MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
new file mode 100644
index 000000000000..97e96fead08a
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
@@ -0,0 +1,280 @@
1/*
2 * Cryptographic API.
3 *
4 * SHA-224 and SHA-256 Secure Hash Algorithm.
5 *
6 * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
7 *
8 * Based on crypto/sha256_generic.c, which is:
9 *
10 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
11 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
12 * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
13 * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
14 *
15 * This program is free software; you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License as published by the Free
17 * Software Foundation; either version 2 of the License, or (at your option)
18 * any later version.
19 */
20
21#include <linux/mm.h>
22#include <crypto/sha.h>
23#include <linux/init.h>
24#include <linux/types.h>
25#include <linux/module.h>
26#include <asm/byteorder.h>
27#include <asm/octeon/octeon.h>
28#include <crypto/internal/hash.h>
29
30#include "octeon-crypto.h"
31
32/*
33 * We pass everything as 64-bit. OCTEON can handle misaligned data.
34 */
35
36static void octeon_sha256_store_hash(struct sha256_state *sctx)
37{
38 u64 *hash = (u64 *)sctx->state;
39
40 write_octeon_64bit_hash_dword(hash[0], 0);
41 write_octeon_64bit_hash_dword(hash[1], 1);
42 write_octeon_64bit_hash_dword(hash[2], 2);
43 write_octeon_64bit_hash_dword(hash[3], 3);
44}
45
46static void octeon_sha256_read_hash(struct sha256_state *sctx)
47{
48 u64 *hash = (u64 *)sctx->state;
49
50 hash[0] = read_octeon_64bit_hash_dword(0);
51 hash[1] = read_octeon_64bit_hash_dword(1);
52 hash[2] = read_octeon_64bit_hash_dword(2);
53 hash[3] = read_octeon_64bit_hash_dword(3);
54}
55
56static void octeon_sha256_transform(const void *_block)
57{
58 const u64 *block = _block;
59
60 write_octeon_64bit_block_dword(block[0], 0);
61 write_octeon_64bit_block_dword(block[1], 1);
62 write_octeon_64bit_block_dword(block[2], 2);
63 write_octeon_64bit_block_dword(block[3], 3);
64 write_octeon_64bit_block_dword(block[4], 4);
65 write_octeon_64bit_block_dword(block[5], 5);
66 write_octeon_64bit_block_dword(block[6], 6);
67 octeon_sha256_start(block[7]);
68}
69
70static int octeon_sha224_init(struct shash_desc *desc)
71{
72 struct sha256_state *sctx = shash_desc_ctx(desc);
73
74 sctx->state[0] = SHA224_H0;
75 sctx->state[1] = SHA224_H1;
76 sctx->state[2] = SHA224_H2;
77 sctx->state[3] = SHA224_H3;
78 sctx->state[4] = SHA224_H4;
79 sctx->state[5] = SHA224_H5;
80 sctx->state[6] = SHA224_H6;
81 sctx->state[7] = SHA224_H7;
82 sctx->count = 0;
83
84 return 0;
85}
86
87static int octeon_sha256_init(struct shash_desc *desc)
88{
89 struct sha256_state *sctx = shash_desc_ctx(desc);
90
91 sctx->state[0] = SHA256_H0;
92 sctx->state[1] = SHA256_H1;
93 sctx->state[2] = SHA256_H2;
94 sctx->state[3] = SHA256_H3;
95 sctx->state[4] = SHA256_H4;
96 sctx->state[5] = SHA256_H5;
97 sctx->state[6] = SHA256_H6;
98 sctx->state[7] = SHA256_H7;
99 sctx->count = 0;
100
101 return 0;
102}
103
104static void __octeon_sha256_update(struct sha256_state *sctx, const u8 *data,
105 unsigned int len)
106{
107 unsigned int partial;
108 unsigned int done;
109 const u8 *src;
110
111 partial = sctx->count % SHA256_BLOCK_SIZE;
112 sctx->count += len;
113 done = 0;
114 src = data;
115
116 if ((partial + len) >= SHA256_BLOCK_SIZE) {
117 if (partial) {
118 done = -partial;
119 memcpy(sctx->buf + partial, data,
120 done + SHA256_BLOCK_SIZE);
121 src = sctx->buf;
122 }
123
124 do {
125 octeon_sha256_transform(src);
126 done += SHA256_BLOCK_SIZE;
127 src = data + done;
128 } while (done + SHA256_BLOCK_SIZE <= len);
129
130 partial = 0;
131 }
132 memcpy(sctx->buf + partial, src, len - done);
133}
134
135static int octeon_sha256_update(struct shash_desc *desc, const u8 *data,
136 unsigned int len)
137{
138 struct sha256_state *sctx = shash_desc_ctx(desc);
139 struct octeon_cop2_state state;
140 unsigned long flags;
141
142 /*
143 * Small updates never reach the crypto engine, so the generic sha256 is
144 * faster because of the heavyweight octeon_crypto_enable() /
145 * octeon_crypto_disable().
146 */
147 if ((sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
148 return crypto_sha256_update(desc, data, len);
149
150 flags = octeon_crypto_enable(&state);
151 octeon_sha256_store_hash(sctx);
152
153 __octeon_sha256_update(sctx, data, len);
154
155 octeon_sha256_read_hash(sctx);
156 octeon_crypto_disable(&state, flags);
157
158 return 0;
159}
160
161static int octeon_sha256_final(struct shash_desc *desc, u8 *out)
162{
163 struct sha256_state *sctx = shash_desc_ctx(desc);
164 static const u8 padding[64] = { 0x80, };
165 struct octeon_cop2_state state;
166 __be32 *dst = (__be32 *)out;
167 unsigned int pad_len;
168 unsigned long flags;
169 unsigned int index;
170 __be64 bits;
171 int i;
172
173 /* Save number of bits. */
174 bits = cpu_to_be64(sctx->count << 3);
175
176 /* Pad out to 56 mod 64. */
177 index = sctx->count & 0x3f;
178 pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
179
180 flags = octeon_crypto_enable(&state);
181 octeon_sha256_store_hash(sctx);
182
183 __octeon_sha256_update(sctx, padding, pad_len);
184
185 /* Append length (before padding). */
186 __octeon_sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
187
188 octeon_sha256_read_hash(sctx);
189 octeon_crypto_disable(&state, flags);
190
191 /* Store state in digest */
192 for (i = 0; i < 8; i++)
193 dst[i] = cpu_to_be32(sctx->state[i]);
194
195 /* Zeroize sensitive information. */
196 memset(sctx, 0, sizeof(*sctx));
197
198 return 0;
199}
200
201static int octeon_sha224_final(struct shash_desc *desc, u8 *hash)
202{
203 u8 D[SHA256_DIGEST_SIZE];
204
205 octeon_sha256_final(desc, D);
206
207 memcpy(hash, D, SHA224_DIGEST_SIZE);
208 memzero_explicit(D, SHA256_DIGEST_SIZE);
209
210 return 0;
211}
212
213static int octeon_sha256_export(struct shash_desc *desc, void *out)
214{
215 struct sha256_state *sctx = shash_desc_ctx(desc);
216
217 memcpy(out, sctx, sizeof(*sctx));
218 return 0;
219}
220
221static int octeon_sha256_import(struct shash_desc *desc, const void *in)
222{
223 struct sha256_state *sctx = shash_desc_ctx(desc);
224
225 memcpy(sctx, in, sizeof(*sctx));
226 return 0;
227}
228
229static struct shash_alg octeon_sha256_algs[2] = { {
230 .digestsize = SHA256_DIGEST_SIZE,
231 .init = octeon_sha256_init,
232 .update = octeon_sha256_update,
233 .final = octeon_sha256_final,
234 .export = octeon_sha256_export,
235 .import = octeon_sha256_import,
236 .descsize = sizeof(struct sha256_state),
237 .statesize = sizeof(struct sha256_state),
238 .base = {
239 .cra_name = "sha256",
240 .cra_driver_name= "octeon-sha256",
241 .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
242 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
243 .cra_blocksize = SHA256_BLOCK_SIZE,
244 .cra_module = THIS_MODULE,
245 }
246}, {
247 .digestsize = SHA224_DIGEST_SIZE,
248 .init = octeon_sha224_init,
249 .update = octeon_sha256_update,
250 .final = octeon_sha224_final,
251 .descsize = sizeof(struct sha256_state),
252 .base = {
253 .cra_name = "sha224",
254 .cra_driver_name= "octeon-sha224",
255 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
256 .cra_blocksize = SHA224_BLOCK_SIZE,
257 .cra_module = THIS_MODULE,
258 }
259} };
260
261static int __init octeon_sha256_mod_init(void)
262{
263 if (!octeon_has_crypto())
264 return -ENOTSUPP;
265 return crypto_register_shashes(octeon_sha256_algs,
266 ARRAY_SIZE(octeon_sha256_algs));
267}
268
269static void __exit octeon_sha256_mod_fini(void)
270{
271 crypto_unregister_shashes(octeon_sha256_algs,
272 ARRAY_SIZE(octeon_sha256_algs));
273}
274
275module_init(octeon_sha256_mod_init);
276module_exit(octeon_sha256_mod_fini);
277
278MODULE_LICENSE("GPL");
279MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm (OCTEON)");
280MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
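[Editor's aside — not part of this patch: the "pad out to 56 mod 64" step in octeon_sha256_final() always emits between 1 and 64 padding bytes, so that index + pad_len + 8 (the appended length field) is a multiple of the 64-byte block size. A small sketch mirroring that arithmetic:]

	/* Illustrative only: pad_len as computed in octeon_sha256_final(). */
	static unsigned int sha256_pad_len(u64 count)
	{
		unsigned int index = count & 0x3f;

		/* e.g. index 5 -> 51 (5 + 51 + 8 = 64); index 60 -> 60 (60 + 60 + 8 = 128) */
		return (index < 56) ? (56 - index) : ((64 + 56) - index);
	}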
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
new file mode 100644
index 000000000000..d5fb3c6f22ae
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
@@ -0,0 +1,277 @@
1/*
2 * Cryptographic API.
3 *
4 * SHA-512 and SHA-384 Secure Hash Algorithm.
5 *
6 * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
7 *
8 * Based on crypto/sha512_generic.c, which is:
9 *
10 * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
11 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
12 * Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the
16 * Free Software Foundation; either version 2, or (at your option) any
17 * later version.
18 */
19
20#include <linux/mm.h>
21#include <crypto/sha.h>
22#include <linux/init.h>
23#include <linux/types.h>
24#include <linux/module.h>
25#include <asm/byteorder.h>
26#include <asm/octeon/octeon.h>
27#include <crypto/internal/hash.h>
28
29#include "octeon-crypto.h"
30
31/*
32 * We pass everything as 64-bit. OCTEON can handle misaligned data.
33 */
34
35static void octeon_sha512_store_hash(struct sha512_state *sctx)
36{
37 write_octeon_64bit_hash_sha512(sctx->state[0], 0);
38 write_octeon_64bit_hash_sha512(sctx->state[1], 1);
39 write_octeon_64bit_hash_sha512(sctx->state[2], 2);
40 write_octeon_64bit_hash_sha512(sctx->state[3], 3);
41 write_octeon_64bit_hash_sha512(sctx->state[4], 4);
42 write_octeon_64bit_hash_sha512(sctx->state[5], 5);
43 write_octeon_64bit_hash_sha512(sctx->state[6], 6);
44 write_octeon_64bit_hash_sha512(sctx->state[7], 7);
45}
46
47static void octeon_sha512_read_hash(struct sha512_state *sctx)
48{
49 sctx->state[0] = read_octeon_64bit_hash_sha512(0);
50 sctx->state[1] = read_octeon_64bit_hash_sha512(1);
51 sctx->state[2] = read_octeon_64bit_hash_sha512(2);
52 sctx->state[3] = read_octeon_64bit_hash_sha512(3);
53 sctx->state[4] = read_octeon_64bit_hash_sha512(4);
54 sctx->state[5] = read_octeon_64bit_hash_sha512(5);
55 sctx->state[6] = read_octeon_64bit_hash_sha512(6);
56 sctx->state[7] = read_octeon_64bit_hash_sha512(7);
57}
58
59static void octeon_sha512_transform(const void *_block)
60{
61 const u64 *block = _block;
62
63 write_octeon_64bit_block_sha512(block[0], 0);
64 write_octeon_64bit_block_sha512(block[1], 1);
65 write_octeon_64bit_block_sha512(block[2], 2);
66 write_octeon_64bit_block_sha512(block[3], 3);
67 write_octeon_64bit_block_sha512(block[4], 4);
68 write_octeon_64bit_block_sha512(block[5], 5);
69 write_octeon_64bit_block_sha512(block[6], 6);
70 write_octeon_64bit_block_sha512(block[7], 7);
71 write_octeon_64bit_block_sha512(block[8], 8);
72 write_octeon_64bit_block_sha512(block[9], 9);
73 write_octeon_64bit_block_sha512(block[10], 10);
74 write_octeon_64bit_block_sha512(block[11], 11);
75 write_octeon_64bit_block_sha512(block[12], 12);
76 write_octeon_64bit_block_sha512(block[13], 13);
77 write_octeon_64bit_block_sha512(block[14], 14);
78 octeon_sha512_start(block[15]);
79}
80
81static int octeon_sha512_init(struct shash_desc *desc)
82{
83 struct sha512_state *sctx = shash_desc_ctx(desc);
84
85 sctx->state[0] = SHA512_H0;
86 sctx->state[1] = SHA512_H1;
87 sctx->state[2] = SHA512_H2;
88 sctx->state[3] = SHA512_H3;
89 sctx->state[4] = SHA512_H4;
90 sctx->state[5] = SHA512_H5;
91 sctx->state[6] = SHA512_H6;
92 sctx->state[7] = SHA512_H7;
93 sctx->count[0] = sctx->count[1] = 0;
94
95 return 0;
96}
97
98static int octeon_sha384_init(struct shash_desc *desc)
99{
100 struct sha512_state *sctx = shash_desc_ctx(desc);
101
102 sctx->state[0] = SHA384_H0;
103 sctx->state[1] = SHA384_H1;
104 sctx->state[2] = SHA384_H2;
105 sctx->state[3] = SHA384_H3;
106 sctx->state[4] = SHA384_H4;
107 sctx->state[5] = SHA384_H5;
108 sctx->state[6] = SHA384_H6;
109 sctx->state[7] = SHA384_H7;
110 sctx->count[0] = sctx->count[1] = 0;
111
112 return 0;
113}
114
115static void __octeon_sha512_update(struct sha512_state *sctx, const u8 *data,
116 unsigned int len)
117{
118 unsigned int part_len;
119 unsigned int index;
120 unsigned int i;
121
122 /* Compute number of bytes mod 128. */
123 index = sctx->count[0] % SHA512_BLOCK_SIZE;
124
125 /* Update number of bytes. */
126 if ((sctx->count[0] += len) < len)
127 sctx->count[1]++;
128
129 part_len = SHA512_BLOCK_SIZE - index;
130
131 /* Transform as many times as possible. */
132 if (len >= part_len) {
133 memcpy(&sctx->buf[index], data, part_len);
134 octeon_sha512_transform(sctx->buf);
135
136 for (i = part_len; i + SHA512_BLOCK_SIZE <= len;
137 i += SHA512_BLOCK_SIZE)
138 octeon_sha512_transform(&data[i]);
139
140 index = 0;
141 } else {
142 i = 0;
143 }
144
145 /* Buffer remaining input. */
146 memcpy(&sctx->buf[index], &data[i], len - i);
147}
148
149static int octeon_sha512_update(struct shash_desc *desc, const u8 *data,
150 unsigned int len)
151{
152 struct sha512_state *sctx = shash_desc_ctx(desc);
153 struct octeon_cop2_state state;
154 unsigned long flags;
155
156 /*
157 * Small updates never reach the crypto engine, so the generic sha512 is
158 * faster because of the heavyweight octeon_crypto_enable() /
159 * octeon_crypto_disable().
160 */
161 if ((sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
162 return crypto_sha512_update(desc, data, len);
163
164 flags = octeon_crypto_enable(&state);
165 octeon_sha512_store_hash(sctx);
166
167 __octeon_sha512_update(sctx, data, len);
168
169 octeon_sha512_read_hash(sctx);
170 octeon_crypto_disable(&state, flags);
171
172 return 0;
173}
174
175static int octeon_sha512_final(struct shash_desc *desc, u8 *hash)
176{
177 struct sha512_state *sctx = shash_desc_ctx(desc);
178 static u8 padding[128] = { 0x80, };
179 struct octeon_cop2_state state;
180 __be64 *dst = (__be64 *)hash;
181 unsigned int pad_len;
182 unsigned long flags;
183 unsigned int index;
184 __be64 bits[2];
185 int i;
186
187 /* Save number of bits. */
188 bits[1] = cpu_to_be64(sctx->count[0] << 3);
189 bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
190
191 /* Pad out to 112 mod 128. */
192 index = sctx->count[0] & 0x7f;
193 pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
194
195 flags = octeon_crypto_enable(&state);
196 octeon_sha512_store_hash(sctx);
197
198 __octeon_sha512_update(sctx, padding, pad_len);
199
200 /* Append length (before padding). */
201 __octeon_sha512_update(sctx, (const u8 *)bits, sizeof(bits));
202
203 octeon_sha512_read_hash(sctx);
204 octeon_crypto_disable(&state, flags);
205
206 /* Store state in digest. */
207 for (i = 0; i < 8; i++)
208 dst[i] = cpu_to_be64(sctx->state[i]);
209
210 /* Zeroize sensitive information. */
211 memset(sctx, 0, sizeof(struct sha512_state));
212
213 return 0;
214}
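As a side note on the finalization above: the message is padded with 0x80 followed by zeros up to 112 bytes mod 128, so that the 16-byte big-endian bit count completes a final 128-byte block. A minimal standalone sketch of that pad-length computation (illustration only, not part of the patch):

	/* Sketch: SHA-512 pad length, assuming count_lo is the low 64 bits
	 * of the byte count (as in sctx->count[0] above). */
	static unsigned int sha512_pad_len(unsigned long long count_lo)
	{
		unsigned int index = count_lo & 0x7f;	/* bytes mod 128 */

		return (index < 112) ? (112 - index) : (240 - index);
	}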
215
216static int octeon_sha384_final(struct shash_desc *desc, u8 *hash)
217{
218 u8 D[64];
219
220 octeon_sha512_final(desc, D);
221
222 memcpy(hash, D, 48);
223 memzero_explicit(D, 64);
224
225 return 0;
226}
227
228static struct shash_alg octeon_sha512_algs[2] = { {
229 .digestsize = SHA512_DIGEST_SIZE,
230 .init = octeon_sha512_init,
231 .update = octeon_sha512_update,
232 .final = octeon_sha512_final,
233 .descsize = sizeof(struct sha512_state),
234 .base = {
235 .cra_name = "sha512",
236 .cra_driver_name= "octeon-sha512",
237 .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
238 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
239 .cra_blocksize = SHA512_BLOCK_SIZE,
240 .cra_module = THIS_MODULE,
241 }
242}, {
243 .digestsize = SHA384_DIGEST_SIZE,
244 .init = octeon_sha384_init,
245 .update = octeon_sha512_update,
246 .final = octeon_sha384_final,
247 .descsize = sizeof(struct sha512_state),
248 .base = {
249 .cra_name = "sha384",
250 .cra_driver_name= "octeon-sha384",
251 .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
252 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
253 .cra_blocksize = SHA384_BLOCK_SIZE,
254 .cra_module = THIS_MODULE,
255 }
256} };
257
258static int __init octeon_sha512_mod_init(void)
259{
260 if (!octeon_has_crypto())
261 return -ENOTSUPP;
262 return crypto_register_shashes(octeon_sha512_algs,
263 ARRAY_SIZE(octeon_sha512_algs));
264}
265
266static void __exit octeon_sha512_mod_fini(void)
267{
268 crypto_unregister_shashes(octeon_sha512_algs,
269 ARRAY_SIZE(octeon_sha512_algs));
270}
271
272module_init(octeon_sha512_mod_init);
273module_exit(octeon_sha512_mod_fini);
274
275MODULE_LICENSE("GPL");
276MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms (OCTEON)");
277MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
index 4794067cb5a7..5035f09c5427 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
@@ -1259,20 +1259,6 @@
1259#define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18) 1259#define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18)
1260 1260
1261/************************************************************************* 1261/*************************************************************************
1262 * _REG relative to RSET_RNG
1263 *************************************************************************/
1264
1265#define RNG_CTRL 0x00
1266#define RNG_EN (1 << 0)
1267
1268#define RNG_STAT 0x04
1269#define RNG_AVAIL_MASK (0xff000000)
1270
1271#define RNG_DATA 0x08
1272#define RNG_THRES 0x0c
1273#define RNG_MASK 0x10
1274
1275/*************************************************************************
1276 * _REG relative to RSET_SPI 1262 * _REG relative to RSET_SPI
1277 *************************************************************************/ 1263 *************************************************************************/
1278 1264
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 2926fb9c570a..9c221b69c181 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -4,6 +4,14 @@
4# Arch-specific CryptoAPI modules. 4# Arch-specific CryptoAPI modules.
5# 5#
6 6
7obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
8obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
7obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o 9obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
10obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
11obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
8 12
13aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
14md5-ppc-y := md5-asm.o md5-glue.o
9sha1-powerpc-y := sha1-powerpc-asm.o sha1.o 15sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
16sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
17sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S
new file mode 100644
index 000000000000..5dc6bce90a77
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-core.S
@@ -0,0 +1,351 @@
1/*
2 * Fast AES implementation for SPE instruction set (PPC)
3 *
4 * This code makes use of the SPE SIMD instruction set as defined in
5 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
6 * Implementation is based on optimization guide notes from
7 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
8 *
9 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 */
17
18#include <asm/ppc_asm.h>
19#include "aes-spe-regs.h"
20
21#define EAD(in, bpos) \
22 rlwimi rT0,in,28-((bpos+3)%4)*8,20,27;
23
24#define DAD(in, bpos) \
25 rlwimi rT1,in,24-((bpos+3)%4)*8,24,31;
26
27#define LWH(out, off) \
28 evlwwsplat out,off(rT0); /* load word high */
29
30#define LWL(out, off) \
31 lwz out,off(rT0); /* load word low */
32
33#define LBZ(out, tab, off) \
34 lbz out,off(tab); /* load byte */
35
36#define LAH(out, in, bpos, off) \
37 EAD(in, bpos) /* calc addr + load word high */ \
38 LWH(out, off)
39
40#define LAL(out, in, bpos, off) \
41 EAD(in, bpos) /* calc addr + load word low */ \
42 LWL(out, off)
43
44#define LAE(out, in, bpos) \
45 EAD(in, bpos) /* calc addr + load enc byte */ \
46 LBZ(out, rT0, 8)
47
48#define LBE(out) \
49 LBZ(out, rT0, 8) /* load enc byte */
50
51#define LAD(out, in, bpos) \
52 DAD(in, bpos) /* calc addr + load dec byte */ \
53 LBZ(out, rT1, 0)
54
55#define LBD(out) \
56 LBZ(out, rT1, 0)
57
58/*
59 * ppc_encrypt_block: The central encryption function for a single 16 byte
60 * block. It does no stack handling or register saving to support fast calls
61 * via bl/blr. It expects that caller has pre-xored input data with first
62 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
63 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
64 * and rW0-rW3 and the caller must execute a final xor on the output registers.
65 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
66 *
67 */
68_GLOBAL(ppc_encrypt_block)
69 LAH(rW4, rD1, 2, 4)
70 LAH(rW6, rD0, 3, 0)
71 LAH(rW3, rD0, 1, 8)
72ppc_encrypt_block_loop:
73 LAH(rW0, rD3, 0, 12)
74 LAL(rW0, rD0, 0, 12)
75 LAH(rW1, rD1, 0, 12)
76 LAH(rW2, rD2, 1, 8)
77 LAL(rW2, rD3, 1, 8)
78 LAL(rW3, rD1, 1, 8)
79 LAL(rW4, rD2, 2, 4)
80 LAL(rW6, rD1, 3, 0)
81 LAH(rW5, rD3, 2, 4)
82 LAL(rW5, rD0, 2, 4)
83 LAH(rW7, rD2, 3, 0)
84 evldw rD1,16(rKP)
85 EAD(rD3, 3)
86 evxor rW2,rW2,rW4
87 LWL(rW7, 0)
88 evxor rW2,rW2,rW6
89 EAD(rD2, 0)
90 evxor rD1,rD1,rW2
91 LWL(rW1, 12)
92 evxor rD1,rD1,rW0
93 evldw rD3,24(rKP)
94 evmergehi rD0,rD0,rD1
95 EAD(rD1, 2)
96 evxor rW3,rW3,rW5
97 LWH(rW4, 4)
98 evxor rW3,rW3,rW7
99 EAD(rD0, 3)
100 evxor rD3,rD3,rW3
101 LWH(rW6, 0)
102 evxor rD3,rD3,rW1
103 EAD(rD0, 1)
104 evmergehi rD2,rD2,rD3
105 LWH(rW3, 8)
106 LAH(rW0, rD3, 0, 12)
107 LAL(rW0, rD0, 0, 12)
108 LAH(rW1, rD1, 0, 12)
109 LAH(rW2, rD2, 1, 8)
110 LAL(rW2, rD3, 1, 8)
111 LAL(rW3, rD1, 1, 8)
112 LAL(rW4, rD2, 2, 4)
113 LAL(rW6, rD1, 3, 0)
114 LAH(rW5, rD3, 2, 4)
115 LAL(rW5, rD0, 2, 4)
116 LAH(rW7, rD2, 3, 0)
117 evldw rD1,32(rKP)
118 EAD(rD3, 3)
119 evxor rW2,rW2,rW4
120 LWL(rW7, 0)
121 evxor rW2,rW2,rW6
122 EAD(rD2, 0)
123 evxor rD1,rD1,rW2
124 LWL(rW1, 12)
125 evxor rD1,rD1,rW0
126 evldw rD3,40(rKP)
127 evmergehi rD0,rD0,rD1
128 EAD(rD1, 2)
129 evxor rW3,rW3,rW5
130 LWH(rW4, 4)
131 evxor rW3,rW3,rW7
132 EAD(rD0, 3)
133 evxor rD3,rD3,rW3
134 LWH(rW6, 0)
135 evxor rD3,rD3,rW1
136 EAD(rD0, 1)
137 evmergehi rD2,rD2,rD3
138 LWH(rW3, 8)
139 addi rKP,rKP,32
140 bdnz ppc_encrypt_block_loop
141 LAH(rW0, rD3, 0, 12)
142 LAL(rW0, rD0, 0, 12)
143 LAH(rW1, rD1, 0, 12)
144 LAH(rW2, rD2, 1, 8)
145 LAL(rW2, rD3, 1, 8)
146 LAL(rW3, rD1, 1, 8)
147 LAL(rW4, rD2, 2, 4)
148 LAH(rW5, rD3, 2, 4)
149 LAL(rW6, rD1, 3, 0)
150 LAL(rW5, rD0, 2, 4)
151 LAH(rW7, rD2, 3, 0)
152 evldw rD1,16(rKP)
153 EAD(rD3, 3)
154 evxor rW2,rW2,rW4
155 LWL(rW7, 0)
156 evxor rW2,rW2,rW6
157 EAD(rD2, 0)
158 evxor rD1,rD1,rW2
159 LWL(rW1, 12)
160 evxor rD1,rD1,rW0
161 evldw rD3,24(rKP)
162 evmergehi rD0,rD0,rD1
163 EAD(rD1, 0)
164 evxor rW3,rW3,rW5
165 LBE(rW2)
166 evxor rW3,rW3,rW7
167 EAD(rD0, 1)
168 evxor rD3,rD3,rW3
169 LBE(rW6)
170 evxor rD3,rD3,rW1
171 EAD(rD0, 0)
172 evmergehi rD2,rD2,rD3
173 LBE(rW1)
174 LAE(rW0, rD3, 0)
175 LAE(rW1, rD0, 0)
176 LAE(rW4, rD2, 1)
177 LAE(rW5, rD3, 1)
178 LAE(rW3, rD2, 0)
179 LAE(rW7, rD1, 1)
180 rlwimi rW0,rW4,8,16,23
181 rlwimi rW1,rW5,8,16,23
182 LAE(rW4, rD1, 2)
183 LAE(rW5, rD2, 2)
184 rlwimi rW2,rW6,8,16,23
185 rlwimi rW3,rW7,8,16,23
186 LAE(rW6, rD3, 2)
187 LAE(rW7, rD0, 2)
188 rlwimi rW0,rW4,16,8,15
189 rlwimi rW1,rW5,16,8,15
190 LAE(rW4, rD0, 3)
191 LAE(rW5, rD1, 3)
192 rlwimi rW2,rW6,16,8,15
193 lwz rD0,32(rKP)
194 rlwimi rW3,rW7,16,8,15
195 lwz rD1,36(rKP)
196 LAE(rW6, rD2, 3)
197 LAE(rW7, rD3, 3)
198 rlwimi rW0,rW4,24,0,7
199 lwz rD2,40(rKP)
200 rlwimi rW1,rW5,24,0,7
201 lwz rD3,44(rKP)
202 rlwimi rW2,rW6,24,0,7
203 rlwimi rW3,rW7,24,0,7
204 blr
205
206/*
207 * ppc_decrypt_block: The central decryption function for a single 16 byte
208 * block. It does no stack handling or register saving to support fast calls
209 * via bl/blr. It expects that caller has pre-xored input data with first
210 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
211 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
212 * and rW0-rW3 and the caller must execute a final xor on the output registers.
213 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
214 *
215 */
216_GLOBAL(ppc_decrypt_block)
217 LAH(rW0, rD1, 0, 12)
218 LAH(rW6, rD0, 3, 0)
219 LAH(rW3, rD0, 1, 8)
220ppc_decrypt_block_loop:
221 LAH(rW1, rD3, 0, 12)
222 LAL(rW0, rD2, 0, 12)
223 LAH(rW2, rD2, 1, 8)
224 LAL(rW2, rD3, 1, 8)
225 LAH(rW4, rD3, 2, 4)
226 LAL(rW4, rD0, 2, 4)
227 LAL(rW6, rD1, 3, 0)
228 LAH(rW5, rD1, 2, 4)
229 LAH(rW7, rD2, 3, 0)
230 LAL(rW7, rD3, 3, 0)
231 LAL(rW3, rD1, 1, 8)
232 evldw rD1,16(rKP)
233 EAD(rD0, 0)
234 evxor rW4,rW4,rW6
235 LWL(rW1, 12)
236 evxor rW0,rW0,rW4
237 EAD(rD2, 2)
238 evxor rW0,rW0,rW2
239 LWL(rW5, 4)
240 evxor rD1,rD1,rW0
241 evldw rD3,24(rKP)
242 evmergehi rD0,rD0,rD1
243 EAD(rD1, 0)
244 evxor rW3,rW3,rW7
245 LWH(rW0, 12)
246 evxor rW3,rW3,rW1
247 EAD(rD0, 3)
248 evxor rD3,rD3,rW3
249 LWH(rW6, 0)
250 evxor rD3,rD3,rW5
251 EAD(rD0, 1)
252 evmergehi rD2,rD2,rD3
253 LWH(rW3, 8)
254 LAH(rW1, rD3, 0, 12)
255 LAL(rW0, rD2, 0, 12)
256 LAH(rW2, rD2, 1, 8)
257 LAL(rW2, rD3, 1, 8)
258 LAH(rW4, rD3, 2, 4)
259 LAL(rW4, rD0, 2, 4)
260 LAL(rW6, rD1, 3, 0)
261 LAH(rW5, rD1, 2, 4)
262 LAH(rW7, rD2, 3, 0)
263 LAL(rW7, rD3, 3, 0)
264 LAL(rW3, rD1, 1, 8)
265 evldw rD1,32(rKP)
266 EAD(rD0, 0)
267 evxor rW4,rW4,rW6
268 LWL(rW1, 12)
269 evxor rW0,rW0,rW4
270 EAD(rD2, 2)
271 evxor rW0,rW0,rW2
272 LWL(rW5, 4)
273 evxor rD1,rD1,rW0
274 evldw rD3,40(rKP)
275 evmergehi rD0,rD0,rD1
276 EAD(rD1, 0)
277 evxor rW3,rW3,rW7
278 LWH(rW0, 12)
279 evxor rW3,rW3,rW1
280 EAD(rD0, 3)
281 evxor rD3,rD3,rW3
282 LWH(rW6, 0)
283 evxor rD3,rD3,rW5
284 EAD(rD0, 1)
285 evmergehi rD2,rD2,rD3
286 LWH(rW3, 8)
287 addi rKP,rKP,32
288 bdnz ppc_decrypt_block_loop
289 LAH(rW1, rD3, 0, 12)
290 LAL(rW0, rD2, 0, 12)
291 LAH(rW2, rD2, 1, 8)
292 LAL(rW2, rD3, 1, 8)
293 LAH(rW4, rD3, 2, 4)
294 LAL(rW4, rD0, 2, 4)
295 LAL(rW6, rD1, 3, 0)
296 LAH(rW5, rD1, 2, 4)
297 LAH(rW7, rD2, 3, 0)
298 LAL(rW7, rD3, 3, 0)
299 LAL(rW3, rD1, 1, 8)
300 evldw rD1,16(rKP)
301 EAD(rD0, 0)
302 evxor rW4,rW4,rW6
303 LWL(rW1, 12)
304 evxor rW0,rW0,rW4
305 EAD(rD2, 2)
306 evxor rW0,rW0,rW2
307 LWL(rW5, 4)
308 evxor rD1,rD1,rW0
309 evldw rD3,24(rKP)
310 evmergehi rD0,rD0,rD1
311 DAD(rD1, 0)
312 evxor rW3,rW3,rW7
313 LBD(rW0)
314 evxor rW3,rW3,rW1
315 DAD(rD0, 1)
316 evxor rD3,rD3,rW3
317 LBD(rW6)
318 evxor rD3,rD3,rW5
319 DAD(rD0, 0)
320 evmergehi rD2,rD2,rD3
321 LBD(rW3)
322 LAD(rW2, rD3, 0)
323 LAD(rW1, rD2, 0)
324 LAD(rW4, rD2, 1)
325 LAD(rW5, rD3, 1)
326 LAD(rW7, rD1, 1)
327 rlwimi rW0,rW4,8,16,23
328 rlwimi rW1,rW5,8,16,23
329 LAD(rW4, rD3, 2)
330 LAD(rW5, rD0, 2)
331 rlwimi rW2,rW6,8,16,23
332 rlwimi rW3,rW7,8,16,23
333 LAD(rW6, rD1, 2)
334 LAD(rW7, rD2, 2)
335 rlwimi rW0,rW4,16,8,15
336 rlwimi rW1,rW5,16,8,15
337 LAD(rW4, rD0, 3)
338 LAD(rW5, rD1, 3)
339 rlwimi rW2,rW6,16,8,15
340 lwz rD0,32(rKP)
341 rlwimi rW3,rW7,16,8,15
342 lwz rD1,36(rKP)
343 LAD(rW6, rD2, 3)
344 LAD(rW7, rD3, 3)
345 rlwimi rW0,rW4,24,0,7
346 lwz rD2,40(rKP)
347 rlwimi rW1,rW5,24,0,7
348 lwz rD3,44(rKP)
349 rlwimi rW2,rW6,24,0,7
350 rlwimi rW3,rW7,24,0,7
351 blr
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
new file mode 100644
index 000000000000..bd5e63f72ad4
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -0,0 +1,512 @@
1/*
2 * Glue code for AES implementation for SPE instructions (PPC)
3 *
4 * Based on the generic implementation. The assembler module takes care
5 * of the SPE registers so it can run from interrupt context.
6 *
7 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation; either version 2 of the License, or (at your option)
12 * any later version.
13 *
14 */
15
16#include <crypto/aes.h>
17#include <linux/module.h>
18#include <linux/init.h>
19#include <linux/types.h>
20#include <linux/errno.h>
21#include <linux/crypto.h>
22#include <asm/byteorder.h>
23#include <asm/switch_to.h>
24#include <crypto/algapi.h>
25
26/*
27 * MAX_BYTES defines the number of bytes that are allowed to be processed
28 * between preempt_disable() and preempt_enable(). e500 cores can issue two
29 * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
30 * bit unit (SU2). One of these can be a memory access that is executed via
31 * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
32 * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data
33 * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
34 * included. Even with the low end model clocked at 667 MHz this amounts to a
35 * critical time window of less than 30us. The value has been chosen to
36 * process a 512 byte disk block in one run or a large 1400 byte IPsec network
37 * packet in two runs.
38 *
39 */
40#define MAX_BYTES 768
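As a rough cross-check of the estimate in the comment above (approximate figures, not measurements): 768 bytes at about 25 cycles per byte is roughly 19,200 cycles, which at a 667 MHz clock is just under 30 microseconds, matching the stated time window. A throwaway sketch of that arithmetic:

	/* Back-of-the-envelope check of the MAX_BYTES budget (sketch only). */
	#include <stdio.h>

	int main(void)
	{
		const double cycles = 768 * 25.0;	/* ~25 cycles/byte for XTS-AES-256 */
		const double mhz = 667.0;		/* low end e500 clock */

		printf("%.0f cycles -> %.1f us\n", cycles, cycles / mhz);
		return 0;
	}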
41
42struct ppc_aes_ctx {
43 u32 key_enc[AES_MAX_KEYLENGTH_U32];
44 u32 key_dec[AES_MAX_KEYLENGTH_U32];
45 u32 rounds;
46};
47
48struct ppc_xts_ctx {
49 u32 key_enc[AES_MAX_KEYLENGTH_U32];
50 u32 key_dec[AES_MAX_KEYLENGTH_U32];
51 u32 key_twk[AES_MAX_KEYLENGTH_U32];
52 u32 rounds;
53};
54
55extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
56extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
57extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
58 u32 bytes);
59extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
60 u32 bytes);
61extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
62 u32 bytes, u8 *iv);
63extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
64 u32 bytes, u8 *iv);
65extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
66 u32 bytes, u8 *iv);
67extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
68 u32 bytes, u8 *iv, u32 *key_twk);
69extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
70 u32 bytes, u8 *iv, u32 *key_twk);
71
72extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
73extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
74extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
75
76extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
77 unsigned int key_len);
78
79static void spe_begin(void)
80{
81 /* disable preemption and save users SPE registers if required */
82 preempt_disable();
83 enable_kernel_spe();
84}
85
86static void spe_end(void)
87{
88 /* reenable preemption */
89 preempt_enable();
90}
91
92static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
93 unsigned int key_len)
94{
95 struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
96
97 if (key_len != AES_KEYSIZE_128 &&
98 key_len != AES_KEYSIZE_192 &&
99 key_len != AES_KEYSIZE_256) {
100 tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
101 return -EINVAL;
102 }
103
104 switch (key_len) {
105 case AES_KEYSIZE_128:
106 ctx->rounds = 4;
107 ppc_expand_key_128(ctx->key_enc, in_key);
108 break;
109 case AES_KEYSIZE_192:
110 ctx->rounds = 5;
111 ppc_expand_key_192(ctx->key_enc, in_key);
112 break;
113 case AES_KEYSIZE_256:
114 ctx->rounds = 6;
115 ppc_expand_key_256(ctx->key_enc, in_key);
116 break;
117 }
118
119 ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
120
121 return 0;
122}
123
124static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
125 unsigned int key_len)
126{
127 struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
128
129 key_len >>= 1;
130
131 if (key_len != AES_KEYSIZE_128 &&
132 key_len != AES_KEYSIZE_192 &&
133 key_len != AES_KEYSIZE_256) {
134 tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
135 return -EINVAL;
136 }
137
138 switch (key_len) {
139 case AES_KEYSIZE_128:
140 ctx->rounds = 4;
141 ppc_expand_key_128(ctx->key_enc, in_key);
142 ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
143 break;
144 case AES_KEYSIZE_192:
145 ctx->rounds = 5;
146 ppc_expand_key_192(ctx->key_enc, in_key);
147 ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
148 break;
149 case AES_KEYSIZE_256:
150 ctx->rounds = 6;
151 ppc_expand_key_256(ctx->key_enc, in_key);
152 ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
153 break;
154 }
155
156 ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
157
158 return 0;
159}
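The halving of key_len above reflects the XTS convention that the supplied key is the data key concatenated with an equally long tweak key. A hedged sketch of that split, using a hypothetical helper name:

	/* Illustration only: derive the single AES key length from an XTS key
	 * length and validate it, mirroring the check in ppc_xts_setkey() above. */
	static int xts_single_keylen(unsigned int key_len, unsigned int *single)
	{
		*single = key_len / 2;	/* e.g. 64 bytes -> two 256 bit AES keys */

		return (*single == 16 || *single == 24 || *single == 32) ? 0 : -1;
	}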
160
161static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
162{
163 struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
164
165 spe_begin();
166 ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
167 spe_end();
168}
169
170static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
171{
172 struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
173
174 spe_begin();
175 ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
176 spe_end();
177}
178
179static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
180 struct scatterlist *src, unsigned int nbytes)
181{
182 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
183 struct blkcipher_walk walk;
184 unsigned int ubytes;
185 int err;
186
187 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
188 blkcipher_walk_init(&walk, dst, src, nbytes);
189 err = blkcipher_walk_virt(desc, &walk);
190
191 while ((nbytes = walk.nbytes)) {
192 ubytes = nbytes > MAX_BYTES ?
193 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
194 nbytes -= ubytes;
195
196 spe_begin();
197 ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
198 ctx->key_enc, ctx->rounds, nbytes);
199 spe_end();
200
201 err = blkcipher_walk_done(desc, &walk, ubytes);
202 }
203
204 return err;
205}
206
207static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
208 struct scatterlist *src, unsigned int nbytes)
209{
210 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
211 struct blkcipher_walk walk;
212 unsigned int ubytes;
213 int err;
214
215 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
216 blkcipher_walk_init(&walk, dst, src, nbytes);
217 err = blkcipher_walk_virt(desc, &walk);
218
219 while ((nbytes = walk.nbytes)) {
220 ubytes = nbytes > MAX_BYTES ?
221 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
222 nbytes -= ubytes;
223
224 spe_begin();
225 ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
226 ctx->key_dec, ctx->rounds, nbytes);
227 spe_end();
228
229 err = blkcipher_walk_done(desc, &walk, ubytes);
230 }
231
232 return err;
233}
234
235static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
236 struct scatterlist *src, unsigned int nbytes)
237{
238 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
239 struct blkcipher_walk walk;
240 unsigned int ubytes;
241 int err;
242
243 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
244 blkcipher_walk_init(&walk, dst, src, nbytes);
245 err = blkcipher_walk_virt(desc, &walk);
246
247 while ((nbytes = walk.nbytes)) {
248 ubytes = nbytes > MAX_BYTES ?
249 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
250 nbytes -= ubytes;
251
252 spe_begin();
253 ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
254 ctx->key_enc, ctx->rounds, nbytes, walk.iv);
255 spe_end();
256
257 err = blkcipher_walk_done(desc, &walk, ubytes);
258 }
259
260 return err;
261}
262
263static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
264 struct scatterlist *src, unsigned int nbytes)
265{
266 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
267 struct blkcipher_walk walk;
268 unsigned int ubytes;
269 int err;
270
271 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
272 blkcipher_walk_init(&walk, dst, src, nbytes);
273 err = blkcipher_walk_virt(desc, &walk);
274
275 while ((nbytes = walk.nbytes)) {
276 ubytes = nbytes > MAX_BYTES ?
277 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
278 nbytes -= ubytes;
279
280 spe_begin();
281 ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
282 ctx->key_dec, ctx->rounds, nbytes, walk.iv);
283 spe_end();
284
285 err = blkcipher_walk_done(desc, &walk, ubytes);
286 }
287
288 return err;
289}
290
291static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
292 struct scatterlist *src, unsigned int nbytes)
293{
294 struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
295 struct blkcipher_walk walk;
296 unsigned int pbytes, ubytes;
297 int err;
298
299 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
300 blkcipher_walk_init(&walk, dst, src, nbytes);
301 err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
302
303 while ((pbytes = walk.nbytes)) {
304 pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
305 pbytes = pbytes == nbytes ?
306 nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
307 ubytes = walk.nbytes - pbytes;
308
309 spe_begin();
310 ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
311 ctx->key_enc, ctx->rounds, pbytes, walk.iv);
312 spe_end();
313
314 nbytes -= pbytes;
315 err = blkcipher_walk_done(desc, &walk, ubytes);
316 }
317
318 return err;
319}
320
321static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
322 struct scatterlist *src, unsigned int nbytes)
323{
324 struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
325 struct blkcipher_walk walk;
326 unsigned int ubytes;
327 int err;
328 u32 *twk;
329
330 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
331 blkcipher_walk_init(&walk, dst, src, nbytes);
332 err = blkcipher_walk_virt(desc, &walk);
333 twk = ctx->key_twk;
334
335 while ((nbytes = walk.nbytes)) {
336 ubytes = nbytes > MAX_BYTES ?
337 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
338 nbytes -= ubytes;
339
340 spe_begin();
341 ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
342 ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
343 spe_end();
344
345 twk = NULL;
346 err = blkcipher_walk_done(desc, &walk, ubytes);
347 }
348
349 return err;
350}
351
352static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
353 struct scatterlist *src, unsigned int nbytes)
354{
355 struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
356 struct blkcipher_walk walk;
357 unsigned int ubytes;
358 int err;
359 u32 *twk;
360
361 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
362 blkcipher_walk_init(&walk, dst, src, nbytes);
363 err = blkcipher_walk_virt(desc, &walk);
364 twk = ctx->key_twk;
365
366 while ((nbytes = walk.nbytes)) {
367 ubytes = nbytes > MAX_BYTES ?
368 nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
369 nbytes -= ubytes;
370
371 spe_begin();
372 ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
373 ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
374 spe_end();
375
376 twk = NULL;
377 err = blkcipher_walk_done(desc, &walk, ubytes);
378 }
379
380 return err;
381}
382
383/*
384 * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
385 * because the e500 platform can handle unaligned reads/writes very efficiently.
386 * This improves IPsec throughput by another few percent. Additionally we assume
387 * that AES context is always aligned to at least 8 bytes because it is created
388 * with kmalloc() in the crypto infrastructure
389 *
390 */
391static struct crypto_alg aes_algs[] = { {
392 .cra_name = "aes",
393 .cra_driver_name = "aes-ppc-spe",
394 .cra_priority = 300,
395 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
396 .cra_blocksize = AES_BLOCK_SIZE,
397 .cra_ctxsize = sizeof(struct ppc_aes_ctx),
398 .cra_alignmask = 0,
399 .cra_module = THIS_MODULE,
400 .cra_u = {
401 .cipher = {
402 .cia_min_keysize = AES_MIN_KEY_SIZE,
403 .cia_max_keysize = AES_MAX_KEY_SIZE,
404 .cia_setkey = ppc_aes_setkey,
405 .cia_encrypt = ppc_aes_encrypt,
406 .cia_decrypt = ppc_aes_decrypt
407 }
408 }
409}, {
410 .cra_name = "ecb(aes)",
411 .cra_driver_name = "ecb-ppc-spe",
412 .cra_priority = 300,
413 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
414 .cra_blocksize = AES_BLOCK_SIZE,
415 .cra_ctxsize = sizeof(struct ppc_aes_ctx),
416 .cra_alignmask = 0,
417 .cra_type = &crypto_blkcipher_type,
418 .cra_module = THIS_MODULE,
419 .cra_u = {
420 .blkcipher = {
421 .min_keysize = AES_MIN_KEY_SIZE,
422 .max_keysize = AES_MAX_KEY_SIZE,
423 .ivsize = AES_BLOCK_SIZE,
424 .setkey = ppc_aes_setkey,
425 .encrypt = ppc_ecb_encrypt,
426 .decrypt = ppc_ecb_decrypt,
427 }
428 }
429}, {
430 .cra_name = "cbc(aes)",
431 .cra_driver_name = "cbc-ppc-spe",
432 .cra_priority = 300,
433 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
434 .cra_blocksize = AES_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct ppc_aes_ctx),
436 .cra_alignmask = 0,
437 .cra_type = &crypto_blkcipher_type,
438 .cra_module = THIS_MODULE,
439 .cra_u = {
440 .blkcipher = {
441 .min_keysize = AES_MIN_KEY_SIZE,
442 .max_keysize = AES_MAX_KEY_SIZE,
443 .ivsize = AES_BLOCK_SIZE,
444 .setkey = ppc_aes_setkey,
445 .encrypt = ppc_cbc_encrypt,
446 .decrypt = ppc_cbc_decrypt,
447 }
448 }
449}, {
450 .cra_name = "ctr(aes)",
451 .cra_driver_name = "ctr-ppc-spe",
452 .cra_priority = 300,
453 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
454 .cra_blocksize = 1,
455 .cra_ctxsize = sizeof(struct ppc_aes_ctx),
456 .cra_alignmask = 0,
457 .cra_type = &crypto_blkcipher_type,
458 .cra_module = THIS_MODULE,
459 .cra_u = {
460 .blkcipher = {
461 .min_keysize = AES_MIN_KEY_SIZE,
462 .max_keysize = AES_MAX_KEY_SIZE,
463 .ivsize = AES_BLOCK_SIZE,
464 .setkey = ppc_aes_setkey,
465 .encrypt = ppc_ctr_crypt,
466 .decrypt = ppc_ctr_crypt,
467 }
468 }
469}, {
470 .cra_name = "xts(aes)",
471 .cra_driver_name = "xts-ppc-spe",
472 .cra_priority = 300,
473 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
474 .cra_blocksize = AES_BLOCK_SIZE,
475 .cra_ctxsize = sizeof(struct ppc_xts_ctx),
476 .cra_alignmask = 0,
477 .cra_type = &crypto_blkcipher_type,
478 .cra_module = THIS_MODULE,
479 .cra_u = {
480 .blkcipher = {
481 .min_keysize = AES_MIN_KEY_SIZE * 2,
482 .max_keysize = AES_MAX_KEY_SIZE * 2,
483 .ivsize = AES_BLOCK_SIZE,
484 .setkey = ppc_xts_setkey,
485 .encrypt = ppc_xts_encrypt,
486 .decrypt = ppc_xts_decrypt,
487 }
488 }
489} };
490
491static int __init ppc_aes_mod_init(void)
492{
493 return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
494}
495
496static void __exit ppc_aes_mod_fini(void)
497{
498 crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
499}
500
501module_init(ppc_aes_mod_init);
502module_exit(ppc_aes_mod_fini);
503
504MODULE_LICENSE("GPL");
505MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
506
507MODULE_ALIAS_CRYPTO("aes");
508MODULE_ALIAS_CRYPTO("ecb(aes)");
509MODULE_ALIAS_CRYPTO("cbc(aes)");
510MODULE_ALIAS_CRYPTO("ctr(aes)");
511MODULE_ALIAS_CRYPTO("xts(aes)");
512MODULE_ALIAS_CRYPTO("aes-ppc-spe");
diff --git a/arch/powerpc/crypto/aes-spe-keys.S b/arch/powerpc/crypto/aes-spe-keys.S
new file mode 100644
index 000000000000..be8090f3d700
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-keys.S
@@ -0,0 +1,283 @@
1/*
2 * Key handling functions for PPC AES implementation
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <asm/ppc_asm.h>
14
15#ifdef __BIG_ENDIAN__
16#define LOAD_KEY(d, s, off) \
17 lwz d,off(s);
18#else
19#define LOAD_KEY(d, s, off) \
20 li r0,off; \
21 lwbrx d,s,r0;
22#endif
23
24#define INITIALIZE_KEY \
25 stwu r1,-32(r1); /* create stack frame */ \
26 stw r14,8(r1); /* save registers */ \
27 stw r15,12(r1); \
28 stw r16,16(r1);
29
30#define FINALIZE_KEY \
31 lwz r14,8(r1); /* restore registers */ \
32 lwz r15,12(r1); \
33 lwz r16,16(r1); \
34 xor r5,r5,r5; /* clear sensitive data */ \
35 xor r6,r6,r6; \
36 xor r7,r7,r7; \
37 xor r8,r8,r8; \
38 xor r9,r9,r9; \
39 xor r10,r10,r10; \
40 xor r11,r11,r11; \
41 xor r12,r12,r12; \
42 addi r1,r1,32; /* cleanup stack */
43
44#define LS_BOX(r, t1, t2) \
45 lis t2,PPC_AES_4K_ENCTAB@h; \
46 ori t2,t2,PPC_AES_4K_ENCTAB@l; \
47 rlwimi t2,r,4,20,27; \
48 lbz t1,8(t2); \
49 rlwimi r,t1,0,24,31; \
50 rlwimi t2,r,28,20,27; \
51 lbz t1,8(t2); \
52 rlwimi r,t1,8,16,23; \
53 rlwimi t2,r,20,20,27; \
54 lbz t1,8(t2); \
55 rlwimi r,t1,16,8,15; \
56 rlwimi t2,r,12,20,27; \
57 lbz t1,8(t2); \
58 rlwimi r,t1,24,0,7;
59
60#define GF8_MUL(out, in, t1, t2) \
61 lis t1,0x8080; /* multiplication in GF8 */ \
62 ori t1,t1,0x8080; \
63 and t1,t1,in; \
64 srwi t1,t1,7; \
65 mulli t1,t1,0x1b; \
66 lis t2,0x7f7f; \
67 ori t2,t2,0x7f7f; \
68 and t2,t2,in; \
69 slwi t2,t2,1; \
70 xor out,t1,t2;
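For readers less used to the packed form, the GF8_MUL macro above is the usual AES xtime operation applied to four bytes at once. A minimal C sketch of the same computation (an illustration, not shared code):

	/* Multiply each of the four bytes packed in w by 2 in GF(2^8),
	 * reducing with the AES polynomial 0x1b, as GF8_MUL does above. */
	#include <stdint.h>

	static inline uint32_t gf8_mul2(uint32_t w)
	{
		uint32_t hi = (w & 0x80808080u) >> 7;	/* bytes with the top bit set */

		return ((w & 0x7f7f7f7fu) << 1) ^ (hi * 0x1bu);
	}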
71
72/*
73 * ppc_expand_key_128(u32 *key_enc, const u8 *key)
74 *
75 * Expand 128 bit key into 176 bytes encryption key. It consists of
76 * key itself plus 10 rounds with 16 bytes each
77 *
78 */
79_GLOBAL(ppc_expand_key_128)
80 INITIALIZE_KEY
81 LOAD_KEY(r5,r4,0)
82 LOAD_KEY(r6,r4,4)
83 LOAD_KEY(r7,r4,8)
84 LOAD_KEY(r8,r4,12)
85 stw r5,0(r3) /* key[0..3] = input data */
86 stw r6,4(r3)
87 stw r7,8(r3)
88 stw r8,12(r3)
89 li r16,10 /* 10 expansion rounds */
90 lis r0,0x0100 /* RCO(1) */
91ppc_expand_128_loop:
92 addi r3,r3,16
93 mr r14,r8 /* apply LS_BOX to 4th temp */
94 rotlwi r14,r14,8
95 LS_BOX(r14, r15, r4)
96 xor r14,r14,r0
97 xor r5,r5,r14 /* xor next 4 keys */
98 xor r6,r6,r5
99 xor r7,r7,r6
100 xor r8,r8,r7
101 stw r5,0(r3) /* store next 4 keys */
102 stw r6,4(r3)
103 stw r7,8(r3)
104 stw r8,12(r3)
105 GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */
106 subi r16,r16,1
107 cmpwi r16,0
108 bt eq,ppc_expand_128_end
109 b ppc_expand_128_loop
110ppc_expand_128_end:
111 FINALIZE_KEY
112 blr
113
114/*
115 * ppc_expand_key_192(u32 *key_enc, const u8 *key)
116 *
117 * Expand 192 bit key into 208 bytes encryption key. It consists of key
118 * itself plus 12 rounds with 16 bytes each
119 *
120 */
121_GLOBAL(ppc_expand_key_192)
122 INITIALIZE_KEY
123 LOAD_KEY(r5,r4,0)
124 LOAD_KEY(r6,r4,4)
125 LOAD_KEY(r7,r4,8)
126 LOAD_KEY(r8,r4,12)
127 LOAD_KEY(r9,r4,16)
128 LOAD_KEY(r10,r4,20)
129 stw r5,0(r3)
130 stw r6,4(r3)
131 stw r7,8(r3)
132 stw r8,12(r3)
133 stw r9,16(r3)
134 stw r10,20(r3)
135 li r16,8 /* 8 expansion rounds */
136 lis r0,0x0100 /* RCO(1) */
137ppc_expand_192_loop:
138 addi r3,r3,24
139 mr r14,r10 /* apply LS_BOX to 6th temp */
140 rotlwi r14,r14,8
141 LS_BOX(r14, r15, r4)
142 xor r14,r14,r0
143 xor r5,r5,r14 /* xor next 6 keys */
144 xor r6,r6,r5
145 xor r7,r7,r6
146 xor r8,r8,r7
147 xor r9,r9,r8
148 xor r10,r10,r9
149 stw r5,0(r3)
150 stw r6,4(r3)
151 stw r7,8(r3)
152 stw r8,12(r3)
153 subi r16,r16,1
154 cmpwi r16,0 /* last round early kick out */
155 bt eq,ppc_expand_192_end
156 stw r9,16(r3)
157 stw r10,20(r3)
158 GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */
159 b ppc_expand_192_loop
160ppc_expand_192_end:
161 FINALIZE_KEY
162 blr
163
164/*
165 * ppc_expand_key_256(u32 *key_enc, const u8 *key)
166 *
167 * Expand 256 bit key into 240 bytes encryption key. It consists of key
168 * itself plus 14 rounds with 16 bytes each
169 *
170 */
171_GLOBAL(ppc_expand_key_256)
172 INITIALIZE_KEY
173 LOAD_KEY(r5,r4,0)
174 LOAD_KEY(r6,r4,4)
175 LOAD_KEY(r7,r4,8)
176 LOAD_KEY(r8,r4,12)
177 LOAD_KEY(r9,r4,16)
178 LOAD_KEY(r10,r4,20)
179 LOAD_KEY(r11,r4,24)
180 LOAD_KEY(r12,r4,28)
181 stw r5,0(r3)
182 stw r6,4(r3)
183 stw r7,8(r3)
184 stw r8,12(r3)
185 stw r9,16(r3)
186 stw r10,20(r3)
187 stw r11,24(r3)
188 stw r12,28(r3)
189 li r16,7 /* 7 expansion rounds */
190 lis r0,0x0100 /* RCO(1) */
191ppc_expand_256_loop:
192 addi r3,r3,32
193 mr r14,r12 /* apply LS_BOX to 8th temp */
194 rotlwi r14,r14,8
195 LS_BOX(r14, r15, r4)
196 xor r14,r14,r0
197 xor r5,r5,r14 /* xor 4 keys */
198 xor r6,r6,r5
199 xor r7,r7,r6
200 xor r8,r8,r7
201 mr r14,r8
202 LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */
203 xor r9,r9,r14 /* xor 4 keys */
204 xor r10,r10,r9
205 xor r11,r11,r10
206 xor r12,r12,r11
207 stw r5,0(r3)
208 stw r6,4(r3)
209 stw r7,8(r3)
210 stw r8,12(r3)
211 subi r16,r16,1
212 cmpwi r16,0 /* last round early kick out */
213 bt eq,ppc_expand_256_end
214 stw r9,16(r3)
215 stw r10,20(r3)
216 stw r11,24(r3)
217 stw r12,28(r3)
218 GF8_MUL(r0, r0, r4, r14)
219 b ppc_expand_256_loop
220ppc_expand_256_end:
221 FINALIZE_KEY
222 blr
223
224/*
225 * ppc_generate_decrypt_key: derive decryption key from encryption key
226 * number of bytes to handle are calculated from length of key (16/24/32)
227 *
228 */
229_GLOBAL(ppc_generate_decrypt_key)
230 addi r6,r5,24
231 slwi r6,r6,2
232 lwzx r7,r4,r6 /* first/last 4 words are same */
233 stw r7,0(r3)
234 lwz r7,0(r4)
235 stwx r7,r3,r6
236 addi r6,r6,4
237 lwzx r7,r4,r6
238 stw r7,4(r3)
239 lwz r7,4(r4)
240 stwx r7,r3,r6
241 addi r6,r6,4
242 lwzx r7,r4,r6
243 stw r7,8(r3)
244 lwz r7,8(r4)
245 stwx r7,r3,r6
246 addi r6,r6,4
247 lwzx r7,r4,r6
248 stw r7,12(r3)
249 lwz r7,12(r4)
250 stwx r7,r3,r6
251 addi r3,r3,16
252 add r4,r4,r6
253 subi r4,r4,28
254 addi r5,r5,20
255 srwi r5,r5,2
256ppc_generate_decrypt_block:
257 li r6,4
258 mtctr r6
259ppc_generate_decrypt_word:
260 lwz r6,0(r4)
261 GF8_MUL(r7, r6, r0, r7)
262 GF8_MUL(r8, r7, r0, r8)
263 GF8_MUL(r9, r8, r0, r9)
264 xor r10,r9,r6
265 xor r11,r7,r8
266 xor r11,r11,r9
267 xor r12,r7,r10
268 rotrwi r12,r12,24
269 xor r11,r11,r12
270 xor r12,r8,r10
271 rotrwi r12,r12,16
272 xor r11,r11,r12
273 rotrwi r12,r10,8
274 xor r11,r11,r12
275 stw r11,0(r3)
276 addi r3,r3,4
277 addi r4,r4,4
278 bdnz ppc_generate_decrypt_word
279 subi r4,r4,32
280 subi r5,r5,1
281 cmpwi r5,0
282 bt gt,ppc_generate_decrypt_block
283 blr
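The word transform in the loop above (the chain of GF8_MUL calls plus rotations) is InvMixColumns applied to each round-key word, which is what lets the decryption side reuse the expanded encryption key as an equivalent inverse key schedule. A hedged C sketch of the per-word transform, assuming a big-endian packed word:

	/* Sketch (illustration only): InvMixColumns on one big-endian packed
	 * round-key word, matching the 0e/0b/0d/09 pattern computed above. */
	#include <stdint.h>

	static uint32_t xtime4(uint32_t w)		/* multiply each byte by 2 */
	{
		uint32_t hi = (w & 0x80808080u) >> 7;

		return ((w & 0x7f7f7f7fu) << 1) ^ (hi * 0x1bu);
	}

	static uint32_t inv_mix_column(uint32_t w)
	{
		uint32_t w2 = xtime4(w), w4 = xtime4(w2), w8 = xtime4(w4);
		uint32_t w9 = w8 ^ w;			/* 0x09 * w */
		uint32_t wb = w8 ^ w2 ^ w;		/* 0x0b * w */
		uint32_t wd = w8 ^ w4 ^ w;		/* 0x0d * w */
		uint32_t we = w8 ^ w4 ^ w2;		/* 0x0e * w */

		return we ^ ((wb >> 24) | (wb << 8))	/* rotate right by 24 */
			  ^ ((wd >> 16) | (wd << 16))	/* rotate right by 16 */
			  ^ ((w9 >> 8) | (w9 << 24));	/* rotate right by 8 */
	}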
diff --git a/arch/powerpc/crypto/aes-spe-modes.S b/arch/powerpc/crypto/aes-spe-modes.S
new file mode 100644
index 000000000000..ad48032ca8e0
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-modes.S
@@ -0,0 +1,630 @@
1/*
2 * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <asm/ppc_asm.h>
14#include "aes-spe-regs.h"
15
16#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
17
18#define LOAD_DATA(reg, off) \
19 lwz reg,off(rSP); /* load with offset */
20#define SAVE_DATA(reg, off) \
21 stw reg,off(rDP); /* save with offset */
22#define NEXT_BLOCK \
23 addi rSP,rSP,16; /* increment pointers per block */ \
24 addi rDP,rDP,16;
25#define LOAD_IV(reg, off) \
26 lwz reg,off(rIP); /* IV loading with offset */
27#define SAVE_IV(reg, off) \
28 stw reg,off(rIP); /* IV saving with offset */
29#define START_IV /* nothing to reset */
30#define CBC_DEC 16 /* CBC decrement per block */
31#define CTR_DEC 1 /* CTR decrement one byte */
32
33#else /* Macros for little endian */
34
35#define LOAD_DATA(reg, off) \
36 lwbrx reg,0,rSP; /* load reversed */ \
37 addi rSP,rSP,4; /* and increment pointer */
38#define SAVE_DATA(reg, off) \
39 stwbrx reg,0,rDP; /* save reversed */ \
40 addi rDP,rDP,4; /* and increment pointer */
41#define NEXT_BLOCK /* nothing to do */
42#define LOAD_IV(reg, off) \
43 lwbrx reg,0,rIP; /* load reversed */ \
44 addi rIP,rIP,4; /* and increment pointer */
45#define SAVE_IV(reg, off) \
46 stwbrx reg,0,rIP; /* store reversed */ \
47 addi rIP,rIP,4; /* and increment pointer */
48#define START_IV \
49 subi rIP,rIP,16; /* must reset pointer */
50#define CBC_DEC 32 /* 2 blocks because of incs */
51#define CTR_DEC 17 /* 1 block because of incs */
52
53#endif
54
55#define SAVE_0_REGS
56#define LOAD_0_REGS
57
58#define SAVE_4_REGS \
59 stw rI0,96(r1); /* save 32 bit registers */ \
60 stw rI1,100(r1); \
61 stw rI2,104(r1); \
62 stw rI3,108(r1);
63
64#define LOAD_4_REGS \
65 lwz rI0,96(r1); /* restore 32 bit registers */ \
66 lwz rI1,100(r1); \
67 lwz rI2,104(r1); \
68 lwz rI3,108(r1);
69
70#define SAVE_8_REGS \
71 SAVE_4_REGS \
72 stw rG0,112(r1); /* save 32 bit registers */ \
73 stw rG1,116(r1); \
74 stw rG2,120(r1); \
75 stw rG3,124(r1);
76
77#define LOAD_8_REGS \
78 LOAD_4_REGS \
79 lwz rG0,112(r1); /* restore 32 bit registers */ \
80 lwz rG1,116(r1); \
81 lwz rG2,120(r1); \
82 lwz rG3,124(r1);
83
84#define INITIALIZE_CRYPT(tab,nr32bitregs) \
85 mflr r0; \
86 stwu r1,-160(r1); /* create stack frame */ \
87 lis rT0,tab@h; /* en-/decryption table pointer */ \
88 stw r0,8(r1); /* save link register */ \
89 ori rT0,rT0,tab@l; \
90 evstdw r14,16(r1); \
91 mr rKS,rKP; \
92 evstdw r15,24(r1); /* We must save non volatile */ \
93 evstdw r16,32(r1); /* registers. Take the chance */ \
94 evstdw r17,40(r1); /* and save the SPE part too */ \
95 evstdw r18,48(r1); \
96 evstdw r19,56(r1); \
97 evstdw r20,64(r1); \
98 evstdw r21,72(r1); \
99 evstdw r22,80(r1); \
100 evstdw r23,88(r1); \
101 SAVE_##nr32bitregs##_REGS
102
103#define FINALIZE_CRYPT(nr32bitregs) \
104 lwz r0,8(r1); \
105 evldw r14,16(r1); /* restore SPE registers */ \
106 evldw r15,24(r1); \
107 evldw r16,32(r1); \
108 evldw r17,40(r1); \
109 evldw r18,48(r1); \
110 evldw r19,56(r1); \
111 evldw r20,64(r1); \
112 evldw r21,72(r1); \
113 evldw r22,80(r1); \
114 evldw r23,88(r1); \
115 LOAD_##nr32bitregs##_REGS \
116 mtlr r0; /* restore link register */ \
117 xor r0,r0,r0; \
118 stw r0,16(r1); /* delete sensitive data */ \
119 stw r0,24(r1); /* that we might have pushed */ \
120 stw r0,32(r1); /* from another context that runs */ \
121 stw r0,40(r1); /* the same code */ \
122 stw r0,48(r1); \
123 stw r0,56(r1); \
124 stw r0,64(r1); \
125 stw r0,72(r1); \
126 stw r0,80(r1); \
127 stw r0,88(r1); \
128 addi r1,r1,160; /* cleanup stack frame */
129
130#define ENDIAN_SWAP(t0, t1, s0, s1) \
131 rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
132 rotrwi t1,s1,8; \
133 rlwimi t0,s0,8,8,15; \
134 rlwimi t1,s1,8,8,15; \
135 rlwimi t0,s0,8,24,31; \
136 rlwimi t1,s1,8,24,31;
137
138#define GF128_MUL(d0, d1, d2, d3, t0) \
139 li t0,0x87; /* multiplication in GF128 */ \
140 cmpwi d3,-1; \
141 iselgt t0,0,t0; \
142 rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
143 rotlwi d3,d3,1; \
144 rlwimi d2,d1,0,0,0; \
145 rotlwi d2,d2,1; \
146 rlwimi d1,d0,0,0,0; \
147 slwi d0,d0,1; /* shift left 128 bit */ \
148 rotlwi d1,d1,1; \
149 xor d0,d0,t0;
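The GF128_MUL macro above doubles the 128-bit XTS tweak in GF(2^128), folding the carry out of the top bit back in as the reduction constant 0x87. A small C sketch of the same step, assuming w[3] holds the most significant 32 bits and w[0] the least significant:

	/* Illustration only: multiply a 128-bit tweak by x in GF(2^128)
	 * with the XTS reduction polynomial (constant 0x87). */
	#include <stdint.h>

	static void gf128_mul_x(uint32_t w[4])
	{
		uint32_t carry = (w[3] >> 31) ? 0x87u : 0u;	/* reduction term */

		w[3] = (w[3] << 1) | (w[2] >> 31);		/* shift left by one bit */
		w[2] = (w[2] << 1) | (w[1] >> 31);
		w[1] = (w[1] << 1) | (w[0] >> 31);
		w[0] = (w[0] << 1) ^ carry;			/* fold the carry back in */
	}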
150
151#define START_KEY(d0, d1, d2, d3) \
152 lwz rW0,0(rKP); \
153 mtctr rRR; \
154 lwz rW1,4(rKP); \
155 lwz rW2,8(rKP); \
156 lwz rW3,12(rKP); \
157 xor rD0,d0,rW0; \
158 xor rD1,d1,rW1; \
159 xor rD2,d2,rW2; \
160 xor rD3,d3,rW3;
161
162/*
163 * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
164 * u32 rounds)
165 *
166 * called from glue layer to encrypt a single 16 byte block
167 * round values are AES128 = 4, AES192 = 5, AES256 = 6
168 *
169 */
170_GLOBAL(ppc_encrypt_aes)
171 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
172 LOAD_DATA(rD0, 0)
173 LOAD_DATA(rD1, 4)
174 LOAD_DATA(rD2, 8)
175 LOAD_DATA(rD3, 12)
176 START_KEY(rD0, rD1, rD2, rD3)
177 bl ppc_encrypt_block
178 xor rD0,rD0,rW0
179 SAVE_DATA(rD0, 0)
180 xor rD1,rD1,rW1
181 SAVE_DATA(rD1, 4)
182 xor rD2,rD2,rW2
183 SAVE_DATA(rD2, 8)
184 xor rD3,rD3,rW3
185 SAVE_DATA(rD3, 12)
186 FINALIZE_CRYPT(0)
187 blr
188
189/*
190 * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
191 * u32 rounds)
192 *
193 * called from glue layer to decrypt a single 16 byte block
194 * round values are AES128 = 4, AES192 = 5, AES256 = 6
195 *
196 */
197_GLOBAL(ppc_decrypt_aes)
198 INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
199 LOAD_DATA(rD0, 0)
200 addi rT1,rT0,4096
201 LOAD_DATA(rD1, 4)
202 LOAD_DATA(rD2, 8)
203 LOAD_DATA(rD3, 12)
204 START_KEY(rD0, rD1, rD2, rD3)
205 bl ppc_decrypt_block
206 xor rD0,rD0,rW0
207 SAVE_DATA(rD0, 0)
208 xor rD1,rD1,rW1
209 SAVE_DATA(rD1, 4)
210 xor rD2,rD2,rW2
211 SAVE_DATA(rD2, 8)
212 xor rD3,rD3,rW3
213 SAVE_DATA(rD3, 12)
214 FINALIZE_CRYPT(0)
215 blr
216
217/*
218 * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
219 * u32 rounds, u32 bytes);
220 *
221 * called from glue layer to encrypt multiple blocks via ECB
222 * Bytes must be larger than or equal to 16 and only whole blocks are
223 * processed. Round values are AES128 = 4, AES192 = 5 and
224 * AES256 = 6
225 *
226 */
227_GLOBAL(ppc_encrypt_ecb)
228 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
229ppc_encrypt_ecb_loop:
230 LOAD_DATA(rD0, 0)
231 mr rKP,rKS
232 LOAD_DATA(rD1, 4)
233 subi rLN,rLN,16
234 LOAD_DATA(rD2, 8)
235 cmpwi rLN,15
236 LOAD_DATA(rD3, 12)
237 START_KEY(rD0, rD1, rD2, rD3)
238 bl ppc_encrypt_block
239 xor rD0,rD0,rW0
240 SAVE_DATA(rD0, 0)
241 xor rD1,rD1,rW1
242 SAVE_DATA(rD1, 4)
243 xor rD2,rD2,rW2
244 SAVE_DATA(rD2, 8)
245 xor rD3,rD3,rW3
246 SAVE_DATA(rD3, 12)
247 NEXT_BLOCK
248 bt gt,ppc_encrypt_ecb_loop
249 FINALIZE_CRYPT(0)
250 blr
251
252/*
253 * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
254 * u32 rounds, u32 bytes);
255 *
256 * called from glue layer to decrypt multiple blocks via ECB
257 * Bytes must be larger than or equal to 16 and only whole blocks are
258 * processed. Round values are AES128 = 4, AES192 = 5 and
259 * AES256 = 6
260 *
261 */
262_GLOBAL(ppc_decrypt_ecb)
263 INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
264 addi rT1,rT0,4096
265ppc_decrypt_ecb_loop:
266 LOAD_DATA(rD0, 0)
267 mr rKP,rKS
268 LOAD_DATA(rD1, 4)
269 subi rLN,rLN,16
270 LOAD_DATA(rD2, 8)
271 cmpwi rLN,15
272 LOAD_DATA(rD3, 12)
273 START_KEY(rD0, rD1, rD2, rD3)
274 bl ppc_decrypt_block
275 xor rD0,rD0,rW0
276 SAVE_DATA(rD0, 0)
277 xor rD1,rD1,rW1
278 SAVE_DATA(rD1, 4)
279 xor rD2,rD2,rW2
280 SAVE_DATA(rD2, 8)
281 xor rD3,rD3,rW3
282 SAVE_DATA(rD3, 12)
283 NEXT_BLOCK
284 bt gt,ppc_decrypt_ecb_loop
285 FINALIZE_CRYPT(0)
286 blr
287
288/*
289 * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
290 * u32 rounds, u32 bytes, u8 *iv);
291 *
292 * called from glue layer to encrypt multiple blocks via CBC
293 * Bytes must be larger than or equal to 16 and only whole blocks are
294 * processed. Round values are AES128 = 4, AES192 = 5 and
295 * AES256 = 6
296 *
297 */
298_GLOBAL(ppc_encrypt_cbc)
299 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
300 LOAD_IV(rI0, 0)
301 LOAD_IV(rI1, 4)
302 LOAD_IV(rI2, 8)
303 LOAD_IV(rI3, 12)
304ppc_encrypt_cbc_loop:
305 LOAD_DATA(rD0, 0)
306 mr rKP,rKS
307 LOAD_DATA(rD1, 4)
308 subi rLN,rLN,16
309 LOAD_DATA(rD2, 8)
310 cmpwi rLN,15
311 LOAD_DATA(rD3, 12)
312 xor rD0,rD0,rI0
313 xor rD1,rD1,rI1
314 xor rD2,rD2,rI2
315 xor rD3,rD3,rI3
316 START_KEY(rD0, rD1, rD2, rD3)
317 bl ppc_encrypt_block
318 xor rI0,rD0,rW0
319 SAVE_DATA(rI0, 0)
320 xor rI1,rD1,rW1
321 SAVE_DATA(rI1, 4)
322 xor rI2,rD2,rW2
323 SAVE_DATA(rI2, 8)
324 xor rI3,rD3,rW3
325 SAVE_DATA(rI3, 12)
326 NEXT_BLOCK
327 bt gt,ppc_encrypt_cbc_loop
328 START_IV
329 SAVE_IV(rI0, 0)
330 SAVE_IV(rI1, 4)
331 SAVE_IV(rI2, 8)
332 SAVE_IV(rI3, 12)
333 FINALIZE_CRYPT(4)
334 blr
335
336/*
337 * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
338 * u32 rounds, u32 bytes, u8 *iv);
339 *
340 * called from glue layer to decrypt multiple blocks via CBC
341 * round values are AES128 = 4, AES192 = 5, AES256 = 6
342 *
343 */
344_GLOBAL(ppc_decrypt_cbc)
345 INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
346 li rT1,15
347 LOAD_IV(rI0, 0)
348 andc rLN,rLN,rT1
349 LOAD_IV(rI1, 4)
350 subi rLN,rLN,16
351 LOAD_IV(rI2, 8)
352 add rSP,rSP,rLN /* reverse processing */
353 LOAD_IV(rI3, 12)
354 add rDP,rDP,rLN
355 LOAD_DATA(rD0, 0)
356 addi rT1,rT0,4096
357 LOAD_DATA(rD1, 4)
358 LOAD_DATA(rD2, 8)
359 LOAD_DATA(rD3, 12)
360 START_IV
361 SAVE_IV(rD0, 0)
362 SAVE_IV(rD1, 4)
363 SAVE_IV(rD2, 8)
364 cmpwi rLN,16
365 SAVE_IV(rD3, 12)
366 bt lt,ppc_decrypt_cbc_end
367ppc_decrypt_cbc_loop:
368 mr rKP,rKS
369 START_KEY(rD0, rD1, rD2, rD3)
370 bl ppc_decrypt_block
371 subi rLN,rLN,16
372 subi rSP,rSP,CBC_DEC
373 xor rW0,rD0,rW0
374 LOAD_DATA(rD0, 0)
375 xor rW1,rD1,rW1
376 LOAD_DATA(rD1, 4)
377 xor rW2,rD2,rW2
378 LOAD_DATA(rD2, 8)
379 xor rW3,rD3,rW3
380 LOAD_DATA(rD3, 12)
381 xor rW0,rW0,rD0
382 SAVE_DATA(rW0, 0)
383 xor rW1,rW1,rD1
384 SAVE_DATA(rW1, 4)
385 xor rW2,rW2,rD2
386 SAVE_DATA(rW2, 8)
387 xor rW3,rW3,rD3
388 SAVE_DATA(rW3, 12)
389 cmpwi rLN,15
390 subi rDP,rDP,CBC_DEC
391 bt gt,ppc_decrypt_cbc_loop
392ppc_decrypt_cbc_end:
393 mr rKP,rKS
394 START_KEY(rD0, rD1, rD2, rD3)
395 bl ppc_decrypt_block
396 xor rW0,rW0,rD0
397 xor rW1,rW1,rD1
398 xor rW2,rW2,rD2
399 xor rW3,rW3,rD3
400 xor rW0,rW0,rI0 /* decrypt with initial IV */
401 SAVE_DATA(rW0, 0)
402 xor rW1,rW1,rI1
403 SAVE_DATA(rW1, 4)
404 xor rW2,rW2,rI2
405 SAVE_DATA(rW2, 8)
406 xor rW3,rW3,rI3
407 SAVE_DATA(rW3, 12)
408 FINALIZE_CRYPT(4)
409 blr
410
411/*
412 * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
413 * u32 rounds, u32 bytes, u8 *iv);
414 *
415 * called from glue layer to encrypt/decrypt multiple blocks
416 * via CTR. Number of bytes does not need to be a multiple of
417 * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
418 *
419 */
420_GLOBAL(ppc_crypt_ctr)
421 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
422 LOAD_IV(rI0, 0)
423 LOAD_IV(rI1, 4)
424 LOAD_IV(rI2, 8)
425 cmpwi rLN,16
426 LOAD_IV(rI3, 12)
427 START_IV
428 bt lt,ppc_crypt_ctr_partial
429ppc_crypt_ctr_loop:
430 mr rKP,rKS
431 START_KEY(rI0, rI1, rI2, rI3)
432 bl ppc_encrypt_block
433 xor rW0,rD0,rW0
434 xor rW1,rD1,rW1
435 xor rW2,rD2,rW2
436 xor rW3,rD3,rW3
437 LOAD_DATA(rD0, 0)
438 subi rLN,rLN,16
439 LOAD_DATA(rD1, 4)
440 LOAD_DATA(rD2, 8)
441 LOAD_DATA(rD3, 12)
442 xor rD0,rD0,rW0
443 SAVE_DATA(rD0, 0)
444 xor rD1,rD1,rW1
445 SAVE_DATA(rD1, 4)
446 xor rD2,rD2,rW2
447 SAVE_DATA(rD2, 8)
448 xor rD3,rD3,rW3
449 SAVE_DATA(rD3, 12)
450 addic rI3,rI3,1 /* increase counter */
451 addze rI2,rI2
452 addze rI1,rI1
453 addze rI0,rI0
454 NEXT_BLOCK
455 cmpwi rLN,15
456 bt gt,ppc_crypt_ctr_loop
457ppc_crypt_ctr_partial:
458 cmpwi rLN,0
459 bt eq,ppc_crypt_ctr_end
460 mr rKP,rKS
461 START_KEY(rI0, rI1, rI2, rI3)
462 bl ppc_encrypt_block
463 xor rW0,rD0,rW0
464 SAVE_IV(rW0, 0)
465 xor rW1,rD1,rW1
466 SAVE_IV(rW1, 4)
467 xor rW2,rD2,rW2
468 SAVE_IV(rW2, 8)
469 xor rW3,rD3,rW3
470 SAVE_IV(rW3, 12)
471 mtctr rLN
472 subi rIP,rIP,CTR_DEC
473 subi rSP,rSP,1
474 subi rDP,rDP,1
475ppc_crypt_ctr_xorbyte:
476 lbzu rW4,1(rIP) /* bytewise xor for partial block */
477 lbzu rW5,1(rSP)
478 xor rW4,rW4,rW5
479 stbu rW4,1(rDP)
480 bdnz ppc_crypt_ctr_xorbyte
481 subf rIP,rLN,rIP
482 addi rIP,rIP,1
483 addic rI3,rI3,1
484 addze rI2,rI2
485 addze rI1,rI1
486 addze rI0,rI0
487ppc_crypt_ctr_end:
488 SAVE_IV(rI0, 0)
489 SAVE_IV(rI1, 4)
490 SAVE_IV(rI2, 8)
491 SAVE_IV(rI3, 12)
492 FINALIZE_CRYPT(4)
493 blr
494
495/*
496 * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
497 * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
498 *
499 * called from glue layer to encrypt multiple blocks via XTS
500 * If key_twk is given, the initial IV encryption will be
501 * processed too. Round values are AES128 = 4, AES192 = 5,
502 * AES256 = 6
503 *
504 */
505_GLOBAL(ppc_encrypt_xts)
506 INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
507 LOAD_IV(rI0, 0)
508 LOAD_IV(rI1, 4)
509 LOAD_IV(rI2, 8)
510 cmpwi rKT,0
511 LOAD_IV(rI3, 12)
512 bt eq,ppc_encrypt_xts_notweak
513 mr rKP,rKT
514 START_KEY(rI0, rI1, rI2, rI3)
515 bl ppc_encrypt_block
516 xor rI0,rD0,rW0
517 xor rI1,rD1,rW1
518 xor rI2,rD2,rW2
519 xor rI3,rD3,rW3
520ppc_encrypt_xts_notweak:
521 ENDIAN_SWAP(rG0, rG1, rI0, rI1)
522 ENDIAN_SWAP(rG2, rG3, rI2, rI3)
523ppc_encrypt_xts_loop:
524 LOAD_DATA(rD0, 0)
525 mr rKP,rKS
526 LOAD_DATA(rD1, 4)
527 subi rLN,rLN,16
528 LOAD_DATA(rD2, 8)
529 LOAD_DATA(rD3, 12)
530 xor rD0,rD0,rI0
531 xor rD1,rD1,rI1
532 xor rD2,rD2,rI2
533 xor rD3,rD3,rI3
534 START_KEY(rD0, rD1, rD2, rD3)
535 bl ppc_encrypt_block
536 xor rD0,rD0,rW0
537 xor rD1,rD1,rW1
538 xor rD2,rD2,rW2
539 xor rD3,rD3,rW3
540 xor rD0,rD0,rI0
541 SAVE_DATA(rD0, 0)
542 xor rD1,rD1,rI1
543 SAVE_DATA(rD1, 4)
544 xor rD2,rD2,rI2
545 SAVE_DATA(rD2, 8)
546 xor rD3,rD3,rI3
547 SAVE_DATA(rD3, 12)
548 GF128_MUL(rG0, rG1, rG2, rG3, rW0)
549 ENDIAN_SWAP(rI0, rI1, rG0, rG1)
550 ENDIAN_SWAP(rI2, rI3, rG2, rG3)
551 cmpwi rLN,0
552 NEXT_BLOCK
553 bt gt,ppc_encrypt_xts_loop
554 START_IV
555 SAVE_IV(rI0, 0)
556 SAVE_IV(rI1, 4)
557 SAVE_IV(rI2, 8)
558 SAVE_IV(rI3, 12)
559 FINALIZE_CRYPT(8)
560 blr
561
562/*
563 * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
564 * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
565 *
566 * called from glue layer to decrypt multiple blocks via XTS
567 * If key_twk is given, the initial IV encryption will be
568 * processed too. Round values are AES128 = 4, AES192 = 5,
569 * AES256 = 6
570 *
571 */
572_GLOBAL(ppc_decrypt_xts)
573 INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
574 LOAD_IV(rI0, 0)
575 addi rT1,rT0,4096
576 LOAD_IV(rI1, 4)
577 LOAD_IV(rI2, 8)
578 cmpwi rKT,0
579 LOAD_IV(rI3, 12)
580 bt eq,ppc_decrypt_xts_notweak
581 subi rT0,rT0,4096
582 mr rKP,rKT
583 START_KEY(rI0, rI1, rI2, rI3)
584 bl ppc_encrypt_block
585 xor rI0,rD0,rW0
586 xor rI1,rD1,rW1
587 xor rI2,rD2,rW2
588 xor rI3,rD3,rW3
589 addi rT0,rT0,4096
590ppc_decrypt_xts_notweak:
591 ENDIAN_SWAP(rG0, rG1, rI0, rI1)
592 ENDIAN_SWAP(rG2, rG3, rI2, rI3)
593ppc_decrypt_xts_loop:
594 LOAD_DATA(rD0, 0)
595 mr rKP,rKS
596 LOAD_DATA(rD1, 4)
597 subi rLN,rLN,16
598 LOAD_DATA(rD2, 8)
599 LOAD_DATA(rD3, 12)
600 xor rD0,rD0,rI0
601 xor rD1,rD1,rI1
602 xor rD2,rD2,rI2
603 xor rD3,rD3,rI3
604 START_KEY(rD0, rD1, rD2, rD3)
605 bl ppc_decrypt_block
606 xor rD0,rD0,rW0
607 xor rD1,rD1,rW1
608 xor rD2,rD2,rW2
609 xor rD3,rD3,rW3
610 xor rD0,rD0,rI0
611 SAVE_DATA(rD0, 0)
612 xor rD1,rD1,rI1
613 SAVE_DATA(rD1, 4)
614 xor rD2,rD2,rI2
615 SAVE_DATA(rD2, 8)
616 xor rD3,rD3,rI3
617 SAVE_DATA(rD3, 12)
618 GF128_MUL(rG0, rG1, rG2, rG3, rW0)
619 ENDIAN_SWAP(rI0, rI1, rG0, rG1)
620 ENDIAN_SWAP(rI2, rI3, rG2, rG3)
621 cmpwi rLN,0
622 NEXT_BLOCK
623 bt gt,ppc_decrypt_xts_loop
624 START_IV
625 SAVE_IV(rI0, 0)
626 SAVE_IV(rI1, 4)
627 SAVE_IV(rI2, 8)
628 SAVE_IV(rI3, 12)
629 FINALIZE_CRYPT(8)
630 blr
diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h
new file mode 100644
index 000000000000..30d217b399c3
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-regs.h
@@ -0,0 +1,42 @@
1/*
2 * Common registers for PPC AES implementation
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#define rKS r0 /* copy of en-/decryption key pointer */
14#define rDP r3 /* destination pointer */
15#define rSP r4 /* source pointer */
16#define rKP r5 /* pointer to en-/decryption key pointer */
17#define rRR r6 /* en-/decryption rounds */
18#define rLN r7 /* length of data to be processed */
19#define rIP r8 /* pointer to IV (CBC/CTR/XTS modes) */
20#define rKT r9 /* pointer to tweak key (XTS mode) */
21#define rT0 r11 /* pointers to en-/decryption tables */
22#define rT1 r10
23#define rD0 r9 /* data */
24#define rD1 r14
25#define rD2 r12
26#define rD3 r15
27#define rW0 r16 /* working registers */
28#define rW1 r17
29#define rW2 r18
30#define rW3 r19
31#define rW4 r20
32#define rW5 r21
33#define rW6 r22
34#define rW7 r23
35#define rI0 r24 /* IV */
36#define rI1 r25
37#define rI2 r26
38#define rI3 r27
39#define rG0 r28 /* endian reversed tweak (XTS mode) */
40#define rG1 r29
41#define rG2 r30
42#define rG3 r31
diff --git a/arch/powerpc/crypto/aes-tab-4k.S b/arch/powerpc/crypto/aes-tab-4k.S
new file mode 100644
index 000000000000..701e60240dc3
--- /dev/null
+++ b/arch/powerpc/crypto/aes-tab-4k.S
@@ -0,0 +1,331 @@
1/*
2 * 4K AES tables for PPC AES implementation
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13/*
14 * These big endian AES encryption/decryption tables have been taken from
15 * crypto/aes_generic.c and are designed to be simply accessed by a combination
16 * of rlwimi/lwz instructions with a minimum of table registers (usually only
17 * one required). Thus they are aligned to 4K. The locality of rotated values
18 * is derived from the reduced offsets that are available in the SPE load
19 * instructions. E.g. evldw, evlwwsplat, ...
20 *
21 * For the safety-conscious it has to be noted that they might be vulnerable
22 * to cache timing attacks because of their size. Nevertheless in contrast to
23 * the generic tables they have been reduced from 16KB to 8KB + 256 bytes.
24 * This is quite a good tradeoff for low power devices (e.g. routers) without
25 * dedicated encryption hardware where we usually have no multiuser
26 * environment.
27 *
28 */
29
30#define R(a, b, c, d) \
31 0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a
32
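To make the R() macro above concrete: each invocation emits one table word plus its three byte rotations, so a single row of the table serves all four column positions. For example, the first entry of the encryption table below expands as follows (values derived from the macro, shown here only as an illustration):

	/* R(c6, 63, 63, a5) expands to these four .long values: */
	static const unsigned int ft_row_example[4] = {
		0xc66363a5, 0xa5c66363, 0x63a5c663, 0x6363a5c6
	};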
33.data
34.align 12
35.globl PPC_AES_4K_ENCTAB
36PPC_AES_4K_ENCTAB:
37/* encryption table, same as crypto_ft_tab in crypto/aes-generic.c */
38 .long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
39 .long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
40 .long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
41 .long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
42 .long R(60, 30, 30, 50), R(02, 01, 01, 03)
43 .long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
44 .long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
45 .long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
46 .long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
47 .long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
48 .long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
49 .long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
50 .long R(41, ad, ad, ec), R(b3, d4, d4, 67)
51 .long R(5f, a2, a2, fd), R(45, af, af, ea)
52 .long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
53 .long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
54 .long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
55 .long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
56 .long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
57 .long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
58 .long R(68, 34, 34, 5c), R(51, a5, a5, f4)
59 .long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
60 .long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
61 .long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
62 .long R(08, 04, 04, 0c), R(95, c7, c7, 52)
63 .long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
64 .long R(30, 18, 18, 28), R(37, 96, 96, a1)
65 .long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
66 .long R(0e, 07, 07, 09), R(24, 12, 12, 36)
67 .long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
68 .long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
69 .long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
70 .long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
71 .long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
72 .long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
73 .long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
74 .long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
75 .long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
76 .long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
77 .long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
78 .long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
79 .long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
80 .long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
81 .long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
82 .long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
83 .long R(67, be, be, d9), R(72, 39, 39, 4b)
84 .long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
85 .long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
86 .long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
87 .long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
88 .long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
89 .long R(66, 33, 33, 55), R(11, 85, 85, 94)
90 .long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
91 .long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
92 .long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
93 .long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
94 .long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
95 .long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
96 .long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
97 .long R(70, 38, 38, 48), R(f1, f5, f5, 04)
98 .long R(63, bc, bc, df), R(77, b6, b6, c1)
99 .long R(af, da, da, 75), R(42, 21, 21, 63)
100 .long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
101 .long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
102 .long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
103 .long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
104 .long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
105 .long R(88, 44, 44, cc), R(2e, 17, 17, 39)
106 .long R(93, c4, c4, 57), R(55, a7, a7, f2)
107 .long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
108 .long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
109 .long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
110 .long R(c0, 60, 60, a0), R(19, 81, 81, 98)
111 .long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
112 .long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
113 .long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
114 .long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
115 .long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
116 .long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
117 .long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
118 .long R(db, e0, e0, 3b), R(64, 32, 32, 56)
119 .long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
120 .long R(92, 49, 49, db), R(0c, 06, 06, 0a)
121 .long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
122 .long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
123 .long R(43, ac, ac, ef), R(c4, 62, 62, a6)
124 .long R(39, 91, 91, a8), R(31, 95, 95, a4)
125 .long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
126 .long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
127 .long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
128 .long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
129 .long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
130 .long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
131 .long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
132 .long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
133 .long R(47, ae, ae, e9), R(10, 08, 08, 18)
134 .long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
135 .long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
136 .long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
137 .long R(73, b4, b4, c7), R(97, c6, c6, 51)
138 .long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
139 .long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
140 .long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
141 .long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
142 .long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
143 .long R(71, b5, b5, c4), R(cc, 66, 66, aa)
144 .long R(90, 48, 48, d8), R(06, 03, 03, 05)
145 .long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
146 .long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
147 .long R(ae, 57, 57, f9), R(69, b9, b9, d0)
148 .long R(17, 86, 86, 91), R(99, c1, c1, 58)
149 .long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
150 .long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
151 .long R(2b, 98, 98, b3), R(22, 11, 11, 33)
152 .long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
153 .long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
154 .long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
155 .long R(15, 87, 87, 92), R(c9, e9, e9, 20)
156 .long R(87, ce, ce, 49), R(aa, 55, 55, ff)
157 .long R(50, 28, 28, 78), R(a5, df, df, 7a)
158 .long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
159 .long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
160 .long R(65, bf, bf, da), R(d7, e6, e6, 31)
161 .long R(84, 42, 42, c6), R(d0, 68, 68, b8)
162 .long R(82, 41, 41, c3), R(29, 99, 99, b0)
163 .long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
164 .long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
165 .long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
166.globl PPC_AES_4K_DECTAB
167PPC_AES_4K_DECTAB:
168/* decryption table, same as crypto_it_tab in crypto/aes-generic.c */
169 .long R(51, f4, a7, 50), R(7e, 41, 65, 53)
170 .long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
171 .long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
172 .long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
173 .long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
174 .long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
175 .long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
176 .long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
177 .long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
178 .long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
179 .long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
180 .long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
181 .long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
182 .long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
183 .long R(d4, be, 83, 2d), R(58, 74, 21, d3)
184 .long R(49, e0, 69, 29), R(8e, c9, c8, 44)
185 .long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
186 .long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
187 .long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
188 .long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
189 .long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
190 .long R(97, 51, 33, 60), R(62, 53, 7f, 45)
191 .long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
192 .long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
193 .long R(70, 48, 68, 58), R(8f, 45, fd, 19)
194 .long R(94, de, 6c, 87), R(52, 7b, f8, b7)
195 .long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
196 .long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
197 .long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
198 .long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
199 .long R(30, 28, 87, f2), R(23, bf, a5, b2)
200 .long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
201 .long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
202 .long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
203 .long R(65, da, f4, cd), R(06, 05, be, d5)
204 .long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
205 .long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
206 .long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
207 .long R(0b, 83, ec, 39), R(40, 60, ef, aa)
208 .long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
209 .long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
210 .long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
211 .long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
212 .long R(04, 06, d4, 6f), R(60, 50, 15, ff)
213 .long R(19, 98, fb, 24), R(d6, bd, e9, 97)
214 .long R(89, 40, 43, cc), R(67, d9, 9e, 77)
215 .long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
216 .long R(e7, 19, 5b, 38), R(79, c8, ee, db)
217 .long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
218 .long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
219 .long R(09, 80, 86, 83), R(32, 2b, ed, 48)
220 .long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
221 .long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
222 .long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
223 .long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
224 .long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
225 .long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
226 .long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
227 .long R(80, c0, c5, 4f), R(61, dc, 20, a2)
228 .long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
229 .long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
230 .long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
231 .long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
232 .long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
233 .long R(57, f1, 19, 85), R(af, 75, 07, 4c)
234 .long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
235 .long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
236 .long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
237 .long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
238 .long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
239 .long R(d7, 31, dc, ca), R(42, 63, 85, 10)
240 .long R(13, 97, 22, 40), R(84, c6, 11, 20)
241 .long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
242 .long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
243 .long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
244 .long R(0d, 86, 52, ec), R(77, c1, e3, d0)
245 .long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
246 .long R(11, 94, 48, fa), R(47, e9, 64, 22)
247 .long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
248 .long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
249 .long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
250 .long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
251 .long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
252 .long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
253 .long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
254 .long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
255 .long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
256 .long R(2e, 39, f7, 5e), R(82, c3, af, f5)
257 .long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
258 .long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
259 .long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
260 .long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
261 .long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
262 .long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
263 .long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
264 .long R(21, bc, cf, 08), R(ef, 15, e8, e6)
265 .long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
266 .long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
267 .long R(31, a4, b2, af), R(2a, 3f, 23, 31)
268 .long R(c6, a5, 94, 30), R(35, a2, 66, c0)
269 .long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
270 .long R(e0, 90, d0, b0), R(33, a7, d8, 15)
271 .long R(f1, 04, 98, 4a), R(41, ec, da, f7)
272 .long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
273 .long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
274 .long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
275 .long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
276 .long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
277 .long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
278 .long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
279 .long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
280 .long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
281 .long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
282 .long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
283 .long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
284 .long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
285 .long R(9c, d2, df, 59), R(55, f2, 73, 3f)
286 .long R(18, 14, ce, 79), R(73, c7, 37, bf)
287 .long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
288 .long R(df, 3d, 6f, 14), R(78, 44, db, 86)
289 .long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
290 .long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
291 .long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
292 .long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
293 .long R(39, a8, 01, 71), R(08, 0c, b3, de)
294 .long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
295 .long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
296 .long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
297.globl PPC_AES_4K_DECTAB2
298PPC_AES_4K_DECTAB2:
299/* decryption table, same as crypto_il_tab in crypto/aes-generic.c */
300 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
301 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
302 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
303 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
304 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
305 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
306 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
307 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
308 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
309 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
310 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
311 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
312 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
313 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
314 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
315 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
316 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
317 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
318 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
319 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
320 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
321 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
322 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
323 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
324 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
325 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
326 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
327 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
328 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
329 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
330 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
331 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
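
The layout produced by the R() macro above is easier to see in scalar form: each
entry of the big-endian forward (or inverse) table is stored four times, rotated
right by 0, 8, 16 and 24 bits, so a single 4K-aligned table serves all four byte
positions of a column. A minimal user-space sketch of that equivalence follows;
ft_be, enctab_4k and build_4k_table are illustrative names, not kernel symbols.

/* Sketch only: mirrors the expansion of R(a, b, c, d) into four rotations. */
#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned int n)
{
	n &= 31;
	return n ? (x >> n) | (x << (32 - n)) : x;
}

static uint32_t enctab_4k[256 * 4];

static void build_4k_table(const uint32_t ft_be[256])
{
	unsigned int i, r;

	for (i = 0; i < 256; i++)
		for (r = 0; r < 4; r++)
			enctab_4k[4 * i + r] = ror32(ft_be[i], 8 * r);
}

Because the table is 4K aligned its low twelve address bits are zero, which is
what lets the assembler code form a complete lookup address with a single
rlwimi into one table register, as the header comment describes.
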
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S
new file mode 100644
index 000000000000..10cdf5bceebb
--- /dev/null
+++ b/arch/powerpc/crypto/md5-asm.S
@@ -0,0 +1,243 @@
1/*
2 * Fast MD5 implementation for PPC
3 *
4 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12#include <asm/ppc_asm.h>
13#include <asm/asm-offsets.h>
14
15#define rHP r3
16#define rWP r4
17
18#define rH0 r0
19#define rH1 r6
20#define rH2 r7
21#define rH3 r5
22
23#define rW00 r8
24#define rW01 r9
25#define rW02 r10
26#define rW03 r11
27#define rW04 r12
28#define rW05 r14
29#define rW06 r15
30#define rW07 r16
31#define rW08 r17
32#define rW09 r18
33#define rW10 r19
34#define rW11 r20
35#define rW12 r21
36#define rW13 r22
37#define rW14 r23
38#define rW15 r24
39
40#define rT0 r25
41#define rT1 r26
42
43#define INITIALIZE \
44 PPC_STLU r1,-INT_FRAME_SIZE(r1); \
45 SAVE_8GPRS(14, r1); /* push registers onto stack */ \
46 SAVE_4GPRS(22, r1); \
47 SAVE_GPR(26, r1)
48
49#define FINALIZE \
50 REST_8GPRS(14, r1); /* pop registers from stack */ \
51 REST_4GPRS(22, r1); \
52 REST_GPR(26, r1); \
53 addi r1,r1,INT_FRAME_SIZE;
54
55#ifdef __BIG_ENDIAN__
56#define LOAD_DATA(reg, off) \
57 lwbrx reg,0,rWP; /* load data */
58#define INC_PTR \
59 addi rWP,rWP,4; /* increment per word */
60#define NEXT_BLOCK /* nothing to do */
61#else
62#define LOAD_DATA(reg, off) \
63 lwz reg,off(rWP); /* load data */
64#define INC_PTR /* nothing to do */
65#define NEXT_BLOCK \
66 addi rWP,rWP,64; /* increment per block */
67#endif
68
69#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
70 LOAD_DATA(w0, off) /* W */ \
71 and rT0,b,c; /* 1: f = b and c */ \
72 INC_PTR /* ptr++ */ \
73 andc rT1,d,b; /* 1: f' = ~b and d */ \
74 LOAD_DATA(w1, off+4) /* W */ \
75 or rT0,rT0,rT1; /* 1: f = f or f' */ \
76 addi w0,w0,k0l; /* 1: wk = w + k */ \
77 add a,a,rT0; /* 1: a = a + f */ \
78 addis w0,w0,k0h; /* 1: wk = w + k' */ \
79 addis w1,w1,k1h; /* 2: wk = w + k */ \
80 add a,a,w0; /* 1: a = a + wk */ \
81 addi w1,w1,k1l; /* 2: wk = w + k' */ \
82 rotrwi a,a,p; /* 1: a = a rotl x */ \
83 add d,d,w1; /* 2: a = a + wk */ \
84 add a,a,b; /* 1: a = a + b */ \
85 and rT0,a,b; /* 2: f = b and c */ \
86 andc rT1,c,a; /* 2: f' = ~b and d */ \
87 or rT0,rT0,rT1; /* 2: f = f or f' */ \
88 add d,d,rT0; /* 2: a = a + f */ \
89 INC_PTR /* ptr++ */ \
90 rotrwi d,d,q; /* 2: a = a rotl x */ \
91 add d,d,a; /* 2: a = a + b */
92
93#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
94 andc rT0,c,d; /* 1: f = c and ~d */ \
95 and rT1,b,d; /* 1: f' = b and d */ \
96 addi w0,w0,k0l; /* 1: wk = w + k */ \
97 or rT0,rT0,rT1; /* 1: f = f or f' */ \
98 addis w0,w0,k0h; /* 1: wk = w + k' */ \
99 add a,a,rT0; /* 1: a = a + f */ \
100 addi w1,w1,k1l; /* 2: wk = w + k */ \
101 add a,a,w0; /* 1: a = a + wk */ \
102 addis w1,w1,k1h; /* 2: wk = w + k' */ \
103 andc rT0,b,c; /* 2: f = c and ~d */ \
104 rotrwi a,a,p; /* 1: a = a rotl x */ \
105 add a,a,b; /* 1: a = a + b */ \
106 add d,d,w1; /* 2: a = a + wk */ \
107 and rT1,a,c; /* 2: f' = b and d */ \
108 or rT0,rT0,rT1; /* 2: f = f or f' */ \
109 add d,d,rT0; /* 2: a = a + f */ \
110 rotrwi d,d,q; /* 2: a = a rotl x */ \
111 add d,d,a /* 2: a = a + b */
112
113#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
114 xor rT0,b,c; /* 1: f' = b xor c */ \
115 addi w0,w0,k0l; /* 1: wk = w + k */ \
116 xor rT1,rT0,d; /* 1: f = f xor f' */ \
117 addis w0,w0,k0h; /* 1: wk = w + k' */ \
118 add a,a,rT1; /* 1: a = a + f */ \
119 addi w1,w1,k1l; /* 2: wk = w + k */ \
120 add a,a,w0; /* 1: a = a + wk */ \
121 addis w1,w1,k1h; /* 2: wk = w + k' */ \
122 rotrwi a,a,p; /* 1: a = a rotl x */ \
123 add d,d,w1; /* 2: a = a + wk */ \
124 add a,a,b; /* 1: a = a + b */ \
125 xor rT1,rT0,a; /* 2: f = b xor f' */ \
126 add d,d,rT1; /* 2: a = a + f */ \
127 rotrwi d,d,q; /* 2: a = a rotl x */ \
128 add d,d,a; /* 2: a = a + b */
129
130#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
131 addi w0,w0,k0l; /* 1: w = w + k */ \
132 orc rT0,b,d; /* 1: f = b or ~d */ \
133 addis w0,w0,k0h; /* 1: w = w + k' */ \
134 xor rT0,rT0,c; /* 1: f = f xor c */ \
135 add a,a,w0; /* 1: a = a + wk */ \
136 addi w1,w1,k1l; /* 2: w = w + k */ \
137 add a,a,rT0; /* 1: a = a + f */ \
138 addis w1,w1,k1h; /* 2: w = w + k' */ \
139 rotrwi a,a,p; /* 1: a = a rotl x */ \
140 add a,a,b; /* 1: a = a + b */ \
141 orc rT0,a,c; /* 2: f = b or ~d */ \
142 add d,d,w1; /* 2: a = a + wk */ \
143 xor rT0,rT0,b; /* 2: f = f xor c */ \
144 add d,d,rT0; /* 2: a = a + f */ \
145 rotrwi d,d,q; /* 2: a = a rotl x */ \
146 add d,d,a; /* 2: a = a + b */
147
148_GLOBAL(ppc_md5_transform)
149 INITIALIZE
150
151 mtctr r5
152 lwz rH0,0(rHP)
153 lwz rH1,4(rHP)
154 lwz rH2,8(rHP)
155 lwz rH3,12(rHP)
156
157ppc_md5_main:
158 R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
159 0xd76b, -23432, 0xe8c8, -18602)
160 R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
161 0x2420, 0x70db, 0xc1be, -12562)
162 R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
163 0xf57c, 0x0faf, 0x4788, -14806)
164 R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
165 0xa830, 0x4613, 0xfd47, -27391)
166 R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
167 0x6981, -26408, 0x8b45, -2129)
168 R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
169 0xffff, 0x5bb1, 0x895d, -10306)
170 R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
171 0x6b90, 0x1122, 0xfd98, 0x7193)
172 R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
173 0xa679, 0x438e, 0x49b4, 0x0821)
174
175 R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
176 0x0d56, 0x6e0c, 0x1810, 0x6d2d)
177 R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
178 0x9d02, -32109, 0x124c, 0x2332)
179 R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
180 0x8ea7, 0x4a33, 0x0245, -18270)
181 R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
182 0x8eee, -8608, 0xf258, -5095)
183 R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
184 0x969d, -10697, 0x1cbe, -15288)
185 R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
186 0x3317, 0x3e99, 0xdbd9, 0x7c15)
187 R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
188 0xac4b, 0x7772, 0xd8cf, 0x331d)
189 R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
190 0x6a28, 0x6dd8, 0x219a, 0x3b68)
191
192 R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
193 0x29cb, 0x28e5, 0x4218, -7788)
194 R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
195 0x473f, 0x06d1, 0x3aae, 0x3036)
196 R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
197 0xaea1, -15134, 0x640b, -11295)
198 R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
199 0x8f4c, 0x4887, 0xbc7c, -22499)
200 R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
201 0x7eb8, -27199, 0x00ea, 0x6050)
202 R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
203 0xe01a, 0x22fe, 0x4447, 0x69c5)
204 R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
205 0xb7f3, 0x0253, 0x59b1, 0x4d5b)
206 R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
207 0x4701, -27017, 0xc7bd, -19859)
208
209 R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
210 0x0988, -1462, 0x4c70, -19401)
211 R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
212 0xadaf, -5221, 0xfc99, 0x66f7)
213 R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
214 0x7e80, -16418, 0xba1e, -25587)
215 R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
216 0x4130, 0x380d, 0xe0c5, 0x738d)
217 lwz rW00,0(rHP)
218 R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
219 0xe837, -30770, 0xde8a, 0x69e8)
220 lwz rW14,4(rHP)
221 R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
222 0x9e79, 0x260f, 0x256d, -27941)
223 lwz rW12,8(rHP)
224 R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
225 0xab75, -20775, 0x4f9e, -28397)
226 lwz rW10,12(rHP)
227 R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
228 0x662b, 0x7c56, 0x11b2, 0x0358)
229
230 add rH0,rH0,rW00
231 stw rH0,0(rHP)
232 add rH1,rH1,rW14
233 stw rH1,4(rHP)
234 add rH2,rH2,rW12
235 stw rH2,8(rHP)
236 add rH3,rH3,rW10
237 stw rH3,12(rHP)
238 NEXT_BLOCK
239
240 bdnz ppc_md5_main
241
242 FINALIZE
243 blr
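
The k0h/k0l and k1h/k1l arguments in the round calls above encode each 32-bit
MD5 constant as an addis/addi immediate pair. Because addi sign-extends its
16-bit operand, a low half with bit 15 set appears as a negative number and the
high half is pre-incremented by one to compensate, which is why K[0] =
0xd76aa478 shows up as the pair (0xd76b, -23432). A stand-alone sketch of the
derivation; split_const() is an illustrative helper, not kernel code:

#include <stdint.h>
#include <stdio.h>

/* Derive the (high, low) immediate pair used by the addis/addi sequences. */
static void split_const(uint32_t k, unsigned int *hi, int *lo)
{
	*lo = (int16_t)(k & 0xffff);              /* sign-extended low half */
	*hi = (k >> 16) + ((k & 0x8000) ? 1 : 0); /* sign-extension fix-up  */
}

int main(void)
{
	unsigned int hi;
	int lo;

	split_const(0xd76aa478, &hi, &lo); /* MD5 K[0] */
	printf("0x%04x, %d\n", hi, lo);    /* prints: 0xd76b, -23432 */
	return 0;
}
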
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
new file mode 100644
index 000000000000..452fb4dc575f
--- /dev/null
+++ b/arch/powerpc/crypto/md5-glue.c
@@ -0,0 +1,165 @@
1/*
2 * Glue code for MD5 implementation for PPC assembler
3 *
4 * Based on generic implementation.
5 *
6 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
11 * any later version.
12 *
13 */
14
15#include <crypto/internal/hash.h>
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/mm.h>
19#include <linux/cryptohash.h>
20#include <linux/types.h>
21#include <crypto/md5.h>
22#include <asm/byteorder.h>
23
24extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
25
26static inline void ppc_md5_clear_context(struct md5_state *sctx)
27{
28 int count = sizeof(struct md5_state) >> 2;
29 u32 *ptr = (u32 *)sctx;
30
31 /* make sure we can clear the fast way */
32 BUILD_BUG_ON(sizeof(struct md5_state) % 4);
33 do { *ptr++ = 0; } while (--count);
34}
35
36static int ppc_md5_init(struct shash_desc *desc)
37{
38 struct md5_state *sctx = shash_desc_ctx(desc);
39
40 sctx->hash[0] = 0x67452301;
41 sctx->hash[1] = 0xefcdab89;
42 sctx->hash[2] = 0x98badcfe;
43 sctx->hash[3] = 0x10325476;
44 sctx->byte_count = 0;
45
46 return 0;
47}
48
49static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
50 unsigned int len)
51{
52 struct md5_state *sctx = shash_desc_ctx(desc);
53 const unsigned int offset = sctx->byte_count & 0x3f;
54 unsigned int avail = 64 - offset;
55 const u8 *src = data;
56
57 sctx->byte_count += len;
58
59 if (avail > len) {
60 memcpy((char *)sctx->block + offset, src, len);
61 return 0;
62 }
63
64 if (offset) {
65 memcpy((char *)sctx->block + offset, src, avail);
66 ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1);
67 len -= avail;
68 src += avail;
69 }
70
71 if (len > 63) {
72 ppc_md5_transform(sctx->hash, src, len >> 6);
73 src += len & ~0x3f;
74 len &= 0x3f;
75 }
76
77 memcpy((char *)sctx->block, src, len);
78 return 0;
79}
80
81static int ppc_md5_final(struct shash_desc *desc, u8 *out)
82{
83 struct md5_state *sctx = shash_desc_ctx(desc);
84 const unsigned int offset = sctx->byte_count & 0x3f;
85 const u8 *src = (const u8 *)sctx->block;
86 u8 *p = (u8 *)src + offset;
87 int padlen = 55 - offset;
88 __le64 *pbits = (__le64 *)((char *)sctx->block + 56);
89 __le32 *dst = (__le32 *)out;
90
91 *p++ = 0x80;
92
93 if (padlen < 0) {
94 memset(p, 0x00, padlen + sizeof (u64));
95 ppc_md5_transform(sctx->hash, src, 1);
96 p = (char *)sctx->block;
97 padlen = 56;
98 }
99
100 memset(p, 0, padlen);
101 *pbits = cpu_to_le64(sctx->byte_count << 3);
102 ppc_md5_transform(sctx->hash, src, 1);
103
104 dst[0] = cpu_to_le32(sctx->hash[0]);
105 dst[1] = cpu_to_le32(sctx->hash[1]);
106 dst[2] = cpu_to_le32(sctx->hash[2]);
107 dst[3] = cpu_to_le32(sctx->hash[3]);
108
109 ppc_md5_clear_context(sctx);
110 return 0;
111}
112
113static int ppc_md5_export(struct shash_desc *desc, void *out)
114{
115 struct md5_state *sctx = shash_desc_ctx(desc);
116
117 memcpy(out, sctx, sizeof(*sctx));
118 return 0;
119}
120
121static int ppc_md5_import(struct shash_desc *desc, const void *in)
122{
123 struct md5_state *sctx = shash_desc_ctx(desc);
124
125 memcpy(sctx, in, sizeof(*sctx));
126 return 0;
127}
128
129static struct shash_alg alg = {
130 .digestsize = MD5_DIGEST_SIZE,
131 .init = ppc_md5_init,
132 .update = ppc_md5_update,
133 .final = ppc_md5_final,
134 .export = ppc_md5_export,
135 .import = ppc_md5_import,
136 .descsize = sizeof(struct md5_state),
137 .statesize = sizeof(struct md5_state),
138 .base = {
139 .cra_name = "md5",
140 .cra_driver_name= "md5-ppc",
141 .cra_priority = 200,
142 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
143 .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
144 .cra_module = THIS_MODULE,
145 }
146};
147
148static int __init ppc_md5_mod_init(void)
149{
150 return crypto_register_shash(&alg);
151}
152
153static void __exit ppc_md5_mod_fini(void)
154{
155 crypto_unregister_shash(&alg);
156}
157
158module_init(ppc_md5_mod_init);
159module_exit(ppc_md5_mod_fini);
160
161MODULE_LICENSE("GPL");
162MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
163
164MODULE_ALIAS_CRYPTO("md5");
165MODULE_ALIAS_CRYPTO("md5-ppc");
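
ppc_md5_final() above uses the usual MD5 padding arithmetic: one 0x80 marker
byte, zeros up to byte 56 of the block, then the 64-bit bit count, with a
second transform whenever fewer than nine bytes of the block remain free. A
small user-space sketch of that arithmetic (md5_final_blocks() is an
illustrative helper, not kernel code):

/* padlen is 55 - offset: one byte for the 0x80 marker, eight for the count. */
static unsigned int md5_final_blocks(unsigned int byte_count)
{
	unsigned int offset = byte_count & 0x3f; /* bytes in the partial block */
	int padlen = 55 - (int)offset;

	return (padlen < 0) ? 2 : 1; /* e.g. offset 55 -> 1 block, 56 -> 2 */
}

With offset 56..63 the first transform consumes the 0x80 marker and whatever
zero padding still fits, and the bit count goes into a fresh zeroed block,
which is exactly the padlen < 0 branch in the final() routine.
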
diff --git a/arch/powerpc/crypto/sha1-spe-asm.S b/arch/powerpc/crypto/sha1-spe-asm.S
new file mode 100644
index 000000000000..fcb6cf002889
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-asm.S
@@ -0,0 +1,299 @@
1/*
2 * Fast SHA-1 implementation for SPE instruction set (PPC)
3 *
4 * This code makes use of the SPE SIMD instruction set as defined in
5 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
6 * Implementation is based on optimization guide notes from
7 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
8 *
9 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 */
17
18#include <asm/ppc_asm.h>
19#include <asm/asm-offsets.h>
20
21#define rHP r3 /* pointer to hash value */
22#define rWP r4 /* pointer to input */
23#define rKP r5 /* pointer to constants */
24
25#define rW0 r14 /* 64 bit round words */
26#define rW1 r15
27#define rW2 r16
28#define rW3 r17
29#define rW4 r18
30#define rW5 r19
31#define rW6 r20
32#define rW7 r21
33
34#define rH0 r6 /* 32 bit hash values */
35#define rH1 r7
36#define rH2 r8
37#define rH3 r9
38#define rH4 r10
39
40#define rT0 r22 /* 64 bit temporary */
41#define rT1 r0 /* 32 bit temporaries */
42#define rT2 r11
43#define rT3 r12
44
45#define rK r23 /* 64 bit constant in volatile register */
46
47#define LOAD_K01
48
49#define LOAD_K11 \
50 evlwwsplat rK,0(rKP);
51
52#define LOAD_K21 \
53 evlwwsplat rK,4(rKP);
54
55#define LOAD_K31 \
56 evlwwsplat rK,8(rKP);
57
58#define LOAD_K41 \
59 evlwwsplat rK,12(rKP);
60
61#define INITIALIZE \
62 stwu r1,-128(r1); /* create stack frame */ \
63 evstdw r14,8(r1); /* We must save non volatile */ \
64 evstdw r15,16(r1); /* registers. Take the chance */ \
65 evstdw r16,24(r1); /* and save the SPE part too */ \
66 evstdw r17,32(r1); \
67 evstdw r18,40(r1); \
68 evstdw r19,48(r1); \
69 evstdw r20,56(r1); \
70 evstdw r21,64(r1); \
71 evstdw r22,72(r1); \
72 evstdw r23,80(r1);
73
74
75#define FINALIZE \
76 evldw r14,8(r1); /* restore SPE registers */ \
77 evldw r15,16(r1); \
78 evldw r16,24(r1); \
79 evldw r17,32(r1); \
80 evldw r18,40(r1); \
81 evldw r19,48(r1); \
82 evldw r20,56(r1); \
83 evldw r21,64(r1); \
84 evldw r22,72(r1); \
85 evldw r23,80(r1); \
86 xor r0,r0,r0; \
87 stw r0,8(r1); /* Delete sensitive data */ \
88 stw r0,16(r1); /* that we might have pushed */ \
89 stw r0,24(r1); /* from other context that runs */ \
90 stw r0,32(r1); /* the same code. Assume that */ \
91 stw r0,40(r1); /* the lower part of the GPRs */ \
92 stw r0,48(r1); /* was already overwritten on */ \
93 stw r0,56(r1); /* the way down to here */ \
94 stw r0,64(r1); \
95 stw r0,72(r1); \
96 stw r0,80(r1); \
97 addi r1,r1,128; /* cleanup stack frame */
98
99#ifdef __BIG_ENDIAN__
100#define LOAD_DATA(reg, off) \
101 lwz reg,off(rWP); /* load data */
102#define NEXT_BLOCK \
103 addi rWP,rWP,64; /* increment per block */
104#else
105#define LOAD_DATA(reg, off) \
106 lwbrx reg,0,rWP; /* load data */ \
107 addi rWP,rWP,4; /* increment per word */
108#define NEXT_BLOCK /* nothing to do */
109#endif
110
111#define R_00_15(a, b, c, d, e, w0, w1, k, off) \
112 LOAD_DATA(w0, off) /* 1: W */ \
113 and rT2,b,c; /* 1: F' = B and C */ \
114 LOAD_K##k##1 \
115 andc rT1,d,b; /* 1: F" = ~B and D */ \
116 rotrwi rT0,a,27; /* 1: A' = A rotl 5 */ \
117 or rT2,rT2,rT1; /* 1: F = F' or F" */ \
118 add e,e,rT0; /* 1: E = E + A' */ \
119 rotrwi b,b,2; /* 1: B = B rotl 30 */ \
120 add e,e,w0; /* 1: E = E + W */ \
121 LOAD_DATA(w1, off+4) /* 2: W */ \
122 add e,e,rT2; /* 1: E = E + F */ \
123 and rT1,a,b; /* 2: F' = B and C */ \
124 add e,e,rK; /* 1: E = E + K */ \
125 andc rT2,c,a; /* 2: F" = ~B and D */ \
126 add d,d,rK; /* 2: E = E + K */ \
127 or rT2,rT2,rT1; /* 2: F = F' or F" */ \
128 rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
129 add d,d,w1; /* 2: E = E + W */ \
130 rotrwi a,a,2; /* 2: B = B rotl 30 */ \
131 add d,d,rT0; /* 2: E = E + A' */ \
132 evmergelo w1,w1,w0; /* mix W[0]/W[1] */ \
133 add d,d,rT2 /* 2: E = E + F */
134
135#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
136 and rT2,b,c; /* 1: F' = B and C */ \
137 evmergelohi rT0,w7,w6; /* W[-3] */ \
138 andc rT1,d,b; /* 1: F" = ~B and D */ \
139 evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
140 or rT1,rT1,rT2; /* 1: F = F' or F" */ \
141 evxor w0,w0,w4; /* W = W xor W[-8] */ \
142 add e,e,rT1; /* 1: E = E + F */ \
143 evxor w0,w0,w1; /* W = W xor W[-14] */ \
144 rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
145 evrlwi w0,w0,1; /* W = W rotl 1 */ \
146 add e,e,rT2; /* 1: E = E + A' */ \
147 evaddw rT0,w0,rK; /* WK = W + K */ \
148 rotrwi b,b,2; /* 1: B = B rotl 30 */ \
149 LOAD_K##k##1 \
150 evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
151 add e,e,rT0; /* 1: E = E + WK */ \
152 add d,d,rT1; /* 2: E = E + WK */ \
153 and rT2,a,b; /* 2: F' = B and C */ \
154 andc rT1,c,a; /* 2: F" = ~B and D */ \
155 rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
156 or rT1,rT1,rT2; /* 2: F = F' or F" */ \
157 add d,d,rT0; /* 2: E = E + A' */ \
158 rotrwi a,a,2; /* 2: B = B rotl 30 */ \
159 add d,d,rT1 /* 2: E = E + F */
160
161#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
162 evmergelohi rT0,w7,w6; /* W[-3] */ \
163 xor rT2,b,c; /* 1: F' = B xor C */ \
164 evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
165 xor rT2,rT2,d; /* 1: F = F' xor D */ \
166 evxor w0,w0,w4; /* W = W xor W[-8] */ \
167 add e,e,rT2; /* 1: E = E + F */ \
168 evxor w0,w0,w1; /* W = W xor W[-14] */ \
169 rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
170 evrlwi w0,w0,1; /* W = W rotl 1 */ \
171 add e,e,rT2; /* 1: E = E + A' */ \
172 evaddw rT0,w0,rK; /* WK = W + K */ \
173 rotrwi b,b,2; /* 1: B = B rotl 30 */ \
174 LOAD_K##k##1 \
175 evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
176 add e,e,rT0; /* 1: E = E + WK */ \
177 xor rT2,a,b; /* 2: F' = B xor C */ \
178 add d,d,rT1; /* 2: E = E + WK */ \
179 xor rT2,rT2,c; /* 2: F = F' xor D */ \
180 rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
181 add d,d,rT2; /* 2: E = E + F */ \
182 rotrwi a,a,2; /* 2: B = B rotl 30 */ \
183 add d,d,rT0 /* 2: E = E + A' */
184
185#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
186 and rT2,b,c; /* 1: F' = B and C */ \
187 evmergelohi rT0,w7,w6; /* W[-3] */ \
188 or rT1,b,c; /* 1: F" = B or C */ \
189 evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
190 and rT1,d,rT1; /* 1: F" = F" and D */ \
191 evxor w0,w0,w4; /* W = W xor W[-8] */ \
192 or rT2,rT2,rT1; /* 1: F = F' or F" */ \
193 evxor w0,w0,w1; /* W = W xor W[-14] */ \
194 add e,e,rT2; /* 1: E = E + F */ \
195 evrlwi w0,w0,1; /* W = W rotl 1 */ \
196 rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
197 evaddw rT0,w0,rK; /* WK = W + K */ \
198 add e,e,rT2; /* 1: E = E + A' */ \
199 LOAD_K##k##1 \
200 evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
201 rotrwi b,b,2; /* 1: B = B rotl 30 */ \
202 add e,e,rT0; /* 1: E = E + WK */ \
203 and rT2,a,b; /* 2: F' = B and C */ \
204 or rT0,a,b; /* 2: F" = B or C */ \
205 add d,d,rT1; /* 2: E = E + WK */ \
206 and rT0,c,rT0; /* 2: F" = F" and D */ \
207 rotrwi a,a,2; /* 2: B = B rotl 30 */ \
208 or rT2,rT2,rT0; /* 2: F = F' or F" */ \
209 rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
210 add d,d,rT2; /* 2: E = E + F */ \
211 add d,d,rT0 /* 2: E = E + A' */
212
213#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
214 R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
215
216_GLOBAL(ppc_spe_sha1_transform)
217 INITIALIZE
218
219 lwz rH0,0(rHP)
220 lwz rH1,4(rHP)
221 mtctr r5
222 lwz rH2,8(rHP)
223 lis rKP,PPC_SPE_SHA1_K@h
224 lwz rH3,12(rHP)
225 ori rKP,rKP,PPC_SPE_SHA1_K@l
226 lwz rH4,16(rHP)
227
228ppc_spe_sha1_main:
229 R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
230 R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
231 R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
232 R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
233 R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
234 R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
235 R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
236 R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
237
238 R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
239 R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
240
241 R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
242 R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
243 R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
244 R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
245 R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
246 R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
247 R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
248 R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
249 R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
250 R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
251
252 R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
253 R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
254 R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
255 R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
256 R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
257 R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
258 R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
259 R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
260 R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
261 R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
262
263 R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
264 R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
265 R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
266 R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
267 R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
268 R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
269 R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
270 lwz rT3,0(rHP)
271 R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
272 lwz rW1,4(rHP)
273 R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
274 lwz rW2,8(rHP)
275 R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
276 lwz rW3,12(rHP)
277 NEXT_BLOCK
278 lwz rW4,16(rHP)
279
280 add rH0,rH0,rT3
281 stw rH0,0(rHP)
282 add rH1,rH1,rW1
283 stw rH1,4(rHP)
284 add rH2,rH2,rW2
285 stw rH2,8(rHP)
286 add rH3,rH3,rW3
287 stw rH3,12(rHP)
288 add rH4,rH4,rW4
289 stw rH4,16(rHP)
290
291 bdnz ppc_spe_sha1_main
292
293 FINALIZE
294 blr
295
296.data
297.align 4
298PPC_SPE_SHA1_K:
299 .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6
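
The R_16_19/R_20_39/R_40_59/R_60_79 macros above each retire two rounds and
keep the sixteen live schedule words packed two per 64-bit SPE register, which
is what the evmergelohi/evxor/evrlwi sequence operates on. The scalar
recurrence they implement is the standard SHA-1 message schedule; a plain C
reference (illustrative user-space code, rol32 and sha1_schedule are local
helpers, not kernel symbols):

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, unsigned int n)
{
	n &= 31;
	return n ? (x << n) | (x >> (32 - n)) : x;
}

/* w[] holds the 16 most recent schedule words, indexed modulo 16 (t >= 16). */
static uint32_t sha1_schedule(uint32_t w[16], unsigned int t)
{
	uint32_t x = w[(t - 3) & 15] ^ w[(t - 8) & 15] ^
		     w[(t - 14) & 15] ^ w[(t - 16) & 15];

	w[t & 15] = rol32(x, 1);
	return w[t & 15];
}

In the assembler version the pair of new schedule words is also added to the
round constant in one evaddw, so a single macro invocation produces both WK
values for its two rounds.
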
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
new file mode 100644
index 000000000000..3e1d22212521
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -0,0 +1,210 @@
1/*
2 * Glue code for SHA-1 implementation for SPE instructions (PPC)
3 *
4 * Based on generic implementation.
5 *
6 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the Free
10 * Software Foundation; either version 2 of the License, or (at your option)
11 * any later version.
12 *
13 */
14
15#include <crypto/internal/hash.h>
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/mm.h>
19#include <linux/cryptohash.h>
20#include <linux/types.h>
21#include <crypto/sha.h>
22#include <asm/byteorder.h>
23#include <asm/switch_to.h>
24#include <linux/hardirq.h>
25
26/*
27 * MAX_BYTES defines the number of bytes that are allowed to be processed
28 * between preempt_disable() and preempt_enable(). SHA1 takes ~1000
29 * operations per 64 bytes. e500 cores can issue two arithmetic instructions
30 * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
31 * Thus 2KB of input data will need an estimated maximum of 18,000 cycles.
32 * Headroom for cache misses is included. Even with the low-end model clocked
33 * at 667 MHz this corresponds to a critical time window of less than 27us.
34 *
35 */
36#define MAX_BYTES 2048
37
38extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
39
40static void spe_begin(void)
41{
42 /* We just start SPE operations and will save SPE registers later. */
43 preempt_disable();
44 enable_kernel_spe();
45}
46
47static void spe_end(void)
48{
49 /* reenable preemption */
50 preempt_enable();
51}
52
53static inline void ppc_sha1_clear_context(struct sha1_state *sctx)
54{
55 int count = sizeof(struct sha1_state) >> 2;
56 u32 *ptr = (u32 *)sctx;
57
58 /* make sure we can clear the fast way */
59 BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
60 do { *ptr++ = 0; } while (--count);
61}
62
63static int ppc_spe_sha1_init(struct shash_desc *desc)
64{
65 struct sha1_state *sctx = shash_desc_ctx(desc);
66
67 sctx->state[0] = SHA1_H0;
68 sctx->state[1] = SHA1_H1;
69 sctx->state[2] = SHA1_H2;
70 sctx->state[3] = SHA1_H3;
71 sctx->state[4] = SHA1_H4;
72 sctx->count = 0;
73
74 return 0;
75}
76
77static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
78 unsigned int len)
79{
80 struct sha1_state *sctx = shash_desc_ctx(desc);
81 const unsigned int offset = sctx->count & 0x3f;
82 const unsigned int avail = 64 - offset;
83 unsigned int bytes;
84 const u8 *src = data;
85
86 if (avail > len) {
87 sctx->count += len;
88 memcpy((char *)sctx->buffer + offset, src, len);
89 return 0;
90 }
91
92 sctx->count += len;
93
94 if (offset) {
95 memcpy((char *)sctx->buffer + offset, src, avail);
96
97 spe_begin();
98 ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1);
99 spe_end();
100
101 len -= avail;
102 src += avail;
103 }
104
105 while (len > 63) {
106 bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
107 bytes = bytes & ~0x3f;
108
109 spe_begin();
110 ppc_spe_sha1_transform(sctx->state, src, bytes >> 6);
111 spe_end();
112
113 src += bytes;
114 len -= bytes;
115 };
116
117 memcpy((char *)sctx->buffer, src, len);
118 return 0;
119}
120
121static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
122{
123 struct sha1_state *sctx = shash_desc_ctx(desc);
124 const unsigned int offset = sctx->count & 0x3f;
125 char *p = (char *)sctx->buffer + offset;
126 int padlen;
127 __be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56);
128 __be32 *dst = (__be32 *)out;
129
130 padlen = 55 - offset;
131 *p++ = 0x80;
132
133 spe_begin();
134
135 if (padlen < 0) {
136 memset(p, 0x00, padlen + sizeof (u64));
137 ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
138 p = (char *)sctx->buffer;
139 padlen = 56;
140 }
141
142 memset(p, 0, padlen);
143 *pbits = cpu_to_be64(sctx->count << 3);
144 ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
145
146 spe_end();
147
148 dst[0] = cpu_to_be32(sctx->state[0]);
149 dst[1] = cpu_to_be32(sctx->state[1]);
150 dst[2] = cpu_to_be32(sctx->state[2]);
151 dst[3] = cpu_to_be32(sctx->state[3]);
152 dst[4] = cpu_to_be32(sctx->state[4]);
153
154 ppc_sha1_clear_context(sctx);
155 return 0;
156}
157
158static int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
159{
160 struct sha1_state *sctx = shash_desc_ctx(desc);
161
162 memcpy(out, sctx, sizeof(*sctx));
163 return 0;
164}
165
166static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
167{
168 struct sha1_state *sctx = shash_desc_ctx(desc);
169
170 memcpy(sctx, in, sizeof(*sctx));
171 return 0;
172}
173
174static struct shash_alg alg = {
175 .digestsize = SHA1_DIGEST_SIZE,
176 .init = ppc_spe_sha1_init,
177 .update = ppc_spe_sha1_update,
178 .final = ppc_spe_sha1_final,
179 .export = ppc_spe_sha1_export,
180 .import = ppc_spe_sha1_import,
181 .descsize = sizeof(struct sha1_state),
182 .statesize = sizeof(struct sha1_state),
183 .base = {
184 .cra_name = "sha1",
185 .cra_driver_name= "sha1-ppc-spe",
186 .cra_priority = 300,
187 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
188 .cra_blocksize = SHA1_BLOCK_SIZE,
189 .cra_module = THIS_MODULE,
190 }
191};
192
193static int __init ppc_spe_sha1_mod_init(void)
194{
195 return crypto_register_shash(&alg);
196}
197
198static void __exit ppc_spe_sha1_mod_fini(void)
199{
200 crypto_unregister_shash(&alg);
201}
202
203module_init(ppc_spe_sha1_mod_init);
204module_exit(ppc_spe_sha1_mod_fini);
205
206MODULE_LICENSE("GPL");
207MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized");
208
209MODULE_ALIAS_CRYPTO("sha1");
210MODULE_ALIAS_CRYPTO("sha1-ppc-spe");
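
The MAX_BYTES limit above caps how much data is hashed inside a single
preempt_disable()/preempt_enable() window, and the numbers in the comment can
be reproduced quickly: 2048 bytes is 32 blocks, ~1000 operations per block at
two per cycle is ~16,000 cycles, and the quoted 18,000 leaves roughly 12%
headroom, which at 667 MHz is about 27 microseconds. A throwaway check of that
estimate (the 1.125 headroom factor is an assumption picked only to reproduce
the quoted figure):

#include <stdio.h>

int main(void)
{
	const unsigned int max_bytes = 2048;            /* MAX_BYTES         */
	const unsigned int blocks = max_bytes / 64;     /* 32 blocks         */
	const unsigned int cycles = blocks * 1000 / 2;  /* ~16,000 cycles    */
	const double with_headroom = cycles * 1.125;    /* ~18,000 cycles    */
	const double window_us = with_headroom / 667.0; /* ~27 us at 667 MHz */

	printf("%.0f cycles, %.1f us\n", with_headroom, window_us);
	return 0;
}

The SHA-256 glue code that follows uses the same reasoning with MAX_BYTES
halved to 1024, since its compression function costs roughly twice as many
operations per block.
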
diff --git a/arch/powerpc/crypto/sha256-spe-asm.S b/arch/powerpc/crypto/sha256-spe-asm.S
new file mode 100644
index 000000000000..2d10e4c08f03
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-asm.S
@@ -0,0 +1,323 @@
1/*
2 * Fast SHA-256 implementation for SPE instruction set (PPC)
3 *
4 * This code makes use of the SPE SIMD instruction set as defined in
5 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
6 * Implementation is based on optimization guide notes from
7 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
8 *
9 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free
13 * Software Foundation; either version 2 of the License, or (at your option)
14 * any later version.
15 *
16 */
17
18#include <asm/ppc_asm.h>
19#include <asm/asm-offsets.h>
20
21#define rHP r3 /* pointer to hash values in memory */
22#define rKP r24 /* pointer to round constants */
23#define rWP r4 /* pointer to input data */
24
25#define rH0 r5 /* 8 32 bit hash values in 8 registers */
26#define rH1 r6
27#define rH2 r7
28#define rH3 r8
29#define rH4 r9
30#define rH5 r10
31#define rH6 r11
32#define rH7 r12
33
34#define rW0 r14 /* 64 bit registers. 16 words in 8 registers */
35#define rW1 r15
36#define rW2 r16
37#define rW3 r17
38#define rW4 r18
39#define rW5 r19
40#define rW6 r20
41#define rW7 r21
42
43#define rT0 r22 /* 64 bit temporaries */
44#define rT1 r23
45#define rT2 r0 /* 32 bit temporaries */
46#define rT3 r25
47
48#define CMP_KN_LOOP
49#define CMP_KC_LOOP \
50 cmpwi rT1,0;
51
52#define INITIALIZE \
53 stwu r1,-128(r1); /* create stack frame */ \
54 evstdw r14,8(r1); /* We must save non volatile */ \
55 evstdw r15,16(r1); /* registers. Take the chance */ \
56 evstdw r16,24(r1); /* and save the SPE part too */ \
57 evstdw r17,32(r1); \
58 evstdw r18,40(r1); \
59 evstdw r19,48(r1); \
60 evstdw r20,56(r1); \
61 evstdw r21,64(r1); \
62 evstdw r22,72(r1); \
63 evstdw r23,80(r1); \
64 stw r24,88(r1); /* save normal registers */ \
65 stw r25,92(r1);
66
67
68#define FINALIZE \
69 evldw r14,8(r1); /* restore SPE registers */ \
70 evldw r15,16(r1); \
71 evldw r16,24(r1); \
72 evldw r17,32(r1); \
73 evldw r18,40(r1); \
74 evldw r19,48(r1); \
75 evldw r20,56(r1); \
76 evldw r21,64(r1); \
77 evldw r22,72(r1); \
78 evldw r23,80(r1); \
79 lwz r24,88(r1); /* restore normal registers */ \
80 lwz r25,92(r1); \
81 xor r0,r0,r0; \
82 stw r0,8(r1); /* Delete sensitive data */ \
83 stw r0,16(r1); /* that we might have pushed */ \
84 stw r0,24(r1); /* from other context that runs */ \
85 stw r0,32(r1); /* the same code. Assume that */ \
86 stw r0,40(r1); /* the lower part of the GPRs */ \
87 stw r0,48(r1); /* was already overwritten on */ \
88 stw r0,56(r1); /* the way down to here */ \
89 stw r0,64(r1); \
90 stw r0,72(r1); \
91 stw r0,80(r1); \
92 addi r1,r1,128; /* cleanup stack frame */
93
94#ifdef __BIG_ENDIAN__
95#define LOAD_DATA(reg, off) \
96 lwz reg,off(rWP); /* load data */
97#define NEXT_BLOCK \
98 addi rWP,rWP,64; /* increment per block */
99#else
100#define LOAD_DATA(reg, off) \
101 lwbrx reg,0,rWP; /* load data */ \
102 addi rWP,rWP,4; /* increment per word */
103#define NEXT_BLOCK /* nothing to do */
104#endif
105
106#define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
107 LOAD_DATA(w, off) /* 1: W */ \
108 rotrwi rT0,e,6; /* 1: S1 = e rotr 6 */ \
109 rotrwi rT1,e,11; /* 1: S1' = e rotr 11 */ \
110 rotrwi rT2,e,25; /* 1: S1" = e rotr 25 */ \
111 xor rT0,rT0,rT1; /* 1: S1 = S1 xor S1' */ \
112 and rT3,e,f; /* 1: ch = e and f */ \
113 xor rT0,rT0,rT2; /* 1: S1 = S1 xor S1" */ \
114 andc rT1,g,e; /* 1: ch' = ~e and g */ \
115 lwz rT2,off(rKP); /* 1: K */ \
116 xor rT3,rT3,rT1; /* 1: ch = ch xor ch' */ \
117 add h,h,rT0; /* 1: temp1 = h + S1 */ \
118 add rT3,rT3,w; /* 1: temp1' = ch + w */ \
119 rotrwi rT0,a,2; /* 1: S0 = a rotr 2 */ \
120 add h,h,rT3; /* 1: temp1 = temp1 + temp1' */ \
121 rotrwi rT1,a,13; /* 1: S0' = a rotr 13 */ \
122 add h,h,rT2; /* 1: temp1 = temp1 + K */ \
123 rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \
124 xor rT0,rT0,rT1; /* 1: S0 = S0 xor S0' */ \
125 add d,d,h; /* 1: d = d + temp1 */ \
126 xor rT3,rT0,rT3; /* 1: S0 = S0 xor S0" */ \
127 evmergelo w,w,w; /* shift W */ \
128 or rT2,a,b; /* 1: maj = a or b */ \
129 and rT1,a,b; /* 1: maj' = a and b */ \
130 and rT2,rT2,c; /* 1: maj = maj and c */ \
131 LOAD_DATA(w, off+4) /* 2: W */ \
132 or rT2,rT1,rT2; /* 1: maj = maj or maj' */ \
133 rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
134 add rT3,rT3,rT2; /* 1: temp2 = S0 + maj */ \
135 rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
136 add h,h,rT3; /* 1: h = temp1 + temp2 */ \
137 rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \
138 xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
139 and rT3,d,e; /* 2: ch = e and f */ \
140 xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
141 andc rT1,f,d; /* 2: ch' = ~e and g */ \
142 lwz rT2,off+4(rKP); /* 2: K */ \
143 xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
144 add g,g,rT0; /* 2: temp1 = h + S1 */ \
145 add rT3,rT3,w; /* 2: temp1' = ch + w */ \
146 rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
147 add g,g,rT3; /* 2: temp1 = temp1 + temp1' */ \
148 rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
149 add g,g,rT2; /* 2: temp1 = temp1 + K */ \
150 rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
151 xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
152 or rT2,h,a; /* 2: maj = a or b */ \
153 xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
154 and rT1,h,a; /* 2: maj' = a and b */ \
155 and rT2,rT2,b; /* 2: maj = maj and c */ \
156 add c,c,g; /* 2: d = d + temp1 */ \
157 or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
158 add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
159 add g,g,rT3 /* 2: h = temp1 + temp2 */
160
161#define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
162 rotrwi rT2,e,6; /* 1: S1 = e rotr 6 */ \
163 evmergelohi rT0,w0,w1; /* w[-15] */ \
164 rotrwi rT3,e,11; /* 1: S1' = e rotr 11 */ \
165 evsrwiu rT1,rT0,3; /* s0 = w[-15] >> 3 */ \
166 xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \
167 evrlwi rT0,rT0,25; /* s0' = w[-15] rotr 7 */ \
168 rotrwi rT3,e,25; /* 1: S1' = e rotr 25 */ \
169 evxor rT1,rT1,rT0; /* s0 = s0 xor s0' */ \
170 xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \
171 evrlwi rT0,rT0,21; /* s0' = w[-15] rotr 18 */ \
172 add h,h,rT2; /* 1: temp1 = h + S1 */ \
173 evxor rT0,rT0,rT1; /* s0 = s0 xor s0' */ \
174 and rT2,e,f; /* 1: ch = e and f */ \
175 evaddw w0,w0,rT0; /* w = w[-16] + s0 */ \
176 andc rT3,g,e; /* 1: ch' = ~e and g */ \
177 evsrwiu rT0,w7,10; /* s1 = w[-2] >> 10 */ \
178 xor rT2,rT2,rT3; /* 1: ch = ch xor ch' */ \
179 evrlwi rT1,w7,15; /* s1' = w[-2] rotr 17 */ \
180 add h,h,rT2; /* 1: temp1 = temp1 + ch */ \
181 evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \
182 rotrwi rT2,a,2; /* 1: S0 = a rotr 2 */ \
183 evrlwi rT1,w7,13; /* s1' = w[-2] rotr 19 */ \
184 rotrwi rT3,a,13; /* 1: S0' = a rotr 13 */ \
185 evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \
186 xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \
187 evldw rT1,off(rKP); /* k */ \
188 rotrwi rT3,a,22; /* 1: S0' = a rotr 22 */ \
189 evaddw w0,w0,rT0; /* w = w + s1 */ \
190 xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \
191 evmergelohi rT0,w4,w5; /* w[-7] */ \
192 and rT3,a,b; /* 1: maj = a and b */ \
193 evaddw w0,w0,rT0; /* w = w + w[-7] */ \
194 CMP_K##k##_LOOP \
195 add rT2,rT2,rT3; /* 1: temp2 = S0 + maj */ \
196 evaddw rT1,rT1,w0; /* wk = w + k */ \
197 xor rT3,a,b; /* 1: maj = a xor b */ \
198 evmergehi rT0,rT1,rT1; /* wk1/wk2 */ \
199 and rT3,rT3,c; /* 1: maj = maj and c */ \
200 add h,h,rT0; /* 1: temp1 = temp1 + wk */ \
201 add rT2,rT2,rT3; /* 1: temp2 = temp2 + maj */ \
202 add g,g,rT1; /* 2: temp1 = temp1 + wk */ \
203 add d,d,h; /* 1: d = d + temp1 */ \
204 rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
205 add h,h,rT2; /* 1: h = temp1 + temp2 */ \
206 rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
207 rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \
208 xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
209 and rT3,d,e; /* 2: ch = e and f */ \
210 xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
211 andc rT1,f,d; /* 2: ch' = ~e and g */ \
212 add g,g,rT0; /* 2: temp1 = h + S1 */ \
213 xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
214 rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
215 add g,g,rT3; /* 2: temp1 = temp1 + ch */ \
216 rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
217 rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
218 xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
219 or rT2,h,a; /* 2: maj = a or b */ \
220 and rT1,h,a; /* 2: maj' = a and b */ \
221 and rT2,rT2,b; /* 2: maj = maj and c */ \
222 xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
223 or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
224 add c,c,g; /* 2: d = d + temp1 */ \
225 add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
226 add g,g,rT3 /* 2: h = temp1 + temp2 */
227
228_GLOBAL(ppc_spe_sha256_transform)
229 INITIALIZE
230
231 mtctr r5
232 lwz rH0,0(rHP)
233 lwz rH1,4(rHP)
234 lwz rH2,8(rHP)
235 lwz rH3,12(rHP)
236 lwz rH4,16(rHP)
237 lwz rH5,20(rHP)
238 lwz rH6,24(rHP)
239 lwz rH7,28(rHP)
240
241ppc_spe_sha256_main:
242 lis rKP,PPC_SPE_SHA256_K@ha
243 addi rKP,rKP,PPC_SPE_SHA256_K@l
244
245 R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
246 R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
247 R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
248 R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
249 R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
250 R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
251 R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
252 R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
253ppc_spe_sha256_16_rounds:
254 addi rKP,rKP,64
255 R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
256 rW0, rW1, rW4, rW5, rW7, N, 0)
257 R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
258 rW1, rW2, rW5, rW6, rW0, N, 8)
259 R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
260 rW2, rW3, rW6, rW7, rW1, N, 16)
261 R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
262 rW3, rW4, rW7, rW0, rW2, N, 24)
263 R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
264 rW4, rW5, rW0, rW1, rW3, N, 32)
265 R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
266 rW5, rW6, rW1, rW2, rW4, N, 40)
267 R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
268 rW6, rW7, rW2, rW3, rW5, N, 48)
269 R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
270 rW7, rW0, rW3, rW4, rW6, C, 56)
271 bt gt,ppc_spe_sha256_16_rounds
272
273 lwz rW0,0(rHP)
274 NEXT_BLOCK
275 lwz rW1,4(rHP)
276 lwz rW2,8(rHP)
277 lwz rW3,12(rHP)
278 lwz rW4,16(rHP)
279 lwz rW5,20(rHP)
280 lwz rW6,24(rHP)
281 lwz rW7,28(rHP)
282
283 add rH0,rH0,rW0
284 stw rH0,0(rHP)
285 add rH1,rH1,rW1
286 stw rH1,4(rHP)
287 add rH2,rH2,rW2
288 stw rH2,8(rHP)
289 add rH3,rH3,rW3
290 stw rH3,12(rHP)
291 add rH4,rH4,rW4
292 stw rH4,16(rHP)
293 add rH5,rH5,rW5
294 stw rH5,20(rHP)
295 add rH6,rH6,rW6
296 stw rH6,24(rHP)
297 add rH7,rH7,rW7
298 stw rH7,28(rHP)
299
300 bdnz ppc_spe_sha256_main
301
302 FINALIZE
303 blr
304
305.data
306.align 5
307PPC_SPE_SHA256_K:
308 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
309 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
310 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
311 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
312 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
313 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
314 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
315 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
316 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
317 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
318 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
319 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
320 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
321 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
322 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
323 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
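
R_LOAD_W above covers the first sixteen rounds while loading the input words
(byte-swapped on little-endian builds); R_CALC_W handles the later rounds and
extends the message schedule on the fly. The S0/S1/ch/maj terms in the comments
are the standard SHA-256 compression pieces; a compact scalar reference using
the same naming (illustrative user-space code, sha256_round and ror32 are not
kernel symbols):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned int n)
{
	n &= 31;
	return n ? (x >> n) | (x << (32 - n)) : x;
}

/* One round: s[0..7] = a..h, k is the round constant, w the schedule word. */
static void sha256_round(uint32_t s[8], uint32_t k, uint32_t w)
{
	uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint32_t e = s[4], f = s[5], g = s[6], h = s[7];
	uint32_t S1 = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
	uint32_t ch = (e & f) ^ (~e & g);
	uint32_t temp1 = h + S1 + ch + k + w;
	uint32_t S0 = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
	uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
	uint32_t temp2 = S0 + maj;

	s[7] = g; s[6] = f; s[5] = e; s[4] = d + temp1;
	s[3] = c; s[2] = b; s[1] = a; s[0] = temp1 + temp2;
}

The macros compute maj as ((a or b) and c) or (a and b), which is an equivalent
formulation of the same majority function.
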
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
new file mode 100644
index 000000000000..f4a616fe1a82
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-glue.c
@@ -0,0 +1,275 @@
1/*
2 * Glue code for SHA-256 implementation for SPE instructions (PPC)
3 *
4 * Based on generic implementation. The assembler module takes care
5 * of the SPE registers so it can run from interrupt context.
6 *
7 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation; either version 2 of the License, or (at your option)
12 * any later version.
13 *
14 */
15
16#include <crypto/internal/hash.h>
17#include <linux/init.h>
18#include <linux/module.h>
19#include <linux/mm.h>
20#include <linux/cryptohash.h>
21#include <linux/types.h>
22#include <crypto/sha.h>
23#include <asm/byteorder.h>
24#include <asm/switch_to.h>
25#include <linux/hardirq.h>
26
27/*
28 * MAX_BYTES defines the number of bytes that are allowed to be processed
29 * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
30 * operations per 64 bytes. e500 cores can issue two arithmetic instructions
31 * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
32 * Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
33 * Headroom for cache misses is included. Even with the low-end model clocked
34 * at 667 MHz this corresponds to a critical time window of less than 27us.
35 *
36 */
37#define MAX_BYTES 1024
38
39extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
40
41static void spe_begin(void)
42{
43 /* We just start SPE operations and will save SPE registers later. */
44 preempt_disable();
45 enable_kernel_spe();
46}
47
48static void spe_end(void)
49{
50 /* reenable preemption */
51 preempt_enable();
52}
53
54static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
55{
56 int count = sizeof(struct sha256_state) >> 2;
57 u32 *ptr = (u32 *)sctx;
58
59 /* make sure we can clear the fast way */
60 BUILD_BUG_ON(sizeof(struct sha256_state) % 4);
61 do { *ptr++ = 0; } while (--count);
62}
63
64static int ppc_spe_sha256_init(struct shash_desc *desc)
65{
66 struct sha256_state *sctx = shash_desc_ctx(desc);
67
68 sctx->state[0] = SHA256_H0;
69 sctx->state[1] = SHA256_H1;
70 sctx->state[2] = SHA256_H2;
71 sctx->state[3] = SHA256_H3;
72 sctx->state[4] = SHA256_H4;
73 sctx->state[5] = SHA256_H5;
74 sctx->state[6] = SHA256_H6;
75 sctx->state[7] = SHA256_H7;
76 sctx->count = 0;
77
78 return 0;
79}
80
81static int ppc_spe_sha224_init(struct shash_desc *desc)
82{
83 struct sha256_state *sctx = shash_desc_ctx(desc);
84
85 sctx->state[0] = SHA224_H0;
86 sctx->state[1] = SHA224_H1;
87 sctx->state[2] = SHA224_H2;
88 sctx->state[3] = SHA224_H3;
89 sctx->state[4] = SHA224_H4;
90 sctx->state[5] = SHA224_H5;
91 sctx->state[6] = SHA224_H6;
92 sctx->state[7] = SHA224_H7;
93 sctx->count = 0;
94
95 return 0;
96}
97
98static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
99 unsigned int len)
100{
101 struct sha256_state *sctx = shash_desc_ctx(desc);
102 const unsigned int offset = sctx->count & 0x3f;
103 const unsigned int avail = 64 - offset;
104 unsigned int bytes;
105 const u8 *src = data;
106
107 if (avail > len) {
108 sctx->count += len;
109 memcpy((char *)sctx->buf + offset, src, len);
110 return 0;
111 }
112
113 sctx->count += len;
114
115 if (offset) {
116 memcpy((char *)sctx->buf + offset, src, avail);
117
118 spe_begin();
119 ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1);
120 spe_end();
121
122 len -= avail;
123 src += avail;
124 }
125
126 while (len > 63) {
127 /* cut input data into smaller blocks */
128 bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
129 bytes = bytes & ~0x3f;
130
131 spe_begin();
132 ppc_spe_sha256_transform(sctx->state, src, bytes >> 6);
133 spe_end();
134
135 src += bytes;
136 len -= bytes;
137 };
138
139 memcpy((char *)sctx->buf, src, len);
140 return 0;
141}
142
143static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
144{
145 struct sha256_state *sctx = shash_desc_ctx(desc);
146 const unsigned int offset = sctx->count & 0x3f;
147 char *p = (char *)sctx->buf + offset;
148 int padlen;
149 __be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56);
150 __be32 *dst = (__be32 *)out;
151
152 padlen = 55 - offset;
153 *p++ = 0x80;
154
155 spe_begin();
156
157 if (padlen < 0) {
158 memset(p, 0x00, padlen + sizeof (u64));
159 ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
160 p = (char *)sctx->buf;
161 padlen = 56;
162 }
163
164 memset(p, 0, padlen);
165 *pbits = cpu_to_be64(sctx->count << 3);
166 ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
167
168 spe_end();
169
170 dst[0] = cpu_to_be32(sctx->state[0]);
171 dst[1] = cpu_to_be32(sctx->state[1]);
172 dst[2] = cpu_to_be32(sctx->state[2]);
173 dst[3] = cpu_to_be32(sctx->state[3]);
174 dst[4] = cpu_to_be32(sctx->state[4]);
175 dst[5] = cpu_to_be32(sctx->state[5]);
176 dst[6] = cpu_to_be32(sctx->state[6]);
177 dst[7] = cpu_to_be32(sctx->state[7]);
178
179 ppc_sha256_clear_context(sctx);
180 return 0;
181}
182
183static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
184{
185 u32 D[SHA256_DIGEST_SIZE >> 2];
186 __be32 *dst = (__be32 *)out;
187
188 ppc_spe_sha256_final(desc, (u8 *)D);
189
190 /* avoid bytewise memcpy */
191 dst[0] = D[0];
192 dst[1] = D[1];
193 dst[2] = D[2];
194 dst[3] = D[3];
195 dst[4] = D[4];
196 dst[5] = D[5];
197 dst[6] = D[6];
198
199 /* clear sensitive data */
200 memzero_explicit(D, SHA256_DIGEST_SIZE);
201 return 0;
202}
203
204static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
205{
206 struct sha256_state *sctx = shash_desc_ctx(desc);
207
208 memcpy(out, sctx, sizeof(*sctx));
209 return 0;
210}
211
212static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
213{
214 struct sha256_state *sctx = shash_desc_ctx(desc);
215
216 memcpy(sctx, in, sizeof(*sctx));
217 return 0;
218}
219
220static struct shash_alg algs[2] = { {
221 .digestsize = SHA256_DIGEST_SIZE,
222 .init = ppc_spe_sha256_init,
223 .update = ppc_spe_sha256_update,
224 .final = ppc_spe_sha256_final,
225 .export = ppc_spe_sha256_export,
226 .import = ppc_spe_sha256_import,
227 .descsize = sizeof(struct sha256_state),
228 .statesize = sizeof(struct sha256_state),
229 .base = {
230 .cra_name = "sha256",
231 .cra_driver_name= "sha256-ppc-spe",
232 .cra_priority = 300,
233 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
234 .cra_blocksize = SHA256_BLOCK_SIZE,
235 .cra_module = THIS_MODULE,
236 }
237}, {
238 .digestsize = SHA224_DIGEST_SIZE,
239 .init = ppc_spe_sha224_init,
240 .update = ppc_spe_sha256_update,
241 .final = ppc_spe_sha224_final,
242 .export = ppc_spe_sha256_export,
243 .import = ppc_spe_sha256_import,
244 .descsize = sizeof(struct sha256_state),
245 .statesize = sizeof(struct sha256_state),
246 .base = {
247 .cra_name = "sha224",
248 .cra_driver_name= "sha224-ppc-spe",
249 .cra_priority = 300,
250 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
251 .cra_blocksize = SHA224_BLOCK_SIZE,
252 .cra_module = THIS_MODULE,
253 }
254} };
255
256static int __init ppc_spe_sha256_mod_init(void)
257{
258 return crypto_register_shashes(algs, ARRAY_SIZE(algs));
259}
260
261static void __exit ppc_spe_sha256_mod_fini(void)
262{
263 crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
264}
265
266module_init(ppc_spe_sha256_mod_init);
267module_exit(ppc_spe_sha256_mod_fini);
268
269MODULE_LICENSE("GPL");
270MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
271
272MODULE_ALIAS_CRYPTO("sha224");
273MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
274MODULE_ALIAS_CRYPTO("sha256");
275MODULE_ALIAS_CRYPTO("sha256-ppc-spe");
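For reference, the MAX_BYTES budget in the new glue code above works out as follows: at roughly 2,000 operations per 64-byte block and up to two instructions per cycle, one block costs on the order of 1,000 cycles; 16 blocks (1 KB) therefore cost about 16,000 cycles, or roughly 18,000 with cache-miss headroom, and 18,000 cycles at 667 MHz is about 27 us. The sketch below mirrors the chunking pattern ppc_spe_sha256_update() uses to stay inside that budget. It is a stand-alone illustration with a stub in place of the SPE assembler transform, not part of the patch.

#include <stddef.h>
#include <stdint.h>

#define MAX_BYTES 1024  /* keep each non-preemptible section under ~18,000 cycles */

/* stand-in for ppc_spe_sha256_transform(); the real routine is SPE assembler */
static void stub_transform(uint32_t *state, const uint8_t *src, uint32_t blocks)
{
        (void)state; (void)src; (void)blocks;
}

static void hash_bulk(uint32_t *state, const uint8_t *src, size_t len)
{
        while (len > 63) {
                /* hash at most MAX_BYTES, rounded down to whole 64-byte blocks */
                size_t bytes = (len > MAX_BYTES ? MAX_BYTES : len) & ~(size_t)0x3f;

                /* spe_begin(): preempt_disable() + enable_kernel_spe() */
                stub_transform(state, src, (uint32_t)(bytes >> 6));
                /* spe_end(): preempt_enable() */

                src += bytes;
                len -= bytes;
        }
        /* a trailing partial block (< 64 bytes) is buffered for the next call */
}

int main(void)
{
        uint32_t state[8] = { 0 };
        uint8_t data[4096] = { 0 };

        hash_bulk(state, data, sizeof(data));
        return 0;
}

Each pass through the loop hashes at most MAX_BYTES inside one preempt-disabled window, so scheduling latency stays bounded regardless of the total input length.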
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 54f60ab41c63..112cefacf2af 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -797,7 +797,9 @@ static int rfc4106_init(struct crypto_tfm *tfm)
797 PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); 797 PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
798 struct crypto_aead *cryptd_child; 798 struct crypto_aead *cryptd_child;
799 struct aesni_rfc4106_gcm_ctx *child_ctx; 799 struct aesni_rfc4106_gcm_ctx *child_ctx;
800 cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); 800 cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni",
801 CRYPTO_ALG_INTERNAL,
802 CRYPTO_ALG_INTERNAL);
801 if (IS_ERR(cryptd_tfm)) 803 if (IS_ERR(cryptd_tfm))
802 return PTR_ERR(cryptd_tfm); 804 return PTR_ERR(cryptd_tfm);
803 805
@@ -890,15 +892,12 @@ out_free_ablkcipher:
890 return ret; 892 return ret;
891} 893}
892 894
893static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, 895static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
894 unsigned int key_len) 896 unsigned int key_len)
895{ 897{
896 int ret = 0; 898 int ret = 0;
897 struct crypto_tfm *tfm = crypto_aead_tfm(parent); 899 struct crypto_tfm *tfm = crypto_aead_tfm(aead);
898 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); 900 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
899 struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
900 struct aesni_rfc4106_gcm_ctx *child_ctx =
901 aesni_rfc4106_gcm_ctx_get(cryptd_child);
902 u8 *new_key_align, *new_key_mem = NULL; 901 u8 *new_key_align, *new_key_mem = NULL;
903 902
904 if (key_len < 4) { 903 if (key_len < 4) {
@@ -943,20 +942,31 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
943 goto exit; 942 goto exit;
944 } 943 }
945 ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); 944 ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
946 memcpy(child_ctx, ctx, sizeof(*ctx));
947exit: 945exit:
948 kfree(new_key_mem); 946 kfree(new_key_mem);
949 return ret; 947 return ret;
950} 948}
951 949
952/* This is the Integrity Check Value (aka the authentication tag length) and can 950static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
953 * be 8, 12 or 16 bytes long. */ 951 unsigned int key_len)
954static int rfc4106_set_authsize(struct crypto_aead *parent,
955 unsigned int authsize)
956{ 952{
957 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); 953 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
958 struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); 954 struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
955 struct aesni_rfc4106_gcm_ctx *c_ctx = aesni_rfc4106_gcm_ctx_get(child);
956 struct cryptd_aead *cryptd_tfm = ctx->cryptd_tfm;
957 int ret;
959 958
959 ret = crypto_aead_setkey(child, key, key_len);
960 if (!ret) {
961 memcpy(ctx, c_ctx, sizeof(*ctx));
962 ctx->cryptd_tfm = cryptd_tfm;
963 }
964 return ret;
965}
966
967static int common_rfc4106_set_authsize(struct crypto_aead *aead,
968 unsigned int authsize)
969{
960 switch (authsize) { 970 switch (authsize) {
961 case 8: 971 case 8:
962 case 12: 972 case 12:
@@ -965,51 +975,23 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
965 default: 975 default:
966 return -EINVAL; 976 return -EINVAL;
967 } 977 }
968 crypto_aead_crt(parent)->authsize = authsize; 978 crypto_aead_crt(aead)->authsize = authsize;
969 crypto_aead_crt(cryptd_child)->authsize = authsize;
970 return 0; 979 return 0;
971} 980}
972 981
973static int rfc4106_encrypt(struct aead_request *req) 982/* This is the Integrity Check Value (aka the authentication tag length) and can
974{ 983 * be 8, 12 or 16 bytes long. */
975 int ret; 984static int rfc4106_set_authsize(struct crypto_aead *parent,
976 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 985 unsigned int authsize)
977 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
978
979 if (!irq_fpu_usable()) {
980 struct aead_request *cryptd_req =
981 (struct aead_request *) aead_request_ctx(req);
982 memcpy(cryptd_req, req, sizeof(*req));
983 aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
984 return crypto_aead_encrypt(cryptd_req);
985 } else {
986 struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
987 kernel_fpu_begin();
988 ret = cryptd_child->base.crt_aead.encrypt(req);
989 kernel_fpu_end();
990 return ret;
991 }
992}
993
994static int rfc4106_decrypt(struct aead_request *req)
995{ 986{
987 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
988 struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
996 int ret; 989 int ret;
997 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
998 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
999 990
1000 if (!irq_fpu_usable()) { 991 ret = crypto_aead_setauthsize(child, authsize);
1001 struct aead_request *cryptd_req = 992 if (!ret)
1002 (struct aead_request *) aead_request_ctx(req); 993 crypto_aead_crt(parent)->authsize = authsize;
1003 memcpy(cryptd_req, req, sizeof(*req)); 994 return ret;
1004 aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
1005 return crypto_aead_decrypt(cryptd_req);
1006 } else {
1007 struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
1008 kernel_fpu_begin();
1009 ret = cryptd_child->base.crt_aead.decrypt(req);
1010 kernel_fpu_end();
1011 return ret;
1012 }
1013} 995}
1014 996
1015static int __driver_rfc4106_encrypt(struct aead_request *req) 997static int __driver_rfc4106_encrypt(struct aead_request *req)
@@ -1185,6 +1167,78 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
1185 } 1167 }
1186 return retval; 1168 return retval;
1187} 1169}
1170
1171static int rfc4106_encrypt(struct aead_request *req)
1172{
1173 int ret;
1174 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
1175 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
1176
1177 if (!irq_fpu_usable()) {
1178 struct aead_request *cryptd_req =
1179 (struct aead_request *) aead_request_ctx(req);
1180
1181 memcpy(cryptd_req, req, sizeof(*req));
1182 aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
1183 ret = crypto_aead_encrypt(cryptd_req);
1184 } else {
1185 kernel_fpu_begin();
1186 ret = __driver_rfc4106_encrypt(req);
1187 kernel_fpu_end();
1188 }
1189 return ret;
1190}
1191
1192static int rfc4106_decrypt(struct aead_request *req)
1193{
1194 int ret;
1195 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
1196 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
1197
1198 if (!irq_fpu_usable()) {
1199 struct aead_request *cryptd_req =
1200 (struct aead_request *) aead_request_ctx(req);
1201
1202 memcpy(cryptd_req, req, sizeof(*req));
1203 aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
1204 ret = crypto_aead_decrypt(cryptd_req);
1205 } else {
1206 kernel_fpu_begin();
1207 ret = __driver_rfc4106_decrypt(req);
1208 kernel_fpu_end();
1209 }
1210 return ret;
1211}
1212
1213static int helper_rfc4106_encrypt(struct aead_request *req)
1214{
1215 int ret;
1216
1217 if (unlikely(!irq_fpu_usable())) {
1218 WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
1219 ret = -EINVAL;
1220 } else {
1221 kernel_fpu_begin();
1222 ret = __driver_rfc4106_encrypt(req);
1223 kernel_fpu_end();
1224 }
1225 return ret;
1226}
1227
1228static int helper_rfc4106_decrypt(struct aead_request *req)
1229{
1230 int ret;
1231
1232 if (unlikely(!irq_fpu_usable())) {
1233 WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
1234 ret = -EINVAL;
1235 } else {
1236 kernel_fpu_begin();
1237 ret = __driver_rfc4106_decrypt(req);
1238 kernel_fpu_end();
1239 }
1240 return ret;
1241}
1188#endif 1242#endif
1189 1243
1190static struct crypto_alg aesni_algs[] = { { 1244static struct crypto_alg aesni_algs[] = { {
@@ -1210,7 +1264,7 @@ static struct crypto_alg aesni_algs[] = { {
1210 .cra_name = "__aes-aesni", 1264 .cra_name = "__aes-aesni",
1211 .cra_driver_name = "__driver-aes-aesni", 1265 .cra_driver_name = "__driver-aes-aesni",
1212 .cra_priority = 0, 1266 .cra_priority = 0,
1213 .cra_flags = CRYPTO_ALG_TYPE_CIPHER, 1267 .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
1214 .cra_blocksize = AES_BLOCK_SIZE, 1268 .cra_blocksize = AES_BLOCK_SIZE,
1215 .cra_ctxsize = sizeof(struct crypto_aes_ctx) + 1269 .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
1216 AESNI_ALIGN - 1, 1270 AESNI_ALIGN - 1,
@@ -1229,7 +1283,8 @@ static struct crypto_alg aesni_algs[] = { {
1229 .cra_name = "__ecb-aes-aesni", 1283 .cra_name = "__ecb-aes-aesni",
1230 .cra_driver_name = "__driver-ecb-aes-aesni", 1284 .cra_driver_name = "__driver-ecb-aes-aesni",
1231 .cra_priority = 0, 1285 .cra_priority = 0,
1232 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 1286 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
1287 CRYPTO_ALG_INTERNAL,
1233 .cra_blocksize = AES_BLOCK_SIZE, 1288 .cra_blocksize = AES_BLOCK_SIZE,
1234 .cra_ctxsize = sizeof(struct crypto_aes_ctx) + 1289 .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
1235 AESNI_ALIGN - 1, 1290 AESNI_ALIGN - 1,
@@ -1249,7 +1304,8 @@ static struct crypto_alg aesni_algs[] = { {
1249 .cra_name = "__cbc-aes-aesni", 1304 .cra_name = "__cbc-aes-aesni",
1250 .cra_driver_name = "__driver-cbc-aes-aesni", 1305 .cra_driver_name = "__driver-cbc-aes-aesni",
1251 .cra_priority = 0, 1306 .cra_priority = 0,
1252 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 1307 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
1308 CRYPTO_ALG_INTERNAL,
1253 .cra_blocksize = AES_BLOCK_SIZE, 1309 .cra_blocksize = AES_BLOCK_SIZE,
1254 .cra_ctxsize = sizeof(struct crypto_aes_ctx) + 1310 .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
1255 AESNI_ALIGN - 1, 1311 AESNI_ALIGN - 1,
@@ -1313,7 +1369,8 @@ static struct crypto_alg aesni_algs[] = { {
1313 .cra_name = "__ctr-aes-aesni", 1369 .cra_name = "__ctr-aes-aesni",
1314 .cra_driver_name = "__driver-ctr-aes-aesni", 1370 .cra_driver_name = "__driver-ctr-aes-aesni",
1315 .cra_priority = 0, 1371 .cra_priority = 0,
1316 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 1372 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
1373 CRYPTO_ALG_INTERNAL,
1317 .cra_blocksize = 1, 1374 .cra_blocksize = 1,
1318 .cra_ctxsize = sizeof(struct crypto_aes_ctx) + 1375 .cra_ctxsize = sizeof(struct crypto_aes_ctx) +
1319 AESNI_ALIGN - 1, 1376 AESNI_ALIGN - 1,
@@ -1357,7 +1414,7 @@ static struct crypto_alg aesni_algs[] = { {
1357 .cra_name = "__gcm-aes-aesni", 1414 .cra_name = "__gcm-aes-aesni",
1358 .cra_driver_name = "__driver-gcm-aes-aesni", 1415 .cra_driver_name = "__driver-gcm-aes-aesni",
1359 .cra_priority = 0, 1416 .cra_priority = 0,
1360 .cra_flags = CRYPTO_ALG_TYPE_AEAD, 1417 .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_INTERNAL,
1361 .cra_blocksize = 1, 1418 .cra_blocksize = 1,
1362 .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + 1419 .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) +
1363 AESNI_ALIGN, 1420 AESNI_ALIGN,
@@ -1366,8 +1423,12 @@ static struct crypto_alg aesni_algs[] = { {
1366 .cra_module = THIS_MODULE, 1423 .cra_module = THIS_MODULE,
1367 .cra_u = { 1424 .cra_u = {
1368 .aead = { 1425 .aead = {
1369 .encrypt = __driver_rfc4106_encrypt, 1426 .setkey = common_rfc4106_set_key,
1370 .decrypt = __driver_rfc4106_decrypt, 1427 .setauthsize = common_rfc4106_set_authsize,
1428 .encrypt = helper_rfc4106_encrypt,
1429 .decrypt = helper_rfc4106_decrypt,
1430 .ivsize = 8,
1431 .maxauthsize = 16,
1371 }, 1432 },
1372 }, 1433 },
1373}, { 1434}, {
@@ -1423,7 +1484,8 @@ static struct crypto_alg aesni_algs[] = { {
1423 .cra_name = "__lrw-aes-aesni", 1484 .cra_name = "__lrw-aes-aesni",
1424 .cra_driver_name = "__driver-lrw-aes-aesni", 1485 .cra_driver_name = "__driver-lrw-aes-aesni",
1425 .cra_priority = 0, 1486 .cra_priority = 0,
1426 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 1487 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
1488 CRYPTO_ALG_INTERNAL,
1427 .cra_blocksize = AES_BLOCK_SIZE, 1489 .cra_blocksize = AES_BLOCK_SIZE,
1428 .cra_ctxsize = sizeof(struct aesni_lrw_ctx), 1490 .cra_ctxsize = sizeof(struct aesni_lrw_ctx),
1429 .cra_alignmask = 0, 1491 .cra_alignmask = 0,
@@ -1444,7 +1506,8 @@ static struct crypto_alg aesni_algs[] = { {
1444 .cra_name = "__xts-aes-aesni", 1506 .cra_name = "__xts-aes-aesni",
1445 .cra_driver_name = "__driver-xts-aes-aesni", 1507 .cra_driver_name = "__driver-xts-aes-aesni",
1446 .cra_priority = 0, 1508 .cra_priority = 0,
1447 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 1509 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
1510 CRYPTO_ALG_INTERNAL,
1448 .cra_blocksize = AES_BLOCK_SIZE, 1511 .cra_blocksize = AES_BLOCK_SIZE,
1449 .cra_ctxsize = sizeof(struct aesni_xts_ctx), 1512 .cra_ctxsize = sizeof(struct aesni_xts_ctx),
1450 .cra_alignmask = 0, 1513 .cra_alignmask = 0,
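With this change rfc4106_encrypt() and rfc4106_decrypt() follow one pattern: when the FPU cannot be used in the current context the request is bounced to the cryptd instance, which later runs the internal __driver routine from process context; otherwise the internal routine runs immediately between kernel_fpu_begin() and kernel_fpu_end(). A minimal stand-alone sketch of that dispatch, with stub functions standing in for the kernel APIs:

#include <stdbool.h>
#include <stdio.h>

/* stand-ins for irq_fpu_usable(), kernel_fpu_begin()/end(), the internal
 * __driver_rfc4106_encrypt() and the cryptd-queued crypto_aead_encrypt() */
static bool fpu_usable(void)            { return true; }
static void fpu_begin(void)             { }
static void fpu_end(void)               { }
static int  run_internal(void *req)     { (void)req; return 0; }
static int  queue_to_cryptd(void *req)  { (void)req; return 0; }

static int dispatch(void *req)
{
        int ret;

        if (!fpu_usable()) {
                /* SIMD state unavailable (e.g. interrupt context):
                 * hand the request to the async cryptd helper */
                ret = queue_to_cryptd(req);
        } else {
                /* safe to clobber SIMD registers: run the internal alg now */
                fpu_begin();
                ret = run_internal(req);
                fpu_end();
        }
        return ret;
}

int main(void)
{
        printf("dispatch returned %d\n", dispatch(NULL));
        return 0;
}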
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 9a07fafe3831..baf0ac21ace5 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -343,7 +343,8 @@ static struct crypto_alg cmll_algs[10] = { {
343 .cra_name = "__ecb-camellia-aesni-avx2", 343 .cra_name = "__ecb-camellia-aesni-avx2",
344 .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", 344 .cra_driver_name = "__driver-ecb-camellia-aesni-avx2",
345 .cra_priority = 0, 345 .cra_priority = 0,
346 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 346 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
347 CRYPTO_ALG_INTERNAL,
347 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 348 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
348 .cra_ctxsize = sizeof(struct camellia_ctx), 349 .cra_ctxsize = sizeof(struct camellia_ctx),
349 .cra_alignmask = 0, 350 .cra_alignmask = 0,
@@ -362,7 +363,8 @@ static struct crypto_alg cmll_algs[10] = { {
362 .cra_name = "__cbc-camellia-aesni-avx2", 363 .cra_name = "__cbc-camellia-aesni-avx2",
363 .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", 364 .cra_driver_name = "__driver-cbc-camellia-aesni-avx2",
364 .cra_priority = 0, 365 .cra_priority = 0,
365 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 366 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
367 CRYPTO_ALG_INTERNAL,
366 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 368 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
367 .cra_ctxsize = sizeof(struct camellia_ctx), 369 .cra_ctxsize = sizeof(struct camellia_ctx),
368 .cra_alignmask = 0, 370 .cra_alignmask = 0,
@@ -381,7 +383,8 @@ static struct crypto_alg cmll_algs[10] = { {
381 .cra_name = "__ctr-camellia-aesni-avx2", 383 .cra_name = "__ctr-camellia-aesni-avx2",
382 .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", 384 .cra_driver_name = "__driver-ctr-camellia-aesni-avx2",
383 .cra_priority = 0, 385 .cra_priority = 0,
384 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 386 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
387 CRYPTO_ALG_INTERNAL,
385 .cra_blocksize = 1, 388 .cra_blocksize = 1,
386 .cra_ctxsize = sizeof(struct camellia_ctx), 389 .cra_ctxsize = sizeof(struct camellia_ctx),
387 .cra_alignmask = 0, 390 .cra_alignmask = 0,
@@ -401,7 +404,8 @@ static struct crypto_alg cmll_algs[10] = { {
401 .cra_name = "__lrw-camellia-aesni-avx2", 404 .cra_name = "__lrw-camellia-aesni-avx2",
402 .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", 405 .cra_driver_name = "__driver-lrw-camellia-aesni-avx2",
403 .cra_priority = 0, 406 .cra_priority = 0,
404 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 407 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
408 CRYPTO_ALG_INTERNAL,
405 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 409 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
406 .cra_ctxsize = sizeof(struct camellia_lrw_ctx), 410 .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
407 .cra_alignmask = 0, 411 .cra_alignmask = 0,
@@ -424,7 +428,8 @@ static struct crypto_alg cmll_algs[10] = { {
424 .cra_name = "__xts-camellia-aesni-avx2", 428 .cra_name = "__xts-camellia-aesni-avx2",
425 .cra_driver_name = "__driver-xts-camellia-aesni-avx2", 429 .cra_driver_name = "__driver-xts-camellia-aesni-avx2",
426 .cra_priority = 0, 430 .cra_priority = 0,
427 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 431 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
432 CRYPTO_ALG_INTERNAL,
428 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 433 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
429 .cra_ctxsize = sizeof(struct camellia_xts_ctx), 434 .cra_ctxsize = sizeof(struct camellia_xts_ctx),
430 .cra_alignmask = 0, 435 .cra_alignmask = 0,
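The recurring cra_flags change in this and the following glue files marks the raw SIMD helpers with CRYPTO_ALG_INTERNAL so that ordinary algorithm lookups no longer return them; the cryptd/mcryptd allocations earlier in this series pass the flag in both the type and mask arguments so the wrappers can still instantiate them. A conceptual sketch of the flag/mask test follows; the flag value is an assumption taken from this kernel series, and the code is an illustration rather than the kernel's lookup implementation.

#include <stdbool.h>
#include <stdio.h>

#define CRYPTO_ALG_INTERNAL 0x00002000U   /* assumed value, per this series */

/* an algorithm matches a request when every bit covered by the mask agrees */
static bool alg_matches(unsigned int alg_flags, unsigned int type, unsigned int mask)
{
        return ((alg_flags ^ type) & mask) == 0;
}

int main(void)
{
        unsigned int internal_helper = CRYPTO_ALG_INTERNAL;

        /* ordinary request: type 0, mask covering INTERNAL -> helper stays hidden */
        printf("plain lookup matches: %d\n",
               alg_matches(internal_helper, 0, CRYPTO_ALG_INTERNAL));

        /* cryptd-style request: type == mask == INTERNAL -> helper is visible */
        printf("internal lookup matches: %d\n",
               alg_matches(internal_helper, CRYPTO_ALG_INTERNAL, CRYPTO_ALG_INTERNAL));
        return 0;
}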
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index ed38d959add6..78818a1e73e3 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -335,7 +335,8 @@ static struct crypto_alg cmll_algs[10] = { {
335 .cra_name = "__ecb-camellia-aesni", 335 .cra_name = "__ecb-camellia-aesni",
336 .cra_driver_name = "__driver-ecb-camellia-aesni", 336 .cra_driver_name = "__driver-ecb-camellia-aesni",
337 .cra_priority = 0, 337 .cra_priority = 0,
338 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 338 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
339 CRYPTO_ALG_INTERNAL,
339 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 340 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
340 .cra_ctxsize = sizeof(struct camellia_ctx), 341 .cra_ctxsize = sizeof(struct camellia_ctx),
341 .cra_alignmask = 0, 342 .cra_alignmask = 0,
@@ -354,7 +355,8 @@ static struct crypto_alg cmll_algs[10] = { {
354 .cra_name = "__cbc-camellia-aesni", 355 .cra_name = "__cbc-camellia-aesni",
355 .cra_driver_name = "__driver-cbc-camellia-aesni", 356 .cra_driver_name = "__driver-cbc-camellia-aesni",
356 .cra_priority = 0, 357 .cra_priority = 0,
357 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 358 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
359 CRYPTO_ALG_INTERNAL,
358 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 360 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
359 .cra_ctxsize = sizeof(struct camellia_ctx), 361 .cra_ctxsize = sizeof(struct camellia_ctx),
360 .cra_alignmask = 0, 362 .cra_alignmask = 0,
@@ -373,7 +375,8 @@ static struct crypto_alg cmll_algs[10] = { {
373 .cra_name = "__ctr-camellia-aesni", 375 .cra_name = "__ctr-camellia-aesni",
374 .cra_driver_name = "__driver-ctr-camellia-aesni", 376 .cra_driver_name = "__driver-ctr-camellia-aesni",
375 .cra_priority = 0, 377 .cra_priority = 0,
376 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 378 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
379 CRYPTO_ALG_INTERNAL,
377 .cra_blocksize = 1, 380 .cra_blocksize = 1,
378 .cra_ctxsize = sizeof(struct camellia_ctx), 381 .cra_ctxsize = sizeof(struct camellia_ctx),
379 .cra_alignmask = 0, 382 .cra_alignmask = 0,
@@ -393,7 +396,8 @@ static struct crypto_alg cmll_algs[10] = { {
393 .cra_name = "__lrw-camellia-aesni", 396 .cra_name = "__lrw-camellia-aesni",
394 .cra_driver_name = "__driver-lrw-camellia-aesni", 397 .cra_driver_name = "__driver-lrw-camellia-aesni",
395 .cra_priority = 0, 398 .cra_priority = 0,
396 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 399 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
400 CRYPTO_ALG_INTERNAL,
397 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 401 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
398 .cra_ctxsize = sizeof(struct camellia_lrw_ctx), 402 .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
399 .cra_alignmask = 0, 403 .cra_alignmask = 0,
@@ -416,7 +420,8 @@ static struct crypto_alg cmll_algs[10] = { {
416 .cra_name = "__xts-camellia-aesni", 420 .cra_name = "__xts-camellia-aesni",
417 .cra_driver_name = "__driver-xts-camellia-aesni", 421 .cra_driver_name = "__driver-xts-camellia-aesni",
418 .cra_priority = 0, 422 .cra_priority = 0,
419 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 423 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
424 CRYPTO_ALG_INTERNAL,
420 .cra_blocksize = CAMELLIA_BLOCK_SIZE, 425 .cra_blocksize = CAMELLIA_BLOCK_SIZE,
421 .cra_ctxsize = sizeof(struct camellia_xts_ctx), 426 .cra_ctxsize = sizeof(struct camellia_xts_ctx),
422 .cra_alignmask = 0, 427 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 60ada677a928..236c80974457 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -341,7 +341,8 @@ static struct crypto_alg cast5_algs[6] = { {
341 .cra_name = "__ecb-cast5-avx", 341 .cra_name = "__ecb-cast5-avx",
342 .cra_driver_name = "__driver-ecb-cast5-avx", 342 .cra_driver_name = "__driver-ecb-cast5-avx",
343 .cra_priority = 0, 343 .cra_priority = 0,
344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
345 CRYPTO_ALG_INTERNAL,
345 .cra_blocksize = CAST5_BLOCK_SIZE, 346 .cra_blocksize = CAST5_BLOCK_SIZE,
346 .cra_ctxsize = sizeof(struct cast5_ctx), 347 .cra_ctxsize = sizeof(struct cast5_ctx),
347 .cra_alignmask = 0, 348 .cra_alignmask = 0,
@@ -360,7 +361,8 @@ static struct crypto_alg cast5_algs[6] = { {
360 .cra_name = "__cbc-cast5-avx", 361 .cra_name = "__cbc-cast5-avx",
361 .cra_driver_name = "__driver-cbc-cast5-avx", 362 .cra_driver_name = "__driver-cbc-cast5-avx",
362 .cra_priority = 0, 363 .cra_priority = 0,
363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 364 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
365 CRYPTO_ALG_INTERNAL,
364 .cra_blocksize = CAST5_BLOCK_SIZE, 366 .cra_blocksize = CAST5_BLOCK_SIZE,
365 .cra_ctxsize = sizeof(struct cast5_ctx), 367 .cra_ctxsize = sizeof(struct cast5_ctx),
366 .cra_alignmask = 0, 368 .cra_alignmask = 0,
@@ -379,7 +381,8 @@ static struct crypto_alg cast5_algs[6] = { {
379 .cra_name = "__ctr-cast5-avx", 381 .cra_name = "__ctr-cast5-avx",
380 .cra_driver_name = "__driver-ctr-cast5-avx", 382 .cra_driver_name = "__driver-ctr-cast5-avx",
381 .cra_priority = 0, 383 .cra_priority = 0,
382 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 384 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
385 CRYPTO_ALG_INTERNAL,
383 .cra_blocksize = 1, 386 .cra_blocksize = 1,
384 .cra_ctxsize = sizeof(struct cast5_ctx), 387 .cra_ctxsize = sizeof(struct cast5_ctx),
385 .cra_alignmask = 0, 388 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 0160f68a57ff..f448810ca4ac 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -372,7 +372,8 @@ static struct crypto_alg cast6_algs[10] = { {
372 .cra_name = "__ecb-cast6-avx", 372 .cra_name = "__ecb-cast6-avx",
373 .cra_driver_name = "__driver-ecb-cast6-avx", 373 .cra_driver_name = "__driver-ecb-cast6-avx",
374 .cra_priority = 0, 374 .cra_priority = 0,
375 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 375 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
376 CRYPTO_ALG_INTERNAL,
376 .cra_blocksize = CAST6_BLOCK_SIZE, 377 .cra_blocksize = CAST6_BLOCK_SIZE,
377 .cra_ctxsize = sizeof(struct cast6_ctx), 378 .cra_ctxsize = sizeof(struct cast6_ctx),
378 .cra_alignmask = 0, 379 .cra_alignmask = 0,
@@ -391,7 +392,8 @@ static struct crypto_alg cast6_algs[10] = { {
391 .cra_name = "__cbc-cast6-avx", 392 .cra_name = "__cbc-cast6-avx",
392 .cra_driver_name = "__driver-cbc-cast6-avx", 393 .cra_driver_name = "__driver-cbc-cast6-avx",
393 .cra_priority = 0, 394 .cra_priority = 0,
394 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 395 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
396 CRYPTO_ALG_INTERNAL,
395 .cra_blocksize = CAST6_BLOCK_SIZE, 397 .cra_blocksize = CAST6_BLOCK_SIZE,
396 .cra_ctxsize = sizeof(struct cast6_ctx), 398 .cra_ctxsize = sizeof(struct cast6_ctx),
397 .cra_alignmask = 0, 399 .cra_alignmask = 0,
@@ -410,7 +412,8 @@ static struct crypto_alg cast6_algs[10] = { {
410 .cra_name = "__ctr-cast6-avx", 412 .cra_name = "__ctr-cast6-avx",
411 .cra_driver_name = "__driver-ctr-cast6-avx", 413 .cra_driver_name = "__driver-ctr-cast6-avx",
412 .cra_priority = 0, 414 .cra_priority = 0,
413 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 415 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
416 CRYPTO_ALG_INTERNAL,
414 .cra_blocksize = 1, 417 .cra_blocksize = 1,
415 .cra_ctxsize = sizeof(struct cast6_ctx), 418 .cra_ctxsize = sizeof(struct cast6_ctx),
416 .cra_alignmask = 0, 419 .cra_alignmask = 0,
@@ -430,7 +433,8 @@ static struct crypto_alg cast6_algs[10] = { {
430 .cra_name = "__lrw-cast6-avx", 433 .cra_name = "__lrw-cast6-avx",
431 .cra_driver_name = "__driver-lrw-cast6-avx", 434 .cra_driver_name = "__driver-lrw-cast6-avx",
432 .cra_priority = 0, 435 .cra_priority = 0,
433 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 436 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
437 CRYPTO_ALG_INTERNAL,
434 .cra_blocksize = CAST6_BLOCK_SIZE, 438 .cra_blocksize = CAST6_BLOCK_SIZE,
435 .cra_ctxsize = sizeof(struct cast6_lrw_ctx), 439 .cra_ctxsize = sizeof(struct cast6_lrw_ctx),
436 .cra_alignmask = 0, 440 .cra_alignmask = 0,
@@ -453,7 +457,8 @@ static struct crypto_alg cast6_algs[10] = { {
453 .cra_name = "__xts-cast6-avx", 457 .cra_name = "__xts-cast6-avx",
454 .cra_driver_name = "__driver-xts-cast6-avx", 458 .cra_driver_name = "__driver-xts-cast6-avx",
455 .cra_priority = 0, 459 .cra_priority = 0,
456 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 460 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
461 CRYPTO_ALG_INTERNAL,
457 .cra_blocksize = CAST6_BLOCK_SIZE, 462 .cra_blocksize = CAST6_BLOCK_SIZE,
458 .cra_ctxsize = sizeof(struct cast6_xts_ctx), 463 .cra_ctxsize = sizeof(struct cast6_xts_ctx),
459 .cra_alignmask = 0, 464 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 8253d85aa165..2079baf06bdd 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -154,7 +154,8 @@ static struct shash_alg ghash_alg = {
154 .cra_name = "__ghash", 154 .cra_name = "__ghash",
155 .cra_driver_name = "__ghash-pclmulqdqni", 155 .cra_driver_name = "__ghash-pclmulqdqni",
156 .cra_priority = 0, 156 .cra_priority = 0,
157 .cra_flags = CRYPTO_ALG_TYPE_SHASH, 157 .cra_flags = CRYPTO_ALG_TYPE_SHASH |
158 CRYPTO_ALG_INTERNAL,
158 .cra_blocksize = GHASH_BLOCK_SIZE, 159 .cra_blocksize = GHASH_BLOCK_SIZE,
159 .cra_ctxsize = sizeof(struct ghash_ctx), 160 .cra_ctxsize = sizeof(struct ghash_ctx),
160 .cra_module = THIS_MODULE, 161 .cra_module = THIS_MODULE,
@@ -261,7 +262,9 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm)
261 struct cryptd_ahash *cryptd_tfm; 262 struct cryptd_ahash *cryptd_tfm;
262 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); 263 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
263 264
264 cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); 265 cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
266 CRYPTO_ALG_INTERNAL,
267 CRYPTO_ALG_INTERNAL);
265 if (IS_ERR(cryptd_tfm)) 268 if (IS_ERR(cryptd_tfm))
266 return PTR_ERR(cryptd_tfm); 269 return PTR_ERR(cryptd_tfm);
267 ctx->cryptd_tfm = cryptd_tfm; 270 ctx->cryptd_tfm = cryptd_tfm;
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 432f1d76ceb8..6a85598931b5 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -232,7 +232,6 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
232 232
233 le128_to_be128((be128 *)walk->iv, &ctrblk); 233 le128_to_be128((be128 *)walk->iv, &ctrblk);
234} 234}
235EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
236 235
237static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, 236static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
238 struct blkcipher_desc *desc, 237 struct blkcipher_desc *desc,
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 437e47a4d302..2f63dc89e7a9 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -309,7 +309,8 @@ static struct crypto_alg srp_algs[10] = { {
309 .cra_name = "__ecb-serpent-avx2", 309 .cra_name = "__ecb-serpent-avx2",
310 .cra_driver_name = "__driver-ecb-serpent-avx2", 310 .cra_driver_name = "__driver-ecb-serpent-avx2",
311 .cra_priority = 0, 311 .cra_priority = 0,
312 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 312 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
313 CRYPTO_ALG_INTERNAL,
313 .cra_blocksize = SERPENT_BLOCK_SIZE, 314 .cra_blocksize = SERPENT_BLOCK_SIZE,
314 .cra_ctxsize = sizeof(struct serpent_ctx), 315 .cra_ctxsize = sizeof(struct serpent_ctx),
315 .cra_alignmask = 0, 316 .cra_alignmask = 0,
@@ -329,7 +330,8 @@ static struct crypto_alg srp_algs[10] = { {
329 .cra_name = "__cbc-serpent-avx2", 330 .cra_name = "__cbc-serpent-avx2",
330 .cra_driver_name = "__driver-cbc-serpent-avx2", 331 .cra_driver_name = "__driver-cbc-serpent-avx2",
331 .cra_priority = 0, 332 .cra_priority = 0,
332 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 333 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
334 CRYPTO_ALG_INTERNAL,
333 .cra_blocksize = SERPENT_BLOCK_SIZE, 335 .cra_blocksize = SERPENT_BLOCK_SIZE,
334 .cra_ctxsize = sizeof(struct serpent_ctx), 336 .cra_ctxsize = sizeof(struct serpent_ctx),
335 .cra_alignmask = 0, 337 .cra_alignmask = 0,
@@ -349,7 +351,8 @@ static struct crypto_alg srp_algs[10] = { {
349 .cra_name = "__ctr-serpent-avx2", 351 .cra_name = "__ctr-serpent-avx2",
350 .cra_driver_name = "__driver-ctr-serpent-avx2", 352 .cra_driver_name = "__driver-ctr-serpent-avx2",
351 .cra_priority = 0, 353 .cra_priority = 0,
352 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 354 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
355 CRYPTO_ALG_INTERNAL,
353 .cra_blocksize = 1, 356 .cra_blocksize = 1,
354 .cra_ctxsize = sizeof(struct serpent_ctx), 357 .cra_ctxsize = sizeof(struct serpent_ctx),
355 .cra_alignmask = 0, 358 .cra_alignmask = 0,
@@ -370,7 +373,8 @@ static struct crypto_alg srp_algs[10] = { {
370 .cra_name = "__lrw-serpent-avx2", 373 .cra_name = "__lrw-serpent-avx2",
371 .cra_driver_name = "__driver-lrw-serpent-avx2", 374 .cra_driver_name = "__driver-lrw-serpent-avx2",
372 .cra_priority = 0, 375 .cra_priority = 0,
373 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 376 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
377 CRYPTO_ALG_INTERNAL,
374 .cra_blocksize = SERPENT_BLOCK_SIZE, 378 .cra_blocksize = SERPENT_BLOCK_SIZE,
375 .cra_ctxsize = sizeof(struct serpent_lrw_ctx), 379 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
376 .cra_alignmask = 0, 380 .cra_alignmask = 0,
@@ -394,7 +398,8 @@ static struct crypto_alg srp_algs[10] = { {
394 .cra_name = "__xts-serpent-avx2", 398 .cra_name = "__xts-serpent-avx2",
395 .cra_driver_name = "__driver-xts-serpent-avx2", 399 .cra_driver_name = "__driver-xts-serpent-avx2",
396 .cra_priority = 0, 400 .cra_priority = 0,
397 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 401 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
402 CRYPTO_ALG_INTERNAL,
398 .cra_blocksize = SERPENT_BLOCK_SIZE, 403 .cra_blocksize = SERPENT_BLOCK_SIZE,
399 .cra_ctxsize = sizeof(struct serpent_xts_ctx), 404 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
400 .cra_alignmask = 0, 405 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 7e217398b4eb..c8d478af8456 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -378,7 +378,8 @@ static struct crypto_alg serpent_algs[10] = { {
378 .cra_name = "__ecb-serpent-avx", 378 .cra_name = "__ecb-serpent-avx",
379 .cra_driver_name = "__driver-ecb-serpent-avx", 379 .cra_driver_name = "__driver-ecb-serpent-avx",
380 .cra_priority = 0, 380 .cra_priority = 0,
381 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 381 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
382 CRYPTO_ALG_INTERNAL,
382 .cra_blocksize = SERPENT_BLOCK_SIZE, 383 .cra_blocksize = SERPENT_BLOCK_SIZE,
383 .cra_ctxsize = sizeof(struct serpent_ctx), 384 .cra_ctxsize = sizeof(struct serpent_ctx),
384 .cra_alignmask = 0, 385 .cra_alignmask = 0,
@@ -397,7 +398,8 @@ static struct crypto_alg serpent_algs[10] = { {
397 .cra_name = "__cbc-serpent-avx", 398 .cra_name = "__cbc-serpent-avx",
398 .cra_driver_name = "__driver-cbc-serpent-avx", 399 .cra_driver_name = "__driver-cbc-serpent-avx",
399 .cra_priority = 0, 400 .cra_priority = 0,
400 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 401 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
402 CRYPTO_ALG_INTERNAL,
401 .cra_blocksize = SERPENT_BLOCK_SIZE, 403 .cra_blocksize = SERPENT_BLOCK_SIZE,
402 .cra_ctxsize = sizeof(struct serpent_ctx), 404 .cra_ctxsize = sizeof(struct serpent_ctx),
403 .cra_alignmask = 0, 405 .cra_alignmask = 0,
@@ -416,7 +418,8 @@ static struct crypto_alg serpent_algs[10] = { {
416 .cra_name = "__ctr-serpent-avx", 418 .cra_name = "__ctr-serpent-avx",
417 .cra_driver_name = "__driver-ctr-serpent-avx", 419 .cra_driver_name = "__driver-ctr-serpent-avx",
418 .cra_priority = 0, 420 .cra_priority = 0,
419 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 421 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
422 CRYPTO_ALG_INTERNAL,
420 .cra_blocksize = 1, 423 .cra_blocksize = 1,
421 .cra_ctxsize = sizeof(struct serpent_ctx), 424 .cra_ctxsize = sizeof(struct serpent_ctx),
422 .cra_alignmask = 0, 425 .cra_alignmask = 0,
@@ -436,7 +439,8 @@ static struct crypto_alg serpent_algs[10] = { {
436 .cra_name = "__lrw-serpent-avx", 439 .cra_name = "__lrw-serpent-avx",
437 .cra_driver_name = "__driver-lrw-serpent-avx", 440 .cra_driver_name = "__driver-lrw-serpent-avx",
438 .cra_priority = 0, 441 .cra_priority = 0,
439 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 442 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
443 CRYPTO_ALG_INTERNAL,
440 .cra_blocksize = SERPENT_BLOCK_SIZE, 444 .cra_blocksize = SERPENT_BLOCK_SIZE,
441 .cra_ctxsize = sizeof(struct serpent_lrw_ctx), 445 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
442 .cra_alignmask = 0, 446 .cra_alignmask = 0,
@@ -459,7 +463,8 @@ static struct crypto_alg serpent_algs[10] = { {
459 .cra_name = "__xts-serpent-avx", 463 .cra_name = "__xts-serpent-avx",
460 .cra_driver_name = "__driver-xts-serpent-avx", 464 .cra_driver_name = "__driver-xts-serpent-avx",
461 .cra_priority = 0, 465 .cra_priority = 0,
462 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 466 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
467 CRYPTO_ALG_INTERNAL,
463 .cra_blocksize = SERPENT_BLOCK_SIZE, 468 .cra_blocksize = SERPENT_BLOCK_SIZE,
464 .cra_ctxsize = sizeof(struct serpent_xts_ctx), 469 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
465 .cra_alignmask = 0, 470 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index bf025adaea01..3643dd508f45 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -387,7 +387,8 @@ static struct crypto_alg serpent_algs[10] = { {
387 .cra_name = "__ecb-serpent-sse2", 387 .cra_name = "__ecb-serpent-sse2",
388 .cra_driver_name = "__driver-ecb-serpent-sse2", 388 .cra_driver_name = "__driver-ecb-serpent-sse2",
389 .cra_priority = 0, 389 .cra_priority = 0,
390 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 390 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
391 CRYPTO_ALG_INTERNAL,
391 .cra_blocksize = SERPENT_BLOCK_SIZE, 392 .cra_blocksize = SERPENT_BLOCK_SIZE,
392 .cra_ctxsize = sizeof(struct serpent_ctx), 393 .cra_ctxsize = sizeof(struct serpent_ctx),
393 .cra_alignmask = 0, 394 .cra_alignmask = 0,
@@ -406,7 +407,8 @@ static struct crypto_alg serpent_algs[10] = { {
406 .cra_name = "__cbc-serpent-sse2", 407 .cra_name = "__cbc-serpent-sse2",
407 .cra_driver_name = "__driver-cbc-serpent-sse2", 408 .cra_driver_name = "__driver-cbc-serpent-sse2",
408 .cra_priority = 0, 409 .cra_priority = 0,
409 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 410 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
411 CRYPTO_ALG_INTERNAL,
410 .cra_blocksize = SERPENT_BLOCK_SIZE, 412 .cra_blocksize = SERPENT_BLOCK_SIZE,
411 .cra_ctxsize = sizeof(struct serpent_ctx), 413 .cra_ctxsize = sizeof(struct serpent_ctx),
412 .cra_alignmask = 0, 414 .cra_alignmask = 0,
@@ -425,7 +427,8 @@ static struct crypto_alg serpent_algs[10] = { {
425 .cra_name = "__ctr-serpent-sse2", 427 .cra_name = "__ctr-serpent-sse2",
426 .cra_driver_name = "__driver-ctr-serpent-sse2", 428 .cra_driver_name = "__driver-ctr-serpent-sse2",
427 .cra_priority = 0, 429 .cra_priority = 0,
428 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 430 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
431 CRYPTO_ALG_INTERNAL,
429 .cra_blocksize = 1, 432 .cra_blocksize = 1,
430 .cra_ctxsize = sizeof(struct serpent_ctx), 433 .cra_ctxsize = sizeof(struct serpent_ctx),
431 .cra_alignmask = 0, 434 .cra_alignmask = 0,
@@ -445,7 +448,8 @@ static struct crypto_alg serpent_algs[10] = { {
445 .cra_name = "__lrw-serpent-sse2", 448 .cra_name = "__lrw-serpent-sse2",
446 .cra_driver_name = "__driver-lrw-serpent-sse2", 449 .cra_driver_name = "__driver-lrw-serpent-sse2",
447 .cra_priority = 0, 450 .cra_priority = 0,
448 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 451 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
452 CRYPTO_ALG_INTERNAL,
449 .cra_blocksize = SERPENT_BLOCK_SIZE, 453 .cra_blocksize = SERPENT_BLOCK_SIZE,
450 .cra_ctxsize = sizeof(struct serpent_lrw_ctx), 454 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
451 .cra_alignmask = 0, 455 .cra_alignmask = 0,
@@ -468,7 +472,8 @@ static struct crypto_alg serpent_algs[10] = { {
468 .cra_name = "__xts-serpent-sse2", 472 .cra_name = "__xts-serpent-sse2",
469 .cra_driver_name = "__driver-xts-serpent-sse2", 473 .cra_driver_name = "__driver-xts-serpent-sse2",
470 .cra_priority = 0, 474 .cra_priority = 0,
471 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 475 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
476 CRYPTO_ALG_INTERNAL,
472 .cra_blocksize = SERPENT_BLOCK_SIZE, 477 .cra_blocksize = SERPENT_BLOCK_SIZE,
473 .cra_ctxsize = sizeof(struct serpent_xts_ctx), 478 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
474 .cra_alignmask = 0, 479 .cra_alignmask = 0,
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index fd9f6b035b16..e510b1c5d690 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -694,7 +694,8 @@ static struct shash_alg sha1_mb_shash_alg = {
694 * use ASYNC flag as some buffers in multi-buffer 694 * use ASYNC flag as some buffers in multi-buffer
695 * algo may not have completed before hashing thread sleep 695 * algo may not have completed before hashing thread sleep
696 */ 696 */
697 .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC, 697 .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC |
698 CRYPTO_ALG_INTERNAL,
698 .cra_blocksize = SHA1_BLOCK_SIZE, 699 .cra_blocksize = SHA1_BLOCK_SIZE,
699 .cra_module = THIS_MODULE, 700 .cra_module = THIS_MODULE,
700 .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), 701 .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list),
@@ -770,7 +771,9 @@ static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
770 struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); 771 struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
771 struct mcryptd_hash_ctx *mctx; 772 struct mcryptd_hash_ctx *mctx;
772 773
773 mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0); 774 mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
775 CRYPTO_ALG_INTERNAL,
776 CRYPTO_ALG_INTERNAL);
774 if (IS_ERR(mcryptd_tfm)) 777 if (IS_ERR(mcryptd_tfm))
775 return PTR_ERR(mcryptd_tfm); 778 return PTR_ERR(mcryptd_tfm);
776 mctx = crypto_ahash_ctx(&mcryptd_tfm->base); 779 mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
@@ -828,7 +831,7 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
828 while (!list_empty(&cstate->work_list)) { 831 while (!list_empty(&cstate->work_list)) {
829 rctx = list_entry(cstate->work_list.next, 832 rctx = list_entry(cstate->work_list.next,
830 struct mcryptd_hash_request_ctx, waiter); 833 struct mcryptd_hash_request_ctx, waiter);
831 if time_before(cur_time, rctx->tag.expire) 834 if (time_before(cur_time, rctx->tag.expire))
832 break; 835 break;
833 kernel_fpu_begin(); 836 kernel_fpu_begin();
834 sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); 837 sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr);
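The sha1_mb hunk above also turns "if time_before(...)" into the conventional "if (time_before(...))". The original only compiled because time_before() expands to a fully parenthesized expression, so behaviour is unchanged. A stand-alone illustration with a simplified re-implementation of the macro (the kernel's version lives in <linux/jiffies.h>):

#include <stdio.h>

/* simplified, wraparound-safe "a is earlier than b" for unsigned tick counters */
#define time_before(a, b) \
        ((long)((unsigned long)(b) - (unsigned long)(a)) > 0)

int main(void)
{
        unsigned long cur = (unsigned long)-16;  /* 16 ticks before the counter wraps */
        unsigned long expire = 16;               /* 32 ticks after cur, past the wrap */

        if time_before(cur, expire)              /* legal, because the macro is parenthesized */
                printf("unparenthesized form: not expired\n");

        if (time_before(cur, expire))            /* the style the patch restores */
                printf("conventional form: not expired\n");

        return 0;
}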
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
index 4ca7e166a2aa..822acb5b464c 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
@@ -56,7 +56,7 @@
56void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) 56void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
57{ 57{
58 unsigned int j; 58 unsigned int j;
59 state->unused_lanes = 0xF76543210; 59 state->unused_lanes = 0xF76543210ULL;
60 for (j = 0; j < 8; j++) { 60 for (j = 0; j < 8; j++) {
61 state->lens[j] = 0xFFFFFFFF; 61 state->lens[j] = 0xFFFFFFFF;
62 state->ldata[j].job_in_lane = NULL; 62 state->ldata[j].job_in_lane = NULL;
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 6c20fe04a738..33d1b9dc14cc 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -28,7 +28,7 @@
28#include <linux/cryptohash.h> 28#include <linux/cryptohash.h>
29#include <linux/types.h> 29#include <linux/types.h>
30#include <crypto/sha.h> 30#include <crypto/sha.h>
31#include <asm/byteorder.h> 31#include <crypto/sha1_base.h>
32#include <asm/i387.h> 32#include <asm/i387.h>
33#include <asm/xcr.h> 33#include <asm/xcr.h>
34#include <asm/xsave.h> 34#include <asm/xsave.h>
@@ -44,132 +44,51 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
44#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ 44#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */
45 45
46asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, 46asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
47 unsigned int rounds); 47 unsigned int rounds);
48#endif 48#endif
49 49
50static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); 50static void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
51
52
53static int sha1_ssse3_init(struct shash_desc *desc)
54{
55 struct sha1_state *sctx = shash_desc_ctx(desc);
56
57 *sctx = (struct sha1_state){
58 .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
59 };
60
61 return 0;
62}
63
64static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
65 unsigned int len, unsigned int partial)
66{
67 struct sha1_state *sctx = shash_desc_ctx(desc);
68 unsigned int done = 0;
69
70 sctx->count += len;
71
72 if (partial) {
73 done = SHA1_BLOCK_SIZE - partial;
74 memcpy(sctx->buffer + partial, data, done);
75 sha1_transform_asm(sctx->state, sctx->buffer, 1);
76 }
77
78 if (len - done >= SHA1_BLOCK_SIZE) {
79 const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
80
81 sha1_transform_asm(sctx->state, data + done, rounds);
82 done += rounds * SHA1_BLOCK_SIZE;
83 }
84
85 memcpy(sctx->buffer, data + done, len - done);
86
87 return 0;
88}
89 51
90static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, 52static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
91 unsigned int len) 53 unsigned int len)
92{ 54{
93 struct sha1_state *sctx = shash_desc_ctx(desc); 55 struct sha1_state *sctx = shash_desc_ctx(desc);
94 unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
95 int res;
96 56
97 /* Handle the fast case right here */ 57 if (!irq_fpu_usable() ||
98 if (partial + len < SHA1_BLOCK_SIZE) { 58 (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
99 sctx->count += len; 59 return crypto_sha1_update(desc, data, len);
100 memcpy(sctx->buffer + partial, data, len);
101 60
102 return 0; 61 /* make sure casting to sha1_block_fn() is safe */
103 } 62 BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
104 63
105 if (!irq_fpu_usable()) { 64 kernel_fpu_begin();
106 res = crypto_sha1_update(desc, data, len); 65 sha1_base_do_update(desc, data, len,
107 } else { 66 (sha1_block_fn *)sha1_transform_asm);
108 kernel_fpu_begin(); 67 kernel_fpu_end();
109 res = __sha1_ssse3_update(desc, data, len, partial);
110 kernel_fpu_end();
111 }
112
113 return res;
114}
115
116
117/* Add padding and return the message digest. */
118static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
119{
120 struct sha1_state *sctx = shash_desc_ctx(desc);
121 unsigned int i, index, padlen;
122 __be32 *dst = (__be32 *)out;
123 __be64 bits;
124 static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
125
126 bits = cpu_to_be64(sctx->count << 3);
127
128 /* Pad out to 56 mod 64 and append length */
129 index = sctx->count % SHA1_BLOCK_SIZE;
130 padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
131 if (!irq_fpu_usable()) {
132 crypto_sha1_update(desc, padding, padlen);
133 crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
134 } else {
135 kernel_fpu_begin();
136 /* We need to fill a whole block for __sha1_ssse3_update() */
137 if (padlen <= 56) {
138 sctx->count += padlen;
139 memcpy(sctx->buffer + index, padding, padlen);
140 } else {
141 __sha1_ssse3_update(desc, padding, padlen, index);
142 }
143 __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
144 kernel_fpu_end();
145 }
146
147 /* Store state in digest */
148 for (i = 0; i < 5; i++)
149 dst[i] = cpu_to_be32(sctx->state[i]);
150
151 /* Wipe context */
152 memset(sctx, 0, sizeof(*sctx));
153 68
154 return 0; 69 return 0;
155} 70}
156 71
157static int sha1_ssse3_export(struct shash_desc *desc, void *out) 72static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
73 unsigned int len, u8 *out)
158{ 74{
159 struct sha1_state *sctx = shash_desc_ctx(desc); 75 if (!irq_fpu_usable())
76 return crypto_sha1_finup(desc, data, len, out);
160 77
161 memcpy(out, sctx, sizeof(*sctx)); 78 kernel_fpu_begin();
79 if (len)
80 sha1_base_do_update(desc, data, len,
81 (sha1_block_fn *)sha1_transform_asm);
82 sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm);
83 kernel_fpu_end();
162 84
163 return 0; 85 return sha1_base_finish(desc, out);
164} 86}
165 87
166static int sha1_ssse3_import(struct shash_desc *desc, const void *in) 88/* Add padding and return the message digest. */
89static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
167{ 90{
168 struct sha1_state *sctx = shash_desc_ctx(desc); 91 return sha1_ssse3_finup(desc, NULL, 0, out);
169
170 memcpy(sctx, in, sizeof(*sctx));
171
172 return 0;
173} 92}
174 93
175#ifdef CONFIG_AS_AVX2 94#ifdef CONFIG_AS_AVX2
@@ -186,13 +105,11 @@ static void sha1_apply_transform_avx2(u32 *digest, const char *data,
186 105
187static struct shash_alg alg = { 106static struct shash_alg alg = {
188 .digestsize = SHA1_DIGEST_SIZE, 107 .digestsize = SHA1_DIGEST_SIZE,
189 .init = sha1_ssse3_init, 108 .init = sha1_base_init,
190 .update = sha1_ssse3_update, 109 .update = sha1_ssse3_update,
191 .final = sha1_ssse3_final, 110 .final = sha1_ssse3_final,
192 .export = sha1_ssse3_export, 111 .finup = sha1_ssse3_finup,
193 .import = sha1_ssse3_import,
194 .descsize = sizeof(struct sha1_state), 112 .descsize = sizeof(struct sha1_state),
195 .statesize = sizeof(struct sha1_state),
196 .base = { 113 .base = {
197 .cra_name = "sha1", 114 .cra_name = "sha1",
198 .cra_driver_name= "sha1-ssse3", 115 .cra_driver_name= "sha1-ssse3",
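The BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0) added above is what makes the cast of the assembler routines (which take u32 *digest) to sha1_block_fn (which takes struct sha1_state *) defensible: it only holds while the state words sit at offset 0. A stand-alone sketch of that idea, using illustrative types rather than the kernel's:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct sha1_state_demo {
        uint32_t state[5];      /* must stay the first member for the cast below */
        uint64_t count;
        uint8_t  buffer[64];
};

typedef void sha1_block_fn_demo(struct sha1_state_demo *st, const uint8_t *src, int blocks);

/* stand-in for sha1_transform_ssse3(): digest pointer first, as the .S code expects */
static void asm_style_transform(uint32_t *digest, const uint8_t *data, unsigned int rounds)
{
        (void)digest; (void)data; (void)rounds;
}

int main(void)
{
        /* user-space equivalent of BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0) */
        static_assert(offsetof(struct sha1_state_demo, state) == 0,
                      "state must sit at offset 0");

        struct sha1_state_demo st = { { 0 }, 0, { 0 } };
        sha1_block_fn_demo *block_fn = (sha1_block_fn_demo *)asm_style_transform;

        /* calling through the cast relies on the layout/ABI matching, which is
         * exactly the assumption the BUILD_BUG_ON documents */
        block_fn(&st, st.buffer, 1);    /* how sha1_base_do_update() invokes it */
        return 0;
}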
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 642f15687a0a..92b3b5d75ba9 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -96,10 +96,10 @@ SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
96BYTE_FLIP_MASK = %xmm13 96BYTE_FLIP_MASK = %xmm13
97 97
98NUM_BLKS = %rdx # 3rd arg 98NUM_BLKS = %rdx # 3rd arg
99CTX = %rsi # 2nd arg 99INP = %rsi # 2nd arg
100INP = %rdi # 1st arg 100CTX = %rdi # 1st arg
101 101
102SRND = %rdi # clobbers INP 102SRND = %rsi # clobbers INP
103c = %ecx 103c = %ecx
104d = %r8d 104d = %r8d
105e = %edx 105e = %edx
@@ -342,8 +342,8 @@ a = TMP_
342 342
343######################################################################## 343########################################################################
344## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) 344## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
345## arg 1 : pointer to input data 345## arg 1 : pointer to digest
346## arg 2 : pointer to digest 346## arg 2 : pointer to input data
347## arg 3 : Num blocks 347## arg 3 : Num blocks
348######################################################################## 348########################################################################
349.text 349.text
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 9e86944c539d..570ec5ec62d7 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -91,12 +91,12 @@ BYTE_FLIP_MASK = %ymm13
91X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK 91X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK
92 92
93NUM_BLKS = %rdx # 3rd arg 93NUM_BLKS = %rdx # 3rd arg
94CTX = %rsi # 2nd arg 94INP = %rsi # 2nd arg
95INP = %rdi # 1st arg 95CTX = %rdi # 1st arg
96c = %ecx 96c = %ecx
97d = %r8d 97d = %r8d
98e = %edx # clobbers NUM_BLKS 98e = %edx # clobbers NUM_BLKS
99y3 = %edi # clobbers INP 99y3 = %esi # clobbers INP
100 100
101 101
102TBL = %rbp 102TBL = %rbp
@@ -523,8 +523,8 @@ STACK_SIZE = _RSP + _RSP_SIZE
523 523
524######################################################################## 524########################################################################
525## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) 525## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
526## arg 1 : pointer to input data 526## arg 1 : pointer to digest
527## arg 2 : pointer to digest 527## arg 2 : pointer to input data
528## arg 3 : Num blocks 528## arg 3 : Num blocks
529######################################################################## 529########################################################################
530.text 530.text
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index f833b74d902b..2cedc44e8121 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -88,10 +88,10 @@ SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
88BYTE_FLIP_MASK = %xmm12 88BYTE_FLIP_MASK = %xmm12
89 89
90NUM_BLKS = %rdx # 3rd arg 90NUM_BLKS = %rdx # 3rd arg
91CTX = %rsi # 2nd arg 91INP = %rsi # 2nd arg
92INP = %rdi # 1st arg 92CTX = %rdi # 1st arg
93 93
94SRND = %rdi # clobbers INP 94SRND = %rsi # clobbers INP
95c = %ecx 95c = %ecx
96d = %r8d 96d = %r8d
97e = %edx 97e = %edx
@@ -348,8 +348,8 @@ a = TMP_
348 348
349######################################################################## 349########################################################################
350## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) 350## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks)
351## arg 1 : pointer to input data 351## arg 1 : pointer to digest
352## arg 2 : pointer to digest 352## arg 2 : pointer to input data
353## arg 3 : Num blocks 353## arg 3 : Num blocks
354######################################################################## 354########################################################################
355.text 355.text
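The CTX/INP swap in the three assembly files above moves the digest pointer into the first argument register and the input data into the second, so every SHA-256 transform now shares the (digest, data, nblocks) signature that the rewritten glue code below declares. A small stand-alone sketch of that uniform signature, with stdint types standing in for the kernel's u32/u64:

#include <stdint.h>

typedef void sha256_xform(uint32_t *digest, const char *data, uint64_t nblocks);

/* stand-in for sha256_transform_ssse3(); avx/avx2 stubs would have the same shape */
static void transform_ssse3_stub(uint32_t *digest, const char *data, uint64_t nblocks)
{
        (void)digest; (void)data; (void)nblocks;
}

int main(void)
{
        uint32_t digest[8] = { 0 };
        char block[64] = { 0 };
        sha256_xform *xform = transform_ssse3_stub;

        xform(digest, block, 1);        /* digest first, data second, as in the patch */
        return 0;
}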
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 8fad72f4dfd2..ccc338881ee8 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -36,195 +36,74 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha256_base.h>
 #include <asm/i387.h>
 #include <asm/xcr.h>
 #include <asm/xsave.h>
 #include <linux/string.h>
 
-asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
+asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
                                        u64 rounds);
 #ifdef CONFIG_AS_AVX
-asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
+asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
                                      u64 rounds);
 #endif
 #ifdef CONFIG_AS_AVX2
-asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
+asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
                                       u64 rounds);
 #endif
 
-static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
-
-
-static int sha256_ssse3_init(struct shash_desc *desc)
-{
-        struct sha256_state *sctx = shash_desc_ctx(desc);
-
-        sctx->state[0] = SHA256_H0;
-        sctx->state[1] = SHA256_H1;
-        sctx->state[2] = SHA256_H2;
-        sctx->state[3] = SHA256_H3;
-        sctx->state[4] = SHA256_H4;
-        sctx->state[5] = SHA256_H5;
-        sctx->state[6] = SHA256_H6;
-        sctx->state[7] = SHA256_H7;
-        sctx->count = 0;
-
-        return 0;
-}
-
-static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
-                                 unsigned int len, unsigned int partial)
-{
-        struct sha256_state *sctx = shash_desc_ctx(desc);
-        unsigned int done = 0;
-
-        sctx->count += len;
-
-        if (partial) {
-                done = SHA256_BLOCK_SIZE - partial;
-                memcpy(sctx->buf + partial, data, done);
-                sha256_transform_asm(sctx->buf, sctx->state, 1);
-        }
-
-        if (len - done >= SHA256_BLOCK_SIZE) {
-                const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
-
-                sha256_transform_asm(data + done, sctx->state, (u64) rounds);
-
-                done += rounds * SHA256_BLOCK_SIZE;
-        }
-
-        memcpy(sctx->buf, data + done, len - done);
-
-        return 0;
-}
+static void (*sha256_transform_asm)(u32 *, const char *, u64);
 
 static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
                                unsigned int len)
 {
         struct sha256_state *sctx = shash_desc_ctx(desc);
-        unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-        int res;
 
-        /* Handle the fast case right here */
-        if (partial + len < SHA256_BLOCK_SIZE) {
-                sctx->count += len;
-                memcpy(sctx->buf + partial, data, len);
+        if (!irq_fpu_usable() ||
+            (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+                return crypto_sha256_update(desc, data, len);
 
-                return 0;
-        }
-
-        if (!irq_fpu_usable()) {
-                res = crypto_sha256_update(desc, data, len);
-        } else {
-                kernel_fpu_begin();
-                res = __sha256_ssse3_update(desc, data, len, partial);
-                kernel_fpu_end();
-        }
-
-        return res;
-}
+        /* make sure casting to sha256_block_fn() is safe */
+        BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
 
-
-/* Add padding and return the message digest. */
-static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
-{
-        struct sha256_state *sctx = shash_desc_ctx(desc);
-        unsigned int i, index, padlen;
-        __be32 *dst = (__be32 *)out;
-        __be64 bits;
-        static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
-        bits = cpu_to_be64(sctx->count << 3);
-
-        /* Pad out to 56 mod 64 and append length */
-        index = sctx->count % SHA256_BLOCK_SIZE;
-        padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
-
-        if (!irq_fpu_usable()) {
-                crypto_sha256_update(desc, padding, padlen);
-                crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
-        } else {
-                kernel_fpu_begin();
-                /* We need to fill a whole block for __sha256_ssse3_update() */
-                if (padlen <= 56) {
-                        sctx->count += padlen;
-                        memcpy(sctx->buf + index, padding, padlen);
-                } else {
-                        __sha256_ssse3_update(desc, padding, padlen, index);
-                }
-                __sha256_ssse3_update(desc, (const u8 *)&bits,
-                                      sizeof(bits), 56);
-                kernel_fpu_end();
-        }
-
-        /* Store state in digest */
-        for (i = 0; i < 8; i++)
-                dst[i] = cpu_to_be32(sctx->state[i]);
-
-        /* Wipe context */
-        memset(sctx, 0, sizeof(*sctx));
+        kernel_fpu_begin();
+        sha256_base_do_update(desc, data, len,
+                              (sha256_block_fn *)sha256_transform_asm);
+        kernel_fpu_end();
 
         return 0;
 }
 
-static int sha256_ssse3_export(struct shash_desc *desc, void *out)
+static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
+                              unsigned int len, u8 *out)
 {
-        struct sha256_state *sctx = shash_desc_ctx(desc);
+        if (!irq_fpu_usable())
+                return crypto_sha256_finup(desc, data, len, out);
 
-        memcpy(out, sctx, sizeof(*sctx));
+        kernel_fpu_begin();
+        if (len)
+                sha256_base_do_update(desc, data, len,
+                                      (sha256_block_fn *)sha256_transform_asm);
+        sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm);
+        kernel_fpu_end();
 
-        return 0;
+        return sha256_base_finish(desc, out);
 }
 
-static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
-{
-        struct sha256_state *sctx = shash_desc_ctx(desc);
-
-        memcpy(sctx, in, sizeof(*sctx));
-
-        return 0;
-}
-
-static int sha224_ssse3_init(struct shash_desc *desc)
-{
-        struct sha256_state *sctx = shash_desc_ctx(desc);
-
-        sctx->state[0] = SHA224_H0;
-        sctx->state[1] = SHA224_H1;
-        sctx->state[2] = SHA224_H2;
-        sctx->state[3] = SHA224_H3;
-        sctx->state[4] = SHA224_H4;
-        sctx->state[5] = SHA224_H5;
-        sctx->state[6] = SHA224_H6;
-        sctx->state[7] = SHA224_H7;
-        sctx->count = 0;
-
-        return 0;
-}
-
-static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
+/* Add padding and return the message digest. */
+static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
 {
-        u8 D[SHA256_DIGEST_SIZE];
-
-        sha256_ssse3_final(desc, D);
-
-        memcpy(hash, D, SHA224_DIGEST_SIZE);
-        memzero_explicit(D, SHA256_DIGEST_SIZE);
-
-        return 0;
+        return sha256_ssse3_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg algs[] = { {
         .digestsize = SHA256_DIGEST_SIZE,
-        .init = sha256_ssse3_init,
+        .init = sha256_base_init,
         .update = sha256_ssse3_update,
         .final = sha256_ssse3_final,
-        .export = sha256_ssse3_export,
-        .import = sha256_ssse3_import,
+        .finup = sha256_ssse3_finup,
         .descsize = sizeof(struct sha256_state),
-        .statesize = sizeof(struct sha256_state),
         .base = {
                 .cra_name = "sha256",
                 .cra_driver_name = "sha256-ssse3",
@@ -235,13 +114,11 @@ static struct shash_alg algs[] = { {
         }
 }, {
         .digestsize = SHA224_DIGEST_SIZE,
-        .init = sha224_ssse3_init,
+        .init = sha224_base_init,
         .update = sha256_ssse3_update,
-        .final = sha224_ssse3_final,
-        .export = sha256_ssse3_export,
-        .import = sha256_ssse3_import,
+        .final = sha256_ssse3_final,
+        .finup = sha256_ssse3_finup,
         .descsize = sizeof(struct sha256_state),
-        .statesize = sizeof(struct sha256_state),
         .base = {
                 .cra_name = "sha224",
                 .cra_driver_name = "sha224-ssse3",
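In the sha256_ssse3_glue.c conversion above, the assembly routines keep their u32 *digest first parameter but are handed to sha256_base_do_update()/sha256_base_do_finalize() as a sha256_block_fn, whose first parameter is struct sha256_state *. That cast is only reasonable while state is the first member of struct sha256_state, which is exactly what BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0) asserts at build time. A minimal userspace sketch of that layout assumption follows; the struct below is a simplified stand-in, not the kernel's definition.

/* Userspace sketch of the layout assumption guarded by the BUILD_BUG_ON()
 * above; struct layout and field types are simplified stand-ins. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sha256_state_sketch {
        uint32_t state[8];   /* must stay the first member (offset 0) */
        uint64_t count;
        uint8_t  buf[64];
};

int main(void)
{
        /* mirrors BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0) */
        static_assert(offsetof(struct sha256_state_sketch, state) == 0,
                      "state must sit at offset 0 for the function cast");
        printf("offsetof(state) = %zu\n",
               offsetof(struct sha256_state_sketch, state));
        return 0;
}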
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 974dde9bc6cd..565274d6a641 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -54,9 +54,9 @@
 
 # Virtual Registers
 # ARG1
-msg = %rdi
+digest = %rdi
 # ARG2
-digest = %rsi
+msg = %rsi
 # ARG3
 msglen = %rdx
 T1 = %rcx
@@ -271,7 +271,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 .endm
 
 ########################################################################
-# void sha512_transform_avx(const void* M, void* D, u64 L)
+# void sha512_transform_avx(void* D, const void* M, u64 L)
 # Purpose: Updates the SHA512 digest stored at D with the message stored in M.
 # The size of the message pointed to by M must be an integer multiple of SHA512
 # message blocks.
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 568b96105f5c..a4771dcd1fcf 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -70,9 +70,9 @@ XFER = YTMP0
 BYTE_FLIP_MASK = %ymm9
 
 # 1st arg
-INP = %rdi
+CTX = %rdi
 # 2nd arg
-CTX = %rsi
+INP = %rsi
 # 3rd arg
 NUM_BLKS = %rdx
 
@@ -562,7 +562,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 .endm
 
 ########################################################################
-# void sha512_transform_rorx(const void* M, void* D, uint64_t L)#
+# void sha512_transform_rorx(void* D, const void* M, uint64_t L)#
 # Purpose: Updates the SHA512 digest stored at D with the message stored in M.
 # The size of the message pointed to by M must be an integer multiple of SHA512
 # message blocks.
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index fb56855d51f5..e610e29cbc81 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -53,9 +53,9 @@
 
 # Virtual Registers
 # ARG1
-msg = %rdi
+digest = %rdi
 # ARG2
-digest = %rsi
+msg = %rsi
 # ARG3
 msglen = %rdx
 T1 = %rcx
@@ -269,7 +269,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
 .endm
 
 ########################################################################
-# void sha512_transform_ssse3(const void* M, void* D, u64 L)#
+# void sha512_transform_ssse3(void* D, const void* M, u64 L)#
 # Purpose: Updates the SHA512 digest stored at D with the message stored in M.
 # The size of the message pointed to by M must be an integer multiple of SHA512
 # message blocks.
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 0b6af26832bf..d9fa4c1e063f 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -34,205 +34,75 @@
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha512_base.h>
 #include <asm/i387.h>
 #include <asm/xcr.h>
 #include <asm/xsave.h>
 
 #include <linux/string.h>
 
-asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest,
+asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
                                        u64 rounds);
 #ifdef CONFIG_AS_AVX
-asmlinkage void sha512_transform_avx(const char *data, u64 *digest,
+asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
                                      u64 rounds);
 #endif
 #ifdef CONFIG_AS_AVX2
-asmlinkage void sha512_transform_rorx(const char *data, u64 *digest,
+asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
                                       u64 rounds);
 #endif
 
-static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);
-
-
-static int sha512_ssse3_init(struct shash_desc *desc)
-{
-        struct sha512_state *sctx = shash_desc_ctx(desc);
-
-        sctx->state[0] = SHA512_H0;
-        sctx->state[1] = SHA512_H1;
-        sctx->state[2] = SHA512_H2;
-        sctx->state[3] = SHA512_H3;
-        sctx->state[4] = SHA512_H4;
-        sctx->state[5] = SHA512_H5;
-        sctx->state[6] = SHA512_H6;
-        sctx->state[7] = SHA512_H7;
-        sctx->count[0] = sctx->count[1] = 0;
-
-        return 0;
-}
+static void (*sha512_transform_asm)(u64 *, const char *, u64);
 
-static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
-                                 unsigned int len, unsigned int partial)
+static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
+                               unsigned int len)
 {
         struct sha512_state *sctx = shash_desc_ctx(desc);
-        unsigned int done = 0;
-
-        sctx->count[0] += len;
-        if (sctx->count[0] < len)
-                sctx->count[1]++;
 
-        if (partial) {
-                done = SHA512_BLOCK_SIZE - partial;
-                memcpy(sctx->buf + partial, data, done);
-                sha512_transform_asm(sctx->buf, sctx->state, 1);
-        }
-
-        if (len - done >= SHA512_BLOCK_SIZE) {
-                const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
+        if (!irq_fpu_usable() ||
+            (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
+                return crypto_sha512_update(desc, data, len);
 
-                sha512_transform_asm(data + done, sctx->state, (u64) rounds);
-
-                done += rounds * SHA512_BLOCK_SIZE;
-        }
+        /* make sure casting to sha512_block_fn() is safe */
+        BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
 
-        memcpy(sctx->buf, data + done, len - done);
+        kernel_fpu_begin();
+        sha512_base_do_update(desc, data, len,
+                              (sha512_block_fn *)sha512_transform_asm);
+        kernel_fpu_end();
 
         return 0;
 }
 
-static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
-                               unsigned int len)
+static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
+                              unsigned int len, u8 *out)
 {
-        struct sha512_state *sctx = shash_desc_ctx(desc);
-        unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
-        int res;
-
-        /* Handle the fast case right here */
-        if (partial + len < SHA512_BLOCK_SIZE) {
-                sctx->count[0] += len;
-                if (sctx->count[0] < len)
-                        sctx->count[1]++;
-                memcpy(sctx->buf + partial, data, len);
-
-                return 0;
-        }
+        if (!irq_fpu_usable())
+                return crypto_sha512_finup(desc, data, len, out);
 
-        if (!irq_fpu_usable()) {
-                res = crypto_sha512_update(desc, data, len);
-        } else {
-                kernel_fpu_begin();
-                res = __sha512_ssse3_update(desc, data, len, partial);
-                kernel_fpu_end();
-        }
+        kernel_fpu_begin();
+        if (len)
+                sha512_base_do_update(desc, data, len,
+                                      (sha512_block_fn *)sha512_transform_asm);
+        sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm);
+        kernel_fpu_end();
 
-        return res;
+        return sha512_base_finish(desc, out);
 }
 
-
 /* Add padding and return the message digest. */
 static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
 {
-        struct sha512_state *sctx = shash_desc_ctx(desc);
-        unsigned int i, index, padlen;
-        __be64 *dst = (__be64 *)out;
-        __be64 bits[2];
-        static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
-
-        /* save number of bits */
-        bits[1] = cpu_to_be64(sctx->count[0] << 3);
-        bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
-        /* Pad out to 112 mod 128 and append length */
-        index = sctx->count[0] & 0x7f;
-        padlen = (index < 112) ? (112 - index) : ((128+112) - index);
-
-        if (!irq_fpu_usable()) {
-                crypto_sha512_update(desc, padding, padlen);
-                crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
-        } else {
-                kernel_fpu_begin();
-                /* We need to fill a whole block for __sha512_ssse3_update() */
-                if (padlen <= 112) {
-                        sctx->count[0] += padlen;
-                        if (sctx->count[0] < padlen)
-                                sctx->count[1]++;
-                        memcpy(sctx->buf + index, padding, padlen);
-                } else {
-                        __sha512_ssse3_update(desc, padding, padlen, index);
-                }
-                __sha512_ssse3_update(desc, (const u8 *)&bits,
-                                      sizeof(bits), 112);
-                kernel_fpu_end();
-        }
-
-        /* Store state in digest */
-        for (i = 0; i < 8; i++)
-                dst[i] = cpu_to_be64(sctx->state[i]);
-
-        /* Wipe context */
-        memset(sctx, 0, sizeof(*sctx));
-
-        return 0;
-}
-
-static int sha512_ssse3_export(struct shash_desc *desc, void *out)
-{
-        struct sha512_state *sctx = shash_desc_ctx(desc);
-
-        memcpy(out, sctx, sizeof(*sctx));
-
-        return 0;
-}
-
-static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
-{
-        struct sha512_state *sctx = shash_desc_ctx(desc);
-
-        memcpy(sctx, in, sizeof(*sctx));
-
-        return 0;
-}
-
-static int sha384_ssse3_init(struct shash_desc *desc)
-{
-        struct sha512_state *sctx = shash_desc_ctx(desc);
-
-        sctx->state[0] = SHA384_H0;
-        sctx->state[1] = SHA384_H1;
-        sctx->state[2] = SHA384_H2;
-        sctx->state[3] = SHA384_H3;
-        sctx->state[4] = SHA384_H4;
-        sctx->state[5] = SHA384_H5;
-        sctx->state[6] = SHA384_H6;
-        sctx->state[7] = SHA384_H7;
-
-        sctx->count[0] = sctx->count[1] = 0;
-
-        return 0;
-}
-
-static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
-{
-        u8 D[SHA512_DIGEST_SIZE];
-
-        sha512_ssse3_final(desc, D);
-
-        memcpy(hash, D, SHA384_DIGEST_SIZE);
-        memzero_explicit(D, SHA512_DIGEST_SIZE);
-
-        return 0;
+        return sha512_ssse3_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg algs[] = { {
         .digestsize = SHA512_DIGEST_SIZE,
-        .init = sha512_ssse3_init,
+        .init = sha512_base_init,
         .update = sha512_ssse3_update,
         .final = sha512_ssse3_final,
-        .export = sha512_ssse3_export,
-        .import = sha512_ssse3_import,
+        .finup = sha512_ssse3_finup,
         .descsize = sizeof(struct sha512_state),
-        .statesize = sizeof(struct sha512_state),
         .base = {
                 .cra_name = "sha512",
                 .cra_driver_name = "sha512-ssse3",
@@ -243,13 +113,11 @@ static struct shash_alg algs[] = { {
         }
 }, {
         .digestsize = SHA384_DIGEST_SIZE,
-        .init = sha384_ssse3_init,
+        .init = sha384_base_init,
         .update = sha512_ssse3_update,
-        .final = sha384_ssse3_final,
-        .export = sha512_ssse3_export,
-        .import = sha512_ssse3_import,
+        .final = sha512_ssse3_final,
+        .finup = sha512_ssse3_finup,
         .descsize = sizeof(struct sha512_state),
-        .statesize = sizeof(struct sha512_state),
         .base = {
                 .cra_name = "sha384",
                 .cra_driver_name = "sha384-ssse3",
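Both glue conversions above share the same finalization shape: .final is implemented as .finup with a zero-length tail, so the padding and length append live in one place (sha256_base_do_finalize()/sha512_base_do_finalize()), and the SIMD-specific code only brackets the block function with kernel_fpu_begin()/kernel_fpu_end(), falling back to the generic crypto_sha256_finup()/crypto_sha512_finup() when the FPU is not usable. A schematic userspace illustration of that delegation follows; every type and helper here is invented for the sketch and is not the kernel API.

/* Schematic sketch of the final-as-finup delegation used by the glue files
 * above; all names are invented for illustration. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_desc {
        uint64_t bytes;                      /* running message length */
};

static int toy_finup(struct toy_desc *d, const uint8_t *data, size_t len,
                     uint8_t *out)
{
        (void)data;                          /* a real finup would absorb it */
        if (len)
                d->bytes += len;             /* account for the trailing bytes */
        memset(out, 0, 32);                  /* stand-in for pad + finalize */
        return 0;
}

static int toy_final(struct toy_desc *d, uint8_t *out)
{
        return toy_finup(d, NULL, 0, out);   /* same shape as the diffs above */
}

int main(void)
{
        struct toy_desc d = { 0 };
        uint8_t out[32];

        toy_finup(&d, (const uint8_t *)"abc", 3, out);
        toy_final(&d, out);
        printf("absorbed %llu bytes\n", (unsigned long long)d.bytes);
        return 0;
}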
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 1ac531ea9bcc..b5e2d5651851 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -340,7 +340,8 @@ static struct crypto_alg twofish_algs[10] = { {
340 .cra_name = "__ecb-twofish-avx", 340 .cra_name = "__ecb-twofish-avx",
341 .cra_driver_name = "__driver-ecb-twofish-avx", 341 .cra_driver_name = "__driver-ecb-twofish-avx",
342 .cra_priority = 0, 342 .cra_priority = 0,
343 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 343 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
344 CRYPTO_ALG_INTERNAL,
344 .cra_blocksize = TF_BLOCK_SIZE, 345 .cra_blocksize = TF_BLOCK_SIZE,
345 .cra_ctxsize = sizeof(struct twofish_ctx), 346 .cra_ctxsize = sizeof(struct twofish_ctx),
346 .cra_alignmask = 0, 347 .cra_alignmask = 0,
@@ -359,7 +360,8 @@ static struct crypto_alg twofish_algs[10] = { {
359 .cra_name = "__cbc-twofish-avx", 360 .cra_name = "__cbc-twofish-avx",
360 .cra_driver_name = "__driver-cbc-twofish-avx", 361 .cra_driver_name = "__driver-cbc-twofish-avx",
361 .cra_priority = 0, 362 .cra_priority = 0,
362 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
364 CRYPTO_ALG_INTERNAL,
363 .cra_blocksize = TF_BLOCK_SIZE, 365 .cra_blocksize = TF_BLOCK_SIZE,
364 .cra_ctxsize = sizeof(struct twofish_ctx), 366 .cra_ctxsize = sizeof(struct twofish_ctx),
365 .cra_alignmask = 0, 367 .cra_alignmask = 0,
@@ -378,7 +380,8 @@ static struct crypto_alg twofish_algs[10] = { {
378 .cra_name = "__ctr-twofish-avx", 380 .cra_name = "__ctr-twofish-avx",
379 .cra_driver_name = "__driver-ctr-twofish-avx", 381 .cra_driver_name = "__driver-ctr-twofish-avx",
380 .cra_priority = 0, 382 .cra_priority = 0,
381 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 383 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
384 CRYPTO_ALG_INTERNAL,
382 .cra_blocksize = 1, 385 .cra_blocksize = 1,
383 .cra_ctxsize = sizeof(struct twofish_ctx), 386 .cra_ctxsize = sizeof(struct twofish_ctx),
384 .cra_alignmask = 0, 387 .cra_alignmask = 0,
@@ -398,7 +401,8 @@ static struct crypto_alg twofish_algs[10] = { {
398 .cra_name = "__lrw-twofish-avx", 401 .cra_name = "__lrw-twofish-avx",
399 .cra_driver_name = "__driver-lrw-twofish-avx", 402 .cra_driver_name = "__driver-lrw-twofish-avx",
400 .cra_priority = 0, 403 .cra_priority = 0,
401 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 404 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
405 CRYPTO_ALG_INTERNAL,
402 .cra_blocksize = TF_BLOCK_SIZE, 406 .cra_blocksize = TF_BLOCK_SIZE,
403 .cra_ctxsize = sizeof(struct twofish_lrw_ctx), 407 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
404 .cra_alignmask = 0, 408 .cra_alignmask = 0,
@@ -421,7 +425,8 @@ static struct crypto_alg twofish_algs[10] = { {
421 .cra_name = "__xts-twofish-avx", 425 .cra_name = "__xts-twofish-avx",
422 .cra_driver_name = "__driver-xts-twofish-avx", 426 .cra_driver_name = "__driver-xts-twofish-avx",
423 .cra_priority = 0, 427 .cra_priority = 0,
424 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 428 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
429 CRYPTO_ALG_INTERNAL,
425 .cra_blocksize = TF_BLOCK_SIZE, 430 .cra_blocksize = TF_BLOCK_SIZE,
426 .cra_ctxsize = sizeof(struct twofish_xts_ctx), 431 .cra_ctxsize = sizeof(struct twofish_xts_ctx),
427 .cra_alignmask = 0, 432 .cra_alignmask = 0,