Diffstat (limited to 'arch'): 68 files changed, 10913 insertions, 1129 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index da1266c53c13..7cbf4ef5c6fd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2175,6 +2175,9 @@ source "arch/arm/Kconfig.debug"
 source "security/Kconfig"
 
 source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm/crypto/Kconfig"
+endif
 
 source "lib/Kconfig"
 
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
new file mode 100644
index 000000000000..8da2207b0072
--- /dev/null
+++ b/arch/arm/crypto/Kconfig
@@ -0,0 +1,130 @@
1 | |||
2 | menuconfig ARM_CRYPTO | ||
3 | bool "ARM Accelerated Cryptographic Algorithms" | ||
4 | depends on ARM | ||
5 | help | ||
6 | Say Y here to choose from a selection of cryptographic algorithms | ||
7 | implemented using ARM specific CPU features or instructions. | ||
8 | |||
9 | if ARM_CRYPTO | ||
10 | |||
11 | config CRYPTO_SHA1_ARM | ||
12 | tristate "SHA1 digest algorithm (ARM-asm)" | ||
13 | select CRYPTO_SHA1 | ||
14 | select CRYPTO_HASH | ||
15 | help | ||
16 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | ||
17 | using optimized ARM assembler. | ||
18 | |||
19 | config CRYPTO_SHA1_ARM_NEON | ||
20 | tristate "SHA1 digest algorithm (ARM NEON)" | ||
21 | depends on KERNEL_MODE_NEON | ||
22 | select CRYPTO_SHA1_ARM | ||
23 | select CRYPTO_SHA1 | ||
24 | select CRYPTO_HASH | ||
25 | help | ||
26 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | ||
27 | using optimized ARM NEON assembly, when NEON instructions are | ||
28 | available. | ||
29 | |||
30 | config CRYPTO_SHA1_ARM_CE | ||
31 | tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" | ||
32 | depends on KERNEL_MODE_NEON | ||
33 | select CRYPTO_SHA1_ARM | ||
34 | select CRYPTO_HASH | ||
35 | help | ||
36 | SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented | ||
37 | using special ARMv8 Crypto Extensions. | ||
38 | |||
39 | config CRYPTO_SHA2_ARM_CE | ||
40 | tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" | ||
41 | depends on KERNEL_MODE_NEON | ||
42 | select CRYPTO_SHA256_ARM | ||
43 | select CRYPTO_HASH | ||
44 | help | ||
45 | SHA-256 secure hash standard (DFIPS 180-2) implemented | ||
46 | using special ARMv8 Crypto Extensions. | ||
47 | |||
48 | config CRYPTO_SHA256_ARM | ||
49 | tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)" | ||
50 | select CRYPTO_HASH | ||
51 | depends on !CPU_V7M | ||
52 | help | ||
53 | SHA-256 secure hash standard (DFIPS 180-2) implemented | ||
54 | using optimized ARM assembler and NEON, when available. | ||
55 | |||
56 | config CRYPTO_SHA512_ARM_NEON | ||
57 | tristate "SHA384 and SHA512 digest algorithm (ARM NEON)" | ||
58 | depends on KERNEL_MODE_NEON | ||
59 | select CRYPTO_SHA512 | ||
60 | select CRYPTO_HASH | ||
61 | help | ||
62 | SHA-512 secure hash standard (DFIPS 180-2) implemented | ||
63 | using ARM NEON instructions, when available. | ||
64 | |||
65 | This version of SHA implements a 512 bit hash with 256 bits of | ||
66 | security against collision attacks. | ||
67 | |||
68 | This code also includes SHA-384, a 384 bit hash with 192 bits | ||
69 | of security against collision attacks. | ||
70 | |||
71 | config CRYPTO_AES_ARM | ||
72 | tristate "AES cipher algorithms (ARM-asm)" | ||
73 | depends on ARM | ||
74 | select CRYPTO_ALGAPI | ||
75 | select CRYPTO_AES | ||
76 | help | ||
77 | Use optimized AES assembler routines for ARM platforms. | ||
78 | |||
79 | AES cipher algorithms (FIPS-197). AES uses the Rijndael | ||
80 | algorithm. | ||
81 | |||
82 | Rijndael appears to be consistently a very good performer in | ||
83 | both hardware and software across a wide range of computing | ||
84 | environments regardless of its use in feedback or non-feedback | ||
85 | modes. Its key setup time is excellent, and its key agility is | ||
86 | good. Rijndael's very low memory requirements make it very well | ||
87 | suited for restricted-space environments, in which it also | ||
88 | demonstrates excellent performance. Rijndael's operations are | ||
89 | among the easiest to defend against power and timing attacks. | ||
90 | |||
91 | The AES specifies three key sizes: 128, 192 and 256 bits | ||
92 | |||
93 | See <http://csrc.nist.gov/encryption/aes/> for more information. | ||
94 | |||
95 | config CRYPTO_AES_ARM_BS | ||
96 | tristate "Bit sliced AES using NEON instructions" | ||
97 | depends on KERNEL_MODE_NEON | ||
98 | select CRYPTO_ALGAPI | ||
99 | select CRYPTO_AES_ARM | ||
100 | select CRYPTO_ABLK_HELPER | ||
101 | help | ||
102 | Use a faster and more secure NEON based implementation of AES in CBC, | ||
103 | CTR and XTS modes | ||
104 | |||
105 | Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode | ||
106 | and for XTS mode encryption, CBC and XTS mode decryption speedup is | ||
107 | around 25%. (CBC encryption speed is not affected by this driver.) | ||
108 | This implementation does not rely on any lookup tables so it is | ||
109 | believed to be invulnerable to cache timing attacks. | ||
110 | |||
111 | config CRYPTO_AES_ARM_CE | ||
112 | tristate "Accelerated AES using ARMv8 Crypto Extensions" | ||
113 | depends on KERNEL_MODE_NEON | ||
114 | select CRYPTO_ALGAPI | ||
115 | select CRYPTO_ABLK_HELPER | ||
116 | help | ||
117 | Use an implementation of AES in CBC, CTR and XTS modes that uses | ||
118 | ARMv8 Crypto Extensions | ||
119 | |||
120 | config CRYPTO_GHASH_ARM_CE | ||
121 | tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions" | ||
122 | depends on KERNEL_MODE_NEON | ||
123 | select CRYPTO_HASH | ||
124 | select CRYPTO_CRYPTD | ||
125 | help | ||
126 | Use an implementation of GHASH (used by the GCM AEAD chaining mode) | ||
127 | that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) | ||
128 | that is part of the ARMv8 Crypto Extensions | ||
129 | |||
130 | endif | ||
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index b48fa341648d..6ea828241fcb 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -6,13 +6,35 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
 obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
 
+ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+
+ifneq ($(ce-obj-y)$(ce-obj-m),)
+ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
+obj-y += $(ce-obj-y)
+obj-m += $(ce-obj-m)
+else
+$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
+$(warning $(ce-obj-y) $(ce-obj-m))
+endif
+endif
+
 aes-arm-y := aes-armv4.o aes_glue.o
 aes-arm-bs-y := aesbs-core.o aesbs-glue.o
 sha1-arm-y := sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
+sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
 sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
+sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
+sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
+aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
+ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
 
 quiet_cmd_perl = PERL $@
       cmd_perl = $(PERL) $(<) > $(@)
@@ -20,4 +42,7 @@ quiet_cmd_perl = PERL $@
 $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
 	$(call cmd,perl)
 
-.PRECIOUS: $(obj)/aesbs-core.S
+$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
+	$(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
new file mode 100644
index 000000000000..8cfa468ee570
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -0,0 +1,518 @@
+/*
+ * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.fpu		crypto-neon-fp-armv8
+	.align		3
+
+	.macro		enc_round, state, key
+	aese.8		\state, \key
+	aesmc.8		\state, \state
+	.endm
+
+	.macro		dec_round, state, key
+	aesd.8		\state, \key
+	aesimc.8	\state, \state
+	.endm
+
+	.macro		enc_dround, key1, key2
+	enc_round	q0, \key1
+	enc_round	q0, \key2
+	.endm
+
+	.macro		dec_dround, key1, key2
+	dec_round	q0, \key1
+	dec_round	q0, \key2
+	.endm
+
+	.macro		enc_fround, key1, key2, key3
+	enc_round	q0, \key1
+	aese.8		q0, \key2
+	veor		q0, q0, \key3
+	.endm
+
+	.macro		dec_fround, key1, key2, key3
+	dec_round	q0, \key1
+	aesd.8		q0, \key2
+	veor		q0, q0, \key3
+	.endm
+
+	.macro		enc_dround_3x, key1, key2
+	enc_round	q0, \key1
+	enc_round	q1, \key1
+	enc_round	q2, \key1
+	enc_round	q0, \key2
+	enc_round	q1, \key2
+	enc_round	q2, \key2
+	.endm
+
+	.macro		dec_dround_3x, key1, key2
+	dec_round	q0, \key1
+	dec_round	q1, \key1
+	dec_round	q2, \key1
+	dec_round	q0, \key2
+	dec_round	q1, \key2
+	dec_round	q2, \key2
+	.endm
+
+	.macro		enc_fround_3x, key1, key2, key3
+	enc_round	q0, \key1
+	enc_round	q1, \key1
+	enc_round	q2, \key1
+	aese.8		q0, \key2
+	aese.8		q1, \key2
+	aese.8		q2, \key2
+	veor		q0, q0, \key3
+	veor		q1, q1, \key3
+	veor		q2, q2, \key3
+	.endm
+
+	.macro		dec_fround_3x, key1, key2, key3
+	dec_round	q0, \key1
+	dec_round	q1, \key1
+	dec_round	q2, \key1
+	aesd.8		q0, \key2
+	aesd.8		q1, \key2
+	aesd.8		q2, \key2
+	veor		q0, q0, \key3
+	veor		q1, q1, \key3
+	veor		q2, q2, \key3
+	.endm
+
+	.macro		do_block, dround, fround
+	cmp		r3, #12			@ which key size?
+	vld1.8		{q10-q11}, [ip]!
+	\dround		q8, q9
+	vld1.8		{q12-q13}, [ip]!
+	\dround		q10, q11
+	vld1.8		{q10-q11}, [ip]!
+	\dround		q12, q13
+	vld1.8		{q12-q13}, [ip]!
+	\dround		q10, q11
+	blo		0f			@ AES-128: 10 rounds
+	vld1.8		{q10-q11}, [ip]!
+	beq		1f			@ AES-192: 12 rounds
+	\dround		q12, q13
+	vld1.8		{q12-q13}, [ip]
+	\dround		q10, q11
+0:	\fround		q12, q13, q14
+	bx		lr
+
+1:	\dround		q12, q13
+	\fround		q10, q11, q14
+	bx		lr
+	.endm
+
+	/*
+	 * Internal, non-AAPCS compliant functions that implement the core AES
+	 * transforms. These should preserve all registers except q0 - q2 and ip.
+	 * Arguments:
+	 * q0	: first in/output block
+	 * q1	: second in/output block (_3x version only)
+	 * q2	: third in/output block (_3x version only)
+	 * q8	: first round key
+	 * q9	: second round key
+	 * ip	: address of 3rd round key
+	 * q14	: final round key
+	 * r3	: number of rounds
+	 */
+	.align		6
+aes_encrypt:
+	add		ip, r2, #32		@ 3rd round key
+.Laes_encrypt_tweak:
+	do_block	enc_dround, enc_fround
+ENDPROC(aes_encrypt)
+
+	.align		6
+aes_decrypt:
+	add		ip, r2, #32		@ 3rd round key
+	do_block	dec_dround, dec_fround
+ENDPROC(aes_decrypt)
+
+	.align		6
+aes_encrypt_3x:
+	add		ip, r2, #32		@ 3rd round key
+	do_block	enc_dround_3x, enc_fround_3x
+ENDPROC(aes_encrypt_3x)
+
+	.align		6
+aes_decrypt_3x:
+	add		ip, r2, #32		@ 3rd round key
+	do_block	dec_dround_3x, dec_fround_3x
+ENDPROC(aes_decrypt_3x)
+
+	.macro		prepare_key, rk, rounds
+	add		ip, \rk, \rounds, lsl #4
+	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
+	vld1.8		{q14}, [ip]		@ load last round key
+	.endm
+
+	/*
+	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks)
+	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks)
+	 */
+ENTRY(ce_aes_ecb_encrypt)
+	push		{r4, lr}
+	ldr		r4, [sp, #8]
+	prepare_key	r2, r3
+.Lecbencloop3x:
+	subs		r4, r4, #3
+	bmi		.Lecbenc1x
+	vld1.8		{q0-q1}, [r1, :64]!
+	vld1.8		{q2}, [r1, :64]!
+	bl		aes_encrypt_3x
+	vst1.8		{q0-q1}, [r0, :64]!
+	vst1.8		{q2}, [r0, :64]!
+	b		.Lecbencloop3x
+.Lecbenc1x:
+	adds		r4, r4, #3
+	beq		.Lecbencout
+.Lecbencloop:
+	vld1.8		{q0}, [r1, :64]!
+	bl		aes_encrypt
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	bne		.Lecbencloop
+.Lecbencout:
+	pop		{r4, pc}
+ENDPROC(ce_aes_ecb_encrypt)
+
+ENTRY(ce_aes_ecb_decrypt)
+	push		{r4, lr}
+	ldr		r4, [sp, #8]
+	prepare_key	r2, r3
+.Lecbdecloop3x:
+	subs		r4, r4, #3
+	bmi		.Lecbdec1x
+	vld1.8		{q0-q1}, [r1, :64]!
+	vld1.8		{q2}, [r1, :64]!
+	bl		aes_decrypt_3x
+	vst1.8		{q0-q1}, [r0, :64]!
+	vst1.8		{q2}, [r0, :64]!
+	b		.Lecbdecloop3x
+.Lecbdec1x:
+	adds		r4, r4, #3
+	beq		.Lecbdecout
+.Lecbdecloop:
+	vld1.8		{q0}, [r1, :64]!
+	bl		aes_decrypt
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	bne		.Lecbdecloop
+.Lecbdecout:
+	pop		{r4, pc}
+ENDPROC(ce_aes_ecb_decrypt)
+
+	/*
+	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 iv[])
+	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 *		   int blocks, u8 iv[])
+	 */
+ENTRY(ce_aes_cbc_encrypt)
+	push		{r4-r6, lr}
+	ldrd		r4, r5, [sp, #16]
+	vld1.8		{q0}, [r5]
+	prepare_key	r2, r3
+.Lcbcencloop:
+	vld1.8		{q1}, [r1, :64]!	@ get next pt block
+	veor		q0, q0, q1		@ ..and xor with iv
+	bl		aes_encrypt
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	bne		.Lcbcencloop
+	vst1.8		{q0}, [r5]
+	pop		{r4-r6, pc}
+ENDPROC(ce_aes_cbc_encrypt)
+
+ENTRY(ce_aes_cbc_decrypt)
+	push		{r4-r6, lr}
+	ldrd		r4, r5, [sp, #16]
+	vld1.8		{q6}, [r5]		@ keep iv in q6
+	prepare_key	r2, r3
+.Lcbcdecloop3x:
+	subs		r4, r4, #3
+	bmi		.Lcbcdec1x
+	vld1.8		{q0-q1}, [r1, :64]!
+	vld1.8		{q2}, [r1, :64]!
+	vmov		q3, q0
+	vmov		q4, q1
+	vmov		q5, q2
+	bl		aes_decrypt_3x
+	veor		q0, q0, q6
+	veor		q1, q1, q3
+	veor		q2, q2, q4
+	vmov		q6, q5
+	vst1.8		{q0-q1}, [r0, :64]!
+	vst1.8		{q2}, [r0, :64]!
+	b		.Lcbcdecloop3x
+.Lcbcdec1x:
+	adds		r4, r4, #3
+	beq		.Lcbcdecout
+	vmov		q15, q14		@ preserve last round key
+.Lcbcdecloop:
+	vld1.8		{q0}, [r1, :64]!	@ get next ct block
+	veor		q14, q15, q6		@ combine prev ct with last key
+	vmov		q6, q0
+	bl		aes_decrypt
+	vst1.8		{q0}, [r0, :64]!
+	subs		r4, r4, #1
+	bne		.Lcbcdecloop
+.Lcbcdecout:
+	vst1.8		{q6}, [r5]		@ return iv
+	pop		{r4-r6, pc}
+ENDPROC(ce_aes_cbc_decrypt)
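
Not part of the patch: the single-block CBC decrypt loop above avoids a separate IV XOR by folding the previous ciphertext into the last round key (q14 = q15 ^ q6). This works because the final AES round ends with an XOR of the last round key, and XOR is associative. A minimal C check of that identity:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t state   = 0x0123456789abcdefULL; /* state before the final key XOR */
	uint64_t last_rk = 0xfedcba9876543210ULL; /* last round key (q15) */
	uint64_t prev_ct = 0x5555aaaa5555aaaaULL; /* previous ciphertext (q6) */

	/* folding prev_ct into the key yields D(ct) ^ prev_ct in one pass */
	assert((state ^ (last_rk ^ prev_ct)) == ((state ^ last_rk) ^ prev_ct));
	return 0;
}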
277 | |||
278 | /* | ||
279 | * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | ||
280 | * int blocks, u8 ctr[]) | ||
281 | */ | ||
282 | ENTRY(ce_aes_ctr_encrypt) | ||
283 | push {r4-r6, lr} | ||
284 | ldrd r4, r5, [sp, #16] | ||
285 | vld1.8 {q6}, [r5] @ load ctr | ||
286 | prepare_key r2, r3 | ||
287 | vmov r6, s27 @ keep swabbed ctr in r6 | ||
288 | rev r6, r6 | ||
289 | cmn r6, r4 @ 32 bit overflow? | ||
290 | bcs .Lctrloop | ||
291 | .Lctrloop3x: | ||
292 | subs r4, r4, #3 | ||
293 | bmi .Lctr1x | ||
294 | add r6, r6, #1 | ||
295 | vmov q0, q6 | ||
296 | vmov q1, q6 | ||
297 | rev ip, r6 | ||
298 | add r6, r6, #1 | ||
299 | vmov q2, q6 | ||
300 | vmov s7, ip | ||
301 | rev ip, r6 | ||
302 | add r6, r6, #1 | ||
303 | vmov s11, ip | ||
304 | vld1.8 {q3-q4}, [r1, :64]! | ||
305 | vld1.8 {q5}, [r1, :64]! | ||
306 | bl aes_encrypt_3x | ||
307 | veor q0, q0, q3 | ||
308 | veor q1, q1, q4 | ||
309 | veor q2, q2, q5 | ||
310 | rev ip, r6 | ||
311 | vst1.8 {q0-q1}, [r0, :64]! | ||
312 | vst1.8 {q2}, [r0, :64]! | ||
313 | vmov s27, ip | ||
314 | b .Lctrloop3x | ||
315 | .Lctr1x: | ||
316 | adds r4, r4, #3 | ||
317 | beq .Lctrout | ||
318 | .Lctrloop: | ||
319 | vmov q0, q6 | ||
320 | bl aes_encrypt | ||
321 | subs r4, r4, #1 | ||
322 | bmi .Lctrhalfblock @ blocks < 0 means 1/2 block | ||
323 | vld1.8 {q3}, [r1, :64]! | ||
324 | veor q3, q0, q3 | ||
325 | vst1.8 {q3}, [r0, :64]! | ||
326 | |||
327 | adds r6, r6, #1 @ increment BE ctr | ||
328 | rev ip, r6 | ||
329 | vmov s27, ip | ||
330 | bcs .Lctrcarry | ||
331 | teq r4, #0 | ||
332 | bne .Lctrloop | ||
333 | .Lctrout: | ||
334 | vst1.8 {q6}, [r5] | ||
335 | pop {r4-r6, pc} | ||
336 | |||
337 | .Lctrhalfblock: | ||
338 | vld1.8 {d1}, [r1, :64] | ||
339 | veor d0, d0, d1 | ||
340 | vst1.8 {d0}, [r0, :64] | ||
341 | pop {r4-r6, pc} | ||
342 | |||
343 | .Lctrcarry: | ||
344 | .irp sreg, s26, s25, s24 | ||
345 | vmov ip, \sreg @ load next word of ctr | ||
346 | rev ip, ip @ ... to handle the carry | ||
347 | adds ip, ip, #1 | ||
348 | rev ip, ip | ||
349 | vmov \sreg, ip | ||
350 | bcc 0f | ||
351 | .endr | ||
352 | 0: teq r4, #0 | ||
353 | beq .Lctrout | ||
354 | b .Lctrloop | ||
355 | ENDPROC(ce_aes_ctr_encrypt) | ||
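
Not part of the patch: the .Lctrcarry path above walks the remaining counter words (s26, s25, s24) only for as long as the carry keeps propagating. A hedged C model of the same big-endian 128-bit increment:

#include <stdint.h>

/* Increment a big-endian 128-bit counter; stop once a byte does not wrap. */
static void be128_inc(uint8_t ctr[16])
{
	int i;

	for (i = 15; i >= 0; i--)
		if (++ctr[i] != 0)
			break;
}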
356 | |||
357 | /* | ||
358 | * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, | ||
359 | * int blocks, u8 iv[], u8 const rk2[], int first) | ||
360 | * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, | ||
361 | * int blocks, u8 iv[], u8 const rk2[], int first) | ||
362 | */ | ||
363 | |||
364 | .macro next_tweak, out, in, const, tmp | ||
365 | vshr.s64 \tmp, \in, #63 | ||
366 | vand \tmp, \tmp, \const | ||
367 | vadd.u64 \out, \in, \in | ||
368 | vext.8 \tmp, \tmp, \tmp, #8 | ||
369 | veor \out, \out, \tmp | ||
370 | .endm | ||
371 | |||
372 | .align 3 | ||
373 | .Lxts_mul_x: | ||
374 | .quad 1, 0x87 | ||
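
Not part of the patch: next_tweak multiplies the tweak by x in GF(2^128) — double the 128-bit value and, if a bit fell off the top, reduce with the 0x87 constant from .Lxts_mul_x. A hedged C model operating on two little-endian 64-bit halves:

#include <stdint.h>

/* t[0] holds the low 64 bits of the tweak, t[1] the high 64 bits. */
static void next_tweak(uint64_t t[2])
{
	uint64_t carry = t[1] >> 63;	/* bit that falls off the top */

	t[1] = (t[1] << 1) | (t[0] >> 63);
	t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
}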
375 | |||
376 | ce_aes_xts_init: | ||
377 | vldr d14, .Lxts_mul_x | ||
378 | vldr d15, .Lxts_mul_x + 8 | ||
379 | |||
380 | ldrd r4, r5, [sp, #16] @ load args | ||
381 | ldr r6, [sp, #28] | ||
382 | vld1.8 {q0}, [r5] @ load iv | ||
383 | teq r6, #1 @ start of a block? | ||
384 | bxne lr | ||
385 | |||
386 | @ Encrypt the IV in q0 with the second AES key. This should only | ||
387 | @ be done at the start of a block. | ||
388 | ldr r6, [sp, #24] @ load AES key 2 | ||
389 | prepare_key r6, r3 | ||
390 | add ip, r6, #32 @ 3rd round key of key 2 | ||
391 | b .Laes_encrypt_tweak @ tail call | ||
392 | ENDPROC(ce_aes_xts_init) | ||
393 | |||
394 | ENTRY(ce_aes_xts_encrypt) | ||
395 | push {r4-r6, lr} | ||
396 | |||
397 | bl ce_aes_xts_init @ run shared prologue | ||
398 | prepare_key r2, r3 | ||
399 | vmov q3, q0 | ||
400 | |||
401 | teq r6, #0 @ start of a block? | ||
402 | bne .Lxtsenc3x | ||
403 | |||
404 | .Lxtsencloop3x: | ||
405 | next_tweak q3, q3, q7, q6 | ||
406 | .Lxtsenc3x: | ||
407 | subs r4, r4, #3 | ||
408 | bmi .Lxtsenc1x | ||
409 | vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks | ||
410 | vld1.8 {q2}, [r1, :64]! | ||
411 | next_tweak q4, q3, q7, q6 | ||
412 | veor q0, q0, q3 | ||
413 | next_tweak q5, q4, q7, q6 | ||
414 | veor q1, q1, q4 | ||
415 | veor q2, q2, q5 | ||
416 | bl aes_encrypt_3x | ||
417 | veor q0, q0, q3 | ||
418 | veor q1, q1, q4 | ||
419 | veor q2, q2, q5 | ||
420 | vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks | ||
421 | vst1.8 {q2}, [r0, :64]! | ||
422 | vmov q3, q5 | ||
423 | teq r4, #0 | ||
424 | beq .Lxtsencout | ||
425 | b .Lxtsencloop3x | ||
426 | .Lxtsenc1x: | ||
427 | adds r4, r4, #3 | ||
428 | beq .Lxtsencout | ||
429 | .Lxtsencloop: | ||
430 | vld1.8 {q0}, [r1, :64]! | ||
431 | veor q0, q0, q3 | ||
432 | bl aes_encrypt | ||
433 | veor q0, q0, q3 | ||
434 | vst1.8 {q0}, [r0, :64]! | ||
435 | subs r4, r4, #1 | ||
436 | beq .Lxtsencout | ||
437 | next_tweak q3, q3, q7, q6 | ||
438 | b .Lxtsencloop | ||
439 | .Lxtsencout: | ||
440 | vst1.8 {q3}, [r5] | ||
441 | pop {r4-r6, pc} | ||
442 | ENDPROC(ce_aes_xts_encrypt) | ||
443 | |||
444 | |||
445 | ENTRY(ce_aes_xts_decrypt) | ||
446 | push {r4-r6, lr} | ||
447 | |||
448 | bl ce_aes_xts_init @ run shared prologue | ||
449 | prepare_key r2, r3 | ||
450 | vmov q3, q0 | ||
451 | |||
452 | teq r6, #0 @ start of a block? | ||
453 | bne .Lxtsdec3x | ||
454 | |||
455 | .Lxtsdecloop3x: | ||
456 | next_tweak q3, q3, q7, q6 | ||
457 | .Lxtsdec3x: | ||
458 | subs r4, r4, #3 | ||
459 | bmi .Lxtsdec1x | ||
460 | vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks | ||
461 | vld1.8 {q2}, [r1, :64]! | ||
462 | next_tweak q4, q3, q7, q6 | ||
463 | veor q0, q0, q3 | ||
464 | next_tweak q5, q4, q7, q6 | ||
465 | veor q1, q1, q4 | ||
466 | veor q2, q2, q5 | ||
467 | bl aes_decrypt_3x | ||
468 | veor q0, q0, q3 | ||
469 | veor q1, q1, q4 | ||
470 | veor q2, q2, q5 | ||
471 | vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks | ||
472 | vst1.8 {q2}, [r0, :64]! | ||
473 | vmov q3, q5 | ||
474 | teq r4, #0 | ||
475 | beq .Lxtsdecout | ||
476 | b .Lxtsdecloop3x | ||
477 | .Lxtsdec1x: | ||
478 | adds r4, r4, #3 | ||
479 | beq .Lxtsdecout | ||
480 | .Lxtsdecloop: | ||
481 | vld1.8 {q0}, [r1, :64]! | ||
482 | veor q0, q0, q3 | ||
483 | add ip, r2, #32 @ 3rd round key | ||
484 | bl aes_decrypt | ||
485 | veor q0, q0, q3 | ||
486 | vst1.8 {q0}, [r0, :64]! | ||
487 | subs r4, r4, #1 | ||
488 | beq .Lxtsdecout | ||
489 | next_tweak q3, q3, q7, q6 | ||
490 | b .Lxtsdecloop | ||
491 | .Lxtsdecout: | ||
492 | vst1.8 {q3}, [r5] | ||
493 | pop {r4-r6, pc} | ||
494 | ENDPROC(ce_aes_xts_decrypt) | ||
495 | |||
496 | /* | ||
497 | * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the | ||
498 | * AES sbox substitution on each byte in | ||
499 | * 'input' | ||
500 | */ | ||
501 | ENTRY(ce_aes_sub) | ||
502 | vdup.32 q1, r0 | ||
503 | veor q0, q0, q0 | ||
504 | aese.8 q0, q1 | ||
505 | vmov r0, s0 | ||
506 | bx lr | ||
507 | ENDPROC(ce_aes_sub) | ||
508 | |||
509 | /* | ||
510 | * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns | ||
511 | * operation on round key *src | ||
512 | */ | ||
513 | ENTRY(ce_aes_invert) | ||
514 | vld1.8 {q0}, [r1] | ||
515 | aesimc.8 q0, q0 | ||
516 | vst1.8 {q0}, [r0] | ||
517 | bx lr | ||
518 | ENDPROC(ce_aes_invert) | ||
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
new file mode 100644
index 000000000000..b445a5d56f43
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -0,0 +1,524 @@
+/*
+ * aes-ce-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/* defined in aes-ce-core.S */
+asmlinkage u32 ce_aes_sub(u32 input);
+asmlinkage void ce_aes_invert(void *dst, void *src);
+
+asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks);
+asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks);
+
+asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks, u8 iv[]);
+asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks, u8 iv[]);
+
+asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+				   int rounds, int blocks, u8 ctr[]);
+
+asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+				   int rounds, int blocks, u8 iv[],
+				   u8 const rk2[], int first);
+asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+				   int rounds, int blocks, u8 iv[],
+				   u8 const rk2[], int first);
+
+struct aes_block {
+	u8 b[AES_BLOCK_SIZE];
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * # of rounds specified by AES:
+	 * 128 bit key	10 rounds
+	 * 192 bit key	12 rounds
+	 * 256 bit key	14 rounds
+	 * => n byte key => 6 + (n/4) rounds
+	 */
+	return 6 + ctx->key_length / 4;
+}
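
Not part of the patch: a quick standalone check of the comment's formula, 6 + n/4 rounds for an n-byte key:

#include <assert.h>

static int rounds_for(int key_bytes)
{
	return 6 + key_bytes / 4;
}

int main(void)
{
	assert(rounds_for(16) == 10);	/* AES-128 */
	assert(rounds_for(24) == 12);	/* AES-192 */
	assert(rounds_for(32) == 14);	/* AES-256 */
	return 0;
}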
62 | |||
63 | static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, | ||
64 | unsigned int key_len) | ||
65 | { | ||
66 | /* | ||
67 | * The AES key schedule round constants | ||
68 | */ | ||
69 | static u8 const rcon[] = { | ||
70 | 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, | ||
71 | }; | ||
72 | |||
73 | u32 kwords = key_len / sizeof(u32); | ||
74 | struct aes_block *key_enc, *key_dec; | ||
75 | int i, j; | ||
76 | |||
77 | if (key_len != AES_KEYSIZE_128 && | ||
78 | key_len != AES_KEYSIZE_192 && | ||
79 | key_len != AES_KEYSIZE_256) | ||
80 | return -EINVAL; | ||
81 | |||
82 | memcpy(ctx->key_enc, in_key, key_len); | ||
83 | ctx->key_length = key_len; | ||
84 | |||
85 | kernel_neon_begin(); | ||
86 | for (i = 0; i < sizeof(rcon); i++) { | ||
87 | u32 *rki = ctx->key_enc + (i * kwords); | ||
88 | u32 *rko = rki + kwords; | ||
89 | |||
90 | rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8); | ||
91 | rko[0] = rko[0] ^ rki[0] ^ rcon[i]; | ||
92 | rko[1] = rko[0] ^ rki[1]; | ||
93 | rko[2] = rko[1] ^ rki[2]; | ||
94 | rko[3] = rko[2] ^ rki[3]; | ||
95 | |||
96 | if (key_len == AES_KEYSIZE_192) { | ||
97 | if (i >= 7) | ||
98 | break; | ||
99 | rko[4] = rko[3] ^ rki[4]; | ||
100 | rko[5] = rko[4] ^ rki[5]; | ||
101 | } else if (key_len == AES_KEYSIZE_256) { | ||
102 | if (i >= 6) | ||
103 | break; | ||
104 | rko[4] = ce_aes_sub(rko[3]) ^ rki[4]; | ||
105 | rko[5] = rko[4] ^ rki[5]; | ||
106 | rko[6] = rko[5] ^ rki[6]; | ||
107 | rko[7] = rko[6] ^ rki[7]; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * Generate the decryption keys for the Equivalent Inverse Cipher. | ||
113 | * This involves reversing the order of the round keys, and applying | ||
114 | * the Inverse Mix Columns transformation on all but the first and | ||
115 | * the last one. | ||
116 | */ | ||
117 | key_enc = (struct aes_block *)ctx->key_enc; | ||
118 | key_dec = (struct aes_block *)ctx->key_dec; | ||
119 | j = num_rounds(ctx); | ||
120 | |||
121 | key_dec[0] = key_enc[j]; | ||
122 | for (i = 1, j--; j > 0; i++, j--) | ||
123 | ce_aes_invert(key_dec + i, key_enc + j); | ||
124 | key_dec[i] = key_enc[0]; | ||
125 | |||
126 | kernel_neon_end(); | ||
127 | return 0; | ||
128 | } | ||
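
Not part of the patch: a worked first step of the loop above for the all-zero AES-128 key. S-box(0x00) is 0x63, so ce_aes_sub(0) yields 0x63636363; rotating a word whose bytes are all equal is a no-op, and XORing in rcon[0] produces the well-known first round key bytes 62 63 63 63. A hedged standalone check:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t rki0 = 0, rki3 = 0;	/* words 0 and 3 of the zero key */
	uint32_t sub = 0x63636363;	/* ce_aes_sub(rki3): S-box(0) = 0x63 */
	uint32_t rot = (sub >> 8) | (sub << 24);	/* ror32(sub, 8) */
	uint32_t rko0 = rot ^ rki0 ^ 0x01;		/* rcon[0] */

	assert(rko0 == 0x63636362);	/* bytes 62 63 63 63 in memory (LE) */
	return 0;
}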
129 | |||
130 | static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, | ||
131 | unsigned int key_len) | ||
132 | { | ||
133 | struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
134 | int ret; | ||
135 | |||
136 | ret = ce_aes_expandkey(ctx, in_key, key_len); | ||
137 | if (!ret) | ||
138 | return 0; | ||
139 | |||
140 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
141 | return -EINVAL; | ||
142 | } | ||
143 | |||
144 | struct crypto_aes_xts_ctx { | ||
145 | struct crypto_aes_ctx key1; | ||
146 | struct crypto_aes_ctx __aligned(8) key2; | ||
147 | }; | ||
148 | |||
149 | static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
150 | unsigned int key_len) | ||
151 | { | ||
152 | struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
153 | int ret; | ||
154 | |||
155 | ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2); | ||
156 | if (!ret) | ||
157 | ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2], | ||
158 | key_len / 2); | ||
159 | if (!ret) | ||
160 | return 0; | ||
161 | |||
162 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
163 | return -EINVAL; | ||
164 | } | ||
165 | |||
166 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
167 | struct scatterlist *src, unsigned int nbytes) | ||
168 | { | ||
169 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
170 | struct blkcipher_walk walk; | ||
171 | unsigned int blocks; | ||
172 | int err; | ||
173 | |||
174 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
175 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
176 | err = blkcipher_walk_virt(desc, &walk); | ||
177 | |||
178 | kernel_neon_begin(); | ||
179 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
180 | ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
181 | (u8 *)ctx->key_enc, num_rounds(ctx), blocks); | ||
182 | err = blkcipher_walk_done(desc, &walk, | ||
183 | walk.nbytes % AES_BLOCK_SIZE); | ||
184 | } | ||
185 | kernel_neon_end(); | ||
186 | return err; | ||
187 | } | ||
188 | |||
189 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
190 | struct scatterlist *src, unsigned int nbytes) | ||
191 | { | ||
192 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
193 | struct blkcipher_walk walk; | ||
194 | unsigned int blocks; | ||
195 | int err; | ||
196 | |||
197 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
198 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
199 | err = blkcipher_walk_virt(desc, &walk); | ||
200 | |||
201 | kernel_neon_begin(); | ||
202 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
203 | ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
204 | (u8 *)ctx->key_dec, num_rounds(ctx), blocks); | ||
205 | err = blkcipher_walk_done(desc, &walk, | ||
206 | walk.nbytes % AES_BLOCK_SIZE); | ||
207 | } | ||
208 | kernel_neon_end(); | ||
209 | return err; | ||
210 | } | ||
211 | |||
212 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
213 | struct scatterlist *src, unsigned int nbytes) | ||
214 | { | ||
215 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
216 | struct blkcipher_walk walk; | ||
217 | unsigned int blocks; | ||
218 | int err; | ||
219 | |||
220 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
221 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
222 | err = blkcipher_walk_virt(desc, &walk); | ||
223 | |||
224 | kernel_neon_begin(); | ||
225 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
226 | ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
227 | (u8 *)ctx->key_enc, num_rounds(ctx), blocks, | ||
228 | walk.iv); | ||
229 | err = blkcipher_walk_done(desc, &walk, | ||
230 | walk.nbytes % AES_BLOCK_SIZE); | ||
231 | } | ||
232 | kernel_neon_end(); | ||
233 | return err; | ||
234 | } | ||
235 | |||
236 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
237 | struct scatterlist *src, unsigned int nbytes) | ||
238 | { | ||
239 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
240 | struct blkcipher_walk walk; | ||
241 | unsigned int blocks; | ||
242 | int err; | ||
243 | |||
244 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
245 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
246 | err = blkcipher_walk_virt(desc, &walk); | ||
247 | |||
248 | kernel_neon_begin(); | ||
249 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
250 | ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
251 | (u8 *)ctx->key_dec, num_rounds(ctx), blocks, | ||
252 | walk.iv); | ||
253 | err = blkcipher_walk_done(desc, &walk, | ||
254 | walk.nbytes % AES_BLOCK_SIZE); | ||
255 | } | ||
256 | kernel_neon_end(); | ||
257 | return err; | ||
258 | } | ||
259 | |||
260 | static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
261 | struct scatterlist *src, unsigned int nbytes) | ||
262 | { | ||
263 | struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
264 | struct blkcipher_walk walk; | ||
265 | int err, blocks; | ||
266 | |||
267 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
268 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
269 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | ||
270 | |||
271 | kernel_neon_begin(); | ||
272 | while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { | ||
273 | ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
274 | (u8 *)ctx->key_enc, num_rounds(ctx), blocks, | ||
275 | walk.iv); | ||
276 | nbytes -= blocks * AES_BLOCK_SIZE; | ||
277 | if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) | ||
278 | break; | ||
279 | err = blkcipher_walk_done(desc, &walk, | ||
280 | walk.nbytes % AES_BLOCK_SIZE); | ||
281 | } | ||
282 | if (nbytes) { | ||
283 | u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; | ||
284 | u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; | ||
285 | u8 __aligned(8) tail[AES_BLOCK_SIZE]; | ||
286 | |||
287 | /* | ||
288 | * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need | ||
289 | * to tell aes_ctr_encrypt() to only read half a block. | ||
290 | */ | ||
291 | blocks = (nbytes <= 8) ? -1 : 1; | ||
292 | |||
293 | ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, | ||
294 | num_rounds(ctx), blocks, walk.iv); | ||
295 | memcpy(tdst, tail, nbytes); | ||
296 | err = blkcipher_walk_done(desc, &walk, 0); | ||
297 | } | ||
298 | kernel_neon_end(); | ||
299 | |||
300 | return err; | ||
301 | } | ||
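
Not part of the patch: a hedged sketch of the tail handling above — the final keystream block is XORed into an aligned scratch buffer and only the remaining nbytes are copied out, since the destination may not have room for a full block:

#include <stdint.h>
#include <string.h>

static void ctr_tail(uint8_t *dst, const uint8_t *src,
		     const uint8_t keystream[16], unsigned int nbytes)
{
	uint8_t tail[16];
	unsigned int i;

	for (i = 0; i < nbytes; i++)	/* nbytes < 16 */
		tail[i] = src[i] ^ keystream[i];
	memcpy(dst, tail, nbytes);
}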
302 | |||
303 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
304 | struct scatterlist *src, unsigned int nbytes) | ||
305 | { | ||
306 | struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
307 | int err, first, rounds = num_rounds(&ctx->key1); | ||
308 | struct blkcipher_walk walk; | ||
309 | unsigned int blocks; | ||
310 | |||
311 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
312 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
313 | err = blkcipher_walk_virt(desc, &walk); | ||
314 | |||
315 | kernel_neon_begin(); | ||
316 | for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { | ||
317 | ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
318 | (u8 *)ctx->key1.key_enc, rounds, blocks, | ||
319 | walk.iv, (u8 *)ctx->key2.key_enc, first); | ||
320 | err = blkcipher_walk_done(desc, &walk, | ||
321 | walk.nbytes % AES_BLOCK_SIZE); | ||
322 | } | ||
323 | kernel_neon_end(); | ||
324 | |||
325 | return err; | ||
326 | } | ||
327 | |||
328 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
329 | struct scatterlist *src, unsigned int nbytes) | ||
330 | { | ||
331 | struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
332 | int err, first, rounds = num_rounds(&ctx->key1); | ||
333 | struct blkcipher_walk walk; | ||
334 | unsigned int blocks; | ||
335 | |||
336 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
337 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
338 | err = blkcipher_walk_virt(desc, &walk); | ||
339 | |||
340 | kernel_neon_begin(); | ||
341 | for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { | ||
342 | ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, | ||
343 | (u8 *)ctx->key1.key_dec, rounds, blocks, | ||
344 | walk.iv, (u8 *)ctx->key2.key_enc, first); | ||
345 | err = blkcipher_walk_done(desc, &walk, | ||
346 | walk.nbytes % AES_BLOCK_SIZE); | ||
347 | } | ||
348 | kernel_neon_end(); | ||
349 | |||
350 | return err; | ||
351 | } | ||
352 | |||
353 | static struct crypto_alg aes_algs[] = { { | ||
354 | .cra_name = "__ecb-aes-ce", | ||
355 | .cra_driver_name = "__driver-ecb-aes-ce", | ||
356 | .cra_priority = 0, | ||
357 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | | ||
358 | CRYPTO_ALG_INTERNAL, | ||
359 | .cra_blocksize = AES_BLOCK_SIZE, | ||
360 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | ||
361 | .cra_alignmask = 7, | ||
362 | .cra_type = &crypto_blkcipher_type, | ||
363 | .cra_module = THIS_MODULE, | ||
364 | .cra_blkcipher = { | ||
365 | .min_keysize = AES_MIN_KEY_SIZE, | ||
366 | .max_keysize = AES_MAX_KEY_SIZE, | ||
367 | .ivsize = AES_BLOCK_SIZE, | ||
368 | .setkey = ce_aes_setkey, | ||
369 | .encrypt = ecb_encrypt, | ||
370 | .decrypt = ecb_decrypt, | ||
371 | }, | ||
372 | }, { | ||
373 | .cra_name = "__cbc-aes-ce", | ||
374 | .cra_driver_name = "__driver-cbc-aes-ce", | ||
375 | .cra_priority = 0, | ||
376 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | | ||
377 | CRYPTO_ALG_INTERNAL, | ||
378 | .cra_blocksize = AES_BLOCK_SIZE, | ||
379 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | ||
380 | .cra_alignmask = 7, | ||
381 | .cra_type = &crypto_blkcipher_type, | ||
382 | .cra_module = THIS_MODULE, | ||
383 | .cra_blkcipher = { | ||
384 | .min_keysize = AES_MIN_KEY_SIZE, | ||
385 | .max_keysize = AES_MAX_KEY_SIZE, | ||
386 | .ivsize = AES_BLOCK_SIZE, | ||
387 | .setkey = ce_aes_setkey, | ||
388 | .encrypt = cbc_encrypt, | ||
389 | .decrypt = cbc_decrypt, | ||
390 | }, | ||
391 | }, { | ||
392 | .cra_name = "__ctr-aes-ce", | ||
393 | .cra_driver_name = "__driver-ctr-aes-ce", | ||
394 | .cra_priority = 0, | ||
395 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | | ||
396 | CRYPTO_ALG_INTERNAL, | ||
397 | .cra_blocksize = 1, | ||
398 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | ||
399 | .cra_alignmask = 7, | ||
400 | .cra_type = &crypto_blkcipher_type, | ||
401 | .cra_module = THIS_MODULE, | ||
402 | .cra_blkcipher = { | ||
403 | .min_keysize = AES_MIN_KEY_SIZE, | ||
404 | .max_keysize = AES_MAX_KEY_SIZE, | ||
405 | .ivsize = AES_BLOCK_SIZE, | ||
406 | .setkey = ce_aes_setkey, | ||
407 | .encrypt = ctr_encrypt, | ||
408 | .decrypt = ctr_encrypt, | ||
409 | }, | ||
410 | }, { | ||
411 | .cra_name = "__xts-aes-ce", | ||
412 | .cra_driver_name = "__driver-xts-aes-ce", | ||
413 | .cra_priority = 0, | ||
414 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | | ||
415 | CRYPTO_ALG_INTERNAL, | ||
416 | .cra_blocksize = AES_BLOCK_SIZE, | ||
417 | .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), | ||
418 | .cra_alignmask = 7, | ||
419 | .cra_type = &crypto_blkcipher_type, | ||
420 | .cra_module = THIS_MODULE, | ||
421 | .cra_blkcipher = { | ||
422 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
423 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
424 | .ivsize = AES_BLOCK_SIZE, | ||
425 | .setkey = xts_set_key, | ||
426 | .encrypt = xts_encrypt, | ||
427 | .decrypt = xts_decrypt, | ||
428 | }, | ||
429 | }, { | ||
430 | .cra_name = "ecb(aes)", | ||
431 | .cra_driver_name = "ecb-aes-ce", | ||
432 | .cra_priority = 300, | ||
433 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
434 | .cra_blocksize = AES_BLOCK_SIZE, | ||
435 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
436 | .cra_alignmask = 7, | ||
437 | .cra_type = &crypto_ablkcipher_type, | ||
438 | .cra_module = THIS_MODULE, | ||
439 | .cra_init = ablk_init, | ||
440 | .cra_exit = ablk_exit, | ||
441 | .cra_ablkcipher = { | ||
442 | .min_keysize = AES_MIN_KEY_SIZE, | ||
443 | .max_keysize = AES_MAX_KEY_SIZE, | ||
444 | .ivsize = AES_BLOCK_SIZE, | ||
445 | .setkey = ablk_set_key, | ||
446 | .encrypt = ablk_encrypt, | ||
447 | .decrypt = ablk_decrypt, | ||
448 | } | ||
449 | }, { | ||
450 | .cra_name = "cbc(aes)", | ||
451 | .cra_driver_name = "cbc-aes-ce", | ||
452 | .cra_priority = 300, | ||
453 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
454 | .cra_blocksize = AES_BLOCK_SIZE, | ||
455 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
456 | .cra_alignmask = 7, | ||
457 | .cra_type = &crypto_ablkcipher_type, | ||
458 | .cra_module = THIS_MODULE, | ||
459 | .cra_init = ablk_init, | ||
460 | .cra_exit = ablk_exit, | ||
461 | .cra_ablkcipher = { | ||
462 | .min_keysize = AES_MIN_KEY_SIZE, | ||
463 | .max_keysize = AES_MAX_KEY_SIZE, | ||
464 | .ivsize = AES_BLOCK_SIZE, | ||
465 | .setkey = ablk_set_key, | ||
466 | .encrypt = ablk_encrypt, | ||
467 | .decrypt = ablk_decrypt, | ||
468 | } | ||
469 | }, { | ||
470 | .cra_name = "ctr(aes)", | ||
471 | .cra_driver_name = "ctr-aes-ce", | ||
472 | .cra_priority = 300, | ||
473 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
474 | .cra_blocksize = 1, | ||
475 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
476 | .cra_alignmask = 7, | ||
477 | .cra_type = &crypto_ablkcipher_type, | ||
478 | .cra_module = THIS_MODULE, | ||
479 | .cra_init = ablk_init, | ||
480 | .cra_exit = ablk_exit, | ||
481 | .cra_ablkcipher = { | ||
482 | .min_keysize = AES_MIN_KEY_SIZE, | ||
483 | .max_keysize = AES_MAX_KEY_SIZE, | ||
484 | .ivsize = AES_BLOCK_SIZE, | ||
485 | .setkey = ablk_set_key, | ||
486 | .encrypt = ablk_encrypt, | ||
487 | .decrypt = ablk_decrypt, | ||
488 | } | ||
489 | }, { | ||
490 | .cra_name = "xts(aes)", | ||
491 | .cra_driver_name = "xts-aes-ce", | ||
492 | .cra_priority = 300, | ||
493 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
494 | .cra_blocksize = AES_BLOCK_SIZE, | ||
495 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
496 | .cra_alignmask = 7, | ||
497 | .cra_type = &crypto_ablkcipher_type, | ||
498 | .cra_module = THIS_MODULE, | ||
499 | .cra_init = ablk_init, | ||
500 | .cra_exit = ablk_exit, | ||
501 | .cra_ablkcipher = { | ||
502 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
503 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
504 | .ivsize = AES_BLOCK_SIZE, | ||
505 | .setkey = ablk_set_key, | ||
506 | .encrypt = ablk_encrypt, | ||
507 | .decrypt = ablk_decrypt, | ||
508 | } | ||
509 | } }; | ||
510 | |||
511 | static int __init aes_init(void) | ||
512 | { | ||
513 | if (!(elf_hwcap2 & HWCAP2_AES)) | ||
514 | return -ENODEV; | ||
515 | return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); | ||
516 | } | ||
517 | |||
518 | static void __exit aes_exit(void) | ||
519 | { | ||
520 | crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); | ||
521 | } | ||
522 | |||
523 | module_init(aes_init); | ||
524 | module_exit(aes_exit); | ||
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
index 15468fbbdea3..6d685298690e 100644
--- a/arch/arm/crypto/aesbs-glue.c
+++ b/arch/arm/crypto/aesbs-glue.c
@@ -301,7 +301,8 @@ static struct crypto_alg aesbs_algs[] = { {
 	.cra_name		= "__cbc-aes-neonbs",
 	.cra_driver_name	= "__driver-cbc-aes-neonbs",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct aesbs_cbc_ctx),
 	.cra_alignmask		= 7,
@@ -319,7 +320,8 @@ static struct crypto_alg aesbs_algs[] = { {
 	.cra_name		= "__ctr-aes-neonbs",
 	.cra_driver_name	= "__driver-ctr-aes-neonbs",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= 1,
 	.cra_ctxsize		= sizeof(struct aesbs_ctr_ctx),
 	.cra_alignmask		= 7,
@@ -337,7 +339,8 @@ static struct crypto_alg aesbs_algs[] = { {
 	.cra_name		= "__xts-aes-neonbs",
 	.cra_driver_name	= "__driver-xts-aes-neonbs",
 	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				  CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct aesbs_xts_ctx),
 	.cra_alignmask		= 7,
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..f6ab8bcc9efe
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -0,0 +1,94 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ *
+ * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	SHASH		.req	q0
+	SHASH2		.req	q1
+	T1		.req	q2
+	T2		.req	q3
+	MASK		.req	q4
+	XL		.req	q5
+	XM		.req	q6
+	XH		.req	q7
+	IN1		.req	q7
+
+	SHASH_L		.req	d0
+	SHASH_H		.req	d1
+	SHASH2_L	.req	d2
+	T1_L		.req	d4
+	MASK_L		.req	d8
+	XL_L		.req	d10
+	XL_H		.req	d11
+	XM_L		.req	d12
+	XM_H		.req	d13
+	XH_L		.req	d14
+
+	.text
+	.fpu		crypto-neon-fp-armv8
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update)
+	vld1.64		{SHASH}, [r3]
+	vld1.64		{XL}, [r1]
+	vmov.i8		MASK, #0xe1
+	vext.8		SHASH2, SHASH, SHASH, #8
+	vshl.u64	MASK, MASK, #57
+	veor		SHASH2, SHASH2, SHASH
+
+	/* do the head block first, if supplied */
+	ldr		ip, [sp]
+	teq		ip, #0
+	beq		0f
+	vld1.64		{T1}, [ip]
+	teq		r0, #0
+	b		1f
+
+0:	vld1.64		{T1}, [r2]!
+	subs		r0, r0, #1
+
+1:	/* multiply XL by SHASH in GF(2^128) */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	vrev64.8	T1, T1
+#endif
+	vext.8		T2, XL, XL, #8
+	vext.8		IN1, T1, T1, #8
+	veor		T1, T1, T2
+	veor		XL, XL, IN1
+
+	vmull.p64	XH, SHASH_H, XL_H	@ a1 * b1
+	veor		T1, T1, XL
+	vmull.p64	XL, SHASH_L, XL_L	@ a0 * b0
+	vmull.p64	XM, SHASH2_L, T1_L	@ (a1 + a0)(b1 + b0)
+
+	vext.8		T1, XL, XH, #8
+	veor		T2, XL, XH
+	veor		XM, XM, T1
+	veor		XM, XM, T2
+	vmull.p64	T2, XL_L, MASK_L
+
+	vmov		XH_L, XM_H
+	vmov		XM_H, XL_L
+
+	veor		XL, XM, T2
+	vext.8		T2, XL, XL, #8
+	vmull.p64	XL, XL_L, MASK_L
+	veor		T2, T2, XH
+	veor		XL, XL, T2
+
+	bne		0b
+
+	vst1.64		{XL}, [r1]
+	bx		lr
+ENDPROC(pmull_ghash_update)
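
Not part of the patch: the three vmull.p64 instructions above form a Karatsuba-style multiply — a1*b1, a0*b0 and (a1+a0)(b1+b0), where '+' is XOR — so the middle 128-bit term can be recovered with XORs instead of a fourth multiply. A hedged bit-serial C stand-in for a single vmull.p64:

#include <stdint.h>

/* 64x64 -> 128-bit carry-less multiply; r[0] is the low half, r[1] the high. */
static void clmul64(uint64_t a, uint64_t b, uint64_t r[2])
{
	int i;

	r[0] = r[1] = 0;
	for (i = 0; i < 64; i++)
		if ((b >> i) & 1) {
			r[0] ^= a << i;
			if (i)
				r[1] ^= a >> (64 - i);
		}
}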
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..03a39fe29246
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -0,0 +1,320 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ *
+ * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/cryptd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/gf128mul.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+struct ghash_key {
+	u64	a;
+	u64	b;
+};
+
+struct ghash_desc_ctx {
+	u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
+	u8 buf[GHASH_BLOCK_SIZE];
+	u32 count;
+};
+
+struct ghash_async_ctx {
+	struct cryptd_ahash *cryptd_tfm;
+};
+
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+				   struct ghash_key const *k, const char *head);
+
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	*ctx = (struct ghash_desc_ctx){};
+	return 0;
+}
+
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+			unsigned int len)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+	ctx->count += len;
+
+	if ((partial + len) >= GHASH_BLOCK_SIZE) {
+		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+		int blocks;
+
+		if (partial) {
+			int p = GHASH_BLOCK_SIZE - partial;
+
+			memcpy(ctx->buf + partial, src, p);
+			src += p;
+			len -= p;
+		}
+
+		blocks = len / GHASH_BLOCK_SIZE;
+		len %= GHASH_BLOCK_SIZE;
+
+		kernel_neon_begin();
+		pmull_ghash_update(blocks, ctx->digest, src, key,
+				   partial ? ctx->buf : NULL);
+		kernel_neon_end();
+		src += blocks * GHASH_BLOCK_SIZE;
+		partial = 0;
+	}
+	if (len)
+		memcpy(ctx->buf + partial, src, len);
+	return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+	if (partial) {
+		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+		kernel_neon_begin();
+		pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+		kernel_neon_end();
+	}
+	put_unaligned_be64(ctx->digest[1], dst);
+	put_unaligned_be64(ctx->digest[0], dst + 8);
+
+	*ctx = (struct ghash_desc_ctx){};
+	return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *inkey, unsigned int keylen)
+{
+	struct ghash_key *key = crypto_shash_ctx(tfm);
+	u64 a, b;
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	/* perform multiplication by 'x' in GF(2^128) */
+	b = get_unaligned_be64(inkey);
+	a = get_unaligned_be64(inkey + 8);
+
+	key->a = (a << 1) | (b >> 63);
+	key->b = (b << 1) | (a >> 63);
+
+	if (b >> 63)
+		key->b ^= 0xc200000000000000UL;
+
+	return 0;
+}
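
Not part of the patch: a hedged C restatement of the setkey shift above. H is loaded big-endian as the halves (b:a), rotated left one bit as a 128-bit value, and when the top bit of b was set the GHASH reduction constant 0xc2 << 56 is folded into b — the premultiplication by 'x' that lets the assembler consume the key directly:

#include <stdint.h>

/* a is the low half, b the high half of H, as in ghash_setkey(). */
static void ghash_key_shift(uint64_t *a, uint64_t *b)
{
	uint64_t lo = *a, hi = *b;

	*a = (lo << 1) | (hi >> 63);
	*b = (hi << 1) | (lo >> 63);
	if (hi >> 63)
		*b ^= 0xc200000000000000ULL;
}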
132 | |||
133 | static struct shash_alg ghash_alg = { | ||
134 | .digestsize = GHASH_DIGEST_SIZE, | ||
135 | .init = ghash_init, | ||
136 | .update = ghash_update, | ||
137 | .final = ghash_final, | ||
138 | .setkey = ghash_setkey, | ||
139 | .descsize = sizeof(struct ghash_desc_ctx), | ||
140 | .base = { | ||
141 | .cra_name = "ghash", | ||
142 | .cra_driver_name = "__driver-ghash-ce", | ||
143 | .cra_priority = 0, | ||
144 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL, | ||
145 | .cra_blocksize = GHASH_BLOCK_SIZE, | ||
146 | .cra_ctxsize = sizeof(struct ghash_key), | ||
147 | .cra_module = THIS_MODULE, | ||
148 | }, | ||
149 | }; | ||
150 | |||
151 | static int ghash_async_init(struct ahash_request *req) | ||
152 | { | ||
153 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
154 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
155 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
156 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
157 | |||
158 | if (!may_use_simd()) { | ||
159 | memcpy(cryptd_req, req, sizeof(*req)); | ||
160 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
161 | return crypto_ahash_init(cryptd_req); | ||
162 | } else { | ||
163 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
164 | struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); | ||
165 | |||
166 | desc->tfm = child; | ||
167 | desc->flags = req->base.flags; | ||
168 | return crypto_shash_init(desc); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | static int ghash_async_update(struct ahash_request *req) | ||
173 | { | ||
174 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
175 | |||
176 | if (!may_use_simd()) { | ||
177 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
178 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
179 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
180 | |||
181 | memcpy(cryptd_req, req, sizeof(*req)); | ||
182 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
183 | return crypto_ahash_update(cryptd_req); | ||
184 | } else { | ||
185 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
186 | return shash_ahash_update(req, desc); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static int ghash_async_final(struct ahash_request *req) | ||
191 | { | ||
192 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
193 | |||
194 | if (!may_use_simd()) { | ||
195 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
196 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
197 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
198 | |||
199 | memcpy(cryptd_req, req, sizeof(*req)); | ||
200 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
201 | return crypto_ahash_final(cryptd_req); | ||
202 | } else { | ||
203 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
204 | return crypto_shash_final(desc, req->result); | ||
205 | } | ||
206 | } | ||
207 | |||
208 | static int ghash_async_digest(struct ahash_request *req) | ||
209 | { | ||
210 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); | ||
211 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
212 | struct ahash_request *cryptd_req = ahash_request_ctx(req); | ||
213 | struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; | ||
214 | |||
215 | if (!may_use_simd()) { | ||
216 | memcpy(cryptd_req, req, sizeof(*req)); | ||
217 | ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); | ||
218 | return crypto_ahash_digest(cryptd_req); | ||
219 | } else { | ||
220 | struct shash_desc *desc = cryptd_shash_desc(cryptd_req); | ||
221 | struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); | ||
222 | |||
223 | desc->tfm = child; | ||
224 | desc->flags = req->base.flags; | ||
225 | return shash_ahash_digest(req, desc); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, | ||
230 | unsigned int keylen) | ||
231 | { | ||
232 | struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); | ||
233 | struct crypto_ahash *child = &ctx->cryptd_tfm->base; | ||
234 | int err; | ||
235 | |||
236 | crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
237 | crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) | ||
238 | & CRYPTO_TFM_REQ_MASK); | ||
239 | err = crypto_ahash_setkey(child, key, keylen); | ||
240 | crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) | ||
241 | & CRYPTO_TFM_RES_MASK); | ||
242 | |||
243 | return err; | ||
244 | } | ||
245 | |||
246 | static int ghash_async_init_tfm(struct crypto_tfm *tfm) | ||
247 | { | ||
248 | struct cryptd_ahash *cryptd_tfm; | ||
249 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | ||
250 | |||
251 | cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce", | ||
252 | CRYPTO_ALG_INTERNAL, | ||
253 | CRYPTO_ALG_INTERNAL); | ||
254 | if (IS_ERR(cryptd_tfm)) | ||
255 | return PTR_ERR(cryptd_tfm); | ||
256 | ctx->cryptd_tfm = cryptd_tfm; | ||
257 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), | ||
258 | sizeof(struct ahash_request) + | ||
259 | crypto_ahash_reqsize(&cryptd_tfm->base)); | ||
260 | |||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static void ghash_async_exit_tfm(struct crypto_tfm *tfm) | ||
265 | { | ||
266 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | ||
267 | |||
268 | cryptd_free_ahash(ctx->cryptd_tfm); | ||
269 | } | ||
270 | |||
271 | static struct ahash_alg ghash_async_alg = { | ||
272 | .init = ghash_async_init, | ||
273 | .update = ghash_async_update, | ||
274 | .final = ghash_async_final, | ||
275 | .setkey = ghash_async_setkey, | ||
276 | .digest = ghash_async_digest, | ||
277 | .halg.digestsize = GHASH_DIGEST_SIZE, | ||
278 | .halg.base = { | ||
279 | .cra_name = "ghash", | ||
280 | .cra_driver_name = "ghash-ce", | ||
281 | .cra_priority = 300, | ||
282 | .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, | ||
283 | .cra_blocksize = GHASH_BLOCK_SIZE, | ||
284 | .cra_type = &crypto_ahash_type, | ||
285 | .cra_ctxsize = sizeof(struct ghash_async_ctx), | ||
286 | .cra_module = THIS_MODULE, | ||
287 | .cra_init = ghash_async_init_tfm, | ||
288 | .cra_exit = ghash_async_exit_tfm, | ||
289 | }, | ||
290 | }; | ||
291 | |||
292 | static int __init ghash_ce_mod_init(void) | ||
293 | { | ||
294 | int err; | ||
295 | |||
296 | if (!(elf_hwcap2 & HWCAP2_PMULL)) | ||
297 | return -ENODEV; | ||
298 | |||
299 | err = crypto_register_shash(&ghash_alg); | ||
300 | if (err) | ||
301 | return err; | ||
302 | err = crypto_register_ahash(&ghash_async_alg); | ||
303 | if (err) | ||
304 | goto err_shash; | ||
305 | |||
306 | return 0; | ||
307 | |||
308 | err_shash: | ||
309 | crypto_unregister_shash(&ghash_alg); | ||
310 | return err; | ||
311 | } | ||
312 | |||
313 | static void __exit ghash_ce_mod_exit(void) | ||
314 | { | ||
315 | crypto_unregister_ahash(&ghash_async_alg); | ||
316 | crypto_unregister_shash(&ghash_alg); | ||
317 | } | ||
318 | |||
319 | module_init(ghash_ce_mod_init); | ||
320 | module_exit(ghash_ce_mod_exit); | ||
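The setkey routine above folds a multiplication by 'x' in GF(2^128) into the hash key, as its own comment says; the 0xc2 word is the usual GHASH reduction constant for the bit-reversed field polynomial. A standalone userspace mirror of that preprocessing (sketch only; load_be64() is a local helper, not a kernel API):

#include <stdint.h>

struct ghash_key_sketch { uint64_t a, b; };

static uint64_t load_be64(const uint8_t *p)
{
	uint64_t v = 0;

	for (int i = 0; i < 8; i++)
		v = (v << 8) | p[i];
	return v;
}

/* Mirrors ghash_setkey() above: shift the 128-bit key left by one
 * bit and fold the carry back in with the reduction constant. */
static void ghash_setkey_sketch(struct ghash_key_sketch *key,
				const uint8_t inkey[16])
{
	uint64_t b = load_be64(inkey);		/* high half of H */
	uint64_t a = load_be64(inkey + 8);	/* low half of H */

	key->a = (a << 1) | (b >> 63);
	key->b = (b << 1) | (a >> 63);

	if (b >> 63)				/* bit shifted out the top */
		key->b ^= 0xc200000000000000ULL;
}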
diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S new file mode 100644 index 000000000000..b623f51ccbcf --- /dev/null +++ b/arch/arm/crypto/sha1-ce-core.S | |||
@@ -0,0 +1,125 @@ | |||
1 | /* | ||
2 | * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd. | ||
5 | * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/linkage.h> | ||
13 | #include <asm/assembler.h> | ||
14 | |||
15 | .text | ||
16 | .fpu crypto-neon-fp-armv8 | ||
17 | |||
18 | k0 .req q0 | ||
19 | k1 .req q1 | ||
20 | k2 .req q2 | ||
21 | k3 .req q3 | ||
22 | |||
23 | ta0 .req q4 | ||
24 | ta1 .req q5 | ||
25 | tb0 .req q5 | ||
26 | tb1 .req q4 | ||
27 | |||
28 | dga .req q6 | ||
29 | dgb .req q7 | ||
30 | dgbs .req s28 | ||
31 | |||
32 | dg0 .req q12 | ||
33 | dg1a0 .req q13 | ||
34 | dg1a1 .req q14 | ||
35 | dg1b0 .req q14 | ||
36 | dg1b1 .req q13 | ||
37 | |||
38 | .macro add_only, op, ev, rc, s0, dg1 | ||
39 | .ifnb \s0 | ||
40 | vadd.u32 tb\ev, q\s0, \rc | ||
41 | .endif | ||
42 | sha1h.32 dg1b\ev, dg0 | ||
43 | .ifb \dg1 | ||
44 | sha1\op\().32 dg0, dg1a\ev, ta\ev | ||
45 | .else | ||
46 | sha1\op\().32 dg0, \dg1, ta\ev | ||
47 | .endif | ||
48 | .endm | ||
49 | |||
50 | .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 | ||
51 | sha1su0.32 q\s0, q\s1, q\s2 | ||
52 | add_only \op, \ev, \rc, \s1, \dg1 | ||
53 | sha1su1.32 q\s0, q\s3 | ||
54 | .endm | ||
55 | |||
56 | .align 6 | ||
57 | .Lsha1_rcon: | ||
58 | .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999 | ||
59 | .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1 | ||
60 | .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc | ||
61 | .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6 | ||
62 | |||
63 | /* | ||
64 | * void sha1_ce_transform(struct sha1_state *sst, u8 const *src, | ||
65 | * int blocks); | ||
66 | */ | ||
67 | ENTRY(sha1_ce_transform) | ||
68 | /* load round constants */ | ||
69 | adr ip, .Lsha1_rcon | ||
70 | vld1.32 {k0-k1}, [ip, :128]! | ||
71 | vld1.32 {k2-k3}, [ip, :128] | ||
72 | |||
73 | /* load state */ | ||
74 | vld1.32 {dga}, [r0] | ||
75 | vldr dgbs, [r0, #16] | ||
76 | |||
77 | /* load input */ | ||
78 | 0: vld1.32 {q8-q9}, [r1]! | ||
79 | vld1.32 {q10-q11}, [r1]! | ||
80 | subs r2, r2, #1 | ||
81 | |||
82 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
83 | vrev32.8 q8, q8 | ||
84 | vrev32.8 q9, q9 | ||
85 | vrev32.8 q10, q10 | ||
86 | vrev32.8 q11, q11 | ||
87 | #endif | ||
88 | |||
89 | vadd.u32 ta0, q8, k0 | ||
90 | vmov dg0, dga | ||
91 | |||
92 | add_update c, 0, k0, 8, 9, 10, 11, dgb | ||
93 | add_update c, 1, k0, 9, 10, 11, 8 | ||
94 | add_update c, 0, k0, 10, 11, 8, 9 | ||
95 | add_update c, 1, k0, 11, 8, 9, 10 | ||
96 | add_update c, 0, k1, 8, 9, 10, 11 | ||
97 | |||
98 | add_update p, 1, k1, 9, 10, 11, 8 | ||
99 | add_update p, 0, k1, 10, 11, 8, 9 | ||
100 | add_update p, 1, k1, 11, 8, 9, 10 | ||
101 | add_update p, 0, k1, 8, 9, 10, 11 | ||
102 | add_update p, 1, k2, 9, 10, 11, 8 | ||
103 | |||
104 | add_update m, 0, k2, 10, 11, 8, 9 | ||
105 | add_update m, 1, k2, 11, 8, 9, 10 | ||
106 | add_update m, 0, k2, 8, 9, 10, 11 | ||
107 | add_update m, 1, k2, 9, 10, 11, 8 | ||
108 | add_update m, 0, k3, 10, 11, 8, 9 | ||
109 | |||
110 | add_update p, 1, k3, 11, 8, 9, 10 | ||
111 | add_only p, 0, k3, 9 | ||
112 | add_only p, 1, k3, 10 | ||
113 | add_only p, 0, k3, 11 | ||
114 | add_only p, 1 | ||
115 | |||
116 | /* update state */ | ||
117 | vadd.u32 dga, dga, dg0 | ||
118 | vadd.u32 dgb, dgb, dg1a0 | ||
119 | bne 0b | ||
120 | |||
121 | /* store new state */ | ||
122 | vst1.32 {dga}, [r0] | ||
123 | vstr dgbs, [r0, #16] | ||
124 | bx lr | ||
125 | ENDPROC(sha1_ce_transform) | ||
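For orientation, the c/p/m variants driven through add_update/add_only above are SHA-1's three round functions, and .Lsha1_rcon replicates each of the four round constants across a NEON register. A scalar sketch of the per-round selection (illustrative, not kernel code):

#include <stdint.h>

/* Which function and constant each of SHA-1's 80 rounds uses; the
 * sha1c/sha1p/sha1m instructions encode f() four rounds at a time. */
static uint32_t sha1_f(int round, uint32_t b, uint32_t c, uint32_t d)
{
	if (round < 20)
		return (b & c) | (~b & d);		/* sha1c: Choose */
	if (round < 40 || round >= 60)
		return b ^ c ^ d;			/* sha1p: Parity */
	return (b & c) | (b & d) | (c & d);		/* sha1m: Majority */
}

static uint32_t sha1_k(int round)
{
	static const uint32_t k[4] = {			/* == .Lsha1_rcon */
		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6,
	};

	return k[round / 20];
}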
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c new file mode 100644 index 000000000000..80bc2fcd241a --- /dev/null +++ b/arch/arm/crypto/sha1-ce-glue.c | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <crypto/internal/hash.h> | ||
12 | #include <crypto/sha.h> | ||
13 | #include <crypto/sha1_base.h> | ||
14 | #include <linux/crypto.h> | ||
15 | #include <linux/module.h> | ||
16 | |||
17 | #include <asm/hwcap.h> | ||
18 | #include <asm/neon.h> | ||
19 | #include <asm/simd.h> | ||
20 | |||
21 | #include "sha1.h" | ||
22 | |||
23 | MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); | ||
24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
25 | MODULE_LICENSE("GPL v2"); | ||
26 | |||
27 | asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src, | ||
28 | int blocks); | ||
29 | |||
30 | static int sha1_ce_update(struct shash_desc *desc, const u8 *data, | ||
31 | unsigned int len) | ||
32 | { | ||
33 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
34 | |||
35 | if (!may_use_simd() || | ||
36 | (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) | ||
37 | return sha1_update_arm(desc, data, len); | ||
38 | |||
39 | kernel_neon_begin(); | ||
40 | sha1_base_do_update(desc, data, len, sha1_ce_transform); | ||
41 | kernel_neon_end(); | ||
42 | |||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, | ||
47 | unsigned int len, u8 *out) | ||
48 | { | ||
49 | if (!may_use_simd()) | ||
50 | return sha1_finup_arm(desc, data, len, out); | ||
51 | |||
52 | kernel_neon_begin(); | ||
53 | if (len) | ||
54 | sha1_base_do_update(desc, data, len, sha1_ce_transform); | ||
55 | sha1_base_do_finalize(desc, sha1_ce_transform); | ||
56 | kernel_neon_end(); | ||
57 | |||
58 | return sha1_base_finish(desc, out); | ||
59 | } | ||
60 | |||
61 | static int sha1_ce_final(struct shash_desc *desc, u8 *out) | ||
62 | { | ||
63 | return sha1_ce_finup(desc, NULL, 0, out); | ||
64 | } | ||
65 | |||
66 | static struct shash_alg alg = { | ||
67 | .init = sha1_base_init, | ||
68 | .update = sha1_ce_update, | ||
69 | .final = sha1_ce_final, | ||
70 | .finup = sha1_ce_finup, | ||
71 | .descsize = sizeof(struct sha1_state), | ||
72 | .digestsize = SHA1_DIGEST_SIZE, | ||
73 | .base = { | ||
74 | .cra_name = "sha1", | ||
75 | .cra_driver_name = "sha1-ce", | ||
76 | .cra_priority = 200, | ||
77 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
78 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
79 | .cra_module = THIS_MODULE, | ||
80 | } | ||
81 | }; | ||
82 | |||
83 | static int __init sha1_ce_mod_init(void) | ||
84 | { | ||
85 | if (!(elf_hwcap2 & HWCAP2_SHA1)) | ||
86 | return -ENODEV; | ||
87 | return crypto_register_shash(&alg); | ||
88 | } | ||
89 | |||
90 | static void __exit sha1_ce_mod_fini(void) | ||
91 | { | ||
92 | crypto_unregister_shash(&alg); | ||
93 | } | ||
94 | |||
95 | module_init(sha1_ce_mod_init); | ||
96 | module_exit(sha1_ce_mod_fini); | ||
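Note the shape of the dispatch in sha1_ce_update() above: the NEON section is entered only when SIMD use is permitted and the new data will complete at least one 64-byte block; otherwise the scalar sha1_update_arm() fallback merely buffers bytes. A minimal sketch of that test:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define SHA1_BLOCK 64

/* True when the accelerated path is both allowed and useful. */
static bool take_simd_path(uint64_t count, size_t len, bool simd_usable)
{
	return simd_usable && (count % SHA1_BLOCK) + len >= SHA1_BLOCK;
}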
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h index 75e6a417416b..ffd8bd08b1a7 100644 --- a/arch/arm/include/asm/crypto/sha1.h +++ b/arch/arm/crypto/sha1.h | |||
@@ -7,4 +7,7 @@ | |||
7 | extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, | 7 | extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, |
8 | unsigned int len); | 8 | unsigned int len); |
9 | 9 | ||
10 | extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data, | ||
11 | unsigned int len, u8 *out); | ||
12 | |||
10 | #endif | 13 | #endif |
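The added sha1_finup_arm() prototype is the fused update-plus-final entry point, which lets an accelerated caller cover both steps with a single SIMD region. Conceptually (a sketch with illustrative types, not the kernel signatures):

#include <stddef.h>
#include <stdint.h>

typedef int (*update_fn)(void *desc, const uint8_t *data, size_t len);
typedef int (*final_fn)(void *desc, uint8_t *out);

/* finup == update followed by final, fused into one call. */
static int finup_sketch(void *desc, const uint8_t *data, size_t len,
			uint8_t *out, update_fn update, final_fn final)
{
	int err = update(desc, data, len);

	return err ? err : final(desc, out);
}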
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index e31b0440c613..6fc73bf8766d 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c | |||
@@ -22,127 +22,47 @@ | |||
22 | #include <linux/cryptohash.h> | 22 | #include <linux/cryptohash.h> |
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include <crypto/sha.h> | 24 | #include <crypto/sha.h> |
25 | #include <crypto/sha1_base.h> | ||
25 | #include <asm/byteorder.h> | 26 | #include <asm/byteorder.h> |
26 | #include <asm/crypto/sha1.h> | ||
27 | 27 | ||
28 | #include "sha1.h" | ||
28 | 29 | ||
29 | asmlinkage void sha1_block_data_order(u32 *digest, | 30 | asmlinkage void sha1_block_data_order(u32 *digest, |
30 | const unsigned char *data, unsigned int rounds); | 31 | const unsigned char *data, unsigned int rounds); |
31 | 32 | ||
32 | |||
33 | static int sha1_init(struct shash_desc *desc) | ||
34 | { | ||
35 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
36 | |||
37 | *sctx = (struct sha1_state){ | ||
38 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
39 | }; | ||
40 | |||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | |||
45 | static int __sha1_update(struct sha1_state *sctx, const u8 *data, | ||
46 | unsigned int len, unsigned int partial) | ||
47 | { | ||
48 | unsigned int done = 0; | ||
49 | |||
50 | sctx->count += len; | ||
51 | |||
52 | if (partial) { | ||
53 | done = SHA1_BLOCK_SIZE - partial; | ||
54 | memcpy(sctx->buffer + partial, data, done); | ||
55 | sha1_block_data_order(sctx->state, sctx->buffer, 1); | ||
56 | } | ||
57 | |||
58 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
59 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
60 | sha1_block_data_order(sctx->state, data + done, rounds); | ||
61 | done += rounds * SHA1_BLOCK_SIZE; | ||
62 | } | ||
63 | |||
64 | memcpy(sctx->buffer, data + done, len - done); | ||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | |||
69 | int sha1_update_arm(struct shash_desc *desc, const u8 *data, | 33 | int sha1_update_arm(struct shash_desc *desc, const u8 *data, |
70 | unsigned int len) | 34 | unsigned int len) |
71 | { | 35 | { |
72 | struct sha1_state *sctx = shash_desc_ctx(desc); | 36 | /* make sure casting to sha1_block_fn() is safe */ |
73 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | 37 | BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); |
74 | int res; | ||
75 | 38 | ||
76 | /* Handle the fast case right here */ | 39 | return sha1_base_do_update(desc, data, len, |
77 | if (partial + len < SHA1_BLOCK_SIZE) { | 40 | (sha1_block_fn *)sha1_block_data_order); |
78 | sctx->count += len; | ||
79 | memcpy(sctx->buffer + partial, data, len); | ||
80 | return 0; | ||
81 | } | ||
82 | res = __sha1_update(sctx, data, len, partial); | ||
83 | return res; | ||
84 | } | 41 | } |
85 | EXPORT_SYMBOL_GPL(sha1_update_arm); | 42 | EXPORT_SYMBOL_GPL(sha1_update_arm); |
86 | 43 | ||
87 | |||
88 | /* Add padding and return the message digest. */ | ||
89 | static int sha1_final(struct shash_desc *desc, u8 *out) | 44 | static int sha1_final(struct shash_desc *desc, u8 *out) |
90 | { | 45 | { |
91 | struct sha1_state *sctx = shash_desc_ctx(desc); | 46 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order); |
92 | unsigned int i, index, padlen; | 47 | return sha1_base_finish(desc, out); |
93 | __be32 *dst = (__be32 *)out; | ||
94 | __be64 bits; | ||
95 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
96 | |||
97 | bits = cpu_to_be64(sctx->count << 3); | ||
98 | |||
99 | /* Pad out to 56 mod 64 and append length */ | ||
100 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
101 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
102 | /* We need to fill a whole block for __sha1_update() */ | ||
103 | if (padlen <= 56) { | ||
104 | sctx->count += padlen; | ||
105 | memcpy(sctx->buffer + index, padding, padlen); | ||
106 | } else { | ||
107 | __sha1_update(sctx, padding, padlen, index); | ||
108 | } | ||
109 | __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56); | ||
110 | |||
111 | /* Store state in digest */ | ||
112 | for (i = 0; i < 5; i++) | ||
113 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
114 | |||
115 | /* Wipe context */ | ||
116 | memset(sctx, 0, sizeof(*sctx)); | ||
117 | return 0; | ||
118 | } | 48 | } |
119 | 49 | ||
120 | 50 | int sha1_finup_arm(struct shash_desc *desc, const u8 *data, | |
121 | static int sha1_export(struct shash_desc *desc, void *out) | 51 | unsigned int len, u8 *out) |
122 | { | 52 | { |
123 | struct sha1_state *sctx = shash_desc_ctx(desc); | 53 | sha1_base_do_update(desc, data, len, |
124 | memcpy(out, sctx, sizeof(*sctx)); | 54 | (sha1_block_fn *)sha1_block_data_order); |
125 | return 0; | 55 | return sha1_final(desc, out); |
126 | } | 56 | } |
127 | 57 | EXPORT_SYMBOL_GPL(sha1_finup_arm); | |
128 | |||
129 | static int sha1_import(struct shash_desc *desc, const void *in) | ||
130 | { | ||
131 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
132 | memcpy(sctx, in, sizeof(*sctx)); | ||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | 58 | ||
137 | static struct shash_alg alg = { | 59 | static struct shash_alg alg = { |
138 | .digestsize = SHA1_DIGEST_SIZE, | 60 | .digestsize = SHA1_DIGEST_SIZE, |
139 | .init = sha1_init, | 61 | .init = sha1_base_init, |
140 | .update = sha1_update_arm, | 62 | .update = sha1_update_arm, |
141 | .final = sha1_final, | 63 | .final = sha1_final, |
142 | .export = sha1_export, | 64 | .finup = sha1_finup_arm, |
143 | .import = sha1_import, | ||
144 | .descsize = sizeof(struct sha1_state), | 65 | .descsize = sizeof(struct sha1_state), |
145 | .statesize = sizeof(struct sha1_state), | ||
146 | .base = { | 66 | .base = { |
147 | .cra_name = "sha1", | 67 | .cra_name = "sha1", |
148 | .cra_driver_name= "sha1-asm", | 68 | .cra_driver_name= "sha1-asm", |
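The deleted __sha1_update()/sha1_final() pair is exactly the generic block buffering that the sha1_base_do_update()/sha1_base_do_finalize() helpers now supply, parameterized by a block function; the BUILD_BUG_ON records that casting the asm routine to sha1_block_fn is only valid while 'state' is the first member of struct sha1_state. A standalone rendering of the buffering the base helper takes over (my sketch, not the kernel source):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define BLOCK 64

struct state {
	uint32_t h[5];
	uint64_t count;
	uint8_t buffer[BLOCK];
};

typedef void (*block_fn)(uint32_t *h, const uint8_t *src, int blocks);

/* Top up a partial block, stream whole blocks straight from the
 * input, and stash the remainder for next time. */
static void do_update(struct state *s, const uint8_t *data, size_t len,
		      block_fn block)
{
	size_t partial = s->count % BLOCK;

	s->count += len;
	if (partial) {
		size_t fill = BLOCK - partial;

		if (len < fill) {		/* still no full block */
			memcpy(s->buffer + partial, data, len);
			return;
		}
		memcpy(s->buffer + partial, data, fill);
		block(s->h, s->buffer, 1);
		data += fill;
		len -= fill;
	}
	if (len >= BLOCK) {
		block(s->h, data, (int)(len / BLOCK));
		data += len & ~(size_t)(BLOCK - 1);
		len &= BLOCK - 1;
	}
	memcpy(s->buffer, data, len);
}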
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c index 0b0083757d47..4e22f122f966 100644 --- a/arch/arm/crypto/sha1_neon_glue.c +++ b/arch/arm/crypto/sha1_neon_glue.c | |||
@@ -25,147 +25,60 @@ | |||
25 | #include <linux/cryptohash.h> | 25 | #include <linux/cryptohash.h> |
26 | #include <linux/types.h> | 26 | #include <linux/types.h> |
27 | #include <crypto/sha.h> | 27 | #include <crypto/sha.h> |
28 | #include <asm/byteorder.h> | 28 | #include <crypto/sha1_base.h> |
29 | #include <asm/neon.h> | 29 | #include <asm/neon.h> |
30 | #include <asm/simd.h> | 30 | #include <asm/simd.h> |
31 | #include <asm/crypto/sha1.h> | ||
32 | 31 | ||
32 | #include "sha1.h" | ||
33 | 33 | ||
34 | asmlinkage void sha1_transform_neon(void *state_h, const char *data, | 34 | asmlinkage void sha1_transform_neon(void *state_h, const char *data, |
35 | unsigned int rounds); | 35 | unsigned int rounds); |
36 | 36 | ||
37 | |||
38 | static int sha1_neon_init(struct shash_desc *desc) | ||
39 | { | ||
40 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
41 | |||
42 | *sctx = (struct sha1_state){ | ||
43 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
44 | }; | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static int __sha1_neon_update(struct shash_desc *desc, const u8 *data, | ||
50 | unsigned int len, unsigned int partial) | ||
51 | { | ||
52 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
53 | unsigned int done = 0; | ||
54 | |||
55 | sctx->count += len; | ||
56 | |||
57 | if (partial) { | ||
58 | done = SHA1_BLOCK_SIZE - partial; | ||
59 | memcpy(sctx->buffer + partial, data, done); | ||
60 | sha1_transform_neon(sctx->state, sctx->buffer, 1); | ||
61 | } | ||
62 | |||
63 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
64 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
65 | |||
66 | sha1_transform_neon(sctx->state, data + done, rounds); | ||
67 | done += rounds * SHA1_BLOCK_SIZE; | ||
68 | } | ||
69 | |||
70 | memcpy(sctx->buffer, data + done, len - done); | ||
71 | |||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | static int sha1_neon_update(struct shash_desc *desc, const u8 *data, | 37 | static int sha1_neon_update(struct shash_desc *desc, const u8 *data, |
76 | unsigned int len) | 38 | unsigned int len) |
77 | { | 39 | { |
78 | struct sha1_state *sctx = shash_desc_ctx(desc); | 40 | struct sha1_state *sctx = shash_desc_ctx(desc); |
79 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
80 | int res; | ||
81 | 41 | ||
82 | /* Handle the fast case right here */ | 42 | if (!may_use_simd() || |
83 | if (partial + len < SHA1_BLOCK_SIZE) { | 43 | (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) |
84 | sctx->count += len; | 44 | return sha1_update_arm(desc, data, len); |
85 | memcpy(sctx->buffer + partial, data, len); | ||
86 | 45 | ||
87 | return 0; | 46 | kernel_neon_begin(); |
88 | } | 47 | sha1_base_do_update(desc, data, len, |
89 | 48 | (sha1_block_fn *)sha1_transform_neon); | |
90 | if (!may_use_simd()) { | 49 | kernel_neon_end(); |
91 | res = sha1_update_arm(desc, data, len); | ||
92 | } else { | ||
93 | kernel_neon_begin(); | ||
94 | res = __sha1_neon_update(desc, data, len, partial); | ||
95 | kernel_neon_end(); | ||
96 | } | ||
97 | |||
98 | return res; | ||
99 | } | ||
100 | |||
101 | |||
102 | /* Add padding and return the message digest. */ | ||
103 | static int sha1_neon_final(struct shash_desc *desc, u8 *out) | ||
104 | { | ||
105 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
106 | unsigned int i, index, padlen; | ||
107 | __be32 *dst = (__be32 *)out; | ||
108 | __be64 bits; | ||
109 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
110 | |||
111 | bits = cpu_to_be64(sctx->count << 3); | ||
112 | |||
113 | /* Pad out to 56 mod 64 and append length */ | ||
114 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
115 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
116 | if (!may_use_simd()) { | ||
117 | sha1_update_arm(desc, padding, padlen); | ||
118 | sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits)); | ||
119 | } else { | ||
120 | kernel_neon_begin(); | ||
121 | /* We need to fill a whole block for __sha1_neon_update() */ | ||
122 | if (padlen <= 56) { | ||
123 | sctx->count += padlen; | ||
124 | memcpy(sctx->buffer + index, padding, padlen); | ||
125 | } else { | ||
126 | __sha1_neon_update(desc, padding, padlen, index); | ||
127 | } | ||
128 | __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56); | ||
129 | kernel_neon_end(); | ||
130 | } | ||
131 | |||
132 | /* Store state in digest */ | ||
133 | for (i = 0; i < 5; i++) | ||
134 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
135 | |||
136 | /* Wipe context */ | ||
137 | memset(sctx, 0, sizeof(*sctx)); | ||
138 | 50 | ||
139 | return 0; | 51 | return 0; |
140 | } | 52 | } |
141 | 53 | ||
142 | static int sha1_neon_export(struct shash_desc *desc, void *out) | 54 | static int sha1_neon_finup(struct shash_desc *desc, const u8 *data, |
55 | unsigned int len, u8 *out) | ||
143 | { | 56 | { |
144 | struct sha1_state *sctx = shash_desc_ctx(desc); | 57 | if (!may_use_simd()) |
58 | return sha1_finup_arm(desc, data, len, out); | ||
145 | 59 | ||
146 | memcpy(out, sctx, sizeof(*sctx)); | 60 | kernel_neon_begin(); |
61 | if (len) | ||
62 | sha1_base_do_update(desc, data, len, | ||
63 | (sha1_block_fn *)sha1_transform_neon); | ||
64 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon); | ||
65 | kernel_neon_end(); | ||
147 | 66 | ||
148 | return 0; | 67 | return sha1_base_finish(desc, out); |
149 | } | 68 | } |
150 | 69 | ||
151 | static int sha1_neon_import(struct shash_desc *desc, const void *in) | 70 | static int sha1_neon_final(struct shash_desc *desc, u8 *out) |
152 | { | 71 | { |
153 | struct sha1_state *sctx = shash_desc_ctx(desc); | 72 | return sha1_neon_finup(desc, NULL, 0, out); |
154 | |||
155 | memcpy(sctx, in, sizeof(*sctx)); | ||
156 | |||
157 | return 0; | ||
158 | } | 73 | } |
159 | 74 | ||
160 | static struct shash_alg alg = { | 75 | static struct shash_alg alg = { |
161 | .digestsize = SHA1_DIGEST_SIZE, | 76 | .digestsize = SHA1_DIGEST_SIZE, |
162 | .init = sha1_neon_init, | 77 | .init = sha1_base_init, |
163 | .update = sha1_neon_update, | 78 | .update = sha1_neon_update, |
164 | .final = sha1_neon_final, | 79 | .final = sha1_neon_final, |
165 | .export = sha1_neon_export, | 80 | .finup = sha1_neon_finup, |
166 | .import = sha1_neon_import, | ||
167 | .descsize = sizeof(struct sha1_state), | 81 | .descsize = sizeof(struct sha1_state), |
168 | .statesize = sizeof(struct sha1_state), | ||
169 | .base = { | 82 | .base = { |
170 | .cra_name = "sha1", | 83 | .cra_name = "sha1", |
171 | .cra_driver_name = "sha1-neon", | 84 | .cra_driver_name = "sha1-neon", |
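With several providers now registered under .cra_name "sha1", the crypto API resolves a plain "sha1" request to the highest-priority implementation whose hardware checks passed (a specific one can still be pinned via its driver name). A toy model of that selection; the driver names match this patch set, but the priorities and availability flags below are illustrative:

#include <stdio.h>

struct impl { const char *driver; int priority; int usable; };

static const struct impl *pick(const struct impl *v, int n)
{
	const struct impl *best = NULL;

	for (int i = 0; i < n; i++)
		if (v[i].usable && (!best || v[i].priority > best->priority))
			best = &v[i];
	return best;
}

int main(void)
{
	const struct impl sha1[] = {
		{ "sha1-ce",   200, 0 },	/* CPU lacks SHA-1 extensions */
		{ "sha1-neon", 180, 1 },	/* illustrative priority */
		{ "sha1-asm",  150, 1 },	/* illustrative priority */
	};

	printf("%s\n", pick(sha1, 3)->driver);	/* prints sha1-neon */
	return 0;
}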
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S new file mode 100644 index 000000000000..87ec11a5f405 --- /dev/null +++ b/arch/arm/crypto/sha2-ce-core.S | |||
@@ -0,0 +1,125 @@ | |||
1 | /* | ||
2 | * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd. | ||
5 | * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/linkage.h> | ||
13 | #include <asm/assembler.h> | ||
14 | |||
15 | .text | ||
16 | .fpu crypto-neon-fp-armv8 | ||
17 | |||
18 | k0 .req q7 | ||
19 | k1 .req q8 | ||
20 | rk .req r3 | ||
21 | |||
22 | ta0 .req q9 | ||
23 | ta1 .req q10 | ||
24 | tb0 .req q10 | ||
25 | tb1 .req q9 | ||
26 | |||
27 | dga .req q11 | ||
28 | dgb .req q12 | ||
29 | |||
30 | dg0 .req q13 | ||
31 | dg1 .req q14 | ||
32 | dg2 .req q15 | ||
33 | |||
34 | .macro add_only, ev, s0 | ||
35 | vmov dg2, dg0 | ||
36 | .ifnb \s0 | ||
37 | vld1.32 {k\ev}, [rk, :128]! | ||
38 | .endif | ||
39 | sha256h.32 dg0, dg1, tb\ev | ||
40 | sha256h2.32 dg1, dg2, tb\ev | ||
41 | .ifnb \s0 | ||
42 | vadd.u32 ta\ev, q\s0, k\ev | ||
43 | .endif | ||
44 | .endm | ||
45 | |||
46 | .macro add_update, ev, s0, s1, s2, s3 | ||
47 | sha256su0.32 q\s0, q\s1 | ||
48 | add_only \ev, \s1 | ||
49 | sha256su1.32 q\s0, q\s2, q\s3 | ||
50 | .endm | ||
51 | |||
52 | .align 6 | ||
53 | .Lsha256_rcon: | ||
54 | .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 | ||
55 | .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 | ||
56 | .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 | ||
57 | .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 | ||
58 | .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc | ||
59 | .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da | ||
60 | .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 | ||
61 | .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 | ||
62 | .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 | ||
63 | .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 | ||
64 | .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 | ||
65 | .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 | ||
66 | .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 | ||
67 | .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 | ||
68 | .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 | ||
69 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 | ||
70 | |||
71 | /* | ||
72 | * void sha2_ce_transform(struct sha256_state *sst, u8 const *src, | ||
73 | * int blocks); | ||
74 | */ | ||
75 | ENTRY(sha2_ce_transform) | ||
76 | /* load state */ | ||
77 | vld1.32 {dga-dgb}, [r0] | ||
78 | |||
79 | /* load input */ | ||
80 | 0: vld1.32 {q0-q1}, [r1]! | ||
81 | vld1.32 {q2-q3}, [r1]! | ||
82 | subs r2, r2, #1 | ||
83 | |||
84 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
85 | vrev32.8 q0, q0 | ||
86 | vrev32.8 q1, q1 | ||
87 | vrev32.8 q2, q2 | ||
88 | vrev32.8 q3, q3 | ||
89 | #endif | ||
90 | |||
91 | /* load first round constant */ | ||
92 | adr rk, .Lsha256_rcon | ||
93 | vld1.32 {k0}, [rk, :128]! | ||
94 | |||
95 | vadd.u32 ta0, q0, k0 | ||
96 | vmov dg0, dga | ||
97 | vmov dg1, dgb | ||
98 | |||
99 | add_update 1, 0, 1, 2, 3 | ||
100 | add_update 0, 1, 2, 3, 0 | ||
101 | add_update 1, 2, 3, 0, 1 | ||
102 | add_update 0, 3, 0, 1, 2 | ||
103 | add_update 1, 0, 1, 2, 3 | ||
104 | add_update 0, 1, 2, 3, 0 | ||
105 | add_update 1, 2, 3, 0, 1 | ||
106 | add_update 0, 3, 0, 1, 2 | ||
107 | add_update 1, 0, 1, 2, 3 | ||
108 | add_update 0, 1, 2, 3, 0 | ||
109 | add_update 1, 2, 3, 0, 1 | ||
110 | add_update 0, 3, 0, 1, 2 | ||
111 | |||
112 | add_only 1, 1 | ||
113 | add_only 0, 2 | ||
114 | add_only 1, 3 | ||
115 | add_only 0 | ||
116 | |||
117 | /* update state */ | ||
118 | vadd.u32 dga, dga, dg0 | ||
119 | vadd.u32 dgb, dgb, dg1 | ||
120 | bne 0b | ||
121 | |||
122 | /* store new state */ | ||
123 | vst1.32 {dga-dgb}, [r0] | ||
124 | bx lr | ||
125 | ENDPROC(sha2_ce_transform) | ||
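Each add_update above advances the message schedule four words at a time: sha256su0/sha256su1 together implement the standard SHA-256 expansion. Scalar sketch of one schedule word over the usual 16-entry ring buffer (FIPS 180-2 sigma functions):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, int n)
{
	return (x >> n) | (x << (32 - n));
}

/* W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16], t >= 16 */
static uint32_t sha256_schedule(const uint32_t W[16], int t)
{
	uint32_t w15 = W[(t - 15) & 15], w2 = W[(t - 2) & 15];
	uint32_t s0 = ror32(w15, 7) ^ ror32(w15, 18) ^ (w15 >> 3);
	uint32_t s1 = ror32(w2, 17) ^ ror32(w2, 19) ^ (w2 >> 10);

	return s1 + W[(t - 7) & 15] + s0 + W[(t - 16) & 15];
}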
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c new file mode 100644 index 000000000000..0755b2d657f3 --- /dev/null +++ b/arch/arm/crypto/sha2-ce-glue.c | |||
@@ -0,0 +1,114 @@ | |||
1 | /* | ||
2 | * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions | ||
3 | * | ||
4 | * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <crypto/internal/hash.h> | ||
12 | #include <crypto/sha.h> | ||
13 | #include <crypto/sha256_base.h> | ||
14 | #include <linux/crypto.h> | ||
15 | #include <linux/module.h> | ||
16 | |||
17 | #include <asm/hwcap.h> | ||
18 | #include <asm/simd.h> | ||
19 | #include <asm/neon.h> | ||
20 | #include <asm/unaligned.h> | ||
21 | |||
22 | #include "sha256_glue.h" | ||
23 | |||
24 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); | ||
25 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | ||
26 | MODULE_LICENSE("GPL v2"); | ||
27 | |||
28 | asmlinkage void sha2_ce_transform(struct sha256_state *sst, u8 const *src, | ||
29 | int blocks); | ||
30 | |||
31 | static int sha2_ce_update(struct shash_desc *desc, const u8 *data, | ||
32 | unsigned int len) | ||
33 | { | ||
34 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
35 | |||
36 | if (!may_use_simd() || | ||
37 | (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) | ||
38 | return crypto_sha256_arm_update(desc, data, len); | ||
39 | |||
40 | kernel_neon_begin(); | ||
41 | sha256_base_do_update(desc, data, len, | ||
42 | (sha256_block_fn *)sha2_ce_transform); | ||
43 | kernel_neon_end(); | ||
44 | |||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | static int sha2_ce_finup(struct shash_desc *desc, const u8 *data, | ||
49 | unsigned int len, u8 *out) | ||
50 | { | ||
51 | if (!may_use_simd()) | ||
52 | return crypto_sha256_arm_finup(desc, data, len, out); | ||
53 | |||
54 | kernel_neon_begin(); | ||
55 | if (len) | ||
56 | sha256_base_do_update(desc, data, len, | ||
57 | (sha256_block_fn *)sha2_ce_transform); | ||
58 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); | ||
59 | kernel_neon_end(); | ||
60 | |||
61 | return sha256_base_finish(desc, out); | ||
62 | } | ||
63 | |||
64 | static int sha2_ce_final(struct shash_desc *desc, u8 *out) | ||
65 | { | ||
66 | return sha2_ce_finup(desc, NULL, 0, out); | ||
67 | } | ||
68 | |||
69 | static struct shash_alg algs[] = { { | ||
70 | .init = sha224_base_init, | ||
71 | .update = sha2_ce_update, | ||
72 | .final = sha2_ce_final, | ||
73 | .finup = sha2_ce_finup, | ||
74 | .descsize = sizeof(struct sha256_state), | ||
75 | .digestsize = SHA224_DIGEST_SIZE, | ||
76 | .base = { | ||
77 | .cra_name = "sha224", | ||
78 | .cra_driver_name = "sha224-ce", | ||
79 | .cra_priority = 300, | ||
80 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
81 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
82 | .cra_module = THIS_MODULE, | ||
83 | } | ||
84 | }, { | ||
85 | .init = sha256_base_init, | ||
86 | .update = sha2_ce_update, | ||
87 | .final = sha2_ce_final, | ||
88 | .finup = sha2_ce_finup, | ||
89 | .descsize = sizeof(struct sha256_state), | ||
90 | .digestsize = SHA256_DIGEST_SIZE, | ||
91 | .base = { | ||
92 | .cra_name = "sha256", | ||
93 | .cra_driver_name = "sha256-ce", | ||
94 | .cra_priority = 300, | ||
95 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
96 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
97 | .cra_module = THIS_MODULE, | ||
98 | } | ||
99 | } }; | ||
100 | |||
101 | static int __init sha2_ce_mod_init(void) | ||
102 | { | ||
103 | if (!(elf_hwcap2 & HWCAP2_SHA2)) | ||
104 | return -ENODEV; | ||
105 | return crypto_register_shashes(algs, ARRAY_SIZE(algs)); | ||
106 | } | ||
107 | |||
108 | static void __exit sha2_ce_mod_fini(void) | ||
109 | { | ||
110 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | ||
111 | } | ||
112 | |||
113 | module_init(sha2_ce_mod_init); | ||
114 | module_exit(sha2_ce_mod_fini); | ||
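One transform serves both registrations because SHA-224 is SHA-256 with a different initial state and the digest truncated to 28 bytes; sha224_base_init() and sha256_base_init() differ only in the IV they load. For reference (values per FIPS 180-2):

#include <stdint.h>

static const uint32_t sha224_iv[8] = {
	0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
	0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4,
};

static const uint32_t sha256_iv[8] = {
	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
};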
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl new file mode 100644 index 000000000000..fac0533ea633 --- /dev/null +++ b/arch/arm/crypto/sha256-armv4.pl | |||
@@ -0,0 +1,716 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # | ||
9 | # Permission to use under GPL terms is granted. | ||
10 | # ==================================================================== | ||
11 | |||
12 | # SHA256 block procedure for ARMv4. May 2007. | ||
13 | |||
14 | # Performance is ~2x better than gcc 3.4 generated code and in "abso- | ||
15 | # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per | ||
16 | # byte [on single-issue Xscale PXA250 core]. | ||
17 | |||
18 | # July 2010. | ||
19 | # | ||
20 | # Rescheduling for dual-issue pipeline resulted in 22% improvement on | ||
21 | # Cortex A8 core and ~20 cycles per processed byte. | ||
22 | |||
23 | # February 2011. | ||
24 | # | ||
25 | # Profiler-assisted and platform-specific optimization resulted in 16% | ||
26 | # improvement on Cortex A8 core and ~15.4 cycles per processed byte. | ||
27 | |||
28 | # September 2013. | ||
29 | # | ||
30 | # Add NEON implementation. On Cortex A8 it was measured to process one | ||
31 | # byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon | ||
32 | # S4 does it in 12.5 cycles too, but it's 50% faster than integer-only | ||
33 | # code (meaning the latter performs sub-optimally; nothing was done | ||
34 | # about it). | ||
35 | |||
36 | # May 2014. | ||
37 | # | ||
38 | # Add ARMv8 code path performing at 2.0 cpb on Apple A7. | ||
39 | |||
40 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | ||
41 | open STDOUT,">$output"; | ||
42 | |||
43 | $ctx="r0"; $t0="r0"; | ||
44 | $inp="r1"; $t4="r1"; | ||
45 | $len="r2"; $t1="r2"; | ||
46 | $T1="r3"; $t3="r3"; | ||
47 | $A="r4"; | ||
48 | $B="r5"; | ||
49 | $C="r6"; | ||
50 | $D="r7"; | ||
51 | $E="r8"; | ||
52 | $F="r9"; | ||
53 | $G="r10"; | ||
54 | $H="r11"; | ||
55 | @V=($A,$B,$C,$D,$E,$F,$G,$H); | ||
56 | $t2="r12"; | ||
57 | $Ktbl="r14"; | ||
58 | |||
59 | @Sigma0=( 2,13,22); | ||
60 | @Sigma1=( 6,11,25); | ||
61 | @sigma0=( 7,18, 3); | ||
62 | @sigma1=(17,19,10); | ||
63 | |||
64 | sub BODY_00_15 { | ||
65 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; | ||
66 | |||
67 | $code.=<<___ if ($i<16); | ||
68 | #if __ARM_ARCH__>=7 | ||
69 | @ ldr $t1,[$inp],#4 @ $i | ||
70 | # if $i==15 | ||
71 | str $inp,[sp,#17*4] @ make room for $t4 | ||
72 | # endif | ||
73 | eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` | ||
74 | add $a,$a,$t2 @ h+=Maj(a,b,c) from the past | ||
75 | eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) | ||
76 | # ifndef __ARMEB__ | ||
77 | rev $t1,$t1 | ||
78 | # endif | ||
79 | #else | ||
80 | @ ldrb $t1,[$inp,#3] @ $i | ||
81 | add $a,$a,$t2 @ h+=Maj(a,b,c) from the past | ||
82 | ldrb $t2,[$inp,#2] | ||
83 | ldrb $t0,[$inp,#1] | ||
84 | orr $t1,$t1,$t2,lsl#8 | ||
85 | ldrb $t2,[$inp],#4 | ||
86 | orr $t1,$t1,$t0,lsl#16 | ||
87 | # if $i==15 | ||
88 | str $inp,[sp,#17*4] @ make room for $t4 | ||
89 | # endif | ||
90 | eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` | ||
91 | orr $t1,$t1,$t2,lsl#24 | ||
92 | eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) | ||
93 | #endif | ||
94 | ___ | ||
95 | $code.=<<___; | ||
96 | ldr $t2,[$Ktbl],#4 @ *K256++ | ||
97 | add $h,$h,$t1 @ h+=X[i] | ||
98 | str $t1,[sp,#`$i%16`*4] | ||
99 | eor $t1,$f,$g | ||
100 | add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) | ||
101 | and $t1,$t1,$e | ||
102 | add $h,$h,$t2 @ h+=K256[i] | ||
103 | eor $t1,$t1,$g @ Ch(e,f,g) | ||
104 | eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` | ||
105 | add $h,$h,$t1 @ h+=Ch(e,f,g) | ||
106 | #if $i==31 | ||
107 | and $t2,$t2,#0xff | ||
108 | cmp $t2,#0xf2 @ done? | ||
109 | #endif | ||
110 | #if $i<15 | ||
111 | # if __ARM_ARCH__>=7 | ||
112 | ldr $t1,[$inp],#4 @ prefetch | ||
113 | # else | ||
114 | ldrb $t1,[$inp,#3] | ||
115 | # endif | ||
116 | eor $t2,$a,$b @ a^b, b^c in next round | ||
117 | #else | ||
118 | ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx | ||
119 | eor $t2,$a,$b @ a^b, b^c in next round | ||
120 | ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx | ||
121 | #endif | ||
122 | eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) | ||
123 | and $t3,$t3,$t2 @ (b^c)&=(a^b) | ||
124 | add $d,$d,$h @ d+=h | ||
125 | eor $t3,$t3,$b @ Maj(a,b,c) | ||
126 | add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) | ||
127 | @ add $h,$h,$t3 @ h+=Maj(a,b,c) | ||
128 | ___ | ||
129 | ($t2,$t3)=($t3,$t2); | ||
130 | } | ||
131 | |||
132 | sub BODY_16_XX { | ||
133 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; | ||
134 | |||
135 | $code.=<<___; | ||
136 | @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i | ||
137 | @ ldr $t4,[sp,#`($i+14)%16`*4] | ||
138 | mov $t0,$t1,ror#$sigma0[0] | ||
139 | add $a,$a,$t2 @ h+=Maj(a,b,c) from the past | ||
140 | mov $t2,$t4,ror#$sigma1[0] | ||
141 | eor $t0,$t0,$t1,ror#$sigma0[1] | ||
142 | eor $t2,$t2,$t4,ror#$sigma1[1] | ||
143 | eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) | ||
144 | ldr $t1,[sp,#`($i+0)%16`*4] | ||
145 | eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) | ||
146 | ldr $t4,[sp,#`($i+9)%16`*4] | ||
147 | |||
148 | add $t2,$t2,$t0 | ||
149 | eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 | ||
150 | add $t1,$t1,$t2 | ||
151 | eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) | ||
152 | add $t1,$t1,$t4 @ X[i] | ||
153 | ___ | ||
154 | &BODY_00_15(@_); | ||
155 | } | ||
156 | |||
157 | $code=<<___; | ||
158 | #ifndef __KERNEL__ | ||
159 | # include "arm_arch.h" | ||
160 | #else | ||
161 | # define __ARM_ARCH__ __LINUX_ARM_ARCH__ | ||
162 | # define __ARM_MAX_ARCH__ 7 | ||
163 | #endif | ||
164 | |||
165 | .text | ||
166 | #if __ARM_ARCH__<7 | ||
167 | .code 32 | ||
168 | #else | ||
169 | .syntax unified | ||
170 | # ifdef __thumb2__ | ||
171 | # define adrl adr | ||
172 | .thumb | ||
173 | # else | ||
174 | .code 32 | ||
175 | # endif | ||
176 | #endif | ||
177 | |||
178 | .type K256,%object | ||
179 | .align 5 | ||
180 | K256: | ||
181 | .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
182 | .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
183 | .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
184 | .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
185 | .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
186 | .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
187 | .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
188 | .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
189 | .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
190 | .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
191 | .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
192 | .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
193 | .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
194 | .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
195 | .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
196 | .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
197 | .size K256,.-K256 | ||
198 | .word 0 @ terminator | ||
199 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
200 | .LOPENSSL_armcap: | ||
201 | .word OPENSSL_armcap_P-sha256_block_data_order | ||
202 | #endif | ||
203 | .align 5 | ||
204 | |||
205 | .global sha256_block_data_order | ||
206 | .type sha256_block_data_order,%function | ||
207 | sha256_block_data_order: | ||
208 | #if __ARM_ARCH__<7 | ||
209 | sub r3,pc,#8 @ sha256_block_data_order | ||
210 | #else | ||
211 | adr r3,sha256_block_data_order | ||
212 | #endif | ||
213 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
214 | ldr r12,.LOPENSSL_armcap | ||
215 | ldr r12,[r3,r12] @ OPENSSL_armcap_P | ||
216 | tst r12,#ARMV8_SHA256 | ||
217 | bne .LARMv8 | ||
218 | tst r12,#ARMV7_NEON | ||
219 | bne .LNEON | ||
220 | #endif | ||
221 | add $len,$inp,$len,lsl#6 @ len to point at the end of inp | ||
222 | stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} | ||
223 | ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} | ||
224 | sub $Ktbl,r3,#256+32 @ K256 | ||
225 | sub sp,sp,#16*4 @ alloca(X[16]) | ||
226 | .Loop: | ||
227 | # if __ARM_ARCH__>=7 | ||
228 | ldr $t1,[$inp],#4 | ||
229 | # else | ||
230 | ldrb $t1,[$inp,#3] | ||
231 | # endif | ||
232 | eor $t3,$B,$C @ magic | ||
233 | eor $t2,$t2,$t2 | ||
234 | ___ | ||
235 | for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } | ||
236 | $code.=".Lrounds_16_xx:\n"; | ||
237 | for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } | ||
238 | $code.=<<___; | ||
239 | #if __ARM_ARCH__>=7 | ||
240 | ite eq @ Thumb2 thing, sanity check in ARM | ||
241 | #endif | ||
242 | ldreq $t3,[sp,#16*4] @ pull ctx | ||
243 | bne .Lrounds_16_xx | ||
244 | |||
245 | add $A,$A,$t2 @ h+=Maj(a,b,c) from the past | ||
246 | ldr $t0,[$t3,#0] | ||
247 | ldr $t1,[$t3,#4] | ||
248 | ldr $t2,[$t3,#8] | ||
249 | add $A,$A,$t0 | ||
250 | ldr $t0,[$t3,#12] | ||
251 | add $B,$B,$t1 | ||
252 | ldr $t1,[$t3,#16] | ||
253 | add $C,$C,$t2 | ||
254 | ldr $t2,[$t3,#20] | ||
255 | add $D,$D,$t0 | ||
256 | ldr $t0,[$t3,#24] | ||
257 | add $E,$E,$t1 | ||
258 | ldr $t1,[$t3,#28] | ||
259 | add $F,$F,$t2 | ||
260 | ldr $inp,[sp,#17*4] @ pull inp | ||
261 | ldr $t2,[sp,#18*4] @ pull inp+len | ||
262 | add $G,$G,$t0 | ||
263 | add $H,$H,$t1 | ||
264 | stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} | ||
265 | cmp $inp,$t2 | ||
266 | sub $Ktbl,$Ktbl,#256 @ rewind Ktbl | ||
267 | bne .Loop | ||
268 | |||
269 | add sp,sp,#`16+3`*4 @ destroy frame | ||
270 | #if __ARM_ARCH__>=5 | ||
271 | ldmia sp!,{r4-r11,pc} | ||
272 | #else | ||
273 | ldmia sp!,{r4-r11,lr} | ||
274 | tst lr,#1 | ||
275 | moveq pc,lr @ be binary compatible with V4, yet | ||
276 | bx lr @ interoperable with Thumb ISA:-) | ||
277 | #endif | ||
278 | .size sha256_block_data_order,.-sha256_block_data_order | ||
279 | ___ | ||
280 | ###################################################################### | ||
281 | # NEON stuff | ||
282 | # | ||
283 | {{{ | ||
284 | my @X=map("q$_",(0..3)); | ||
285 | my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); | ||
286 | my $Xfer=$t4; | ||
287 | my $j=0; | ||
288 | |||
289 | sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } | ||
290 | sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } | ||
291 | |||
292 | sub AUTOLOAD() # thunk [simplified] x86-style perlasm | ||
293 | { my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; | ||
294 | my $arg = pop; | ||
295 | $arg = "#$arg" if ($arg*1 eq $arg); | ||
296 | $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; | ||
297 | } | ||
298 | |||
299 | sub Xupdate() | ||
300 | { use integer; | ||
301 | my $body = shift; | ||
302 | my @insns = (&$body,&$body,&$body,&$body); | ||
303 | my ($a,$b,$c,$d,$e,$f,$g,$h); | ||
304 | |||
305 | &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] | ||
306 | eval(shift(@insns)); | ||
307 | eval(shift(@insns)); | ||
308 | eval(shift(@insns)); | ||
309 | &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] | ||
310 | eval(shift(@insns)); | ||
311 | eval(shift(@insns)); | ||
312 | eval(shift(@insns)); | ||
313 | &vshr_u32 ($T2,$T0,$sigma0[0]); | ||
314 | eval(shift(@insns)); | ||
315 | eval(shift(@insns)); | ||
316 | &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] | ||
317 | eval(shift(@insns)); | ||
318 | eval(shift(@insns)); | ||
319 | &vshr_u32 ($T1,$T0,$sigma0[2]); | ||
320 | eval(shift(@insns)); | ||
321 | eval(shift(@insns)); | ||
322 | &vsli_32 ($T2,$T0,32-$sigma0[0]); | ||
323 | eval(shift(@insns)); | ||
324 | eval(shift(@insns)); | ||
325 | &vshr_u32 ($T3,$T0,$sigma0[1]); | ||
326 | eval(shift(@insns)); | ||
327 | eval(shift(@insns)); | ||
328 | &veor ($T1,$T1,$T2); | ||
329 | eval(shift(@insns)); | ||
330 | eval(shift(@insns)); | ||
331 | &vsli_32 ($T3,$T0,32-$sigma0[1]); | ||
332 | eval(shift(@insns)); | ||
333 | eval(shift(@insns)); | ||
334 | &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); | ||
335 | eval(shift(@insns)); | ||
336 | eval(shift(@insns)); | ||
337 | &veor ($T1,$T1,$T3); # sigma0(X[1..4]) | ||
338 | eval(shift(@insns)); | ||
339 | eval(shift(@insns)); | ||
340 | &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); | ||
341 | eval(shift(@insns)); | ||
342 | eval(shift(@insns)); | ||
343 | &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); | ||
344 | eval(shift(@insns)); | ||
345 | eval(shift(@insns)); | ||
346 | &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) | ||
347 | eval(shift(@insns)); | ||
348 | eval(shift(@insns)); | ||
349 | &veor ($T5,$T5,$T4); | ||
350 | eval(shift(@insns)); | ||
351 | eval(shift(@insns)); | ||
352 | &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); | ||
353 | eval(shift(@insns)); | ||
354 | eval(shift(@insns)); | ||
355 | &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); | ||
356 | eval(shift(@insns)); | ||
357 | eval(shift(@insns)); | ||
358 | &veor ($T5,$T5,$T4); # sigma1(X[14..15]) | ||
359 | eval(shift(@insns)); | ||
360 | eval(shift(@insns)); | ||
361 | &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) | ||
362 | eval(shift(@insns)); | ||
363 | eval(shift(@insns)); | ||
364 | &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); | ||
365 | eval(shift(@insns)); | ||
366 | eval(shift(@insns)); | ||
367 | &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); | ||
368 | eval(shift(@insns)); | ||
369 | eval(shift(@insns)); | ||
370 | &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); | ||
371 | eval(shift(@insns)); | ||
372 | eval(shift(@insns)); | ||
373 | &veor ($T5,$T5,$T4); | ||
374 | eval(shift(@insns)); | ||
375 | eval(shift(@insns)); | ||
376 | &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); | ||
377 | eval(shift(@insns)); | ||
378 | eval(shift(@insns)); | ||
379 | &vld1_32 ("{$T0}","[$Ktbl,:128]!"); | ||
380 | eval(shift(@insns)); | ||
381 | eval(shift(@insns)); | ||
382 | &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); | ||
383 | eval(shift(@insns)); | ||
384 | eval(shift(@insns)); | ||
385 | &veor ($T5,$T5,$T4); # sigma1(X[16..17]) | ||
386 | eval(shift(@insns)); | ||
387 | eval(shift(@insns)); | ||
388 | &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) | ||
389 | eval(shift(@insns)); | ||
390 | eval(shift(@insns)); | ||
391 | &vadd_i32 ($T0,$T0,@X[0]); | ||
392 | while($#insns>=2) { eval(shift(@insns)); } | ||
393 | &vst1_32 ("{$T0}","[$Xfer,:128]!"); | ||
394 | eval(shift(@insns)); | ||
395 | eval(shift(@insns)); | ||
396 | |||
397 | push(@X,shift(@X)); # "rotate" X[] | ||
398 | } | ||
399 | |||
400 | sub Xpreload() | ||
401 | { use integer; | ||
402 | my $body = shift; | ||
403 | my @insns = (&$body,&$body,&$body,&$body); | ||
404 | my ($a,$b,$c,$d,$e,$f,$g,$h); | ||
405 | |||
406 | eval(shift(@insns)); | ||
407 | eval(shift(@insns)); | ||
408 | eval(shift(@insns)); | ||
409 | eval(shift(@insns)); | ||
410 | &vld1_32 ("{$T0}","[$Ktbl,:128]!"); | ||
411 | eval(shift(@insns)); | ||
412 | eval(shift(@insns)); | ||
413 | eval(shift(@insns)); | ||
414 | eval(shift(@insns)); | ||
415 | &vrev32_8 (@X[0],@X[0]); | ||
416 | eval(shift(@insns)); | ||
417 | eval(shift(@insns)); | ||
418 | eval(shift(@insns)); | ||
419 | eval(shift(@insns)); | ||
420 | &vadd_i32 ($T0,$T0,@X[0]); | ||
421 | foreach (@insns) { eval; } # remaining instructions | ||
422 | &vst1_32 ("{$T0}","[$Xfer,:128]!"); | ||
423 | |||
424 | push(@X,shift(@X)); # "rotate" X[] | ||
425 | } | ||
426 | |||
427 | sub body_00_15 () { | ||
428 | ( | ||
429 | '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. | ||
430 | '&add ($h,$h,$t1)', # h+=X[i]+K[i] | ||
431 | '&eor ($t1,$f,$g)', | ||
432 | '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', | ||
433 | '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past | ||
434 | '&and ($t1,$t1,$e)', | ||
435 | '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) | ||
436 | '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', | ||
437 | '&eor ($t1,$t1,$g)', # Ch(e,f,g) | ||
438 | '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) | ||
439 | '&eor ($t2,$a,$b)', # a^b, b^c in next round | ||
440 | '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) | ||
441 | '&add ($h,$h,$t1)', # h+=Ch(e,f,g) | ||
442 | '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. | ||
443 | '&ldr ($t1,"[$Ktbl]") if ($j==15);'. | ||
444 | '&ldr ($t1,"[sp,#64]") if ($j==31)', | ||
445 | '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) | ||
446 | '&add ($d,$d,$h)', # d+=h | ||
447 | '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) | ||
448 | '&eor ($t3,$t3,$b)', # Maj(a,b,c) | ||
449 | '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' | ||
450 | ) | ||
451 | } | ||
452 | |||
453 | $code.=<<___; | ||
454 | #if __ARM_MAX_ARCH__>=7 | ||
455 | .arch armv7-a | ||
456 | .fpu neon | ||
457 | |||
458 | .global sha256_block_data_order_neon | ||
459 | .type sha256_block_data_order_neon,%function | ||
460 | .align 4 | ||
461 | sha256_block_data_order_neon: | ||
462 | .LNEON: | ||
463 | stmdb sp!,{r4-r12,lr} | ||
464 | |||
465 | sub $H,sp,#16*4+16 | ||
466 | adrl $Ktbl,K256 | ||
467 | bic $H,$H,#15 @ align for 128-bit stores | ||
468 | mov $t2,sp | ||
469 | mov sp,$H @ alloca | ||
470 | add $len,$inp,$len,lsl#6 @ len to point at the end of inp | ||
471 | |||
472 | vld1.8 {@X[0]},[$inp]! | ||
473 | vld1.8 {@X[1]},[$inp]! | ||
474 | vld1.8 {@X[2]},[$inp]! | ||
475 | vld1.8 {@X[3]},[$inp]! | ||
476 | vld1.32 {$T0},[$Ktbl,:128]! | ||
477 | vld1.32 {$T1},[$Ktbl,:128]! | ||
478 | vld1.32 {$T2},[$Ktbl,:128]! | ||
479 | vld1.32 {$T3},[$Ktbl,:128]! | ||
480 | vrev32.8 @X[0],@X[0] @ yes, even on | ||
481 | str $ctx,[sp,#64] | ||
482 | vrev32.8 @X[1],@X[1] @ big-endian | ||
483 | str $inp,[sp,#68] | ||
484 | mov $Xfer,sp | ||
485 | vrev32.8 @X[2],@X[2] | ||
486 | str $len,[sp,#72] | ||
487 | vrev32.8 @X[3],@X[3] | ||
488 | str $t2,[sp,#76] @ save original sp | ||
489 | vadd.i32 $T0,$T0,@X[0] | ||
490 | vadd.i32 $T1,$T1,@X[1] | ||
491 | vst1.32 {$T0},[$Xfer,:128]! | ||
492 | vadd.i32 $T2,$T2,@X[2] | ||
493 | vst1.32 {$T1},[$Xfer,:128]! | ||
494 | vadd.i32 $T3,$T3,@X[3] | ||
495 | vst1.32 {$T2},[$Xfer,:128]! | ||
496 | vst1.32 {$T3},[$Xfer,:128]! | ||
497 | |||
498 | ldmia $ctx,{$A-$H} | ||
499 | sub $Xfer,$Xfer,#64 | ||
500 | ldr $t1,[sp,#0] | ||
501 | eor $t2,$t2,$t2 | ||
502 | eor $t3,$B,$C | ||
503 | b .L_00_48 | ||
504 | |||
505 | .align 4 | ||
506 | .L_00_48: | ||
507 | ___ | ||
508 | &Xupdate(\&body_00_15); | ||
509 | &Xupdate(\&body_00_15); | ||
510 | &Xupdate(\&body_00_15); | ||
511 | &Xupdate(\&body_00_15); | ||
512 | $code.=<<___; | ||
513 | teq $t1,#0 @ check for K256 terminator | ||
514 | ldr $t1,[sp,#0] | ||
515 | sub $Xfer,$Xfer,#64 | ||
516 | bne .L_00_48 | ||
517 | |||
518 | ldr $inp,[sp,#68] | ||
519 | ldr $t0,[sp,#72] | ||
520 | sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl | ||
521 | teq $inp,$t0 | ||
522 | it eq | ||
523 | subeq $inp,$inp,#64 @ avoid SEGV | ||
524 | vld1.8 {@X[0]},[$inp]! @ load next input block | ||
525 | vld1.8 {@X[1]},[$inp]! | ||
526 | vld1.8 {@X[2]},[$inp]! | ||
527 | vld1.8 {@X[3]},[$inp]! | ||
528 | it ne | ||
529 | strne $inp,[sp,#68] | ||
530 | mov $Xfer,sp | ||
531 | ___ | ||
532 | &Xpreload(\&body_00_15); | ||
533 | &Xpreload(\&body_00_15); | ||
534 | &Xpreload(\&body_00_15); | ||
535 | &Xpreload(\&body_00_15); | ||
536 | $code.=<<___; | ||
537 | ldr $t0,[$t1,#0] | ||
538 | add $A,$A,$t2 @ h+=Maj(a,b,c) from the past | ||
539 | ldr $t2,[$t1,#4] | ||
540 | ldr $t3,[$t1,#8] | ||
541 | ldr $t4,[$t1,#12] | ||
542 | add $A,$A,$t0 @ accumulate | ||
543 | ldr $t0,[$t1,#16] | ||
544 | add $B,$B,$t2 | ||
545 | ldr $t2,[$t1,#20] | ||
546 | add $C,$C,$t3 | ||
547 | ldr $t3,[$t1,#24] | ||
548 | add $D,$D,$t4 | ||
549 | ldr $t4,[$t1,#28] | ||
550 | add $E,$E,$t0 | ||
551 | str $A,[$t1],#4 | ||
552 | add $F,$F,$t2 | ||
553 | str $B,[$t1],#4 | ||
554 | add $G,$G,$t3 | ||
555 | str $C,[$t1],#4 | ||
556 | add $H,$H,$t4 | ||
557 | str $D,[$t1],#4 | ||
558 | stmia $t1,{$E-$H} | ||
559 | |||
560 | ittte ne | ||
561 | movne $Xfer,sp | ||
562 | ldrne $t1,[sp,#0] | ||
563 | eorne $t2,$t2,$t2 | ||
564 | ldreq sp,[sp,#76] @ restore original sp | ||
565 | itt ne | ||
566 | eorne $t3,$B,$C | ||
567 | bne .L_00_48 | ||
568 | |||
569 | ldmia sp!,{r4-r12,pc} | ||
570 | .size sha256_block_data_order_neon,.-sha256_block_data_order_neon | ||
571 | #endif | ||
572 | ___ | ||
573 | }}} | ||
574 | ###################################################################### | ||
575 | # ARMv8 stuff | ||
576 | # | ||
577 | {{{ | ||
578 | my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); | ||
579 | my @MSG=map("q$_",(8..11)); | ||
580 | my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); | ||
581 | my $Ktbl="r3"; | ||
582 | |||
583 | $code.=<<___; | ||
584 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
585 | |||
586 | # ifdef __thumb2__ | ||
587 | # define INST(a,b,c,d) .byte c,d|0xc,a,b | ||
588 | # else | ||
589 | # define INST(a,b,c,d) .byte a,b,c,d | ||
590 | # endif | ||
591 | |||
592 | .type sha256_block_data_order_armv8,%function | ||
593 | .align 5 | ||
594 | sha256_block_data_order_armv8: | ||
595 | .LARMv8: | ||
596 | vld1.32 {$ABCD,$EFGH},[$ctx] | ||
597 | # ifdef __thumb2__ | ||
598 | adr $Ktbl,.LARMv8 | ||
599 | sub $Ktbl,$Ktbl,#.LARMv8-K256 | ||
600 | # else | ||
601 | adrl $Ktbl,K256 | ||
602 | # endif | ||
603 | add $len,$inp,$len,lsl#6 @ len to point at the end of inp | ||
604 | |||
605 | .Loop_v8: | ||
606 | vld1.8 {@MSG[0]-@MSG[1]},[$inp]! | ||
607 | vld1.8 {@MSG[2]-@MSG[3]},[$inp]! | ||
608 | vld1.32 {$W0},[$Ktbl]! | ||
609 | vrev32.8 @MSG[0],@MSG[0] | ||
610 | vrev32.8 @MSG[1],@MSG[1] | ||
611 | vrev32.8 @MSG[2],@MSG[2] | ||
612 | vrev32.8 @MSG[3],@MSG[3] | ||
613 | vmov $ABCD_SAVE,$ABCD @ offload | ||
614 | vmov $EFGH_SAVE,$EFGH | ||
615 | teq $inp,$len | ||
616 | ___ | ||
617 | for($i=0;$i<12;$i++) { | ||
618 | $code.=<<___; | ||
619 | vld1.32 {$W1},[$Ktbl]! | ||
620 | vadd.i32 $W0,$W0,@MSG[0] | ||
621 | sha256su0 @MSG[0],@MSG[1] | ||
622 | vmov $abcd,$ABCD | ||
623 | sha256h $ABCD,$EFGH,$W0 | ||
624 | sha256h2 $EFGH,$abcd,$W0 | ||
625 | sha256su1 @MSG[0],@MSG[2],@MSG[3] | ||
626 | ___ | ||
627 | ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); | ||
628 | } | ||
629 | $code.=<<___; | ||
630 | vld1.32 {$W1},[$Ktbl]! | ||
631 | vadd.i32 $W0,$W0,@MSG[0] | ||
632 | vmov $abcd,$ABCD | ||
633 | sha256h $ABCD,$EFGH,$W0 | ||
634 | sha256h2 $EFGH,$abcd,$W0 | ||
635 | |||
636 | vld1.32 {$W0},[$Ktbl]! | ||
637 | vadd.i32 $W1,$W1,@MSG[1] | ||
638 | vmov $abcd,$ABCD | ||
639 | sha256h $ABCD,$EFGH,$W1 | ||
640 | sha256h2 $EFGH,$abcd,$W1 | ||
641 | |||
642 | vld1.32 {$W1},[$Ktbl] | ||
643 | vadd.i32 $W0,$W0,@MSG[2] | ||
644 | sub $Ktbl,$Ktbl,#256-16 @ rewind | ||
645 | vmov $abcd,$ABCD | ||
646 | sha256h $ABCD,$EFGH,$W0 | ||
647 | sha256h2 $EFGH,$abcd,$W0 | ||
648 | |||
649 | vadd.i32 $W1,$W1,@MSG[3] | ||
650 | vmov $abcd,$ABCD | ||
651 | sha256h $ABCD,$EFGH,$W1 | ||
652 | sha256h2 $EFGH,$abcd,$W1 | ||
653 | |||
654 | vadd.i32 $ABCD,$ABCD,$ABCD_SAVE | ||
655 | vadd.i32 $EFGH,$EFGH,$EFGH_SAVE | ||
656 | it ne | ||
657 | bne .Loop_v8 | ||
658 | |||
659 | vst1.32 {$ABCD,$EFGH},[$ctx] | ||
660 | |||
661 | ret @ bx lr | ||
662 | .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 | ||
663 | #endif | ||
664 | ___ | ||
665 | }}} | ||
666 | $code.=<<___; | ||
667 | .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" | ||
668 | .align 2 | ||
669 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
670 | .comm OPENSSL_armcap_P,4,4 | ||
671 | #endif | ||
672 | ___ | ||
673 | |||
674 | open SELF,$0; # re-open this very script | ||
675 | while(<SELF>) { # copy its leading comment block into the output, | ||
676 | next if (/^#!/); # minus the shebang, | ||
677 | last if (!s/^#/@/ and !/^$/); # turning Perl '#' into asm '@' | ||
678 | print; | ||
679 | } | ||
680 | close SELF; | ||
681 | |||
682 | { my %opcode = ( | ||
683 | "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, | ||
684 | "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); | ||
685 | |||
686 | sub unsha256 { | ||
687 | my ($mnemonic,$arg)=@_; | ||
688 | |||
689 | if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { | ||
690 | my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) | ||
691 | |(($2&7)<<17)|(($2&8)<<4) | ||
692 | |(($3&7)<<1) |(($3&8)<<2); | ||
693 | # ARMv7 instructions are always encoded little-endian, hence | ||
694 | # the fixed byte order here. The correct solution is the .inst | ||
695 | # directive, but older assemblers don't implement it:-( | ||
696 | sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", | ||
697 | $word&0xff,($word>>8)&0xff, | ||
698 | ($word>>16)&0xff,($word>>24)&0xff, | ||
699 | $mnemonic,$arg; | ||
700 | } | ||
701 | } | ||
702 | } | ||
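For reference, a minimal standalone instantiation of the packing above (a hypothetical demo, not part of the build; the q8,q9 operands match the first scheduled quad round emitted earlier):

    my %opcode = ( "sha256su0" => 0xf3ba03c0 );
    my ($d, $n, $m) = (8, 0, 9);           # "sha256su0 q8,q9": no Vn operand
    my $word = $opcode{"sha256su0"}
             | (($d&7)<<13) | (($d&8)<<19) # Vd[3:1] -> bits 15:13, D -> bit 22
             | (($n&7)<<17) | (($n&8)<<4)  # Vn[3:1] -> bits 19:17, N -> bit 7
             | (($m&7)<<1)  | (($m&8)<<2); # Vm[3:1] -> bits 3:1,  M -> bit 5
    printf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t\@ %s\n",
           $word&0xff, ($word>>8)&0xff, ($word>>16)&0xff, ($word>>24)&0xff,
           "sha256su0 q8,q9";
    # prints: INST(0xe2,0x03,0xfa,0xf3)  @ sha256su0 q8,q9

A Q register qN is encoded as the even D register 2N, so N[2:0] lands in the upper three bits of the four-bit Vd/Vn/Vm field and N[3] in the separate D/N/M bit. The INST() macro defined in the ARMv8 section then emits these bytes verbatim for ARM mode, or swaps the two halfwords (and ORs 0xc into the top byte) for the Thumb-2 encoding.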
703 | |||
704 | foreach (split($/,$code)) { | ||
705 | |||
706 | s/\`([^\`]*)\`/eval $1/geo; | ||
707 | |||
708 | s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; | ||
709 | |||
710 | s/\bret\b/bx lr/go or | ||
711 | s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 | ||
712 | |||
713 | print $_,"\n"; | ||
714 | } | ||
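The `or` between the last two substitutions matters: s/// returns the number of replacements, so a line that contained `ret` becomes `bx lr` and skips the second rule in the same pass, while a literal `bx lr` already present in $code is degraded to its raw opcode so the output still assembles with -march=armv4. A hypothetical standalone rerun of that pair of rules:

    for ("\tret\t\t\t\@ bx lr", "\tbx\tlr") {
        my $line = $_;
        $line =~ s/\bret\b/bx lr/g
            or $line =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/g; # raw `bx lr` encoding
        print "$line\n";
    }
    # Output:
    #   bx lr           @ bx lr
    #   .word   0xe12fff1e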
715 | |||
716 | close STDOUT; # enforce flush | ||
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped new file mode 100644 index 000000000000..555a1a8eec90 --- /dev/null +++ b/arch/arm/crypto/sha256-core.S_shipped | |||
@@ -0,0 +1,2808 @@ | |||
1 | |||
2 | @ ==================================================================== | ||
3 | @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | ||
4 | @ project. The module is, however, dual licensed under OpenSSL and | ||
5 | @ CRYPTOGAMS licenses depending on where you obtain it. For further | ||
6 | @ details see http://www.openssl.org/~appro/cryptogams/. | ||
7 | @ | ||
8 | @ Permission to use under GPL terms is granted. | ||
9 | @ ==================================================================== | ||
10 | |||
11 | @ SHA256 block procedure for ARMv4. May 2007. | ||
12 | |||
13 | @ Performance is ~2x better than gcc 3.4 generated code and in | ||
14 | @ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles | ||
15 | @ per byte [on single-issue Xscale PXA250 core]. | ||
16 | |||
17 | @ July 2010. | ||
18 | @ | ||
19 | @ Rescheduling for dual-issue pipeline resulted in 22% improvement on | ||
20 | @ Cortex A8 core and ~20 cycles per processed byte. | ||
21 | |||
22 | @ February 2011. | ||
23 | @ | ||
24 | @ Profiler-assisted and platform-specific optimization resulted in 16% | ||
25 | @ improvement on Cortex A8 core and ~15.4 cycles per processed byte. | ||
26 | |||
27 | @ September 2013. | ||
28 | @ | ||
29 | @ Add NEON implementation. On Cortex A8 it was measured to process one | ||
30 | @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon | ||
31 | @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only | ||
32 | @ code (meaning the latter performs sub-optimally; nothing was | ||
33 | @ done about it). | ||
34 | |||
35 | @ May 2014. | ||
36 | @ | ||
37 | @ Add ARMv8 code path performing at 2.0 cycles per byte on Apple A7. | ||
38 | |||
39 | #ifndef __KERNEL__ | ||
40 | # include "arm_arch.h" | ||
41 | #else | ||
42 | # define __ARM_ARCH__ __LINUX_ARM_ARCH__ | ||
43 | # define __ARM_MAX_ARCH__ 7 | ||
44 | #endif | ||
45 | |||
46 | .text | ||
47 | #if __ARM_ARCH__<7 | ||
48 | .code 32 | ||
49 | #else | ||
50 | .syntax unified | ||
51 | # ifdef __thumb2__ | ||
52 | # define adrl adr | ||
53 | .thumb | ||
54 | # else | ||
55 | .code 32 | ||
56 | # endif | ||
57 | #endif | ||
58 | |||
59 | .type K256,%object | ||
60 | .align 5 | ||
61 | K256: | ||
62 | .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
63 | .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
64 | .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
65 | .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
66 | .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
67 | .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
68 | .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
69 | .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
70 | .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
71 | .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
72 | .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
73 | .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
74 | .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
75 | .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
76 | .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
77 | .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
78 | .size K256,.-K256 | ||
79 | .word 0 @ terminator | ||
80 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
81 | .LOPENSSL_armcap: | ||
82 | .word OPENSSL_armcap_P-sha256_block_data_order | ||
83 | #endif | ||
84 | .align 5 | ||
85 | |||
86 | .global sha256_block_data_order | ||
87 | .type sha256_block_data_order,%function | ||
88 | sha256_block_data_order: | ||
89 | #if __ARM_ARCH__<7 | ||
90 | sub r3,pc,#8 @ sha256_block_data_order | ||
91 | #else | ||
92 | adr r3,sha256_block_data_order | ||
93 | #endif | ||
94 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
95 | ldr r12,.LOPENSSL_armcap | ||
96 | ldr r12,[r3,r12] @ OPENSSL_armcap_P | ||
97 | tst r12,#ARMV8_SHA256 | ||
98 | bne .LARMv8 | ||
99 | tst r12,#ARMV7_NEON | ||
100 | bne .LNEON | ||
101 | #endif | ||
102 | add r2,r1,r2,lsl#6 @ len to point at the end of inp | ||
103 | stmdb sp!,{r0,r1,r2,r4-r11,lr} | ||
104 | ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} | ||
105 | sub r14,r3,#256+32 @ K256 | ||
106 | sub sp,sp,#16*4 @ alloca(X[16]) | ||
107 | .Loop: | ||
108 | # if __ARM_ARCH__>=7 | ||
109 | ldr r2,[r1],#4 | ||
110 | # else | ||
111 | ldrb r2,[r1,#3] | ||
112 | # endif | ||
113 | eor r3,r5,r6 @ magic: b^c, seeding the rolling Maj(a,b,c) | ||
114 | eor r12,r12,r12 | ||
115 | #if __ARM_ARCH__>=7 | ||
116 | @ ldr r2,[r1],#4 @ 0 | ||
117 | # if 0==15 | ||
118 | str r1,[sp,#17*4] @ make room for r1 | ||
119 | # endif | ||
120 | eor r0,r8,r8,ror#5 | ||
121 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
122 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
123 | # ifndef __ARMEB__ | ||
124 | rev r2,r2 | ||
125 | # endif | ||
126 | #else | ||
127 | @ ldrb r2,[r1,#3] @ 0 | ||
128 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
129 | ldrb r12,[r1,#2] | ||
130 | ldrb r0,[r1,#1] | ||
131 | orr r2,r2,r12,lsl#8 | ||
132 | ldrb r12,[r1],#4 | ||
133 | orr r2,r2,r0,lsl#16 | ||
134 | # if 0==15 | ||
135 | str r1,[sp,#17*4] @ make room for r1 | ||
136 | # endif | ||
137 | eor r0,r8,r8,ror#5 | ||
138 | orr r2,r2,r12,lsl#24 | ||
139 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
140 | #endif | ||
141 | ldr r12,[r14],#4 @ *K256++ | ||
142 | add r11,r11,r2 @ h+=X[i] | ||
143 | str r2,[sp,#0*4] | ||
144 | eor r2,r9,r10 | ||
145 | add r11,r11,r0,ror#6 @ h+=Sigma1(e) | ||
146 | and r2,r2,r8 | ||
147 | add r11,r11,r12 @ h+=K256[i] | ||
148 | eor r2,r2,r10 @ Ch(e,f,g) | ||
149 | eor r0,r4,r4,ror#11 | ||
150 | add r11,r11,r2 @ h+=Ch(e,f,g) | ||
151 | #if 0==31 | ||
152 | and r12,r12,#0xff | ||
153 | cmp r12,#0xf2 @ done? | ||
154 | #endif | ||
155 | #if 0<15 | ||
156 | # if __ARM_ARCH__>=7 | ||
157 | ldr r2,[r1],#4 @ prefetch | ||
158 | # else | ||
159 | ldrb r2,[r1,#3] | ||
160 | # endif | ||
161 | eor r12,r4,r5 @ a^b, b^c in next round | ||
162 | #else | ||
163 | ldr r2,[sp,#2*4] @ from future BODY_16_xx | ||
164 | eor r12,r4,r5 @ a^b, b^c in next round | ||
165 | ldr r1,[sp,#15*4] @ from future BODY_16_xx | ||
166 | #endif | ||
167 | eor r0,r0,r4,ror#20 @ Sigma0(a) | ||
168 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
169 | add r7,r7,r11 @ d+=h | ||
170 | eor r3,r3,r5 @ Maj(a,b,c) | ||
171 | add r11,r11,r0,ror#2 @ h+=Sigma0(a) | ||
172 | @ add r11,r11,r3 @ h+=Maj(a,b,c) | ||
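The fully unrolled round above repeats, with the a..h registers rotated, for each of the 64 rounds below; as a reading aid, the quantities named in its comments are the standard FIPS 180-2 round function (\ggg denoting a 32-bit rotate right):

    \begin{aligned}
    T_1 &= h + \Sigma_1(e) + \mathrm{Ch}(e,f,g) + K_i + X_i,\qquad
           d \gets d + T_1,\qquad h \gets T_1 + \Sigma_0(a) + \mathrm{Maj}(a,b,c)\\
    \Sigma_1(e) &= (e \ggg 6)\oplus(e \ggg 11)\oplus(e \ggg 25)
                 = \bigl(e \oplus (e \ggg 5) \oplus (e \ggg 19)\bigr) \ggg 6\\
    \Sigma_0(a) &= (a \ggg 2)\oplus(a \ggg 13)\oplus(a \ggg 22)
                 = \bigl(a \oplus (a \ggg 11) \oplus (a \ggg 20)\bigr) \ggg 2\\
    \mathrm{Ch}(e,f,g) &= (e\wedge f)\oplus(\lnot e\wedge g)
                 = \bigl((f\oplus g)\wedge e\bigr)\oplus g\\
    \mathrm{Maj}(a,b,c) &= (a\wedge b)\oplus(a\wedge c)\oplus(b\wedge c)
                 = \bigl((b\oplus c)\wedge(a\oplus b)\bigr)\oplus b
    \end{aligned}

The factored right-hand sides are what the code computes: hoisting the common rotation out of Sigma1/Sigma0 saves one rotate per round, and the (b^c)&(a^b) form of Maj lets each round hand its a^b to the next round as that round's b^c, which is why each h+=Maj(a,b,c) is deferred and applied "from the past".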
173 | #if __ARM_ARCH__>=7 | ||
174 | @ ldr r2,[r1],#4 @ 1 | ||
175 | # if 1==15 | ||
176 | str r1,[sp,#17*4] @ make room for r1 | ||
177 | # endif | ||
178 | eor r0,r7,r7,ror#5 | ||
179 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
180 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
181 | # ifndef __ARMEB__ | ||
182 | rev r2,r2 | ||
183 | # endif | ||
184 | #else | ||
185 | @ ldrb r2,[r1,#3] @ 1 | ||
186 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
187 | ldrb r3,[r1,#2] | ||
188 | ldrb r0,[r1,#1] | ||
189 | orr r2,r2,r3,lsl#8 | ||
190 | ldrb r3,[r1],#4 | ||
191 | orr r2,r2,r0,lsl#16 | ||
192 | # if 1==15 | ||
193 | str r1,[sp,#17*4] @ make room for r1 | ||
194 | # endif | ||
195 | eor r0,r7,r7,ror#5 | ||
196 | orr r2,r2,r3,lsl#24 | ||
197 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
198 | #endif | ||
199 | ldr r3,[r14],#4 @ *K256++ | ||
200 | add r10,r10,r2 @ h+=X[i] | ||
201 | str r2,[sp,#1*4] | ||
202 | eor r2,r8,r9 | ||
203 | add r10,r10,r0,ror#6 @ h+=Sigma1(e) | ||
204 | and r2,r2,r7 | ||
205 | add r10,r10,r3 @ h+=K256[i] | ||
206 | eor r2,r2,r9 @ Ch(e,f,g) | ||
207 | eor r0,r11,r11,ror#11 | ||
208 | add r10,r10,r2 @ h+=Ch(e,f,g) | ||
209 | #if 1==31 | ||
210 | and r3,r3,#0xff | ||
211 | cmp r3,#0xf2 @ done? | ||
212 | #endif | ||
213 | #if 1<15 | ||
214 | # if __ARM_ARCH__>=7 | ||
215 | ldr r2,[r1],#4 @ prefetch | ||
216 | # else | ||
217 | ldrb r2,[r1,#3] | ||
218 | # endif | ||
219 | eor r3,r11,r4 @ a^b, b^c in next round | ||
220 | #else | ||
221 | ldr r2,[sp,#3*4] @ from future BODY_16_xx | ||
222 | eor r3,r11,r4 @ a^b, b^c in next round | ||
223 | ldr r1,[sp,#0*4] @ from future BODY_16_xx | ||
224 | #endif | ||
225 | eor r0,r0,r11,ror#20 @ Sigma0(a) | ||
226 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
227 | add r6,r6,r10 @ d+=h | ||
228 | eor r12,r12,r4 @ Maj(a,b,c) | ||
229 | add r10,r10,r0,ror#2 @ h+=Sigma0(a) | ||
230 | @ add r10,r10,r12 @ h+=Maj(a,b,c) | ||
231 | #if __ARM_ARCH__>=7 | ||
232 | @ ldr r2,[r1],#4 @ 2 | ||
233 | # if 2==15 | ||
234 | str r1,[sp,#17*4] @ make room for r1 | ||
235 | # endif | ||
236 | eor r0,r6,r6,ror#5 | ||
237 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
238 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
239 | # ifndef __ARMEB__ | ||
240 | rev r2,r2 | ||
241 | # endif | ||
242 | #else | ||
243 | @ ldrb r2,[r1,#3] @ 2 | ||
244 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
245 | ldrb r12,[r1,#2] | ||
246 | ldrb r0,[r1,#1] | ||
247 | orr r2,r2,r12,lsl#8 | ||
248 | ldrb r12,[r1],#4 | ||
249 | orr r2,r2,r0,lsl#16 | ||
250 | # if 2==15 | ||
251 | str r1,[sp,#17*4] @ make room for r1 | ||
252 | # endif | ||
253 | eor r0,r6,r6,ror#5 | ||
254 | orr r2,r2,r12,lsl#24 | ||
255 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
256 | #endif | ||
257 | ldr r12,[r14],#4 @ *K256++ | ||
258 | add r9,r9,r2 @ h+=X[i] | ||
259 | str r2,[sp,#2*4] | ||
260 | eor r2,r7,r8 | ||
261 | add r9,r9,r0,ror#6 @ h+=Sigma1(e) | ||
262 | and r2,r2,r6 | ||
263 | add r9,r9,r12 @ h+=K256[i] | ||
264 | eor r2,r2,r8 @ Ch(e,f,g) | ||
265 | eor r0,r10,r10,ror#11 | ||
266 | add r9,r9,r2 @ h+=Ch(e,f,g) | ||
267 | #if 2==31 | ||
268 | and r12,r12,#0xff | ||
269 | cmp r12,#0xf2 @ done? | ||
270 | #endif | ||
271 | #if 2<15 | ||
272 | # if __ARM_ARCH__>=7 | ||
273 | ldr r2,[r1],#4 @ prefetch | ||
274 | # else | ||
275 | ldrb r2,[r1,#3] | ||
276 | # endif | ||
277 | eor r12,r10,r11 @ a^b, b^c in next round | ||
278 | #else | ||
279 | ldr r2,[sp,#4*4] @ from future BODY_16_xx | ||
280 | eor r12,r10,r11 @ a^b, b^c in next round | ||
281 | ldr r1,[sp,#1*4] @ from future BODY_16_xx | ||
282 | #endif | ||
283 | eor r0,r0,r10,ror#20 @ Sigma0(a) | ||
284 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
285 | add r5,r5,r9 @ d+=h | ||
286 | eor r3,r3,r11 @ Maj(a,b,c) | ||
287 | add r9,r9,r0,ror#2 @ h+=Sigma0(a) | ||
288 | @ add r9,r9,r3 @ h+=Maj(a,b,c) | ||
289 | #if __ARM_ARCH__>=7 | ||
290 | @ ldr r2,[r1],#4 @ 3 | ||
291 | # if 3==15 | ||
292 | str r1,[sp,#17*4] @ make room for r1 | ||
293 | # endif | ||
294 | eor r0,r5,r5,ror#5 | ||
295 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
296 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
297 | # ifndef __ARMEB__ | ||
298 | rev r2,r2 | ||
299 | # endif | ||
300 | #else | ||
301 | @ ldrb r2,[r1,#3] @ 3 | ||
302 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
303 | ldrb r3,[r1,#2] | ||
304 | ldrb r0,[r1,#1] | ||
305 | orr r2,r2,r3,lsl#8 | ||
306 | ldrb r3,[r1],#4 | ||
307 | orr r2,r2,r0,lsl#16 | ||
308 | # if 3==15 | ||
309 | str r1,[sp,#17*4] @ make room for r1 | ||
310 | # endif | ||
311 | eor r0,r5,r5,ror#5 | ||
312 | orr r2,r2,r3,lsl#24 | ||
313 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
314 | #endif | ||
315 | ldr r3,[r14],#4 @ *K256++ | ||
316 | add r8,r8,r2 @ h+=X[i] | ||
317 | str r2,[sp,#3*4] | ||
318 | eor r2,r6,r7 | ||
319 | add r8,r8,r0,ror#6 @ h+=Sigma1(e) | ||
320 | and r2,r2,r5 | ||
321 | add r8,r8,r3 @ h+=K256[i] | ||
322 | eor r2,r2,r7 @ Ch(e,f,g) | ||
323 | eor r0,r9,r9,ror#11 | ||
324 | add r8,r8,r2 @ h+=Ch(e,f,g) | ||
325 | #if 3==31 | ||
326 | and r3,r3,#0xff | ||
327 | cmp r3,#0xf2 @ done? | ||
328 | #endif | ||
329 | #if 3<15 | ||
330 | # if __ARM_ARCH__>=7 | ||
331 | ldr r2,[r1],#4 @ prefetch | ||
332 | # else | ||
333 | ldrb r2,[r1,#3] | ||
334 | # endif | ||
335 | eor r3,r9,r10 @ a^b, b^c in next round | ||
336 | #else | ||
337 | ldr r2,[sp,#5*4] @ from future BODY_16_xx | ||
338 | eor r3,r9,r10 @ a^b, b^c in next round | ||
339 | ldr r1,[sp,#2*4] @ from future BODY_16_xx | ||
340 | #endif | ||
341 | eor r0,r0,r9,ror#20 @ Sigma0(a) | ||
342 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
343 | add r4,r4,r8 @ d+=h | ||
344 | eor r12,r12,r10 @ Maj(a,b,c) | ||
345 | add r8,r8,r0,ror#2 @ h+=Sigma0(a) | ||
346 | @ add r8,r8,r12 @ h+=Maj(a,b,c) | ||
347 | #if __ARM_ARCH__>=7 | ||
348 | @ ldr r2,[r1],#4 @ 4 | ||
349 | # if 4==15 | ||
350 | str r1,[sp,#17*4] @ make room for r1 | ||
351 | # endif | ||
352 | eor r0,r4,r4,ror#5 | ||
353 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
354 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
355 | # ifndef __ARMEB__ | ||
356 | rev r2,r2 | ||
357 | # endif | ||
358 | #else | ||
359 | @ ldrb r2,[r1,#3] @ 4 | ||
360 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
361 | ldrb r12,[r1,#2] | ||
362 | ldrb r0,[r1,#1] | ||
363 | orr r2,r2,r12,lsl#8 | ||
364 | ldrb r12,[r1],#4 | ||
365 | orr r2,r2,r0,lsl#16 | ||
366 | # if 4==15 | ||
367 | str r1,[sp,#17*4] @ make room for r1 | ||
368 | # endif | ||
369 | eor r0,r4,r4,ror#5 | ||
370 | orr r2,r2,r12,lsl#24 | ||
371 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
372 | #endif | ||
373 | ldr r12,[r14],#4 @ *K256++ | ||
374 | add r7,r7,r2 @ h+=X[i] | ||
375 | str r2,[sp,#4*4] | ||
376 | eor r2,r5,r6 | ||
377 | add r7,r7,r0,ror#6 @ h+=Sigma1(e) | ||
378 | and r2,r2,r4 | ||
379 | add r7,r7,r12 @ h+=K256[i] | ||
380 | eor r2,r2,r6 @ Ch(e,f,g) | ||
381 | eor r0,r8,r8,ror#11 | ||
382 | add r7,r7,r2 @ h+=Ch(e,f,g) | ||
383 | #if 4==31 | ||
384 | and r12,r12,#0xff | ||
385 | cmp r12,#0xf2 @ done? | ||
386 | #endif | ||
387 | #if 4<15 | ||
388 | # if __ARM_ARCH__>=7 | ||
389 | ldr r2,[r1],#4 @ prefetch | ||
390 | # else | ||
391 | ldrb r2,[r1,#3] | ||
392 | # endif | ||
393 | eor r12,r8,r9 @ a^b, b^c in next round | ||
394 | #else | ||
395 | ldr r2,[sp,#6*4] @ from future BODY_16_xx | ||
396 | eor r12,r8,r9 @ a^b, b^c in next round | ||
397 | ldr r1,[sp,#3*4] @ from future BODY_16_xx | ||
398 | #endif | ||
399 | eor r0,r0,r8,ror#20 @ Sigma0(a) | ||
400 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
401 | add r11,r11,r7 @ d+=h | ||
402 | eor r3,r3,r9 @ Maj(a,b,c) | ||
403 | add r7,r7,r0,ror#2 @ h+=Sigma0(a) | ||
404 | @ add r7,r7,r3 @ h+=Maj(a,b,c) | ||
405 | #if __ARM_ARCH__>=7 | ||
406 | @ ldr r2,[r1],#4 @ 5 | ||
407 | # if 5==15 | ||
408 | str r1,[sp,#17*4] @ make room for r1 | ||
409 | # endif | ||
410 | eor r0,r11,r11,ror#5 | ||
411 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
412 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
413 | # ifndef __ARMEB__ | ||
414 | rev r2,r2 | ||
415 | # endif | ||
416 | #else | ||
417 | @ ldrb r2,[r1,#3] @ 5 | ||
418 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
419 | ldrb r3,[r1,#2] | ||
420 | ldrb r0,[r1,#1] | ||
421 | orr r2,r2,r3,lsl#8 | ||
422 | ldrb r3,[r1],#4 | ||
423 | orr r2,r2,r0,lsl#16 | ||
424 | # if 5==15 | ||
425 | str r1,[sp,#17*4] @ make room for r1 | ||
426 | # endif | ||
427 | eor r0,r11,r11,ror#5 | ||
428 | orr r2,r2,r3,lsl#24 | ||
429 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
430 | #endif | ||
431 | ldr r3,[r14],#4 @ *K256++ | ||
432 | add r6,r6,r2 @ h+=X[i] | ||
433 | str r2,[sp,#5*4] | ||
434 | eor r2,r4,r5 | ||
435 | add r6,r6,r0,ror#6 @ h+=Sigma1(e) | ||
436 | and r2,r2,r11 | ||
437 | add r6,r6,r3 @ h+=K256[i] | ||
438 | eor r2,r2,r5 @ Ch(e,f,g) | ||
439 | eor r0,r7,r7,ror#11 | ||
440 | add r6,r6,r2 @ h+=Ch(e,f,g) | ||
441 | #if 5==31 | ||
442 | and r3,r3,#0xff | ||
443 | cmp r3,#0xf2 @ done? | ||
444 | #endif | ||
445 | #if 5<15 | ||
446 | # if __ARM_ARCH__>=7 | ||
447 | ldr r2,[r1],#4 @ prefetch | ||
448 | # else | ||
449 | ldrb r2,[r1,#3] | ||
450 | # endif | ||
451 | eor r3,r7,r8 @ a^b, b^c in next round | ||
452 | #else | ||
453 | ldr r2,[sp,#7*4] @ from future BODY_16_xx | ||
454 | eor r3,r7,r8 @ a^b, b^c in next round | ||
455 | ldr r1,[sp,#4*4] @ from future BODY_16_xx | ||
456 | #endif | ||
457 | eor r0,r0,r7,ror#20 @ Sigma0(a) | ||
458 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
459 | add r10,r10,r6 @ d+=h | ||
460 | eor r12,r12,r8 @ Maj(a,b,c) | ||
461 | add r6,r6,r0,ror#2 @ h+=Sigma0(a) | ||
462 | @ add r6,r6,r12 @ h+=Maj(a,b,c) | ||
463 | #if __ARM_ARCH__>=7 | ||
464 | @ ldr r2,[r1],#4 @ 6 | ||
465 | # if 6==15 | ||
466 | str r1,[sp,#17*4] @ make room for r1 | ||
467 | # endif | ||
468 | eor r0,r10,r10,ror#5 | ||
469 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
470 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
471 | # ifndef __ARMEB__ | ||
472 | rev r2,r2 | ||
473 | # endif | ||
474 | #else | ||
475 | @ ldrb r2,[r1,#3] @ 6 | ||
476 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
477 | ldrb r12,[r1,#2] | ||
478 | ldrb r0,[r1,#1] | ||
479 | orr r2,r2,r12,lsl#8 | ||
480 | ldrb r12,[r1],#4 | ||
481 | orr r2,r2,r0,lsl#16 | ||
482 | # if 6==15 | ||
483 | str r1,[sp,#17*4] @ make room for r1 | ||
484 | # endif | ||
485 | eor r0,r10,r10,ror#5 | ||
486 | orr r2,r2,r12,lsl#24 | ||
487 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
488 | #endif | ||
489 | ldr r12,[r14],#4 @ *K256++ | ||
490 | add r5,r5,r2 @ h+=X[i] | ||
491 | str r2,[sp,#6*4] | ||
492 | eor r2,r11,r4 | ||
493 | add r5,r5,r0,ror#6 @ h+=Sigma1(e) | ||
494 | and r2,r2,r10 | ||
495 | add r5,r5,r12 @ h+=K256[i] | ||
496 | eor r2,r2,r4 @ Ch(e,f,g) | ||
497 | eor r0,r6,r6,ror#11 | ||
498 | add r5,r5,r2 @ h+=Ch(e,f,g) | ||
499 | #if 6==31 | ||
500 | and r12,r12,#0xff | ||
501 | cmp r12,#0xf2 @ done? | ||
502 | #endif | ||
503 | #if 6<15 | ||
504 | # if __ARM_ARCH__>=7 | ||
505 | ldr r2,[r1],#4 @ prefetch | ||
506 | # else | ||
507 | ldrb r2,[r1,#3] | ||
508 | # endif | ||
509 | eor r12,r6,r7 @ a^b, b^c in next round | ||
510 | #else | ||
511 | ldr r2,[sp,#8*4] @ from future BODY_16_xx | ||
512 | eor r12,r6,r7 @ a^b, b^c in next round | ||
513 | ldr r1,[sp,#5*4] @ from future BODY_16_xx | ||
514 | #endif | ||
515 | eor r0,r0,r6,ror#20 @ Sigma0(a) | ||
516 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
517 | add r9,r9,r5 @ d+=h | ||
518 | eor r3,r3,r7 @ Maj(a,b,c) | ||
519 | add r5,r5,r0,ror#2 @ h+=Sigma0(a) | ||
520 | @ add r5,r5,r3 @ h+=Maj(a,b,c) | ||
521 | #if __ARM_ARCH__>=7 | ||
522 | @ ldr r2,[r1],#4 @ 7 | ||
523 | # if 7==15 | ||
524 | str r1,[sp,#17*4] @ make room for r1 | ||
525 | # endif | ||
526 | eor r0,r9,r9,ror#5 | ||
527 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
528 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
529 | # ifndef __ARMEB__ | ||
530 | rev r2,r2 | ||
531 | # endif | ||
532 | #else | ||
533 | @ ldrb r2,[r1,#3] @ 7 | ||
534 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
535 | ldrb r3,[r1,#2] | ||
536 | ldrb r0,[r1,#1] | ||
537 | orr r2,r2,r3,lsl#8 | ||
538 | ldrb r3,[r1],#4 | ||
539 | orr r2,r2,r0,lsl#16 | ||
540 | # if 7==15 | ||
541 | str r1,[sp,#17*4] @ make room for r1 | ||
542 | # endif | ||
543 | eor r0,r9,r9,ror#5 | ||
544 | orr r2,r2,r3,lsl#24 | ||
545 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
546 | #endif | ||
547 | ldr r3,[r14],#4 @ *K256++ | ||
548 | add r4,r4,r2 @ h+=X[i] | ||
549 | str r2,[sp,#7*4] | ||
550 | eor r2,r10,r11 | ||
551 | add r4,r4,r0,ror#6 @ h+=Sigma1(e) | ||
552 | and r2,r2,r9 | ||
553 | add r4,r4,r3 @ h+=K256[i] | ||
554 | eor r2,r2,r11 @ Ch(e,f,g) | ||
555 | eor r0,r5,r5,ror#11 | ||
556 | add r4,r4,r2 @ h+=Ch(e,f,g) | ||
557 | #if 7==31 | ||
558 | and r3,r3,#0xff | ||
559 | cmp r3,#0xf2 @ done? | ||
560 | #endif | ||
561 | #if 7<15 | ||
562 | # if __ARM_ARCH__>=7 | ||
563 | ldr r2,[r1],#4 @ prefetch | ||
564 | # else | ||
565 | ldrb r2,[r1,#3] | ||
566 | # endif | ||
567 | eor r3,r5,r6 @ a^b, b^c in next round | ||
568 | #else | ||
569 | ldr r2,[sp,#9*4] @ from future BODY_16_xx | ||
570 | eor r3,r5,r6 @ a^b, b^c in next round | ||
571 | ldr r1,[sp,#6*4] @ from future BODY_16_xx | ||
572 | #endif | ||
573 | eor r0,r0,r5,ror#20 @ Sigma0(a) | ||
574 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
575 | add r8,r8,r4 @ d+=h | ||
576 | eor r12,r12,r6 @ Maj(a,b,c) | ||
577 | add r4,r4,r0,ror#2 @ h+=Sigma0(a) | ||
578 | @ add r4,r4,r12 @ h+=Maj(a,b,c) | ||
579 | #if __ARM_ARCH__>=7 | ||
580 | @ ldr r2,[r1],#4 @ 8 | ||
581 | # if 8==15 | ||
582 | str r1,[sp,#17*4] @ make room for r1 | ||
583 | # endif | ||
584 | eor r0,r8,r8,ror#5 | ||
585 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
586 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
587 | # ifndef __ARMEB__ | ||
588 | rev r2,r2 | ||
589 | # endif | ||
590 | #else | ||
591 | @ ldrb r2,[r1,#3] @ 8 | ||
592 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
593 | ldrb r12,[r1,#2] | ||
594 | ldrb r0,[r1,#1] | ||
595 | orr r2,r2,r12,lsl#8 | ||
596 | ldrb r12,[r1],#4 | ||
597 | orr r2,r2,r0,lsl#16 | ||
598 | # if 8==15 | ||
599 | str r1,[sp,#17*4] @ make room for r1 | ||
600 | # endif | ||
601 | eor r0,r8,r8,ror#5 | ||
602 | orr r2,r2,r12,lsl#24 | ||
603 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
604 | #endif | ||
605 | ldr r12,[r14],#4 @ *K256++ | ||
606 | add r11,r11,r2 @ h+=X[i] | ||
607 | str r2,[sp,#8*4] | ||
608 | eor r2,r9,r10 | ||
609 | add r11,r11,r0,ror#6 @ h+=Sigma1(e) | ||
610 | and r2,r2,r8 | ||
611 | add r11,r11,r12 @ h+=K256[i] | ||
612 | eor r2,r2,r10 @ Ch(e,f,g) | ||
613 | eor r0,r4,r4,ror#11 | ||
614 | add r11,r11,r2 @ h+=Ch(e,f,g) | ||
615 | #if 8==31 | ||
616 | and r12,r12,#0xff | ||
617 | cmp r12,#0xf2 @ done? | ||
618 | #endif | ||
619 | #if 8<15 | ||
620 | # if __ARM_ARCH__>=7 | ||
621 | ldr r2,[r1],#4 @ prefetch | ||
622 | # else | ||
623 | ldrb r2,[r1,#3] | ||
624 | # endif | ||
625 | eor r12,r4,r5 @ a^b, b^c in next round | ||
626 | #else | ||
627 | ldr r2,[sp,#10*4] @ from future BODY_16_xx | ||
628 | eor r12,r4,r5 @ a^b, b^c in next round | ||
629 | ldr r1,[sp,#7*4] @ from future BODY_16_xx | ||
630 | #endif | ||
631 | eor r0,r0,r4,ror#20 @ Sigma0(a) | ||
632 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
633 | add r7,r7,r11 @ d+=h | ||
634 | eor r3,r3,r5 @ Maj(a,b,c) | ||
635 | add r11,r11,r0,ror#2 @ h+=Sigma0(a) | ||
636 | @ add r11,r11,r3 @ h+=Maj(a,b,c) | ||
637 | #if __ARM_ARCH__>=7 | ||
638 | @ ldr r2,[r1],#4 @ 9 | ||
639 | # if 9==15 | ||
640 | str r1,[sp,#17*4] @ make room for r1 | ||
641 | # endif | ||
642 | eor r0,r7,r7,ror#5 | ||
643 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
644 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
645 | # ifndef __ARMEB__ | ||
646 | rev r2,r2 | ||
647 | # endif | ||
648 | #else | ||
649 | @ ldrb r2,[r1,#3] @ 9 | ||
650 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
651 | ldrb r3,[r1,#2] | ||
652 | ldrb r0,[r1,#1] | ||
653 | orr r2,r2,r3,lsl#8 | ||
654 | ldrb r3,[r1],#4 | ||
655 | orr r2,r2,r0,lsl#16 | ||
656 | # if 9==15 | ||
657 | str r1,[sp,#17*4] @ make room for r1 | ||
658 | # endif | ||
659 | eor r0,r7,r7,ror#5 | ||
660 | orr r2,r2,r3,lsl#24 | ||
661 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
662 | #endif | ||
663 | ldr r3,[r14],#4 @ *K256++ | ||
664 | add r10,r10,r2 @ h+=X[i] | ||
665 | str r2,[sp,#9*4] | ||
666 | eor r2,r8,r9 | ||
667 | add r10,r10,r0,ror#6 @ h+=Sigma1(e) | ||
668 | and r2,r2,r7 | ||
669 | add r10,r10,r3 @ h+=K256[i] | ||
670 | eor r2,r2,r9 @ Ch(e,f,g) | ||
671 | eor r0,r11,r11,ror#11 | ||
672 | add r10,r10,r2 @ h+=Ch(e,f,g) | ||
673 | #if 9==31 | ||
674 | and r3,r3,#0xff | ||
675 | cmp r3,#0xf2 @ done? | ||
676 | #endif | ||
677 | #if 9<15 | ||
678 | # if __ARM_ARCH__>=7 | ||
679 | ldr r2,[r1],#4 @ prefetch | ||
680 | # else | ||
681 | ldrb r2,[r1,#3] | ||
682 | # endif | ||
683 | eor r3,r11,r4 @ a^b, b^c in next round | ||
684 | #else | ||
685 | ldr r2,[sp,#11*4] @ from future BODY_16_xx | ||
686 | eor r3,r11,r4 @ a^b, b^c in next round | ||
687 | ldr r1,[sp,#8*4] @ from future BODY_16_xx | ||
688 | #endif | ||
689 | eor r0,r0,r11,ror#20 @ Sigma0(a) | ||
690 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
691 | add r6,r6,r10 @ d+=h | ||
692 | eor r12,r12,r4 @ Maj(a,b,c) | ||
693 | add r10,r10,r0,ror#2 @ h+=Sigma0(a) | ||
694 | @ add r10,r10,r12 @ h+=Maj(a,b,c) | ||
695 | #if __ARM_ARCH__>=7 | ||
696 | @ ldr r2,[r1],#4 @ 10 | ||
697 | # if 10==15 | ||
698 | str r1,[sp,#17*4] @ make room for r1 | ||
699 | # endif | ||
700 | eor r0,r6,r6,ror#5 | ||
701 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
702 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
703 | # ifndef __ARMEB__ | ||
704 | rev r2,r2 | ||
705 | # endif | ||
706 | #else | ||
707 | @ ldrb r2,[r1,#3] @ 10 | ||
708 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
709 | ldrb r12,[r1,#2] | ||
710 | ldrb r0,[r1,#1] | ||
711 | orr r2,r2,r12,lsl#8 | ||
712 | ldrb r12,[r1],#4 | ||
713 | orr r2,r2,r0,lsl#16 | ||
714 | # if 10==15 | ||
715 | str r1,[sp,#17*4] @ make room for r1 | ||
716 | # endif | ||
717 | eor r0,r6,r6,ror#5 | ||
718 | orr r2,r2,r12,lsl#24 | ||
719 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
720 | #endif | ||
721 | ldr r12,[r14],#4 @ *K256++ | ||
722 | add r9,r9,r2 @ h+=X[i] | ||
723 | str r2,[sp,#10*4] | ||
724 | eor r2,r7,r8 | ||
725 | add r9,r9,r0,ror#6 @ h+=Sigma1(e) | ||
726 | and r2,r2,r6 | ||
727 | add r9,r9,r12 @ h+=K256[i] | ||
728 | eor r2,r2,r8 @ Ch(e,f,g) | ||
729 | eor r0,r10,r10,ror#11 | ||
730 | add r9,r9,r2 @ h+=Ch(e,f,g) | ||
731 | #if 10==31 | ||
732 | and r12,r12,#0xff | ||
733 | cmp r12,#0xf2 @ done? | ||
734 | #endif | ||
735 | #if 10<15 | ||
736 | # if __ARM_ARCH__>=7 | ||
737 | ldr r2,[r1],#4 @ prefetch | ||
738 | # else | ||
739 | ldrb r2,[r1,#3] | ||
740 | # endif | ||
741 | eor r12,r10,r11 @ a^b, b^c in next round | ||
742 | #else | ||
743 | ldr r2,[sp,#12*4] @ from future BODY_16_xx | ||
744 | eor r12,r10,r11 @ a^b, b^c in next round | ||
745 | ldr r1,[sp,#9*4] @ from future BODY_16_xx | ||
746 | #endif | ||
747 | eor r0,r0,r10,ror#20 @ Sigma0(a) | ||
748 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
749 | add r5,r5,r9 @ d+=h | ||
750 | eor r3,r3,r11 @ Maj(a,b,c) | ||
751 | add r9,r9,r0,ror#2 @ h+=Sigma0(a) | ||
752 | @ add r9,r9,r3 @ h+=Maj(a,b,c) | ||
753 | #if __ARM_ARCH__>=7 | ||
754 | @ ldr r2,[r1],#4 @ 11 | ||
755 | # if 11==15 | ||
756 | str r1,[sp,#17*4] @ make room for r1 | ||
757 | # endif | ||
758 | eor r0,r5,r5,ror#5 | ||
759 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
760 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
761 | # ifndef __ARMEB__ | ||
762 | rev r2,r2 | ||
763 | # endif | ||
764 | #else | ||
765 | @ ldrb r2,[r1,#3] @ 11 | ||
766 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
767 | ldrb r3,[r1,#2] | ||
768 | ldrb r0,[r1,#1] | ||
769 | orr r2,r2,r3,lsl#8 | ||
770 | ldrb r3,[r1],#4 | ||
771 | orr r2,r2,r0,lsl#16 | ||
772 | # if 11==15 | ||
773 | str r1,[sp,#17*4] @ make room for r1 | ||
774 | # endif | ||
775 | eor r0,r5,r5,ror#5 | ||
776 | orr r2,r2,r3,lsl#24 | ||
777 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
778 | #endif | ||
779 | ldr r3,[r14],#4 @ *K256++ | ||
780 | add r8,r8,r2 @ h+=X[i] | ||
781 | str r2,[sp,#11*4] | ||
782 | eor r2,r6,r7 | ||
783 | add r8,r8,r0,ror#6 @ h+=Sigma1(e) | ||
784 | and r2,r2,r5 | ||
785 | add r8,r8,r3 @ h+=K256[i] | ||
786 | eor r2,r2,r7 @ Ch(e,f,g) | ||
787 | eor r0,r9,r9,ror#11 | ||
788 | add r8,r8,r2 @ h+=Ch(e,f,g) | ||
789 | #if 11==31 | ||
790 | and r3,r3,#0xff | ||
791 | cmp r3,#0xf2 @ done? | ||
792 | #endif | ||
793 | #if 11<15 | ||
794 | # if __ARM_ARCH__>=7 | ||
795 | ldr r2,[r1],#4 @ prefetch | ||
796 | # else | ||
797 | ldrb r2,[r1,#3] | ||
798 | # endif | ||
799 | eor r3,r9,r10 @ a^b, b^c in next round | ||
800 | #else | ||
801 | ldr r2,[sp,#13*4] @ from future BODY_16_xx | ||
802 | eor r3,r9,r10 @ a^b, b^c in next round | ||
803 | ldr r1,[sp,#10*4] @ from future BODY_16_xx | ||
804 | #endif | ||
805 | eor r0,r0,r9,ror#20 @ Sigma0(a) | ||
806 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
807 | add r4,r4,r8 @ d+=h | ||
808 | eor r12,r12,r10 @ Maj(a,b,c) | ||
809 | add r8,r8,r0,ror#2 @ h+=Sigma0(a) | ||
810 | @ add r8,r8,r12 @ h+=Maj(a,b,c) | ||
811 | #if __ARM_ARCH__>=7 | ||
812 | @ ldr r2,[r1],#4 @ 12 | ||
813 | # if 12==15 | ||
814 | str r1,[sp,#17*4] @ make room for r1 | ||
815 | # endif | ||
816 | eor r0,r4,r4,ror#5 | ||
817 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
818 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
819 | # ifndef __ARMEB__ | ||
820 | rev r2,r2 | ||
821 | # endif | ||
822 | #else | ||
823 | @ ldrb r2,[r1,#3] @ 12 | ||
824 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
825 | ldrb r12,[r1,#2] | ||
826 | ldrb r0,[r1,#1] | ||
827 | orr r2,r2,r12,lsl#8 | ||
828 | ldrb r12,[r1],#4 | ||
829 | orr r2,r2,r0,lsl#16 | ||
830 | # if 12==15 | ||
831 | str r1,[sp,#17*4] @ make room for r1 | ||
832 | # endif | ||
833 | eor r0,r4,r4,ror#5 | ||
834 | orr r2,r2,r12,lsl#24 | ||
835 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
836 | #endif | ||
837 | ldr r12,[r14],#4 @ *K256++ | ||
838 | add r7,r7,r2 @ h+=X[i] | ||
839 | str r2,[sp,#12*4] | ||
840 | eor r2,r5,r6 | ||
841 | add r7,r7,r0,ror#6 @ h+=Sigma1(e) | ||
842 | and r2,r2,r4 | ||
843 | add r7,r7,r12 @ h+=K256[i] | ||
844 | eor r2,r2,r6 @ Ch(e,f,g) | ||
845 | eor r0,r8,r8,ror#11 | ||
846 | add r7,r7,r2 @ h+=Ch(e,f,g) | ||
847 | #if 12==31 | ||
848 | and r12,r12,#0xff | ||
849 | cmp r12,#0xf2 @ done? | ||
850 | #endif | ||
851 | #if 12<15 | ||
852 | # if __ARM_ARCH__>=7 | ||
853 | ldr r2,[r1],#4 @ prefetch | ||
854 | # else | ||
855 | ldrb r2,[r1,#3] | ||
856 | # endif | ||
857 | eor r12,r8,r9 @ a^b, b^c in next round | ||
858 | #else | ||
859 | ldr r2,[sp,#14*4] @ from future BODY_16_xx | ||
860 | eor r12,r8,r9 @ a^b, b^c in next round | ||
861 | ldr r1,[sp,#11*4] @ from future BODY_16_xx | ||
862 | #endif | ||
863 | eor r0,r0,r8,ror#20 @ Sigma0(a) | ||
864 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
865 | add r11,r11,r7 @ d+=h | ||
866 | eor r3,r3,r9 @ Maj(a,b,c) | ||
867 | add r7,r7,r0,ror#2 @ h+=Sigma0(a) | ||
868 | @ add r7,r7,r3 @ h+=Maj(a,b,c) | ||
869 | #if __ARM_ARCH__>=7 | ||
870 | @ ldr r2,[r1],#4 @ 13 | ||
871 | # if 13==15 | ||
872 | str r1,[sp,#17*4] @ make room for r1 | ||
873 | # endif | ||
874 | eor r0,r11,r11,ror#5 | ||
875 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
876 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
877 | # ifndef __ARMEB__ | ||
878 | rev r2,r2 | ||
879 | # endif | ||
880 | #else | ||
881 | @ ldrb r2,[r1,#3] @ 13 | ||
882 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
883 | ldrb r3,[r1,#2] | ||
884 | ldrb r0,[r1,#1] | ||
885 | orr r2,r2,r3,lsl#8 | ||
886 | ldrb r3,[r1],#4 | ||
887 | orr r2,r2,r0,lsl#16 | ||
888 | # if 13==15 | ||
889 | str r1,[sp,#17*4] @ make room for r1 | ||
890 | # endif | ||
891 | eor r0,r11,r11,ror#5 | ||
892 | orr r2,r2,r3,lsl#24 | ||
893 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
894 | #endif | ||
895 | ldr r3,[r14],#4 @ *K256++ | ||
896 | add r6,r6,r2 @ h+=X[i] | ||
897 | str r2,[sp,#13*4] | ||
898 | eor r2,r4,r5 | ||
899 | add r6,r6,r0,ror#6 @ h+=Sigma1(e) | ||
900 | and r2,r2,r11 | ||
901 | add r6,r6,r3 @ h+=K256[i] | ||
902 | eor r2,r2,r5 @ Ch(e,f,g) | ||
903 | eor r0,r7,r7,ror#11 | ||
904 | add r6,r6,r2 @ h+=Ch(e,f,g) | ||
905 | #if 13==31 | ||
906 | and r3,r3,#0xff | ||
907 | cmp r3,#0xf2 @ done? | ||
908 | #endif | ||
909 | #if 13<15 | ||
910 | # if __ARM_ARCH__>=7 | ||
911 | ldr r2,[r1],#4 @ prefetch | ||
912 | # else | ||
913 | ldrb r2,[r1,#3] | ||
914 | # endif | ||
915 | eor r3,r7,r8 @ a^b, b^c in next round | ||
916 | #else | ||
917 | ldr r2,[sp,#15*4] @ from future BODY_16_xx | ||
918 | eor r3,r7,r8 @ a^b, b^c in next round | ||
919 | ldr r1,[sp,#12*4] @ from future BODY_16_xx | ||
920 | #endif | ||
921 | eor r0,r0,r7,ror#20 @ Sigma0(a) | ||
922 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
923 | add r10,r10,r6 @ d+=h | ||
924 | eor r12,r12,r8 @ Maj(a,b,c) | ||
925 | add r6,r6,r0,ror#2 @ h+=Sigma0(a) | ||
926 | @ add r6,r6,r12 @ h+=Maj(a,b,c) | ||
927 | #if __ARM_ARCH__>=7 | ||
928 | @ ldr r2,[r1],#4 @ 14 | ||
929 | # if 14==15 | ||
930 | str r1,[sp,#17*4] @ make room for r1 | ||
931 | # endif | ||
932 | eor r0,r10,r10,ror#5 | ||
933 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
934 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
935 | # ifndef __ARMEB__ | ||
936 | rev r2,r2 | ||
937 | # endif | ||
938 | #else | ||
939 | @ ldrb r2,[r1,#3] @ 14 | ||
940 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
941 | ldrb r12,[r1,#2] | ||
942 | ldrb r0,[r1,#1] | ||
943 | orr r2,r2,r12,lsl#8 | ||
944 | ldrb r12,[r1],#4 | ||
945 | orr r2,r2,r0,lsl#16 | ||
946 | # if 14==15 | ||
947 | str r1,[sp,#17*4] @ make room for r1 | ||
948 | # endif | ||
949 | eor r0,r10,r10,ror#5 | ||
950 | orr r2,r2,r12,lsl#24 | ||
951 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
952 | #endif | ||
953 | ldr r12,[r14],#4 @ *K256++ | ||
954 | add r5,r5,r2 @ h+=X[i] | ||
955 | str r2,[sp,#14*4] | ||
956 | eor r2,r11,r4 | ||
957 | add r5,r5,r0,ror#6 @ h+=Sigma1(e) | ||
958 | and r2,r2,r10 | ||
959 | add r5,r5,r12 @ h+=K256[i] | ||
960 | eor r2,r2,r4 @ Ch(e,f,g) | ||
961 | eor r0,r6,r6,ror#11 | ||
962 | add r5,r5,r2 @ h+=Ch(e,f,g) | ||
963 | #if 14==31 | ||
964 | and r12,r12,#0xff | ||
965 | cmp r12,#0xf2 @ done? | ||
966 | #endif | ||
967 | #if 14<15 | ||
968 | # if __ARM_ARCH__>=7 | ||
969 | ldr r2,[r1],#4 @ prefetch | ||
970 | # else | ||
971 | ldrb r2,[r1,#3] | ||
972 | # endif | ||
973 | eor r12,r6,r7 @ a^b, b^c in next round | ||
974 | #else | ||
975 | ldr r2,[sp,#0*4] @ from future BODY_16_xx | ||
976 | eor r12,r6,r7 @ a^b, b^c in next round | ||
977 | ldr r1,[sp,#13*4] @ from future BODY_16_xx | ||
978 | #endif | ||
979 | eor r0,r0,r6,ror#20 @ Sigma0(a) | ||
980 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
981 | add r9,r9,r5 @ d+=h | ||
982 | eor r3,r3,r7 @ Maj(a,b,c) | ||
983 | add r5,r5,r0,ror#2 @ h+=Sigma0(a) | ||
984 | @ add r5,r5,r3 @ h+=Maj(a,b,c) | ||
985 | #if __ARM_ARCH__>=7 | ||
986 | @ ldr r2,[r1],#4 @ 15 | ||
987 | # if 15==15 | ||
988 | str r1,[sp,#17*4] @ make room for r1 | ||
989 | # endif | ||
990 | eor r0,r9,r9,ror#5 | ||
991 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
992 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
993 | # ifndef __ARMEB__ | ||
994 | rev r2,r2 | ||
995 | # endif | ||
996 | #else | ||
997 | @ ldrb r2,[r1,#3] @ 15 | ||
998 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
999 | ldrb r3,[r1,#2] | ||
1000 | ldrb r0,[r1,#1] | ||
1001 | orr r2,r2,r3,lsl#8 | ||
1002 | ldrb r3,[r1],#4 | ||
1003 | orr r2,r2,r0,lsl#16 | ||
1004 | # if 15==15 | ||
1005 | str r1,[sp,#17*4] @ make room for r1 | ||
1006 | # endif | ||
1007 | eor r0,r9,r9,ror#5 | ||
1008 | orr r2,r2,r3,lsl#24 | ||
1009 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
1010 | #endif | ||
1011 | ldr r3,[r14],#4 @ *K256++ | ||
1012 | add r4,r4,r2 @ h+=X[i] | ||
1013 | str r2,[sp,#15*4] | ||
1014 | eor r2,r10,r11 | ||
1015 | add r4,r4,r0,ror#6 @ h+=Sigma1(e) | ||
1016 | and r2,r2,r9 | ||
1017 | add r4,r4,r3 @ h+=K256[i] | ||
1018 | eor r2,r2,r11 @ Ch(e,f,g) | ||
1019 | eor r0,r5,r5,ror#11 | ||
1020 | add r4,r4,r2 @ h+=Ch(e,f,g) | ||
1021 | #if 15==31 | ||
1022 | and r3,r3,#0xff | ||
1023 | cmp r3,#0xf2 @ done? | ||
1024 | #endif | ||
1025 | #if 15<15 | ||
1026 | # if __ARM_ARCH__>=7 | ||
1027 | ldr r2,[r1],#4 @ prefetch | ||
1028 | # else | ||
1029 | ldrb r2,[r1,#3] | ||
1030 | # endif | ||
1031 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1032 | #else | ||
1033 | ldr r2,[sp,#1*4] @ from future BODY_16_xx | ||
1034 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1035 | ldr r1,[sp,#14*4] @ from future BODY_16_xx | ||
1036 | #endif | ||
1037 | eor r0,r0,r5,ror#20 @ Sigma0(a) | ||
1038 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1039 | add r8,r8,r4 @ d+=h | ||
1040 | eor r12,r12,r6 @ Maj(a,b,c) | ||
1041 | add r4,r4,r0,ror#2 @ h+=Sigma0(a) | ||
1042 | @ add r4,r4,r12 @ h+=Maj(a,b,c) | ||
1043 | .Lrounds_16_xx: | ||
1044 | @ ldr r2,[sp,#1*4] @ 16 | ||
1045 | @ ldr r1,[sp,#14*4] | ||
1046 | mov r0,r2,ror#7 | ||
1047 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
1048 | mov r12,r1,ror#17 | ||
1049 | eor r0,r0,r2,ror#18 | ||
1050 | eor r12,r12,r1,ror#19 | ||
1051 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1052 | ldr r2,[sp,#0*4] | ||
1053 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1054 | ldr r1,[sp,#9*4] | ||
1055 | |||
1056 | add r12,r12,r0 | ||
1057 | eor r0,r8,r8,ror#5 @ from BODY_00_15 | ||
1058 | add r2,r2,r12 | ||
1059 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
1060 | add r2,r2,r1 @ X[i] | ||
1061 | ldr r12,[r14],#4 @ *K256++ | ||
1062 | add r11,r11,r2 @ h+=X[i] | ||
1063 | str r2,[sp,#0*4] | ||
1064 | eor r2,r9,r10 | ||
1065 | add r11,r11,r0,ror#6 @ h+=Sigma1(e) | ||
1066 | and r2,r2,r8 | ||
1067 | add r11,r11,r12 @ h+=K256[i] | ||
1068 | eor r2,r2,r10 @ Ch(e,f,g) | ||
1069 | eor r0,r4,r4,ror#11 | ||
1070 | add r11,r11,r2 @ h+=Ch(e,f,g) | ||
1071 | #if 16==31 | ||
1072 | and r12,r12,#0xff | ||
1073 | cmp r12,#0xf2 @ done? | ||
1074 | #endif | ||
1075 | #if 16<15 | ||
1076 | # if __ARM_ARCH__>=7 | ||
1077 | ldr r2,[r1],#4 @ prefetch | ||
1078 | # else | ||
1079 | ldrb r2,[r1,#3] | ||
1080 | # endif | ||
1081 | eor r12,r4,r5 @ a^b, b^c in next round | ||
1082 | #else | ||
1083 | ldr r2,[sp,#2*4] @ from future BODY_16_xx | ||
1084 | eor r12,r4,r5 @ a^b, b^c in next round | ||
1085 | ldr r1,[sp,#15*4] @ from future BODY_16_xx | ||
1086 | #endif | ||
1087 | eor r0,r0,r4,ror#20 @ Sigma0(a) | ||
1088 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1089 | add r7,r7,r11 @ d+=h | ||
1090 | eor r3,r3,r5 @ Maj(a,b,c) | ||
1091 | add r11,r11,r0,ror#2 @ h+=Sigma0(a) | ||
1092 | @ add r11,r11,r3 @ h+=Maj(a,b,c) | ||
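From .Lrounds_16_xx on, each round first extends the message schedule in place: the 16-word window X[0..15] kept on the stack is updated as X[i] += sigma0(X[i+1]) + X[i+9] + sigma1(X[i+14]) (indices mod 16), which is the standard recurrence rewritten for a circular buffer:

    \begin{aligned}
    W_t &= \sigma_1(W_{t-2}) + W_{t-7} + \sigma_0(W_{t-15}) + W_{t-16}\\
    \sigma_0(x) &= (x \ggg 7)\oplus(x \ggg 18)\oplus(x \gg 3)\\
    \sigma_1(x) &= (x \ggg 17)\oplus(x \ggg 19)\oplus(x \gg 10)
    \end{aligned}

matching the mov/eor chains above: ror#7/ror#18/lsr#3 compute sigma0 of X[i+1], and ror#17/ror#19/lsr#10 compute sigma1 of X[i+14].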
1093 | @ ldr r2,[sp,#2*4] @ 17 | ||
1094 | @ ldr r1,[sp,#15*4] | ||
1095 | mov r0,r2,ror#7 | ||
1096 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
1097 | mov r3,r1,ror#17 | ||
1098 | eor r0,r0,r2,ror#18 | ||
1099 | eor r3,r3,r1,ror#19 | ||
1100 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1101 | ldr r2,[sp,#1*4] | ||
1102 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1103 | ldr r1,[sp,#10*4] | ||
1104 | |||
1105 | add r3,r3,r0 | ||
1106 | eor r0,r7,r7,ror#5 @ from BODY_00_15 | ||
1107 | add r2,r2,r3 | ||
1108 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
1109 | add r2,r2,r1 @ X[i] | ||
1110 | ldr r3,[r14],#4 @ *K256++ | ||
1111 | add r10,r10,r2 @ h+=X[i] | ||
1112 | str r2,[sp,#1*4] | ||
1113 | eor r2,r8,r9 | ||
1114 | add r10,r10,r0,ror#6 @ h+=Sigma1(e) | ||
1115 | and r2,r2,r7 | ||
1116 | add r10,r10,r3 @ h+=K256[i] | ||
1117 | eor r2,r2,r9 @ Ch(e,f,g) | ||
1118 | eor r0,r11,r11,ror#11 | ||
1119 | add r10,r10,r2 @ h+=Ch(e,f,g) | ||
1120 | #if 17==31 | ||
1121 | and r3,r3,#0xff | ||
1122 | cmp r3,#0xf2 @ done? | ||
1123 | #endif | ||
1124 | #if 17<15 | ||
1125 | # if __ARM_ARCH__>=7 | ||
1126 | ldr r2,[r1],#4 @ prefetch | ||
1127 | # else | ||
1128 | ldrb r2,[r1,#3] | ||
1129 | # endif | ||
1130 | eor r3,r11,r4 @ a^b, b^c in next round | ||
1131 | #else | ||
1132 | ldr r2,[sp,#3*4] @ from future BODY_16_xx | ||
1133 | eor r3,r11,r4 @ a^b, b^c in next round | ||
1134 | ldr r1,[sp,#0*4] @ from future BODY_16_xx | ||
1135 | #endif | ||
1136 | eor r0,r0,r11,ror#20 @ Sigma0(a) | ||
1137 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1138 | add r6,r6,r10 @ d+=h | ||
1139 | eor r12,r12,r4 @ Maj(a,b,c) | ||
1140 | add r10,r10,r0,ror#2 @ h+=Sigma0(a) | ||
1141 | @ add r10,r10,r12 @ h+=Maj(a,b,c) | ||
1142 | @ ldr r2,[sp,#3*4] @ 18 | ||
1143 | @ ldr r1,[sp,#0*4] | ||
1144 | mov r0,r2,ror#7 | ||
1145 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
1146 | mov r12,r1,ror#17 | ||
1147 | eor r0,r0,r2,ror#18 | ||
1148 | eor r12,r12,r1,ror#19 | ||
1149 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1150 | ldr r2,[sp,#2*4] | ||
1151 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1152 | ldr r1,[sp,#11*4] | ||
1153 | |||
1154 | add r12,r12,r0 | ||
1155 | eor r0,r6,r6,ror#5 @ from BODY_00_15 | ||
1156 | add r2,r2,r12 | ||
1157 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
1158 | add r2,r2,r1 @ X[i] | ||
1159 | ldr r12,[r14],#4 @ *K256++ | ||
1160 | add r9,r9,r2 @ h+=X[i] | ||
1161 | str r2,[sp,#2*4] | ||
1162 | eor r2,r7,r8 | ||
1163 | add r9,r9,r0,ror#6 @ h+=Sigma1(e) | ||
1164 | and r2,r2,r6 | ||
1165 | add r9,r9,r12 @ h+=K256[i] | ||
1166 | eor r2,r2,r8 @ Ch(e,f,g) | ||
1167 | eor r0,r10,r10,ror#11 | ||
1168 | add r9,r9,r2 @ h+=Ch(e,f,g) | ||
1169 | #if 18==31 | ||
1170 | and r12,r12,#0xff | ||
1171 | cmp r12,#0xf2 @ done? | ||
1172 | #endif | ||
1173 | #if 18<15 | ||
1174 | # if __ARM_ARCH__>=7 | ||
1175 | ldr r2,[r1],#4 @ prefetch | ||
1176 | # else | ||
1177 | ldrb r2,[r1,#3] | ||
1178 | # endif | ||
1179 | eor r12,r10,r11 @ a^b, b^c in next round | ||
1180 | #else | ||
1181 | ldr r2,[sp,#4*4] @ from future BODY_16_xx | ||
1182 | eor r12,r10,r11 @ a^b, b^c in next round | ||
1183 | ldr r1,[sp,#1*4] @ from future BODY_16_xx | ||
1184 | #endif | ||
1185 | eor r0,r0,r10,ror#20 @ Sigma0(a) | ||
1186 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1187 | add r5,r5,r9 @ d+=h | ||
1188 | eor r3,r3,r11 @ Maj(a,b,c) | ||
1189 | add r9,r9,r0,ror#2 @ h+=Sigma0(a) | ||
1190 | @ add r9,r9,r3 @ h+=Maj(a,b,c) | ||
1191 | @ ldr r2,[sp,#4*4] @ 19 | ||
1192 | @ ldr r1,[sp,#1*4] | ||
1193 | mov r0,r2,ror#7 | ||
1194 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
1195 | mov r3,r1,ror#17 | ||
1196 | eor r0,r0,r2,ror#18 | ||
1197 | eor r3,r3,r1,ror#19 | ||
1198 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1199 | ldr r2,[sp,#3*4] | ||
1200 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1201 | ldr r1,[sp,#12*4] | ||
1202 | |||
1203 | add r3,r3,r0 | ||
1204 | eor r0,r5,r5,ror#5 @ from BODY_00_15 | ||
1205 | add r2,r2,r3 | ||
1206 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
1207 | add r2,r2,r1 @ X[i] | ||
1208 | ldr r3,[r14],#4 @ *K256++ | ||
1209 | add r8,r8,r2 @ h+=X[i] | ||
1210 | str r2,[sp,#3*4] | ||
1211 | eor r2,r6,r7 | ||
1212 | add r8,r8,r0,ror#6 @ h+=Sigma1(e) | ||
1213 | and r2,r2,r5 | ||
1214 | add r8,r8,r3 @ h+=K256[i] | ||
1215 | eor r2,r2,r7 @ Ch(e,f,g) | ||
1216 | eor r0,r9,r9,ror#11 | ||
1217 | add r8,r8,r2 @ h+=Ch(e,f,g) | ||
1218 | #if 19==31 | ||
1219 | and r3,r3,#0xff | ||
1220 | cmp r3,#0xf2 @ done? | ||
1221 | #endif | ||
1222 | #if 19<15 | ||
1223 | # if __ARM_ARCH__>=7 | ||
1224 | ldr r2,[r1],#4 @ prefetch | ||
1225 | # else | ||
1226 | ldrb r2,[r1,#3] | ||
1227 | # endif | ||
1228 | eor r3,r9,r10 @ a^b, b^c in next round | ||
1229 | #else | ||
1230 | ldr r2,[sp,#5*4] @ from future BODY_16_xx | ||
1231 | eor r3,r9,r10 @ a^b, b^c in next round | ||
1232 | ldr r1,[sp,#2*4] @ from future BODY_16_xx | ||
1233 | #endif | ||
1234 | eor r0,r0,r9,ror#20 @ Sigma0(a) | ||
1235 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1236 | add r4,r4,r8 @ d+=h | ||
1237 | eor r12,r12,r10 @ Maj(a,b,c) | ||
1238 | add r8,r8,r0,ror#2 @ h+=Sigma0(a) | ||
1239 | @ add r8,r8,r12 @ h+=Maj(a,b,c) | ||
1240 | @ ldr r2,[sp,#5*4] @ 20 | ||
1241 | @ ldr r1,[sp,#2*4] | ||
1242 | mov r0,r2,ror#7 | ||
1243 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
1244 | mov r12,r1,ror#17 | ||
1245 | eor r0,r0,r2,ror#18 | ||
1246 | eor r12,r12,r1,ror#19 | ||
1247 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1248 | ldr r2,[sp,#4*4] | ||
1249 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1250 | ldr r1,[sp,#13*4] | ||
1251 | |||
1252 | add r12,r12,r0 | ||
1253 | eor r0,r4,r4,ror#5 @ from BODY_00_15 | ||
1254 | add r2,r2,r12 | ||
1255 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
1256 | add r2,r2,r1 @ X[i] | ||
1257 | ldr r12,[r14],#4 @ *K256++ | ||
1258 | add r7,r7,r2 @ h+=X[i] | ||
1259 | str r2,[sp,#4*4] | ||
1260 | eor r2,r5,r6 | ||
1261 | add r7,r7,r0,ror#6 @ h+=Sigma1(e) | ||
1262 | and r2,r2,r4 | ||
1263 | add r7,r7,r12 @ h+=K256[i] | ||
1264 | eor r2,r2,r6 @ Ch(e,f,g) | ||
1265 | eor r0,r8,r8,ror#11 | ||
1266 | add r7,r7,r2 @ h+=Ch(e,f,g) | ||
1267 | #if 20==31 | ||
1268 | and r12,r12,#0xff | ||
1269 | cmp r12,#0xf2 @ done? | ||
1270 | #endif | ||
1271 | #if 20<15 | ||
1272 | # if __ARM_ARCH__>=7 | ||
1273 | ldr r2,[r1],#4 @ prefetch | ||
1274 | # else | ||
1275 | ldrb r2,[r1,#3] | ||
1276 | # endif | ||
1277 | eor r12,r8,r9 @ a^b, b^c in next round | ||
1278 | #else | ||
1279 | ldr r2,[sp,#6*4] @ from future BODY_16_xx | ||
1280 | eor r12,r8,r9 @ a^b, b^c in next round | ||
1281 | ldr r1,[sp,#3*4] @ from future BODY_16_xx | ||
1282 | #endif | ||
1283 | eor r0,r0,r8,ror#20 @ Sigma0(a) | ||
1284 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1285 | add r11,r11,r7 @ d+=h | ||
1286 | eor r3,r3,r9 @ Maj(a,b,c) | ||
1287 | add r7,r7,r0,ror#2 @ h+=Sigma0(a) | ||
1288 | @ add r7,r7,r3 @ h+=Maj(a,b,c) | ||
1289 | @ ldr r2,[sp,#6*4] @ 21 | ||
1290 | @ ldr r1,[sp,#3*4] | ||
1291 | mov r0,r2,ror#7 | ||
1292 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
1293 | mov r3,r1,ror#17 | ||
1294 | eor r0,r0,r2,ror#18 | ||
1295 | eor r3,r3,r1,ror#19 | ||
1296 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1297 | ldr r2,[sp,#5*4] | ||
1298 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1299 | ldr r1,[sp,#14*4] | ||
1300 | |||
1301 | add r3,r3,r0 | ||
1302 | eor r0,r11,r11,ror#5 @ from BODY_00_15 | ||
1303 | add r2,r2,r3 | ||
1304 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
1305 | add r2,r2,r1 @ X[i] | ||
1306 | ldr r3,[r14],#4 @ *K256++ | ||
1307 | add r6,r6,r2 @ h+=X[i] | ||
1308 | str r2,[sp,#5*4] | ||
1309 | eor r2,r4,r5 | ||
1310 | add r6,r6,r0,ror#6 @ h+=Sigma1(e) | ||
1311 | and r2,r2,r11 | ||
1312 | add r6,r6,r3 @ h+=K256[i] | ||
1313 | eor r2,r2,r5 @ Ch(e,f,g) | ||
1314 | eor r0,r7,r7,ror#11 | ||
1315 | add r6,r6,r2 @ h+=Ch(e,f,g) | ||
1316 | #if 21==31 | ||
1317 | and r3,r3,#0xff | ||
1318 | cmp r3,#0xf2 @ done? | ||
1319 | #endif | ||
1320 | #if 21<15 | ||
1321 | # if __ARM_ARCH__>=7 | ||
1322 | ldr r2,[r1],#4 @ prefetch | ||
1323 | # else | ||
1324 | ldrb r2,[r1,#3] | ||
1325 | # endif | ||
1326 | eor r3,r7,r8 @ a^b, b^c in next round | ||
1327 | #else | ||
1328 | ldr r2,[sp,#7*4] @ from future BODY_16_xx | ||
1329 | eor r3,r7,r8 @ a^b, b^c in next round | ||
1330 | ldr r1,[sp,#4*4] @ from future BODY_16_xx | ||
1331 | #endif | ||
1332 | eor r0,r0,r7,ror#20 @ Sigma0(a) | ||
1333 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1334 | add r10,r10,r6 @ d+=h | ||
1335 | eor r12,r12,r8 @ Maj(a,b,c) | ||
1336 | add r6,r6,r0,ror#2 @ h+=Sigma0(a) | ||
1337 | @ add r6,r6,r12 @ h+=Maj(a,b,c) | ||
1338 | @ ldr r2,[sp,#7*4] @ 22 | ||
1339 | @ ldr r1,[sp,#4*4] | ||
1340 | mov r0,r2,ror#7 | ||
1341 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
1342 | mov r12,r1,ror#17 | ||
1343 | eor r0,r0,r2,ror#18 | ||
1344 | eor r12,r12,r1,ror#19 | ||
1345 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1346 | ldr r2,[sp,#6*4] | ||
1347 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1348 | ldr r1,[sp,#15*4] | ||
1349 | |||
1350 | add r12,r12,r0 | ||
1351 | eor r0,r10,r10,ror#5 @ from BODY_00_15 | ||
1352 | add r2,r2,r12 | ||
1353 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
1354 | add r2,r2,r1 @ X[i] | ||
1355 | ldr r12,[r14],#4 @ *K256++ | ||
1356 | add r5,r5,r2 @ h+=X[i] | ||
1357 | str r2,[sp,#6*4] | ||
1358 | eor r2,r11,r4 | ||
1359 | add r5,r5,r0,ror#6 @ h+=Sigma1(e) | ||
1360 | and r2,r2,r10 | ||
1361 | add r5,r5,r12 @ h+=K256[i] | ||
1362 | eor r2,r2,r4 @ Ch(e,f,g) | ||
1363 | eor r0,r6,r6,ror#11 | ||
1364 | add r5,r5,r2 @ h+=Ch(e,f,g) | ||
1365 | #if 22==31 | ||
1366 | and r12,r12,#0xff | ||
1367 | cmp r12,#0xf2 @ done? | ||
1368 | #endif | ||
1369 | #if 22<15 | ||
1370 | # if __ARM_ARCH__>=7 | ||
1371 | ldr r2,[r1],#4 @ prefetch | ||
1372 | # else | ||
1373 | ldrb r2,[r1,#3] | ||
1374 | # endif | ||
1375 | eor r12,r6,r7 @ a^b, b^c in next round | ||
1376 | #else | ||
1377 | ldr r2,[sp,#8*4] @ from future BODY_16_xx | ||
1378 | eor r12,r6,r7 @ a^b, b^c in next round | ||
1379 | ldr r1,[sp,#5*4] @ from future BODY_16_xx | ||
1380 | #endif | ||
1381 | eor r0,r0,r6,ror#20 @ Sigma0(a) | ||
1382 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1383 | add r9,r9,r5 @ d+=h | ||
1384 | eor r3,r3,r7 @ Maj(a,b,c) | ||
1385 | add r5,r5,r0,ror#2 @ h+=Sigma0(a) | ||
1386 | @ add r5,r5,r3 @ h+=Maj(a,b,c) | ||
1387 | @ ldr r2,[sp,#8*4] @ 23 | ||
1388 | @ ldr r1,[sp,#5*4] | ||
1389 | mov r0,r2,ror#7 | ||
1390 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
1391 | mov r3,r1,ror#17 | ||
1392 | eor r0,r0,r2,ror#18 | ||
1393 | eor r3,r3,r1,ror#19 | ||
1394 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1395 | ldr r2,[sp,#7*4] | ||
1396 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1397 | ldr r1,[sp,#0*4] | ||
1398 | |||
1399 | add r3,r3,r0 | ||
1400 | eor r0,r9,r9,ror#5 @ from BODY_00_15 | ||
1401 | add r2,r2,r3 | ||
1402 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
1403 | add r2,r2,r1 @ X[i] | ||
1404 | ldr r3,[r14],#4 @ *K256++ | ||
1405 | add r4,r4,r2 @ h+=X[i] | ||
1406 | str r2,[sp,#7*4] | ||
1407 | eor r2,r10,r11 | ||
1408 | add r4,r4,r0,ror#6 @ h+=Sigma1(e) | ||
1409 | and r2,r2,r9 | ||
1410 | add r4,r4,r3 @ h+=K256[i] | ||
1411 | eor r2,r2,r11 @ Ch(e,f,g) | ||
1412 | eor r0,r5,r5,ror#11 | ||
1413 | add r4,r4,r2 @ h+=Ch(e,f,g) | ||
1414 | #if 23==31 | ||
1415 | and r3,r3,#0xff | ||
1416 | cmp r3,#0xf2 @ done? | ||
1417 | #endif | ||
1418 | #if 23<15 | ||
1419 | # if __ARM_ARCH__>=7 | ||
1420 | ldr r2,[r1],#4 @ prefetch | ||
1421 | # else | ||
1422 | ldrb r2,[r1,#3] | ||
1423 | # endif | ||
1424 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1425 | #else | ||
1426 | ldr r2,[sp,#9*4] @ from future BODY_16_xx | ||
1427 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1428 | ldr r1,[sp,#6*4] @ from future BODY_16_xx | ||
1429 | #endif | ||
1430 | eor r0,r0,r5,ror#20 @ Sigma0(a) | ||
1431 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1432 | add r8,r8,r4 @ d+=h | ||
1433 | eor r12,r12,r6 @ Maj(a,b,c) | ||
1434 | add r4,r4,r0,ror#2 @ h+=Sigma0(a) | ||
1435 | @ add r4,r4,r12 @ h+=Maj(a,b,c) | ||
1436 | @ ldr r2,[sp,#9*4] @ 24 | ||
1437 | @ ldr r1,[sp,#6*4] | ||
1438 | mov r0,r2,ror#7 | ||
1439 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
1440 | mov r12,r1,ror#17 | ||
1441 | eor r0,r0,r2,ror#18 | ||
1442 | eor r12,r12,r1,ror#19 | ||
1443 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1444 | ldr r2,[sp,#8*4] | ||
1445 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1446 | ldr r1,[sp,#1*4] | ||
1447 | |||
1448 | add r12,r12,r0 | ||
1449 | eor r0,r8,r8,ror#5 @ from BODY_00_15 | ||
1450 | add r2,r2,r12 | ||
1451 | eor r0,r0,r8,ror#19 @ Sigma1(e) | ||
1452 | add r2,r2,r1 @ X[i] | ||
1453 | ldr r12,[r14],#4 @ *K256++ | ||
1454 | add r11,r11,r2 @ h+=X[i] | ||
1455 | str r2,[sp,#8*4] | ||
1456 | eor r2,r9,r10 | ||
1457 | add r11,r11,r0,ror#6 @ h+=Sigma1(e) | ||
1458 | and r2,r2,r8 | ||
1459 | add r11,r11,r12 @ h+=K256[i] | ||
1460 | eor r2,r2,r10 @ Ch(e,f,g) | ||
1461 | eor r0,r4,r4,ror#11 | ||
1462 | add r11,r11,r2 @ h+=Ch(e,f,g) | ||
1463 | #if 24==31 | ||
1464 | and r12,r12,#0xff | ||
1465 | cmp r12,#0xf2 @ done? | ||
1466 | #endif | ||
1467 | #if 24<15 | ||
1468 | # if __ARM_ARCH__>=7 | ||
1469 | ldr r2,[r1],#4 @ prefetch | ||
1470 | # else | ||
1471 | ldrb r2,[r1,#3] | ||
1472 | # endif | ||
1473 | eor r12,r4,r5 @ a^b, b^c in next round | ||
1474 | #else | ||
1475 | ldr r2,[sp,#10*4] @ from future BODY_16_xx | ||
1476 | eor r12,r4,r5 @ a^b, b^c in next round | ||
1477 | ldr r1,[sp,#7*4] @ from future BODY_16_xx | ||
1478 | #endif | ||
1479 | eor r0,r0,r4,ror#20 @ Sigma0(a) | ||
1480 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1481 | add r7,r7,r11 @ d+=h | ||
1482 | eor r3,r3,r5 @ Maj(a,b,c) | ||
1483 | add r11,r11,r0,ror#2 @ h+=Sigma0(a) | ||
1484 | @ add r11,r11,r3 @ h+=Maj(a,b,c) | ||
1485 | @ ldr r2,[sp,#10*4] @ 25 | ||
1486 | @ ldr r1,[sp,#7*4] | ||
1487 | mov r0,r2,ror#7 | ||
1488 | add r11,r11,r3 @ h+=Maj(a,b,c) from the past | ||
1489 | mov r3,r1,ror#17 | ||
1490 | eor r0,r0,r2,ror#18 | ||
1491 | eor r3,r3,r1,ror#19 | ||
1492 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1493 | ldr r2,[sp,#9*4] | ||
1494 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1495 | ldr r1,[sp,#2*4] | ||
1496 | |||
1497 | add r3,r3,r0 | ||
1498 | eor r0,r7,r7,ror#5 @ from BODY_00_15 | ||
1499 | add r2,r2,r3 | ||
1500 | eor r0,r0,r7,ror#19 @ Sigma1(e) | ||
1501 | add r2,r2,r1 @ X[i] | ||
1502 | ldr r3,[r14],#4 @ *K256++ | ||
1503 | add r10,r10,r2 @ h+=X[i] | ||
1504 | str r2,[sp,#9*4] | ||
1505 | eor r2,r8,r9 | ||
1506 | add r10,r10,r0,ror#6 @ h+=Sigma1(e) | ||
1507 | and r2,r2,r7 | ||
1508 | add r10,r10,r3 @ h+=K256[i] | ||
1509 | eor r2,r2,r9 @ Ch(e,f,g) | ||
1510 | eor r0,r11,r11,ror#11 | ||
1511 | add r10,r10,r2 @ h+=Ch(e,f,g) | ||
1512 | #if 25==31 | ||
1513 | and r3,r3,#0xff | ||
1514 | cmp r3,#0xf2 @ done? | ||
1515 | #endif | ||
1516 | #if 25<15 | ||
1517 | # if __ARM_ARCH__>=7 | ||
1518 | ldr r2,[r1],#4 @ prefetch | ||
1519 | # else | ||
1520 | ldrb r2,[r1,#3] | ||
1521 | # endif | ||
1522 | eor r3,r11,r4 @ a^b, b^c in next round | ||
1523 | #else | ||
1524 | ldr r2,[sp,#11*4] @ from future BODY_16_xx | ||
1525 | eor r3,r11,r4 @ a^b, b^c in next round | ||
1526 | ldr r1,[sp,#8*4] @ from future BODY_16_xx | ||
1527 | #endif | ||
1528 | eor r0,r0,r11,ror#20 @ Sigma0(a) | ||
1529 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1530 | add r6,r6,r10 @ d+=h | ||
1531 | eor r12,r12,r4 @ Maj(a,b,c) | ||
1532 | add r10,r10,r0,ror#2 @ h+=Sigma0(a) | ||
1533 | @ add r10,r10,r12 @ h+=Maj(a,b,c) | ||
1534 | @ ldr r2,[sp,#11*4] @ 26 | ||
1535 | @ ldr r1,[sp,#8*4] | ||
1536 | mov r0,r2,ror#7 | ||
1537 | add r10,r10,r12 @ h+=Maj(a,b,c) from the past | ||
1538 | mov r12,r1,ror#17 | ||
1539 | eor r0,r0,r2,ror#18 | ||
1540 | eor r12,r12,r1,ror#19 | ||
1541 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1542 | ldr r2,[sp,#10*4] | ||
1543 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1544 | ldr r1,[sp,#3*4] | ||
1545 | |||
1546 | add r12,r12,r0 | ||
1547 | eor r0,r6,r6,ror#5 @ from BODY_00_15 | ||
1548 | add r2,r2,r12 | ||
1549 | eor r0,r0,r6,ror#19 @ Sigma1(e) | ||
1550 | add r2,r2,r1 @ X[i] | ||
1551 | ldr r12,[r14],#4 @ *K256++ | ||
1552 | add r9,r9,r2 @ h+=X[i] | ||
1553 | str r2,[sp,#10*4] | ||
1554 | eor r2,r7,r8 | ||
1555 | add r9,r9,r0,ror#6 @ h+=Sigma1(e) | ||
1556 | and r2,r2,r6 | ||
1557 | add r9,r9,r12 @ h+=K256[i] | ||
1558 | eor r2,r2,r8 @ Ch(e,f,g) | ||
1559 | eor r0,r10,r10,ror#11 | ||
1560 | add r9,r9,r2 @ h+=Ch(e,f,g) | ||
1561 | #if 26==31 | ||
1562 | and r12,r12,#0xff | ||
1563 | cmp r12,#0xf2 @ done? | ||
1564 | #endif | ||
1565 | #if 26<15 | ||
1566 | # if __ARM_ARCH__>=7 | ||
1567 | ldr r2,[r1],#4 @ prefetch | ||
1568 | # else | ||
1569 | ldrb r2,[r1,#3] | ||
1570 | # endif | ||
1571 | eor r12,r10,r11 @ a^b, b^c in next round | ||
1572 | #else | ||
1573 | ldr r2,[sp,#12*4] @ from future BODY_16_xx | ||
1574 | eor r12,r10,r11 @ a^b, b^c in next round | ||
1575 | ldr r1,[sp,#9*4] @ from future BODY_16_xx | ||
1576 | #endif | ||
1577 | eor r0,r0,r10,ror#20 @ Sigma0(a) | ||
1578 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1579 | add r5,r5,r9 @ d+=h | ||
1580 | eor r3,r3,r11 @ Maj(a,b,c) | ||
1581 | add r9,r9,r0,ror#2 @ h+=Sigma0(a) | ||
1582 | @ add r9,r9,r3 @ h+=Maj(a,b,c) | ||
1583 | @ ldr r2,[sp,#12*4] @ 27 | ||
1584 | @ ldr r1,[sp,#9*4] | ||
1585 | mov r0,r2,ror#7 | ||
1586 | add r9,r9,r3 @ h+=Maj(a,b,c) from the past | ||
1587 | mov r3,r1,ror#17 | ||
1588 | eor r0,r0,r2,ror#18 | ||
1589 | eor r3,r3,r1,ror#19 | ||
1590 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1591 | ldr r2,[sp,#11*4] | ||
1592 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1593 | ldr r1,[sp,#4*4] | ||
1594 | |||
1595 | add r3,r3,r0 | ||
1596 | eor r0,r5,r5,ror#5 @ from BODY_00_15 | ||
1597 | add r2,r2,r3 | ||
1598 | eor r0,r0,r5,ror#19 @ Sigma1(e) | ||
1599 | add r2,r2,r1 @ X[i] | ||
1600 | ldr r3,[r14],#4 @ *K256++ | ||
1601 | add r8,r8,r2 @ h+=X[i] | ||
1602 | str r2,[sp,#11*4] | ||
1603 | eor r2,r6,r7 | ||
1604 | add r8,r8,r0,ror#6 @ h+=Sigma1(e) | ||
1605 | and r2,r2,r5 | ||
1606 | add r8,r8,r3 @ h+=K256[i] | ||
1607 | eor r2,r2,r7 @ Ch(e,f,g) | ||
1608 | eor r0,r9,r9,ror#11 | ||
1609 | add r8,r8,r2 @ h+=Ch(e,f,g) | ||
1610 | #if 27==31 | ||
1611 | and r3,r3,#0xff | ||
1612 | cmp r3,#0xf2 @ done? | ||
1613 | #endif | ||
1614 | #if 27<15 | ||
1615 | # if __ARM_ARCH__>=7 | ||
1616 | ldr r2,[r1],#4 @ prefetch | ||
1617 | # else | ||
1618 | ldrb r2,[r1,#3] | ||
1619 | # endif | ||
1620 | eor r3,r9,r10 @ a^b, b^c in next round | ||
1621 | #else | ||
1622 | ldr r2,[sp,#13*4] @ from future BODY_16_xx | ||
1623 | eor r3,r9,r10 @ a^b, b^c in next round | ||
1624 | ldr r1,[sp,#10*4] @ from future BODY_16_xx | ||
1625 | #endif | ||
1626 | eor r0,r0,r9,ror#20 @ Sigma0(a) | ||
1627 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1628 | add r4,r4,r8 @ d+=h | ||
1629 | eor r12,r12,r10 @ Maj(a,b,c) | ||
1630 | add r8,r8,r0,ror#2 @ h+=Sigma0(a) | ||
1631 | @ add r8,r8,r12 @ h+=Maj(a,b,c) | ||
1632 | @ ldr r2,[sp,#13*4] @ 28 | ||
1633 | @ ldr r1,[sp,#10*4] | ||
1634 | mov r0,r2,ror#7 | ||
1635 | add r8,r8,r12 @ h+=Maj(a,b,c) from the past | ||
1636 | mov r12,r1,ror#17 | ||
1637 | eor r0,r0,r2,ror#18 | ||
1638 | eor r12,r12,r1,ror#19 | ||
1639 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1640 | ldr r2,[sp,#12*4] | ||
1641 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1642 | ldr r1,[sp,#5*4] | ||
1643 | |||
1644 | add r12,r12,r0 | ||
1645 | eor r0,r4,r4,ror#5 @ from BODY_00_15 | ||
1646 | add r2,r2,r12 | ||
1647 | eor r0,r0,r4,ror#19 @ Sigma1(e) | ||
1648 | add r2,r2,r1 @ X[i] | ||
1649 | ldr r12,[r14],#4 @ *K256++ | ||
1650 | add r7,r7,r2 @ h+=X[i] | ||
1651 | str r2,[sp,#12*4] | ||
1652 | eor r2,r5,r6 | ||
1653 | add r7,r7,r0,ror#6 @ h+=Sigma1(e) | ||
1654 | and r2,r2,r4 | ||
1655 | add r7,r7,r12 @ h+=K256[i] | ||
1656 | eor r2,r2,r6 @ Ch(e,f,g) | ||
1657 | eor r0,r8,r8,ror#11 | ||
1658 | add r7,r7,r2 @ h+=Ch(e,f,g) | ||
1659 | #if 28==31 | ||
1660 | and r12,r12,#0xff | ||
1661 | cmp r12,#0xf2 @ done? | ||
1662 | #endif | ||
1663 | #if 28<15 | ||
1664 | # if __ARM_ARCH__>=7 | ||
1665 | ldr r2,[r1],#4 @ prefetch | ||
1666 | # else | ||
1667 | ldrb r2,[r1,#3] | ||
1668 | # endif | ||
1669 | eor r12,r8,r9 @ a^b, b^c in next round | ||
1670 | #else | ||
1671 | ldr r2,[sp,#14*4] @ from future BODY_16_xx | ||
1672 | eor r12,r8,r9 @ a^b, b^c in next round | ||
1673 | ldr r1,[sp,#11*4] @ from future BODY_16_xx | ||
1674 | #endif | ||
1675 | eor r0,r0,r8,ror#20 @ Sigma0(a) | ||
1676 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1677 | add r11,r11,r7 @ d+=h | ||
1678 | eor r3,r3,r9 @ Maj(a,b,c) | ||
1679 | add r7,r7,r0,ror#2 @ h+=Sigma0(a) | ||
1680 | @ add r7,r7,r3 @ h+=Maj(a,b,c) | ||
1681 | @ ldr r2,[sp,#14*4] @ 29 | ||
1682 | @ ldr r1,[sp,#11*4] | ||
1683 | mov r0,r2,ror#7 | ||
1684 | add r7,r7,r3 @ h+=Maj(a,b,c) from the past | ||
1685 | mov r3,r1,ror#17 | ||
1686 | eor r0,r0,r2,ror#18 | ||
1687 | eor r3,r3,r1,ror#19 | ||
1688 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1689 | ldr r2,[sp,#13*4] | ||
1690 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1691 | ldr r1,[sp,#6*4] | ||
1692 | |||
1693 | add r3,r3,r0 | ||
1694 | eor r0,r11,r11,ror#5 @ from BODY_00_15 | ||
1695 | add r2,r2,r3 | ||
1696 | eor r0,r0,r11,ror#19 @ Sigma1(e) | ||
1697 | add r2,r2,r1 @ X[i] | ||
1698 | ldr r3,[r14],#4 @ *K256++ | ||
1699 | add r6,r6,r2 @ h+=X[i] | ||
1700 | str r2,[sp,#13*4] | ||
1701 | eor r2,r4,r5 | ||
1702 | add r6,r6,r0,ror#6 @ h+=Sigma1(e) | ||
1703 | and r2,r2,r11 | ||
1704 | add r6,r6,r3 @ h+=K256[i] | ||
1705 | eor r2,r2,r5 @ Ch(e,f,g) | ||
1706 | eor r0,r7,r7,ror#11 | ||
1707 | add r6,r6,r2 @ h+=Ch(e,f,g) | ||
1708 | #if 29==31 | ||
1709 | and r3,r3,#0xff | ||
1710 | cmp r3,#0xf2 @ done? | ||
1711 | #endif | ||
1712 | #if 29<15 | ||
1713 | # if __ARM_ARCH__>=7 | ||
1714 | ldr r2,[r1],#4 @ prefetch | ||
1715 | # else | ||
1716 | ldrb r2,[r1,#3] | ||
1717 | # endif | ||
1718 | eor r3,r7,r8 @ a^b, b^c in next round | ||
1719 | #else | ||
1720 | ldr r2,[sp,#15*4] @ from future BODY_16_xx | ||
1721 | eor r3,r7,r8 @ a^b, b^c in next round | ||
1722 | ldr r1,[sp,#12*4] @ from future BODY_16_xx | ||
1723 | #endif | ||
1724 | eor r0,r0,r7,ror#20 @ Sigma0(a) | ||
1725 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1726 | add r10,r10,r6 @ d+=h | ||
1727 | eor r12,r12,r8 @ Maj(a,b,c) | ||
1728 | add r6,r6,r0,ror#2 @ h+=Sigma0(a) | ||
1729 | @ add r6,r6,r12 @ h+=Maj(a,b,c) | ||
1730 | @ ldr r2,[sp,#15*4] @ 30 | ||
1731 | @ ldr r1,[sp,#12*4] | ||
1732 | mov r0,r2,ror#7 | ||
1733 | add r6,r6,r12 @ h+=Maj(a,b,c) from the past | ||
1734 | mov r12,r1,ror#17 | ||
1735 | eor r0,r0,r2,ror#18 | ||
1736 | eor r12,r12,r1,ror#19 | ||
1737 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1738 | ldr r2,[sp,#14*4] | ||
1739 | eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) | ||
1740 | ldr r1,[sp,#7*4] | ||
1741 | |||
1742 | add r12,r12,r0 | ||
1743 | eor r0,r10,r10,ror#5 @ from BODY_00_15 | ||
1744 | add r2,r2,r12 | ||
1745 | eor r0,r0,r10,ror#19 @ Sigma1(e) | ||
1746 | add r2,r2,r1 @ X[i] | ||
1747 | ldr r12,[r14],#4 @ *K256++ | ||
1748 | add r5,r5,r2 @ h+=X[i] | ||
1749 | str r2,[sp,#14*4] | ||
1750 | eor r2,r11,r4 | ||
1751 | add r5,r5,r0,ror#6 @ h+=Sigma1(e) | ||
1752 | and r2,r2,r10 | ||
1753 | add r5,r5,r12 @ h+=K256[i] | ||
1754 | eor r2,r2,r4 @ Ch(e,f,g) | ||
1755 | eor r0,r6,r6,ror#11 | ||
1756 | add r5,r5,r2 @ h+=Ch(e,f,g) | ||
1757 | #if 30==31 | ||
1758 | and r12,r12,#0xff | ||
1759 | cmp r12,#0xf2 @ done? | ||
1760 | #endif | ||
1761 | #if 30<15 | ||
1762 | # if __ARM_ARCH__>=7 | ||
1763 | ldr r2,[r1],#4 @ prefetch | ||
1764 | # else | ||
1765 | ldrb r2,[r1,#3] | ||
1766 | # endif | ||
1767 | eor r12,r6,r7 @ a^b, b^c in next round | ||
1768 | #else | ||
1769 | ldr r2,[sp,#0*4] @ from future BODY_16_xx | ||
1770 | eor r12,r6,r7 @ a^b, b^c in next round | ||
1771 | ldr r1,[sp,#13*4] @ from future BODY_16_xx | ||
1772 | #endif | ||
1773 | eor r0,r0,r6,ror#20 @ Sigma0(a) | ||
1774 | and r3,r3,r12 @ (b^c)&=(a^b) | ||
1775 | add r9,r9,r5 @ d+=h | ||
1776 | eor r3,r3,r7 @ Maj(a,b,c) | ||
1777 | add r5,r5,r0,ror#2 @ h+=Sigma0(a) | ||
1778 | @ add r5,r5,r3 @ h+=Maj(a,b,c) | ||
1779 | @ ldr r2,[sp,#0*4] @ 31 | ||
1780 | @ ldr r1,[sp,#13*4] | ||
1781 | mov r0,r2,ror#7 | ||
1782 | add r5,r5,r3 @ h+=Maj(a,b,c) from the past | ||
1783 | mov r3,r1,ror#17 | ||
1784 | eor r0,r0,r2,ror#18 | ||
1785 | eor r3,r3,r1,ror#19 | ||
1786 | eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) | ||
1787 | ldr r2,[sp,#15*4] | ||
1788 | eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) | ||
1789 | ldr r1,[sp,#8*4] | ||
1790 | |||
1791 | add r3,r3,r0 | ||
1792 | eor r0,r9,r9,ror#5 @ from BODY_00_15 | ||
1793 | add r2,r2,r3 | ||
1794 | eor r0,r0,r9,ror#19 @ Sigma1(e) | ||
1795 | add r2,r2,r1 @ X[i] | ||
1796 | ldr r3,[r14],#4 @ *K256++ | ||
1797 | add r4,r4,r2 @ h+=X[i] | ||
1798 | str r2,[sp,#15*4] | ||
1799 | eor r2,r10,r11 | ||
1800 | add r4,r4,r0,ror#6 @ h+=Sigma1(e) | ||
1801 | and r2,r2,r9 | ||
1802 | add r4,r4,r3 @ h+=K256[i] | ||
1803 | eor r2,r2,r11 @ Ch(e,f,g) | ||
1804 | eor r0,r5,r5,ror#11 | ||
1805 | add r4,r4,r2 @ h+=Ch(e,f,g) | ||
1806 | #if 31==31 | ||
1807 | and r3,r3,#0xff | ||
1808 | cmp r3,#0xf2 @ done? | ||
1809 | #endif | ||
1810 | #if 31<15 | ||
1811 | # if __ARM_ARCH__>=7 | ||
1812 | ldr r2,[r1],#4 @ prefetch | ||
1813 | # else | ||
1814 | ldrb r2,[r1,#3] | ||
1815 | # endif | ||
1816 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1817 | #else | ||
1818 | ldr r2,[sp,#1*4] @ from future BODY_16_xx | ||
1819 | eor r3,r5,r6 @ a^b, b^c in next round | ||
1820 | ldr r1,[sp,#14*4] @ from future BODY_16_xx | ||
1821 | #endif | ||
1822 | eor r0,r0,r5,ror#20 @ Sigma0(a) | ||
1823 | and r12,r12,r3 @ (b^c)&=(a^b) | ||
1824 | add r8,r8,r4 @ d+=h | ||
1825 | eor r12,r12,r6 @ Maj(a,b,c) | ||
1826 | add r4,r4,r0,ror#2 @ h+=Sigma0(a) | ||
1827 | @ add r4,r4,r12 @ h+=Maj(a,b,c) | ||
1828 | #if __ARM_ARCH__>=7 | ||
1829 | ite eq @ Thumb2 thing, sanity check in ARM | ||
1830 | #endif | ||
1831 | ldreq r3,[sp,#16*4] @ pull ctx | ||
1832 | bne .Lrounds_16_xx | ||
1833 | |||
1834 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
1835 | ldr r0,[r3,#0] | ||
1836 | ldr r2,[r3,#4] | ||
1837 | ldr r12,[r3,#8] | ||
1838 | add r4,r4,r0 | ||
1839 | ldr r0,[r3,#12] | ||
1840 | add r5,r5,r2 | ||
1841 | ldr r2,[r3,#16] | ||
1842 | add r6,r6,r12 | ||
1843 | ldr r12,[r3,#20] | ||
1844 | add r7,r7,r0 | ||
1845 | ldr r0,[r3,#24] | ||
1846 | add r8,r8,r2 | ||
1847 | ldr r2,[r3,#28] | ||
1848 | add r9,r9,r12 | ||
1849 | ldr r1,[sp,#17*4] @ pull inp | ||
1850 | ldr r12,[sp,#18*4] @ pull inp+len | ||
1851 | add r10,r10,r0 | ||
1852 | add r11,r11,r2 | ||
1853 | stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} | ||
1854 | cmp r1,r12 | ||
1855 | sub r14,r14,#256 @ rewind Ktbl | ||
1856 | bne .Loop | ||
1857 | |||
1858 | add sp,sp,#19*4 @ destroy frame | ||
1859 | #if __ARM_ARCH__>=5 | ||
1860 | ldmia sp!,{r4-r11,pc} | ||
1861 | #else | ||
1862 | ldmia sp!,{r4-r11,lr} | ||
1863 | tst lr,#1 | ||
1864 | moveq pc,lr @ be binary compatible with V4, yet | ||
1865 | .word 0xe12fff1e @ interoperable with Thumb ISA:-) | ||
1866 | #endif | ||
1867 | .size sha256_block_data_order,.-sha256_block_data_order | ||
1868 | #if __ARM_MAX_ARCH__>=7 | ||
1869 | .arch armv7-a | ||
1870 | .fpu neon | ||
1871 | |||
1872 | .global sha256_block_data_order_neon | ||
1873 | .type sha256_block_data_order_neon,%function | ||
1874 | .align 4 | ||
1875 | sha256_block_data_order_neon: | ||
1876 | .LNEON: | ||
1877 | stmdb sp!,{r4-r12,lr} | ||
1878 | |||
1879 | sub r11,sp,#16*4+16 | ||
1880 | adrl r14,K256 | ||
1881 | bic r11,r11,#15 @ align for 128-bit stores | ||
1882 | mov r12,sp | ||
1883 | mov sp,r11 @ alloca | ||
1884 | add r2,r1,r2,lsl#6 @ len to point at the end of inp | ||
1885 | |||
1886 | vld1.8 {q0},[r1]! | ||
1887 | vld1.8 {q1},[r1]! | ||
1888 | vld1.8 {q2},[r1]! | ||
1889 | vld1.8 {q3},[r1]! | ||
1890 | vld1.32 {q8},[r14,:128]! | ||
1891 | vld1.32 {q9},[r14,:128]! | ||
1892 | vld1.32 {q10},[r14,:128]! | ||
1893 | vld1.32 {q11},[r14,:128]! | ||
1894 | vrev32.8 q0,q0 @ yes, even on | ||
1895 | str r0,[sp,#64] | ||
1896 | vrev32.8 q1,q1 @ big-endian | ||
1897 | str r1,[sp,#68] | ||
1898 | mov r1,sp | ||
1899 | vrev32.8 q2,q2 | ||
1900 | str r2,[sp,#72] | ||
1901 | vrev32.8 q3,q3 | ||
1902 | str r12,[sp,#76] @ save original sp | ||
1903 | vadd.i32 q8,q8,q0 | ||
1904 | vadd.i32 q9,q9,q1 | ||
1905 | vst1.32 {q8},[r1,:128]! | ||
1906 | vadd.i32 q10,q10,q2 | ||
1907 | vst1.32 {q9},[r1,:128]! | ||
1908 | vadd.i32 q11,q11,q3 | ||
1909 | vst1.32 {q10},[r1,:128]! | ||
1910 | vst1.32 {q11},[r1,:128]! | ||
1911 | |||
1912 | ldmia r0,{r4-r11} | ||
1913 | sub r1,r1,#64 | ||
1914 | ldr r2,[sp,#0] | ||
1915 | eor r12,r12,r12 | ||
1916 | eor r3,r5,r6 | ||
1917 | b .L_00_48 | ||
1918 | |||
1919 | .align 4 | ||
1920 | .L_00_48: | ||
1921 | vext.8 q8,q0,q1,#4 | ||
1922 | add r11,r11,r2 | ||
1923 | eor r2,r9,r10 | ||
1924 | eor r0,r8,r8,ror#5 | ||
1925 | vext.8 q9,q2,q3,#4 | ||
1926 | add r4,r4,r12 | ||
1927 | and r2,r2,r8 | ||
1928 | eor r12,r0,r8,ror#19 | ||
1929 | vshr.u32 q10,q8,#7 | ||
1930 | eor r0,r4,r4,ror#11 | ||
1931 | eor r2,r2,r10 | ||
1932 | vadd.i32 q0,q0,q9 | ||
1933 | add r11,r11,r12,ror#6 | ||
1934 | eor r12,r4,r5 | ||
1935 | vshr.u32 q9,q8,#3 | ||
1936 | eor r0,r0,r4,ror#20 | ||
1937 | add r11,r11,r2 | ||
1938 | vsli.32 q10,q8,#25 | ||
1939 | ldr r2,[sp,#4] | ||
1940 | and r3,r3,r12 | ||
1941 | vshr.u32 q11,q8,#18 | ||
1942 | add r7,r7,r11 | ||
1943 | add r11,r11,r0,ror#2 | ||
1944 | eor r3,r3,r5 | ||
1945 | veor q9,q9,q10 | ||
1946 | add r10,r10,r2 | ||
1947 | vsli.32 q11,q8,#14 | ||
1948 | eor r2,r8,r9 | ||
1949 | eor r0,r7,r7,ror#5 | ||
1950 | vshr.u32 d24,d7,#17 | ||
1951 | add r11,r11,r3 | ||
1952 | and r2,r2,r7 | ||
1953 | veor q9,q9,q11 | ||
1954 | eor r3,r0,r7,ror#19 | ||
1955 | eor r0,r11,r11,ror#11 | ||
1956 | vsli.32 d24,d7,#15 | ||
1957 | eor r2,r2,r9 | ||
1958 | add r10,r10,r3,ror#6 | ||
1959 | vshr.u32 d25,d7,#10 | ||
1960 | eor r3,r11,r4 | ||
1961 | eor r0,r0,r11,ror#20 | ||
1962 | vadd.i32 q0,q0,q9 | ||
1963 | add r10,r10,r2 | ||
1964 | ldr r2,[sp,#8] | ||
1965 | veor d25,d25,d24 | ||
1966 | and r12,r12,r3 | ||
1967 | add r6,r6,r10 | ||
1968 | vshr.u32 d24,d7,#19 | ||
1969 | add r10,r10,r0,ror#2 | ||
1970 | eor r12,r12,r4 | ||
1971 | vsli.32 d24,d7,#13 | ||
1972 | add r9,r9,r2 | ||
1973 | eor r2,r7,r8 | ||
1974 | veor d25,d25,d24 | ||
1975 | eor r0,r6,r6,ror#5 | ||
1976 | add r10,r10,r12 | ||
1977 | vadd.i32 d0,d0,d25 | ||
1978 | and r2,r2,r6 | ||
1979 | eor r12,r0,r6,ror#19 | ||
1980 | vshr.u32 d24,d0,#17 | ||
1981 | eor r0,r10,r10,ror#11 | ||
1982 | eor r2,r2,r8 | ||
1983 | vsli.32 d24,d0,#15 | ||
1984 | add r9,r9,r12,ror#6 | ||
1985 | eor r12,r10,r11 | ||
1986 | vshr.u32 d25,d0,#10 | ||
1987 | eor r0,r0,r10,ror#20 | ||
1988 | add r9,r9,r2 | ||
1989 | veor d25,d25,d24 | ||
1990 | ldr r2,[sp,#12] | ||
1991 | and r3,r3,r12 | ||
1992 | vshr.u32 d24,d0,#19 | ||
1993 | add r5,r5,r9 | ||
1994 | add r9,r9,r0,ror#2 | ||
1995 | eor r3,r3,r11 | ||
1996 | vld1.32 {q8},[r14,:128]! | ||
1997 | add r8,r8,r2 | ||
1998 | vsli.32 d24,d0,#13 | ||
1999 | eor r2,r6,r7 | ||
2000 | eor r0,r5,r5,ror#5 | ||
2001 | veor d25,d25,d24 | ||
2002 | add r9,r9,r3 | ||
2003 | and r2,r2,r5 | ||
2004 | vadd.i32 d1,d1,d25 | ||
2005 | eor r3,r0,r5,ror#19 | ||
2006 | eor r0,r9,r9,ror#11 | ||
2007 | vadd.i32 q8,q8,q0 | ||
2008 | eor r2,r2,r7 | ||
2009 | add r8,r8,r3,ror#6 | ||
2010 | eor r3,r9,r10 | ||
2011 | eor r0,r0,r9,ror#20 | ||
2012 | add r8,r8,r2 | ||
2013 | ldr r2,[sp,#16] | ||
2014 | and r12,r12,r3 | ||
2015 | add r4,r4,r8 | ||
2016 | vst1.32 {q8},[r1,:128]! | ||
2017 | add r8,r8,r0,ror#2 | ||
2018 | eor r12,r12,r10 | ||
2019 | vext.8 q8,q1,q2,#4 | ||
2020 | add r7,r7,r2 | ||
2021 | eor r2,r5,r6 | ||
2022 | eor r0,r4,r4,ror#5 | ||
2023 | vext.8 q9,q3,q0,#4 | ||
2024 | add r8,r8,r12 | ||
2025 | and r2,r2,r4 | ||
2026 | eor r12,r0,r4,ror#19 | ||
2027 | vshr.u32 q10,q8,#7 | ||
2028 | eor r0,r8,r8,ror#11 | ||
2029 | eor r2,r2,r6 | ||
2030 | vadd.i32 q1,q1,q9 | ||
2031 | add r7,r7,r12,ror#6 | ||
2032 | eor r12,r8,r9 | ||
2033 | vshr.u32 q9,q8,#3 | ||
2034 | eor r0,r0,r8,ror#20 | ||
2035 | add r7,r7,r2 | ||
2036 | vsli.32 q10,q8,#25 | ||
2037 | ldr r2,[sp,#20] | ||
2038 | and r3,r3,r12 | ||
2039 | vshr.u32 q11,q8,#18 | ||
2040 | add r11,r11,r7 | ||
2041 | add r7,r7,r0,ror#2 | ||
2042 | eor r3,r3,r9 | ||
2043 | veor q9,q9,q10 | ||
2044 | add r6,r6,r2 | ||
2045 | vsli.32 q11,q8,#14 | ||
2046 | eor r2,r4,r5 | ||
2047 | eor r0,r11,r11,ror#5 | ||
2048 | vshr.u32 d24,d1,#17 | ||
2049 | add r7,r7,r3 | ||
2050 | and r2,r2,r11 | ||
2051 | veor q9,q9,q11 | ||
2052 | eor r3,r0,r11,ror#19 | ||
2053 | eor r0,r7,r7,ror#11 | ||
2054 | vsli.32 d24,d1,#15 | ||
2055 | eor r2,r2,r5 | ||
2056 | add r6,r6,r3,ror#6 | ||
2057 | vshr.u32 d25,d1,#10 | ||
2058 | eor r3,r7,r8 | ||
2059 | eor r0,r0,r7,ror#20 | ||
2060 | vadd.i32 q1,q1,q9 | ||
2061 | add r6,r6,r2 | ||
2062 | ldr r2,[sp,#24] | ||
2063 | veor d25,d25,d24 | ||
2064 | and r12,r12,r3 | ||
2065 | add r10,r10,r6 | ||
2066 | vshr.u32 d24,d1,#19 | ||
2067 | add r6,r6,r0,ror#2 | ||
2068 | eor r12,r12,r8 | ||
2069 | vsli.32 d24,d1,#13 | ||
2070 | add r5,r5,r2 | ||
2071 | eor r2,r11,r4 | ||
2072 | veor d25,d25,d24 | ||
2073 | eor r0,r10,r10,ror#5 | ||
2074 | add r6,r6,r12 | ||
2075 | vadd.i32 d2,d2,d25 | ||
2076 | and r2,r2,r10 | ||
2077 | eor r12,r0,r10,ror#19 | ||
2078 | vshr.u32 d24,d2,#17 | ||
2079 | eor r0,r6,r6,ror#11 | ||
2080 | eor r2,r2,r4 | ||
2081 | vsli.32 d24,d2,#15 | ||
2082 | add r5,r5,r12,ror#6 | ||
2083 | eor r12,r6,r7 | ||
2084 | vshr.u32 d25,d2,#10 | ||
2085 | eor r0,r0,r6,ror#20 | ||
2086 | add r5,r5,r2 | ||
2087 | veor d25,d25,d24 | ||
2088 | ldr r2,[sp,#28] | ||
2089 | and r3,r3,r12 | ||
2090 | vshr.u32 d24,d2,#19 | ||
2091 | add r9,r9,r5 | ||
2092 | add r5,r5,r0,ror#2 | ||
2093 | eor r3,r3,r7 | ||
2094 | vld1.32 {q8},[r14,:128]! | ||
2095 | add r4,r4,r2 | ||
2096 | vsli.32 d24,d2,#13 | ||
2097 | eor r2,r10,r11 | ||
2098 | eor r0,r9,r9,ror#5 | ||
2099 | veor d25,d25,d24 | ||
2100 | add r5,r5,r3 | ||
2101 | and r2,r2,r9 | ||
2102 | vadd.i32 d3,d3,d25 | ||
2103 | eor r3,r0,r9,ror#19 | ||
2104 | eor r0,r5,r5,ror#11 | ||
2105 | vadd.i32 q8,q8,q1 | ||
2106 | eor r2,r2,r11 | ||
2107 | add r4,r4,r3,ror#6 | ||
2108 | eor r3,r5,r6 | ||
2109 | eor r0,r0,r5,ror#20 | ||
2110 | add r4,r4,r2 | ||
2111 | ldr r2,[sp,#32] | ||
2112 | and r12,r12,r3 | ||
2113 | add r8,r8,r4 | ||
2114 | vst1.32 {q8},[r1,:128]! | ||
2115 | add r4,r4,r0,ror#2 | ||
2116 | eor r12,r12,r6 | ||
2117 | vext.8 q8,q2,q3,#4 | ||
2118 | add r11,r11,r2 | ||
2119 | eor r2,r9,r10 | ||
2120 | eor r0,r8,r8,ror#5 | ||
2121 | vext.8 q9,q0,q1,#4 | ||
2122 | add r4,r4,r12 | ||
2123 | and r2,r2,r8 | ||
2124 | eor r12,r0,r8,ror#19 | ||
2125 | vshr.u32 q10,q8,#7 | ||
2126 | eor r0,r4,r4,ror#11 | ||
2127 | eor r2,r2,r10 | ||
2128 | vadd.i32 q2,q2,q9 | ||
2129 | add r11,r11,r12,ror#6 | ||
2130 | eor r12,r4,r5 | ||
2131 | vshr.u32 q9,q8,#3 | ||
2132 | eor r0,r0,r4,ror#20 | ||
2133 | add r11,r11,r2 | ||
2134 | vsli.32 q10,q8,#25 | ||
2135 | ldr r2,[sp,#36] | ||
2136 | and r3,r3,r12 | ||
2137 | vshr.u32 q11,q8,#18 | ||
2138 | add r7,r7,r11 | ||
2139 | add r11,r11,r0,ror#2 | ||
2140 | eor r3,r3,r5 | ||
2141 | veor q9,q9,q10 | ||
2142 | add r10,r10,r2 | ||
2143 | vsli.32 q11,q8,#14 | ||
2144 | eor r2,r8,r9 | ||
2145 | eor r0,r7,r7,ror#5 | ||
2146 | vshr.u32 d24,d3,#17 | ||
2147 | add r11,r11,r3 | ||
2148 | and r2,r2,r7 | ||
2149 | veor q9,q9,q11 | ||
2150 | eor r3,r0,r7,ror#19 | ||
2151 | eor r0,r11,r11,ror#11 | ||
2152 | vsli.32 d24,d3,#15 | ||
2153 | eor r2,r2,r9 | ||
2154 | add r10,r10,r3,ror#6 | ||
2155 | vshr.u32 d25,d3,#10 | ||
2156 | eor r3,r11,r4 | ||
2157 | eor r0,r0,r11,ror#20 | ||
2158 | vadd.i32 q2,q2,q9 | ||
2159 | add r10,r10,r2 | ||
2160 | ldr r2,[sp,#40] | ||
2161 | veor d25,d25,d24 | ||
2162 | and r12,r12,r3 | ||
2163 | add r6,r6,r10 | ||
2164 | vshr.u32 d24,d3,#19 | ||
2165 | add r10,r10,r0,ror#2 | ||
2166 | eor r12,r12,r4 | ||
2167 | vsli.32 d24,d3,#13 | ||
2168 | add r9,r9,r2 | ||
2169 | eor r2,r7,r8 | ||
2170 | veor d25,d25,d24 | ||
2171 | eor r0,r6,r6,ror#5 | ||
2172 | add r10,r10,r12 | ||
2173 | vadd.i32 d4,d4,d25 | ||
2174 | and r2,r2,r6 | ||
2175 | eor r12,r0,r6,ror#19 | ||
2176 | vshr.u32 d24,d4,#17 | ||
2177 | eor r0,r10,r10,ror#11 | ||
2178 | eor r2,r2,r8 | ||
2179 | vsli.32 d24,d4,#15 | ||
2180 | add r9,r9,r12,ror#6 | ||
2181 | eor r12,r10,r11 | ||
2182 | vshr.u32 d25,d4,#10 | ||
2183 | eor r0,r0,r10,ror#20 | ||
2184 | add r9,r9,r2 | ||
2185 | veor d25,d25,d24 | ||
2186 | ldr r2,[sp,#44] | ||
2187 | and r3,r3,r12 | ||
2188 | vshr.u32 d24,d4,#19 | ||
2189 | add r5,r5,r9 | ||
2190 | add r9,r9,r0,ror#2 | ||
2191 | eor r3,r3,r11 | ||
2192 | vld1.32 {q8},[r14,:128]! | ||
2193 | add r8,r8,r2 | ||
2194 | vsli.32 d24,d4,#13 | ||
2195 | eor r2,r6,r7 | ||
2196 | eor r0,r5,r5,ror#5 | ||
2197 | veor d25,d25,d24 | ||
2198 | add r9,r9,r3 | ||
2199 | and r2,r2,r5 | ||
2200 | vadd.i32 d5,d5,d25 | ||
2201 | eor r3,r0,r5,ror#19 | ||
2202 | eor r0,r9,r9,ror#11 | ||
2203 | vadd.i32 q8,q8,q2 | ||
2204 | eor r2,r2,r7 | ||
2205 | add r8,r8,r3,ror#6 | ||
2206 | eor r3,r9,r10 | ||
2207 | eor r0,r0,r9,ror#20 | ||
2208 | add r8,r8,r2 | ||
2209 | ldr r2,[sp,#48] | ||
2210 | and r12,r12,r3 | ||
2211 | add r4,r4,r8 | ||
2212 | vst1.32 {q8},[r1,:128]! | ||
2213 | add r8,r8,r0,ror#2 | ||
2214 | eor r12,r12,r10 | ||
2215 | vext.8 q8,q3,q0,#4 | ||
2216 | add r7,r7,r2 | ||
2217 | eor r2,r5,r6 | ||
2218 | eor r0,r4,r4,ror#5 | ||
2219 | vext.8 q9,q1,q2,#4 | ||
2220 | add r8,r8,r12 | ||
2221 | and r2,r2,r4 | ||
2222 | eor r12,r0,r4,ror#19 | ||
2223 | vshr.u32 q10,q8,#7 | ||
2224 | eor r0,r8,r8,ror#11 | ||
2225 | eor r2,r2,r6 | ||
2226 | vadd.i32 q3,q3,q9 | ||
2227 | add r7,r7,r12,ror#6 | ||
2228 | eor r12,r8,r9 | ||
2229 | vshr.u32 q9,q8,#3 | ||
2230 | eor r0,r0,r8,ror#20 | ||
2231 | add r7,r7,r2 | ||
2232 | vsli.32 q10,q8,#25 | ||
2233 | ldr r2,[sp,#52] | ||
2234 | and r3,r3,r12 | ||
2235 | vshr.u32 q11,q8,#18 | ||
2236 | add r11,r11,r7 | ||
2237 | add r7,r7,r0,ror#2 | ||
2238 | eor r3,r3,r9 | ||
2239 | veor q9,q9,q10 | ||
2240 | add r6,r6,r2 | ||
2241 | vsli.32 q11,q8,#14 | ||
2242 | eor r2,r4,r5 | ||
2243 | eor r0,r11,r11,ror#5 | ||
2244 | vshr.u32 d24,d5,#17 | ||
2245 | add r7,r7,r3 | ||
2246 | and r2,r2,r11 | ||
2247 | veor q9,q9,q11 | ||
2248 | eor r3,r0,r11,ror#19 | ||
2249 | eor r0,r7,r7,ror#11 | ||
2250 | vsli.32 d24,d5,#15 | ||
2251 | eor r2,r2,r5 | ||
2252 | add r6,r6,r3,ror#6 | ||
2253 | vshr.u32 d25,d5,#10 | ||
2254 | eor r3,r7,r8 | ||
2255 | eor r0,r0,r7,ror#20 | ||
2256 | vadd.i32 q3,q3,q9 | ||
2257 | add r6,r6,r2 | ||
2258 | ldr r2,[sp,#56] | ||
2259 | veor d25,d25,d24 | ||
2260 | and r12,r12,r3 | ||
2261 | add r10,r10,r6 | ||
2262 | vshr.u32 d24,d5,#19 | ||
2263 | add r6,r6,r0,ror#2 | ||
2264 | eor r12,r12,r8 | ||
2265 | vsli.32 d24,d5,#13 | ||
2266 | add r5,r5,r2 | ||
2267 | eor r2,r11,r4 | ||
2268 | veor d25,d25,d24 | ||
2269 | eor r0,r10,r10,ror#5 | ||
2270 | add r6,r6,r12 | ||
2271 | vadd.i32 d6,d6,d25 | ||
2272 | and r2,r2,r10 | ||
2273 | eor r12,r0,r10,ror#19 | ||
2274 | vshr.u32 d24,d6,#17 | ||
2275 | eor r0,r6,r6,ror#11 | ||
2276 | eor r2,r2,r4 | ||
2277 | vsli.32 d24,d6,#15 | ||
2278 | add r5,r5,r12,ror#6 | ||
2279 | eor r12,r6,r7 | ||
2280 | vshr.u32 d25,d6,#10 | ||
2281 | eor r0,r0,r6,ror#20 | ||
2282 | add r5,r5,r2 | ||
2283 | veor d25,d25,d24 | ||
2284 | ldr r2,[sp,#60] | ||
2285 | and r3,r3,r12 | ||
2286 | vshr.u32 d24,d6,#19 | ||
2287 | add r9,r9,r5 | ||
2288 | add r5,r5,r0,ror#2 | ||
2289 | eor r3,r3,r7 | ||
2290 | vld1.32 {q8},[r14,:128]! | ||
2291 | add r4,r4,r2 | ||
2292 | vsli.32 d24,d6,#13 | ||
2293 | eor r2,r10,r11 | ||
2294 | eor r0,r9,r9,ror#5 | ||
2295 | veor d25,d25,d24 | ||
2296 | add r5,r5,r3 | ||
2297 | and r2,r2,r9 | ||
2298 | vadd.i32 d7,d7,d25 | ||
2299 | eor r3,r0,r9,ror#19 | ||
2300 | eor r0,r5,r5,ror#11 | ||
2301 | vadd.i32 q8,q8,q3 | ||
2302 | eor r2,r2,r11 | ||
2303 | add r4,r4,r3,ror#6 | ||
2304 | eor r3,r5,r6 | ||
2305 | eor r0,r0,r5,ror#20 | ||
2306 | add r4,r4,r2 | ||
2307 | ldr r2,[r14] | ||
2308 | and r12,r12,r3 | ||
2309 | add r8,r8,r4 | ||
2310 | vst1.32 {q8},[r1,:128]! | ||
2311 | add r4,r4,r0,ror#2 | ||
2312 | eor r12,r12,r6 | ||
2313 | teq r2,#0 @ check for K256 terminator | ||
2314 | ldr r2,[sp,#0] | ||
2315 | sub r1,r1,#64 | ||
2316 | bne .L_00_48 | ||
2317 | |||
2318 | ldr r1,[sp,#68] | ||
2319 | ldr r0,[sp,#72] | ||
2320 | sub r14,r14,#256 @ rewind r14 | ||
2321 | teq r1,r0 | ||
2322 | it eq | ||
2323 | subeq r1,r1,#64 @ avoid SEGV | ||
2324 | vld1.8 {q0},[r1]! @ load next input block | ||
2325 | vld1.8 {q1},[r1]! | ||
2326 | vld1.8 {q2},[r1]! | ||
2327 | vld1.8 {q3},[r1]! | ||
2328 | it ne | ||
2329 | strne r1,[sp,#68] | ||
2330 | mov r1,sp | ||
2331 | add r11,r11,r2 | ||
2332 | eor r2,r9,r10 | ||
2333 | eor r0,r8,r8,ror#5 | ||
2334 | add r4,r4,r12 | ||
2335 | vld1.32 {q8},[r14,:128]! | ||
2336 | and r2,r2,r8 | ||
2337 | eor r12,r0,r8,ror#19 | ||
2338 | eor r0,r4,r4,ror#11 | ||
2339 | eor r2,r2,r10 | ||
2340 | vrev32.8 q0,q0 | ||
2341 | add r11,r11,r12,ror#6 | ||
2342 | eor r12,r4,r5 | ||
2343 | eor r0,r0,r4,ror#20 | ||
2344 | add r11,r11,r2 | ||
2345 | vadd.i32 q8,q8,q0 | ||
2346 | ldr r2,[sp,#4] | ||
2347 | and r3,r3,r12 | ||
2348 | add r7,r7,r11 | ||
2349 | add r11,r11,r0,ror#2 | ||
2350 | eor r3,r3,r5 | ||
2351 | add r10,r10,r2 | ||
2352 | eor r2,r8,r9 | ||
2353 | eor r0,r7,r7,ror#5 | ||
2354 | add r11,r11,r3 | ||
2355 | and r2,r2,r7 | ||
2356 | eor r3,r0,r7,ror#19 | ||
2357 | eor r0,r11,r11,ror#11 | ||
2358 | eor r2,r2,r9 | ||
2359 | add r10,r10,r3,ror#6 | ||
2360 | eor r3,r11,r4 | ||
2361 | eor r0,r0,r11,ror#20 | ||
2362 | add r10,r10,r2 | ||
2363 | ldr r2,[sp,#8] | ||
2364 | and r12,r12,r3 | ||
2365 | add r6,r6,r10 | ||
2366 | add r10,r10,r0,ror#2 | ||
2367 | eor r12,r12,r4 | ||
2368 | add r9,r9,r2 | ||
2369 | eor r2,r7,r8 | ||
2370 | eor r0,r6,r6,ror#5 | ||
2371 | add r10,r10,r12 | ||
2372 | and r2,r2,r6 | ||
2373 | eor r12,r0,r6,ror#19 | ||
2374 | eor r0,r10,r10,ror#11 | ||
2375 | eor r2,r2,r8 | ||
2376 | add r9,r9,r12,ror#6 | ||
2377 | eor r12,r10,r11 | ||
2378 | eor r0,r0,r10,ror#20 | ||
2379 | add r9,r9,r2 | ||
2380 | ldr r2,[sp,#12] | ||
2381 | and r3,r3,r12 | ||
2382 | add r5,r5,r9 | ||
2383 | add r9,r9,r0,ror#2 | ||
2384 | eor r3,r3,r11 | ||
2385 | add r8,r8,r2 | ||
2386 | eor r2,r6,r7 | ||
2387 | eor r0,r5,r5,ror#5 | ||
2388 | add r9,r9,r3 | ||
2389 | and r2,r2,r5 | ||
2390 | eor r3,r0,r5,ror#19 | ||
2391 | eor r0,r9,r9,ror#11 | ||
2392 | eor r2,r2,r7 | ||
2393 | add r8,r8,r3,ror#6 | ||
2394 | eor r3,r9,r10 | ||
2395 | eor r0,r0,r9,ror#20 | ||
2396 | add r8,r8,r2 | ||
2397 | ldr r2,[sp,#16] | ||
2398 | and r12,r12,r3 | ||
2399 | add r4,r4,r8 | ||
2400 | add r8,r8,r0,ror#2 | ||
2401 | eor r12,r12,r10 | ||
2402 | vst1.32 {q8},[r1,:128]! | ||
2403 | add r7,r7,r2 | ||
2404 | eor r2,r5,r6 | ||
2405 | eor r0,r4,r4,ror#5 | ||
2406 | add r8,r8,r12 | ||
2407 | vld1.32 {q8},[r14,:128]! | ||
2408 | and r2,r2,r4 | ||
2409 | eor r12,r0,r4,ror#19 | ||
2410 | eor r0,r8,r8,ror#11 | ||
2411 | eor r2,r2,r6 | ||
2412 | vrev32.8 q1,q1 | ||
2413 | add r7,r7,r12,ror#6 | ||
2414 | eor r12,r8,r9 | ||
2415 | eor r0,r0,r8,ror#20 | ||
2416 | add r7,r7,r2 | ||
2417 | vadd.i32 q8,q8,q1 | ||
2418 | ldr r2,[sp,#20] | ||
2419 | and r3,r3,r12 | ||
2420 | add r11,r11,r7 | ||
2421 | add r7,r7,r0,ror#2 | ||
2422 | eor r3,r3,r9 | ||
2423 | add r6,r6,r2 | ||
2424 | eor r2,r4,r5 | ||
2425 | eor r0,r11,r11,ror#5 | ||
2426 | add r7,r7,r3 | ||
2427 | and r2,r2,r11 | ||
2428 | eor r3,r0,r11,ror#19 | ||
2429 | eor r0,r7,r7,ror#11 | ||
2430 | eor r2,r2,r5 | ||
2431 | add r6,r6,r3,ror#6 | ||
2432 | eor r3,r7,r8 | ||
2433 | eor r0,r0,r7,ror#20 | ||
2434 | add r6,r6,r2 | ||
2435 | ldr r2,[sp,#24] | ||
2436 | and r12,r12,r3 | ||
2437 | add r10,r10,r6 | ||
2438 | add r6,r6,r0,ror#2 | ||
2439 | eor r12,r12,r8 | ||
2440 | add r5,r5,r2 | ||
2441 | eor r2,r11,r4 | ||
2442 | eor r0,r10,r10,ror#5 | ||
2443 | add r6,r6,r12 | ||
2444 | and r2,r2,r10 | ||
2445 | eor r12,r0,r10,ror#19 | ||
2446 | eor r0,r6,r6,ror#11 | ||
2447 | eor r2,r2,r4 | ||
2448 | add r5,r5,r12,ror#6 | ||
2449 | eor r12,r6,r7 | ||
2450 | eor r0,r0,r6,ror#20 | ||
2451 | add r5,r5,r2 | ||
2452 | ldr r2,[sp,#28] | ||
2453 | and r3,r3,r12 | ||
2454 | add r9,r9,r5 | ||
2455 | add r5,r5,r0,ror#2 | ||
2456 | eor r3,r3,r7 | ||
2457 | add r4,r4,r2 | ||
2458 | eor r2,r10,r11 | ||
2459 | eor r0,r9,r9,ror#5 | ||
2460 | add r5,r5,r3 | ||
2461 | and r2,r2,r9 | ||
2462 | eor r3,r0,r9,ror#19 | ||
2463 | eor r0,r5,r5,ror#11 | ||
2464 | eor r2,r2,r11 | ||
2465 | add r4,r4,r3,ror#6 | ||
2466 | eor r3,r5,r6 | ||
2467 | eor r0,r0,r5,ror#20 | ||
2468 | add r4,r4,r2 | ||
2469 | ldr r2,[sp,#32] | ||
2470 | and r12,r12,r3 | ||
2471 | add r8,r8,r4 | ||
2472 | add r4,r4,r0,ror#2 | ||
2473 | eor r12,r12,r6 | ||
2474 | vst1.32 {q8},[r1,:128]! | ||
2475 | add r11,r11,r2 | ||
2476 | eor r2,r9,r10 | ||
2477 | eor r0,r8,r8,ror#5 | ||
2478 | add r4,r4,r12 | ||
2479 | vld1.32 {q8},[r14,:128]! | ||
2480 | and r2,r2,r8 | ||
2481 | eor r12,r0,r8,ror#19 | ||
2482 | eor r0,r4,r4,ror#11 | ||
2483 | eor r2,r2,r10 | ||
2484 | vrev32.8 q2,q2 | ||
2485 | add r11,r11,r12,ror#6 | ||
2486 | eor r12,r4,r5 | ||
2487 | eor r0,r0,r4,ror#20 | ||
2488 | add r11,r11,r2 | ||
2489 | vadd.i32 q8,q8,q2 | ||
2490 | ldr r2,[sp,#36] | ||
2491 | and r3,r3,r12 | ||
2492 | add r7,r7,r11 | ||
2493 | add r11,r11,r0,ror#2 | ||
2494 | eor r3,r3,r5 | ||
2495 | add r10,r10,r2 | ||
2496 | eor r2,r8,r9 | ||
2497 | eor r0,r7,r7,ror#5 | ||
2498 | add r11,r11,r3 | ||
2499 | and r2,r2,r7 | ||
2500 | eor r3,r0,r7,ror#19 | ||
2501 | eor r0,r11,r11,ror#11 | ||
2502 | eor r2,r2,r9 | ||
2503 | add r10,r10,r3,ror#6 | ||
2504 | eor r3,r11,r4 | ||
2505 | eor r0,r0,r11,ror#20 | ||
2506 | add r10,r10,r2 | ||
2507 | ldr r2,[sp,#40] | ||
2508 | and r12,r12,r3 | ||
2509 | add r6,r6,r10 | ||
2510 | add r10,r10,r0,ror#2 | ||
2511 | eor r12,r12,r4 | ||
2512 | add r9,r9,r2 | ||
2513 | eor r2,r7,r8 | ||
2514 | eor r0,r6,r6,ror#5 | ||
2515 | add r10,r10,r12 | ||
2516 | and r2,r2,r6 | ||
2517 | eor r12,r0,r6,ror#19 | ||
2518 | eor r0,r10,r10,ror#11 | ||
2519 | eor r2,r2,r8 | ||
2520 | add r9,r9,r12,ror#6 | ||
2521 | eor r12,r10,r11 | ||
2522 | eor r0,r0,r10,ror#20 | ||
2523 | add r9,r9,r2 | ||
2524 | ldr r2,[sp,#44] | ||
2525 | and r3,r3,r12 | ||
2526 | add r5,r5,r9 | ||
2527 | add r9,r9,r0,ror#2 | ||
2528 | eor r3,r3,r11 | ||
2529 | add r8,r8,r2 | ||
2530 | eor r2,r6,r7 | ||
2531 | eor r0,r5,r5,ror#5 | ||
2532 | add r9,r9,r3 | ||
2533 | and r2,r2,r5 | ||
2534 | eor r3,r0,r5,ror#19 | ||
2535 | eor r0,r9,r9,ror#11 | ||
2536 | eor r2,r2,r7 | ||
2537 | add r8,r8,r3,ror#6 | ||
2538 | eor r3,r9,r10 | ||
2539 | eor r0,r0,r9,ror#20 | ||
2540 | add r8,r8,r2 | ||
2541 | ldr r2,[sp,#48] | ||
2542 | and r12,r12,r3 | ||
2543 | add r4,r4,r8 | ||
2544 | add r8,r8,r0,ror#2 | ||
2545 | eor r12,r12,r10 | ||
2546 | vst1.32 {q8},[r1,:128]! | ||
2547 | add r7,r7,r2 | ||
2548 | eor r2,r5,r6 | ||
2549 | eor r0,r4,r4,ror#5 | ||
2550 | add r8,r8,r12 | ||
2551 | vld1.32 {q8},[r14,:128]! | ||
2552 | and r2,r2,r4 | ||
2553 | eor r12,r0,r4,ror#19 | ||
2554 | eor r0,r8,r8,ror#11 | ||
2555 | eor r2,r2,r6 | ||
2556 | vrev32.8 q3,q3 | ||
2557 | add r7,r7,r12,ror#6 | ||
2558 | eor r12,r8,r9 | ||
2559 | eor r0,r0,r8,ror#20 | ||
2560 | add r7,r7,r2 | ||
2561 | vadd.i32 q8,q8,q3 | ||
2562 | ldr r2,[sp,#52] | ||
2563 | and r3,r3,r12 | ||
2564 | add r11,r11,r7 | ||
2565 | add r7,r7,r0,ror#2 | ||
2566 | eor r3,r3,r9 | ||
2567 | add r6,r6,r2 | ||
2568 | eor r2,r4,r5 | ||
2569 | eor r0,r11,r11,ror#5 | ||
2570 | add r7,r7,r3 | ||
2571 | and r2,r2,r11 | ||
2572 | eor r3,r0,r11,ror#19 | ||
2573 | eor r0,r7,r7,ror#11 | ||
2574 | eor r2,r2,r5 | ||
2575 | add r6,r6,r3,ror#6 | ||
2576 | eor r3,r7,r8 | ||
2577 | eor r0,r0,r7,ror#20 | ||
2578 | add r6,r6,r2 | ||
2579 | ldr r2,[sp,#56] | ||
2580 | and r12,r12,r3 | ||
2581 | add r10,r10,r6 | ||
2582 | add r6,r6,r0,ror#2 | ||
2583 | eor r12,r12,r8 | ||
2584 | add r5,r5,r2 | ||
2585 | eor r2,r11,r4 | ||
2586 | eor r0,r10,r10,ror#5 | ||
2587 | add r6,r6,r12 | ||
2588 | and r2,r2,r10 | ||
2589 | eor r12,r0,r10,ror#19 | ||
2590 | eor r0,r6,r6,ror#11 | ||
2591 | eor r2,r2,r4 | ||
2592 | add r5,r5,r12,ror#6 | ||
2593 | eor r12,r6,r7 | ||
2594 | eor r0,r0,r6,ror#20 | ||
2595 | add r5,r5,r2 | ||
2596 | ldr r2,[sp,#60] | ||
2597 | and r3,r3,r12 | ||
2598 | add r9,r9,r5 | ||
2599 | add r5,r5,r0,ror#2 | ||
2600 | eor r3,r3,r7 | ||
2601 | add r4,r4,r2 | ||
2602 | eor r2,r10,r11 | ||
2603 | eor r0,r9,r9,ror#5 | ||
2604 | add r5,r5,r3 | ||
2605 | and r2,r2,r9 | ||
2606 | eor r3,r0,r9,ror#19 | ||
2607 | eor r0,r5,r5,ror#11 | ||
2608 | eor r2,r2,r11 | ||
2609 | add r4,r4,r3,ror#6 | ||
2610 | eor r3,r5,r6 | ||
2611 | eor r0,r0,r5,ror#20 | ||
2612 | add r4,r4,r2 | ||
2613 | ldr r2,[sp,#64] | ||
2614 | and r12,r12,r3 | ||
2615 | add r8,r8,r4 | ||
2616 | add r4,r4,r0,ror#2 | ||
2617 | eor r12,r12,r6 | ||
2618 | vst1.32 {q8},[r1,:128]! | ||
2619 | ldr r0,[r2,#0] | ||
2620 | add r4,r4,r12 @ h+=Maj(a,b,c) from the past | ||
2621 | ldr r12,[r2,#4] | ||
2622 | ldr r3,[r2,#8] | ||
2623 | ldr r1,[r2,#12] | ||
2624 | add r4,r4,r0 @ accumulate | ||
2625 | ldr r0,[r2,#16] | ||
2626 | add r5,r5,r12 | ||
2627 | ldr r12,[r2,#20] | ||
2628 | add r6,r6,r3 | ||
2629 | ldr r3,[r2,#24] | ||
2630 | add r7,r7,r1 | ||
2631 | ldr r1,[r2,#28] | ||
2632 | add r8,r8,r0 | ||
2633 | str r4,[r2],#4 | ||
2634 | add r9,r9,r12 | ||
2635 | str r5,[r2],#4 | ||
2636 | add r10,r10,r3 | ||
2637 | str r6,[r2],#4 | ||
2638 | add r11,r11,r1 | ||
2639 | str r7,[r2],#4 | ||
2640 | stmia r2,{r8-r11} | ||
2641 | |||
2642 | ittte ne | ||
2643 | movne r1,sp | ||
2644 | ldrne r2,[sp,#0] | ||
2645 | eorne r12,r12,r12 | ||
2646 | ldreq sp,[sp,#76] @ restore original sp | ||
2647 | itt ne | ||
2648 | eorne r3,r5,r6 | ||
2649 | bne .L_00_48 | ||
2650 | |||
2651 | ldmia sp!,{r4-r12,pc} | ||
2652 | .size sha256_block_data_order_neon,.-sha256_block_data_order_neon | ||
2653 | #endif | ||
2654 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
2655 | |||
2656 | # ifdef __thumb2__ | ||
2657 | # define INST(a,b,c,d) .byte c,d|0xc,a,b | ||
2658 | # else | ||
2659 | # define INST(a,b,c,d) .byte a,b,c,d | ||
2660 | # endif | ||
2661 | |||
2662 | .type sha256_block_data_order_armv8,%function | ||
2663 | .align 5 | ||
2664 | sha256_block_data_order_armv8: | ||
2665 | .LARMv8: | ||
2666 | vld1.32 {q0,q1},[r0] | ||
2667 | # ifdef __thumb2__ | ||
2668 | adr r3,.LARMv8 | ||
2669 | sub r3,r3,#.LARMv8-K256 | ||
2670 | # else | ||
2671 | adrl r3,K256 | ||
2672 | # endif | ||
2673 | add r2,r1,r2,lsl#6 @ len to point at the end of inp | ||
2674 | |||
2675 | .Loop_v8: | ||
2676 | vld1.8 {q8-q9},[r1]! | ||
2677 | vld1.8 {q10-q11},[r1]! | ||
2678 | vld1.32 {q12},[r3]! | ||
2679 | vrev32.8 q8,q8 | ||
2680 | vrev32.8 q9,q9 | ||
2681 | vrev32.8 q10,q10 | ||
2682 | vrev32.8 q11,q11 | ||
2683 | vmov q14,q0 @ offload | ||
2684 | vmov q15,q1 | ||
2685 | teq r1,r2 | ||
2686 | vld1.32 {q13},[r3]! | ||
2687 | vadd.i32 q12,q12,q8 | ||
2688 | INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 | ||
2689 | vmov q2,q0 | ||
2690 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2691 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2692 | INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 | ||
2693 | vld1.32 {q12},[r3]! | ||
2694 | vadd.i32 q13,q13,q9 | ||
2695 | INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 | ||
2696 | vmov q2,q0 | ||
2697 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2698 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2699 | INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 | ||
2700 | vld1.32 {q13},[r3]! | ||
2701 | vadd.i32 q12,q12,q10 | ||
2702 | INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 | ||
2703 | vmov q2,q0 | ||
2704 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2705 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2706 | INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 | ||
2707 | vld1.32 {q12},[r3]! | ||
2708 | vadd.i32 q13,q13,q11 | ||
2709 | INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 | ||
2710 | vmov q2,q0 | ||
2711 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2712 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2713 | INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 | ||
2714 | vld1.32 {q13},[r3]! | ||
2715 | vadd.i32 q12,q12,q8 | ||
2716 | INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 | ||
2717 | vmov q2,q0 | ||
2718 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2719 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2720 | INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 | ||
2721 | vld1.32 {q12},[r3]! | ||
2722 | vadd.i32 q13,q13,q9 | ||
2723 | INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 | ||
2724 | vmov q2,q0 | ||
2725 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2726 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2727 | INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 | ||
2728 | vld1.32 {q13},[r3]! | ||
2729 | vadd.i32 q12,q12,q10 | ||
2730 | INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 | ||
2731 | vmov q2,q0 | ||
2732 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2733 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2734 | INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 | ||
2735 | vld1.32 {q12},[r3]! | ||
2736 | vadd.i32 q13,q13,q11 | ||
2737 | INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 | ||
2738 | vmov q2,q0 | ||
2739 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2740 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2741 | INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 | ||
2742 | vld1.32 {q13},[r3]! | ||
2743 | vadd.i32 q12,q12,q8 | ||
2744 | INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 | ||
2745 | vmov q2,q0 | ||
2746 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2747 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2748 | INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 | ||
2749 | vld1.32 {q12},[r3]! | ||
2750 | vadd.i32 q13,q13,q9 | ||
2751 | INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 | ||
2752 | vmov q2,q0 | ||
2753 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2754 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2755 | INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 | ||
2756 | vld1.32 {q13},[r3]! | ||
2757 | vadd.i32 q12,q12,q10 | ||
2758 | INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 | ||
2759 | vmov q2,q0 | ||
2760 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2761 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2762 | INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 | ||
2763 | vld1.32 {q12},[r3]! | ||
2764 | vadd.i32 q13,q13,q11 | ||
2765 | INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 | ||
2766 | vmov q2,q0 | ||
2767 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2768 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2769 | INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 | ||
2770 | vld1.32 {q13},[r3]! | ||
2771 | vadd.i32 q12,q12,q8 | ||
2772 | vmov q2,q0 | ||
2773 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2774 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2775 | |||
2776 | vld1.32 {q12},[r3]! | ||
2777 | vadd.i32 q13,q13,q9 | ||
2778 | vmov q2,q0 | ||
2779 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2780 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2781 | |||
2782 | vld1.32 {q13},[r3] | ||
2783 | vadd.i32 q12,q12,q10 | ||
2784 | sub r3,r3,#256-16 @ rewind | ||
2785 | vmov q2,q0 | ||
2786 | INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 | ||
2787 | INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 | ||
2788 | |||
2789 | vadd.i32 q13,q13,q11 | ||
2790 | vmov q2,q0 | ||
2791 | INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 | ||
2792 | INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 | ||
2793 | |||
2794 | vadd.i32 q0,q0,q14 | ||
2795 | vadd.i32 q1,q1,q15 | ||
2796 | it ne | ||
2797 | bne .Loop_v8 | ||
2798 | |||
2799 | vst1.32 {q0,q1},[r0] | ||
2800 | |||
2801 | bx lr @ bx lr | ||
2802 | .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 | ||
2803 | #endif | ||
2804 | .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" | ||
2805 | .align 2 | ||
2806 | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||
2807 | .comm OPENSSL_armcap_P,4,4 | ||
2808 | #endif | ||
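For orientation, the CRYPTOGAMS file above exposes its block transforms through one three-argument convention: r0 points at the eight-word hash state, r1 at the input, and r2 carries the 64-byte block count. The prototypes below mirror the declarations in the glue code that follows (sha256_block_data_order_armv8 is compiled out under __KERNEL__, so the kernel only binds the first two):

	#include <linux/linkage.h>
	#include <linux/types.h>

	/* Scalar ARMv4+ path, always available. */
	asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
						unsigned int num_blks);

	/* NEON path; the caller must bracket every use with
	 * kernel_neon_begin()/kernel_neon_end(). */
	asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
						     unsigned int num_blks);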
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c new file mode 100644 index 000000000000..a84e869ef900 --- /dev/null +++ b/arch/arm/crypto/sha256_glue.c | |||
@@ -0,0 +1,128 @@ | |||
1 | /* | ||
2 | * Glue code for the SHA256 Secure Hash Algorithm assembly implementation | ||
3 | * using optimized ARM assembler and NEON instructions. | ||
4 | * | ||
5 | * Copyright © 2015 Google Inc. | ||
6 | * | ||
7 | * This file is based on sha256_ssse3_glue.c: | ||
8 | * Copyright (C) 2013 Intel Corporation | ||
9 | * Author: Tim Chen <tim.c.chen@linux.intel.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <crypto/internal/hash.h> | ||
19 | #include <linux/crypto.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/mm.h> | ||
23 | #include <linux/cryptohash.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <crypto/sha.h> | ||
27 | #include <crypto/sha256_base.h> | ||
28 | #include <asm/simd.h> | ||
29 | #include <asm/neon.h> | ||
30 | |||
31 | #include "sha256_glue.h" | ||
32 | |||
33 | asmlinkage void sha256_block_data_order(u32 *digest, const void *data, | ||
34 | unsigned int num_blks); | ||
35 | |||
36 | int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, | ||
37 | unsigned int len) | ||
38 | { | ||
39 | /* make sure casting to sha256_block_fn() is safe */ | ||
40 | BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); | ||
41 | |||
42 | return sha256_base_do_update(desc, data, len, | ||
43 | (sha256_block_fn *)sha256_block_data_order); | ||
44 | } | ||
45 | EXPORT_SYMBOL(crypto_sha256_arm_update); | ||
46 | |||
47 | static int sha256_final(struct shash_desc *desc, u8 *out) | ||
48 | { | ||
49 | sha256_base_do_finalize(desc, | ||
50 | (sha256_block_fn *)sha256_block_data_order); | ||
51 | return sha256_base_finish(desc, out); | ||
52 | } | ||
53 | |||
54 | int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, | ||
55 | unsigned int len, u8 *out) | ||
56 | { | ||
57 | sha256_base_do_update(desc, data, len, | ||
58 | (sha256_block_fn *)sha256_block_data_order); | ||
59 | return sha256_final(desc, out); | ||
60 | } | ||
61 | EXPORT_SYMBOL(crypto_sha256_arm_finup); | ||
62 | |||
63 | static struct shash_alg algs[] = { { | ||
64 | .digestsize = SHA256_DIGEST_SIZE, | ||
65 | .init = sha256_base_init, | ||
66 | .update = crypto_sha256_arm_update, | ||
67 | .final = sha256_final, | ||
68 | .finup = crypto_sha256_arm_finup, | ||
69 | .descsize = sizeof(struct sha256_state), | ||
70 | .base = { | ||
71 | .cra_name = "sha256", | ||
72 | .cra_driver_name = "sha256-asm", | ||
73 | .cra_priority = 150, | ||
74 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
75 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
76 | .cra_module = THIS_MODULE, | ||
77 | } | ||
78 | }, { | ||
79 | .digestsize = SHA224_DIGEST_SIZE, | ||
80 | .init = sha224_base_init, | ||
81 | .update = crypto_sha256_arm_update, | ||
82 | .final = sha256_final, | ||
83 | .finup = crypto_sha256_arm_finup, | ||
84 | .descsize = sizeof(struct sha256_state), | ||
85 | .base = { | ||
86 | .cra_name = "sha224", | ||
87 | .cra_driver_name = "sha224-asm", | ||
88 | .cra_priority = 150, | ||
89 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
90 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
91 | .cra_module = THIS_MODULE, | ||
92 | } | ||
93 | } }; | ||
94 | |||
95 | static int __init sha256_mod_init(void) | ||
96 | { | ||
97 | int res = crypto_register_shashes(algs, ARRAY_SIZE(algs)); | ||
98 | |||
99 | if (res < 0) | ||
100 | return res; | ||
101 | |||
102 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { | ||
103 | res = crypto_register_shashes(sha256_neon_algs, | ||
104 | ARRAY_SIZE(sha256_neon_algs)); | ||
105 | |||
106 | if (res < 0) | ||
107 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | ||
108 | } | ||
109 | |||
110 | return res; | ||
111 | } | ||
112 | |||
113 | static void __exit sha256_mod_fini(void) | ||
114 | { | ||
115 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | ||
116 | |||
117 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) | ||
118 | crypto_unregister_shashes(sha256_neon_algs, | ||
119 | ARRAY_SIZE(sha256_neon_algs)); | ||
120 | } | ||
121 | |||
122 | module_init(sha256_mod_init); | ||
123 | module_exit(sha256_mod_fini); | ||
124 | |||
125 | MODULE_LICENSE("GPL"); | ||
126 | MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON"); | ||
127 | |||
128 | MODULE_ALIAS_CRYPTO("sha256"); | ||
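Nothing calls the assembler routines by name once the module loads; users ask the crypto API for "sha256" and the registration above decides who serves it, with sha256-neon (priority 250) beating sha256-asm (150), which in turn beats the generic C implementation. A hypothetical one-shot caller, assuming the shash descriptor-on-stack pattern of this kernel era — demo only, not part of the patch:

	#include <crypto/hash.h>
	#include <crypto/sha.h>
	#include <linux/err.h>

	static int sha256_demo(const u8 *msg, unsigned int len,
			       u8 out[SHA256_DIGEST_SIZE])
	{
		struct crypto_shash *tfm;
		int err;

		/* Highest-priority "sha256" implementation wins the lookup. */
		tfm = crypto_alloc_shash("sha256", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;
			err = crypto_shash_digest(desc, msg, len, out);
		}

		crypto_free_shash(tfm);
		return err;
	}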
diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h new file mode 100644 index 000000000000..7cf0bf786ada --- /dev/null +++ b/arch/arm/crypto/sha256_glue.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef _CRYPTO_SHA256_GLUE_H | ||
2 | #define _CRYPTO_SHA256_GLUE_H | ||
3 | |||
4 | #include <linux/crypto.h> | ||
5 | |||
6 | extern struct shash_alg sha256_neon_algs[2]; | ||
7 | |||
8 | int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data, | ||
9 | unsigned int len); | ||
10 | |||
11 | int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data, | ||
12 | unsigned int len, u8 *hash); | ||
13 | |||
14 | #endif /* _CRYPTO_SHA256_GLUE_H */ | ||
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c new file mode 100644 index 000000000000..39ccd658817e --- /dev/null +++ b/arch/arm/crypto/sha256_neon_glue.c | |||
@@ -0,0 +1,101 @@ | |||
1 | /* | ||
2 | * Glue code for the SHA256 Secure Hash Algorithm assembly implementation | ||
3 | * using NEON instructions. | ||
4 | * | ||
5 | * Copyright © 2015 Google Inc. | ||
6 | * | ||
7 | * This file is based on sha512_neon_glue.c: | ||
8 | * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License as published by the Free | ||
12 | * Software Foundation; either version 2 of the License, or (at your option) | ||
13 | * any later version. | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #include <crypto/internal/hash.h> | ||
18 | #include <linux/cryptohash.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/string.h> | ||
21 | #include <crypto/sha.h> | ||
22 | #include <crypto/sha256_base.h> | ||
23 | #include <asm/byteorder.h> | ||
24 | #include <asm/simd.h> | ||
25 | #include <asm/neon.h> | ||
26 | |||
27 | #include "sha256_glue.h" | ||
28 | |||
29 | asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data, | ||
30 | unsigned int num_blks); | ||
31 | |||
32 | static int sha256_update(struct shash_desc *desc, const u8 *data, | ||
33 | unsigned int len) | ||
34 | { | ||
35 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
36 | |||
37 | if (!may_use_simd() || | ||
38 | (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) | ||
39 | return crypto_sha256_arm_update(desc, data, len); | ||
40 | |||
41 | kernel_neon_begin(); | ||
42 | sha256_base_do_update(desc, data, len, | ||
43 | (sha256_block_fn *)sha256_block_data_order_neon); | ||
44 | kernel_neon_end(); | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static int sha256_finup(struct shash_desc *desc, const u8 *data, | ||
50 | unsigned int len, u8 *out) | ||
51 | { | ||
52 | if (!may_use_simd()) | ||
53 | return crypto_sha256_arm_finup(desc, data, len, out); | ||
54 | |||
55 | kernel_neon_begin(); | ||
56 | if (len) | ||
57 | sha256_base_do_update(desc, data, len, | ||
58 | (sha256_block_fn *)sha256_block_data_order_neon); | ||
59 | sha256_base_do_finalize(desc, | ||
60 | (sha256_block_fn *)sha256_block_data_order_neon); | ||
61 | kernel_neon_end(); | ||
62 | |||
63 | return sha256_base_finish(desc, out); | ||
64 | } | ||
65 | |||
66 | static int sha256_final(struct shash_desc *desc, u8 *out) | ||
67 | { | ||
68 | return sha256_finup(desc, NULL, 0, out); | ||
69 | } | ||
70 | |||
71 | struct shash_alg sha256_neon_algs[] = { { | ||
72 | .digestsize = SHA256_DIGEST_SIZE, | ||
73 | .init = sha256_base_init, | ||
74 | .update = sha256_update, | ||
75 | .final = sha256_final, | ||
76 | .finup = sha256_finup, | ||
77 | .descsize = sizeof(struct sha256_state), | ||
78 | .base = { | ||
79 | .cra_name = "sha256", | ||
80 | .cra_driver_name = "sha256-neon", | ||
81 | .cra_priority = 250, | ||
82 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
83 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
84 | .cra_module = THIS_MODULE, | ||
85 | } | ||
86 | }, { | ||
87 | .digestsize = SHA224_DIGEST_SIZE, | ||
88 | .init = sha224_base_init, | ||
89 | .update = sha256_update, | ||
90 | .final = sha256_final, | ||
91 | .finup = sha256_finup, | ||
92 | .descsize = sizeof(struct sha256_state), | ||
93 | .base = { | ||
94 | .cra_name = "sha224", | ||
95 | .cra_driver_name = "sha224-neon", | ||
96 | .cra_priority = 250, | ||
97 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
98 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
99 | .cra_module = THIS_MODULE, | ||
100 | } | ||
101 | } }; | ||
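The two glue files implement a deliberate fallback: the NEON driver registers at higher priority but defers to the scalar ARM code whenever NEON cannot be used in the current context, or when no complete 64-byte block would be processed, since kernel_neon_begin()/kernel_neon_end() carry a fixed save/restore cost. An annotated restatement of the update path above — a sketch that assumes the same includes as sha256_neon_glue.c and changes no behaviour:

	static int sha256_update_annotated(struct shash_desc *desc, const u8 *data,
					   unsigned int len)
	{
		struct sha256_state *sctx = shash_desc_ctx(desc);

		/*
		 * Take the scalar ARM asm path when:
		 *  - may_use_simd() says NEON is off limits here (e.g. we
		 *    are in a hard interrupt), or
		 *  - the bytes already buffered plus the new input still do
		 *    not fill one SHA256_BLOCK_SIZE, so no block transform
		 *    would run and entering a NEON section would be pure
		 *    overhead.
		 */
		if (!may_use_simd() ||
		    (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
			return crypto_sha256_arm_update(desc, data, len);

		kernel_neon_begin();
		sha256_base_do_update(desc, data, len,
			(sha256_block_fn *)sha256_block_data_order_neon);
		kernel_neon_end();

		return 0;
	}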
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index b1b5b893eb20..05d9e16c0dfd 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c | |||
@@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { { | |||
284 | .cra_name = "__ecb-aes-" MODE, | 284 | .cra_name = "__ecb-aes-" MODE, |
285 | .cra_driver_name = "__driver-ecb-aes-" MODE, | 285 | .cra_driver_name = "__driver-ecb-aes-" MODE, |
286 | .cra_priority = 0, | 286 | .cra_priority = 0, |
287 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 287 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
288 | CRYPTO_ALG_INTERNAL, | ||
288 | .cra_blocksize = AES_BLOCK_SIZE, | 289 | .cra_blocksize = AES_BLOCK_SIZE, |
289 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 290 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
290 | .cra_alignmask = 7, | 291 | .cra_alignmask = 7, |
@@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { { | |||
302 | .cra_name = "__cbc-aes-" MODE, | 303 | .cra_name = "__cbc-aes-" MODE, |
303 | .cra_driver_name = "__driver-cbc-aes-" MODE, | 304 | .cra_driver_name = "__driver-cbc-aes-" MODE, |
304 | .cra_priority = 0, | 305 | .cra_priority = 0, |
305 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 306 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
307 | CRYPTO_ALG_INTERNAL, | ||
306 | .cra_blocksize = AES_BLOCK_SIZE, | 308 | .cra_blocksize = AES_BLOCK_SIZE, |
307 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 309 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
308 | .cra_alignmask = 7, | 310 | .cra_alignmask = 7, |
@@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { { | |||
320 | .cra_name = "__ctr-aes-" MODE, | 322 | .cra_name = "__ctr-aes-" MODE, |
321 | .cra_driver_name = "__driver-ctr-aes-" MODE, | 323 | .cra_driver_name = "__driver-ctr-aes-" MODE, |
322 | .cra_priority = 0, | 324 | .cra_priority = 0, |
323 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 325 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
326 | CRYPTO_ALG_INTERNAL, | ||
324 | .cra_blocksize = 1, | 327 | .cra_blocksize = 1, |
325 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 328 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
326 | .cra_alignmask = 7, | 329 | .cra_alignmask = 7, |
@@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { { | |||
338 | .cra_name = "__xts-aes-" MODE, | 341 | .cra_name = "__xts-aes-" MODE, |
339 | .cra_driver_name = "__driver-xts-aes-" MODE, | 342 | .cra_driver_name = "__driver-xts-aes-" MODE, |
340 | .cra_priority = 0, | 343 | .cra_priority = 0, |
341 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
345 | CRYPTO_ALG_INTERNAL, | ||
342 | .cra_blocksize = AES_BLOCK_SIZE, | 346 | .cra_blocksize = AES_BLOCK_SIZE, |
343 | .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), | 347 | .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), |
344 | .cra_alignmask = 7, | 348 | .cra_alignmask = 7, |
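All four hunks above make the same one-line change: the internal "__driver-*-aes-" MODE helpers gain CRYPTO_ALG_INTERNAL, so an ordinary crypto_alloc_*() lookup no longer returns them. Retrieving one now requires naming the flag in both the type and the mask; a sketch of that pattern, with a hypothetical concrete mode suffix standing in for MODE:

	#include <linux/crypto.h>

	/* Hypothetical example: only a caller that passes CRYPTO_ALG_INTERNAL
	 * in both type and mask (as the ablk helper wrappers do) can still
	 * reach the flagged algorithm; everyone else's lookup filters it out. */
	static struct crypto_ablkcipher *grab_internal_ecb_aes(void)
	{
		return crypto_alloc_ablkcipher("__driver-ecb-aes-ce",
					       CRYPTO_ALG_INTERNAL,
					       CRYPTO_ALG_INTERNAL);
	}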
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 09d57d98609c..033aae6d732a 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S | |||
@@ -66,8 +66,8 @@ | |||
66 | .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 | 66 | .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, | 69 | * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, |
70 | * u8 *head, long bytes) | 70 | * int blocks) |
71 | */ | 71 | */ |
72 | ENTRY(sha1_ce_transform) | 72 | ENTRY(sha1_ce_transform) |
73 | /* load round constants */ | 73 | /* load round constants */ |
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform) | |||
78 | ld1r {k3.4s}, [x6] | 78 | ld1r {k3.4s}, [x6] |
79 | 79 | ||
80 | /* load state */ | 80 | /* load state */ |
81 | ldr dga, [x2] | 81 | ldr dga, [x0] |
82 | ldr dgb, [x2, #16] | 82 | ldr dgb, [x0, #16] |
83 | 83 | ||
84 | /* load partial state (if supplied) */ | 84 | /* load sha1_ce_state::finalize */ |
85 | cbz x3, 0f | 85 | ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize] |
86 | ld1 {v8.4s-v11.4s}, [x3] | ||
87 | b 1f | ||
88 | 86 | ||
89 | /* load input */ | 87 | /* load input */ |
90 | 0: ld1 {v8.4s-v11.4s}, [x1], #64 | 88 | 0: ld1 {v8.4s-v11.4s}, [x1], #64 |
91 | sub w0, w0, #1 | 89 | sub w2, w2, #1 |
92 | 90 | ||
93 | 1: | ||
94 | CPU_LE( rev32 v8.16b, v8.16b ) | 91 | CPU_LE( rev32 v8.16b, v8.16b ) |
95 | CPU_LE( rev32 v9.16b, v9.16b ) | 92 | CPU_LE( rev32 v9.16b, v9.16b ) |
96 | CPU_LE( rev32 v10.16b, v10.16b ) | 93 | CPU_LE( rev32 v10.16b, v10.16b ) |
97 | CPU_LE( rev32 v11.16b, v11.16b ) | 94 | CPU_LE( rev32 v11.16b, v11.16b ) |
98 | 95 | ||
99 | 2: add t0.4s, v8.4s, k0.4s | 96 | 1: add t0.4s, v8.4s, k0.4s |
100 | mov dg0v.16b, dgav.16b | 97 | mov dg0v.16b, dgav.16b |
101 | 98 | ||
102 | add_update c, ev, k0, 8, 9, 10, 11, dgb | 99 | add_update c, ev, k0, 8, 9, 10, 11, dgb |
@@ -127,15 +124,15 @@ CPU_LE( rev32 v11.16b, v11.16b ) | |||
127 | add dgbv.2s, dgbv.2s, dg1v.2s | 124 | add dgbv.2s, dgbv.2s, dg1v.2s |
128 | add dgav.4s, dgav.4s, dg0v.4s | 125 | add dgav.4s, dgav.4s, dg0v.4s |
129 | 126 | ||
130 | cbnz w0, 0b | 127 | cbnz w2, 0b |
131 | 128 | ||
132 | /* | 129 | /* |
133 | * Final block: add padding and total bit count. | 130 | * Final block: add padding and total bit count. |
134 | * Skip if we have no total byte count in x4. In that case, the input | 131 | * Skip if the input size was not a round multiple of the block size, |
135 | * size was not a round multiple of the block size, and the padding is | 132 | * the padding is handled by the C code in that case. |
136 | * handled by the C code. | ||
137 | */ | 133 | */ |
138 | cbz x4, 3f | 134 | cbz x4, 3f |
135 | ldr x4, [x0, #:lo12:sha1_ce_offsetof_count] | ||
139 | movi v9.2d, #0 | 136 | movi v9.2d, #0 |
140 | mov x8, #0x80000000 | 137 | mov x8, #0x80000000 |
141 | movi v10.2d, #0 | 138 | movi v10.2d, #0 |
@@ -144,10 +141,10 @@ CPU_LE( rev32 v11.16b, v11.16b ) | |||
144 | mov x4, #0 | 141 | mov x4, #0 |
145 | mov v11.d[0], xzr | 142 | mov v11.d[0], xzr |
146 | mov v11.d[1], x7 | 143 | mov v11.d[1], x7 |
147 | b 2b | 144 | b 1b |
148 | 145 | ||
149 | /* store new state */ | 146 | /* store new state */ |
150 | 3: str dga, [x2] | 147 | 3: str dga, [x0] |
151 | str dgb, [x2, #16] | 148 | str dgb, [x0, #16] |
152 | ret | 149 | ret |
153 | ENDPROC(sha1_ce_transform) | 150 | ENDPROC(sha1_ce_transform) |
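The rewritten transform takes the state structure itself in x0 instead of a separate head/bytes pair; the loads of sha1_ce_offsetof_finalize and sha1_ce_offsetof_count above rely on offsets the C side exports to the assembler via the ASM_EXPORT macro in the glue diff that follows. A sketch of the layout contract, mirroring the struct that glue code defines:

	struct sha1_ce_state {
		struct sha1_state	sst;		/* sst.count feeds the length
							 * field of the padding block */
		u32			finalize;	/* nonzero: the asm appends the
							 * padding and bit count itself */
	};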
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 6fe83f37a750..114e7cc5de8c 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c | |||
@@ -12,144 +12,81 @@ | |||
12 | #include <asm/unaligned.h> | 12 | #include <asm/unaligned.h> |
13 | #include <crypto/internal/hash.h> | 13 | #include <crypto/internal/hash.h> |
14 | #include <crypto/sha.h> | 14 | #include <crypto/sha.h> |
15 | #include <crypto/sha1_base.h> | ||
15 | #include <linux/cpufeature.h> | 16 | #include <linux/cpufeature.h> |
16 | #include <linux/crypto.h> | 17 | #include <linux/crypto.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | 19 | ||
20 | #define ASM_EXPORT(sym, val) \ | ||
21 | asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); | ||
22 | |||
19 | MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); | 23 | MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); |
20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
21 | MODULE_LICENSE("GPL v2"); | 25 | MODULE_LICENSE("GPL v2"); |
22 | 26 | ||
23 | asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, | 27 | struct sha1_ce_state { |
24 | u8 *head, long bytes); | 28 | struct sha1_state sst; |
29 | u32 finalize; | ||
30 | }; | ||
25 | 31 | ||
26 | static int sha1_init(struct shash_desc *desc) | 32 | asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, |
27 | { | 33 | int blocks); |
28 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
29 | 34 | ||
30 | *sctx = (struct sha1_state){ | 35 | static int sha1_ce_update(struct shash_desc *desc, const u8 *data, |
31 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | 36 | unsigned int len) |
32 | }; | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | static int sha1_update(struct shash_desc *desc, const u8 *data, | ||
37 | unsigned int len) | ||
38 | { | 37 | { |
39 | struct sha1_state *sctx = shash_desc_ctx(desc); | 38 | struct sha1_ce_state *sctx = shash_desc_ctx(desc); |
40 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
41 | |||
42 | sctx->count += len; | ||
43 | |||
44 | if ((partial + len) >= SHA1_BLOCK_SIZE) { | ||
45 | int blocks; | ||
46 | |||
47 | if (partial) { | ||
48 | int p = SHA1_BLOCK_SIZE - partial; | ||
49 | 39 | ||
50 | memcpy(sctx->buffer + partial, data, p); | 40 | sctx->finalize = 0; |
51 | data += p; | 41 | kernel_neon_begin_partial(16); |
52 | len -= p; | 42 | sha1_base_do_update(desc, data, len, |
53 | } | 43 | (sha1_block_fn *)sha1_ce_transform); |
54 | 44 | kernel_neon_end(); | |
55 | blocks = len / SHA1_BLOCK_SIZE; | ||
56 | len %= SHA1_BLOCK_SIZE; | ||
57 | |||
58 | kernel_neon_begin_partial(16); | ||
59 | sha1_ce_transform(blocks, data, sctx->state, | ||
60 | partial ? sctx->buffer : NULL, 0); | ||
61 | kernel_neon_end(); | ||
62 | 45 | ||
63 | data += blocks * SHA1_BLOCK_SIZE; | ||
64 | partial = 0; | ||
65 | } | ||
66 | if (len) | ||
67 | memcpy(sctx->buffer + partial, data, len); | ||
68 | return 0; | 46 | return 0; |
69 | } | 47 | } |
70 | 48 | ||
71 | static int sha1_final(struct shash_desc *desc, u8 *out) | 49 | static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, |
50 | unsigned int len, u8 *out) | ||
72 | { | 51 | { |
73 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | 52 | struct sha1_ce_state *sctx = shash_desc_ctx(desc); |
53 | bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); | ||
74 | 54 | ||
75 | struct sha1_state *sctx = shash_desc_ctx(desc); | 55 | ASM_EXPORT(sha1_ce_offsetof_count, |
76 | __be64 bits = cpu_to_be64(sctx->count << 3); | 56 | offsetof(struct sha1_ce_state, sst.count)); |
77 | __be32 *dst = (__be32 *)out; | 57 | ASM_EXPORT(sha1_ce_offsetof_finalize, |
78 | int i; | 58 | offsetof(struct sha1_ce_state, finalize)); |
79 | |||
80 | u32 padlen = SHA1_BLOCK_SIZE | ||
81 | - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE); | ||
82 | |||
83 | sha1_update(desc, padding, padlen); | ||
84 | sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
85 | |||
86 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) | ||
87 | put_unaligned_be32(sctx->state[i], dst++); | ||
88 | |||
89 | *sctx = (struct sha1_state){}; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | static int sha1_finup(struct shash_desc *desc, const u8 *data, | ||
94 | unsigned int len, u8 *out) | ||
95 | { | ||
96 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
97 | __be32 *dst = (__be32 *)out; | ||
98 | int blocks; | ||
99 | int i; | ||
100 | |||
101 | if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) { | ||
102 | sha1_update(desc, data, len); | ||
103 | return sha1_final(desc, out); | ||
104 | } | ||
105 | 59 | ||
106 | /* | 60 | /* |
107 | * Use a fast path if the input is a multiple of 64 bytes. In | 61 | * Allow the asm code to perform the finalization if there is no |
108 | * this case, there is no need to copy data around, and we can | 62 | * partial data and the input is a round multiple of the block size. |
109 | * perform the entire digest calculation in a single invocation | ||
110 | * of sha1_ce_transform() | ||
111 | */ | 63 | */ |
112 | blocks = len / SHA1_BLOCK_SIZE; | 64 | sctx->finalize = finalize; |
113 | 65 | ||
114 | kernel_neon_begin_partial(16); | 66 | kernel_neon_begin_partial(16); |
115 | sha1_ce_transform(blocks, data, sctx->state, NULL, len); | 67 | sha1_base_do_update(desc, data, len, |
68 | (sha1_block_fn *)sha1_ce_transform); | ||
69 | if (!finalize) | ||
70 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); | ||
116 | kernel_neon_end(); | 71 | kernel_neon_end(); |
117 | 72 | return sha1_base_finish(desc, out); | |
118 | for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) | ||
119 | put_unaligned_be32(sctx->state[i], dst++); | ||
120 | |||
121 | *sctx = (struct sha1_state){}; | ||
122 | return 0; | ||
123 | } | 73 | } |
124 | 74 | ||
125 | static int sha1_export(struct shash_desc *desc, void *out) | 75 | static int sha1_ce_final(struct shash_desc *desc, u8 *out) |
126 | { | 76 | { |
127 | struct sha1_state *sctx = shash_desc_ctx(desc); | 77 | kernel_neon_begin_partial(16); |
128 | struct sha1_state *dst = out; | 78 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); |
129 | 79 | kernel_neon_end(); | |
130 | *dst = *sctx; | 80 | return sha1_base_finish(desc, out); |
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static int sha1_import(struct shash_desc *desc, const void *in) | ||
135 | { | ||
136 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
137 | struct sha1_state const *src = in; | ||
138 | |||
139 | *sctx = *src; | ||
140 | return 0; | ||
141 | } | 81 | } |
142 | 82 | ||
143 | static struct shash_alg alg = { | 83 | static struct shash_alg alg = { |
144 | .init = sha1_init, | 84 | .init = sha1_base_init, |
145 | .update = sha1_update, | 85 | .update = sha1_ce_update, |
146 | .final = sha1_final, | 86 | .final = sha1_ce_final, |
147 | .finup = sha1_finup, | 87 | .finup = sha1_ce_finup, |
148 | .export = sha1_export, | 88 | .descsize = sizeof(struct sha1_ce_state), |
149 | .import = sha1_import, | ||
150 | .descsize = sizeof(struct sha1_state), | ||
151 | .digestsize = SHA1_DIGEST_SIZE, | 89 | .digestsize = SHA1_DIGEST_SIZE, |
152 | .statesize = sizeof(struct sha1_state), | ||
153 | .base = { | 90 | .base = { |
154 | .cra_name = "sha1", | 91 | .cra_name = "sha1", |
155 | .cra_driver_name = "sha1-ce", | 92 | .cra_driver_name = "sha1-ce", |
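The rewritten glue no longer open-codes the SHA-1 padding: struct sha1_ce_state wraps the generic state and adds a finalize word, and ASM_EXPORT() publishes the two offsets the assembler needs (sst.count and finalize) as global symbols at build time. Below is a minimal host-side sketch of those constants, not part of the patch; the stand-in layout is an assumption (the real struct sha1_state comes from crypto/sha.h), and only the relative placement of the fields matters.

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* stand-in for struct sha1_state (assumed layout) */
	struct sha1_state_model {
		uint32_t state[5];
		uint64_t count;
		uint8_t buffer[64];
	};

	/* mirrors the wrapper the patch introduces */
	struct sha1_ce_state_model {
		struct sha1_state_model sst;
		uint32_t finalize;
	};

	int main(void)
	{
		/* the two values ASM_EXPORT() turns into assembler symbols */
		printf("sha1_ce_offsetof_count = %zu\n",
		       offsetof(struct sha1_ce_state_model, sst.count));
		printf("sha1_ce_offsetof_finalize = %zu\n",
		       offsetof(struct sha1_ce_state_model, finalize));
		return 0;
	}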
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 7f29fc031ea8..5df9d9d470ad 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S | |||
@@ -73,8 +73,8 @@ | |||
73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 | 73 | .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 76 | * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
77 | * u8 *head, long bytes) | 77 | * int blocks) |
78 | */ | 78 | */ |
79 | ENTRY(sha2_ce_transform) | 79 | ENTRY(sha2_ce_transform) |
80 | /* load round constants */ | 80 | /* load round constants */ |
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform) | |||
85 | ld1 {v12.4s-v15.4s}, [x8] | 85 | ld1 {v12.4s-v15.4s}, [x8] |
86 | 86 | ||
87 | /* load state */ | 87 | /* load state */ |
88 | ldp dga, dgb, [x2] | 88 | ldp dga, dgb, [x0] |
89 | 89 | ||
90 | /* load partial input (if supplied) */ | 90 | /* load sha256_ce_state::finalize */ |
91 | cbz x3, 0f | 91 | ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] |
92 | ld1 {v16.4s-v19.4s}, [x3] | ||
93 | b 1f | ||
94 | 92 | ||
95 | /* load input */ | 93 | /* load input */ |
96 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 | 94 | 0: ld1 {v16.4s-v19.4s}, [x1], #64 |
97 | sub w0, w0, #1 | 95 | sub w2, w2, #1 |
98 | 96 | ||
99 | 1: | ||
100 | CPU_LE( rev32 v16.16b, v16.16b ) | 97 | CPU_LE( rev32 v16.16b, v16.16b ) |
101 | CPU_LE( rev32 v17.16b, v17.16b ) | 98 | CPU_LE( rev32 v17.16b, v17.16b ) |
102 | CPU_LE( rev32 v18.16b, v18.16b ) | 99 | CPU_LE( rev32 v18.16b, v18.16b ) |
103 | CPU_LE( rev32 v19.16b, v19.16b ) | 100 | CPU_LE( rev32 v19.16b, v19.16b ) |
104 | 101 | ||
105 | 2: add t0.4s, v16.4s, v0.4s | 102 | 1: add t0.4s, v16.4s, v0.4s |
106 | mov dg0v.16b, dgav.16b | 103 | mov dg0v.16b, dgav.16b |
107 | mov dg1v.16b, dgbv.16b | 104 | mov dg1v.16b, dgbv.16b |
108 | 105 | ||
@@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
131 | add dgbv.4s, dgbv.4s, dg1v.4s | 128 | add dgbv.4s, dgbv.4s, dg1v.4s |
132 | 129 | ||
133 | /* handled all input blocks? */ | 130 | /* handled all input blocks? */ |
134 | cbnz w0, 0b | 131 | cbnz w2, 0b |
135 | 132 | ||
136 | /* | 133 | /* |
137 | * Final block: add padding and total bit count. | 134 | * Final block: add padding and total bit count. |
138 | * Skip if we have no total byte count in x4. In that case, the input | 135 | * Skip if the input size was not a round multiple of the block size; |
139 | * size was not a round multiple of the block size, and the padding is | 136 | * the padding is handled by the C code in that case. |
140 | * handled by the C code. | ||
141 | */ | 137 | */ |
142 | cbz x4, 3f | 138 | cbz x4, 3f |
139 | ldr x4, [x0, #:lo12:sha256_ce_offsetof_count] | ||
143 | movi v17.2d, #0 | 140 | movi v17.2d, #0 |
144 | mov x8, #0x80000000 | 141 | mov x8, #0x80000000 |
145 | movi v18.2d, #0 | 142 | movi v18.2d, #0 |
@@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b ) | |||
148 | mov x4, #0 | 145 | mov x4, #0 |
149 | mov v19.d[0], xzr | 146 | mov v19.d[0], xzr |
150 | mov v19.d[1], x7 | 147 | mov v19.d[1], x7 |
151 | b 2b | 148 | b 1b |
152 | 149 | ||
153 | /* store new state */ | 150 | /* store new state */ |
154 | 3: stp dga, dgb, [x2] | 151 | 3: stp dga, dgb, [x0] |
155 | ret | 152 | ret |
156 | ENDPROC(sha2_ce_transform) | 153 | ENDPROC(sha2_ce_transform) |
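Under the new calling convention sha2_ce_transform() takes the state pointer in x0 and the block count in w2, and when the glue code has set sha256_ce_state::finalize it appends the closing block itself: a 0x80 byte, zeroes, and the message bit count (sst.count << 3) stored big-endian in the last dword, then loops back through the same rounds. A host-side model of that block follows; it is a sketch rather than the asm, with the 64-byte SHA-256 block size taken from the code above.

	#include <assert.h>
	#include <stdint.h>
	#include <string.h>

	/* build the padding block the asm appends on the finalize fast path */
	static void build_final_block(uint8_t block[64], uint64_t byte_count)
	{
		uint64_t bits = byte_count << 3;
		int i;

		memset(block, 0, 64);
		block[0] = 0x80;                     /* leading 1 bit */
		for (i = 0; i < 8; i++)              /* big-endian bit count */
			block[56 + i] = (uint8_t)(bits >> (56 - 8 * i));
	}

	int main(void)
	{
		uint8_t blk[64];

		build_final_block(blk, 128);         /* two whole blocks hashed */
		assert(blk[0] == 0x80 && blk[62] == 0x04 && blk[63] == 0x00);
		return 0;
	}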
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index ae67e88c28b9..1340e44c048b 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c | |||
@@ -12,206 +12,82 @@ | |||
12 | #include <asm/unaligned.h> | 12 | #include <asm/unaligned.h> |
13 | #include <crypto/internal/hash.h> | 13 | #include <crypto/internal/hash.h> |
14 | #include <crypto/sha.h> | 14 | #include <crypto/sha.h> |
15 | #include <crypto/sha256_base.h> | ||
15 | #include <linux/cpufeature.h> | 16 | #include <linux/cpufeature.h> |
16 | #include <linux/crypto.h> | 17 | #include <linux/crypto.h> |
17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
18 | 19 | ||
20 | #define ASM_EXPORT(sym, val) \ | ||
21 | asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); | ||
22 | |||
19 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); | 23 | MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); |
20 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | 24 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
21 | MODULE_LICENSE("GPL v2"); | 25 | MODULE_LICENSE("GPL v2"); |
22 | 26 | ||
23 | asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, | 27 | struct sha256_ce_state { |
24 | u8 *head, long bytes); | 28 | struct sha256_state sst; |
25 | 29 | u32 finalize; | |
26 | static int sha224_init(struct shash_desc *desc) | 30 | }; |
27 | { | ||
28 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
29 | |||
30 | *sctx = (struct sha256_state){ | ||
31 | .state = { | ||
32 | SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, | ||
33 | SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, | ||
34 | } | ||
35 | }; | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | static int sha256_init(struct shash_desc *desc) | ||
40 | { | ||
41 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
42 | |||
43 | *sctx = (struct sha256_state){ | ||
44 | .state = { | ||
45 | SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, | ||
46 | SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, | ||
47 | } | ||
48 | }; | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static int sha2_update(struct shash_desc *desc, const u8 *data, | ||
53 | unsigned int len) | ||
54 | { | ||
55 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
56 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
57 | |||
58 | sctx->count += len; | ||
59 | |||
60 | if ((partial + len) >= SHA256_BLOCK_SIZE) { | ||
61 | int blocks; | ||
62 | |||
63 | if (partial) { | ||
64 | int p = SHA256_BLOCK_SIZE - partial; | ||
65 | |||
66 | memcpy(sctx->buf + partial, data, p); | ||
67 | data += p; | ||
68 | len -= p; | ||
69 | } | ||
70 | 31 | ||
71 | blocks = len / SHA256_BLOCK_SIZE; | 32 | asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, |
72 | len %= SHA256_BLOCK_SIZE; | 33 | int blocks); |
73 | 34 | ||
74 | kernel_neon_begin_partial(28); | 35 | static int sha256_ce_update(struct shash_desc *desc, const u8 *data, |
75 | sha2_ce_transform(blocks, data, sctx->state, | 36 | unsigned int len) |
76 | partial ? sctx->buf : NULL, 0); | ||
77 | kernel_neon_end(); | ||
78 | |||
79 | data += blocks * SHA256_BLOCK_SIZE; | ||
80 | partial = 0; | ||
81 | } | ||
82 | if (len) | ||
83 | memcpy(sctx->buf + partial, data, len); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | static void sha2_final(struct shash_desc *desc) | ||
88 | { | 37 | { |
89 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | 38 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
90 | |||
91 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
92 | __be64 bits = cpu_to_be64(sctx->count << 3); | ||
93 | u32 padlen = SHA256_BLOCK_SIZE | ||
94 | - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE); | ||
95 | |||
96 | sha2_update(desc, padding, padlen); | ||
97 | sha2_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
98 | } | ||
99 | |||
100 | static int sha224_final(struct shash_desc *desc, u8 *out) | ||
101 | { | ||
102 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
103 | __be32 *dst = (__be32 *)out; | ||
104 | int i; | ||
105 | |||
106 | sha2_final(desc); | ||
107 | |||
108 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
109 | put_unaligned_be32(sctx->state[i], dst++); | ||
110 | |||
111 | *sctx = (struct sha256_state){}; | ||
112 | return 0; | ||
113 | } | ||
114 | 39 | ||
115 | static int sha256_final(struct shash_desc *desc, u8 *out) | 40 | sctx->finalize = 0; |
116 | { | 41 | kernel_neon_begin_partial(28); |
117 | struct sha256_state *sctx = shash_desc_ctx(desc); | 42 | sha256_base_do_update(desc, data, len, |
118 | __be32 *dst = (__be32 *)out; | 43 | (sha256_block_fn *)sha2_ce_transform); |
119 | int i; | 44 | kernel_neon_end(); |
120 | |||
121 | sha2_final(desc); | ||
122 | |||
123 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
124 | put_unaligned_be32(sctx->state[i], dst++); | ||
125 | 45 | ||
126 | *sctx = (struct sha256_state){}; | ||
127 | return 0; | 46 | return 0; |
128 | } | 47 | } |
129 | 48 | ||
130 | static void sha2_finup(struct shash_desc *desc, const u8 *data, | 49 | static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, |
131 | unsigned int len) | 50 | unsigned int len, u8 *out) |
132 | { | 51 | { |
133 | struct sha256_state *sctx = shash_desc_ctx(desc); | 52 | struct sha256_ce_state *sctx = shash_desc_ctx(desc); |
134 | int blocks; | 53 | bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); |
135 | 54 | ||
136 | if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { | 55 | ASM_EXPORT(sha256_ce_offsetof_count, |
137 | sha2_update(desc, data, len); | 56 | offsetof(struct sha256_ce_state, sst.count)); |
138 | sha2_final(desc); | 57 | ASM_EXPORT(sha256_ce_offsetof_finalize, |
139 | return; | 58 | offsetof(struct sha256_ce_state, finalize)); |
140 | } | ||
141 | 59 | ||
142 | /* | 60 | /* |
143 | * Use a fast path if the input is a multiple of 64 bytes. In | 61 | * Allow the asm code to perform the finalization if there is no |
144 | * this case, there is no need to copy data around, and we can | 62 | * partial data and the input is a round multiple of the block size. |
145 | * perform the entire digest calculation in a single invocation | ||
146 | * of sha2_ce_transform() | ||
147 | */ | 63 | */ |
148 | blocks = len / SHA256_BLOCK_SIZE; | 64 | sctx->finalize = finalize; |
149 | 65 | ||
150 | kernel_neon_begin_partial(28); | 66 | kernel_neon_begin_partial(28); |
151 | sha2_ce_transform(blocks, data, sctx->state, NULL, len); | 67 | sha256_base_do_update(desc, data, len, |
68 | (sha256_block_fn *)sha2_ce_transform); | ||
69 | if (!finalize) | ||
70 | sha256_base_do_finalize(desc, | ||
71 | (sha256_block_fn *)sha2_ce_transform); | ||
152 | kernel_neon_end(); | 72 | kernel_neon_end(); |
73 | return sha256_base_finish(desc, out); | ||
153 | } | 74 | } |
154 | 75 | ||
155 | static int sha224_finup(struct shash_desc *desc, const u8 *data, | 76 | static int sha256_ce_final(struct shash_desc *desc, u8 *out) |
156 | unsigned int len, u8 *out) | ||
157 | { | 77 | { |
158 | struct sha256_state *sctx = shash_desc_ctx(desc); | 78 | kernel_neon_begin_partial(28); |
159 | __be32 *dst = (__be32 *)out; | 79 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); |
160 | int i; | 80 | kernel_neon_end(); |
161 | 81 | return sha256_base_finish(desc, out); | |
162 | sha2_finup(desc, data, len); | ||
163 | |||
164 | for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) | ||
165 | put_unaligned_be32(sctx->state[i], dst++); | ||
166 | |||
167 | *sctx = (struct sha256_state){}; | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | static int sha256_finup(struct shash_desc *desc, const u8 *data, | ||
172 | unsigned int len, u8 *out) | ||
173 | { | ||
174 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
175 | __be32 *dst = (__be32 *)out; | ||
176 | int i; | ||
177 | |||
178 | sha2_finup(desc, data, len); | ||
179 | |||
180 | for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) | ||
181 | put_unaligned_be32(sctx->state[i], dst++); | ||
182 | |||
183 | *sctx = (struct sha256_state){}; | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static int sha2_export(struct shash_desc *desc, void *out) | ||
188 | { | ||
189 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
190 | struct sha256_state *dst = out; | ||
191 | |||
192 | *dst = *sctx; | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | static int sha2_import(struct shash_desc *desc, const void *in) | ||
197 | { | ||
198 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
199 | struct sha256_state const *src = in; | ||
200 | |||
201 | *sctx = *src; | ||
202 | return 0; | ||
203 | } | 82 | } |
204 | 83 | ||
205 | static struct shash_alg algs[] = { { | 84 | static struct shash_alg algs[] = { { |
206 | .init = sha224_init, | 85 | .init = sha224_base_init, |
207 | .update = sha2_update, | 86 | .update = sha256_ce_update, |
208 | .final = sha224_final, | 87 | .final = sha256_ce_final, |
209 | .finup = sha224_finup, | 88 | .finup = sha256_ce_finup, |
210 | .export = sha2_export, | 89 | .descsize = sizeof(struct sha256_ce_state), |
211 | .import = sha2_import, | ||
212 | .descsize = sizeof(struct sha256_state), | ||
213 | .digestsize = SHA224_DIGEST_SIZE, | 90 | .digestsize = SHA224_DIGEST_SIZE, |
214 | .statesize = sizeof(struct sha256_state), | ||
215 | .base = { | 91 | .base = { |
216 | .cra_name = "sha224", | 92 | .cra_name = "sha224", |
217 | .cra_driver_name = "sha224-ce", | 93 | .cra_driver_name = "sha224-ce", |
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { { | |||
221 | .cra_module = THIS_MODULE, | 97 | .cra_module = THIS_MODULE, |
222 | } | 98 | } |
223 | }, { | 99 | }, { |
224 | .init = sha256_init, | 100 | .init = sha256_base_init, |
225 | .update = sha2_update, | 101 | .update = sha256_ce_update, |
226 | .final = sha256_final, | 102 | .final = sha256_ce_final, |
227 | .finup = sha256_finup, | 103 | .finup = sha256_ce_finup, |
228 | .export = sha2_export, | 104 | .descsize = sizeof(struct sha256_ce_state), |
229 | .import = sha2_import, | ||
230 | .descsize = sizeof(struct sha256_state), | ||
231 | .digestsize = SHA256_DIGEST_SIZE, | 105 | .digestsize = SHA256_DIGEST_SIZE, |
232 | .statesize = sizeof(struct sha256_state), | ||
233 | .base = { | 106 | .base = { |
234 | .cra_name = "sha256", | 107 | .cra_name = "sha256", |
235 | .cra_driver_name = "sha256-ce", | 108 | .cra_driver_name = "sha256-ce", |
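The finup path delegates finalization to the assembler only when nothing has been hashed yet (sst.count == 0, so this one call sees the whole message) and the input length is a whole number of blocks; otherwise sha256_base_do_finalize() pads in C as before. A small self-checking sketch of that predicate, with the block-size constant mirroring SHA256_BLOCK_SIZE (not kernel code):

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	#define BLOCK_SIZE 64	/* SHA256_BLOCK_SIZE */

	/* mirrors: !sctx->sst.count && !(len % SHA256_BLOCK_SIZE) */
	static bool asm_may_finalize(uint64_t count_so_far, unsigned int len)
	{
		return count_so_far == 0 && (len % BLOCK_SIZE) == 0;
	}

	int main(void)
	{
		assert(asm_may_finalize(0, 128));   /* aligned, single finup */
		assert(!asm_may_finalize(0, 100));  /* tail needs C padding */
		assert(!asm_may_finalize(64, 64));  /* an update() ran first */
		return 0;
	}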
diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile index a74f76d85a2f..f7aa9d5d3b87 100644 --- a/arch/mips/cavium-octeon/crypto/Makefile +++ b/arch/mips/cavium-octeon/crypto/Makefile | |||
@@ -4,4 +4,7 @@ | |||
4 | 4 | ||
5 | obj-y += octeon-crypto.o | 5 | obj-y += octeon-crypto.o |
6 | 6 | ||
7 | obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o | 7 | obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o |
8 | obj-$(CONFIG_CRYPTO_SHA1_OCTEON) += octeon-sha1.o | ||
9 | obj-$(CONFIG_CRYPTO_SHA256_OCTEON) += octeon-sha256.o | ||
10 | obj-$(CONFIG_CRYPTO_SHA512_OCTEON) += octeon-sha512.o | ||
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c index 7c82ff463b65..f66bd1adc7ff 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c | |||
@@ -17,7 +17,7 @@ | |||
17 | * crypto operations in calls to octeon_crypto_enable/disable in order to make | 17 | * crypto operations in calls to octeon_crypto_enable/disable in order to make |
18 | * sure the state of COP2 isn't corrupted if userspace is also performing | 18 | * sure the state of COP2 isn't corrupted if userspace is also performing |
19 | * hardware crypto operations. Allocate the state parameter on the stack. | 19 | * hardware crypto operations. Allocate the state parameter on the stack. |
20 | * Preemption must be disabled to prevent context switches. | 20 | * Returns with preemption disabled. |
21 | * | 21 | * |
22 | * @state: Pointer to state structure to store current COP2 state in. | 22 | * @state: Pointer to state structure to store current COP2 state in. |
23 | * | 23 | * |
@@ -28,6 +28,7 @@ unsigned long octeon_crypto_enable(struct octeon_cop2_state *state) | |||
28 | int status; | 28 | int status; |
29 | unsigned long flags; | 29 | unsigned long flags; |
30 | 30 | ||
31 | preempt_disable(); | ||
31 | local_irq_save(flags); | 32 | local_irq_save(flags); |
32 | status = read_c0_status(); | 33 | status = read_c0_status(); |
33 | write_c0_status(status | ST0_CU2); | 34 | write_c0_status(status | ST0_CU2); |
@@ -62,5 +63,6 @@ void octeon_crypto_disable(struct octeon_cop2_state *state, | |||
62 | else | 63 | else |
63 | write_c0_status(read_c0_status() & ~ST0_CU2); | 64 | write_c0_status(read_c0_status() & ~ST0_CU2); |
64 | local_irq_restore(flags); | 65 | local_irq_restore(flags); |
66 | preempt_enable(); | ||
65 | } | 67 | } |
66 | EXPORT_SYMBOL_GPL(octeon_crypto_disable); | 68 | EXPORT_SYMBOL_GPL(octeon_crypto_disable); |
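With preempt_disable()/preempt_enable() folded into octeon_crypto_enable() and octeon_crypto_disable(), callers such as octeon-md5.c below simply bracket their COP2 work and drop their own preemption and bottom-half juggling. A host-side mock of that bracket discipline, stubs only with no kernel semantics:

	#include <stdio.h>

	struct cop2_state_model { int saved; };

	static int preempt_depth;	/* stand-in for the preempt count */

	static unsigned long enable_model(struct cop2_state_model *st)
	{
		preempt_depth++;	/* preempt_disable() now lives here */
		st->saved = 1;		/* save COP2 state, set ST0_CU2 */
		return 0;		/* "saved IRQ flags" */
	}

	static void disable_model(struct cop2_state_model *st, unsigned long flags)
	{
		(void)st;
		(void)flags;		/* restore COP2 state and IRQ flags */
		preempt_depth--;	/* matching preempt_enable() */
	}

	int main(void)
	{
		struct cop2_state_model state;
		unsigned long flags = enable_model(&state);
		/* COP2 hash instructions run here, safe from migration */
		disable_model(&state, flags);
		printf("balanced: %s\n", preempt_depth == 0 ? "yes" : "no");
		return 0;
	}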
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h index e2a4aece9c24..355072535110 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h | |||
@@ -5,7 +5,8 @@ | |||
5 | * | 5 | * |
6 | * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved. | 6 | * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved. |
7 | * | 7 | * |
8 | * MD5 instruction definitions added by Aaro Koskinen <aaro.koskinen@iki.fi>. | 8 | * MD5/SHA1/SHA256/SHA512 instruction definitions added by |
9 | * Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
9 | * | 10 | * |
10 | */ | 11 | */ |
11 | #ifndef __LINUX_OCTEON_CRYPTO_H | 12 | #ifndef __LINUX_OCTEON_CRYPTO_H |
@@ -21,11 +22,11 @@ extern void octeon_crypto_disable(struct octeon_cop2_state *state, | |||
21 | unsigned long flags); | 22 | unsigned long flags); |
22 | 23 | ||
23 | /* | 24 | /* |
24 | * Macros needed to implement MD5: | 25 | * Macros needed to implement MD5/SHA1/SHA256: |
25 | */ | 26 | */ |
26 | 27 | ||
27 | /* | 28 | /* |
28 | * The index can be 0-1. | 29 | * The index can be 0-1 (MD5), 0-2 (SHA1), or 0-3 (SHA256). |
29 | */ | 30 | */ |
30 | #define write_octeon_64bit_hash_dword(value, index) \ | 31 | #define write_octeon_64bit_hash_dword(value, index) \ |
31 | do { \ | 32 | do { \ |
@@ -36,7 +37,7 @@ do { \ | |||
36 | } while (0) | 37 | } while (0) |
37 | 38 | ||
38 | /* | 39 | /* |
39 | * The index can be 0-1. | 40 | * The index can be 0-1 (MD5), 0-2 (SHA1), or 0-3 (SHA256). |
40 | */ | 41 | */ |
41 | #define read_octeon_64bit_hash_dword(index) \ | 42 | #define read_octeon_64bit_hash_dword(index) \ |
42 | ({ \ | 43 | ({ \ |
@@ -72,4 +73,78 @@ do { \ | |||
72 | : [rt] "d" (value)); \ | 73 | : [rt] "d" (value)); \ |
73 | } while (0) | 74 | } while (0) |
74 | 75 | ||
76 | /* | ||
77 | * The value is the final block dword (64-bit). | ||
78 | */ | ||
79 | #define octeon_sha1_start(value) \ | ||
80 | do { \ | ||
81 | __asm__ __volatile__ ( \ | ||
82 | "dmtc2 %[rt],0x4057" \ | ||
83 | : \ | ||
84 | : [rt] "d" (value)); \ | ||
85 | } while (0) | ||
86 | |||
87 | /* | ||
88 | * The value is the final block dword (64-bit). | ||
89 | */ | ||
90 | #define octeon_sha256_start(value) \ | ||
91 | do { \ | ||
92 | __asm__ __volatile__ ( \ | ||
93 | "dmtc2 %[rt],0x404f" \ | ||
94 | : \ | ||
95 | : [rt] "d" (value)); \ | ||
96 | } while (0) | ||
97 | |||
98 | /* | ||
99 | * Macros needed to implement SHA512: | ||
100 | */ | ||
101 | |||
102 | /* | ||
103 | * The index can be 0-7. | ||
104 | */ | ||
105 | #define write_octeon_64bit_hash_sha512(value, index) \ | ||
106 | do { \ | ||
107 | __asm__ __volatile__ ( \ | ||
108 | "dmtc2 %[rt],0x0250+" STR(index) \ | ||
109 | : \ | ||
110 | : [rt] "d" (value)); \ | ||
111 | } while (0) | ||
112 | |||
113 | /* | ||
114 | * The index can be 0-7. | ||
115 | */ | ||
116 | #define read_octeon_64bit_hash_sha512(index) \ | ||
117 | ({ \ | ||
118 | u64 __value; \ | ||
119 | \ | ||
120 | __asm__ __volatile__ ( \ | ||
121 | "dmfc2 %[rt],0x0250+" STR(index) \ | ||
122 | : [rt] "=d" (__value) \ | ||
123 | : ); \ | ||
124 | \ | ||
125 | __value; \ | ||
126 | }) | ||
127 | |||
128 | /* | ||
129 | * The index can be 0-14. | ||
130 | */ | ||
131 | #define write_octeon_64bit_block_sha512(value, index) \ | ||
132 | do { \ | ||
133 | __asm__ __volatile__ ( \ | ||
134 | "dmtc2 %[rt],0x0240+" STR(index) \ | ||
135 | : \ | ||
136 | : [rt] "d" (value)); \ | ||
137 | } while (0) | ||
138 | |||
139 | /* | ||
140 | * The value is the final block dword (64-bit). | ||
141 | */ | ||
142 | #define octeon_sha512_start(value) \ | ||
143 | do { \ | ||
144 | __asm__ __volatile__ ( \ | ||
145 | "dmtc2 %[rt],0x424f" \ | ||
146 | : \ | ||
147 | : [rt] "d" (value)); \ | ||
148 | } while (0) | ||
149 | |||
75 | #endif /* __LINUX_OCTEON_CRYPTO_H */ | 150 | #endif /* __LINUX_OCTEON_CRYPTO_H */ |
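The dmtc2/dmfc2 opcodes encode the target register in their immediate field, so the macros splice the index into the instruction string at preprocessing time; STR() is assumed to be the usual two-level stringify helper from the MIPS headers. A host-side sketch of the expansion:

	#include <stdio.h>

	#define __STR(x) #x
	#define STR(x) __STR(x)	/* assumed two-level stringify */

	/* same string-pasting shape as write_octeon_64bit_hash_sha512() */
	#define SHA512_HASH_OP(index) "dmtc2 %[rt],0x0250+" STR(index)

	int main(void)
	{
		puts(SHA512_HASH_OP(3));	/* dmtc2 %[rt],0x0250+3 */
		return 0;
	}

The assembler then folds "0x0250+3" into a single immediate, giving one fixed instruction per register index with no runtime indexing.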
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c index b909881ba6c1..12dccdb38286 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-md5.c +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c | |||
@@ -97,8 +97,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data, | |||
97 | memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data, | 97 | memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data, |
98 | avail); | 98 | avail); |
99 | 99 | ||
100 | local_bh_disable(); | ||
101 | preempt_disable(); | ||
102 | flags = octeon_crypto_enable(&state); | 100 | flags = octeon_crypto_enable(&state); |
103 | octeon_md5_store_hash(mctx); | 101 | octeon_md5_store_hash(mctx); |
104 | 102 | ||
@@ -114,8 +112,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data, | |||
114 | 112 | ||
115 | octeon_md5_read_hash(mctx); | 113 | octeon_md5_read_hash(mctx); |
116 | octeon_crypto_disable(&state, flags); | 114 | octeon_crypto_disable(&state, flags); |
117 | preempt_enable(); | ||
118 | local_bh_enable(); | ||
119 | 115 | ||
120 | memcpy(mctx->block, data, len); | 116 | memcpy(mctx->block, data, len); |
121 | 117 | ||
@@ -133,8 +129,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out) | |||
133 | 129 | ||
134 | *p++ = 0x80; | 130 | *p++ = 0x80; |
135 | 131 | ||
136 | local_bh_disable(); | ||
137 | preempt_disable(); | ||
138 | flags = octeon_crypto_enable(&state); | 132 | flags = octeon_crypto_enable(&state); |
139 | octeon_md5_store_hash(mctx); | 133 | octeon_md5_store_hash(mctx); |
140 | 134 | ||
@@ -152,8 +146,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out) | |||
152 | 146 | ||
153 | octeon_md5_read_hash(mctx); | 147 | octeon_md5_read_hash(mctx); |
154 | octeon_crypto_disable(&state, flags); | 148 | octeon_crypto_disable(&state, flags); |
155 | preempt_enable(); | ||
156 | local_bh_enable(); | ||
157 | 149 | ||
158 | memcpy(out, mctx->hash, sizeof(mctx->hash)); | 150 | memcpy(out, mctx->hash, sizeof(mctx->hash)); |
159 | memset(mctx, 0, sizeof(*mctx)); | 151 | memset(mctx, 0, sizeof(*mctx)); |
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c new file mode 100644 index 000000000000..2b74b5b67cae --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * SHA1 Secure Hash Algorithm. | ||
5 | * | ||
6 | * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
7 | * | ||
8 | * Based on crypto/sha1_generic.c, which is: | ||
9 | * | ||
10 | * Copyright (c) Alan Smithee. | ||
11 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
12 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License as published by the Free | ||
16 | * Software Foundation; either version 2 of the License, or (at your option) | ||
17 | * any later version. | ||
18 | */ | ||
19 | |||
20 | #include <linux/mm.h> | ||
21 | #include <crypto/sha.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <asm/byteorder.h> | ||
26 | #include <asm/octeon/octeon.h> | ||
27 | #include <crypto/internal/hash.h> | ||
28 | |||
29 | #include "octeon-crypto.h" | ||
30 | |||
31 | /* | ||
32 | * We pass everything as 64-bit. OCTEON can handle misaligned data. | ||
33 | */ | ||
34 | |||
35 | static void octeon_sha1_store_hash(struct sha1_state *sctx) | ||
36 | { | ||
37 | u64 *hash = (u64 *)sctx->state; | ||
38 | union { | ||
39 | u32 word[2]; | ||
40 | u64 dword; | ||
41 | } hash_tail = { { sctx->state[4], } }; | ||
42 | |||
43 | write_octeon_64bit_hash_dword(hash[0], 0); | ||
44 | write_octeon_64bit_hash_dword(hash[1], 1); | ||
45 | write_octeon_64bit_hash_dword(hash_tail.dword, 2); | ||
46 | memzero_explicit(&hash_tail.word[0], sizeof(hash_tail.word[0])); | ||
47 | } | ||
48 | |||
49 | static void octeon_sha1_read_hash(struct sha1_state *sctx) | ||
50 | { | ||
51 | u64 *hash = (u64 *)sctx->state; | ||
52 | union { | ||
53 | u32 word[2]; | ||
54 | u64 dword; | ||
55 | } hash_tail; | ||
56 | |||
57 | hash[0] = read_octeon_64bit_hash_dword(0); | ||
58 | hash[1] = read_octeon_64bit_hash_dword(1); | ||
59 | hash_tail.dword = read_octeon_64bit_hash_dword(2); | ||
60 | sctx->state[4] = hash_tail.word[0]; | ||
61 | memzero_explicit(&hash_tail.dword, sizeof(hash_tail.dword)); | ||
62 | } | ||
63 | |||
64 | static void octeon_sha1_transform(const void *_block) | ||
65 | { | ||
66 | const u64 *block = _block; | ||
67 | |||
68 | write_octeon_64bit_block_dword(block[0], 0); | ||
69 | write_octeon_64bit_block_dword(block[1], 1); | ||
70 | write_octeon_64bit_block_dword(block[2], 2); | ||
71 | write_octeon_64bit_block_dword(block[3], 3); | ||
72 | write_octeon_64bit_block_dword(block[4], 4); | ||
73 | write_octeon_64bit_block_dword(block[5], 5); | ||
74 | write_octeon_64bit_block_dword(block[6], 6); | ||
75 | octeon_sha1_start(block[7]); | ||
76 | } | ||
77 | |||
78 | static int octeon_sha1_init(struct shash_desc *desc) | ||
79 | { | ||
80 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
81 | |||
82 | sctx->state[0] = SHA1_H0; | ||
83 | sctx->state[1] = SHA1_H1; | ||
84 | sctx->state[2] = SHA1_H2; | ||
85 | sctx->state[3] = SHA1_H3; | ||
86 | sctx->state[4] = SHA1_H4; | ||
87 | sctx->count = 0; | ||
88 | |||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | static void __octeon_sha1_update(struct sha1_state *sctx, const u8 *data, | ||
93 | unsigned int len) | ||
94 | { | ||
95 | unsigned int partial; | ||
96 | unsigned int done; | ||
97 | const u8 *src; | ||
98 | |||
99 | partial = sctx->count % SHA1_BLOCK_SIZE; | ||
100 | sctx->count += len; | ||
101 | done = 0; | ||
102 | src = data; | ||
103 | |||
104 | if ((partial + len) >= SHA1_BLOCK_SIZE) { | ||
105 | if (partial) { | ||
106 | done = -partial; | ||
107 | memcpy(sctx->buffer + partial, data, | ||
108 | done + SHA1_BLOCK_SIZE); | ||
109 | src = sctx->buffer; | ||
110 | } | ||
111 | |||
112 | do { | ||
113 | octeon_sha1_transform(src); | ||
114 | done += SHA1_BLOCK_SIZE; | ||
115 | src = data + done; | ||
116 | } while (done + SHA1_BLOCK_SIZE <= len); | ||
117 | |||
118 | partial = 0; | ||
119 | } | ||
120 | memcpy(sctx->buffer + partial, src, len - done); | ||
121 | } | ||
122 | |||
123 | static int octeon_sha1_update(struct shash_desc *desc, const u8 *data, | ||
124 | unsigned int len) | ||
125 | { | ||
126 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
127 | struct octeon_cop2_state state; | ||
128 | unsigned long flags; | ||
129 | |||
130 | /* | ||
131 | * Small updates never reach the crypto engine, so the generic sha1 is | ||
132 | * faster because of the heavyweight octeon_crypto_enable() / | ||
133 | * octeon_crypto_disable(). | ||
134 | */ | ||
135 | if ((sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) | ||
136 | return crypto_sha1_update(desc, data, len); | ||
137 | |||
138 | flags = octeon_crypto_enable(&state); | ||
139 | octeon_sha1_store_hash(sctx); | ||
140 | |||
141 | __octeon_sha1_update(sctx, data, len); | ||
142 | |||
143 | octeon_sha1_read_hash(sctx); | ||
144 | octeon_crypto_disable(&state, flags); | ||
145 | |||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | static int octeon_sha1_final(struct shash_desc *desc, u8 *out) | ||
150 | { | ||
151 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
152 | static const u8 padding[64] = { 0x80, }; | ||
153 | struct octeon_cop2_state state; | ||
154 | __be32 *dst = (__be32 *)out; | ||
155 | unsigned int pad_len; | ||
156 | unsigned long flags; | ||
157 | unsigned int index; | ||
158 | __be64 bits; | ||
159 | int i; | ||
160 | |||
161 | /* Save number of bits. */ | ||
162 | bits = cpu_to_be64(sctx->count << 3); | ||
163 | |||
164 | /* Pad out to 56 mod 64. */ | ||
165 | index = sctx->count & 0x3f; | ||
166 | pad_len = (index < 56) ? (56 - index) : ((64+56) - index); | ||
167 | |||
168 | flags = octeon_crypto_enable(&state); | ||
169 | octeon_sha1_store_hash(sctx); | ||
170 | |||
171 | __octeon_sha1_update(sctx, padding, pad_len); | ||
172 | |||
173 | /* Append length (before padding). */ | ||
174 | __octeon_sha1_update(sctx, (const u8 *)&bits, sizeof(bits)); | ||
175 | |||
176 | octeon_sha1_read_hash(sctx); | ||
177 | octeon_crypto_disable(&state, flags); | ||
178 | |||
179 | /* Store state in digest */ | ||
180 | for (i = 0; i < 5; i++) | ||
181 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
182 | |||
183 | /* Zeroize sensitive information. */ | ||
184 | memset(sctx, 0, sizeof(*sctx)); | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | |||
189 | static int octeon_sha1_export(struct shash_desc *desc, void *out) | ||
190 | { | ||
191 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
192 | |||
193 | memcpy(out, sctx, sizeof(*sctx)); | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | static int octeon_sha1_import(struct shash_desc *desc, const void *in) | ||
198 | { | ||
199 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
200 | |||
201 | memcpy(sctx, in, sizeof(*sctx)); | ||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static struct shash_alg octeon_sha1_alg = { | ||
206 | .digestsize = SHA1_DIGEST_SIZE, | ||
207 | .init = octeon_sha1_init, | ||
208 | .update = octeon_sha1_update, | ||
209 | .final = octeon_sha1_final, | ||
210 | .export = octeon_sha1_export, | ||
211 | .import = octeon_sha1_import, | ||
212 | .descsize = sizeof(struct sha1_state), | ||
213 | .statesize = sizeof(struct sha1_state), | ||
214 | .base = { | ||
215 | .cra_name = "sha1", | ||
216 | .cra_driver_name= "octeon-sha1", | ||
217 | .cra_priority = OCTEON_CR_OPCODE_PRIORITY, | ||
218 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
219 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
220 | .cra_module = THIS_MODULE, | ||
221 | } | ||
222 | }; | ||
223 | |||
224 | static int __init octeon_sha1_mod_init(void) | ||
225 | { | ||
226 | if (!octeon_has_crypto()) | ||
227 | return -ENOTSUPP; | ||
228 | return crypto_register_shash(&octeon_sha1_alg); | ||
229 | } | ||
230 | |||
231 | static void __exit octeon_sha1_mod_fini(void) | ||
232 | { | ||
233 | crypto_unregister_shash(&octeon_sha1_alg); | ||
234 | } | ||
235 | |||
236 | module_init(octeon_sha1_mod_init); | ||
237 | module_exit(octeon_sha1_mod_fini); | ||
238 | |||
239 | MODULE_LICENSE("GPL"); | ||
240 | MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (OCTEON)"); | ||
241 | MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); | ||
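__octeon_sha1_update() uses the classic negative-offset trick: when bytes are already buffered, done starts at -partial so the first transform consumes the refilled buffer while the loop counter keeps measuring progress through data. A self-checking host model with a stub transform (it only counts blocks, no hashing):

	#include <assert.h>
	#include <string.h>

	#define BLOCK 64

	static unsigned long consumed;	/* bytes fed to the stub transform */

	static void transform(const unsigned char *src)
	{
		(void)src;
		consumed += BLOCK;
	}

	static void update(unsigned char *buffer, unsigned int *count,
			   const unsigned char *data, unsigned int len)
	{
		unsigned int partial = *count % BLOCK;
		int done = 0;
		const unsigned char *src = data;

		*count += len;
		if (partial + len >= BLOCK) {
			if (partial) {
				done = -partial;	/* block 1 is the buffer */
				memcpy(buffer + partial, data, done + BLOCK);
				src = buffer;
			}
			do {
				transform(src);
				done += BLOCK;
				src = data + done;	/* back to caller data */
			} while (done + BLOCK <= (int)len);
			partial = 0;
		}
		memcpy(buffer + partial, src, len - done);
	}

	int main(void)
	{
		unsigned char buf[BLOCK], big[200] = { 0 };
		unsigned int count = 0;

		update(buf, &count, (const unsigned char *)"x", 1);
		update(buf, &count, big, 191);	/* 1 + 191 = 3 whole blocks */
		assert(consumed == 3 * BLOCK && count == 192);
		return 0;
	}

The hash_tail union above plays a related packing game: SHA-1 has five 32-bit state words but COP2 moves 64 bits at a time, so state[4] rides in the upper half of a third dword, which works because OCTEON is big-endian.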
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c new file mode 100644 index 000000000000..97e96fead08a --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c | |||
@@ -0,0 +1,280 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * SHA-224 and SHA-256 Secure Hash Algorithm. | ||
5 | * | ||
6 | * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
7 | * | ||
8 | * Based on crypto/sha256_generic.c, which is: | ||
9 | * | ||
10 | * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> | ||
11 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
12 | * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> | ||
13 | * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com> | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify it | ||
16 | * under the terms of the GNU General Public License as published by the Free | ||
17 | * Software Foundation; either version 2 of the License, or (at your option) | ||
18 | * any later version. | ||
19 | */ | ||
20 | |||
21 | #include <linux/mm.h> | ||
22 | #include <crypto/sha.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <asm/byteorder.h> | ||
27 | #include <asm/octeon/octeon.h> | ||
28 | #include <crypto/internal/hash.h> | ||
29 | |||
30 | #include "octeon-crypto.h" | ||
31 | |||
32 | /* | ||
33 | * We pass everything as 64-bit. OCTEON can handle misaligned data. | ||
34 | */ | ||
35 | |||
36 | static void octeon_sha256_store_hash(struct sha256_state *sctx) | ||
37 | { | ||
38 | u64 *hash = (u64 *)sctx->state; | ||
39 | |||
40 | write_octeon_64bit_hash_dword(hash[0], 0); | ||
41 | write_octeon_64bit_hash_dword(hash[1], 1); | ||
42 | write_octeon_64bit_hash_dword(hash[2], 2); | ||
43 | write_octeon_64bit_hash_dword(hash[3], 3); | ||
44 | } | ||
45 | |||
46 | static void octeon_sha256_read_hash(struct sha256_state *sctx) | ||
47 | { | ||
48 | u64 *hash = (u64 *)sctx->state; | ||
49 | |||
50 | hash[0] = read_octeon_64bit_hash_dword(0); | ||
51 | hash[1] = read_octeon_64bit_hash_dword(1); | ||
52 | hash[2] = read_octeon_64bit_hash_dword(2); | ||
53 | hash[3] = read_octeon_64bit_hash_dword(3); | ||
54 | } | ||
55 | |||
56 | static void octeon_sha256_transform(const void *_block) | ||
57 | { | ||
58 | const u64 *block = _block; | ||
59 | |||
60 | write_octeon_64bit_block_dword(block[0], 0); | ||
61 | write_octeon_64bit_block_dword(block[1], 1); | ||
62 | write_octeon_64bit_block_dword(block[2], 2); | ||
63 | write_octeon_64bit_block_dword(block[3], 3); | ||
64 | write_octeon_64bit_block_dword(block[4], 4); | ||
65 | write_octeon_64bit_block_dword(block[5], 5); | ||
66 | write_octeon_64bit_block_dword(block[6], 6); | ||
67 | octeon_sha256_start(block[7]); | ||
68 | } | ||
69 | |||
70 | static int octeon_sha224_init(struct shash_desc *desc) | ||
71 | { | ||
72 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
73 | |||
74 | sctx->state[0] = SHA224_H0; | ||
75 | sctx->state[1] = SHA224_H1; | ||
76 | sctx->state[2] = SHA224_H2; | ||
77 | sctx->state[3] = SHA224_H3; | ||
78 | sctx->state[4] = SHA224_H4; | ||
79 | sctx->state[5] = SHA224_H5; | ||
80 | sctx->state[6] = SHA224_H6; | ||
81 | sctx->state[7] = SHA224_H7; | ||
82 | sctx->count = 0; | ||
83 | |||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | static int octeon_sha256_init(struct shash_desc *desc) | ||
88 | { | ||
89 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
90 | |||
91 | sctx->state[0] = SHA256_H0; | ||
92 | sctx->state[1] = SHA256_H1; | ||
93 | sctx->state[2] = SHA256_H2; | ||
94 | sctx->state[3] = SHA256_H3; | ||
95 | sctx->state[4] = SHA256_H4; | ||
96 | sctx->state[5] = SHA256_H5; | ||
97 | sctx->state[6] = SHA256_H6; | ||
98 | sctx->state[7] = SHA256_H7; | ||
99 | sctx->count = 0; | ||
100 | |||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static void __octeon_sha256_update(struct sha256_state *sctx, const u8 *data, | ||
105 | unsigned int len) | ||
106 | { | ||
107 | unsigned int partial; | ||
108 | unsigned int done; | ||
109 | const u8 *src; | ||
110 | |||
111 | partial = sctx->count % SHA256_BLOCK_SIZE; | ||
112 | sctx->count += len; | ||
113 | done = 0; | ||
114 | src = data; | ||
115 | |||
116 | if ((partial + len) >= SHA256_BLOCK_SIZE) { | ||
117 | if (partial) { | ||
118 | done = -partial; | ||
119 | memcpy(sctx->buf + partial, data, | ||
120 | done + SHA256_BLOCK_SIZE); | ||
121 | src = sctx->buf; | ||
122 | } | ||
123 | |||
124 | do { | ||
125 | octeon_sha256_transform(src); | ||
126 | done += SHA256_BLOCK_SIZE; | ||
127 | src = data + done; | ||
128 | } while (done + SHA256_BLOCK_SIZE <= len); | ||
129 | |||
130 | partial = 0; | ||
131 | } | ||
132 | memcpy(sctx->buf + partial, src, len - done); | ||
133 | } | ||
134 | |||
135 | static int octeon_sha256_update(struct shash_desc *desc, const u8 *data, | ||
136 | unsigned int len) | ||
137 | { | ||
138 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
139 | struct octeon_cop2_state state; | ||
140 | unsigned long flags; | ||
141 | |||
142 | /* | ||
143 | * Small updates never reach the crypto engine, so the generic sha256 is | ||
144 | * faster because of the heavyweight octeon_crypto_enable() / | ||
145 | * octeon_crypto_disable(). | ||
146 | */ | ||
147 | if ((sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) | ||
148 | return crypto_sha256_update(desc, data, len); | ||
149 | |||
150 | flags = octeon_crypto_enable(&state); | ||
151 | octeon_sha256_store_hash(sctx); | ||
152 | |||
153 | __octeon_sha256_update(sctx, data, len); | ||
154 | |||
155 | octeon_sha256_read_hash(sctx); | ||
156 | octeon_crypto_disable(&state, flags); | ||
157 | |||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | static int octeon_sha256_final(struct shash_desc *desc, u8 *out) | ||
162 | { | ||
163 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
164 | static const u8 padding[64] = { 0x80, }; | ||
165 | struct octeon_cop2_state state; | ||
166 | __be32 *dst = (__be32 *)out; | ||
167 | unsigned int pad_len; | ||
168 | unsigned long flags; | ||
169 | unsigned int index; | ||
170 | __be64 bits; | ||
171 | int i; | ||
172 | |||
173 | /* Save number of bits. */ | ||
174 | bits = cpu_to_be64(sctx->count << 3); | ||
175 | |||
176 | /* Pad out to 56 mod 64. */ | ||
177 | index = sctx->count & 0x3f; | ||
178 | pad_len = (index < 56) ? (56 - index) : ((64+56) - index); | ||
179 | |||
180 | flags = octeon_crypto_enable(&state); | ||
181 | octeon_sha256_store_hash(sctx); | ||
182 | |||
183 | __octeon_sha256_update(sctx, padding, pad_len); | ||
184 | |||
185 | /* Append length (before padding). */ | ||
186 | __octeon_sha256_update(sctx, (const u8 *)&bits, sizeof(bits)); | ||
187 | |||
188 | octeon_sha256_read_hash(sctx); | ||
189 | octeon_crypto_disable(&state, flags); | ||
190 | |||
191 | /* Store state in digest */ | ||
192 | for (i = 0; i < 8; i++) | ||
193 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
194 | |||
195 | /* Zeroize sensitive information. */ | ||
196 | memset(sctx, 0, sizeof(*sctx)); | ||
197 | |||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | static int octeon_sha224_final(struct shash_desc *desc, u8 *hash) | ||
202 | { | ||
203 | u8 D[SHA256_DIGEST_SIZE]; | ||
204 | |||
205 | octeon_sha256_final(desc, D); | ||
206 | |||
207 | memcpy(hash, D, SHA224_DIGEST_SIZE); | ||
208 | memzero_explicit(D, SHA256_DIGEST_SIZE); | ||
209 | |||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | static int octeon_sha256_export(struct shash_desc *desc, void *out) | ||
214 | { | ||
215 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
216 | |||
217 | memcpy(out, sctx, sizeof(*sctx)); | ||
218 | return 0; | ||
219 | } | ||
220 | |||
221 | static int octeon_sha256_import(struct shash_desc *desc, const void *in) | ||
222 | { | ||
223 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
224 | |||
225 | memcpy(sctx, in, sizeof(*sctx)); | ||
226 | return 0; | ||
227 | } | ||
228 | |||
229 | static struct shash_alg octeon_sha256_algs[2] = { { | ||
230 | .digestsize = SHA256_DIGEST_SIZE, | ||
231 | .init = octeon_sha256_init, | ||
232 | .update = octeon_sha256_update, | ||
233 | .final = octeon_sha256_final, | ||
234 | .export = octeon_sha256_export, | ||
235 | .import = octeon_sha256_import, | ||
236 | .descsize = sizeof(struct sha256_state), | ||
237 | .statesize = sizeof(struct sha256_state), | ||
238 | .base = { | ||
239 | .cra_name = "sha256", | ||
240 | .cra_driver_name= "octeon-sha256", | ||
241 | .cra_priority = OCTEON_CR_OPCODE_PRIORITY, | ||
242 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
243 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
244 | .cra_module = THIS_MODULE, | ||
245 | } | ||
246 | }, { | ||
247 | .digestsize = SHA224_DIGEST_SIZE, | ||
248 | .init = octeon_sha224_init, | ||
249 | .update = octeon_sha256_update, | ||
250 | .final = octeon_sha224_final, | ||
251 | .descsize = sizeof(struct sha256_state), | ||
252 | .base = { | ||
253 | .cra_name = "sha224", | ||
254 | .cra_driver_name= "octeon-sha224", | ||
255 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
256 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
257 | .cra_module = THIS_MODULE, | ||
258 | } | ||
259 | } }; | ||
260 | |||
261 | static int __init octeon_sha256_mod_init(void) | ||
262 | { | ||
263 | if (!octeon_has_crypto()) | ||
264 | return -ENOTSUPP; | ||
265 | return crypto_register_shashes(octeon_sha256_algs, | ||
266 | ARRAY_SIZE(octeon_sha256_algs)); | ||
267 | } | ||
268 | |||
269 | static void __exit octeon_sha256_mod_fini(void) | ||
270 | { | ||
271 | crypto_unregister_shashes(octeon_sha256_algs, | ||
272 | ARRAY_SIZE(octeon_sha256_algs)); | ||
273 | } | ||
274 | |||
275 | module_init(octeon_sha256_mod_init); | ||
276 | module_exit(octeon_sha256_mod_fini); | ||
277 | |||
278 | MODULE_LICENSE("GPL"); | ||
279 | MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm (OCTEON)"); | ||
280 | MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); | ||
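octeon_sha256_final() computes the pad length so that message, padding, and the 8-byte length field end exactly on a block boundary ("pad out to 56 mod 64"). A self-checking sketch of just that arithmetic:

	#include <assert.h>

	/* model of the pad_len computation in octeon_sha256_final() */
	static unsigned int pad_len(unsigned int count)
	{
		unsigned int index = count & 0x3f;

		return (index < 56) ? (56 - index) : ((64 + 56) - index);
	}

	int main(void)
	{
		unsigned int c;

		/* data + padding + 8-byte length is always block aligned */
		for (c = 0; c < 4096; c++)
			assert((c + pad_len(c) + 8) % 64 == 0);
		return 0;
	}

SHA-224 then reuses the whole SHA-256 pipeline and simply truncates: octeon_sha224_final() copies the first 28 digest bytes and wipes the scratch copy.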
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c new file mode 100644 index 000000000000..d5fb3c6f22ae --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c | |||
@@ -0,0 +1,277 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * SHA-512 and SHA-384 Secure Hash Algorithm. | ||
5 | * | ||
6 | * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
7 | * | ||
8 | * Based on crypto/sha512_generic.c, which is: | ||
9 | * | ||
10 | * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> | ||
11 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
12 | * Copyright (c) 2003 Kyle McMartin <kyle@debian.org> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License as published by the | ||
16 | * Free Software Foundation; either version 2, or (at your option) any | ||
17 | * later version. | ||
18 | */ | ||
19 | |||
20 | #include <linux/mm.h> | ||
21 | #include <crypto/sha.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <linux/module.h> | ||
25 | #include <asm/byteorder.h> | ||
26 | #include <asm/octeon/octeon.h> | ||
27 | #include <crypto/internal/hash.h> | ||
28 | |||
29 | #include "octeon-crypto.h" | ||
30 | |||
31 | /* | ||
32 | * We pass everything as 64-bit. OCTEON can handle misaligned data. | ||
33 | */ | ||
34 | |||
35 | static void octeon_sha512_store_hash(struct sha512_state *sctx) | ||
36 | { | ||
37 | write_octeon_64bit_hash_sha512(sctx->state[0], 0); | ||
38 | write_octeon_64bit_hash_sha512(sctx->state[1], 1); | ||
39 | write_octeon_64bit_hash_sha512(sctx->state[2], 2); | ||
40 | write_octeon_64bit_hash_sha512(sctx->state[3], 3); | ||
41 | write_octeon_64bit_hash_sha512(sctx->state[4], 4); | ||
42 | write_octeon_64bit_hash_sha512(sctx->state[5], 5); | ||
43 | write_octeon_64bit_hash_sha512(sctx->state[6], 6); | ||
44 | write_octeon_64bit_hash_sha512(sctx->state[7], 7); | ||
45 | } | ||
46 | |||
47 | static void octeon_sha512_read_hash(struct sha512_state *sctx) | ||
48 | { | ||
49 | sctx->state[0] = read_octeon_64bit_hash_sha512(0); | ||
50 | sctx->state[1] = read_octeon_64bit_hash_sha512(1); | ||
51 | sctx->state[2] = read_octeon_64bit_hash_sha512(2); | ||
52 | sctx->state[3] = read_octeon_64bit_hash_sha512(3); | ||
53 | sctx->state[4] = read_octeon_64bit_hash_sha512(4); | ||
54 | sctx->state[5] = read_octeon_64bit_hash_sha512(5); | ||
55 | sctx->state[6] = read_octeon_64bit_hash_sha512(6); | ||
56 | sctx->state[7] = read_octeon_64bit_hash_sha512(7); | ||
57 | } | ||
58 | |||
59 | static void octeon_sha512_transform(const void *_block) | ||
60 | { | ||
61 | const u64 *block = _block; | ||
62 | |||
63 | write_octeon_64bit_block_sha512(block[0], 0); | ||
64 | write_octeon_64bit_block_sha512(block[1], 1); | ||
65 | write_octeon_64bit_block_sha512(block[2], 2); | ||
66 | write_octeon_64bit_block_sha512(block[3], 3); | ||
67 | write_octeon_64bit_block_sha512(block[4], 4); | ||
68 | write_octeon_64bit_block_sha512(block[5], 5); | ||
69 | write_octeon_64bit_block_sha512(block[6], 6); | ||
70 | write_octeon_64bit_block_sha512(block[7], 7); | ||
71 | write_octeon_64bit_block_sha512(block[8], 8); | ||
72 | write_octeon_64bit_block_sha512(block[9], 9); | ||
73 | write_octeon_64bit_block_sha512(block[10], 10); | ||
74 | write_octeon_64bit_block_sha512(block[11], 11); | ||
75 | write_octeon_64bit_block_sha512(block[12], 12); | ||
76 | write_octeon_64bit_block_sha512(block[13], 13); | ||
77 | write_octeon_64bit_block_sha512(block[14], 14); | ||
78 | octeon_sha512_start(block[15]); | ||
79 | } | ||
80 | |||
81 | static int octeon_sha512_init(struct shash_desc *desc) | ||
82 | { | ||
83 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
84 | |||
85 | sctx->state[0] = SHA512_H0; | ||
86 | sctx->state[1] = SHA512_H1; | ||
87 | sctx->state[2] = SHA512_H2; | ||
88 | sctx->state[3] = SHA512_H3; | ||
89 | sctx->state[4] = SHA512_H4; | ||
90 | sctx->state[5] = SHA512_H5; | ||
91 | sctx->state[6] = SHA512_H6; | ||
92 | sctx->state[7] = SHA512_H7; | ||
93 | sctx->count[0] = sctx->count[1] = 0; | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static int octeon_sha384_init(struct shash_desc *desc) | ||
99 | { | ||
100 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
101 | |||
102 | sctx->state[0] = SHA384_H0; | ||
103 | sctx->state[1] = SHA384_H1; | ||
104 | sctx->state[2] = SHA384_H2; | ||
105 | sctx->state[3] = SHA384_H3; | ||
106 | sctx->state[4] = SHA384_H4; | ||
107 | sctx->state[5] = SHA384_H5; | ||
108 | sctx->state[6] = SHA384_H6; | ||
109 | sctx->state[7] = SHA384_H7; | ||
110 | sctx->count[0] = sctx->count[1] = 0; | ||
111 | |||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | static void __octeon_sha512_update(struct sha512_state *sctx, const u8 *data, | ||
116 | unsigned int len) | ||
117 | { | ||
118 | unsigned int part_len; | ||
119 | unsigned int index; | ||
120 | unsigned int i; | ||
121 | |||
122 | /* Compute number of bytes mod 128. */ | ||
123 | index = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
124 | |||
125 | /* Update number of bytes. */ | ||
126 | if ((sctx->count[0] += len) < len) | ||
127 | sctx->count[1]++; | ||
128 | |||
129 | part_len = SHA512_BLOCK_SIZE - index; | ||
130 | |||
131 | /* Transform as many times as possible. */ | ||
132 | if (len >= part_len) { | ||
133 | memcpy(&sctx->buf[index], data, part_len); | ||
134 | octeon_sha512_transform(sctx->buf); | ||
135 | |||
136 | for (i = part_len; i + SHA512_BLOCK_SIZE <= len; | ||
137 | i += SHA512_BLOCK_SIZE) | ||
138 | octeon_sha512_transform(&data[i]); | ||
139 | |||
140 | index = 0; | ||
141 | } else { | ||
142 | i = 0; | ||
143 | } | ||
144 | |||
145 | /* Buffer remaining input. */ | ||
146 | memcpy(&sctx->buf[index], &data[i], len - i); | ||
147 | } | ||
148 | |||
149 | static int octeon_sha512_update(struct shash_desc *desc, const u8 *data, | ||
150 | unsigned int len) | ||
151 | { | ||
152 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
153 | struct octeon_cop2_state state; | ||
154 | unsigned long flags; | ||
155 | |||
156 | /* | ||
157 | * Small updates never reach the crypto engine, so the generic sha512 is | ||
158 | * faster because of the heavyweight octeon_crypto_enable() / | ||
159 | * octeon_crypto_disable(). | ||
160 | */ | ||
161 | if ((sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) | ||
162 | return crypto_sha512_update(desc, data, len); | ||
163 | |||
164 | flags = octeon_crypto_enable(&state); | ||
165 | octeon_sha512_store_hash(sctx); | ||
166 | |||
167 | __octeon_sha512_update(sctx, data, len); | ||
168 | |||
169 | octeon_sha512_read_hash(sctx); | ||
170 | octeon_crypto_disable(&state, flags); | ||
171 | |||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | static int octeon_sha512_final(struct shash_desc *desc, u8 *hash) | ||
176 | { | ||
177 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
178 | static const u8 padding[128] = { 0x80, }; | ||
179 | struct octeon_cop2_state state; | ||
180 | __be64 *dst = (__be64 *)hash; | ||
181 | unsigned int pad_len; | ||
182 | unsigned long flags; | ||
183 | unsigned int index; | ||
184 | __be64 bits[2]; | ||
185 | int i; | ||
186 | |||
187 | /* Save number of bits. */ | ||
188 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
189 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
190 | |||
191 | /* Pad out to 112 mod 128. */ | ||
192 | index = sctx->count[0] & 0x7f; | ||
193 | pad_len = (index < 112) ? (112 - index) : ((128+112) - index); | ||
194 | |||
195 | flags = octeon_crypto_enable(&state); | ||
196 | octeon_sha512_store_hash(sctx); | ||
197 | |||
198 | __octeon_sha512_update(sctx, padding, pad_len); | ||
199 | |||
200 | /* Append length (before padding). */ | ||
201 | __octeon_sha512_update(sctx, (const u8 *)bits, sizeof(bits)); | ||
202 | |||
203 | octeon_sha512_read_hash(sctx); | ||
204 | octeon_crypto_disable(&state, flags); | ||
205 | |||
206 | /* Store state in digest. */ | ||
207 | for (i = 0; i < 8; i++) | ||
208 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
209 | |||
210 | /* Zeroize sensitive information. */ | ||
211 | memset(sctx, 0, sizeof(struct sha512_state)); | ||
212 | |||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static int octeon_sha384_final(struct shash_desc *desc, u8 *hash) | ||
217 | { | ||
218 | u8 D[64]; | ||
219 | |||
220 | octeon_sha512_final(desc, D); | ||
221 | |||
222 | memcpy(hash, D, 48); | ||
223 | memzero_explicit(D, 64); | ||
224 | |||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | static struct shash_alg octeon_sha512_algs[2] = { { | ||
229 | .digestsize = SHA512_DIGEST_SIZE, | ||
230 | .init = octeon_sha512_init, | ||
231 | .update = octeon_sha512_update, | ||
232 | .final = octeon_sha512_final, | ||
233 | .descsize = sizeof(struct sha512_state), | ||
234 | .base = { | ||
235 | .cra_name = "sha512", | ||
236 | .cra_driver_name= "octeon-sha512", | ||
237 | .cra_priority = OCTEON_CR_OPCODE_PRIORITY, | ||
238 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
239 | .cra_blocksize = SHA512_BLOCK_SIZE, | ||
240 | .cra_module = THIS_MODULE, | ||
241 | } | ||
242 | }, { | ||
243 | .digestsize = SHA384_DIGEST_SIZE, | ||
244 | .init = octeon_sha384_init, | ||
245 | .update = octeon_sha512_update, | ||
246 | .final = octeon_sha384_final, | ||
247 | .descsize = sizeof(struct sha512_state), | ||
248 | .base = { | ||
249 | .cra_name = "sha384", | ||
250 | .cra_driver_name= "octeon-sha384", | ||
251 | .cra_priority = OCTEON_CR_OPCODE_PRIORITY, | ||
252 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
253 | .cra_blocksize = SHA384_BLOCK_SIZE, | ||
254 | .cra_module = THIS_MODULE, | ||
255 | } | ||
256 | } }; | ||
257 | |||
258 | static int __init octeon_sha512_mod_init(void) | ||
259 | { | ||
260 | if (!octeon_has_crypto()) | ||
261 | return -ENOTSUPP; | ||
262 | return crypto_register_shashes(octeon_sha512_algs, | ||
263 | ARRAY_SIZE(octeon_sha512_algs)); | ||
264 | } | ||
265 | |||
266 | static void __exit octeon_sha512_mod_fini(void) | ||
267 | { | ||
268 | crypto_unregister_shashes(octeon_sha512_algs, | ||
269 | ARRAY_SIZE(octeon_sha512_algs)); | ||
270 | } | ||
271 | |||
272 | module_init(octeon_sha512_mod_init); | ||
273 | module_exit(octeon_sha512_mod_fini); | ||
274 | |||
275 | MODULE_LICENSE("GPL"); | ||
276 | MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms (OCTEON)"); | ||
277 | MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); | ||
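SHA-512 keeps a 128-bit byte counter in count[0]/count[1], so the length field is assembled by shifting the pair left by three as one wide value: the high dword picks up the bits that overflow out of count[0]. A self-checking sketch, using the GCC/Clang unsigned __int128 extension as an oracle:

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t count0 = 0x3000000000000005ULL;	/* low byte count */
		uint64_t count1 = 0x2;				/* high byte count */

		/* the split used by octeon_sha512_final() */
		uint64_t hi = count1 << 3 | count0 >> 61;
		uint64_t lo = count0 << 3;

		/* oracle: the same shift done in 128-bit arithmetic */
		unsigned __int128 bits =
			(((unsigned __int128)count1 << 64) | count0) << 3;

		assert((uint64_t)(bits >> 64) == hi && (uint64_t)bits == lo);
		return 0;
	}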
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h index 4794067cb5a7..5035f09c5427 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h | |||
@@ -1259,20 +1259,6 @@ | |||
1259 | #define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18) | 1259 | #define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18) |
1260 | 1260 | ||
1261 | /************************************************************************* | 1261 | /************************************************************************* |
1262 | * _REG relative to RSET_RNG | ||
1263 | *************************************************************************/ | ||
1264 | |||
1265 | #define RNG_CTRL 0x00 | ||
1266 | #define RNG_EN (1 << 0) | ||
1267 | |||
1268 | #define RNG_STAT 0x04 | ||
1269 | #define RNG_AVAIL_MASK (0xff000000) | ||
1270 | |||
1271 | #define RNG_DATA 0x08 | ||
1272 | #define RNG_THRES 0x0c | ||
1273 | #define RNG_MASK 0x10 | ||
1274 | |||
1275 | /************************************************************************* | ||
1276 | * _REG relative to RSET_SPI | 1262 | * _REG relative to RSET_SPI |
1277 | *************************************************************************/ | 1263 | *************************************************************************/ |
1278 | 1264 | ||
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 2926fb9c570a..9c221b69c181 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile | |||
@@ -4,6 +4,14 @@ | |||
4 | # Arch-specific CryptoAPI modules. | 4 | # Arch-specific CryptoAPI modules. |
5 | # | 5 | # |
6 | 6 | ||
7 | obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o | ||
8 | obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o | ||
7 | obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o | 9 | obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o |
10 | obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o | ||
11 | obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o | ||
8 | 12 | ||
13 | aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o | ||
14 | md5-ppc-y := md5-asm.o md5-glue.o | ||
9 | sha1-powerpc-y := sha1-powerpc-asm.o sha1.o | 15 | sha1-powerpc-y := sha1-powerpc-asm.o sha1.o |
16 | sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o | ||
17 | sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o | ||
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S new file mode 100644 index 000000000000..5dc6bce90a77 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-core.S | |||
@@ -0,0 +1,351 @@ | |||
1 | /* | ||
2 | * Fast AES implementation for SPE instruction set (PPC) | ||
3 | * | ||
4 | * This code makes use of the SPE SIMD instruction set as defined in | ||
5 | * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf | ||
6 | * Implementation is based on optimization guide notes from | ||
7 | * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf | ||
8 | * | ||
9 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <asm/ppc_asm.h> | ||
19 | #include "aes-spe-regs.h" | ||
20 | |||
21 | #define EAD(in, bpos) \ | ||
22 | rlwimi rT0,in,28-((bpos+3)%4)*8,20,27; | ||
23 | |||
24 | #define DAD(in, bpos) \ | ||
25 | rlwimi rT1,in,24-((bpos+3)%4)*8,24,31; | ||
26 | |||
27 | #define LWH(out, off) \ | ||
28 | evlwwsplat out,off(rT0); /* load word high */ | ||
29 | |||
30 | #define LWL(out, off) \ | ||
31 | lwz out,off(rT0); /* load word low */ | ||
32 | |||
33 | #define LBZ(out, tab, off) \ | ||
34 | lbz out,off(tab); /* load byte */ | ||
35 | |||
36 | #define LAH(out, in, bpos, off) \ | ||
37 | EAD(in, bpos) /* calc addr + load word high */ \ | ||
38 | LWH(out, off) | ||
39 | |||
40 | #define LAL(out, in, bpos, off) \ | ||
41 | EAD(in, bpos) /* calc addr + load word low */ \ | ||
42 | LWL(out, off) | ||
43 | |||
44 | #define LAE(out, in, bpos) \ | ||
45 | EAD(in, bpos) /* calc addr + load enc byte */ \ | ||
46 | LBZ(out, rT0, 8) | ||
47 | |||
48 | #define LBE(out) \ | ||
49 | LBZ(out, rT0, 8) /* load enc byte */ | ||
50 | |||
51 | #define LAD(out, in, bpos) \ | ||
52 | DAD(in, bpos) /* calc addr + load dec byte */ \ | ||
53 | LBZ(out, rT1, 0) | ||
54 | |||
55 | #define LBD(out) \ | ||
56 | LBZ(out, rT1, 0) | ||
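For readers untangling the rlwimi arithmetic, here is a minimal C model of the two address-calculation macros (an illustration only: the function names are hypothetical, `u32` is the kernel type from <linux/types.h>, and rT0/rT1 are assumed to hold a 4 KB-aligned table base):

    static inline u32 ead(u32 rT0, u32 in, int bpos)
    {
            u32 byte = (in >> (8 * bpos)) & 0xff;   /* select byte bpos of in */

            return (rT0 & ~0xff0u) | (byte << 4);   /* 16-byte table entries */
    }

    static inline u32 dad(u32 rT1, u32 in, int bpos)
    {
            u32 byte = (in >> (8 * bpos)) & 0xff;

            return (rT1 & ~0xffu) | byte;           /* 1-byte table entries */
    }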
57 | |||
58 | /* | ||
59 | * ppc_encrypt_block: The central encryption function for a single 16-byte | ||
60 | * block. It does no stack handling or register saving to support fast calls | ||
61 | * via bl/blr. It expects that the caller has pre-xored the input data with | ||
62 | * the first 4 words of the encryption key into rD0-rD3. Pointer/counter | ||
63 | * registers (rT0, rKP, CTR) must also have been set up beforehand. Output is | ||
64 | * stored in rD0-rD3 and rW0-rW3; the caller must execute a final xor on the | ||
65 | * output registers. All working registers rD0-rD3 & rW0-rW7 are overwritten. | ||
66 | * | ||
67 | */ | ||
68 | _GLOBAL(ppc_encrypt_block) | ||
69 | LAH(rW4, rD1, 2, 4) | ||
70 | LAH(rW6, rD0, 3, 0) | ||
71 | LAH(rW3, rD0, 1, 8) | ||
72 | ppc_encrypt_block_loop: | ||
73 | LAH(rW0, rD3, 0, 12) | ||
74 | LAL(rW0, rD0, 0, 12) | ||
75 | LAH(rW1, rD1, 0, 12) | ||
76 | LAH(rW2, rD2, 1, 8) | ||
77 | LAL(rW2, rD3, 1, 8) | ||
78 | LAL(rW3, rD1, 1, 8) | ||
79 | LAL(rW4, rD2, 2, 4) | ||
80 | LAL(rW6, rD1, 3, 0) | ||
81 | LAH(rW5, rD3, 2, 4) | ||
82 | LAL(rW5, rD0, 2, 4) | ||
83 | LAH(rW7, rD2, 3, 0) | ||
84 | evldw rD1,16(rKP) | ||
85 | EAD(rD3, 3) | ||
86 | evxor rW2,rW2,rW4 | ||
87 | LWL(rW7, 0) | ||
88 | evxor rW2,rW2,rW6 | ||
89 | EAD(rD2, 0) | ||
90 | evxor rD1,rD1,rW2 | ||
91 | LWL(rW1, 12) | ||
92 | evxor rD1,rD1,rW0 | ||
93 | evldw rD3,24(rKP) | ||
94 | evmergehi rD0,rD0,rD1 | ||
95 | EAD(rD1, 2) | ||
96 | evxor rW3,rW3,rW5 | ||
97 | LWH(rW4, 4) | ||
98 | evxor rW3,rW3,rW7 | ||
99 | EAD(rD0, 3) | ||
100 | evxor rD3,rD3,rW3 | ||
101 | LWH(rW6, 0) | ||
102 | evxor rD3,rD3,rW1 | ||
103 | EAD(rD0, 1) | ||
104 | evmergehi rD2,rD2,rD3 | ||
105 | LWH(rW3, 8) | ||
106 | LAH(rW0, rD3, 0, 12) | ||
107 | LAL(rW0, rD0, 0, 12) | ||
108 | LAH(rW1, rD1, 0, 12) | ||
109 | LAH(rW2, rD2, 1, 8) | ||
110 | LAL(rW2, rD3, 1, 8) | ||
111 | LAL(rW3, rD1, 1, 8) | ||
112 | LAL(rW4, rD2, 2, 4) | ||
113 | LAL(rW6, rD1, 3, 0) | ||
114 | LAH(rW5, rD3, 2, 4) | ||
115 | LAL(rW5, rD0, 2, 4) | ||
116 | LAH(rW7, rD2, 3, 0) | ||
117 | evldw rD1,32(rKP) | ||
118 | EAD(rD3, 3) | ||
119 | evxor rW2,rW2,rW4 | ||
120 | LWL(rW7, 0) | ||
121 | evxor rW2,rW2,rW6 | ||
122 | EAD(rD2, 0) | ||
123 | evxor rD1,rD1,rW2 | ||
124 | LWL(rW1, 12) | ||
125 | evxor rD1,rD1,rW0 | ||
126 | evldw rD3,40(rKP) | ||
127 | evmergehi rD0,rD0,rD1 | ||
128 | EAD(rD1, 2) | ||
129 | evxor rW3,rW3,rW5 | ||
130 | LWH(rW4, 4) | ||
131 | evxor rW3,rW3,rW7 | ||
132 | EAD(rD0, 3) | ||
133 | evxor rD3,rD3,rW3 | ||
134 | LWH(rW6, 0) | ||
135 | evxor rD3,rD3,rW1 | ||
136 | EAD(rD0, 1) | ||
137 | evmergehi rD2,rD2,rD3 | ||
138 | LWH(rW3, 8) | ||
139 | addi rKP,rKP,32 | ||
140 | bdnz ppc_encrypt_block_loop | ||
141 | LAH(rW0, rD3, 0, 12) | ||
142 | LAL(rW0, rD0, 0, 12) | ||
143 | LAH(rW1, rD1, 0, 12) | ||
144 | LAH(rW2, rD2, 1, 8) | ||
145 | LAL(rW2, rD3, 1, 8) | ||
146 | LAL(rW3, rD1, 1, 8) | ||
147 | LAL(rW4, rD2, 2, 4) | ||
148 | LAH(rW5, rD3, 2, 4) | ||
149 | LAL(rW6, rD1, 3, 0) | ||
150 | LAL(rW5, rD0, 2, 4) | ||
151 | LAH(rW7, rD2, 3, 0) | ||
152 | evldw rD1,16(rKP) | ||
153 | EAD(rD3, 3) | ||
154 | evxor rW2,rW2,rW4 | ||
155 | LWL(rW7, 0) | ||
156 | evxor rW2,rW2,rW6 | ||
157 | EAD(rD2, 0) | ||
158 | evxor rD1,rD1,rW2 | ||
159 | LWL(rW1, 12) | ||
160 | evxor rD1,rD1,rW0 | ||
161 | evldw rD3,24(rKP) | ||
162 | evmergehi rD0,rD0,rD1 | ||
163 | EAD(rD1, 0) | ||
164 | evxor rW3,rW3,rW5 | ||
165 | LBE(rW2) | ||
166 | evxor rW3,rW3,rW7 | ||
167 | EAD(rD0, 1) | ||
168 | evxor rD3,rD3,rW3 | ||
169 | LBE(rW6) | ||
170 | evxor rD3,rD3,rW1 | ||
171 | EAD(rD0, 0) | ||
172 | evmergehi rD2,rD2,rD3 | ||
173 | LBE(rW1) | ||
174 | LAE(rW0, rD3, 0) | ||
175 | LAE(rW1, rD0, 0) | ||
176 | LAE(rW4, rD2, 1) | ||
177 | LAE(rW5, rD3, 1) | ||
178 | LAE(rW3, rD2, 0) | ||
179 | LAE(rW7, rD1, 1) | ||
180 | rlwimi rW0,rW4,8,16,23 | ||
181 | rlwimi rW1,rW5,8,16,23 | ||
182 | LAE(rW4, rD1, 2) | ||
183 | LAE(rW5, rD2, 2) | ||
184 | rlwimi rW2,rW6,8,16,23 | ||
185 | rlwimi rW3,rW7,8,16,23 | ||
186 | LAE(rW6, rD3, 2) | ||
187 | LAE(rW7, rD0, 2) | ||
188 | rlwimi rW0,rW4,16,8,15 | ||
189 | rlwimi rW1,rW5,16,8,15 | ||
190 | LAE(rW4, rD0, 3) | ||
191 | LAE(rW5, rD1, 3) | ||
192 | rlwimi rW2,rW6,16,8,15 | ||
193 | lwz rD0,32(rKP) | ||
194 | rlwimi rW3,rW7,16,8,15 | ||
195 | lwz rD1,36(rKP) | ||
196 | LAE(rW6, rD2, 3) | ||
197 | LAE(rW7, rD3, 3) | ||
198 | rlwimi rW0,rW4,24,0,7 | ||
199 | lwz rD2,40(rKP) | ||
200 | rlwimi rW1,rW5,24,0,7 | ||
201 | lwz rD3,44(rKP) | ||
202 | rlwimi rW2,rW6,24,0,7 | ||
203 | rlwimi rW3,rW7,24,0,7 | ||
204 | blr | ||
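In rough C-like pseudocode, the bl/blr protocol described in the header comment is (a sketch for orientation only; the real callers live in aes-spe-modes.S):

    /* state  = input ^ key[0..3];       caller pre-xors via START_KEY      */
    /* bl ppc_encrypt_block;             CTR register counts double rounds  */
    /* output = state ^ last_round_key;  caller's final xor against rW0-rW3 */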
205 | |||
206 | /* | ||
207 | * ppc_decrypt_block: The central decryption function for a single 16-byte | ||
208 | * block. It does no stack handling or register saving to support fast calls | ||
209 | * via bl/blr. It expects that the caller has pre-xored the input data with | ||
210 | * the first 4 words of the decryption key into rD0-rD3. Pointer/counter | ||
211 | * registers (rT0, rKP, CTR) must also have been set up beforehand. Output is | ||
212 | * stored in rD0-rD3 and rW0-rW3; the caller must execute a final xor on the | ||
213 | * output registers. All working registers rD0-rD3 & rW0-rW7 are overwritten. | ||
214 | * | ||
215 | */ | ||
216 | _GLOBAL(ppc_decrypt_block) | ||
217 | LAH(rW0, rD1, 0, 12) | ||
218 | LAH(rW6, rD0, 3, 0) | ||
219 | LAH(rW3, rD0, 1, 8) | ||
220 | ppc_decrypt_block_loop: | ||
221 | LAH(rW1, rD3, 0, 12) | ||
222 | LAL(rW0, rD2, 0, 12) | ||
223 | LAH(rW2, rD2, 1, 8) | ||
224 | LAL(rW2, rD3, 1, 8) | ||
225 | LAH(rW4, rD3, 2, 4) | ||
226 | LAL(rW4, rD0, 2, 4) | ||
227 | LAL(rW6, rD1, 3, 0) | ||
228 | LAH(rW5, rD1, 2, 4) | ||
229 | LAH(rW7, rD2, 3, 0) | ||
230 | LAL(rW7, rD3, 3, 0) | ||
231 | LAL(rW3, rD1, 1, 8) | ||
232 | evldw rD1,16(rKP) | ||
233 | EAD(rD0, 0) | ||
234 | evxor rW4,rW4,rW6 | ||
235 | LWL(rW1, 12) | ||
236 | evxor rW0,rW0,rW4 | ||
237 | EAD(rD2, 2) | ||
238 | evxor rW0,rW0,rW2 | ||
239 | LWL(rW5, 4) | ||
240 | evxor rD1,rD1,rW0 | ||
241 | evldw rD3,24(rKP) | ||
242 | evmergehi rD0,rD0,rD1 | ||
243 | EAD(rD1, 0) | ||
244 | evxor rW3,rW3,rW7 | ||
245 | LWH(rW0, 12) | ||
246 | evxor rW3,rW3,rW1 | ||
247 | EAD(rD0, 3) | ||
248 | evxor rD3,rD3,rW3 | ||
249 | LWH(rW6, 0) | ||
250 | evxor rD3,rD3,rW5 | ||
251 | EAD(rD0, 1) | ||
252 | evmergehi rD2,rD2,rD3 | ||
253 | LWH(rW3, 8) | ||
254 | LAH(rW1, rD3, 0, 12) | ||
255 | LAL(rW0, rD2, 0, 12) | ||
256 | LAH(rW2, rD2, 1, 8) | ||
257 | LAL(rW2, rD3, 1, 8) | ||
258 | LAH(rW4, rD3, 2, 4) | ||
259 | LAL(rW4, rD0, 2, 4) | ||
260 | LAL(rW6, rD1, 3, 0) | ||
261 | LAH(rW5, rD1, 2, 4) | ||
262 | LAH(rW7, rD2, 3, 0) | ||
263 | LAL(rW7, rD3, 3, 0) | ||
264 | LAL(rW3, rD1, 1, 8) | ||
265 | evldw rD1,32(rKP) | ||
266 | EAD(rD0, 0) | ||
267 | evxor rW4,rW4,rW6 | ||
268 | LWL(rW1, 12) | ||
269 | evxor rW0,rW0,rW4 | ||
270 | EAD(rD2, 2) | ||
271 | evxor rW0,rW0,rW2 | ||
272 | LWL(rW5, 4) | ||
273 | evxor rD1,rD1,rW0 | ||
274 | evldw rD3,40(rKP) | ||
275 | evmergehi rD0,rD0,rD1 | ||
276 | EAD(rD1, 0) | ||
277 | evxor rW3,rW3,rW7 | ||
278 | LWH(rW0, 12) | ||
279 | evxor rW3,rW3,rW1 | ||
280 | EAD(rD0, 3) | ||
281 | evxor rD3,rD3,rW3 | ||
282 | LWH(rW6, 0) | ||
283 | evxor rD3,rD3,rW5 | ||
284 | EAD(rD0, 1) | ||
285 | evmergehi rD2,rD2,rD3 | ||
286 | LWH(rW3, 8) | ||
287 | addi rKP,rKP,32 | ||
288 | bdnz ppc_decrypt_block_loop | ||
289 | LAH(rW1, rD3, 0, 12) | ||
290 | LAL(rW0, rD2, 0, 12) | ||
291 | LAH(rW2, rD2, 1, 8) | ||
292 | LAL(rW2, rD3, 1, 8) | ||
293 | LAH(rW4, rD3, 2, 4) | ||
294 | LAL(rW4, rD0, 2, 4) | ||
295 | LAL(rW6, rD1, 3, 0) | ||
296 | LAH(rW5, rD1, 2, 4) | ||
297 | LAH(rW7, rD2, 3, 0) | ||
298 | LAL(rW7, rD3, 3, 0) | ||
299 | LAL(rW3, rD1, 1, 8) | ||
300 | evldw rD1,16(rKP) | ||
301 | EAD(rD0, 0) | ||
302 | evxor rW4,rW4,rW6 | ||
303 | LWL(rW1, 12) | ||
304 | evxor rW0,rW0,rW4 | ||
305 | EAD(rD2, 2) | ||
306 | evxor rW0,rW0,rW2 | ||
307 | LWL(rW5, 4) | ||
308 | evxor rD1,rD1,rW0 | ||
309 | evldw rD3,24(rKP) | ||
310 | evmergehi rD0,rD0,rD1 | ||
311 | DAD(rD1, 0) | ||
312 | evxor rW3,rW3,rW7 | ||
313 | LBD(rW0) | ||
314 | evxor rW3,rW3,rW1 | ||
315 | DAD(rD0, 1) | ||
316 | evxor rD3,rD3,rW3 | ||
317 | LBD(rW6) | ||
318 | evxor rD3,rD3,rW5 | ||
319 | DAD(rD0, 0) | ||
320 | evmergehi rD2,rD2,rD3 | ||
321 | LBD(rW3) | ||
322 | LAD(rW2, rD3, 0) | ||
323 | LAD(rW1, rD2, 0) | ||
324 | LAD(rW4, rD2, 1) | ||
325 | LAD(rW5, rD3, 1) | ||
326 | LAD(rW7, rD1, 1) | ||
327 | rlwimi rW0,rW4,8,16,23 | ||
328 | rlwimi rW1,rW5,8,16,23 | ||
329 | LAD(rW4, rD3, 2) | ||
330 | LAD(rW5, rD0, 2) | ||
331 | rlwimi rW2,rW6,8,16,23 | ||
332 | rlwimi rW3,rW7,8,16,23 | ||
333 | LAD(rW6, rD1, 2) | ||
334 | LAD(rW7, rD2, 2) | ||
335 | rlwimi rW0,rW4,16,8,15 | ||
336 | rlwimi rW1,rW5,16,8,15 | ||
337 | LAD(rW4, rD0, 3) | ||
338 | LAD(rW5, rD1, 3) | ||
339 | rlwimi rW2,rW6,16,8,15 | ||
340 | lwz rD0,32(rKP) | ||
341 | rlwimi rW3,rW7,16,8,15 | ||
342 | lwz rD1,36(rKP) | ||
343 | LAD(rW6, rD2, 3) | ||
344 | LAD(rW7, rD3, 3) | ||
345 | rlwimi rW0,rW4,24,0,7 | ||
346 | lwz rD2,40(rKP) | ||
347 | rlwimi rW1,rW5,24,0,7 | ||
348 | lwz rD3,44(rKP) | ||
349 | rlwimi rW2,rW6,24,0,7 | ||
350 | rlwimi rW3,rW7,24,0,7 | ||
351 | blr | ||
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c new file mode 100644 index 000000000000..bd5e63f72ad4 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-glue.c | |||
@@ -0,0 +1,512 @@ | |||
1 | /* | ||
2 | * Glue code for AES implementation for SPE instructions (PPC) | ||
3 | * | ||
4 | * Based on the generic implementation. The assembler module takes care | ||
5 | * of the SPE registers so it can run from interrupt context. | ||
6 | * | ||
7 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License as published by the Free | ||
11 | * Software Foundation; either version 2 of the License, or (at your option) | ||
12 | * any later version. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <crypto/aes.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/errno.h> | ||
21 | #include <linux/crypto.h> | ||
22 | #include <asm/byteorder.h> | ||
23 | #include <asm/switch_to.h> | ||
24 | #include <crypto/algapi.h> | ||
25 | |||
26 | /* | ||
27 | * MAX_BYTES defines the number of bytes that are allowed to be processed | ||
28 | * between preempt_disable() and preempt_enable(). e500 cores can issue two | ||
29 | * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32 | ||
30 | * bit unit (SU2). One of these can be a memory access that is executed via | ||
31 | * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per | ||
32 | * 16-byte block, or 25 cycles per byte. Thus 768 bytes of input data | ||
33 | * will need an estimated maximum of 20,000 cycles, headroom for cache misses | ||
34 | * included. Even with the low-end model clocked at 667 MHz this equals a | ||
35 | * critical time window of less than 30us. The value has been chosen to | ||
36 | * process a 512-byte disk block in one run or a large 1400-byte IPsec | ||
37 | * network packet in two. | ||
38 | * | ||
39 | */ | ||
40 | #define MAX_BYTES 768 | ||
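Spelling out the arithmetic behind this budget, using only the figures quoted in the comment above:

    ~780 ops / 16 bytes ≈ 49 ops per byte ≈ 25 cycles per byte (dual issue)
    768 bytes * 25 cycles/byte ≈ 19,200 cycles (~20,000 with cache-miss headroom)
    20,000 cycles / 667 MHz ≈ 30 us critical section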
41 | |||
42 | struct ppc_aes_ctx { | ||
43 | u32 key_enc[AES_MAX_KEYLENGTH_U32]; | ||
44 | u32 key_dec[AES_MAX_KEYLENGTH_U32]; | ||
45 | u32 rounds; | ||
46 | }; | ||
47 | |||
48 | struct ppc_xts_ctx { | ||
49 | u32 key_enc[AES_MAX_KEYLENGTH_U32]; | ||
50 | u32 key_dec[AES_MAX_KEYLENGTH_U32]; | ||
51 | u32 key_twk[AES_MAX_KEYLENGTH_U32]; | ||
52 | u32 rounds; | ||
53 | }; | ||
54 | |||
55 | extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds); | ||
56 | extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds); | ||
57 | extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, | ||
58 | u32 bytes); | ||
59 | extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds, | ||
60 | u32 bytes); | ||
61 | extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, | ||
62 | u32 bytes, u8 *iv); | ||
63 | extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds, | ||
64 | u32 bytes, u8 *iv); | ||
65 | extern void ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, | ||
66 | u32 bytes, u8 *iv); | ||
67 | extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds, | ||
68 | u32 bytes, u8 *iv, u32 *key_twk); | ||
69 | extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds, | ||
70 | u32 bytes, u8 *iv, u32 *key_twk); | ||
71 | |||
72 | extern void ppc_expand_key_128(u32 *key_enc, const u8 *key); | ||
73 | extern void ppc_expand_key_192(u32 *key_enc, const u8 *key); | ||
74 | extern void ppc_expand_key_256(u32 *key_enc, const u8 *key); | ||
75 | |||
76 | extern void ppc_generate_decrypt_key(u32 *key_dec, u32 *key_enc, | ||
77 | unsigned int key_len); | ||
78 | |||
79 | static void spe_begin(void) | ||
80 | { | ||
81 | /* disable preemption and save the user's SPE registers if required */ | ||
82 | preempt_disable(); | ||
83 | enable_kernel_spe(); | ||
84 | } | ||
85 | |||
86 | static void spe_end(void) | ||
87 | { | ||
88 | /* reenable preemption */ | ||
89 | preempt_enable(); | ||
90 | } | ||
91 | |||
92 | static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, | ||
93 | unsigned int key_len) | ||
94 | { | ||
95 | struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
96 | |||
97 | if (key_len != AES_KEYSIZE_128 && | ||
98 | key_len != AES_KEYSIZE_192 && | ||
99 | key_len != AES_KEYSIZE_256) { | ||
100 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
101 | return -EINVAL; | ||
102 | } | ||
103 | |||
104 | switch (key_len) { | ||
105 | case AES_KEYSIZE_128: | ||
106 | ctx->rounds = 4; | ||
107 | ppc_expand_key_128(ctx->key_enc, in_key); | ||
108 | break; | ||
109 | case AES_KEYSIZE_192: | ||
110 | ctx->rounds = 5; | ||
111 | ppc_expand_key_192(ctx->key_enc, in_key); | ||
112 | break; | ||
113 | case AES_KEYSIZE_256: | ||
114 | ctx->rounds = 6; | ||
115 | ppc_expand_key_256(ctx->key_enc, in_key); | ||
116 | break; | ||
117 | } | ||
118 | |||
119 | ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len); | ||
120 | |||
121 | return 0; | ||
122 | } | ||
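The switch above follows a simple pattern; as a hedged closed form (an observation about this code, not part of the patch):

    rounds = key_len / 8 + 2;   /* 16/24/32 byte keys -> 4/5/6 */

The assembler core appears to run two AES rounds per counter tick plus two more after the loop, so the effective round count is 2 * rounds + 2 = 10/12/14, as FIPS-197 requires.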
123 | |||
124 | static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key, | ||
125 | unsigned int key_len) | ||
126 | { | ||
127 | struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
128 | |||
129 | key_len >>= 1; | ||
130 | |||
131 | if (key_len != AES_KEYSIZE_128 && | ||
132 | key_len != AES_KEYSIZE_192 && | ||
133 | key_len != AES_KEYSIZE_256) { | ||
134 | tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
135 | return -EINVAL; | ||
136 | } | ||
137 | |||
138 | switch (key_len) { | ||
139 | case AES_KEYSIZE_128: | ||
140 | ctx->rounds = 4; | ||
141 | ppc_expand_key_128(ctx->key_enc, in_key); | ||
142 | ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128); | ||
143 | break; | ||
144 | case AES_KEYSIZE_192: | ||
145 | ctx->rounds = 5; | ||
146 | ppc_expand_key_192(ctx->key_enc, in_key); | ||
147 | ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192); | ||
148 | break; | ||
149 | case AES_KEYSIZE_256: | ||
150 | ctx->rounds = 6; | ||
151 | ppc_expand_key_256(ctx->key_enc, in_key); | ||
152 | ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256); | ||
153 | break; | ||
154 | } | ||
155 | |||
156 | ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len); | ||
157 | |||
158 | return 0; | ||
159 | } | ||
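XTS supplies one double-length key, which the key_len >>= 1 above splits in half. A worked case: a 64-byte key becomes key_len = 32, so bytes 0..31 expand into key_enc/key_dec and bytes 32..63 into the tweak schedule key_twk, i.e. two independent AES-256 schedules.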
160 | |||
161 | static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | ||
162 | { | ||
163 | struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
164 | |||
165 | spe_begin(); | ||
166 | ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds); | ||
167 | spe_end(); | ||
168 | } | ||
169 | |||
170 | static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) | ||
171 | { | ||
172 | struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
173 | |||
174 | spe_begin(); | ||
175 | ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds); | ||
176 | spe_end(); | ||
177 | } | ||
178 | |||
179 | static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
180 | struct scatterlist *src, unsigned int nbytes) | ||
181 | { | ||
182 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
183 | struct blkcipher_walk walk; | ||
184 | unsigned int ubytes; | ||
185 | int err; | ||
186 | |||
187 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
188 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
189 | err = blkcipher_walk_virt(desc, &walk); | ||
190 | |||
191 | while ((nbytes = walk.nbytes)) { | ||
192 | ubytes = nbytes > MAX_BYTES ? | ||
193 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
194 | nbytes -= ubytes; | ||
195 | |||
196 | spe_begin(); | ||
197 | ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, | ||
198 | ctx->key_enc, ctx->rounds, nbytes); | ||
199 | spe_end(); | ||
200 | |||
201 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
202 | } | ||
203 | |||
204 | return err; | ||
205 | } | ||
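A hypothetical walk-through of the ubytes computation: for a 1008-byte request the first pass sets ubytes = 1008 - 768 = 240 and encrypts 768 bytes inside one preempt-off window; the walk then hands back the remaining 240 bytes, ubytes = 240 & 15 = 0, and the second pass encrypts them all.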
206 | |||
207 | static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
208 | struct scatterlist *src, unsigned int nbytes) | ||
209 | { | ||
210 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
211 | struct blkcipher_walk walk; | ||
212 | unsigned int ubytes; | ||
213 | int err; | ||
214 | |||
215 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
216 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
217 | err = blkcipher_walk_virt(desc, &walk); | ||
218 | |||
219 | while ((nbytes = walk.nbytes)) { | ||
220 | ubytes = nbytes > MAX_BYTES ? | ||
221 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
222 | nbytes -= ubytes; | ||
223 | |||
224 | spe_begin(); | ||
225 | ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr, | ||
226 | ctx->key_dec, ctx->rounds, nbytes); | ||
227 | spe_end(); | ||
228 | |||
229 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
230 | } | ||
231 | |||
232 | return err; | ||
233 | } | ||
234 | |||
235 | static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
236 | struct scatterlist *src, unsigned int nbytes) | ||
237 | { | ||
238 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
239 | struct blkcipher_walk walk; | ||
240 | unsigned int ubytes; | ||
241 | int err; | ||
242 | |||
243 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
244 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
245 | err = blkcipher_walk_virt(desc, &walk); | ||
246 | |||
247 | while ((nbytes = walk.nbytes)) { | ||
248 | ubytes = nbytes > MAX_BYTES ? | ||
249 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
250 | nbytes -= ubytes; | ||
251 | |||
252 | spe_begin(); | ||
253 | ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, | ||
254 | ctx->key_enc, ctx->rounds, nbytes, walk.iv); | ||
255 | spe_end(); | ||
256 | |||
257 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
258 | } | ||
259 | |||
260 | return err; | ||
261 | } | ||
262 | |||
263 | static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
264 | struct scatterlist *src, unsigned int nbytes) | ||
265 | { | ||
266 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
267 | struct blkcipher_walk walk; | ||
268 | unsigned int ubytes; | ||
269 | int err; | ||
270 | |||
271 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
272 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
273 | err = blkcipher_walk_virt(desc, &walk); | ||
274 | |||
275 | while ((nbytes = walk.nbytes)) { | ||
276 | ubytes = nbytes > MAX_BYTES ? | ||
277 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
278 | nbytes -= ubytes; | ||
279 | |||
280 | spe_begin(); | ||
281 | ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr, | ||
282 | ctx->key_dec, ctx->rounds, nbytes, walk.iv); | ||
283 | spe_end(); | ||
284 | |||
285 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
286 | } | ||
287 | |||
288 | return err; | ||
289 | } | ||
290 | |||
291 | static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
292 | struct scatterlist *src, unsigned int nbytes) | ||
293 | { | ||
294 | struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
295 | struct blkcipher_walk walk; | ||
296 | unsigned int pbytes, ubytes; | ||
297 | int err; | ||
298 | |||
299 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
300 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
301 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | ||
302 | |||
303 | while ((pbytes = walk.nbytes)) { | ||
304 | pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes; | ||
305 | pbytes = pbytes == nbytes ? | ||
306 | nbytes : pbytes & ~(AES_BLOCK_SIZE - 1); | ||
307 | ubytes = walk.nbytes - pbytes; | ||
308 | |||
309 | spe_begin(); | ||
310 | ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr, | ||
311 | ctx->key_enc, ctx->rounds, pbytes, walk.iv); | ||
312 | spe_end(); | ||
313 | |||
314 | nbytes -= pbytes; | ||
315 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
316 | } | ||
317 | |||
318 | return err; | ||
319 | } | ||
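The CTR variant differs because the final chunk may end in a partial block. A hypothetical walk-through: for a 1000-byte request the first pass takes pbytes = 768 (already a block multiple), leaving ubytes = 232; on the second pass pbytes == nbytes == 232, so the 8-byte tail is kept and handled by the byte-wise xor path in the assembler's ppc_crypt_ctr.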
320 | |||
321 | static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
322 | struct scatterlist *src, unsigned int nbytes) | ||
323 | { | ||
324 | struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
325 | struct blkcipher_walk walk; | ||
326 | unsigned int ubytes; | ||
327 | int err; | ||
328 | u32 *twk; | ||
329 | |||
330 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
331 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
332 | err = blkcipher_walk_virt(desc, &walk); | ||
333 | twk = ctx->key_twk; | ||
334 | |||
335 | while ((nbytes = walk.nbytes)) { | ||
336 | ubytes = nbytes > MAX_BYTES ? | ||
337 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
338 | nbytes -= ubytes; | ||
339 | |||
340 | spe_begin(); | ||
341 | ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, | ||
342 | ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk); | ||
343 | spe_end(); | ||
344 | |||
345 | twk = NULL; | ||
346 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
347 | } | ||
348 | |||
349 | return err; | ||
350 | } | ||
351 | |||
352 | static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
353 | struct scatterlist *src, unsigned int nbytes) | ||
354 | { | ||
355 | struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
356 | struct blkcipher_walk walk; | ||
357 | unsigned int ubytes; | ||
358 | int err; | ||
359 | u32 *twk; | ||
360 | |||
361 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
362 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
363 | err = blkcipher_walk_virt(desc, &walk); | ||
364 | twk = ctx->key_twk; | ||
365 | |||
366 | while ((nbytes = walk.nbytes)) { | ||
367 | ubytes = nbytes > MAX_BYTES ? | ||
368 | nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1); | ||
369 | nbytes -= ubytes; | ||
370 | |||
371 | spe_begin(); | ||
372 | ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr, | ||
373 | ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk); | ||
374 | spe_end(); | ||
375 | |||
376 | twk = NULL; | ||
377 | err = blkcipher_walk_done(desc, &walk, ubytes); | ||
378 | } | ||
379 | |||
380 | return err; | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen | ||
385 | * because the e500 platform can handle unaligned reads/writes very efficiently. | ||
386 | * This improves IPsec throughput by another few percent. Additionally we assume | ||
387 | * that the AES context is always aligned to at least 8 bytes because it is | ||
388 | * created with kmalloc() in the crypto infrastructure. | ||
389 | * | ||
390 | */ | ||
391 | static struct crypto_alg aes_algs[] = { { | ||
392 | .cra_name = "aes", | ||
393 | .cra_driver_name = "aes-ppc-spe", | ||
394 | .cra_priority = 300, | ||
395 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
396 | .cra_blocksize = AES_BLOCK_SIZE, | ||
397 | .cra_ctxsize = sizeof(struct ppc_aes_ctx), | ||
398 | .cra_alignmask = 0, | ||
399 | .cra_module = THIS_MODULE, | ||
400 | .cra_u = { | ||
401 | .cipher = { | ||
402 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
403 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
404 | .cia_setkey = ppc_aes_setkey, | ||
405 | .cia_encrypt = ppc_aes_encrypt, | ||
406 | .cia_decrypt = ppc_aes_decrypt | ||
407 | } | ||
408 | } | ||
409 | }, { | ||
410 | .cra_name = "ecb(aes)", | ||
411 | .cra_driver_name = "ecb-ppc-spe", | ||
412 | .cra_priority = 300, | ||
413 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
414 | .cra_blocksize = AES_BLOCK_SIZE, | ||
415 | .cra_ctxsize = sizeof(struct ppc_aes_ctx), | ||
416 | .cra_alignmask = 0, | ||
417 | .cra_type = &crypto_blkcipher_type, | ||
418 | .cra_module = THIS_MODULE, | ||
419 | .cra_u = { | ||
420 | .blkcipher = { | ||
421 | .min_keysize = AES_MIN_KEY_SIZE, | ||
422 | .max_keysize = AES_MAX_KEY_SIZE, | ||
423 | .ivsize = AES_BLOCK_SIZE, | ||
424 | .setkey = ppc_aes_setkey, | ||
425 | .encrypt = ppc_ecb_encrypt, | ||
426 | .decrypt = ppc_ecb_decrypt, | ||
427 | } | ||
428 | } | ||
429 | }, { | ||
430 | .cra_name = "cbc(aes)", | ||
431 | .cra_driver_name = "cbc-ppc-spe", | ||
432 | .cra_priority = 300, | ||
433 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
434 | .cra_blocksize = AES_BLOCK_SIZE, | ||
435 | .cra_ctxsize = sizeof(struct ppc_aes_ctx), | ||
436 | .cra_alignmask = 0, | ||
437 | .cra_type = &crypto_blkcipher_type, | ||
438 | .cra_module = THIS_MODULE, | ||
439 | .cra_u = { | ||
440 | .blkcipher = { | ||
441 | .min_keysize = AES_MIN_KEY_SIZE, | ||
442 | .max_keysize = AES_MAX_KEY_SIZE, | ||
443 | .ivsize = AES_BLOCK_SIZE, | ||
444 | .setkey = ppc_aes_setkey, | ||
445 | .encrypt = ppc_cbc_encrypt, | ||
446 | .decrypt = ppc_cbc_decrypt, | ||
447 | } | ||
448 | } | ||
449 | }, { | ||
450 | .cra_name = "ctr(aes)", | ||
451 | .cra_driver_name = "ctr-ppc-spe", | ||
452 | .cra_priority = 300, | ||
453 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
454 | .cra_blocksize = 1, | ||
455 | .cra_ctxsize = sizeof(struct ppc_aes_ctx), | ||
456 | .cra_alignmask = 0, | ||
457 | .cra_type = &crypto_blkcipher_type, | ||
458 | .cra_module = THIS_MODULE, | ||
459 | .cra_u = { | ||
460 | .blkcipher = { | ||
461 | .min_keysize = AES_MIN_KEY_SIZE, | ||
462 | .max_keysize = AES_MAX_KEY_SIZE, | ||
463 | .ivsize = AES_BLOCK_SIZE, | ||
464 | .setkey = ppc_aes_setkey, | ||
465 | .encrypt = ppc_ctr_crypt, | ||
466 | .decrypt = ppc_ctr_crypt, | ||
467 | } | ||
468 | } | ||
469 | }, { | ||
470 | .cra_name = "xts(aes)", | ||
471 | .cra_driver_name = "xts-ppc-spe", | ||
472 | .cra_priority = 300, | ||
473 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
474 | .cra_blocksize = AES_BLOCK_SIZE, | ||
475 | .cra_ctxsize = sizeof(struct ppc_xts_ctx), | ||
476 | .cra_alignmask = 0, | ||
477 | .cra_type = &crypto_blkcipher_type, | ||
478 | .cra_module = THIS_MODULE, | ||
479 | .cra_u = { | ||
480 | .blkcipher = { | ||
481 | .min_keysize = AES_MIN_KEY_SIZE * 2, | ||
482 | .max_keysize = AES_MAX_KEY_SIZE * 2, | ||
483 | .ivsize = AES_BLOCK_SIZE, | ||
484 | .setkey = ppc_xts_setkey, | ||
485 | .encrypt = ppc_xts_encrypt, | ||
486 | .decrypt = ppc_xts_decrypt, | ||
487 | } | ||
488 | } | ||
489 | } }; | ||
490 | |||
491 | static int __init ppc_aes_mod_init(void) | ||
492 | { | ||
493 | return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); | ||
494 | } | ||
495 | |||
496 | static void __exit ppc_aes_mod_fini(void) | ||
497 | { | ||
498 | crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); | ||
499 | } | ||
500 | |||
501 | module_init(ppc_aes_mod_init); | ||
502 | module_exit(ppc_aes_mod_fini); | ||
503 | |||
504 | MODULE_LICENSE("GPL"); | ||
505 | MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized"); | ||
506 | |||
507 | MODULE_ALIAS_CRYPTO("aes"); | ||
508 | MODULE_ALIAS_CRYPTO("ecb(aes)"); | ||
509 | MODULE_ALIAS_CRYPTO("cbc(aes)"); | ||
510 | MODULE_ALIAS_CRYPTO("ctr(aes)"); | ||
511 | MODULE_ALIAS_CRYPTO("xts(aes)"); | ||
512 | MODULE_ALIAS_CRYPTO("aes-ppc-spe"); | ||
diff --git a/arch/powerpc/crypto/aes-spe-keys.S b/arch/powerpc/crypto/aes-spe-keys.S new file mode 100644 index 000000000000..be8090f3d700 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-keys.S | |||
@@ -0,0 +1,283 @@ | |||
1 | /* | ||
2 | * Key handling functions for PPC AES implementation | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <asm/ppc_asm.h> | ||
14 | |||
15 | #ifdef __BIG_ENDIAN__ | ||
16 | #define LOAD_KEY(d, s, off) \ | ||
17 | lwz d,off(s); | ||
18 | #else | ||
19 | #define LOAD_KEY(d, s, off) \ | ||
20 | li r0,off; \ | ||
21 | lwbrx d,s,r0; | ||
22 | #endif | ||
23 | |||
24 | #define INITIALIZE_KEY \ | ||
25 | stwu r1,-32(r1); /* create stack frame */ \ | ||
26 | stw r14,8(r1); /* save registers */ \ | ||
27 | stw r15,12(r1); \ | ||
28 | stw r16,16(r1); | ||
29 | |||
30 | #define FINALIZE_KEY \ | ||
31 | lwz r14,8(r1); /* restore registers */ \ | ||
32 | lwz r15,12(r1); \ | ||
33 | lwz r16,16(r1); \ | ||
34 | xor r5,r5,r5; /* clear sensitive data */ \ | ||
35 | xor r6,r6,r6; \ | ||
36 | xor r7,r7,r7; \ | ||
37 | xor r8,r8,r8; \ | ||
38 | xor r9,r9,r9; \ | ||
39 | xor r10,r10,r10; \ | ||
40 | xor r11,r11,r11; \ | ||
41 | xor r12,r12,r12; \ | ||
42 | addi r1,r1,32; /* cleanup stack */ | ||
43 | |||
44 | #define LS_BOX(r, t1, t2) \ | ||
45 | lis t2,PPC_AES_4K_ENCTAB@h; \ | ||
46 | ori t2,t2,PPC_AES_4K_ENCTAB@l; \ | ||
47 | rlwimi t2,r,4,20,27; \ | ||
48 | lbz t1,8(t2); \ | ||
49 | rlwimi r,t1,0,24,31; \ | ||
50 | rlwimi t2,r,28,20,27; \ | ||
51 | lbz t1,8(t2); \ | ||
52 | rlwimi r,t1,8,16,23; \ | ||
53 | rlwimi t2,r,20,20,27; \ | ||
54 | lbz t1,8(t2); \ | ||
55 | rlwimi r,t1,16,8,15; \ | ||
56 | rlwimi t2,r,12,20,27; \ | ||
57 | lbz t1,8(t2); \ | ||
58 | rlwimi r,t1,24,0,7; | ||
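A C model of LS_BOX (a sketch; it assumes, as the lbz offset of 8 suggests, that each 16-byte entry of the 4 KB encryption table carries the plain S-box byte at offset 8):

    static u32 ls_box(u32 r, const u8 *enctab)  /* enctab = PPC_AES_4K_ENCTAB */
    {
            u32 out = 0;
            int i;

            for (i = 0; i < 4; i++) {
                    u32 byte = (r >> (8 * i)) & 0xff;

                    /* SubBytes on each byte of the word */
                    out |= (u32)enctab[16 * byte + 8] << (8 * i);
            }
            return out;
    }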
59 | |||
60 | #define GF8_MUL(out, in, t1, t2) \ | ||
61 | lis t1,0x8080; /* multiplication in GF8 */ \ | ||
62 | ori t1,t1,0x8080; \ | ||
63 | and t1,t1,in; \ | ||
64 | srwi t1,t1,7; \ | ||
65 | mulli t1,t1,0x1b; \ | ||
66 | lis t2,0x7f7f; \ | ||
67 | ori t2,t2,0x7f7f; \ | ||
68 | and t2,t2,in; \ | ||
69 | slwi t2,t2,1; \ | ||
70 | xor out,t1,t2; | ||
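GF8_MUL doubles four GF(2^8) elements packed into one word. A direct C transliteration (gf8_mul2 is a hypothetical name):

    static u32 gf8_mul2(u32 in)
    {
            u32 carries = (in & 0x80808080u) >> 7;  /* per-byte MSBs */

            /* shift each byte left by one, reduce carrying bytes by 0x1b */
            return ((in & 0x7f7f7f7fu) << 1) ^ (carries * 0x1b);
    }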
71 | |||
72 | /* | ||
73 | * ppc_expand_key_128(u32 *key_enc, const u8 *key) | ||
74 | * | ||
75 | * Expand a 128 bit key into a 176 byte encryption key: the key itself | ||
76 | * plus 10 round keys of 16 bytes each | ||
77 | * | ||
78 | */ | ||
79 | _GLOBAL(ppc_expand_key_128) | ||
80 | INITIALIZE_KEY | ||
81 | LOAD_KEY(r5,r4,0) | ||
82 | LOAD_KEY(r6,r4,4) | ||
83 | LOAD_KEY(r7,r4,8) | ||
84 | LOAD_KEY(r8,r4,12) | ||
85 | stw r5,0(r3) /* key[0..3] = input data */ | ||
86 | stw r6,4(r3) | ||
87 | stw r7,8(r3) | ||
88 | stw r8,12(r3) | ||
89 | li r16,10 /* 10 expansion rounds */ | ||
90 | lis r0,0x0100 /* RCO(1) */ | ||
91 | ppc_expand_128_loop: | ||
92 | addi r3,r3,16 | ||
93 | mr r14,r8 /* apply LS_BOX to 4th temp */ | ||
94 | rotlwi r14,r14,8 | ||
95 | LS_BOX(r14, r15, r4) | ||
96 | xor r14,r14,r0 | ||
97 | xor r5,r5,r14 /* xor next 4 keys */ | ||
98 | xor r6,r6,r5 | ||
99 | xor r7,r7,r6 | ||
100 | xor r8,r8,r7 | ||
101 | stw r5,0(r3) /* store next 4 keys */ | ||
102 | stw r6,4(r3) | ||
103 | stw r7,8(r3) | ||
104 | stw r8,12(r3) | ||
105 | GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */ | ||
106 | subi r16,r16,1 | ||
107 | cmpwi r16,0 | ||
108 | bt eq,ppc_expand_128_end | ||
109 | b ppc_expand_128_loop | ||
110 | ppc_expand_128_end: | ||
111 | FINALIZE_KEY | ||
112 | blr | ||
113 | |||
114 | /* | ||
115 | * ppc_expand_key_192(u32 *key_enc, const u8 *key) | ||
116 | * | ||
117 | * Expand a 192 bit key into a 208 byte encryption key: 13 round keys of | ||
118 | * 16 bytes each, the first 24 bytes of which are the key itself | ||
119 | * | ||
120 | */ | ||
121 | _GLOBAL(ppc_expand_key_192) | ||
122 | INITIALIZE_KEY | ||
123 | LOAD_KEY(r5,r4,0) | ||
124 | LOAD_KEY(r6,r4,4) | ||
125 | LOAD_KEY(r7,r4,8) | ||
126 | LOAD_KEY(r8,r4,12) | ||
127 | LOAD_KEY(r9,r4,16) | ||
128 | LOAD_KEY(r10,r4,20) | ||
129 | stw r5,0(r3) | ||
130 | stw r6,4(r3) | ||
131 | stw r7,8(r3) | ||
132 | stw r8,12(r3) | ||
133 | stw r9,16(r3) | ||
134 | stw r10,20(r3) | ||
135 | li r16,8 /* 8 expansion rounds */ | ||
136 | lis r0,0x0100 /* RCO(1) */ | ||
137 | ppc_expand_192_loop: | ||
138 | addi r3,r3,24 | ||
139 | mr r14,r10 /* apply LS_BOX to 6th temp */ | ||
140 | rotlwi r14,r14,8 | ||
141 | LS_BOX(r14, r15, r4) | ||
142 | xor r14,r14,r0 | ||
143 | xor r5,r5,r14 /* xor next 6 keys */ | ||
144 | xor r6,r6,r5 | ||
145 | xor r7,r7,r6 | ||
146 | xor r8,r8,r7 | ||
147 | xor r9,r9,r8 | ||
148 | xor r10,r10,r9 | ||
149 | stw r5,0(r3) | ||
150 | stw r6,4(r3) | ||
151 | stw r7,8(r3) | ||
152 | stw r8,12(r3) | ||
153 | subi r16,r16,1 | ||
154 | cmpwi r16,0 /* last round early kick out */ | ||
155 | bt eq,ppc_expand_192_end | ||
156 | stw r9,16(r3) | ||
157 | stw r10,20(r3) | ||
158 | GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */ | ||
159 | b ppc_expand_192_loop | ||
160 | ppc_expand_192_end: | ||
161 | FINALIZE_KEY | ||
162 | blr | ||
163 | |||
164 | /* | ||
165 | * ppc_expand_key_256(u32 *key_enc, const u8 *key) | ||
166 | * | ||
167 | * Expand a 256 bit key into a 240 byte encryption key: 15 round keys of | ||
168 | * 16 bytes each, the first 32 bytes of which are the key itself | ||
169 | * | ||
170 | */ | ||
171 | _GLOBAL(ppc_expand_key_256) | ||
172 | INITIALIZE_KEY | ||
173 | LOAD_KEY(r5,r4,0) | ||
174 | LOAD_KEY(r6,r4,4) | ||
175 | LOAD_KEY(r7,r4,8) | ||
176 | LOAD_KEY(r8,r4,12) | ||
177 | LOAD_KEY(r9,r4,16) | ||
178 | LOAD_KEY(r10,r4,20) | ||
179 | LOAD_KEY(r11,r4,24) | ||
180 | LOAD_KEY(r12,r4,28) | ||
181 | stw r5,0(r3) | ||
182 | stw r6,4(r3) | ||
183 | stw r7,8(r3) | ||
184 | stw r8,12(r3) | ||
185 | stw r9,16(r3) | ||
186 | stw r10,20(r3) | ||
187 | stw r11,24(r3) | ||
188 | stw r12,28(r3) | ||
189 | li r16,7 /* 7 expansion rounds */ | ||
190 | lis r0,0x0100 /* RCO(1) */ | ||
191 | ppc_expand_256_loop: | ||
192 | addi r3,r3,32 | ||
193 | mr r14,r12 /* apply LS_BOX to 8th temp */ | ||
194 | rotlwi r14,r14,8 | ||
195 | LS_BOX(r14, r15, r4) | ||
196 | xor r14,r14,r0 | ||
197 | xor r5,r5,r14 /* xor 4 keys */ | ||
198 | xor r6,r6,r5 | ||
199 | xor r7,r7,r6 | ||
200 | xor r8,r8,r7 | ||
201 | mr r14,r8 | ||
202 | LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */ | ||
203 | xor r9,r9,r14 /* xor 4 keys */ | ||
204 | xor r10,r10,r9 | ||
205 | xor r11,r11,r10 | ||
206 | xor r12,r12,r11 | ||
207 | stw r5,0(r3) | ||
208 | stw r6,4(r3) | ||
209 | stw r7,8(r3) | ||
210 | stw r8,12(r3) | ||
211 | subi r16,r16,1 | ||
212 | cmpwi r16,0 /* last round early kick out */ | ||
213 | bt eq,ppc_expand_256_end | ||
214 | stw r9,16(r3) | ||
215 | stw r10,20(r3) | ||
216 | stw r11,24(r3) | ||
217 | stw r12,28(r3) | ||
218 | GF8_MUL(r0, r0, r4, r14) | ||
219 | b ppc_expand_256_loop | ||
220 | ppc_expand_256_end: | ||
221 | FINALIZE_KEY | ||
222 | blr | ||
223 | |||
224 | /* | ||
225 | * ppc_generate_decrypt_key: derive the decryption key from the encryption | ||
226 | * key; the number of bytes to handle is calculated from the key length | ||
227 | * (16/24/32) | ||
228 | */ | ||
229 | _GLOBAL(ppc_generate_decrypt_key) | ||
230 | addi r6,r5,24 | ||
231 | slwi r6,r6,2 | ||
232 | lwzx r7,r4,r6 /* first/last 4 words are same */ | ||
233 | stw r7,0(r3) | ||
234 | lwz r7,0(r4) | ||
235 | stwx r7,r3,r6 | ||
236 | addi r6,r6,4 | ||
237 | lwzx r7,r4,r6 | ||
238 | stw r7,4(r3) | ||
239 | lwz r7,4(r4) | ||
240 | stwx r7,r3,r6 | ||
241 | addi r6,r6,4 | ||
242 | lwzx r7,r4,r6 | ||
243 | stw r7,8(r3) | ||
244 | lwz r7,8(r4) | ||
245 | stwx r7,r3,r6 | ||
246 | addi r6,r6,4 | ||
247 | lwzx r7,r4,r6 | ||
248 | stw r7,12(r3) | ||
249 | lwz r7,12(r4) | ||
250 | stwx r7,r3,r6 | ||
251 | addi r3,r3,16 | ||
252 | add r4,r4,r6 | ||
253 | subi r4,r4,28 | ||
254 | addi r5,r5,20 | ||
255 | srwi r5,r5,2 | ||
256 | ppc_generate_decrypt_block: | ||
257 | li r6,4 | ||
258 | mtctr r6 | ||
259 | ppc_generate_decrypt_word: | ||
260 | lwz r6,0(r4) | ||
261 | GF8_MUL(r7, r6, r0, r7) | ||
262 | GF8_MUL(r8, r7, r0, r8) | ||
263 | GF8_MUL(r9, r8, r0, r9) | ||
264 | xor r10,r9,r6 | ||
265 | xor r11,r7,r8 | ||
266 | xor r11,r11,r9 | ||
267 | xor r12,r7,r10 | ||
268 | rotrwi r12,r12,24 | ||
269 | xor r11,r11,r12 | ||
270 | xor r12,r8,r10 | ||
271 | rotrwi r12,r12,16 | ||
272 | xor r11,r11,r12 | ||
273 | rotrwi r12,r10,8 | ||
274 | xor r11,r11,r12 | ||
275 | stw r11,0(r3) | ||
276 | addi r3,r3,4 | ||
277 | addi r4,r4,4 | ||
278 | bdnz ppc_generate_decrypt_word | ||
279 | subi r4,r4,32 | ||
280 | subi r5,r5,1 | ||
281 | cmpwi r5,0 | ||
282 | bt gt,ppc_generate_decrypt_block | ||
283 | blr | ||
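The inner loop above is the standard InvMixColumns transform applied to one round-key word; roughly, in C (a sketch building on the gf8_mul2 model shown earlier; ror32 is the kernel's rotate-right helper from <linux/bitops.h>):

    static u32 inv_mix_word(u32 u)
    {
            u32 u2 = gf8_mul2(u);           /* 0x02 * u, bytewise */
            u32 u4 = gf8_mul2(u2);          /* 0x04 * u */
            u32 u8 = gf8_mul2(u4);          /* 0x08 * u */
            u32 u9 = u8 ^ u;                /* 0x09 * u */
            u32 w  = u2 ^ u4 ^ u8;          /* 0x0e * u */

            w ^= ror32(u2 ^ u9, 24);        /* 0x0b * u, rotated */
            w ^= ror32(u4 ^ u9, 16);        /* 0x0d * u, rotated */
            w ^= ror32(u9, 8);              /* 0x09 * u, rotated */
            return w;
    }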
diff --git a/arch/powerpc/crypto/aes-spe-modes.S b/arch/powerpc/crypto/aes-spe-modes.S new file mode 100644 index 000000000000..ad48032ca8e0 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-modes.S | |||
@@ -0,0 +1,630 @@ | |||
1 | /* | ||
2 | * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <asm/ppc_asm.h> | ||
14 | #include "aes-spe-regs.h" | ||
15 | |||
16 | #ifdef __BIG_ENDIAN__ /* Macros for big endian builds */ | ||
17 | |||
18 | #define LOAD_DATA(reg, off) \ | ||
19 | lwz reg,off(rSP); /* load with offset */ | ||
20 | #define SAVE_DATA(reg, off) \ | ||
21 | stw reg,off(rDP); /* save with offset */ | ||
22 | #define NEXT_BLOCK \ | ||
23 | addi rSP,rSP,16; /* increment pointers per block */ \ | ||
24 | addi rDP,rDP,16; | ||
25 | #define LOAD_IV(reg, off) \ | ||
26 | lwz reg,off(rIP); /* IV loading with offset */ | ||
27 | #define SAVE_IV(reg, off) \ | ||
28 | stw reg,off(rIP); /* IV saving with offset */ | ||
29 | #define START_IV /* nothing to reset */ | ||
30 | #define CBC_DEC 16 /* CBC decrement per block */ | ||
31 | #define CTR_DEC 1 /* CTR decrement one byte */ | ||
32 | |||
33 | #else /* Macros for little endian */ | ||
34 | |||
35 | #define LOAD_DATA(reg, off) \ | ||
36 | lwbrx reg,0,rSP; /* load reversed */ \ | ||
37 | addi rSP,rSP,4; /* and increment pointer */ | ||
38 | #define SAVE_DATA(reg, off) \ | ||
39 | stwbrx reg,0,rDP; /* save reversed */ \ | ||
40 | addi rDP,rDP,4; /* and increment pointer */ | ||
41 | #define NEXT_BLOCK /* nothing to do */ | ||
42 | #define LOAD_IV(reg, off) \ | ||
43 | lwbrx reg,0,rIP; /* load reversed */ \ | ||
44 | addi rIP,rIP,4; /* and increment pointer */ | ||
45 | #define SAVE_IV(reg, off) \ | ||
46 | stwbrx reg,0,rIP; /* store reversed */ \ | ||
47 | addi rIP,rIP,4; /* and increment pointer */ | ||
48 | #define START_IV \ | ||
49 | subi rIP,rIP,16; /* must reset pointer */ | ||
50 | #define CBC_DEC 32 /* 2 blocks, because of the auto-incs */ | ||
51 | #define CTR_DEC 17 /* 1 block + 1, because of the auto-incs */ | ||
52 | |||
53 | #endif | ||
54 | |||
55 | #define SAVE_0_REGS | ||
56 | #define LOAD_0_REGS | ||
57 | |||
58 | #define SAVE_4_REGS \ | ||
59 | stw rI0,96(r1); /* save 32 bit registers */ \ | ||
60 | stw rI1,100(r1); \ | ||
61 | stw rI2,104(r1); \ | ||
62 | stw rI3,108(r1); | ||
63 | |||
64 | #define LOAD_4_REGS \ | ||
65 | lwz rI0,96(r1); /* restore 32 bit registers */ \ | ||
66 | lwz rI1,100(r1); \ | ||
67 | lwz rI2,104(r1); \ | ||
68 | lwz rI3,108(r1); | ||
69 | |||
70 | #define SAVE_8_REGS \ | ||
71 | SAVE_4_REGS \ | ||
72 | stw rG0,112(r1); /* save 32 bit registers */ \ | ||
73 | stw rG1,116(r1); \ | ||
74 | stw rG2,120(r1); \ | ||
75 | stw rG3,124(r1); | ||
76 | |||
77 | #define LOAD_8_REGS \ | ||
78 | LOAD_4_REGS \ | ||
79 | lwz rG0,112(r1); /* restore 32 bit registers */ \ | ||
80 | lwz rG1,116(r1); \ | ||
81 | lwz rG2,120(r1); \ | ||
82 | lwz rG3,124(r1); | ||
83 | |||
84 | #define INITIALIZE_CRYPT(tab,nr32bitregs) \ | ||
85 | mflr r0; \ | ||
86 | stwu r1,-160(r1); /* create stack frame */ \ | ||
87 | lis rT0,tab@h; /* en-/decryption table pointer */ \ | ||
88 | stw r0,8(r1); /* save link register */ \ | ||
89 | ori rT0,rT0,tab@l; \ | ||
90 | evstdw r14,16(r1); \ | ||
91 | mr rKS,rKP; \ | ||
92 | evstdw r15,24(r1); /* We must save non-volatile */ \ | ||
93 | evstdw r16,32(r1); /* registers. Take the chance */ \ | ||
94 | evstdw r17,40(r1); /* and save the SPE part too */ \ | ||
95 | evstdw r18,48(r1); \ | ||
96 | evstdw r19,56(r1); \ | ||
97 | evstdw r20,64(r1); \ | ||
98 | evstdw r21,72(r1); \ | ||
99 | evstdw r22,80(r1); \ | ||
100 | evstdw r23,88(r1); \ | ||
101 | SAVE_##nr32bitregs##_REGS | ||
102 | |||
103 | #define FINALIZE_CRYPT(nr32bitregs) \ | ||
104 | lwz r0,8(r1); \ | ||
105 | evldw r14,16(r1); /* restore SPE registers */ \ | ||
106 | evldw r15,24(r1); \ | ||
107 | evldw r16,32(r1); \ | ||
108 | evldw r17,40(r1); \ | ||
109 | evldw r18,48(r1); \ | ||
110 | evldw r19,56(r1); \ | ||
111 | evldw r20,64(r1); \ | ||
112 | evldw r21,72(r1); \ | ||
113 | evldw r22,80(r1); \ | ||
114 | evldw r23,88(r1); \ | ||
115 | LOAD_##nr32bitregs##_REGS \ | ||
116 | mtlr r0; /* restore link register */ \ | ||
117 | xor r0,r0,r0; \ | ||
118 | stw r0,16(r1); /* delete sensitive data */ \ | ||
119 | stw r0,24(r1); /* that we might have pushed */ \ | ||
120 | stw r0,32(r1); /* from another context that runs */ \ | ||
121 | stw r0,40(r1); /* the same code */ \ | ||
122 | stw r0,48(r1); \ | ||
123 | stw r0,56(r1); \ | ||
124 | stw r0,64(r1); \ | ||
125 | stw r0,72(r1); \ | ||
126 | stw r0,80(r1); \ | ||
127 | stw r0,88(r1); \ | ||
128 | addi r1,r1,160; /* cleanup stack frame */ | ||
129 | |||
130 | #define ENDIAN_SWAP(t0, t1, s0, s1) \ | ||
131 | rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \ | ||
132 | rotrwi t1,s1,8; \ | ||
133 | rlwimi t0,s0,8,8,15; \ | ||
134 | rlwimi t1,s1,8,8,15; \ | ||
135 | rlwimi t0,s0,8,24,31; \ | ||
136 | rlwimi t1,s1,8,24,31; | ||
137 | |||
138 | #define GF128_MUL(d0, d1, d2, d3, t0) \ | ||
139 | li t0,0x87; /* multiplication in GF128 */ \ | ||
140 | cmpwi d3,-1; \ | ||
141 | iselgt t0,0,t0; \ | ||
142 | rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \ | ||
143 | rotlwi d3,d3,1; \ | ||
144 | rlwimi d2,d1,0,0,0; \ | ||
145 | rotlwi d2,d2,1; \ | ||
146 | rlwimi d1,d0,0,0,0; \ | ||
147 | slwi d0,d0,1; /* shift left 128 bit */ \ | ||
148 | rotlwi d1,d1,1; \ | ||
149 | xor d0,d0,t0; | ||
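A C model of this macro (illustrative; in this word layout d3 carries the most significant bits, which is why the carry test above looks at d3):

    static void gf128_mul_x(u32 *d0, u32 *d1, u32 *d2, u32 *d3)
    {
            u32 poly = (*d3 & 0x80000000u) ? 0x87 : 0;  /* reduce on carry-out */

            *d3 = (*d3 << 1) | (*d2 >> 31);             /* 128-bit shift left */
            *d2 = (*d2 << 1) | (*d1 >> 31);
            *d1 = (*d1 << 1) | (*d0 >> 31);
            *d0 = (*d0 << 1) ^ poly;
    }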
150 | |||
151 | #define START_KEY(d0, d1, d2, d3) \ | ||
152 | lwz rW0,0(rKP); \ | ||
153 | mtctr rRR; \ | ||
154 | lwz rW1,4(rKP); \ | ||
155 | lwz rW2,8(rKP); \ | ||
156 | lwz rW3,12(rKP); \ | ||
157 | xor rD0,d0,rW0; \ | ||
158 | xor rD1,d1,rW1; \ | ||
159 | xor rD2,d2,rW2; \ | ||
160 | xor rD3,d3,rW3; | ||
161 | |||
162 | /* | ||
163 | * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, | ||
164 | * u32 rounds) | ||
165 | * | ||
166 | * called from glue layer to encrypt a single 16-byte block. | ||
167 | * Round values are AES128 = 4, AES192 = 5, AES256 = 6 | ||
168 | * | ||
169 | */ | ||
170 | _GLOBAL(ppc_encrypt_aes) | ||
171 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0) | ||
172 | LOAD_DATA(rD0, 0) | ||
173 | LOAD_DATA(rD1, 4) | ||
174 | LOAD_DATA(rD2, 8) | ||
175 | LOAD_DATA(rD3, 12) | ||
176 | START_KEY(rD0, rD1, rD2, rD3) | ||
177 | bl ppc_encrypt_block | ||
178 | xor rD0,rD0,rW0 | ||
179 | SAVE_DATA(rD0, 0) | ||
180 | xor rD1,rD1,rW1 | ||
181 | SAVE_DATA(rD1, 4) | ||
182 | xor rD2,rD2,rW2 | ||
183 | SAVE_DATA(rD2, 8) | ||
184 | xor rD3,rD3,rW3 | ||
185 | SAVE_DATA(rD3, 12) | ||
186 | FINALIZE_CRYPT(0) | ||
187 | blr | ||
188 | |||
189 | /* | ||
190 | * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, | ||
191 | * u32 rounds) | ||
192 | * | ||
193 | * called from glue layer to decrypt a single 16-byte block. | ||
194 | * Round values are AES128 = 4, AES192 = 5, AES256 = 6 | ||
195 | * | ||
196 | */ | ||
197 | _GLOBAL(ppc_decrypt_aes) | ||
198 | INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0) | ||
199 | LOAD_DATA(rD0, 0) | ||
200 | addi rT1,rT0,4096 | ||
201 | LOAD_DATA(rD1, 4) | ||
202 | LOAD_DATA(rD2, 8) | ||
203 | LOAD_DATA(rD3, 12) | ||
204 | START_KEY(rD0, rD1, rD2, rD3) | ||
205 | bl ppc_decrypt_block | ||
206 | xor rD0,rD0,rW0 | ||
207 | SAVE_DATA(rD0, 0) | ||
208 | xor rD1,rD1,rW1 | ||
209 | SAVE_DATA(rD1, 4) | ||
210 | xor rD2,rD2,rW2 | ||
211 | SAVE_DATA(rD2, 8) | ||
212 | xor rD3,rD3,rW3 | ||
213 | SAVE_DATA(rD3, 12) | ||
214 | FINALIZE_CRYPT(0) | ||
215 | blr | ||
216 | |||
217 | /* | ||
218 | * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, | ||
219 | * u32 rounds, u32 bytes); | ||
220 | * | ||
221 | * called from glue layer to encrypt multiple blocks via ECB | ||
222 | * The byte count must be greater than or equal to 16 and only whole | ||
223 | * blocks are processed. Round values are AES128 = 4, AES192 = 5 and | ||
224 | * AES256 = 6 | ||
225 | * | ||
226 | */ | ||
227 | _GLOBAL(ppc_encrypt_ecb) | ||
228 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0) | ||
229 | ppc_encrypt_ecb_loop: | ||
230 | LOAD_DATA(rD0, 0) | ||
231 | mr rKP,rKS | ||
232 | LOAD_DATA(rD1, 4) | ||
233 | subi rLN,rLN,16 | ||
234 | LOAD_DATA(rD2, 8) | ||
235 | cmpwi rLN,15 | ||
236 | LOAD_DATA(rD3, 12) | ||
237 | START_KEY(rD0, rD1, rD2, rD3) | ||
238 | bl ppc_encrypt_block | ||
239 | xor rD0,rD0,rW0 | ||
240 | SAVE_DATA(rD0, 0) | ||
241 | xor rD1,rD1,rW1 | ||
242 | SAVE_DATA(rD1, 4) | ||
243 | xor rD2,rD2,rW2 | ||
244 | SAVE_DATA(rD2, 8) | ||
245 | xor rD3,rD3,rW3 | ||
246 | SAVE_DATA(rD3, 12) | ||
247 | NEXT_BLOCK | ||
248 | bt gt,ppc_encrypt_ecb_loop | ||
249 | FINALIZE_CRYPT(0) | ||
250 | blr | ||
251 | |||
252 | /* | ||
253 | * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, | ||
254 | * u32 rounds, u32 bytes); | ||
255 | * | ||
256 | * called from glue layer to decrypt multiple blocks via ECB | ||
257 | * The byte count must be greater than or equal to 16 and only whole | ||
258 | * blocks are processed. Round values are AES128 = 4, AES192 = 5 and | ||
259 | * AES256 = 6 | ||
260 | * | ||
261 | */ | ||
262 | _GLOBAL(ppc_decrypt_ecb) | ||
263 | INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0) | ||
264 | addi rT1,rT0,4096 | ||
265 | ppc_decrypt_ecb_loop: | ||
266 | LOAD_DATA(rD0, 0) | ||
267 | mr rKP,rKS | ||
268 | LOAD_DATA(rD1, 4) | ||
269 | subi rLN,rLN,16 | ||
270 | LOAD_DATA(rD2, 8) | ||
271 | cmpwi rLN,15 | ||
272 | LOAD_DATA(rD3, 12) | ||
273 | START_KEY(rD0, rD1, rD2, rD3) | ||
274 | bl ppc_decrypt_block | ||
275 | xor rD0,rD0,rW0 | ||
276 | SAVE_DATA(rD0, 0) | ||
277 | xor rD1,rD1,rW1 | ||
278 | SAVE_DATA(rD1, 4) | ||
279 | xor rD2,rD2,rW2 | ||
280 | SAVE_DATA(rD2, 8) | ||
281 | xor rD3,rD3,rW3 | ||
282 | SAVE_DATA(rD3, 12) | ||
283 | NEXT_BLOCK | ||
284 | bt gt,ppc_decrypt_ecb_loop | ||
285 | FINALIZE_CRYPT(0) | ||
286 | blr | ||
287 | |||
288 | /* | ||
289 | * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, | ||
290 | * u32 rounds, u32 bytes, u8 *iv); | ||
291 | * | ||
292 | * called from glue layer to encrypt multiple blocks via CBC | ||
293 | * The byte count must be greater than or equal to 16 and only whole | ||
294 | * blocks are processed. Round values are AES128 = 4, AES192 = 5 and | ||
295 | * AES256 = 6 | ||
296 | * | ||
297 | */ | ||
298 | _GLOBAL(ppc_encrypt_cbc) | ||
299 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4) | ||
300 | LOAD_IV(rI0, 0) | ||
301 | LOAD_IV(rI1, 4) | ||
302 | LOAD_IV(rI2, 8) | ||
303 | LOAD_IV(rI3, 12) | ||
304 | ppc_encrypt_cbc_loop: | ||
305 | LOAD_DATA(rD0, 0) | ||
306 | mr rKP,rKS | ||
307 | LOAD_DATA(rD1, 4) | ||
308 | subi rLN,rLN,16 | ||
309 | LOAD_DATA(rD2, 8) | ||
310 | cmpwi rLN,15 | ||
311 | LOAD_DATA(rD3, 12) | ||
312 | xor rD0,rD0,rI0 | ||
313 | xor rD1,rD1,rI1 | ||
314 | xor rD2,rD2,rI2 | ||
315 | xor rD3,rD3,rI3 | ||
316 | START_KEY(rD0, rD1, rD2, rD3) | ||
317 | bl ppc_encrypt_block | ||
318 | xor rI0,rD0,rW0 | ||
319 | SAVE_DATA(rI0, 0) | ||
320 | xor rI1,rD1,rW1 | ||
321 | SAVE_DATA(rI1, 4) | ||
322 | xor rI2,rD2,rW2 | ||
323 | SAVE_DATA(rI2, 8) | ||
324 | xor rI3,rD3,rW3 | ||
325 | SAVE_DATA(rI3, 12) | ||
326 | NEXT_BLOCK | ||
327 | bt gt,ppc_encrypt_cbc_loop | ||
328 | START_IV | ||
329 | SAVE_IV(rI0, 0) | ||
330 | SAVE_IV(rI1, 4) | ||
331 | SAVE_IV(rI2, 8) | ||
332 | SAVE_IV(rI3, 12) | ||
333 | FINALIZE_CRYPT(4) | ||
334 | blr | ||
335 | |||
336 | /* | ||
337 | * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, | ||
338 | * u32 rounds, u32 bytes, u8 *iv); | ||
339 | * | ||
340 | * called from glue layer to decrypt multiple blocks via CBC | ||
341 | * round values are AES128 = 4, AES192 = 5, AES256 = 6 | ||
342 | * | ||
343 | */ | ||
344 | _GLOBAL(ppc_decrypt_cbc) | ||
345 | INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4) | ||
346 | li rT1,15 | ||
347 | LOAD_IV(rI0, 0) | ||
348 | andc rLN,rLN,rT1 | ||
349 | LOAD_IV(rI1, 4) | ||
350 | subi rLN,rLN,16 | ||
351 | LOAD_IV(rI2, 8) | ||
352 | add rSP,rSP,rLN /* reverse processing */ | ||
353 | LOAD_IV(rI3, 12) | ||
354 | add rDP,rDP,rLN | ||
355 | LOAD_DATA(rD0, 0) | ||
356 | addi rT1,rT0,4096 | ||
357 | LOAD_DATA(rD1, 4) | ||
358 | LOAD_DATA(rD2, 8) | ||
359 | LOAD_DATA(rD3, 12) | ||
360 | START_IV | ||
361 | SAVE_IV(rD0, 0) | ||
362 | SAVE_IV(rD1, 4) | ||
363 | SAVE_IV(rD2, 8) | ||
364 | cmpwi rLN,16 | ||
365 | SAVE_IV(rD3, 12) | ||
366 | bt lt,ppc_decrypt_cbc_end | ||
367 | ppc_decrypt_cbc_loop: | ||
368 | mr rKP,rKS | ||
369 | START_KEY(rD0, rD1, rD2, rD3) | ||
370 | bl ppc_decrypt_block | ||
371 | subi rLN,rLN,16 | ||
372 | subi rSP,rSP,CBC_DEC | ||
373 | xor rW0,rD0,rW0 | ||
374 | LOAD_DATA(rD0, 0) | ||
375 | xor rW1,rD1,rW1 | ||
376 | LOAD_DATA(rD1, 4) | ||
377 | xor rW2,rD2,rW2 | ||
378 | LOAD_DATA(rD2, 8) | ||
379 | xor rW3,rD3,rW3 | ||
380 | LOAD_DATA(rD3, 12) | ||
381 | xor rW0,rW0,rD0 | ||
382 | SAVE_DATA(rW0, 0) | ||
383 | xor rW1,rW1,rD1 | ||
384 | SAVE_DATA(rW1, 4) | ||
385 | xor rW2,rW2,rD2 | ||
386 | SAVE_DATA(rW2, 8) | ||
387 | xor rW3,rW3,rD3 | ||
388 | SAVE_DATA(rW3, 12) | ||
389 | cmpwi rLN,15 | ||
390 | subi rDP,rDP,CBC_DEC | ||
391 | bt gt,ppc_decrypt_cbc_loop | ||
392 | ppc_decrypt_cbc_end: | ||
393 | mr rKP,rKS | ||
394 | START_KEY(rD0, rD1, rD2, rD3) | ||
395 | bl ppc_decrypt_block | ||
396 | xor rW0,rW0,rD0 | ||
397 | xor rW1,rW1,rD1 | ||
398 | xor rW2,rW2,rD2 | ||
399 | xor rW3,rW3,rD3 | ||
400 | xor rW0,rW0,rI0 /* decrypt with initial IV */ | ||
401 | SAVE_DATA(rW0, 0) | ||
402 | xor rW1,rW1,rI1 | ||
403 | SAVE_DATA(rW1, 4) | ||
404 | xor rW2,rW2,rI2 | ||
405 | SAVE_DATA(rW2, 8) | ||
406 | xor rW3,rW3,rI3 | ||
407 | SAVE_DATA(rW3, 12) | ||
408 | FINALIZE_CRYPT(4) | ||
409 | blr | ||
410 | |||
411 | /* | ||
412 | * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc, | ||
413 | * u32 rounds, u32 bytes, u8 *iv); | ||
414 | * | ||
415 | * called from glue layer to encrypt/decrypt multiple blocks | ||
416 | * via CTR. Number of bytes does not need to be a multiple of | ||
417 | * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6 | ||
418 | * | ||
419 | */ | ||
420 | _GLOBAL(ppc_crypt_ctr) | ||
421 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4) | ||
422 | LOAD_IV(rI0, 0) | ||
423 | LOAD_IV(rI1, 4) | ||
424 | LOAD_IV(rI2, 8) | ||
425 | cmpwi rLN,16 | ||
426 | LOAD_IV(rI3, 12) | ||
427 | START_IV | ||
428 | bt lt,ppc_crypt_ctr_partial | ||
429 | ppc_crypt_ctr_loop: | ||
430 | mr rKP,rKS | ||
431 | START_KEY(rI0, rI1, rI2, rI3) | ||
432 | bl ppc_encrypt_block | ||
433 | xor rW0,rD0,rW0 | ||
434 | xor rW1,rD1,rW1 | ||
435 | xor rW2,rD2,rW2 | ||
436 | xor rW3,rD3,rW3 | ||
437 | LOAD_DATA(rD0, 0) | ||
438 | subi rLN,rLN,16 | ||
439 | LOAD_DATA(rD1, 4) | ||
440 | LOAD_DATA(rD2, 8) | ||
441 | LOAD_DATA(rD3, 12) | ||
442 | xor rD0,rD0,rW0 | ||
443 | SAVE_DATA(rD0, 0) | ||
444 | xor rD1,rD1,rW1 | ||
445 | SAVE_DATA(rD1, 4) | ||
446 | xor rD2,rD2,rW2 | ||
447 | SAVE_DATA(rD2, 8) | ||
448 | xor rD3,rD3,rW3 | ||
449 | SAVE_DATA(rD3, 12) | ||
450 | addic rI3,rI3,1 /* increase counter */ | ||
451 | addze rI2,rI2 | ||
452 | addze rI1,rI1 | ||
453 | addze rI0,rI0 | ||
454 | NEXT_BLOCK | ||
455 | cmpwi rLN,15 | ||
456 | bt gt,ppc_crypt_ctr_loop | ||
457 | ppc_crypt_ctr_partial: | ||
458 | cmpwi rLN,0 | ||
459 | bt eq,ppc_crypt_ctr_end | ||
460 | mr rKP,rKS | ||
461 | START_KEY(rI0, rI1, rI2, rI3) | ||
462 | bl ppc_encrypt_block | ||
463 | xor rW0,rD0,rW0 | ||
464 | SAVE_IV(rW0, 0) | ||
465 | xor rW1,rD1,rW1 | ||
466 | SAVE_IV(rW1, 4) | ||
467 | xor rW2,rD2,rW2 | ||
468 | SAVE_IV(rW2, 8) | ||
469 | xor rW3,rD3,rW3 | ||
470 | SAVE_IV(rW3, 12) | ||
471 | mtctr rLN | ||
472 | subi rIP,rIP,CTR_DEC | ||
473 | subi rSP,rSP,1 | ||
474 | subi rDP,rDP,1 | ||
475 | ppc_crypt_ctr_xorbyte: | ||
476 | lbzu rW4,1(rIP) /* bytewise xor for partial block */ | ||
477 | lbzu rW5,1(rSP) | ||
478 | xor rW4,rW4,rW5 | ||
479 | stbu rW4,1(rDP) | ||
480 | bdnz ppc_crypt_ctr_xorbyte | ||
481 | subf rIP,rLN,rIP | ||
482 | addi rIP,rIP,1 | ||
483 | addic rI3,rI3,1 | ||
484 | addze rI2,rI2 | ||
485 | addze rI1,rI1 | ||
486 | addze rI0,rI0 | ||
487 | ppc_crypt_ctr_end: | ||
488 | SAVE_IV(rI0, 0) | ||
489 | SAVE_IV(rI1, 4) | ||
490 | SAVE_IV(rI2, 8) | ||
491 | SAVE_IV(rI3, 12) | ||
492 | FINALIZE_CRYPT(4) | ||
493 | blr | ||
494 | |||
495 | /* | ||
496 | * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, | ||
497 | * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk); | ||
498 | * | ||
499 | * called from glue layer to encrypt multiple blocks via XTS | ||
500 | * If key_twk is given, the initial IV encryption will be | ||
501 | * processed too. Round values are AES128 = 4, AES192 = 5, | ||
502 | * AES256 = 6 | ||
503 | * | ||
504 | */ | ||
505 | _GLOBAL(ppc_encrypt_xts) | ||
506 | INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8) | ||
507 | LOAD_IV(rI0, 0) | ||
508 | LOAD_IV(rI1, 4) | ||
509 | LOAD_IV(rI2, 8) | ||
510 | cmpwi rKT,0 | ||
511 | LOAD_IV(rI3, 12) | ||
512 | bt eq,ppc_encrypt_xts_notweak | ||
513 | mr rKP,rKT | ||
514 | START_KEY(rI0, rI1, rI2, rI3) | ||
515 | bl ppc_encrypt_block | ||
516 | xor rI0,rD0,rW0 | ||
517 | xor rI1,rD1,rW1 | ||
518 | xor rI2,rD2,rW2 | ||
519 | xor rI3,rD3,rW3 | ||
520 | ppc_encrypt_xts_notweak: | ||
521 | ENDIAN_SWAP(rG0, rG1, rI0, rI1) | ||
522 | ENDIAN_SWAP(rG2, rG3, rI2, rI3) | ||
523 | ppc_encrypt_xts_loop: | ||
524 | LOAD_DATA(rD0, 0) | ||
525 | mr rKP,rKS | ||
526 | LOAD_DATA(rD1, 4) | ||
527 | subi rLN,rLN,16 | ||
528 | LOAD_DATA(rD2, 8) | ||
529 | LOAD_DATA(rD3, 12) | ||
530 | xor rD0,rD0,rI0 | ||
531 | xor rD1,rD1,rI1 | ||
532 | xor rD2,rD2,rI2 | ||
533 | xor rD3,rD3,rI3 | ||
534 | START_KEY(rD0, rD1, rD2, rD3) | ||
535 | bl ppc_encrypt_block | ||
536 | xor rD0,rD0,rW0 | ||
537 | xor rD1,rD1,rW1 | ||
538 | xor rD2,rD2,rW2 | ||
539 | xor rD3,rD3,rW3 | ||
540 | xor rD0,rD0,rI0 | ||
541 | SAVE_DATA(rD0, 0) | ||
542 | xor rD1,rD1,rI1 | ||
543 | SAVE_DATA(rD1, 4) | ||
544 | xor rD2,rD2,rI2 | ||
545 | SAVE_DATA(rD2, 8) | ||
546 | xor rD3,rD3,rI3 | ||
547 | SAVE_DATA(rD3, 12) | ||
548 | GF128_MUL(rG0, rG1, rG2, rG3, rW0) | ||
549 | ENDIAN_SWAP(rI0, rI1, rG0, rG1) | ||
550 | ENDIAN_SWAP(rI2, rI3, rG2, rG3) | ||
551 | cmpwi rLN,0 | ||
552 | NEXT_BLOCK | ||
553 | bt gt,ppc_encrypt_xts_loop | ||
554 | START_IV | ||
555 | SAVE_IV(rI0, 0) | ||
556 | SAVE_IV(rI1, 4) | ||
557 | SAVE_IV(rI2, 8) | ||
558 | SAVE_IV(rI3, 12) | ||
559 | FINALIZE_CRYPT(8) | ||
560 | blr | ||
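Between blocks the tweak held in rG0-rG3 (kept byte-swapped via ENDIAN_SWAP) is advanced by GF128_MUL, i.e. multiplied by x in GF(2^128) using the XTS little-endian convention. A hedged C sketch of that update (the helper name xts_gf128_mul_x is illustrative only):

    #include <stdint.h>

    /* Multiply a 16-byte XTS tweak by x in GF(2^128): shift the whole
     * value left by one bit (little-endian byte order) and fold the
     * carry back in with the reduction polynomial 0x87. */
    static void xts_gf128_mul_x(uint8_t t[16])
    {
            uint8_t carry = 0;
            int i;

            for (i = 0; i < 16; i++) {
                    uint8_t next = t[i] >> 7;

                    t[i] = (uint8_t)(t[i] << 1) | carry;
                    carry = next;
            }
            if (carry)
                    t[0] ^= 0x87;   /* x^128 = x^7 + x^2 + x + 1 */
    }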
561 | |||
562 | /* | ||
563 | * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, | ||
564 | * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk); | ||
565 | * | ||
566 | * called from glue layer to decrypt multiple blocks via XTS | ||
567 | * If key_twk is given, the initial tweak (IV) encryption is | ||
568 | * performed as well. Round values are AES128 = 4, AES192 = 5, | ||
569 | * AES256 = 6 | ||
570 | * | ||
571 | */ | ||
572 | _GLOBAL(ppc_decrypt_xts) | ||
573 | INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8) | ||
574 | LOAD_IV(rI0, 0) | ||
575 | addi rT1,rT0,4096 | ||
576 | LOAD_IV(rI1, 4) | ||
577 | LOAD_IV(rI2, 8) | ||
578 | cmpwi rKT,0 | ||
579 | LOAD_IV(rI3, 12) | ||
580 | bt eq,ppc_decrypt_xts_notweak | ||
581 | subi rT0,rT0,4096 | ||
582 | mr rKP,rKT | ||
583 | START_KEY(rI0, rI1, rI2, rI3) | ||
584 | bl ppc_encrypt_block | ||
585 | xor rI0,rD0,rW0 | ||
586 | xor rI1,rD1,rW1 | ||
587 | xor rI2,rD2,rW2 | ||
588 | xor rI3,rD3,rW3 | ||
589 | addi rT0,rT0,4096 | ||
590 | ppc_decrypt_xts_notweak: | ||
591 | ENDIAN_SWAP(rG0, rG1, rI0, rI1) | ||
592 | ENDIAN_SWAP(rG2, rG3, rI2, rI3) | ||
593 | ppc_decrypt_xts_loop: | ||
594 | LOAD_DATA(rD0, 0) | ||
595 | mr rKP,rKS | ||
596 | LOAD_DATA(rD1, 4) | ||
597 | subi rLN,rLN,16 | ||
598 | LOAD_DATA(rD2, 8) | ||
599 | LOAD_DATA(rD3, 12) | ||
600 | xor rD0,rD0,rI0 | ||
601 | xor rD1,rD1,rI1 | ||
602 | xor rD2,rD2,rI2 | ||
603 | xor rD3,rD3,rI3 | ||
604 | START_KEY(rD0, rD1, rD2, rD3) | ||
605 | bl ppc_decrypt_block | ||
606 | xor rD0,rD0,rW0 | ||
607 | xor rD1,rD1,rW1 | ||
608 | xor rD2,rD2,rW2 | ||
609 | xor rD3,rD3,rW3 | ||
610 | xor rD0,rD0,rI0 | ||
611 | SAVE_DATA(rD0, 0) | ||
612 | xor rD1,rD1,rI1 | ||
613 | SAVE_DATA(rD1, 4) | ||
614 | xor rD2,rD2,rI2 | ||
615 | SAVE_DATA(rD2, 8) | ||
616 | xor rD3,rD3,rI3 | ||
617 | SAVE_DATA(rD3, 12) | ||
618 | GF128_MUL(rG0, rG1, rG2, rG3, rW0) | ||
619 | ENDIAN_SWAP(rI0, rI1, rG0, rG1) | ||
620 | ENDIAN_SWAP(rI2, rI3, rG2, rG3) | ||
621 | cmpwi rLN,0 | ||
622 | NEXT_BLOCK | ||
623 | bt gt,ppc_decrypt_xts_loop | ||
624 | START_IV | ||
625 | SAVE_IV(rI0, 0) | ||
626 | SAVE_IV(rI1, 4) | ||
627 | SAVE_IV(rI2, 8) | ||
628 | SAVE_IV(rI3, 12) | ||
629 | FINALIZE_CRYPT(8) | ||
630 | blr | ||
diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h new file mode 100644 index 000000000000..30d217b399c3 --- /dev/null +++ b/arch/powerpc/crypto/aes-spe-regs.h | |||
@@ -0,0 +1,42 @@ | |||
1 | /* | ||
2 | * Common registers for PPC AES implementation | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #define rKS r0 /* copy of en-/decryption key pointer */ | ||
14 | #define rDP r3 /* destination pointer */ | ||
15 | #define rSP r4 /* source pointer */ | ||
16 | #define rKP r5 /* pointer to en-/decryption key */ | ||
17 | #define rRR r6 /* en-/decryption rounds */ | ||
18 | #define rLN r7 /* length of data to be processed */ | ||
19 | #define rIP r8 /* pointer to IV (CBC/CTR/XTS modes) */ | ||
20 | #define rKT r9 /* pointer to tweak key (XTS mode) */ | ||
21 | #define rT0 r11 /* pointers to en-/decryption tables */ | ||
22 | #define rT1 r10 | ||
23 | #define rD0 r9 /* data */ | ||
24 | #define rD1 r14 | ||
25 | #define rD2 r12 | ||
26 | #define rD3 r15 | ||
27 | #define rW0 r16 /* working registers */ | ||
28 | #define rW1 r17 | ||
29 | #define rW2 r18 | ||
30 | #define rW3 r19 | ||
31 | #define rW4 r20 | ||
32 | #define rW5 r21 | ||
33 | #define rW6 r22 | ||
34 | #define rW7 r23 | ||
35 | #define rI0 r24 /* IV */ | ||
36 | #define rI1 r25 | ||
37 | #define rI2 r26 | ||
38 | #define rI3 r27 | ||
39 | #define rG0 r28 /* endian reversed tweak (XTS mode) */ | ||
40 | #define rG1 r29 | ||
41 | #define rG2 r30 | ||
42 | #define rG3 r31 | ||
diff --git a/arch/powerpc/crypto/aes-tab-4k.S b/arch/powerpc/crypto/aes-tab-4k.S new file mode 100644 index 000000000000..701e60240dc3 --- /dev/null +++ b/arch/powerpc/crypto/aes-tab-4k.S | |||
@@ -0,0 +1,331 @@ | |||
1 | /* | ||
2 | * 4K AES tables for PPC AES implementation | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * These big endian AES encryption/decryption tables have been taken from | ||
15 | * crypto/aes_generic.c and are designed to be accessed by a combination | ||
16 | * of rlwimi/lwz instructions with a minimum of table registers (usually | ||
17 | * only one is required). For that reason they are aligned to 4K. Rotated | ||
18 | * values are kept adjacent because only small offsets are available in the | ||
19 | * SPE load instructions. E.g. evldw, evlwwsplat, ... | ||
20 | * | ||
21 | * For the security-conscious: because of their size these tables might be | ||
22 | * vulnerable to cache timing attacks. Nevertheless, in contrast to the | ||
23 | * generic tables they have been reduced from 16KB to 8KB + 256 bytes. | ||
24 | * This is a good tradeoff for low power devices (e.g. routers) without | ||
25 | * dedicated encryption hardware, which usually do not run multiuser | ||
26 | * environments. | ||
27 | * | ||
28 | */ | ||
29 | |||
30 | #define R(a, b, c, d) \ | ||
31 | 0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a | ||
32 | |||
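For illustration: each R(a, b, c, d) invocation emits one logical table entry in all four byte rotations (16 bytes per entry, 256 entries, hence exactly 4KB per table), so any rotation can be reached from a single table register with a small fixed offset. The first encryption entry expands to:

    .long R(c6, 63, 63, a5)
    /* after preprocessing: */
    .long 0xc66363a5, 0xa5c66363, 0x63a5c663, 0x6363a5c6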
33 | .data | ||
34 | .align 12 | ||
35 | .globl PPC_AES_4K_ENCTAB | ||
36 | PPC_AES_4K_ENCTAB: | ||
37 | /* encryption table, same as crypto_ft_tab in crypto/aes-generic.c */ | ||
38 | .long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84) | ||
39 | .long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d) | ||
40 | .long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd) | ||
41 | .long R(de, 6f, 6f, b1), R(91, c5, c5, 54) | ||
42 | .long R(60, 30, 30, 50), R(02, 01, 01, 03) | ||
43 | .long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d) | ||
44 | .long R(e7, fe, fe, 19), R(b5, d7, d7, 62) | ||
45 | .long R(4d, ab, ab, e6), R(ec, 76, 76, 9a) | ||
46 | .long R(8f, ca, ca, 45), R(1f, 82, 82, 9d) | ||
47 | .long R(89, c9, c9, 40), R(fa, 7d, 7d, 87) | ||
48 | .long R(ef, fa, fa, 15), R(b2, 59, 59, eb) | ||
49 | .long R(8e, 47, 47, c9), R(fb, f0, f0, 0b) | ||
50 | .long R(41, ad, ad, ec), R(b3, d4, d4, 67) | ||
51 | .long R(5f, a2, a2, fd), R(45, af, af, ea) | ||
52 | .long R(23, 9c, 9c, bf), R(53, a4, a4, f7) | ||
53 | .long R(e4, 72, 72, 96), R(9b, c0, c0, 5b) | ||
54 | .long R(75, b7, b7, c2), R(e1, fd, fd, 1c) | ||
55 | .long R(3d, 93, 93, ae), R(4c, 26, 26, 6a) | ||
56 | .long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41) | ||
57 | .long R(f5, f7, f7, 02), R(83, cc, cc, 4f) | ||
58 | .long R(68, 34, 34, 5c), R(51, a5, a5, f4) | ||
59 | .long R(d1, e5, e5, 34), R(f9, f1, f1, 08) | ||
60 | .long R(e2, 71, 71, 93), R(ab, d8, d8, 73) | ||
61 | .long R(62, 31, 31, 53), R(2a, 15, 15, 3f) | ||
62 | .long R(08, 04, 04, 0c), R(95, c7, c7, 52) | ||
63 | .long R(46, 23, 23, 65), R(9d, c3, c3, 5e) | ||
64 | .long R(30, 18, 18, 28), R(37, 96, 96, a1) | ||
65 | .long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5) | ||
66 | .long R(0e, 07, 07, 09), R(24, 12, 12, 36) | ||
67 | .long R(1b, 80, 80, 9b), R(df, e2, e2, 3d) | ||
68 | .long R(cd, eb, eb, 26), R(4e, 27, 27, 69) | ||
69 | .long R(7f, b2, b2, cd), R(ea, 75, 75, 9f) | ||
70 | .long R(12, 09, 09, 1b), R(1d, 83, 83, 9e) | ||
71 | .long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e) | ||
72 | .long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2) | ||
73 | .long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb) | ||
74 | .long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d) | ||
75 | .long R(b7, d6, d6, 61), R(7d, b3, b3, ce) | ||
76 | .long R(52, 29, 29, 7b), R(dd, e3, e3, 3e) | ||
77 | .long R(5e, 2f, 2f, 71), R(13, 84, 84, 97) | ||
78 | .long R(a6, 53, 53, f5), R(b9, d1, d1, 68) | ||
79 | .long R(00, 00, 00, 00), R(c1, ed, ed, 2c) | ||
80 | .long R(40, 20, 20, 60), R(e3, fc, fc, 1f) | ||
81 | .long R(79, b1, b1, c8), R(b6, 5b, 5b, ed) | ||
82 | .long R(d4, 6a, 6a, be), R(8d, cb, cb, 46) | ||
83 | .long R(67, be, be, d9), R(72, 39, 39, 4b) | ||
84 | .long R(94, 4a, 4a, de), R(98, 4c, 4c, d4) | ||
85 | .long R(b0, 58, 58, e8), R(85, cf, cf, 4a) | ||
86 | .long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a) | ||
87 | .long R(4f, aa, aa, e5), R(ed, fb, fb, 16) | ||
88 | .long R(86, 43, 43, c5), R(9a, 4d, 4d, d7) | ||
89 | .long R(66, 33, 33, 55), R(11, 85, 85, 94) | ||
90 | .long R(8a, 45, 45, cf), R(e9, f9, f9, 10) | ||
91 | .long R(04, 02, 02, 06), R(fe, 7f, 7f, 81) | ||
92 | .long R(a0, 50, 50, f0), R(78, 3c, 3c, 44) | ||
93 | .long R(25, 9f, 9f, ba), R(4b, a8, a8, e3) | ||
94 | .long R(a2, 51, 51, f3), R(5d, a3, a3, fe) | ||
95 | .long R(80, 40, 40, c0), R(05, 8f, 8f, 8a) | ||
96 | .long R(3f, 92, 92, ad), R(21, 9d, 9d, bc) | ||
97 | .long R(70, 38, 38, 48), R(f1, f5, f5, 04) | ||
98 | .long R(63, bc, bc, df), R(77, b6, b6, c1) | ||
99 | .long R(af, da, da, 75), R(42, 21, 21, 63) | ||
100 | .long R(20, 10, 10, 30), R(e5, ff, ff, 1a) | ||
101 | .long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d) | ||
102 | .long R(81, cd, cd, 4c), R(18, 0c, 0c, 14) | ||
103 | .long R(26, 13, 13, 35), R(c3, ec, ec, 2f) | ||
104 | .long R(be, 5f, 5f, e1), R(35, 97, 97, a2) | ||
105 | .long R(88, 44, 44, cc), R(2e, 17, 17, 39) | ||
106 | .long R(93, c4, c4, 57), R(55, a7, a7, f2) | ||
107 | .long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47) | ||
108 | .long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7) | ||
109 | .long R(32, 19, 19, 2b), R(e6, 73, 73, 95) | ||
110 | .long R(c0, 60, 60, a0), R(19, 81, 81, 98) | ||
111 | .long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f) | ||
112 | .long R(44, 22, 22, 66), R(54, 2a, 2a, 7e) | ||
113 | .long R(3b, 90, 90, ab), R(0b, 88, 88, 83) | ||
114 | .long R(8c, 46, 46, ca), R(c7, ee, ee, 29) | ||
115 | .long R(6b, b8, b8, d3), R(28, 14, 14, 3c) | ||
116 | .long R(a7, de, de, 79), R(bc, 5e, 5e, e2) | ||
117 | .long R(16, 0b, 0b, 1d), R(ad, db, db, 76) | ||
118 | .long R(db, e0, e0, 3b), R(64, 32, 32, 56) | ||
119 | .long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e) | ||
120 | .long R(92, 49, 49, db), R(0c, 06, 06, 0a) | ||
121 | .long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4) | ||
122 | .long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e) | ||
123 | .long R(43, ac, ac, ef), R(c4, 62, 62, a6) | ||
124 | .long R(39, 91, 91, a8), R(31, 95, 95, a4) | ||
125 | .long R(d3, e4, e4, 37), R(f2, 79, 79, 8b) | ||
126 | .long R(d5, e7, e7, 32), R(8b, c8, c8, 43) | ||
127 | .long R(6e, 37, 37, 59), R(da, 6d, 6d, b7) | ||
128 | .long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64) | ||
129 | .long R(9c, 4e, 4e, d2), R(49, a9, a9, e0) | ||
130 | .long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa) | ||
131 | .long R(f3, f4, f4, 07), R(cf, ea, ea, 25) | ||
132 | .long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e) | ||
133 | .long R(47, ae, ae, e9), R(10, 08, 08, 18) | ||
134 | .long R(6f, ba, ba, d5), R(f0, 78, 78, 88) | ||
135 | .long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72) | ||
136 | .long R(38, 1c, 1c, 24), R(57, a6, a6, f1) | ||
137 | .long R(73, b4, b4, c7), R(97, c6, c6, 51) | ||
138 | .long R(cb, e8, e8, 23), R(a1, dd, dd, 7c) | ||
139 | .long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21) | ||
140 | .long R(96, 4b, 4b, dd), R(61, bd, bd, dc) | ||
141 | .long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85) | ||
142 | .long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42) | ||
143 | .long R(71, b5, b5, c4), R(cc, 66, 66, aa) | ||
144 | .long R(90, 48, 48, d8), R(06, 03, 03, 05) | ||
145 | .long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12) | ||
146 | .long R(c2, 61, 61, a3), R(6a, 35, 35, 5f) | ||
147 | .long R(ae, 57, 57, f9), R(69, b9, b9, d0) | ||
148 | .long R(17, 86, 86, 91), R(99, c1, c1, 58) | ||
149 | .long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9) | ||
150 | .long R(d9, e1, e1, 38), R(eb, f8, f8, 13) | ||
151 | .long R(2b, 98, 98, b3), R(22, 11, 11, 33) | ||
152 | .long R(d2, 69, 69, bb), R(a9, d9, d9, 70) | ||
153 | .long R(07, 8e, 8e, 89), R(33, 94, 94, a7) | ||
154 | .long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22) | ||
155 | .long R(15, 87, 87, 92), R(c9, e9, e9, 20) | ||
156 | .long R(87, ce, ce, 49), R(aa, 55, 55, ff) | ||
157 | .long R(50, 28, 28, 78), R(a5, df, df, 7a) | ||
158 | .long R(03, 8c, 8c, 8f), R(59, a1, a1, f8) | ||
159 | .long R(09, 89, 89, 80), R(1a, 0d, 0d, 17) | ||
160 | .long R(65, bf, bf, da), R(d7, e6, e6, 31) | ||
161 | .long R(84, 42, 42, c6), R(d0, 68, 68, b8) | ||
162 | .long R(82, 41, 41, c3), R(29, 99, 99, b0) | ||
163 | .long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11) | ||
164 | .long R(7b, b0, b0, cb), R(a8, 54, 54, fc) | ||
165 | .long R(6d, bb, bb, d6), R(2c, 16, 16, 3a) | ||
166 | .globl PPC_AES_4K_DECTAB | ||
167 | PPC_AES_4K_DECTAB: | ||
168 | /* decryption table, same as crypto_it_tab in crypto/aes-generic.c */ | ||
169 | .long R(51, f4, a7, 50), R(7e, 41, 65, 53) | ||
170 | .long R(1a, 17, a4, c3), R(3a, 27, 5e, 96) | ||
171 | .long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1) | ||
172 | .long R(ac, fa, 58, ab), R(4b, e3, 03, 93) | ||
173 | .long R(20, 30, fa, 55), R(ad, 76, 6d, f6) | ||
174 | .long R(88, cc, 76, 91), R(f5, 02, 4c, 25) | ||
175 | .long R(4f, e5, d7, fc), R(c5, 2a, cb, d7) | ||
176 | .long R(26, 35, 44, 80), R(b5, 62, a3, 8f) | ||
177 | .long R(de, b1, 5a, 49), R(25, ba, 1b, 67) | ||
178 | .long R(45, ea, 0e, 98), R(5d, fe, c0, e1) | ||
179 | .long R(c3, 2f, 75, 02), R(81, 4c, f0, 12) | ||
180 | .long R(8d, 46, 97, a3), R(6b, d3, f9, c6) | ||
181 | .long R(03, 8f, 5f, e7), R(15, 92, 9c, 95) | ||
182 | .long R(bf, 6d, 7a, eb), R(95, 52, 59, da) | ||
183 | .long R(d4, be, 83, 2d), R(58, 74, 21, d3) | ||
184 | .long R(49, e0, 69, 29), R(8e, c9, c8, 44) | ||
185 | .long R(75, c2, 89, 6a), R(f4, 8e, 79, 78) | ||
186 | .long R(99, 58, 3e, 6b), R(27, b9, 71, dd) | ||
187 | .long R(be, e1, 4f, b6), R(f0, 88, ad, 17) | ||
188 | .long R(c9, 20, ac, 66), R(7d, ce, 3a, b4) | ||
189 | .long R(63, df, 4a, 18), R(e5, 1a, 31, 82) | ||
190 | .long R(97, 51, 33, 60), R(62, 53, 7f, 45) | ||
191 | .long R(b1, 64, 77, e0), R(bb, 6b, ae, 84) | ||
192 | .long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94) | ||
193 | .long R(70, 48, 68, 58), R(8f, 45, fd, 19) | ||
194 | .long R(94, de, 6c, 87), R(52, 7b, f8, b7) | ||
195 | .long R(ab, 73, d3, 23), R(72, 4b, 02, e2) | ||
196 | .long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a) | ||
197 | .long R(b2, eb, 28, 07), R(2f, b5, c2, 03) | ||
198 | .long R(86, c5, 7b, 9a), R(d3, 37, 08, a5) | ||
199 | .long R(30, 28, 87, f2), R(23, bf, a5, b2) | ||
200 | .long R(02, 03, 6a, ba), R(ed, 16, 82, 5c) | ||
201 | .long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92) | ||
202 | .long R(f3, 07, f2, f0), R(4e, 69, e2, a1) | ||
203 | .long R(65, da, f4, cd), R(06, 05, be, d5) | ||
204 | .long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a) | ||
205 | .long R(34, 2e, 53, 9d), R(a2, f3, 55, a0) | ||
206 | .long R(05, 8a, e1, 32), R(a4, f6, eb, 75) | ||
207 | .long R(0b, 83, ec, 39), R(40, 60, ef, aa) | ||
208 | .long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51) | ||
209 | .long R(3e, 21, 8a, f9), R(96, dd, 06, 3d) | ||
210 | .long R(dd, 3e, 05, ae), R(4d, e6, bd, 46) | ||
211 | .long R(91, 54, 8d, b5), R(71, c4, 5d, 05) | ||
212 | .long R(04, 06, d4, 6f), R(60, 50, 15, ff) | ||
213 | .long R(19, 98, fb, 24), R(d6, bd, e9, 97) | ||
214 | .long R(89, 40, 43, cc), R(67, d9, 9e, 77) | ||
215 | .long R(b0, e8, 42, bd), R(07, 89, 8b, 88) | ||
216 | .long R(e7, 19, 5b, 38), R(79, c8, ee, db) | ||
217 | .long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9) | ||
218 | .long R(f8, 84, 1e, c9), R(00, 00, 00, 00) | ||
219 | .long R(09, 80, 86, 83), R(32, 2b, ed, 48) | ||
220 | .long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e) | ||
221 | .long R(fd, 0e, ff, fb), R(0f, 85, 38, 56) | ||
222 | .long R(3d, ae, d5, 1e), R(36, 2d, 39, 27) | ||
223 | .long R(0a, 0f, d9, 64), R(68, 5c, a6, 21) | ||
224 | .long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a) | ||
225 | .long R(0c, 0a, 67, b1), R(93, 57, e7, 0f) | ||
226 | .long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e) | ||
227 | .long R(80, c0, c5, 4f), R(61, dc, 20, a2) | ||
228 | .long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16) | ||
229 | .long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5) | ||
230 | .long R(3c, 22, e0, 43), R(12, 1b, 17, 1d) | ||
231 | .long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad) | ||
232 | .long R(2d, b6, a8, b9), R(14, 1e, a9, c8) | ||
233 | .long R(57, f1, 19, 85), R(af, 75, 07, 4c) | ||
234 | .long R(ee, 99, dd, bb), R(a3, 7f, 60, fd) | ||
235 | .long R(f7, 01, 26, 9f), R(5c, 72, f5, bc) | ||
236 | .long R(44, 66, 3b, c5), R(5b, fb, 7e, 34) | ||
237 | .long R(8b, 43, 29, 76), R(cb, 23, c6, dc) | ||
238 | .long R(b6, ed, fc, 68), R(b8, e4, f1, 63) | ||
239 | .long R(d7, 31, dc, ca), R(42, 63, 85, 10) | ||
240 | .long R(13, 97, 22, 40), R(84, c6, 11, 20) | ||
241 | .long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8) | ||
242 | .long R(ae, f9, 32, 11), R(c7, 29, a1, 6d) | ||
243 | .long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3) | ||
244 | .long R(0d, 86, 52, ec), R(77, c1, e3, d0) | ||
245 | .long R(2b, b3, 16, 6c), R(a9, 70, b9, 99) | ||
246 | .long R(11, 94, 48, fa), R(47, e9, 64, 22) | ||
247 | .long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a) | ||
248 | .long R(56, 7d, 2c, d8), R(22, 33, 90, ef) | ||
249 | .long R(87, 49, 4e, c7), R(d9, 38, d1, c1) | ||
250 | .long R(8c, ca, a2, fe), R(98, d4, 0b, 36) | ||
251 | .long R(a6, f5, 81, cf), R(a5, 7a, de, 28) | ||
252 | .long R(da, b7, 8e, 26), R(3f, ad, bf, a4) | ||
253 | .long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d) | ||
254 | .long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62) | ||
255 | .long R(f6, 8d, 13, c2), R(90, d8, b8, e8) | ||
256 | .long R(2e, 39, f7, 5e), R(82, c3, af, f5) | ||
257 | .long R(9f, 5d, 80, be), R(69, d0, 93, 7c) | ||
258 | .long R(6f, d5, 2d, a9), R(cf, 25, 12, b3) | ||
259 | .long R(c8, ac, 99, 3b), R(10, 18, 7d, a7) | ||
260 | .long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b) | ||
261 | .long R(cd, 26, 78, 09), R(6e, 59, 18, f4) | ||
262 | .long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8) | ||
263 | .long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e) | ||
264 | .long R(21, bc, cf, 08), R(ef, 15, e8, e6) | ||
265 | .long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce) | ||
266 | .long R(ea, 9f, 09, d4), R(29, b0, 7c, d6) | ||
267 | .long R(31, a4, b2, af), R(2a, 3f, 23, 31) | ||
268 | .long R(c6, a5, 94, 30), R(35, a2, 66, c0) | ||
269 | .long R(74, 4e, bc, 37), R(fc, 82, ca, a6) | ||
270 | .long R(e0, 90, d0, b0), R(33, a7, d8, 15) | ||
271 | .long R(f1, 04, 98, 4a), R(41, ec, da, f7) | ||
272 | .long R(7f, cd, 50, 0e), R(17, 91, f6, 2f) | ||
273 | .long R(76, 4d, d6, 8d), R(43, ef, b0, 4d) | ||
274 | .long R(cc, aa, 4d, 54), R(e4, 96, 04, df) | ||
275 | .long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b) | ||
276 | .long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f) | ||
277 | .long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d) | ||
278 | .long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e) | ||
279 | .long R(b3, 67, 1d, 5a), R(92, db, d2, 52) | ||
280 | .long R(e9, 10, 56, 33), R(6d, d6, 47, 13) | ||
281 | .long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a) | ||
282 | .long R(59, f8, 14, 8e), R(eb, 13, 3c, 89) | ||
283 | .long R(ce, a9, 27, ee), R(b7, 61, c9, 35) | ||
284 | .long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c) | ||
285 | .long R(9c, d2, df, 59), R(55, f2, 73, 3f) | ||
286 | .long R(18, 14, ce, 79), R(73, c7, 37, bf) | ||
287 | .long R(53, f7, cd, ea), R(5f, fd, aa, 5b) | ||
288 | .long R(df, 3d, 6f, 14), R(78, 44, db, 86) | ||
289 | .long R(ca, af, f3, 81), R(b9, 68, c4, 3e) | ||
290 | .long R(38, 24, 34, 2c), R(c2, a3, 40, 5f) | ||
291 | .long R(16, 1d, c3, 72), R(bc, e2, 25, 0c) | ||
292 | .long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41) | ||
293 | .long R(39, a8, 01, 71), R(08, 0c, b3, de) | ||
294 | .long R(d8, b4, e4, 9c), R(64, 56, c1, 90) | ||
295 | .long R(7b, cb, 84, 61), R(d5, 32, b6, 70) | ||
296 | .long R(48, 6c, 5c, 74), R(d0, b8, 57, 42) | ||
297 | .globl PPC_AES_4K_DECTAB2 | ||
298 | PPC_AES_4K_DECTAB2: | ||
299 | /* decryption table, same as crypto_il_tab in crypto/aes-generic.c */ | ||
300 | .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 | ||
301 | .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb | ||
302 | .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 | ||
303 | .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb | ||
304 | .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d | ||
305 | .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e | ||
306 | .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 | ||
307 | .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 | ||
308 | .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 | ||
309 | .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 | ||
310 | .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda | ||
311 | .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 | ||
312 | .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a | ||
313 | .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 | ||
314 | .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 | ||
315 | .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b | ||
316 | .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea | ||
317 | .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 | ||
318 | .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 | ||
319 | .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e | ||
320 | .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 | ||
321 | .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b | ||
322 | .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 | ||
323 | .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 | ||
324 | .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 | ||
325 | .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f | ||
326 | .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d | ||
327 | .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef | ||
328 | .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 | ||
329 | .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 | ||
330 | .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 | ||
331 | .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d | ||
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S new file mode 100644 index 000000000000..10cdf5bceebb --- /dev/null +++ b/arch/powerpc/crypto/md5-asm.S | |||
@@ -0,0 +1,243 @@ | |||
1 | /* | ||
2 | * Fast MD5 implementation for PPC | ||
3 | * | ||
4 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | #include <asm/ppc_asm.h> | ||
13 | #include <asm/asm-offsets.h> | ||
14 | |||
15 | #define rHP r3 | ||
16 | #define rWP r4 | ||
17 | |||
18 | #define rH0 r0 | ||
19 | #define rH1 r6 | ||
20 | #define rH2 r7 | ||
21 | #define rH3 r5 | ||
22 | |||
23 | #define rW00 r8 | ||
24 | #define rW01 r9 | ||
25 | #define rW02 r10 | ||
26 | #define rW03 r11 | ||
27 | #define rW04 r12 | ||
28 | #define rW05 r14 | ||
29 | #define rW06 r15 | ||
30 | #define rW07 r16 | ||
31 | #define rW08 r17 | ||
32 | #define rW09 r18 | ||
33 | #define rW10 r19 | ||
34 | #define rW11 r20 | ||
35 | #define rW12 r21 | ||
36 | #define rW13 r22 | ||
37 | #define rW14 r23 | ||
38 | #define rW15 r24 | ||
39 | |||
40 | #define rT0 r25 | ||
41 | #define rT1 r26 | ||
42 | |||
43 | #define INITIALIZE \ | ||
44 | PPC_STLU r1,-INT_FRAME_SIZE(r1); \ | ||
45 | SAVE_8GPRS(14, r1); /* push registers onto stack */ \ | ||
46 | SAVE_4GPRS(22, r1); \ | ||
47 | SAVE_GPR(26, r1) | ||
48 | |||
49 | #define FINALIZE \ | ||
50 | REST_8GPRS(14, r1); /* pop registers from stack */ \ | ||
51 | REST_4GPRS(22, r1); \ | ||
52 | REST_GPR(26, r1); \ | ||
53 | addi r1,r1,INT_FRAME_SIZE; | ||
54 | |||
55 | #ifdef __BIG_ENDIAN__ | ||
56 | #define LOAD_DATA(reg, off) \ | ||
57 | lwbrx reg,0,rWP; /* load data */ | ||
58 | #define INC_PTR \ | ||
59 | addi rWP,rWP,4; /* increment per word */ | ||
60 | #define NEXT_BLOCK /* nothing to do */ | ||
61 | #else | ||
62 | #define LOAD_DATA(reg, off) \ | ||
63 | lwz reg,off(rWP); /* load data */ | ||
64 | #define INC_PTR /* nothing to do */ | ||
65 | #define NEXT_BLOCK \ | ||
66 | addi rWP,rWP,64; /* increment per block */ | ||
67 | #endif | ||
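MD5 consumes its input as little-endian 32-bit words, so the big-endian build loads each word byte-reversed with lwbrx (advancing the pointer per word), while the little-endian build uses plain lwz offsets and bumps the pointer once per 64-byte block. In C terms both variants of LOAD_DATA amount to (sketch, not part of the patch):

    w = le32_to_cpu(*(const __le32 *)src);  /* one message word */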
68 | |||
69 | #define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \ | ||
70 | LOAD_DATA(w0, off) /* W */ \ | ||
71 | and rT0,b,c; /* 1: f = b and c */ \ | ||
72 | INC_PTR /* ptr++ */ \ | ||
73 | andc rT1,d,b; /* 1: f' = ~b and d */ \ | ||
74 | LOAD_DATA(w1, off+4) /* W */ \ | ||
75 | or rT0,rT0,rT1; /* 1: f = f or f' */ \ | ||
76 | addi w0,w0,k0l; /* 1: wk = w + k */ \ | ||
77 | add a,a,rT0; /* 1: a = a + f */ \ | ||
78 | addis w0,w0,k0h; /* 1: wk = w + k' */ \ | ||
79 | addis w1,w1,k1h; /* 2: wk = w + k */ \ | ||
80 | add a,a,w0; /* 1: a = a + wk */ \ | ||
81 | addi w1,w1,k1l; /* 2: wk = w + k' */ \ | ||
82 | rotrwi a,a,p; /* 1: a = a rotl x */ \ | ||
83 | add d,d,w1; /* 2: a = a + wk */ \ | ||
84 | add a,a,b; /* 1: a = a + b */ \ | ||
85 | and rT0,a,b; /* 2: f = b and c */ \ | ||
86 | andc rT1,c,a; /* 2: f' = ~b and d */ \ | ||
87 | or rT0,rT0,rT1; /* 2: f = f or f' */ \ | ||
88 | add d,d,rT0; /* 2: a = a + f */ \ | ||
89 | INC_PTR /* ptr++ */ \ | ||
90 | rotrwi d,d,q; /* 2: a = a rotl x */ \ | ||
91 | add d,d,a; /* 2: a = a + b */ | ||
92 | |||
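The k0h/k0l and k1h/k1l parameters split each 32-bit MD5 round constant into addis/addi immediates. Since addi sign-extends its 16-bit operand, a negative low half implies the high half was pre-incremented by one. A small self-check for the first constant K[0] = 0xd76aa478, which appears below as the pair 0xd76b, -23432 (standalone sketch, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t hi = 0xd76b;   /* addis immediate (high half + carry) */
            int32_t lo = -23432;    /* addi immediate, sign-extended */
            uint32_t k = (hi << 16) + (uint32_t)lo;

            assert(k == 0xd76aa478u);       /* MD5 K[0] */
            return 0;
    }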
93 | #define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ | ||
94 | andc rT0,c,d; /* 1: f = c and ~d */ \ | ||
95 | and rT1,b,d; /* 1: f' = b and d */ \ | ||
96 | addi w0,w0,k0l; /* 1: wk = w + k */ \ | ||
97 | or rT0,rT0,rT1; /* 1: f = f or f' */ \ | ||
98 | addis w0,w0,k0h; /* 1: wk = w + k' */ \ | ||
99 | add a,a,rT0; /* 1: a = a + f */ \ | ||
100 | addi w1,w1,k1l; /* 2: wk = w + k */ \ | ||
101 | add a,a,w0; /* 1: a = a + wk */ \ | ||
102 | addis w1,w1,k1h; /* 2: wk = w + k' */ \ | ||
103 | andc rT0,b,c; /* 2: f = c and ~d */ \ | ||
104 | rotrwi a,a,p; /* 1: a = a rotl x */ \ | ||
105 | add a,a,b; /* 1: a = a + b */ \ | ||
106 | add d,d,w1; /* 2: a = a + wk */ \ | ||
107 | and rT1,a,c; /* 2: f' = b and d */ \ | ||
108 | or rT0,rT0,rT1; /* 2: f = f or f' */ \ | ||
109 | add d,d,rT0; /* 2: a = a + f */ \ | ||
110 | rotrwi d,d,q; /* 2: a = a rotl x */ \ | ||
111 | add d,d,a /* 2: a = a + b */ | ||
112 | |||
113 | #define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ | ||
114 | xor rT0,b,c; /* 1: f' = b xor c */ \ | ||
115 | addi w0,w0,k0l; /* 1: wk = w + k */ \ | ||
116 | xor rT1,rT0,d; /* 1: f = f xor f' */ \ | ||
117 | addis w0,w0,k0h; /* 1: wk = w + k' */ \ | ||
118 | add a,a,rT1; /* 1: a = a + f */ \ | ||
119 | addi w1,w1,k1l; /* 2: wk = w + k */ \ | ||
120 | add a,a,w0; /* 1: a = a + wk */ \ | ||
121 | addis w1,w1,k1h; /* 2: wk = w + k' */ \ | ||
122 | rotrwi a,a,p; /* 1: a = a rotl x */ \ | ||
123 | add d,d,w1; /* 2: a = a + wk */ \ | ||
124 | add a,a,b; /* 1: a = a + b */ \ | ||
125 | xor rT1,rT0,a; /* 2: f = b xor f' */ \ | ||
126 | add d,d,rT1; /* 2: a = a + f */ \ | ||
127 | rotrwi d,d,q; /* 2: a = a rotl x */ \ | ||
128 | add d,d,a; /* 2: a = a + b */ | ||
129 | |||
130 | #define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ | ||
131 | addi w0,w0,k0l; /* 1: w = w + k */ \ | ||
132 | orc rT0,b,d; /* 1: f = b or ~d */ \ | ||
133 | addis w0,w0,k0h; /* 1: w = w + k' */ \ | ||
134 | xor rT0,rT0,c; /* 1: f = f xor c */ \ | ||
135 | add a,a,w0; /* 1: a = a + wk */ \ | ||
136 | addi w1,w1,k1l; /* 2: w = w + k */ \ | ||
137 | add a,a,rT0; /* 1: a = a + f */ \ | ||
138 | addis w1,w1,k1h; /* 2: w = w + k' */ \ | ||
139 | rotrwi a,a,p; /* 1: a = a rotl x */ \ | ||
140 | add a,a,b; /* 1: a = a + b */ \ | ||
141 | orc rT0,a,c; /* 2: f = b or ~d */ \ | ||
142 | add d,d,w1; /* 2: a = a + wk */ \ | ||
143 | xor rT0,rT0,b; /* 2: f = f xor c */ \ | ||
144 | add d,d,rT0; /* 2: a = a + f */ \ | ||
145 | rotrwi d,d,q; /* 2: a = a rotl x */ \ | ||
146 | add d,d,a; /* 2: a = a + b */ | ||
147 | |||
148 | _GLOBAL(ppc_md5_transform) | ||
149 | INITIALIZE | ||
150 | |||
151 | mtctr r5 | ||
152 | lwz rH0,0(rHP) | ||
153 | lwz rH1,4(rHP) | ||
154 | lwz rH2,8(rHP) | ||
155 | lwz rH3,12(rHP) | ||
156 | |||
157 | ppc_md5_main: | ||
158 | R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0, | ||
159 | 0xd76b, -23432, 0xe8c8, -18602) | ||
160 | R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8, | ||
161 | 0x2420, 0x70db, 0xc1be, -12562) | ||
162 | R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16, | ||
163 | 0xf57c, 0x0faf, 0x4788, -14806) | ||
164 | R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24, | ||
165 | 0xa830, 0x4613, 0xfd47, -27391) | ||
166 | R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32, | ||
167 | 0x6981, -26408, 0x8b45, -2129) | ||
168 | R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40, | ||
169 | 0xffff, 0x5bb1, 0x895d, -10306) | ||
170 | R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48, | ||
171 | 0x6b90, 0x1122, 0xfd98, 0x7193) | ||
172 | R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56, | ||
173 | 0xa679, 0x438e, 0x49b4, 0x0821) | ||
174 | |||
175 | R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23, | ||
176 | 0x0d56, 0x6e0c, 0x1810, 0x6d2d) | ||
177 | R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12, | ||
178 | 0x9d02, -32109, 0x124c, 0x2332) | ||
179 | R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23, | ||
180 | 0x8ea7, 0x4a33, 0x0245, -18270) | ||
181 | R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12, | ||
182 | 0x8eee, -8608, 0xf258, -5095) | ||
183 | R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23, | ||
184 | 0x969d, -10697, 0x1cbe, -15288) | ||
185 | R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12, | ||
186 | 0x3317, 0x3e99, 0xdbd9, 0x7c15) | ||
187 | R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23, | ||
188 | 0xac4b, 0x7772, 0xd8cf, 0x331d) | ||
189 | R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12, | ||
190 | 0x6a28, 0x6dd8, 0x219a, 0x3b68) | ||
191 | |||
192 | R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21, | ||
193 | 0x29cb, 0x28e5, 0x4218, -7788) | ||
194 | R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9, | ||
195 | 0x473f, 0x06d1, 0x3aae, 0x3036) | ||
196 | R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21, | ||
197 | 0xaea1, -15134, 0x640b, -11295) | ||
198 | R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9, | ||
199 | 0x8f4c, 0x4887, 0xbc7c, -22499) | ||
200 | R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21, | ||
201 | 0x7eb8, -27199, 0x00ea, 0x6050) | ||
202 | R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9, | ||
203 | 0xe01a, 0x22fe, 0x4447, 0x69c5) | ||
204 | R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21, | ||
205 | 0xb7f3, 0x0253, 0x59b1, 0x4d5b) | ||
206 | R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9, | ||
207 | 0x4701, -27017, 0xc7bd, -19859) | ||
208 | |||
209 | R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22, | ||
210 | 0x0988, -1462, 0x4c70, -19401) | ||
211 | R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11, | ||
212 | 0xadaf, -5221, 0xfc99, 0x66f7) | ||
213 | R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22, | ||
214 | 0x7e80, -16418, 0xba1e, -25587) | ||
215 | R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11, | ||
216 | 0x4130, 0x380d, 0xe0c5, 0x738d) | ||
217 | lwz rW00,0(rHP) | ||
218 | R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22, | ||
219 | 0xe837, -30770, 0xde8a, 0x69e8) | ||
220 | lwz rW14,4(rHP) | ||
221 | R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11, | ||
222 | 0x9e79, 0x260f, 0x256d, -27941) | ||
223 | lwz rW12,8(rHP) | ||
224 | R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22, | ||
225 | 0xab75, -20775, 0x4f9e, -28397) | ||
226 | lwz rW10,12(rHP) | ||
227 | R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11, | ||
228 | 0x662b, 0x7c56, 0x11b2, 0x0358) | ||
229 | |||
230 | add rH0,rH0,rW00 | ||
231 | stw rH0,0(rHP) | ||
232 | add rH1,rH1,rW14 | ||
233 | stw rH1,4(rHP) | ||
234 | add rH2,rH2,rW12 | ||
235 | stw rH2,8(rHP) | ||
236 | add rH3,rH3,rW10 | ||
237 | stw rH3,12(rHP) | ||
238 | NEXT_BLOCK | ||
239 | |||
240 | bdnz ppc_md5_main | ||
241 | |||
242 | FINALIZE | ||
243 | blr | ||
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c new file mode 100644 index 000000000000..452fb4dc575f --- /dev/null +++ b/arch/powerpc/crypto/md5-glue.c | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * Glue code for MD5 implementation for PPC assembler | ||
3 | * | ||
4 | * Based on generic implementation. | ||
5 | * | ||
6 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the Free | ||
10 | * Software Foundation; either version 2 of the License, or (at your option) | ||
11 | * any later version. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <crypto/internal/hash.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/cryptohash.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <crypto/md5.h> | ||
22 | #include <asm/byteorder.h> | ||
23 | |||
24 | extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks); | ||
25 | |||
26 | static inline void ppc_md5_clear_context(struct md5_state *sctx) | ||
27 | { | ||
28 | int count = sizeof(struct md5_state) >> 2; | ||
29 | u32 *ptr = (u32 *)sctx; | ||
30 | |||
31 | /* make sure we can clear the fast way */ | ||
32 | BUILD_BUG_ON(sizeof(struct md5_state) % 4); | ||
33 | do { *ptr++ = 0; } while (--count); | ||
34 | } | ||
35 | |||
36 | static int ppc_md5_init(struct shash_desc *desc) | ||
37 | { | ||
38 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
39 | |||
40 | sctx->hash[0] = 0x67452301; | ||
41 | sctx->hash[1] = 0xefcdab89; | ||
42 | sctx->hash[2] = 0x98badcfe; | ||
43 | sctx->hash[3] = 0x10325476; | ||
44 | sctx->byte_count = 0; | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static int ppc_md5_update(struct shash_desc *desc, const u8 *data, | ||
50 | unsigned int len) | ||
51 | { | ||
52 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
53 | const unsigned int offset = sctx->byte_count & 0x3f; | ||
54 | unsigned int avail = 64 - offset; | ||
55 | const u8 *src = data; | ||
56 | |||
57 | sctx->byte_count += len; | ||
58 | |||
59 | if (avail > len) { | ||
60 | memcpy((char *)sctx->block + offset, src, len); | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | if (offset) { | ||
65 | memcpy((char *)sctx->block + offset, src, avail); | ||
66 | ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1); | ||
67 | len -= avail; | ||
68 | src += avail; | ||
69 | } | ||
70 | |||
71 | if (len > 63) { | ||
72 | ppc_md5_transform(sctx->hash, src, len >> 6); | ||
73 | src += len & ~0x3f; | ||
74 | len &= 0x3f; | ||
75 | } | ||
76 | |||
77 | memcpy((char *)sctx->block, src, len); | ||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static int ppc_md5_final(struct shash_desc *desc, u8 *out) | ||
82 | { | ||
83 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
84 | const unsigned int offset = sctx->byte_count & 0x3f; | ||
85 | const u8 *src = (const u8 *)sctx->block; | ||
86 | u8 *p = (u8 *)src + offset; | ||
87 | int padlen = 55 - offset; | ||
88 | __le64 *pbits = (__le64 *)((char *)sctx->block + 56); | ||
89 | __le32 *dst = (__le32 *)out; | ||
90 | |||
91 | *p++ = 0x80; | ||
92 | |||
93 | if (padlen < 0) { | ||
94 | memset(p, 0x00, padlen + sizeof(u64)); | ||
95 | ppc_md5_transform(sctx->hash, src, 1); | ||
96 | p = (u8 *)sctx->block; | ||
97 | padlen = 56; | ||
98 | } | ||
99 | |||
100 | memset(p, 0, padlen); | ||
101 | *pbits = cpu_to_le64(sctx->byte_count << 3); | ||
102 | ppc_md5_transform(sctx->hash, src, 1); | ||
103 | |||
104 | dst[0] = cpu_to_le32(sctx->hash[0]); | ||
105 | dst[1] = cpu_to_le32(sctx->hash[1]); | ||
106 | dst[2] = cpu_to_le32(sctx->hash[2]); | ||
107 | dst[3] = cpu_to_le32(sctx->hash[3]); | ||
108 | |||
109 | ppc_md5_clear_context(sctx); | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | static int ppc_md5_export(struct shash_desc *desc, void *out) | ||
114 | { | ||
115 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
116 | |||
117 | memcpy(out, sctx, sizeof(*sctx)); | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static int ppc_md5_import(struct shash_desc *desc, const void *in) | ||
122 | { | ||
123 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
124 | |||
125 | memcpy(sctx, in, sizeof(*sctx)); | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | static struct shash_alg alg = { | ||
130 | .digestsize = MD5_DIGEST_SIZE, | ||
131 | .init = ppc_md5_init, | ||
132 | .update = ppc_md5_update, | ||
133 | .final = ppc_md5_final, | ||
134 | .export = ppc_md5_export, | ||
135 | .import = ppc_md5_import, | ||
136 | .descsize = sizeof(struct md5_state), | ||
137 | .statesize = sizeof(struct md5_state), | ||
138 | .base = { | ||
139 | .cra_name = "md5", | ||
140 | .cra_driver_name= "md5-ppc", | ||
141 | .cra_priority = 200, | ||
142 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
143 | .cra_blocksize = MD5_HMAC_BLOCK_SIZE, | ||
144 | .cra_module = THIS_MODULE, | ||
145 | } | ||
146 | }; | ||
147 | |||
148 | static int __init ppc_md5_mod_init(void) | ||
149 | { | ||
150 | return crypto_register_shash(&alg); | ||
151 | } | ||
152 | |||
153 | static void __exit ppc_md5_mod_fini(void) | ||
154 | { | ||
155 | crypto_unregister_shash(&alg); | ||
156 | } | ||
157 | |||
158 | module_init(ppc_md5_mod_init); | ||
159 | module_exit(ppc_md5_mod_fini); | ||
160 | |||
161 | MODULE_LICENSE("GPL"); | ||
162 | MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler"); | ||
163 | |||
164 | MODULE_ALIAS_CRYPTO("md5"); | ||
165 | MODULE_ALIAS_CRYPTO("md5-ppc"); | ||
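Once registered, the "md5-ppc" implementation is picked up through the regular synchronous hash API thanks to its priority of 200. A hedged usage sketch for kernel code wanting a one-shot digest (the function name md5_digest_example is illustrative; a 2015-era shash descriptor still carries a flags field):

    #include <crypto/hash.h>
    #include <linux/err.h>

    static int md5_digest_example(const u8 *data, unsigned int len, u8 out[16])
    {
            struct crypto_shash *tfm;
            int ret;

            tfm = crypto_alloc_shash("md5", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            {
                    SHASH_DESC_ON_STACK(desc, tfm);

                    desc->tfm = tfm;
                    desc->flags = 0;        /* no special request flags */
                    ret = crypto_shash_digest(desc, data, len, out);
            }

            crypto_free_shash(tfm);
            return ret;
    }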
diff --git a/arch/powerpc/crypto/sha1-spe-asm.S b/arch/powerpc/crypto/sha1-spe-asm.S new file mode 100644 index 000000000000..fcb6cf002889 --- /dev/null +++ b/arch/powerpc/crypto/sha1-spe-asm.S | |||
@@ -0,0 +1,299 @@ | |||
1 | /* | ||
2 | * Fast SHA-1 implementation for SPE instruction set (PPC) | ||
3 | * | ||
4 | * This code makes use of the SPE SIMD instruction set as defined in | ||
5 | * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf | ||
6 | * Implementation is based on optimization guide notes from | ||
7 | * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf | ||
8 | * | ||
9 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <asm/ppc_asm.h> | ||
19 | #include <asm/asm-offsets.h> | ||
20 | |||
21 | #define rHP r3 /* pointer to hash value */ | ||
22 | #define rWP r4 /* pointer to input */ | ||
23 | #define rKP r5 /* pointer to constants */ | ||
24 | |||
25 | #define rW0 r14 /* 64 bit round words */ | ||
26 | #define rW1 r15 | ||
27 | #define rW2 r16 | ||
28 | #define rW3 r17 | ||
29 | #define rW4 r18 | ||
30 | #define rW5 r19 | ||
31 | #define rW6 r20 | ||
32 | #define rW7 r21 | ||
33 | |||
34 | #define rH0 r6 /* 32 bit hash values */ | ||
35 | #define rH1 r7 | ||
36 | #define rH2 r8 | ||
37 | #define rH3 r9 | ||
38 | #define rH4 r10 | ||
39 | |||
40 | #define rT0 r22 /* 64 bit temporary */ | ||
41 | #define rT1 r0 /* 32 bit temporaries */ | ||
42 | #define rT2 r11 | ||
43 | #define rT3 r12 | ||
44 | |||
45 | #define rK r23 /* 64 bit constant in volatile register */ | ||
46 | |||
47 | #define LOAD_K01 | ||
48 | |||
49 | #define LOAD_K11 \ | ||
50 | evlwwsplat rK,0(rKP); | ||
51 | |||
52 | #define LOAD_K21 \ | ||
53 | evlwwsplat rK,4(rKP); | ||
54 | |||
55 | #define LOAD_K31 \ | ||
56 | evlwwsplat rK,8(rKP); | ||
57 | |||
58 | #define LOAD_K41 \ | ||
59 | evlwwsplat rK,12(rKP); | ||
60 | |||
61 | #define INITIALIZE \ | ||
62 | stwu r1,-128(r1); /* create stack frame */ \ | ||
63 | evstdw r14,8(r1); /* We must save non-volatile */ \ | ||
64 | evstdw r15,16(r1); /* registers. Take the chance */ \ | ||
65 | evstdw r16,24(r1); /* and save the SPE part too */ \ | ||
66 | evstdw r17,32(r1); \ | ||
67 | evstdw r18,40(r1); \ | ||
68 | evstdw r19,48(r1); \ | ||
69 | evstdw r20,56(r1); \ | ||
70 | evstdw r21,64(r1); \ | ||
71 | evstdw r22,72(r1); \ | ||
72 | evstdw r23,80(r1); | ||
73 | |||
74 | |||
75 | #define FINALIZE \ | ||
76 | evldw r14,8(r1); /* restore SPE registers */ \ | ||
77 | evldw r15,16(r1); \ | ||
78 | evldw r16,24(r1); \ | ||
79 | evldw r17,32(r1); \ | ||
80 | evldw r18,40(r1); \ | ||
81 | evldw r19,48(r1); \ | ||
82 | evldw r20,56(r1); \ | ||
83 | evldw r21,64(r1); \ | ||
84 | evldw r22,72(r1); \ | ||
85 | evldw r23,80(r1); \ | ||
86 | xor r0,r0,r0; \ | ||
87 | stw r0,8(r1); /* Clear the sensitive data */ \ | ||
88 | stw r0,16(r1); /* we pushed, so it cannot */ \ | ||
89 | stw r0,24(r1); /* leak to another context */ \ | ||
90 | stw r0,32(r1); /* running this code. Assume */ \ | ||
91 | stw r0,40(r1); /* the lower halves of the GPRs */ \ | ||
92 | stw r0,48(r1); /* were already overwritten on */ \ | ||
93 | stw r0,56(r1); /* the way down to here */ \ | ||
94 | stw r0,64(r1); \ | ||
95 | stw r0,72(r1); \ | ||
96 | stw r0,80(r1); \ | ||
97 | addi r1,r1,128; /* cleanup stack frame */ | ||
98 | |||
99 | #ifdef __BIG_ENDIAN__ | ||
100 | #define LOAD_DATA(reg, off) \ | ||
101 | lwz reg,off(rWP); /* load data */ | ||
102 | #define NEXT_BLOCK \ | ||
103 | addi rWP,rWP,64; /* increment per block */ | ||
104 | #else | ||
105 | #define LOAD_DATA(reg, off) \ | ||
106 | lwbrx reg,0,rWP; /* load data */ \ | ||
107 | addi rWP,rWP,4; /* increment per word */ | ||
108 | #define NEXT_BLOCK /* nothing to do */ | ||
109 | #endif | ||
110 | |||
111 | #define R_00_15(a, b, c, d, e, w0, w1, k, off) \ | ||
112 | LOAD_DATA(w0, off) /* 1: W */ \ | ||
113 | and rT2,b,c; /* 1: F' = B and C */ \ | ||
114 | LOAD_K##k##1 \ | ||
115 | andc rT1,d,b; /* 1: F" = ~B and D */ \ | ||
116 | rotrwi rT0,a,27; /* 1: A' = A rotl 5 */ \ | ||
117 | or rT2,rT2,rT1; /* 1: F = F' or F" */ \ | ||
118 | add e,e,rT0; /* 1: E = E + A' */ \ | ||
119 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | ||
120 | add e,e,w0; /* 1: E = E + W */ \ | ||
121 | LOAD_DATA(w1, off+4) /* 2: W */ \ | ||
122 | add e,e,rT2; /* 1: E = E + F */ \ | ||
123 | and rT1,a,b; /* 2: F' = B and C */ \ | ||
124 | add e,e,rK; /* 1: E = E + K */ \ | ||
125 | andc rT2,c,a; /* 2: F" = ~B and D */ \ | ||
126 | add d,d,rK; /* 2: E = E + K */ \ | ||
127 | or rT2,rT2,rT1; /* 2: F = F' or F" */ \ | ||
128 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | ||
129 | add d,d,w1; /* 2: E = E + W */ \ | ||
130 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | ||
131 | add d,d,rT0; /* 2: E = E + A' */ \ | ||
132 | evmergelo w1,w1,w0; /* mix W[0]/W[1] */ \ | ||
133 | add d,d,rT2 /* 2: E = E + F */ | ||
134 | |||
135 | #define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | ||
136 | and rT2,b,c; /* 1: F' = B and C */ \ | ||
137 | evmergelohi rT0,w7,w6; /* W[-3] */ \ | ||
138 | andc rT1,d,b; /* 1: F" = ~B and D */ \ | ||
139 | evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \ | ||
140 | or rT1,rT1,rT2; /* 1: F = F' or F" */ \ | ||
141 | evxor w0,w0,w4; /* W = W xor W[-8] */ \ | ||
142 | add e,e,rT1; /* 1: E = E + F */ \ | ||
143 | evxor w0,w0,w1; /* W = W xor W[-14] */ \ | ||
144 | rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \ | ||
145 | evrlwi w0,w0,1; /* W = W rotl 1 */ \ | ||
146 | add e,e,rT2; /* 1: E = E + A' */ \ | ||
147 | evaddw rT0,w0,rK; /* WK = W + K */ \ | ||
148 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | ||
149 | LOAD_K##k##1 \ | ||
150 | evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \ | ||
151 | add e,e,rT0; /* 1: E = E + WK */ \ | ||
152 | add d,d,rT1; /* 2: E = E + WK */ \ | ||
153 | and rT2,a,b; /* 2: F' = B and C */ \ | ||
154 | andc rT1,c,a; /* 2: F" = ~B and D */ \ | ||
155 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | ||
156 | or rT1,rT1,rT2; /* 2: F = F' or F" */ \ | ||
157 | add d,d,rT0; /* 2: E = E + A' */ \ | ||
158 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | ||
159 | add d,d,rT1 /* 2: E = E + F */ | ||
160 | |||
161 | #define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | ||
162 | evmergelohi rT0,w7,w6; /* W[-3] */ \ | ||
163 | xor rT2,b,c; /* 1: F' = B xor C */ \ | ||
164 | evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \ | ||
165 | xor rT2,rT2,d; /* 1: F = F' xor D */ \ | ||
166 | evxor w0,w0,w4; /* W = W xor W[-8] */ \ | ||
167 | add e,e,rT2; /* 1: E = E + F */ \ | ||
168 | evxor w0,w0,w1; /* W = W xor W[-14] */ \ | ||
169 | rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \ | ||
170 | evrlwi w0,w0,1; /* W = W rotl 1 */ \ | ||
171 | add e,e,rT2; /* 1: E = E + A' */ \ | ||
172 | evaddw rT0,w0,rK; /* WK = W + K */ \ | ||
173 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | ||
174 | LOAD_K##k##1 \ | ||
175 | evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \ | ||
176 | add e,e,rT0; /* 1: E = E + WK */ \ | ||
177 | xor rT2,a,b; /* 2: F' = B xor C */ \ | ||
178 | add d,d,rT1; /* 2: E = E + WK */ \ | ||
179 | xor rT2,rT2,c; /* 2: F = F' xor D */ \ | ||
180 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | ||
181 | add d,d,rT2; /* 2: E = E + F */ \ | ||
182 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | ||
183 | add d,d,rT0 /* 2: E = E + A' */ | ||
184 | |||
185 | #define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | ||
186 | and rT2,b,c; /* 1: F' = B and C */ \ | ||
187 | evmergelohi rT0,w7,w6; /* W[-3] */ \ | ||
188 | or rT1,b,c; /* 1: F" = B or C */ \ | ||
189 | evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \ | ||
190 | and rT1,d,rT1; /* 1: F" = F" and D */ \ | ||
191 | evxor w0,w0,w4; /* W = W xor W[-8] */ \ | ||
192 | or rT2,rT2,rT1; /* 1: F = F' or F" */ \ | ||
193 | evxor w0,w0,w1; /* W = W xor W[-14] */ \ | ||
194 | add e,e,rT2; /* 1: E = E + F */ \ | ||
195 | evrlwi w0,w0,1; /* W = W rotl 1 */ \ | ||
196 | rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \ | ||
197 | evaddw rT0,w0,rK; /* WK = W + K */ \ | ||
198 | add e,e,rT2; /* 1: E = E + A' */ \ | ||
199 | LOAD_K##k##1 \ | ||
200 | evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \ | ||
201 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | ||
202 | add e,e,rT0; /* 1: E = E + WK */ \ | ||
203 | and rT2,a,b; /* 2: F' = B and C */ \ | ||
204 | or rT0,a,b; /* 2: F" = B or C */ \ | ||
205 | add d,d,rT1; /* 2: E = E + WK */ \ | ||
206 | and rT0,c,rT0; /* 2: F" = F" and D */ \ | ||
207 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | ||
208 | or rT2,rT2,rT0; /* 2: F = F' or F" */ \ | ||
209 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | ||
210 | add d,d,rT2; /* 2: E = E + F */ \ | ||
211 | add d,d,rT0 /* 2: E = E + A' */ | ||
212 | |||
213 | #define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | ||
214 | R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) | ||
215 | |||
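The w0/w1/w4/w6/w7 macro arguments rotate through a 16-word window of the message schedule held in eight 64-bit SPE register pairs; evmergelohi stitches together the unaligned W[-3] pair. The recurrence being computed is the standard FIPS 180 schedule; a plain C reference for cross-checking (sketch, not part of the patch):

    #include <stdint.h>

    static uint32_t rotl32(uint32_t x, unsigned int n)
    {
            return (x << n) | (x >> (32 - n));
    }

    /* W[t] = rotl1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]), t = 16..79 */
    static void sha1_schedule(uint32_t W[80])
    {
            int t;

            for (t = 16; t < 80; t++)
                    W[t] = rotl32(W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16], 1);
    }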
216 | _GLOBAL(ppc_spe_sha1_transform) | ||
217 | INITIALIZE | ||
218 | |||
219 | lwz rH0,0(rHP) | ||
220 | lwz rH1,4(rHP) | ||
221 | mtctr r5 | ||
222 | lwz rH2,8(rHP) | ||
223 | lis rKP,PPC_SPE_SHA1_K@h | ||
224 | lwz rH3,12(rHP) | ||
225 | ori rKP,rKP,PPC_SPE_SHA1_K@l | ||
226 | lwz rH4,16(rHP) | ||
227 | |||
228 | ppc_spe_sha1_main: | ||
229 | R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0) | ||
230 | R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8) | ||
231 | R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16) | ||
232 | R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24) | ||
233 | R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32) | ||
234 | R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40) | ||
235 | R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48) | ||
236 | R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56) | ||
237 | |||
238 | R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0) | ||
239 | R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2) | ||
240 | |||
241 | R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0) | ||
242 | R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0) | ||
243 | R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0) | ||
244 | R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0) | ||
245 | R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0) | ||
246 | R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0) | ||
247 | R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0) | ||
248 | R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0) | ||
249 | R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0) | ||
250 | R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3) | ||
251 | |||
252 | R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0) | ||
253 | R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0) | ||
254 | R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0) | ||
255 | R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0) | ||
256 | R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0) | ||
257 | R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0) | ||
258 | R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0) | ||
259 | R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0) | ||
260 | R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0) | ||
261 | R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4) | ||
262 | |||
263 | R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0) | ||
264 | R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0) | ||
265 | R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0) | ||
266 | R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0) | ||
267 | R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0) | ||
268 | R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0) | ||
269 | R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0) | ||
270 | lwz rT3,0(rHP) | ||
271 | R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0) | ||
272 | lwz rW1,4(rHP) | ||
273 | R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0) | ||
274 | lwz rW2,8(rHP) | ||
275 | R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0) | ||
276 | lwz rW3,12(rHP) | ||
277 | NEXT_BLOCK | ||
278 | lwz rW4,16(rHP) | ||
279 | |||
280 | add rH0,rH0,rT3 | ||
281 | stw rH0,0(rHP) | ||
282 | add rH1,rH1,rW1 | ||
283 | stw rH1,4(rHP) | ||
284 | add rH2,rH2,rW2 | ||
285 | stw rH2,8(rHP) | ||
286 | add rH3,rH3,rW3 | ||
287 | stw rH3,12(rHP) | ||
288 | add rH4,rH4,rW4 | ||
289 | stw rH4,16(rHP) | ||
290 | |||
291 | bdnz ppc_spe_sha1_main | ||
292 | |||
293 | FINALIZE | ||
294 | blr | ||
295 | |||
296 | .data | ||
297 | .align 4 | ||
298 | PPC_SPE_SHA1_K: | ||
299 | .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6 | ||
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c new file mode 100644 index 000000000000..3e1d22212521 --- /dev/null +++ b/arch/powerpc/crypto/sha1-spe-glue.c | |||
@@ -0,0 +1,210 @@ | |||
1 | /* | ||
2 | * Glue code for SHA-1 implementation for SPE instructions (PPC) | ||
3 | * | ||
4 | * Based on generic implementation. | ||
5 | * | ||
6 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the Free | ||
10 | * Software Foundation; either version 2 of the License, or (at your option) | ||
11 | * any later version. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <crypto/internal/hash.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/cryptohash.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <crypto/sha.h> | ||
22 | #include <asm/byteorder.h> | ||
23 | #include <asm/switch_to.h> | ||
24 | #include <linux/hardirq.h> | ||
25 | |||
26 | /* | ||
27 | * MAX_BYTES defines the number of bytes that are allowed to be processed | ||
28 | * between preempt_disable() and preempt_enable(). SHA1 takes ~1000 | ||
29 | * operations per 64 bytes. e500 cores can issue two arithmetic instructions | ||
30 | * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2). | ||
31 | * Thus 2KB of input data will need an estimated maximum of 18,000 cycles. | ||
32 | * Headroom for cache misses included. Even with the low end model clocked | ||
33 | * at 667 MHz this amounts to a critical time window of less than 27us. | ||
34 | * | ||
35 | */ | ||
36 | #define MAX_BYTES 2048 | ||
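Worked out: 2048 bytes are 32 blocks of 64 bytes; at ~1000 operations per block that is ~32,000 operations, or ~16,000 cycles at two instructions per cycle, rounded up to 18,000 cycles with cache-miss headroom; 18,000 cycles / 667 MHz is roughly 27 us.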
37 | |||
38 | extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks); | ||
39 | |||
40 | static void spe_begin(void) | ||
41 | { | ||
42 | /* We just start SPE operations and will save SPE registers later. */ | ||
43 | preempt_disable(); | ||
44 | enable_kernel_spe(); | ||
45 | } | ||
46 | |||
47 | static void spe_end(void) | ||
48 | { | ||
49 | /* reenable preemption */ | ||
50 | preempt_enable(); | ||
51 | } | ||
52 | |||
53 | static inline void ppc_sha1_clear_context(struct sha1_state *sctx) | ||
54 | { | ||
55 | int count = sizeof(struct sha1_state) >> 2; | ||
56 | u32 *ptr = (u32 *)sctx; | ||
57 | |||
58 | /* make sure we can clear the fast way */ | ||
59 | BUILD_BUG_ON(sizeof(struct sha1_state) % 4); | ||
60 | do { *ptr++ = 0; } while (--count); | ||
61 | } | ||
62 | |||
63 | static int ppc_spe_sha1_init(struct shash_desc *desc) | ||
64 | { | ||
65 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
66 | |||
67 | sctx->state[0] = SHA1_H0; | ||
68 | sctx->state[1] = SHA1_H1; | ||
69 | sctx->state[2] = SHA1_H2; | ||
70 | sctx->state[3] = SHA1_H3; | ||
71 | sctx->state[4] = SHA1_H4; | ||
72 | sctx->count = 0; | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data, | ||
78 | unsigned int len) | ||
79 | { | ||
80 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
81 | const unsigned int offset = sctx->count & 0x3f; | ||
82 | const unsigned int avail = 64 - offset; | ||
83 | unsigned int bytes; | ||
84 | const u8 *src = data; | ||
85 | |||
86 | if (avail > len) { | ||
87 | sctx->count += len; | ||
88 | memcpy((char *)sctx->buffer + offset, src, len); | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | sctx->count += len; | ||
93 | |||
94 | if (offset) { | ||
95 | memcpy((char *)sctx->buffer + offset, src, avail); | ||
96 | |||
97 | spe_begin(); | ||
98 | ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1); | ||
99 | spe_end(); | ||
100 | |||
101 | len -= avail; | ||
102 | src += avail; | ||
103 | } | ||
104 | |||
105 | while (len > 63) { | ||
106 | bytes = (len > MAX_BYTES) ? MAX_BYTES : len; | ||
107 | bytes = bytes & ~0x3f; | ||
108 | |||
109 | spe_begin(); | ||
110 | ppc_spe_sha1_transform(sctx->state, src, bytes >> 6); | ||
111 | spe_end(); | ||
112 | |||
113 | src += bytes; | ||
114 | len -= bytes; | ||
115 | } | ||
116 | |||
117 | memcpy((char *)sctx->buffer, src, len); | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out) | ||
122 | { | ||
123 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
124 | const unsigned int offset = sctx->count & 0x3f; | ||
125 | char *p = (char *)sctx->buffer + offset; | ||
126 | int padlen; | ||
127 | __be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56); | ||
128 | __be32 *dst = (__be32 *)out; | ||
129 | |||
130 | padlen = 55 - offset; | ||
131 | *p++ = 0x80; | ||
132 | |||
133 | spe_begin(); | ||
134 | |||
135 | if (padlen < 0) { | ||
136 | memset(p, 0x00, padlen + sizeof(u64)); | ||
137 | ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1); | ||
138 | p = (char *)sctx->buffer; | ||
139 | padlen = 56; | ||
140 | } | ||
141 | |||
142 | memset(p, 0, padlen); | ||
143 | *pbits = cpu_to_be64(sctx->count << 3); | ||
144 | ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1); | ||
145 | |||
146 | spe_end(); | ||
147 | |||
148 | dst[0] = cpu_to_be32(sctx->state[0]); | ||
149 | dst[1] = cpu_to_be32(sctx->state[1]); | ||
150 | dst[2] = cpu_to_be32(sctx->state[2]); | ||
151 | dst[3] = cpu_to_be32(sctx->state[3]); | ||
152 | dst[4] = cpu_to_be32(sctx->state[4]); | ||
153 | |||
154 | ppc_sha1_clear_context(sctx); | ||
155 | return 0; | ||
156 | } | ||
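A concrete walk-through of the padding path above, using the 3-byte message "abc" (count = 3, so offset = 3 and padlen = 52): buffer[3] is set to 0x80, bytes 4..55 are zeroed, bytes 56..63 receive the big-endian bit count cpu_to_be64(24), and one final transform produces the digest. Only when offset > 55 is there no room left for the 8-byte length field; that is the case the padlen < 0 branch handles by zero-filling the remainder of the block, running an extra transform, and then padding a fresh block.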
157 | |||
158 | static int ppc_spe_sha1_export(struct shash_desc *desc, void *out) | ||
159 | { | ||
160 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
161 | |||
162 | memcpy(out, sctx, sizeof(*sctx)); | ||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in) | ||
167 | { | ||
168 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
169 | |||
170 | memcpy(sctx, in, sizeof(*sctx)); | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | static struct shash_alg alg = { | ||
175 | .digestsize = SHA1_DIGEST_SIZE, | ||
176 | .init = ppc_spe_sha1_init, | ||
177 | .update = ppc_spe_sha1_update, | ||
178 | .final = ppc_spe_sha1_final, | ||
179 | .export = ppc_spe_sha1_export, | ||
180 | .import = ppc_spe_sha1_import, | ||
181 | .descsize = sizeof(struct sha1_state), | ||
182 | .statesize = sizeof(struct sha1_state), | ||
183 | .base = { | ||
184 | .cra_name = "sha1", | ||
185 | .cra_driver_name= "sha1-ppc-spe", | ||
186 | .cra_priority = 300, | ||
187 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
188 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
189 | .cra_module = THIS_MODULE, | ||
190 | } | ||
191 | }; | ||
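For context, a minimal sketch of how an in-kernel caller would reach this driver once loaded: the crypto core resolves "sha1" to the highest-priority registered implementation, so at priority 300 sha1-ppc-spe wins over the generic C code. The function names below are the kernel shash API; demo_sha1() itself and its error handling are illustrative only.

	#include <crypto/hash.h>
	#include <linux/err.h>
	#include <linux/slab.h>

	static int demo_sha1(const u8 *data, unsigned int len, u8 *digest)
	{
		struct crypto_shash *tfm;
		struct shash_desc *desc;
		int ret;

		tfm = crypto_alloc_shash("sha1", 0, 0);	/* picks sha1-ppc-spe if loaded */
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		/* descriptor carries the per-request hash state */
		desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
		if (!desc) {
			crypto_free_shash(tfm);
			return -ENOMEM;
		}
		desc->tfm = tfm;

		ret = crypto_shash_digest(desc, data, len, digest);	/* init+update+final */

		kfree(desc);
		crypto_free_shash(tfm);
		return ret;
	}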
192 | |||
193 | static int __init ppc_spe_sha1_mod_init(void) | ||
194 | { | ||
195 | return crypto_register_shash(&alg); | ||
196 | } | ||
197 | |||
198 | static void __exit ppc_spe_sha1_mod_fini(void) | ||
199 | { | ||
200 | crypto_unregister_shash(&alg); | ||
201 | } | ||
202 | |||
203 | module_init(ppc_spe_sha1_mod_init); | ||
204 | module_exit(ppc_spe_sha1_mod_fini); | ||
205 | |||
206 | MODULE_LICENSE("GPL"); | ||
207 | MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized"); | ||
208 | |||
209 | MODULE_ALIAS_CRYPTO("sha1"); | ||
210 | MODULE_ALIAS_CRYPTO("sha1-ppc-spe"); | ||
diff --git a/arch/powerpc/crypto/sha256-spe-asm.S b/arch/powerpc/crypto/sha256-spe-asm.S new file mode 100644 index 000000000000..2d10e4c08f03 --- /dev/null +++ b/arch/powerpc/crypto/sha256-spe-asm.S | |||
@@ -0,0 +1,323 @@ | |||
1 | /* | ||
2 | * Fast SHA-256 implementation for SPE instruction set (PPC) | ||
3 | * | ||
4 | * This code makes use of the SPE SIMD instruction set as defined in | ||
5 | * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf | ||
6 | * Implementation is based on optimization guide notes from | ||
7 | * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf | ||
8 | * | ||
9 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <asm/ppc_asm.h> | ||
19 | #include <asm/asm-offsets.h> | ||
20 | |||
21 | #define rHP r3 /* pointer to hash values in memory */ | ||
22 | #define rKP r24 /* pointer to round constants */ | ||
23 | #define rWP r4 /* pointer to input data */ | ||
24 | |||
25 | #define rH0 r5 /* 8 32 bit hash values in 8 registers */ | ||
26 | #define rH1 r6 | ||
27 | #define rH2 r7 | ||
28 | #define rH3 r8 | ||
29 | #define rH4 r9 | ||
30 | #define rH5 r10 | ||
31 | #define rH6 r11 | ||
32 | #define rH7 r12 | ||
33 | |||
34 | #define rW0 r14 /* 64 bit registers. 16 words in 8 registers */ | ||
35 | #define rW1 r15 | ||
36 | #define rW2 r16 | ||
37 | #define rW3 r17 | ||
38 | #define rW4 r18 | ||
39 | #define rW5 r19 | ||
40 | #define rW6 r20 | ||
41 | #define rW7 r21 | ||
42 | |||
43 | #define rT0 r22 /* 64 bit temporaries */ | ||
44 | #define rT1 r23 | ||
45 | #define rT2 r0 /* 32 bit temporaries */ | ||
46 | #define rT3 r25 | ||
47 | |||
48 | #define CMP_KN_LOOP | ||
49 | #define CMP_KC_LOOP \ | ||
50 | cmpwi rT1,0; | ||
51 | |||
52 | #define INITIALIZE \ | ||
53 | stwu r1,-128(r1); /* create stack frame */ \ | ||
54 | evstdw r14,8(r1); /* We must save non volatile */ \ | ||
55 | evstdw r15,16(r1); /* registers. Take the chance */ \ | ||
56 | evstdw r16,24(r1); /* and save the SPE part too */ \ | ||
57 | evstdw r17,32(r1); \ | ||
58 | evstdw r18,40(r1); \ | ||
59 | evstdw r19,48(r1); \ | ||
60 | evstdw r20,56(r1); \ | ||
61 | evstdw r21,64(r1); \ | ||
62 | evstdw r22,72(r1); \ | ||
63 | evstdw r23,80(r1); \ | ||
64 | stw r24,88(r1); /* save normal registers */ \ | ||
65 | stw r25,92(r1); | ||
66 | |||
67 | |||
68 | #define FINALIZE \ | ||
69 | evldw r14,8(r1); /* restore SPE registers */ \ | ||
70 | evldw r15,16(r1); \ | ||
71 | evldw r16,24(r1); \ | ||
72 | evldw r17,32(r1); \ | ||
73 | evldw r18,40(r1); \ | ||
74 | evldw r19,48(r1); \ | ||
75 | evldw r20,56(r1); \ | ||
76 | evldw r21,64(r1); \ | ||
77 | evldw r22,72(r1); \ | ||
78 | evldw r23,80(r1); \ | ||
79 | lwz r24,88(r1); /* restore normal registers */ \ | ||
80 | lwz r25,92(r1); \ | ||
81 | xor r0,r0,r0; \ | ||
82 | stw r0,8(r1); /* Delete sensitive data */ \ | ||
83 | stw r0,16(r1); /* that we might have pushed */ \ | ||
84 | stw r0,24(r1); /* from other context that runs */ \ | ||
85 | stw r0,32(r1); /* the same code. Assume that */ \ | ||
86 | stw r0,40(r1); /* the lower part of the GPRs */ \ | ||
87 | stw r0,48(r1); /* was already overwritten on */ \ | ||
88 | stw r0,56(r1); /* the way down to here */ \ | ||
89 | stw r0,64(r1); \ | ||
90 | stw r0,72(r1); \ | ||
91 | stw r0,80(r1); \ | ||
92 | addi r1,r1,128; /* cleanup stack frame */ | ||
93 | |||
94 | #ifdef __BIG_ENDIAN__ | ||
95 | #define LOAD_DATA(reg, off) \ | ||
96 | lwz reg,off(rWP); /* load data */ | ||
97 | #define NEXT_BLOCK \ | ||
98 | addi rWP,rWP,64; /* increment per block */ | ||
99 | #else | ||
100 | #define LOAD_DATA(reg, off) \ | ||
101 | lwbrx reg,0,rWP; /* load data */ \ | ||
102 | addi rWP,rWP,4; /* increment per word */ | ||
103 | #define NEXT_BLOCK /* nothing to do */ | ||
104 | #endif | ||
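Either way the registers end up holding the schedule words in the big-endian order SHA-2 is specified in: on big-endian parts lwz loads them directly and the pointer advances once per block, while on little-endian parts lwbrx byte-reverses each 32-bit load and the pointer advances per word. In C terms each LOAD_DATA amounts to roughly this (a sketch, using the kernel byte-order helper):

	w = be32_to_cpup((const __be32 *)src);	/* byte swap happens only on LE */
	src += 4;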
105 | |||
106 | #define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \ | ||
107 | LOAD_DATA(w, off) /* 1: W */ \ | ||
108 | rotrwi rT0,e,6; /* 1: S1 = e rotr 6 */ \ | ||
109 | rotrwi rT1,e,11; /* 1: S1' = e rotr 11 */ \ | ||
110 | rotrwi rT2,e,25; /* 1: S1" = e rotr 25 */ \ | ||
111 | xor rT0,rT0,rT1; /* 1: S1 = S1 xor S1' */ \ | ||
112 | and rT3,e,f; /* 1: ch = e and f */ \ | ||
113 | xor rT0,rT0,rT2; /* 1: S1 = S1 xor S1" */ \ | ||
114 | andc rT1,g,e; /* 1: ch' = ~e and g */ \ | ||
115 | lwz rT2,off(rKP); /* 1: K */ \ | ||
116 | xor rT3,rT3,rT1; /* 1: ch = ch xor ch' */ \ | ||
117 | add h,h,rT0; /* 1: temp1 = h + S1 */ \ | ||
118 | add rT3,rT3,w; /* 1: temp1' = ch + w */ \ | ||
119 | rotrwi rT0,a,2; /* 1: S0 = a rotr 2 */ \ | ||
120 | add h,h,rT3; /* 1: temp1 = temp1 + temp1' */ \ | ||
121 | rotrwi rT1,a,13; /* 1: S0' = a rotr 13 */ \ | ||
122 | add h,h,rT2; /* 1: temp1 = temp1 + K */ \ | ||
123 | rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \ | ||
124 | xor rT0,rT0,rT1; /* 1: S0 = S0 xor S0' */ \ | ||
125 | add d,d,h; /* 1: d = d + temp1 */ \ | ||
126 | xor rT3,rT0,rT3; /* 1: S0 = S0 xor S0" */ \ | ||
127 | evmergelo w,w,w; /* shift W */ \ | ||
128 | or rT2,a,b; /* 1: maj = a or b */ \ | ||
129 | and rT1,a,b; /* 1: maj' = a and b */ \ | ||
130 | and rT2,rT2,c; /* 1: maj = maj and c */ \ | ||
131 | LOAD_DATA(w, off+4) /* 2: W */ \ | ||
132 | or rT2,rT1,rT2; /* 1: maj = maj or maj' */ \ | ||
133 | rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \ | ||
134 | add rT3,rT3,rT2; /* 1: temp2 = S0 + maj */ \ | ||
135 | rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \ | ||
136 | add h,h,rT3; /* 1: h = temp1 + temp2 */ \ | ||
137 | rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \ | ||
138 | xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \ | ||
139 | and rT3,d,e; /* 2: ch = e and f */ \ | ||
140 | xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \ | ||
141 | andc rT1,f,d; /* 2: ch' = ~e and g */ \ | ||
142 | lwz rT2,off+4(rKP); /* 2: K */ \ | ||
143 | xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \ | ||
144 | add g,g,rT0; /* 2: temp1 = h + S1 */ \ | ||
145 | add rT3,rT3,w; /* 2: temp1' = ch + w */ \ | ||
146 | rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \ | ||
147 | add g,g,rT3; /* 2: temp1 = temp1 + temp1' */ \ | ||
148 | rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \ | ||
149 | add g,g,rT2; /* 2: temp1 = temp1 + K */ \ | ||
150 | rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \ | ||
151 | xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \ | ||
152 | or rT2,h,a; /* 2: maj = a or b */ \ | ||
153 | xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \ | ||
154 | and rT1,h,a; /* 2: maj' = a and b */ \ | ||
155 | and rT2,rT2,b; /* 2: maj = maj and c */ \ | ||
156 | add c,c,g; /* 2: d = d + temp1 */ \ | ||
157 | or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \ | ||
158 | add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \ | ||
159 | add g,g,rT3 /* 2: h = temp1 + temp2 */ | ||
160 | |||
161 | #define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \ | ||
162 | rotrwi rT2,e,6; /* 1: S1 = e rotr 6 */ \ | ||
163 | evmergelohi rT0,w0,w1; /* w[-15] */ \ | ||
164 | rotrwi rT3,e,11; /* 1: S1' = e rotr 11 */ \ | ||
165 | evsrwiu rT1,rT0,3; /* s0 = w[-15] >> 3 */ \ | ||
166 | xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \ | ||
167 | evrlwi rT0,rT0,25; /* s0' = w[-15] rotr 7 */ \ | ||
168 | rotrwi rT3,e,25; /* 1: S1" = e rotr 25 */ \ | ||
169 | evxor rT1,rT1,rT0; /* s0 = s0 xor s0' */ \ | ||
170 | xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1" */ \ | ||
171 | evrlwi rT0,rT0,21; /* s0" = w[-15] rotr 18 */ \ | ||
172 | add h,h,rT2; /* 1: temp1 = h + S1 */ \ | ||
173 | evxor rT0,rT0,rT1; /* s0 = s0 xor s0' */ \ | ||
174 | and rT2,e,f; /* 1: ch = e and f */ \ | ||
175 | evaddw w0,w0,rT0; /* w = w[-16] + s0 */ \ | ||
176 | andc rT3,g,e; /* 1: ch' = ~e and g */ \ | ||
177 | evsrwiu rT0,w7,10; /* s1 = w[-2] >> 10 */ \ | ||
178 | xor rT2,rT2,rT3; /* 1: ch = ch xor ch' */ \ | ||
179 | evrlwi rT1,w7,15; /* s1' = w[-2] rotr 17 */ \ | ||
180 | add h,h,rT2; /* 1: temp1 = temp1 + ch */ \ | ||
181 | evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \ | ||
182 | rotrwi rT2,a,2; /* 1: S0 = a rotr 2 */ \ | ||
183 | evrlwi rT1,w7,13; /* s1" = w[-2] rotr 19 */ \ | ||
184 | rotrwi rT3,a,13; /* 1: S0' = a rotr 13 */ \ | ||
185 | evxor rT0,rT0,rT1; /* s1 = s1 xor s1" */ \ | ||
186 | xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \ | ||
187 | evldw rT1,off(rKP); /* k */ \ | ||
188 | rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \ | ||
189 | evaddw w0,w0,rT0; /* w = w + s1 */ \ | ||
190 | xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0" */ \ | ||
191 | evmergelohi rT0,w4,w5; /* w[-7] */ \ | ||
192 | and rT3,a,b; /* 1: maj = a and b */ \ | ||
193 | evaddw w0,w0,rT0; /* w = w + w[-7] */ \ | ||
194 | CMP_K##k##_LOOP \ | ||
195 | add rT2,rT2,rT3; /* 1: temp2 = S0 + maj */ \ | ||
196 | evaddw rT1,rT1,w0; /* wk = w + k */ \ | ||
197 | xor rT3,a,b; /* 1: maj = a xor b */ \ | ||
198 | evmergehi rT0,rT1,rT1; /* wk1/wk2 */ \ | ||
199 | and rT3,rT3,c; /* 1: maj = maj and c */ \ | ||
200 | add h,h,rT0; /* 1: temp1 = temp1 + wk */ \ | ||
201 | add rT2,rT2,rT3; /* 1: temp2 = temp2 + maj */ \ | ||
202 | add g,g,rT1; /* 2: temp1 = temp1 + wk */ \ | ||
203 | add d,d,h; /* 1: d = d + temp1 */ \ | ||
204 | rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \ | ||
205 | add h,h,rT2; /* 1: h = temp1 + temp2 */ \ | ||
206 | rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \ | ||
207 | rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \ | ||
208 | xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \ | ||
209 | and rT3,d,e; /* 2: ch = e and f */ \ | ||
210 | xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \ | ||
211 | andc rT1,f,d; /* 2: ch' = ~e and g */ \ | ||
212 | add g,g,rT0; /* 2: temp1 = h + S1 */ \ | ||
213 | xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \ | ||
214 | rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \ | ||
215 | add g,g,rT3; /* 2: temp1 = temp1 + ch */ \ | ||
216 | rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \ | ||
217 | rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \ | ||
218 | xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \ | ||
219 | or rT2,h,a; /* 2: maj = a or b */ \ | ||
220 | and rT1,h,a; /* 2: maj' = a and b */ \ | ||
221 | and rT2,rT2,b; /* 2: maj = maj and c */ \ | ||
222 | xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \ | ||
223 | or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \ | ||
224 | add c,c,g; /* 2: d = d + temp1 */ \ | ||
225 | add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \ | ||
226 | add g,g,rT3 /* 2: h = temp1 + temp2 */ | ||
227 | |||
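For readers decoding the interleaved scheduling above, here is a plain-C statement of the FIPS 180-4 round that each macro evaluates twice per invocation (the register lists are rotated between invocations rather than values being shuffled). This is a reference sketch, not the kernel's code; ror32() is the rotate helper from <linux/bitops.h>.

	#include <linux/bitops.h>	/* ror32() */
	#include <linux/types.h>

	/*
	 * Message schedule for rounds 16..63, which R_CALC_W computes on
	 * packed 2x32-bit SPE registers via evmergelohi/evsrwiu/evrlwi:
	 *   s0   = ror32(w[i-15], 7) ^ ror32(w[i-15], 18) ^ (w[i-15] >> 3);
	 *   s1   = ror32(w[i-2], 17) ^ ror32(w[i-2], 19) ^ (w[i-2] >> 10);
	 *   w[i] = w[i-16] + s0 + w[i-7] + s1;
	 */
	static void sha256_round(u32 st[8], u32 ki, u32 wi)
	{
		u32 S1  = ror32(st[4], 6) ^ ror32(st[4], 11) ^ ror32(st[4], 25);
		u32 ch  = (st[4] & st[5]) ^ (~st[4] & st[6]);
		u32 t1  = st[7] + S1 + ch + ki + wi;
		u32 S0  = ror32(st[0], 2) ^ ror32(st[0], 13) ^ ror32(st[0], 22);
		/* same majority form the asm uses: (a & b) | ((a | b) & c) */
		u32 maj = (st[0] & st[1]) | ((st[0] | st[1]) & st[2]);
		u32 t2  = S0 + maj;

		st[7] = st[6]; st[6] = st[5]; st[5] = st[4];
		st[4] = st[3] + t1;
		st[3] = st[2]; st[2] = st[1]; st[1] = st[0];
		st[0] = t1 + t2;
	}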
228 | _GLOBAL(ppc_spe_sha256_transform) | ||
229 | INITIALIZE | ||
230 | |||
231 | mtctr r5 | ||
232 | lwz rH0,0(rHP) | ||
233 | lwz rH1,4(rHP) | ||
234 | lwz rH2,8(rHP) | ||
235 | lwz rH3,12(rHP) | ||
236 | lwz rH4,16(rHP) | ||
237 | lwz rH5,20(rHP) | ||
238 | lwz rH6,24(rHP) | ||
239 | lwz rH7,28(rHP) | ||
240 | |||
241 | ppc_spe_sha256_main: | ||
242 | lis rKP,PPC_SPE_SHA256_K@ha | ||
243 | addi rKP,rKP,PPC_SPE_SHA256_K@l | ||
244 | |||
245 | R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0) | ||
246 | R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8) | ||
247 | R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16) | ||
248 | R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24) | ||
249 | R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32) | ||
250 | R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40) | ||
251 | R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48) | ||
252 | R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56) | ||
253 | ppc_spe_sha256_16_rounds: | ||
254 | addi rKP,rKP,64 | ||
255 | R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, | ||
256 | rW0, rW1, rW4, rW5, rW7, N, 0) | ||
257 | R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, | ||
258 | rW1, rW2, rW5, rW6, rW0, N, 8) | ||
259 | R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, | ||
260 | rW2, rW3, rW6, rW7, rW1, N, 16) | ||
261 | R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, | ||
262 | rW3, rW4, rW7, rW0, rW2, N, 24) | ||
263 | R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, | ||
264 | rW4, rW5, rW0, rW1, rW3, N, 32) | ||
265 | R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, | ||
266 | rW5, rW6, rW1, rW2, rW4, N, 40) | ||
267 | R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, | ||
268 | rW6, rW7, rW2, rW3, rW5, N, 48) | ||
269 | R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, | ||
270 | rW7, rW0, rW3, rW4, rW6, C, 56) | ||
271 | bt gt,ppc_spe_sha256_16_rounds | ||
272 | |||
273 | lwz rW0,0(rHP) | ||
274 | NEXT_BLOCK | ||
275 | lwz rW1,4(rHP) | ||
276 | lwz rW2,8(rHP) | ||
277 | lwz rW3,12(rHP) | ||
278 | lwz rW4,16(rHP) | ||
279 | lwz rW5,20(rHP) | ||
280 | lwz rW6,24(rHP) | ||
281 | lwz rW7,28(rHP) | ||
282 | |||
283 | add rH0,rH0,rW0 | ||
284 | stw rH0,0(rHP) | ||
285 | add rH1,rH1,rW1 | ||
286 | stw rH1,4(rHP) | ||
287 | add rH2,rH2,rW2 | ||
288 | stw rH2,8(rHP) | ||
289 | add rH3,rH3,rW3 | ||
290 | stw rH3,12(rHP) | ||
291 | add rH4,rH4,rW4 | ||
292 | stw rH4,16(rHP) | ||
293 | add rH5,rH5,rW5 | ||
294 | stw rH5,20(rHP) | ||
295 | add rH6,rH6,rW6 | ||
296 | stw rH6,24(rHP) | ||
297 | add rH7,rH7,rW7 | ||
298 | stw rH7,28(rHP) | ||
299 | |||
300 | bdnz ppc_spe_sha256_main | ||
301 | |||
302 | FINALIZE | ||
303 | blr | ||
304 | |||
305 | .data | ||
306 | .align 5 | ||
307 | PPC_SPE_SHA256_K: | ||
308 | .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
309 | .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
310 | .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
311 | .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
312 | .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
313 | .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
314 | .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
315 | .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
316 | .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
317 | .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
318 | .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
319 | .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
320 | .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
321 | .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
322 | .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
323 | .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c new file mode 100644 index 000000000000..f4a616fe1a82 --- /dev/null +++ b/arch/powerpc/crypto/sha256-spe-glue.c | |||
@@ -0,0 +1,275 @@ | |||
1 | /* | ||
2 | * Glue code for SHA-256 implementation for SPE instructions (PPC) | ||
3 | * | ||
4 | * Based on the generic implementation. The assembler module takes care | ||
5 | * of the SPE registers so that it can run from interrupt context. | ||
6 | * | ||
7 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms of the GNU General Public License as published by the Free | ||
11 | * Software Foundation; either version 2 of the License, or (at your option) | ||
12 | * any later version. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <crypto/internal/hash.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/cryptohash.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <crypto/sha.h> | ||
23 | #include <asm/byteorder.h> | ||
24 | #include <asm/switch_to.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | |||
27 | /* | ||
28 | * MAX_BYTES defines the number of bytes that are allowed to be processed | ||
29 | * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000 | ||
30 | * operations per 64 bytes. e500 cores can issue two arithmetic instructions | ||
31 | * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2). | ||
32 | * Thus 1KB of input data will need an estimated maximum of 18,000 cycles. | ||
33 | * Headroom for cache misses is included. Even with the low-end model clocked | ||
34 | * at 667 MHz this amounts to a critical time window of less than 27us. | ||
35 | * | ||
36 | */ | ||
37 | #define MAX_BYTES 1024 | ||
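The same budget as the SHA-1 module, rescaled for the heavier compression function: 1024 / 64 = 16 blocks * ~2,000 ops = ~32,000 operations, i.e. about 16,000 dual-issue cycles and, with headroom, the same ~27 us preempt-off window at 667 MHz.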
38 | |||
39 | extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks); | ||
40 | |||
41 | static void spe_begin(void) | ||
42 | { | ||
43 | /* We just start SPE operations and will save SPE registers later. */ | ||
44 | preempt_disable(); | ||
45 | enable_kernel_spe(); | ||
46 | } | ||
47 | |||
48 | static void spe_end(void) | ||
49 | { | ||
50 | /* reenable preemption */ | ||
51 | preempt_enable(); | ||
52 | } | ||
53 | |||
54 | static inline void ppc_sha256_clear_context(struct sha256_state *sctx) | ||
55 | { | ||
56 | int count = sizeof(struct sha256_state) >> 2; | ||
57 | u32 *ptr = (u32 *)sctx; | ||
58 | |||
59 | /* make sure we can clear the fast way */ | ||
60 | BUILD_BUG_ON(sizeof(struct sha256_state) % 4); | ||
61 | do { *ptr++ = 0; } while (--count); | ||
62 | } | ||
63 | |||
64 | static int ppc_spe_sha256_init(struct shash_desc *desc) | ||
65 | { | ||
66 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
67 | |||
68 | sctx->state[0] = SHA256_H0; | ||
69 | sctx->state[1] = SHA256_H1; | ||
70 | sctx->state[2] = SHA256_H2; | ||
71 | sctx->state[3] = SHA256_H3; | ||
72 | sctx->state[4] = SHA256_H4; | ||
73 | sctx->state[5] = SHA256_H5; | ||
74 | sctx->state[6] = SHA256_H6; | ||
75 | sctx->state[7] = SHA256_H7; | ||
76 | sctx->count = 0; | ||
77 | |||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static int ppc_spe_sha224_init(struct shash_desc *desc) | ||
82 | { | ||
83 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
84 | |||
85 | sctx->state[0] = SHA224_H0; | ||
86 | sctx->state[1] = SHA224_H1; | ||
87 | sctx->state[2] = SHA224_H2; | ||
88 | sctx->state[3] = SHA224_H3; | ||
89 | sctx->state[4] = SHA224_H4; | ||
90 | sctx->state[5] = SHA224_H5; | ||
91 | sctx->state[6] = SHA224_H6; | ||
92 | sctx->state[7] = SHA224_H7; | ||
93 | sctx->count = 0; | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data, | ||
99 | unsigned int len) | ||
100 | { | ||
101 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
102 | const unsigned int offset = sctx->count & 0x3f; | ||
103 | const unsigned int avail = 64 - offset; | ||
104 | unsigned int bytes; | ||
105 | const u8 *src = data; | ||
106 | |||
107 | if (avail > len) { | ||
108 | sctx->count += len; | ||
109 | memcpy((char *)sctx->buf + offset, src, len); | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | sctx->count += len; | ||
114 | |||
115 | if (offset) { | ||
116 | memcpy((char *)sctx->buf + offset, src, avail); | ||
117 | |||
118 | spe_begin(); | ||
119 | ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1); | ||
120 | spe_end(); | ||
121 | |||
122 | len -= avail; | ||
123 | src += avail; | ||
124 | } | ||
125 | |||
126 | while (len > 63) { | ||
127 | /* cut input data into smaller blocks */ | ||
128 | bytes = (len > MAX_BYTES) ? MAX_BYTES : len; | ||
129 | bytes = bytes & ~0x3f; | ||
130 | |||
131 | spe_begin(); | ||
132 | ppc_spe_sha256_transform(sctx->state, src, bytes >> 6); | ||
133 | spe_end(); | ||
134 | |||
135 | src += bytes; | ||
136 | len -= bytes; | ||
137 | } | ||
138 | |||
139 | memcpy((char *)sctx->buf, src, len); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out) | ||
144 | { | ||
145 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
146 | const unsigned int offset = sctx->count & 0x3f; | ||
147 | char *p = (char *)sctx->buf + offset; | ||
148 | int padlen; | ||
149 | __be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56); | ||
150 | __be32 *dst = (__be32 *)out; | ||
151 | |||
152 | padlen = 55 - offset; | ||
153 | *p++ = 0x80; | ||
154 | |||
155 | spe_begin(); | ||
156 | |||
157 | if (padlen < 0) { | ||
158 | memset(p, 0x00, padlen + sizeof (u64)); | ||
159 | ppc_spe_sha256_transform(sctx->state, sctx->buf, 1); | ||
160 | p = (char *)sctx->buf; | ||
161 | padlen = 56; | ||
162 | } | ||
163 | |||
164 | memset(p, 0, padlen); | ||
165 | *pbits = cpu_to_be64(sctx->count << 3); | ||
166 | ppc_spe_sha256_transform(sctx->state, sctx->buf, 1); | ||
167 | |||
168 | spe_end(); | ||
169 | |||
170 | dst[0] = cpu_to_be32(sctx->state[0]); | ||
171 | dst[1] = cpu_to_be32(sctx->state[1]); | ||
172 | dst[2] = cpu_to_be32(sctx->state[2]); | ||
173 | dst[3] = cpu_to_be32(sctx->state[3]); | ||
174 | dst[4] = cpu_to_be32(sctx->state[4]); | ||
175 | dst[5] = cpu_to_be32(sctx->state[5]); | ||
176 | dst[6] = cpu_to_be32(sctx->state[6]); | ||
177 | dst[7] = cpu_to_be32(sctx->state[7]); | ||
178 | |||
179 | ppc_sha256_clear_context(sctx); | ||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out) | ||
184 | { | ||
185 | u32 D[SHA256_DIGEST_SIZE >> 2]; | ||
186 | __be32 *dst = (__be32 *)out; | ||
187 | |||
188 | ppc_spe_sha256_final(desc, (u8 *)D); | ||
189 | |||
190 | /* avoid bytewise memcpy */ | ||
191 | dst[0] = D[0]; | ||
192 | dst[1] = D[1]; | ||
193 | dst[2] = D[2]; | ||
194 | dst[3] = D[3]; | ||
195 | dst[4] = D[4]; | ||
196 | dst[5] = D[5]; | ||
197 | dst[6] = D[6]; | ||
198 | |||
199 | /* clear sensitive data */ | ||
200 | memzero_explicit(D, SHA256_DIGEST_SIZE); | ||
201 | return 0; | ||
202 | } | ||
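SHA-224 is simply SHA-256 run from different initial values and truncated to 224 bits, which is why the final above copies only seven of the eight state words out of the intermediate digest.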
203 | |||
204 | static int ppc_spe_sha256_export(struct shash_desc *desc, void *out) | ||
205 | { | ||
206 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
207 | |||
208 | memcpy(out, sctx, sizeof(*sctx)); | ||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in) | ||
213 | { | ||
214 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
215 | |||
216 | memcpy(sctx, in, sizeof(*sctx)); | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | static struct shash_alg algs[2] = { { | ||
221 | .digestsize = SHA256_DIGEST_SIZE, | ||
222 | .init = ppc_spe_sha256_init, | ||
223 | .update = ppc_spe_sha256_update, | ||
224 | .final = ppc_spe_sha256_final, | ||
225 | .export = ppc_spe_sha256_export, | ||
226 | .import = ppc_spe_sha256_import, | ||
227 | .descsize = sizeof(struct sha256_state), | ||
228 | .statesize = sizeof(struct sha256_state), | ||
229 | .base = { | ||
230 | .cra_name = "sha256", | ||
231 | .cra_driver_name= "sha256-ppc-spe", | ||
232 | .cra_priority = 300, | ||
233 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
234 | .cra_blocksize = SHA256_BLOCK_SIZE, | ||
235 | .cra_module = THIS_MODULE, | ||
236 | } | ||
237 | }, { | ||
238 | .digestsize = SHA224_DIGEST_SIZE, | ||
239 | .init = ppc_spe_sha224_init, | ||
240 | .update = ppc_spe_sha256_update, | ||
241 | .final = ppc_spe_sha224_final, | ||
242 | .export = ppc_spe_sha256_export, | ||
243 | .import = ppc_spe_sha256_import, | ||
244 | .descsize = sizeof(struct sha256_state), | ||
245 | .statesize = sizeof(struct sha256_state), | ||
246 | .base = { | ||
247 | .cra_name = "sha224", | ||
248 | .cra_driver_name= "sha224-ppc-spe", | ||
249 | .cra_priority = 300, | ||
250 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
251 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
252 | .cra_module = THIS_MODULE, | ||
253 | } | ||
254 | } }; | ||
255 | |||
256 | static int __init ppc_spe_sha256_mod_init(void) | ||
257 | { | ||
258 | return crypto_register_shashes(algs, ARRAY_SIZE(algs)); | ||
259 | } | ||
260 | |||
261 | static void __exit ppc_spe_sha256_mod_fini(void) | ||
262 | { | ||
263 | crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); | ||
264 | } | ||
265 | |||
266 | module_init(ppc_spe_sha256_mod_init); | ||
267 | module_exit(ppc_spe_sha256_mod_fini); | ||
268 | |||
269 | MODULE_LICENSE("GPL"); | ||
270 | MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized"); | ||
271 | |||
272 | MODULE_ALIAS_CRYPTO("sha224"); | ||
273 | MODULE_ALIAS_CRYPTO("sha224-ppc-spe"); | ||
274 | MODULE_ALIAS_CRYPTO("sha256"); | ||
275 | MODULE_ALIAS_CRYPTO("sha256-ppc-spe"); | ||
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 54f60ab41c63..112cefacf2af 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -797,7 +797,9 @@ static int rfc4106_init(struct crypto_tfm *tfm) | |||
797 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); | 797 | PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); |
798 | struct crypto_aead *cryptd_child; | 798 | struct crypto_aead *cryptd_child; |
799 | struct aesni_rfc4106_gcm_ctx *child_ctx; | 799 | struct aesni_rfc4106_gcm_ctx *child_ctx; |
800 | cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); | 800 | cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", |
801 | CRYPTO_ALG_INTERNAL, | ||
802 | CRYPTO_ALG_INTERNAL); | ||
801 | if (IS_ERR(cryptd_tfm)) | 803 | if (IS_ERR(cryptd_tfm)) |
802 | return PTR_ERR(cryptd_tfm); | 804 | return PTR_ERR(cryptd_tfm); |
803 | 805 | ||
@@ -890,15 +892,12 @@ out_free_ablkcipher: | |||
890 | return ret; | 892 | return ret; |
891 | } | 893 | } |
892 | 894 | ||
893 | static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | 895 | static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, |
894 | unsigned int key_len) | 896 | unsigned int key_len) |
895 | { | 897 | { |
896 | int ret = 0; | 898 | int ret = 0; |
897 | struct crypto_tfm *tfm = crypto_aead_tfm(parent); | 899 | struct crypto_tfm *tfm = crypto_aead_tfm(aead); |
898 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | 900 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead); |
899 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
900 | struct aesni_rfc4106_gcm_ctx *child_ctx = | ||
901 | aesni_rfc4106_gcm_ctx_get(cryptd_child); | ||
902 | u8 *new_key_align, *new_key_mem = NULL; | 901 | u8 *new_key_align, *new_key_mem = NULL; |
903 | 902 | ||
904 | if (key_len < 4) { | 903 | if (key_len < 4) { |
@@ -943,20 +942,31 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | |||
943 | goto exit; | 942 | goto exit; |
944 | } | 943 | } |
945 | ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); | 944 | ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); |
946 | memcpy(child_ctx, ctx, sizeof(*ctx)); | ||
947 | exit: | 945 | exit: |
948 | kfree(new_key_mem); | 946 | kfree(new_key_mem); |
949 | return ret; | 947 | return ret; |
950 | } | 948 | } |
951 | 949 | ||
952 | /* This is the Integrity Check Value (aka the authentication tag length and can | 950 | static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, |
953 | * be 8, 12 or 16 bytes long. */ | 951 | unsigned int key_len) |
954 | static int rfc4106_set_authsize(struct crypto_aead *parent, | ||
955 | unsigned int authsize) | ||
956 | { | 952 | { |
957 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | 953 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); |
958 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | 954 | struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm); |
955 | struct aesni_rfc4106_gcm_ctx *c_ctx = aesni_rfc4106_gcm_ctx_get(child); | ||
956 | struct cryptd_aead *cryptd_tfm = ctx->cryptd_tfm; | ||
957 | int ret; | ||
959 | 958 | ||
959 | ret = crypto_aead_setkey(child, key, key_len); | ||
960 | if (!ret) { | ||
961 | memcpy(ctx, c_ctx, sizeof(*ctx)); | ||
962 | ctx->cryptd_tfm = cryptd_tfm; | ||
963 | } | ||
964 | return ret; | ||
965 | } | ||
966 | |||
967 | static int common_rfc4106_set_authsize(struct crypto_aead *aead, | ||
968 | unsigned int authsize) | ||
969 | { | ||
960 | switch (authsize) { | 970 | switch (authsize) { |
961 | case 8: | 971 | case 8: |
962 | case 12: | 972 | case 12: |
@@ -965,51 +975,23 @@ static int rfc4106_set_authsize(struct crypto_aead *parent, | |||
965 | default: | 975 | default: |
966 | return -EINVAL; | 976 | return -EINVAL; |
967 | } | 977 | } |
968 | crypto_aead_crt(parent)->authsize = authsize; | 978 | crypto_aead_crt(aead)->authsize = authsize; |
969 | crypto_aead_crt(cryptd_child)->authsize = authsize; | ||
970 | return 0; | 979 | return 0; |
971 | } | 980 | } |
972 | 981 | ||
973 | static int rfc4106_encrypt(struct aead_request *req) | 982 | /* This is the Integrity Check Value (aka the authentication tag length) and can |
974 | { | 983 | * be 8, 12 or 16 bytes long. */ |
975 | int ret; | 984 | static int rfc4106_set_authsize(struct crypto_aead *parent, |
976 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | 985 | unsigned int authsize) |
977 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
978 | |||
979 | if (!irq_fpu_usable()) { | ||
980 | struct aead_request *cryptd_req = | ||
981 | (struct aead_request *) aead_request_ctx(req); | ||
982 | memcpy(cryptd_req, req, sizeof(*req)); | ||
983 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
984 | return crypto_aead_encrypt(cryptd_req); | ||
985 | } else { | ||
986 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
987 | kernel_fpu_begin(); | ||
988 | ret = cryptd_child->base.crt_aead.encrypt(req); | ||
989 | kernel_fpu_end(); | ||
990 | return ret; | ||
991 | } | ||
992 | } | ||
993 | |||
994 | static int rfc4106_decrypt(struct aead_request *req) | ||
995 | { | 986 | { |
987 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); | ||
988 | struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm); | ||
996 | int ret; | 989 | int ret; |
997 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
998 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
999 | 990 | ||
1000 | if (!irq_fpu_usable()) { | 991 | ret = crypto_aead_setauthsize(child, authsize); |
1001 | struct aead_request *cryptd_req = | 992 | if (!ret) |
1002 | (struct aead_request *) aead_request_ctx(req); | 993 | crypto_aead_crt(parent)->authsize = authsize; |
1003 | memcpy(cryptd_req, req, sizeof(*req)); | 994 | return ret; |
1004 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1005 | return crypto_aead_decrypt(cryptd_req); | ||
1006 | } else { | ||
1007 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | ||
1008 | kernel_fpu_begin(); | ||
1009 | ret = cryptd_child->base.crt_aead.decrypt(req); | ||
1010 | kernel_fpu_end(); | ||
1011 | return ret; | ||
1012 | } | ||
1013 | } | 995 | } |
1014 | 996 | ||
1015 | static int __driver_rfc4106_encrypt(struct aead_request *req) | 997 | static int __driver_rfc4106_encrypt(struct aead_request *req) |
@@ -1185,6 +1167,78 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
1185 | } | 1167 | } |
1186 | return retval; | 1168 | return retval; |
1187 | } | 1169 | } |
1170 | |||
1171 | static int rfc4106_encrypt(struct aead_request *req) | ||
1172 | { | ||
1173 | int ret; | ||
1174 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1175 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1176 | |||
1177 | if (!irq_fpu_usable()) { | ||
1178 | struct aead_request *cryptd_req = | ||
1179 | (struct aead_request *) aead_request_ctx(req); | ||
1180 | |||
1181 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1182 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1183 | ret = crypto_aead_encrypt(cryptd_req); | ||
1184 | } else { | ||
1185 | kernel_fpu_begin(); | ||
1186 | ret = __driver_rfc4106_encrypt(req); | ||
1187 | kernel_fpu_end(); | ||
1188 | } | ||
1189 | return ret; | ||
1190 | } | ||
1191 | |||
1192 | static int rfc4106_decrypt(struct aead_request *req) | ||
1193 | { | ||
1194 | int ret; | ||
1195 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | ||
1196 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | ||
1197 | |||
1198 | if (!irq_fpu_usable()) { | ||
1199 | struct aead_request *cryptd_req = | ||
1200 | (struct aead_request *) aead_request_ctx(req); | ||
1201 | |||
1202 | memcpy(cryptd_req, req, sizeof(*req)); | ||
1203 | aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
1204 | ret = crypto_aead_decrypt(cryptd_req); | ||
1205 | } else { | ||
1206 | kernel_fpu_begin(); | ||
1207 | ret = __driver_rfc4106_decrypt(req); | ||
1208 | kernel_fpu_end(); | ||
1209 | } | ||
1210 | return ret; | ||
1211 | } | ||
1212 | |||
1213 | static int helper_rfc4106_encrypt(struct aead_request *req) | ||
1214 | { | ||
1215 | int ret; | ||
1216 | |||
1217 | if (unlikely(!irq_fpu_usable())) { | ||
1218 | WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context"); | ||
1219 | ret = -EINVAL; | ||
1220 | } else { | ||
1221 | kernel_fpu_begin(); | ||
1222 | ret = __driver_rfc4106_encrypt(req); | ||
1223 | kernel_fpu_end(); | ||
1224 | } | ||
1225 | return ret; | ||
1226 | } | ||
1227 | |||
1228 | static int helper_rfc4106_decrypt(struct aead_request *req) | ||
1229 | { | ||
1230 | int ret; | ||
1231 | |||
1232 | if (unlikely(!irq_fpu_usable())) { | ||
1233 | WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context"); | ||
1234 | ret = -EINVAL; | ||
1235 | } else { | ||
1236 | kernel_fpu_begin(); | ||
1237 | ret = __driver_rfc4106_decrypt(req); | ||
1238 | kernel_fpu_end(); | ||
1239 | } | ||
1240 | return ret; | ||
1241 | } | ||
1188 | #endif | 1242 | #endif |
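The CRYPTO_ALG_INTERNAL flag introduced throughout these hunks hides the bare, FPU-assuming helper algorithms from normal lookups; the core only returns them to callers that assert the flag in both the type and mask arguments, which is exactly what the cryptd_alloc_aead()/cryptd_alloc_ahash() calls above now do. A sketch of the two outcomes, assuming the standard crypto_alloc_aead() lookup semantics (illustrative only, error handling omitted):

	struct crypto_aead *tfm;

	/* ordinary lookup: the core masks internal algorithms out by
	 * default, so this cannot bind to the bare helper */
	tfm = crypto_alloc_aead("__driver-gcm-aes-aesni", 0, 0);

	/* wrappers such as cryptd assert the flag and get the helper */
	tfm = crypto_alloc_aead("__driver-gcm-aes-aesni",
				CRYPTO_ALG_INTERNAL, CRYPTO_ALG_INTERNAL);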
1189 | 1243 | ||
1190 | static struct crypto_alg aesni_algs[] = { { | 1244 | static struct crypto_alg aesni_algs[] = { { |
@@ -1210,7 +1264,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1210 | .cra_name = "__aes-aesni", | 1264 | .cra_name = "__aes-aesni", |
1211 | .cra_driver_name = "__driver-aes-aesni", | 1265 | .cra_driver_name = "__driver-aes-aesni", |
1212 | .cra_priority = 0, | 1266 | .cra_priority = 0, |
1213 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | 1267 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL, |
1214 | .cra_blocksize = AES_BLOCK_SIZE, | 1268 | .cra_blocksize = AES_BLOCK_SIZE, |
1215 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1269 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1216 | AESNI_ALIGN - 1, | 1270 | AESNI_ALIGN - 1, |
@@ -1229,7 +1283,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1229 | .cra_name = "__ecb-aes-aesni", | 1283 | .cra_name = "__ecb-aes-aesni", |
1230 | .cra_driver_name = "__driver-ecb-aes-aesni", | 1284 | .cra_driver_name = "__driver-ecb-aes-aesni", |
1231 | .cra_priority = 0, | 1285 | .cra_priority = 0, |
1232 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1286 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1287 | CRYPTO_ALG_INTERNAL, | ||
1233 | .cra_blocksize = AES_BLOCK_SIZE, | 1288 | .cra_blocksize = AES_BLOCK_SIZE, |
1234 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1289 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1235 | AESNI_ALIGN - 1, | 1290 | AESNI_ALIGN - 1, |
@@ -1249,7 +1304,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1249 | .cra_name = "__cbc-aes-aesni", | 1304 | .cra_name = "__cbc-aes-aesni", |
1250 | .cra_driver_name = "__driver-cbc-aes-aesni", | 1305 | .cra_driver_name = "__driver-cbc-aes-aesni", |
1251 | .cra_priority = 0, | 1306 | .cra_priority = 0, |
1252 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1307 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1308 | CRYPTO_ALG_INTERNAL, | ||
1253 | .cra_blocksize = AES_BLOCK_SIZE, | 1309 | .cra_blocksize = AES_BLOCK_SIZE, |
1254 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1310 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1255 | AESNI_ALIGN - 1, | 1311 | AESNI_ALIGN - 1, |
@@ -1313,7 +1369,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1313 | .cra_name = "__ctr-aes-aesni", | 1369 | .cra_name = "__ctr-aes-aesni", |
1314 | .cra_driver_name = "__driver-ctr-aes-aesni", | 1370 | .cra_driver_name = "__driver-ctr-aes-aesni", |
1315 | .cra_priority = 0, | 1371 | .cra_priority = 0, |
1316 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1372 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1373 | CRYPTO_ALG_INTERNAL, | ||
1317 | .cra_blocksize = 1, | 1374 | .cra_blocksize = 1, |
1318 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | 1375 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + |
1319 | AESNI_ALIGN - 1, | 1376 | AESNI_ALIGN - 1, |
@@ -1357,7 +1414,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1357 | .cra_name = "__gcm-aes-aesni", | 1414 | .cra_name = "__gcm-aes-aesni", |
1358 | .cra_driver_name = "__driver-gcm-aes-aesni", | 1415 | .cra_driver_name = "__driver-gcm-aes-aesni", |
1359 | .cra_priority = 0, | 1416 | .cra_priority = 0, |
1360 | .cra_flags = CRYPTO_ALG_TYPE_AEAD, | 1417 | .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_INTERNAL, |
1361 | .cra_blocksize = 1, | 1418 | .cra_blocksize = 1, |
1362 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + | 1419 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + |
1363 | AESNI_ALIGN, | 1420 | AESNI_ALIGN, |
@@ -1366,8 +1423,12 @@ static struct crypto_alg aesni_algs[] = { { | |||
1366 | .cra_module = THIS_MODULE, | 1423 | .cra_module = THIS_MODULE, |
1367 | .cra_u = { | 1424 | .cra_u = { |
1368 | .aead = { | 1425 | .aead = { |
1369 | .encrypt = __driver_rfc4106_encrypt, | 1426 | .setkey = common_rfc4106_set_key, |
1370 | .decrypt = __driver_rfc4106_decrypt, | 1427 | .setauthsize = common_rfc4106_set_authsize, |
1428 | .encrypt = helper_rfc4106_encrypt, | ||
1429 | .decrypt = helper_rfc4106_decrypt, | ||
1430 | .ivsize = 8, | ||
1431 | .maxauthsize = 16, | ||
1371 | }, | 1432 | }, |
1372 | }, | 1433 | }, |
1373 | }, { | 1434 | }, { |
@@ -1423,7 +1484,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1423 | .cra_name = "__lrw-aes-aesni", | 1484 | .cra_name = "__lrw-aes-aesni", |
1424 | .cra_driver_name = "__driver-lrw-aes-aesni", | 1485 | .cra_driver_name = "__driver-lrw-aes-aesni", |
1425 | .cra_priority = 0, | 1486 | .cra_priority = 0, |
1426 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1487 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1488 | CRYPTO_ALG_INTERNAL, | ||
1427 | .cra_blocksize = AES_BLOCK_SIZE, | 1489 | .cra_blocksize = AES_BLOCK_SIZE, |
1428 | .cra_ctxsize = sizeof(struct aesni_lrw_ctx), | 1490 | .cra_ctxsize = sizeof(struct aesni_lrw_ctx), |
1429 | .cra_alignmask = 0, | 1491 | .cra_alignmask = 0, |
@@ -1444,7 +1506,8 @@ static struct crypto_alg aesni_algs[] = { { | |||
1444 | .cra_name = "__xts-aes-aesni", | 1506 | .cra_name = "__xts-aes-aesni", |
1445 | .cra_driver_name = "__driver-xts-aes-aesni", | 1507 | .cra_driver_name = "__driver-xts-aes-aesni", |
1446 | .cra_priority = 0, | 1508 | .cra_priority = 0, |
1447 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 1509 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
1510 | CRYPTO_ALG_INTERNAL, | ||
1448 | .cra_blocksize = AES_BLOCK_SIZE, | 1511 | .cra_blocksize = AES_BLOCK_SIZE, |
1449 | .cra_ctxsize = sizeof(struct aesni_xts_ctx), | 1512 | .cra_ctxsize = sizeof(struct aesni_xts_ctx), |
1450 | .cra_alignmask = 0, | 1513 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 9a07fafe3831..baf0ac21ace5 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c | |||
@@ -343,7 +343,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
343 | .cra_name = "__ecb-camellia-aesni-avx2", | 343 | .cra_name = "__ecb-camellia-aesni-avx2", |
344 | .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", | 344 | .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", |
345 | .cra_priority = 0, | 345 | .cra_priority = 0, |
346 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 346 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
347 | CRYPTO_ALG_INTERNAL, | ||
347 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 348 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
348 | .cra_ctxsize = sizeof(struct camellia_ctx), | 349 | .cra_ctxsize = sizeof(struct camellia_ctx), |
349 | .cra_alignmask = 0, | 350 | .cra_alignmask = 0, |
@@ -362,7 +363,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
362 | .cra_name = "__cbc-camellia-aesni-avx2", | 363 | .cra_name = "__cbc-camellia-aesni-avx2", |
363 | .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", | 364 | .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", |
364 | .cra_priority = 0, | 365 | .cra_priority = 0, |
365 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 366 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
367 | CRYPTO_ALG_INTERNAL, | ||
366 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 368 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
367 | .cra_ctxsize = sizeof(struct camellia_ctx), | 369 | .cra_ctxsize = sizeof(struct camellia_ctx), |
368 | .cra_alignmask = 0, | 370 | .cra_alignmask = 0, |
@@ -381,7 +383,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
381 | .cra_name = "__ctr-camellia-aesni-avx2", | 383 | .cra_name = "__ctr-camellia-aesni-avx2", |
382 | .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", | 384 | .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", |
383 | .cra_priority = 0, | 385 | .cra_priority = 0, |
384 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 386 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
387 | CRYPTO_ALG_INTERNAL, | ||
385 | .cra_blocksize = 1, | 388 | .cra_blocksize = 1, |
386 | .cra_ctxsize = sizeof(struct camellia_ctx), | 389 | .cra_ctxsize = sizeof(struct camellia_ctx), |
387 | .cra_alignmask = 0, | 390 | .cra_alignmask = 0, |
@@ -401,7 +404,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
401 | .cra_name = "__lrw-camellia-aesni-avx2", | 404 | .cra_name = "__lrw-camellia-aesni-avx2", |
402 | .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", | 405 | .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", |
403 | .cra_priority = 0, | 406 | .cra_priority = 0, |
404 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 407 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
408 | CRYPTO_ALG_INTERNAL, | ||
405 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 409 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
406 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), | 410 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), |
407 | .cra_alignmask = 0, | 411 | .cra_alignmask = 0, |
@@ -424,7 +428,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
424 | .cra_name = "__xts-camellia-aesni-avx2", | 428 | .cra_name = "__xts-camellia-aesni-avx2", |
425 | .cra_driver_name = "__driver-xts-camellia-aesni-avx2", | 429 | .cra_driver_name = "__driver-xts-camellia-aesni-avx2", |
426 | .cra_priority = 0, | 430 | .cra_priority = 0, |
427 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 431 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
432 | CRYPTO_ALG_INTERNAL, | ||
428 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 433 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
429 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), | 434 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), |
430 | .cra_alignmask = 0, | 435 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index ed38d959add6..78818a1e73e3 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c | |||
@@ -335,7 +335,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
335 | .cra_name = "__ecb-camellia-aesni", | 335 | .cra_name = "__ecb-camellia-aesni", |
336 | .cra_driver_name = "__driver-ecb-camellia-aesni", | 336 | .cra_driver_name = "__driver-ecb-camellia-aesni", |
337 | .cra_priority = 0, | 337 | .cra_priority = 0, |
338 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 338 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
339 | CRYPTO_ALG_INTERNAL, | ||
339 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 340 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
340 | .cra_ctxsize = sizeof(struct camellia_ctx), | 341 | .cra_ctxsize = sizeof(struct camellia_ctx), |
341 | .cra_alignmask = 0, | 342 | .cra_alignmask = 0, |
@@ -354,7 +355,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
354 | .cra_name = "__cbc-camellia-aesni", | 355 | .cra_name = "__cbc-camellia-aesni", |
355 | .cra_driver_name = "__driver-cbc-camellia-aesni", | 356 | .cra_driver_name = "__driver-cbc-camellia-aesni", |
356 | .cra_priority = 0, | 357 | .cra_priority = 0, |
357 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 358 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
359 | CRYPTO_ALG_INTERNAL, | ||
358 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 360 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
359 | .cra_ctxsize = sizeof(struct camellia_ctx), | 361 | .cra_ctxsize = sizeof(struct camellia_ctx), |
360 | .cra_alignmask = 0, | 362 | .cra_alignmask = 0, |
@@ -373,7 +375,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
373 | .cra_name = "__ctr-camellia-aesni", | 375 | .cra_name = "__ctr-camellia-aesni", |
374 | .cra_driver_name = "__driver-ctr-camellia-aesni", | 376 | .cra_driver_name = "__driver-ctr-camellia-aesni", |
375 | .cra_priority = 0, | 377 | .cra_priority = 0, |
376 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 378 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
379 | CRYPTO_ALG_INTERNAL, | ||
377 | .cra_blocksize = 1, | 380 | .cra_blocksize = 1, |
378 | .cra_ctxsize = sizeof(struct camellia_ctx), | 381 | .cra_ctxsize = sizeof(struct camellia_ctx), |
379 | .cra_alignmask = 0, | 382 | .cra_alignmask = 0, |
@@ -393,7 +396,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
393 | .cra_name = "__lrw-camellia-aesni", | 396 | .cra_name = "__lrw-camellia-aesni", |
394 | .cra_driver_name = "__driver-lrw-camellia-aesni", | 397 | .cra_driver_name = "__driver-lrw-camellia-aesni", |
395 | .cra_priority = 0, | 398 | .cra_priority = 0, |
396 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 399 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
400 | CRYPTO_ALG_INTERNAL, | ||
397 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 401 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
398 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), | 402 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), |
399 | .cra_alignmask = 0, | 403 | .cra_alignmask = 0, |
@@ -416,7 +420,8 @@ static struct crypto_alg cmll_algs[10] = { { | |||
416 | .cra_name = "__xts-camellia-aesni", | 420 | .cra_name = "__xts-camellia-aesni", |
417 | .cra_driver_name = "__driver-xts-camellia-aesni", | 421 | .cra_driver_name = "__driver-xts-camellia-aesni", |
418 | .cra_priority = 0, | 422 | .cra_priority = 0, |
419 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 423 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
424 | CRYPTO_ALG_INTERNAL, | ||
420 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | 425 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, |
421 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), | 426 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), |
422 | .cra_alignmask = 0, | 427 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 60ada677a928..236c80974457 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c | |||
@@ -341,7 +341,8 @@ static struct crypto_alg cast5_algs[6] = { { | |||
341 | .cra_name = "__ecb-cast5-avx", | 341 | .cra_name = "__ecb-cast5-avx", |
342 | .cra_driver_name = "__driver-ecb-cast5-avx", | 342 | .cra_driver_name = "__driver-ecb-cast5-avx", |
343 | .cra_priority = 0, | 343 | .cra_priority = 0, |
344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 344 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
345 | CRYPTO_ALG_INTERNAL, | ||
345 | .cra_blocksize = CAST5_BLOCK_SIZE, | 346 | .cra_blocksize = CAST5_BLOCK_SIZE, |
346 | .cra_ctxsize = sizeof(struct cast5_ctx), | 347 | .cra_ctxsize = sizeof(struct cast5_ctx), |
347 | .cra_alignmask = 0, | 348 | .cra_alignmask = 0, |
@@ -360,7 +361,8 @@ static struct crypto_alg cast5_algs[6] = { { | |||
360 | .cra_name = "__cbc-cast5-avx", | 361 | .cra_name = "__cbc-cast5-avx", |
361 | .cra_driver_name = "__driver-cbc-cast5-avx", | 362 | .cra_driver_name = "__driver-cbc-cast5-avx", |
362 | .cra_priority = 0, | 363 | .cra_priority = 0, |
363 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 364 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
365 | CRYPTO_ALG_INTERNAL, | ||
364 | .cra_blocksize = CAST5_BLOCK_SIZE, | 366 | .cra_blocksize = CAST5_BLOCK_SIZE, |
365 | .cra_ctxsize = sizeof(struct cast5_ctx), | 367 | .cra_ctxsize = sizeof(struct cast5_ctx), |
366 | .cra_alignmask = 0, | 368 | .cra_alignmask = 0, |
@@ -379,7 +381,8 @@ static struct crypto_alg cast5_algs[6] = { { | |||
379 | .cra_name = "__ctr-cast5-avx", | 381 | .cra_name = "__ctr-cast5-avx", |
380 | .cra_driver_name = "__driver-ctr-cast5-avx", | 382 | .cra_driver_name = "__driver-ctr-cast5-avx", |
381 | .cra_priority = 0, | 383 | .cra_priority = 0, |
382 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 384 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
385 | CRYPTO_ALG_INTERNAL, | ||
383 | .cra_blocksize = 1, | 386 | .cra_blocksize = 1, |
384 | .cra_ctxsize = sizeof(struct cast5_ctx), | 387 | .cra_ctxsize = sizeof(struct cast5_ctx), |
385 | .cra_alignmask = 0, | 388 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 0160f68a57ff..f448810ca4ac 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c | |||
@@ -372,7 +372,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
372 | .cra_name = "__ecb-cast6-avx", | 372 | .cra_name = "__ecb-cast6-avx", |
373 | .cra_driver_name = "__driver-ecb-cast6-avx", | 373 | .cra_driver_name = "__driver-ecb-cast6-avx", |
374 | .cra_priority = 0, | 374 | .cra_priority = 0, |
375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
376 | CRYPTO_ALG_INTERNAL, | ||
376 | .cra_blocksize = CAST6_BLOCK_SIZE, | 377 | .cra_blocksize = CAST6_BLOCK_SIZE, |
377 | .cra_ctxsize = sizeof(struct cast6_ctx), | 378 | .cra_ctxsize = sizeof(struct cast6_ctx), |
378 | .cra_alignmask = 0, | 379 | .cra_alignmask = 0, |
@@ -391,7 +392,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
391 | .cra_name = "__cbc-cast6-avx", | 392 | .cra_name = "__cbc-cast6-avx", |
392 | .cra_driver_name = "__driver-cbc-cast6-avx", | 393 | .cra_driver_name = "__driver-cbc-cast6-avx", |
393 | .cra_priority = 0, | 394 | .cra_priority = 0, |
394 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 395 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
396 | CRYPTO_ALG_INTERNAL, | ||
395 | .cra_blocksize = CAST6_BLOCK_SIZE, | 397 | .cra_blocksize = CAST6_BLOCK_SIZE, |
396 | .cra_ctxsize = sizeof(struct cast6_ctx), | 398 | .cra_ctxsize = sizeof(struct cast6_ctx), |
397 | .cra_alignmask = 0, | 399 | .cra_alignmask = 0, |
@@ -410,7 +412,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
410 | .cra_name = "__ctr-cast6-avx", | 412 | .cra_name = "__ctr-cast6-avx", |
411 | .cra_driver_name = "__driver-ctr-cast6-avx", | 413 | .cra_driver_name = "__driver-ctr-cast6-avx", |
412 | .cra_priority = 0, | 414 | .cra_priority = 0, |
413 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 415 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
416 | CRYPTO_ALG_INTERNAL, | ||
414 | .cra_blocksize = 1, | 417 | .cra_blocksize = 1, |
415 | .cra_ctxsize = sizeof(struct cast6_ctx), | 418 | .cra_ctxsize = sizeof(struct cast6_ctx), |
416 | .cra_alignmask = 0, | 419 | .cra_alignmask = 0, |
@@ -430,7 +433,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
430 | .cra_name = "__lrw-cast6-avx", | 433 | .cra_name = "__lrw-cast6-avx", |
431 | .cra_driver_name = "__driver-lrw-cast6-avx", | 434 | .cra_driver_name = "__driver-lrw-cast6-avx", |
432 | .cra_priority = 0, | 435 | .cra_priority = 0, |
433 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 436 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
437 | CRYPTO_ALG_INTERNAL, | ||
434 | .cra_blocksize = CAST6_BLOCK_SIZE, | 438 | .cra_blocksize = CAST6_BLOCK_SIZE, |
435 | .cra_ctxsize = sizeof(struct cast6_lrw_ctx), | 439 | .cra_ctxsize = sizeof(struct cast6_lrw_ctx), |
436 | .cra_alignmask = 0, | 440 | .cra_alignmask = 0, |
@@ -453,7 +457,8 @@ static struct crypto_alg cast6_algs[10] = { { | |||
453 | .cra_name = "__xts-cast6-avx", | 457 | .cra_name = "__xts-cast6-avx", |
454 | .cra_driver_name = "__driver-xts-cast6-avx", | 458 | .cra_driver_name = "__driver-xts-cast6-avx", |
455 | .cra_priority = 0, | 459 | .cra_priority = 0, |
456 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 460 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
461 | CRYPTO_ALG_INTERNAL, | ||
457 | .cra_blocksize = CAST6_BLOCK_SIZE, | 462 | .cra_blocksize = CAST6_BLOCK_SIZE, |
458 | .cra_ctxsize = sizeof(struct cast6_xts_ctx), | 463 | .cra_ctxsize = sizeof(struct cast6_xts_ctx), |
459 | .cra_alignmask = 0, | 464 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 8253d85aa165..2079baf06bdd 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c | |||
@@ -154,7 +154,8 @@ static struct shash_alg ghash_alg = { | |||
154 | .cra_name = "__ghash", | 154 | .cra_name = "__ghash", |
155 | .cra_driver_name = "__ghash-pclmulqdqni", | 155 | .cra_driver_name = "__ghash-pclmulqdqni", |
156 | .cra_priority = 0, | 156 | .cra_priority = 0, |
157 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | 157 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | |
158 | CRYPTO_ALG_INTERNAL, | ||
158 | .cra_blocksize = GHASH_BLOCK_SIZE, | 159 | .cra_blocksize = GHASH_BLOCK_SIZE, |
159 | .cra_ctxsize = sizeof(struct ghash_ctx), | 160 | .cra_ctxsize = sizeof(struct ghash_ctx), |
160 | .cra_module = THIS_MODULE, | 161 | .cra_module = THIS_MODULE, |
@@ -261,7 +262,9 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm) | |||
261 | struct cryptd_ahash *cryptd_tfm; | 262 | struct cryptd_ahash *cryptd_tfm; |
262 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); | 263 | struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); |
263 | 264 | ||
264 | cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); | 265 | cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", |
266 | CRYPTO_ALG_INTERNAL, | ||
267 | CRYPTO_ALG_INTERNAL); | ||
265 | if (IS_ERR(cryptd_tfm)) | 268 | if (IS_ERR(cryptd_tfm)) |
266 | return PTR_ERR(cryptd_tfm); | 269 | return PTR_ERR(cryptd_tfm); |
267 | ctx->cryptd_tfm = cryptd_tfm; | 270 | ctx->cryptd_tfm = cryptd_tfm; |
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 432f1d76ceb8..6a85598931b5 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c | |||
@@ -232,7 +232,6 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, | |||
232 | 232 | ||
233 | le128_to_be128((be128 *)walk->iv, &ctrblk); | 233 | le128_to_be128((be128 *)walk->iv, &ctrblk); |
234 | } | 234 | } |
235 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit); | ||
236 | 235 | ||
237 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | 236 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, |
238 | struct blkcipher_desc *desc, | 237 | struct blkcipher_desc *desc, |
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 437e47a4d302..2f63dc89e7a9 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c | |||
@@ -309,7 +309,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
309 | .cra_name = "__ecb-serpent-avx2", | 309 | .cra_name = "__ecb-serpent-avx2", |
310 | .cra_driver_name = "__driver-ecb-serpent-avx2", | 310 | .cra_driver_name = "__driver-ecb-serpent-avx2", |
311 | .cra_priority = 0, | 311 | .cra_priority = 0, |
312 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 312 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
313 | CRYPTO_ALG_INTERNAL, | ||
313 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 314 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
314 | .cra_ctxsize = sizeof(struct serpent_ctx), | 315 | .cra_ctxsize = sizeof(struct serpent_ctx), |
315 | .cra_alignmask = 0, | 316 | .cra_alignmask = 0, |
@@ -329,7 +330,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
329 | .cra_name = "__cbc-serpent-avx2", | 330 | .cra_name = "__cbc-serpent-avx2", |
330 | .cra_driver_name = "__driver-cbc-serpent-avx2", | 331 | .cra_driver_name = "__driver-cbc-serpent-avx2", |
331 | .cra_priority = 0, | 332 | .cra_priority = 0, |
332 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 333 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
334 | CRYPTO_ALG_INTERNAL, | ||
333 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 335 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
334 | .cra_ctxsize = sizeof(struct serpent_ctx), | 336 | .cra_ctxsize = sizeof(struct serpent_ctx), |
335 | .cra_alignmask = 0, | 337 | .cra_alignmask = 0, |
@@ -349,7 +351,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
349 | .cra_name = "__ctr-serpent-avx2", | 351 | .cra_name = "__ctr-serpent-avx2", |
350 | .cra_driver_name = "__driver-ctr-serpent-avx2", | 352 | .cra_driver_name = "__driver-ctr-serpent-avx2", |
351 | .cra_priority = 0, | 353 | .cra_priority = 0, |
352 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 354 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
355 | CRYPTO_ALG_INTERNAL, | ||
353 | .cra_blocksize = 1, | 356 | .cra_blocksize = 1, |
354 | .cra_ctxsize = sizeof(struct serpent_ctx), | 357 | .cra_ctxsize = sizeof(struct serpent_ctx), |
355 | .cra_alignmask = 0, | 358 | .cra_alignmask = 0, |
@@ -370,7 +373,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
370 | .cra_name = "__lrw-serpent-avx2", | 373 | .cra_name = "__lrw-serpent-avx2", |
371 | .cra_driver_name = "__driver-lrw-serpent-avx2", | 374 | .cra_driver_name = "__driver-lrw-serpent-avx2", |
372 | .cra_priority = 0, | 375 | .cra_priority = 0, |
373 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 376 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
377 | CRYPTO_ALG_INTERNAL, | ||
374 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 378 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
375 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | 379 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), |
376 | .cra_alignmask = 0, | 380 | .cra_alignmask = 0, |
@@ -394,7 +398,8 @@ static struct crypto_alg srp_algs[10] = { { | |||
394 | .cra_name = "__xts-serpent-avx2", | 398 | .cra_name = "__xts-serpent-avx2", |
395 | .cra_driver_name = "__driver-xts-serpent-avx2", | 399 | .cra_driver_name = "__driver-xts-serpent-avx2", |
396 | .cra_priority = 0, | 400 | .cra_priority = 0, |
397 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
402 | CRYPTO_ALG_INTERNAL, | ||
398 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 403 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
399 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | 404 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), |
400 | .cra_alignmask = 0, | 405 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 7e217398b4eb..c8d478af8456 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c | |||
@@ -378,7 +378,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
378 | .cra_name = "__ecb-serpent-avx", | 378 | .cra_name = "__ecb-serpent-avx", |
379 | .cra_driver_name = "__driver-ecb-serpent-avx", | 379 | .cra_driver_name = "__driver-ecb-serpent-avx", |
380 | .cra_priority = 0, | 380 | .cra_priority = 0, |
381 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 381 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
382 | CRYPTO_ALG_INTERNAL, | ||
382 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 383 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
383 | .cra_ctxsize = sizeof(struct serpent_ctx), | 384 | .cra_ctxsize = sizeof(struct serpent_ctx), |
384 | .cra_alignmask = 0, | 385 | .cra_alignmask = 0, |
@@ -397,7 +398,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
397 | .cra_name = "__cbc-serpent-avx", | 398 | .cra_name = "__cbc-serpent-avx", |
398 | .cra_driver_name = "__driver-cbc-serpent-avx", | 399 | .cra_driver_name = "__driver-cbc-serpent-avx", |
399 | .cra_priority = 0, | 400 | .cra_priority = 0, |
400 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
402 | CRYPTO_ALG_INTERNAL, | ||
401 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 403 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
402 | .cra_ctxsize = sizeof(struct serpent_ctx), | 404 | .cra_ctxsize = sizeof(struct serpent_ctx), |
403 | .cra_alignmask = 0, | 405 | .cra_alignmask = 0, |
@@ -416,7 +418,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
416 | .cra_name = "__ctr-serpent-avx", | 418 | .cra_name = "__ctr-serpent-avx", |
417 | .cra_driver_name = "__driver-ctr-serpent-avx", | 419 | .cra_driver_name = "__driver-ctr-serpent-avx", |
418 | .cra_priority = 0, | 420 | .cra_priority = 0, |
419 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 421 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
422 | CRYPTO_ALG_INTERNAL, | ||
420 | .cra_blocksize = 1, | 423 | .cra_blocksize = 1, |
421 | .cra_ctxsize = sizeof(struct serpent_ctx), | 424 | .cra_ctxsize = sizeof(struct serpent_ctx), |
422 | .cra_alignmask = 0, | 425 | .cra_alignmask = 0, |
@@ -436,7 +439,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
436 | .cra_name = "__lrw-serpent-avx", | 439 | .cra_name = "__lrw-serpent-avx", |
437 | .cra_driver_name = "__driver-lrw-serpent-avx", | 440 | .cra_driver_name = "__driver-lrw-serpent-avx", |
438 | .cra_priority = 0, | 441 | .cra_priority = 0, |
439 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 442 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
443 | CRYPTO_ALG_INTERNAL, | ||
440 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 444 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
441 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | 445 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), |
442 | .cra_alignmask = 0, | 446 | .cra_alignmask = 0, |
@@ -459,7 +463,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
459 | .cra_name = "__xts-serpent-avx", | 463 | .cra_name = "__xts-serpent-avx", |
460 | .cra_driver_name = "__driver-xts-serpent-avx", | 464 | .cra_driver_name = "__driver-xts-serpent-avx", |
461 | .cra_priority = 0, | 465 | .cra_priority = 0, |
462 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 466 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
467 | CRYPTO_ALG_INTERNAL, | ||
463 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 468 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
464 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | 469 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), |
465 | .cra_alignmask = 0, | 470 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index bf025adaea01..3643dd508f45 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c | |||
@@ -387,7 +387,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
387 | .cra_name = "__ecb-serpent-sse2", | 387 | .cra_name = "__ecb-serpent-sse2", |
388 | .cra_driver_name = "__driver-ecb-serpent-sse2", | 388 | .cra_driver_name = "__driver-ecb-serpent-sse2", |
389 | .cra_priority = 0, | 389 | .cra_priority = 0, |
390 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 390 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
391 | CRYPTO_ALG_INTERNAL, | ||
391 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 392 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
392 | .cra_ctxsize = sizeof(struct serpent_ctx), | 393 | .cra_ctxsize = sizeof(struct serpent_ctx), |
393 | .cra_alignmask = 0, | 394 | .cra_alignmask = 0, |
@@ -406,7 +407,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
406 | .cra_name = "__cbc-serpent-sse2", | 407 | .cra_name = "__cbc-serpent-sse2", |
407 | .cra_driver_name = "__driver-cbc-serpent-sse2", | 408 | .cra_driver_name = "__driver-cbc-serpent-sse2", |
408 | .cra_priority = 0, | 409 | .cra_priority = 0, |
409 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 410 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
411 | CRYPTO_ALG_INTERNAL, | ||
410 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 412 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
411 | .cra_ctxsize = sizeof(struct serpent_ctx), | 413 | .cra_ctxsize = sizeof(struct serpent_ctx), |
412 | .cra_alignmask = 0, | 414 | .cra_alignmask = 0, |
@@ -425,7 +427,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
425 | .cra_name = "__ctr-serpent-sse2", | 427 | .cra_name = "__ctr-serpent-sse2", |
426 | .cra_driver_name = "__driver-ctr-serpent-sse2", | 428 | .cra_driver_name = "__driver-ctr-serpent-sse2", |
427 | .cra_priority = 0, | 429 | .cra_priority = 0, |
428 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 430 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
431 | CRYPTO_ALG_INTERNAL, | ||
429 | .cra_blocksize = 1, | 432 | .cra_blocksize = 1, |
430 | .cra_ctxsize = sizeof(struct serpent_ctx), | 433 | .cra_ctxsize = sizeof(struct serpent_ctx), |
431 | .cra_alignmask = 0, | 434 | .cra_alignmask = 0, |
@@ -445,7 +448,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
445 | .cra_name = "__lrw-serpent-sse2", | 448 | .cra_name = "__lrw-serpent-sse2", |
446 | .cra_driver_name = "__driver-lrw-serpent-sse2", | 449 | .cra_driver_name = "__driver-lrw-serpent-sse2", |
447 | .cra_priority = 0, | 450 | .cra_priority = 0, |
448 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 451 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
452 | CRYPTO_ALG_INTERNAL, | ||
449 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 453 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
450 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | 454 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), |
451 | .cra_alignmask = 0, | 455 | .cra_alignmask = 0, |
@@ -468,7 +472,8 @@ static struct crypto_alg serpent_algs[10] = { { | |||
468 | .cra_name = "__xts-serpent-sse2", | 472 | .cra_name = "__xts-serpent-sse2", |
469 | .cra_driver_name = "__driver-xts-serpent-sse2", | 473 | .cra_driver_name = "__driver-xts-serpent-sse2", |
470 | .cra_priority = 0, | 474 | .cra_priority = 0, |
471 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 475 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
476 | CRYPTO_ALG_INTERNAL, | ||
472 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 477 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
473 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | 478 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), |
474 | .cra_alignmask = 0, | 479 | .cra_alignmask = 0, |
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index fd9f6b035b16..e510b1c5d690 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c | |||
@@ -694,7 +694,8 @@ static struct shash_alg sha1_mb_shash_alg = { | |||
694 | * use ASYNC flag as some buffers in multi-buffer | 694 | * use ASYNC flag as some buffers in multi-buffer |
695 | * algo may not have completed before hashing thread sleep | 695 | * algo may not have completed before hashing thread sleep |
696 | */ | 696 | */ |
697 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC, | 697 | .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC | |
698 | CRYPTO_ALG_INTERNAL, | ||
698 | .cra_blocksize = SHA1_BLOCK_SIZE, | 699 | .cra_blocksize = SHA1_BLOCK_SIZE, |
699 | .cra_module = THIS_MODULE, | 700 | .cra_module = THIS_MODULE, |
700 | .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), | 701 | .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), |
@@ -770,7 +771,9 @@ static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) | |||
770 | struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); | 771 | struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); |
771 | struct mcryptd_hash_ctx *mctx; | 772 | struct mcryptd_hash_ctx *mctx; |
772 | 773 | ||
773 | mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0); | 774 | mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", |
775 | CRYPTO_ALG_INTERNAL, | ||
776 | CRYPTO_ALG_INTERNAL); | ||
774 | if (IS_ERR(mcryptd_tfm)) | 777 | if (IS_ERR(mcryptd_tfm)) |
775 | return PTR_ERR(mcryptd_tfm); | 778 | return PTR_ERR(mcryptd_tfm); |
776 | mctx = crypto_ahash_ctx(&mcryptd_tfm->base); | 779 | mctx = crypto_ahash_ctx(&mcryptd_tfm->base); |
@@ -828,7 +831,7 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) | |||
828 | while (!list_empty(&cstate->work_list)) { | 831 | while (!list_empty(&cstate->work_list)) { |
829 | rctx = list_entry(cstate->work_list.next, | 832 | rctx = list_entry(cstate->work_list.next, |
830 | struct mcryptd_hash_request_ctx, waiter); | 833 | struct mcryptd_hash_request_ctx, waiter); |
831 | if time_before(cur_time, rctx->tag.expire) | 834 | if (time_before(cur_time, rctx->tag.expire)) |
832 | break; | 835 | break; |
833 | kernel_fpu_begin(); | 836 | kernel_fpu_begin(); |
834 | sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); | 837 | sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); |
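
Two small bug fixes ride along in sha1_mb.c. The one above compiled even without the if parentheses because time_before() is a macro whose expansion is itself fully parenthesized; the patch merely restores conventional C style. A short illustration:

#include <linux/jiffies.h>

static int flusher_entry_expired(unsigned long cur_time, unsigned long expire)
{
        /*
         * time_before(a, b) expands, roughly, to ((long)((a) - (b)) < 0),
         * which is already wrapped in parentheses.  That is why the old
         * "if time_before(a, b)" parsed fine; it only read as if C
         * allowed a paren-less if.
         */
        if (time_before(cur_time, expire))
                return 0;
        return 1;
}
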
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c index 4ca7e166a2aa..822acb5b464c 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c | |||
@@ -56,7 +56,7 @@ | |||
56 | void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) | 56 | void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) |
57 | { | 57 | { |
58 | unsigned int j; | 58 | unsigned int j; |
59 | state->unused_lanes = 0xF76543210; | 59 | state->unused_lanes = 0xF76543210ULL; |
60 | for (j = 0; j < 8; j++) { | 60 | for (j = 0; j < 8; j++) { |
61 | state->lens[j] = 0xFFFFFFFF; | 61 | state->lens[j] = 0xFFFFFFFF; |
62 | state->ldata[j].job_in_lane = NULL; | 62 | state->ldata[j].job_in_lane = NULL; |
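
The other fix makes the width of the lane bookkeeping constant explicit: 0xF76543210 needs 36 bits, and unused_lanes packs eight lane indices as nibbles with 0xF on top as the empty marker. The unsuffixed literal already promotes to a type wide enough to hold it, so the ULL suffix is about documented intent rather than a miscompile. A hedged sketch of the packing (demo names are ours, not the real manager):

#include <linux/types.h>

static u64 demo_unused_lanes = 0xF76543210ULL;  /* lanes 0..7 free, 0xF = stop */

static int demo_get_lane(void)
{
        int lane = demo_unused_lanes & 0xF;     /* pop the lowest nibble */

        demo_unused_lanes >>= 4;
        return lane;                            /* 0xF means no lane free */
}
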
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 6c20fe04a738..33d1b9dc14cc 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c | |||
@@ -28,7 +28,7 @@ | |||
28 | #include <linux/cryptohash.h> | 28 | #include <linux/cryptohash.h> |
29 | #include <linux/types.h> | 29 | #include <linux/types.h> |
30 | #include <crypto/sha.h> | 30 | #include <crypto/sha.h> |
31 | #include <asm/byteorder.h> | 31 | #include <crypto/sha1_base.h> |
32 | #include <asm/i387.h> | 32 | #include <asm/i387.h> |
33 | #include <asm/xcr.h> | 33 | #include <asm/xcr.h> |
34 | #include <asm/xsave.h> | 34 | #include <asm/xsave.h> |
@@ -44,132 +44,51 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data, | |||
44 | #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ | 44 | #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ |
45 | 45 | ||
46 | asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, | 46 | asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, |
47 | unsigned int rounds); | 47 | unsigned int rounds); |
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); | 50 | static void (*sha1_transform_asm)(u32 *, const char *, unsigned int); |
51 | |||
52 | |||
53 | static int sha1_ssse3_init(struct shash_desc *desc) | ||
54 | { | ||
55 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
56 | |||
57 | *sctx = (struct sha1_state){ | ||
58 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
59 | }; | ||
60 | |||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
65 | unsigned int len, unsigned int partial) | ||
66 | { | ||
67 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
68 | unsigned int done = 0; | ||
69 | |||
70 | sctx->count += len; | ||
71 | |||
72 | if (partial) { | ||
73 | done = SHA1_BLOCK_SIZE - partial; | ||
74 | memcpy(sctx->buffer + partial, data, done); | ||
75 | sha1_transform_asm(sctx->state, sctx->buffer, 1); | ||
76 | } | ||
77 | |||
78 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
79 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
80 | |||
81 | sha1_transform_asm(sctx->state, data + done, rounds); | ||
82 | done += rounds * SHA1_BLOCK_SIZE; | ||
83 | } | ||
84 | |||
85 | memcpy(sctx->buffer, data + done, len - done); | ||
86 | |||
87 | return 0; | ||
88 | } | ||
89 | 51 | ||
90 | static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, | 52 | static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, |
91 | unsigned int len) | 53 | unsigned int len) |
92 | { | 54 | { |
93 | struct sha1_state *sctx = shash_desc_ctx(desc); | 55 | struct sha1_state *sctx = shash_desc_ctx(desc); |
94 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
95 | int res; | ||
96 | 56 | ||
97 | /* Handle the fast case right here */ | 57 | if (!irq_fpu_usable() || |
98 | if (partial + len < SHA1_BLOCK_SIZE) { | 58 | (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) |
99 | sctx->count += len; | 59 | return crypto_sha1_update(desc, data, len); |
100 | memcpy(sctx->buffer + partial, data, len); | ||
101 | 60 | ||
102 | return 0; | 61 | /* make sure casting to sha1_block_fn() is safe */ |
103 | } | 62 | BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0); |
104 | 63 | ||
105 | if (!irq_fpu_usable()) { | 64 | kernel_fpu_begin(); |
106 | res = crypto_sha1_update(desc, data, len); | 65 | sha1_base_do_update(desc, data, len, |
107 | } else { | 66 | (sha1_block_fn *)sha1_transform_asm); |
108 | kernel_fpu_begin(); | 67 | kernel_fpu_end(); |
109 | res = __sha1_ssse3_update(desc, data, len, partial); | ||
110 | kernel_fpu_end(); | ||
111 | } | ||
112 | |||
113 | return res; | ||
114 | } | ||
115 | |||
116 | |||
117 | /* Add padding and return the message digest. */ | ||
118 | static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) | ||
119 | { | ||
120 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
121 | unsigned int i, index, padlen; | ||
122 | __be32 *dst = (__be32 *)out; | ||
123 | __be64 bits; | ||
124 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
125 | |||
126 | bits = cpu_to_be64(sctx->count << 3); | ||
127 | |||
128 | /* Pad out to 56 mod 64 and append length */ | ||
129 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
130 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
131 | if (!irq_fpu_usable()) { | ||
132 | crypto_sha1_update(desc, padding, padlen); | ||
133 | crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
134 | } else { | ||
135 | kernel_fpu_begin(); | ||
136 | /* We need to fill a whole block for __sha1_ssse3_update() */ | ||
137 | if (padlen <= 56) { | ||
138 | sctx->count += padlen; | ||
139 | memcpy(sctx->buffer + index, padding, padlen); | ||
140 | } else { | ||
141 | __sha1_ssse3_update(desc, padding, padlen, index); | ||
142 | } | ||
143 | __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56); | ||
144 | kernel_fpu_end(); | ||
145 | } | ||
146 | |||
147 | /* Store state in digest */ | ||
148 | for (i = 0; i < 5; i++) | ||
149 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
150 | |||
151 | /* Wipe context */ | ||
152 | memset(sctx, 0, sizeof(*sctx)); | ||
153 | 68 | ||
154 | return 0; | 69 | return 0; |
155 | } | 70 | } |
156 | 71 | ||
157 | static int sha1_ssse3_export(struct shash_desc *desc, void *out) | 72 | static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, |
73 | unsigned int len, u8 *out) | ||
158 | { | 74 | { |
159 | struct sha1_state *sctx = shash_desc_ctx(desc); | 75 | if (!irq_fpu_usable()) |
76 | return crypto_sha1_finup(desc, data, len, out); | ||
160 | 77 | ||
161 | memcpy(out, sctx, sizeof(*sctx)); | 78 | kernel_fpu_begin(); |
79 | if (len) | ||
80 | sha1_base_do_update(desc, data, len, | ||
81 | (sha1_block_fn *)sha1_transform_asm); | ||
82 | sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm); | ||
83 | kernel_fpu_end(); | ||
162 | 84 | ||
163 | return 0; | 85 | return sha1_base_finish(desc, out); |
164 | } | 86 | } |
165 | 87 | ||
166 | static int sha1_ssse3_import(struct shash_desc *desc, const void *in) | 88 | /* Add padding and return the message digest. */ |
89 | static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) | ||
167 | { | 90 | { |
168 | struct sha1_state *sctx = shash_desc_ctx(desc); | 91 | return sha1_ssse3_finup(desc, NULL, 0, out); |
169 | |||
170 | memcpy(sctx, in, sizeof(*sctx)); | ||
171 | |||
172 | return 0; | ||
173 | } | 92 | } |
174 | 93 | ||
175 | #ifdef CONFIG_AS_AVX2 | 94 | #ifdef CONFIG_AS_AVX2 |
@@ -186,13 +105,11 @@ static void sha1_apply_transform_avx2(u32 *digest, const char *data, | |||
186 | 105 | ||
187 | static struct shash_alg alg = { | 106 | static struct shash_alg alg = { |
188 | .digestsize = SHA1_DIGEST_SIZE, | 107 | .digestsize = SHA1_DIGEST_SIZE, |
189 | .init = sha1_ssse3_init, | 108 | .init = sha1_base_init, |
190 | .update = sha1_ssse3_update, | 109 | .update = sha1_ssse3_update, |
191 | .final = sha1_ssse3_final, | 110 | .final = sha1_ssse3_final, |
192 | .export = sha1_ssse3_export, | 111 | .finup = sha1_ssse3_finup, |
193 | .import = sha1_ssse3_import, | ||
194 | .descsize = sizeof(struct sha1_state), | 112 | .descsize = sizeof(struct sha1_state), |
195 | .statesize = sizeof(struct sha1_state), | ||
196 | .base = { | 113 | .base = { |
197 | .cra_name = "sha1", | 114 | .cra_name = "sha1", |
198 | .cra_driver_name= "sha1-ssse3", | 115 | .cra_driver_name= "sha1-ssse3", |
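
The big deletion above is the point of the series: crypto/sha1_base.h now centralizes the partial-block buffering, length bookkeeping, padding, and export/import that every arch glue used to open-code, leaving the driver to decide only between the SIMD path and the generic fallback. The one subtle step is casting the asm routine to sha1_block_fn, which takes a struct sha1_state * where the asm takes u32 *digest; the BUILD_BUG_ON in the update path pins down the layout assumption that makes this work. In sketch form:

#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <linux/bug.h>
#include <linux/stddef.h>

/*
 * sha1_base's hook type (from crypto/sha1_base.h):
 *
 *     typedef void (sha1_block_fn)(struct sha1_state *sst,
 *                                  u8 const *src, int blocks);
 *
 * The asm takes (u32 *digest, const char *data, unsigned int blocks).
 * The two calling conventions coincide only if a sha1_state pointer and
 * a pointer to its state[] words are the same address, i.e. state is
 * the first member:
 */
static inline void sha1_state_layout_check(void)
{
        BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
}
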
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 642f15687a0a..92b3b5d75ba9 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S | |||
@@ -96,10 +96,10 @@ SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00 | |||
96 | BYTE_FLIP_MASK = %xmm13 | 96 | BYTE_FLIP_MASK = %xmm13 |
97 | 97 | ||
98 | NUM_BLKS = %rdx # 3rd arg | 98 | NUM_BLKS = %rdx # 3rd arg |
99 | CTX = %rsi # 2nd arg | 99 | INP = %rsi # 2nd arg |
100 | INP = %rdi # 1st arg | 100 | CTX = %rdi # 1st arg |
101 | 101 | ||
102 | SRND = %rdi # clobbers INP | 102 | SRND = %rsi # clobbers INP |
103 | c = %ecx | 103 | c = %ecx |
104 | d = %r8d | 104 | d = %r8d |
105 | e = %edx | 105 | e = %edx |
@@ -342,8 +342,8 @@ a = TMP_ | |||
342 | 342 | ||
343 | ######################################################################## | 343 | ######################################################################## |
344 | ## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) | 344 | ## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) |
345 | ## arg 1 : pointer to input data | 345 | ## arg 1 : pointer to digest |
346 | ## arg 2 : pointer to digest | 346 | ## arg 2 : pointer to input data |
347 | ## arg 3 : Num blocks | 347 | ## arg 3 : Num blocks |
348 | ######################################################################## | 348 | ######################################################################## |
349 | .text | 349 | .text |
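
The assembler half of the conversion is mechanical: under the SysV AMD64 ABI the first three integer arguments arrive in %rdi, %rsi and %rdx, so renaming the registers swaps the C prototype from (data, digest, blocks) to (digest, data, blocks), the state-first shape the base layer's block functions use. The same swap repeats below for the AVX2 and SSSE3 variants and for the three SHA-512 files. In C terms (the _old name is hypothetical, shown only for contrast):

#include <linux/linkage.h>
#include <linux/types.h>

/* before: arg1 = input data (%rdi), arg2 = digest (%rsi) */
asmlinkage void sha256_transform_avx_old(const char *data, u32 *digest,
                                         u64 rounds);

/* after: digest first (%rdi), data second (%rsi), blocks third (%rdx) */
asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
                                     u64 rounds);
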
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 9e86944c539d..570ec5ec62d7 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S | |||
@@ -91,12 +91,12 @@ BYTE_FLIP_MASK = %ymm13 | |||
91 | X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK | 91 | X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK |
92 | 92 | ||
93 | NUM_BLKS = %rdx # 3rd arg | 93 | NUM_BLKS = %rdx # 3rd arg |
94 | CTX = %rsi # 2nd arg | 94 | INP = %rsi # 2nd arg |
95 | INP = %rdi # 1st arg | 95 | CTX = %rdi # 1st arg |
96 | c = %ecx | 96 | c = %ecx |
97 | d = %r8d | 97 | d = %r8d |
98 | e = %edx # clobbers NUM_BLKS | 98 | e = %edx # clobbers NUM_BLKS |
99 | y3 = %edi # clobbers INP | 99 | y3 = %esi # clobbers INP |
100 | 100 | ||
101 | 101 | ||
102 | TBL = %rbp | 102 | TBL = %rbp |
@@ -523,8 +523,8 @@ STACK_SIZE = _RSP + _RSP_SIZE | |||
523 | 523 | ||
524 | ######################################################################## | 524 | ######################################################################## |
525 | ## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) | 525 | ## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) |
526 | ## arg 1 : pointer to input data | 526 | ## arg 1 : pointer to digest |
527 | ## arg 2 : pointer to digest | 527 | ## arg 2 : pointer to input data |
528 | ## arg 3 : Num blocks | 528 | ## arg 3 : Num blocks |
529 | ######################################################################## | 529 | ######################################################################## |
530 | .text | 530 | .text |
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index f833b74d902b..2cedc44e8121 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S | |||
@@ -88,10 +88,10 @@ SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00 | |||
88 | BYTE_FLIP_MASK = %xmm12 | 88 | BYTE_FLIP_MASK = %xmm12 |
89 | 89 | ||
90 | NUM_BLKS = %rdx # 3rd arg | 90 | NUM_BLKS = %rdx # 3rd arg |
91 | CTX = %rsi # 2nd arg | 91 | INP = %rsi # 2nd arg |
92 | INP = %rdi # 1st arg | 92 | CTX = %rdi # 1st arg |
93 | 93 | ||
94 | SRND = %rdi # clobbers INP | 94 | SRND = %rsi # clobbers INP |
95 | c = %ecx | 95 | c = %ecx |
96 | d = %r8d | 96 | d = %r8d |
97 | e = %edx | 97 | e = %edx |
@@ -348,8 +348,8 @@ a = TMP_ | |||
348 | 348 | ||
349 | ######################################################################## | 349 | ######################################################################## |
350 | ## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) | 350 | ## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks) |
351 | ## arg 1 : pointer to input data | 351 | ## arg 1 : pointer to digest |
352 | ## arg 2 : pointer to digest | 352 | ## arg 2 : pointer to input data |
353 | ## arg 3 : Num blocks | 353 | ## arg 3 : Num blocks |
354 | ######################################################################## | 354 | ######################################################################## |
355 | .text | 355 | .text |
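
With all three sha256 variants sharing one prototype, the glue file can keep a single function pointer and choose the best implementation once at module init. A hedged sketch of that dispatch, with the CPU feature tests simplified (the real init also checks OS XSAVE support and refuses to load without at least SSSE3):

#include <asm/cpufeature.h>
#include <linux/linkage.h>
#include <linux/types.h>

asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha256_transform_avx(u32 *digest, const char *data, u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, u64 rounds);
#endif

static void (*sha256_transform_asm)(u32 *digest, const char *data, u64 rounds);

static void pick_sha256_transform(void)
{
#ifdef CONFIG_AS_AVX2
        if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) {
                sha256_transform_asm = sha256_transform_rorx;
                return;
        }
#endif
#ifdef CONFIG_AS_AVX
        if (boot_cpu_has(X86_FEATURE_AVX)) {
                sha256_transform_asm = sha256_transform_avx;
                return;
        }
#endif
        sha256_transform_asm = sha256_transform_ssse3;
}
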
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 8fad72f4dfd2..ccc338881ee8 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c | |||
@@ -36,195 +36,74 @@ | |||
36 | #include <linux/cryptohash.h> | 36 | #include <linux/cryptohash.h> |
37 | #include <linux/types.h> | 37 | #include <linux/types.h> |
38 | #include <crypto/sha.h> | 38 | #include <crypto/sha.h> |
39 | #include <asm/byteorder.h> | 39 | #include <crypto/sha256_base.h> |
40 | #include <asm/i387.h> | 40 | #include <asm/i387.h> |
41 | #include <asm/xcr.h> | 41 | #include <asm/xcr.h> |
42 | #include <asm/xsave.h> | 42 | #include <asm/xsave.h> |
43 | #include <linux/string.h> | 43 | #include <linux/string.h> |
44 | 44 | ||
45 | asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest, | 45 | asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, |
46 | u64 rounds); | 46 | u64 rounds); |
47 | #ifdef CONFIG_AS_AVX | 47 | #ifdef CONFIG_AS_AVX |
48 | asmlinkage void sha256_transform_avx(const char *data, u32 *digest, | 48 | asmlinkage void sha256_transform_avx(u32 *digest, const char *data, |
49 | u64 rounds); | 49 | u64 rounds); |
50 | #endif | 50 | #endif |
51 | #ifdef CONFIG_AS_AVX2 | 51 | #ifdef CONFIG_AS_AVX2 |
52 | asmlinkage void sha256_transform_rorx(const char *data, u32 *digest, | 52 | asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, |
53 | u64 rounds); | 53 | u64 rounds); |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64); | 56 | static void (*sha256_transform_asm)(u32 *, const char *, u64); |
57 | |||
58 | |||
59 | static int sha256_ssse3_init(struct shash_desc *desc) | ||
60 | { | ||
61 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
62 | |||
63 | sctx->state[0] = SHA256_H0; | ||
64 | sctx->state[1] = SHA256_H1; | ||
65 | sctx->state[2] = SHA256_H2; | ||
66 | sctx->state[3] = SHA256_H3; | ||
67 | sctx->state[4] = SHA256_H4; | ||
68 | sctx->state[5] = SHA256_H5; | ||
69 | sctx->state[6] = SHA256_H6; | ||
70 | sctx->state[7] = SHA256_H7; | ||
71 | sctx->count = 0; | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
77 | unsigned int len, unsigned int partial) | ||
78 | { | ||
79 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
80 | unsigned int done = 0; | ||
81 | |||
82 | sctx->count += len; | ||
83 | |||
84 | if (partial) { | ||
85 | done = SHA256_BLOCK_SIZE - partial; | ||
86 | memcpy(sctx->buf + partial, data, done); | ||
87 | sha256_transform_asm(sctx->buf, sctx->state, 1); | ||
88 | } | ||
89 | |||
90 | if (len - done >= SHA256_BLOCK_SIZE) { | ||
91 | const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; | ||
92 | |||
93 | sha256_transform_asm(data + done, sctx->state, (u64) rounds); | ||
94 | |||
95 | done += rounds * SHA256_BLOCK_SIZE; | ||
96 | } | ||
97 | |||
98 | memcpy(sctx->buf, data + done, len - done); | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | 57 | ||
103 | static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, | 58 | static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, |
104 | unsigned int len) | 59 | unsigned int len) |
105 | { | 60 | { |
106 | struct sha256_state *sctx = shash_desc_ctx(desc); | 61 | struct sha256_state *sctx = shash_desc_ctx(desc); |
107 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
108 | int res; | ||
109 | 62 | ||
110 | /* Handle the fast case right here */ | 63 | if (!irq_fpu_usable() || |
111 | if (partial + len < SHA256_BLOCK_SIZE) { | 64 | (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) |
112 | sctx->count += len; | 65 | return crypto_sha256_update(desc, data, len); |
113 | memcpy(sctx->buf + partial, data, len); | ||
114 | 66 | ||
115 | return 0; | 67 | /* make sure casting to sha256_block_fn() is safe */ |
116 | } | 68 | BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0); |
117 | |||
118 | if (!irq_fpu_usable()) { | ||
119 | res = crypto_sha256_update(desc, data, len); | ||
120 | } else { | ||
121 | kernel_fpu_begin(); | ||
122 | res = __sha256_ssse3_update(desc, data, len, partial); | ||
123 | kernel_fpu_end(); | ||
124 | } | ||
125 | |||
126 | return res; | ||
127 | } | ||
128 | 69 | ||
129 | 70 | kernel_fpu_begin(); | |
130 | /* Add padding and return the message digest. */ | 71 | sha256_base_do_update(desc, data, len, |
131 | static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) | 72 | (sha256_block_fn *)sha256_transform_asm); |
132 | { | 73 | kernel_fpu_end(); |
133 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
134 | unsigned int i, index, padlen; | ||
135 | __be32 *dst = (__be32 *)out; | ||
136 | __be64 bits; | ||
137 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | ||
138 | |||
139 | bits = cpu_to_be64(sctx->count << 3); | ||
140 | |||
141 | /* Pad out to 56 mod 64 and append length */ | ||
142 | index = sctx->count % SHA256_BLOCK_SIZE; | ||
143 | padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); | ||
144 | |||
145 | if (!irq_fpu_usable()) { | ||
146 | crypto_sha256_update(desc, padding, padlen); | ||
147 | crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
148 | } else { | ||
149 | kernel_fpu_begin(); | ||
150 | /* We need to fill a whole block for __sha256_ssse3_update() */ | ||
151 | if (padlen <= 56) { | ||
152 | sctx->count += padlen; | ||
153 | memcpy(sctx->buf + index, padding, padlen); | ||
154 | } else { | ||
155 | __sha256_ssse3_update(desc, padding, padlen, index); | ||
156 | } | ||
157 | __sha256_ssse3_update(desc, (const u8 *)&bits, | ||
158 | sizeof(bits), 56); | ||
159 | kernel_fpu_end(); | ||
160 | } | ||
161 | |||
162 | /* Store state in digest */ | ||
163 | for (i = 0; i < 8; i++) | ||
164 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
165 | |||
166 | /* Wipe context */ | ||
167 | memset(sctx, 0, sizeof(*sctx)); | ||
168 | 74 | ||
169 | return 0; | 75 | return 0; |
170 | } | 76 | } |
171 | 77 | ||
172 | static int sha256_ssse3_export(struct shash_desc *desc, void *out) | 78 | static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, |
79 | unsigned int len, u8 *out) | ||
173 | { | 80 | { |
174 | struct sha256_state *sctx = shash_desc_ctx(desc); | 81 | if (!irq_fpu_usable()) |
82 | return crypto_sha256_finup(desc, data, len, out); | ||
175 | 83 | ||
176 | memcpy(out, sctx, sizeof(*sctx)); | 84 | kernel_fpu_begin(); |
85 | if (len) | ||
86 | sha256_base_do_update(desc, data, len, | ||
87 | (sha256_block_fn *)sha256_transform_asm); | ||
88 | sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm); | ||
89 | kernel_fpu_end(); | ||
177 | 90 | ||
178 | return 0; | 91 | return sha256_base_finish(desc, out); |
179 | } | 92 | } |
180 | 93 | ||
181 | static int sha256_ssse3_import(struct shash_desc *desc, const void *in) | 94 | /* Add padding and return the message digest. */ |
182 | { | 95 | static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) |
183 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
184 | |||
185 | memcpy(sctx, in, sizeof(*sctx)); | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | static int sha224_ssse3_init(struct shash_desc *desc) | ||
191 | { | ||
192 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
193 | |||
194 | sctx->state[0] = SHA224_H0; | ||
195 | sctx->state[1] = SHA224_H1; | ||
196 | sctx->state[2] = SHA224_H2; | ||
197 | sctx->state[3] = SHA224_H3; | ||
198 | sctx->state[4] = SHA224_H4; | ||
199 | sctx->state[5] = SHA224_H5; | ||
200 | sctx->state[6] = SHA224_H6; | ||
201 | sctx->state[7] = SHA224_H7; | ||
202 | sctx->count = 0; | ||
203 | |||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash) | ||
208 | { | 96 | { |
209 | u8 D[SHA256_DIGEST_SIZE]; | 97 | return sha256_ssse3_finup(desc, NULL, 0, out); |
210 | |||
211 | sha256_ssse3_final(desc, D); | ||
212 | |||
213 | memcpy(hash, D, SHA224_DIGEST_SIZE); | ||
214 | memzero_explicit(D, SHA256_DIGEST_SIZE); | ||
215 | |||
216 | return 0; | ||
217 | } | 98 | } |
218 | 99 | ||
219 | static struct shash_alg algs[] = { { | 100 | static struct shash_alg algs[] = { { |
220 | .digestsize = SHA256_DIGEST_SIZE, | 101 | .digestsize = SHA256_DIGEST_SIZE, |
221 | .init = sha256_ssse3_init, | 102 | .init = sha256_base_init, |
222 | .update = sha256_ssse3_update, | 103 | .update = sha256_ssse3_update, |
223 | .final = sha256_ssse3_final, | 104 | .final = sha256_ssse3_final, |
224 | .export = sha256_ssse3_export, | 105 | .finup = sha256_ssse3_finup, |
225 | .import = sha256_ssse3_import, | ||
226 | .descsize = sizeof(struct sha256_state), | 106 | .descsize = sizeof(struct sha256_state), |
227 | .statesize = sizeof(struct sha256_state), | ||
228 | .base = { | 107 | .base = { |
229 | .cra_name = "sha256", | 108 | .cra_name = "sha256", |
230 | .cra_driver_name = "sha256-ssse3", | 109 | .cra_driver_name = "sha256-ssse3", |
@@ -235,13 +114,11 @@ static struct shash_alg algs[] = { { | |||
235 | } | 114 | } |
236 | }, { | 115 | }, { |
237 | .digestsize = SHA224_DIGEST_SIZE, | 116 | .digestsize = SHA224_DIGEST_SIZE, |
238 | .init = sha224_ssse3_init, | 117 | .init = sha224_base_init, |
239 | .update = sha256_ssse3_update, | 118 | .update = sha256_ssse3_update, |
240 | .final = sha224_ssse3_final, | 119 | .final = sha256_ssse3_final, |
241 | .export = sha256_ssse3_export, | 120 | .finup = sha256_ssse3_finup, |
242 | .import = sha256_ssse3_import, | ||
243 | .descsize = sizeof(struct sha256_state), | 121 | .descsize = sizeof(struct sha256_state), |
244 | .statesize = sizeof(struct sha256_state), | ||
245 | .base = { | 122 | .base = { |
246 | .cra_name = "sha224", | 123 | .cra_name = "sha224", |
247 | .cra_driver_name = "sha224-ssse3", | 124 | .cra_driver_name = "sha224-ssse3", |
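
For consumers nothing changes: "sha256" still resolves to the highest-priority registered implementation, and the new finup hook simply lets one kernel_fpu_begin()/end() section cover the tail update and the padding together. A hedged usage sketch through the shash API:

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>

static int sha256_digest_demo(const u8 *data, unsigned int len,
                              u8 out[SHA256_DIGEST_SIZE])
{
        struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);
        int err;

        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        {
                SHASH_DESC_ON_STACK(desc, tfm);

                desc->tfm = tfm;
                desc->flags = 0;
                err = crypto_shash_digest(desc, data, len, out);
        }
        crypto_free_shash(tfm);
        return err;
}
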
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 974dde9bc6cd..565274d6a641 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S | |||
@@ -54,9 +54,9 @@ | |||
54 | 54 | ||
55 | # Virtual Registers | 55 | # Virtual Registers |
56 | # ARG1 | 56 | # ARG1 |
57 | msg = %rdi | 57 | digest = %rdi |
58 | # ARG2 | 58 | # ARG2 |
59 | digest = %rsi | 59 | msg = %rsi |
60 | # ARG3 | 60 | # ARG3 |
61 | msglen = %rdx | 61 | msglen = %rdx |
62 | T1 = %rcx | 62 | T1 = %rcx |
@@ -271,7 +271,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE | |||
271 | .endm | 271 | .endm |
272 | 272 | ||
273 | ######################################################################## | 273 | ######################################################################## |
274 | # void sha512_transform_avx(const void* M, void* D, u64 L) | 274 | # void sha512_transform_avx(void* D, const void* M, u64 L) |
275 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | 275 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
276 | # The size of the message pointed to by M must be an integer multiple of SHA512 | 276 | # The size of the message pointed to by M must be an integer multiple of SHA512 |
277 | # message blocks. | 277 | # message blocks. |
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 568b96105f5c..a4771dcd1fcf 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S | |||
@@ -70,9 +70,9 @@ XFER = YTMP0 | |||
70 | BYTE_FLIP_MASK = %ymm9 | 70 | BYTE_FLIP_MASK = %ymm9 |
71 | 71 | ||
72 | # 1st arg | 72 | # 1st arg |
73 | INP = %rdi | 73 | CTX = %rdi |
74 | # 2nd arg | 74 | # 2nd arg |
75 | CTX = %rsi | 75 | INP = %rsi |
76 | # 3rd arg | 76 | # 3rd arg |
77 | NUM_BLKS = %rdx | 77 | NUM_BLKS = %rdx |
78 | 78 | ||
@@ -562,7 +562,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE | |||
562 | .endm | 562 | .endm |
563 | 563 | ||
564 | ######################################################################## | 564 | ######################################################################## |
565 | # void sha512_transform_rorx(const void* M, void* D, uint64_t L)# | 565 | # void sha512_transform_rorx(void* D, const void* M, uint64_t L)# |
566 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | 566 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
567 | # The size of the message pointed to by M must be an integer multiple of SHA512 | 567 | # The size of the message pointed to by M must be an integer multiple of SHA512 |
568 | # message blocks. | 568 | # message blocks. |
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index fb56855d51f5..e610e29cbc81 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S | |||
@@ -53,9 +53,9 @@ | |||
53 | 53 | ||
54 | # Virtual Registers | 54 | # Virtual Registers |
55 | # ARG1 | 55 | # ARG1 |
56 | msg = %rdi | 56 | digest = %rdi |
57 | # ARG2 | 57 | # ARG2 |
58 | digest = %rsi | 58 | msg = %rsi |
59 | # ARG3 | 59 | # ARG3 |
60 | msglen = %rdx | 60 | msglen = %rdx |
61 | T1 = %rcx | 61 | T1 = %rcx |
@@ -269,7 +269,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE | |||
269 | .endm | 269 | .endm |
270 | 270 | ||
271 | ######################################################################## | 271 | ######################################################################## |
272 | # void sha512_transform_ssse3(const void* M, void* D, u64 L)# | 272 | # void sha512_transform_ssse3(void* D, const void* M, u64 L)# |
273 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. | 273 | # Purpose: Updates the SHA512 digest stored at D with the message stored in M. |
274 | # The size of the message pointed to by M must be an integer multiple of SHA512 | 274 | # The size of the message pointed to by M must be an integer multiple of SHA512 |
275 | # message blocks. | 275 | # message blocks. |
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 0b6af26832bf..d9fa4c1e063f 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c | |||
@@ -34,205 +34,75 @@ | |||
34 | #include <linux/cryptohash.h> | 34 | #include <linux/cryptohash.h> |
35 | #include <linux/types.h> | 35 | #include <linux/types.h> |
36 | #include <crypto/sha.h> | 36 | #include <crypto/sha.h> |
37 | #include <asm/byteorder.h> | 37 | #include <crypto/sha512_base.h> |
38 | #include <asm/i387.h> | 38 | #include <asm/i387.h> |
39 | #include <asm/xcr.h> | 39 | #include <asm/xcr.h> |
40 | #include <asm/xsave.h> | 40 | #include <asm/xsave.h> |
41 | 41 | ||
42 | #include <linux/string.h> | 42 | #include <linux/string.h> |
43 | 43 | ||
44 | asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest, | 44 | asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, |
45 | u64 rounds); | 45 | u64 rounds); |
46 | #ifdef CONFIG_AS_AVX | 46 | #ifdef CONFIG_AS_AVX |
47 | asmlinkage void sha512_transform_avx(const char *data, u64 *digest, | 47 | asmlinkage void sha512_transform_avx(u64 *digest, const char *data, |
48 | u64 rounds); | 48 | u64 rounds); |
49 | #endif | 49 | #endif |
50 | #ifdef CONFIG_AS_AVX2 | 50 | #ifdef CONFIG_AS_AVX2 |
51 | asmlinkage void sha512_transform_rorx(const char *data, u64 *digest, | 51 | asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, |
52 | u64 rounds); | 52 | u64 rounds); |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64); | 55 | static void (*sha512_transform_asm)(u64 *, const char *, u64); |
56 | |||
57 | |||
58 | static int sha512_ssse3_init(struct shash_desc *desc) | ||
59 | { | ||
60 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
61 | |||
62 | sctx->state[0] = SHA512_H0; | ||
63 | sctx->state[1] = SHA512_H1; | ||
64 | sctx->state[2] = SHA512_H2; | ||
65 | sctx->state[3] = SHA512_H3; | ||
66 | sctx->state[4] = SHA512_H4; | ||
67 | sctx->state[5] = SHA512_H5; | ||
68 | sctx->state[6] = SHA512_H6; | ||
69 | sctx->state[7] = SHA512_H7; | ||
70 | sctx->count[0] = sctx->count[1] = 0; | ||
71 | |||
72 | return 0; | ||
73 | } | ||
74 | 56 | ||
75 | static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | 57 | static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, |
76 | unsigned int len, unsigned int partial) | 58 | unsigned int len) |
77 | { | 59 | { |
78 | struct sha512_state *sctx = shash_desc_ctx(desc); | 60 | struct sha512_state *sctx = shash_desc_ctx(desc); |
79 | unsigned int done = 0; | ||
80 | |||
81 | sctx->count[0] += len; | ||
82 | if (sctx->count[0] < len) | ||
83 | sctx->count[1]++; | ||
84 | 61 | ||
85 | if (partial) { | 62 | if (!irq_fpu_usable() || |
86 | done = SHA512_BLOCK_SIZE - partial; | 63 | (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) |
87 | memcpy(sctx->buf + partial, data, done); | 64 | return crypto_sha512_update(desc, data, len); |
88 | sha512_transform_asm(sctx->buf, sctx->state, 1); | ||
89 | } | ||
90 | |||
91 | if (len - done >= SHA512_BLOCK_SIZE) { | ||
92 | const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; | ||
93 | 65 | ||
94 | sha512_transform_asm(data + done, sctx->state, (u64) rounds); | 66 | /* make sure casting to sha512_block_fn() is safe */ |
95 | 67 | BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0); | |
96 | done += rounds * SHA512_BLOCK_SIZE; | ||
97 | } | ||
98 | 68 | ||
99 | memcpy(sctx->buf, data + done, len - done); | 69 | kernel_fpu_begin(); |
70 | sha512_base_do_update(desc, data, len, | ||
71 | (sha512_block_fn *)sha512_transform_asm); | ||
72 | kernel_fpu_end(); | ||
100 | 73 | ||
101 | return 0; | 74 | return 0; |
102 | } | 75 | } |
103 | 76 | ||
104 | static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, | 77 | static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, |
105 | unsigned int len) | 78 | unsigned int len, u8 *out) |
106 | { | 79 | { |
107 | struct sha512_state *sctx = shash_desc_ctx(desc); | 80 | if (!irq_fpu_usable()) |
108 | unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; | 81 | return crypto_sha512_finup(desc, data, len, out); |
109 | int res; | ||
110 | |||
111 | /* Handle the fast case right here */ | ||
112 | if (partial + len < SHA512_BLOCK_SIZE) { | ||
113 | sctx->count[0] += len; | ||
114 | if (sctx->count[0] < len) | ||
115 | sctx->count[1]++; | ||
116 | memcpy(sctx->buf + partial, data, len); | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | 82 | ||
121 | if (!irq_fpu_usable()) { | 83 | kernel_fpu_begin(); |
122 | res = crypto_sha512_update(desc, data, len); | 84 | if (len) |
123 | } else { | 85 | sha512_base_do_update(desc, data, len, |
124 | kernel_fpu_begin(); | 86 | (sha512_block_fn *)sha512_transform_asm); |
125 | res = __sha512_ssse3_update(desc, data, len, partial); | 87 | sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm); |
126 | kernel_fpu_end(); | 88 | kernel_fpu_end(); |
127 | } | ||
128 | 89 | ||
129 | return res; | 90 | return sha512_base_finish(desc, out); |
130 | } | 91 | } |
131 | 92 | ||
132 | |||
133 | /* Add padding and return the message digest. */ | 93 | /* Add padding and return the message digest. */ |
134 | static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) | 94 | static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) |
135 | { | 95 | { |
136 | struct sha512_state *sctx = shash_desc_ctx(desc); | 96 | return sha512_ssse3_finup(desc, NULL, 0, out); |
137 | unsigned int i, index, padlen; | ||
138 | __be64 *dst = (__be64 *)out; | ||
139 | __be64 bits[2]; | ||
140 | static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; | ||
141 | |||
142 | /* save number of bits */ | ||
143 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
144 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
145 | |||
146 | /* Pad out to 112 mod 128 and append length */ | ||
147 | index = sctx->count[0] & 0x7f; | ||
148 | padlen = (index < 112) ? (112 - index) : ((128+112) - index); | ||
149 | |||
150 | if (!irq_fpu_usable()) { | ||
151 | crypto_sha512_update(desc, padding, padlen); | ||
152 | crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
153 | } else { | ||
154 | kernel_fpu_begin(); | ||
155 | /* We need to fill a whole block for __sha512_ssse3_update() */ | ||
156 | if (padlen <= 112) { | ||
157 | sctx->count[0] += padlen; | ||
158 | if (sctx->count[0] < padlen) | ||
159 | sctx->count[1]++; | ||
160 | memcpy(sctx->buf + index, padding, padlen); | ||
161 | } else { | ||
162 | __sha512_ssse3_update(desc, padding, padlen, index); | ||
163 | } | ||
164 | __sha512_ssse3_update(desc, (const u8 *)&bits, | ||
165 | sizeof(bits), 112); | ||
166 | kernel_fpu_end(); | ||
167 | } | ||
168 | |||
169 | /* Store state in digest */ | ||
170 | for (i = 0; i < 8; i++) | ||
171 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
172 | |||
173 | /* Wipe context */ | ||
174 | memset(sctx, 0, sizeof(*sctx)); | ||
175 | |||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static int sha512_ssse3_export(struct shash_desc *desc, void *out) | ||
180 | { | ||
181 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
182 | |||
183 | memcpy(out, sctx, sizeof(*sctx)); | ||
184 | |||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | static int sha512_ssse3_import(struct shash_desc *desc, const void *in) | ||
189 | { | ||
190 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
191 | |||
192 | memcpy(sctx, in, sizeof(*sctx)); | ||
193 | |||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | static int sha384_ssse3_init(struct shash_desc *desc) | ||
198 | { | ||
199 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
200 | |||
201 | sctx->state[0] = SHA384_H0; | ||
202 | sctx->state[1] = SHA384_H1; | ||
203 | sctx->state[2] = SHA384_H2; | ||
204 | sctx->state[3] = SHA384_H3; | ||
205 | sctx->state[4] = SHA384_H4; | ||
206 | sctx->state[5] = SHA384_H5; | ||
207 | sctx->state[6] = SHA384_H6; | ||
208 | sctx->state[7] = SHA384_H7; | ||
209 | |||
210 | sctx->count[0] = sctx->count[1] = 0; | ||
211 | |||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash) | ||
216 | { | ||
217 | u8 D[SHA512_DIGEST_SIZE]; | ||
218 | |||
219 | sha512_ssse3_final(desc, D); | ||
220 | |||
221 | memcpy(hash, D, SHA384_DIGEST_SIZE); | ||
222 | memzero_explicit(D, SHA512_DIGEST_SIZE); | ||
223 | |||
224 | return 0; | ||
225 | } | 97 | } |
226 | 98 | ||
227 | static struct shash_alg algs[] = { { | 99 | static struct shash_alg algs[] = { { |
228 | .digestsize = SHA512_DIGEST_SIZE, | 100 | .digestsize = SHA512_DIGEST_SIZE, |
229 | .init = sha512_ssse3_init, | 101 | .init = sha512_base_init, |
230 | .update = sha512_ssse3_update, | 102 | .update = sha512_ssse3_update, |
231 | .final = sha512_ssse3_final, | 103 | .final = sha512_ssse3_final, |
232 | .export = sha512_ssse3_export, | 104 | .finup = sha512_ssse3_finup, |
233 | .import = sha512_ssse3_import, | ||
234 | .descsize = sizeof(struct sha512_state), | 105 | .descsize = sizeof(struct sha512_state), |
235 | .statesize = sizeof(struct sha512_state), | ||
236 | .base = { | 106 | .base = { |
237 | .cra_name = "sha512", | 107 | .cra_name = "sha512", |
238 | .cra_driver_name = "sha512-ssse3", | 108 | .cra_driver_name = "sha512-ssse3", |
@@ -243,13 +113,11 @@ static struct shash_alg algs[] = { { | |||
243 | } | 113 | } |
244 | }, { | 114 | }, { |
245 | .digestsize = SHA384_DIGEST_SIZE, | 115 | .digestsize = SHA384_DIGEST_SIZE, |
246 | .init = sha384_ssse3_init, | 116 | .init = sha384_base_init, |
247 | .update = sha512_ssse3_update, | 117 | .update = sha512_ssse3_update, |
248 | .final = sha384_ssse3_final, | 118 | .final = sha512_ssse3_final, |
249 | .export = sha512_ssse3_export, | 119 | .finup = sha512_ssse3_finup, |
250 | .import = sha512_ssse3_import, | ||
251 | .descsize = sizeof(struct sha512_state), | 120 | .descsize = sizeof(struct sha512_state), |
252 | .statesize = sizeof(struct sha512_state), | ||
253 | .base = { | 121 | .base = { |
254 | .cra_name = "sha384", | 122 | .cra_name = "sha384", |
255 | .cra_driver_name = "sha384-ssse3", | 123 | .cra_driver_name = "sha384-ssse3", |
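
SHA-512 keeps one wrinkle through the conversion: its message bit count is 128 bits wide, stored as two u64 words. Block alignment depends only on the low word, hence the sctx->count[0] % SHA512_BLOCK_SIZE test in the new update path; the carry handling visible in the deleted code now lives inside sha512_base_do_update(). The equivalent bookkeeping, for reference:

#include <crypto/sha.h>

/* the length accounting the base layer took over (hedged sketch) */
static void sha512_count_add(struct sha512_state *sctx, unsigned int len)
{
        sctx->count[0] += len;
        if (sctx->count[0] < len)       /* unsigned wraparound => carry */
                sctx->count[1]++;
}
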
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 1ac531ea9bcc..b5e2d5651851 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -340,7 +340,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
340 | .cra_name = "__ecb-twofish-avx", | 340 | .cra_name = "__ecb-twofish-avx", |
341 | .cra_driver_name = "__driver-ecb-twofish-avx", | 341 | .cra_driver_name = "__driver-ecb-twofish-avx", |
342 | .cra_priority = 0, | 342 | .cra_priority = 0, |
343 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 343 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
344 | CRYPTO_ALG_INTERNAL, | ||
344 | .cra_blocksize = TF_BLOCK_SIZE, | 345 | .cra_blocksize = TF_BLOCK_SIZE, |
345 | .cra_ctxsize = sizeof(struct twofish_ctx), | 346 | .cra_ctxsize = sizeof(struct twofish_ctx), |
346 | .cra_alignmask = 0, | 347 | .cra_alignmask = 0, |
@@ -359,7 +360,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
359 | .cra_name = "__cbc-twofish-avx", | 360 | .cra_name = "__cbc-twofish-avx", |
360 | .cra_driver_name = "__driver-cbc-twofish-avx", | 361 | .cra_driver_name = "__driver-cbc-twofish-avx", |
361 | .cra_priority = 0, | 362 | .cra_priority = 0, |
362 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 363 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
364 | CRYPTO_ALG_INTERNAL, | ||
363 | .cra_blocksize = TF_BLOCK_SIZE, | 365 | .cra_blocksize = TF_BLOCK_SIZE, |
364 | .cra_ctxsize = sizeof(struct twofish_ctx), | 366 | .cra_ctxsize = sizeof(struct twofish_ctx), |
365 | .cra_alignmask = 0, | 367 | .cra_alignmask = 0, |
@@ -378,7 +380,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
378 | .cra_name = "__ctr-twofish-avx", | 380 | .cra_name = "__ctr-twofish-avx", |
379 | .cra_driver_name = "__driver-ctr-twofish-avx", | 381 | .cra_driver_name = "__driver-ctr-twofish-avx", |
380 | .cra_priority = 0, | 382 | .cra_priority = 0, |
381 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 383 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
384 | CRYPTO_ALG_INTERNAL, | ||
382 | .cra_blocksize = 1, | 385 | .cra_blocksize = 1, |
383 | .cra_ctxsize = sizeof(struct twofish_ctx), | 386 | .cra_ctxsize = sizeof(struct twofish_ctx), |
384 | .cra_alignmask = 0, | 387 | .cra_alignmask = 0, |
@@ -398,7 +401,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
398 | .cra_name = "__lrw-twofish-avx", | 401 | .cra_name = "__lrw-twofish-avx", |
399 | .cra_driver_name = "__driver-lrw-twofish-avx", | 402 | .cra_driver_name = "__driver-lrw-twofish-avx", |
400 | .cra_priority = 0, | 403 | .cra_priority = 0, |
401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 404 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
405 | CRYPTO_ALG_INTERNAL, | ||
402 | .cra_blocksize = TF_BLOCK_SIZE, | 406 | .cra_blocksize = TF_BLOCK_SIZE, |
403 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | 407 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), |
404 | .cra_alignmask = 0, | 408 | .cra_alignmask = 0, |
@@ -421,7 +425,8 @@ static struct crypto_alg twofish_algs[10] = { { | |||
421 | .cra_name = "__xts-twofish-avx", | 425 | .cra_name = "__xts-twofish-avx", |
422 | .cra_driver_name = "__driver-xts-twofish-avx", | 426 | .cra_driver_name = "__driver-xts-twofish-avx", |
423 | .cra_priority = 0, | 427 | .cra_priority = 0, |
424 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | 428 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | |
429 | CRYPTO_ALG_INTERNAL, | ||
425 | .cra_blocksize = TF_BLOCK_SIZE, | 430 | .cra_blocksize = TF_BLOCK_SIZE, |
426 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | 431 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), |
427 | .cra_alignmask = 0, | 432 | .cra_alignmask = 0, |
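
The twofish hunks close out the same CRYPTO_ALG_INTERNAL pattern. The user-visible "ecb(twofish)" and friends in these files are async wrappers built on ablk_helper, so their common init presumably gains the same explicit opt-in elsewhere in the series, not shown in this diff, along the lines of:

#include <crypto/cryptd.h>
#include <linux/crypto.h>

/* hedged sketch of the wrapper-side opt-in; drv_name would be e.g.
 * "__driver-ecb-twofish-avx" */
static struct cryptd_ablkcipher *alloc_internal_helper(const char *drv_name)
{
        return cryptd_alloc_ablkcipher(drv_name,
                                       CRYPTO_ALG_INTERNAL,
                                       CRYPTO_ALG_INTERNAL);
}
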