diff options
author | Kim Phillips <kim.phillips@freescale.com> | 2015-03-06 19:46:21 -0500 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2015-03-09 06:06:19 -0400 |
commit | 3265c4babe93832167cb148083a0544548c23e6a (patch) | |
tree | fa78a5f4d61fec514792367ef0c24a827da2b6eb /arch/powerpc/crypto/sha1-spe-asm.S | |
parent | a508412b169d5398dc5f800147097b255c2941be (diff) |
crypto: powerpc - move files to fix build error
The current cryptodev-2.6 tree commits:
d9850fc529ef ("crypto: powerpc/sha1 - kernel config")
50ba29aaa7b0 ("crypto: powerpc/sha1 - glue")
failed to properly place files under arch/powerpc/crypto, which
leads to build errors:
make[1]: *** No rule to make target 'arch/powerpc/crypto/sha1-spe-asm.o', needed by 'arch/powerpc/crypto/sha1-ppc-spe.o'. Stop.
make[1]: *** No rule to make target 'arch/powerpc/crypto/sha1_spe_glue.o', needed by 'arch/powerpc/crypto/sha1-ppc-spe.o'. Stop.
Makefile:947: recipe for target 'arch/powerpc/crypto' failed
Move the two sha1 spe files under crypto/, and whilst there, rename
other powerpc crypto files with underscores to use dashes for
consistency.
Cc: Markus Stockhausen <stockhausen@collogia.de>
Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/powerpc/crypto/sha1-spe-asm.S')
-rw-r--r-- | arch/powerpc/crypto/sha1-spe-asm.S | 299 |
1 files changed, 299 insertions, 0 deletions
diff --git a/arch/powerpc/crypto/sha1-spe-asm.S b/arch/powerpc/crypto/sha1-spe-asm.S new file mode 100644 index 000000000000..fcb6cf002889 --- /dev/null +++ b/arch/powerpc/crypto/sha1-spe-asm.S | |||
@@ -0,0 +1,299 @@ | |||
1 | /* | ||
2 | * Fast SHA-1 implementation for SPE instruction set (PPC) | ||
3 | * | ||
4 | * This code makes use of the SPE SIMD instruction set as defined in | ||
5 | * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf | ||
6 | * Implementation is based on optimization guide notes from | ||
7 | * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf | ||
8 | * | ||
9 | * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License as published by the Free | ||
13 | * Software Foundation; either version 2 of the License, or (at your option) | ||
14 | * any later version. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #include <asm/ppc_asm.h> | ||
19 | #include <asm/asm-offsets.h> | ||
20 | |||
/*
 * Register aliases used by the macros and by ppc_spe_sha1_transform below.
 * Aliases marked "64 bit" are operated on with SPE ev* instructions; the
 * others are used with ordinary 32 bit integer instructions.
 * r14-r23 (rW0-rW7, rT0, rK) are the registers saved by INITIALIZE and
 * restored by FINALIZE.
 */
21 | #define rHP r3 /* pointer to hash value */ | |
22 | #define rWP r4 /* pointer to input */ | |
23 | #define rKP r5 /* pointer to constants; on entry r5 still holds the loop count, which is moved to CTR (mtctr r5) before rKP is loaded */ | |
24 | |||
25 | #define rW0 r14 /* 64 bit round words */ | |
26 | #define rW1 r15 | |
27 | #define rW2 r16 | |
28 | #define rW3 r17 | |
29 | #define rW4 r18 | |
30 | #define rW5 r19 | |
31 | #define rW6 r20 | |
32 | #define rW7 r21 | |
33 | |||
34 | #define rH0 r6 /* 32 bit hash values */ | |
35 | #define rH1 r7 | |
36 | #define rH2 r8 | |
37 | #define rH3 r9 | |
38 | #define rH4 r10 | |
39 | |||
40 | #define rT0 r22 /* 64 bit temporary */ | |
41 | #define rT1 r0 /* 32 bit temporaries */ | |
42 | #define rT2 r11 | |
43 | #define rT3 r12 | |
44 | |||
45 | #define rK r23 /* 64 bit constant; r23 is non volatile and is saved by INITIALIZE / restored by FINALIZE */ | |
46 | |||
/*
 * Constant reload helpers. The round macros paste their "k" argument into
 * LOAD_K##k##1, so k selects one of the macros below:
 *   k = 0     expands to nothing; rK keeps whatever it currently holds.
 *   k = 1..4  reload rK from the word at offset 0/4/8/12 from rKP.
 * rKP is pointed at PPC_SPE_SHA1_K by ppc_spe_sha1_transform before the
 * first use.
 * NOTE(review): evlwwsplat is expected to replicate the loaded word into
 * both halves of the 64 bit register -- confirm against the SPE PIM.
 */
47 | #define LOAD_K01 | |
48 | |||
49 | #define LOAD_K11 \ | |
50 | evlwwsplat rK,0(rKP); | |
51 | |||
52 | #define LOAD_K21 \ | |
53 | evlwwsplat rK,4(rKP); | |
54 | |||
55 | #define LOAD_K31 \ | |
56 | evlwwsplat rK,8(rKP); | |
57 | |||
58 | #define LOAD_K41 \ | |
59 | evlwwsplat rK,12(rKP); | |
60 | |||
/*
 * Function prologue: allocate a 128 byte stack frame (stwu also stores the
 * old r1 at the new frame base) and save r14-r23 with 64 bit stores into
 * consecutive 8 byte slots at offsets 8..80 from the new r1.
 * FINALIZE is the matching epilogue and relies on the same slot layout.
 */
61 | #define INITIALIZE \ | |
62 | stwu r1,-128(r1); /* create stack frame */ \ | |
63 | evstdw r14,8(r1); /* We must save non volatile */ \ | |
64 | evstdw r15,16(r1); /* registers. Take the chance */ \ | |
65 | evstdw r16,24(r1); /* and save the SPE part too */ \ | |
66 | evstdw r17,32(r1); \ | |
67 | evstdw r18,40(r1); \ | |
68 | evstdw r19,48(r1); \ | |
69 | evstdw r20,56(r1); \ | |
70 | evstdw r21,64(r1); \ | |
71 | evstdw r22,72(r1); \ | |
72 | evstdw r23,80(r1); | |
73 | |||
74 | |||
/*
 * Function epilogue, counterpart of INITIALIZE:
 *   1. reload r14-r23 from the save slots at 8..80(r1),
 *   2. zero r0 and store it over the first 32 bit word of each save slot,
 *   3. release the 128 byte frame.
 * Each stw clears only one word of an 8 byte slot; the other word is left
 * untouched on purpose (see the in-line comment about the lower part of the
 * GPRs).
 * r0 is also aliased as rT1, so rT1 is zero after this macro.
 */
75 | #define FINALIZE \ | |
76 | evldw r14,8(r1); /* restore SPE registers */ \ | |
77 | evldw r15,16(r1); \ | |
78 | evldw r16,24(r1); \ | |
79 | evldw r17,32(r1); \ | |
80 | evldw r18,40(r1); \ | |
81 | evldw r19,48(r1); \ | |
82 | evldw r20,56(r1); \ | |
83 | evldw r21,64(r1); \ | |
84 | evldw r22,72(r1); \ | |
85 | evldw r23,80(r1); \ | |
86 | xor r0,r0,r0; \ | |
87 | stw r0,8(r1); /* Delete sensitive data */ \ | |
88 | stw r0,16(r1); /* that we might have pushed */ \ | |
89 | stw r0,24(r1); /* from other context that runs */ \ | |
90 | stw r0,32(r1); /* the same code. Assume that */ \ | |
91 | stw r0,40(r1); /* the lower part of the GPRs */ \ | |
92 | stw r0,48(r1); /* were already overwritten on */ \ | |
93 | stw r0,56(r1); /* the way down to here */ \ | |
94 | stw r0,64(r1); \ | |
95 | stw r0,72(r1); \ | |
96 | stw r0,80(r1); \ | |
97 | addi r1,r1,128; /* cleanup stack frame */ | |
98 | |||
/*
 * Input access helpers, selected by byte order.
 *
 * __BIG_ENDIAN__ defined:
 *   LOAD_DATA(reg, off) loads the word at off(rWP) with lwz and leaves rWP
 *   unchanged; NEXT_BLOCK then advances rWP by 64 bytes.
 * otherwise:
 *   LOAD_DATA(reg, off) ignores "off", loads a byte-reversed word from rWP
 *   with lwbrx and advances rWP by 4 after every load. The eight R_00_15
 *   invocations per block issue sixteen loads, so rWP has already moved by
 *   64 bytes and NEXT_BLOCK is empty.
 */
99 | #ifdef __BIG_ENDIAN__ | |
100 | #define LOAD_DATA(reg, off) \ | |
101 | lwz reg,off(rWP); /* load data */ | |
102 | #define NEXT_BLOCK \ | |
103 | addi rWP,rWP,64; /* increment per block */ | |
104 | #else | |
105 | #define LOAD_DATA(reg, off) \ | |
106 | lwbrx reg,0,rWP; /* load data */ \ | |
107 | addi rWP,rWP,4; /* increment per word */ | |
108 | #define NEXT_BLOCK /* nothing to do */ | |
109 | #endif | |
110 | |||
/*
 * Two interleaved rounds that take their message words directly from the
 * input (used eight times per block, i.e. for rounds 0..15).
 *
 *   a, b, c, d, e  working registers; the first round accumulates into e,
 *                  the second into d. b and a are rotated in place.
 *   w0, w1         registers that receive the first and second loaded word;
 *                  the final evmergelo combines both words into w1.
 *   k              constant selector pasted into LOAD_K##k##1; here the
 *                  reload happens before rK is added, so it applies to
 *                  these two rounds.
 *   off            byte offset of the first word (second word is off+4);
 *                  only used by the big endian LOAD_DATA variant.
 *
 * In the in-line comments "1:" / "2:" mark the first / second round, and the
 * letters A..E name the roles of the registers within that round, not the
 * macro parameters. F is computed as (B and C) or (~B and D).
 * Clobbers rT0, rT1, rT2.
 */
111 | #define R_00_15(a, b, c, d, e, w0, w1, k, off) \ | |
112 | LOAD_DATA(w0, off) /* 1: W */ \ | |
113 | and rT2,b,c; /* 1: F' = B and C */ \ | |
114 | LOAD_K##k##1 \ | |
115 | andc rT1,d,b; /* 1: F" = ~B and D */ \ | |
116 | rotrwi rT0,a,27; /* 1: A' = A rotl 5 */ \ | |
117 | or rT2,rT2,rT1; /* 1: F = F' or F" */ \ | |
118 | add e,e,rT0; /* 1: E = E + A' */ \ | |
119 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | |
120 | add e,e,w0; /* 1: E = E + W */ \ | |
121 | LOAD_DATA(w1, off+4) /* 2: W */ \ | |
122 | add e,e,rT2; /* 1: E = E + F */ \ | |
123 | and rT1,a,b; /* 2: F' = B and C */ \ | |
124 | add e,e,rK; /* 1: E = E + K */ \ | |
125 | andc rT2,c,a; /* 2: F" = ~B and D */ \ | |
126 | add d,d,rK; /* 2: E = E + K */ \ | |
127 | or rT2,rT2,rT1; /* 2: F = F' or F" */ \ | |
128 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | |
129 | add d,d,w1; /* 2: E = E + W */ \ | |
130 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | |
131 | add d,d,rT0; /* 2: E = E + A' */ \ | |
132 | evmergelo w1,w1,w0; /* mix W[0]/W[1] */ \ | |
133 | add d,d,rT2 /* 2: E = E + F */ | |
134 | |||
/*
 * Two interleaved rounds with message expansion, using the same F as
 * R_00_15: (B and C) or (~B and D).
 *
 *   a, b, c, d, e  working registers; first round accumulates into e,
 *                  second into d. b and a are rotated in place.
 *   w0             in: word pair labelled W[-16]; out: the newly expanded
 *                  word pair (w0 is overwritten).
 *   w1, w4         word pairs labelled W[-14] and W[-8].
 *   w6, w7         combined by evmergelohi into the pair labelled W[-3].
 *   k              constant selector pasted into LOAD_K##k##1. The reload
 *                  is placed after the evaddw that consumes rK, so a
 *                  non-zero k only takes effect for the following macro
 *                  invocation.
 *
 * W+K is formed once as a 64 bit pair in rT0; evmergehi then places it in
 * rT1 so that round 1 adds rT0 and round 2 adds rT1 (see "WK1/WK2").
 * Clobbers rT0, rT1, rT2.
 */
135 | #define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | |
136 | and rT2,b,c; /* 1: F' = B and C */ \ | |
137 | evmergelohi rT0,w7,w6; /* W[-3] */ \ | |
138 | andc rT1,d,b; /* 1: F" = ~B and D */ \ | |
139 | evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \ | |
140 | or rT1,rT1,rT2; /* 1: F = F' or F" */ \ | |
141 | evxor w0,w0,w4; /* W = W xor W[-8] */ \ | |
142 | add e,e,rT1; /* 1: E = E + F */ \ | |
143 | evxor w0,w0,w1; /* W = W xor W[-14] */ \ | |
144 | rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \ | |
145 | evrlwi w0,w0,1; /* W = W rotl 1 */ \ | |
146 | add e,e,rT2; /* 1: E = E + A' */ \ | |
147 | evaddw rT0,w0,rK; /* WK = W + K */ \ | |
148 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | |
149 | LOAD_K##k##1 \ | |
150 | evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \ | |
151 | add e,e,rT0; /* 1: E = E + WK */ \ | |
152 | add d,d,rT1; /* 2: E = E + WK */ \ | |
153 | and rT2,a,b; /* 2: F' = B and C */ \ | |
154 | andc rT1,c,a; /* 2: F" = ~B and D */ \ | |
155 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | |
156 | or rT1,rT1,rT2; /* 2: F = F' or F" */ \ | |
157 | add d,d,rT0; /* 2: E = E + A' */ \ | |
158 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | |
159 | add d,d,rT1 /* 2: E = E + F */ | |
160 | |||
/*
 * Two interleaved rounds with message expansion, using F = B xor C xor D.
 * Also reused unchanged for rounds 60..79 through R_60_79.
 *
 *   a, b, c, d, e  working registers; first round accumulates into e,
 *                  second into d. b and a are rotated in place.
 *   w0             in: word pair labelled W[-16]; out: the newly expanded
 *                  word pair (w0 is overwritten).
 *   w1, w4         word pairs labelled W[-14] and W[-8].
 *   w6, w7         combined by evmergelohi into the pair labelled W[-3].
 *   k              constant selector pasted into LOAD_K##k##1; the reload
 *                  comes after the evaddw that uses rK, so it affects only
 *                  the following macro invocation.
 *
 * Clobbers rT0, rT1, rT2.
 */
161 | #define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | |
162 | evmergelohi rT0,w7,w6; /* W[-3] */ \ | |
163 | xor rT2,b,c; /* 1: F' = B xor C */ \ | |
164 | evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \ | |
165 | xor rT2,rT2,d; /* 1: F = F' xor D */ \ | |
166 | evxor w0,w0,w4; /* W = W xor W[-8] */ \ | |
167 | add e,e,rT2; /* 1: E = E + F */ \ | |
168 | evxor w0,w0,w1; /* W = W xor W[-14] */ \ | |
169 | rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \ | |
170 | evrlwi w0,w0,1; /* W = W rotl 1 */ \ | |
171 | add e,e,rT2; /* 1: E = E + A' */ \ | |
172 | evaddw rT0,w0,rK; /* WK = W + K */ \ | |
173 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | |
174 | LOAD_K##k##1 \ | |
175 | evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \ | |
176 | add e,e,rT0; /* 1: E = E + WK */ \ | |
177 | xor rT2,a,b; /* 2: F' = B xor C */ \ | |
178 | add d,d,rT1; /* 2: E = E + WK */ \ | |
179 | xor rT2,rT2,c; /* 2: F = F' xor D */ \ | |
180 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | |
181 | add d,d,rT2; /* 2: E = E + F */ \ | |
182 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | |
183 | add d,d,rT0 /* 2: E = E + A' */ | |
184 | |||
/*
 * Two interleaved rounds with message expansion, using
 * F = (B and C) or ((B or C) and D).
 *
 *   a, b, c, d, e  working registers; first round accumulates into e,
 *                  second into d. b and a are rotated in place.
 *   w0             in: word pair labelled W[-16]; out: the newly expanded
 *                  word pair (w0 is overwritten).
 *   w1, w4         word pairs labelled W[-14] and W[-8].
 *   w6, w7         combined by evmergelohi into the pair labelled W[-3].
 *   k              constant selector pasted into LOAD_K##k##1; the reload
 *                  comes after the evaddw that uses rK, so it affects only
 *                  the following macro invocation.
 *
 * Unlike the other round macros, the second round reuses rT0 as scratch for
 * F" after WK has been consumed.
 * Clobbers rT0, rT1, rT2.
 */
185 | #define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | |
186 | and rT2,b,c; /* 1: F' = B and C */ \ | |
187 | evmergelohi rT0,w7,w6; /* W[-3] */ \ | |
188 | or rT1,b,c; /* 1: F" = B or C */ \ | |
189 | evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \ | |
190 | and rT1,d,rT1; /* 1: F" = F" and D */ \ | |
191 | evxor w0,w0,w4; /* W = W xor W[-8] */ \ | |
192 | or rT2,rT2,rT1; /* 1: F = F' or F" */ \ | |
193 | evxor w0,w0,w1; /* W = W xor W[-14] */ \ | |
194 | add e,e,rT2; /* 1: E = E + F */ \ | |
195 | evrlwi w0,w0,1; /* W = W rotl 1 */ \ | |
196 | rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \ | |
197 | evaddw rT0,w0,rK; /* WK = W + K */ \ | |
198 | add e,e,rT2; /* 1: E = E + A' */ \ | |
199 | LOAD_K##k##1 \ | |
200 | evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \ | |
201 | rotrwi b,b,2; /* 1: B = B rotl 30 */ \ | |
202 | add e,e,rT0; /* 1: E = E + WK */ \ | |
203 | and rT2,a,b; /* 2: F' = B and C */ \ | |
204 | or rT0,a,b; /* 2: F" = B or C */ \ | |
205 | add d,d,rT1; /* 2: E = E + WK */ \ | |
206 | and rT0,c,rT0; /* 2: F" = F" and D */ \ | |
207 | rotrwi a,a,2; /* 2: B = B rotl 30 */ \ | |
208 | or rT2,rT2,rT0; /* 2: F = F' or F" */ \ | |
209 | rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \ | |
210 | add d,d,rT2; /* 2: E = E + F */ \ | |
211 | add d,d,rT0 /* 2: E = E + A' */ | |
212 | |||
/*
 * Rounds 60..79 use exactly the same instruction sequence as R_20_39; only
 * the constant held in rK differs, and that is selected by the caller via
 * the k argument of the preceding macro invocation.
 */
213 | #define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \ | |
214 | R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) | |
215 | |||
/*
 * ppc_spe_sha1_transform
 *
 * Register contract as visible in this routine:
 *   r3 (rHP)  pointer to five 32 bit hash words at offsets 0..16; they are
 *             read on entry and written back after every block.
 *   r4 (rWP)  pointer to the input; advanced by 64 bytes per block.
 *   r5        loop count, moved to CTR; r5 is then reused as rKP.
 *
 * Each pass through ppc_spe_sha1_main runs 40 two-round macros (80 rounds),
 * adds the result to the hash words in memory and loops with bdnz.
 *
 * NOTE(review): bdnz decrements CTR before testing it and the loop body runs
 * before the first test, so a count of 0 in r5 would not be a no-op --
 * confirm that callers never pass 0.
 */
216 | _GLOBAL(ppc_spe_sha1_transform) | |
217 | INITIALIZE | |
218 | |||
/* Load the hash state; the rKP setup is interleaved with the loads. */
219 | lwz rH0,0(rHP) | |
220 | lwz rH1,4(rHP) | |
221 | mtctr r5 | |
222 | lwz rH2,8(rHP) | |
223 | lis rKP,PPC_SPE_SHA1_K@h | |
224 | lwz rH3,12(rHP) | |
225 | ori rKP,rKP,PPC_SPE_SHA1_K@l | |
226 | lwz rH4,16(rHP) | |
227 | |||
228 | ppc_spe_sha1_main: | |
/*
 * Rounds 0..15. k=1 on the first line loads rK before it is used, on every
 * block. The last two invocations load their first word into rT3 rather
 * than into a round-word register.
 */
229 | R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0) | |
230 | R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8) | |
231 | R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16) | |
232 | R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24) | |
233 | R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32) | |
234 | R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40) | |
235 | R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48) | |
236 | R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56) | |
237 | |||
/*
 * From here on a non-zero k on the last line of a group reloads rK for the
 * next group (the reload happens after rK has been consumed).
 */
238 | R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0) | |
239 | R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2) | |
240 | |||
241 | R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0) | |
242 | R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0) | |
243 | R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0) | |
244 | R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0) | |
245 | R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0) | |
246 | R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0) | |
247 | R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0) | |
248 | R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0) | |
249 | R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0) | |
250 | R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3) | |
251 | |||
252 | R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0) | |
253 | R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0) | |
254 | R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0) | |
255 | R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0) | |
256 | R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0) | |
257 | R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0) | |
258 | R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0) | |
259 | R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0) | |
260 | R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0) | |
261 | R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4) | |
262 | |||
263 | R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0) | |
264 | R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0) | |
265 | R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0) | |
266 | R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0) | |
267 | R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0) | |
268 | R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0) | |
269 | R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0) | |
/*
 * The previous hash words are reloaded into rT3 and rW1..rW4, interleaved
 * with the final rounds. Each rW register is reloaded only after the last
 * macro invocation that takes it as an argument.
 */
270 | lwz rT3,0(rHP) | |
271 | R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0) | |
272 | lwz rW1,4(rHP) | |
273 | R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0) | |
274 | lwz rW2,8(rHP) | |
275 | R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0) | |
276 | lwz rW3,12(rHP) | |
277 | NEXT_BLOCK | |
278 | lwz rW4,16(rHP) | |
279 | |||
/* Add the previous hash words and store the updated state back. */
280 | add rH0,rH0,rT3 | |
281 | stw rH0,0(rHP) | |
282 | add rH1,rH1,rW1 | |
283 | stw rH1,4(rHP) | |
284 | add rH2,rH2,rW2 | |
285 | stw rH2,8(rHP) | |
286 | add rH3,rH3,rW3 | |
287 | stw rH3,12(rHP) | |
288 | add rH4,rH4,rW4 | |
289 | stw rH4,16(rHP) | |
290 | |||
291 | bdnz ppc_spe_sha1_main | |
292 | |||
293 | FINALIZE | |
294 | blr | |
295 | |||
/*
 * Table of four 32 bit constants, read through rKP by LOAD_K11..LOAD_K41 at
 * byte offsets 0, 4, 8 and 12 respectively. The values are the four SHA-1
 * round constants in round order.
 */
296 | .data | |
297 | .align 4 | |
298 | PPC_SPE_SHA1_K: | |
299 | .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6 | |