diff options
Diffstat (limited to 'arch/sparc')
47 files changed, 6814 insertions, 263 deletions
diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild index 5cd01161fd00..675afa285ddb 100644 --- a/arch/sparc/Kbuild +++ b/arch/sparc/Kbuild | |||
@@ -6,3 +6,4 @@ obj-y += kernel/ | |||
6 | obj-y += mm/ | 6 | obj-y += mm/ |
7 | obj-y += math-emu/ | 7 | obj-y += math-emu/ |
8 | obj-y += net/ | 8 | obj-y += net/ |
9 | obj-y += crypto/ | ||
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile new file mode 100644 index 000000000000..6ae1ad5e502b --- /dev/null +++ b/arch/sparc/crypto/Makefile | |||
@@ -0,0 +1,25 @@ | |||
1 | # | ||
2 | # Arch-specific CryptoAPI modules. | ||
3 | # | ||
4 | |||
5 | obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o | ||
6 | obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o | ||
7 | obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o | ||
8 | obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o | ||
9 | |||
10 | obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o | ||
11 | obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o | ||
12 | obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o | ||
13 | |||
14 | obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o | ||
15 | |||
16 | sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o | ||
17 | sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o | ||
18 | sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o | ||
19 | md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o | ||
20 | |||
21 | aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o | ||
22 | des-sparc64-y := des_asm.o des_glue.o crop_devid.o | ||
23 | camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o | ||
24 | |||
25 | crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o | ||
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S new file mode 100644 index 000000000000..23f6cbb910d3 --- /dev/null +++ b/arch/sparc/crypto/aes_asm.S | |||
@@ -0,0 +1,1535 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
6 | #define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ | ||
7 | AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ | ||
8 | AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ | ||
9 | AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ | ||
10 | AES_EROUND23(KEY_BASE + 6, T0, T1, I1) | ||
11 | |||
12 | #define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
13 | AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ | ||
14 | AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ | ||
15 | AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ | ||
16 | AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ | ||
17 | AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \ | ||
18 | AES_EROUND23(KEY_BASE + 6, T0, T1, I1) \ | ||
19 | AES_EROUND01(KEY_BASE + 4, T2, T3, I2) \ | ||
20 | AES_EROUND23(KEY_BASE + 6, T2, T3, I3) | ||
21 | |||
22 | #define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ | ||
23 | AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ | ||
24 | AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ | ||
25 | AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ | ||
26 | AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) | ||
27 | |||
28 | #define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
29 | AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \ | ||
30 | AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \ | ||
31 | AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \ | ||
32 | AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \ | ||
33 | AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \ | ||
34 | AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) \ | ||
35 | AES_EROUND01_L(KEY_BASE + 4, T2, T3, I2) \ | ||
36 | AES_EROUND23_L(KEY_BASE + 6, T2, T3, I3) | ||
37 | |||
38 | /* 10 rounds */ | ||
39 | #define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \ | ||
40 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
41 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
42 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
43 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
44 | ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) | ||
45 | |||
46 | #define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
47 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
48 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
49 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
50 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
51 | ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) | ||
52 | |||
53 | /* 12 rounds */ | ||
54 | #define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \ | ||
55 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
56 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
57 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
58 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
59 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ | ||
60 | ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) | ||
61 | |||
62 | #define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
63 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
64 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
65 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
66 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
67 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
68 | ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) | ||
69 | |||
70 | /* 14 rounds */ | ||
71 | #define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \ | ||
72 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
73 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
74 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
75 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
76 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ | ||
77 | ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ | ||
78 | ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) | ||
79 | |||
80 | #define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ | ||
81 | ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ | ||
82 | TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) | ||
83 | |||
84 | #define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ | ||
85 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ | ||
86 | ldd [%o0 + 0xd0], %f56; \ | ||
87 | ldd [%o0 + 0xd8], %f58; \ | ||
88 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
89 | ldd [%o0 + 0xe0], %f60; \ | ||
90 | ldd [%o0 + 0xe8], %f62; \ | ||
91 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
92 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
93 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
94 | ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
95 | AES_EROUND01(KEY_BASE + 48, I0, I1, KEY_BASE + 0) \ | ||
96 | AES_EROUND23(KEY_BASE + 50, I0, I1, KEY_BASE + 2) \ | ||
97 | AES_EROUND01(KEY_BASE + 48, I2, I3, KEY_BASE + 4) \ | ||
98 | AES_EROUND23(KEY_BASE + 50, I2, I3, KEY_BASE + 6) \ | ||
99 | AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I0) \ | ||
100 | AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I1) \ | ||
101 | ldd [%o0 + 0x10], %f8; \ | ||
102 | ldd [%o0 + 0x18], %f10; \ | ||
103 | AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I2) \ | ||
104 | AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I3) \ | ||
105 | ldd [%o0 + 0x20], %f12; \ | ||
106 | ldd [%o0 + 0x28], %f14; | ||
107 | |||
108 | #define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \ | ||
109 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
110 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
111 | AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ | ||
112 | AES_DROUND01(KEY_BASE + 6, T0, T1, I0) | ||
113 | |||
114 | #define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
115 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
116 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
117 | AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ | ||
118 | AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ | ||
119 | AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ | ||
120 | AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \ | ||
121 | AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \ | ||
122 | AES_DROUND01(KEY_BASE + 6, T2, T3, I2) | ||
123 | |||
124 | #define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ | ||
125 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
126 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
127 | AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ | ||
128 | AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) | ||
129 | |||
130 | #define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
131 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
132 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
133 | AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ | ||
134 | AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ | ||
135 | AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ | ||
136 | AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \ | ||
137 | AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \ | ||
138 | AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2) | ||
139 | |||
140 | /* 10 rounds */ | ||
141 | #define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ | ||
142 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
143 | DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
144 | DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
145 | DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
146 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) | ||
147 | |||
148 | #define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
149 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
150 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
151 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
152 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
153 | DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) | ||
154 | |||
155 | /* 12 rounds */ | ||
156 | #define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ | ||
157 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
158 | DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
159 | DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
160 | DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
161 | DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ | ||
162 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) | ||
163 | |||
164 | #define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
165 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
166 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
167 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
168 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
169 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
170 | DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) | ||
171 | |||
172 | /* 14 rounds */ | ||
173 | #define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ | ||
174 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | ||
175 | DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \ | ||
176 | DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \ | ||
177 | DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | ||
178 | DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ | ||
179 | DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ | ||
180 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) | ||
181 | |||
182 | #define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ | ||
183 | DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ | ||
184 | TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) | ||
185 | |||
186 | #define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ | ||
187 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ | ||
188 | ldd [%o0 + 0x18], %f56; \ | ||
189 | ldd [%o0 + 0x10], %f58; \ | ||
190 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
191 | ldd [%o0 + 0x08], %f60; \ | ||
192 | ldd [%o0 + 0x00], %f62; \ | ||
193 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
194 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
195 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
196 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
197 | AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \ | ||
198 | AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \ | ||
199 | AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \ | ||
200 | AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \ | ||
201 | AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \ | ||
202 | AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \ | ||
203 | ldd [%o0 + 0xd8], %f8; \ | ||
204 | ldd [%o0 + 0xd0], %f10; \ | ||
205 | AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \ | ||
206 | AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) \ | ||
207 | ldd [%o0 + 0xc8], %f12; \ | ||
208 | ldd [%o0 + 0xc0], %f14; | ||
209 | |||
210 | .align 32 | ||
211 | ENTRY(aes_sparc64_key_expand) | ||
212 | /* %o0=input_key, %o1=output_key, %o2=key_len */ | ||
213 | VISEntry | ||
214 | ld [%o0 + 0x00], %f0 | ||
215 | ld [%o0 + 0x04], %f1 | ||
216 | ld [%o0 + 0x08], %f2 | ||
217 | ld [%o0 + 0x0c], %f3 | ||
218 | |||
219 | std %f0, [%o1 + 0x00] | ||
220 | std %f2, [%o1 + 0x08] | ||
221 | add %o1, 0x10, %o1 | ||
222 | |||
223 | cmp %o2, 24 | ||
224 | bl 2f | ||
225 | nop | ||
226 | |||
227 | be 1f | ||
228 | nop | ||
229 | |||
230 | /* 256-bit key expansion */ | ||
231 | ld [%o0 + 0x10], %f4 | ||
232 | ld [%o0 + 0x14], %f5 | ||
233 | ld [%o0 + 0x18], %f6 | ||
234 | ld [%o0 + 0x1c], %f7 | ||
235 | |||
236 | std %f4, [%o1 + 0x00] | ||
237 | std %f6, [%o1 + 0x08] | ||
238 | add %o1, 0x10, %o1 | ||
239 | |||
240 | AES_KEXPAND1(0, 6, 0x0, 8) | ||
241 | AES_KEXPAND2(2, 8, 10) | ||
242 | AES_KEXPAND0(4, 10, 12) | ||
243 | AES_KEXPAND2(6, 12, 14) | ||
244 | AES_KEXPAND1(8, 14, 0x1, 16) | ||
245 | AES_KEXPAND2(10, 16, 18) | ||
246 | AES_KEXPAND0(12, 18, 20) | ||
247 | AES_KEXPAND2(14, 20, 22) | ||
248 | AES_KEXPAND1(16, 22, 0x2, 24) | ||
249 | AES_KEXPAND2(18, 24, 26) | ||
250 | AES_KEXPAND0(20, 26, 28) | ||
251 | AES_KEXPAND2(22, 28, 30) | ||
252 | AES_KEXPAND1(24, 30, 0x3, 32) | ||
253 | AES_KEXPAND2(26, 32, 34) | ||
254 | AES_KEXPAND0(28, 34, 36) | ||
255 | AES_KEXPAND2(30, 36, 38) | ||
256 | AES_KEXPAND1(32, 38, 0x4, 40) | ||
257 | AES_KEXPAND2(34, 40, 42) | ||
258 | AES_KEXPAND0(36, 42, 44) | ||
259 | AES_KEXPAND2(38, 44, 46) | ||
260 | AES_KEXPAND1(40, 46, 0x5, 48) | ||
261 | AES_KEXPAND2(42, 48, 50) | ||
262 | AES_KEXPAND0(44, 50, 52) | ||
263 | AES_KEXPAND2(46, 52, 54) | ||
264 | AES_KEXPAND1(48, 54, 0x6, 56) | ||
265 | AES_KEXPAND2(50, 56, 58) | ||
266 | |||
267 | std %f8, [%o1 + 0x00] | ||
268 | std %f10, [%o1 + 0x08] | ||
269 | std %f12, [%o1 + 0x10] | ||
270 | std %f14, [%o1 + 0x18] | ||
271 | std %f16, [%o1 + 0x20] | ||
272 | std %f18, [%o1 + 0x28] | ||
273 | std %f20, [%o1 + 0x30] | ||
274 | std %f22, [%o1 + 0x38] | ||
275 | std %f24, [%o1 + 0x40] | ||
276 | std %f26, [%o1 + 0x48] | ||
277 | std %f28, [%o1 + 0x50] | ||
278 | std %f30, [%o1 + 0x58] | ||
279 | std %f32, [%o1 + 0x60] | ||
280 | std %f34, [%o1 + 0x68] | ||
281 | std %f36, [%o1 + 0x70] | ||
282 | std %f38, [%o1 + 0x78] | ||
283 | std %f40, [%o1 + 0x80] | ||
284 | std %f42, [%o1 + 0x88] | ||
285 | std %f44, [%o1 + 0x90] | ||
286 | std %f46, [%o1 + 0x98] | ||
287 | std %f48, [%o1 + 0xa0] | ||
288 | std %f50, [%o1 + 0xa8] | ||
289 | std %f52, [%o1 + 0xb0] | ||
290 | std %f54, [%o1 + 0xb8] | ||
291 | std %f56, [%o1 + 0xc0] | ||
292 | ba,pt %xcc, 80f | ||
293 | std %f58, [%o1 + 0xc8] | ||
294 | |||
295 | 1: | ||
296 | /* 192-bit key expansion */ | ||
297 | ld [%o0 + 0x10], %f4 | ||
298 | ld [%o0 + 0x14], %f5 | ||
299 | |||
300 | std %f4, [%o1 + 0x00] | ||
301 | add %o1, 0x08, %o1 | ||
302 | |||
303 | AES_KEXPAND1(0, 4, 0x0, 6) | ||
304 | AES_KEXPAND2(2, 6, 8) | ||
305 | AES_KEXPAND2(4, 8, 10) | ||
306 | AES_KEXPAND1(6, 10, 0x1, 12) | ||
307 | AES_KEXPAND2(8, 12, 14) | ||
308 | AES_KEXPAND2(10, 14, 16) | ||
309 | AES_KEXPAND1(12, 16, 0x2, 18) | ||
310 | AES_KEXPAND2(14, 18, 20) | ||
311 | AES_KEXPAND2(16, 20, 22) | ||
312 | AES_KEXPAND1(18, 22, 0x3, 24) | ||
313 | AES_KEXPAND2(20, 24, 26) | ||
314 | AES_KEXPAND2(22, 26, 28) | ||
315 | AES_KEXPAND1(24, 28, 0x4, 30) | ||
316 | AES_KEXPAND2(26, 30, 32) | ||
317 | AES_KEXPAND2(28, 32, 34) | ||
318 | AES_KEXPAND1(30, 34, 0x5, 36) | ||
319 | AES_KEXPAND2(32, 36, 38) | ||
320 | AES_KEXPAND2(34, 38, 40) | ||
321 | AES_KEXPAND1(36, 40, 0x6, 42) | ||
322 | AES_KEXPAND2(38, 42, 44) | ||
323 | AES_KEXPAND2(40, 44, 46) | ||
324 | AES_KEXPAND1(42, 46, 0x7, 48) | ||
325 | AES_KEXPAND2(44, 48, 50) | ||
326 | |||
327 | std %f6, [%o1 + 0x00] | ||
328 | std %f8, [%o1 + 0x08] | ||
329 | std %f10, [%o1 + 0x10] | ||
330 | std %f12, [%o1 + 0x18] | ||
331 | std %f14, [%o1 + 0x20] | ||
332 | std %f16, [%o1 + 0x28] | ||
333 | std %f18, [%o1 + 0x30] | ||
334 | std %f20, [%o1 + 0x38] | ||
335 | std %f22, [%o1 + 0x40] | ||
336 | std %f24, [%o1 + 0x48] | ||
337 | std %f26, [%o1 + 0x50] | ||
338 | std %f28, [%o1 + 0x58] | ||
339 | std %f30, [%o1 + 0x60] | ||
340 | std %f32, [%o1 + 0x68] | ||
341 | std %f34, [%o1 + 0x70] | ||
342 | std %f36, [%o1 + 0x78] | ||
343 | std %f38, [%o1 + 0x80] | ||
344 | std %f40, [%o1 + 0x88] | ||
345 | std %f42, [%o1 + 0x90] | ||
346 | std %f44, [%o1 + 0x98] | ||
347 | std %f46, [%o1 + 0xa0] | ||
348 | std %f48, [%o1 + 0xa8] | ||
349 | ba,pt %xcc, 80f | ||
350 | std %f50, [%o1 + 0xb0] | ||
351 | |||
352 | 2: | ||
353 | /* 128-bit key expansion */ | ||
354 | AES_KEXPAND1(0, 2, 0x0, 4) | ||
355 | AES_KEXPAND2(2, 4, 6) | ||
356 | AES_KEXPAND1(4, 6, 0x1, 8) | ||
357 | AES_KEXPAND2(6, 8, 10) | ||
358 | AES_KEXPAND1(8, 10, 0x2, 12) | ||
359 | AES_KEXPAND2(10, 12, 14) | ||
360 | AES_KEXPAND1(12, 14, 0x3, 16) | ||
361 | AES_KEXPAND2(14, 16, 18) | ||
362 | AES_KEXPAND1(16, 18, 0x4, 20) | ||
363 | AES_KEXPAND2(18, 20, 22) | ||
364 | AES_KEXPAND1(20, 22, 0x5, 24) | ||
365 | AES_KEXPAND2(22, 24, 26) | ||
366 | AES_KEXPAND1(24, 26, 0x6, 28) | ||
367 | AES_KEXPAND2(26, 28, 30) | ||
368 | AES_KEXPAND1(28, 30, 0x7, 32) | ||
369 | AES_KEXPAND2(30, 32, 34) | ||
370 | AES_KEXPAND1(32, 34, 0x8, 36) | ||
371 | AES_KEXPAND2(34, 36, 38) | ||
372 | AES_KEXPAND1(36, 38, 0x9, 40) | ||
373 | AES_KEXPAND2(38, 40, 42) | ||
374 | |||
375 | std %f4, [%o1 + 0x00] | ||
376 | std %f6, [%o1 + 0x08] | ||
377 | std %f8, [%o1 + 0x10] | ||
378 | std %f10, [%o1 + 0x18] | ||
379 | std %f12, [%o1 + 0x20] | ||
380 | std %f14, [%o1 + 0x28] | ||
381 | std %f16, [%o1 + 0x30] | ||
382 | std %f18, [%o1 + 0x38] | ||
383 | std %f20, [%o1 + 0x40] | ||
384 | std %f22, [%o1 + 0x48] | ||
385 | std %f24, [%o1 + 0x50] | ||
386 | std %f26, [%o1 + 0x58] | ||
387 | std %f28, [%o1 + 0x60] | ||
388 | std %f30, [%o1 + 0x68] | ||
389 | std %f32, [%o1 + 0x70] | ||
390 | std %f34, [%o1 + 0x78] | ||
391 | std %f36, [%o1 + 0x80] | ||
392 | std %f38, [%o1 + 0x88] | ||
393 | std %f40, [%o1 + 0x90] | ||
394 | std %f42, [%o1 + 0x98] | ||
395 | 80: | ||
396 | retl | ||
397 | VISExit | ||
398 | ENDPROC(aes_sparc64_key_expand) | ||
399 | |||
400 | .align 32 | ||
401 | ENTRY(aes_sparc64_encrypt_128) | ||
402 | /* %o0=key, %o1=input, %o2=output */ | ||
403 | VISEntry | ||
404 | ld [%o1 + 0x00], %f4 | ||
405 | ld [%o1 + 0x04], %f5 | ||
406 | ld [%o1 + 0x08], %f6 | ||
407 | ld [%o1 + 0x0c], %f7 | ||
408 | ldd [%o0 + 0x00], %f8 | ||
409 | ldd [%o0 + 0x08], %f10 | ||
410 | ldd [%o0 + 0x10], %f12 | ||
411 | ldd [%o0 + 0x18], %f14 | ||
412 | ldd [%o0 + 0x20], %f16 | ||
413 | ldd [%o0 + 0x28], %f18 | ||
414 | ldd [%o0 + 0x30], %f20 | ||
415 | ldd [%o0 + 0x38], %f22 | ||
416 | ldd [%o0 + 0x40], %f24 | ||
417 | ldd [%o0 + 0x48], %f26 | ||
418 | ldd [%o0 + 0x50], %f28 | ||
419 | ldd [%o0 + 0x58], %f30 | ||
420 | ldd [%o0 + 0x60], %f32 | ||
421 | ldd [%o0 + 0x68], %f34 | ||
422 | ldd [%o0 + 0x70], %f36 | ||
423 | ldd [%o0 + 0x78], %f38 | ||
424 | ldd [%o0 + 0x80], %f40 | ||
425 | ldd [%o0 + 0x88], %f42 | ||
426 | ldd [%o0 + 0x90], %f44 | ||
427 | ldd [%o0 + 0x98], %f46 | ||
428 | ldd [%o0 + 0xa0], %f48 | ||
429 | ldd [%o0 + 0xa8], %f50 | ||
430 | fxor %f8, %f4, %f4 | ||
431 | fxor %f10, %f6, %f6 | ||
432 | ENCRYPT_128(12, 4, 6, 0, 2) | ||
433 | st %f4, [%o2 + 0x00] | ||
434 | st %f5, [%o2 + 0x04] | ||
435 | st %f6, [%o2 + 0x08] | ||
436 | st %f7, [%o2 + 0x0c] | ||
437 | retl | ||
438 | VISExit | ||
439 | ENDPROC(aes_sparc64_encrypt_128) | ||
440 | |||
441 | .align 32 | ||
442 | ENTRY(aes_sparc64_encrypt_192) | ||
443 | /* %o0=key, %o1=input, %o2=output */ | ||
444 | VISEntry | ||
445 | ld [%o1 + 0x00], %f4 | ||
446 | ld [%o1 + 0x04], %f5 | ||
447 | ld [%o1 + 0x08], %f6 | ||
448 | ld [%o1 + 0x0c], %f7 | ||
449 | |||
450 | ldd [%o0 + 0x00], %f8 | ||
451 | ldd [%o0 + 0x08], %f10 | ||
452 | |||
453 | fxor %f8, %f4, %f4 | ||
454 | fxor %f10, %f6, %f6 | ||
455 | |||
456 | ldd [%o0 + 0x10], %f8 | ||
457 | ldd [%o0 + 0x18], %f10 | ||
458 | ldd [%o0 + 0x20], %f12 | ||
459 | ldd [%o0 + 0x28], %f14 | ||
460 | add %o0, 0x20, %o0 | ||
461 | |||
462 | ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) | ||
463 | |||
464 | ldd [%o0 + 0x10], %f12 | ||
465 | ldd [%o0 + 0x18], %f14 | ||
466 | ldd [%o0 + 0x20], %f16 | ||
467 | ldd [%o0 + 0x28], %f18 | ||
468 | ldd [%o0 + 0x30], %f20 | ||
469 | ldd [%o0 + 0x38], %f22 | ||
470 | ldd [%o0 + 0x40], %f24 | ||
471 | ldd [%o0 + 0x48], %f26 | ||
472 | ldd [%o0 + 0x50], %f28 | ||
473 | ldd [%o0 + 0x58], %f30 | ||
474 | ldd [%o0 + 0x60], %f32 | ||
475 | ldd [%o0 + 0x68], %f34 | ||
476 | ldd [%o0 + 0x70], %f36 | ||
477 | ldd [%o0 + 0x78], %f38 | ||
478 | ldd [%o0 + 0x80], %f40 | ||
479 | ldd [%o0 + 0x88], %f42 | ||
480 | ldd [%o0 + 0x90], %f44 | ||
481 | ldd [%o0 + 0x98], %f46 | ||
482 | ldd [%o0 + 0xa0], %f48 | ||
483 | ldd [%o0 + 0xa8], %f50 | ||
484 | |||
485 | |||
486 | ENCRYPT_128(12, 4, 6, 0, 2) | ||
487 | |||
488 | st %f4, [%o2 + 0x00] | ||
489 | st %f5, [%o2 + 0x04] | ||
490 | st %f6, [%o2 + 0x08] | ||
491 | st %f7, [%o2 + 0x0c] | ||
492 | |||
493 | retl | ||
494 | VISExit | ||
495 | ENDPROC(aes_sparc64_encrypt_192) | ||
496 | |||
497 | .align 32 | ||
498 | ENTRY(aes_sparc64_encrypt_256) | ||
499 | /* %o0=key, %o1=input, %o2=output */ | ||
500 | VISEntry | ||
501 | ld [%o1 + 0x00], %f4 | ||
502 | ld [%o1 + 0x04], %f5 | ||
503 | ld [%o1 + 0x08], %f6 | ||
504 | ld [%o1 + 0x0c], %f7 | ||
505 | |||
506 | ldd [%o0 + 0x00], %f8 | ||
507 | ldd [%o0 + 0x08], %f10 | ||
508 | |||
509 | fxor %f8, %f4, %f4 | ||
510 | fxor %f10, %f6, %f6 | ||
511 | |||
512 | ldd [%o0 + 0x10], %f8 | ||
513 | |||
514 | ldd [%o0 + 0x18], %f10 | ||
515 | ldd [%o0 + 0x20], %f12 | ||
516 | ldd [%o0 + 0x28], %f14 | ||
517 | add %o0, 0x20, %o0 | ||
518 | |||
519 | ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) | ||
520 | |||
521 | ldd [%o0 + 0x10], %f8 | ||
522 | |||
523 | ldd [%o0 + 0x18], %f10 | ||
524 | ldd [%o0 + 0x20], %f12 | ||
525 | ldd [%o0 + 0x28], %f14 | ||
526 | add %o0, 0x20, %o0 | ||
527 | |||
528 | ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2) | ||
529 | |||
530 | ldd [%o0 + 0x10], %f12 | ||
531 | ldd [%o0 + 0x18], %f14 | ||
532 | ldd [%o0 + 0x20], %f16 | ||
533 | ldd [%o0 + 0x28], %f18 | ||
534 | ldd [%o0 + 0x30], %f20 | ||
535 | ldd [%o0 + 0x38], %f22 | ||
536 | ldd [%o0 + 0x40], %f24 | ||
537 | ldd [%o0 + 0x48], %f26 | ||
538 | ldd [%o0 + 0x50], %f28 | ||
539 | ldd [%o0 + 0x58], %f30 | ||
540 | ldd [%o0 + 0x60], %f32 | ||
541 | ldd [%o0 + 0x68], %f34 | ||
542 | ldd [%o0 + 0x70], %f36 | ||
543 | ldd [%o0 + 0x78], %f38 | ||
544 | ldd [%o0 + 0x80], %f40 | ||
545 | ldd [%o0 + 0x88], %f42 | ||
546 | ldd [%o0 + 0x90], %f44 | ||
547 | ldd [%o0 + 0x98], %f46 | ||
548 | ldd [%o0 + 0xa0], %f48 | ||
549 | ldd [%o0 + 0xa8], %f50 | ||
550 | |||
551 | ENCRYPT_128(12, 4, 6, 0, 2) | ||
552 | |||
553 | st %f4, [%o2 + 0x00] | ||
554 | st %f5, [%o2 + 0x04] | ||
555 | st %f6, [%o2 + 0x08] | ||
556 | st %f7, [%o2 + 0x0c] | ||
557 | |||
558 | retl | ||
559 | VISExit | ||
560 | ENDPROC(aes_sparc64_encrypt_256) | ||
561 | |||
562 | .align 32 | ||
563 | ENTRY(aes_sparc64_decrypt_128) | ||
564 | /* %o0=key, %o1=input, %o2=output */ | ||
565 | VISEntry | ||
566 | ld [%o1 + 0x00], %f4 | ||
567 | ld [%o1 + 0x04], %f5 | ||
568 | ld [%o1 + 0x08], %f6 | ||
569 | ld [%o1 + 0x0c], %f7 | ||
570 | ldd [%o0 + 0xa0], %f8 | ||
571 | ldd [%o0 + 0xa8], %f10 | ||
572 | ldd [%o0 + 0x98], %f12 | ||
573 | ldd [%o0 + 0x90], %f14 | ||
574 | ldd [%o0 + 0x88], %f16 | ||
575 | ldd [%o0 + 0x80], %f18 | ||
576 | ldd [%o0 + 0x78], %f20 | ||
577 | ldd [%o0 + 0x70], %f22 | ||
578 | ldd [%o0 + 0x68], %f24 | ||
579 | ldd [%o0 + 0x60], %f26 | ||
580 | ldd [%o0 + 0x58], %f28 | ||
581 | ldd [%o0 + 0x50], %f30 | ||
582 | ldd [%o0 + 0x48], %f32 | ||
583 | ldd [%o0 + 0x40], %f34 | ||
584 | ldd [%o0 + 0x38], %f36 | ||
585 | ldd [%o0 + 0x30], %f38 | ||
586 | ldd [%o0 + 0x28], %f40 | ||
587 | ldd [%o0 + 0x20], %f42 | ||
588 | ldd [%o0 + 0x18], %f44 | ||
589 | ldd [%o0 + 0x10], %f46 | ||
590 | ldd [%o0 + 0x08], %f48 | ||
591 | ldd [%o0 + 0x00], %f50 | ||
592 | fxor %f8, %f4, %f4 | ||
593 | fxor %f10, %f6, %f6 | ||
594 | DECRYPT_128(12, 4, 6, 0, 2) | ||
595 | st %f4, [%o2 + 0x00] | ||
596 | st %f5, [%o2 + 0x04] | ||
597 | st %f6, [%o2 + 0x08] | ||
598 | st %f7, [%o2 + 0x0c] | ||
599 | retl | ||
600 | VISExit | ||
601 | ENDPROC(aes_sparc64_decrypt_128) | ||
602 | |||
603 | .align 32 | ||
604 | ENTRY(aes_sparc64_decrypt_192) | ||
605 | /* %o0=key, %o1=input, %o2=output */ | ||
606 | VISEntry | ||
607 | ld [%o1 + 0x00], %f4 | ||
608 | ld [%o1 + 0x04], %f5 | ||
609 | ld [%o1 + 0x08], %f6 | ||
610 | ld [%o1 + 0x0c], %f7 | ||
611 | ldd [%o0 + 0xc0], %f8 | ||
612 | ldd [%o0 + 0xc8], %f10 | ||
613 | ldd [%o0 + 0xb8], %f12 | ||
614 | ldd [%o0 + 0xb0], %f14 | ||
615 | ldd [%o0 + 0xa8], %f16 | ||
616 | ldd [%o0 + 0xa0], %f18 | ||
617 | fxor %f8, %f4, %f4 | ||
618 | fxor %f10, %f6, %f6 | ||
619 | ldd [%o0 + 0x98], %f20 | ||
620 | ldd [%o0 + 0x90], %f22 | ||
621 | ldd [%o0 + 0x88], %f24 | ||
622 | ldd [%o0 + 0x80], %f26 | ||
623 | DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2) | ||
624 | ldd [%o0 + 0x78], %f28 | ||
625 | ldd [%o0 + 0x70], %f30 | ||
626 | ldd [%o0 + 0x68], %f32 | ||
627 | ldd [%o0 + 0x60], %f34 | ||
628 | ldd [%o0 + 0x58], %f36 | ||
629 | ldd [%o0 + 0x50], %f38 | ||
630 | ldd [%o0 + 0x48], %f40 | ||
631 | ldd [%o0 + 0x40], %f42 | ||
632 | ldd [%o0 + 0x38], %f44 | ||
633 | ldd [%o0 + 0x30], %f46 | ||
634 | ldd [%o0 + 0x28], %f48 | ||
635 | ldd [%o0 + 0x20], %f50 | ||
636 | ldd [%o0 + 0x18], %f52 | ||
637 | ldd [%o0 + 0x10], %f54 | ||
638 | ldd [%o0 + 0x08], %f56 | ||
639 | ldd [%o0 + 0x00], %f58 | ||
640 | DECRYPT_128(20, 4, 6, 0, 2) | ||
641 | st %f4, [%o2 + 0x00] | ||
642 | st %f5, [%o2 + 0x04] | ||
643 | st %f6, [%o2 + 0x08] | ||
644 | st %f7, [%o2 + 0x0c] | ||
645 | retl | ||
646 | VISExit | ||
647 | ENDPROC(aes_sparc64_decrypt_192) | ||
648 | |||
649 | .align 32 | ||
650 | ENTRY(aes_sparc64_decrypt_256) | ||
651 | /* %o0=key, %o1=input, %o2=output */ | ||
652 | VISEntry | ||
653 | ld [%o1 + 0x00], %f4 | ||
654 | ld [%o1 + 0x04], %f5 | ||
655 | ld [%o1 + 0x08], %f6 | ||
656 | ld [%o1 + 0x0c], %f7 | ||
657 | ldd [%o0 + 0xe0], %f8 | ||
658 | ldd [%o0 + 0xe8], %f10 | ||
659 | ldd [%o0 + 0xd8], %f12 | ||
660 | ldd [%o0 + 0xd0], %f14 | ||
661 | ldd [%o0 + 0xc8], %f16 | ||
662 | fxor %f8, %f4, %f4 | ||
663 | ldd [%o0 + 0xc0], %f18 | ||
664 | fxor %f10, %f6, %f6 | ||
665 | ldd [%o0 + 0xb8], %f20 | ||
666 | AES_DROUND23(12, 4, 6, 2) | ||
667 | ldd [%o0 + 0xb0], %f22 | ||
668 | AES_DROUND01(14, 4, 6, 0) | ||
669 | ldd [%o0 + 0xa8], %f24 | ||
670 | AES_DROUND23(16, 0, 2, 6) | ||
671 | ldd [%o0 + 0xa0], %f26 | ||
672 | AES_DROUND01(18, 0, 2, 4) | ||
673 | ldd [%o0 + 0x98], %f12 | ||
674 | AES_DROUND23(20, 4, 6, 2) | ||
675 | ldd [%o0 + 0x90], %f14 | ||
676 | AES_DROUND01(22, 4, 6, 0) | ||
677 | ldd [%o0 + 0x88], %f16 | ||
678 | AES_DROUND23(24, 0, 2, 6) | ||
679 | ldd [%o0 + 0x80], %f18 | ||
680 | AES_DROUND01(26, 0, 2, 4) | ||
681 | ldd [%o0 + 0x78], %f20 | ||
682 | AES_DROUND23(12, 4, 6, 2) | ||
683 | ldd [%o0 + 0x70], %f22 | ||
684 | AES_DROUND01(14, 4, 6, 0) | ||
685 | ldd [%o0 + 0x68], %f24 | ||
686 | AES_DROUND23(16, 0, 2, 6) | ||
687 | ldd [%o0 + 0x60], %f26 | ||
688 | AES_DROUND01(18, 0, 2, 4) | ||
689 | ldd [%o0 + 0x58], %f28 | ||
690 | AES_DROUND23(20, 4, 6, 2) | ||
691 | ldd [%o0 + 0x50], %f30 | ||
692 | AES_DROUND01(22, 4, 6, 0) | ||
693 | ldd [%o0 + 0x48], %f32 | ||
694 | AES_DROUND23(24, 0, 2, 6) | ||
695 | ldd [%o0 + 0x40], %f34 | ||
696 | AES_DROUND01(26, 0, 2, 4) | ||
697 | ldd [%o0 + 0x38], %f36 | ||
698 | AES_DROUND23(28, 4, 6, 2) | ||
699 | ldd [%o0 + 0x30], %f38 | ||
700 | AES_DROUND01(30, 4, 6, 0) | ||
701 | ldd [%o0 + 0x28], %f40 | ||
702 | AES_DROUND23(32, 0, 2, 6) | ||
703 | ldd [%o0 + 0x20], %f42 | ||
704 | AES_DROUND01(34, 0, 2, 4) | ||
705 | ldd [%o0 + 0x18], %f44 | ||
706 | AES_DROUND23(36, 4, 6, 2) | ||
707 | ldd [%o0 + 0x10], %f46 | ||
708 | AES_DROUND01(38, 4, 6, 0) | ||
709 | ldd [%o0 + 0x08], %f48 | ||
710 | AES_DROUND23(40, 0, 2, 6) | ||
711 | ldd [%o0 + 0x00], %f50 | ||
712 | AES_DROUND01(42, 0, 2, 4) | ||
713 | AES_DROUND23(44, 4, 6, 2) | ||
714 | AES_DROUND01(46, 4, 6, 0) | ||
715 | AES_DROUND23_L(48, 0, 2, 6) | ||
716 | AES_DROUND01_L(50, 0, 2, 4) | ||
717 | st %f4, [%o2 + 0x00] | ||
718 | st %f5, [%o2 + 0x04] | ||
719 | st %f6, [%o2 + 0x08] | ||
720 | st %f7, [%o2 + 0x0c] | ||
721 | retl | ||
722 | VISExit | ||
723 | ENDPROC(aes_sparc64_decrypt_256) | ||
724 | |||
725 | .align 32 | ||
726 | ENTRY(aes_sparc64_load_encrypt_keys_128) | ||
727 | /* %o0=key */ | ||
728 | VISEntry | ||
729 | ldd [%o0 + 0x10], %f8 | ||
730 | ldd [%o0 + 0x18], %f10 | ||
731 | ldd [%o0 + 0x20], %f12 | ||
732 | ldd [%o0 + 0x28], %f14 | ||
733 | ldd [%o0 + 0x30], %f16 | ||
734 | ldd [%o0 + 0x38], %f18 | ||
735 | ldd [%o0 + 0x40], %f20 | ||
736 | ldd [%o0 + 0x48], %f22 | ||
737 | ldd [%o0 + 0x50], %f24 | ||
738 | ldd [%o0 + 0x58], %f26 | ||
739 | ldd [%o0 + 0x60], %f28 | ||
740 | ldd [%o0 + 0x68], %f30 | ||
741 | ldd [%o0 + 0x70], %f32 | ||
742 | ldd [%o0 + 0x78], %f34 | ||
743 | ldd [%o0 + 0x80], %f36 | ||
744 | ldd [%o0 + 0x88], %f38 | ||
745 | ldd [%o0 + 0x90], %f40 | ||
746 | ldd [%o0 + 0x98], %f42 | ||
747 | ldd [%o0 + 0xa0], %f44 | ||
748 | retl | ||
749 | ldd [%o0 + 0xa8], %f46 | ||
750 | ENDPROC(aes_sparc64_load_encrypt_keys_128) | ||
751 | |||
752 | .align 32 | ||
753 | ENTRY(aes_sparc64_load_encrypt_keys_192) | ||
754 | /* %o0=key */ | ||
755 | VISEntry | ||
756 | ldd [%o0 + 0x10], %f8 | ||
757 | ldd [%o0 + 0x18], %f10 | ||
758 | ldd [%o0 + 0x20], %f12 | ||
759 | ldd [%o0 + 0x28], %f14 | ||
760 | ldd [%o0 + 0x30], %f16 | ||
761 | ldd [%o0 + 0x38], %f18 | ||
762 | ldd [%o0 + 0x40], %f20 | ||
763 | ldd [%o0 + 0x48], %f22 | ||
764 | ldd [%o0 + 0x50], %f24 | ||
765 | ldd [%o0 + 0x58], %f26 | ||
766 | ldd [%o0 + 0x60], %f28 | ||
767 | ldd [%o0 + 0x68], %f30 | ||
768 | ldd [%o0 + 0x70], %f32 | ||
769 | ldd [%o0 + 0x78], %f34 | ||
770 | ldd [%o0 + 0x80], %f36 | ||
771 | ldd [%o0 + 0x88], %f38 | ||
772 | ldd [%o0 + 0x90], %f40 | ||
773 | ldd [%o0 + 0x98], %f42 | ||
774 | ldd [%o0 + 0xa0], %f44 | ||
775 | ldd [%o0 + 0xa8], %f46 | ||
776 | ldd [%o0 + 0xb0], %f48 | ||
777 | ldd [%o0 + 0xb8], %f50 | ||
778 | ldd [%o0 + 0xc0], %f52 | ||
779 | retl | ||
780 | ldd [%o0 + 0xc8], %f54 | ||
781 | ENDPROC(aes_sparc64_load_encrypt_keys_192) | ||
782 | |||
783 | .align 32 | ||
784 | ENTRY(aes_sparc64_load_encrypt_keys_256) | ||
785 | /* %o0=key */ | ||
786 | VISEntry | ||
787 | ldd [%o0 + 0x10], %f8 | ||
788 | ldd [%o0 + 0x18], %f10 | ||
789 | ldd [%o0 + 0x20], %f12 | ||
790 | ldd [%o0 + 0x28], %f14 | ||
791 | ldd [%o0 + 0x30], %f16 | ||
792 | ldd [%o0 + 0x38], %f18 | ||
793 | ldd [%o0 + 0x40], %f20 | ||
794 | ldd [%o0 + 0x48], %f22 | ||
795 | ldd [%o0 + 0x50], %f24 | ||
796 | ldd [%o0 + 0x58], %f26 | ||
797 | ldd [%o0 + 0x60], %f28 | ||
798 | ldd [%o0 + 0x68], %f30 | ||
799 | ldd [%o0 + 0x70], %f32 | ||
800 | ldd [%o0 + 0x78], %f34 | ||
801 | ldd [%o0 + 0x80], %f36 | ||
802 | ldd [%o0 + 0x88], %f38 | ||
803 | ldd [%o0 + 0x90], %f40 | ||
804 | ldd [%o0 + 0x98], %f42 | ||
805 | ldd [%o0 + 0xa0], %f44 | ||
806 | ldd [%o0 + 0xa8], %f46 | ||
807 | ldd [%o0 + 0xb0], %f48 | ||
808 | ldd [%o0 + 0xb8], %f50 | ||
809 | ldd [%o0 + 0xc0], %f52 | ||
810 | ldd [%o0 + 0xc8], %f54 | ||
811 | ldd [%o0 + 0xd0], %f56 | ||
812 | ldd [%o0 + 0xd8], %f58 | ||
813 | ldd [%o0 + 0xe0], %f60 | ||
814 | retl | ||
815 | ldd [%o0 + 0xe8], %f62 | ||
816 | ENDPROC(aes_sparc64_load_encrypt_keys_256) | ||
817 | |||
818 | .align 32 | ||
819 | ENTRY(aes_sparc64_load_decrypt_keys_128) | ||
820 | /* %o0=key */ | ||
821 | VISEntry | ||
822 | ldd [%o0 + 0x98], %f8 | ||
823 | ldd [%o0 + 0x90], %f10 | ||
824 | ldd [%o0 + 0x88], %f12 | ||
825 | ldd [%o0 + 0x80], %f14 | ||
826 | ldd [%o0 + 0x78], %f16 | ||
827 | ldd [%o0 + 0x70], %f18 | ||
828 | ldd [%o0 + 0x68], %f20 | ||
829 | ldd [%o0 + 0x60], %f22 | ||
830 | ldd [%o0 + 0x58], %f24 | ||
831 | ldd [%o0 + 0x50], %f26 | ||
832 | ldd [%o0 + 0x48], %f28 | ||
833 | ldd [%o0 + 0x40], %f30 | ||
834 | ldd [%o0 + 0x38], %f32 | ||
835 | ldd [%o0 + 0x30], %f34 | ||
836 | ldd [%o0 + 0x28], %f36 | ||
837 | ldd [%o0 + 0x20], %f38 | ||
838 | ldd [%o0 + 0x18], %f40 | ||
839 | ldd [%o0 + 0x10], %f42 | ||
840 | ldd [%o0 + 0x08], %f44 | ||
841 | retl | ||
842 | ldd [%o0 + 0x00], %f46 | ||
843 | ENDPROC(aes_sparc64_load_decrypt_keys_128) | ||
844 | |||
845 | .align 32 | ||
846 | ENTRY(aes_sparc64_load_decrypt_keys_192) | ||
847 | /* %o0=key */ | ||
848 | VISEntry | ||
849 | ldd [%o0 + 0xb8], %f8 | ||
850 | ldd [%o0 + 0xb0], %f10 | ||
851 | ldd [%o0 + 0xa8], %f12 | ||
852 | ldd [%o0 + 0xa0], %f14 | ||
853 | ldd [%o0 + 0x98], %f16 | ||
854 | ldd [%o0 + 0x90], %f18 | ||
855 | ldd [%o0 + 0x88], %f20 | ||
856 | ldd [%o0 + 0x80], %f22 | ||
857 | ldd [%o0 + 0x78], %f24 | ||
858 | ldd [%o0 + 0x70], %f26 | ||
859 | ldd [%o0 + 0x68], %f28 | ||
860 | ldd [%o0 + 0x60], %f30 | ||
861 | ldd [%o0 + 0x58], %f32 | ||
862 | ldd [%o0 + 0x50], %f34 | ||
863 | ldd [%o0 + 0x48], %f36 | ||
864 | ldd [%o0 + 0x40], %f38 | ||
865 | ldd [%o0 + 0x38], %f40 | ||
866 | ldd [%o0 + 0x30], %f42 | ||
867 | ldd [%o0 + 0x28], %f44 | ||
868 | ldd [%o0 + 0x20], %f46 | ||
869 | ldd [%o0 + 0x18], %f48 | ||
870 | ldd [%o0 + 0x10], %f50 | ||
871 | ldd [%o0 + 0x08], %f52 | ||
872 | retl | ||
873 | ldd [%o0 + 0x00], %f54 | ||
874 | ENDPROC(aes_sparc64_load_decrypt_keys_192) | ||
875 | |||
876 | .align 32 | ||
877 | ENTRY(aes_sparc64_load_decrypt_keys_256) | ||
878 | /* %o0=key */ | ||
879 | VISEntry | ||
880 | ldd [%o0 + 0xd8], %f8 | ||
881 | ldd [%o0 + 0xd0], %f10 | ||
882 | ldd [%o0 + 0xc8], %f12 | ||
883 | ldd [%o0 + 0xc0], %f14 | ||
884 | ldd [%o0 + 0xb8], %f16 | ||
885 | ldd [%o0 + 0xb0], %f18 | ||
886 | ldd [%o0 + 0xa8], %f20 | ||
887 | ldd [%o0 + 0xa0], %f22 | ||
888 | ldd [%o0 + 0x98], %f24 | ||
889 | ldd [%o0 + 0x90], %f26 | ||
890 | ldd [%o0 + 0x88], %f28 | ||
891 | ldd [%o0 + 0x80], %f30 | ||
892 | ldd [%o0 + 0x78], %f32 | ||
893 | ldd [%o0 + 0x70], %f34 | ||
894 | ldd [%o0 + 0x68], %f36 | ||
895 | ldd [%o0 + 0x60], %f38 | ||
896 | ldd [%o0 + 0x58], %f40 | ||
897 | ldd [%o0 + 0x50], %f42 | ||
898 | ldd [%o0 + 0x48], %f44 | ||
899 | ldd [%o0 + 0x40], %f46 | ||
900 | ldd [%o0 + 0x38], %f48 | ||
901 | ldd [%o0 + 0x30], %f50 | ||
902 | ldd [%o0 + 0x28], %f52 | ||
903 | ldd [%o0 + 0x20], %f54 | ||
904 | ldd [%o0 + 0x18], %f56 | ||
905 | ldd [%o0 + 0x10], %f58 | ||
906 | ldd [%o0 + 0x08], %f60 | ||
907 | retl | ||
908 | ldd [%o0 + 0x00], %f62 | ||
909 | ENDPROC(aes_sparc64_load_decrypt_keys_256) | ||
910 | |||
911 | .align 32 | ||
912 | ENTRY(aes_sparc64_ecb_encrypt_128) | ||
913 | /* %o0=key, %o1=input, %o2=output, %o3=len */ | ||
914 | ldx [%o0 + 0x00], %g1 | ||
915 | subcc %o3, 0x10, %o3 | ||
916 | be 10f | ||
917 | ldx [%o0 + 0x08], %g2 | ||
918 | 1: ldx [%o1 + 0x00], %g3 | ||
919 | ldx [%o1 + 0x08], %g7 | ||
920 | ldx [%o1 + 0x10], %o4 | ||
921 | ldx [%o1 + 0x18], %o5 | ||
922 | xor %g1, %g3, %g3 | ||
923 | xor %g2, %g7, %g7 | ||
924 | MOVXTOD_G3_F4 | ||
925 | MOVXTOD_G7_F6 | ||
926 | xor %g1, %o4, %g3 | ||
927 | xor %g2, %o5, %g7 | ||
928 | MOVXTOD_G3_F60 | ||
929 | MOVXTOD_G7_F62 | ||
930 | ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
931 | std %f4, [%o2 + 0x00] | ||
932 | std %f6, [%o2 + 0x08] | ||
933 | std %f60, [%o2 + 0x10] | ||
934 | std %f62, [%o2 + 0x18] | ||
935 | sub %o3, 0x20, %o3 | ||
936 | add %o1, 0x20, %o1 | ||
937 | brgz %o3, 1b | ||
938 | add %o2, 0x20, %o2 | ||
939 | brlz,pt %o3, 11f | ||
940 | nop | ||
941 | 10: ldx [%o1 + 0x00], %g3 | ||
942 | ldx [%o1 + 0x08], %g7 | ||
943 | xor %g1, %g3, %g3 | ||
944 | xor %g2, %g7, %g7 | ||
945 | MOVXTOD_G3_F4 | ||
946 | MOVXTOD_G7_F6 | ||
947 | ENCRYPT_128(8, 4, 6, 0, 2) | ||
948 | std %f4, [%o2 + 0x00] | ||
949 | std %f6, [%o2 + 0x08] | ||
950 | 11: retl | ||
951 | nop | ||
952 | ENDPROC(aes_sparc64_ecb_encrypt_128) | ||
953 | |||
954 | .align 32 | ||
955 | ENTRY(aes_sparc64_ecb_encrypt_192) | ||
956 | /* %o0=key, %o1=input, %o2=output, %o3=len */ | ||
957 | ldx [%o0 + 0x00], %g1 | ||
958 | subcc %o3, 0x10, %o3 | ||
959 | be 10f | ||
960 | ldx [%o0 + 0x08], %g2 | ||
961 | 1: ldx [%o1 + 0x00], %g3 | ||
962 | ldx [%o1 + 0x08], %g7 | ||
963 | ldx [%o1 + 0x10], %o4 | ||
964 | ldx [%o1 + 0x18], %o5 | ||
965 | xor %g1, %g3, %g3 | ||
966 | xor %g2, %g7, %g7 | ||
967 | MOVXTOD_G3_F4 | ||
968 | MOVXTOD_G7_F6 | ||
969 | xor %g1, %o4, %g3 | ||
970 | xor %g2, %o5, %g7 | ||
971 | MOVXTOD_G3_F60 | ||
972 | MOVXTOD_G7_F62 | ||
973 | ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
974 | std %f4, [%o2 + 0x00] | ||
975 | std %f6, [%o2 + 0x08] | ||
976 | std %f60, [%o2 + 0x10] | ||
977 | std %f62, [%o2 + 0x18] | ||
978 | sub %o3, 0x20, %o3 | ||
979 | add %o1, 0x20, %o1 | ||
980 | brgz %o3, 1b | ||
981 | add %o2, 0x20, %o2 | ||
982 | brlz,pt %o3, 11f | ||
983 | nop | ||
984 | 10: ldx [%o1 + 0x00], %g3 | ||
985 | ldx [%o1 + 0x08], %g7 | ||
986 | xor %g1, %g3, %g3 | ||
987 | xor %g2, %g7, %g7 | ||
988 | MOVXTOD_G3_F4 | ||
989 | MOVXTOD_G7_F6 | ||
990 | ENCRYPT_192(8, 4, 6, 0, 2) | ||
991 | std %f4, [%o2 + 0x00] | ||
992 | std %f6, [%o2 + 0x08] | ||
993 | 11: retl | ||
994 | nop | ||
995 | ENDPROC(aes_sparc64_ecb_encrypt_192) | ||
996 | |||
997 | .align 32 | ||
998 | ENTRY(aes_sparc64_ecb_encrypt_256) | ||
999 | /* %o0=key, %o1=input, %o2=output, %o3=len */ | ||
1000 | ldx [%o0 + 0x00], %g1 | ||
1001 | subcc %o3, 0x10, %o3 | ||
1002 | be 10f | ||
1003 | ldx [%o0 + 0x08], %g2 | ||
1004 | 1: ldx [%o1 + 0x00], %g3 | ||
1005 | ldx [%o1 + 0x08], %g7 | ||
1006 | ldx [%o1 + 0x10], %o4 | ||
1007 | ldx [%o1 + 0x18], %o5 | ||
1008 | xor %g1, %g3, %g3 | ||
1009 | xor %g2, %g7, %g7 | ||
1010 | MOVXTOD_G3_F4 | ||
1011 | MOVXTOD_G7_F6 | ||
1012 | xor %g1, %o4, %g3 | ||
1013 | xor %g2, %o5, %g7 | ||
1014 | MOVXTOD_G3_F0 | ||
1015 | MOVXTOD_G7_F2 | ||
1016 | ENCRYPT_256_2(8, 4, 6, 0, 2) | ||
1017 | std %f4, [%o2 + 0x00] | ||
1018 | std %f6, [%o2 + 0x08] | ||
1019 | std %f0, [%o2 + 0x10] | ||
1020 | std %f2, [%o2 + 0x18] | ||
1021 | sub %o3, 0x20, %o3 | ||
1022 | add %o1, 0x20, %o1 | ||
1023 | brgz %o3, 1b | ||
1024 | add %o2, 0x20, %o2 | ||
1025 | brlz,pt %o3, 11f | ||
1026 | nop | ||
1027 | 10: ldx [%o1 + 0x00], %g3 | ||
1028 | ldx [%o1 + 0x08], %g7 | ||
1029 | xor %g1, %g3, %g3 | ||
1030 | xor %g2, %g7, %g7 | ||
1031 | MOVXTOD_G3_F4 | ||
1032 | MOVXTOD_G7_F6 | ||
1033 | ENCRYPT_256(8, 4, 6, 0, 2) | ||
1034 | std %f4, [%o2 + 0x00] | ||
1035 | std %f6, [%o2 + 0x08] | ||
1036 | 11: retl | ||
1037 | nop | ||
1038 | ENDPROC(aes_sparc64_ecb_encrypt_256) | ||
1039 | |||
1040 | .align 32 | ||
1041 | ENTRY(aes_sparc64_ecb_decrypt_128) | ||
1042 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ | ||
1043 | ldx [%o0 - 0x10], %g1 | ||
1044 | subcc %o3, 0x10, %o3 | ||
1045 | be 10f | ||
1046 | ldx [%o0 - 0x08], %g2 | ||
1047 | 1: ldx [%o1 + 0x00], %g3 | ||
1048 | ldx [%o1 + 0x08], %g7 | ||
1049 | ldx [%o1 + 0x10], %o4 | ||
1050 | ldx [%o1 + 0x18], %o5 | ||
1051 | xor %g1, %g3, %g3 | ||
1052 | xor %g2, %g7, %g7 | ||
1053 | MOVXTOD_G3_F4 | ||
1054 | MOVXTOD_G7_F6 | ||
1055 | xor %g1, %o4, %g3 | ||
1056 | xor %g2, %o5, %g7 | ||
1057 | MOVXTOD_G3_F60 | ||
1058 | MOVXTOD_G7_F62 | ||
1059 | DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
1060 | std %f4, [%o2 + 0x00] | ||
1061 | std %f6, [%o2 + 0x08] | ||
1062 | std %f60, [%o2 + 0x10] | ||
1063 | std %f62, [%o2 + 0x18] | ||
1064 | sub %o3, 0x20, %o3 | ||
1065 | add %o1, 0x20, %o1 | ||
1066 | brgz,pt %o3, 1b | ||
1067 | add %o2, 0x20, %o2 | ||
1068 | brlz,pt %o3, 11f | ||
1069 | nop | ||
1070 | 10: ldx [%o1 + 0x00], %g3 | ||
1071 | ldx [%o1 + 0x08], %g7 | ||
1072 | xor %g1, %g3, %g3 | ||
1073 | xor %g2, %g7, %g7 | ||
1074 | MOVXTOD_G3_F4 | ||
1075 | MOVXTOD_G7_F6 | ||
1076 | DECRYPT_128(8, 4, 6, 0, 2) | ||
1077 | std %f4, [%o2 + 0x00] | ||
1078 | std %f6, [%o2 + 0x08] | ||
1079 | 11: retl | ||
1080 | nop | ||
1081 | ENDPROC(aes_sparc64_ecb_decrypt_128) | ||
1082 | |||
1083 | .align 32 | ||
1084 | ENTRY(aes_sparc64_ecb_decrypt_192) | ||
1085 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ | ||
1086 | ldx [%o0 - 0x10], %g1 | ||
1087 | subcc %o3, 0x10, %o3 | ||
1088 | be 10f | ||
1089 | ldx [%o0 - 0x08], %g2 | ||
1090 | 1: ldx [%o1 + 0x00], %g3 | ||
1091 | ldx [%o1 + 0x08], %g7 | ||
1092 | ldx [%o1 + 0x10], %o4 | ||
1093 | ldx [%o1 + 0x18], %o5 | ||
1094 | xor %g1, %g3, %g3 | ||
1095 | xor %g2, %g7, %g7 | ||
1096 | MOVXTOD_G3_F4 | ||
1097 | MOVXTOD_G7_F6 | ||
1098 | xor %g1, %o4, %g3 | ||
1099 | xor %g2, %o5, %g7 | ||
1100 | MOVXTOD_G3_F60 | ||
1101 | MOVXTOD_G7_F62 | ||
1102 | DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
1103 | std %f4, [%o2 + 0x00] | ||
1104 | std %f6, [%o2 + 0x08] | ||
1105 | std %f60, [%o2 + 0x10] | ||
1106 | std %f62, [%o2 + 0x18] | ||
1107 | sub %o3, 0x20, %o3 | ||
1108 | add %o1, 0x20, %o1 | ||
1109 | brgz,pt %o3, 1b | ||
1110 | add %o2, 0x20, %o2 | ||
1111 | brlz,pt %o3, 11f | ||
1112 | nop | ||
1113 | 10: ldx [%o1 + 0x00], %g3 | ||
1114 | ldx [%o1 + 0x08], %g7 | ||
1115 | xor %g1, %g3, %g3 | ||
1116 | xor %g2, %g7, %g7 | ||
1117 | MOVXTOD_G3_F4 | ||
1118 | MOVXTOD_G7_F6 | ||
1119 | DECRYPT_192(8, 4, 6, 0, 2) | ||
1120 | std %f4, [%o2 + 0x00] | ||
1121 | std %f6, [%o2 + 0x08] | ||
1122 | 11: retl | ||
1123 | nop | ||
1124 | ENDPROC(aes_sparc64_ecb_decrypt_192) | ||
1125 | |||
1126 | .align 32 | ||
1127 | ENTRY(aes_sparc64_ecb_decrypt_256) | ||
1128 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ | ||
1129 | ldx [%o0 - 0x10], %g1 | ||
1130 | subcc %o3, 0x10, %o3 | ||
1131 | be 10f | ||
1132 | ldx [%o0 - 0x08], %g2 | ||
1133 | sub %o0, 0xf0, %o0 | ||
1134 | 1: ldx [%o1 + 0x00], %g3 | ||
1135 | ldx [%o1 + 0x08], %g7 | ||
1136 | ldx [%o1 + 0x10], %o4 | ||
1137 | ldx [%o1 + 0x18], %o5 | ||
1138 | xor %g1, %g3, %g3 | ||
1139 | xor %g2, %g7, %g7 | ||
1140 | MOVXTOD_G3_F4 | ||
1141 | MOVXTOD_G7_F6 | ||
1142 | xor %g1, %o4, %g3 | ||
1143 | xor %g2, %o5, %g7 | ||
1144 | MOVXTOD_G3_F0 | ||
1145 | MOVXTOD_G7_F2 | ||
1146 | DECRYPT_256_2(8, 4, 6, 0, 2) | ||
1147 | std %f4, [%o2 + 0x00] | ||
1148 | std %f6, [%o2 + 0x08] | ||
1149 | std %f0, [%o2 + 0x10] | ||
1150 | std %f2, [%o2 + 0x18] | ||
1151 | sub %o3, 0x20, %o3 | ||
1152 | add %o1, 0x20, %o1 | ||
1153 | brgz,pt %o3, 1b | ||
1154 | add %o2, 0x20, %o2 | ||
1155 | brlz,pt %o3, 11f | ||
1156 | nop | ||
1157 | 10: ldx [%o1 + 0x00], %g3 | ||
1158 | ldx [%o1 + 0x08], %g7 | ||
1159 | xor %g1, %g3, %g3 | ||
1160 | xor %g2, %g7, %g7 | ||
1161 | MOVXTOD_G3_F4 | ||
1162 | MOVXTOD_G7_F6 | ||
1163 | DECRYPT_256(8, 4, 6, 0, 2) | ||
1164 | std %f4, [%o2 + 0x00] | ||
1165 | std %f6, [%o2 + 0x08] | ||
1166 | 11: retl | ||
1167 | nop | ||
1168 | ENDPROC(aes_sparc64_ecb_decrypt_256) | ||
1169 | |||
1170 | .align 32 | ||
1171 | ENTRY(aes_sparc64_cbc_encrypt_128) | ||
1172 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1173 | ldd [%o4 + 0x00], %f4 | ||
1174 | ldd [%o4 + 0x08], %f6 | ||
1175 | ldx [%o0 + 0x00], %g1 | ||
1176 | ldx [%o0 + 0x08], %g2 | ||
1177 | 1: ldx [%o1 + 0x00], %g3 | ||
1178 | ldx [%o1 + 0x08], %g7 | ||
1179 | add %o1, 0x10, %o1 | ||
1180 | xor %g1, %g3, %g3 | ||
1181 | xor %g2, %g7, %g7 | ||
1182 | MOVXTOD_G3_F0 | ||
1183 | MOVXTOD_G7_F2 | ||
1184 | fxor %f4, %f0, %f4 | ||
1185 | fxor %f6, %f2, %f6 | ||
1186 | ENCRYPT_128(8, 4, 6, 0, 2) | ||
1187 | std %f4, [%o2 + 0x00] | ||
1188 | std %f6, [%o2 + 0x08] | ||
1189 | subcc %o3, 0x10, %o3 | ||
1190 | bne,pt %xcc, 1b | ||
1191 | add %o2, 0x10, %o2 | ||
1192 | std %f4, [%o4 + 0x00] | ||
1193 | std %f6, [%o4 + 0x08] | ||
1194 | retl | ||
1195 | nop | ||
1196 | ENDPROC(aes_sparc64_cbc_encrypt_128) | ||
1197 | |||
1198 | .align 32 | ||
1199 | ENTRY(aes_sparc64_cbc_encrypt_192) | ||
1200 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1201 | ldd [%o4 + 0x00], %f4 | ||
1202 | ldd [%o4 + 0x08], %f6 | ||
1203 | ldx [%o0 + 0x00], %g1 | ||
1204 | ldx [%o0 + 0x08], %g2 | ||
1205 | 1: ldx [%o1 + 0x00], %g3 | ||
1206 | ldx [%o1 + 0x08], %g7 | ||
1207 | add %o1, 0x10, %o1 | ||
1208 | xor %g1, %g3, %g3 | ||
1209 | xor %g2, %g7, %g7 | ||
1210 | MOVXTOD_G3_F0 | ||
1211 | MOVXTOD_G7_F2 | ||
1212 | fxor %f4, %f0, %f4 | ||
1213 | fxor %f6, %f2, %f6 | ||
1214 | ENCRYPT_192(8, 4, 6, 0, 2) | ||
1215 | std %f4, [%o2 + 0x00] | ||
1216 | std %f6, [%o2 + 0x08] | ||
1217 | subcc %o3, 0x10, %o3 | ||
1218 | bne,pt %xcc, 1b | ||
1219 | add %o2, 0x10, %o2 | ||
1220 | std %f4, [%o4 + 0x00] | ||
1221 | std %f6, [%o4 + 0x08] | ||
1222 | retl | ||
1223 | nop | ||
1224 | ENDPROC(aes_sparc64_cbc_encrypt_192) | ||
1225 | |||
1226 | .align 32 | ||
1227 | ENTRY(aes_sparc64_cbc_encrypt_256) | ||
1228 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1229 | ldd [%o4 + 0x00], %f4 | ||
1230 | ldd [%o4 + 0x08], %f6 | ||
1231 | ldx [%o0 + 0x00], %g1 | ||
1232 | ldx [%o0 + 0x08], %g2 | ||
1233 | 1: ldx [%o1 + 0x00], %g3 | ||
1234 | ldx [%o1 + 0x08], %g7 | ||
1235 | add %o1, 0x10, %o1 | ||
1236 | xor %g1, %g3, %g3 | ||
1237 | xor %g2, %g7, %g7 | ||
1238 | MOVXTOD_G3_F0 | ||
1239 | MOVXTOD_G7_F2 | ||
1240 | fxor %f4, %f0, %f4 | ||
1241 | fxor %f6, %f2, %f6 | ||
1242 | ENCRYPT_256(8, 4, 6, 0, 2) | ||
1243 | std %f4, [%o2 + 0x00] | ||
1244 | std %f6, [%o2 + 0x08] | ||
1245 | subcc %o3, 0x10, %o3 | ||
1246 | bne,pt %xcc, 1b | ||
1247 | add %o2, 0x10, %o2 | ||
1248 | std %f4, [%o4 + 0x00] | ||
1249 | std %f6, [%o4 + 0x08] | ||
1250 | retl | ||
1251 | nop | ||
1252 | ENDPROC(aes_sparc64_cbc_encrypt_256) | ||
1253 | |||
1254 | .align 32 | ||
1255 | ENTRY(aes_sparc64_cbc_decrypt_128) | ||
1256 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ | ||
1257 | ldx [%o0 - 0x10], %g1 | ||
1258 | ldx [%o0 - 0x08], %g2 | ||
1259 | ldx [%o4 + 0x00], %o0 | ||
1260 | ldx [%o4 + 0x08], %o5 | ||
1261 | 1: ldx [%o1 + 0x00], %g3 | ||
1262 | ldx [%o1 + 0x08], %g7 | ||
1263 | add %o1, 0x10, %o1 | ||
1264 | xor %g1, %g3, %g3 | ||
1265 | xor %g2, %g7, %g7 | ||
1266 | MOVXTOD_G3_F4 | ||
1267 | MOVXTOD_G7_F6 | ||
1268 | DECRYPT_128(8, 4, 6, 0, 2) | ||
1269 | MOVXTOD_O0_F0 | ||
1270 | MOVXTOD_O5_F2 | ||
1271 | xor %g1, %g3, %o0 | ||
1272 | xor %g2, %g7, %o5 | ||
1273 | fxor %f4, %f0, %f4 | ||
1274 | fxor %f6, %f2, %f6 | ||
1275 | std %f4, [%o2 + 0x00] | ||
1276 | std %f6, [%o2 + 0x08] | ||
1277 | subcc %o3, 0x10, %o3 | ||
1278 | bne,pt %xcc, 1b | ||
1279 | add %o2, 0x10, %o2 | ||
1280 | stx %o0, [%o4 + 0x00] | ||
1281 | stx %o5, [%o4 + 0x08] | ||
1282 | retl | ||
1283 | nop | ||
1284 | ENDPROC(aes_sparc64_cbc_decrypt_128) | ||
1285 | |||
1286 | .align 32 | ||
1287 | ENTRY(aes_sparc64_cbc_decrypt_192) | ||
1288 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ | ||
1289 | ldx [%o0 - 0x10], %g1 | ||
1290 | ldx [%o0 - 0x08], %g2 | ||
1291 | ldx [%o4 + 0x00], %o0 | ||
1292 | ldx [%o4 + 0x08], %o5 | ||
1293 | 1: ldx [%o1 + 0x00], %g3 | ||
1294 | ldx [%o1 + 0x08], %g7 | ||
1295 | add %o1, 0x10, %o1 | ||
1296 | xor %g1, %g3, %g3 | ||
1297 | xor %g2, %g7, %g7 | ||
1298 | MOVXTOD_G3_F4 | ||
1299 | MOVXTOD_G7_F6 | ||
1300 | DECRYPT_192(8, 4, 6, 0, 2) | ||
1301 | MOVXTOD_O0_F0 | ||
1302 | MOVXTOD_O5_F2 | ||
1303 | xor %g1, %g3, %o0 | ||
1304 | xor %g2, %g7, %o5 | ||
1305 | fxor %f4, %f0, %f4 | ||
1306 | fxor %f6, %f2, %f6 | ||
1307 | std %f4, [%o2 + 0x00] | ||
1308 | std %f6, [%o2 + 0x08] | ||
1309 | subcc %o3, 0x10, %o3 | ||
1310 | bne,pt %xcc, 1b | ||
1311 | add %o2, 0x10, %o2 | ||
1312 | stx %o0, [%o4 + 0x00] | ||
1313 | stx %o5, [%o4 + 0x08] | ||
1314 | retl | ||
1315 | nop | ||
1316 | ENDPROC(aes_sparc64_cbc_decrypt_192) | ||
1317 | |||
1318 | .align 32 | ||
1319 | ENTRY(aes_sparc64_cbc_decrypt_256) | ||
1320 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=iv */ | ||
1321 | ldx [%o0 - 0x10], %g1 | ||
1322 | ldx [%o0 - 0x08], %g2 | ||
1323 | ldx [%o4 + 0x00], %o0 | ||
1324 | ldx [%o4 + 0x08], %o5 | ||
1325 | 1: ldx [%o1 + 0x00], %g3 | ||
1326 | ldx [%o1 + 0x08], %g7 | ||
1327 | add %o1, 0x10, %o1 | ||
1328 | xor %g1, %g3, %g3 | ||
1329 | xor %g2, %g7, %g7 | ||
1330 | MOVXTOD_G3_F4 | ||
1331 | MOVXTOD_G7_F6 | ||
1332 | DECRYPT_256(8, 4, 6, 0, 2) | ||
1333 | MOVXTOD_O0_F0 | ||
1334 | MOVXTOD_O5_F2 | ||
1335 | xor %g1, %g3, %o0 | ||
1336 | xor %g2, %g7, %o5 | ||
1337 | fxor %f4, %f0, %f4 | ||
1338 | fxor %f6, %f2, %f6 | ||
1339 | std %f4, [%o2 + 0x00] | ||
1340 | std %f6, [%o2 + 0x08] | ||
1341 | subcc %o3, 0x10, %o3 | ||
1342 | bne,pt %xcc, 1b | ||
1343 | add %o2, 0x10, %o2 | ||
1344 | stx %o0, [%o4 + 0x00] | ||
1345 | stx %o5, [%o4 + 0x08] | ||
1346 | retl | ||
1347 | nop | ||
1348 | ENDPROC(aes_sparc64_cbc_decrypt_256) | ||
1349 | |||
1350 | .align 32 | ||
1351 | ENTRY(aes_sparc64_ctr_crypt_128) | ||
1352 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1353 | ldx [%o4 + 0x00], %g3 | ||
1354 | ldx [%o4 + 0x08], %g7 | ||
1355 | subcc %o3, 0x10, %o3 | ||
1356 | ldx [%o0 + 0x00], %g1 | ||
1357 | be 10f | ||
1358 | ldx [%o0 + 0x08], %g2 | ||
1359 | 1: xor %g1, %g3, %o5 | ||
1360 | MOVXTOD_O5_F0 | ||
1361 | xor %g2, %g7, %o5 | ||
1362 | MOVXTOD_O5_F2 | ||
1363 | add %g7, 1, %g7 | ||
1364 | add %g3, 1, %o5 | ||
1365 | movrz %g7, %o5, %g3 | ||
1366 | xor %g1, %g3, %o5 | ||
1367 | MOVXTOD_O5_F4 | ||
1368 | xor %g2, %g7, %o5 | ||
1369 | MOVXTOD_O5_F6 | ||
1370 | add %g7, 1, %g7 | ||
1371 | add %g3, 1, %o5 | ||
1372 | movrz %g7, %o5, %g3 | ||
1373 | ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62) | ||
1374 | ldd [%o1 + 0x00], %f56 | ||
1375 | ldd [%o1 + 0x08], %f58 | ||
1376 | ldd [%o1 + 0x10], %f60 | ||
1377 | ldd [%o1 + 0x18], %f62 | ||
1378 | fxor %f56, %f0, %f56 | ||
1379 | fxor %f58, %f2, %f58 | ||
1380 | fxor %f60, %f4, %f60 | ||
1381 | fxor %f62, %f6, %f62 | ||
1382 | std %f56, [%o2 + 0x00] | ||
1383 | std %f58, [%o2 + 0x08] | ||
1384 | std %f60, [%o2 + 0x10] | ||
1385 | std %f62, [%o2 + 0x18] | ||
1386 | subcc %o3, 0x20, %o3 | ||
1387 | add %o1, 0x20, %o1 | ||
1388 | brgz %o3, 1b | ||
1389 | add %o2, 0x20, %o2 | ||
1390 | brlz,pt %o3, 11f | ||
1391 | nop | ||
1392 | 10: xor %g1, %g3, %o5 | ||
1393 | MOVXTOD_O5_F0 | ||
1394 | xor %g2, %g7, %o5 | ||
1395 | MOVXTOD_O5_F2 | ||
1396 | add %g7, 1, %g7 | ||
1397 | add %g3, 1, %o5 | ||
1398 | movrz %g7, %o5, %g3 | ||
1399 | ENCRYPT_128(8, 0, 2, 4, 6) | ||
1400 | ldd [%o1 + 0x00], %f4 | ||
1401 | ldd [%o1 + 0x08], %f6 | ||
1402 | fxor %f4, %f0, %f4 | ||
1403 | fxor %f6, %f2, %f6 | ||
1404 | std %f4, [%o2 + 0x00] | ||
1405 | std %f6, [%o2 + 0x08] | ||
1406 | 11: stx %g3, [%o4 + 0x00] | ||
1407 | retl | ||
1408 | stx %g7, [%o4 + 0x08] | ||
1409 | ENDPROC(aes_sparc64_ctr_crypt_128) | ||
1410 | |||
1411 | .align 32 | ||
1412 | ENTRY(aes_sparc64_ctr_crypt_192) | ||
1413 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1414 | ldx [%o4 + 0x00], %g3 | ||
1415 | ldx [%o4 + 0x08], %g7 | ||
1416 | subcc %o3, 0x10, %o3 | ||
1417 | ldx [%o0 + 0x00], %g1 | ||
1418 | be 10f | ||
1419 | ldx [%o0 + 0x08], %g2 | ||
1420 | 1: xor %g1, %g3, %o5 | ||
1421 | MOVXTOD_O5_F0 | ||
1422 | xor %g2, %g7, %o5 | ||
1423 | MOVXTOD_O5_F2 | ||
1424 | add %g7, 1, %g7 | ||
1425 | add %g3, 1, %o5 | ||
1426 | movrz %g7, %o5, %g3 | ||
1427 | xor %g1, %g3, %o5 | ||
1428 | MOVXTOD_O5_F4 | ||
1429 | xor %g2, %g7, %o5 | ||
1430 | MOVXTOD_O5_F6 | ||
1431 | add %g7, 1, %g7 | ||
1432 | add %g3, 1, %o5 | ||
1433 | movrz %g7, %o5, %g3 | ||
1434 | ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62) | ||
1435 | ldd [%o1 + 0x00], %f56 | ||
1436 | ldd [%o1 + 0x08], %f58 | ||
1437 | ldd [%o1 + 0x10], %f60 | ||
1438 | ldd [%o1 + 0x18], %f62 | ||
1439 | fxor %f56, %f0, %f56 | ||
1440 | fxor %f58, %f2, %f58 | ||
1441 | fxor %f60, %f4, %f60 | ||
1442 | fxor %f62, %f6, %f62 | ||
1443 | std %f56, [%o2 + 0x00] | ||
1444 | std %f58, [%o2 + 0x08] | ||
1445 | std %f60, [%o2 + 0x10] | ||
1446 | std %f62, [%o2 + 0x18] | ||
1447 | subcc %o3, 0x20, %o3 | ||
1448 | add %o1, 0x20, %o1 | ||
1449 | brgz %o3, 1b | ||
1450 | add %o2, 0x20, %o2 | ||
1451 | brlz,pt %o3, 11f | ||
1452 | nop | ||
1453 | 10: xor %g1, %g3, %o5 | ||
1454 | MOVXTOD_O5_F0 | ||
1455 | xor %g2, %g7, %o5 | ||
1456 | MOVXTOD_O5_F2 | ||
1457 | add %g7, 1, %g7 | ||
1458 | add %g3, 1, %o5 | ||
1459 | movrz %g7, %o5, %g3 | ||
1460 | ENCRYPT_192(8, 0, 2, 4, 6) | ||
1461 | ldd [%o1 + 0x00], %f4 | ||
1462 | ldd [%o1 + 0x08], %f6 | ||
1463 | fxor %f4, %f0, %f4 | ||
1464 | fxor %f6, %f2, %f6 | ||
1465 | std %f4, [%o2 + 0x00] | ||
1466 | std %f6, [%o2 + 0x08] | ||
1467 | 11: stx %g3, [%o4 + 0x00] | ||
1468 | retl | ||
1469 | stx %g7, [%o4 + 0x08] | ||
1470 | ENDPROC(aes_sparc64_ctr_crypt_192) | ||
1471 | |||
1472 | .align 32 | ||
1473 | ENTRY(aes_sparc64_ctr_crypt_256) | ||
1474 | /* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */ | ||
1475 | ldx [%o4 + 0x00], %g3 | ||
1476 | ldx [%o4 + 0x08], %g7 | ||
1477 | subcc %o3, 0x10, %o3 | ||
1478 | ldx [%o0 + 0x00], %g1 | ||
1479 | be 10f | ||
1480 | ldx [%o0 + 0x08], %g2 | ||
1481 | 1: xor %g1, %g3, %o5 | ||
1482 | MOVXTOD_O5_F0 | ||
1483 | xor %g2, %g7, %o5 | ||
1484 | MOVXTOD_O5_F2 | ||
1485 | add %g7, 1, %g7 | ||
1486 | add %g3, 1, %o5 | ||
1487 | movrz %g7, %o5, %g3 | ||
1488 | xor %g1, %g3, %o5 | ||
1489 | MOVXTOD_O5_F4 | ||
1490 | xor %g2, %g7, %o5 | ||
1491 | MOVXTOD_O5_F6 | ||
1492 | add %g7, 1, %g7 | ||
1493 | add %g3, 1, %o5 | ||
1494 | movrz %g7, %o5, %g3 | ||
1495 | ENCRYPT_256_2(8, 0, 2, 4, 6) | ||
1496 | ldd [%o1 + 0x00], %f56 | ||
1497 | ldd [%o1 + 0x08], %f58 | ||
1498 | ldd [%o1 + 0x10], %f60 | ||
1499 | ldd [%o1 + 0x18], %f62 | ||
1500 | fxor %f56, %f0, %f56 | ||
1501 | fxor %f58, %f2, %f58 | ||
1502 | fxor %f60, %f4, %f60 | ||
1503 | fxor %f62, %f6, %f62 | ||
1504 | std %f56, [%o2 + 0x00] | ||
1505 | std %f58, [%o2 + 0x08] | ||
1506 | std %f60, [%o2 + 0x10] | ||
1507 | std %f62, [%o2 + 0x18] | ||
1508 | subcc %o3, 0x20, %o3 | ||
1509 | add %o1, 0x20, %o1 | ||
1510 | brgz %o3, 1b | ||
1511 | add %o2, 0x20, %o2 | ||
1512 | brlz,pt %o3, 11f | ||
1513 | nop | ||
1514 | ldd [%o0 + 0xd0], %f56 | ||
1515 | ldd [%o0 + 0xd8], %f58 | ||
1516 | ldd [%o0 + 0xe0], %f60 | ||
1517 | ldd [%o0 + 0xe8], %f62 | ||
1518 | 10: xor %g1, %g3, %o5 | ||
1519 | MOVXTOD_O5_F0 | ||
1520 | xor %g2, %g7, %o5 | ||
1521 | MOVXTOD_O5_F2 | ||
1522 | add %g7, 1, %g7 | ||
1523 | add %g3, 1, %o5 | ||
1524 | movrz %g7, %o5, %g3 | ||
1525 | ENCRYPT_256(8, 0, 2, 4, 6) | ||
1526 | ldd [%o1 + 0x00], %f4 | ||
1527 | ldd [%o1 + 0x08], %f6 | ||
1528 | fxor %f4, %f0, %f4 | ||
1529 | fxor %f6, %f2, %f6 | ||
1530 | std %f4, [%o2 + 0x00] | ||
1531 | std %f6, [%o2 + 0x08] | ||
1532 | 11: stx %g3, [%o4 + 0x00] | ||
1533 | retl | ||
1534 | stx %g7, [%o4 + 0x08] | ||
1535 | ENDPROC(aes_sparc64_ctr_crypt_256) | ||
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c new file mode 100644 index 000000000000..8f1c9980f637 --- /dev/null +++ b/arch/sparc/crypto/aes_glue.c | |||
@@ -0,0 +1,477 @@ | |||
1 | /* Glue code for AES encryption optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon arch/x86/crypto/aesni-intel_glue.c | ||
4 | * | ||
5 | * Copyright (C) 2008, Intel Corp. | ||
6 | * Author: Huang Ying <ying.huang@intel.com> | ||
7 | * | ||
8 | * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD | ||
9 | * interface for 64-bit kernels. | ||
10 | * Authors: Adrian Hoban <adrian.hoban@intel.com> | ||
11 | * Gabriele Paoloni <gabriele.paoloni@intel.com> | ||
12 | * Tadeusz Struk (tadeusz.struk@intel.com) | ||
13 | * Aidan O'Mahony (aidan.o.mahony@intel.com) | ||
14 | * Copyright (c) 2010, Intel Corporation. | ||
15 | */ | ||
16 | |||
17 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
18 | |||
19 | #include <linux/crypto.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/mm.h> | ||
23 | #include <linux/types.h> | ||
24 | #include <crypto/algapi.h> | ||
25 | #include <crypto/aes.h> | ||
26 | |||
27 | #include <asm/fpumacro.h> | ||
28 | #include <asm/pstate.h> | ||
29 | #include <asm/elf.h> | ||
30 | |||
31 | #include "opcodes.h" | ||
32 | |||
33 | struct aes_ops { | ||
34 | void (*encrypt)(const u64 *key, const u32 *input, u32 *output); | ||
35 | void (*decrypt)(const u64 *key, const u32 *input, u32 *output); | ||
36 | void (*load_encrypt_keys)(const u64 *key); | ||
37 | void (*load_decrypt_keys)(const u64 *key); | ||
38 | void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output, | ||
39 | unsigned int len); | ||
40 | void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output, | ||
41 | unsigned int len); | ||
42 | void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output, | ||
43 | unsigned int len, u64 *iv); | ||
44 | void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output, | ||
45 | unsigned int len, u64 *iv); | ||
46 | void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output, | ||
47 | unsigned int len, u64 *iv); | ||
48 | }; | ||
49 | |||
50 | struct crypto_sparc64_aes_ctx { | ||
51 | struct aes_ops *ops; | ||
52 | u64 key[AES_MAX_KEYLENGTH / sizeof(u64)]; | ||
53 | u32 key_length; | ||
54 | u32 expanded_key_length; | ||
55 | }; | ||
56 | |||
57 | extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input, | ||
58 | u32 *output); | ||
59 | extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input, | ||
60 | u32 *output); | ||
61 | extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input, | ||
62 | u32 *output); | ||
63 | |||
64 | extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input, | ||
65 | u32 *output); | ||
66 | extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input, | ||
67 | u32 *output); | ||
68 | extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input, | ||
69 | u32 *output); | ||
70 | |||
71 | extern void aes_sparc64_load_encrypt_keys_128(const u64 *key); | ||
72 | extern void aes_sparc64_load_encrypt_keys_192(const u64 *key); | ||
73 | extern void aes_sparc64_load_encrypt_keys_256(const u64 *key); | ||
74 | |||
75 | extern void aes_sparc64_load_decrypt_keys_128(const u64 *key); | ||
76 | extern void aes_sparc64_load_decrypt_keys_192(const u64 *key); | ||
77 | extern void aes_sparc64_load_decrypt_keys_256(const u64 *key); | ||
78 | |||
79 | extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input, | ||
80 | u64 *output, unsigned int len); | ||
81 | extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input, | ||
82 | u64 *output, unsigned int len); | ||
83 | extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input, | ||
84 | u64 *output, unsigned int len); | ||
85 | |||
86 | extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input, | ||
87 | u64 *output, unsigned int len); | ||
88 | extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input, | ||
89 | u64 *output, unsigned int len); | ||
90 | extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input, | ||
91 | u64 *output, unsigned int len); | ||
92 | |||
93 | extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input, | ||
94 | u64 *output, unsigned int len, | ||
95 | u64 *iv); | ||
96 | |||
97 | extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input, | ||
98 | u64 *output, unsigned int len, | ||
99 | u64 *iv); | ||
100 | |||
101 | extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input, | ||
102 | u64 *output, unsigned int len, | ||
103 | u64 *iv); | ||
104 | |||
105 | extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input, | ||
106 | u64 *output, unsigned int len, | ||
107 | u64 *iv); | ||
108 | |||
109 | extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input, | ||
110 | u64 *output, unsigned int len, | ||
111 | u64 *iv); | ||
112 | |||
113 | extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input, | ||
114 | u64 *output, unsigned int len, | ||
115 | u64 *iv); | ||
116 | |||
117 | extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input, | ||
118 | u64 *output, unsigned int len, | ||
119 | u64 *iv); | ||
120 | extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input, | ||
121 | u64 *output, unsigned int len, | ||
122 | u64 *iv); | ||
123 | extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input, | ||
124 | u64 *output, unsigned int len, | ||
125 | u64 *iv); | ||
126 | |||
127 | struct aes_ops aes128_ops = { | ||
128 | .encrypt = aes_sparc64_encrypt_128, | ||
129 | .decrypt = aes_sparc64_decrypt_128, | ||
130 | .load_encrypt_keys = aes_sparc64_load_encrypt_keys_128, | ||
131 | .load_decrypt_keys = aes_sparc64_load_decrypt_keys_128, | ||
132 | .ecb_encrypt = aes_sparc64_ecb_encrypt_128, | ||
133 | .ecb_decrypt = aes_sparc64_ecb_decrypt_128, | ||
134 | .cbc_encrypt = aes_sparc64_cbc_encrypt_128, | ||
135 | .cbc_decrypt = aes_sparc64_cbc_decrypt_128, | ||
136 | .ctr_crypt = aes_sparc64_ctr_crypt_128, | ||
137 | }; | ||
138 | |||
139 | struct aes_ops aes192_ops = { | ||
140 | .encrypt = aes_sparc64_encrypt_192, | ||
141 | .decrypt = aes_sparc64_decrypt_192, | ||
142 | .load_encrypt_keys = aes_sparc64_load_encrypt_keys_192, | ||
143 | .load_decrypt_keys = aes_sparc64_load_decrypt_keys_192, | ||
144 | .ecb_encrypt = aes_sparc64_ecb_encrypt_192, | ||
145 | .ecb_decrypt = aes_sparc64_ecb_decrypt_192, | ||
146 | .cbc_encrypt = aes_sparc64_cbc_encrypt_192, | ||
147 | .cbc_decrypt = aes_sparc64_cbc_decrypt_192, | ||
148 | .ctr_crypt = aes_sparc64_ctr_crypt_192, | ||
149 | }; | ||
150 | |||
151 | struct aes_ops aes256_ops = { | ||
152 | .encrypt = aes_sparc64_encrypt_256, | ||
153 | .decrypt = aes_sparc64_decrypt_256, | ||
154 | .load_encrypt_keys = aes_sparc64_load_encrypt_keys_256, | ||
155 | .load_decrypt_keys = aes_sparc64_load_decrypt_keys_256, | ||
156 | .ecb_encrypt = aes_sparc64_ecb_encrypt_256, | ||
157 | .ecb_decrypt = aes_sparc64_ecb_decrypt_256, | ||
158 | .cbc_encrypt = aes_sparc64_cbc_encrypt_256, | ||
159 | .cbc_decrypt = aes_sparc64_cbc_decrypt_256, | ||
160 | .ctr_crypt = aes_sparc64_ctr_crypt_256, | ||
161 | }; | ||
162 | |||
163 | extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key, | ||
164 | unsigned int key_len); | ||
165 | |||
166 | static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
167 | unsigned int key_len) | ||
168 | { | ||
169 | struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
170 | u32 *flags = &tfm->crt_flags; | ||
171 | |||
172 | switch (key_len) { | ||
173 | case AES_KEYSIZE_128: | ||
174 | ctx->expanded_key_length = 0xb0; | ||
175 | ctx->ops = &aes128_ops; | ||
176 | break; | ||
177 | |||
178 | case AES_KEYSIZE_192: | ||
179 | ctx->expanded_key_length = 0xd0; | ||
180 | ctx->ops = &aes192_ops; | ||
181 | break; | ||
182 | |||
183 | case AES_KEYSIZE_256: | ||
184 | ctx->expanded_key_length = 0xf0; | ||
185 | ctx->ops = &aes256_ops; | ||
186 | break; | ||
187 | |||
188 | default: | ||
189 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
190 | return -EINVAL; | ||
191 | } | ||
192 | |||
193 | aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len); | ||
194 | ctx->key_length = key_len; | ||
195 | |||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
200 | { | ||
201 | struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
202 | |||
203 | ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); | ||
204 | } | ||
205 | |||
206 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
207 | { | ||
208 | struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
209 | |||
210 | ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst); | ||
211 | } | ||
212 | |||
213 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) | ||
214 | |||
215 | static int ecb_encrypt(struct blkcipher_desc *desc, | ||
216 | struct scatterlist *dst, struct scatterlist *src, | ||
217 | unsigned int nbytes) | ||
218 | { | ||
219 | struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
220 | struct blkcipher_walk walk; | ||
221 | int err; | ||
222 | |||
223 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
224 | err = blkcipher_walk_virt(desc, &walk); | ||
225 | |||
226 | ctx->ops->load_encrypt_keys(&ctx->key[0]); | ||
227 | while ((nbytes = walk.nbytes)) { | ||
228 | unsigned int block_len = nbytes & AES_BLOCK_MASK; | ||
229 | |||
230 | if (likely(block_len)) { | ||
231 | ctx->ops->ecb_encrypt(&ctx->key[0], | ||
232 | (const u64 *)walk.src.virt.addr, | ||
233 | (u64 *) walk.dst.virt.addr, | ||
234 | block_len); | ||
235 | } | ||
236 | nbytes &= AES_BLOCK_SIZE - 1; | ||
237 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
238 | } | ||
239 | fprs_write(0); | ||
240 | return err; | ||
241 | } | ||
242 | |||
/* ECB decrypt using the blkcipher walk API.
 *
 * Note: the assembler decrypt routine is handed a pointer one past the
 * end of the expanded key schedule (key_end); presumably it walks the
 * schedule backwards from there — confirm against aes_asm.S.
 */
static int ecb_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	u64 *key_end;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	ctx->ops->load_decrypt_keys(&ctx->key[0]);
	/* End of the expanded schedule for this key size. */
	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & AES_BLOCK_MASK;

		if (likely(block_len)) {
			ctx->ops->ecb_decrypt(key_end,
					      (const u64 *) walk.src.virt.addr,
					      (u64 *) walk.dst.virt.addr, block_len);
		}
		/* Leftover sub-block bytes stay unprocessed in the walk. */
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	/* Clear FPU-in-use state after VIS register usage. */
	fprs_write(0);

	return err;
}
272 | |||
273 | static int cbc_encrypt(struct blkcipher_desc *desc, | ||
274 | struct scatterlist *dst, struct scatterlist *src, | ||
275 | unsigned int nbytes) | ||
276 | { | ||
277 | struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
278 | struct blkcipher_walk walk; | ||
279 | int err; | ||
280 | |||
281 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
282 | err = blkcipher_walk_virt(desc, &walk); | ||
283 | |||
284 | ctx->ops->load_encrypt_keys(&ctx->key[0]); | ||
285 | while ((nbytes = walk.nbytes)) { | ||
286 | unsigned int block_len = nbytes & AES_BLOCK_MASK; | ||
287 | |||
288 | if (likely(block_len)) { | ||
289 | ctx->ops->cbc_encrypt(&ctx->key[0], | ||
290 | (const u64 *)walk.src.virt.addr, | ||
291 | (u64 *) walk.dst.virt.addr, | ||
292 | block_len, (u64 *) walk.iv); | ||
293 | } | ||
294 | nbytes &= AES_BLOCK_SIZE - 1; | ||
295 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
296 | } | ||
297 | fprs_write(0); | ||
298 | return err; | ||
299 | } | ||
300 | |||
/* CBC decrypt.  As in ecb_decrypt(), the assembler routine receives a
 * pointer past the end of the expanded schedule (key_end); the IV is
 * consumed and updated in place by the assembler code.
 */
static int cbc_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	u64 *key_end;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	ctx->ops->load_decrypt_keys(&ctx->key[0]);
	/* One past the last expanded-key word for this key size. */
	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & AES_BLOCK_MASK;

		if (likely(block_len)) {
			ctx->ops->cbc_decrypt(key_end,
					      (const u64 *) walk.src.virt.addr,
					      (u64 *) walk.dst.virt.addr,
					      block_len, (u64 *) walk.iv);
		}
		nbytes &= AES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	/* Drop FPU state used by the VIS crypto opcodes. */
	fprs_write(0);

	return err;
}
331 | |||
332 | static int ctr_crypt(struct blkcipher_desc *desc, | ||
333 | struct scatterlist *dst, struct scatterlist *src, | ||
334 | unsigned int nbytes) | ||
335 | { | ||
336 | struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
337 | struct blkcipher_walk walk; | ||
338 | int err; | ||
339 | |||
340 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
341 | err = blkcipher_walk_virt(desc, &walk); | ||
342 | |||
343 | ctx->ops->load_encrypt_keys(&ctx->key[0]); | ||
344 | while ((nbytes = walk.nbytes)) { | ||
345 | unsigned int block_len = nbytes & AES_BLOCK_MASK; | ||
346 | |||
347 | if (likely(block_len)) { | ||
348 | ctx->ops->ctr_crypt(&ctx->key[0], | ||
349 | (const u64 *)walk.src.virt.addr, | ||
350 | (u64 *) walk.dst.virt.addr, | ||
351 | block_len, (u64 *) walk.iv); | ||
352 | } | ||
353 | nbytes &= AES_BLOCK_SIZE - 1; | ||
354 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
355 | } | ||
356 | fprs_write(0); | ||
357 | return err; | ||
358 | } | ||
359 | |||
/* Registered algorithm instances: the bare cipher plus ECB/CBC/CTR
 * blkcipher wrappers, all at SPARC_CR_OPCODE_PRIORITY so they win over
 * the generic C implementations when the crypto opcodes are present.
 */
static struct crypto_alg algs[] = { {
	.cra_name		= "aes",
	.cra_driver_name	= "aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 3,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= AES_MIN_KEY_SIZE,
			.cia_max_keysize	= AES_MAX_KEY_SIZE,
			.cia_setkey		= aes_set_key,
			.cia_encrypt		= aes_encrypt,
			.cia_decrypt		= aes_decrypt
		}
	}
}, {
	.cra_name		= "ecb(aes)",
	.cra_driver_name	= "ecb-aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	/* 8-byte alignment: the asm routines use 64-bit loads/stores. */
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(aes)",
	.cra_driver_name	= "cbc-aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(aes)",
	.cra_driver_name	= "ctr-aes-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	/* NOTE(review): a CTR stream-cipher implementation conventionally
	 * advertises cra_blocksize = 1 and handles a trailing partial
	 * block; with AES_BLOCK_SIZE here, requests that are not a
	 * multiple of 16 bytes are rejected by the blkcipher layer —
	 * confirm this is intended.
	 */
	.cra_blocksize		= AES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= AES_MIN_KEY_SIZE,
			.max_keysize	= AES_MAX_KEY_SIZE,
			.setkey		= aes_set_key,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
} };
436 | |||
437 | static bool __init sparc64_has_aes_opcode(void) | ||
438 | { | ||
439 | unsigned long cfr; | ||
440 | |||
441 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
442 | return false; | ||
443 | |||
444 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
445 | if (!(cfr & CFR_AES)) | ||
446 | return false; | ||
447 | |||
448 | return true; | ||
449 | } | ||
450 | |||
451 | static int __init aes_sparc64_mod_init(void) | ||
452 | { | ||
453 | int i; | ||
454 | |||
455 | for (i = 0; i < ARRAY_SIZE(algs); i++) | ||
456 | INIT_LIST_HEAD(&algs[i].cra_list); | ||
457 | |||
458 | if (sparc64_has_aes_opcode()) { | ||
459 | pr_info("Using sparc64 aes opcodes optimized AES implementation\n"); | ||
460 | return crypto_register_algs(algs, ARRAY_SIZE(algs)); | ||
461 | } | ||
462 | pr_info("sparc64 aes opcodes not available.\n"); | ||
463 | return -ENODEV; | ||
464 | } | ||
465 | |||
/* Unregister everything registered by aes_sparc64_mod_init(). */
static void __exit aes_sparc64_mod_fini(void)
{
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}
470 | |||
471 | module_init(aes_sparc64_mod_init); | ||
472 | module_exit(aes_sparc64_mod_fini); | ||
473 | |||
474 | MODULE_LICENSE("GPL"); | ||
475 | MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated"); | ||
476 | |||
477 | MODULE_ALIAS("aes"); | ||
diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S new file mode 100644 index 000000000000..cc39553a4e43 --- /dev/null +++ b/arch/sparc/crypto/camellia_asm.S | |||
@@ -0,0 +1,563 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
/* Six Feistel rounds (one Camellia "grand round") using the subkeys
 * starting at float-register pair KEY_BASE; I0/I1 are the two 64-bit
 * data halves, updated in place.
 */
#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \
	CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \
	CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)

/* A grand round followed by the FL/FL^-1 inserted-function layer. */
#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
	CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
	CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
	CAMELLIA_FLI(KEY_BASE + 14, I1, I1)

	.data

	/* Camellia key-schedule constants (sigma1..sigma6 from the
	 * Camellia specification), consumed by the key expansion below.
	 */
	.align	8
SIGMA:	.xword	0xA09E667F3BCC908B
	.xword	0xB67AE8584CAA73B2
	.xword	0xC6EF372FE94F82BE
	.xword	0x54FF53A5F1D36F1C
	.xword	0x10E527FADE682D1D
	.xword	0xB05688C2B3E6C1FD
28 | |||
29 | .text | ||
30 | |||
	.align	32
ENTRY(camellia_sparc64_key_expand)
	/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
	VISEntry
	/* Load the raw user key and store its first 128 bits as the
	 * initial subkeys; keep a copy in %f28/%f30 for later XORs.
	 */
	ld	[%o0 + 0x00], %f0	! i0, k[0]
	ld	[%o0 + 0x04], %f1	! i1, k[1]
	ld	[%o0 + 0x08], %f2	! i2, k[2]
	ld	[%o0 + 0x0c], %f3	! i3, k[3]
	std	%f0, [%o1 + 0x00]	! k[0, 1]
	fsrc2	%f0, %f28
	std	%f2, [%o1 + 0x08]	! k[2, 3]
	cmp	%o2, 16
	be	10f
	fsrc2	%f2, %f30

	/* 192/256-bit keys: load the remaining key material.  For a
	 * 192-bit key the last 64 bits are synthesized as the one's
	 * complement of the previous word (fone/fxor), per the spec.
	 */
	ld	[%o0 + 0x10], %f0
	ld	[%o0 + 0x14], %f1
	std	%f0, [%o1 + 0x20]	! k[8, 9]
	cmp	%o2, 24
	fone	%f10
	be,a	1f
	fxor	%f10, %f0, %f2
	ld	[%o0 + 0x18], %f2
	ld	[%o0 + 0x1c], %f3
1:
	std	%f2, [%o1 + 0x28]	! k[10, 11]
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2

10:
	/* Derive the KA/KB intermediate values by running Feistel
	 * rounds keyed with the SIGMA constants.
	 */
	sethi	%hi(SIGMA), %g3
	or	%g3, %lo(SIGMA), %g3
	ldd	[%g3 + 0x00], %f16
	ldd	[%g3 + 0x08], %f18
	ldd	[%g3 + 0x10], %f20
	ldd	[%g3 + 0x18], %f22
	ldd	[%g3 + 0x20], %f24
	ldd	[%g3 + 0x28], %f26
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2
	CAMELLIA_F(20, 2, 0, 2)
	CAMELLIA_F(22, 0, 2, 0)

/* 128-bit rotate-left of the pair {S01,S23} by N bits, clobbering
 * TMP1/TMP2.  Used to derive the schedule's rotated subkey words.
 */
#define ROTL128(S01, S23, TMP1, TMP2, N)	\
	srlx	S01, (64 - N), TMP1;		\
	sllx	S01, N, S01;			\
	srlx	S23, (64 - N), TMP2;		\
	sllx	S23, N, S23;			\
	or	S01, TMP2, S01;			\
	or	S23, TMP1, S23

	cmp	%o2, 16
	bne	1f
	nop
	/* 128-bit key */
	std	%f0, [%o1 + 0x10]	! k[ 4, 5]
	std	%f2, [%o1 + 0x18]	! k[ 6, 7]
	MOVDTOX_F0_O4
	MOVDTOX_F2_O5
	/* Store successive rotations of KA ... */
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x30]	! k[12, 13]
	stx	%o5, [%o1 + 0x38]	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x40]	! k[16, 17]
	stx	%o5, [%o1 + 0x48]	! k[18, 19]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x60]	! k[24, 25]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x70]	! k[28, 29]
	stx	%o5, [%o1 + 0x78]	! k[30, 31]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xc0]	! k[48, 49]
	stx	%o5, [%o1 + 0xc8]	! k[50, 51]

	/* ... and of KL (the raw key). */
	ldx	[%o1 + 0x00], %o4	! k[ 0, 1]
	ldx	[%o1 + 0x08], %o5	! k[ 2, 3]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x20]	! k[ 8, 9]
	stx	%o5, [%o1 + 0x28]	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x50]	! k[20, 21]
	stx	%o5, [%o1 + 0x58]	! k[22, 23]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o5, [%o1 + 0x68]	! k[26, 27]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0x80]	! k[32, 33]
	stx	%o5, [%o1 + 0x88]	! k[34, 35]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0x90]	! k[36, 37]
	stx	%o5, [%o1 + 0x98]	! k[38, 39]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
	stx	%o5, [%o1 + 0xb8]	! k[46, 47]

	/* 128-bit schedule: 3 grand rounds = 3*16 u32 subkey slots. */
	ba,pt	%xcc, 2f
	mov	(3 * 16 * 4), %o0

1:
	/* 192-bit or 256-bit key */
	std	%f0, [%o1 + 0x30]	! k[12, 13]
	std	%f2, [%o1 + 0x38]	! k[14, 15]
	ldd	[%o1 + 0x20], %f4	! k[ 8, 9]
	ldd	[%o1 + 0x28], %f6	! k[10, 11]
	fxor	%f0, %f4, %f0
	fxor	%f2, %f6, %f2
	CAMELLIA_F(24, 2, 0, 2)
	CAMELLIA_F(26, 0, 2, 0)
	std	%f0, [%o1 + 0x10]	! k[ 4, 5]
	std	%f2, [%o1 + 0x18]	! k[ 6, 7]
	MOVDTOX_F0_O4
	MOVDTOX_F2_O5
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x50]	! k[20, 21]
	stx	%o5, [%o1 + 0x58]	! k[22, 23]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
	ROTL128(%o4, %o5, %g2, %g3, 51)
	stx	%o4, [%o1 + 0x100]	! k[64, 65]
	stx	%o5, [%o1 + 0x108]	! k[66, 67]
	ldx	[%o1 + 0x20], %o4	! k[ 8, 9]
	ldx	[%o1 + 0x28], %o5	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x20]	! k[ 8, 9]
	stx	%o5, [%o1 + 0x28]	! k[10, 11]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x40]	! k[16, 17]
	stx	%o5, [%o1 + 0x48]	! k[18, 19]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x90]	! k[36, 37]
	stx	%o5, [%o1 + 0x98]	! k[38, 39]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xd0]	! k[52, 53]
	stx	%o5, [%o1 + 0xd8]	! k[54, 55]
	ldx	[%o1 + 0x30], %o4	! k[12, 13]
	ldx	[%o1 + 0x38], %o5	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x30]	! k[12, 13]
	stx	%o5, [%o1 + 0x38]	! k[14, 15]
	ROTL128(%o4, %o5, %g2, %g3, 30)
	stx	%o4, [%o1 + 0x70]	! k[28, 29]
	stx	%o5, [%o1 + 0x78]	! k[30, 31]
	/* k[48..51] takes the 32-bit halves cross-swapped. */
	srlx	%o4, 32, %g2
	srlx	%o5, 32, %g3
	stw	%o4, [%o1 + 0xc0]	! k[48]
	stw	%g3, [%o1 + 0xc4]	! k[49]
	stw	%o5, [%o1 + 0xc8]	! k[50]
	stw	%g2, [%o1 + 0xcc]	! k[51]
	ROTL128(%o4, %o5, %g2, %g3, 49)
	stx	%o4, [%o1 + 0xe0]	! k[56, 57]
	stx	%o5, [%o1 + 0xe8]	! k[58, 59]
	ldx	[%o1 + 0x00], %o4	! k[ 0, 1]
	ldx	[%o1 + 0x08], %o5	! k[ 2, 3]
	ROTL128(%o4, %o5, %g2, %g3, 45)
	stx	%o4, [%o1 + 0x60]	! k[24, 25]
	stx	%o5, [%o1 + 0x68]	! k[26, 27]
	ROTL128(%o4, %o5, %g2, %g3, 15)
	stx	%o4, [%o1 + 0x80]	! k[32, 33]
	stx	%o5, [%o1 + 0x88]	! k[34, 35]
	ROTL128(%o4, %o5, %g2, %g3, 17)
	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
	ROTL128(%o4, %o5, %g2, %g3, 34)
	stx	%o4, [%o1 + 0xf0]	! k[60, 61]
	stx	%o5, [%o1 + 0xf8]	! k[62, 63]
	/* 192/256-bit schedule: 4 grand rounds' worth of subkeys. */
	mov	(4 * 16 * 4), %o0
2:
	/* Build the decrypt schedule by copying the encrypt schedule
	 * back-to-front, 16*4-byte chunks at a time, with the 64-bit
	 * words of each chunk reversed.
	 */
	add	%o1, %o0, %o1
	ldd	[%o1 + 0x00], %f0
	ldd	[%o1 + 0x08], %f2
	std	%f0, [%o3 + 0x00]
	std	%f2, [%o3 + 0x08]
	add	%o3, 0x10, %o3
1:
	sub	%o1, (16 * 4), %o1
	ldd	[%o1 + 0x38], %f0
	ldd	[%o1 + 0x30], %f2
	ldd	[%o1 + 0x28], %f4
	ldd	[%o1 + 0x20], %f6
	ldd	[%o1 + 0x18], %f8
	ldd	[%o1 + 0x10], %f10
	std	%f0, [%o3 + 0x00]
	std	%f2, [%o3 + 0x08]
	std	%f4, [%o3 + 0x10]
	std	%f6, [%o3 + 0x18]
	std	%f8, [%o3 + 0x20]
	std	%f10, [%o3 + 0x28]

	ldd	[%o1 + 0x08], %f0
	ldd	[%o1 + 0x00], %f2
	std	%f0, [%o3 + 0x30]
	std	%f2, [%o3 + 0x38]
	subcc	%o0, (16 * 4), %o0
	bne,pt	%icc, 1b
	add	%o3, (16 * 4), %o3

	std	%f2, [%o3 - 0x10]
	std	%f0, [%o3 - 0x08]

	retl
	VISExit
ENDPROC(camellia_sparc64_key_expand)
238 | |||
	.align	32
ENTRY(camellia_sparc64_crypt)
	/* %o0=key, %o1=input, %o2=output, %o3=key_len
	 *
	 * Single-block Camellia en/decryption.  The key pointer selects
	 * direction: encrypt schedule to encrypt, decrypt schedule to
	 * decrypt.  128-bit keys run 3 grand rounds; larger keys run an
	 * extra grand round first (key pointer advanced by 0x40).
	 */
	VISEntry

	ld	[%o1 + 0x00], %f0
	ld	[%o1 + 0x04], %f1
	ld	[%o1 + 0x08], %f2
	ld	[%o1 + 0x0c], %f3

	/* Pre-whitening with the first two subkey doublewords. */
	ldd	[%o0 + 0x00], %f4
	ldd	[%o0 + 0x08], %f6

	cmp	%o3, 16
	fxor	%f4, %f0, %f0
	be	1f
	fxor	%f6, %f2, %f2

	/* key_len > 16: one additional grand round up front. */
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	add	%o0, 0x40, %o0

	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)

1:
	/* Load the remaining schedule and run the main 18 rounds. */
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	ldd	[%o0 + 0x50], %f24
	ldd	[%o0 + 0x58], %f26
	ldd	[%o0 + 0x60], %f28
	ldd	[%o0 + 0x68], %f30
	ldd	[%o0 + 0x70], %f32
	ldd	[%o0 + 0x78], %f34
	ldd	[%o0 + 0x80], %f36
	ldd	[%o0 + 0x88], %f38
	ldd	[%o0 + 0x90], %f40
	ldd	[%o0 + 0x98], %f42
	ldd	[%o0 + 0xa0], %f44
	ldd	[%o0 + 0xa8], %f46
	ldd	[%o0 + 0xb0], %f48
	ldd	[%o0 + 0xb8], %f50
	ldd	[%o0 + 0xc0], %f52
	ldd	[%o0 + 0xc8], %f54

	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	/* Post-whitening; halves come out swapped, stored accordingly. */
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0

	st	%f2, [%o2 + 0x00]
	st	%f3, [%o2 + 0x04]
	st	%f0, [%o2 + 0x08]
	st	%f1, [%o2 + 0x0c]

	retl
	VISExit
ENDPROC(camellia_sparc64_crypt)
309 | |||
	.align	32
ENTRY(camellia_sparc64_load_keys)
	/* %o0=key, %o1=key_len
	 *
	 * Preload the first 3 grand rounds' subkeys into %f4..%f54 so
	 * the bulk ECB/CBC loops below can run without reloading them
	 * per block.  Caller is responsible for VIS enter/exit.
	 */
	VISEntry
	ldd	[%o0 + 0x00], %f4
	ldd	[%o0 + 0x08], %f6
	ldd	[%o0 + 0x10], %f8
	ldd	[%o0 + 0x18], %f10
	ldd	[%o0 + 0x20], %f12
	ldd	[%o0 + 0x28], %f14
	ldd	[%o0 + 0x30], %f16
	ldd	[%o0 + 0x38], %f18
	ldd	[%o0 + 0x40], %f20
	ldd	[%o0 + 0x48], %f22
	ldd	[%o0 + 0x50], %f24
	ldd	[%o0 + 0x58], %f26
	ldd	[%o0 + 0x60], %f28
	ldd	[%o0 + 0x68], %f30
	ldd	[%o0 + 0x70], %f32
	ldd	[%o0 + 0x78], %f34
	ldd	[%o0 + 0x80], %f36
	ldd	[%o0 + 0x88], %f38
	ldd	[%o0 + 0x90], %f40
	ldd	[%o0 + 0x98], %f42
	ldd	[%o0 + 0xa0], %f44
	ldd	[%o0 + 0xa8], %f46
	ldd	[%o0 + 0xb0], %f48
	ldd	[%o0 + 0xb8], %f50
	ldd	[%o0 + 0xc0], %f52
	retl
	ldd	[%o0 + 0xc8], %f54
ENDPROC(camellia_sparc64_load_keys)
342 | |||
	.align	32
ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key
	 *
	 * Bulk ECB for 128-bit keys (3 grand rounds).  Requires the
	 * subkeys preloaded by camellia_sparc64_load_keys; len must be
	 * a non-zero multiple of 16.
	 */
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0
	/* Output halves swapped, as in camellia_sparc64_crypt. */
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x10, %o1
	retl
	nop
ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)
364 | |||
	.align	32
ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key
	 *
	 * Bulk ECB for 192/256-bit keys (4 grand rounds).  The schedule
	 * does not fit in the float registers, so the tail subkeys are
	 * loaded mid-block and the head subkeys reloaded before looping.
	 */
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/* Swap in the 4th grand round's subkeys (0xd0..0x108). */
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	/* Final 6 rounds interleaved with restoring the head subkeys. */
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f2
	fxor	%f22, %f0, %f0
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x10, %o1
	retl
	nop
ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)
408 | |||
	.align	32
ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV
	 *
	 * Bulk CBC encrypt, 128-bit keys.  The running IV is kept in
	 * %f60/%f62 and written back to *IV on exit so the caller can
	 * chain subsequent segments.
	 */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	/* CBC: XOR plaintext with previous ciphertext (IV). */
	fxor	%f60, %f0, %f0
	fxor	%f62, %f2, %f2
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	/* Ciphertext becomes the next IV directly. */
	fxor	%f52, %f2, %f60
	fxor	%f54, %f0, %f62
	std	%f60, [%o1 + 0x00]
	std	%f62, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x10, %o1
	std	%f60, [%o4 + 0x00]
	retl
	std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)
435 | |||
	.align	32
ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV
	 *
	 * Bulk CBC encrypt, 192/256-bit keys.  Same subkey juggling as
	 * the 4-grand-round ECB path; IV chained in %f60/%f62.
	 */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	add	%o0, 0x10, %o0
	fxor	%f60, %f0, %f0
	fxor	%f62, %f2, %f2
	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/* Load the 4th grand round's subkeys over %f8..%f22. */
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	/* Restore head subkeys while finishing the last rounds. */
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f60
	fxor	%f22, %f0, %f62
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	std	%f60, [%o1 + 0x00]
	std	%f62, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x10, %o1
	std	%f60, [%o4 + 0x00]
	retl
	std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)
484 | |||
	.align	32
ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV
	 *
	 * Bulk CBC decrypt, 128-bit keys.  Each input ciphertext block
	 * is saved in %f56/%f58 so it can become the next IV after the
	 * block cipher output is XORed with the previous IV.
	 */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f56
	ldd	[%o0 + 0x08], %f58
	add	%o0, 0x10, %o0
	fxor	%f4, %f56, %f0
	fxor	%f6, %f58, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS(40, 0, 2)
	fxor	%f52, %f2, %f2
	fxor	%f54, %f0, %f0
	/* XOR with previous IV, then advance IV to this ciphertext. */
	fxor	%f60, %f2, %f2
	fxor	%f62, %f0, %f0
	fsrc2	%f56, %f60
	fsrc2	%f58, %f62
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x10, %o1
	std	%f60, [%o4 + 0x00]
	retl
	std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)
513 | |||
	.align	32
ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV
	 *
	 * Bulk CBC decrypt, 192/256-bit keys.  Combines the 4-grand-
	 * round subkey juggling with the CBC-decrypt IV handling above.
	 */
	ldd	[%o4 + 0x00], %f60
	ldd	[%o4 + 0x08], %f62
1:	ldd	[%o0 + 0x00], %f56
	ldd	[%o0 + 0x08], %f58
	add	%o0, 0x10, %o0
	fxor	%f4, %f56, %f0
	fxor	%f6, %f58, %f2
	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
	/* 4th grand round's subkeys. */
	ldd	[%o3 + 0xd0], %f8
	ldd	[%o3 + 0xd8], %f10
	ldd	[%o3 + 0xe0], %f12
	ldd	[%o3 + 0xe8], %f14
	ldd	[%o3 + 0xf0], %f16
	ldd	[%o3 + 0xf8], %f18
	ldd	[%o3 + 0x100], %f20
	ldd	[%o3 + 0x108], %f22
	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
	CAMELLIA_F(8, 2, 0, 2)
	CAMELLIA_F(10, 0, 2, 0)
	ldd	[%o3 + 0x10], %f8
	ldd	[%o3 + 0x18], %f10
	CAMELLIA_F(12, 2, 0, 2)
	CAMELLIA_F(14, 0, 2, 0)
	ldd	[%o3 + 0x20], %f12
	ldd	[%o3 + 0x28], %f14
	CAMELLIA_F(16, 2, 0, 2)
	CAMELLIA_F(18, 0, 2, 0)
	ldd	[%o3 + 0x30], %f16
	ldd	[%o3 + 0x38], %f18
	fxor	%f20, %f2, %f2
	fxor	%f22, %f0, %f0
	ldd	[%o3 + 0x40], %f20
	ldd	[%o3 + 0x48], %f22
	/* XOR with previous IV; saved ciphertext becomes next IV. */
	fxor	%f60, %f2, %f2
	fxor	%f62, %f0, %f0
	fsrc2	%f56, %f60
	fsrc2	%f58, %f62
	std	%f2, [%o1 + 0x00]
	std	%f0, [%o1 + 0x08]
	subcc	%o2, 0x10, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x10, %o1
	std	%f60, [%o4 + 0x00]
	retl
	std	%f62, [%o4 + 0x08]
ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c new file mode 100644 index 000000000000..42905c084299 --- /dev/null +++ b/arch/sparc/crypto/camellia_glue.c | |||
@@ -0,0 +1,322 @@ | |||
1 | /* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
7 | |||
8 | #include <linux/crypto.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/types.h> | ||
13 | #include <crypto/algapi.h> | ||
14 | |||
15 | #include <asm/fpumacro.h> | ||
16 | #include <asm/pstate.h> | ||
17 | #include <asm/elf.h> | ||
18 | |||
19 | #include "opcodes.h" | ||
20 | |||
#define CAMELLIA_MIN_KEY_SIZE	     16
#define CAMELLIA_MAX_KEY_SIZE	     32
#define CAMELLIA_BLOCK_SIZE	     16
#define CAMELLIA_TABLE_BYTE_LEN	     272

/* Per-tfm context: both expanded key schedules (laid out as 64-bit
 * words, matching what the asm routines load with ldd) plus the raw
 * key length, which selects the 3- vs 4-grand-round asm variants.
 */
struct camellia_sparc64_ctx {
	u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
	u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
	int key_len;
};
31 | |||
32 | extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key, | ||
33 | unsigned int key_len, u64 *decrypt_key); | ||
34 | |||
35 | static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key, | ||
36 | unsigned int key_len) | ||
37 | { | ||
38 | struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
39 | const u32 *in_key = (const u32 *) _in_key; | ||
40 | u32 *flags = &tfm->crt_flags; | ||
41 | |||
42 | if (key_len != 16 && key_len != 24 && key_len != 32) { | ||
43 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
44 | return -EINVAL; | ||
45 | } | ||
46 | |||
47 | ctx->key_len = key_len; | ||
48 | |||
49 | camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0], | ||
50 | key_len, &ctx->decrypt_key[0]); | ||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | extern void camellia_sparc64_crypt(const u64 *key, const u32 *input, | ||
55 | u32 *output, unsigned int key_len); | ||
56 | |||
57 | static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
58 | { | ||
59 | struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
60 | |||
61 | camellia_sparc64_crypt(&ctx->encrypt_key[0], | ||
62 | (const u32 *) src, | ||
63 | (u32 *) dst, ctx->key_len); | ||
64 | } | ||
65 | |||
66 | static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
67 | { | ||
68 | struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
69 | |||
70 | camellia_sparc64_crypt(&ctx->decrypt_key[0], | ||
71 | (const u32 *) src, | ||
72 | (u32 *) dst, ctx->key_len); | ||
73 | } | ||
74 | |||
75 | extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len); | ||
76 | |||
77 | typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len, | ||
78 | const u64 *key); | ||
79 | |||
80 | extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds; | ||
81 | extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds; | ||
82 | |||
83 | #define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1)) | ||
84 | |||
/* Common ECB walker for both directions.
 *
 * Loads the selected expanded key into the FPU once, then processes
 * each scatterlist segment a whole number of 16-byte blocks at a
 * time; the sub-block remainder is handed back to
 * blkcipher_walk_done() for the next iteration.  fprs_write(0) at
 * the end clears FPRS so the FPU is not left marked in-use with key
 * material in the FP registers.
 */
static int __ecb_crypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes, bool encrypt)
{
	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	ecb_crypt_op *op;
	const u64 *key;
	int err;

	/* 16-byte keys use the 3-grand-round core; 24/32-byte use 4. */
	op = camellia_sparc64_ecb_crypt_3_grand_rounds;
	if (ctx->key_len != 16)
		op = camellia_sparc64_ecb_crypt_4_grand_rounds;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	if (encrypt)
		key = &ctx->encrypt_key[0];
	else
		key = &ctx->decrypt_key[0];
	camellia_sparc64_load_keys(key, ctx->key_len);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64;
			u64 *dst64;

			src64 = (const u64 *)walk.src.virt.addr;
			dst64 = (u64 *) walk.dst.virt.addr;
			op(src64, dst64, block_len, key);
		}
		/* leftover partial block, if any */
		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}
124 | |||
125 | static int ecb_encrypt(struct blkcipher_desc *desc, | ||
126 | struct scatterlist *dst, struct scatterlist *src, | ||
127 | unsigned int nbytes) | ||
128 | { | ||
129 | return __ecb_crypt(desc, dst, src, nbytes, true); | ||
130 | } | ||
131 | |||
132 | static int ecb_decrypt(struct blkcipher_desc *desc, | ||
133 | struct scatterlist *dst, struct scatterlist *src, | ||
134 | unsigned int nbytes) | ||
135 | { | ||
136 | return __ecb_crypt(desc, dst, src, nbytes, false); | ||
137 | } | ||
138 | |||
139 | typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len, | ||
140 | const u64 *key, u64 *iv); | ||
141 | |||
142 | extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds; | ||
143 | extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds; | ||
144 | extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds; | ||
145 | extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds; | ||
146 | |||
/* CBC encrypt: the asm routine chains through walk.iv itself, so the
 * IV is carried across scatterlist segments without any C-side
 * copying.  Encrypt always uses the encrypt schedule.
 */
static int cbc_encrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	cbc_crypt_op *op;
	const u64 *key;
	int err;

	/* 16-byte keys use the 3-grand-round core; 24/32-byte use 4. */
	op = camellia_sparc64_cbc_encrypt_3_grand_rounds;
	if (ctx->key_len != 16)
		op = camellia_sparc64_cbc_encrypt_4_grand_rounds;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	key = &ctx->encrypt_key[0];
	camellia_sparc64_load_keys(key, ctx->key_len);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64;
			u64 *dst64;

			src64 = (const u64 *)walk.src.virt.addr;
			dst64 = (u64 *) walk.dst.virt.addr;
			op(src64, dst64, block_len, key,
			   (u64 *) walk.iv);
		}
		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	/* Clear FPRS: don't leave key material live in FP registers. */
	fprs_write(0);
	return err;
}
184 | |||
/* CBC decrypt: identical walk structure to cbc_encrypt(), but drives
 * the decrypt-schedule asm variants; the IV update (last ciphertext
 * block) is handled inside the asm via walk.iv.
 */
static int cbc_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	cbc_crypt_op *op;
	const u64 *key;
	int err;

	/* 16-byte keys use the 3-grand-round core; 24/32-byte use 4. */
	op = camellia_sparc64_cbc_decrypt_3_grand_rounds;
	if (ctx->key_len != 16)
		op = camellia_sparc64_cbc_decrypt_4_grand_rounds;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	key = &ctx->decrypt_key[0];
	camellia_sparc64_load_keys(key, ctx->key_len);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64;
			u64 *dst64;

			src64 = (const u64 *)walk.src.virt.addr;
			dst64 = (u64 *) walk.dst.virt.addr;
			op(src64, dst64, block_len, key,
			   (u64 *) walk.iv);
		}
		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	/* Clear FPRS: don't leave key material live in FP registers. */
	fprs_write(0);
	return err;
}
222 | |||
/* Three registrations: the bare single-block cipher plus ECB and CBC
 * blkcipher modes.  SPARC_CR_OPCODE_PRIORITY ranks these above the
 * generic C implementations when the opcodes are present.
 */
static struct crypto_alg algs[] = { {
	.cra_name		= "camellia",
	.cra_driver_name	= "camellia-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
	.cra_alignmask		= 3,	/* u32 loads in the setkey/crypt paths */
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.cia_max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.cia_setkey		= camellia_set_key,
			.cia_encrypt		= camellia_encrypt,
			.cia_decrypt		= camellia_decrypt
		}
	}
}, {
	.cra_name		= "ecb(camellia)",
	.cra_driver_name	= "ecb-camellia-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
	.cra_alignmask		= 7,	/* bulk paths use 64-bit (ldd) accesses */
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.setkey		= camellia_set_key,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(camellia)",
	.cra_driver_name	= "cbc-camellia-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
	.cra_alignmask		= 7,	/* bulk paths use 64-bit (ldd) accesses */
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
			.setkey		= camellia_set_key,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}
};
281 | |||
282 | static bool __init sparc64_has_camellia_opcode(void) | ||
283 | { | ||
284 | unsigned long cfr; | ||
285 | |||
286 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
287 | return false; | ||
288 | |||
289 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
290 | if (!(cfr & CFR_CAMELLIA)) | ||
291 | return false; | ||
292 | |||
293 | return true; | ||
294 | } | ||
295 | |||
296 | static int __init camellia_sparc64_mod_init(void) | ||
297 | { | ||
298 | int i; | ||
299 | |||
300 | for (i = 0; i < ARRAY_SIZE(algs); i++) | ||
301 | INIT_LIST_HEAD(&algs[i].cra_list); | ||
302 | |||
303 | if (sparc64_has_camellia_opcode()) { | ||
304 | pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n"); | ||
305 | return crypto_register_algs(algs, ARRAY_SIZE(algs)); | ||
306 | } | ||
307 | pr_info("sparc64 camellia opcodes not available.\n"); | ||
308 | return -ENODEV; | ||
309 | } | ||
310 | |||
/* Unregister everything registered by camellia_sparc64_mod_init(). */
static void __exit camellia_sparc64_mod_fini(void)
{
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}
315 | |||
316 | module_init(camellia_sparc64_mod_init); | ||
317 | module_exit(camellia_sparc64_mod_fini); | ||
318 | |||
319 | MODULE_LICENSE("GPL"); | ||
320 | MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); | ||
321 | |||
322 | MODULE_ALIAS("aes"); | ||
diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S new file mode 100644 index 000000000000..2b1976e765b5 --- /dev/null +++ b/arch/sparc/crypto/crc32c_asm.S | |||
@@ -0,0 +1,20 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | #include <asm/asi.h> | ||
4 | |||
5 | #include "opcodes.h" | ||
6 | |||
ENTRY(crc32c_sparc64)
	/* %o0=crc32p, %o1=data_ptr, %o2=len
	 * len must be a non-zero multiple of 8; sub-8-byte tails are
	 * handled in C by the glue code (crc32c_compute).
	 * The accumulator is loaded/stored with ASI_PL (little-endian)
	 * to match the byte order the CRC32C opcode expects.
	 */
	VISEntryHalf
	lda	[%o0] ASI_PL, %f1
1:	ldd	[%o1], %f2
	CRC32C(0,2,0)
	subcc	%o2, 8, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x8, %o1	/* delay slot */
	sta	%f1, [%o0] ASI_PL
	VISExitHalf
2:	retl
	 nop
ENDPROC(crc32c_sparc64)
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c new file mode 100644 index 000000000000..0bd89cea8d8e --- /dev/null +++ b/arch/sparc/crypto/crc32c_glue.c | |||
@@ -0,0 +1,179 @@ | |||
1 | /* Glue code for CRC32C optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon arch/x86/crypto/crc32c-intel.c | ||
4 | * | ||
5 | * Copyright (C) 2008 Intel Corporation | ||
6 | * Authors: Austin Zhang <austin_zhang@linux.intel.com> | ||
7 | * Kent Liu <kent.liu@intel.com> | ||
8 | */ | ||
9 | |||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/crc32.h> | ||
17 | |||
18 | #include <crypto/internal/hash.h> | ||
19 | |||
20 | #include <asm/pstate.h> | ||
21 | #include <asm/elf.h> | ||
22 | |||
23 | #include "opcodes.h" | ||
24 | |||
25 | /* | ||
26 | * Setting the seed allows arbitrary accumulators and flexible XOR policy | ||
27 | * If your algorithm starts with ~0, then XOR with ~0 before you set | ||
28 | * the seed. | ||
29 | */ | ||
static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key,
				 unsigned int keylen)
{
	u32 *mctx = crypto_shash_ctx(hash);

	if (keylen != sizeof(u32)) {
		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}
	/* Read the key as a little-endian u32 and store the swapped
	 * value back through a __le32 pointer.  NOTE(review): the
	 * double conversion keeps the in-memory seed in the byte
	 * order the asm's ASI_PL (little-endian) load expects —
	 * verify against crc32c_asm.S before changing.
	 */
	*(__le32 *)mctx = le32_to_cpup((__le32 *)key);
	return 0;
}
42 | |||
43 | static int crc32c_sparc64_init(struct shash_desc *desc) | ||
44 | { | ||
45 | u32 *mctx = crypto_shash_ctx(desc->tfm); | ||
46 | u32 *crcp = shash_desc_ctx(desc); | ||
47 | |||
48 | *crcp = *mctx; | ||
49 | |||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len); | ||
54 | |||
55 | static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len) | ||
56 | { | ||
57 | unsigned int asm_len; | ||
58 | |||
59 | asm_len = len & ~7U; | ||
60 | if (asm_len) { | ||
61 | crc32c_sparc64(crcp, data, asm_len); | ||
62 | data += asm_len / 8; | ||
63 | len -= asm_len; | ||
64 | } | ||
65 | if (len) | ||
66 | *crcp = __crc32c_le(*crcp, (const unsigned char *) data, len); | ||
67 | } | ||
68 | |||
69 | static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
70 | unsigned int len) | ||
71 | { | ||
72 | u32 *crcp = shash_desc_ctx(desc); | ||
73 | |||
74 | crc32c_compute(crcp, (const u64 *) data, len); | ||
75 | |||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len, | ||
80 | u8 *out) | ||
81 | { | ||
82 | u32 tmp = *crcp; | ||
83 | |||
84 | crc32c_compute(&tmp, (const u64 *) data, len); | ||
85 | |||
86 | *(__le32 *) out = ~cpu_to_le32(tmp); | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data, | ||
91 | unsigned int len, u8 *out) | ||
92 | { | ||
93 | return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out); | ||
94 | } | ||
95 | |||
96 | static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out) | ||
97 | { | ||
98 | u32 *crcp = shash_desc_ctx(desc); | ||
99 | |||
100 | *(__le32 *) out = ~cpu_to_le32p(crcp); | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data, | ||
105 | unsigned int len, u8 *out) | ||
106 | { | ||
107 | return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len, | ||
108 | out); | ||
109 | } | ||
110 | |||
111 | static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm) | ||
112 | { | ||
113 | u32 *key = crypto_tfm_ctx(tfm); | ||
114 | |||
115 | *key = ~0; | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
#define CHKSUM_BLOCK_SIZE	1
#define CHKSUM_DIGEST_SIZE	4

/* shash registration for crc32c.  The 4-byte "key" is the seed;
 * both tfm ctx and per-request desc ctx are a single u32.
 */
static struct shash_alg alg = {
	.setkey			=	crc32c_sparc64_setkey,
	.init			=	crc32c_sparc64_init,
	.update			=	crc32c_sparc64_update,
	.final			=	crc32c_sparc64_final,
	.finup			=	crc32c_sparc64_finup,
	.digest			=	crc32c_sparc64_digest,
	.descsize		=	sizeof(u32),
	.digestsize		=	CHKSUM_DIGEST_SIZE,
	.base			=	{
		.cra_name		=	"crc32c",
		.cra_driver_name	=	"crc32c-sparc64",
		.cra_priority		=	SPARC_CR_OPCODE_PRIORITY,
		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(u32),
		.cra_alignmask		=	7,	/* asm path does 64-bit loads */
		.cra_module		=	THIS_MODULE,
		.cra_init		=	crc32c_sparc64_cra_init,
	}
};
143 | |||
144 | static bool __init sparc64_has_crc32c_opcode(void) | ||
145 | { | ||
146 | unsigned long cfr; | ||
147 | |||
148 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
149 | return false; | ||
150 | |||
151 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
152 | if (!(cfr & CFR_CRC32C)) | ||
153 | return false; | ||
154 | |||
155 | return true; | ||
156 | } | ||
157 | |||
158 | static int __init crc32c_sparc64_mod_init(void) | ||
159 | { | ||
160 | if (sparc64_has_crc32c_opcode()) { | ||
161 | pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); | ||
162 | return crypto_register_shash(&alg); | ||
163 | } | ||
164 | pr_info("sparc64 crc32c opcode not available.\n"); | ||
165 | return -ENODEV; | ||
166 | } | ||
167 | |||
/* Unregister the shash registered by crc32c_sparc64_mod_init(). */
static void __exit crc32c_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
172 | |||
173 | module_init(crc32c_sparc64_mod_init); | ||
174 | module_exit(crc32c_sparc64_mod_fini); | ||
175 | |||
176 | MODULE_LICENSE("GPL"); | ||
177 | MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); | ||
178 | |||
179 | MODULE_ALIAS("crc32c"); | ||
diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c new file mode 100644 index 000000000000..5f5724a0ae22 --- /dev/null +++ b/arch/sparc/crypto/crop_devid.c | |||
@@ -0,0 +1,14 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/of_device.h> | ||
3 | |||
4 | /* This is a dummy device table linked into all of the crypto | ||
5 | * opcode drivers. It serves to trigger the module autoloading | ||
6 | * mechanisms in userspace which scan the OF device tree and | ||
7 | * load any modules which have device table entries that | ||
8 | * match OF device nodes. | ||
9 | */ | ||
/* Match any "cpu" OF node compatible with "sun4v": the crypto opcodes
 * are a CPU feature, not a discrete device.  Matching is deliberately
 * broad; each driver's mod_init still probes HWCAP and %asr26 before
 * registering anything.
 */
static const struct of_device_id crypto_opcode_match[] = {
	{ .name = "cpu", .compatible = "sun4v", },
	{},
};
MODULE_DEVICE_TABLE(of, crypto_opcode_match);
diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S new file mode 100644 index 000000000000..30b6e90b28b2 --- /dev/null +++ b/arch/sparc/crypto/des_asm.S | |||
@@ -0,0 +1,418 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
	.align	32
ENTRY(des_sparc64_key_expand)
	/* %o0=input_key, %o1=output_key
	 * Expand the 8-byte DES key into the 16 round keys (one u64
	 * each) using the DES_KEXPAND opcode, then store %f0-%f30 to
	 * the 128-byte output schedule.
	 * NOTE(review): DES_KEXPAND operand meaning is defined in
	 * opcodes.h — the chain below derives each register pair from
	 * an earlier one; confirm against the macro before reordering.
	 */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	DES_KEXPAND(0, 0, 0)
	DES_KEXPAND(0, 1, 2)
	DES_KEXPAND(2, 3, 6)
	DES_KEXPAND(2, 2, 4)
	DES_KEXPAND(6, 3, 10)
	DES_KEXPAND(6, 2, 8)
	DES_KEXPAND(10, 3, 14)
	DES_KEXPAND(10, 2, 12)
	DES_KEXPAND(14, 1, 16)
	DES_KEXPAND(16, 3, 20)
	DES_KEXPAND(16, 2, 18)
	DES_KEXPAND(20, 3, 24)
	DES_KEXPAND(20, 2, 22)
	DES_KEXPAND(24, 3, 28)
	DES_KEXPAND(24, 2, 26)
	DES_KEXPAND(28, 1, 30)
	/* Store the full expanded schedule. */
	std	%f0, [%o1 + 0x00]
	std	%f2, [%o1 + 0x08]
	std	%f4, [%o1 + 0x10]
	std	%f6, [%o1 + 0x18]
	std	%f8, [%o1 + 0x20]
	std	%f10, [%o1 + 0x28]
	std	%f12, [%o1 + 0x30]
	std	%f14, [%o1 + 0x38]
	std	%f16, [%o1 + 0x40]
	std	%f18, [%o1 + 0x48]
	std	%f20, [%o1 + 0x50]
	std	%f22, [%o1 + 0x58]
	std	%f24, [%o1 + 0x60]
	std	%f26, [%o1 + 0x68]
	std	%f28, [%o1 + 0x70]
	std	%f30, [%o1 + 0x78]
	retl
	VISExitHalf
ENDPROC(des_sparc64_key_expand)
47 | |||
	.align	32
ENTRY(des_sparc64_crypt)
	/* %o0=key, %o1=input, %o2=output
	 * Single-block DES: load the 16-entry round-key schedule into
	 * %f0-%f30, run IP, 16 rounds (two per DES_ROUND), then the
	 * inverse IP, with the data block held in %f32.
	 */
	VISEntry
	ldd	[%o1 + 0x00], %f32
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	ldd	[%o0 + 0x78], %f30
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)
	std	%f32, [%o2 + 0x00]
	retl
	VISExit
ENDPROC(des_sparc64_crypt)
83 | |||
	.align	32
ENTRY(des_sparc64_load_keys)
	/* %o0=key
	 * Preload the 16 round keys into %f0-%f30 for the bulk ECB/CBC
	 * loops; takes FPU ownership (VISEntry) which the C glue later
	 * releases via fprs_write(0).
	 */
	VISEntry
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	retl
	ldd	[%o0 + 0x78], %f30	/* delay slot */
ENDPROC(des_sparc64_load_keys)
106 | |||
	.align	32
ENTRY(des_sparc64_ecb_crypt)
	/* %o0=input, %o1=output, %o2=len
	 * Bulk ECB; round keys already in %f0-%f30 (see
	 * des_sparc64_load_keys).  len must be a non-zero multiple of
	 * the 8-byte block size.  Same schedule handles encrypt and
	 * decrypt — the C glue passes the appropriately-ordered keys.
	 */
1:	ldd	[%o0 + 0x00], %f32
	add	%o0, 0x08, %o0
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)
	std	%f32, [%o1 + 0x00]
	subcc	%o2, 0x08, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x08, %o1	/* delay slot */
	retl
	nop
ENDPROC(des_sparc64_ecb_crypt)
129 | |||
	.align	32
ENTRY(des_sparc64_cbc_encrypt)
	/* %o0=input, %o1=output, %o2=len, %o3=IV
	 * Bulk CBC encrypt; keys preloaded in %f0-%f30.  The chaining
	 * value is kept in %f32 (XOR with plaintext before the rounds)
	 * and written back to *IV on exit.
	 */
	ldd	[%o3 + 0x00], %f32
1:	ldd	[%o0 + 0x00], %f34
	fxor	%f32, %f34, %f32	/* CBC: IV ^ plaintext */
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)
	std	%f32, [%o1 + 0x00]	/* ciphertext doubles as next IV */
	add	%o0, 0x08, %o0
	subcc	%o2, 0x08, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x08, %o1	/* delay slot */
	retl
	std	%f32, [%o3 + 0x00]	/* delay slot: final IV */
ENDPROC(des_sparc64_cbc_encrypt)
154 | |||
	.align	32
ENTRY(des_sparc64_cbc_decrypt)
	/* %o0=input, %o1=output, %o2=len, %o3=IV
	 * Bulk CBC decrypt; keys preloaded in %f0-%f30.  Previous
	 * ciphertext (the IV) rides in %f34, current ciphertext is
	 * preserved in %f36 so it can become the next IV.
	 */
	ldd	[%o3 + 0x00], %f34
1:	ldd	[%o0 + 0x00], %f36
	DES_IP(36, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)
	fxor	%f32, %f34, %f32	/* CBC: XOR with previous ciphertext */
	fsrc2	%f36, %f34	/* current ciphertext becomes next IV */
	std	%f32, [%o1 + 0x00]
	add	%o0, 0x08, %o0
	subcc	%o2, 0x08, %o2
	bne,pt	%icc, 1b
	add	%o1, 0x08, %o1	/* delay slot */
	retl
	std	%f36, [%o3 + 0x00]	/* delay slot: final IV */
ENDPROC(des_sparc64_cbc_decrypt)
180 | |||
	.align	32
ENTRY(des3_ede_sparc64_crypt)
	/* %o0=key, %o1=input, %o2=output
	 * Single-block triple-DES (EDE): three full DES passes
	 * (IP/16 rounds/IIP each) over %f32, streaming the three
	 * 128-byte key schedules (offsets 0x00, 0x80, 0x100) through
	 * %f0-%f30 — loads for the next pass are interleaved with the
	 * rounds of the current one.
	 */
	VISEntry
	ldd	[%o1 + 0x00], %f32
	/* Pass 1 schedule. */
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	ldd	[%o0 + 0x78], %f30
	/* Pass 1, overlapping loads of the pass-2 schedule (0x80+). */
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	ldd	[%o0 + 0x80], %f0
	ldd	[%o0 + 0x88], %f2
	DES_ROUND(4, 6, 32, 32)
	ldd	[%o0 + 0x90], %f4
	ldd	[%o0 + 0x98], %f6
	DES_ROUND(8, 10, 32, 32)
	ldd	[%o0 + 0xa0], %f8
	ldd	[%o0 + 0xa8], %f10
	DES_ROUND(12, 14, 32, 32)
	ldd	[%o0 + 0xb0], %f12
	ldd	[%o0 + 0xb8], %f14
	DES_ROUND(16, 18, 32, 32)
	ldd	[%o0 + 0xc0], %f16
	ldd	[%o0 + 0xc8], %f18
	DES_ROUND(20, 22, 32, 32)
	ldd	[%o0 + 0xd0], %f20
	ldd	[%o0 + 0xd8], %f22
	DES_ROUND(24, 26, 32, 32)
	ldd	[%o0 + 0xe0], %f24
	ldd	[%o0 + 0xe8], %f26
	DES_ROUND(28, 30, 32, 32)
	ldd	[%o0 + 0xf0], %f28
	ldd	[%o0 + 0xf8], %f30
	DES_IIP(32, 32)
	/* Pass 2, overlapping loads of the pass-3 schedule (0x100+). */
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	ldd	[%o0 + 0x100], %f0
	ldd	[%o0 + 0x108], %f2
	DES_ROUND(4, 6, 32, 32)
	ldd	[%o0 + 0x110], %f4
	ldd	[%o0 + 0x118], %f6
	DES_ROUND(8, 10, 32, 32)
	ldd	[%o0 + 0x120], %f8
	ldd	[%o0 + 0x128], %f10
	DES_ROUND(12, 14, 32, 32)
	ldd	[%o0 + 0x130], %f12
	ldd	[%o0 + 0x138], %f14
	DES_ROUND(16, 18, 32, 32)
	ldd	[%o0 + 0x140], %f16
	ldd	[%o0 + 0x148], %f18
	DES_ROUND(20, 22, 32, 32)
	ldd	[%o0 + 0x150], %f20
	ldd	[%o0 + 0x158], %f22
	DES_ROUND(24, 26, 32, 32)
	ldd	[%o0 + 0x160], %f24
	ldd	[%o0 + 0x168], %f26
	DES_ROUND(28, 30, 32, 32)
	ldd	[%o0 + 0x170], %f28
	ldd	[%o0 + 0x178], %f30
	DES_IIP(32, 32)
	/* Pass 3. */
	DES_IP(32, 32)
	DES_ROUND(0, 2, 32, 32)
	DES_ROUND(4, 6, 32, 32)
	DES_ROUND(8, 10, 32, 32)
	DES_ROUND(12, 14, 32, 32)
	DES_ROUND(16, 18, 32, 32)
	DES_ROUND(20, 22, 32, 32)
	DES_ROUND(24, 26, 32, 32)
	DES_ROUND(28, 30, 32, 32)
	DES_IIP(32, 32)

	std	%f32, [%o2 + 0x00]
	retl
	VISExit
ENDPROC(des3_ede_sparc64_crypt)
269 | |||
	.align	32
ENTRY(des3_ede_sparc64_load_keys)
	/* %o0=key
	 * Preload as much of the triple-DES schedule as fits:
	 * key[0x00-0xe8] into %f0-%f58.  The remaining entries
	 * (0xf0 onward) are streamed in by DES3_LOOP_BODY during the
	 * bulk loops.  %f60/%f62 stay free for data and IV.
	 */
	VISEntry
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	ldd	[%o0 + 0x30], %f12
	ldd	[%o0 + 0x38], %f14
	ldd	[%o0 + 0x40], %f16
	ldd	[%o0 + 0x48], %f18
	ldd	[%o0 + 0x50], %f20
	ldd	[%o0 + 0x58], %f22
	ldd	[%o0 + 0x60], %f24
	ldd	[%o0 + 0x68], %f26
	ldd	[%o0 + 0x70], %f28
	ldd	[%o0 + 0x78], %f30
	ldd	[%o0 + 0x80], %f32
	ldd	[%o0 + 0x88], %f34
	ldd	[%o0 + 0x90], %f36
	ldd	[%o0 + 0x98], %f38
	ldd	[%o0 + 0xa0], %f40
	ldd	[%o0 + 0xa8], %f42
	ldd	[%o0 + 0xb0], %f44
	ldd	[%o0 + 0xb8], %f46
	ldd	[%o0 + 0xc0], %f48
	ldd	[%o0 + 0xc8], %f50
	ldd	[%o0 + 0xd0], %f52
	ldd	[%o0 + 0xd8], %f54
	ldd	[%o0 + 0xe0], %f56
	retl
	ldd	[%o0 + 0xe8], %f58	/* delay slot */
ENDPROC(des3_ede_sparc64_load_keys)
306 | |||
/* One full triple-DES (EDE) transformation of the block in %fX.
 *
 * Runs three IP/8xDES_ROUND/IIP passes.  The schedule is mostly
 * resident (%f0-%f58, loaded by des3_ede_sparc64_load_keys); the
 * entries past 0xe8 are loaded on the fly, and the final pass
 * restores %f0-%f30 and %f16-%f30 back to the first schedule so the
 * macro can be re-entered on the next loop iteration without any
 * reload.  NOTE(review): register/offset interleaving is load-use
 * scheduled — do not reorder lines without checking data deps.
 */
#define DES3_LOOP_BODY(X) \
	DES_IP(X, X) \
	DES_ROUND(0, 2, X, X) \
	DES_ROUND(4, 6, X, X) \
	DES_ROUND(8, 10, X, X) \
	DES_ROUND(12, 14, X, X) \
	DES_ROUND(16, 18, X, X) \
	ldd	[%o0 + 0xf0], %f16; \
	ldd	[%o0 + 0xf8], %f18; \
	DES_ROUND(20, 22, X, X) \
	ldd	[%o0 + 0x100], %f20; \
	ldd	[%o0 + 0x108], %f22; \
	DES_ROUND(24, 26, X, X) \
	ldd	[%o0 + 0x110], %f24; \
	ldd	[%o0 + 0x118], %f26; \
	DES_ROUND(28, 30, X, X) \
	ldd	[%o0 + 0x120], %f28; \
	ldd	[%o0 + 0x128], %f30; \
	DES_IIP(X, X) \
	DES_IP(X, X) \
	DES_ROUND(32, 34, X, X) \
	ldd	[%o0 + 0x130], %f0; \
	ldd	[%o0 + 0x138], %f2; \
	DES_ROUND(36, 38, X, X) \
	ldd	[%o0 + 0x140], %f4; \
	ldd	[%o0 + 0x148], %f6; \
	DES_ROUND(40, 42, X, X) \
	ldd	[%o0 + 0x150], %f8; \
	ldd	[%o0 + 0x158], %f10; \
	DES_ROUND(44, 46, X, X) \
	ldd	[%o0 + 0x160], %f12; \
	ldd	[%o0 + 0x168], %f14; \
	DES_ROUND(48, 50, X, X) \
	DES_ROUND(52, 54, X, X) \
	DES_ROUND(56, 58, X, X) \
	DES_ROUND(16, 18, X, X) \
	ldd	[%o0 + 0x170], %f16; \
	ldd	[%o0 + 0x178], %f18; \
	DES_IIP(X, X) \
	DES_IP(X, X) \
	DES_ROUND(20, 22, X, X) \
	ldd	[%o0 + 0x50], %f20; \
	ldd	[%o0 + 0x58], %f22; \
	DES_ROUND(24, 26, X, X) \
	ldd	[%o0 + 0x60], %f24; \
	ldd	[%o0 + 0x68], %f26; \
	DES_ROUND(28, 30, X, X) \
	ldd	[%o0 + 0x70], %f28; \
	ldd	[%o0 + 0x78], %f30; \
	DES_ROUND(0, 2, X, X) \
	ldd	[%o0 + 0x00], %f0; \
	ldd	[%o0 + 0x08], %f2; \
	DES_ROUND(4, 6, X, X) \
	ldd	[%o0 + 0x10], %f4; \
	ldd	[%o0 + 0x18], %f6; \
	DES_ROUND(8, 10, X, X) \
	ldd	[%o0 + 0x20], %f8; \
	ldd	[%o0 + 0x28], %f10; \
	DES_ROUND(12, 14, X, X) \
	ldd	[%o0 + 0x30], %f12; \
	ldd	[%o0 + 0x38], %f14; \
	DES_ROUND(16, 18, X, X) \
	ldd	[%o0 + 0x40], %f16; \
	ldd	[%o0 + 0x48], %f18; \
	DES_IIP(X, X)
372 | |||
	.align	32
ENTRY(des3_ede_sparc64_ecb_crypt)
	/* %o0=key, %o1=input, %o2=output, %o3=len
	 * Bulk triple-DES ECB; des3_ede_sparc64_load_keys must have
	 * run first.  len must be a non-zero multiple of 8.
	 * NOTE(review): unlike the single-DES loop, %o1 (input) is
	 * never advanced here — input and output share %o1/%o2
	 * stepping via %o2 only for the store side; confirm callers
	 * always pass in-place buffers or that this is intentional.
	 */
1:	ldd	[%o1 + 0x00], %f60
	DES3_LOOP_BODY(60)
	std	%f60, [%o2 + 0x00]
	subcc	%o3, 0x08, %o3
	bne,pt	%icc, 1b
	add	%o2, 0x08, %o2	/* delay slot */
	retl
	nop
ENDPROC(des3_ede_sparc64_ecb_crypt)
385 | |||
	.align	32
ENTRY(des3_ede_sparc64_cbc_encrypt)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV
	 * Bulk triple-DES CBC encrypt; chaining value lives in %f60,
	 * plaintext is loaded into %f62 and XORed in before the EDE
	 * passes.  Final IV is written back to *IV on exit.
	 */
	ldd	[%o4 + 0x00], %f60
1:	ldd	[%o1 + 0x00], %f62
	fxor	%f60, %f62, %f60	/* CBC: IV ^ plaintext */
	DES3_LOOP_BODY(60)
	std	%f60, [%o2 + 0x00]	/* ciphertext doubles as next IV */
	add	%o1, 0x08, %o1
	subcc	%o3, 0x08, %o3
	bne,pt	%icc, 1b
	add	%o2, 0x08, %o2	/* delay slot */
	retl
	std	%f60, [%o4 + 0x00]	/* delay slot: final IV */
ENDPROC(des3_ede_sparc64_cbc_encrypt)
401 | |||
	.align	32
ENTRY(des3_ede_sparc64_cbc_decrypt)
	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV
	 * Bulk triple-DES CBC decrypt.  The raw ciphertext is kept in
	 * integer register %g1 (moved into %f60 via MOVXTOD_G1_F60)
	 * because all FP registers except %f60/%f62 hold key material;
	 * %f62 carries the previous ciphertext for the CBC XOR.
	 */
	ldd	[%o4 + 0x00], %f62
1:	ldx	[%o1 + 0x00], %g1
	MOVXTOD_G1_F60
	DES3_LOOP_BODY(60)
	fxor	%f62, %f60, %f60	/* CBC: XOR with previous ciphertext */
	MOVXTOD_G1_F62	/* current ciphertext becomes next IV */
	std	%f60, [%o2 + 0x00]
	add	%o1, 0x08, %o1
	subcc	%o3, 0x08, %o3
	bne,pt	%icc, 1b
	add	%o2, 0x08, %o2	/* delay slot */
	retl
	stx	%g1, [%o4 + 0x00]	/* delay slot: final IV */
ENDPROC(des3_ede_sparc64_cbc_decrypt)
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c new file mode 100644 index 000000000000..c4940c2d3073 --- /dev/null +++ b/arch/sparc/crypto/des_glue.c | |||
@@ -0,0 +1,529 @@ | |||
1 | /* Glue code for DES encryption optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
7 | |||
8 | #include <linux/crypto.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/types.h> | ||
13 | #include <crypto/algapi.h> | ||
14 | #include <crypto/des.h> | ||
15 | |||
16 | #include <asm/fpumacro.h> | ||
17 | #include <asm/pstate.h> | ||
18 | #include <asm/elf.h> | ||
19 | |||
20 | #include "opcodes.h" | ||
21 | |||
/* Per-tfm DES context: the expanded key schedule stored as 64-bit
 * words for the crypto opcodes, kept once per direction so no
 * reversal is needed at crypt time.
 */
struct des_sparc64_ctx {
	u64 encrypt_expkey[DES_EXPKEY_WORDS / 2];
	u64 decrypt_expkey[DES_EXPKEY_WORDS / 2];
};
26 | |||
/* Per-tfm Triple-DES EDE context: concatenated schedules for all three
 * keys, one array per direction (see des3_ede_set_key() for layout).
 */
struct des3_ede_sparc64_ctx {
	u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
	u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
};
31 | |||
32 | static void encrypt_to_decrypt(u64 *d, const u64 *e) | ||
33 | { | ||
34 | const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1; | ||
35 | int i; | ||
36 | |||
37 | for (i = 0; i < DES_EXPKEY_WORDS / 2; i++) | ||
38 | *d++ = *s--; | ||
39 | } | ||
40 | |||
41 | extern void des_sparc64_key_expand(const u32 *input_key, u64 *key); | ||
42 | |||
/* Set the DES key and build both key schedules.
 *
 * Returns 0 on success, -EINVAL (with CRYPTO_TFM_RES_WEAK_KEY set) if
 * the key is weak and the caller requested weak-key rejection.
 */
static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
		       unsigned int keylen)
{
	struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
	u32 *flags = &tfm->crt_flags;
	u32 tmp[DES_EXPKEY_WORDS];
	int ret;

	/* Even though we have special instructions for key expansion,
	 * we call des_ekey() so that we don't have to write our own
	 * weak key detection code.
	 */
	/* tmp (the C-expanded schedule) is deliberately discarded; only
	 * des_ekey()'s weak-key return value is used.
	 */
	ret = des_ekey(tmp, key);
	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
		return -EINVAL;
	}

	/* The schedule actually used comes from the crypto-opcode
	 * expansion helper; decrypt is the same schedule reversed.
	 */
	des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]);
	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]);

	return 0;
}
66 | |||
67 | extern void des_sparc64_crypt(const u64 *key, const u64 *input, | ||
68 | u64 *output); | ||
69 | |||
70 | static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
71 | { | ||
72 | struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
73 | const u64 *K = ctx->encrypt_expkey; | ||
74 | |||
75 | des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); | ||
76 | } | ||
77 | |||
78 | static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
79 | { | ||
80 | struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
81 | const u64 *K = ctx->decrypt_expkey; | ||
82 | |||
83 | des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); | ||
84 | } | ||
85 | |||
86 | extern void des_sparc64_load_keys(const u64 *key); | ||
87 | |||
88 | extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output, | ||
89 | unsigned int len); | ||
90 | |||
91 | #define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1)) | ||
92 | |||
/* Shared ECB worker for encrypt and decrypt.
 *
 * Loads the requested key schedule into the FPU once, then walks the
 * scatterlists processing the whole-block portion of each chunk in
 * assembler.  The trailing fprs_write(0) clears %fprs after the
 * assembler routines have used the FP registers.
 * NOTE(review): assumes des_sparc64_load_keys() leaves the schedule
 * resident in FP registers for des_sparc64_ecb_crypt() — confirm
 * against des_asm.S.
 */
static int __ecb_crypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes, bool encrypt)
{
	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	if (encrypt)
		des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
	else
		des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
	while ((nbytes = walk.nbytes)) {
		/* Round the chunk down to a multiple of the block size. */
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
					      (u64 *) walk.dst.virt.addr,
					      block_len);
		}
		/* Leftover sub-block bytes are handed back to the walker. */
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);
	return err;
}
122 | |||
123 | static int ecb_encrypt(struct blkcipher_desc *desc, | ||
124 | struct scatterlist *dst, struct scatterlist *src, | ||
125 | unsigned int nbytes) | ||
126 | { | ||
127 | return __ecb_crypt(desc, dst, src, nbytes, true); | ||
128 | } | ||
129 | |||
130 | static int ecb_decrypt(struct blkcipher_desc *desc, | ||
131 | struct scatterlist *dst, struct scatterlist *src, | ||
132 | unsigned int nbytes) | ||
133 | { | ||
134 | return __ecb_crypt(desc, dst, src, nbytes, false); | ||
135 | } | ||
136 | |||
137 | extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output, | ||
138 | unsigned int len, u64 *iv); | ||
139 | |||
/* CBC encryption: same scatterlist walk as __ecb_crypt(), but the
 * assembler routine also threads walk.iv through the chain and writes
 * the updated IV back, so chaining survives across chunks.
 */
static int cbc_encrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
						(u64 *) walk.dst.virt.addr,
						block_len, (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* clear %fprs after FPU use */
	return err;
}
166 | |||
167 | extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output, | ||
168 | unsigned int len, u64 *iv); | ||
169 | |||
/* CBC decryption: mirror of cbc_encrypt() using the reversed key
 * schedule; the assembler updates walk.iv with the last ciphertext
 * block so subsequent chunks chain correctly.
 */
static int cbc_decrypt(struct blkcipher_desc *desc,
		       struct scatterlist *dst, struct scatterlist *src,
		       unsigned int nbytes)
{
	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
						(u64 *) walk.dst.virt.addr,
						block_len, (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* clear %fprs after FPU use */
	return err;
}
196 | |||
/* Set the Triple-DES EDE key (three DES keys back to back).
 *
 * Schedule layout: for EDE the middle leg runs the cipher in the
 * opposite direction, so the encrypt schedule is E(k1) | reverse(k2) |
 * E(k3) and the decrypt schedule is reverse(k3) | E(k2) | reverse(k1).
 */
static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
			    unsigned int keylen)
{
	struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
	const u32 *K = (const u32 *)key;
	u32 *flags = &tfm->crt_flags;
	u64 k1[DES_EXPKEY_WORDS / 2];
	u64 k2[DES_EXPKEY_WORDS / 2];
	u64 k3[DES_EXPKEY_WORDS / 2];

	/* Reject keys where k1 == k2 or k2 == k3 (degenerates to single
	 * DES) when the caller asked for weak-key checking.
	 */
	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
		return -EINVAL;
	}

	des_sparc64_key_expand((const u32 *)key, k1);
	key += DES_KEY_SIZE;
	des_sparc64_key_expand((const u32 *)key, k2);
	key += DES_KEY_SIZE;
	des_sparc64_key_expand((const u32 *)key, k3);

	memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1));
	encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]);
	memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
	       &k3[0], sizeof(k3));

	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]);
	memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2],
	       &k2[0], sizeof(k2));
	encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
			   &k1[0]);

	return 0;
}
233 | |||
234 | extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input, | ||
235 | u64 *output); | ||
236 | |||
237 | static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
238 | { | ||
239 | struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
240 | const u64 *K = ctx->encrypt_expkey; | ||
241 | |||
242 | des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); | ||
243 | } | ||
244 | |||
245 | static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
246 | { | ||
247 | struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm); | ||
248 | const u64 *K = ctx->decrypt_expkey; | ||
249 | |||
250 | des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst); | ||
251 | } | ||
252 | |||
253 | extern void des3_ede_sparc64_load_keys(const u64 *key); | ||
254 | |||
255 | extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input, | ||
256 | u64 *output, unsigned int len); | ||
257 | |||
/* Shared 3DES ECB worker.
 *
 * Unlike the single-DES worker, the key pointer K is also passed down
 * to des3_ede_sparc64_ecb_crypt() — the 3DES schedule does not fit in
 * the FP register file, so the assembler reloads round keys from
 * memory (see DES3_LOOP_BODY in des_asm.S).
 */
static int __ecb3_crypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes, bool encrypt)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	const u64 *K;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	if (encrypt)
		K = &ctx->encrypt_expkey[0];
	else
		K = &ctx->decrypt_expkey[0];
	des3_ede_sparc64_load_keys(K);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64 = (const u64 *)walk.src.virt.addr;
			des3_ede_sparc64_ecb_crypt(K, src64,
						   (u64 *) walk.dst.virt.addr,
						   block_len);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* clear %fprs after FPU use */
	return err;
}
290 | |||
291 | static int ecb3_encrypt(struct blkcipher_desc *desc, | ||
292 | struct scatterlist *dst, struct scatterlist *src, | ||
293 | unsigned int nbytes) | ||
294 | { | ||
295 | return __ecb3_crypt(desc, dst, src, nbytes, true); | ||
296 | } | ||
297 | |||
298 | static int ecb3_decrypt(struct blkcipher_desc *desc, | ||
299 | struct scatterlist *dst, struct scatterlist *src, | ||
300 | unsigned int nbytes) | ||
301 | { | ||
302 | return __ecb3_crypt(desc, dst, src, nbytes, false); | ||
303 | } | ||
304 | |||
305 | extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input, | ||
306 | u64 *output, unsigned int len, | ||
307 | u64 *iv); | ||
308 | |||
/* 3DES CBC encryption: walk the scatterlists, chaining through
 * walk.iv; the key schedule pointer is passed to the assembler as for
 * __ecb3_crypt().
 */
static int cbc3_encrypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	const u64 *K;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	K = &ctx->encrypt_expkey[0];
	des3_ede_sparc64_load_keys(K);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64 = (const u64 *)walk.src.virt.addr;
			des3_ede_sparc64_cbc_encrypt(K, src64,
						     (u64 *) walk.dst.virt.addr,
						     block_len,
						     (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* clear %fprs after FPU use */
	return err;
}
339 | |||
340 | extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input, | ||
341 | u64 *output, unsigned int len, | ||
342 | u64 *iv); | ||
343 | |||
/* 3DES CBC decryption: mirror of cbc3_encrypt() with the decrypt
 * schedule; the assembler updates walk.iv with the last ciphertext
 * block processed.
 */
static int cbc3_decrypt(struct blkcipher_desc *desc,
			struct scatterlist *dst, struct scatterlist *src,
			unsigned int nbytes)
{
	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	const u64 *K;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	K = &ctx->decrypt_expkey[0];
	des3_ede_sparc64_load_keys(K);
	while ((nbytes = walk.nbytes)) {
		unsigned int block_len = nbytes & DES_BLOCK_MASK;

		if (likely(block_len)) {
			const u64 *src64 = (const u64 *)walk.src.virt.addr;
			des3_ede_sparc64_cbc_decrypt(K, src64,
						     (u64 *) walk.dst.virt.addr,
						     block_len,
						     (u64 *) walk.iv);
		}
		nbytes &= DES_BLOCK_SIZE - 1;
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	fprs_write(0);	/* clear %fprs after FPU use */
	return err;
}
374 | |||
/* The six algorithm instances registered by this module: a raw
 * single-block "cipher" plus ECB and CBC blkcipher modes, for DES and
 * Triple-DES EDE.  All use SPARC_CR_OPCODE_PRIORITY so they outrank
 * the generic software implementations, and an alignmask of 7 (8-byte
 * alignment) for the 64-bit loads in the assembler.
 */
static struct crypto_alg algs[] = { {
	.cra_name		= "des",
	.cra_driver_name	= "des-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= DES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= DES_KEY_SIZE,
			.cia_max_keysize	= DES_KEY_SIZE,
			.cia_setkey		= des_set_key,
			.cia_encrypt		= des_encrypt,
			.cia_decrypt		= des_decrypt
		}
	}
}, {
	.cra_name		= "ecb(des)",
	.cra_driver_name	= "ecb-des-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES_KEY_SIZE,
			.max_keysize	= DES_KEY_SIZE,
			.setkey		= des_set_key,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(des)",
	.cra_driver_name	= "cbc-des-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES_KEY_SIZE,
			.max_keysize	= DES_KEY_SIZE,
			.setkey		= des_set_key,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "des3_ede",
	.cra_driver_name	= "des3_ede-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_module		= THIS_MODULE,
	.cra_u	= {
		.cipher	= {
			.cia_min_keysize	= DES3_EDE_KEY_SIZE,
			.cia_max_keysize	= DES3_EDE_KEY_SIZE,
			.cia_setkey		= des3_ede_set_key,
			.cia_encrypt		= des3_ede_encrypt,
			.cia_decrypt		= des3_ede_decrypt
		}
	}
}, {
	.cra_name		= "ecb(des3_ede)",
	.cra_driver_name	= "ecb-des3_ede-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES3_EDE_KEY_SIZE,
			.max_keysize	= DES3_EDE_KEY_SIZE,
			.setkey		= des3_ede_set_key,
			.encrypt	= ecb3_encrypt,
			.decrypt	= ecb3_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(des3_ede)",
	.cra_driver_name	= "cbc-des3_ede-sparc64",
	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
	.cra_alignmask		= 7,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= DES3_EDE_KEY_SIZE,
			.max_keysize	= DES3_EDE_KEY_SIZE,
			.setkey		= des3_ede_set_key,
			.encrypt	= cbc3_encrypt,
			.decrypt	= cbc3_decrypt,
		},
	},
} };
488 | |||
489 | static bool __init sparc64_has_des_opcode(void) | ||
490 | { | ||
491 | unsigned long cfr; | ||
492 | |||
493 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
494 | return false; | ||
495 | |||
496 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
497 | if (!(cfr & CFR_DES)) | ||
498 | return false; | ||
499 | |||
500 | return true; | ||
501 | } | ||
502 | |||
503 | static int __init des_sparc64_mod_init(void) | ||
504 | { | ||
505 | int i; | ||
506 | |||
507 | for (i = 0; i < ARRAY_SIZE(algs); i++) | ||
508 | INIT_LIST_HEAD(&algs[i].cra_list); | ||
509 | |||
510 | if (sparc64_has_des_opcode()) { | ||
511 | pr_info("Using sparc64 des opcodes optimized DES implementation\n"); | ||
512 | return crypto_register_algs(algs, ARRAY_SIZE(algs)); | ||
513 | } | ||
514 | pr_info("sparc64 des opcodes not available.\n"); | ||
515 | return -ENODEV; | ||
516 | } | ||
517 | |||
/* Module exit: unregister everything des_sparc64_mod_init() registered. */
static void __exit des_sparc64_mod_fini(void)
{
	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
}
522 | |||
523 | module_init(des_sparc64_mod_init); | ||
524 | module_exit(des_sparc64_mod_fini); | ||
525 | |||
526 | MODULE_LICENSE("GPL"); | ||
527 | MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); | ||
528 | |||
529 | MODULE_ALIAS("des"); | ||
diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S new file mode 100644 index 000000000000..3150404e602e --- /dev/null +++ b/arch/sparc/crypto/md5_asm.S | |||
@@ -0,0 +1,70 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
ENTRY(md5_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds
	 *
	 * Runs the MD5 opcode over `rounds` 64-byte blocks.  State lives
	 * in %f0-%f3; block data is loaded into %f8-%f22.  A fast path
	 * handles 8-byte-aligned input; the 10: path realigns with
	 * alignaddr/faligndata.
	 */
	VISEntryHalf
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	andcc	%o1, 0x7, %g0		/* test 8-byte alignment of data */
	ld	[%o0 + 0x08], %f2
	bne,pn	%xcc, 10f
	 ld	[%o0 + 0x0c], %f3	/* delay slot: last state word */

1:
	/* Aligned loop: load one 64-byte block directly. */
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	ldd	[%o1 + 0x38], %f22

	MD5

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1		/* delay slot: next block */

5:
	/* Store the updated state and return. */
	st	%f0, [%o0 + 0x00]
	st	%f1, [%o0 + 0x04]
	st	%f2, [%o0 + 0x08]
	st	%f3, [%o0 + 0x0c]
	retl
	 VISExitHalf
10:
	/* Unaligned path: round %o1 down and shift pairs of doublewords
	 * into place with faligndata each iteration.
	 */
	alignaddr %o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
1:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	ldd	[%o1 + 0x40], %f26

	faligndata %f10, %f12, %f8
	faligndata %f12, %f14, %f10
	faligndata %f14, %f16, %f12
	faligndata %f16, %f18, %f14
	faligndata %f18, %f20, %f16
	faligndata %f20, %f22, %f18
	faligndata %f22, %f24, %f20
	faligndata %f24, %f26, %f22

	MD5

	subcc	%o2, 1, %o2
	fsrc2	%f26, %f10		/* carry last doubleword into next pass */
	bne,pt	%xcc, 1b
	 add	%o1, 0x40, %o1		/* delay slot: next block */

	ba,a,pt	%xcc, 5b
ENDPROC(md5_sparc64_transform)
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c new file mode 100644 index 000000000000..603d723038ce --- /dev/null +++ b/arch/sparc/crypto/md5_glue.c | |||
@@ -0,0 +1,188 @@ | |||
1 | /* Glue code for MD5 hashing optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c | ||
4 | * and crypto/md5.c which are: | ||
5 | * | ||
6 | * Copyright (c) Alan Smithee. | ||
7 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
8 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
9 | * Copyright (c) Mathias Krause <minipli@googlemail.com> | ||
10 | * Copyright (c) Cryptoapi developers. | ||
11 | * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> | ||
12 | */ | ||
13 | |||
14 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
15 | |||
16 | #include <crypto/internal/hash.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/cryptohash.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <crypto/md5.h> | ||
23 | |||
24 | #include <asm/pstate.h> | ||
25 | #include <asm/elf.h> | ||
26 | |||
27 | #include "opcodes.h" | ||
28 | |||
29 | asmlinkage void md5_sparc64_transform(u32 *digest, const char *data, | ||
30 | unsigned int rounds); | ||
31 | |||
/* Initialize MD5 state with the standard initial values.
 *
 * The words are stored little-endian (cpu_to_le32 on this big-endian
 * CPU byte-swaps them) — presumably the layout the MD5 opcode expects;
 * md5_sparc64_final() copies the hash words out unconverted.
 * NOTE(review): confirm against md5_asm.S / the opcode spec.
 */
static int md5_sparc64_init(struct shash_desc *desc)
{
	struct md5_state *mctx = shash_desc_ctx(desc);

	mctx->hash[0] = cpu_to_le32(0x67452301);
	mctx->hash[1] = cpu_to_le32(0xefcdab89);
	mctx->hash[2] = cpu_to_le32(0x98badcfe);
	mctx->hash[3] = cpu_to_le32(0x10325476);
	mctx->byte_count = 0;

	return 0;
}
44 | |||
/* Core update: assumes partial + len >= MD5_HMAC_BLOCK_SIZE (the fast
 * small-fragment case is handled by the caller).  Flushes any buffered
 * partial block, runs whole blocks through the assembler transform,
 * and buffers the remainder.
 */
static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data,
				 unsigned int len, unsigned int partial)
{
	unsigned int done = 0;

	sctx->byte_count += len;
	if (partial) {
		/* Top up and process the buffered partial block. */
		done = MD5_HMAC_BLOCK_SIZE - partial;
		memcpy((u8 *)sctx->block + partial, data, done);
		md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1);
	}
	if (len - done >= MD5_HMAC_BLOCK_SIZE) {
		/* Process all remaining whole blocks in one call. */
		const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE;

		md5_sparc64_transform(sctx->hash, data + done, rounds);
		done += rounds * MD5_HMAC_BLOCK_SIZE;
	}

	/* Stash the sub-block tail for the next update/final. */
	memcpy(sctx->block, data + done, len - done);
}
65 | |||
66 | static int md5_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
67 | unsigned int len) | ||
68 | { | ||
69 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
70 | unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE; | ||
71 | |||
72 | /* Handle the fast case right here */ | ||
73 | if (partial + len < MD5_HMAC_BLOCK_SIZE) { | ||
74 | sctx->byte_count += len; | ||
75 | memcpy((u8 *)sctx->block + partial, data, len); | ||
76 | } else | ||
77 | __md5_sparc64_update(sctx, data, len, partial); | ||
78 | |||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | /* Add padding and return the message digest. */ | ||
/* Add padding and return the message digest. */
static int md5_sparc64_final(struct shash_desc *desc, u8 *out)
{
	struct md5_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	u32 *dst = (u32 *)out;
	__le64 bits;
	static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, };

	/* Capture the length before padding inflates byte_count. */
	bits = cpu_to_le64(sctx->byte_count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index);

	/* We need to fill a whole block for __md5_sparc64_update() */
	if (padlen <= 56) {
		/* Padding fits before the length field: just buffer it;
		 * the update below (partial == 56) flushes the block.
		 */
		sctx->byte_count += padlen;
		memcpy((u8 *)sctx->block + index, padding, padlen);
	} else {
		__md5_sparc64_update(sctx, padding, padlen, index);
	}
	__md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);

	/* Store state in digest — copied raw, since the hash words are
	 * kept little-endian in memory (see md5_sparc64_init()).
	 */
	for (i = 0; i < MD5_HASH_WORDS; i++)
		dst[i] = sctx->hash[i];

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
115 | |||
116 | static int md5_sparc64_export(struct shash_desc *desc, void *out) | ||
117 | { | ||
118 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
119 | |||
120 | memcpy(out, sctx, sizeof(*sctx)); | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static int md5_sparc64_import(struct shash_desc *desc, const void *in) | ||
126 | { | ||
127 | struct md5_state *sctx = shash_desc_ctx(desc); | ||
128 | |||
129 | memcpy(sctx, in, sizeof(*sctx)); | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
/* The md5-sparc64 shash instance.  statesize == descsize == the whole
 * md5_state, matching the memcpy-based export/import above.
 */
static struct shash_alg alg = {
	.digestsize	=	MD5_DIGEST_SIZE,
	.init		=	md5_sparc64_init,
	.update		=	md5_sparc64_update,
	.final		=	md5_sparc64_final,
	.export		=	md5_sparc64_export,
	.import		=	md5_sparc64_import,
	.descsize	=	sizeof(struct md5_state),
	.statesize	=	sizeof(struct md5_state),
	.base		=	{
		.cra_name	=	"md5",
		.cra_driver_name=	"md5-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	MD5_HMAC_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
152 | |||
153 | static bool __init sparc64_has_md5_opcode(void) | ||
154 | { | ||
155 | unsigned long cfr; | ||
156 | |||
157 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
158 | return false; | ||
159 | |||
160 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
161 | if (!(cfr & CFR_MD5)) | ||
162 | return false; | ||
163 | |||
164 | return true; | ||
165 | } | ||
166 | |||
167 | static int __init md5_sparc64_mod_init(void) | ||
168 | { | ||
169 | if (sparc64_has_md5_opcode()) { | ||
170 | pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n"); | ||
171 | return crypto_register_shash(&alg); | ||
172 | } | ||
173 | pr_info("sparc64 md5 opcode not available.\n"); | ||
174 | return -ENODEV; | ||
175 | } | ||
176 | |||
/* Module exit: unregister the shash registered at init. */
static void __exit md5_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
181 | |||
182 | module_init(md5_sparc64_mod_init); | ||
183 | module_exit(md5_sparc64_mod_fini); | ||
184 | |||
185 | MODULE_LICENSE("GPL"); | ||
186 | MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); | ||
187 | |||
188 | MODULE_ALIAS("md5"); | ||
diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h new file mode 100644 index 000000000000..19cbaea6976f --- /dev/null +++ b/arch/sparc/crypto/opcodes.h | |||
@@ -0,0 +1,99 @@ | |||
#ifndef _OPCODES_H
#define _OPCODES_H

/* Hand-assembled encodings of the sparc64 crypto instructions, emitted
 * as raw .word values so the code builds with assemblers that do not
 * know these opcodes.
 */

/* Priority for all algorithms registered by these drivers. */
#define SPARC_CR_OPCODE_PRIORITY	300

/* Build a SPARC format-3 instruction word from op/op3/opf fields. */
#define F3F(x,y,z)	(((x)<<30)|((y)<<19)|((z)<<5))

/* Encode a double-precision FP register number: bit 5 of the register
 * number moves into bit 0 of the field.
 */
#define FPD_ENCODE(x)	(((x) >> 5) | ((x) & ~(0x20)))

/* Place an encoded FP register number into the rs1/rs2/rs3/rd field. */
#define RS1(x)		(FPD_ENCODE(x) << 14)
#define RS2(x)		(FPD_ENCODE(x) << 0)
#define RS3(x)		(FPD_ENCODE(x) << 9)
#define RD(x)		(FPD_ENCODE(x) << 25)
/* Immediate operand fields (not register-encoded). */
#define IMM5_0(x)	((x)           << 0)
#define IMM5_9(x)	((x)           << 9)

#define CRC32C(a,b,c)	\
	.word	(F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c));

/* The hash opcodes take no operands; these are fixed instruction words. */
#define MD5		\
	.word	0x81b02800;
#define SHA1		\
	.word	0x81b02820;
#define SHA256		\
	.word	0x81b02840;
#define SHA512		\
	.word	0x81b02860;

/* AES round and key-expansion opcodes. */
#define AES_EROUND01(a,b,c,d)	\
	.word	(F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND23(a,b,c,d)	\
	.word	(F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND01(a,b,c,d)	\
	.word	(F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND23(a,b,c,d)	\
	.word	(F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND01_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_EROUND23_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND01_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_DROUND23_L(a,b,c,d)	\
	.word	(F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define AES_KEXPAND1(a,b,c,d)	\
	.word	(F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d));
#define AES_KEXPAND0(a,b,c)	\
	.word	(F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c));
#define AES_KEXPAND2(a,b,c)	\
	.word	(F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));

/* DES initial/inverse permutation, key expansion and round opcodes. */
#define DES_IP(a,b)	\
	.word	(F3F(2, 0x36, 0x134)|RS1(a)|RD(b));
#define DES_IIP(a,b)	\
	.word	(F3F(2, 0x36, 0x135)|RS1(a)|RD(b));
#define DES_KEXPAND(a,b,c)	\
	.word	(F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c));
#define DES_ROUND(a,b,c,d)	\
	.word	(F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d));

/* Camellia F/FL/FLI opcodes. */
#define CAMELLIA_F(a,b,c,d)		\
	.word	(F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
#define CAMELLIA_FL(a,b,c)		\
	.word	(F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c));
#define CAMELLIA_FLI(a,b,c)		\
	.word	(F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c));

/* Pre-assembled movdtox/movxtod instructions for the specific register
 * pairs the drivers need (named SRC_DST).
 * NOTE(review): the first four lack trailing semicolons while the rest
 * have them — harmless at current use sites, but inconsistent.
 */
#define MOVDTOX_F0_O4		\
	.word	0x99b02200
#define MOVDTOX_F2_O5		\
	.word	0x9bb02202
#define MOVXTOD_G1_F60		\
	.word	0xbbb02301
#define MOVXTOD_G1_F62		\
	.word	0xbfb02301
#define MOVXTOD_G3_F4		\
	.word	0x89b02303;
#define MOVXTOD_G7_F6		\
	.word	0x8db02307;
#define MOVXTOD_G3_F0		\
	.word	0x81b02303;
#define MOVXTOD_G7_F2		\
	.word	0x85b02307;
#define MOVXTOD_O0_F0		\
	.word	0x81b02308;
#define MOVXTOD_O5_F0		\
	.word	0x81b0230d;
#define MOVXTOD_O5_F2		\
	.word	0x85b0230d;
#define MOVXTOD_O5_F4		\
	.word	0x89b0230d;
#define MOVXTOD_O5_F6		\
	.word	0x8db0230d;
#define MOVXTOD_G3_F60		\
	.word	0xbbb02303;
#define MOVXTOD_G7_F62		\
	.word	0xbfb02307;

#endif /* _OPCODES_H */
diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S new file mode 100644 index 000000000000..219d10c5ae0e --- /dev/null +++ b/arch/sparc/crypto/sha1_asm.S | |||
@@ -0,0 +1,72 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
ENTRY(sha1_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds
	 *
	 * Runs the SHA1 crypto opcode over 'rounds' consecutive 64-byte
	 * input blocks, updating the five 32-bit digest words in place.
	 */
	VISEntryHalf
	/* Load the current chaining state into %f0-%f4. */
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	ld	[%o0 + 0x08], %f2
	/* Branch to the slow path if 'data' is not 8-byte aligned. */
	andcc	%o1, 0x7, %g0
	ld	[%o0 + 0x0c], %f3
	bne,pn	%xcc, 10f
	ld	[%o0 + 0x10], %f4

1:
	/* Aligned loop: load one 64-byte block into %f8-%f22 and run
	 * the SHA1 opcode, which consumes the block from those regs
	 * and updates %f0-%f4.
	 */
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	ldd	[%o1 + 0x38], %f22

	SHA1

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	add	%o1, 0x40, %o1	/* delay slot: advance to next block */

5:
	/* Store the updated digest back and return. */
	st	%f0, [%o0 + 0x00]
	st	%f1, [%o0 + 0x04]
	st	%f2, [%o0 + 0x08]
	st	%f3, [%o0 + 0x0c]
	st	%f4, [%o0 + 0x10]
	retl
	VISExitHalf
10:
	/* Unaligned input: round %o1 down to an 8-byte boundary and
	 * set %gsr for faligndata, then realign each block with a
	 * faligndata chain over one extra doubleword of lookahead.
	 */
	alignaddr	%o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
1:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	ldd	[%o1 + 0x40], %f26

	faligndata	%f10, %f12, %f8
	faligndata	%f12, %f14, %f10
	faligndata	%f14, %f16, %f12
	faligndata	%f16, %f18, %f14
	faligndata	%f18, %f20, %f16
	faligndata	%f20, %f22, %f18
	faligndata	%f22, %f24, %f20
	faligndata	%f24, %f26, %f22

	SHA1

	subcc	%o2, 1, %o2
	/* Carry the lookahead doubleword into the next iteration. */
	fsrc2	%f26, %f10
	bne,pt	%xcc, 1b
	add	%o1, 0x40, %o1

	ba,a,pt	%xcc, 5b
ENDPROC(sha1_sparc64_transform)
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c new file mode 100644 index 000000000000..2bbb20bee9f1 --- /dev/null +++ b/arch/sparc/crypto/sha1_glue.c | |||
@@ -0,0 +1,183 @@ | |||
1 | /* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c | ||
4 | * | ||
5 | * Copyright (c) Alan Smithee. | ||
6 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
7 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
8 | * Copyright (c) Mathias Krause <minipli@googlemail.com> | ||
9 | */ | ||
10 | |||
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
12 | |||
13 | #include <crypto/internal/hash.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/cryptohash.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <crypto/sha.h> | ||
20 | |||
21 | #include <asm/pstate.h> | ||
22 | #include <asm/elf.h> | ||
23 | |||
24 | #include "opcodes.h" | ||
25 | |||
26 | asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data, | ||
27 | unsigned int rounds); | ||
28 | |||
29 | static int sha1_sparc64_init(struct shash_desc *desc) | ||
30 | { | ||
31 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
32 | |||
33 | *sctx = (struct sha1_state){ | ||
34 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
35 | }; | ||
36 | |||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data, | ||
41 | unsigned int len, unsigned int partial) | ||
42 | { | ||
43 | unsigned int done = 0; | ||
44 | |||
45 | sctx->count += len; | ||
46 | if (partial) { | ||
47 | done = SHA1_BLOCK_SIZE - partial; | ||
48 | memcpy(sctx->buffer + partial, data, done); | ||
49 | sha1_sparc64_transform(sctx->state, sctx->buffer, 1); | ||
50 | } | ||
51 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
52 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
53 | |||
54 | sha1_sparc64_transform(sctx->state, data + done, rounds); | ||
55 | done += rounds * SHA1_BLOCK_SIZE; | ||
56 | } | ||
57 | |||
58 | memcpy(sctx->buffer, data + done, len - done); | ||
59 | } | ||
60 | |||
61 | static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
62 | unsigned int len) | ||
63 | { | ||
64 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
65 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
66 | |||
67 | /* Handle the fast case right here */ | ||
68 | if (partial + len < SHA1_BLOCK_SIZE) { | ||
69 | sctx->count += len; | ||
70 | memcpy(sctx->buffer + partial, data, len); | ||
71 | } else | ||
72 | __sha1_sparc64_update(sctx, data, len, partial); | ||
73 | |||
74 | return 0; | ||
75 | } | ||
76 | |||
77 | /* Add padding and return the message digest. */ | ||
78 | static int sha1_sparc64_final(struct shash_desc *desc, u8 *out) | ||
79 | { | ||
80 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
81 | unsigned int i, index, padlen; | ||
82 | __be32 *dst = (__be32 *)out; | ||
83 | __be64 bits; | ||
84 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
85 | |||
86 | bits = cpu_to_be64(sctx->count << 3); | ||
87 | |||
88 | /* Pad out to 56 mod 64 and append length */ | ||
89 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
90 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
91 | |||
92 | /* We need to fill a whole block for __sha1_sparc64_update() */ | ||
93 | if (padlen <= 56) { | ||
94 | sctx->count += padlen; | ||
95 | memcpy(sctx->buffer + index, padding, padlen); | ||
96 | } else { | ||
97 | __sha1_sparc64_update(sctx, padding, padlen, index); | ||
98 | } | ||
99 | __sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); | ||
100 | |||
101 | /* Store state in digest */ | ||
102 | for (i = 0; i < 5; i++) | ||
103 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
104 | |||
105 | /* Wipe context */ | ||
106 | memset(sctx, 0, sizeof(*sctx)); | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | static int sha1_sparc64_export(struct shash_desc *desc, void *out) | ||
112 | { | ||
113 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
114 | |||
115 | memcpy(out, sctx, sizeof(*sctx)); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | static int sha1_sparc64_import(struct shash_desc *desc, const void *in) | ||
121 | { | ||
122 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
123 | |||
124 | memcpy(sctx, in, sizeof(*sctx)); | ||
125 | |||
126 | return 0; | ||
127 | } | ||
128 | |||
/* SHA-1 shash algorithm descriptor.  SPARC_CR_OPCODE_PRIORITY ranks this
 * implementation above the generic C one when the crypto opcodes exist.
 */
static struct shash_alg alg = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	sha1_sparc64_init,
	.update		=	sha1_sparc64_update,
	.final		=	sha1_sparc64_final,
	.export		=	sha1_sparc64_export,
	.import		=	sha1_sparc64_import,
	.descsize	=	sizeof(struct sha1_state),
	.statesize	=	sizeof(struct sha1_state),
	.base		=	{
		.cra_name	=	"sha1",
		.cra_driver_name=	"sha1-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA1_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
147 | |||
148 | static bool __init sparc64_has_sha1_opcode(void) | ||
149 | { | ||
150 | unsigned long cfr; | ||
151 | |||
152 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
153 | return false; | ||
154 | |||
155 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
156 | if (!(cfr & CFR_SHA1)) | ||
157 | return false; | ||
158 | |||
159 | return true; | ||
160 | } | ||
161 | |||
162 | static int __init sha1_sparc64_mod_init(void) | ||
163 | { | ||
164 | if (sparc64_has_sha1_opcode()) { | ||
165 | pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n"); | ||
166 | return crypto_register_shash(&alg); | ||
167 | } | ||
168 | pr_info("sparc64 sha1 opcode not available.\n"); | ||
169 | return -ENODEV; | ||
170 | } | ||
171 | |||
/* Module exit: drop the shash registration. */
static void __exit sha1_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
176 | |||
177 | module_init(sha1_sparc64_mod_init); | ||
178 | module_exit(sha1_sparc64_mod_fini); | ||
179 | |||
180 | MODULE_LICENSE("GPL"); | ||
181 | MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated"); | ||
182 | |||
183 | MODULE_ALIAS("sha1"); | ||
diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S new file mode 100644 index 000000000000..b5f3d5826eb4 --- /dev/null +++ b/arch/sparc/crypto/sha256_asm.S | |||
@@ -0,0 +1,78 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
ENTRY(sha256_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds
	 *
	 * Runs the SHA256 crypto opcode over 'rounds' consecutive 64-byte
	 * input blocks, updating the eight 32-bit digest words in place.
	 */
	VISEntryHalf
	/* Load the current chaining state into %f0-%f7. */
	ld	[%o0 + 0x00], %f0
	ld	[%o0 + 0x04], %f1
	ld	[%o0 + 0x08], %f2
	ld	[%o0 + 0x0c], %f3
	ld	[%o0 + 0x10], %f4
	ld	[%o0 + 0x14], %f5
	/* Branch to the slow path if 'data' is not 8-byte aligned. */
	andcc	%o1, 0x7, %g0
	ld	[%o0 + 0x18], %f6
	bne,pn	%xcc, 10f
	ld	[%o0 + 0x1c], %f7

1:
	/* Aligned loop: load one 64-byte block into %f8-%f22 and run
	 * the SHA256 opcode, which updates %f0-%f7.
	 */
	ldd	[%o1 + 0x00], %f8
	ldd	[%o1 + 0x08], %f10
	ldd	[%o1 + 0x10], %f12
	ldd	[%o1 + 0x18], %f14
	ldd	[%o1 + 0x20], %f16
	ldd	[%o1 + 0x28], %f18
	ldd	[%o1 + 0x30], %f20
	ldd	[%o1 + 0x38], %f22

	SHA256

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	add	%o1, 0x40, %o1	/* delay slot: advance to next block */

5:
	/* Store the updated digest back and return. */
	st	%f0, [%o0 + 0x00]
	st	%f1, [%o0 + 0x04]
	st	%f2, [%o0 + 0x08]
	st	%f3, [%o0 + 0x0c]
	st	%f4, [%o0 + 0x10]
	st	%f5, [%o0 + 0x14]
	st	%f6, [%o0 + 0x18]
	st	%f7, [%o0 + 0x1c]
	retl
	VISExitHalf
10:
	/* Unaligned input: set up %gsr via alignaddr, then realign each
	 * block with a faligndata chain over one extra doubleword of
	 * lookahead.
	 */
	alignaddr	%o1, %g0, %o1

	ldd	[%o1 + 0x00], %f10
1:
	ldd	[%o1 + 0x08], %f12
	ldd	[%o1 + 0x10], %f14
	ldd	[%o1 + 0x18], %f16
	ldd	[%o1 + 0x20], %f18
	ldd	[%o1 + 0x28], %f20
	ldd	[%o1 + 0x30], %f22
	ldd	[%o1 + 0x38], %f24
	ldd	[%o1 + 0x40], %f26

	faligndata	%f10, %f12, %f8
	faligndata	%f12, %f14, %f10
	faligndata	%f14, %f16, %f12
	faligndata	%f16, %f18, %f14
	faligndata	%f18, %f20, %f16
	faligndata	%f20, %f22, %f18
	faligndata	%f22, %f24, %f20
	faligndata	%f24, %f26, %f22

	SHA256

	subcc	%o2, 1, %o2
	/* Carry the lookahead doubleword into the next iteration. */
	fsrc2	%f26, %f10
	bne,pt	%xcc, 1b
	add	%o1, 0x40, %o1

	ba,a,pt	%xcc, 5b
ENDPROC(sha256_sparc64_transform)
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c new file mode 100644 index 000000000000..591e656bd891 --- /dev/null +++ b/arch/sparc/crypto/sha256_glue.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon crypto/sha256_generic.c | ||
4 | * | ||
5 | * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> | ||
6 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
7 | * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> | ||
8 | * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com> | ||
9 | */ | ||
10 | |||
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
12 | |||
13 | #include <crypto/internal/hash.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/cryptohash.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <crypto/sha.h> | ||
20 | |||
21 | #include <asm/pstate.h> | ||
22 | #include <asm/elf.h> | ||
23 | |||
24 | #include "opcodes.h" | ||
25 | |||
26 | asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data, | ||
27 | unsigned int rounds); | ||
28 | |||
29 | static int sha224_sparc64_init(struct shash_desc *desc) | ||
30 | { | ||
31 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
32 | sctx->state[0] = SHA224_H0; | ||
33 | sctx->state[1] = SHA224_H1; | ||
34 | sctx->state[2] = SHA224_H2; | ||
35 | sctx->state[3] = SHA224_H3; | ||
36 | sctx->state[4] = SHA224_H4; | ||
37 | sctx->state[5] = SHA224_H5; | ||
38 | sctx->state[6] = SHA224_H6; | ||
39 | sctx->state[7] = SHA224_H7; | ||
40 | sctx->count = 0; | ||
41 | |||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | static int sha256_sparc64_init(struct shash_desc *desc) | ||
46 | { | ||
47 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
48 | sctx->state[0] = SHA256_H0; | ||
49 | sctx->state[1] = SHA256_H1; | ||
50 | sctx->state[2] = SHA256_H2; | ||
51 | sctx->state[3] = SHA256_H3; | ||
52 | sctx->state[4] = SHA256_H4; | ||
53 | sctx->state[5] = SHA256_H5; | ||
54 | sctx->state[6] = SHA256_H6; | ||
55 | sctx->state[7] = SHA256_H7; | ||
56 | sctx->count = 0; | ||
57 | |||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data, | ||
62 | unsigned int len, unsigned int partial) | ||
63 | { | ||
64 | unsigned int done = 0; | ||
65 | |||
66 | sctx->count += len; | ||
67 | if (partial) { | ||
68 | done = SHA256_BLOCK_SIZE - partial; | ||
69 | memcpy(sctx->buf + partial, data, done); | ||
70 | sha256_sparc64_transform(sctx->state, sctx->buf, 1); | ||
71 | } | ||
72 | if (len - done >= SHA256_BLOCK_SIZE) { | ||
73 | const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; | ||
74 | |||
75 | sha256_sparc64_transform(sctx->state, data + done, rounds); | ||
76 | done += rounds * SHA256_BLOCK_SIZE; | ||
77 | } | ||
78 | |||
79 | memcpy(sctx->buf, data + done, len - done); | ||
80 | } | ||
81 | |||
82 | static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
83 | unsigned int len) | ||
84 | { | ||
85 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
86 | unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; | ||
87 | |||
88 | /* Handle the fast case right here */ | ||
89 | if (partial + len < SHA256_BLOCK_SIZE) { | ||
90 | sctx->count += len; | ||
91 | memcpy(sctx->buf + partial, data, len); | ||
92 | } else | ||
93 | __sha256_sparc64_update(sctx, data, len, partial); | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static int sha256_sparc64_final(struct shash_desc *desc, u8 *out) | ||
99 | { | ||
100 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
101 | unsigned int i, index, padlen; | ||
102 | __be32 *dst = (__be32 *)out; | ||
103 | __be64 bits; | ||
104 | static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; | ||
105 | |||
106 | bits = cpu_to_be64(sctx->count << 3); | ||
107 | |||
108 | /* Pad out to 56 mod 64 and append length */ | ||
109 | index = sctx->count % SHA256_BLOCK_SIZE; | ||
110 | padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index); | ||
111 | |||
112 | /* We need to fill a whole block for __sha256_sparc64_update() */ | ||
113 | if (padlen <= 56) { | ||
114 | sctx->count += padlen; | ||
115 | memcpy(sctx->buf + index, padding, padlen); | ||
116 | } else { | ||
117 | __sha256_sparc64_update(sctx, padding, padlen, index); | ||
118 | } | ||
119 | __sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56); | ||
120 | |||
121 | /* Store state in digest */ | ||
122 | for (i = 0; i < 8; i++) | ||
123 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
124 | |||
125 | /* Wipe context */ | ||
126 | memset(sctx, 0, sizeof(*sctx)); | ||
127 | |||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash) | ||
132 | { | ||
133 | u8 D[SHA256_DIGEST_SIZE]; | ||
134 | |||
135 | sha256_sparc64_final(desc, D); | ||
136 | |||
137 | memcpy(hash, D, SHA224_DIGEST_SIZE); | ||
138 | memset(D, 0, SHA256_DIGEST_SIZE); | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int sha256_sparc64_export(struct shash_desc *desc, void *out) | ||
144 | { | ||
145 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
146 | |||
147 | memcpy(out, sctx, sizeof(*sctx)); | ||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | static int sha256_sparc64_import(struct shash_desc *desc, const void *in) | ||
152 | { | ||
153 | struct sha256_state *sctx = shash_desc_ctx(desc); | ||
154 | |||
155 | memcpy(sctx, in, sizeof(*sctx)); | ||
156 | return 0; | ||
157 | } | ||
158 | |||
/* SHA-256 shash algorithm descriptor.  SPARC_CR_OPCODE_PRIORITY ranks
 * this implementation above the generic C one.
 */
static struct shash_alg sha256 = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	sha256_sparc64_init,
	.update		=	sha256_sparc64_update,
	.final		=	sha256_sparc64_final,
	.export		=	sha256_sparc64_export,
	.import		=	sha256_sparc64_import,
	.descsize	=	sizeof(struct sha256_state),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name	=	"sha256",
		.cra_driver_name=	"sha256-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA256_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
177 | |||
178 | static struct shash_alg sha224 = { | ||
179 | .digestsize = SHA224_DIGEST_SIZE, | ||
180 | .init = sha224_sparc64_init, | ||
181 | .update = sha256_sparc64_update, | ||
182 | .final = sha224_sparc64_final, | ||
183 | .descsize = sizeof(struct sha256_state), | ||
184 | .base = { | ||
185 | .cra_name = "sha224", | ||
186 | .cra_driver_name= "sha224-sparc64", | ||
187 | .cra_priority = SPARC_CR_OPCODE_PRIORITY, | ||
188 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
189 | .cra_blocksize = SHA224_BLOCK_SIZE, | ||
190 | .cra_module = THIS_MODULE, | ||
191 | } | ||
192 | }; | ||
193 | |||
194 | static bool __init sparc64_has_sha256_opcode(void) | ||
195 | { | ||
196 | unsigned long cfr; | ||
197 | |||
198 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
199 | return false; | ||
200 | |||
201 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
202 | if (!(cfr & CFR_SHA256)) | ||
203 | return false; | ||
204 | |||
205 | return true; | ||
206 | } | ||
207 | |||
208 | static int __init sha256_sparc64_mod_init(void) | ||
209 | { | ||
210 | if (sparc64_has_sha256_opcode()) { | ||
211 | int ret = crypto_register_shash(&sha224); | ||
212 | if (ret < 0) | ||
213 | return ret; | ||
214 | |||
215 | ret = crypto_register_shash(&sha256); | ||
216 | if (ret < 0) { | ||
217 | crypto_unregister_shash(&sha224); | ||
218 | return ret; | ||
219 | } | ||
220 | |||
221 | pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n"); | ||
222 | return 0; | ||
223 | } | ||
224 | pr_info("sparc64 sha256 opcode not available.\n"); | ||
225 | return -ENODEV; | ||
226 | } | ||
227 | |||
/* Module exit: drop both shash registrations. */
static void __exit sha256_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&sha224);
	crypto_unregister_shash(&sha256);
}
233 | |||
234 | module_init(sha256_sparc64_mod_init); | ||
235 | module_exit(sha256_sparc64_mod_fini); | ||
236 | |||
237 | MODULE_LICENSE("GPL"); | ||
238 | MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated"); | ||
239 | |||
240 | MODULE_ALIAS("sha224"); | ||
241 | MODULE_ALIAS("sha256"); | ||
diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S new file mode 100644 index 000000000000..54bfba713c0e --- /dev/null +++ b/arch/sparc/crypto/sha512_asm.S | |||
@@ -0,0 +1,102 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/visasm.h> | ||
3 | |||
4 | #include "opcodes.h" | ||
5 | |||
ENTRY(sha512_sparc64_transform)
	/* %o0 = digest, %o1 = data, %o2 = rounds
	 *
	 * Runs the SHA512 crypto opcode over 'rounds' consecutive 128-byte
	 * input blocks, updating the eight 64-bit digest words in place.
	 * Uses the full FPU register file, hence VISEntry/VISExit.
	 */
	VISEntry
	/* Load the current chaining state into %f0-%f14. */
	ldd	[%o0 + 0x00], %f0
	ldd	[%o0 + 0x08], %f2
	ldd	[%o0 + 0x10], %f4
	ldd	[%o0 + 0x18], %f6
	ldd	[%o0 + 0x20], %f8
	ldd	[%o0 + 0x28], %f10
	/* Branch to the slow path if 'data' is not 8-byte aligned. */
	andcc	%o1, 0x7, %g0
	ldd	[%o0 + 0x30], %f12
	bne,pn	%xcc, 10f
	ldd	[%o0 + 0x38], %f14

1:
	/* Aligned loop: load one 128-byte block into %f16-%f46 and run
	 * the SHA512 opcode, which updates %f0-%f14.
	 */
	ldd	[%o1 + 0x00], %f16
	ldd	[%o1 + 0x08], %f18
	ldd	[%o1 + 0x10], %f20
	ldd	[%o1 + 0x18], %f22
	ldd	[%o1 + 0x20], %f24
	ldd	[%o1 + 0x28], %f26
	ldd	[%o1 + 0x30], %f28
	ldd	[%o1 + 0x38], %f30
	ldd	[%o1 + 0x40], %f32
	ldd	[%o1 + 0x48], %f34
	ldd	[%o1 + 0x50], %f36
	ldd	[%o1 + 0x58], %f38
	ldd	[%o1 + 0x60], %f40
	ldd	[%o1 + 0x68], %f42
	ldd	[%o1 + 0x70], %f44
	ldd	[%o1 + 0x78], %f46

	SHA512

	subcc	%o2, 1, %o2
	bne,pt	%xcc, 1b
	add	%o1, 0x80, %o1	/* delay slot: advance to next block */

5:
	/* Store the updated digest back and return. */
	std	%f0, [%o0 + 0x00]
	std	%f2, [%o0 + 0x08]
	std	%f4, [%o0 + 0x10]
	std	%f6, [%o0 + 0x18]
	std	%f8, [%o0 + 0x20]
	std	%f10, [%o0 + 0x28]
	std	%f12, [%o0 + 0x30]
	std	%f14, [%o0 + 0x38]
	retl
	VISExit
10:
	/* Unaligned input: set up %gsr via alignaddr, then realign each
	 * block with a faligndata chain over one extra doubleword of
	 * lookahead.
	 */
	alignaddr	%o1, %g0, %o1

	ldd	[%o1 + 0x00], %f18
1:
	ldd	[%o1 + 0x08], %f20
	ldd	[%o1 + 0x10], %f22
	ldd	[%o1 + 0x18], %f24
	ldd	[%o1 + 0x20], %f26
	ldd	[%o1 + 0x28], %f28
	ldd	[%o1 + 0x30], %f30
	ldd	[%o1 + 0x38], %f32
	ldd	[%o1 + 0x40], %f34
	ldd	[%o1 + 0x48], %f36
	ldd	[%o1 + 0x50], %f38
	ldd	[%o1 + 0x58], %f40
	ldd	[%o1 + 0x60], %f42
	ldd	[%o1 + 0x68], %f44
	ldd	[%o1 + 0x70], %f46
	ldd	[%o1 + 0x78], %f48
	ldd	[%o1 + 0x80], %f50

	faligndata	%f18, %f20, %f16
	faligndata	%f20, %f22, %f18
	faligndata	%f22, %f24, %f20
	faligndata	%f24, %f26, %f22
	faligndata	%f26, %f28, %f24
	faligndata	%f28, %f30, %f26
	faligndata	%f30, %f32, %f28
	faligndata	%f32, %f34, %f30
	faligndata	%f34, %f36, %f32
	faligndata	%f36, %f38, %f34
	faligndata	%f38, %f40, %f36
	faligndata	%f40, %f42, %f38
	faligndata	%f42, %f44, %f40
	faligndata	%f44, %f46, %f42
	faligndata	%f46, %f48, %f44
	faligndata	%f48, %f50, %f46

	SHA512

	subcc	%o2, 1, %o2
	/* Carry the lookahead doubleword into the next iteration. */
	fsrc2	%f50, %f18
	bne,pt	%xcc, 1b
	add	%o1, 0x80, %o1

	ba,a,pt	%xcc, 5b
ENDPROC(sha512_sparc64_transform)
diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c new file mode 100644 index 000000000000..486f0a2b7001 --- /dev/null +++ b/arch/sparc/crypto/sha512_glue.c | |||
@@ -0,0 +1,226 @@ | |||
1 | /* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes. | ||
2 | * | ||
3 | * This is based largely upon crypto/sha512_generic.c | ||
4 | * | ||
5 | * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> | ||
6 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
7 | * Copyright (c) 2003 Kyle McMartin <kyle@debian.org> | ||
8 | */ | ||
9 | |||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
12 | #include <crypto/internal/hash.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/mm.h> | ||
16 | #include <linux/cryptohash.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <crypto/sha.h> | ||
19 | |||
20 | #include <asm/pstate.h> | ||
21 | #include <asm/elf.h> | ||
22 | |||
23 | #include "opcodes.h" | ||
24 | |||
25 | asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data, | ||
26 | unsigned int rounds); | ||
27 | |||
28 | static int sha512_sparc64_init(struct shash_desc *desc) | ||
29 | { | ||
30 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
31 | sctx->state[0] = SHA512_H0; | ||
32 | sctx->state[1] = SHA512_H1; | ||
33 | sctx->state[2] = SHA512_H2; | ||
34 | sctx->state[3] = SHA512_H3; | ||
35 | sctx->state[4] = SHA512_H4; | ||
36 | sctx->state[5] = SHA512_H5; | ||
37 | sctx->state[6] = SHA512_H6; | ||
38 | sctx->state[7] = SHA512_H7; | ||
39 | sctx->count[0] = sctx->count[1] = 0; | ||
40 | |||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | static int sha384_sparc64_init(struct shash_desc *desc) | ||
45 | { | ||
46 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
47 | sctx->state[0] = SHA384_H0; | ||
48 | sctx->state[1] = SHA384_H1; | ||
49 | sctx->state[2] = SHA384_H2; | ||
50 | sctx->state[3] = SHA384_H3; | ||
51 | sctx->state[4] = SHA384_H4; | ||
52 | sctx->state[5] = SHA384_H5; | ||
53 | sctx->state[6] = SHA384_H6; | ||
54 | sctx->state[7] = SHA384_H7; | ||
55 | sctx->count[0] = sctx->count[1] = 0; | ||
56 | |||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data, | ||
61 | unsigned int len, unsigned int partial) | ||
62 | { | ||
63 | unsigned int done = 0; | ||
64 | |||
65 | if ((sctx->count[0] += len) < len) | ||
66 | sctx->count[1]++; | ||
67 | if (partial) { | ||
68 | done = SHA512_BLOCK_SIZE - partial; | ||
69 | memcpy(sctx->buf + partial, data, done); | ||
70 | sha512_sparc64_transform(sctx->state, sctx->buf, 1); | ||
71 | } | ||
72 | if (len - done >= SHA512_BLOCK_SIZE) { | ||
73 | const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; | ||
74 | |||
75 | sha512_sparc64_transform(sctx->state, data + done, rounds); | ||
76 | done += rounds * SHA512_BLOCK_SIZE; | ||
77 | } | ||
78 | |||
79 | memcpy(sctx->buf, data + done, len - done); | ||
80 | } | ||
81 | |||
82 | static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data, | ||
83 | unsigned int len) | ||
84 | { | ||
85 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
86 | unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
87 | |||
88 | /* Handle the fast case right here */ | ||
89 | if (partial + len < SHA512_BLOCK_SIZE) { | ||
90 | if ((sctx->count[0] += len) < len) | ||
91 | sctx->count[1]++; | ||
92 | memcpy(sctx->buf + partial, data, len); | ||
93 | } else | ||
94 | __sha512_sparc64_update(sctx, data, len, partial); | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | static int sha512_sparc64_final(struct shash_desc *desc, u8 *out) | ||
100 | { | ||
101 | struct sha512_state *sctx = shash_desc_ctx(desc); | ||
102 | unsigned int i, index, padlen; | ||
103 | __be64 *dst = (__be64 *)out; | ||
104 | __be64 bits[2]; | ||
105 | static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; | ||
106 | |||
107 | /* Save number of bits */ | ||
108 | bits[1] = cpu_to_be64(sctx->count[0] << 3); | ||
109 | bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); | ||
110 | |||
111 | /* Pad out to 112 mod 128 and append length */ | ||
112 | index = sctx->count[0] % SHA512_BLOCK_SIZE; | ||
113 | padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index); | ||
114 | |||
115 | /* We need to fill a whole block for __sha512_sparc64_update() */ | ||
116 | if (padlen <= 112) { | ||
117 | if ((sctx->count[0] += padlen) < padlen) | ||
118 | sctx->count[1]++; | ||
119 | memcpy(sctx->buf + index, padding, padlen); | ||
120 | } else { | ||
121 | __sha512_sparc64_update(sctx, padding, padlen, index); | ||
122 | } | ||
123 | __sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112); | ||
124 | |||
125 | /* Store state in digest */ | ||
126 | for (i = 0; i < 8; i++) | ||
127 | dst[i] = cpu_to_be64(sctx->state[i]); | ||
128 | |||
129 | /* Wipe context */ | ||
130 | memset(sctx, 0, sizeof(*sctx)); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash) | ||
136 | { | ||
137 | u8 D[64]; | ||
138 | |||
139 | sha512_sparc64_final(desc, D); | ||
140 | |||
141 | memcpy(hash, D, 48); | ||
142 | memset(D, 0, 64); | ||
143 | |||
144 | return 0; | ||
145 | } | ||
146 | |||
/* SHA-512 shash algorithm descriptor.  No export/import hooks are
 * provided (matching crypto/sha512_generic.c of this era).
 */
static struct shash_alg sha512 = {
	.digestsize	=	SHA512_DIGEST_SIZE,
	.init		=	sha512_sparc64_init,
	.update		=	sha512_sparc64_update,
	.final		=	sha512_sparc64_final,
	.descsize	=	sizeof(struct sha512_state),
	.base		=	{
		.cra_name	=	"sha512",
		.cra_driver_name=	"sha512-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA512_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
162 | |||
/* SHA-384 shash algorithm descriptor; shares the sha512 update path and
 * descriptor state, differing only in IV and truncated final.
 */
static struct shash_alg sha384 = {
	.digestsize	=	SHA384_DIGEST_SIZE,
	.init		=	sha384_sparc64_init,
	.update		=	sha512_sparc64_update,
	.final		=	sha384_sparc64_final,
	.descsize	=	sizeof(struct sha512_state),
	.base		=	{
		.cra_name	=	"sha384",
		.cra_driver_name=	"sha384-sparc64",
		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA384_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
178 | |||
179 | static bool __init sparc64_has_sha512_opcode(void) | ||
180 | { | ||
181 | unsigned long cfr; | ||
182 | |||
183 | if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) | ||
184 | return false; | ||
185 | |||
186 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
187 | if (!(cfr & CFR_SHA512)) | ||
188 | return false; | ||
189 | |||
190 | return true; | ||
191 | } | ||
192 | |||
193 | static int __init sha512_sparc64_mod_init(void) | ||
194 | { | ||
195 | if (sparc64_has_sha512_opcode()) { | ||
196 | int ret = crypto_register_shash(&sha384); | ||
197 | if (ret < 0) | ||
198 | return ret; | ||
199 | |||
200 | ret = crypto_register_shash(&sha512); | ||
201 | if (ret < 0) { | ||
202 | crypto_unregister_shash(&sha384); | ||
203 | return ret; | ||
204 | } | ||
205 | |||
206 | pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n"); | ||
207 | return 0; | ||
208 | } | ||
209 | pr_info("sparc64 sha512 opcode not available.\n"); | ||
210 | return -ENODEV; | ||
211 | } | ||
212 | |||
/* Module exit: drop both shash registrations. */
static void __exit sha512_sparc64_mod_fini(void)
{
	crypto_unregister_shash(&sha384);
	crypto_unregister_shash(&sha512);
}
218 | |||
219 | module_init(sha512_sparc64_mod_init); | ||
220 | module_exit(sha512_sparc64_mod_fini); | ||
221 | |||
222 | MODULE_LICENSE("GPL"); | ||
223 | MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated"); | ||
224 | |||
225 | MODULE_ALIAS("sha384"); | ||
226 | MODULE_ALIAS("sha512"); | ||
diff --git a/arch/sparc/include/asm/asi.h b/arch/sparc/include/asm/asi.h index 61ebe7411ceb..cc0006dc5d4a 100644 --- a/arch/sparc/include/asm/asi.h +++ b/arch/sparc/include/asm/asi.h | |||
@@ -141,7 +141,8 @@ | |||
141 | /* SpitFire and later extended ASIs. The "(III)" marker designates | 141 | /* SpitFire and later extended ASIs. The "(III)" marker designates |
142 | * UltraSparc-III and later specific ASIs. The "(CMT)" marker designates | 142 | * UltraSparc-III and later specific ASIs. The "(CMT)" marker designates |
143 | * Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific | 143 | * Chip Multi Threading specific ASIs. "(NG)" designates Niagara specific |
144 | * ASIs, "(4V)" designates SUN4V specific ASIs. | 144 | * ASIs, "(4V)" designates SUN4V specific ASIs. "(NG4)" designates SPARC-T4 |
145 | * and later ASIs. | ||
145 | */ | 146 | */ |
146 | #define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */ | 147 | #define ASI_PHYS_USE_EC 0x14 /* PADDR, E-cachable */ |
147 | #define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */ | 148 | #define ASI_PHYS_BYPASS_EC_E 0x15 /* PADDR, E-bit */ |
@@ -243,6 +244,7 @@ | |||
243 | #define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/ | 244 | #define ASI_UDBL_CONTROL_R 0x7f /* External UDB control regs rd low*/ |
244 | #define ASI_INTR_R 0x7f /* IRQ vector dispatch read */ | 245 | #define ASI_INTR_R 0x7f /* IRQ vector dispatch read */ |
245 | #define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */ | 246 | #define ASI_INTR_DATAN_R 0x7f /* (III) In irq vector data reg N */ |
247 | #define ASI_PIC 0xb0 /* (NG4) PIC registers */ | ||
246 | #define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */ | 248 | #define ASI_PST8_P 0xc0 /* Primary, 8 8-bit, partial */ |
247 | #define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */ | 249 | #define ASI_PST8_S 0xc1 /* Secondary, 8 8-bit, partial */ |
248 | #define ASI_PST16_P 0xc2 /* Primary, 4 16-bit, partial */ | 250 | #define ASI_PST16_P 0xc2 /* Primary, 4 16-bit, partial */ |
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 7df8b7f544d4..370ca1e71ffb 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h | |||
@@ -86,6 +86,15 @@ | |||
86 | #define AV_SPARC_IMA 0x00400000 /* integer multiply-add */ | 86 | #define AV_SPARC_IMA 0x00400000 /* integer multiply-add */ |
87 | #define AV_SPARC_ASI_CACHE_SPARING \ | 87 | #define AV_SPARC_ASI_CACHE_SPARING \ |
88 | 0x00800000 /* cache sparing ASIs available */ | 88 | 0x00800000 /* cache sparing ASIs available */ |
89 | #define AV_SPARC_PAUSE 0x01000000 /* PAUSE available */ | ||
90 | #define AV_SPARC_CBCOND 0x02000000 /* CBCOND insns available */ | ||
91 | |||
92 | /* Solaris decided to enumerate every single crypto instruction type | ||
93 | * in the AT_HWCAP bits. This is wasteful, since if crypto is present, | ||
94 | * you still need to look in the CFR register to see if the opcode is | ||
95 | * really available. So we simply advertise only "crypto" support. | ||
96 | */ | ||
97 | #define HWCAP_SPARC_CRYPTO 0x04000000 /* CRYPTO insns available */ | ||
89 | 98 | ||
90 | #define CORE_DUMP_USE_REGSET | 99 | #define CORE_DUMP_USE_REGSET |
91 | 100 | ||
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 015a761eaa32..ca121f0fa3ec 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h | |||
@@ -2934,6 +2934,16 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra, | |||
2934 | unsigned long len); | 2934 | unsigned long len); |
2935 | #endif | 2935 | #endif |
2936 | 2936 | ||
2937 | #define HV_FAST_VT_GET_PERFREG 0x184 | ||
2938 | #define HV_FAST_VT_SET_PERFREG 0x185 | ||
2939 | |||
2940 | #ifndef __ASSEMBLY__ | ||
2941 | extern unsigned long sun4v_vt_get_perfreg(unsigned long reg_num, | ||
2942 | unsigned long *reg_val); | ||
2943 | extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, | ||
2944 | unsigned long reg_val); | ||
2945 | #endif | ||
2946 | |||
2937 | /* Function numbers for HV_CORE_TRAP. */ | 2947 | /* Function numbers for HV_CORE_TRAP. */ |
2938 | #define HV_CORE_SET_VER 0x00 | 2948 | #define HV_CORE_SET_VER 0x00 |
2939 | #define HV_CORE_PUTCHAR 0x01 | 2949 | #define HV_CORE_PUTCHAR 0x01 |
@@ -2964,6 +2974,7 @@ extern unsigned long sun4v_reboot_data_set(unsigned long ra, | |||
2964 | #define HV_GRP_NIU 0x0204 | 2974 | #define HV_GRP_NIU 0x0204 |
2965 | #define HV_GRP_VF_CPU 0x0205 | 2975 | #define HV_GRP_VF_CPU 0x0205 |
2966 | #define HV_GRP_KT_CPU 0x0209 | 2976 | #define HV_GRP_KT_CPU 0x0209 |
2977 | #define HV_GRP_VT_CPU 0x020c | ||
2967 | #define HV_GRP_DIAG 0x0300 | 2978 | #define HV_GRP_DIAG 0x0300 |
2968 | 2979 | ||
2969 | #ifndef __ASSEMBLY__ | 2980 | #ifndef __ASSEMBLY__ |
diff --git a/arch/sparc/include/asm/mdesc.h b/arch/sparc/include/asm/mdesc.h index 9faa046713fb..139097f3a67b 100644 --- a/arch/sparc/include/asm/mdesc.h +++ b/arch/sparc/include/asm/mdesc.h | |||
@@ -73,6 +73,7 @@ extern void mdesc_register_notifier(struct mdesc_notifier_client *client); | |||
73 | 73 | ||
74 | extern void mdesc_fill_in_cpu_data(cpumask_t *mask); | 74 | extern void mdesc_fill_in_cpu_data(cpumask_t *mask); |
75 | extern void mdesc_populate_present_mask(cpumask_t *mask); | 75 | extern void mdesc_populate_present_mask(cpumask_t *mask); |
76 | extern void mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask); | ||
76 | 77 | ||
77 | extern void sun4v_mdesc_init(void); | 78 | extern void sun4v_mdesc_init(void); |
78 | 79 | ||
diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h index 288d7beba051..942bb17f60cd 100644 --- a/arch/sparc/include/asm/pcr.h +++ b/arch/sparc/include/asm/pcr.h | |||
@@ -2,8 +2,13 @@ | |||
2 | #define __PCR_H | 2 | #define __PCR_H |
3 | 3 | ||
4 | struct pcr_ops { | 4 | struct pcr_ops { |
5 | u64 (*read)(void); | 5 | u64 (*read_pcr)(unsigned long); |
6 | void (*write)(u64); | 6 | void (*write_pcr)(unsigned long, u64); |
7 | u64 (*read_pic)(unsigned long); | ||
8 | void (*write_pic)(unsigned long, u64); | ||
9 | u64 (*nmi_picl_value)(unsigned int nmi_hz); | ||
10 | u64 pcr_nmi_enable; | ||
11 | u64 pcr_nmi_disable; | ||
7 | }; | 12 | }; |
8 | extern const struct pcr_ops *pcr_ops; | 13 | extern const struct pcr_ops *pcr_ops; |
9 | 14 | ||
@@ -27,21 +32,18 @@ extern void schedule_deferred_pcr_work(void); | |||
27 | #define PCR_N2_SL1_SHIFT 27 | 32 | #define PCR_N2_SL1_SHIFT 27 |
28 | #define PCR_N2_OV1 0x80000000 | 33 | #define PCR_N2_OV1 0x80000000 |
29 | 34 | ||
30 | extern unsigned int picl_shift; | 35 | #define PCR_N4_OV 0x00000001 /* PIC overflow */ |
31 | 36 | #define PCR_N4_TOE 0x00000002 /* Trap On Event */ | |
32 | /* In order to commonize as much of the implementation as | 37 | #define PCR_N4_UTRACE 0x00000004 /* Trace user events */ |
33 | * possible, we use PICH as our counter. Mostly this is | 38 | #define PCR_N4_STRACE 0x00000008 /* Trace supervisor events */ |
34 | * to accommodate Niagara-1 which can only count insn cycles | 39 | #define PCR_N4_HTRACE 0x00000010 /* Trace hypervisor events */ |
35 | * in PICH. | 40 | #define PCR_N4_MASK 0x000007e0 /* Event mask */ |
36 | */ | 41 | #define PCR_N4_MASK_SHIFT 5 |
37 | static inline u64 picl_value(unsigned int nmi_hz) | 42 | #define PCR_N4_SL 0x0000f800 /* Event Select */ |
38 | { | 43 | #define PCR_N4_SL_SHIFT 11 |
39 | u32 delta = local_cpu_data().clock_tick / (nmi_hz << picl_shift); | 44 | #define PCR_N4_PICNPT 0x00010000 /* PIC non-privileged trap */ |
40 | 45 | #define PCR_N4_PICNHT 0x00020000 /* PIC non-hypervisor trap */ | |
41 | return ((u64)((0 - delta) & 0xffffffff)) << 32; | 46 | #define PCR_N4_NTC 0x00040000 /* Next-To-Commit wrap */ |
42 | } | ||
43 | |||
44 | extern u64 pcr_enable; | ||
45 | 47 | ||
46 | extern int pcr_arch_init(void); | 48 | extern int pcr_arch_init(void); |
47 | 49 | ||
diff --git a/arch/sparc/include/asm/perfctr.h b/arch/sparc/include/asm/perfctr.h index 3332d2cba6c1..214feefa577c 100644 --- a/arch/sparc/include/asm/perfctr.h +++ b/arch/sparc/include/asm/perfctr.h | |||
@@ -54,11 +54,6 @@ enum perfctr_opcode { | |||
54 | PERFCTR_GETPCR | 54 | PERFCTR_GETPCR |
55 | }; | 55 | }; |
56 | 56 | ||
57 | /* I don't want the kernel's namespace to be polluted with this | ||
58 | * stuff when this file is included. --DaveM | ||
59 | */ | ||
60 | #ifndef __KERNEL__ | ||
61 | |||
62 | #define PRIV 0x00000001 | 57 | #define PRIV 0x00000001 |
63 | #define SYS 0x00000002 | 58 | #define SYS 0x00000002 |
64 | #define USR 0x00000004 | 59 | #define USR 0x00000004 |
@@ -168,29 +163,4 @@ struct vcounter_struct { | |||
168 | unsigned long long vcnt1; | 163 | unsigned long long vcnt1; |
169 | }; | 164 | }; |
170 | 165 | ||
171 | #else /* !(__KERNEL__) */ | ||
172 | |||
173 | #ifndef CONFIG_SPARC32 | ||
174 | |||
175 | /* Performance counter register access. */ | ||
176 | #define read_pcr(__p) __asm__ __volatile__("rd %%pcr, %0" : "=r" (__p)) | ||
177 | #define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p)) | ||
178 | #define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p)) | ||
179 | |||
180 | /* Blackbird errata workaround. See commentary in | ||
181 | * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() | ||
182 | * for more information. | ||
183 | */ | ||
184 | #define write_pic(__p) \ | ||
185 | __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \ | ||
186 | " nop\n\t" \ | ||
187 | ".align 64\n" \ | ||
188 | "99:wr %0, 0x0, %%pic\n\t" \ | ||
189 | "rd %%pic, %%g0" : : "r" (__p)) | ||
190 | #define reset_pic() write_pic(0) | ||
191 | |||
192 | #endif /* !CONFIG_SPARC32 */ | ||
193 | |||
194 | #endif /* !(__KERNEL__) */ | ||
195 | |||
196 | #endif /* !(PERF_COUNTER_API) */ | 166 | #endif /* !(PERF_COUNTER_API) */ |
diff --git a/arch/sparc/include/asm/pstate.h b/arch/sparc/include/asm/pstate.h index a26a53777bb0..4b6b998afd99 100644 --- a/arch/sparc/include/asm/pstate.h +++ b/arch/sparc/include/asm/pstate.h | |||
@@ -88,4 +88,18 @@ | |||
88 | #define VERS_MAXTL _AC(0x000000000000ff00,UL) /* Max Trap Level. */ | 88 | #define VERS_MAXTL _AC(0x000000000000ff00,UL) /* Max Trap Level. */ |
89 | #define VERS_MAXWIN _AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/ | 89 | #define VERS_MAXWIN _AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/ |
90 | 90 | ||
91 | /* Compatability Feature Register (%asr26), SPARC-T4 and later */ | ||
92 | #define CFR_AES _AC(0x0000000000000001,UL) /* Supports AES opcodes */ | ||
93 | #define CFR_DES _AC(0x0000000000000002,UL) /* Supports DES opcodes */ | ||
94 | #define CFR_KASUMI _AC(0x0000000000000004,UL) /* Supports KASUMI opcodes */ | ||
95 | #define CFR_CAMELLIA _AC(0x0000000000000008,UL) /* Supports CAMELLIA opcodes*/ | ||
96 | #define CFR_MD5 _AC(0x0000000000000010,UL) /* Supports MD5 opcodes */ | ||
97 | #define CFR_SHA1 _AC(0x0000000000000020,UL) /* Supports SHA1 opcodes */ | ||
98 | #define CFR_SHA256 _AC(0x0000000000000040,UL) /* Supports SHA256 opcodes */ | ||
99 | #define CFR_SHA512 _AC(0x0000000000000080,UL) /* Supports SHA512 opcodes */ | ||
100 | #define CFR_MPMUL _AC(0x0000000000000100,UL) /* Supports MPMUL opcodes */ | ||
101 | #define CFR_MONTMUL _AC(0x0000000000000200,UL) /* Supports MONTMUL opcodes */ | ||
102 | #define CFR_MONTSQR _AC(0x0000000000000400,UL) /* Supports MONTSQR opcodes */ | ||
103 | #define CFR_CRC32C _AC(0x0000000000000800,UL) /* Supports CRC32C opcodes */ | ||
104 | |||
91 | #endif /* !(_SPARC64_PSTATE_H) */ | 105 | #endif /* !(_SPARC64_PSTATE_H) */ |
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index b42ddbf9651e..ee5dcced2499 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S | |||
@@ -559,10 +559,10 @@ niagara_tlb_fixup: | |||
559 | be,pt %xcc, niagara2_patch | 559 | be,pt %xcc, niagara2_patch |
560 | nop | 560 | nop |
561 | cmp %g1, SUN4V_CHIP_NIAGARA4 | 561 | cmp %g1, SUN4V_CHIP_NIAGARA4 |
562 | be,pt %xcc, niagara2_patch | 562 | be,pt %xcc, niagara4_patch |
563 | nop | 563 | nop |
564 | cmp %g1, SUN4V_CHIP_NIAGARA5 | 564 | cmp %g1, SUN4V_CHIP_NIAGARA5 |
565 | be,pt %xcc, niagara2_patch | 565 | be,pt %xcc, niagara4_patch |
566 | nop | 566 | nop |
567 | 567 | ||
568 | call generic_patch_copyops | 568 | call generic_patch_copyops |
@@ -573,6 +573,16 @@ niagara_tlb_fixup: | |||
573 | nop | 573 | nop |
574 | 574 | ||
575 | ba,a,pt %xcc, 80f | 575 | ba,a,pt %xcc, 80f |
576 | niagara4_patch: | ||
577 | call niagara4_patch_copyops | ||
578 | nop | ||
579 | call niagara_patch_bzero | ||
580 | nop | ||
581 | call niagara4_patch_pageops | ||
582 | nop | ||
583 | |||
584 | ba,a,pt %xcc, 80f | ||
585 | |||
576 | niagara2_patch: | 586 | niagara2_patch: |
577 | call niagara2_patch_copyops | 587 | call niagara2_patch_copyops |
578 | nop | 588 | nop |
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 8593672838fd..1032df43ec95 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c | |||
@@ -45,6 +45,7 @@ static struct api_info api_table[] = { | |||
45 | { .group = HV_GRP_NIU, }, | 45 | { .group = HV_GRP_NIU, }, |
46 | { .group = HV_GRP_VF_CPU, }, | 46 | { .group = HV_GRP_VF_CPU, }, |
47 | { .group = HV_GRP_KT_CPU, }, | 47 | { .group = HV_GRP_KT_CPU, }, |
48 | { .group = HV_GRP_VT_CPU, }, | ||
48 | { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, | 49 | { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, |
49 | }; | 50 | }; |
50 | 51 | ||
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index 58d60de4d65b..f3ab509b76a8 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S | |||
@@ -805,3 +805,19 @@ ENTRY(sun4v_reboot_data_set) | |||
805 | retl | 805 | retl |
806 | nop | 806 | nop |
807 | ENDPROC(sun4v_reboot_data_set) | 807 | ENDPROC(sun4v_reboot_data_set) |
808 | |||
809 | ENTRY(sun4v_vt_get_perfreg) | ||
810 | mov %o1, %o4 | ||
811 | mov HV_FAST_VT_GET_PERFREG, %o5 | ||
812 | ta HV_FAST_TRAP | ||
813 | stx %o1, [%o4] | ||
814 | retl | ||
815 | nop | ||
816 | ENDPROC(sun4v_vt_get_perfreg) | ||
817 | |||
818 | ENTRY(sun4v_vt_set_perfreg) | ||
819 | mov HV_FAST_VT_SET_PERFREG, %o5 | ||
820 | ta HV_FAST_TRAP | ||
821 | retl | ||
822 | nop | ||
823 | ENDPROC(sun4v_vt_set_perfreg) | ||
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 79f310364849..0746e5e32b37 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S | |||
@@ -188,31 +188,26 @@ valid_addr_bitmap_patch: | |||
188 | be,pn %xcc, kvmap_dtlb_longpath | 188 | be,pn %xcc, kvmap_dtlb_longpath |
189 | 189 | ||
190 | 2: sethi %hi(kpte_linear_bitmap), %g2 | 190 | 2: sethi %hi(kpte_linear_bitmap), %g2 |
191 | or %g2, %lo(kpte_linear_bitmap), %g2 | ||
192 | 191 | ||
193 | /* Get the 256MB physical address index. */ | 192 | /* Get the 256MB physical address index. */ |
194 | sllx %g4, 21, %g5 | 193 | sllx %g4, 21, %g5 |
195 | mov 1, %g7 | 194 | or %g2, %lo(kpte_linear_bitmap), %g2 |
196 | srlx %g5, 21 + 28, %g5 | 195 | srlx %g5, 21 + 28, %g5 |
196 | and %g5, (32 - 1), %g7 | ||
197 | 197 | ||
198 | /* Don't try this at home kids... this depends upon srlx | 198 | /* Divide by 32 to get the offset into the bitmask. */ |
199 | * only taking the low 6 bits of the shift count in %g5. | 199 | srlx %g5, 5, %g5 |
200 | */ | 200 | add %g7, %g7, %g7 |
201 | sllx %g7, %g5, %g7 | ||
202 | |||
203 | /* Divide by 64 to get the offset into the bitmask. */ | ||
204 | srlx %g5, 6, %g5 | ||
205 | sllx %g5, 3, %g5 | 201 | sllx %g5, 3, %g5 |
206 | 202 | ||
207 | /* kern_linear_pte_xor[((mask & bit) ? 1 : 0)] */ | 203 | /* kern_linear_pte_xor[(mask >> shift) & 3)] */ |
208 | ldx [%g2 + %g5], %g2 | 204 | ldx [%g2 + %g5], %g2 |
209 | andcc %g2, %g7, %g0 | 205 | srlx %g2, %g7, %g7 |
210 | sethi %hi(kern_linear_pte_xor), %g5 | 206 | sethi %hi(kern_linear_pte_xor), %g5 |
207 | and %g7, 3, %g7 | ||
211 | or %g5, %lo(kern_linear_pte_xor), %g5 | 208 | or %g5, %lo(kern_linear_pte_xor), %g5 |
212 | bne,a,pt %xcc, 1f | 209 | sllx %g7, 3, %g7 |
213 | add %g5, 8, %g5 | 210 | ldx [%g5 + %g7], %g2 |
214 | |||
215 | 1: ldx [%g5], %g2 | ||
216 | 211 | ||
217 | .globl kvmap_linear_patch | 212 | .globl kvmap_linear_patch |
218 | kvmap_linear_patch: | 213 | kvmap_linear_patch: |
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 6dc796280589..831c001604e8 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c | |||
@@ -817,6 +817,30 @@ void __cpuinit mdesc_populate_present_mask(cpumask_t *mask) | |||
817 | mdesc_iterate_over_cpus(record_one_cpu, NULL, mask); | 817 | mdesc_iterate_over_cpus(record_one_cpu, NULL, mask); |
818 | } | 818 | } |
819 | 819 | ||
820 | static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) | ||
821 | { | ||
822 | const u64 *pgsz_prop = mdesc_get_property(hp, mp, "mmu-page-size-list", NULL); | ||
823 | unsigned long *pgsz_mask = arg; | ||
824 | u64 val; | ||
825 | |||
826 | val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K | | ||
827 | HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB); | ||
828 | if (pgsz_prop) | ||
829 | val = *pgsz_prop; | ||
830 | |||
831 | if (!*pgsz_mask) | ||
832 | *pgsz_mask = val; | ||
833 | else | ||
834 | *pgsz_mask &= val; | ||
835 | return NULL; | ||
836 | } | ||
837 | |||
838 | void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask) | ||
839 | { | ||
840 | *pgsz_mask = 0; | ||
841 | mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask); | ||
842 | } | ||
843 | |||
820 | static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) | 844 | static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) |
821 | { | 845 | { |
822 | const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); | 846 | const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); |
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index eb1c1f010a47..6479256fd5a4 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <asm/perf_event.h> | 22 | #include <asm/perf_event.h> |
23 | #include <asm/ptrace.h> | 23 | #include <asm/ptrace.h> |
24 | #include <asm/pcr.h> | 24 | #include <asm/pcr.h> |
25 | #include <asm/perfctr.h> | ||
26 | 25 | ||
27 | #include "kstack.h" | 26 | #include "kstack.h" |
28 | 27 | ||
@@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) | |||
109 | pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) | 108 | pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) |
110 | touched = 1; | 109 | touched = 1; |
111 | else | 110 | else |
112 | pcr_ops->write(PCR_PIC_PRIV); | 111 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
113 | 112 | ||
114 | sum = local_cpu_data().irq0_irqs; | 113 | sum = local_cpu_data().irq0_irqs; |
115 | if (__get_cpu_var(nmi_touch)) { | 114 | if (__get_cpu_var(nmi_touch)) { |
@@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) | |||
126 | __this_cpu_write(alert_counter, 0); | 125 | __this_cpu_write(alert_counter, 0); |
127 | } | 126 | } |
128 | if (__get_cpu_var(wd_enabled)) { | 127 | if (__get_cpu_var(wd_enabled)) { |
129 | write_pic(picl_value(nmi_hz)); | 128 | pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); |
130 | pcr_ops->write(pcr_enable); | 129 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); |
131 | } | 130 | } |
132 | 131 | ||
133 | restore_hardirq_stack(orig_sp); | 132 | restore_hardirq_stack(orig_sp); |
@@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) | |||
166 | 165 | ||
167 | void stop_nmi_watchdog(void *unused) | 166 | void stop_nmi_watchdog(void *unused) |
168 | { | 167 | { |
169 | pcr_ops->write(PCR_PIC_PRIV); | 168 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
170 | __get_cpu_var(wd_enabled) = 0; | 169 | __get_cpu_var(wd_enabled) = 0; |
171 | atomic_dec(&nmi_active); | 170 | atomic_dec(&nmi_active); |
172 | } | 171 | } |
@@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused) | |||
223 | __get_cpu_var(wd_enabled) = 1; | 222 | __get_cpu_var(wd_enabled) = 1; |
224 | atomic_inc(&nmi_active); | 223 | atomic_inc(&nmi_active); |
225 | 224 | ||
226 | pcr_ops->write(PCR_PIC_PRIV); | 225 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
227 | write_pic(picl_value(nmi_hz)); | 226 | pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); |
228 | 227 | ||
229 | pcr_ops->write(pcr_enable); | 228 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); |
230 | } | 229 | } |
231 | 230 | ||
232 | static void nmi_adjust_hz_one(void *unused) | 231 | static void nmi_adjust_hz_one(void *unused) |
@@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused) | |||
234 | if (!__get_cpu_var(wd_enabled)) | 233 | if (!__get_cpu_var(wd_enabled)) |
235 | return; | 234 | return; |
236 | 235 | ||
237 | pcr_ops->write(PCR_PIC_PRIV); | 236 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); |
238 | write_pic(picl_value(nmi_hz)); | 237 | pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); |
239 | 238 | ||
240 | pcr_ops->write(pcr_enable); | 239 | pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); |
241 | } | 240 | } |
242 | 241 | ||
243 | void nmi_adjust_hz(unsigned int new_hz) | 242 | void nmi_adjust_hz(unsigned int new_hz) |
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 7661e84a05a0..051b69caeffd 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c | |||
@@ -594,7 +594,7 @@ static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm) | |||
594 | printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n", | 594 | printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n", |
595 | vdma[0], vdma[1]); | 595 | vdma[0], vdma[1]); |
596 | return -EINVAL; | 596 | return -EINVAL; |
597 | }; | 597 | } |
598 | 598 | ||
599 | dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL); | 599 | dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL); |
600 | num_tsb_entries = vdma[1] / IO_PAGE_SIZE; | 600 | num_tsb_entries = vdma[1] / IO_PAGE_SIZE; |
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 0ce0dd2332aa..269af58497aa 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c | |||
@@ -13,23 +13,14 @@ | |||
13 | #include <asm/pil.h> | 13 | #include <asm/pil.h> |
14 | #include <asm/pcr.h> | 14 | #include <asm/pcr.h> |
15 | #include <asm/nmi.h> | 15 | #include <asm/nmi.h> |
16 | #include <asm/asi.h> | ||
16 | #include <asm/spitfire.h> | 17 | #include <asm/spitfire.h> |
17 | #include <asm/perfctr.h> | ||
18 | 18 | ||
19 | /* This code is shared between various users of the performance | 19 | /* This code is shared between various users of the performance |
20 | * counters. Users will be oprofile, pseudo-NMI watchdog, and the | 20 | * counters. Users will be oprofile, pseudo-NMI watchdog, and the |
21 | * perf_event support layer. | 21 | * perf_event support layer. |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) | ||
25 | #define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ | ||
26 | PCR_N2_TOE_OV1 | \ | ||
27 | (2 << PCR_N2_SL1_SHIFT) | \ | ||
28 | (0xff << PCR_N2_MASK1_SHIFT)) | ||
29 | |||
30 | u64 pcr_enable; | ||
31 | unsigned int picl_shift; | ||
32 | |||
33 | /* Performance counter interrupts run unmasked at PIL level 15. | 24 | /* Performance counter interrupts run unmasked at PIL level 15. |
34 | * Therefore we can't do things like wakeups and other work | 25 | * Therefore we can't do things like wakeups and other work |
35 | * that expects IRQ disabling to be adhered to in locking etc. | 26 | * that expects IRQ disabling to be adhered to in locking etc. |
@@ -60,39 +51,144 @@ void arch_irq_work_raise(void) | |||
60 | const struct pcr_ops *pcr_ops; | 51 | const struct pcr_ops *pcr_ops; |
61 | EXPORT_SYMBOL_GPL(pcr_ops); | 52 | EXPORT_SYMBOL_GPL(pcr_ops); |
62 | 53 | ||
63 | static u64 direct_pcr_read(void) | 54 | static u64 direct_pcr_read(unsigned long reg_num) |
64 | { | 55 | { |
65 | u64 val; | 56 | u64 val; |
66 | 57 | ||
67 | read_pcr(val); | 58 | WARN_ON_ONCE(reg_num != 0); |
59 | __asm__ __volatile__("rd %%pcr, %0" : "=r" (val)); | ||
68 | return val; | 60 | return val; |
69 | } | 61 | } |
70 | 62 | ||
71 | static void direct_pcr_write(u64 val) | 63 | static void direct_pcr_write(unsigned long reg_num, u64 val) |
64 | { | ||
65 | WARN_ON_ONCE(reg_num != 0); | ||
66 | __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val)); | ||
67 | } | ||
68 | |||
69 | static u64 direct_pic_read(unsigned long reg_num) | ||
72 | { | 70 | { |
73 | write_pcr(val); | 71 | u64 val; |
72 | |||
73 | WARN_ON_ONCE(reg_num != 0); | ||
74 | __asm__ __volatile__("rd %%pic, %0" : "=r" (val)); | ||
75 | return val; | ||
76 | } | ||
77 | |||
78 | static void direct_pic_write(unsigned long reg_num, u64 val) | ||
79 | { | ||
80 | WARN_ON_ONCE(reg_num != 0); | ||
81 | |||
82 | /* Blackbird errata workaround. See commentary in | ||
83 | * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() | ||
84 | * for more information. | ||
85 | */ | ||
86 | __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" | ||
87 | " nop\n\t" | ||
88 | ".align 64\n" | ||
89 | "99:wr %0, 0x0, %%pic\n\t" | ||
90 | "rd %%pic, %%g0" : : "r" (val)); | ||
91 | } | ||
92 | |||
93 | static u64 direct_picl_value(unsigned int nmi_hz) | ||
94 | { | ||
95 | u32 delta = local_cpu_data().clock_tick / nmi_hz; | ||
96 | |||
97 | return ((u64)((0 - delta) & 0xffffffff)) << 32; | ||
74 | } | 98 | } |
75 | 99 | ||
76 | static const struct pcr_ops direct_pcr_ops = { | 100 | static const struct pcr_ops direct_pcr_ops = { |
77 | .read = direct_pcr_read, | 101 | .read_pcr = direct_pcr_read, |
78 | .write = direct_pcr_write, | 102 | .write_pcr = direct_pcr_write, |
103 | .read_pic = direct_pic_read, | ||
104 | .write_pic = direct_pic_write, | ||
105 | .nmi_picl_value = direct_picl_value, | ||
106 | .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE), | ||
107 | .pcr_nmi_disable = PCR_PIC_PRIV, | ||
79 | }; | 108 | }; |
80 | 109 | ||
81 | static void n2_pcr_write(u64 val) | 110 | static void n2_pcr_write(unsigned long reg_num, u64 val) |
82 | { | 111 | { |
83 | unsigned long ret; | 112 | unsigned long ret; |
84 | 113 | ||
114 | WARN_ON_ONCE(reg_num != 0); | ||
85 | if (val & PCR_N2_HTRACE) { | 115 | if (val & PCR_N2_HTRACE) { |
86 | ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); | 116 | ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); |
87 | if (ret != HV_EOK) | 117 | if (ret != HV_EOK) |
88 | write_pcr(val); | 118 | direct_pcr_write(reg_num, val); |
89 | } else | 119 | } else |
90 | write_pcr(val); | 120 | direct_pcr_write(reg_num, val); |
121 | } | ||
122 | |||
123 | static u64 n2_picl_value(unsigned int nmi_hz) | ||
124 | { | ||
125 | u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2); | ||
126 | |||
127 | return ((u64)((0 - delta) & 0xffffffff)) << 32; | ||
91 | } | 128 | } |
92 | 129 | ||
93 | static const struct pcr_ops n2_pcr_ops = { | 130 | static const struct pcr_ops n2_pcr_ops = { |
94 | .read = direct_pcr_read, | 131 | .read_pcr = direct_pcr_read, |
95 | .write = n2_pcr_write, | 132 | .write_pcr = n2_pcr_write, |
133 | .read_pic = direct_pic_read, | ||
134 | .write_pic = direct_pic_write, | ||
135 | .nmi_picl_value = n2_picl_value, | ||
136 | .pcr_nmi_enable = (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | | ||
137 | PCR_N2_TOE_OV1 | | ||
138 | (2 << PCR_N2_SL1_SHIFT) | | ||
139 | (0xff << PCR_N2_MASK1_SHIFT)), | ||
140 | .pcr_nmi_disable = PCR_PIC_PRIV, | ||
141 | }; | ||
142 | |||
143 | static u64 n4_pcr_read(unsigned long reg_num) | ||
144 | { | ||
145 | unsigned long val; | ||
146 | |||
147 | (void) sun4v_vt_get_perfreg(reg_num, &val); | ||
148 | |||
149 | return val; | ||
150 | } | ||
151 | |||
152 | static void n4_pcr_write(unsigned long reg_num, u64 val) | ||
153 | { | ||
154 | (void) sun4v_vt_set_perfreg(reg_num, val); | ||
155 | } | ||
156 | |||
157 | static u64 n4_pic_read(unsigned long reg_num) | ||
158 | { | ||
159 | unsigned long val; | ||
160 | |||
161 | __asm__ __volatile__("ldxa [%1] %2, %0" | ||
162 | : "=r" (val) | ||
163 | : "r" (reg_num * 0x8UL), "i" (ASI_PIC)); | ||
164 | |||
165 | return val; | ||
166 | } | ||
167 | |||
168 | static void n4_pic_write(unsigned long reg_num, u64 val) | ||
169 | { | ||
170 | __asm__ __volatile__("stxa %0, [%1] %2" | ||
171 | : /* no outputs */ | ||
172 | : "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC)); | ||
173 | } | ||
174 | |||
175 | static u64 n4_picl_value(unsigned int nmi_hz) | ||
176 | { | ||
177 | u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2); | ||
178 | |||
179 | return ((u64)((0 - delta) & 0xffffffff)); | ||
180 | } | ||
181 | |||
182 | static const struct pcr_ops n4_pcr_ops = { | ||
183 | .read_pcr = n4_pcr_read, | ||
184 | .write_pcr = n4_pcr_write, | ||
185 | .read_pic = n4_pic_read, | ||
186 | .write_pic = n4_pic_write, | ||
187 | .nmi_picl_value = n4_picl_value, | ||
188 | .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | | ||
189 | PCR_N4_UTRACE | PCR_N4_TOE | | ||
190 | (26 << PCR_N4_SL_SHIFT)), | ||
191 | .pcr_nmi_disable = PCR_N4_PICNPT, | ||
96 | }; | 192 | }; |
97 | 193 | ||
98 | static unsigned long perf_hsvc_group; | 194 | static unsigned long perf_hsvc_group; |
@@ -115,6 +211,10 @@ static int __init register_perf_hsvc(void) | |||
115 | perf_hsvc_group = HV_GRP_KT_CPU; | 211 | perf_hsvc_group = HV_GRP_KT_CPU; |
116 | break; | 212 | break; |
117 | 213 | ||
214 | case SUN4V_CHIP_NIAGARA4: | ||
215 | perf_hsvc_group = HV_GRP_VT_CPU; | ||
216 | break; | ||
217 | |||
118 | default: | 218 | default: |
119 | return -ENODEV; | 219 | return -ENODEV; |
120 | } | 220 | } |
@@ -139,6 +239,29 @@ static void __init unregister_perf_hsvc(void) | |||
139 | sun4v_hvapi_unregister(perf_hsvc_group); | 239 | sun4v_hvapi_unregister(perf_hsvc_group); |
140 | } | 240 | } |
141 | 241 | ||
242 | static int __init setup_sun4v_pcr_ops(void) | ||
243 | { | ||
244 | int ret = 0; | ||
245 | |||
246 | switch (sun4v_chip_type) { | ||
247 | case SUN4V_CHIP_NIAGARA1: | ||
248 | case SUN4V_CHIP_NIAGARA2: | ||
249 | case SUN4V_CHIP_NIAGARA3: | ||
250 | pcr_ops = &n2_pcr_ops; | ||
251 | break; | ||
252 | |||
253 | case SUN4V_CHIP_NIAGARA4: | ||
254 | pcr_ops = &n4_pcr_ops; | ||
255 | break; | ||
256 | |||
257 | default: | ||
258 | ret = -ENODEV; | ||
259 | break; | ||
260 | } | ||
261 | |||
262 | return ret; | ||
263 | } | ||
264 | |||
142 | int __init pcr_arch_init(void) | 265 | int __init pcr_arch_init(void) |
143 | { | 266 | { |
144 | int err = register_perf_hsvc(); | 267 | int err = register_perf_hsvc(); |
@@ -148,15 +271,14 @@ int __init pcr_arch_init(void) | |||
148 | 271 | ||
149 | switch (tlb_type) { | 272 | switch (tlb_type) { |
150 | case hypervisor: | 273 | case hypervisor: |
151 | pcr_ops = &n2_pcr_ops; | 274 | err = setup_sun4v_pcr_ops(); |
152 | pcr_enable = PCR_N2_ENABLE; | 275 | if (err) |
153 | picl_shift = 2; | 276 | goto out_unregister; |
154 | break; | 277 | break; |
155 | 278 | ||
156 | case cheetah: | 279 | case cheetah: |
157 | case cheetah_plus: | 280 | case cheetah_plus: |
158 | pcr_ops = &direct_pcr_ops; | 281 | pcr_ops = &direct_pcr_ops; |
159 | pcr_enable = PCR_SUN4U_ENABLE; | ||
160 | break; | 282 | break; |
161 | 283 | ||
162 | case spitfire: | 284 | case spitfire: |
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 5713957dcb8a..e48651dace1b 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c | |||
@@ -25,36 +25,48 @@ | |||
25 | #include <linux/atomic.h> | 25 | #include <linux/atomic.h> |
26 | #include <asm/nmi.h> | 26 | #include <asm/nmi.h> |
27 | #include <asm/pcr.h> | 27 | #include <asm/pcr.h> |
28 | #include <asm/perfctr.h> | ||
29 | #include <asm/cacheflush.h> | 28 | #include <asm/cacheflush.h> |
30 | 29 | ||
31 | #include "kernel.h" | 30 | #include "kernel.h" |
32 | #include "kstack.h" | 31 | #include "kstack.h" |
33 | 32 | ||
34 | /* Sparc64 chips have two performance counters, 32-bits each, with | 33 | /* Two classes of sparc64 chips currently exist. All of which have |
35 | * overflow interrupts generated on transition from 0xffffffff to 0. | 34 | * 32-bit counters which can generate overflow interrupts on the |
36 | * The counters are accessed in one go using a 64-bit register. | 35 | * transition from 0xffffffff to 0. |
37 | * | 36 | * |
38 | * Both counters are controlled using a single control register. The | 37 | * All chips upto and including SPARC-T3 have two performance |
39 | * only way to stop all sampling is to clear all of the context (user, | 38 | * counters. The two 32-bit counters are accessed in one go using a |
40 | * supervisor, hypervisor) sampling enable bits. But these bits apply | 39 | * single 64-bit register. |
41 | * to both counters, thus the two counters can't be enabled/disabled | ||
42 | * individually. | ||
43 | * | 40 | * |
44 | * The control register has two event fields, one for each of the two | 41 | * On these older chips both counters are controlled using a single |
45 | * counters. It's thus nearly impossible to have one counter going | 42 | * control register. The only way to stop all sampling is to clear |
46 | * while keeping the other one stopped. Therefore it is possible to | 43 | * all of the context (user, supervisor, hypervisor) sampling enable |
47 | * get overflow interrupts for counters not currently "in use" and | 44 | * bits. But these bits apply to both counters, thus the two counters |
48 | * that condition must be checked in the overflow interrupt handler. | 45 | * can't be enabled/disabled individually. |
46 | * | ||
47 | * Furthermore, the control register on these older chips have two | ||
48 | * event fields, one for each of the two counters. It's thus nearly | ||
49 | * impossible to have one counter going while keeping the other one | ||
50 | * stopped. Therefore it is possible to get overflow interrupts for | ||
51 | * counters not currently "in use" and that condition must be checked | ||
52 | * in the overflow interrupt handler. | ||
49 | * | 53 | * |
50 | * So we use a hack, in that we program inactive counters with the | 54 | * So we use a hack, in that we program inactive counters with the |
51 | * "sw_count0" and "sw_count1" events. These count how many times | 55 | * "sw_count0" and "sw_count1" events. These count how many times |
52 | * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an | 56 | * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an |
53 | * unusual way to encode a NOP and therefore will not trigger in | 57 | * unusual way to encode a NOP and therefore will not trigger in |
54 | * normal code. | 58 | * normal code. |
59 | * | ||
60 | * Starting with SPARC-T4 we have one control register per counter. | ||
61 | * And the counters are stored in individual registers. The registers | ||
62 | * for the counters are 64-bit but only a 32-bit counter is | ||
63 | * implemented. The event selections on SPARC-T4 lack any | ||
64 | * restrictions, therefore we can elide all of the complicated | ||
65 | * conflict resolution code we have for SPARC-T3 and earlier chips. | ||
55 | */ | 66 | */ |
56 | 67 | ||
57 | #define MAX_HWEVENTS 2 | 68 | #define MAX_HWEVENTS 4 |
69 | #define MAX_PCRS 4 | ||
58 | #define MAX_PERIOD ((1UL << 32) - 1) | 70 | #define MAX_PERIOD ((1UL << 32) - 1) |
59 | 71 | ||
60 | #define PIC_UPPER_INDEX 0 | 72 | #define PIC_UPPER_INDEX 0 |
@@ -90,8 +102,8 @@ struct cpu_hw_events { | |||
90 | */ | 102 | */ |
91 | int current_idx[MAX_HWEVENTS]; | 103 | int current_idx[MAX_HWEVENTS]; |
92 | 104 | ||
93 | /* Software copy of %pcr register on this cpu. */ | 105 | /* Software copy of %pcr register(s) on this cpu. */ |
94 | u64 pcr; | 106 | u64 pcr[MAX_HWEVENTS]; |
95 | 107 | ||
96 | /* Enabled/disable state. */ | 108 | /* Enabled/disable state. */ |
97 | int enabled; | 109 | int enabled; |
@@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; | |||
103 | /* An event map describes the characteristics of a performance | 115 | /* An event map describes the characteristics of a performance |
104 | * counter event. In particular it gives the encoding as well as | 116 | * counter event. In particular it gives the encoding as well as |
105 | * a mask telling which counters the event can be measured on. | 117 | * a mask telling which counters the event can be measured on. |
118 | * | ||
119 | * The mask is unused on SPARC-T4 and later. | ||
106 | */ | 120 | */ |
107 | struct perf_event_map { | 121 | struct perf_event_map { |
108 | u16 encoding; | 122 | u16 encoding; |
@@ -142,15 +156,53 @@ struct sparc_pmu { | |||
142 | const struct perf_event_map *(*event_map)(int); | 156 | const struct perf_event_map *(*event_map)(int); |
143 | const cache_map_t *cache_map; | 157 | const cache_map_t *cache_map; |
144 | int max_events; | 158 | int max_events; |
159 | u32 (*read_pmc)(int); | ||
160 | void (*write_pmc)(int, u64); | ||
145 | int upper_shift; | 161 | int upper_shift; |
146 | int lower_shift; | 162 | int lower_shift; |
147 | int event_mask; | 163 | int event_mask; |
164 | int user_bit; | ||
165 | int priv_bit; | ||
148 | int hv_bit; | 166 | int hv_bit; |
149 | int irq_bit; | 167 | int irq_bit; |
150 | int upper_nop; | 168 | int upper_nop; |
151 | int lower_nop; | 169 | int lower_nop; |
170 | unsigned int flags; | ||
171 | #define SPARC_PMU_ALL_EXCLUDES_SAME 0x00000001 | ||
172 | #define SPARC_PMU_HAS_CONFLICTS 0x00000002 | ||
173 | int max_hw_events; | ||
174 | int num_pcrs; | ||
175 | int num_pic_regs; | ||
152 | }; | 176 | }; |
153 | 177 | ||
178 | static u32 sparc_default_read_pmc(int idx) | ||
179 | { | ||
180 | u64 val; | ||
181 | |||
182 | val = pcr_ops->read_pic(0); | ||
183 | if (idx == PIC_UPPER_INDEX) | ||
184 | val >>= 32; | ||
185 | |||
186 | return val & 0xffffffff; | ||
187 | } | ||
188 | |||
189 | static void sparc_default_write_pmc(int idx, u64 val) | ||
190 | { | ||
191 | u64 shift, mask, pic; | ||
192 | |||
193 | shift = 0; | ||
194 | if (idx == PIC_UPPER_INDEX) | ||
195 | shift = 32; | ||
196 | |||
197 | mask = ((u64) 0xffffffff) << shift; | ||
198 | val <<= shift; | ||
199 | |||
200 | pic = pcr_ops->read_pic(0); | ||
201 | pic &= ~mask; | ||
202 | pic |= val; | ||
203 | pcr_ops->write_pic(0, pic); | ||
204 | } | ||
205 | |||
154 | static const struct perf_event_map ultra3_perfmon_event_map[] = { | 206 | static const struct perf_event_map ultra3_perfmon_event_map[] = { |
155 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, | 207 | [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, |
156 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, | 208 | [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, |
@@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = { | |||
268 | .event_map = ultra3_event_map, | 320 | .event_map = ultra3_event_map, |
269 | .cache_map = &ultra3_cache_map, | 321 | .cache_map = &ultra3_cache_map, |
270 | .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), | 322 | .max_events = ARRAY_SIZE(ultra3_perfmon_event_map), |
323 | .read_pmc = sparc_default_read_pmc, | ||
324 | .write_pmc = sparc_default_write_pmc, | ||
271 | .upper_shift = 11, | 325 | .upper_shift = 11, |
272 | .lower_shift = 4, | 326 | .lower_shift = 4, |
273 | .event_mask = 0x3f, | 327 | .event_mask = 0x3f, |
328 | .user_bit = PCR_UTRACE, | ||
329 | .priv_bit = PCR_STRACE, | ||
274 | .upper_nop = 0x1c, | 330 | .upper_nop = 0x1c, |
275 | .lower_nop = 0x14, | 331 | .lower_nop = 0x14, |
332 | .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | | ||
333 | SPARC_PMU_HAS_CONFLICTS), | ||
334 | .max_hw_events = 2, | ||
335 | .num_pcrs = 1, | ||
336 | .num_pic_regs = 1, | ||
276 | }; | 337 | }; |
277 | 338 | ||
278 | /* Niagara1 is very limited. The upper PIC is hard-locked to count | 339 | /* Niagara1 is very limited. The upper PIC is hard-locked to count |
@@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = { | |||
397 | .event_map = niagara1_event_map, | 458 | .event_map = niagara1_event_map, |
398 | .cache_map = &niagara1_cache_map, | 459 | .cache_map = &niagara1_cache_map, |
399 | .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), | 460 | .max_events = ARRAY_SIZE(niagara1_perfmon_event_map), |
461 | .read_pmc = sparc_default_read_pmc, | ||
462 | .write_pmc = sparc_default_write_pmc, | ||
400 | .upper_shift = 0, | 463 | .upper_shift = 0, |
401 | .lower_shift = 4, | 464 | .lower_shift = 4, |
402 | .event_mask = 0x7, | 465 | .event_mask = 0x7, |
466 | .user_bit = PCR_UTRACE, | ||
467 | .priv_bit = PCR_STRACE, | ||
403 | .upper_nop = 0x0, | 468 | .upper_nop = 0x0, |
404 | .lower_nop = 0x0, | 469 | .lower_nop = 0x0, |
470 | .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | | ||
471 | SPARC_PMU_HAS_CONFLICTS), | ||
472 | .max_hw_events = 2, | ||
473 | .num_pcrs = 1, | ||
474 | .num_pic_regs = 1, | ||
405 | }; | 475 | }; |
406 | 476 | ||
407 | static const struct perf_event_map niagara2_perfmon_event_map[] = { | 477 | static const struct perf_event_map niagara2_perfmon_event_map[] = { |
@@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = { | |||
523 | .event_map = niagara2_event_map, | 593 | .event_map = niagara2_event_map, |
524 | .cache_map = &niagara2_cache_map, | 594 | .cache_map = &niagara2_cache_map, |
525 | .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), | 595 | .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), |
596 | .read_pmc = sparc_default_read_pmc, | ||
597 | .write_pmc = sparc_default_write_pmc, | ||
526 | .upper_shift = 19, | 598 | .upper_shift = 19, |
527 | .lower_shift = 6, | 599 | .lower_shift = 6, |
528 | .event_mask = 0xfff, | 600 | .event_mask = 0xfff, |
529 | .hv_bit = 0x8, | 601 | .user_bit = PCR_UTRACE, |
602 | .priv_bit = PCR_STRACE, | ||
603 | .hv_bit = PCR_N2_HTRACE, | ||
530 | .irq_bit = 0x30, | 604 | .irq_bit = 0x30, |
531 | .upper_nop = 0x220, | 605 | .upper_nop = 0x220, |
532 | .lower_nop = 0x220, | 606 | .lower_nop = 0x220, |
607 | .flags = (SPARC_PMU_ALL_EXCLUDES_SAME | | ||
608 | SPARC_PMU_HAS_CONFLICTS), | ||
609 | .max_hw_events = 2, | ||
610 | .num_pcrs = 1, | ||
611 | .num_pic_regs = 1, | ||
612 | }; | ||
613 | |||
614 | static const struct perf_event_map niagara4_perfmon_event_map[] = { | ||
615 | [PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) }, | ||
616 | [PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f }, | ||
617 | [PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 }, | ||
618 | [PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 }, | ||
619 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 }, | ||
620 | [PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f }, | ||
621 | }; | ||
622 | |||
623 | static const struct perf_event_map *niagara4_event_map(int event_id) | ||
624 | { | ||
625 | return &niagara4_perfmon_event_map[event_id]; | ||
626 | } | ||
627 | |||
628 | static const cache_map_t niagara4_cache_map = { | ||
629 | [C(L1D)] = { | ||
630 | [C(OP_READ)] = { | ||
631 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, | ||
632 | [C(RESULT_MISS)] = { (16 << 6) | 0x07 }, | ||
633 | }, | ||
634 | [C(OP_WRITE)] = { | ||
635 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, | ||
636 | [C(RESULT_MISS)] = { (16 << 6) | 0x07 }, | ||
637 | }, | ||
638 | [C(OP_PREFETCH)] = { | ||
639 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
640 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
641 | }, | ||
642 | }, | ||
643 | [C(L1I)] = { | ||
644 | [C(OP_READ)] = { | ||
645 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x3f }, | ||
646 | [C(RESULT_MISS)] = { (11 << 6) | 0x03 }, | ||
647 | }, | ||
648 | [ C(OP_WRITE) ] = { | ||
649 | [ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, | ||
650 | [ C(RESULT_MISS) ] = { CACHE_OP_NONSENSE }, | ||
651 | }, | ||
652 | [ C(OP_PREFETCH) ] = { | ||
653 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
654 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
655 | }, | ||
656 | }, | ||
657 | [C(LL)] = { | ||
658 | [C(OP_READ)] = { | ||
659 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, | ||
660 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
661 | }, | ||
662 | [C(OP_WRITE)] = { | ||
663 | [C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, | ||
664 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
665 | }, | ||
666 | [C(OP_PREFETCH)] = { | ||
667 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
668 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
669 | }, | ||
670 | }, | ||
671 | [C(DTLB)] = { | ||
672 | [C(OP_READ)] = { | ||
673 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
674 | [C(RESULT_MISS)] = { (17 << 6) | 0x3f }, | ||
675 | }, | ||
676 | [ C(OP_WRITE) ] = { | ||
677 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
678 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
679 | }, | ||
680 | [ C(OP_PREFETCH) ] = { | ||
681 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
682 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
683 | }, | ||
684 | }, | ||
685 | [C(ITLB)] = { | ||
686 | [C(OP_READ)] = { | ||
687 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
688 | [C(RESULT_MISS)] = { (6 << 6) | 0x3f }, | ||
689 | }, | ||
690 | [ C(OP_WRITE) ] = { | ||
691 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
692 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
693 | }, | ||
694 | [ C(OP_PREFETCH) ] = { | ||
695 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
696 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
697 | }, | ||
698 | }, | ||
699 | [C(BPU)] = { | ||
700 | [C(OP_READ)] = { | ||
701 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
702 | [C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, | ||
703 | }, | ||
704 | [ C(OP_WRITE) ] = { | ||
705 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
706 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
707 | }, | ||
708 | [ C(OP_PREFETCH) ] = { | ||
709 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
710 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
711 | }, | ||
712 | }, | ||
713 | [C(NODE)] = { | ||
714 | [C(OP_READ)] = { | ||
715 | [C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, | ||
716 | [C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
717 | }, | ||
718 | [ C(OP_WRITE) ] = { | ||
719 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
720 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
721 | }, | ||
722 | [ C(OP_PREFETCH) ] = { | ||
723 | [ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, | ||
724 | [ C(RESULT_MISS) ] = { CACHE_OP_UNSUPPORTED }, | ||
725 | }, | ||
726 | }, | ||
727 | }; | ||
728 | |||
729 | static u32 sparc_vt_read_pmc(int idx) | ||
730 | { | ||
731 | u64 val = pcr_ops->read_pic(idx); | ||
732 | |||
733 | return val & 0xffffffff; | ||
734 | } | ||
735 | |||
736 | static void sparc_vt_write_pmc(int idx, u64 val) | ||
737 | { | ||
738 | u64 pcr; | ||
739 | |||
740 | /* There seems to be an internal latch on the overflow event | ||
741 | * on SPARC-T4 that prevents it from triggering unless you | ||
742 | * update the PIC exactly as we do here. The requirement | ||
743 | * seems to be that you have to turn off event counting in the | ||
744 | * PCR around the PIC update. | ||
745 | * | ||
746 | * For example, after the following sequence: | ||
747 | * | ||
748 | * 1) set PIC to -1 | ||
749 | * 2) enable event counting and overflow reporting in PCR | ||
750 | * 3) overflow triggers, softint 15 handler invoked | ||
751 | * 4) clear OV bit in PCR | ||
752 | * 5) write PIC to -1 | ||
753 | * | ||
754 | * a subsequent overflow event will not trigger. This | ||
755 | * sequence works on SPARC-T3 and previous chips. | ||
756 | */ | ||
757 | pcr = pcr_ops->read_pcr(idx); | ||
758 | pcr_ops->write_pcr(idx, PCR_N4_PICNPT); | ||
759 | |||
760 | pcr_ops->write_pic(idx, val & 0xffffffff); | ||
761 | |||
762 | pcr_ops->write_pcr(idx, pcr); | ||
763 | } | ||
764 | |||
765 | static const struct sparc_pmu niagara4_pmu = { | ||
766 | .event_map = niagara4_event_map, | ||
767 | .cache_map = &niagara4_cache_map, | ||
768 | .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), | ||
769 | .read_pmc = sparc_vt_read_pmc, | ||
770 | .write_pmc = sparc_vt_write_pmc, | ||
771 | .upper_shift = 5, | ||
772 | .lower_shift = 5, | ||
773 | .event_mask = 0x7ff, | ||
774 | .user_bit = PCR_N4_UTRACE, | ||
775 | .priv_bit = PCR_N4_STRACE, | ||
776 | |||
777 | /* We explicitly don't support hypervisor tracing. The T4 | ||
778 | * generates the overflow event for precise events via a trap | ||
779 | * which will not be generated (ie. it's completely lost) if | ||
780 | * we happen to be in the hypervisor when the event triggers. | ||
781 | * Essentially, the overflow event reporting is completely | ||
782 | * unusable when you have hypervisor mode tracing enabled. | ||
783 | */ | ||
784 | .hv_bit = 0, | ||
785 | |||
786 | .irq_bit = PCR_N4_TOE, | ||
787 | .upper_nop = 0, | ||
788 | .lower_nop = 0, | ||
789 | .flags = 0, | ||
790 | .max_hw_events = 4, | ||
791 | .num_pcrs = 4, | ||
792 | .num_pic_regs = 4, | ||
533 | }; | 793 | }; |
534 | 794 | ||
535 | static const struct sparc_pmu *sparc_pmu __read_mostly; | 795 | static const struct sparc_pmu *sparc_pmu __read_mostly; |
@@ -558,55 +818,35 @@ static u64 nop_for_index(int idx) | |||
558 | static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) | 818 | static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) |
559 | { | 819 | { |
560 | u64 val, mask = mask_for_index(idx); | 820 | u64 val, mask = mask_for_index(idx); |
821 | int pcr_index = 0; | ||
561 | 822 | ||
562 | val = cpuc->pcr; | 823 | if (sparc_pmu->num_pcrs > 1) |
824 | pcr_index = idx; | ||
825 | |||
826 | val = cpuc->pcr[pcr_index]; | ||
563 | val &= ~mask; | 827 | val &= ~mask; |
564 | val |= hwc->config; | 828 | val |= hwc->config; |
565 | cpuc->pcr = val; | 829 | cpuc->pcr[pcr_index] = val; |
566 | 830 | ||
567 | pcr_ops->write(cpuc->pcr); | 831 | pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); |
568 | } | 832 | } |
569 | 833 | ||
570 | static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) | 834 | static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx) |
571 | { | 835 | { |
572 | u64 mask = mask_for_index(idx); | 836 | u64 mask = mask_for_index(idx); |
573 | u64 nop = nop_for_index(idx); | 837 | u64 nop = nop_for_index(idx); |
838 | int pcr_index = 0; | ||
574 | u64 val; | 839 | u64 val; |
575 | 840 | ||
576 | val = cpuc->pcr; | 841 | if (sparc_pmu->num_pcrs > 1) |
842 | pcr_index = idx; | ||
843 | |||
844 | val = cpuc->pcr[pcr_index]; | ||
577 | val &= ~mask; | 845 | val &= ~mask; |
578 | val |= nop; | 846 | val |= nop; |
579 | cpuc->pcr = val; | 847 | cpuc->pcr[pcr_index] = val; |
580 | 848 | ||
581 | pcr_ops->write(cpuc->pcr); | 849 | pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]); |
582 | } | ||
583 | |||
584 | static u32 read_pmc(int idx) | ||
585 | { | ||
586 | u64 val; | ||
587 | |||
588 | read_pic(val); | ||
589 | if (idx == PIC_UPPER_INDEX) | ||
590 | val >>= 32; | ||
591 | |||
592 | return val & 0xffffffff; | ||
593 | } | ||
594 | |||
595 | static void write_pmc(int idx, u64 val) | ||
596 | { | ||
597 | u64 shift, mask, pic; | ||
598 | |||
599 | shift = 0; | ||
600 | if (idx == PIC_UPPER_INDEX) | ||
601 | shift = 32; | ||
602 | |||
603 | mask = ((u64) 0xffffffff) << shift; | ||
604 | val <<= shift; | ||
605 | |||
606 | read_pic(pic); | ||
607 | pic &= ~mask; | ||
608 | pic |= val; | ||
609 | write_pic(pic); | ||
610 | } | 850 | } |
611 | 851 | ||
612 | static u64 sparc_perf_event_update(struct perf_event *event, | 852 | static u64 sparc_perf_event_update(struct perf_event *event, |
@@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event, | |||
618 | 858 | ||
619 | again: | 859 | again: |
620 | prev_raw_count = local64_read(&hwc->prev_count); | 860 | prev_raw_count = local64_read(&hwc->prev_count); |
621 | new_raw_count = read_pmc(idx); | 861 | new_raw_count = sparc_pmu->read_pmc(idx); |
622 | 862 | ||
623 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | 863 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
624 | new_raw_count) != prev_raw_count) | 864 | new_raw_count) != prev_raw_count) |
@@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event, | |||
658 | 898 | ||
659 | local64_set(&hwc->prev_count, (u64)-left); | 899 | local64_set(&hwc->prev_count, (u64)-left); |
660 | 900 | ||
661 | write_pmc(idx, (u64)(-left) & 0xffffffff); | 901 | sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff); |
662 | 902 | ||
663 | perf_event_update_userpage(event); | 903 | perf_event_update_userpage(event); |
664 | 904 | ||
665 | return ret; | 905 | return ret; |
666 | } | 906 | } |
667 | 907 | ||
668 | /* If performance event entries have been added, move existing | 908 | static void read_in_all_counters(struct cpu_hw_events *cpuc) |
669 | * events around (if necessary) and then assign new entries to | ||
670 | * counters. | ||
671 | */ | ||
672 | static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) | ||
673 | { | 909 | { |
674 | int i; | 910 | int i; |
675 | 911 | ||
676 | if (!cpuc->n_added) | ||
677 | goto out; | ||
678 | |||
679 | /* Read in the counters which are moving. */ | ||
680 | for (i = 0; i < cpuc->n_events; i++) { | 912 | for (i = 0; i < cpuc->n_events; i++) { |
681 | struct perf_event *cp = cpuc->event[i]; | 913 | struct perf_event *cp = cpuc->event[i]; |
682 | 914 | ||
@@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) | |||
687 | cpuc->current_idx[i] = PIC_NO_INDEX; | 919 | cpuc->current_idx[i] = PIC_NO_INDEX; |
688 | } | 920 | } |
689 | } | 921 | } |
922 | } | ||
923 | |||
924 | /* On this PMU all PICs are programmed using a single PCR. Calculate | ||
925 | * the combined control register value. | ||
926 | * | ||
927 | * For such chips we require that all of the events have the same | ||
928 | * configuration, so just fetch the settings from the first entry. | ||
929 | */ | ||
930 | static void calculate_single_pcr(struct cpu_hw_events *cpuc) | ||
931 | { | ||
932 | int i; | ||
933 | |||
934 | if (!cpuc->n_added) | ||
935 | goto out; | ||
690 | 936 | ||
691 | /* Assign to counters all unassigned events. */ | 937 | /* Assign to counters all unassigned events. */ |
692 | for (i = 0; i < cpuc->n_events; i++) { | 938 | for (i = 0; i < cpuc->n_events; i++) { |
@@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) | |||
702 | cpuc->current_idx[i] = idx; | 948 | cpuc->current_idx[i] = idx; |
703 | 949 | ||
704 | enc = perf_event_get_enc(cpuc->events[i]); | 950 | enc = perf_event_get_enc(cpuc->events[i]); |
705 | pcr &= ~mask_for_index(idx); | 951 | cpuc->pcr[0] &= ~mask_for_index(idx); |
706 | if (hwc->state & PERF_HES_STOPPED) | 952 | if (hwc->state & PERF_HES_STOPPED) |
707 | pcr |= nop_for_index(idx); | 953 | cpuc->pcr[0] |= nop_for_index(idx); |
708 | else | 954 | else |
709 | pcr |= event_encoding(enc, idx); | 955 | cpuc->pcr[0] |= event_encoding(enc, idx); |
710 | } | 956 | } |
711 | out: | 957 | out: |
712 | return pcr; | 958 | cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; |
959 | } | ||
960 | |||
961 | /* On this PMU each PIC has it's own PCR control register. */ | ||
962 | static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) | ||
963 | { | ||
964 | int i; | ||
965 | |||
966 | if (!cpuc->n_added) | ||
967 | goto out; | ||
968 | |||
969 | for (i = 0; i < cpuc->n_events; i++) { | ||
970 | struct perf_event *cp = cpuc->event[i]; | ||
971 | struct hw_perf_event *hwc = &cp->hw; | ||
972 | int idx = hwc->idx; | ||
973 | u64 enc; | ||
974 | |||
975 | if (cpuc->current_idx[i] != PIC_NO_INDEX) | ||
976 | continue; | ||
977 | |||
978 | sparc_perf_event_set_period(cp, hwc, idx); | ||
979 | cpuc->current_idx[i] = idx; | ||
980 | |||
981 | enc = perf_event_get_enc(cpuc->events[i]); | ||
982 | cpuc->pcr[idx] &= ~mask_for_index(idx); | ||
983 | if (hwc->state & PERF_HES_STOPPED) | ||
984 | cpuc->pcr[idx] |= nop_for_index(idx); | ||
985 | else | ||
986 | cpuc->pcr[idx] |= event_encoding(enc, idx); | ||
987 | } | ||
988 | out: | ||
989 | for (i = 0; i < cpuc->n_events; i++) { | ||
990 | struct perf_event *cp = cpuc->event[i]; | ||
991 | int idx = cp->hw.idx; | ||
992 | |||
993 | cpuc->pcr[idx] |= cp->hw.config_base; | ||
994 | } | ||
995 | } | ||
996 | |||
997 | /* If performance event entries have been added, move existing events | ||
998 | * around (if necessary) and then assign new entries to counters. | ||
999 | */ | ||
1000 | static void update_pcrs_for_enable(struct cpu_hw_events *cpuc) | ||
1001 | { | ||
1002 | if (cpuc->n_added) | ||
1003 | read_in_all_counters(cpuc); | ||
1004 | |||
1005 | if (sparc_pmu->num_pcrs == 1) { | ||
1006 | calculate_single_pcr(cpuc); | ||
1007 | } else { | ||
1008 | calculate_multiple_pcrs(cpuc); | ||
1009 | } | ||
713 | } | 1010 | } |
714 | 1011 | ||
715 | static void sparc_pmu_enable(struct pmu *pmu) | 1012 | static void sparc_pmu_enable(struct pmu *pmu) |
716 | { | 1013 | { |
717 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1014 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
718 | u64 pcr; | 1015 | int i; |
719 | 1016 | ||
720 | if (cpuc->enabled) | 1017 | if (cpuc->enabled) |
721 | return; | 1018 | return; |
@@ -723,26 +1020,17 @@ static void sparc_pmu_enable(struct pmu *pmu) | |||
723 | cpuc->enabled = 1; | 1020 | cpuc->enabled = 1; |
724 | barrier(); | 1021 | barrier(); |
725 | 1022 | ||
726 | pcr = cpuc->pcr; | 1023 | if (cpuc->n_events) |
727 | if (!cpuc->n_events) { | 1024 | update_pcrs_for_enable(cpuc); |
728 | pcr = 0; | ||
729 | } else { | ||
730 | pcr = maybe_change_configuration(cpuc, pcr); | ||
731 | |||
732 | /* We require that all of the events have the same | ||
733 | * configuration, so just fetch the settings from the | ||
734 | * first entry. | ||
735 | */ | ||
736 | cpuc->pcr = pcr | cpuc->event[0]->hw.config_base; | ||
737 | } | ||
738 | 1025 | ||
739 | pcr_ops->write(cpuc->pcr); | 1026 | for (i = 0; i < sparc_pmu->num_pcrs; i++) |
1027 | pcr_ops->write_pcr(i, cpuc->pcr[i]); | ||
740 | } | 1028 | } |
741 | 1029 | ||
742 | static void sparc_pmu_disable(struct pmu *pmu) | 1030 | static void sparc_pmu_disable(struct pmu *pmu) |
743 | { | 1031 | { |
744 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1032 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
745 | u64 val; | 1033 | int i; |
746 | 1034 | ||
747 | if (!cpuc->enabled) | 1035 | if (!cpuc->enabled) |
748 | return; | 1036 | return; |
@@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu) | |||
750 | cpuc->enabled = 0; | 1038 | cpuc->enabled = 0; |
751 | cpuc->n_added = 0; | 1039 | cpuc->n_added = 0; |
752 | 1040 | ||
753 | val = cpuc->pcr; | 1041 | for (i = 0; i < sparc_pmu->num_pcrs; i++) { |
754 | val &= ~(PCR_UTRACE | PCR_STRACE | | 1042 | u64 val = cpuc->pcr[i]; |
755 | sparc_pmu->hv_bit | sparc_pmu->irq_bit); | ||
756 | cpuc->pcr = val; | ||
757 | 1043 | ||
758 | pcr_ops->write(cpuc->pcr); | 1044 | val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit | |
1045 | sparc_pmu->hv_bit | sparc_pmu->irq_bit); | ||
1046 | cpuc->pcr[i] = val; | ||
1047 | pcr_ops->write_pcr(i, cpuc->pcr[i]); | ||
1048 | } | ||
759 | } | 1049 | } |
760 | 1050 | ||
761 | static int active_event_index(struct cpu_hw_events *cpuc, | 1051 | static int active_event_index(struct cpu_hw_events *cpuc, |
@@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex); | |||
854 | static void perf_stop_nmi_watchdog(void *unused) | 1144 | static void perf_stop_nmi_watchdog(void *unused) |
855 | { | 1145 | { |
856 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1146 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1147 | int i; | ||
857 | 1148 | ||
858 | stop_nmi_watchdog(NULL); | 1149 | stop_nmi_watchdog(NULL); |
859 | cpuc->pcr = pcr_ops->read(); | 1150 | for (i = 0; i < sparc_pmu->num_pcrs; i++) |
1151 | cpuc->pcr[i] = pcr_ops->read_pcr(i); | ||
860 | } | 1152 | } |
861 | 1153 | ||
862 | void perf_event_grab_pmc(void) | 1154 | void perf_event_grab_pmc(void) |
@@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts, | |||
942 | if (!n_ev) | 1234 | if (!n_ev) |
943 | return 0; | 1235 | return 0; |
944 | 1236 | ||
945 | if (n_ev > MAX_HWEVENTS) | 1237 | if (n_ev > sparc_pmu->max_hw_events) |
946 | return -1; | 1238 | return -1; |
947 | 1239 | ||
1240 | if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) { | ||
1241 | int i; | ||
1242 | |||
1243 | for (i = 0; i < n_ev; i++) | ||
1244 | evts[i]->hw.idx = i; | ||
1245 | return 0; | ||
1246 | } | ||
1247 | |||
948 | msk0 = perf_event_get_msk(events[0]); | 1248 | msk0 = perf_event_get_msk(events[0]); |
949 | if (n_ev == 1) { | 1249 | if (n_ev == 1) { |
950 | if (msk0 & PIC_LOWER) | 1250 | if (msk0 & PIC_LOWER) |
@@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new) | |||
1000 | struct perf_event *event; | 1300 | struct perf_event *event; |
1001 | int i, n, first; | 1301 | int i, n, first; |
1002 | 1302 | ||
1303 | if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME)) | ||
1304 | return 0; | ||
1305 | |||
1003 | n = n_prev + n_new; | 1306 | n = n_prev + n_new; |
1004 | if (n <= 1) | 1307 | if (n <= 1) |
1005 | return 0; | 1308 | return 0; |
@@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) | |||
1059 | perf_pmu_disable(event->pmu); | 1362 | perf_pmu_disable(event->pmu); |
1060 | 1363 | ||
1061 | n0 = cpuc->n_events; | 1364 | n0 = cpuc->n_events; |
1062 | if (n0 >= MAX_HWEVENTS) | 1365 | if (n0 >= sparc_pmu->max_hw_events) |
1063 | goto out; | 1366 | goto out; |
1064 | 1367 | ||
1065 | cpuc->event[n0] = event; | 1368 | cpuc->event[n0] = event; |
@@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event) | |||
1146 | /* We save the enable bits in the config_base. */ | 1449 | /* We save the enable bits in the config_base. */ |
1147 | hwc->config_base = sparc_pmu->irq_bit; | 1450 | hwc->config_base = sparc_pmu->irq_bit; |
1148 | if (!attr->exclude_user) | 1451 | if (!attr->exclude_user) |
1149 | hwc->config_base |= PCR_UTRACE; | 1452 | hwc->config_base |= sparc_pmu->user_bit; |
1150 | if (!attr->exclude_kernel) | 1453 | if (!attr->exclude_kernel) |
1151 | hwc->config_base |= PCR_STRACE; | 1454 | hwc->config_base |= sparc_pmu->priv_bit; |
1152 | if (!attr->exclude_hv) | 1455 | if (!attr->exclude_hv) |
1153 | hwc->config_base |= sparc_pmu->hv_bit; | 1456 | hwc->config_base |= sparc_pmu->hv_bit; |
1154 | 1457 | ||
1155 | n = 0; | 1458 | n = 0; |
1156 | if (event->group_leader != event) { | 1459 | if (event->group_leader != event) { |
1157 | n = collect_events(event->group_leader, | 1460 | n = collect_events(event->group_leader, |
1158 | MAX_HWEVENTS - 1, | 1461 | sparc_pmu->max_hw_events - 1, |
1159 | evts, events, current_idx_dmy); | 1462 | evts, events, current_idx_dmy); |
1160 | if (n < 0) | 1463 | if (n < 0) |
1161 | return -EINVAL; | 1464 | return -EINVAL; |
@@ -1254,8 +1557,7 @@ static struct pmu pmu = { | |||
1254 | void perf_event_print_debug(void) | 1557 | void perf_event_print_debug(void) |
1255 | { | 1558 | { |
1256 | unsigned long flags; | 1559 | unsigned long flags; |
1257 | u64 pcr, pic; | 1560 | int cpu, i; |
1258 | int cpu; | ||
1259 | 1561 | ||
1260 | if (!sparc_pmu) | 1562 | if (!sparc_pmu) |
1261 | return; | 1563 | return; |
@@ -1264,12 +1566,13 @@ void perf_event_print_debug(void) | |||
1264 | 1566 | ||
1265 | cpu = smp_processor_id(); | 1567 | cpu = smp_processor_id(); |
1266 | 1568 | ||
1267 | pcr = pcr_ops->read(); | ||
1268 | read_pic(pic); | ||
1269 | |||
1270 | pr_info("\n"); | 1569 | pr_info("\n"); |
1271 | pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", | 1570 | for (i = 0; i < sparc_pmu->num_pcrs; i++) |
1272 | cpu, pcr, pic); | 1571 | pr_info("CPU#%d: PCR%d[%016llx]\n", |
1572 | cpu, i, pcr_ops->read_pcr(i)); | ||
1573 | for (i = 0; i < sparc_pmu->num_pic_regs; i++) | ||
1574 | pr_info("CPU#%d: PIC%d[%016llx]\n", | ||
1575 | cpu, i, pcr_ops->read_pic(i)); | ||
1273 | 1576 | ||
1274 | local_irq_restore(flags); | 1577 | local_irq_restore(flags); |
1275 | } | 1578 | } |
@@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, | |||
1305 | * Do this before we peek at the counters to determine | 1608 | * Do this before we peek at the counters to determine |
1306 | * overflow so we don't lose any events. | 1609 | * overflow so we don't lose any events. |
1307 | */ | 1610 | */ |
1308 | if (sparc_pmu->irq_bit) | 1611 | if (sparc_pmu->irq_bit && |
1309 | pcr_ops->write(cpuc->pcr); | 1612 | sparc_pmu->num_pcrs == 1) |
1613 | pcr_ops->write_pcr(0, cpuc->pcr[0]); | ||
1310 | 1614 | ||
1311 | for (i = 0; i < cpuc->n_events; i++) { | 1615 | for (i = 0; i < cpuc->n_events; i++) { |
1312 | struct perf_event *event = cpuc->event[i]; | 1616 | struct perf_event *event = cpuc->event[i]; |
@@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, | |||
1314 | struct hw_perf_event *hwc; | 1618 | struct hw_perf_event *hwc; |
1315 | u64 val; | 1619 | u64 val; |
1316 | 1620 | ||
1621 | if (sparc_pmu->irq_bit && | ||
1622 | sparc_pmu->num_pcrs > 1) | ||
1623 | pcr_ops->write_pcr(idx, cpuc->pcr[idx]); | ||
1624 | |||
1317 | hwc = &event->hw; | 1625 | hwc = &event->hw; |
1318 | val = sparc_perf_event_update(event, hwc, idx); | 1626 | val = sparc_perf_event_update(event, hwc, idx); |
1319 | if (val & (1ULL << 31)) | 1627 | if (val & (1ULL << 31)) |
@@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void) | |||
1352 | sparc_pmu = &niagara2_pmu; | 1660 | sparc_pmu = &niagara2_pmu; |
1353 | return true; | 1661 | return true; |
1354 | } | 1662 | } |
1663 | if (!strcmp(sparc_pmu_type, "niagara4")) { | ||
1664 | sparc_pmu = &niagara4_pmu; | ||
1665 | return true; | ||
1666 | } | ||
1355 | return false; | 1667 | return false; |
1356 | } | 1668 | } |
1357 | 1669 | ||
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 1414d16712b2..0800e71d8a88 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c | |||
@@ -340,7 +340,12 @@ static const char *hwcaps[] = { | |||
340 | */ | 340 | */ |
341 | "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", | 341 | "mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2", |
342 | "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", | 342 | "ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", |
343 | "ima", "cspare", | 343 | "ima", "cspare", "pause", "cbcond", |
344 | }; | ||
345 | |||
346 | static const char *crypto_hwcaps[] = { | ||
347 | "aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256", | ||
348 | "sha512", "mpmul", "montmul", "montsqr", "crc32c", | ||
344 | }; | 349 | }; |
345 | 350 | ||
346 | void cpucap_info(struct seq_file *m) | 351 | void cpucap_info(struct seq_file *m) |
@@ -357,27 +362,61 @@ void cpucap_info(struct seq_file *m) | |||
357 | printed++; | 362 | printed++; |
358 | } | 363 | } |
359 | } | 364 | } |
365 | if (caps & HWCAP_SPARC_CRYPTO) { | ||
366 | unsigned long cfr; | ||
367 | |||
368 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
369 | for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { | ||
370 | unsigned long bit = 1UL << i; | ||
371 | if (cfr & bit) { | ||
372 | seq_printf(m, "%s%s", | ||
373 | printed ? "," : "", crypto_hwcaps[i]); | ||
374 | printed++; | ||
375 | } | ||
376 | } | ||
377 | } | ||
360 | seq_putc(m, '\n'); | 378 | seq_putc(m, '\n'); |
361 | } | 379 | } |
362 | 380 | ||
381 | static void __init report_one_hwcap(int *printed, const char *name) | ||
382 | { | ||
383 | if ((*printed) == 0) | ||
384 | printk(KERN_INFO "CPU CAPS: ["); | ||
385 | printk(KERN_CONT "%s%s", | ||
386 | (*printed) ? "," : "", name); | ||
387 | if (++(*printed) == 8) { | ||
388 | printk(KERN_CONT "]\n"); | ||
389 | *printed = 0; | ||
390 | } | ||
391 | } | ||
392 | |||
393 | static void __init report_crypto_hwcaps(int *printed) | ||
394 | { | ||
395 | unsigned long cfr; | ||
396 | int i; | ||
397 | |||
398 | __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); | ||
399 | |||
400 | for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { | ||
401 | unsigned long bit = 1UL << i; | ||
402 | if (cfr & bit) | ||
403 | report_one_hwcap(printed, crypto_hwcaps[i]); | ||
404 | } | ||
405 | } | ||
406 | |||
363 | static void __init report_hwcaps(unsigned long caps) | 407 | static void __init report_hwcaps(unsigned long caps) |
364 | { | 408 | { |
365 | int i, printed = 0; | 409 | int i, printed = 0; |
366 | 410 | ||
367 | printk(KERN_INFO "CPU CAPS: ["); | ||
368 | for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { | 411 | for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { |
369 | unsigned long bit = 1UL << i; | 412 | unsigned long bit = 1UL << i; |
370 | if (caps & bit) { | 413 | if (caps & bit) |
371 | printk(KERN_CONT "%s%s", | 414 | report_one_hwcap(&printed, hwcaps[i]); |
372 | printed ? "," : "", hwcaps[i]); | ||
373 | if (++printed == 8) { | ||
374 | printk(KERN_CONT "]\n"); | ||
375 | printk(KERN_INFO "CPU CAPS: ["); | ||
376 | printed = 0; | ||
377 | } | ||
378 | } | ||
379 | } | 415 | } |
380 | printk(KERN_CONT "]\n"); | 416 | if (caps & HWCAP_SPARC_CRYPTO) |
417 | report_crypto_hwcaps(&printed); | ||
418 | if (printed != 0) | ||
419 | printk(KERN_CONT "]\n"); | ||
381 | } | 420 | } |
382 | 421 | ||
383 | static unsigned long __init mdesc_cpu_hwcap_list(void) | 422 | static unsigned long __init mdesc_cpu_hwcap_list(void) |
@@ -411,6 +450,10 @@ static unsigned long __init mdesc_cpu_hwcap_list(void) | |||
411 | break; | 450 | break; |
412 | } | 451 | } |
413 | } | 452 | } |
453 | for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { | ||
454 | if (!strcmp(prop, crypto_hwcaps[i])) | ||
455 | caps |= HWCAP_SPARC_CRYPTO; | ||
456 | } | ||
414 | 457 | ||
415 | plen = strlen(prop) + 1; | 458 | plen = strlen(prop) + 1; |
416 | prop += plen; | 459 | prop += plen; |
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index dff4096f3dec..30f6ab51c551 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile | |||
@@ -32,6 +32,9 @@ lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o | |||
32 | lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o | 32 | lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o |
33 | lib-$(CONFIG_SPARC64) += NG2patch.o | 33 | lib-$(CONFIG_SPARC64) += NG2patch.o |
34 | 34 | ||
35 | lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o | ||
36 | lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o | ||
37 | |||
35 | lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o | 38 | lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o |
36 | lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o | 39 | lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o |
37 | 40 | ||
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S new file mode 100644 index 000000000000..fd9f903ffa32 --- /dev/null +++ b/arch/sparc/lib/NG4copy_from_user.S | |||
@@ -0,0 +1,30 @@ | |||
1 | /* NG4copy_from_user.S: Niagara-4 optimized copy from userspace. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_LD(x) \ | ||
7 | 98: x; \ | ||
8 | .section __ex_table,"a";\ | ||
9 | .align 4; \ | ||
10 | .word 98b, __retl_one_asi;\ | ||
11 | .text; \ | ||
12 | .align 4; | ||
13 | |||
14 | #ifndef ASI_AIUS | ||
15 | #define ASI_AIUS 0x11 | ||
16 | #endif | ||
17 | |||
18 | #define FUNC_NAME NG4copy_from_user | ||
19 | #define LOAD(type,addr,dest) type##a [addr] %asi, dest | ||
20 | #define EX_RETVAL(x) 0 | ||
21 | |||
22 | #ifdef __KERNEL__ | ||
23 | #define PREAMBLE \ | ||
24 | rd %asi, %g1; \ | ||
25 | cmp %g1, ASI_AIUS; \ | ||
26 | bne,pn %icc, ___copy_in_user; \ | ||
27 | nop | ||
28 | #endif | ||
29 | |||
30 | #include "NG4memcpy.S" | ||
diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S new file mode 100644 index 000000000000..f30ec10bbcac --- /dev/null +++ b/arch/sparc/lib/NG4copy_page.S | |||
@@ -0,0 +1,57 @@ | |||
1 | /* NG4copy_page.S: Niagara-4 optimized copy page. | ||
2 | * | ||
3 | * Copyright (C) 2012 (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | #include <asm/page.h> | ||
8 | |||
9 | .text | ||
10 | .align 32 | ||
11 | |||
12 | .register %g2, #scratch | ||
13 | .register %g3, #scratch | ||
14 | |||
15 | .globl NG4copy_user_page | ||
16 | NG4copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | ||
17 | prefetch [%o1 + 0x000], #n_reads_strong | ||
18 | prefetch [%o1 + 0x040], #n_reads_strong | ||
19 | prefetch [%o1 + 0x080], #n_reads_strong | ||
20 | prefetch [%o1 + 0x0c0], #n_reads_strong | ||
21 | set PAGE_SIZE, %g7 | ||
22 | prefetch [%o1 + 0x100], #n_reads_strong | ||
23 | prefetch [%o1 + 0x140], #n_reads_strong | ||
24 | prefetch [%o1 + 0x180], #n_reads_strong | ||
25 | prefetch [%o1 + 0x1c0], #n_reads_strong | ||
26 | 1: | ||
27 | ldx [%o1 + 0x00], %o2 | ||
28 | subcc %g7, 0x40, %g7 | ||
29 | ldx [%o1 + 0x08], %o3 | ||
30 | ldx [%o1 + 0x10], %o4 | ||
31 | ldx [%o1 + 0x18], %o5 | ||
32 | ldx [%o1 + 0x20], %g1 | ||
33 | stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P | ||
34 | add %o0, 0x08, %o0 | ||
35 | ldx [%o1 + 0x28], %g2 | ||
36 | stxa %o3, [%o0] ASI_BLK_INIT_QUAD_LDD_P | ||
37 | add %o0, 0x08, %o0 | ||
38 | ldx [%o1 + 0x30], %g3 | ||
39 | stxa %o4, [%o0] ASI_BLK_INIT_QUAD_LDD_P | ||
40 | add %o0, 0x08, %o0 | ||
41 | ldx [%o1 + 0x38], %o2 | ||
42 | add %o1, 0x40, %o1 | ||
43 | stxa %o5, [%o0] ASI_BLK_INIT_QUAD_LDD_P | ||
44 | add %o0, 0x08, %o0 | ||
45 | stxa %g1, [%o0] ASI_BLK_INIT_QUAD_LDD_P | ||
46 | add %o0, 0x08, %o0 | ||
47 | stxa %g2, [%o0] ASI_BLK_INIT_QUAD_LDD_P | ||
48 | add %o0, 0x08, %o0 | ||
49 | stxa %g3, [%o0] ASI_BLK_INIT_QUAD_LDD_P | ||
50 | add %o0, 0x08, %o0 | ||
51 | stxa %o2, [%o0] ASI_BLK_INIT_QUAD_LDD_P | ||
52 | add %o0, 0x08, %o0 | ||
53 | bne,pt %icc, 1b | ||
54 | prefetch [%o1 + 0x200], #n_reads_strong | ||
55 | retl | ||
56 | membar #StoreLoad | #StoreStore | ||
57 | .size NG4copy_user_page,.-NG4copy_user_page | ||
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S new file mode 100644 index 000000000000..9744c4540a8d --- /dev/null +++ b/arch/sparc/lib/NG4copy_to_user.S | |||
@@ -0,0 +1,39 @@ | |||
1 | /* NG4copy_to_user.S: Niagara-4 optimized copy to userspace. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_ST(x) \ | ||
7 | 98: x; \ | ||
8 | .section __ex_table,"a";\ | ||
9 | .align 4; \ | ||
10 | .word 98b, __retl_one_asi;\ | ||
11 | .text; \ | ||
12 | .align 4; | ||
13 | |||
14 | #ifndef ASI_AIUS | ||
15 | #define ASI_AIUS 0x11 | ||
16 | #endif | ||
17 | |||
18 | #ifndef ASI_BLK_INIT_QUAD_LDD_AIUS | ||
19 | #define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 | ||
20 | #endif | ||
21 | |||
22 | #define FUNC_NAME NG4copy_to_user | ||
23 | #define STORE(type,src,addr) type##a src, [addr] %asi | ||
24 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS | ||
25 | #define EX_RETVAL(x) 0 | ||
26 | |||
27 | #ifdef __KERNEL__ | ||
28 | /* Writing to %asi is _expensive_ so we hardcode it. | ||
29 | * Reading %asi to check for KERNEL_DS is comparatively | ||
30 | * cheap. | ||
31 | */ | ||
32 | #define PREAMBLE \ | ||
33 | rd %asi, %g1; \ | ||
34 | cmp %g1, ASI_AIUS; \ | ||
35 | bne,pn %icc, ___copy_in_user; \ | ||
36 | nop | ||
37 | #endif | ||
38 | |||
39 | #include "NG4memcpy.S" | ||
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S new file mode 100644 index 000000000000..9cf2ee01cee3 --- /dev/null +++ b/arch/sparc/lib/NG4memcpy.S | |||
@@ -0,0 +1,360 @@ | |||
1 | /* NG4memcpy.S: Niagara-4 optimized memcpy. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #ifdef __KERNEL__ | ||
7 | #include <asm/visasm.h> | ||
8 | #include <asm/asi.h> | ||
9 | #define GLOBAL_SPARE %g7 | ||
10 | #else | ||
11 | #define ASI_BLK_INIT_QUAD_LDD_P 0xe2 | ||
12 | #define FPRS_FEF 0x04 | ||
13 | |||
14 | /* On T4 it is very expensive to access ASRs like %fprs and | ||
15 | * %asi, avoiding a read or a write can save ~50 cycles. | ||
16 | */ | ||
17 | #define FPU_ENTER \ | ||
18 | rd %fprs, %o5; \ | ||
19 | andcc %o5, FPRS_FEF, %g0; \ | ||
20 | be,a,pn %icc, 999f; \ | ||
21 | wr %g0, FPRS_FEF, %fprs; \ | ||
22 | 999: | ||
23 | |||
24 | #ifdef MEMCPY_DEBUG | ||
25 | #define VISEntryHalf FPU_ENTER; \ | ||
26 | clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; | ||
27 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | ||
28 | #else | ||
29 | #define VISEntryHalf FPU_ENTER | ||
30 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | ||
31 | #endif | ||
32 | |||
33 | #define GLOBAL_SPARE %g5 | ||
34 | #endif | ||
35 | |||
36 | #ifndef STORE_ASI | ||
37 | #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA | ||
38 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P | ||
39 | #else | ||
40 | #define STORE_ASI 0x80 /* ASI_P */ | ||
41 | #endif | ||
42 | #endif | ||
43 | |||
44 | #ifndef EX_LD | ||
45 | #define EX_LD(x) x | ||
46 | #endif | ||
47 | |||
48 | #ifndef EX_ST | ||
49 | #define EX_ST(x) x | ||
50 | #endif | ||
51 | |||
52 | #ifndef EX_RETVAL | ||
53 | #define EX_RETVAL(x) x | ||
54 | #endif | ||
55 | |||
56 | #ifndef LOAD | ||
57 | #define LOAD(type,addr,dest) type [addr], dest | ||
58 | #endif | ||
59 | |||
60 | #ifndef STORE | ||
61 | #ifndef MEMCPY_DEBUG | ||
62 | #define STORE(type,src,addr) type src, [addr] | ||
63 | #else | ||
64 | #define STORE(type,src,addr) type##a src, [addr] %asi | ||
65 | #endif | ||
66 | #endif | ||
67 | |||
68 | #ifndef STORE_INIT | ||
69 | #define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI | ||
70 | #endif | ||
71 | |||
72 | #ifndef FUNC_NAME | ||
73 | #define FUNC_NAME NG4memcpy | ||
74 | #endif | ||
75 | #ifndef PREAMBLE | ||
76 | #define PREAMBLE | ||
77 | #endif | ||
78 | |||
79 | #ifndef XCC | ||
80 | #define XCC xcc | ||
81 | #endif | ||
82 | |||
83 | .register %g2,#scratch | ||
84 | .register %g3,#scratch | ||
85 | |||
86 | .text | ||
87 | .align 64 | ||
88 | |||
89 | .globl FUNC_NAME | ||
90 | .type FUNC_NAME,#function | ||
91 | FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
92 | #ifdef MEMCPY_DEBUG | ||
93 | wr %g0, 0x80, %asi | ||
94 | #endif | ||
95 | srlx %o2, 31, %g2 | ||
96 | cmp %g2, 0 | ||
97 | tne %XCC, 5 | ||
98 | PREAMBLE | ||
99 | mov %o0, %o3 | ||
100 | brz,pn %o2, .Lexit | ||
101 | cmp %o2, 3 | ||
102 | ble,pn %icc, .Ltiny | ||
103 | cmp %o2, 19 | ||
104 | ble,pn %icc, .Lsmall | ||
105 | or %o0, %o1, %g2 | ||
106 | cmp %o2, 128 | ||
107 | bl,pn %icc, .Lmedium | ||
108 | nop | ||
109 | |||
110 | .Llarge:/* len >= 0x80 */ | ||
111 | /* First get dest 8 byte aligned. */ | ||
112 | sub %g0, %o0, %g1 | ||
113 | and %g1, 0x7, %g1 | ||
114 | brz,pt %g1, 51f | ||
115 | sub %o2, %g1, %o2 | ||
116 | |||
117 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | ||
118 | add %o1, 1, %o1 | ||
119 | subcc %g1, 1, %g1 | ||
120 | add %o0, 1, %o0 | ||
121 | bne,pt %icc, 1b | ||
122 | EX_ST(STORE(stb, %g2, %o0 - 0x01)) | ||
123 | |||
124 | 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) | ||
125 | LOAD(prefetch, %o1 + 0x080, #n_reads_strong) | ||
126 | LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong) | ||
127 | LOAD(prefetch, %o1 + 0x100, #n_reads_strong) | ||
128 | LOAD(prefetch, %o1 + 0x140, #n_reads_strong) | ||
129 | LOAD(prefetch, %o1 + 0x180, #n_reads_strong) | ||
130 | LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong) | ||
131 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | ||
132 | |||
133 | /* Check if we can use the straight fully aligned | ||
134 | * loop, or we require the alignaddr/faligndata variant. | ||
135 | */ | ||
136 | andcc %o1, 0x7, %o5 | ||
137 | bne,pn %icc, .Llarge_src_unaligned | ||
138 | sub %g0, %o0, %g1 | ||
139 | |||
140 | /* Legitimize the use of initializing stores by getting dest | ||
141 | * to be 64-byte aligned. | ||
142 | */ | ||
143 | and %g1, 0x3f, %g1 | ||
144 | brz,pt %g1, .Llarge_aligned | ||
145 | sub %o2, %g1, %o2 | ||
146 | |||
147 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) | ||
148 | add %o1, 8, %o1 | ||
149 | subcc %g1, 8, %g1 | ||
150 | add %o0, 8, %o0 | ||
151 | bne,pt %icc, 1b | ||
152 | EX_ST(STORE(stx, %g2, %o0 - 0x08)) | ||
153 | |||
154 | .Llarge_aligned: | ||
155 | /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ | ||
156 | andn %o2, 0x3f, %o4 | ||
157 | sub %o2, %o4, %o2 | ||
158 | |||
159 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
160 | add %o1, 0x40, %o1 | ||
161 | EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) | ||
162 | subcc %o4, 0x40, %o4 | ||
163 | EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) | ||
164 | EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) | ||
165 | EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) | ||
166 | EX_ST(STORE_INIT(%g1, %o0)) | ||
167 | add %o0, 0x08, %o0 | ||
168 | EX_ST(STORE_INIT(%g2, %o0)) | ||
169 | add %o0, 0x08, %o0 | ||
170 | EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) | ||
171 | EX_ST(STORE_INIT(%g3, %o0)) | ||
172 | add %o0, 0x08, %o0 | ||
173 | EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) | ||
174 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | ||
175 | add %o0, 0x08, %o0 | ||
176 | EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) | ||
177 | EX_ST(STORE_INIT(%o5, %o0)) | ||
178 | add %o0, 0x08, %o0 | ||
179 | EX_ST(STORE_INIT(%g2, %o0)) | ||
180 | add %o0, 0x08, %o0 | ||
181 | EX_ST(STORE_INIT(%g3, %o0)) | ||
182 | add %o0, 0x08, %o0 | ||
183 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | ||
184 | add %o0, 0x08, %o0 | ||
185 | bne,pt %icc, 1b | ||
186 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | ||
187 | |||
188 | membar #StoreLoad | #StoreStore | ||
189 | |||
190 | brz,pn %o2, .Lexit | ||
191 | cmp %o2, 19 | ||
192 | ble,pn %icc, .Lsmall_unaligned | ||
193 | nop | ||
194 | ba,a,pt %icc, .Lmedium_noprefetch | ||
195 | |||
196 | .Lexit: retl | ||
197 | mov EX_RETVAL(%o3), %o0 | ||
198 | |||
199 | .Llarge_src_unaligned: | ||
200 | andn %o2, 0x3f, %o4 | ||
201 | sub %o2, %o4, %o2 | ||
202 | VISEntryHalf | ||
203 | alignaddr %o1, %g0, %g1 | ||
204 | add %o1, %o4, %o1 | ||
205 | EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) | ||
206 | 1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) | ||
207 | subcc %o4, 0x40, %o4 | ||
208 | EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) | ||
209 | EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) | ||
210 | EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) | ||
211 | EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) | ||
212 | EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) | ||
213 | EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) | ||
214 | faligndata %f0, %f2, %f16 | ||
215 | EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) | ||
216 | faligndata %f2, %f4, %f18 | ||
217 | add %g1, 0x40, %g1 | ||
218 | faligndata %f4, %f6, %f20 | ||
219 | faligndata %f6, %f8, %f22 | ||
220 | faligndata %f8, %f10, %f24 | ||
221 | faligndata %f10, %f12, %f26 | ||
222 | faligndata %f12, %f14, %f28 | ||
223 | faligndata %f14, %f0, %f30 | ||
224 | EX_ST(STORE(std, %f16, %o0 + 0x00)) | ||
225 | EX_ST(STORE(std, %f18, %o0 + 0x08)) | ||
226 | EX_ST(STORE(std, %f20, %o0 + 0x10)) | ||
227 | EX_ST(STORE(std, %f22, %o0 + 0x18)) | ||
228 | EX_ST(STORE(std, %f24, %o0 + 0x20)) | ||
229 | EX_ST(STORE(std, %f26, %o0 + 0x28)) | ||
230 | EX_ST(STORE(std, %f28, %o0 + 0x30)) | ||
231 | EX_ST(STORE(std, %f30, %o0 + 0x38)) | ||
232 | add %o0, 0x40, %o0 | ||
233 | bne,pt %icc, 1b | ||
234 | LOAD(prefetch, %g1 + 0x200, #n_reads_strong) | ||
235 | VISExitHalf | ||
236 | |||
237 | brz,pn %o2, .Lexit | ||
238 | cmp %o2, 19 | ||
239 | ble,pn %icc, .Lsmall_unaligned | ||
240 | nop | ||
241 | ba,a,pt %icc, .Lmedium_unaligned | ||
242 | |||
243 | .Lmedium: | ||
244 | LOAD(prefetch, %o1 + 0x40, #n_reads_strong) | ||
245 | andcc %g2, 0x7, %g0 | ||
246 | bne,pn %icc, .Lmedium_unaligned | ||
247 | nop | ||
248 | .Lmedium_noprefetch: | ||
249 | andncc %o2, 0x20 - 1, %o5 | ||
250 | be,pn %icc, 2f | ||
251 | sub %o2, %o5, %o2 | ||
252 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
253 | EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) | ||
254 | EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) | ||
255 | EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) | ||
256 | add %o1, 0x20, %o1 | ||
257 | subcc %o5, 0x20, %o5 | ||
258 | EX_ST(STORE(stx, %g1, %o0 + 0x00)) | ||
259 | EX_ST(STORE(stx, %g2, %o0 + 0x08)) | ||
260 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) | ||
261 | EX_ST(STORE(stx, %o4, %o0 + 0x18)) | ||
262 | bne,pt %icc, 1b | ||
263 | add %o0, 0x20, %o0 | ||
264 | 2: andcc %o2, 0x18, %o5 | ||
265 | be,pt %icc, 3f | ||
266 | sub %o2, %o5, %o2 | ||
267 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
268 | add %o1, 0x08, %o1 | ||
269 | add %o0, 0x08, %o0 | ||
270 | subcc %o5, 0x08, %o5 | ||
271 | bne,pt %icc, 1b | ||
272 | EX_ST(STORE(stx, %g1, %o0 - 0x08)) | ||
273 | 3: brz,pt %o2, .Lexit | ||
274 | cmp %o2, 0x04 | ||
275 | bl,pn %icc, .Ltiny | ||
276 | nop | ||
277 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | ||
278 | add %o1, 0x04, %o1 | ||
279 | add %o0, 0x04, %o0 | ||
280 | subcc %o2, 0x04, %o2 | ||
281 | bne,pn %icc, .Ltiny | ||
282 | EX_ST(STORE(stw, %g1, %o0 - 0x04)) | ||
283 | ba,a,pt %icc, .Lexit | ||
284 | .Lmedium_unaligned: | ||
285 | /* First get dest 8 byte aligned. */ | ||
286 | sub %g0, %o0, %g1 | ||
287 | and %g1, 0x7, %g1 | ||
288 | brz,pt %g1, 2f | ||
289 | sub %o2, %g1, %o2 | ||
290 | |||
291 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | ||
292 | add %o1, 1, %o1 | ||
293 | subcc %g1, 1, %g1 | ||
294 | add %o0, 1, %o0 | ||
295 | bne,pt %icc, 1b | ||
296 | EX_ST(STORE(stb, %g2, %o0 - 0x01)) | ||
297 | 2: | ||
298 | and %o1, 0x7, %g1 | ||
299 | brz,pn %g1, .Lmedium_noprefetch | ||
300 | sll %g1, 3, %g1 | ||
301 | mov 64, %g2 | ||
302 | sub %g2, %g1, %g2 | ||
303 | andn %o1, 0x7, %o1 | ||
304 | EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) | ||
305 | sllx %o4, %g1, %o4 | ||
306 | andn %o2, 0x08 - 1, %o5 | ||
307 | sub %o2, %o5, %o2 | ||
308 | 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) | ||
309 | add %o1, 0x08, %o1 | ||
310 | subcc %o5, 0x08, %o5 | ||
311 | srlx %g3, %g2, GLOBAL_SPARE | ||
312 | or GLOBAL_SPARE, %o4, GLOBAL_SPARE | ||
313 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) | ||
314 | add %o0, 0x08, %o0 | ||
315 | bne,pt %icc, 1b | ||
316 | sllx %g3, %g1, %o4 | ||
317 | srl %g1, 3, %g1 | ||
318 | add %o1, %g1, %o1 | ||
319 | brz,pn %o2, .Lexit | ||
320 | nop | ||
321 | ba,pt %icc, .Lsmall_unaligned | ||
322 | |||
323 | .Ltiny: | ||
324 | EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | ||
325 | subcc %o2, 1, %o2 | ||
326 | be,pn %icc, .Lexit | ||
327 | EX_ST(STORE(stb, %g1, %o0 + 0x00)) | ||
328 | EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) | ||
329 | subcc %o2, 1, %o2 | ||
330 | be,pn %icc, .Lexit | ||
331 | EX_ST(STORE(stb, %g1, %o0 + 0x01)) | ||
332 | EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) | ||
333 | ba,pt %icc, .Lexit | ||
334 | EX_ST(STORE(stb, %g1, %o0 + 0x02)) | ||
335 | |||
336 | .Lsmall: | ||
337 | andcc %g2, 0x3, %g0 | ||
338 | bne,pn %icc, .Lsmall_unaligned | ||
339 | andn %o2, 0x4 - 1, %o5 | ||
340 | sub %o2, %o5, %o2 | ||
341 | 1: | ||
342 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | ||
343 | add %o1, 0x04, %o1 | ||
344 | subcc %o5, 0x04, %o5 | ||
345 | add %o0, 0x04, %o0 | ||
346 | bne,pt %icc, 1b | ||
347 | EX_ST(STORE(stw, %g1, %o0 - 0x04)) | ||
348 | brz,pt %o2, .Lexit | ||
349 | nop | ||
350 | ba,a,pt %icc, .Ltiny | ||
351 | |||
352 | .Lsmall_unaligned: | ||
353 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | ||
354 | add %o1, 1, %o1 | ||
355 | add %o0, 1, %o0 | ||
356 | subcc %o2, 1, %o2 | ||
357 | bne,pt %icc, 1b | ||
358 | EX_ST(STORE(stb, %g1, %o0 - 0x01)) | ||
359 | ba,a,pt %icc, .Lexit | ||
360 | .size FUNC_NAME, .-FUNC_NAME | ||
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S new file mode 100644 index 000000000000..c21c34c61dda --- /dev/null +++ b/arch/sparc/lib/NG4patch.S | |||
@@ -0,0 +1,43 @@ | |||
1 | /* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #define BRANCH_ALWAYS 0x10680000 | ||
7 | #define NOP 0x01000000 | ||
8 | #define NG_DO_PATCH(OLD, NEW) \ | ||
9 | sethi %hi(NEW), %g1; \ | ||
10 | or %g1, %lo(NEW), %g1; \ | ||
11 | sethi %hi(OLD), %g2; \ | ||
12 | or %g2, %lo(OLD), %g2; \ | ||
13 | sub %g1, %g2, %g1; \ | ||
14 | sethi %hi(BRANCH_ALWAYS), %g3; \ | ||
15 | sll %g1, 11, %g1; \ | ||
16 | srl %g1, 11 + 2, %g1; \ | ||
17 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ | ||
18 | or %g3, %g1, %g3; \ | ||
19 | stw %g3, [%g2]; \ | ||
20 | sethi %hi(NOP), %g3; \ | ||
21 | or %g3, %lo(NOP), %g3; \ | ||
22 | stw %g3, [%g2 + 0x4]; \ | ||
23 | flush %g2; | ||
24 | |||
25 | .globl niagara4_patch_copyops | ||
26 | .type niagara4_patch_copyops,#function | ||
27 | niagara4_patch_copyops: | ||
28 | NG_DO_PATCH(memcpy, NG4memcpy) | ||
29 | NG_DO_PATCH(___copy_from_user, NG4copy_from_user) | ||
30 | NG_DO_PATCH(___copy_to_user, NG4copy_to_user) | ||
31 | retl | ||
32 | nop | ||
33 | .size niagara4_patch_copyops,.-niagara4_patch_copyops | ||
34 | |||
35 | .globl niagara4_patch_pageops | ||
36 | .type niagara4_patch_pageops,#function | ||
37 | niagara4_patch_pageops: | ||
38 | NG_DO_PATCH(copy_user_page, NG4copy_user_page) | ||
39 | NG_DO_PATCH(_clear_page, NGclear_page) | ||
40 | NG_DO_PATCH(clear_user_page, NGclear_user_page) | ||
41 | retl | ||
42 | nop | ||
43 | .size niagara4_patch_pageops,.-niagara4_patch_pageops | ||
diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S index b9e790b9c6b8..423d46e2258b 100644 --- a/arch/sparc/lib/NGpage.S +++ b/arch/sparc/lib/NGpage.S | |||
@@ -59,6 +59,8 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | |||
59 | restore | 59 | restore |
60 | 60 | ||
61 | .align 32 | 61 | .align 32 |
62 | .globl NGclear_page | ||
63 | .globl NGclear_user_page | ||
62 | NGclear_page: /* %o0=dest */ | 64 | NGclear_page: /* %o0=dest */ |
63 | NGclear_user_page: /* %o0=dest, %o1=vaddr */ | 65 | NGclear_user_page: /* %o0=dest, %o1=vaddr */ |
64 | rd %asi, %g3 | 66 | rd %asi, %g3 |
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 3b31218cafc6..ee31b884c61b 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c | |||
@@ -134,6 +134,10 @@ EXPORT_SYMBOL(copy_user_page); | |||
134 | void VISenter(void); | 134 | void VISenter(void); |
135 | EXPORT_SYMBOL(VISenter); | 135 | EXPORT_SYMBOL(VISenter); |
136 | 136 | ||
137 | /* CRYPTO code needs this */ | ||
138 | void VISenterhalf(void); | ||
139 | EXPORT_SYMBOL(VISenterhalf); | ||
140 | |||
137 | extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); | 141 | extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); |
138 | extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, | 142 | extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, |
139 | unsigned long *); | 143 | unsigned long *); |
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index d58edf5fefdb..696bb095e0fc 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c | |||
@@ -51,22 +51,40 @@ | |||
51 | 51 | ||
52 | #include "init_64.h" | 52 | #include "init_64.h" |
53 | 53 | ||
54 | unsigned long kern_linear_pte_xor[2] __read_mostly; | 54 | unsigned long kern_linear_pte_xor[4] __read_mostly; |
55 | 55 | ||
56 | /* A bitmap, one bit for every 256MB of physical memory. If the bit | 56 | /* A bitmap, two bits for every 256MB of physical memory. These two |
57 | * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else | 57 | * bits determine what page size we use for kernel linear |
58 | * if set we should use a 256MB page (via kern_linear_pte_xor[1]). | 58 | * translations. They form an index into kern_linear_pte_xor[]. The |
59 | * value in the indexed slot is XOR'd with the TLB miss virtual | ||
60 | * address to form the resulting TTE. The mapping is: | ||
61 | * | ||
62 | * 0 ==> 4MB | ||
63 | * 1 ==> 256MB | ||
64 | * 2 ==> 2GB | ||
65 | * 3 ==> 16GB | ||
66 | * | ||
67 | * All sun4v chips support 256MB pages. Only SPARC-T4 and later | ||
68 | * support 2GB pages, and hopefully future cpus will support the 16GB | ||
69 | * pages as well. For slots 2 and 3, we encode a 256MB TTE xor there | ||
70 | * if these larger page sizes are not supported by the cpu. | ||
71 | * | ||
72 | * It would be nice to determine this from the machine description | ||
73 | * 'cpu' properties, but we need to have this table setup before the | ||
74 | * MDESC is initialized. | ||
59 | */ | 75 | */ |
60 | unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; | 76 | unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; |
61 | 77 | ||
62 | #ifndef CONFIG_DEBUG_PAGEALLOC | 78 | #ifndef CONFIG_DEBUG_PAGEALLOC |
63 | /* A special kernel TSB for 4MB and 256MB linear mappings. | 79 | /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. |
64 | * Space is allocated for this right after the trap table | 80 | * Space is allocated for this right after the trap table in |
65 | * in arch/sparc64/kernel/head.S | 81 | * arch/sparc64/kernel/head.S |
66 | */ | 82 | */ |
67 | extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; | 83 | extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; |
68 | #endif | 84 | #endif |
69 | 85 | ||
86 | static unsigned long cpu_pgsz_mask; | ||
87 | |||
70 | #define MAX_BANKS 32 | 88 | #define MAX_BANKS 32 |
71 | 89 | ||
72 | static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata; | 90 | static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata; |
@@ -403,6 +421,12 @@ EXPORT_SYMBOL(flush_icache_range); | |||
403 | 421 | ||
404 | void mmu_info(struct seq_file *m) | 422 | void mmu_info(struct seq_file *m) |
405 | { | 423 | { |
424 | static const char *pgsz_strings[] = { | ||
425 | "8K", "64K", "512K", "4MB", "32MB", | ||
426 | "256MB", "2GB", "16GB", | ||
427 | }; | ||
428 | int i, printed; | ||
429 | |||
406 | if (tlb_type == cheetah) | 430 | if (tlb_type == cheetah) |
407 | seq_printf(m, "MMU Type\t: Cheetah\n"); | 431 | seq_printf(m, "MMU Type\t: Cheetah\n"); |
408 | else if (tlb_type == cheetah_plus) | 432 | else if (tlb_type == cheetah_plus) |
@@ -414,6 +438,17 @@ void mmu_info(struct seq_file *m) | |||
414 | else | 438 | else |
415 | seq_printf(m, "MMU Type\t: ???\n"); | 439 | seq_printf(m, "MMU Type\t: ???\n"); |
416 | 440 | ||
441 | seq_printf(m, "MMU PGSZs\t: "); | ||
442 | printed = 0; | ||
443 | for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) { | ||
444 | if (cpu_pgsz_mask & (1UL << i)) { | ||
445 | seq_printf(m, "%s%s", | ||
446 | printed ? "," : "", pgsz_strings[i]); | ||
447 | printed++; | ||
448 | } | ||
449 | } | ||
450 | seq_putc(m, '\n'); | ||
451 | |||
417 | #ifdef CONFIG_DEBUG_DCFLUSH | 452 | #ifdef CONFIG_DEBUG_DCFLUSH |
418 | seq_printf(m, "DCPageFlushes\t: %d\n", | 453 | seq_printf(m, "DCPageFlushes\t: %d\n", |
419 | atomic_read(&dcpage_flushes)); | 454 | atomic_read(&dcpage_flushes)); |
@@ -1358,32 +1393,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, | |||
1358 | extern unsigned int kvmap_linear_patch[1]; | 1393 | extern unsigned int kvmap_linear_patch[1]; |
1359 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | 1394 | #endif /* CONFIG_DEBUG_PAGEALLOC */ |
1360 | 1395 | ||
1361 | static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) | 1396 | static void __init kpte_set_val(unsigned long index, unsigned long val) |
1362 | { | 1397 | { |
1363 | const unsigned long shift_256MB = 28; | 1398 | unsigned long *ptr = kpte_linear_bitmap; |
1364 | const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL); | ||
1365 | const unsigned long size_256MB = (1UL << shift_256MB); | ||
1366 | 1399 | ||
1367 | while (start < end) { | 1400 | val <<= ((index % (BITS_PER_LONG / 2)) * 2); |
1368 | long remains; | 1401 | ptr += (index / (BITS_PER_LONG / 2)); |
1369 | 1402 | ||
1370 | remains = end - start; | 1403 | *ptr |= val; |
1371 | if (remains < size_256MB) | 1404 | } |
1372 | break; | ||
1373 | 1405 | ||
1374 | if (start & mask_256MB) { | 1406 | static const unsigned long kpte_shift_min = 28; /* 256MB */ |
1375 | start = (start + size_256MB) & ~mask_256MB; | 1407 | static const unsigned long kpte_shift_max = 34; /* 16GB */ |
1376 | continue; | 1408 | static const unsigned long kpte_shift_incr = 3; |
1377 | } | 1409 | |
1410 | static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, | ||
1411 | unsigned long shift) | ||
1412 | { | ||
1413 | unsigned long size = (1UL << shift); | ||
1414 | unsigned long mask = (size - 1UL); | ||
1415 | unsigned long remains = end - start; | ||
1416 | unsigned long val; | ||
1417 | |||
1418 | if (remains < size || (start & mask)) | ||
1419 | return start; | ||
1420 | |||
1421 | /* VAL maps: | ||
1422 | * | ||
1423 | * shift 28 --> kern_linear_pte_xor index 1 | ||
1424 | * shift 31 --> kern_linear_pte_xor index 2 | ||
1425 | * shift 34 --> kern_linear_pte_xor index 3 | ||
1426 | */ | ||
1427 | val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; | ||
1428 | |||
1429 | remains &= ~mask; | ||
1430 | if (shift != kpte_shift_max) | ||
1431 | remains = size; | ||
1378 | 1432 | ||
1379 | while (remains >= size_256MB) { | 1433 | while (remains) { |
1380 | unsigned long index = start >> shift_256MB; | 1434 | unsigned long index = start >> kpte_shift_min; |
1381 | 1435 | ||
1382 | __set_bit(index, kpte_linear_bitmap); | 1436 | kpte_set_val(index, val); |
1383 | 1437 | ||
1384 | start += size_256MB; | 1438 | start += 1UL << kpte_shift_min; |
1385 | remains -= size_256MB; | 1439 | remains -= 1UL << kpte_shift_min; |
1440 | } | ||
1441 | |||
1442 | return start; | ||
1443 | } | ||
1444 | |||
1445 | static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) | ||
1446 | { | ||
1447 | unsigned long smallest_size, smallest_mask; | ||
1448 | unsigned long s; | ||
1449 | |||
1450 | smallest_size = (1UL << kpte_shift_min); | ||
1451 | smallest_mask = (smallest_size - 1UL); | ||
1452 | |||
1453 | while (start < end) { | ||
1454 | unsigned long orig_start = start; | ||
1455 | |||
1456 | for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { | ||
1457 | start = kpte_mark_using_shift(start, end, s); | ||
1458 | |||
1459 | if (start != orig_start) | ||
1460 | break; | ||
1386 | } | 1461 | } |
1462 | |||
1463 | if (start == orig_start) | ||
1464 | start = (start + smallest_size) & ~smallest_mask; | ||
1387 | } | 1465 | } |
1388 | } | 1466 | } |
1389 | 1467 | ||
@@ -1577,13 +1655,16 @@ static void __init sun4v_ktsb_init(void) | |||
1577 | ktsb_descr[0].resv = 0; | 1655 | ktsb_descr[0].resv = 0; |
1578 | 1656 | ||
1579 | #ifndef CONFIG_DEBUG_PAGEALLOC | 1657 | #ifndef CONFIG_DEBUG_PAGEALLOC |
1580 | /* Second KTSB for 4MB/256MB mappings. */ | 1658 | /* Second KTSB for 4MB/256MB/2GB/16GB mappings. */ |
1581 | ktsb_pa = (kern_base + | 1659 | ktsb_pa = (kern_base + |
1582 | ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); | 1660 | ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); |
1583 | 1661 | ||
1584 | ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; | 1662 | ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; |
1585 | ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB | | 1663 | ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB | |
1586 | HV_PGSZ_MASK_256MB); | 1664 | HV_PGSZ_MASK_256MB | |
1665 | HV_PGSZ_MASK_2GB | | ||
1666 | HV_PGSZ_MASK_16GB) & | ||
1667 | cpu_pgsz_mask); | ||
1587 | ktsb_descr[1].assoc = 1; | 1668 | ktsb_descr[1].assoc = 1; |
1588 | ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; | 1669 | ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; |
1589 | ktsb_descr[1].ctx_idx = 0; | 1670 | ktsb_descr[1].ctx_idx = 0; |
@@ -1606,6 +1687,47 @@ void __cpuinit sun4v_ktsb_register(void) | |||
1606 | } | 1687 | } |
1607 | } | 1688 | } |
1608 | 1689 | ||
1690 | static void __init sun4u_linear_pte_xor_finalize(void) | ||
1691 | { | ||
1692 | #ifndef CONFIG_DEBUG_PAGEALLOC | ||
1693 | /* This is where we would add Panther support for | ||
1694 | * 32MB and 256MB pages. | ||
1695 | */ | ||
1696 | #endif | ||
1697 | } | ||
1698 | |||
1699 | static void __init sun4v_linear_pte_xor_finalize(void) | ||
1700 | { | ||
1701 | #ifndef CONFIG_DEBUG_PAGEALLOC | ||
1702 | if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { | ||
1703 | kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ | ||
1704 | 0xfffff80000000000UL; | ||
1705 | kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | | ||
1706 | _PAGE_P_4V | _PAGE_W_4V); | ||
1707 | } else { | ||
1708 | kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; | ||
1709 | } | ||
1710 | |||
1711 | if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { | ||
1712 | kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ | ||
1713 | 0xfffff80000000000UL; | ||
1714 | kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | | ||
1715 | _PAGE_P_4V | _PAGE_W_4V); | ||
1716 | } else { | ||
1717 | kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; | ||
1718 | } | ||
1719 | |||
1720 | if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { | ||
1721 | kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ | ||
1722 | 0xfffff80000000000UL; | ||
1723 | kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | | ||
1724 | _PAGE_P_4V | _PAGE_W_4V); | ||
1725 | } else { | ||
1726 | kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; | ||
1727 | } | ||
1728 | #endif | ||
1729 | } | ||
1730 | |||
1609 | /* paging_init() sets up the page tables */ | 1731 | /* paging_init() sets up the page tables */ |
1610 | 1732 | ||
1611 | static unsigned long last_valid_pfn; | 1733 | static unsigned long last_valid_pfn; |
@@ -1665,10 +1787,8 @@ void __init paging_init(void) | |||
1665 | ktsb_phys_patch(); | 1787 | ktsb_phys_patch(); |
1666 | } | 1788 | } |
1667 | 1789 | ||
1668 | if (tlb_type == hypervisor) { | 1790 | if (tlb_type == hypervisor) |
1669 | sun4v_patch_tlb_handlers(); | 1791 | sun4v_patch_tlb_handlers(); |
1670 | sun4v_ktsb_init(); | ||
1671 | } | ||
1672 | 1792 | ||
1673 | /* Find available physical memory... | 1793 | /* Find available physical memory... |
1674 | * | 1794 | * |
@@ -1727,9 +1847,6 @@ void __init paging_init(void) | |||
1727 | 1847 | ||
1728 | __flush_tlb_all(); | 1848 | __flush_tlb_all(); |
1729 | 1849 | ||
1730 | if (tlb_type == hypervisor) | ||
1731 | sun4v_ktsb_register(); | ||
1732 | |||
1733 | prom_build_devicetree(); | 1850 | prom_build_devicetree(); |
1734 | of_populate_present_mask(); | 1851 | of_populate_present_mask(); |
1735 | #ifndef CONFIG_SMP | 1852 | #ifndef CONFIG_SMP |
@@ -1742,8 +1859,36 @@ void __init paging_init(void) | |||
1742 | #ifndef CONFIG_SMP | 1859 | #ifndef CONFIG_SMP |
1743 | mdesc_fill_in_cpu_data(cpu_all_mask); | 1860 | mdesc_fill_in_cpu_data(cpu_all_mask); |
1744 | #endif | 1861 | #endif |
1862 | mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask); | ||
1863 | |||
1864 | sun4v_linear_pte_xor_finalize(); | ||
1865 | |||
1866 | sun4v_ktsb_init(); | ||
1867 | sun4v_ktsb_register(); | ||
1868 | } else { | ||
1869 | unsigned long impl, ver; | ||
1870 | |||
1871 | cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K | | ||
1872 | HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB); | ||
1873 | |||
1874 | __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); | ||
1875 | impl = ((ver >> 32) & 0xffff); | ||
1876 | if (impl == PANTHER_IMPL) | ||
1877 | cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB | | ||
1878 | HV_PGSZ_MASK_256MB); | ||
1879 | |||
1880 | sun4u_linear_pte_xor_finalize(); | ||
1745 | } | 1881 | } |
1746 | 1882 | ||
1883 | /* Flush the TLBs and the 4M TSB so that the updated linear | ||
1884 | * pte XOR settings are realized for all mappings. | ||
1885 | */ | ||
1886 | __flush_tlb_all(); | ||
1887 | #ifndef CONFIG_DEBUG_PAGEALLOC | ||
1888 | memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); | ||
1889 | #endif | ||
1890 | __flush_tlb_all(); | ||
1891 | |||
1747 | /* Setup bootmem... */ | 1892 | /* Setup bootmem... */ |
1748 | last_valid_pfn = end_pfn = bootmem_init(phys_base); | 1893 | last_valid_pfn = end_pfn = bootmem_init(phys_base); |
1749 | 1894 | ||
@@ -2110,6 +2255,7 @@ static void __init sun4u_pgprot_init(void) | |||
2110 | { | 2255 | { |
2111 | unsigned long page_none, page_shared, page_copy, page_readonly; | 2256 | unsigned long page_none, page_shared, page_copy, page_readonly; |
2112 | unsigned long page_exec_bit; | 2257 | unsigned long page_exec_bit; |
2258 | int i; | ||
2113 | 2259 | ||
2114 | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | | 2260 | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | |
2115 | _PAGE_CACHE_4U | _PAGE_P_4U | | 2261 | _PAGE_CACHE_4U | _PAGE_P_4U | |
@@ -2137,8 +2283,8 @@ static void __init sun4u_pgprot_init(void) | |||
2137 | kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | | 2283 | kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | |
2138 | _PAGE_P_4U | _PAGE_W_4U); | 2284 | _PAGE_P_4U | _PAGE_W_4U); |
2139 | 2285 | ||
2140 | /* XXX Should use 256MB on Panther. XXX */ | 2286 | for (i = 1; i < 4; i++) |
2141 | kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; | 2287 | kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; |
2142 | 2288 | ||
2143 | _PAGE_SZBITS = _PAGE_SZBITS_4U; | 2289 | _PAGE_SZBITS = _PAGE_SZBITS_4U; |
2144 | _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | | 2290 | _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | |
@@ -2164,6 +2310,7 @@ static void __init sun4v_pgprot_init(void) | |||
2164 | { | 2310 | { |
2165 | unsigned long page_none, page_shared, page_copy, page_readonly; | 2311 | unsigned long page_none, page_shared, page_copy, page_readonly; |
2166 | unsigned long page_exec_bit; | 2312 | unsigned long page_exec_bit; |
2313 | int i; | ||
2167 | 2314 | ||
2168 | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | | 2315 | PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | |
2169 | _PAGE_CACHE_4V | _PAGE_P_4V | | 2316 | _PAGE_CACHE_4V | _PAGE_P_4V | |
@@ -2185,15 +2332,8 @@ static void __init sun4v_pgprot_init(void) | |||
2185 | kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | | 2332 | kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | |
2186 | _PAGE_P_4V | _PAGE_W_4V); | 2333 | _PAGE_P_4V | _PAGE_W_4V); |
2187 | 2334 | ||
2188 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2335 | for (i = 1; i < 4; i++) |
2189 | kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^ | 2336 | kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; |
2190 | 0xfffff80000000000UL; | ||
2191 | #else | ||
2192 | kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ | ||
2193 | 0xfffff80000000000UL; | ||
2194 | #endif | ||
2195 | kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | | ||
2196 | _PAGE_P_4V | _PAGE_W_4V); | ||
2197 | 2337 | ||
2198 | pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | | 2338 | pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | |
2199 | __ACCESS_BITS_4V | _PAGE_E_4V); | 2339 | __ACCESS_BITS_4V | _PAGE_E_4V); |
diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index 3e1ac8b96cae..0661aa606dec 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h | |||
@@ -8,12 +8,12 @@ | |||
8 | #define MAX_PHYS_ADDRESS (1UL << 41UL) | 8 | #define MAX_PHYS_ADDRESS (1UL << 41UL) |
9 | #define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) | 9 | #define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) |
10 | #define KPTE_BITMAP_BYTES \ | 10 | #define KPTE_BITMAP_BYTES \ |
11 | ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8) | 11 | ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) |
12 | #define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) | 12 | #define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) |
13 | #define VALID_ADDR_BITMAP_BYTES \ | 13 | #define VALID_ADDR_BITMAP_BYTES \ |
14 | ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) | 14 | ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) |
15 | 15 | ||
16 | extern unsigned long kern_linear_pte_xor[2]; | 16 | extern unsigned long kern_linear_pte_xor[4]; |
17 | extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; | 17 | extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; |
18 | extern unsigned int sparc64_highest_unlocked_tlb_ent; | 18 | extern unsigned int sparc64_highest_unlocked_tlb_ent; |
19 | extern unsigned long sparc64_kern_pri_context; | 19 | extern unsigned long sparc64_kern_pri_context; |