aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/crypto
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2012-08-30 11:11:01 -0400
committer: David S. Miller <davem@davemloft.net> 2012-08-30 11:11:01 -0400
commit: 301013159e4cdce44700418c8fd5eadb270e2d3a (patch)
tree: af377685fb89ddc20c751722d5bd68489db425f6 /arch/sparc/crypto
parent: 03d168ad122d6e622ad00490211704c4f2994976 (diff)
sparc64: Unroll ECB decryption loops in AES driver.
Before:

testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 223 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 230 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 325 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 719 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 4266 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 211 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 234 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 353 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 808 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 5344 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 214 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 243 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 393 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 939 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 6039 cycles (8192 bytes)

After:

testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 226 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 231 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 313 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 681 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 3964 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 205 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 240 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 341 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 770 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 5050 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 216 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 250 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 371 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 869 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 5494 cycles (8192 bytes)

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/crypto')
-rw-r--r-- arch/sparc/crypto/aes_asm.S 161
1 files changed, 143 insertions, 18 deletions
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
index 33d59c66f1e2..0bd3e04ac42d 100644
--- a/arch/sparc/crypto/aes_asm.S
+++ b/arch/sparc/crypto/aes_asm.S
@@ -161,12 +161,32 @@
161 AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ 161 AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \
162 AES_DROUND01(KEY_BASE + 6, T0, T1, I0) 162 AES_DROUND01(KEY_BASE + 6, T0, T1, I0)
163 163
164#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
165 AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
166 AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
167 AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \
168 AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \
169 AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \
170 AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \
171 AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \
172 AES_DROUND01(KEY_BASE + 6, T2, T3, I2)
173
164#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ 174#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
165 AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ 175 AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
166 AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ 176 AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
167 AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ 177 AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \
168 AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) 178 AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0)
169 179
180#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
181 AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
182 AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
183 AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \
184 AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \
185 AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \
186 AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \
187 AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \
188 AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2)
189
170 /* 10 rounds */ 190 /* 10 rounds */
171#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ 191#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \
172 DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ 192 DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
@@ -175,6 +195,13 @@
175 DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ 195 DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
176 DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) 196 DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
177 197
198#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
199 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
200 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
201 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
202 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
203 DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
204
178 /* 12 rounds */ 205 /* 12 rounds */
179#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ 206#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \
180 DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ 207 DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
@@ -184,6 +211,14 @@
184 DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ 211 DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
185 DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) 212 DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
186 213
214#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
215 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
216 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
217 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
218 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
219 DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
220 DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
221
187 /* 14 rounds */ 222 /* 14 rounds */
188#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ 223#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \
189 DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ 224 DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
@@ -194,6 +229,32 @@
194 DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ 229 DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
195 DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) 230 DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
196 231
232#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
233 DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
234 TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
235
236#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
237 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \
238 ldd [%o0 + 0x18], %f56; \
239 ldd [%o0 + 0x10], %f58; \
240 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \
241 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \
242 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \
243 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \
244 DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \
245 AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \
246 AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \
247 AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \
248 AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \
249 AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \
250 AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \
251 ldd [%o0 + 0xd8], %f8; \
252 ldd [%o0 + 0xd0], %f10; \
253 AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \
254 AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2)
255 ldd [%o0 + 0xc8], %f12; \
256 ldd [%o0 + 0xc0], %f14;
257
197 .align 32 258 .align 32
198ENTRY(aes_sparc64_key_expand) 259ENTRY(aes_sparc64_key_expand)
199 /* %o0=input_key, %o1=output_key, %o2=key_len */ 260 /* %o0=input_key, %o1=output_key, %o2=key_len */
@@ -1028,10 +1089,34 @@ ENDPROC(aes_sparc64_ecb_encrypt_256)
1028ENTRY(aes_sparc64_ecb_decrypt_128) 1089ENTRY(aes_sparc64_ecb_decrypt_128)
1029 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ 1090 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
1030 ldx [%o0 - 0x10], %g1 1091 ldx [%o0 - 0x10], %g1
1031 ldx [%o0 - 0x08], %g2 1092 subcc %o3, 0x10, %o3
1093 be 10f
1094 ldx [%o0 - 0x08], %g2
10321: ldx [%o1 + 0x00], %g3 10951: ldx [%o1 + 0x00], %g3
1033 ldx [%o1 + 0x08], %g7 1096 ldx [%o1 + 0x08], %g7
1034 add %o1, 0x10, %o1 1097 ldx [%o1 + 0x10], %o4
1098 ldx [%o1 + 0x18], %o5
1099 xor %g1, %g3, %g3
1100 xor %g2, %g7, %g7
1101 MOVXTOD_G3_F4
1102 MOVXTOD_G7_F6
1103 xor %g1, %o4, %g3
1104 xor %g2, %o5, %g7
1105 MOVXTOD_G3_F60
1106 MOVXTOD_G7_F62
1107 DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
1108 std %f4, [%o2 + 0x00]
1109 std %f6, [%o2 + 0x08]
1110 std %f60, [%o2 + 0x10]
1111 std %f62, [%o2 + 0x18]
1112 sub %o3, 0x20, %o3
1113 add %o1, 0x20, %o1
1114 brgz,pt %o3, 1b
1115 add %o2, 0x20, %o2
1116 brlz,pt %o3, 11f
1117 nop
111810: ldx [%o1 + 0x00], %g3
1119 ldx [%o1 + 0x08], %g7
1035 xor %g1, %g3, %g3 1120 xor %g1, %g3, %g3
1036 xor %g2, %g7, %g7 1121 xor %g2, %g7, %g7
1037 MOVXTOD_G3_F4 1122 MOVXTOD_G3_F4
@@ -1039,10 +1124,7 @@ ENTRY(aes_sparc64_ecb_decrypt_128)
1039 DECRYPT_128(8, 4, 6, 0, 2) 1124 DECRYPT_128(8, 4, 6, 0, 2)
1040 std %f4, [%o2 + 0x00] 1125 std %f4, [%o2 + 0x00]
1041 std %f6, [%o2 + 0x08] 1126 std %f6, [%o2 + 0x08]
1042 subcc %o3, 0x10, %o3 112711: retl
1043 bne,pt %xcc, 1b
1044 add %o2, 0x10, %o2
1045 retl
1046 nop 1128 nop
1047ENDPROC(aes_sparc64_ecb_decrypt_128) 1129ENDPROC(aes_sparc64_ecb_decrypt_128)
1048 1130
@@ -1050,10 +1132,34 @@ ENDPROC(aes_sparc64_ecb_decrypt_128)
1050ENTRY(aes_sparc64_ecb_decrypt_192) 1132ENTRY(aes_sparc64_ecb_decrypt_192)
1051 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ 1133 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
1052 ldx [%o0 - 0x10], %g1 1134 ldx [%o0 - 0x10], %g1
1053 ldx [%o0 - 0x08], %g2 1135 subcc %o3, 0x10, %o3
1136 be 10f
1137 ldx [%o0 - 0x08], %g2
10541: ldx [%o1 + 0x00], %g3 11381: ldx [%o1 + 0x00], %g3
1055 ldx [%o1 + 0x08], %g7 1139 ldx [%o1 + 0x08], %g7
1056 add %o1, 0x10, %o1 1140 ldx [%o1 + 0x10], %o4
1141 ldx [%o1 + 0x18], %o5
1142 xor %g1, %g3, %g3
1143 xor %g2, %g7, %g7
1144 MOVXTOD_G3_F4
1145 MOVXTOD_G7_F6
1146 xor %g1, %o4, %g3
1147 xor %g2, %o5, %g7
1148 MOVXTOD_G3_F60
1149 MOVXTOD_G7_F62
1150 DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
1151 std %f4, [%o2 + 0x00]
1152 std %f6, [%o2 + 0x08]
1153 std %f60, [%o2 + 0x10]
1154 std %f62, [%o2 + 0x18]
1155 sub %o3, 0x20, %o3
1156 add %o1, 0x20, %o1
1157 brgz,pt %o3, 1b
1158 add %o2, 0x20, %o2
1159 brlz,pt %o3, 11f
1160 nop
116110: ldx [%o1 + 0x00], %g3
1162 ldx [%o1 + 0x08], %g7
1057 xor %g1, %g3, %g3 1163 xor %g1, %g3, %g3
1058 xor %g2, %g7, %g7 1164 xor %g2, %g7, %g7
1059 MOVXTOD_G3_F4 1165 MOVXTOD_G3_F4
@@ -1061,10 +1167,7 @@ ENTRY(aes_sparc64_ecb_decrypt_192)
1061 DECRYPT_192(8, 4, 6, 0, 2) 1167 DECRYPT_192(8, 4, 6, 0, 2)
1062 std %f4, [%o2 + 0x00] 1168 std %f4, [%o2 + 0x00]
1063 std %f6, [%o2 + 0x08] 1169 std %f6, [%o2 + 0x08]
1064 subcc %o3, 0x10, %o3 117011: retl
1065 bne,pt %xcc, 1b
1066 add %o2, 0x10, %o2
1067 retl
1068 nop 1171 nop
1069ENDPROC(aes_sparc64_ecb_decrypt_192) 1172ENDPROC(aes_sparc64_ecb_decrypt_192)
1070 1173
@@ -1072,10 +1175,35 @@ ENDPROC(aes_sparc64_ecb_decrypt_192)
1072ENTRY(aes_sparc64_ecb_decrypt_256) 1175ENTRY(aes_sparc64_ecb_decrypt_256)
1073 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ 1176 /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
1074 ldx [%o0 - 0x10], %g1 1177 ldx [%o0 - 0x10], %g1
1075 ldx [%o0 - 0x08], %g2 1178 subcc %o3, 0x10, %o3
1179 be 10f
1180 ldx [%o0 - 0x08], %g2
1181 sub %o0, 0xf0, %o0
10761: ldx [%o1 + 0x00], %g3 11821: ldx [%o1 + 0x00], %g3
1077 ldx [%o1 + 0x08], %g7 1183 ldx [%o1 + 0x08], %g7
1078 add %o1, 0x10, %o1 1184 ldx [%o1 + 0x10], %o4
1185 ldx [%o1 + 0x18], %o5
1186 xor %g1, %g3, %g3
1187 xor %g2, %g7, %g7
1188 MOVXTOD_G3_F4
1189 MOVXTOD_G7_F6
1190 xor %g1, %o4, %g3
1191 xor %g2, %o5, %g7
1192 MOVXTOD_G3_F0
1193 MOVXTOD_G7_F2
1194 DECRYPT_256_2(8, 4, 6, 0, 2)
1195 std %f4, [%o2 + 0x00]
1196 std %f6, [%o2 + 0x08]
1197 std %f60, [%o2 + 0x10]
1198 std %f62, [%o2 + 0x18]
1199 sub %o3, 0x20, %o3
1200 add %o1, 0x20, %o1
1201 brgz,pt %o3, 1b
1202 add %o2, 0x20, %o2
1203 brlz,pt %o3, 11f
1204 nop
120510: ldx [%o1 + 0x00], %g3
1206 ldx [%o1 + 0x08], %g7
1079 xor %g1, %g3, %g3 1207 xor %g1, %g3, %g3
1080 xor %g2, %g7, %g7 1208 xor %g2, %g7, %g7
1081 MOVXTOD_G3_F4 1209 MOVXTOD_G3_F4
@@ -1083,10 +1211,7 @@ ENTRY(aes_sparc64_ecb_decrypt_256)
1083 DECRYPT_256(8, 4, 6, 0, 2) 1211 DECRYPT_256(8, 4, 6, 0, 2)
1084 std %f4, [%o2 + 0x00] 1212 std %f4, [%o2 + 0x00]
1085 std %f6, [%o2 + 0x08] 1213 std %f6, [%o2 + 0x08]
1086 subcc %o3, 0x10, %o3 121411: retl
1087 bne,pt %xcc, 1b
1088 add %o2, 0x10, %o2
1089 retl
1090 nop 1215 nop
1091ENDPROC(aes_sparc64_ecb_decrypt_256) 1216ENDPROC(aes_sparc64_ecb_decrypt_256)
1092 1217