diff options
author | David S. Miller <davem@davemloft.net> | 2012-08-30 11:11:01 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-08-30 11:11:01 -0400 |
commit | 301013159e4cdce44700418c8fd5eadb270e2d3a (patch) | |
tree | af377685fb89ddc20c751722d5bd68489db425f6 /arch/sparc/crypto | |
parent | 03d168ad122d6e622ad00490211704c4f2994976 (diff) |
sparc64: Unroll ECB decryption loops in AES driver.
Before:
testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 223 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 230 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 325 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 719 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 4266 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 211 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 234 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 353 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 808 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 5344 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 214 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 243 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 393 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 939 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 6039 cycles (8192 bytes)
After:
testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 226 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 231 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 313 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 681 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 3964 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 205 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 240 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 341 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 770 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 5050 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 216 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 250 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 371 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 869 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 5494 cycles (8192 bytes)
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/crypto')
-rw-r--r-- | arch/sparc/crypto/aes_asm.S | 161 |
1 file changed, 143 insertions, 18 deletions
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
index 33d59c66f1e2..0bd3e04ac42d 100644
--- a/arch/sparc/crypto/aes_asm.S
+++ b/arch/sparc/crypto/aes_asm.S
@@ -161,12 +161,32 @@ | |||
161 | AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ | 161 | AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ |
162 | AES_DROUND01(KEY_BASE + 6, T0, T1, I0) | 162 | AES_DROUND01(KEY_BASE + 6, T0, T1, I0) |
163 | 163 | ||
164 | #define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
165 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
166 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
167 | AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ | ||
168 | AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ | ||
169 | AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \ | ||
170 | AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \ | ||
171 | AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \ | ||
172 | AES_DROUND01(KEY_BASE + 6, T2, T3, I2) | ||
173 | |||
164 | #define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ | 174 | #define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \ |
165 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | 175 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ |
166 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | 176 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ |
167 | AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ | 177 | AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ |
168 | AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) | 178 | AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) |
169 | 179 | ||
180 | #define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
181 | AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \ | ||
182 | AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \ | ||
183 | AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \ | ||
184 | AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \ | ||
185 | AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \ | ||
186 | AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \ | ||
187 | AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \ | ||
188 | AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2) | ||
189 | |||
170 | /* 10 rounds */ | 190 | /* 10 rounds */ |
171 | #define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ | 191 | #define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \ |
172 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | 192 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ |
@@ -175,6 +195,13 @@ | |||
175 | DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ | 195 | DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \ |
176 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) | 196 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1) |
177 | 197 | ||
198 | #define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
199 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
200 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
201 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
202 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
203 | DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) | ||
204 | |||
178 | /* 12 rounds */ | 205 | /* 12 rounds */ |
179 | #define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ | 206 | #define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \ |
180 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | 207 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ |
@@ -184,6 +211,14 @@ | |||
184 | DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ | 211 | DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \ |
185 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) | 212 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1) |
186 | 213 | ||
214 | #define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
215 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
216 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
217 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
218 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
219 | DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \ | ||
220 | DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3) | ||
221 | |||
187 | /* 14 rounds */ | 222 | /* 14 rounds */ |
188 | #define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ | 223 | #define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \ |
189 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ | 224 | DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \ |
@@ -194,6 +229,32 @@ | |||
194 | DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ | 229 | DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \ |
195 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) | 230 | DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1) |
196 | 231 | ||
232 | #define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \ | ||
233 | DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \ | ||
234 | TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6) | ||
235 | |||
236 | #define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \ | ||
237 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \ | ||
238 | ldd [%o0 + 0x18], %f56; \ | ||
239 | ldd [%o0 + 0x10], %f58; \ | ||
240 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
241 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
242 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
243 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
244 | DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \ | ||
245 | AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \ | ||
246 | AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \ | ||
247 | AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \ | ||
248 | AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \ | ||
249 | AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \ | ||
250 | AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \ | ||
251 | ldd [%o0 + 0xd8], %f8; \ | ||
252 | ldd [%o0 + 0xd0], %f10; \ | ||
253 | AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \ | ||
254 | AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) | ||
255 | ldd [%o0 + 0xc8], %f12; \ | ||
256 | ldd [%o0 + 0xc0], %f14; | ||
257 | |||
197 | .align 32 | 258 | .align 32 |
198 | ENTRY(aes_sparc64_key_expand) | 259 | ENTRY(aes_sparc64_key_expand) |
199 | /* %o0=input_key, %o1=output_key, %o2=key_len */ | 260 | /* %o0=input_key, %o1=output_key, %o2=key_len */ |
@@ -1028,10 +1089,34 @@ ENDPROC(aes_sparc64_ecb_encrypt_256) | |||
1028 | ENTRY(aes_sparc64_ecb_decrypt_128) | 1089 | ENTRY(aes_sparc64_ecb_decrypt_128) |
1029 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ | 1090 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ |
1030 | ldx [%o0 - 0x10], %g1 | 1091 | ldx [%o0 - 0x10], %g1 |
1031 | ldx [%o0 - 0x08], %g2 | 1092 | subcc %o3, 0x10, %o3 |
1093 | be 10f | ||
1094 | ldx [%o0 - 0x08], %g2 | ||
1032 | 1: ldx [%o1 + 0x00], %g3 | 1095 | 1: ldx [%o1 + 0x00], %g3 |
1033 | ldx [%o1 + 0x08], %g7 | 1096 | ldx [%o1 + 0x08], %g7 |
1034 | add %o1, 0x10, %o1 | 1097 | ldx [%o1 + 0x10], %o4 |
1098 | ldx [%o1 + 0x18], %o5 | ||
1099 | xor %g1, %g3, %g3 | ||
1100 | xor %g2, %g7, %g7 | ||
1101 | MOVXTOD_G3_F4 | ||
1102 | MOVXTOD_G7_F6 | ||
1103 | xor %g1, %o4, %g3 | ||
1104 | xor %g2, %o5, %g7 | ||
1105 | MOVXTOD_G3_F60 | ||
1106 | MOVXTOD_G7_F62 | ||
1107 | DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
1108 | std %f4, [%o2 + 0x00] | ||
1109 | std %f6, [%o2 + 0x08] | ||
1110 | std %f60, [%o2 + 0x10] | ||
1111 | std %f62, [%o2 + 0x18] | ||
1112 | sub %o3, 0x20, %o3 | ||
1113 | add %o1, 0x20, %o1 | ||
1114 | brgz,pt %o3, 1b | ||
1115 | add %o2, 0x20, %o2 | ||
1116 | brlz,pt %o3, 11f | ||
1117 | nop | ||
1118 | 10: ldx [%o1 + 0x00], %g3 | ||
1119 | ldx [%o1 + 0x08], %g7 | ||
1035 | xor %g1, %g3, %g3 | 1120 | xor %g1, %g3, %g3 |
1036 | xor %g2, %g7, %g7 | 1121 | xor %g2, %g7, %g7 |
1037 | MOVXTOD_G3_F4 | 1122 | MOVXTOD_G3_F4 |
@@ -1039,10 +1124,7 @@ ENTRY(aes_sparc64_ecb_decrypt_128) | |||
1039 | DECRYPT_128(8, 4, 6, 0, 2) | 1124 | DECRYPT_128(8, 4, 6, 0, 2) |
1040 | std %f4, [%o2 + 0x00] | 1125 | std %f4, [%o2 + 0x00] |
1041 | std %f6, [%o2 + 0x08] | 1126 | std %f6, [%o2 + 0x08] |
1042 | subcc %o3, 0x10, %o3 | 1127 | 11: retl |
1043 | bne,pt %xcc, 1b | ||
1044 | add %o2, 0x10, %o2 | ||
1045 | retl | ||
1046 | nop | 1128 | nop |
1047 | ENDPROC(aes_sparc64_ecb_decrypt_128) | 1129 | ENDPROC(aes_sparc64_ecb_decrypt_128) |
1048 | 1130 | ||
@@ -1050,10 +1132,34 @@ ENDPROC(aes_sparc64_ecb_decrypt_128) | |||
1050 | ENTRY(aes_sparc64_ecb_decrypt_192) | 1132 | ENTRY(aes_sparc64_ecb_decrypt_192) |
1051 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ | 1133 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ |
1052 | ldx [%o0 - 0x10], %g1 | 1134 | ldx [%o0 - 0x10], %g1 |
1053 | ldx [%o0 - 0x08], %g2 | 1135 | subcc %o3, 0x10, %o3 |
1136 | be 10f | ||
1137 | ldx [%o0 - 0x08], %g2 | ||
1054 | 1: ldx [%o1 + 0x00], %g3 | 1138 | 1: ldx [%o1 + 0x00], %g3 |
1055 | ldx [%o1 + 0x08], %g7 | 1139 | ldx [%o1 + 0x08], %g7 |
1056 | add %o1, 0x10, %o1 | 1140 | ldx [%o1 + 0x10], %o4 |
1141 | ldx [%o1 + 0x18], %o5 | ||
1142 | xor %g1, %g3, %g3 | ||
1143 | xor %g2, %g7, %g7 | ||
1144 | MOVXTOD_G3_F4 | ||
1145 | MOVXTOD_G7_F6 | ||
1146 | xor %g1, %o4, %g3 | ||
1147 | xor %g2, %o5, %g7 | ||
1148 | MOVXTOD_G3_F60 | ||
1149 | MOVXTOD_G7_F62 | ||
1150 | DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58) | ||
1151 | std %f4, [%o2 + 0x00] | ||
1152 | std %f6, [%o2 + 0x08] | ||
1153 | std %f60, [%o2 + 0x10] | ||
1154 | std %f62, [%o2 + 0x18] | ||
1155 | sub %o3, 0x20, %o3 | ||
1156 | add %o1, 0x20, %o1 | ||
1157 | brgz,pt %o3, 1b | ||
1158 | add %o2, 0x20, %o2 | ||
1159 | brlz,pt %o3, 11f | ||
1160 | nop | ||
1161 | 10: ldx [%o1 + 0x00], %g3 | ||
1162 | ldx [%o1 + 0x08], %g7 | ||
1057 | xor %g1, %g3, %g3 | 1163 | xor %g1, %g3, %g3 |
1058 | xor %g2, %g7, %g7 | 1164 | xor %g2, %g7, %g7 |
1059 | MOVXTOD_G3_F4 | 1165 | MOVXTOD_G3_F4 |
@@ -1061,10 +1167,7 @@ ENTRY(aes_sparc64_ecb_decrypt_192) | |||
1061 | DECRYPT_192(8, 4, 6, 0, 2) | 1167 | DECRYPT_192(8, 4, 6, 0, 2) |
1062 | std %f4, [%o2 + 0x00] | 1168 | std %f4, [%o2 + 0x00] |
1063 | std %f6, [%o2 + 0x08] | 1169 | std %f6, [%o2 + 0x08] |
1064 | subcc %o3, 0x10, %o3 | 1170 | 11: retl |
1065 | bne,pt %xcc, 1b | ||
1066 | add %o2, 0x10, %o2 | ||
1067 | retl | ||
1068 | nop | 1171 | nop |
1069 | ENDPROC(aes_sparc64_ecb_decrypt_192) | 1172 | ENDPROC(aes_sparc64_ecb_decrypt_192) |
1070 | 1173 | ||
@@ -1072,10 +1175,35 @@ ENDPROC(aes_sparc64_ecb_decrypt_192) | |||
1072 | ENTRY(aes_sparc64_ecb_decrypt_256) | 1175 | ENTRY(aes_sparc64_ecb_decrypt_256) |
1073 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ | 1176 | /* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */ |
1074 | ldx [%o0 - 0x10], %g1 | 1177 | ldx [%o0 - 0x10], %g1 |
1075 | ldx [%o0 - 0x08], %g2 | 1178 | subcc %o3, 0x10, %o3 |
1179 | be 10f | ||
1180 | ldx [%o0 - 0x08], %g2 | ||
1181 | sub %o0, 0xf0, %o0 | ||
1076 | 1: ldx [%o1 + 0x00], %g3 | 1182 | 1: ldx [%o1 + 0x00], %g3 |
1077 | ldx [%o1 + 0x08], %g7 | 1183 | ldx [%o1 + 0x08], %g7 |
1078 | add %o1, 0x10, %o1 | 1184 | ldx [%o1 + 0x10], %o4 |
1185 | ldx [%o1 + 0x18], %o5 | ||
1186 | xor %g1, %g3, %g3 | ||
1187 | xor %g2, %g7, %g7 | ||
1188 | MOVXTOD_G3_F4 | ||
1189 | MOVXTOD_G7_F6 | ||
1190 | xor %g1, %o4, %g3 | ||
1191 | xor %g2, %o5, %g7 | ||
1192 | MOVXTOD_G3_F0 | ||
1193 | MOVXTOD_G7_F2 | ||
1194 | DECRYPT_256_2(8, 4, 6, 0, 2) | ||
1195 | std %f4, [%o2 + 0x00] | ||
1196 | std %f6, [%o2 + 0x08] | ||
1197 | std %f60, [%o2 + 0x10] | ||
1198 | std %f62, [%o2 + 0x18] | ||
1199 | sub %o3, 0x20, %o3 | ||
1200 | add %o1, 0x20, %o1 | ||
1201 | brgz,pt %o3, 1b | ||
1202 | add %o2, 0x20, %o2 | ||
1203 | brlz,pt %o3, 11f | ||
1204 | nop | ||
1205 | 10: ldx [%o1 + 0x00], %g3 | ||
1206 | ldx [%o1 + 0x08], %g7 | ||
1079 | xor %g1, %g3, %g3 | 1207 | xor %g1, %g3, %g3 |
1080 | xor %g2, %g7, %g7 | 1208 | xor %g2, %g7, %g7 |
1081 | MOVXTOD_G3_F4 | 1209 | MOVXTOD_G3_F4 |
@@ -1083,10 +1211,7 @@ ENTRY(aes_sparc64_ecb_decrypt_256) | |||
1083 | DECRYPT_256(8, 4, 6, 0, 2) | 1211 | DECRYPT_256(8, 4, 6, 0, 2) |
1084 | std %f4, [%o2 + 0x00] | 1212 | std %f4, [%o2 + 0x00] |
1085 | std %f6, [%o2 + 0x08] | 1213 | std %f6, [%o2 + 0x08] |
1086 | subcc %o3, 0x10, %o3 | 1214 | 11: retl |
1087 | bne,pt %xcc, 1b | ||
1088 | add %o2, 0x10, %o2 | ||
1089 | retl | ||
1090 | nop | 1215 | nop |
1091 | ENDPROC(aes_sparc64_ecb_decrypt_256) | 1216 | ENDPROC(aes_sparc64_ecb_decrypt_256) |
1092 | 1217 | ||