diff options
Diffstat (limited to 'arch/x86_64/crypto/aes-x86_64-asm.S')
-rw-r--r-- | arch/x86_64/crypto/aes-x86_64-asm.S | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/arch/x86_64/crypto/aes-x86_64-asm.S b/arch/x86_64/crypto/aes-x86_64-asm.S new file mode 100644 index 000000000000..483cbb23ab8d --- /dev/null +++ b/arch/x86_64/crypto/aes-x86_64-asm.S | |||
@@ -0,0 +1,186 @@ | |||
1 | /* AES (Rijndael) implementation (FIPS PUB 197) for x86_64 | ||
2 | * | ||
3 | * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de> | ||
4 | * | ||
5 | * License: | ||
6 | * This code can be distributed under the terms of the GNU General Public | ||
7 | * License (GPL) Version 2 provided that the above header down to and | ||
8 | * including this sentence is retained in full. | ||
9 | */ | ||
10 | |||
11 | .extern aes_ft_tab | ||
12 | .extern aes_it_tab | ||
13 | .extern aes_fl_tab | ||
14 | .extern aes_il_tab | ||
15 | |||
16 | .text | ||
17 | |||
/*
 * Register aliases.  Each register gets a short name (R1..R11) plus
 * suffixed names for its narrower views, so that the macros below can pick
 * a view by token-pasting a suffix onto a macro argument (e.g. r5 ## E):
 *
 *   (no suffix)  full 64-bit register
 *   E            low 32 bits
 *   X            low 16 bits
 *   H            bits 8..15
 *   L            bits 0..7
 *
 * Only R1..R4 (rax, rbx, rcx, rdx) define the X/H/L views.  R5..R7 define
 * only the E view, and R8..R11 only the 64-bit name.
 */
18 | #define R1 %rax | ||
19 | #define R1E %eax | ||
20 | #define R1X %ax | ||
21 | #define R1H %ah | ||
22 | #define R1L %al | ||
23 | #define R2 %rbx | ||
24 | #define R2E %ebx | ||
25 | #define R2X %bx | ||
26 | #define R2H %bh | ||
27 | #define R2L %bl | ||
28 | #define R3 %rcx | ||
29 | #define R3E %ecx | ||
30 | #define R3X %cx | ||
31 | #define R3H %ch | ||
32 | #define R3L %cl | ||
33 | #define R4 %rdx | ||
34 | #define R4E %edx | ||
35 | #define R4X %dx | ||
36 | #define R4H %dh | ||
37 | #define R4L %dl | ||
38 | #define R5 %rsi | ||
39 | #define R5E %esi | ||
40 | #define R6 %rdi | ||
41 | #define R6E %edi | ||
42 | #define R7 %rbp | ||
43 | #define R7E %ebp | ||
44 | #define R8 %r8 | ||
45 | #define R9 %r9 | ||
46 | #define R10 %r10 | ||
47 | #define R11 %r11 | ||
48 | |||
/*
 * prologue(FUNC,BASE,B128,B192,r1..r11)
 *
 * Emits the global function label FUNC (8-byte aligned, typed @function)
 * and the common entry sequence:
 *
 *   - copies r1 into r2 and r3 into r4 (the matching epilogue copies them
 *     back; entry() passes rbx and rbp here, so this presumably preserves
 *     callee-saved registers without touching the stack)
 *   - sets r9 = r8 + BASE + 52; the round macros address their four-word
 *     XOR operands relative to r9
 *   - copies r10 into r11
 *   - loads four 32-bit words from (r7), 4(r7), 8(r7), 12(r7) into
 *     r5E, r1E, r6E and r7E respectively (r7 is overwritten by the last
 *     load)
 *   - loads the 32-bit word at (r8) into r10E; it is compared with 24
 *     below, so it is presumably the key length in bytes
 *   - XORs the four words at -48(r9) .. -36(r9), i.e. r8 + BASE + 4 onward,
 *     into the four loaded words
 *   - dispatches on the word loaded from (r8):
 *       below 24 : jump to B128 with r9 unchanged
 *       equal 24 : jump to B192 with r9 advanced by 32
 *       otherwise: fall through with r9 advanced by 64
 *     The je tests the flags set by cmpl; the leaq between them does not
 *     modify flags.
 *
 * r1, r5, r6, r7 and r10 are pasted with E, so the arguments supplied for
 * them must be register aliases that define an E view.
 */
49 | #define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ | ||
50 | .global FUNC; \ | ||
51 | .type FUNC,@function; \ | ||
52 | .align 8; \ | ||
53 | FUNC: movq r1,r2; \ | ||
54 | movq r3,r4; \ | ||
55 | leaq BASE+52(r8),r9; \ | ||
56 | movq r10,r11; \ | ||
57 | movl (r7),r5 ## E; \ | ||
58 | movl 4(r7),r1 ## E; \ | ||
59 | movl 8(r7),r6 ## E; \ | ||
60 | movl 12(r7),r7 ## E; \ | ||
61 | movl (r8),r10 ## E; \ | ||
62 | xorl -48(r9),r5 ## E; \ | ||
63 | xorl -44(r9),r1 ## E; \ | ||
64 | xorl -40(r9),r6 ## E; \ | ||
65 | xorl -36(r9),r7 ## E; \ | ||
66 | cmpl $24,r10 ## E; \ | ||
67 | jb B128; \ | ||
68 | leaq 32(r9),r9; \ | ||
69 | je B192; \ | ||
70 | leaq 32(r9),r9; | ||
71 | |||
/*
 * epilogue(r1..r9)
 *
 * Common exit sequence: copies r1 into r2 and r3 into r4, stores the four
 * 32-bit words r5E, r6E, r7E, r8E to (r9), 4(r9), 8(r9) and 12(r9), then
 * returns.  r5..r8 are pasted with E, so their arguments must be register
 * aliases that define an E view.
 */
72 | #define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ | ||
73 | movq r1,r2; \ | ||
74 | movq r3,r4; \ | ||
75 | movl r5 ## E,(r9); \ | ||
76 | movl r6 ## E,4(r9); \ | ||
77 | movl r7 ## E,8(r9); \ | ||
78 | movl r8 ## E,12(r9); \ | ||
79 | ret; | ||
80 | |||
/*
 * round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd)
 *
 * One table-driven round over four 32-bit state words.
 *
 *   TAB      base of four consecutive 256-entry tables of 32-bit words at
 *            TAB, TAB+1024, TAB+2048 and TAB+3072 (T0..T3 below)
 *   OFFSET   displacement from r8 of four 32-bit words that are XORed into
 *            ra, rb, rc and rd (presumably the round key)
 *   r1..r4   input words; all four are overwritten.  Their X, H, L and E
 *            views are used.
 *   r5, r6   receive two of the results; r7 is scratch.  Only the E view
 *            is used.
 *   r8       base register for OFFSET
 *   ra..rd   registers XORed with the words at OFFSET, OFFSET+4, OFFSET+8
 *            and OFFSET+12.  Every user in this file passes R5, R6, R3, R4,
 *            i.e. the registers that hold the results.
 *
 * Writing bN(x) for byte N of the incoming value of x (b0 = lowest byte),
 * the table lookups leave:
 *
 *   r5 = T0[b0(r1)] ^ T1[b1(r2)] ^ T2[b2(r3)] ^ T3[b3(r4)]
 *   r6 = T0[b0(r2)] ^ T1[b1(r3)] ^ T2[b2(r4)] ^ T3[b3(r1)]
 *   r3 = T0[b0(r3)] ^ T1[b1(r4)] ^ T2[b2(r1)] ^ T3[b3(r2)]
 *   r4 = T0[b0(r4)] ^ T1[b1(r1)] ^ T2[b2(r2)] ^ T3[b3(r3)]
 *
 * with the OFFSET words additionally XORed into ra..rd.
 *
 * Only bytes 0 and 1 of a register are reachable through the L/H views, so
 * the upper bytes are brought down with 16-bit shifts.  The movw/roll pairs
 * first copy the low half of r4 into r2 (and of r3 into r1) and rotate, so
 * one register carries the upper half of one word and, after a further
 * shrl, the lower half of another.
 */
81 | #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ | ||
82 | movzbl r2 ## H,r5 ## E; \ | ||
83 | movzbl r2 ## L,r6 ## E; \ | ||
84 | movl TAB+1024(,r5,4),r5 ## E;\ | ||
85 | movw r4 ## X,r2 ## X; \ | ||
86 | movl TAB(,r6,4),r6 ## E; \ | ||
87 | roll $16,r2 ## E; \ | ||
88 | shrl $16,r4 ## E; \ | ||
89 | movzbl r4 ## H,r7 ## E; \ | ||
90 | movzbl r4 ## L,r4 ## E; \ | ||
91 | xorl OFFSET(r8),ra ## E; \ | ||
92 | xorl OFFSET+4(r8),rb ## E; \ | ||
93 | xorl TAB+3072(,r7,4),r5 ## E;\ | ||
94 | xorl TAB+2048(,r4,4),r6 ## E;\ | ||
95 | movzbl r1 ## L,r7 ## E; \ | ||
96 | movzbl r1 ## H,r4 ## E; \ | ||
97 | movl TAB+1024(,r4,4),r4 ## E;\ | ||
98 | movw r3 ## X,r1 ## X; \ | ||
99 | roll $16,r1 ## E; \ | ||
100 | shrl $16,r3 ## E; \ | ||
101 | xorl TAB(,r7,4),r5 ## E; \ | ||
102 | movzbl r3 ## H,r7 ## E; \ | ||
103 | movzbl r3 ## L,r3 ## E; \ | ||
104 | xorl TAB+3072(,r7,4),r4 ## E;\ | ||
105 | xorl TAB+2048(,r3,4),r5 ## E;\ | ||
106 | movzbl r1 ## H,r7 ## E; \ | ||
107 | movzbl r1 ## L,r3 ## E; \ | ||
108 | shrl $16,r1 ## E; \ | ||
109 | xorl TAB+3072(,r7,4),r6 ## E;\ | ||
110 | movl TAB+2048(,r3,4),r3 ## E;\ | ||
111 | movzbl r1 ## H,r7 ## E; \ | ||
112 | movzbl r1 ## L,r1 ## E; \ | ||
113 | xorl TAB+1024(,r7,4),r6 ## E;\ | ||
114 | xorl TAB(,r1,4),r3 ## E; \ | ||
115 | movzbl r2 ## H,r1 ## E; \ | ||
116 | movzbl r2 ## L,r7 ## E; \ | ||
117 | shrl $16,r2 ## E; \ | ||
118 | xorl TAB+3072(,r1,4),r3 ## E;\ | ||
119 | xorl TAB+2048(,r7,4),r4 ## E;\ | ||
120 | movzbl r2 ## H,r1 ## E; \ | ||
121 | movzbl r2 ## L,r2 ## E; \ | ||
122 | xorl OFFSET+8(r8),rc ## E; \ | ||
123 | xorl OFFSET+12(r8),rd ## E; \ | ||
124 | xorl TAB+1024(,r1,4),r3 ## E;\ | ||
125 | xorl TAB(,r2,4),r4 ## E; | ||
126 | |||
/*
 * move_regs(r1,r2,r3,r4): 32-bit copies r3 -> r1 and r4 -> r2.  The round
 * wrappers use it to move two result words back into the registers the
 * next round reads as input.
 */
127 | #define move_regs(r1,r2,r3,r4) \ | ||
128 | movl r3 ## E,r1 ## E; \ | ||
129 | movl r4 ## E,r2 ## E; | ||
130 | |||
/*
 * entry(FUNC,BASE,B128,B192): prologue() with this file's fixed register
 * assignment.  rbx is copied to r8 and rbp to r9; the four input words are
 * loaded through rdx into R1..R4 (eax, ebx, ecx, edx); rdi is the base the
 * key pointer R10 is derived from; rsi is copied to R11.  With the
 * (ctx, out, in) prototypes given below this presumably corresponds to
 * ctx in rdi, out in rsi and in in rdx.
 */
131 | #define entry(FUNC,BASE,B128,B192) \ | ||
132 | prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) | ||
133 | |||
/*
 * return: epilogue() with this file's fixed register assignment.  Copies
 * r8 back to rbx and r9 back to rbp, stores the result words held in
 * R5, R6, R3, R4 (esi, edi, ecx, edx) to the 16 bytes at (R11), and
 * returns.
 */
134 | #define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) | ||
135 | |||
/*
 * encrypt_round(TAB,OFFSET): one round() with the state in R1,R2,R3,R4 and
 * the four words at OFFSET(R10) XORed into the results.  round() leaves the
 * results in R5,R6,R3,R4; move_regs then copies R5,R6 back to R1,R2 so the
 * state is in R1..R4 again for the next round.
 */
136 | #define encrypt_round(TAB,OFFSET) \ | ||
137 | round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ | ||
138 | move_regs(R1,R2,R5,R6) | ||
139 | |||
/*
 * encrypt_final(TAB,OFFSET): same round() invocation as encrypt_round but
 * without the trailing move_regs, so the results stay in R5,R6,R3,R4, the
 * registers that the return macro stores.
 */
140 | #define encrypt_final(TAB,OFFSET) \ | ||
141 | round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) | ||
142 | |||
/*
 * decrypt_round(TAB,OFFSET): one round() with the register pairs swapped
 * relative to encrypt_round (R2,R1,R4,R3 as inputs and R6,R5 as result
 * registers), which reverses the direction in which bytes are gathered
 * across the four state words.  The words at OFFSET(R10) are still XORed
 * into R5,R6,R3,R4 in that order, and move_regs copies R5,R6 back to R1,R2.
 */
143 | #define decrypt_round(TAB,OFFSET) \ | ||
144 | round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \ | ||
145 | move_regs(R1,R2,R5,R6) | ||
146 | |||
/*
 * decrypt_final(TAB,OFFSET): same round() invocation as decrypt_round but
 * without the trailing move_regs, so the results stay in R5,R6,R3,R4, the
 * registers that the return macro stores.
 */
147 | #define decrypt_final(TAB,OFFSET) \ | ||
148 | round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) | ||
149 | |||
150 | /* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */ | ||
151 | |||
/*
 * Fully unrolled encryption of one 16-byte block.
 *
 * entry() XORs the input with the four words at ctx+4 and positions R10 so
 * that one sequence of round offsets serves every key size:
 *   word at (ctx) below 24 : enter at enc128, 10 rounds
 *   word at (ctx) equal 24 : enter at enc192, 12 rounds
 *   otherwise              : fall through,    14 rounds
 * In each case the first round executed reads its four XOR words from
 * ctx+20, and consecutive rounds read consecutive 16-byte groups.
 * All rounds but the last use aes_ft_tab; the last uses aes_fl_tab and
 * leaves its results where the return macro stores them.
 */
152 | entry(aes_encrypt,0,enc128,enc192) | ||
153 | encrypt_round(aes_ft_tab,-96) | ||
154 | encrypt_round(aes_ft_tab,-80) | ||
155 | enc192: encrypt_round(aes_ft_tab,-64) | ||
156 | encrypt_round(aes_ft_tab,-48) | ||
157 | enc128: encrypt_round(aes_ft_tab,-32) | ||
158 | encrypt_round(aes_ft_tab,-16) | ||
159 | encrypt_round(aes_ft_tab, 0) | ||
160 | encrypt_round(aes_ft_tab, 16) | ||
161 | encrypt_round(aes_ft_tab, 32) | ||
162 | encrypt_round(aes_ft_tab, 48) | ||
163 | encrypt_round(aes_ft_tab, 64) | ||
164 | encrypt_round(aes_ft_tab, 80) | ||
165 | encrypt_round(aes_ft_tab, 96) | ||
166 | encrypt_final(aes_fl_tab,112) | ||
167 | return | ||
168 | |||
169 | /* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */ | ||
170 | |||
/*
 * Fully unrolled decryption of one 16-byte block.
 *
 * Same structure as aes_encrypt, but entry() is given BASE = 240, so every
 * XOR-word address is 240 bytes higher: the initial XOR reads ctx+244 and
 * the first round executed reads ctx+260 (presumably a separate decryption
 * key schedule stored after the encryption one).
 *   word at (ctx) below 24 : enter at dec128, 10 rounds
 *   word at (ctx) equal 24 : enter at dec192, 12 rounds
 *   otherwise              : fall through,    14 rounds
 * All rounds but the last use aes_it_tab; the last uses aes_il_tab and
 * leaves its results where the return macro stores them.
 */
171 | entry(aes_decrypt,240,dec128,dec192) | ||
172 | decrypt_round(aes_it_tab,-96) | ||
173 | decrypt_round(aes_it_tab,-80) | ||
174 | dec192: decrypt_round(aes_it_tab,-64) | ||
175 | decrypt_round(aes_it_tab,-48) | ||
176 | dec128: decrypt_round(aes_it_tab,-32) | ||
177 | decrypt_round(aes_it_tab,-16) | ||
178 | decrypt_round(aes_it_tab, 0) | ||
179 | decrypt_round(aes_it_tab, 16) | ||
180 | decrypt_round(aes_it_tab, 32) | ||
181 | decrypt_round(aes_it_tab, 48) | ||
182 | decrypt_round(aes_it_tab, 64) | ||
183 | decrypt_round(aes_it_tab, 80) | ||
184 | decrypt_round(aes_it_tab, 96) | ||
185 | decrypt_final(aes_il_tab,112) | ||
186 | return