Diffstat (limited to 'arch/x86/lib/csum-copy_64.S')
-rw-r--r-- | arch/x86/lib/csum-copy_64.S | 249
1 file changed, 249 insertions, 0 deletions
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
new file mode 100644
index 000000000000..f0dba36578ea
--- /dev/null
+++ b/arch/x86/lib/csum-copy_64.S
@@ -0,0 +1,249 @@
/*
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi	source
 * rsi	destination
 * edx	len (32bit)
 * ecx	sum (32bit)
 * r8	src_err_ptr (int)
 * r9	dst_err_ptr (int)
 *
 * Output
 * eax	64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

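For orientation, here is a rough C-level sketch of how a caller-side wrapper might drive this routine according to the register interface described in the comment above. The prototype and the wrapper name are assumptions for illustration, not the kernel's exact declarations; the point is that the wrapper passes the error pointers in, checks them afterwards, and zeroes the destination on a fault, as the comment requires.

/*
 * Illustrative-only prototype matching the register interface above
 * (src -> rdi, dst -> rsi, len -> edx, sum -> ecx, src_err_ptr -> r8,
 * dst_err_ptr -> r9; the running sum comes back in rax/eax).
 */
#include <string.h>

unsigned int csum_partial_copy_generic(const void *src, void *dst,
				       int len, unsigned int sum,
				       int *src_err_ptr, int *dst_err_ptr);

/* Hypothetical wrapper sketch: report faults through err and zero the
 * destination, as the header comment says the wrappers must. */
static unsigned int copy_and_csum_sketch(const void *src, void *dst,
					 int len, unsigned int sum, int *err)
{
	*err = 0;
	sum = csum_partial_copy_generic(src, dst, len, sum, err, NULL);
	if (*err)			/* fault: the returned sum is undefined */
		memset(dst, 0, len);	/* never leak uninitialized bytes */
	return sum;			/* caller must check *err before trusting it */
}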
	.macro source
10:
	.section __ex_table,"a"
	.align 8
	.quad 10b,.Lbad_source
	.previous
	.endm

	.macro dest
20:
	.section __ex_table,"a"
	.align 8
	.quad 20b,.Lbad_dest
	.previous
	.endm

	.macro ignore L=.Lignore
30:
	.section __ex_table,"a"
	.align 8
	.quad 30b,\L
	.previous
	.endm


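Each of these macros drops a local label in front of the next (potentially faulting) instruction and appends an entry to the __ex_table section pairing that address with a fixup label. Conceptually, the entry emitted by ".align 8 / .quad addr,fixup" has the shape below; the struct and field names here are illustrative only, the kernel's real definition lives in its uaccess headers.

/* Conceptual layout of one __ex_table entry as emitted by the macros
 * above: two 64-bit addresses (a sketch, not the kernel's definition). */
struct ex_table_entry_sketch {
	unsigned long insn;	/* address of the instruction that may fault */
	unsigned long fixup;	/* where execution resumes, e.g. .Lbad_source */
};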
ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	cmpl $3*64,%edx
	jle .Lignore

.Lignore:
	subq $7*8,%rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq %rbx,2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq %r14,4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq %r13,5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq %rbp,6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	movq %r8,(%rsp)
	movq %r9,1*8(%rsp)

	movl %ecx,%eax
	movl %edx,%ecx

	xorl %r9d,%r9d
	movq %rcx,%r12

	shrq $6,%r12
	jz .Lhandle_tail	/* < 64 */

	clc

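The prologue saves the callee-saved registers and the two error pointers, moves the incoming sum into eax and the length into ecx, clears r9 (used from here on as a zero register to fold carries back in), and splits the length into full 64-byte blocks plus a tail. A small C sketch of that split, with variable names assumed for illustration:

/* Length split done by the prologue: r12 gets the number of full
 * 64-byte blocks, the low six bits are left for .Lhandle_tail. */
static inline void split_len_sketch(unsigned int len,
				    unsigned int *blocks, unsigned int *tail)
{
	*blocks = len >> 6;	/* e.g. len = 200 -> 3 full 64-byte blocks */
	*tail   = len & 63;	/* e.g. len = 200 -> 8 tail bytes */
}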
	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq (%rdi),%rbx
	source
	movq 8(%rdi),%r8
	source
	movq 16(%rdi),%r11
	source
	movq 24(%rdi),%rdx

	source
	movq 32(%rdi),%r10
	source
	movq 40(%rdi),%rbp
	source
	movq 48(%rdi),%r14
	source
	movq 56(%rdi),%r13

	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq %rbx,%rax
	adcq %r8,%rax
	adcq %r11,%rax
	adcq %rdx,%rax
	adcq %r10,%rax
	adcq %rbp,%rax
	adcq %r14,%rax
	adcq %r13,%rax

	decl %r12d

	dest
	movq %rbx,(%rsi)
	dest
	movq %r8,8(%rsi)
	dest
	movq %r11,16(%rsi)
	dest
	movq %rdx,24(%rsi)

	dest
	movq %r10,32(%rsi)
	dest
	movq %rbp,40(%rsi)
	dest
	movq %r14,48(%rsi)
	dest
	movq %r13,56(%rsi)

3:

	leaq 64(%rdi),%rdi
	leaq 64(%rsi),%rsi

	jnz .Lloop

	adcq %r9,%rax

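The main loop loads eight quadwords per iteration, adds them into rax with adcq so every carry-out feeds the next addition, stores the same words to the destination, and finally folds the last outstanding carry back in via the zero register r9. A rough C model of that accumulation is below; it is an illustrative sketch with assumed names, not the kernel's code, and it folds the carry after every addition (using a GCC/Clang 128-bit intermediate), which is equivalent for a one's-complement sum to the deferred fold the assembly does.

#include <stdint.h>
#include <string.h>

/* 64-bit add with end-around carry (models the adcq chain plus the
 * trailing adcq %r9,%rax). */
static inline uint64_t add64_ec(uint64_t sum, uint64_t val)
{
	unsigned __int128 t = (unsigned __int128)sum + val;
	return (uint64_t)t + (uint64_t)(t >> 64);	/* fold carry back in */
}

/* Sketch of one 64-byte block: copy eight quadwords and sum them. */
static uint64_t csum_copy_block_sketch(const void *src, void *dst, uint64_t sum)
{
	uint64_t w[8];

	memcpy(w, src, sizeof(w));	/* the asm loads these via movq */
	memcpy(dst, w, sizeof(w));	/* ...and stores the same words */
	for (int i = 0; i < 8; i++)
		sum = add64_ec(sum, w[i]);
	return sum;
}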
	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count */
	movl %ecx,%r10d
	andl $63,%ecx
	shrl $3,%ecx
	jz .Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi),%rbx
	adcq %rbx,%rax
	decl %ecx
	dest
	movq %rbx,(%rsi)
	leaq 8(%rsi),%rsi	/* preserve carry */
	leaq 8(%rdi),%rdi
	jnz .Lloop_8
	adcq %r9,%rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax,%ebx
	shrq $32,%rax
	addl %ebx,%eax
	adcl %r9d,%eax

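The fold step adds the high and low 32-bit halves of the 64-bit accumulator and then adds the resulting carry back in, reducing the running sum to 32 bits without losing one's-complement information. A minimal C sketch of the same reduction (the function name is assumed for illustration):

#include <stdint.h>

/* Fold a 64-bit one's-complement accumulator to 32 bits, mirroring the
 * movl/shrq/addl/adcl sequence in .Lfold above. */
static inline uint32_t csum_fold64_sketch(uint64_t sum)
{
	uint32_t lo = (uint32_t)sum;
	uint32_t hi = (uint32_t)(sum >> 32);
	uint32_t r  = lo + hi;

	if (r < lo)	/* the addition carried out... */
		r++;	/* ...so add the carry back in (adcl %r9d,%eax) */
	return r;
}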
	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d,%ecx
	andl $7,%ecx
	shrl $1,%ecx
	jz .Lhandle_1
	movl $2,%edx
	xorl %ebx,%ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi),%bx
	adcl %ebx,%eax
	decl %ecx
	dest
	movw %bx,(%rsi)
	leaq 2(%rdi),%rdi
	leaq 2(%rsi),%rsi
	jnz .Lloop_1
	adcl %r9d,%eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl $1,%r10d
	jz .Lende
	xorl %ebx,%ebx
	source
	movb (%rdi),%bl
	dest
	movb %bl,(%rsi)
	addl %ebx,%eax
	adcl %r9d,%eax	/* carry */

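After the 8-byte tail loop at most 7 bytes remain: up to three 16-bit words are added through the zero-extended ebx, and a final odd byte, if present, is added as the low byte of a zeroed register. A compact C model of this sub-8-byte tail follows; it is a sketch with assumed names, folds carries per addition (equivalent for checksum purposes), and relies on the same little-endian byte order the assembly does.

#include <stdint.h>
#include <string.h>

/* 32-bit add with end-around carry, as the adcl/addl+adcl pairs do. */
static inline uint32_t add32_ec(uint32_t sum, uint32_t val)
{
	uint64_t t = (uint64_t)sum + val;
	return (uint32_t)t + (uint32_t)(t >> 32);
}

/* Sketch of .Lhandle_7/.Lhandle_1: copy and sum the last 0..7 bytes. */
static uint32_t csum_copy_tail7_sketch(const uint8_t *src, uint8_t *dst,
				       unsigned int tail, uint32_t sum)
{
	while (tail >= 2) {			/* 16-bit words via %bx */
		uint16_t w;
		memcpy(&w, src, 2);
		memcpy(dst, &w, 2);
		sum = add32_ec(sum, w);		/* zero-extended, like xorl %ebx + movw */
		src += 2; dst += 2; tail -= 2;
	}
	if (tail) {				/* last odd byte via %bl */
		dst[0] = src[0];
		sum = add32_ec(sum, src[0]);
	}
	return sum;
}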
	CFI_REMEMBER_STATE
.Lende:
	movq 2*8(%rsp),%rbx
	CFI_RESTORE rbx
	movq 3*8(%rsp),%r12
	CFI_RESTORE r12
	movq 4*8(%rsp),%r14
	CFI_RESTORE r14
	movq 5*8(%rsp),%r13
	CFI_RESTORE r13
	movq 6*8(%rsp),%rbp
	CFI_RESTORE rbp
	addq $7*8,%rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq (%rsp),%rax
	testq %rax,%rax
	jz .Lende
	movl $-EFAULT,(%rax)
	jmp .Lende

.Lbad_dest:
	movq 8(%rsp),%rax
	testq %rax,%rax
	jz .Lende
	movl $-EFAULT,(%rax)
	jmp .Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)