aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/lib/csum-copy.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/lib/csum-copy.S')
-rw-r--r--arch/x86_64/lib/csum-copy.S233
1 files changed, 233 insertions, 0 deletions
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
new file mode 100644
index 000000000000..01808ec37836
--- /dev/null
+++ b/arch/x86_64/lib/csum-copy.S
@@ -0,0 +1,233 @@
1/*
2 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file COPYING in the main directory of this archive
6 * for more details. No warranty for anything given at all.
7 */
8 #include <linux/linkage.h>
9 #include <asm/errno.h>
10
/*
 * Checksum copy with exception handling.
 * On an exception, -EFAULT is stored through src_err_ptr (faulting load)
 * or dst_err_ptr (faulting store).  This routine does not zero the
 * destination itself; that is left to the wrappers.
 *
 * Input
 * rdi source
 * rsi destination
 * edx len (32bit)
 * ecx sum (32bit)
 * r8 src_err_ptr (int)
 * r9 dst_err_ptr (int)
 *
 * Output
 * eax 32bit sum (the 64bit accumulator is folded before returning).
 *     Undefined in case of exception.
 *
 * Wrappers need to take care of a valid exception sum and of zeroing.
 * They should also align source or destination to 8 bytes.
 */
30
/*
 * source - mark the instruction that follows as a read from the source
 * buffer.
 *
 * Local label 10 binds to the address of the next emitted instruction,
 * and the pair (that address, .Lbad_source) is appended to the
 * __ex_table section.  The macro must therefore be placed immediately
 * in front of the load it is meant to cover.
 */
	.macro source
10:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 10b, .Lbad_source
	.popsection
	.endm
38
/*
 * dest - mark the instruction that follows as a write to the
 * destination buffer.
 *
 * Local label 20 binds to the address of the next emitted instruction,
 * and the pair (that address, .Lbad_dest) is appended to the
 * __ex_table section.  The macro must therefore be placed immediately
 * in front of the store it is meant to cover.
 */
	.macro dest
20:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 20b, .Lbad_dest
	.popsection
	.endm
46
/*
 * ignore - mark the instruction that follows as one whose fault should
 * simply resume at label \L (default: .Lignore).
 *
 * Local label 30 binds to the address of the next emitted instruction,
 * and the pair (that address, \L) is appended to the __ex_table
 * section.
 */
	.macro ignore L=.Lignore
30:
	.pushsection __ex_table,"a"
	.balign 8
	.quad 30b, \L
	.popsection
	.endm
54
55
/*
 * csum_partial_copy_generic - copy a buffer and accumulate its checksum
 * in the same pass.
 *
 * Entry registers:
 *   rdi  source
 *   rsi  destination
 *   edx  len (32bit)
 *   ecx  initial sum (32bit)
 *   r8   src_err_ptr; -EFAULT is stored through it when a load from the
 *        source faults (skipped when the pointer is NULL)
 *   r9   dst_err_ptr; same, for a faulting store to the destination
 *
 * Result:
 *   eax  sum folded to 32 bits; not meaningful after a fault.
 *
 * Stack frame (7*8 bytes):
 *   0*8 src_err_ptr   1*8 dst_err_ptr
 *   2*8 rbx   3*8 r12   4*8 r14   5*8 r13   6*8 rbp
 *
 * The "source" / "dest" macros attach an exception table entry to the
 * instruction that immediately follows them, so each one must sit
 * directly in front of the memory access it covers.
 *
 * The checksum loops carry CF from one adc to the next across
 * iterations.  Only instructions that leave CF untouched (mov, lea,
 * dec, jnz) may appear between the adc instructions.
 */
	.globl csum_partial_copy_generic
	.p2align 4
csum_partial_copy_generic:
.Lignore:				/* default target of the "ignore" macro */
	subq	$7*8,%rsp
	movq	%rbx,2*8(%rsp)
	movq	%r12,3*8(%rsp)
	movq	%r14,4*8(%rsp)
	movq	%r13,5*8(%rsp)
	movq	%rbp,6*8(%rsp)

	/* error pointers are only needed by the fault handlers below */
	movq	%r8,(%rsp)
	movq	%r9,1*8(%rsp)

	movl	%ecx,%eax		/* rax = running sum */
	movl	%edx,%ecx		/* rcx = length (zero extended) */

	xorl	%r9d,%r9d		/* r9 = 0, used to add in the carry */
	movq	%rcx,%r12

	shrq	$6,%r12			/* r12 = number of 64 byte blocks */
	jz	.Lhandle_tail		/* < 64 */

	clc				/* shrq left a stale CF behind */

	/* main loop. copy and checksum in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq	(%rdi),%rbx
	source
	movq	8(%rdi),%r8
	source
	movq	16(%rdi),%r11
	source
	movq	24(%rdi),%rdx

	source
	movq	32(%rdi),%r10
	source
	movq	40(%rdi),%rbp
	source
	movq	48(%rdi),%r14
	source
	movq	56(%rdi),%r13

	/* a faulting prefetch is harmless: just continue at 2: */
	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq	%rbx,%rax
	adcq	%r8,%rax
	adcq	%r11,%rax
	adcq	%rdx,%rax
	adcq	%r10,%rax
	adcq	%rbp,%rax
	adcq	%r14,%rax
	adcq	%r13,%rax

	decl	%r12d			/* sets ZF for the jnz below, keeps CF */

	dest
	movq	%rbx,(%rsi)
	dest
	movq	%r8,8(%rsi)
	dest
	movq	%r11,16(%rsi)
	dest
	movq	%rdx,24(%rsi)

	dest
	movq	%r10,32(%rsi)
	dest
	movq	%rbp,40(%rsi)
	dest
	movq	%r14,48(%rsi)
	dest
	movq	%r13,56(%rsi)

	leaq	64(%rdi),%rdi		/* lea: advance without touching flags */
	leaq	64(%rsi),%rsi

	jnz	.Lloop

	adcq	%r9,%rax		/* add in the last carry */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count */
	movl	%ecx,%r10d		/* r10d = full length, for the tails */
	andl	$63,%ecx
	shrl	$3,%ecx			/* ecx = remaining whole quadwords */
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq	(%rdi),%rbx
	adcq	%rbx,%rax
	decl	%ecx
	dest
	movq	%rbx,(%rsi)
	leaq	8(%rsi),%rsi		/* preserve carry */
	leaq	8(%rdi),%rdi
	jnz	.Lloop_8
	adcq	%r9,%rax		/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl	%eax,%ebx
	shrq	$32,%rax
	addl	%ebx,%eax
	adcl	%r9d,%eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl	%r10d,%ecx
	andl	$7,%ecx
	shrl	$1,%ecx			/* ecx = remaining 16bit words */
	jz	.Lhandle_1
	xorl	%ebx,%ebx		/* upper bits of ebx stay 0 for movw */
	clc				/* redundant after xorl; kept explicit */
	.p2align 4
.Lloop_1:
	source
	movw	(%rdi),%bx
	adcl	%ebx,%eax
	decl	%ecx
	/* dest has to label the store itself, not the decl above */
	dest
	movw	%bx,(%rsi)
	leaq	2(%rdi),%rdi
	leaq	2(%rsi),%rsi
	jnz	.Lloop_1
	adcl	%r9d,%eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl	$1,%r10d
	jz	.Lende
	xorl	%ebx,%ebx
	source
	movb	(%rdi),%bl
	dest
	movb	%bl,(%rsi)
	addl	%ebx,%eax
	adcl	%r9d,%eax		/* carry */

.Lende:
	movq	2*8(%rsp),%rbx
	movq	3*8(%rsp),%r12
	movq	4*8(%rsp),%r14
	movq	5*8(%rsp),%r13
	movq	6*8(%rsp),%rbp
	addq	$7*8,%rsp
	ret

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
	movq	(%rsp),%rax		/* rax = src_err_ptr */
	testq	%rax,%rax
	jz	.Lende			/* NULL: nothing to report through */
	movl	$-EFAULT,(%rax)
	jmp	.Lende

.Lbad_dest:
	movq	8(%rsp),%rax		/* rax = dst_err_ptr */
	testq	%rax,%rax
	jz	.Lende			/* NULL: nothing to report through */
	movl	$-EFAULT,(%rax)
	jmp	.Lende