diff options
Diffstat (limited to 'arch/x86/lib/csum-copy_64.S')
-rw-r--r-- | arch/x86/lib/csum-copy_64.S | 242 |
1 files changed, 121 insertions, 121 deletions
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index f0dba36578ea..fb903b758da8 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 2 | * Copyright 2002, 2003 Andi Kleen, SuSE Labs. |
3 | * | 3 | * |
4 | * This file is subject to the terms and conditions of the GNU General Public | 4 | * This file is subject to the terms and conditions of the GNU General Public |
5 | * License. See the file COPYING in the main directory of this archive | 5 | * License. See the file COPYING in the main directory of this archive |
6 | * for more details. No warranty for anything given at all. | 6 | * for more details. No warranty for anything given at all. |
@@ -11,82 +11,82 @@ | |||
11 | 11 | ||
12 | /* | 12 | /* |
13 | * Checksum copy with exception handling. | 13 | * Checksum copy with exception handling. |
14 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the | 14 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the |
15 | * destination is zeroed. | 15 | * destination is zeroed. |
16 | * | 16 | * |
17 | * Input | 17 | * Input |
18 | * rdi source | 18 | * rdi source |
19 | * rsi destination | 19 | * rsi destination |
20 | * edx len (32bit) | 20 | * edx len (32bit) |
21 | * ecx sum (32bit) | 21 | * ecx sum (32bit) |
22 | * r8 src_err_ptr (int) | 22 | * r8 src_err_ptr (int) |
23 | * r9 dst_err_ptr (int) | 23 | * r9 dst_err_ptr (int) |
24 | * | 24 | * |
25 | * Output | 25 | * Output |
26 | * eax 64bit sum. undefined in case of exception. | 26 | * eax 64bit sum. undefined in case of exception. |
27 | * | 27 | * |
28 | * Wrappers need to take care of valid exception sum and zeroing. | 28 | * Wrappers need to take care of valid exception sum and zeroing. |
29 | * They also should align source or destination to 8 bytes. | 29 | * They also should align source or destination to 8 bytes. |
30 | */ | 30 | */ |
31 | 31 | ||
32 | .macro source | 32 | .macro source |
33 | 10: | 33 | 10: |
34 | .section __ex_table,"a" | 34 | .section __ex_table, "a" |
35 | .align 8 | 35 | .align 8 |
36 | .quad 10b,.Lbad_source | 36 | .quad 10b, .Lbad_source |
37 | .previous | 37 | .previous |
38 | .endm | 38 | .endm |
39 | 39 | ||
40 | .macro dest | 40 | .macro dest |
41 | 20: | 41 | 20: |
42 | .section __ex_table,"a" | 42 | .section __ex_table, "a" |
43 | .align 8 | 43 | .align 8 |
44 | .quad 20b,.Lbad_dest | 44 | .quad 20b, .Lbad_dest |
45 | .previous | 45 | .previous |
46 | .endm | 46 | .endm |
47 | 47 | ||
48 | .macro ignore L=.Lignore | 48 | .macro ignore L=.Lignore |
49 | 30: | 49 | 30: |
50 | .section __ex_table,"a" | 50 | .section __ex_table, "a" |
51 | .align 8 | 51 | .align 8 |
52 | .quad 30b,\L | 52 | .quad 30b, \L |
53 | .previous | 53 | .previous |
54 | .endm | 54 | .endm |
55 | 55 | ||
56 | 56 | ||
57 | ENTRY(csum_partial_copy_generic) | 57 | ENTRY(csum_partial_copy_generic) |
58 | CFI_STARTPROC | 58 | CFI_STARTPROC |
59 | cmpl $3*64,%edx | 59 | cmpl $3*64, %edx |
60 | jle .Lignore | 60 | jle .Lignore |
61 | 61 | ||
62 | .Lignore: | 62 | .Lignore: |
63 | subq $7*8,%rsp | 63 | subq $7*8, %rsp |
64 | CFI_ADJUST_CFA_OFFSET 7*8 | 64 | CFI_ADJUST_CFA_OFFSET 7*8 |
65 | movq %rbx,2*8(%rsp) | 65 | movq %rbx, 2*8(%rsp) |
66 | CFI_REL_OFFSET rbx, 2*8 | 66 | CFI_REL_OFFSET rbx, 2*8 |
67 | movq %r12,3*8(%rsp) | 67 | movq %r12, 3*8(%rsp) |
68 | CFI_REL_OFFSET r12, 3*8 | 68 | CFI_REL_OFFSET r12, 3*8 |
69 | movq %r14,4*8(%rsp) | 69 | movq %r14, 4*8(%rsp) |
70 | CFI_REL_OFFSET r14, 4*8 | 70 | CFI_REL_OFFSET r14, 4*8 |
71 | movq %r13,5*8(%rsp) | 71 | movq %r13, 5*8(%rsp) |
72 | CFI_REL_OFFSET r13, 5*8 | 72 | CFI_REL_OFFSET r13, 5*8 |
73 | movq %rbp,6*8(%rsp) | 73 | movq %rbp, 6*8(%rsp) |
74 | CFI_REL_OFFSET rbp, 6*8 | 74 | CFI_REL_OFFSET rbp, 6*8 |
75 | 75 | ||
76 | movq %r8,(%rsp) | 76 | movq %r8, (%rsp) |
77 | movq %r9,1*8(%rsp) | 77 | movq %r9, 1*8(%rsp) |
78 | |||
79 | movl %ecx,%eax | ||
80 | movl %edx,%ecx | ||
81 | 78 | ||
82 | xorl %r9d,%r9d | 79 | movl %ecx, %eax |
83 | movq %rcx,%r12 | 80 | movl %edx, %ecx |
84 | 81 | ||
85 | shrq $6,%r12 | 82 | xorl %r9d, %r9d |
86 | jz .Lhandle_tail /* < 64 */ | 83 | movq %rcx, %r12 |
84 | |||
85 | shrq $6, %r12 | ||
86 | jz .Lhandle_tail /* < 64 */ | ||
87 | 87 | ||
88 | clc | 88 | clc |
89 | 89 | ||
90 | /* main loop. clear in 64 byte blocks */ | 90 | /* main loop. clear in 64 byte blocks */ |
91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ | 91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ |
92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ | 92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ |
@@ -94,156 +94,156 @@ ENTRY(csum_partial_copy_generic) | |||
94 | .p2align 4 | 94 | .p2align 4 |
95 | .Lloop: | 95 | .Lloop: |
96 | source | 96 | source |
97 | movq (%rdi),%rbx | 97 | movq (%rdi), %rbx |
98 | source | 98 | source |
99 | movq 8(%rdi),%r8 | 99 | movq 8(%rdi), %r8 |
100 | source | 100 | source |
101 | movq 16(%rdi),%r11 | 101 | movq 16(%rdi), %r11 |
102 | source | 102 | source |
103 | movq 24(%rdi),%rdx | 103 | movq 24(%rdi), %rdx |
104 | 104 | ||
105 | source | 105 | source |
106 | movq 32(%rdi),%r10 | 106 | movq 32(%rdi), %r10 |
107 | source | 107 | source |
108 | movq 40(%rdi),%rbp | 108 | movq 40(%rdi), %rbp |
109 | source | 109 | source |
110 | movq 48(%rdi),%r14 | 110 | movq 48(%rdi), %r14 |
111 | source | 111 | source |
112 | movq 56(%rdi),%r13 | 112 | movq 56(%rdi), %r13 |
113 | 113 | ||
114 | ignore 2f | 114 | ignore 2f |
115 | prefetcht0 5*64(%rdi) | 115 | prefetcht0 5*64(%rdi) |
116 | 2: | 116 | 2: |
117 | adcq %rbx,%rax | 117 | adcq %rbx, %rax |
118 | adcq %r8,%rax | 118 | adcq %r8, %rax |
119 | adcq %r11,%rax | 119 | adcq %r11, %rax |
120 | adcq %rdx,%rax | 120 | adcq %rdx, %rax |
121 | adcq %r10,%rax | 121 | adcq %r10, %rax |
122 | adcq %rbp,%rax | 122 | adcq %rbp, %rax |
123 | adcq %r14,%rax | 123 | adcq %r14, %rax |
124 | adcq %r13,%rax | 124 | adcq %r13, %rax |
125 | 125 | ||
126 | decl %r12d | 126 | decl %r12d |
127 | 127 | ||
128 | dest | 128 | dest |
129 | movq %rbx,(%rsi) | 129 | movq %rbx, (%rsi) |
130 | dest | 130 | dest |
131 | movq %r8,8(%rsi) | 131 | movq %r8, 8(%rsi) |
132 | dest | 132 | dest |
133 | movq %r11,16(%rsi) | 133 | movq %r11, 16(%rsi) |
134 | dest | 134 | dest |
135 | movq %rdx,24(%rsi) | 135 | movq %rdx, 24(%rsi) |
136 | 136 | ||
137 | dest | 137 | dest |
138 | movq %r10,32(%rsi) | 138 | movq %r10, 32(%rsi) |
139 | dest | 139 | dest |
140 | movq %rbp,40(%rsi) | 140 | movq %rbp, 40(%rsi) |
141 | dest | 141 | dest |
142 | movq %r14,48(%rsi) | 142 | movq %r14, 48(%rsi) |
143 | dest | 143 | dest |
144 | movq %r13,56(%rsi) | 144 | movq %r13, 56(%rsi) |
145 | 145 | ||
146 | 3: | 146 | 3: |
147 | |||
148 | leaq 64(%rdi),%rdi | ||
149 | leaq 64(%rsi),%rsi | ||
150 | 147 | ||
151 | jnz .Lloop | 148 | leaq 64(%rdi), %rdi |
149 | leaq 64(%rsi), %rsi | ||
152 | 150 | ||
153 | adcq %r9,%rax | 151 | jnz .Lloop |
154 | 152 | ||
155 | /* do last upto 56 bytes */ | 153 | adcq %r9, %rax |
154 | |||
155 | /* do last up to 56 bytes */ | ||
156 | .Lhandle_tail: | 156 | .Lhandle_tail: |
157 | /* ecx: count */ | 157 | /* ecx: count */ |
158 | movl %ecx,%r10d | 158 | movl %ecx, %r10d |
159 | andl $63,%ecx | 159 | andl $63, %ecx |
160 | shrl $3,%ecx | 160 | shrl $3, %ecx |
161 | jz .Lfold | 161 | jz .Lfold |
162 | clc | 162 | clc |
163 | .p2align 4 | 163 | .p2align 4 |
164 | .Lloop_8: | 164 | .Lloop_8: |
165 | source | 165 | source |
166 | movq (%rdi),%rbx | 166 | movq (%rdi), %rbx |
167 | adcq %rbx,%rax | 167 | adcq %rbx, %rax |
168 | decl %ecx | 168 | decl %ecx |
169 | dest | 169 | dest |
170 | movq %rbx,(%rsi) | 170 | movq %rbx, (%rsi) |
171 | leaq 8(%rsi),%rsi /* preserve carry */ | 171 | leaq 8(%rsi), %rsi /* preserve carry */ |
172 | leaq 8(%rdi),%rdi | 172 | leaq 8(%rdi), %rdi |
173 | jnz .Lloop_8 | 173 | jnz .Lloop_8 |
174 | adcq %r9,%rax /* add in carry */ | 174 | adcq %r9, %rax /* add in carry */ |
175 | 175 | ||
176 | .Lfold: | 176 | .Lfold: |
177 | /* reduce checksum to 32bits */ | 177 | /* reduce checksum to 32bits */ |
178 | movl %eax,%ebx | 178 | movl %eax, %ebx |
179 | shrq $32,%rax | 179 | shrq $32, %rax |
180 | addl %ebx,%eax | 180 | addl %ebx, %eax |
181 | adcl %r9d,%eax | 181 | adcl %r9d, %eax |
182 | 182 | ||
183 | /* do last upto 6 bytes */ | 183 | /* do last up to 6 bytes */ |
184 | .Lhandle_7: | 184 | .Lhandle_7: |
185 | movl %r10d,%ecx | 185 | movl %r10d, %ecx |
186 | andl $7,%ecx | 186 | andl $7, %ecx |
187 | shrl $1,%ecx | 187 | shrl $1, %ecx |
188 | jz .Lhandle_1 | 188 | jz .Lhandle_1 |
189 | movl $2,%edx | 189 | movl $2, %edx |
190 | xorl %ebx,%ebx | 190 | xorl %ebx, %ebx |
191 | clc | 191 | clc |
192 | .p2align 4 | 192 | .p2align 4 |
193 | .Lloop_1: | 193 | .Lloop_1: |
194 | source | 194 | source |
195 | movw (%rdi),%bx | 195 | movw (%rdi), %bx |
196 | adcl %ebx,%eax | 196 | adcl %ebx, %eax |
197 | decl %ecx | 197 | decl %ecx |
198 | dest | 198 | dest |
199 | movw %bx,(%rsi) | 199 | movw %bx, (%rsi) |
200 | leaq 2(%rdi),%rdi | 200 | leaq 2(%rdi), %rdi |
201 | leaq 2(%rsi),%rsi | 201 | leaq 2(%rsi), %rsi |
202 | jnz .Lloop_1 | 202 | jnz .Lloop_1 |
203 | adcl %r9d,%eax /* add in carry */ | 203 | adcl %r9d, %eax /* add in carry */ |
204 | 204 | ||
205 | /* handle last odd byte */ | 205 | /* handle last odd byte */ |
206 | .Lhandle_1: | 206 | .Lhandle_1: |
207 | testl $1,%r10d | 207 | testl $1, %r10d |
208 | jz .Lende | 208 | jz .Lende |
209 | xorl %ebx,%ebx | 209 | xorl %ebx, %ebx |
210 | source | 210 | source |
211 | movb (%rdi),%bl | 211 | movb (%rdi), %bl |
212 | dest | 212 | dest |
213 | movb %bl,(%rsi) | 213 | movb %bl, (%rsi) |
214 | addl %ebx,%eax | 214 | addl %ebx, %eax |
215 | adcl %r9d,%eax /* carry */ | 215 | adcl %r9d, %eax /* carry */ |
216 | 216 | ||
217 | CFI_REMEMBER_STATE | 217 | CFI_REMEMBER_STATE |
218 | .Lende: | 218 | .Lende: |
219 | movq 2*8(%rsp),%rbx | 219 | movq 2*8(%rsp), %rbx |
220 | CFI_RESTORE rbx | 220 | CFI_RESTORE rbx |
221 | movq 3*8(%rsp),%r12 | 221 | movq 3*8(%rsp), %r12 |
222 | CFI_RESTORE r12 | 222 | CFI_RESTORE r12 |
223 | movq 4*8(%rsp),%r14 | 223 | movq 4*8(%rsp), %r14 |
224 | CFI_RESTORE r14 | 224 | CFI_RESTORE r14 |
225 | movq 5*8(%rsp),%r13 | 225 | movq 5*8(%rsp), %r13 |
226 | CFI_RESTORE r13 | 226 | CFI_RESTORE r13 |
227 | movq 6*8(%rsp),%rbp | 227 | movq 6*8(%rsp), %rbp |
228 | CFI_RESTORE rbp | 228 | CFI_RESTORE rbp |
229 | addq $7*8,%rsp | 229 | addq $7*8, %rsp |
230 | CFI_ADJUST_CFA_OFFSET -7*8 | 230 | CFI_ADJUST_CFA_OFFSET -7*8 |
231 | ret | 231 | ret |
232 | CFI_RESTORE_STATE | 232 | CFI_RESTORE_STATE |
233 | 233 | ||
234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | 234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ |
235 | .Lbad_source: | 235 | .Lbad_source: |
236 | movq (%rsp),%rax | 236 | movq (%rsp), %rax |
237 | testq %rax,%rax | 237 | testq %rax, %rax |
238 | jz .Lende | 238 | jz .Lende |
239 | movl $-EFAULT,(%rax) | 239 | movl $-EFAULT, (%rax) |
240 | jmp .Lende | 240 | jmp .Lende |
241 | 241 | ||
242 | .Lbad_dest: | 242 | .Lbad_dest: |
243 | movq 8(%rsp),%rax | 243 | movq 8(%rsp), %rax |
244 | testq %rax,%rax | 244 | testq %rax, %rax |
245 | jz .Lende | 245 | jz .Lende |
246 | movl $-EFAULT,(%rax) | 246 | movl $-EFAULT, (%rax) |
247 | jmp .Lende | 247 | jmp .Lende |
248 | CFI_ENDPROC | 248 | CFI_ENDPROC |
249 | ENDPROC(csum_partial_copy_generic) | 249 | ENDPROC(csum_partial_copy_generic) |