diff options
Diffstat (limited to 'arch/m32r/lib/checksum.S')
-rw-r--r-- | arch/m32r/lib/checksum.S | 322 |
1 files changed, 322 insertions, 0 deletions
diff --git a/arch/m32r/lib/checksum.S b/arch/m32r/lib/checksum.S new file mode 100644 index 000000000000..f6fc1bdb87e4 --- /dev/null +++ b/arch/m32r/lib/checksum.S | |||
@@ -0,0 +1,322 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * IP/TCP/UDP checksumming routines | ||
7 | * | ||
8 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | ||
9 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | ||
10 | * Tom May, <ftom@netcom.com> | ||
11 | * Pentium Pro/II routines: | ||
12 | * Alexander Kjeldaas <astor@guardian.no> | ||
13 | * Finn Arne Gangstad <finnag@guardian.no> | ||
14 | * Lots of code moved from tcp.c and ip.c; see those files | ||
15 | * for more names. | ||
16 | * | ||
17 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception | ||
18 | * handling. | ||
19 | * Andi Kleen, add zeroing on error | ||
20 | * converted to pure assembler | ||
21 | * Hirokazu Takata,Hiroyuki Kondo rewrite for the m32r architecture. | ||
22 | * | ||
23 | * This program is free software; you can redistribute it and/or | ||
24 | * modify it under the terms of the GNU General Public License | ||
25 | * as published by the Free Software Foundation; either version | ||
26 | * 2 of the License, or (at your option) any later version. | ||
27 | */ | ||
28 | /* $Id$ */ | ||
29 | |||
30 | |||
31 | #include <linux/config.h> | ||
32 | #include <linux/linkage.h> | ||
33 | #include <asm/assembler.h> | ||
34 | #include <asm/errno.h> | ||
35 | |||
36 | /* | ||
37 | * computes a partial checksum, e.g. for TCP/UDP fragments | ||
38 | */ | ||
39 | |||
40 | /* | ||
41 | unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | ||
42 | */ | ||
43 | |||
44 | |||
45 | #ifdef CONFIG_ISA_DUAL_ISSUE | ||
46 | |||
47 | /* | ||
48 | * Experiments with Ethernet and SLIP connections show that buff | ||
49 | * is aligned on either a 2-byte or 4-byte boundary. We get at | ||
50 | * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. | ||
51 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte | ||
52 | * alignment for the unrolled loop. | ||
53 | */ | ||
54 | |||
55 | .text | ||
/*
 * csum_partial, variant assembled when CONFIG_ISA_DUAL_ISSUE is defined.
 *
 * Accumulates the bytes of buff[0..len) into a 32-bit sum using add-with-carry
 * (addx), folds that sum to 16 bits, then adds the incoming sum argument with
 * end-around carry.  The result is left in r0 and control returns via jmp r14.
 *
 * Register use inside the routine:
 *   r0  read pointer into buff (reused for the result from label 7 onwards)
 *   r1  remaining length in bytes
 *   r2  running accumulator (the incoming sum is parked on the stack)
 *   r3, r4, r5  scratch for loaded data
 *   r6  loop counter
 *   r7  buff & 1, kept until the end to decide on the final byte swap
 *
 * "a || b" is the assembler syntax for issuing two instructions as a pair.
 * Numeric labels are local: "1f" is the next "1:" below, "1b" the previous one.
 *
 * NOTE(review): the odd-address path consumes one byte and decrements len
 * without first checking that len > 0.  With len == 0 and an odd buff this
 * looks like it would make r1 negative before the "srl3 r6, r1, #5" chunk
 * count is computed - confirm that callers never do this.
 */
56 | ENTRY(csum_partial) | |
57 | ; Function args | |
58 | ; r0: unsigned char *buff | |
59 | ; r1: int len | |
60 | ; r2: unsigned int sum | |
61 | |||
/* Save the incoming sum on the stack and start the accumulator at zero. */
62 | push r2 || ldi r2, #0 | |
63 | and3 r7, r0, #1 ; Check alignment. | |
64 | beqz r7, 1f ; Jump if alignment is ok. | |
65 | ; 1-byte mis aligned | |
/* Odd start address: take one byte, advance the pointer, shrink len by one. */
66 | ldub r4, @r0 || addi r0, #1 | |
67 | ; clear c-bit || Alignment uses up bytes. | |
68 | cmp r0, r0 || addi r1, #-1 | |
/* Add the byte, then add zero (r3) so the carry out is folded back in. */
69 | ldi r3, #0 || addx r2, r4 | |
70 | addx r2, r3 | |
71 | .fillinsn | |
72 | 1: | |
/* If bit 1 of the pointer is set, one halfword is needed to reach 4-byte alignment. */
73 | and3 r4, r0, #2 ; Check alignment. | |
74 | beqz r4, 2f ; Jump if alignment is ok. | |
75 | ; clear c-bit || Alignment uses up two bytes. | |
76 | cmp r0, r0 || addi r1, #-2 | |
/*
 * The branch is taken only when len - 2 is strictly positive.  Otherwise the
 * 2 is added back and the tail code at 4 deals with the remaining bytes
 * (as written, this includes the case of exactly two bytes left).
 */
77 | bgtz r1, 1f ; Jump if we had at least two bytes. | |
78 | bra 4f || addi r1, #2 | |
79 | .fillinsn ; len(r1) was < 2. Deal with it. | |
80 | 1: | |
81 | ; 2-byte aligned | |
/* Consume the aligning halfword and fold its carry with a second addx of zero. */
82 | lduh r4, @r0 || ldi r3, #0 | |
83 | addx r2, r4 || addi r0, #2 | |
84 | addx r2, r3 | |
85 | .fillinsn | |
86 | 2: | |
87 | ; 4-byte aligned | |
/* r6 = len >> 5 = number of whole 32-byte chunks; skip the unrolled loop if none. */
88 | cmp r0, r0 ; clear c-bit | |
89 | srl3 r6, r1, #5 | |
90 | beqz r6, 2f | |
91 | .fillinsn | |
92 | |||
/*
 * Unrolled loop: eight post-incrementing word loads per iteration.  Each addx
 * is paired with a later load and trails it by three slots, which is why
 * r3/r4/r5 are rotated.  The carry bit is chained through every addx and is
 * not cleared between iterations.
 */
93 | 1: ld r3, @r0+ | |
94 | ld r4, @r0+ ; +4 | |
95 | ld r5, @r0+ ; +8 | |
96 | ld r3, @r0+ || addx r2, r3 ; +12 | |
97 | ld r4, @r0+ || addx r2, r4 ; +16 | |
98 | ld r5, @r0+ || addx r2, r5 ; +20 | |
99 | ld r3, @r0+ || addx r2, r3 ; +24 | |
100 | ld r4, @r0+ || addx r2, r4 ; +28 | |
/* Drain the last three loaded words and count the chunk down. */
101 | addx r2, r5 || addi r6, #-1 | |
102 | addx r2, r3 | |
103 | addx r2, r4 | |
104 | bnez r6, 1b | |
105 | |||
/* r6 is zero here, so this addx only adds the pending carry. */
106 | addx r2, r6 ; r6=0 | |
107 | cmp r0, r0 ; This clears c-bit | |
108 | .fillinsn | |
/* r6 = (len & 0x1c) >> 2 = number of whole words left after the 32-byte chunks. */
109 | 2: and3 r6, r1, #0x1c ; withdraw len | |
110 | beqz r6, 4f | |
111 | srli r6, #2 | |
112 | .fillinsn | |
113 | |||
/* One word per iteration for the remaining words. */
114 | 3: ld r4, @r0+ || addi r6, #-1 | |
115 | addx r2, r4 | |
116 | bnez r6, 3b | |
117 | |||
/* Fold the pending carry (r6 is zero) and clear the carry bit for the tail. */
118 | addx r2, r6 ; r6=0 | |
119 | cmp r0, r0 ; This clears c-bit | |
120 | .fillinsn | |
/* Tail: r1 = len & 3, i.e. zero to three trailing bytes. */
121 | 4: and3 r1, r1, #3 | |
122 | beqz r1, 7f ; if len == 0 goto end | |
123 | and3 r6, r1, #2 | |
124 | beqz r6, 5f ; if len < 2 goto 5f(1byte) | |
/* Trailing halfword: it is shifted into the upper 16 bits before being added. */
125 | lduh r4, @r0 || addi r0, #2 | |
126 | addi r1, #-2 || slli r4, #16 | |
127 | addx r2, r4 | |
128 | beqz r1, 6f | |
129 | .fillinsn | |
/*
 * Final odd byte.  r1 is zeroed here so that the addx at 6 adds only the carry.
 * When __LITTLE_ENDIAN__ is not defined the byte is shifted left by 8 first.
 */
130 | 5: ldub r4, @r0 || ldi r1, #0 | |
131 | #ifndef __LITTLE_ENDIAN__ | |
132 | slli r4, #8 | |
133 | #endif | |
134 | addx r2, r4 | |
135 | .fillinsn | |
/* r1 is zero on both paths into 6, so this folds in the last carry. */
136 | 6: addx r2, r1 | |
137 | .fillinsn | |
138 | 7: | |
/*
 * Fold 32 bits to 16: r0 = low half + high half.  If that overflowed 16 bits,
 * add one and mask back to 16 bits.
 */
139 | and3 r0, r2, #0xffff | |
140 | srli r2, #16 | |
141 | add r0, r2 | |
142 | srl3 r2, r0, #16 | |
143 | beqz r2, 1f | |
144 | addi r0, #1 | |
145 | and3 r0, r0, #0xffff | |
146 | .fillinsn | |
147 | 1: | |
/*
 * r7 != 0 means buff started on an odd address; the two bytes of the 16-bit
 * result are exchanged in that case (presumably to undo the one-byte offset
 * of the data relative to 16-bit word boundaries - confirm).
 */
148 | beqz r7, 1f ; swap the upper byte for the lower | |
149 | and3 r2, r0, #0xff | |
150 | srl3 r0, r0, #8 | |
151 | slli r2, #8 | |
152 | or r0, r2 | |
153 | .fillinsn | |
154 | 1: | |
/*
 * Restore the incoming sum into r2 and clear the carry bit, add it to the
 * folded result, then add zero with carry so an overflow wraps around.
 */
155 | pop r2 || cmp r0, r0 | |
156 | addx r0, r2 || ldi r2, #0 | |
157 | addx r0, r2 | |
/* Return to the address held in r14 with the result in r0. */
158 | jmp r14 | |
159 | |||
160 | #else /* not CONFIG_ISA_DUAL_ISSUE */ | ||
161 | |||
162 | /* | ||
163 | * Experiments with Ethernet and SLIP connections show that buff | ||
164 | * is aligned on either a 2-byte or 4-byte boundary. We get at | ||
165 | * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. | ||
166 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte | ||
167 | * alignment for the unrolled loop. | ||
168 | */ | ||
169 | |||
170 | .text | ||
/*
 * csum_partial, variant assembled when CONFIG_ISA_DUAL_ISSUE is not defined.
 * It follows the same steps as the dual-issue variant, with one instruction
 * per line and no "||" pairs.
 *
 * Accumulates the bytes of buff[0..len) into a 32-bit sum using add-with-carry
 * (addx), folds that sum to 16 bits, then adds the incoming sum argument with
 * end-around carry.  The result is left in r0 and control returns via jmp r14.
 *
 * Register use inside the routine:
 *   r0  read pointer into buff (reused for the result from label 7 onwards)
 *   r1  remaining length in bytes
 *   r2  running accumulator (the incoming sum is parked on the stack)
 *   r3, r4, r5  scratch for loaded data and for a zero addend
 *   r6  loop counter
 *   r7  buff & 1, kept until the end to decide on the final byte swap
 *
 * Numeric labels are local: "1f" is the next "1:" below, "1b" the previous one.
 *
 * NOTE(review): the odd-address path consumes one byte and decrements len
 * without first checking that len > 0.  With len == 0 and an odd buff this
 * looks like it would make r1 negative before the "srl3 r6, r1, #5" chunk
 * count is computed - confirm that callers never do this.
 */
171 | ENTRY(csum_partial) | |
172 | ; Function args | |
173 | ; r0: unsigned char *buff | |
174 | ; r1: int len | |
175 | ; r2: unsigned int sum | |
176 | |||
/* Save the incoming sum on the stack and start the accumulator at zero. */
177 | push r2 | |
178 | ldi r2, #0 | |
179 | and3 r7, r0, #1 ; Check alignment. | |
180 | beqz r7, 1f ; Jump if alignment is ok. | |
181 | ; 1-byte mis aligned | |
/* Odd start address: take one byte, advance the pointer, shrink len by one. */
182 | ldub r4, @r0 | |
183 | addi r0, #1 | |
184 | addi r1, #-1 ; Alignment uses up bytes. | |
185 | cmp r0, r0 ; clear c-bit | |
/* Add the byte, then add zero (r3) so the carry out is folded back in. */
186 | ldi r3, #0 | |
187 | addx r2, r4 | |
188 | addx r2, r3 | |
189 | .fillinsn | |
190 | 1: | |
/* If bit 1 of the pointer is set, one halfword is needed to reach 4-byte alignment. */
191 | and3 r4, r0, #2 ; Check alignment. | |
192 | beqz r4, 2f ; Jump if alignment is ok. | |
193 | addi r1, #-2 ; Alignment uses up two bytes. | |
194 | cmp r0, r0 ; clear c-bit | |
/*
 * The branch is taken only when len - 2 is strictly positive.  Otherwise the
 * 2 is added back and the tail code at 4 deals with the remaining bytes
 * (as written, this includes the case of exactly two bytes left).
 */
195 | bgtz r1, 1f ; Jump if we had at least two bytes. | |
196 | addi r1, #2 ; len(r1) was < 2. Deal with it. | |
197 | bra 4f | |
198 | .fillinsn | |
199 | 1: | |
200 | ; 2-byte aligned | |
/* Consume the aligning halfword and fold its carry with a second addx of zero. */
201 | lduh r4, @r0 | |
202 | addi r0, #2 | |
203 | ldi r3, #0 | |
204 | addx r2, r4 | |
205 | addx r2, r3 | |
206 | .fillinsn | |
207 | 2: | |
208 | ; 4-byte aligned | |
/* r6 = len >> 5 = number of whole 32-byte chunks; skip the unrolled loop if none. */
209 | cmp r0, r0 ; clear c-bit | |
210 | srl3 r6, r1, #5 | |
211 | beqz r6, 2f | |
212 | .fillinsn | |
213 | |||
/*
 * Unrolled loop: eight post-incrementing word loads per iteration, issued as
 * groups of three, three and two, each group followed by its addx chain.
 * The carry bit is chained through every addx and is not cleared between
 * iterations.
 */
214 | 1: ld r3, @r0+ | |
215 | ld r4, @r0+ ; +4 | |
216 | ld r5, @r0+ ; +8 | |
217 | addx r2, r3 | |
218 | addx r2, r4 | |
219 | addx r2, r5 | |
220 | ld r3, @r0+ ; +12 | |
221 | ld r4, @r0+ ; +16 | |
222 | ld r5, @r0+ ; +20 | |
223 | addx r2, r3 | |
224 | addx r2, r4 | |
225 | addx r2, r5 | |
226 | ld r3, @r0+ ; +24 | |
227 | ld r4, @r0+ ; +28 | |
/* Count the chunk down before the last two adds. */
228 | addi r6, #-1 | |
229 | addx r2, r3 | |
230 | addx r2, r4 | |
231 | bnez r6, 1b | |
/* r6 is zero here, so this addx only adds the pending carry. */
232 | addx r2, r6 ; r6=0 | |
233 | cmp r0, r0 ; This clears c-bit | |
234 | .fillinsn | |
235 | |||
/* r6 = (len & 0x1c) >> 2 = number of whole words left after the 32-byte chunks. */
236 | 2: and3 r6, r1, #0x1c ; withdraw len | |
237 | beqz r6, 4f | |
238 | srli r6, #2 | |
239 | .fillinsn | |
240 | |||
/* One word per iteration for the remaining words. */
241 | 3: ld r4, @r0+ | |
242 | addi r6, #-1 | |
243 | addx r2, r4 | |
244 | bnez r6, 3b | |
/* Fold the pending carry (r6 is zero) and clear the carry bit for the tail. */
245 | addx r2, r6 ; r6=0 | |
246 | cmp r0, r0 ; This clears c-bit | |
247 | .fillinsn | |
248 | |||
/* Tail: r1 = len & 3, i.e. zero to three trailing bytes. */
249 | 4: and3 r1, r1, #3 | |
250 | beqz r1, 7f ; if len == 0 goto end | |
251 | and3 r6, r1, #2 | |
252 | beqz r6, 5f ; if len < 2 goto 5f(1byte) | |
253 | |||
/* Trailing halfword: it is shifted into the upper 16 bits before being added. */
254 | lduh r4, @r0 | |
255 | addi r0, #2 | |
256 | addi r1, #-2 | |
257 | slli r4, #16 | |
258 | addx r2, r4 | |
259 | beqz r1, 6f | |
260 | .fillinsn | |
/* Final odd byte; when __LITTLE_ENDIAN__ is not defined it is shifted left by 8 first. */
261 | 5: ldub r4, @r0 | |
262 | #ifndef __LITTLE_ENDIAN__ | |
263 | slli r4, #8 | |
264 | #endif | |
265 | addx r2, r4 | |
266 | .fillinsn | |
/* Add zero with carry to fold in the carry from the tail additions. */
267 | 6: ldi r5, #0 | |
268 | addx r2, r5 | |
269 | .fillinsn | |
270 | 7: | |
/*
 * Fold 32 bits to 16: r0 = low half + high half.  If that overflowed 16 bits,
 * add one and mask back to 16 bits.
 */
271 | and3 r0, r2, #0xffff | |
272 | srli r2, #16 | |
273 | add r0, r2 | |
274 | srl3 r2, r0, #16 | |
275 | beqz r2, 1f | |
276 | addi r0, #1 | |
277 | and3 r0, r0, #0xffff | |
278 | .fillinsn | |
279 | 1: | |
/*
 * r7 != 0 means buff started on an odd address; the two bytes of the 16-bit
 * result are exchanged in that case (presumably to undo the one-byte offset
 * of the data relative to 16-bit word boundaries - confirm).
 */
280 | beqz r7, 1f | |
281 | mv r2, r0 | |
282 | srl3 r0, r2, #8 | |
283 | and3 r2, r2, #0xff | |
284 | slli r2, #8 | |
285 | or r0, r2 | |
286 | .fillinsn | |
287 | 1: | |
/*
 * Restore the incoming sum into r2 and clear the carry bit, add it to the
 * folded result, then add zero with carry so an overflow wraps around.
 */
288 | pop r2 | |
289 | cmp r0, r0 | |
290 | addx r0, r2 | |
291 | ldi r2, #0 | |
292 | addx r0, r2 | |
/* Return to the address held in r14 with the result in r0. */
293 | jmp r14 | |
294 | |||
295 | #endif /* not CONFIG_ISA_DUAL_ISSUE */ | ||
296 | |||
297 | /* | ||
298 | unsigned int csum_partial_copy_generic (const char *src, char *dst, | ||
299 | int len, int sum, int *src_err_ptr, int *dst_err_ptr) | ||
300 | */ | ||
301 | |||
302 | /* | ||
303 | * Copy from src to dst while checksumming, otherwise like csum_partial | |
304 | * | ||
305 | * The macros SRC and DST specify the type of access for the instruction. | ||
306 | * thus we can call a custom exception handler for all access types. | ||
307 | * | ||
308 | * FIXME: could someone double-check whether I haven't mixed up some SRC and | ||
309 | * DST definitions? It's damn hard to trigger all cases. I hope I got | ||
310 | * them all but there's no guarantee. | ||
311 | */ | ||
312 | |||
/*
 * csum_partial_copy_generic: placeholder body.
 *
 * As written here the routine executes four nops and returns through r14.
 * It loads nothing, stores nothing and modifies no register, so no data is
 * copied and no checksum is computed by this code.
 *
 * NOTE(review): the surrounding comments describe a copy-and-checksum routine
 * with source/destination fault handling; none of that is implemented in the
 * lines visible here.  Confirm whether callers rely on this entry point or
 * whether the real implementation lives elsewhere.
 */
313 | ENTRY(csum_partial_copy_generic) | |
314 | nop | |
315 | nop | |
316 | nop | |
317 | nop | |
/* Unconditional return to the caller. */
318 | jmp r14 | |
/* The nops below follow an unconditional jump; presumably padding - confirm. */
319 | nop | |
320 | nop | |
321 | nop | |
322 | |||