diff options
author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-03 10:42:59 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-04 17:43:13 -0500 |
commit | 0bcdda0f3a87ed684d46841b6069409e39c4af65 (patch) | |
tree | 52df8fc8d148b98188c09152bcf6e55c55a98c2a /arch/mips/lib/csum_partial.S | |
parent | 03dbd2e0b1dcb24d63f64aa234cce3a7fabed328 (diff) |
[MIPS] Unify csum_partial.S
The 32-bit version and 64-bit version are almost equal. Unify them. This
makes further improvements (for example, copying with parallel, supporting
PREFETCH, etc.) easier.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r-- | arch/mips/lib/csum_partial.S | 258 |
1 files changed, 258 insertions, 0 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S new file mode 100644 index 000000000000..15611d9df7ac --- /dev/null +++ b/arch/mips/lib/csum_partial.S | |||
@@ -0,0 +1,258 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Quick'n'dirty IP checksum ... | ||
7 | * | ||
8 | * Copyright (C) 1998, 1999 Ralf Baechle | ||
9 | * Copyright (C) 1999 Silicon Graphics, Inc. | ||
10 | */ | ||
11 | #include <asm/asm.h> | ||
12 | #include <asm/regdef.h> | ||
13 | |||
14 | #ifdef CONFIG_64BIT /* scratch-register aliases T0..T4 and T7, the mapping depends on the kernel word size */ | ||
15 | #define T0 ta0 | ||
16 | #define T1 ta1 | ||
17 | #define T2 ta2 | ||
18 | #define T3 ta3 | ||
19 | #define T4 t0 /* NOTE(review): uppercase T4 is lowercase t0 in this configuration, do not mix the two spellings */ | ||
20 | #define T7 t3 /* presumably t4..t7 are not usable names in the 64-bit register definitions of <asm/regdef.h> - confirm */ | ||
21 | #else /* 32-bit build: each alias maps to the same-numbered t register */ | ||
22 | #define T0 t0 | ||
23 | #define T1 t1 | ||
24 | #define T2 t2 | ||
25 | #define T3 t3 | ||
26 | #define T4 t4 | ||
27 | #define T7 t7 | ||
28 | #endif | ||
29 | |||
30 | #define ADDC(sum,reg) /* sum += reg, then add the carry-out of that addition back into sum. Clobbers v1. */ \ | ||
31 | addu sum, reg; \ | ||
32 | sltu v1, sum, reg; /* v1 = 1 when the addition wrapped, i.e. the new sum is below reg */ \ | ||
33 | addu sum, v1 | ||
34 | |||
35 | #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) /* ADDC eight consecutive words (32 bytes) at src+offset into sum, _t0.._t3 are temporaries, v1 is clobbered by ADDC */ \ | ||
36 | lw _t0, (offset + 0x00)(src); \ | ||
37 | lw _t1, (offset + 0x04)(src); \ | ||
38 | lw _t2, (offset + 0x08)(src); \ | ||
39 | lw _t3, (offset + 0x0c)(src); \ | ||
40 | ADDC(sum, _t0); \ | ||
41 | ADDC(sum, _t1); \ | ||
42 | ADDC(sum, _t2); \ | ||
43 | ADDC(sum, _t3); \ | ||
44 | lw _t0, (offset + 0x10)(src); /* second group of four words, reusing the same temporaries */ \ | ||
45 | lw _t1, (offset + 0x14)(src); \ | ||
46 | lw _t2, (offset + 0x18)(src); \ | ||
47 | lw _t3, (offset + 0x1c)(src); \ | ||
48 | ADDC(sum, _t0); \ | ||
49 | ADDC(sum, _t1); \ | ||
50 | ADDC(sum, _t2); \ | ||
51 | ADDC(sum, _t3); /* NOTE(review): the trailing backslash continues the macro onto the following blank row, which must stay blank */ \ | ||
52 | |||
53 | /* | ||
54 | * a0: source address | ||
55 | * a1: length of the area to checksum | ||
56 | * a2: partial checksum | ||
57 | */ | ||
58 | |||
59 | #define src a0 /* current read pointer, starts as the source address argument and is advanced as data is consumed */ | ||
60 | #define sum v0 /* running checksum accumulator, cleared at csum_partial entry and still in v0 at each jr ra */ | ||
61 | |||
62 | .text | ||
63 | .set noreorder | ||
64 | |||
65 | /* unknown src alignment and < 8 bytes to go */ | ||
66 | small_csumcpy: /* tail handler. In: T2 = bytes left (only bits 4, 2 and 1 are examined), src = read pointer, sum = running sum, T7 = odd-start flag, a2 = partial checksum. Adds the tail, folds, adds a2 and returns. */ | ||
67 | move a1, T2 /* a1 = remaining byte count handed over in T2 */ | ||
68 | |||
69 | andi T0, a1, 4 /* T0 nonzero when a full word remains */ | ||
70 | beqz T0, 1f | ||
71 | andi T0, a1, 2 /* delay slot, runs on both paths: T0 nonzero when a halfword remains */ | ||
72 | |||
73 | /* Still a full word to go */ | ||
74 | ulw T1, (src) /* unaligned word load, src alignment is unknown here */ | ||
75 | PTR_ADDIU src, 4 | ||
76 | ADDC(sum, T1) | ||
77 | |||
78 | 1: move T1, zero /* T1 collects the trailing halfword and byte, zero when neither exists */ | ||
79 | beqz T0, 1f | ||
80 | andi T0, a1, 1 /* delay slot, runs on both paths: T0 nonzero when a single byte remains */ | ||
81 | |||
82 | /* Still a halfword to go */ | ||
83 | ulhu T1, (src) /* unaligned, zero-extended halfword load */ | ||
84 | PTR_ADDIU src, 2 | ||
85 | |||
86 | 1: beqz T0, 1f | ||
87 | sll T1, T1, 16 /* delay slot, runs on both paths: move the halfword (or zero) into the upper 16 bits */ | ||
88 | |||
89 | lbu T2, (src) /* last odd byte, zero-extended */ | ||
90 | nop /* presumably fills the load delay slot before T2 is used - confirm for the targeted CPUs */ | ||
91 | |||
92 | #ifdef __MIPSEB__ | ||
93 | sll T2, T2, 8 /* big-endian only: place the byte in bits 15..8 */ | ||
94 | #endif | ||
95 | or T1, T2 /* combine the byte with the shifted halfword */ | ||
96 | |||
97 | 1: ADDC(sum, T1) | ||
98 | |||
99 | /* fold checksum: add the lower 16 bits of sum to the upper 16 bits, then add the carry */ | ||
100 | sll v1, sum, 16 | ||
101 | addu sum, v1 | ||
102 | sltu v1, sum, v1 /* v1 = carry out of the addition above */ | ||
103 | srl sum, sum, 16 | ||
104 | addu sum, v1 | ||
105 | |||
106 | /* odd buffer alignment? T7 is nonzero only when csum_partial saw an odd start address on its long path */ | ||
107 | beqz T7, 1f | ||
108 | nop | ||
109 | sll v1, sum, 8 /* swap the two bytes of the folded 16-bit sum */ | ||
110 | srl sum, sum, 8 | ||
111 | or sum, v1 | ||
112 | andi sum, 0xffff | ||
113 | 1: | ||
114 | .set reorder /* let the assembler handle the delay slot of the jr below */ | ||
115 | /* Add the passed partial csum. */ | ||
116 | ADDC(sum, a2) | ||
117 | jr ra /* return, result is in sum (v0) */ | ||
118 | .set noreorder /* back to manual delay-slot scheduling for the code that follows */ | ||
119 | |||
120 | /* ------------------------------------------------------------------------- */ | ||
121 | |||
122 | .align 5 | ||
123 | LEAF(csum_partial) /* In: a0 = source address, a1 = length in bytes, a2 = partial checksum. Aligns src step by step, sums 128, 64 and 32 byte blocks, then single words, and finishes in small_csumcpy. */ | ||
124 | move sum, zero | ||
125 | move T7, zero /* odd-start flag defaults to clear, it stays clear on the short path below */ | ||
126 | |||
127 | sltiu t8, a1, 0x8 /* t8 = 1 when fewer than 8 bytes were requested */ | ||
128 | bnez t8, small_csumcpy /* < 8 bytes to copy */ | ||
129 | move T2, a1 /* delay slot: T2 = length, the count small_csumcpy expects */ | ||
130 | |||
131 | beqz a1, out /* NOTE(review): lengths below 8 already branched away above, so a1 is nonzero here and this branch looks never taken */ | ||
132 | andi T7, src, 0x1 /* odd buffer? (delay slot, always executed: T7 = 1 when src starts on an odd address) */ | ||
133 | |||
134 | hword_align: /* consume one byte if src is odd */ | ||
135 | beqz T7, word_align | ||
136 | andi t8, src, 0x2 /* delay slot: t8 nonzero when src is not word aligned */ | ||
137 | |||
138 | lbu T0, (src) | ||
139 | LONG_SUBU a1, a1, 0x1 | ||
140 | #ifdef __MIPSEL__ | ||
141 | sll T0, T0, 8 /* little-endian only: place the byte in bits 15..8 */ | ||
142 | #endif | ||
143 | ADDC(sum, T0) | ||
144 | PTR_ADDU src, src, 0x1 | ||
145 | andi t8, src, 0x2 /* recompute with the advanced src */ | ||
146 | |||
147 | word_align: /* consume one halfword if src is not word aligned */ | ||
148 | beqz t8, dword_align | ||
149 | sltiu t8, a1, 56 /* delay slot: t8 = 1 when fewer than 56 bytes remain */ | ||
150 | |||
151 | lhu T0, (src) | ||
152 | LONG_SUBU a1, a1, 0x2 | ||
153 | ADDC(sum, T0) | ||
154 | sltiu t8, a1, 56 /* recompute after a1 changed */ | ||
155 | PTR_ADDU src, src, 0x2 | ||
156 | |||
157 | dword_align: /* src is word aligned from here on */ | ||
158 | bnez t8, do_end_words /* fewer than 56 bytes left: skip the block code and sum word by word */ | ||
159 | move t8, a1 /* delay slot: t8 = bytes left, do_end_words turns it into a word count */ | ||
160 | |||
161 | andi t8, src, 0x4 | ||
162 | beqz t8, qword_align | ||
163 | andi t8, src, 0x8 /* delay slot: alignment test for the next stage */ | ||
164 | |||
165 | lw T0, 0x00(src) /* one word brings src to an 8-byte boundary */ | ||
166 | LONG_SUBU a1, a1, 0x4 | ||
167 | ADDC(sum, T0) | ||
168 | PTR_ADDU src, src, 0x4 | ||
169 | andi t8, src, 0x8 /* recompute with the advanced src */ | ||
170 | |||
171 | qword_align: | ||
172 | beqz t8, oword_align | ||
173 | andi t8, src, 0x10 /* delay slot: alignment test for the next stage */ | ||
174 | |||
175 | lw T0, 0x00(src) /* two words bring src to a 16-byte boundary */ | ||
176 | lw T1, 0x04(src) | ||
177 | LONG_SUBU a1, a1, 0x8 | ||
178 | ADDC(sum, T0) | ||
179 | ADDC(sum, T1) | ||
180 | PTR_ADDU src, src, 0x8 | ||
181 | andi t8, src, 0x10 /* recompute with the advanced src */ | ||
182 | |||
183 | oword_align: | ||
184 | beqz t8, begin_movement | ||
185 | LONG_SRL t8, a1, 0x7 /* delay slot: t8 = a1 >> 7, the number of whole 128-byte blocks */ | ||
186 | |||
187 | lw T3, 0x08(src) /* four words bring src to a 32-byte boundary */ | ||
188 | lw T4, 0x0c(src) | ||
189 | lw T0, 0x00(src) | ||
190 | lw T1, 0x04(src) | ||
191 | ADDC(sum, T3) | ||
192 | ADDC(sum, T4) | ||
193 | ADDC(sum, T0) | ||
194 | ADDC(sum, T1) | ||
195 | LONG_SUBU a1, a1, 0x10 | ||
196 | PTR_ADDU src, src, 0x10 | ||
197 | LONG_SRL t8, a1, 0x7 /* recompute the 128-byte block count after a1 changed */ | ||
198 | |||
199 | begin_movement: /* a1 is not decremented below, the remaining sizes are read from its low bits */ | ||
200 | beqz t8, 1f | ||
201 | andi T2, a1, 0x40 /* delay slot: T2 nonzero when a 64-byte block follows the 128-byte blocks */ | ||
202 | |||
203 | move_128bytes: /* loop: 4 x 32 bytes per iteration, t8 counts iterations */ | ||
204 | CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) | ||
205 | CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4) | ||
206 | CSUM_BIGCHUNK(src, 0x40, sum, T0, T1, T3, T4) | ||
207 | CSUM_BIGCHUNK(src, 0x60, sum, T0, T1, T3, T4) | ||
208 | LONG_SUBU t8, t8, 0x01 | ||
209 | bnez t8, move_128bytes | ||
210 | PTR_ADDU src, src, 0x80 /* delay slot: advance src on every iteration, including the last */ | ||
211 | |||
212 | 1: | ||
213 | beqz T2, 1f | ||
214 | andi T2, a1, 0x20 /* delay slot: T2 nonzero when a 32-byte block remains */ | ||
215 | |||
216 | move_64bytes: | ||
217 | CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) | ||
218 | CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4) | ||
219 | PTR_ADDU src, src, 0x40 | ||
220 | |||
221 | 1: | ||
222 | beqz T2, do_end_words | ||
223 | andi t8, a1, 0x1c /* delay slot: t8 = byte count of the remaining whole words */ | ||
224 | |||
225 | move_32bytes: | ||
226 | CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) | ||
227 | andi t8, a1, 0x1c /* same value as the delay slot above, recomputed on this path */ | ||
228 | PTR_ADDU src, src, 0x20 | ||
229 | |||
230 | do_end_words: /* t8 = byte count on entry, only whole words are consumed here */ | ||
231 | beqz t8, maybe_end_cruft | ||
232 | LONG_SRL t8, t8, 0x2 /* delay slot: convert the byte count to a word count */ | ||
233 | |||
234 | end_words: /* loop: one word per iteration */ | ||
235 | lw T0, (src) | ||
236 | LONG_SUBU t8, t8, 0x1 | ||
237 | ADDC(sum, T0) | ||
238 | bnez t8, end_words | ||
239 | PTR_ADDU src, src, 0x4 /* delay slot: advance src on every iteration, including the last */ | ||
240 | |||
241 | maybe_end_cruft: | ||
242 | andi T2, a1, 0x3 /* T2 = trailing bytes (0..3) for small_csumcpy */ | ||
243 | |||
244 | small_memcpy: | ||
245 | j small_csumcpy; move a1, T2 /* XXX ??? NOTE(review): unconditional jump, the move after the semicolon is a separate statement and, with noreorder in effect, would occupy the delay slot - confirm */ | ||
246 | beqz t2, out /* NOTE(review): nothing branches to this row and the jump above never falls through, so this looks unreachable. It also tests lowercase t2, not the T2 alias. */ | ||
247 | move a1, T2 | ||
248 | |||
249 | end_bytes: /* NOTE(review): only referenced by its own loop branch, appears to be dead code */ | ||
250 | lb T0, (src) /* loaded value is never added to sum */ | ||
251 | LONG_SUBU a1, a1, 0x1 | ||
252 | bnez a2, end_bytes /* NOTE(review): loops on a2 while the counter decremented above is a1 */ | ||
253 | PTR_ADDU src, src, 0x1 | ||
254 | |||
255 | out: /* NOTE(review): this exit neither folds sum nor adds a2, unlike the small_csumcpy exit */ | ||
256 | jr ra | ||
257 | move v0, sum /* delay slot, sum is an alias of v0 so this moves v0 onto itself */ | ||
258 | END(csum_partial) | ||