aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips/lib/csum_partial.S
diff options
context:
space:
mode:
author: Atsushi Nemoto <anemo@mba.ocn.ne.jp> 2006-12-03 10:42:59 -0500
committer: Ralf Baechle <ralf@linux-mips.org> 2006-12-04 17:43:13 -0500
commit: 0bcdda0f3a87ed684d46841b6069409e39c4af65 (patch)
tree: 52df8fc8d148b98188c09152bcf6e55c55a98c2a /arch/mips/lib/csum_partial.S
parent: 03dbd2e0b1dcb24d63f64aa234cce3a7fabed328 (diff)
[MIPS] Unify csum_partial.S
The 32-bit and 64-bit versions are almost identical, so unify them. This makes further improvements (for example, copying with parallel, supporting PREFETCH, etc.) easier.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r-- arch/mips/lib/csum_partial.S 258
1 files changed, 258 insertions, 0 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
new file mode 100644
index 000000000000..15611d9df7ac
--- /dev/null
+++ b/arch/mips/lib/csum_partial.S
@@ -0,0 +1,258 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Quick'n'dirty IP checksum ...
7 *
8 * Copyright (C) 1998, 1999 Ralf Baechle
9 * Copyright (C) 1999 Silicon Graphics, Inc.
10 */
11#include <asm/asm.h>
12#include <asm/regdef.h>
13
/*
 * Scratch-register aliases used by the code below (T0-T4 and T7).
 * The register name each alias expands to is selected by CONFIG_64BIT;
 * the ta0-ta3 / t0-t7 names themselves are presumably supplied by
 * <asm/regdef.h> - confirm there.
 * t8 and v1 are used directly in this file, without an alias.
 */
14#ifdef CONFIG_64BIT
15#define T0 ta0
16#define T1 ta1
17#define T2 ta2
18#define T3 ta3
19#define T4 t0
20#define T7 t3
21#else
22#define T0 t0
23#define T1 t1
24#define T2 t2
25#define T3 t3
26#define T4 t4
27#define T7 t7
28#endif
29
/*
 * ADDC(sum, reg): sum += reg with end-around carry.
 *
 * After the addu, "sltu v1, sum, reg" sets v1 to 1 when the new sum is
 * (unsigned) smaller than reg, i.e. when the addition wrapped; that carry
 * is then added back into sum.
 * Clobbers v1. Both macro arguments are expanded more than once.
 */
30#define ADDC(sum,reg) \
31 addu sum, reg; \
32 sltu v1, sum, reg; \
33 addu sum, v1
34
/*
 * CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3):
 * accumulate the eight 32-bit words at src+offset+0x00 .. src+offset+0x1c
 * (32 bytes) into sum.
 *
 * The words are fetched with lw four at a time into _t0.._t3 and each one
 * is added with ADDC. src itself is not modified.
 * Clobbers _t0, _t1, _t2, _t3 and, through ADDC, v1.
 *
 * Note: the last line of the definition ends with a continuation
 * backslash, so the (empty) line that follows it is still part of the
 * macro and has to stay empty.
 */
35#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
36 lw _t0, (offset + 0x00)(src); \
37 lw _t1, (offset + 0x04)(src); \
38 lw _t2, (offset + 0x08)(src); \
39 lw _t3, (offset + 0x0c)(src); \
40 ADDC(sum, _t0); \
41 ADDC(sum, _t1); \
42 ADDC(sum, _t2); \
43 ADDC(sum, _t3); \
44 lw _t0, (offset + 0x10)(src); \
45 lw _t1, (offset + 0x14)(src); \
46 lw _t2, (offset + 0x18)(src); \
47 lw _t3, (offset + 0x1c)(src); \
48 ADDC(sum, _t0); \
49 ADDC(sum, _t1); \
50 ADDC(sum, _t2); \
51 ADDC(sum, _t3); \
52
53/*
54 * a0: source address
55 * a1: length of the area to checksum
56 * a2: partial checksum
57 */
58
/*
 * Symbolic names for the two registers used throughout the routine:
 * src is the source pointer (a0) and sum is the running checksum (v0),
 * so the value left in sum is also what is in v0 on return.
 */
59#define src a0
60#define sum v0
61
/*
 * Everything below is assembled with instruction reordering disabled:
 * the instruction written after a branch or jump is its delay slot.
 */
62 .text
63 .set noreorder
64
/*
 * small_csumcpy: sum the last few bytes, fold the sum to 16 bits, add the
 * caller's partial checksum and return.
 *
 * Reached from csum_partial, either straight away (length < 8) or after
 * its word-sized stages have run. On entry:
 *   T2  = number of bytes still to sum; only bits 2, 1 and 0 are examined
 *   src = address of those bytes (ulw/ulhu/lbu are used for the loads)
 *   sum = running 32-bit sum
 *   T7  = non-zero if the two bytes of the folded result must be swapped
 *   a2  = partial checksum to add after folding
 * Returns through ra with the result in sum (v0).
 * Clobbers a1, T0, T1, T2 and v1, and advances src.
 *
 * Because of .set noreorder, the instruction following each branch is its
 * delay slot and executes whether or not the branch is taken.
 */
65/* unknown src alignment and < 8 bytes to go */
66small_csumcpy:
/* a1 = T2: the remaining byte count is handed over in T2 */
67 move a1, T2
68
/* bit 2 of the count set -> one full word left; the delay slot already
   extracts bit 1 for the halfword test further down */
69 andi T0, a1, 4
70 beqz T0, 1f
71 andi T0, a1, 2
72
73 /* Still a full word to go */
74 ulw T1, (src)
75 PTR_ADDIU src, 4
76 ADDC(sum, T1)
77
/* T1 collects the trailing halfword and/or byte; start from zero.
   Delay slot: T0 = bit 0 of the count, for the single-byte test */
781: move T1, zero
79 beqz T0, 1f
80 andi T0, a1, 1
81
82 /* Still a halfword to go */
83 ulhu T1, (src)
84 PTR_ADDIU src, 2
85
/* delay slot: move the halfword (or the zero) into the upper 16 bits of
   T1; this happens whether or not a trailing byte follows */
861: beqz T0, 1f
87 sll T1, T1, 16
88
/* last single byte; on big-endian it is shifted into bits 15..8 before
   being merged, on little-endian it is merged as loaded */
89 lbu T2, (src)
90 nop
91
92#ifdef __MIPSEB__
93 sll T2, T2, 8
94#endif
95 or T1, T2
96
/* add the assembled tail (zero if there was neither halfword nor byte) */
971: ADDC(sum, T1)
98
/* v1 = sum << 16; sum += v1; v1 = carry out of that add;
   sum >>= 16; sum += carry  ->  16-bit value in the low half of sum */
99 /* fold checksum */
100 sll v1, sum, 16
101 addu sum, v1
102 sltu v1, sum, v1
103 srl sum, sum, 16
104 addu sum, v1
105
/* T7 != 0: exchange the two bytes of the 16-bit result and mask it back
   to 16 bits */
106 /* odd buffer alignment? */
107 beqz T7, 1f
108 nop
109 sll v1, sum, 8
110 srl sum, sum, 8
111 or sum, v1
112 andi sum, 0xffff
1131:
/* reordering is switched on only around the return, so no delay-slot
   instruction is written after jr here; noreorder is restored afterwards */
114 .set reorder
115 /* Add the passed partial csum. */
116 ADDC(sum, a2)
117 jr ra
118 .set noreorder
119
120/* ------------------------------------------------------------------------- */
121
/*
 * csum_partial: compute a checksum over a memory area.
 *
 *   a0 (src) = source address
 *   a1       = length of the area in bytes
 *   a2       = partial checksum to add to the result
 * The result is returned in v0 (sum).
 *
 * Outline:
 *   - lengths below 8 go straight to small_csumcpy;
 *   - otherwise src is advanced to a 2-byte and then a 4-byte boundary;
 *   - if at least 56 bytes remain, src is further advanced to 8-, 16- and
 *     32-byte boundaries and the data is summed in 128-, 64- and 32-byte
 *     chunks (CSUM_BIGCHUNK);
 *   - remaining whole words are summed one at a time;
 *   - the last 0..3 bytes, the folding, the byte swap for an odd start
 *     address and the addition of a2 are done by small_csumcpy.
 *
 * Register use: T7 = 1 if src was odd on entry, t8 = scratch condition or
 * loop counter, T0/T1/T3/T4 = load temporaries, T2 = bit tests on a1 and
 * the byte count handed to small_csumcpy, v1 = clobbered by ADDC.
 *
 * The code runs under .set noreorder: the instruction after every branch
 * or jump is its delay slot and executes regardless of the branch
 * outcome. Many delay slots precompute the condition for the next stage.
 * LEAF/END, LONG_SUBU, LONG_SRL and PTR_ADDU are not defined in this file
 * (presumably they come from <asm/asm.h>).
 */
122 .align 5
123LEAF(csum_partial)
124 move sum, zero
125 move T7, zero
126
/* t8 = (a1 < 8), unsigned. Delay slot: T2 = a1, the register in which
   small_csumcpy expects the byte count */
127 sltiu t8, a1, 0x8
128 bnez t8, small_csumcpy /* < 8 bytes to checksum */
129 move T2, a1
130
/* NOTE(review): a1 == 0 also satisfies a1 < 8 and was already sent to
   small_csumcpy above, so this branch looks like it can never be taken.
   The `out` path returns sum without adding a2. */
131 beqz a1, out
132 andi T7, src, 0x1 /* odd buffer? */
133
/* Odd start address: consume one byte so that src becomes 2-byte aligned.
   Delay slot: t8 = src & 2 for the word_align test */
134hword_align:
135 beqz T7, word_align
136 andi t8, src, 0x2
137
138 lbu T0, (src)
139 LONG_SUBU a1, a1, 0x1
/* little-endian only: the byte is shifted into bits 15..8 before it is
   added; the final result is byte-swapped in small_csumcpy when T7 is set */
140#ifdef __MIPSEL__
141 sll T0, T0, 8
142#endif
143 ADDC(sum, T0)
144 PTR_ADDU src, src, 0x1
/* src changed, so recompute t8 = src & 2 */
145 andi t8, src, 0x2
146
/* src & 2 set: consume one halfword so that src becomes 4-byte aligned.
   Delay slot: t8 = (a1 < 56) for the dword_align test */
147word_align:
148 beqz t8, dword_align
149 sltiu t8, a1, 56
150
151 lhu T0, (src)
152 LONG_SUBU a1, a1, 0x2
153 ADDC(sum, T0)
/* a1 changed, so recompute t8 = (a1 < 56) */
154 sltiu t8, a1, 56
155 PTR_ADDU src, src, 0x2
156
/* Fewer than 56 bytes left: skip the alignment and bulk stages and sum
   the rest word by word. Delay slot: t8 = a1, which do_end_words turns
   into a word count (a1 >> 2) */
157dword_align:
158 bnez t8, do_end_words
159 move t8, a1
160
/* src & 4 set: consume one word so that src becomes 8-byte aligned.
   Delay slot: t8 = src & 8 for the qword_align test */
161 andi t8, src, 0x4
162 beqz t8, qword_align
163 andi t8, src, 0x8
164
165 lw T0, 0x00(src)
166 LONG_SUBU a1, a1, 0x4
167 ADDC(sum, T0)
168 PTR_ADDU src, src, 0x4
169 andi t8, src, 0x8
170
/* src & 8 set: consume two words so that src becomes 16-byte aligned.
   Delay slot: t8 = src & 0x10 for the oword_align test */
171qword_align:
172 beqz t8, oword_align
173 andi t8, src, 0x10
174
175 lw T0, 0x00(src)
176 lw T1, 0x04(src)
177 LONG_SUBU a1, a1, 0x8
178 ADDC(sum, T0)
179 ADDC(sum, T1)
180 PTR_ADDU src, src, 0x8
181 andi t8, src, 0x10
182
/* src & 0x10 set: consume four words so that src becomes 32-byte aligned.
   Delay slot: t8 = a1 >> 7, the number of 128-byte chunks */
183oword_align:
184 beqz t8, begin_movement
185 LONG_SRL t8, a1, 0x7
186
187 lw T3, 0x08(src)
188 lw T4, 0x0c(src)
189 lw T0, 0x00(src)
190 lw T1, 0x04(src)
191 ADDC(sum, T3)
192 ADDC(sum, T4)
193 ADDC(sum, T0)
194 ADDC(sum, T1)
195 LONG_SUBU a1, a1, 0x10
196 PTR_ADDU src, src, 0x10
/* a1 changed, so recompute the 128-byte chunk count */
197 LONG_SRL t8, a1, 0x7
198
/* From here on a1 is no longer decremented; the remaining stages test
   individual bits of a1 (0x40, 0x20, 0x1c, 0x3) instead.
   Delay slot: T2 = a1 & 0x40 (is there a 64-byte chunk?) */
199begin_movement:
200 beqz t8, 1f
201 andi T2, a1, 0x40
202
/* t8 iterations of 4 x 32 bytes; the delay slot advances src by 0x80 on
   every pass, including the last one */
203move_128bytes:
204 CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
205 CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
206 CSUM_BIGCHUNK(src, 0x40, sum, T0, T1, T3, T4)
207 CSUM_BIGCHUNK(src, 0x60, sum, T0, T1, T3, T4)
208 LONG_SUBU t8, t8, 0x01
209 bnez t8, move_128bytes
210 PTR_ADDU src, src, 0x80
211
/* Delay slot: T2 = a1 & 0x20 (is there a 32-byte chunk?) */
2121:
213 beqz T2, 1f
214 andi T2, a1, 0x20
215
216move_64bytes:
217 CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
218 CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
219 PTR_ADDU src, src, 0x40
220
/* Delay slot: t8 = a1 & 0x1c, the bytes left that form whole words */
2211:
222 beqz T2, do_end_words
223 andi t8, a1, 0x1c
224
225move_32bytes:
226 CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
227 andi t8, a1, 0x1c
228 PTR_ADDU src, src, 0x20
229
/* t8 holds a byte count here (a1 & 0x1c, or all of a1 when coming from
   dword_align). Delay slot: convert it to a word count */
230do_end_words:
231 beqz t8, maybe_end_cruft
232 LONG_SRL t8, t8, 0x2
233
/* one word per pass; the delay slot advances src by 4 on every pass */
234end_words:
235 lw T0, (src)
236 LONG_SUBU t8, t8, 0x1
237 ADDC(sum, T0)
238 bnez t8, end_words
239 PTR_ADDU src, src, 0x4
240
/* T2 = a1 & 3: the 0..3 trailing bytes, handed to small_csumcpy in T2 */
241maybe_end_cruft:
242 andi T2, a1, 0x3
243
/* Unconditional jump to the tail handler; the move after the ';' is the
   delay slot of the j */
244small_memcpy:
245 j small_csumcpy; move a1, T2 /* XXX ??? */
/* NOTE(review): because of the unconditional j above, nothing falls
   through to the instructions between here and `out`, and end_bytes is
   only branched to from inside itself. As written, this code uses
   lowercase t2 rather than the T2 alias (T2 is ta2 under CONFIG_64BIT),
   loops on a2 while decrementing a1, and never adds the loaded byte to
   sum. */
246 beqz t2, out
247 move a1, T2
248
249end_bytes:
250 lb T0, (src)
251 LONG_SUBU a1, a1, 0x1
252 bnez a2, end_bytes
253 PTR_ADDU src, src, 0x1
254
/* return with sum in v0 (sum is already defined as v0, so the move in
   the delay slot is a register-to-itself copy) */
255out:
256 jr ra
257 move v0, sum
258 END(csum_partial)