diff options
author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-03 10:42:59 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-04 17:43:13 -0500 |
commit | 0bcdda0f3a87ed684d46841b6069409e39c4af65 (patch) | |
tree | 52df8fc8d148b98188c09152bcf6e55c55a98c2a /arch/mips/lib-64 | |
parent | 03dbd2e0b1dcb24d63f64aa234cce3a7fabed328 (diff) |
[MIPS] Unify csum_partial.S
The 32-bit and 64-bit versions are almost identical, so unify them. This
makes further improvements (for example, copying in parallel, supporting
PREFETCH, etc.) easier.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib-64')
-rw-r--r-- | arch/mips/lib-64/Makefile | 2 | ||||
-rw-r--r-- | arch/mips/lib-64/csum_partial.S | 242 |
2 files changed, 1 insertions, 243 deletions
diff --git a/arch/mips/lib-64/Makefile b/arch/mips/lib-64/Makefile index ad285786e74b..dcd4d2ed2ac4 100644 --- a/arch/mips/lib-64/Makefile +++ b/arch/mips/lib-64/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for MIPS-specific library files.. | 2 | # Makefile for MIPS-specific library files.. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y += csum_partial.o memset.o watch.o | 5 | lib-y += memset.o watch.o |
6 | 6 | ||
7 | obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o | 7 | obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o |
8 | obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o | 8 | obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o |
diff --git a/arch/mips/lib-64/csum_partial.S b/arch/mips/lib-64/csum_partial.S deleted file mode 100644 index 25aba660cc9c..000000000000 --- a/arch/mips/lib-64/csum_partial.S +++ /dev/null | |||
@@ -1,242 +0,0 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Quick'n'dirty IP checksum ... | ||
7 | * | ||
8 | * Copyright (C) 1998, 1999 Ralf Baechle | ||
9 | * Copyright (C) 1999 Silicon Graphics, Inc. | ||
10 | */ | ||
11 | #include <asm/asm.h> | ||
12 | #include <asm/regdef.h> | ||
13 | |||
14 | #define ADDC(sum,reg) /* sum += reg, then add the carry-out back into sum; clobbers v1 */ \ | ||
15 | addu sum, reg; \ | ||
16 | sltu v1, sum, reg; /* v1 = 1 if the addition wrapped (sum < reg, unsigned) */ \ | ||
17 | addu sum, v1 | ||
18 | |||
19 | #define CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3) /* ADDC the eight 32-bit words at src+offset+0x00 .. +0x1c into sum; clobbers the four temporaries passed in and v1 */ \ | ||
20 | lw t0, (offset + 0x00)(src); /* first group of four words: load all, then accumulate */ \ | ||
21 | lw t1, (offset + 0x04)(src); \ | ||
22 | lw t2, (offset + 0x08)(src); \ | ||
23 | lw t3, (offset + 0x0c)(src); \ | ||
24 | ADDC(sum, t0); \ | ||
25 | ADDC(sum, t1); \ | ||
26 | ADDC(sum, t2); \ | ||
27 | ADDC(sum, t3); \ | ||
28 | lw t0, (offset + 0x10)(src); /* second group of four words, same temporaries reused */ \ | ||
29 | lw t1, (offset + 0x14)(src); \ | ||
30 | lw t2, (offset + 0x18)(src); \ | ||
31 | lw t3, (offset + 0x1c)(src); \ | ||
32 | ADDC(sum, t0); \ | ||
33 | ADDC(sum, t1); \ | ||
34 | ADDC(sum, t2); \ | ||
35 | ADDC(sum, t3); \ | ||
36 | |||
37 | /* | ||
38 | * a0: source address | ||
39 | * a1: length of the area to checksum | ||
40 | * a2: partial checksum | ||
41 | */ | ||
42 | |||
43 | #define src a0 /* source pointer is kept in a0 and advanced in place */ | ||
44 | #define sum v0 /* running checksum accumulates in v0 */ | ||
45 | |||
46 | .text | ||
47 | .set noreorder /* the instruction written after each branch/jump is its delay slot */ | ||
48 | |||
49 | /* Tail handler: unknown src alignment and < 8 bytes to go. Expects the remaining byte count in ta2, the running sum in v0, the odd-start flag in t3 and the caller's partial checksum in a2; returns to ra with the final value in v0. */ | ||
50 | small_csumcpy: | ||
51 | move a1, ta2 /* a1 = remaining byte count handed over in ta2 */ | ||
52 | |||
53 | andi ta0, a1, 4 /* bit 2 of the count set => a 4-byte word is present */ | ||
54 | beqz ta0, 1f | ||
55 | andi ta0, a1, 2 /* delay slot: pre-compute the halfword test (bit 1) */ | ||
56 | |||
57 | /* Still a full word to go */ | ||
58 | ulw ta1, (src) /* unaligned word load, src alignment is unknown here */ | ||
59 | daddiu src, 4 | ||
60 | ADDC(sum, ta1) | ||
61 | |||
62 | 1: move ta1, zero /* ta1 collects the trailing halfword and/or byte */ | ||
63 | beqz ta0, 1f | ||
64 | andi ta0, a1, 1 /* delay slot: pre-compute the single-byte test (bit 0) */ | ||
65 | |||
66 | /* Still a halfword to go */ | ||
67 | ulhu ta1, (src) /* unaligned, zero-extended halfword load */ | ||
68 | daddiu src, 2 | ||
69 | |||
70 | 1: beqz ta0, 1f | ||
71 | sll ta1, ta1, 16 /* delay slot, runs on both paths: move the halfword (or zero) to the upper 16 bits */ | ||
72 | |||
73 | lbu ta2, (src) /* last odd byte */ | ||
74 | nop /* NOTE(review): presumably covers the load delay before ta2 is used - confirm */ | ||
75 | |||
76 | #ifdef __MIPSEB__ | ||
77 | sll ta2, ta2, 8 /* big-endian only: place the byte in bits 15..8 */ | ||
78 | #endif | ||
79 | or ta1, ta2 | ||
80 | |||
81 | 1: ADDC(sum, ta1) | ||
82 | |||
83 | /* fold checksum: reduce the 32-bit sum to 16 bits with end-around carry */ | ||
84 | sll v1, sum, 16 /* v1 = low half moved to the upper 16 bits */ | ||
85 | addu sum, v1 /* upper 16 bits of sum now hold high half + low half */ | ||
86 | sltu v1, sum, v1 /* v1 = carry out of that addition */ | ||
87 | srl sum, sum, 16 /* bring the 16-bit result down */ | ||
88 | addu sum, v1 /* add the carry back in */ | ||
89 | |||
90 | /* odd buffer alignment? t3 is non-zero when the buffer started on an odd address */ | ||
91 | beqz t3, 1f | ||
92 | nop /* delay slot intentionally empty */ | ||
93 | sll v1, sum, 8 /* swap the two bytes of the 16-bit sum ... */ | ||
94 | srl sum, sum, 8 | ||
95 | or sum, v1 | ||
96 | andi sum, 0xffff /* ... and drop anything shifted above bit 15 */ | ||
97 | 1: | ||
98 | .set reorder /* let the assembler handle the delay slot of the jr below */ | ||
99 | /* Add the passed partial csum. */ | ||
100 | ADDC(sum, a2) | ||
101 | jr ra | ||
102 | .set noreorder /* restore manual delay-slot handling for the code that follows */ | ||
103 | |||
104 | /* ------------------------------------------------------------------------- */ | ||
105 | |||
106 | .align 5 /* align the entry point to a 2^5 = 32-byte boundary */ | ||
107 | LEAF(csum_partial) /* a0 = source address, a1 = length, a2 = partial checksum; result in v0 */ | ||
108 | move sum, zero /* start from zero; a2 is only added at the very end, in small_csumcpy */ | ||
109 | move t3, zero /* t3 = odd-start flag, clear by default */ | ||
110 | |||
111 | sltiu t8, a1, 0x8 | ||
112 | bnez t8, small_csumcpy /* < 8 bytes to checksum */ | ||
113 | move ta2, a1 /* delay slot: ta2 = length, which small_csumcpy reads */ | ||
114 | |||
115 | beqz a1, out /* a1 >= 8 on this path, so this is not taken; the delay slot still runs */ | ||
116 | andi t3, src, 0x1 /* delay slot: t3 = 1 if the buffer starts on an odd address */ | ||
117 | |||
118 | hword_align: /* consume one byte if src is odd */ | ||
119 | beqz t3, word_align | ||
120 | andi t8, src, 0x2 /* delay slot: t8 = halfword-misalignment bit of src */ | ||
121 | |||
122 | lbu ta0, (src) | ||
123 | dsubu a1, a1, 0x1 | ||
124 | #ifdef __MIPSEL__ | ||
125 | sll ta0, ta0, 8 /* little-endian only: place the byte in bits 15..8 */ | ||
126 | #endif | ||
127 | ADDC(sum, ta0) | ||
128 | daddu src, src, 0x1 | ||
129 | andi t8, src, 0x2 /* re-test bit 1 now that src has advanced */ | ||
130 | |||
131 | word_align: /* consume one halfword if src is not 4-byte aligned */ | ||
132 | beqz t8, dword_align | ||
133 | sltiu t8, a1, 56 /* delay slot: t8 = 1 if fewer than 56 bytes remain */ | ||
134 | |||
135 | lhu ta0, (src) | ||
136 | dsubu a1, a1, 0x2 | ||
137 | ADDC(sum, ta0) | ||
138 | sltiu t8, a1, 56 /* recompute the "< 56 bytes left" flag with the updated count */ | ||
139 | daddu src, src, 0x2 | ||
140 | |||
141 | dword_align: /* src is 4-byte aligned from here on */ | ||
142 | bnez t8, do_end_words /* short buffer: skip the alignment steps and unrolled loops */ | ||
143 | move t8, a1 /* delay slot: t8 = remaining bytes, turned into a word count at do_end_words */ | ||
144 | |||
145 | andi t8, src, 0x4 | ||
146 | beqz t8, qword_align | ||
147 | andi t8, src, 0x8 /* delay slot: pre-test bit 3 of src */ | ||
148 | |||
149 | lw ta0, 0x00(src) /* one word brings src to an 8-byte boundary */ | ||
150 | dsubu a1, a1, 0x4 | ||
151 | ADDC(sum, ta0) | ||
152 | daddu src, src, 0x4 | ||
153 | andi t8, src, 0x8 /* re-test bit 3 after advancing src */ | ||
154 | |||
155 | qword_align: | ||
156 | beqz t8, oword_align | ||
157 | andi t8, src, 0x10 /* delay slot: pre-test bit 4 of src */ | ||
158 | |||
159 | lw ta0, 0x00(src) /* two words bring src to a 16-byte boundary */ | ||
160 | lw ta1, 0x04(src) | ||
161 | dsubu a1, a1, 0x8 | ||
162 | ADDC(sum, ta0) | ||
163 | ADDC(sum, ta1) | ||
164 | daddu src, src, 0x8 | ||
165 | andi t8, src, 0x10 /* re-test bit 4 after advancing src */ | ||
166 | |||
167 | oword_align: | ||
168 | beqz t8, begin_movement | ||
169 | dsrl t8, a1, 0x7 /* delay slot: t8 = number of 128-byte chunks remaining */ | ||
170 | |||
171 | lw ta3, 0x08(src) /* four words bring src to a 32-byte boundary */ | ||
172 | lw t0, 0x0c(src) | ||
173 | lw ta0, 0x00(src) | ||
174 | lw ta1, 0x04(src) | ||
175 | ADDC(sum, ta3) | ||
176 | ADDC(sum, t0) | ||
177 | ADDC(sum, ta0) | ||
178 | ADDC(sum, ta1) | ||
179 | dsubu a1, a1, 0x10 | ||
180 | daddu src, src, 0x10 | ||
181 | dsrl t8, a1, 0x7 /* recompute the 128-byte chunk count with the updated length */ | ||
182 | |||
183 | begin_movement: | ||
184 | beqz t8, 1f /* no full 128-byte chunk */ | ||
185 | andi ta2, a1, 0x40 /* delay slot: ta2 != 0 if a 64-byte chunk follows */ | ||
186 | |||
187 | move_128bytes: /* main loop: 4 x 32 bytes per iteration, t8 iterations */ | ||
188 | CSUM_BIGCHUNK(src, 0x00, sum, ta0, ta1, ta3, t0) | ||
189 | CSUM_BIGCHUNK(src, 0x20, sum, ta0, ta1, ta3, t0) | ||
190 | CSUM_BIGCHUNK(src, 0x40, sum, ta0, ta1, ta3, t0) | ||
191 | CSUM_BIGCHUNK(src, 0x60, sum, ta0, ta1, ta3, t0) | ||
192 | dsubu t8, t8, 0x01 | ||
193 | bnez t8, move_128bytes | ||
194 | daddu src, src, 0x80 /* delay slot: advance src on every iteration, including the last */ | ||
195 | |||
196 | 1: | ||
197 | beqz ta2, 1f /* no 64-byte chunk */ | ||
198 | andi ta2, a1, 0x20 /* delay slot: ta2 != 0 if a 32-byte chunk follows */ | ||
199 | |||
200 | move_64bytes: | ||
201 | CSUM_BIGCHUNK(src, 0x00, sum, ta0, ta1, ta3, t0) | ||
202 | CSUM_BIGCHUNK(src, 0x20, sum, ta0, ta1, ta3, t0) | ||
203 | daddu src, src, 0x40 | ||
204 | |||
205 | 1: | ||
206 | beqz ta2, do_end_words /* no 32-byte chunk */ | ||
207 | andi t8, a1, 0x1c /* delay slot: t8 = bytes in the remaining whole words (0..28) */ | ||
208 | |||
209 | move_32bytes: | ||
210 | CSUM_BIGCHUNK(src, 0x00, sum, ta0, ta1, ta3, t0) | ||
211 | andi t8, a1, 0x1c /* same value as in the delay slot above; a1 has not changed */ | ||
212 | daddu src, src, 0x20 | ||
213 | |||
214 | do_end_words: /* t8 = byte count to cover with single-word loads */ | ||
215 | beqz t8, maybe_end_cruft | ||
216 | dsrl t8, t8, 0x2 /* delay slot: convert bytes to a word count */ | ||
217 | |||
218 | end_words: /* one 32-bit word per iteration */ | ||
219 | lw ta0, (src) | ||
220 | dsubu t8, t8, 0x1 | ||
221 | ADDC(sum, ta0) | ||
222 | bnez t8, end_words | ||
223 | daddu src, src, 0x4 /* delay slot: advance src on every iteration */ | ||
224 | |||
225 | maybe_end_cruft: | ||
226 | andi ta2, a1, 0x3 /* ta2 = trailing bytes (0..3) left after the last whole word */ | ||
227 | |||
228 | small_memcpy: /* hand the tail, the fold and the final add of a2 to small_csumcpy */ | ||
229 | j small_csumcpy; move a1, ta2 /* XXX ??? - the move after ';' sits in the delay slot of the unconditional j */ | ||
230 | beqz t2, out /* not reached by fall-through: the j above is unconditional */ | ||
231 | move a1, ta2 | ||
232 | |||
233 | end_bytes: /* no branch to this label from outside the loop is visible in this file */ | ||
234 | lb ta0, (src) | ||
235 | dsubu a1, a1, 0x1 | ||
236 | bnez a2, end_bytes /* NOTE(review): tests a2 although a1 is the register being decremented */ | ||
237 | daddu src, src, 0x1 | ||
238 | |||
239 | out: /* only referenced by the never-taken beqz near the function entry */ | ||
240 | jr ra | ||
241 | move v0, sum /* delay slot; sum is defined as v0, so this moves v0 to itself */ | ||
242 | END(csum_partial) | ||