diff options
author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-03 10:42:59 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-04 17:43:13 -0500 |
commit | 0bcdda0f3a87ed684d46841b6069409e39c4af65 (patch) | |
tree | 52df8fc8d148b98188c09152bcf6e55c55a98c2a /arch/mips/lib-32 | |
parent | 03dbd2e0b1dcb24d63f64aa234cce3a7fabed328 (diff) |
[MIPS] Unify csum_partial.S
The 32-bit version and 64-bit version are almost equal. Unify them. This
makes further improvements (for example, copying with parallel, supporting
PREFETCH, etc.) easier.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib-32')
-rw-r--r-- | arch/mips/lib-32/Makefile | 2 | ||||
-rw-r--r-- | arch/mips/lib-32/csum_partial.S | 240 |
2 files changed, 1 insertions, 241 deletions
diff --git a/arch/mips/lib-32/Makefile b/arch/mips/lib-32/Makefile index ad285786e74b..dcd4d2ed2ac4 100644 --- a/arch/mips/lib-32/Makefile +++ b/arch/mips/lib-32/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for MIPS-specific library files.. | 2 | # Makefile for MIPS-specific library files.. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y += csum_partial.o memset.o watch.o | 5 | lib-y += memset.o watch.o |
6 | 6 | ||
7 | obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o | 7 | obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o |
8 | obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o | 8 | obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o |
diff --git a/arch/mips/lib-32/csum_partial.S b/arch/mips/lib-32/csum_partial.S deleted file mode 100644 index ea257dbdcc40..000000000000 --- a/arch/mips/lib-32/csum_partial.S +++ /dev/null | |||
@@ -1,240 +0,0 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Copyright (C) 1998 Ralf Baechle | ||
7 | */ | ||
8 | #include <asm/asm.h> | ||
9 | #include <asm/regdef.h> | ||
10 | |||
11 | #define ADDC(sum,reg) /* sum += reg, then add the carry-out back into sum; clobbers v1 */ \ | |
12 | addu sum, reg; \ | |
13 | sltu v1, sum, reg; /* v1 = 1 if the addu wrapped (unsigned sum < reg), else 0 */ \ | |
14 | addu sum, v1 /* end-around carry */ | |
15 | |||
16 | #define CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3) /* ADDC the 8 words at src+offset+0x00 .. src+offset+0x1c into sum; clobbers t0-t3 and v1 (via ADDC); src is not advanced */ \ | |
17 | lw t0, (offset + 0x00)(src); /* first group of four words */ \ | |
18 | lw t1, (offset + 0x04)(src); \ | |
19 | lw t2, (offset + 0x08)(src); \ | |
20 | lw t3, (offset + 0x0c)(src); \ | |
21 | ADDC(sum, t0); \ | |
22 | ADDC(sum, t1); \ | |
23 | ADDC(sum, t2); \ | |
24 | ADDC(sum, t3); \ | |
25 | lw t0, (offset + 0x10)(src); /* second group of four words, reusing t0-t3 */ \ | |
26 | lw t1, (offset + 0x14)(src); \ | |
27 | lw t2, (offset + 0x18)(src); \ | |
28 | lw t3, (offset + 0x1c)(src); \ | |
29 | ADDC(sum, t0); \ | |
30 | ADDC(sum, t1); \ | |
31 | ADDC(sum, t2); \ | |
32 | ADDC(sum, t3); \ | |
33 | |||
34 | /* | ||
35 | * a0: source address | ||
36 | * a1: length of the area to checksum | ||
37 | * a2: partial checksum | ||
38 | */ | ||
39 | |||
40 | #define src a0 /* read pointer, advanced as data is consumed */ | ||
41 | #define dest a1 /* NOTE(review): never referenced by name in this file; a1 is used as the byte count */ | ||
42 | #define sum v0 /* running checksum accumulator; holds the result at return */ | ||
43 | |||
44 | .text | |
45 | .set noreorder /* the instruction after each branch below is its hand-placed delay slot */ | |
46 | |||
47 | /* Tail handler: unknown src alignment and < 8 bytes to go. In: src = data, t2 = byte count, t7 = odd-start flag, a2 = partial checksum, sum = total so far. Sums the remaining bytes, folds sum to 16 bits, byte-swaps it when t7 != 0, adds a2 and returns in v0. Clobbers a1, t0-t2, v1. */ | ||
48 | small_csumcpy: | ||
49 | move a1, t2 /* a1 = bytes left; both jumps to this label pass the count in t2 */ | |
50 | |||
51 | andi t0, a1, 4 /* is there a 4-byte piece? */ | |
52 | beqz t0, 1f | |
53 | andi t0, a1, 2 /* delay slot: t0 = 2-byte bit, tested at the next 1: */ | |
54 | |||
55 | /* Still a full word to go */ | ||
56 | ulw t1, (src) /* unaligned word load */ | |
57 | addiu src, 4 | |
58 | ADDC(sum, t1) | |
59 | |||
60 | 1: move t1, zero /* t1 collects the trailing halfword and/or byte */ | ||
61 | beqz t0, 1f | |
62 | andi t0, a1, 1 /* delay slot: t0 = 1-byte bit, tested at the next 1: */ | |
63 | |||
64 | /* Still a halfword to go */ | ||
65 | ulhu t1, (src) /* unaligned, zero-extended halfword load */ | |
66 | addiu src, 2 | |
67 | |||
68 | 1: beqz t0, 1f | ||
69 | sll t1, t1, 16 /* delay slot, always executed: move the halfword (or zero) into bits 31:16 */ | |
70 | |||
71 | lbu t2, (src) /* last odd byte */ | |
72 | nop /* presumably covers the load delay before t2 is used - TODO confirm */ | |
73 | |||
74 | #ifdef __MIPSEB__ | ||
75 | sll t2, t2, 8 /* big-endian only: place the byte in bits 15:8 */ | |
76 | #endif | ||
77 | or t1, t2 | |
78 | |||
79 | 1: ADDC(sum, t1) /* add the collected tail (zero if there was none) */ | ||
80 | |||
81 | /* fold checksum: add the upper and lower 16-bit halves of sum with end-around carry; result ends up in bits 15:0 */ | |
82 | sll v1, sum, 16 /* v1 = low half moved to bits 31:16 */ | |
83 | addu sum, v1 /* bits 31:16 = high half + low half */ | |
84 | sltu v1, sum, v1 /* v1 = carry out of that addition */ | |
85 | srl sum, sum, 16 | |
86 | addu sum, v1 | |
87 | |||
88 | /* odd buffer alignment? t7 is 0, or (src & 1) as computed in csum_partial */ | |
89 | beqz t7, 1f | |
90 | nop | |
91 | sll v1, sum, 8 /* swap the two bytes of the 16-bit sum */ | |
92 | srl sum, sum, 8 | |
93 | or sum, v1 | |
94 | andi sum, 0xffff /* drop the bits shifted above bit 15 */ | |
95 | 1: | ||
96 | .set reorder /* leave the jr delay slot to the assembler */ | |
97 | /* Add the passed partial csum. */ | |
98 | ADDC(sum, a2) | |
99 | jr ra /* return; result is in v0 (sum) */ | |
100 | .set noreorder | |
101 | |||
102 | /* ------------------------------------------------------------------------- */ | ||
103 | |||
104 | .align 5 | |
105 | LEAF(csum_partial) /* v0 = checksum of a1 bytes at a0, combined with the partial checksum in a2; clobbers a0, a1, t0-t4, t7, t8, v1 */ | ||
106 | move sum, zero | |
107 | move t7, zero /* t7 = odd-start flag, consumed by small_csumcpy */ | |
108 | |||
109 | sltiu t8, a1, 0x8 | |
110 | bnez t8, small_csumcpy /* < 8 bytes to checksum: handled entirely by the tail code */ | |
111 | move t2, a1 /* delay slot: small_csumcpy takes its count in t2 */ | |
112 | |||
113 | beqz a1, out /* a1 >= 8 on this path, so this is not taken */ | |
114 | andi t7, src, 0x1 /* odd buffer? */ | |
115 | |||
116 | hword_align: /* consume one byte if src is odd */ | ||
117 | beqz t7, word_align | |
118 | andi t8, src, 0x2 /* delay slot: bit tested at word_align */ | |
119 | |||
120 | lbu t0, (src) | |
121 | subu a1, a1, 0x1 | |
122 | #ifdef __MIPSEL__ | ||
123 | sll t0, t0, 8 /* little-endian only: put the byte in bits 15:8 */ | |
124 | #endif | ||
125 | ADDC(sum, t0) | |
126 | addu src, src, 0x1 | |
127 | andi t8, src, 0x2 /* re-test with the advanced src */ | |
128 | |||
129 | word_align: /* consume one halfword if src is not 4-byte aligned */ | ||
130 | beqz t8, dword_align | |
131 | sltiu t8, a1, 56 /* delay slot: t8 = (fewer than 56 bytes left) */ | |
132 | |||
133 | lhu t0, (src) | |
134 | subu a1, a1, 0x2 | |
135 | ADDC(sum, t0) | |
136 | sltiu t8, a1, 56 /* recompute with the reduced count */ | |
137 | addu src, src, 0x2 | |
138 | |||
139 | dword_align: /* src is word aligned from here on */ | ||
140 | bnez t8, do_end_words /* < 56 bytes left: skip further alignment and the block loops */ | |
141 | move t8, a1 /* delay slot: t8 = a1, so do_end_words sums a1 >> 2 words */ | |
142 | |||
143 | andi t8, src, 0x4 | |
144 | beqz t8, qword_align | |
145 | andi t8, src, 0x8 /* delay slot: bit tested at qword_align */ | |
146 | |||
147 | lw t0, 0x00(src) /* one word to reach 8-byte alignment */ | |
148 | subu a1, a1, 0x4 | |
149 | ADDC(sum, t0) | |
150 | addu src, src, 0x4 | |
151 | andi t8, src, 0x8 | |
152 | |||
153 | qword_align: | ||
154 | beqz t8, oword_align | |
155 | andi t8, src, 0x10 /* delay slot: bit tested at oword_align */ | |
156 | |||
157 | lw t0, 0x00(src) /* two words to reach 16-byte alignment */ | |
158 | lw t1, 0x04(src) | |
159 | subu a1, a1, 0x8 | |
160 | ADDC(sum, t0) | |
161 | ADDC(sum, t1) | |
162 | addu src, src, 0x8 | |
163 | andi t8, src, 0x10 | |
164 | |||
165 | oword_align: | ||
166 | beqz t8, begin_movement | |
167 | srl t8, a1, 0x7 /* delay slot: t8 = number of whole 128-byte blocks */ | |
168 | |||
169 | lw t3, 0x08(src) /* four words to reach 32-byte alignment */ | |
170 | lw t4, 0x0c(src) | |
171 | lw t0, 0x00(src) | |
172 | lw t1, 0x04(src) | |
173 | ADDC(sum, t3) | |
174 | ADDC(sum, t4) | |
175 | ADDC(sum, t0) | |
176 | ADDC(sum, t1) | |
177 | subu a1, a1, 0x10 | |
178 | addu src, src, 0x10 | |
179 | srl t8, a1, 0x7 /* recompute the block count with the reduced a1 */ | |
180 | |||
181 | begin_movement: | ||
182 | beqz t8, 1f /* no whole 128-byte block */ | |
183 | andi t2, a1, 0x40 /* delay slot: t2 = 64-byte bit of the remaining length */ | |
184 | |||
185 | move_128bytes: /* loop: 4 x 32 bytes per iteration, t8 iterations */ | ||
186 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | |
187 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | |
188 | CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) | |
189 | CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4) | |
190 | subu t8, t8, 0x01 | |
191 | bnez t8, move_128bytes | |
192 | addu src, src, 0x80 /* delay slot: advance src on every iteration, including the last */ | |
193 | |||
194 | 1: | ||
195 | beqz t2, 1f /* no 64-byte piece */ | |
196 | andi t2, a1, 0x20 /* delay slot: t2 = 32-byte bit */ | |
197 | |||
198 | move_64bytes: | ||
199 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | |
200 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | |
201 | addu src, src, 0x40 | |
202 | |||
203 | 1: | ||
204 | beqz t2, do_end_words /* no 32-byte piece */ | |
205 | andi t8, a1, 0x1c /* delay slot: t8 = bytes held in the remaining whole words */ | |
206 | |||
207 | move_32bytes: | ||
208 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | |
209 | andi t8, a1, 0x1c | |
210 | addu src, src, 0x20 | |
211 | |||
212 | do_end_words: /* t8 = byte count covering the remaining whole words */ | ||
213 | beqz t8, maybe_end_cruft | |
214 | srl t8, t8, 0x2 /* delay slot: bytes -> word count */ | |
215 | |||
216 | end_words: /* loop: one word per iteration */ | ||
217 | lw t0, (src) | |
218 | subu t8, t8, 0x1 | |
219 | ADDC(sum, t0) | |
220 | bnez t8, end_words | |
221 | addu src, src, 0x4 /* delay slot: advance src on every iteration */ | |
222 | |||
223 | maybe_end_cruft: | ||
224 | andi t2, a1, 0x3 /* t2 = 0..3 bytes left after the whole words */ | |
225 | |||
226 | small_memcpy: | ||
227 | j small_csumcpy; move a1, t2 /* finish in the tail handler (byte tail, fold, add a2, return); the move sits in the jump's delay slot */ | ||
228 | beqz t2, out /* NOTE(review): from here to end_bytes' last line looks unreachable - the j above is unconditional and no visible branch targets this point or end_bytes */ | |
229 | move a1, t2 | |
230 | |||
231 | end_bytes: | ||
232 | lb t0, (src) | |
233 | subu a1, a1, 0x1 | |
234 | bnez a2, end_bytes /* NOTE(review): tests a2, not the a1 counter decremented above */ | |
235 | addu src, src, 0x1 | |
236 | |||
237 | out: /* only targeted by the beqz a1 check near the entry */ | ||
238 | jr ra | |
239 | move v0, sum /* delay slot; sum is already v0, so this changes nothing */ | |
240 | END(csum_partial) | |