diff options
author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-03 10:42:59 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-04 17:43:13 -0500 |
commit | 0bcdda0f3a87ed684d46841b6069409e39c4af65 (patch) | |
tree | 52df8fc8d148b98188c09152bcf6e55c55a98c2a | |
parent | 03dbd2e0b1dcb24d63f64aa234cce3a7fabed328 (diff) |
[MIPS] Unify csum_partial.S
The 32-bit and 64-bit versions are almost identical, so unify them. This
makes further improvements (for example, copying in parallel, supporting
PREFETCH, etc.) easier.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r-- | arch/mips/lib-32/Makefile | 2 | ||||
-rw-r--r-- | arch/mips/lib-32/csum_partial.S | 240 | ||||
-rw-r--r-- | arch/mips/lib-64/Makefile | 2 | ||||
-rw-r--r-- | arch/mips/lib-64/csum_partial.S | 242 | ||||
-rw-r--r-- | arch/mips/lib/Makefile | 4 | ||||
-rw-r--r-- | arch/mips/lib/csum_partial.S | 258 |
6 files changed, 262 insertions, 486 deletions
diff --git a/arch/mips/lib-32/Makefile b/arch/mips/lib-32/Makefile index ad285786e74b..dcd4d2ed2ac4 100644 --- a/arch/mips/lib-32/Makefile +++ b/arch/mips/lib-32/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for MIPS-specific library files.. | 2 | # Makefile for MIPS-specific library files.. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y += csum_partial.o memset.o watch.o | 5 | lib-y += memset.o watch.o |
6 | 6 | ||
7 | obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o | 7 | obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o |
8 | obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o | 8 | obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o |
diff --git a/arch/mips/lib-32/csum_partial.S b/arch/mips/lib-32/csum_partial.S deleted file mode 100644 index ea257dbdcc40..000000000000 --- a/arch/mips/lib-32/csum_partial.S +++ /dev/null | |||
@@ -1,240 +0,0 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Copyright (C) 1998 Ralf Baechle | ||
7 | */ | ||
8 | #include <asm/asm.h> | ||
9 | #include <asm/regdef.h> | ||
10 | |||
11 | #define ADDC(sum,reg) \ | ||
12 | addu sum, reg; \ | ||
13 | sltu v1, sum, reg; \ | ||
14 | addu sum, v1 | ||
15 | |||
16 | #define CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3) \ | ||
17 | lw t0, (offset + 0x00)(src); \ | ||
18 | lw t1, (offset + 0x04)(src); \ | ||
19 | lw t2, (offset + 0x08)(src); \ | ||
20 | lw t3, (offset + 0x0c)(src); \ | ||
21 | ADDC(sum, t0); \ | ||
22 | ADDC(sum, t1); \ | ||
23 | ADDC(sum, t2); \ | ||
24 | ADDC(sum, t3); \ | ||
25 | lw t0, (offset + 0x10)(src); \ | ||
26 | lw t1, (offset + 0x14)(src); \ | ||
27 | lw t2, (offset + 0x18)(src); \ | ||
28 | lw t3, (offset + 0x1c)(src); \ | ||
29 | ADDC(sum, t0); \ | ||
30 | ADDC(sum, t1); \ | ||
31 | ADDC(sum, t2); \ | ||
32 | ADDC(sum, t3); \ | ||
33 | |||
34 | /* | ||
35 | * a0: source address | ||
36 | * a1: length of the area to checksum | ||
37 | * a2: partial checksum | ||
38 | */ | ||
39 | |||
40 | #define src a0 | ||
41 | #define dest a1 | ||
42 | #define sum v0 | ||
43 | |||
44 | .text | ||
45 | .set noreorder | ||
46 | |||
47 | /* unknown src alignment and < 8 bytes to go */ | ||
48 | small_csumcpy: | ||
49 | move a1, t2 | ||
50 | |||
51 | andi t0, a1, 4 | ||
52 | beqz t0, 1f | ||
53 | andi t0, a1, 2 | ||
54 | |||
55 | /* Still a full word to go */ | ||
56 | ulw t1, (src) | ||
57 | addiu src, 4 | ||
58 | ADDC(sum, t1) | ||
59 | |||
60 | 1: move t1, zero | ||
61 | beqz t0, 1f | ||
62 | andi t0, a1, 1 | ||
63 | |||
64 | /* Still a halfword to go */ | ||
65 | ulhu t1, (src) | ||
66 | addiu src, 2 | ||
67 | |||
68 | 1: beqz t0, 1f | ||
69 | sll t1, t1, 16 | ||
70 | |||
71 | lbu t2, (src) | ||
72 | nop | ||
73 | |||
74 | #ifdef __MIPSEB__ | ||
75 | sll t2, t2, 8 | ||
76 | #endif | ||
77 | or t1, t2 | ||
78 | |||
79 | 1: ADDC(sum, t1) | ||
80 | |||
81 | /* fold checksum */ | ||
82 | sll v1, sum, 16 | ||
83 | addu sum, v1 | ||
84 | sltu v1, sum, v1 | ||
85 | srl sum, sum, 16 | ||
86 | addu sum, v1 | ||
87 | |||
88 | /* odd buffer alignment? */ | ||
89 | beqz t7, 1f | ||
90 | nop | ||
91 | sll v1, sum, 8 | ||
92 | srl sum, sum, 8 | ||
93 | or sum, v1 | ||
94 | andi sum, 0xffff | ||
95 | 1: | ||
96 | .set reorder | ||
97 | /* Add the passed partial csum. */ | ||
98 | ADDC(sum, a2) | ||
99 | jr ra | ||
100 | .set noreorder | ||
101 | |||
102 | /* ------------------------------------------------------------------------- */ | ||
103 | |||
104 | .align 5 | ||
105 | LEAF(csum_partial) | ||
106 | move sum, zero | ||
107 | move t7, zero | ||
108 | |||
109 | sltiu t8, a1, 0x8 | ||
110 | bnez t8, small_csumcpy /* < 8 bytes to copy */ | ||
111 | move t2, a1 | ||
112 | |||
113 | beqz a1, out | ||
114 | andi t7, src, 0x1 /* odd buffer? */ | ||
115 | |||
116 | hword_align: | ||
117 | beqz t7, word_align | ||
118 | andi t8, src, 0x2 | ||
119 | |||
120 | lbu t0, (src) | ||
121 | subu a1, a1, 0x1 | ||
122 | #ifdef __MIPSEL__ | ||
123 | sll t0, t0, 8 | ||
124 | #endif | ||
125 | ADDC(sum, t0) | ||
126 | addu src, src, 0x1 | ||
127 | andi t8, src, 0x2 | ||
128 | |||
129 | word_align: | ||
130 | beqz t8, dword_align | ||
131 | sltiu t8, a1, 56 | ||
132 | |||
133 | lhu t0, (src) | ||
134 | subu a1, a1, 0x2 | ||
135 | ADDC(sum, t0) | ||
136 | sltiu t8, a1, 56 | ||
137 | addu src, src, 0x2 | ||
138 | |||
139 | dword_align: | ||
140 | bnez t8, do_end_words | ||
141 | move t8, a1 | ||
142 | |||
143 | andi t8, src, 0x4 | ||
144 | beqz t8, qword_align | ||
145 | andi t8, src, 0x8 | ||
146 | |||
147 | lw t0, 0x00(src) | ||
148 | subu a1, a1, 0x4 | ||
149 | ADDC(sum, t0) | ||
150 | addu src, src, 0x4 | ||
151 | andi t8, src, 0x8 | ||
152 | |||
153 | qword_align: | ||
154 | beqz t8, oword_align | ||
155 | andi t8, src, 0x10 | ||
156 | |||
157 | lw t0, 0x00(src) | ||
158 | lw t1, 0x04(src) | ||
159 | subu a1, a1, 0x8 | ||
160 | ADDC(sum, t0) | ||
161 | ADDC(sum, t1) | ||
162 | addu src, src, 0x8 | ||
163 | andi t8, src, 0x10 | ||
164 | |||
165 | oword_align: | ||
166 | beqz t8, begin_movement | ||
167 | srl t8, a1, 0x7 | ||
168 | |||
169 | lw t3, 0x08(src) | ||
170 | lw t4, 0x0c(src) | ||
171 | lw t0, 0x00(src) | ||
172 | lw t1, 0x04(src) | ||
173 | ADDC(sum, t3) | ||
174 | ADDC(sum, t4) | ||
175 | ADDC(sum, t0) | ||
176 | ADDC(sum, t1) | ||
177 | subu a1, a1, 0x10 | ||
178 | addu src, src, 0x10 | ||
179 | srl t8, a1, 0x7 | ||
180 | |||
181 | begin_movement: | ||
182 | beqz t8, 1f | ||
183 | andi t2, a1, 0x40 | ||
184 | |||
185 | move_128bytes: | ||
186 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | ||
187 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | ||
188 | CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) | ||
189 | CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4) | ||
190 | subu t8, t8, 0x01 | ||
191 | bnez t8, move_128bytes | ||
192 | addu src, src, 0x80 | ||
193 | |||
194 | 1: | ||
195 | beqz t2, 1f | ||
196 | andi t2, a1, 0x20 | ||
197 | |||
198 | move_64bytes: | ||
199 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | ||
200 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | ||
201 | addu src, src, 0x40 | ||
202 | |||
203 | 1: | ||
204 | beqz t2, do_end_words | ||
205 | andi t8, a1, 0x1c | ||
206 | |||
207 | move_32bytes: | ||
208 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | ||
209 | andi t8, a1, 0x1c | ||
210 | addu src, src, 0x20 | ||
211 | |||
212 | do_end_words: | ||
213 | beqz t8, maybe_end_cruft | ||
214 | srl t8, t8, 0x2 | ||
215 | |||
216 | end_words: | ||
217 | lw t0, (src) | ||
218 | subu t8, t8, 0x1 | ||
219 | ADDC(sum, t0) | ||
220 | bnez t8, end_words | ||
221 | addu src, src, 0x4 | ||
222 | |||
223 | maybe_end_cruft: | ||
224 | andi t2, a1, 0x3 | ||
225 | |||
226 | small_memcpy: | ||
227 | j small_csumcpy; move a1, t2 | ||
228 | beqz t2, out | ||
229 | move a1, t2 | ||
230 | |||
231 | end_bytes: | ||
232 | lb t0, (src) | ||
233 | subu a1, a1, 0x1 | ||
234 | bnez a2, end_bytes | ||
235 | addu src, src, 0x1 | ||
236 | |||
237 | out: | ||
238 | jr ra | ||
239 | move v0, sum | ||
240 | END(csum_partial) | ||
diff --git a/arch/mips/lib-64/Makefile b/arch/mips/lib-64/Makefile index ad285786e74b..dcd4d2ed2ac4 100644 --- a/arch/mips/lib-64/Makefile +++ b/arch/mips/lib-64/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for MIPS-specific library files.. | 2 | # Makefile for MIPS-specific library files.. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y += csum_partial.o memset.o watch.o | 5 | lib-y += memset.o watch.o |
6 | 6 | ||
7 | obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o | 7 | obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o |
8 | obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o | 8 | obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o |
diff --git a/arch/mips/lib-64/csum_partial.S b/arch/mips/lib-64/csum_partial.S deleted file mode 100644 index 25aba660cc9c..000000000000 --- a/arch/mips/lib-64/csum_partial.S +++ /dev/null | |||
@@ -1,242 +0,0 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Quick'n'dirty IP checksum ... | ||
7 | * | ||
8 | * Copyright (C) 1998, 1999 Ralf Baechle | ||
9 | * Copyright (C) 1999 Silicon Graphics, Inc. | ||
10 | */ | ||
11 | #include <asm/asm.h> | ||
12 | #include <asm/regdef.h> | ||
13 | |||
14 | #define ADDC(sum,reg) \ | ||
15 | addu sum, reg; \ | ||
16 | sltu v1, sum, reg; \ | ||
17 | addu sum, v1 | ||
18 | |||
19 | #define CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3) \ | ||
20 | lw t0, (offset + 0x00)(src); \ | ||
21 | lw t1, (offset + 0x04)(src); \ | ||
22 | lw t2, (offset + 0x08)(src); \ | ||
23 | lw t3, (offset + 0x0c)(src); \ | ||
24 | ADDC(sum, t0); \ | ||
25 | ADDC(sum, t1); \ | ||
26 | ADDC(sum, t2); \ | ||
27 | ADDC(sum, t3); \ | ||
28 | lw t0, (offset + 0x10)(src); \ | ||
29 | lw t1, (offset + 0x14)(src); \ | ||
30 | lw t2, (offset + 0x18)(src); \ | ||
31 | lw t3, (offset + 0x1c)(src); \ | ||
32 | ADDC(sum, t0); \ | ||
33 | ADDC(sum, t1); \ | ||
34 | ADDC(sum, t2); \ | ||
35 | ADDC(sum, t3); \ | ||
36 | |||
37 | /* | ||
38 | * a0: source address | ||
39 | * a1: length of the area to checksum | ||
40 | * a2: partial checksum | ||
41 | */ | ||
42 | |||
43 | #define src a0 | ||
44 | #define sum v0 | ||
45 | |||
46 | .text | ||
47 | .set noreorder | ||
48 | |||
49 | /* unknown src alignment and < 8 bytes to go */ | ||
50 | small_csumcpy: | ||
51 | move a1, ta2 | ||
52 | |||
53 | andi ta0, a1, 4 | ||
54 | beqz ta0, 1f | ||
55 | andi ta0, a1, 2 | ||
56 | |||
57 | /* Still a full word to go */ | ||
58 | ulw ta1, (src) | ||
59 | daddiu src, 4 | ||
60 | ADDC(sum, ta1) | ||
61 | |||
62 | 1: move ta1, zero | ||
63 | beqz ta0, 1f | ||
64 | andi ta0, a1, 1 | ||
65 | |||
66 | /* Still a halfword to go */ | ||
67 | ulhu ta1, (src) | ||
68 | daddiu src, 2 | ||
69 | |||
70 | 1: beqz ta0, 1f | ||
71 | sll ta1, ta1, 16 | ||
72 | |||
73 | lbu ta2, (src) | ||
74 | nop | ||
75 | |||
76 | #ifdef __MIPSEB__ | ||
77 | sll ta2, ta2, 8 | ||
78 | #endif | ||
79 | or ta1, ta2 | ||
80 | |||
81 | 1: ADDC(sum, ta1) | ||
82 | |||
83 | /* fold checksum */ | ||
84 | sll v1, sum, 16 | ||
85 | addu sum, v1 | ||
86 | sltu v1, sum, v1 | ||
87 | srl sum, sum, 16 | ||
88 | addu sum, v1 | ||
89 | |||
90 | /* odd buffer alignment? */ | ||
91 | beqz t3, 1f | ||
92 | nop | ||
93 | sll v1, sum, 8 | ||
94 | srl sum, sum, 8 | ||
95 | or sum, v1 | ||
96 | andi sum, 0xffff | ||
97 | 1: | ||
98 | .set reorder | ||
99 | /* Add the passed partial csum. */ | ||
100 | ADDC(sum, a2) | ||
101 | jr ra | ||
102 | .set noreorder | ||
103 | |||
104 | /* ------------------------------------------------------------------------- */ | ||
105 | |||
106 | .align 5 | ||
107 | LEAF(csum_partial) | ||
108 | move sum, zero | ||
109 | move t3, zero | ||
110 | |||
111 | sltiu t8, a1, 0x8 | ||
112 | bnez t8, small_csumcpy /* < 8 bytes to copy */ | ||
113 | move ta2, a1 | ||
114 | |||
115 | beqz a1, out | ||
116 | andi t3, src, 0x1 /* odd buffer? */ | ||
117 | |||
118 | hword_align: | ||
119 | beqz t3, word_align | ||
120 | andi t8, src, 0x2 | ||
121 | |||
122 | lbu ta0, (src) | ||
123 | dsubu a1, a1, 0x1 | ||
124 | #ifdef __MIPSEL__ | ||
125 | sll ta0, ta0, 8 | ||
126 | #endif | ||
127 | ADDC(sum, ta0) | ||
128 | daddu src, src, 0x1 | ||
129 | andi t8, src, 0x2 | ||
130 | |||
131 | word_align: | ||
132 | beqz t8, dword_align | ||
133 | sltiu t8, a1, 56 | ||
134 | |||
135 | lhu ta0, (src) | ||
136 | dsubu a1, a1, 0x2 | ||
137 | ADDC(sum, ta0) | ||
138 | sltiu t8, a1, 56 | ||
139 | daddu src, src, 0x2 | ||
140 | |||
141 | dword_align: | ||
142 | bnez t8, do_end_words | ||
143 | move t8, a1 | ||
144 | |||
145 | andi t8, src, 0x4 | ||
146 | beqz t8, qword_align | ||
147 | andi t8, src, 0x8 | ||
148 | |||
149 | lw ta0, 0x00(src) | ||
150 | dsubu a1, a1, 0x4 | ||
151 | ADDC(sum, ta0) | ||
152 | daddu src, src, 0x4 | ||
153 | andi t8, src, 0x8 | ||
154 | |||
155 | qword_align: | ||
156 | beqz t8, oword_align | ||
157 | andi t8, src, 0x10 | ||
158 | |||
159 | lw ta0, 0x00(src) | ||
160 | lw ta1, 0x04(src) | ||
161 | dsubu a1, a1, 0x8 | ||
162 | ADDC(sum, ta0) | ||
163 | ADDC(sum, ta1) | ||
164 | daddu src, src, 0x8 | ||
165 | andi t8, src, 0x10 | ||
166 | |||
167 | oword_align: | ||
168 | beqz t8, begin_movement | ||
169 | dsrl t8, a1, 0x7 | ||
170 | |||
171 | lw ta3, 0x08(src) | ||
172 | lw t0, 0x0c(src) | ||
173 | lw ta0, 0x00(src) | ||
174 | lw ta1, 0x04(src) | ||
175 | ADDC(sum, ta3) | ||
176 | ADDC(sum, t0) | ||
177 | ADDC(sum, ta0) | ||
178 | ADDC(sum, ta1) | ||
179 | dsubu a1, a1, 0x10 | ||
180 | daddu src, src, 0x10 | ||
181 | dsrl t8, a1, 0x7 | ||
182 | |||
183 | begin_movement: | ||
184 | beqz t8, 1f | ||
185 | andi ta2, a1, 0x40 | ||
186 | |||
187 | move_128bytes: | ||
188 | CSUM_BIGCHUNK(src, 0x00, sum, ta0, ta1, ta3, t0) | ||
189 | CSUM_BIGCHUNK(src, 0x20, sum, ta0, ta1, ta3, t0) | ||
190 | CSUM_BIGCHUNK(src, 0x40, sum, ta0, ta1, ta3, t0) | ||
191 | CSUM_BIGCHUNK(src, 0x60, sum, ta0, ta1, ta3, t0) | ||
192 | dsubu t8, t8, 0x01 | ||
193 | bnez t8, move_128bytes | ||
194 | daddu src, src, 0x80 | ||
195 | |||
196 | 1: | ||
197 | beqz ta2, 1f | ||
198 | andi ta2, a1, 0x20 | ||
199 | |||
200 | move_64bytes: | ||
201 | CSUM_BIGCHUNK(src, 0x00, sum, ta0, ta1, ta3, t0) | ||
202 | CSUM_BIGCHUNK(src, 0x20, sum, ta0, ta1, ta3, t0) | ||
203 | daddu src, src, 0x40 | ||
204 | |||
205 | 1: | ||
206 | beqz ta2, do_end_words | ||
207 | andi t8, a1, 0x1c | ||
208 | |||
209 | move_32bytes: | ||
210 | CSUM_BIGCHUNK(src, 0x00, sum, ta0, ta1, ta3, t0) | ||
211 | andi t8, a1, 0x1c | ||
212 | daddu src, src, 0x20 | ||
213 | |||
214 | do_end_words: | ||
215 | beqz t8, maybe_end_cruft | ||
216 | dsrl t8, t8, 0x2 | ||
217 | |||
218 | end_words: | ||
219 | lw ta0, (src) | ||
220 | dsubu t8, t8, 0x1 | ||
221 | ADDC(sum, ta0) | ||
222 | bnez t8, end_words | ||
223 | daddu src, src, 0x4 | ||
224 | |||
225 | maybe_end_cruft: | ||
226 | andi ta2, a1, 0x3 | ||
227 | |||
228 | small_memcpy: | ||
229 | j small_csumcpy; move a1, ta2 /* XXX ??? */ | ||
230 | beqz t2, out | ||
231 | move a1, ta2 | ||
232 | |||
233 | end_bytes: | ||
234 | lb ta0, (src) | ||
235 | dsubu a1, a1, 0x1 | ||
236 | bnez a2, end_bytes | ||
237 | daddu src, src, 0x1 | ||
238 | |||
239 | out: | ||
240 | jr ra | ||
241 | move v0, sum | ||
242 | END(csum_partial) | ||
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index b225543f5302..888b61ea12fe 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile | |||
@@ -2,8 +2,8 @@ | |||
2 | # Makefile for MIPS-specific library files.. | 2 | # Makefile for MIPS-specific library files.. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y += csum_partial_copy.o memcpy.o promlib.o strlen_user.o strncpy_user.o \ | 5 | lib-y += csum_partial.o csum_partial_copy.o memcpy.o promlib.o \ |
6 | strnlen_user.o uncached.o | 6 | strlen_user.o strncpy_user.o strnlen_user.o uncached.o |
7 | 7 | ||
8 | obj-y += iomap.o | 8 | obj-y += iomap.o |
9 | 9 | ||
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S new file mode 100644 index 000000000000..15611d9df7ac --- /dev/null +++ b/arch/mips/lib/csum_partial.S | |||
@@ -0,0 +1,258 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Quick'n'dirty IP checksum ... | ||
7 | * | ||
8 | * Copyright (C) 1998, 1999 Ralf Baechle | ||
9 | * Copyright (C) 1999 Silicon Graphics, Inc. | ||
10 | */ | ||
11 | #include <asm/asm.h> | ||
12 | #include <asm/regdef.h> | ||
13 | |||
14 | #ifdef CONFIG_64BIT /* 64-bit build: use the ta* names, as the former lib-64 version did */ | ||
15 | #define T0 ta0 /* scratch: loaded data and length-bit tests */ | ||
16 | #define T1 ta1 /* scratch: loaded data */ | ||
17 | #define T2 ta2 /* byte count handed to small_csumcpy; 64/32-byte chunk flags */ | ||
18 | #define T3 ta3 /* scratch: loaded data */ | ||
19 | #define T4 t0 /* scratch: loaded data */ | ||
20 | #define T7 t3 /* non-zero when the buffer started at an odd address */ | ||
21 | #else /* 32-bit build: plain t* names, as the former lib-32 version did */ | ||
22 | #define T0 t0 /* scratch: loaded data and length-bit tests */ | ||
23 | #define T1 t1 /* scratch: loaded data */ | ||
24 | #define T2 t2 /* byte count handed to small_csumcpy; 64/32-byte chunk flags */ | ||
25 | #define T3 t3 /* scratch: loaded data */ | ||
26 | #define T4 t4 /* scratch: loaded data */ | ||
27 | #define T7 t7 /* non-zero when the buffer started at an odd address */ | ||
28 | #endif /* CONFIG_64BIT */ | ||
29 | |||
30 | #define ADDC(sum,reg) /* sum += reg with the carry added back in; clobbers v1 */ \ | ||
31 | addu sum, reg; /* plain add, may wrap */ \ | ||
32 | sltu v1, sum, reg; /* v1 = 1 if the add wrapped (result < addend) */ \ | ||
33 | addu sum, v1 /* fold the carry back into sum */ | ||
34 | |||
35 | #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) /* add the eight words at src+offset .. src+offset+0x1c into sum; clobbers _t0.._t3 and v1 */ \ | ||
36 | lw _t0, (offset + 0x00)(src); /* first four words */ \ | ||
37 | lw _t1, (offset + 0x04)(src); \ | ||
38 | lw _t2, (offset + 0x08)(src); \ | ||
39 | lw _t3, (offset + 0x0c)(src); \ | ||
40 | ADDC(sum, _t0); \ | ||
41 | ADDC(sum, _t1); \ | ||
42 | ADDC(sum, _t2); \ | ||
43 | ADDC(sum, _t3); \ | ||
44 | lw _t0, (offset + 0x10)(src); /* second four words, same scratch registers */ \ | ||
45 | lw _t1, (offset + 0x14)(src); \ | ||
46 | lw _t2, (offset + 0x18)(src); \ | ||
47 | lw _t3, (offset + 0x1c)(src); \ | ||
48 | ADDC(sum, _t0); \ | ||
49 | ADDC(sum, _t1); \ | ||
50 | ADDC(sum, _t2); \ | ||
51 | ADDC(sum, _t3); \ | ||
52 | |||
53 | /* | ||
54 | * a0: source address | ||
55 | * a1: length of the area to checksum | ||
56 | * a2: partial checksum | ||
57 | */ | ||
58 | |||
59 | #define src a0 | ||
60 | #define sum v0 | ||
61 | |||
62 | .text | ||
63 | .set noreorder | ||
64 | |||
65 | /* unknown src alignment and < 8 bytes to go */ | ||
66 | small_csumcpy: /* sum the trailing bytes at src (count in T2, only bits 2..0 examined), fold to 16 bits, add a2, return in v0 */ | ||
67 | move a1, T2 /* a1 = byte count handed over in T2 */ | ||
68 | |||
69 | andi T0, a1, 4 /* is there a full word left? */ | ||
70 | beqz T0, 1f | ||
71 | andi T0, a1, 2 /* delay slot (runs on both paths): T0 = halfword bit */ | ||
72 | |||
73 | /* Still a full word to go */ | ||
74 | ulw T1, (src) /* unaligned-safe word load */ | ||
75 | PTR_ADDIU src, 4 | ||
76 | ADDC(sum, T1) | ||
77 | |||
78 | 1: move T1, zero /* T1 accumulates the halfword/byte tail */ | ||
79 | beqz T0, 1f | ||
80 | andi T0, a1, 1 /* delay slot (runs on both paths): T0 = odd-byte bit */ | ||
81 | |||
82 | /* Still a halfword to go */ | ||
83 | ulhu T1, (src) /* unaligned-safe halfword load, zero-extended */ | ||
84 | PTR_ADDIU src, 2 | ||
85 | |||
86 | 1: beqz T0, 1f | ||
87 | sll T1, T1, 16 /* delay slot: move the halfword (or zero) to the upper 16 bits */ | ||
88 | |||
89 | lbu T2, (src) /* last byte */ | ||
90 | nop /* presumably fills the load delay slot before T2 is used */ | ||
91 | |||
92 | #ifdef __MIPSEB__ /* big-endian: the byte belongs in the high half of its 16-bit lane */ | ||
93 | sll T2, T2, 8 | ||
94 | #endif | ||
95 | or T1, T2 /* T1 = (halfword << 16) | byte lane */ | ||
96 | |||
97 | 1: ADDC(sum, T1) | ||
98 | |||
99 | /* fold checksum */ | ||
100 | sll v1, sum, 16 /* v1 = low half moved to the top */ | ||
101 | addu sum, v1 /* upper 16 bits of sum = high half + low half */ | ||
102 | sltu v1, sum, v1 /* v1 = carry out of that add */ | ||
103 | srl sum, sum, 16 /* bring the 16-bit result down */ | ||
104 | addu sum, v1 /* add the carry back in */ | ||
105 | |||
106 | /* odd buffer alignment? */ | ||
107 | beqz T7, 1f /* T7 == 0: no byte swap needed */ | ||
108 | nop | ||
109 | sll v1, sum, 8 /* swap the two bytes of the 16-bit result ... */ | ||
110 | srl sum, sum, 8 | ||
111 | or sum, v1 | ||
112 | andi sum, 0xffff /* ... and drop the bits shifted above bit 15 */ | ||
113 | 1: | ||
114 | .set reorder /* let the assembler schedule the return's delay slot */ | ||
115 | /* Add the passed partial csum. */ | ||
116 | ADDC(sum, a2) | ||
117 | jr ra /* result is in v0 (sum is defined as v0) */ | ||
118 | .set noreorder | ||
119 | |||
120 | /* ------------------------------------------------------------------------- */ | ||
121 | |||
122 | .align 5 | ||
123 | LEAF(csum_partial) /* a0 = buffer, a1 = length, a2 = partial checksum; result returned in v0 */ | ||
124 | move sum, zero /* start accumulating from zero; a2 is added at the very end */ | ||
125 | move T7, zero /* assume an even start address until checked below */ | ||
126 | |||
127 | sltiu t8, a1, 0x8 /* t8 = (a1 < 8), unsigned */ | ||
128 | bnez t8, small_csumcpy /* < 8 bytes to copy */ | ||
129 | move T2, a1 /* delay slot: T2 = length, read by small_csumcpy */ | ||
130 | |||
131 | beqz a1, out /* NOTE(review): a1 >= 8 here, so this looks never-taken */ | ||
132 | andi T7, src, 0x1 /* odd buffer? */ | ||
133 | |||
134 | hword_align: /* consume one byte if src is odd */ | ||
135 | beqz T7, word_align | ||
136 | andi t8, src, 0x2 /* delay slot: t8 = halfword-misalignment bit */ | ||
137 | |||
138 | lbu T0, (src) | ||
139 | LONG_SUBU a1, a1, 0x1 | ||
140 | #ifdef __MIPSEL__ /* little-endian: place the byte in the high half of its 16-bit lane */ | ||
141 | sll T0, T0, 8 | ||
142 | #endif | ||
143 | ADDC(sum, T0) | ||
144 | PTR_ADDU src, src, 0x1 | ||
145 | andi t8, src, 0x2 /* recompute for the advanced src */ | ||
146 | |||
147 | word_align: /* consume one halfword if src is not word aligned */ | ||
148 | beqz t8, dword_align | ||
149 | sltiu t8, a1, 56 /* delay slot: t8 = (a1 < 56) */ | ||
150 | |||
151 | lhu T0, (src) | ||
152 | LONG_SUBU a1, a1, 0x2 | ||
153 | ADDC(sum, T0) | ||
154 | sltiu t8, a1, 56 /* recompute for the reduced length */ | ||
155 | PTR_ADDU src, src, 0x2 | ||
156 | |||
157 | dword_align: /* src is word aligned from here on */ | ||
158 | bnez t8, do_end_words /* fewer than 56 bytes left: skip the block loops */ | ||
159 | move t8, a1 /* delay slot: t8 = remaining bytes for do_end_words */ | ||
160 | |||
161 | andi t8, src, 0x4 | ||
162 | beqz t8, qword_align | ||
163 | andi t8, src, 0x8 /* delay slot: t8 = 8-byte-misalignment bit */ | ||
164 | |||
165 | lw T0, 0x00(src) /* one word brings src to an 8-byte boundary */ | ||
166 | LONG_SUBU a1, a1, 0x4 | ||
167 | ADDC(sum, T0) | ||
168 | PTR_ADDU src, src, 0x4 | ||
169 | andi t8, src, 0x8 /* recompute for the advanced src */ | ||
170 | |||
171 | qword_align: | ||
172 | beqz t8, oword_align | ||
173 | andi t8, src, 0x10 /* delay slot: t8 = 16-byte-misalignment bit */ | ||
174 | |||
175 | lw T0, 0x00(src) /* two words bring src to a 16-byte boundary */ | ||
176 | lw T1, 0x04(src) | ||
177 | LONG_SUBU a1, a1, 0x8 | ||
178 | ADDC(sum, T0) | ||
179 | ADDC(sum, T1) | ||
180 | PTR_ADDU src, src, 0x8 | ||
181 | andi t8, src, 0x10 /* recompute for the advanced src */ | ||
182 | |||
183 | oword_align: | ||
184 | beqz t8, begin_movement | ||
185 | LONG_SRL t8, a1, 0x7 /* delay slot: t8 = number of 128-byte blocks */ | ||
186 | |||
187 | lw T3, 0x08(src) /* four words bring src to a 32-byte boundary */ | ||
188 | lw T4, 0x0c(src) | ||
189 | lw T0, 0x00(src) | ||
190 | lw T1, 0x04(src) | ||
191 | ADDC(sum, T3) | ||
192 | ADDC(sum, T4) | ||
193 | ADDC(sum, T0) | ||
194 | ADDC(sum, T1) | ||
195 | LONG_SUBU a1, a1, 0x10 | ||
196 | PTR_ADDU src, src, 0x10 | ||
197 | LONG_SRL t8, a1, 0x7 /* recompute the block count for the reduced length */ | ||
198 | |||
199 | begin_movement: | ||
200 | beqz t8, 1f /* no full 128-byte block */ | ||
201 | andi T2, a1, 0x40 /* delay slot: T2 = 64-byte chunk flag */ | ||
202 | |||
203 | move_128bytes: /* loop: t8 blocks of 128 bytes */ | ||
204 | CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) | ||
205 | CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4) | ||
206 | CSUM_BIGCHUNK(src, 0x40, sum, T0, T1, T3, T4) | ||
207 | CSUM_BIGCHUNK(src, 0x60, sum, T0, T1, T3, T4) | ||
208 | LONG_SUBU t8, t8, 0x01 | ||
209 | bnez t8, move_128bytes | ||
210 | PTR_ADDU src, src, 0x80 /* delay slot: advance src on every iteration */ | ||
211 | |||
212 | 1: | ||
213 | beqz T2, 1f /* no 64-byte chunk */ | ||
214 | andi T2, a1, 0x20 /* delay slot: T2 = 32-byte chunk flag */ | ||
215 | |||
216 | move_64bytes: | ||
217 | CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) | ||
218 | CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4) | ||
219 | PTR_ADDU src, src, 0x40 | ||
220 | |||
221 | 1: | ||
222 | beqz T2, do_end_words /* no 32-byte chunk */ | ||
223 | andi t8, a1, 0x1c /* delay slot: t8 = bytes left in whole words */ | ||
224 | |||
225 | move_32bytes: | ||
226 | CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) | ||
227 | andi t8, a1, 0x1c /* same value again; CSUM_BIGCHUNK does not touch t8 or a1 */ | ||
228 | PTR_ADDU src, src, 0x20 | ||
229 | |||
230 | do_end_words: /* t8 = byte count still to be summed a word at a time */ | ||
231 | beqz t8, maybe_end_cruft | ||
232 | LONG_SRL t8, t8, 0x2 /* delay slot: convert bytes to a word count */ | ||
233 | |||
234 | end_words: /* loop: one word per iteration */ | ||
235 | lw T0, (src) | ||
236 | LONG_SUBU t8, t8, 0x1 | ||
237 | ADDC(sum, T0) | ||
238 | bnez t8, end_words | ||
239 | PTR_ADDU src, src, 0x4 /* delay slot: advance src on every iteration */ | ||
240 | |||
241 | maybe_end_cruft: | ||
242 | andi T2, a1, 0x3 /* T2 = 0..3 trailing bytes for small_csumcpy */ | ||
243 | |||
244 | small_memcpy: | ||
245 | j small_csumcpy; move a1, T2 /* XXX ??? */ /* unconditional jump; the move is its delay slot */ | ||
246 | beqz t2, out /* not reached (follows the unconditional j); NOTE(review): raw t2, not T2, which is ta2 on CONFIG_64BIT */ | ||
247 | move a1, T2 | ||
248 | |||
249 | end_bytes: /* not reached: only its own bnez targets this label */ | ||
250 | lb T0, (src) /* NOTE(review): the loaded byte is never added to sum */ | ||
251 | LONG_SUBU a1, a1, 0x1 | ||
252 | bnez a2, end_bytes /* NOTE(review): tests a2 although the loop counts down a1 */ | ||
253 | PTR_ADDU src, src, 0x1 | ||
254 | |||
255 | out: /* exit that returns sum without adding a2 */ | ||
256 | jr ra | ||
257 | move v0, sum /* delay slot; sum is defined as v0, so this is a self-move */ | ||
258 | END(csum_partial) | ||