aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips/lib-32
diff options
context:
space:
mode:
authorAtsushi Nemoto <anemo@mba.ocn.ne.jp>2006-12-03 10:42:59 -0500
committerRalf Baechle <ralf@linux-mips.org>2006-12-04 17:43:13 -0500
commit0bcdda0f3a87ed684d46841b6069409e39c4af65 (patch)
tree52df8fc8d148b98188c09152bcf6e55c55a98c2a /arch/mips/lib-32
parent03dbd2e0b1dcb24d63f64aa234cce3a7fabed328 (diff)
[MIPS] Unify csum_partial.S
The 32-bit version and 64-bit version are almost equal. Unify them. This makes further improvements (for example, copying with parallel, supporting PREFETCH, etc.) easier. Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib-32')
-rw-r--r--arch/mips/lib-32/Makefile2
-rw-r--r--arch/mips/lib-32/csum_partial.S240
2 files changed, 1 insertions, 241 deletions
diff --git a/arch/mips/lib-32/Makefile b/arch/mips/lib-32/Makefile
index ad285786e74b..dcd4d2ed2ac4 100644
--- a/arch/mips/lib-32/Makefile
+++ b/arch/mips/lib-32/Makefile
@@ -2,7 +2,7 @@
2# Makefile for MIPS-specific library files.. 2# Makefile for MIPS-specific library files..
3# 3#
4 4
5lib-y += csum_partial.o memset.o watch.o 5lib-y += memset.o watch.o
6 6
7obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o 7obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o
8obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o 8obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o
diff --git a/arch/mips/lib-32/csum_partial.S b/arch/mips/lib-32/csum_partial.S
deleted file mode 100644
index ea257dbdcc40..000000000000
--- a/arch/mips/lib-32/csum_partial.S
+++ /dev/null
@@ -1,240 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1998 Ralf Baechle
7 */
8#include <asm/asm.h>
9#include <asm/regdef.h>
10
11#define ADDC(sum,reg) \
12 addu sum, reg; \
13 sltu v1, sum, reg; \
14 addu sum, v1
15
16#define CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3) \
17 lw t0, (offset + 0x00)(src); \
18 lw t1, (offset + 0x04)(src); \
19 lw t2, (offset + 0x08)(src); \
20 lw t3, (offset + 0x0c)(src); \
21 ADDC(sum, t0); \
22 ADDC(sum, t1); \
23 ADDC(sum, t2); \
24 ADDC(sum, t3); \
25 lw t0, (offset + 0x10)(src); \
26 lw t1, (offset + 0x14)(src); \
27 lw t2, (offset + 0x18)(src); \
28 lw t3, (offset + 0x1c)(src); \
29 ADDC(sum, t0); \
30 ADDC(sum, t1); \
31 ADDC(sum, t2); \
32 ADDC(sum, t3); \
33
34/*
35 * a0: source address
36 * a1: length of the area to checksum
37 * a2: partial checksum
38 */
39
40#define src a0
41#define dest a1
42#define sum v0
43
44 .text
45 .set noreorder
46
47/* unknown src alignment and < 8 bytes to go */
48small_csumcpy:
49 move a1, t2
50
51 andi t0, a1, 4
52 beqz t0, 1f
53 andi t0, a1, 2
54
55 /* Still a full word to go */
56 ulw t1, (src)
57 addiu src, 4
58 ADDC(sum, t1)
59
601: move t1, zero
61 beqz t0, 1f
62 andi t0, a1, 1
63
64 /* Still a halfword to go */
65 ulhu t1, (src)
66 addiu src, 2
67
681: beqz t0, 1f
69 sll t1, t1, 16
70
71 lbu t2, (src)
72 nop
73
74#ifdef __MIPSEB__
75 sll t2, t2, 8
76#endif
77 or t1, t2
78
791: ADDC(sum, t1)
80
81 /* fold checksum */
82 sll v1, sum, 16
83 addu sum, v1
84 sltu v1, sum, v1
85 srl sum, sum, 16
86 addu sum, v1
87
88 /* odd buffer alignment? */
89 beqz t7, 1f
90 nop
91 sll v1, sum, 8
92 srl sum, sum, 8
93 or sum, v1
94 andi sum, 0xffff
951:
96 .set reorder
97 /* Add the passed partial csum. */
98 ADDC(sum, a2)
99 jr ra
100 .set noreorder
101
102/* ------------------------------------------------------------------------- */
103
104 .align 5
105LEAF(csum_partial)
106 move sum, zero
107 move t7, zero
108
109 sltiu t8, a1, 0x8
110 bnez t8, small_csumcpy /* < 8 bytes to copy */
111 move t2, a1
112
113 beqz a1, out
114 andi t7, src, 0x1 /* odd buffer? */
115
116hword_align:
117 beqz t7, word_align
118 andi t8, src, 0x2
119
120 lbu t0, (src)
121 subu a1, a1, 0x1
122#ifdef __MIPSEL__
123 sll t0, t0, 8
124#endif
125 ADDC(sum, t0)
126 addu src, src, 0x1
127 andi t8, src, 0x2
128
129word_align:
130 beqz t8, dword_align
131 sltiu t8, a1, 56
132
133 lhu t0, (src)
134 subu a1, a1, 0x2
135 ADDC(sum, t0)
136 sltiu t8, a1, 56
137 addu src, src, 0x2
138
139dword_align:
140 bnez t8, do_end_words
141 move t8, a1
142
143 andi t8, src, 0x4
144 beqz t8, qword_align
145 andi t8, src, 0x8
146
147 lw t0, 0x00(src)
148 subu a1, a1, 0x4
149 ADDC(sum, t0)
150 addu src, src, 0x4
151 andi t8, src, 0x8
152
153qword_align:
154 beqz t8, oword_align
155 andi t8, src, 0x10
156
157 lw t0, 0x00(src)
158 lw t1, 0x04(src)
159 subu a1, a1, 0x8
160 ADDC(sum, t0)
161 ADDC(sum, t1)
162 addu src, src, 0x8
163 andi t8, src, 0x10
164
165oword_align:
166 beqz t8, begin_movement
167 srl t8, a1, 0x7
168
169 lw t3, 0x08(src)
170 lw t4, 0x0c(src)
171 lw t0, 0x00(src)
172 lw t1, 0x04(src)
173 ADDC(sum, t3)
174 ADDC(sum, t4)
175 ADDC(sum, t0)
176 ADDC(sum, t1)
177 subu a1, a1, 0x10
178 addu src, src, 0x10
179 srl t8, a1, 0x7
180
181begin_movement:
182 beqz t8, 1f
183 andi t2, a1, 0x40
184
185move_128bytes:
186 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
187 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
188 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
189 CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
190 subu t8, t8, 0x01
191 bnez t8, move_128bytes
192 addu src, src, 0x80
193
1941:
195 beqz t2, 1f
196 andi t2, a1, 0x20
197
198move_64bytes:
199 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
200 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
201 addu src, src, 0x40
202
2031:
204 beqz t2, do_end_words
205 andi t8, a1, 0x1c
206
207move_32bytes:
208 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
209 andi t8, a1, 0x1c
210 addu src, src, 0x20
211
212do_end_words:
213 beqz t8, maybe_end_cruft
214 srl t8, t8, 0x2
215
216end_words:
217 lw t0, (src)
218 subu t8, t8, 0x1
219 ADDC(sum, t0)
220 bnez t8, end_words
221 addu src, src, 0x4
222
223maybe_end_cruft:
224 andi t2, a1, 0x3
225
226small_memcpy:
227 j small_csumcpy; move a1, t2
228 beqz t2, out
229 move a1, t2
230
231end_bytes:
232 lb t0, (src)
233 subu a1, a1, 0x1
234 bnez a2, end_bytes
235 addu src, src, 0x1
236
237out:
238 jr ra
239 move v0, sum
240 END(csum_partial)