aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips/lib-64
diff options
context:
space:
mode:
authorAtsushi Nemoto <anemo@mba.ocn.ne.jp>2006-12-03 10:42:59 -0500
committerRalf Baechle <ralf@linux-mips.org>2006-12-04 17:43:13 -0500
commit0bcdda0f3a87ed684d46841b6069409e39c4af65 (patch)
tree52df8fc8d148b98188c09152bcf6e55c55a98c2a /arch/mips/lib-64
parent03dbd2e0b1dcb24d63f64aa234cce3a7fabed328 (diff)
[MIPS] Unify csum_partial.S
The 32-bit version and the 64-bit version are almost identical, so unify them. This makes further improvements (for example, copying in parallel, supporting PREFETCH, etc.) easier. Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib-64')
-rw-r--r--arch/mips/lib-64/Makefile2
-rw-r--r--arch/mips/lib-64/csum_partial.S242
2 files changed, 1 insertions, 243 deletions
diff --git a/arch/mips/lib-64/Makefile b/arch/mips/lib-64/Makefile
index ad285786e74b..dcd4d2ed2ac4 100644
--- a/arch/mips/lib-64/Makefile
+++ b/arch/mips/lib-64/Makefile
@@ -2,7 +2,7 @@
2# Makefile for MIPS-specific library files.. 2# Makefile for MIPS-specific library files..
3# 3#
4 4
5lib-y += csum_partial.o memset.o watch.o 5lib-y += memset.o watch.o
6 6
7obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o 7obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o
8obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o 8obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o
diff --git a/arch/mips/lib-64/csum_partial.S b/arch/mips/lib-64/csum_partial.S
deleted file mode 100644
index 25aba660cc9c..000000000000
--- a/arch/mips/lib-64/csum_partial.S
+++ /dev/null
@@ -1,242 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Quick'n'dirty IP checksum ...
7 *
8 * Copyright (C) 1998, 1999 Ralf Baechle
9 * Copyright (C) 1999 Silicon Graphics, Inc.
10 */
11#include <asm/asm.h>
12#include <asm/regdef.h>
13
14#define ADDC(sum,reg) /* sum += reg with the carry-out added back in; clobbers v1 */ \
15 addu sum, reg; /* add; addu itself does not record a carry */ \
16 sltu v1, sum, reg; /* v1 = 1 if the result is below the addend, i.e. the add wrapped */ \
17 addu sum, v1 /* fold that carry back into sum */
18
19#define CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3) /* accumulate the 32 bytes at src+offset .. src+offset+0x1f into sum; t0-t3 are caller-chosen scratch registers, v1 is clobbered by ADDC; src is not advanced */ \
20 lw t0, (offset + 0x00)(src); /* first half: load four words ... */ \
21 lw t1, (offset + 0x04)(src); \
22 lw t2, (offset + 0x08)(src); \
23 lw t3, (offset + 0x0c)(src); \
24 ADDC(sum, t0); /* ... then add them (loads grouped ahead of the adds, presumably to hide load latency) */ \
25 ADDC(sum, t1); \
26 ADDC(sum, t2); \
27 ADDC(sum, t3); \
28 lw t0, (offset + 0x10)(src); /* second half: same pattern for the next 16 bytes */ \
29 lw t1, (offset + 0x14)(src); \
30 lw t2, (offset + 0x18)(src); \
31 lw t3, (offset + 0x1c)(src); \
32 ADDC(sum, t0); \
33 ADDC(sum, t1); \
34 ADDC(sum, t2); \
35 ADDC(sum, t3); \
36
37/*
38 * a0: source address
39 * a1: length of the area to checksum
40 * a2: partial checksum
41 */
42
43#define src a0
44#define sum v0
45
46 .text
47 .set noreorder
48
49/* unknown src alignment and < 8 bytes to go */ /* Tail handler and common exit. In: src (a0) = next byte, ta2 = remaining byte count, sum (v0) = running sum, t3 = odd-start flag, a2 = partial checksum. Out: v0. */
50small_csumcpy:
51 move a1, ta2 /* a1 = remaining byte count; both callers pass it in ta2 */
52
53 andi ta0, a1, 4 /* is there a full 4-byte word left? */
54 beqz ta0, 1f
55 andi ta0, a1, 2 /* delay slot (always executed): pre-compute the halfword test */
56
57 /* Still a full word to go */
58 ulw ta1, (src) /* unaligned-capable word load */
59 daddiu src, 4
60 ADDC(sum, ta1)
61
621: move ta1, zero /* ta1 collects the trailing halfword and/or byte */
63 beqz ta0, 1f
64 andi ta0, a1, 1 /* delay slot (always executed): pre-compute the odd-byte test */
65
66 /* Still a halfword to go */
67 ulhu ta1, (src) /* unaligned-capable, zero-extended halfword load */
68 daddiu src, 2
69
701: beqz ta0, 1f
71 sll ta1, ta1, 16 /* delay slot (always executed): move the halfword, or 0, into bits 16-31 */
72
73 lbu ta2, (src) /* last odd byte */
74 nop /* presumably fills the load delay slot before ta2 is used */
75
76#ifdef __MIPSEB__ /* big-endian build only */
77 sll ta2, ta2, 8 /* place the byte in bits 8-15 */
78#endif /* __MIPSEB__ */
79 or ta1, ta2 /* merge the byte with the shifted halfword */
80
811: ADDC(sum, ta1) /* add the collected tail (0 if there was none) */
82
83 /* fold checksum */ /* reduce the 32-bit sum to 16 bits with end-around carry */
84 sll v1, sum, 16 /* v1 = low half moved to the high half */
85 addu sum, v1 /* high half of sum now holds high + low */
86 sltu v1, sum, v1 /* v1 = carry out of that add */
87 srl sum, sum, 16 /* bring the 16-bit result down */
88 addu sum, v1 /* add the carry back in */
89
90 /* odd buffer alignment? */ /* t3 != 0 means the buffer started on an odd address */
91 beqz t3, 1f
92 nop /* delay slot intentionally empty */
93 sll v1, sum, 8 /* swap the two bytes of the 16-bit sum ... */
94 srl sum, sum, 8
95 or sum, v1
96 andi sum, 0xffff /* ... and drop the bits shifted above bit 15 */
971:
98 .set reorder /* let the assembler schedule the delay slot of the return */
99 /* Add the passed partial csum. */
100 ADDC(sum, a2) /* result stays in v0 because sum is defined as v0 */
101 jr ra
102 .set noreorder /* restore manual delay-slot scheduling for the code that follows */
103
104/* ------------------------------------------------------------------------- */
105
106 .align 5 /* GNU as on MIPS: align to 2^5 = 32 bytes */
107LEAF(csum_partial) /* In: a0 = src, a1 = length in bytes, a2 = partial checksum. Result is returned in v0 (sum). Scratch used below: v1, t0, t3, t8, ta0-ta3. */
108 move sum, zero /* start from 0; a2 is only added at the end, in small_csumcpy */
109 move t3, zero /* t3 = odd-start flag; stays 0 on the short (< 8 bytes) path */
110
111 sltiu t8, a1, 0x8 /* t8 = (length < 8), unsigned */
112 bnez t8, small_csumcpy /* < 8 bytes to copy */
113 move ta2, a1 /* delay slot: small_csumcpy takes the byte count in ta2 */
114
115 beqz a1, out /* NOTE(review): a1 >= 8 here, so this branch looks never-taken; it mainly carries the delay slot below */
116 andi t3, src, 0x1 /* odd buffer? */
117
118hword_align: /* consume one leading byte if src is odd */
119 beqz t3, word_align
120 andi t8, src, 0x2 /* delay slot: halfword-alignment test for word_align */
121
122 lbu ta0, (src)
123 dsubu a1, a1, 0x1
124#ifdef __MIPSEL__ /* little-endian build only */
125 sll ta0, ta0, 8 /* place the byte in bits 8-15 */
126#endif /* __MIPSEL__ */
127 ADDC(sum, ta0)
128 daddu src, src, 0x1
129 andi t8, src, 0x2 /* redo the test with the advanced src */
130
131word_align: /* consume one halfword if src is not 4-byte aligned */
132 beqz t8, dword_align
133 sltiu t8, a1, 56 /* delay slot: t8 = (remaining < 56) for dword_align */
134
135 lhu ta0, (src)
136 dsubu a1, a1, 0x2
137 ADDC(sum, ta0)
138 sltiu t8, a1, 56 /* redo the test with the reduced length */
139 daddu src, src, 0x2
140
141dword_align: /* src is 4-byte aligned from here on */
142 bnez t8, do_end_words /* fewer than 56 bytes left: skip the block code */
143 move t8, a1 /* delay slot: do_end_words shifts t8 right by 2 to get the word count */
144
145 andi t8, src, 0x4
146 beqz t8, qword_align
147 andi t8, src, 0x8 /* delay slot: test for qword_align */
148
149 lw ta0, 0x00(src) /* one word to reach 8-byte alignment */
150 dsubu a1, a1, 0x4
151 ADDC(sum, ta0)
152 daddu src, src, 0x4
153 andi t8, src, 0x8 /* redo the test with the advanced src */
154
155qword_align: /* consume two words if src is not 16-byte aligned */
156 beqz t8, oword_align
157 andi t8, src, 0x10 /* delay slot: test for oword_align */
158
159 lw ta0, 0x00(src)
160 lw ta1, 0x04(src)
161 dsubu a1, a1, 0x8
162 ADDC(sum, ta0)
163 ADDC(sum, ta1)
164 daddu src, src, 0x8
165 andi t8, src, 0x10 /* redo the test with the advanced src */
166
167oword_align: /* consume four words if src is not 32-byte aligned */
168 beqz t8, begin_movement
169 dsrl t8, a1, 0x7 /* delay slot: t8 = number of whole 128-byte blocks */
170
171 lw ta3, 0x08(src)
172 lw t0, 0x0c(src)
173 lw ta0, 0x00(src)
174 lw ta1, 0x04(src)
175 ADDC(sum, ta3)
176 ADDC(sum, t0)
177 ADDC(sum, ta0)
178 ADDC(sum, ta1)
179 dsubu a1, a1, 0x10
180 daddu src, src, 0x10
181 dsrl t8, a1, 0x7 /* recompute the 128-byte block count with the reduced length */
182
183begin_movement: /* src is 32-byte aligned; t8 = count of 128-byte blocks */
184 beqz t8, 1f /* no 128-byte block to process */
185 andi ta2, a1, 0x40 /* delay slot: ta2 != 0 if a 64-byte block follows */
186
187move_128bytes: /* loop: four 32-byte chunks per iteration */
188 CSUM_BIGCHUNK(src, 0x00, sum, ta0, ta1, ta3, t0)
189 CSUM_BIGCHUNK(src, 0x20, sum, ta0, ta1, ta3, t0)
190 CSUM_BIGCHUNK(src, 0x40, sum, ta0, ta1, ta3, t0)
191 CSUM_BIGCHUNK(src, 0x60, sum, ta0, ta1, ta3, t0)
192 dsubu t8, t8, 0x01
193 bnez t8, move_128bytes
194 daddu src, src, 0x80 /* delay slot: advance src on every iteration, including the last */
195
1961:
197 beqz ta2, 1f /* bit 0x40 of the length clear: no 64-byte block */
198 andi ta2, a1, 0x20 /* delay slot: ta2 != 0 if a 32-byte block follows */
199
200move_64bytes:
201 CSUM_BIGCHUNK(src, 0x00, sum, ta0, ta1, ta3, t0)
202 CSUM_BIGCHUNK(src, 0x20, sum, ta0, ta1, ta3, t0)
203 daddu src, src, 0x40
204
2051:
206 beqz ta2, do_end_words /* bit 0x20 of the length clear: no 32-byte block */
207 andi t8, a1, 0x1c /* delay slot: t8 = bytes remaining in whole words (0..28) */
208
209move_32bytes:
210 CSUM_BIGCHUNK(src, 0x00, sum, ta0, ta1, ta3, t0)
211 andi t8, a1, 0x1c /* same value as computed in the delay slot above */
212 daddu src, src, 0x20
213
214do_end_words: /* t8 = byte count covering the remaining whole words */
215 beqz t8, maybe_end_cruft
216 dsrl t8, t8, 0x2 /* delay slot: convert the byte count to a word count */
217
218end_words: /* loop: one word per iteration, t8 counts down to 0 */
219 lw ta0, (src)
220 dsubu t8, t8, 0x1
221 ADDC(sum, ta0)
222 bnez t8, end_words
223 daddu src, src, 0x4 /* delay slot: advance src on every iteration */
224
225maybe_end_cruft:
226 andi ta2, a1, 0x3 /* ta2 = 0..3 trailing bytes, handed to small_csumcpy */
227
228small_memcpy:
229 j small_csumcpy; move a1, ta2 /* XXX ??? */ /* tail bytes, fold, odd-start swap and the a2 add all happen in small_csumcpy; the move sits in the jump's delay slot */
230 beqz t2, out /* NOTE(review): no label between the jump above and here, so L230-237 appear unreachable */
231 move a1, ta2
232
233end_bytes: /* NOTE(review): referenced only by its own branch; loads a byte but never adds it, and tests a2 rather than a1 */
234 lb ta0, (src)
235 dsubu a1, a1, 0x1
236 bnez a2, end_bytes
237 daddu src, src, 0x1
238
239out: /* NOTE(review): only targeted by the never-taken branch near the entry and by the unreachable branch above */
240 jr ra
241 move v0, sum /* delay slot; sum is defined as v0, so this moves v0 to itself */
242 END(csum_partial)