aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips/lib/csum_partial.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r--arch/mips/lib/csum_partial.S293
1 files changed, 154 insertions, 139 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 15611d9df7ac..9db357294be1 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -12,43 +12,66 @@
12#include <asm/regdef.h> 12#include <asm/regdef.h>
13 13
14#ifdef CONFIG_64BIT 14#ifdef CONFIG_64BIT
15#define T0 ta0 15/*
16#define T1 ta1 16 * As we are sharing code base with the mips32 tree (which use the o32 ABI
17#define T2 ta2 17 * register definitions). We need to redefine the register definitions from
18#define T3 ta3 18 * the n64 ABI register naming to the o32 ABI register naming.
19#define T4 t0 19 */
20#define T7 t3 20#undef t0
21#else 21#undef t1
22#define T0 t0 22#undef t2
23#define T1 t1 23#undef t3
24#define T2 t2 24#define t0 $8
25#define T3 t3 25#define t1 $9
26#define T4 t4 26#define t2 $10
27#define T7 t7 27#define t3 $11
28#define t4 $12
29#define t5 $13
30#define t6 $14
31#define t7 $15
32
33#define USE_DOUBLE
28#endif 34#endif
29 35
36#ifdef USE_DOUBLE
37
38#define LOAD ld
39#define ADD daddu
40#define NBYTES 8
41
42#else
43
44#define LOAD lw
45#define ADD addu
46#define NBYTES 4
47
48#endif /* USE_DOUBLE */
49
50#define UNIT(unit) ((unit)*NBYTES)
51
30#define ADDC(sum,reg) \ 52#define ADDC(sum,reg) \
31 addu sum, reg; \ 53 ADD sum, reg; \
32 sltu v1, sum, reg; \ 54 sltu v1, sum, reg; \
33 addu sum, v1 55 ADD sum, v1
34 56
35#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \ 57#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
36 lw _t0, (offset + 0x00)(src); \ 58 LOAD _t0, (offset + UNIT(0))(src); \
37 lw _t1, (offset + 0x04)(src); \ 59 LOAD _t1, (offset + UNIT(1))(src); \
38 lw _t2, (offset + 0x08)(src); \ 60 LOAD _t2, (offset + UNIT(2))(src); \
39 lw _t3, (offset + 0x0c)(src); \ 61 LOAD _t3, (offset + UNIT(3))(src); \
40 ADDC(sum, _t0); \
41 ADDC(sum, _t1); \
42 ADDC(sum, _t2); \
43 ADDC(sum, _t3); \
44 lw _t0, (offset + 0x10)(src); \
45 lw _t1, (offset + 0x14)(src); \
46 lw _t2, (offset + 0x18)(src); \
47 lw _t3, (offset + 0x1c)(src); \
48 ADDC(sum, _t0); \ 62 ADDC(sum, _t0); \
49 ADDC(sum, _t1); \ 63 ADDC(sum, _t1); \
50 ADDC(sum, _t2); \ 64 ADDC(sum, _t2); \
51 ADDC(sum, _t3); \ 65 ADDC(sum, _t3)
66
67#ifdef USE_DOUBLE
68#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
69 CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
70#else
71#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
72 CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \
73 CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
74#endif
52 75
53/* 76/*
54 * a0: source address 77 * a0: source address
@@ -61,86 +84,27 @@
61 84
62 .text 85 .text
63 .set noreorder 86 .set noreorder
64
65/* unknown src alignment and < 8 bytes to go */
66small_csumcpy:
67 move a1, T2
68
69 andi T0, a1, 4
70 beqz T0, 1f
71 andi T0, a1, 2
72
73 /* Still a full word to go */
74 ulw T1, (src)
75 PTR_ADDIU src, 4
76 ADDC(sum, T1)
77
781: move T1, zero
79 beqz T0, 1f
80 andi T0, a1, 1
81
82 /* Still a halfword to go */
83 ulhu T1, (src)
84 PTR_ADDIU src, 2
85
861: beqz T0, 1f
87 sll T1, T1, 16
88
89 lbu T2, (src)
90 nop
91
92#ifdef __MIPSEB__
93 sll T2, T2, 8
94#endif
95 or T1, T2
96
971: ADDC(sum, T1)
98
99 /* fold checksum */
100 sll v1, sum, 16
101 addu sum, v1
102 sltu v1, sum, v1
103 srl sum, sum, 16
104 addu sum, v1
105
106 /* odd buffer alignment? */
107 beqz T7, 1f
108 nop
109 sll v1, sum, 8
110 srl sum, sum, 8
111 or sum, v1
112 andi sum, 0xffff
1131:
114 .set reorder
115 /* Add the passed partial csum. */
116 ADDC(sum, a2)
117 jr ra
118 .set noreorder
119
120/* ------------------------------------------------------------------------- */
121
122 .align 5 87 .align 5
123LEAF(csum_partial) 88LEAF(csum_partial)
124 move sum, zero 89 move sum, zero
125 move T7, zero 90 move t7, zero
126 91
127 sltiu t8, a1, 0x8 92 sltiu t8, a1, 0x8
128 bnez t8, small_csumcpy /* < 8 bytes to copy */ 93 bnez t8, small_csumcpy /* < 8 bytes to copy */
129 move T2, a1 94 move t2, a1
130 95
131 beqz a1, out 96 andi t7, src, 0x1 /* odd buffer? */
132 andi T7, src, 0x1 /* odd buffer? */
133 97
134hword_align: 98hword_align:
135 beqz T7, word_align 99 beqz t7, word_align
136 andi t8, src, 0x2 100 andi t8, src, 0x2
137 101
138 lbu T0, (src) 102 lbu t0, (src)
139 LONG_SUBU a1, a1, 0x1 103 LONG_SUBU a1, a1, 0x1
140#ifdef __MIPSEL__ 104#ifdef __MIPSEL__
141 sll T0, T0, 8 105 sll t0, t0, 8
142#endif 106#endif
143 ADDC(sum, T0) 107 ADDC(sum, t0)
144 PTR_ADDU src, src, 0x1 108 PTR_ADDU src, src, 0x1
145 andi t8, src, 0x2 109 andi t8, src, 0x2
146 110
@@ -148,9 +112,9 @@ word_align:
148 beqz t8, dword_align 112 beqz t8, dword_align
149 sltiu t8, a1, 56 113 sltiu t8, a1, 56
150 114
151 lhu T0, (src) 115 lhu t0, (src)
152 LONG_SUBU a1, a1, 0x2 116 LONG_SUBU a1, a1, 0x2
153 ADDC(sum, T0) 117 ADDC(sum, t0)
154 sltiu t8, a1, 56 118 sltiu t8, a1, 56
155 PTR_ADDU src, src, 0x2 119 PTR_ADDU src, src, 0x2
156 120
@@ -162,9 +126,9 @@ dword_align:
162 beqz t8, qword_align 126 beqz t8, qword_align
163 andi t8, src, 0x8 127 andi t8, src, 0x8
164 128
165 lw T0, 0x00(src) 129 lw t0, 0x00(src)
166 LONG_SUBU a1, a1, 0x4 130 LONG_SUBU a1, a1, 0x4
167 ADDC(sum, T0) 131 ADDC(sum, t0)
168 PTR_ADDU src, src, 0x4 132 PTR_ADDU src, src, 0x4
169 andi t8, src, 0x8 133 andi t8, src, 0x8
170 134
@@ -172,11 +136,17 @@ qword_align:
172 beqz t8, oword_align 136 beqz t8, oword_align
173 andi t8, src, 0x10 137 andi t8, src, 0x10
174 138
175 lw T0, 0x00(src) 139#ifdef USE_DOUBLE
176 lw T1, 0x04(src) 140 ld t0, 0x00(src)
141 LONG_SUBU a1, a1, 0x8
142 ADDC(sum, t0)
143#else
144 lw t0, 0x00(src)
145 lw t1, 0x04(src)
177 LONG_SUBU a1, a1, 0x8 146 LONG_SUBU a1, a1, 0x8
178 ADDC(sum, T0) 147 ADDC(sum, t0)
179 ADDC(sum, T1) 148 ADDC(sum, t1)
149#endif
180 PTR_ADDU src, src, 0x8 150 PTR_ADDU src, src, 0x8
181 andi t8, src, 0x10 151 andi t8, src, 0x10
182 152
@@ -184,75 +154,120 @@ oword_align:
184 beqz t8, begin_movement 154 beqz t8, begin_movement
185 LONG_SRL t8, a1, 0x7 155 LONG_SRL t8, a1, 0x7
186 156
187 lw T3, 0x08(src) 157#ifdef USE_DOUBLE
188 lw T4, 0x0c(src) 158 ld t0, 0x00(src)
189 lw T0, 0x00(src) 159 ld t1, 0x08(src)
190 lw T1, 0x04(src) 160 ADDC(sum, t0)
191 ADDC(sum, T3) 161 ADDC(sum, t1)
192 ADDC(sum, T4) 162#else
193 ADDC(sum, T0) 163 CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
194 ADDC(sum, T1) 164#endif
195 LONG_SUBU a1, a1, 0x10 165 LONG_SUBU a1, a1, 0x10
196 PTR_ADDU src, src, 0x10 166 PTR_ADDU src, src, 0x10
197 LONG_SRL t8, a1, 0x7 167 LONG_SRL t8, a1, 0x7
198 168
199begin_movement: 169begin_movement:
200 beqz t8, 1f 170 beqz t8, 1f
201 andi T2, a1, 0x40 171 andi t2, a1, 0x40
202 172
203move_128bytes: 173move_128bytes:
204 CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) 174 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
205 CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4) 175 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
206 CSUM_BIGCHUNK(src, 0x40, sum, T0, T1, T3, T4) 176 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
207 CSUM_BIGCHUNK(src, 0x60, sum, T0, T1, T3, T4) 177 CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
208 LONG_SUBU t8, t8, 0x01 178 LONG_SUBU t8, t8, 0x01
209 bnez t8, move_128bytes 179 bnez t8, move_128bytes
210 PTR_ADDU src, src, 0x80 180 PTR_ADDU src, src, 0x80
211 181
2121: 1821:
213 beqz T2, 1f 183 beqz t2, 1f
214 andi T2, a1, 0x20 184 andi t2, a1, 0x20
215 185
216move_64bytes: 186move_64bytes:
217 CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) 187 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
218 CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4) 188 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
219 PTR_ADDU src, src, 0x40 189 PTR_ADDU src, src, 0x40
220 190
2211: 1911:
222 beqz T2, do_end_words 192 beqz t2, do_end_words
223 andi t8, a1, 0x1c 193 andi t8, a1, 0x1c
224 194
225move_32bytes: 195move_32bytes:
226 CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) 196 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
227 andi t8, a1, 0x1c 197 andi t8, a1, 0x1c
228 PTR_ADDU src, src, 0x20 198 PTR_ADDU src, src, 0x20
229 199
230do_end_words: 200do_end_words:
231 beqz t8, maybe_end_cruft 201 beqz t8, small_csumcpy
232 LONG_SRL t8, t8, 0x2 202 andi t2, a1, 0x3
203 LONG_SRL t8, t8, 0x2
233 204
234end_words: 205end_words:
235 lw T0, (src) 206 lw t0, (src)
236 LONG_SUBU t8, t8, 0x1 207 LONG_SUBU t8, t8, 0x1
237 ADDC(sum, T0) 208 ADDC(sum, t0)
238 bnez t8, end_words 209 bnez t8, end_words
239 PTR_ADDU src, src, 0x4 210 PTR_ADDU src, src, 0x4
240 211
241maybe_end_cruft: 212/* unknown src alignment and < 8 bytes to go */
242 andi T2, a1, 0x3 213small_csumcpy:
214 move a1, t2
243 215
244small_memcpy: 216 andi t0, a1, 4
245 j small_csumcpy; move a1, T2 /* XXX ??? */ 217 beqz t0, 1f
246 beqz t2, out 218 andi t0, a1, 2
247 move a1, T2
248 219
249end_bytes: 220 /* Still a full word to go */
250 lb T0, (src) 221 ulw t1, (src)
251 LONG_SUBU a1, a1, 0x1 222 PTR_ADDIU src, 4
252 bnez a2, end_bytes 223 ADDC(sum, t1)
253 PTR_ADDU src, src, 0x1 224
2251: move t1, zero
226 beqz t0, 1f
227 andi t0, a1, 1
228
229 /* Still a halfword to go */
230 ulhu t1, (src)
231 PTR_ADDIU src, 2
232
2331: beqz t0, 1f
234 sll t1, t1, 16
235
236 lbu t2, (src)
237 nop
254 238
255out: 239#ifdef __MIPSEB__
240 sll t2, t2, 8
241#endif
242 or t1, t2
243
2441: ADDC(sum, t1)
245
246 /* fold checksum */
247#ifdef USE_DOUBLE
248 dsll32 v1, sum, 0
249 daddu sum, v1
250 sltu v1, sum, v1
251 dsra32 sum, sum, 0
252 addu sum, v1
253#endif
254 sll v1, sum, 16
255 addu sum, v1
256 sltu v1, sum, v1
257 srl sum, sum, 16
258 addu sum, v1
259
260 /* odd buffer alignment? */
261 beqz t7, 1f
262 nop
263 sll v1, sum, 8
264 srl sum, sum, 8
265 or sum, v1
266 andi sum, 0xffff
2671:
268 .set reorder
269 /* Add the passed partial csum. */
270 ADDC(sum, a2)
256 jr ra 271 jr ra
257 move v0, sum 272 .set noreorder
258 END(csum_partial) 273 END(csum_partial)