diff options
Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r-- | arch/mips/lib/csum_partial.S | 72 |
1 file changed, 38 insertions, 34 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 8d7784122c14..6b876ca299ee 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S | |||
@@ -39,12 +39,14 @@ | |||
39 | #ifdef USE_DOUBLE | 39 | #ifdef USE_DOUBLE |
40 | 40 | ||
41 | #define LOAD ld | 41 | #define LOAD ld |
42 | #define LOAD32 lwu | ||
42 | #define ADD daddu | 43 | #define ADD daddu |
43 | #define NBYTES 8 | 44 | #define NBYTES 8 |
44 | 45 | ||
45 | #else | 46 | #else |
46 | 47 | ||
47 | #define LOAD lw | 48 | #define LOAD lw |
49 | #define LOAD32 lw | ||
48 | #define ADD addu | 50 | #define ADD addu |
49 | #define NBYTES 4 | 51 | #define NBYTES 4 |
50 | 52 | ||
@@ -53,12 +55,14 @@ | |||
53 | #define UNIT(unit) ((unit)*NBYTES) | 55 | #define UNIT(unit) ((unit)*NBYTES) |
54 | 56 | ||
55 | #define ADDC(sum,reg) \ | 57 | #define ADDC(sum,reg) \ |
56 | .set push; \ | ||
57 | .set noat; \ | ||
58 | ADD sum, reg; \ | 58 | ADD sum, reg; \ |
59 | sltu v1, sum, reg; \ | 59 | sltu v1, sum, reg; \ |
60 | ADD sum, v1; \ | 60 | ADD sum, v1; \ |
61 | .set pop | 61 | |
62 | #define ADDC32(sum,reg) \ | ||
63 | addu sum, reg; \ | ||
64 | sltu v1, sum, reg; \ | ||
65 | addu sum, v1; \ | ||
62 | 66 | ||
63 | #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ | 67 | #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ |
64 | LOAD _t0, (offset + UNIT(0))(src); \ | 68 | LOAD _t0, (offset + UNIT(0))(src); \ |
@@ -132,7 +136,7 @@ LEAF(csum_partial) | |||
132 | beqz t8, .Lqword_align | 136 | beqz t8, .Lqword_align |
133 | andi t8, src, 0x8 | 137 | andi t8, src, 0x8 |
134 | 138 | ||
135 | lw t0, 0x00(src) | 139 | LOAD32 t0, 0x00(src) |
136 | LONG_SUBU a1, a1, 0x4 | 140 | LONG_SUBU a1, a1, 0x4 |
137 | ADDC(sum, t0) | 141 | ADDC(sum, t0) |
138 | PTR_ADDU src, src, 0x4 | 142 | PTR_ADDU src, src, 0x4 |
@@ -211,7 +215,7 @@ LEAF(csum_partial) | |||
211 | LONG_SRL t8, t8, 0x2 | 215 | LONG_SRL t8, t8, 0x2 |
212 | 216 | ||
213 | .Lend_words: | 217 | .Lend_words: |
214 | lw t0, (src) | 218 | LOAD32 t0, (src) |
215 | LONG_SUBU t8, t8, 0x1 | 219 | LONG_SUBU t8, t8, 0x1 |
216 | ADDC(sum, t0) | 220 | ADDC(sum, t0) |
217 | .set reorder /* DADDI_WAR */ | 221 | .set reorder /* DADDI_WAR */ |
@@ -230,6 +234,9 @@ LEAF(csum_partial) | |||
230 | /* Still a full word to go */ | 234 | /* Still a full word to go */ |
231 | ulw t1, (src) | 235 | ulw t1, (src) |
232 | PTR_ADDIU src, 4 | 236 | PTR_ADDIU src, 4 |
237 | #ifdef USE_DOUBLE | ||
238 | dsll t1, t1, 32 /* move word to upper half: drops ulw sign-extension, lower 32 bits become 0 */ | ||
239 | #endif | ||
233 | ADDC(sum, t1) | 240 | ADDC(sum, t1) |
234 | 241 | ||
235 | 1: move t1, zero | 242 | 1: move t1, zero |
@@ -254,8 +261,6 @@ LEAF(csum_partial) | |||
254 | 1: ADDC(sum, t1) | 261 | 1: ADDC(sum, t1) |
255 | 262 | ||
256 | /* fold checksum */ | 263 | /* fold checksum */ |
257 | .set push | ||
258 | .set noat | ||
259 | #ifdef USE_DOUBLE | 264 | #ifdef USE_DOUBLE |
260 | dsll32 v1, sum, 0 | 265 | dsll32 v1, sum, 0 |
261 | daddu sum, v1 | 266 | daddu sum, v1 |
@@ -263,24 +268,25 @@ LEAF(csum_partial) | |||
263 | dsra32 sum, sum, 0 | 268 | dsra32 sum, sum, 0 |
264 | addu sum, v1 | 269 | addu sum, v1 |
265 | #endif | 270 | #endif |
266 | sll v1, sum, 16 | ||
267 | addu sum, v1 | ||
268 | sltu v1, sum, v1 | ||
269 | srl sum, sum, 16 | ||
270 | addu sum, v1 | ||
271 | 271 | ||
272 | /* odd buffer alignment? */ | 272 | /* odd buffer alignment? */ |
273 | beqz t7, 1f | 273 | #ifdef CPU_MIPSR2 |
274 | nop | 274 | wsbh v1, sum |
275 | sll v1, sum, 8 | 275 | movn sum, v1, t7 |
276 | #else | ||
277 | beqz t7, 1f /* odd buffer alignment? */ | ||
278 | lui v1, 0x00ff | ||
279 | addu v1, 0x00ff | ||
280 | and t0, sum, v1 | ||
281 | sll t0, t0, 8 | ||
276 | srl sum, sum, 8 | 282 | srl sum, sum, 8 |
277 | or sum, v1 | 283 | and sum, sum, v1 |
278 | andi sum, 0xffff | 284 | or sum, sum, t0 |
279 | .set pop | ||
280 | 1: | 285 | 1: |
286 | #endif | ||
281 | .set reorder | 287 | .set reorder |
282 | /* Add the passed partial csum. */ | 288 | /* Add the passed partial csum. */ |
283 | ADDC(sum, a2) | 289 | ADDC32(sum, a2) |
284 | jr ra | 290 | jr ra |
285 | .set noreorder | 291 | .set noreorder |
286 | END(csum_partial) | 292 | END(csum_partial) |
@@ -656,8 +662,6 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc) | |||
656 | ADDC(sum, t2) | 662 | ADDC(sum, t2) |
657 | .Ldone: | 663 | .Ldone: |
658 | /* fold checksum */ | 664 | /* fold checksum */ |
659 | .set push | ||
660 | .set noat | ||
661 | #ifdef USE_DOUBLE | 665 | #ifdef USE_DOUBLE |
662 | dsll32 v1, sum, 0 | 666 | dsll32 v1, sum, 0 |
663 | daddu sum, v1 | 667 | daddu sum, v1 |
@@ -665,23 +669,23 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc) | |||
665 | dsra32 sum, sum, 0 | 669 | dsra32 sum, sum, 0 |
666 | addu sum, v1 | 670 | addu sum, v1 |
667 | #endif | 671 | #endif |
668 | sll v1, sum, 16 | ||
669 | addu sum, v1 | ||
670 | sltu v1, sum, v1 | ||
671 | srl sum, sum, 16 | ||
672 | addu sum, v1 | ||
673 | 672 | ||
674 | /* odd buffer alignment? */ | 673 | #ifdef CPU_MIPSR2 |
675 | beqz odd, 1f | 674 | wsbh v1, sum |
676 | nop | 675 | movn sum, v1, odd |
677 | sll v1, sum, 8 | 676 | #else |
677 | beqz odd, 1f /* odd buffer alignment? */ | ||
678 | lui v1, 0x00ff | ||
679 | addu v1, 0x00ff | ||
680 | and t0, sum, v1 | ||
681 | sll t0, t0, 8 | ||
678 | srl sum, sum, 8 | 682 | srl sum, sum, 8 |
679 | or sum, v1 | 683 | and sum, sum, v1 |
680 | andi sum, 0xffff | 684 | or sum, sum, t0 |
681 | .set pop | ||
682 | 1: | 685 | 1: |
686 | #endif | ||
683 | .set reorder | 687 | .set reorder |
684 | ADDC(sum, psum) | 688 | ADDC32(sum, psum) |
685 | jr ra | 689 | jr ra |
686 | .set noreorder | 690 | .set noreorder |
687 | 691 | ||