diff options
author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-07 11:04:51 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-08 20:03:59 -0500 |
commit | ed99e2bc1dc5dc54eb5a019f4975562dbef20103 (patch) | |
tree | c8ff52ab4a29fe842e34fd94d01e74082486391d | |
parent | 773ff78838ca3c07245e45c06235e0baaa5f710a (diff) |
[MIPS] Optimize csum_partial for 64bit kernel
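Make csum_partial 64-bit powered: when USE_DOUBLE is defined, it accumulates the sum with 64-bit loads and adds (ld/daddu, 8 bytes per unit) and folds the 64-bit sum down to 32 bits before the existing 16-bit fold.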
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r-- | arch/mips/lib/csum_partial.S | 76 |
1 file changed, 54 insertions, 22 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index b04475d76f3c..9db357294be1 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S | |||
@@ -29,30 +29,49 @@ | |||
29 | #define t5 $13 | 29 | #define t5 $13 |
30 | #define t6 $14 | 30 | #define t6 $14 |
31 | #define t7 $15 | 31 | #define t7 $15 |
32 | |||
33 | #define USE_DOUBLE | ||
32 | #endif | 34 | #endif |
33 | 35 | ||
36 | #ifdef USE_DOUBLE | ||
37 | |||
38 | #define LOAD ld | ||
39 | #define ADD daddu | ||
40 | #define NBYTES 8 | ||
41 | |||
42 | #else | ||
43 | |||
44 | #define LOAD lw | ||
45 | #define ADD addu | ||
46 | #define NBYTES 4 | ||
47 | |||
48 | #endif /* USE_DOUBLE */ | ||
49 | |||
50 | #define UNIT(unit) ((unit)*NBYTES) | ||
51 | |||
34 | #define ADDC(sum,reg) \ | 52 | #define ADDC(sum,reg) \ |
35 | addu sum, reg; \ | 53 | ADD sum, reg; \ |
36 | sltu v1, sum, reg; \ | 54 | sltu v1, sum, reg; \ |
37 | addu sum, v1 | 55 | ADD sum, v1 |
38 | 56 | ||
39 | #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \ | 57 | #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ |
40 | lw _t0, (offset + 0x00)(src); \ | 58 | LOAD _t0, (offset + UNIT(0))(src); \ |
41 | lw _t1, (offset + 0x04)(src); \ | 59 | LOAD _t1, (offset + UNIT(1))(src); \ |
42 | lw _t2, (offset + 0x08)(src); \ | 60 | LOAD _t2, (offset + UNIT(2))(src); \ |
43 | lw _t3, (offset + 0x0c)(src); \ | 61 | LOAD _t3, (offset + UNIT(3))(src); \ |
44 | ADDC(sum, _t0); \ | ||
45 | ADDC(sum, _t1); \ | ||
46 | ADDC(sum, _t2); \ | ||
47 | ADDC(sum, _t3); \ | ||
48 | lw _t0, (offset + 0x10)(src); \ | ||
49 | lw _t1, (offset + 0x14)(src); \ | ||
50 | lw _t2, (offset + 0x18)(src); \ | ||
51 | lw _t3, (offset + 0x1c)(src); \ | ||
52 | ADDC(sum, _t0); \ | 62 | ADDC(sum, _t0); \ |
53 | ADDC(sum, _t1); \ | 63 | ADDC(sum, _t1); \ |
54 | ADDC(sum, _t2); \ | 64 | ADDC(sum, _t2); \ |
55 | ADDC(sum, _t3); \ | 65 | ADDC(sum, _t3) |
66 | |||
67 | #ifdef USE_DOUBLE | ||
68 | #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \ | ||
69 | CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) | ||
70 | #else | ||
71 | #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \ | ||
72 | CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \ | ||
73 | CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3) | ||
74 | #endif | ||
56 | 75 | ||
57 | /* | 76 | /* |
58 | * a0: source address | 77 | * a0: source address |
@@ -117,11 +136,17 @@ qword_align: | |||
117 | beqz t8, oword_align | 136 | beqz t8, oword_align |
118 | andi t8, src, 0x10 | 137 | andi t8, src, 0x10 |
119 | 138 | ||
139 | #ifdef USE_DOUBLE | ||
140 | ld t0, 0x00(src) | ||
141 | LONG_SUBU a1, a1, 0x8 | ||
142 | ADDC(sum, t0) | ||
143 | #else | ||
120 | lw t0, 0x00(src) | 144 | lw t0, 0x00(src) |
121 | lw t1, 0x04(src) | 145 | lw t1, 0x04(src) |
122 | LONG_SUBU a1, a1, 0x8 | 146 | LONG_SUBU a1, a1, 0x8 |
123 | ADDC(sum, t0) | 147 | ADDC(sum, t0) |
124 | ADDC(sum, t1) | 148 | ADDC(sum, t1) |
149 | #endif | ||
125 | PTR_ADDU src, src, 0x8 | 150 | PTR_ADDU src, src, 0x8 |
126 | andi t8, src, 0x10 | 151 | andi t8, src, 0x10 |
127 | 152 | ||
@@ -129,14 +154,14 @@ oword_align: | |||
129 | beqz t8, begin_movement | 154 | beqz t8, begin_movement |
130 | LONG_SRL t8, a1, 0x7 | 155 | LONG_SRL t8, a1, 0x7 |
131 | 156 | ||
132 | lw t3, 0x08(src) | 157 | #ifdef USE_DOUBLE |
133 | lw t4, 0x0c(src) | 158 | ld t0, 0x00(src) |
134 | lw t0, 0x00(src) | 159 | ld t1, 0x08(src) |
135 | lw t1, 0x04(src) | ||
136 | ADDC(sum, t3) | ||
137 | ADDC(sum, t4) | ||
138 | ADDC(sum, t0) | 160 | ADDC(sum, t0) |
139 | ADDC(sum, t1) | 161 | ADDC(sum, t1) |
162 | #else | ||
163 | CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4) | ||
164 | #endif | ||
140 | LONG_SUBU a1, a1, 0x10 | 165 | LONG_SUBU a1, a1, 0x10 |
141 | PTR_ADDU src, src, 0x10 | 166 | PTR_ADDU src, src, 0x10 |
142 | LONG_SRL t8, a1, 0x7 | 167 | LONG_SRL t8, a1, 0x7 |
@@ -219,6 +244,13 @@ small_csumcpy: | |||
219 | 1: ADDC(sum, t1) | 244 | 1: ADDC(sum, t1) |
220 | 245 | ||
221 | /* fold checksum */ | 246 | /* fold checksum */ |
247 | #ifdef USE_DOUBLE | ||
248 | dsll32 v1, sum, 0 | ||
249 | daddu sum, v1 | ||
250 | sltu v1, sum, v1 | ||
251 | dsra32 sum, sum, 0 | ||
252 | addu sum, v1 | ||
253 | #endif | ||
222 | sll v1, sum, 16 | 254 | sll v1, sum, 16 |
223 | addu sum, v1 | 255 | addu sum, v1 |
224 | sltu v1, sum, v1 | 256 | sltu v1, sum, v1 |