diff options
author | Atsushi Nemoto <anemo@mba.ocn.ne.jp> | 2006-12-07 11:04:45 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2006-12-08 20:03:59 -0500 |
commit | 773ff78838ca3c07245e45c06235e0baaa5f710a (patch) | |
tree | c238920f34ab310a7a3d426cefbf9ebb1d5ea78c /arch/mips | |
parent | 52ffe760ea9ec407292d093c3f06c1cda5187228 (diff) |
[MIPS] Optimize flow of csum_partial
Delete the dead code at the end of the function and move small_csumcpy
there. This makes some labels (maybe_end_cruft, small_memcpy,
end_bytes, out) unnecessary and eliminates some branches.
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips')
-rw-r--r-- | arch/mips/lib/csum_partial.S | 129 |
1 files changed, 54 insertions, 75 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 3bffdbb1c1f9..b04475d76f3c 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S | |||
@@ -65,64 +65,6 @@ | |||
65 | 65 | ||
66 | .text | 66 | .text |
67 | .set noreorder | 67 | .set noreorder |
68 | |||
69 | /* unknown src alignment and < 8 bytes to go */ | ||
70 | small_csumcpy: | ||
71 | move a1, t2 | ||
72 | |||
73 | andi t0, a1, 4 | ||
74 | beqz t0, 1f | ||
75 | andi t0, a1, 2 | ||
76 | |||
77 | /* Still a full word to go */ | ||
78 | ulw t1, (src) | ||
79 | PTR_ADDIU src, 4 | ||
80 | ADDC(sum, t1) | ||
81 | |||
82 | 1: move t1, zero | ||
83 | beqz t0, 1f | ||
84 | andi t0, a1, 1 | ||
85 | |||
86 | /* Still a halfword to go */ | ||
87 | ulhu t1, (src) | ||
88 | PTR_ADDIU src, 2 | ||
89 | |||
90 | 1: beqz t0, 1f | ||
91 | sll t1, t1, 16 | ||
92 | |||
93 | lbu t2, (src) | ||
94 | nop | ||
95 | |||
96 | #ifdef __MIPSEB__ | ||
97 | sll t2, t2, 8 | ||
98 | #endif | ||
99 | or t1, t2 | ||
100 | |||
101 | 1: ADDC(sum, t1) | ||
102 | |||
103 | /* fold checksum */ | ||
104 | sll v1, sum, 16 | ||
105 | addu sum, v1 | ||
106 | sltu v1, sum, v1 | ||
107 | srl sum, sum, 16 | ||
108 | addu sum, v1 | ||
109 | |||
110 | /* odd buffer alignment? */ | ||
111 | beqz t7, 1f | ||
112 | nop | ||
113 | sll v1, sum, 8 | ||
114 | srl sum, sum, 8 | ||
115 | or sum, v1 | ||
116 | andi sum, 0xffff | ||
117 | 1: | ||
118 | .set reorder | ||
119 | /* Add the passed partial csum. */ | ||
120 | ADDC(sum, a2) | ||
121 | jr ra | ||
122 | .set noreorder | ||
123 | |||
124 | /* ------------------------------------------------------------------------- */ | ||
125 | |||
126 | .align 5 | 68 | .align 5 |
127 | LEAF(csum_partial) | 69 | LEAF(csum_partial) |
128 | move sum, zero | 70 | move sum, zero |
@@ -132,8 +74,7 @@ LEAF(csum_partial) | |||
132 | bnez t8, small_csumcpy /* < 8 bytes to copy */ | 74 | bnez t8, small_csumcpy /* < 8 bytes to copy */ |
133 | move t2, a1 | 75 | move t2, a1 |
134 | 76 | ||
135 | beqz a1, out | 77 | andi t7, src, 0x1 /* odd buffer? */ |
136 | andi t7, src, 0x1 /* odd buffer? */ | ||
137 | 78 | ||
138 | hword_align: | 79 | hword_align: |
139 | beqz t7, word_align | 80 | beqz t7, word_align |
@@ -232,8 +173,9 @@ move_32bytes: | |||
232 | PTR_ADDU src, src, 0x20 | 173 | PTR_ADDU src, src, 0x20 |
233 | 174 | ||
234 | do_end_words: | 175 | do_end_words: |
235 | beqz t8, maybe_end_cruft | 176 | beqz t8, small_csumcpy |
236 | LONG_SRL t8, t8, 0x2 | 177 | andi t2, a1, 0x3 |
178 | LONG_SRL t8, t8, 0x2 | ||
237 | 179 | ||
238 | end_words: | 180 | end_words: |
239 | lw t0, (src) | 181 | lw t0, (src) |
@@ -242,21 +184,58 @@ end_words: | |||
242 | bnez t8, end_words | 184 | bnez t8, end_words |
243 | PTR_ADDU src, src, 0x4 | 185 | PTR_ADDU src, src, 0x4 |
244 | 186 | ||
245 | maybe_end_cruft: | 187 | /* unknown src alignment and < 8 bytes to go */ |
246 | andi t2, a1, 0x3 | 188 | small_csumcpy: |
189 | move a1, t2 | ||
247 | 190 | ||
248 | small_memcpy: | 191 | andi t0, a1, 4 |
249 | j small_csumcpy; move a1, t2 /* XXX ??? */ | 192 | beqz t0, 1f |
250 | beqz t2, out | 193 | andi t0, a1, 2 |
251 | move a1, t2 | ||
252 | 194 | ||
253 | end_bytes: | 195 | /* Still a full word to go */ |
254 | lb t0, (src) | 196 | ulw t1, (src) |
255 | LONG_SUBU a1, a1, 0x1 | 197 | PTR_ADDIU src, 4 |
256 | bnez a2, end_bytes | 198 | ADDC(sum, t1) |
257 | PTR_ADDU src, src, 0x1 | 199 | |
200 | 1: move t1, zero | ||
201 | beqz t0, 1f | ||
202 | andi t0, a1, 1 | ||
203 | |||
204 | /* Still a halfword to go */ | ||
205 | ulhu t1, (src) | ||
206 | PTR_ADDIU src, 2 | ||
207 | |||
208 | 1: beqz t0, 1f | ||
209 | sll t1, t1, 16 | ||
210 | |||
211 | lbu t2, (src) | ||
212 | nop | ||
213 | |||
214 | #ifdef __MIPSEB__ | ||
215 | sll t2, t2, 8 | ||
216 | #endif | ||
217 | or t1, t2 | ||
218 | |||
219 | 1: ADDC(sum, t1) | ||
258 | 220 | ||
259 | out: | 221 | /* fold checksum */ |
222 | sll v1, sum, 16 | ||
223 | addu sum, v1 | ||
224 | sltu v1, sum, v1 | ||
225 | srl sum, sum, 16 | ||
226 | addu sum, v1 | ||
227 | |||
228 | /* odd buffer alignment? */ | ||
229 | beqz t7, 1f | ||
230 | nop | ||
231 | sll v1, sum, 8 | ||
232 | srl sum, sum, 8 | ||
233 | or sum, v1 | ||
234 | andi sum, 0xffff | ||
235 | 1: | ||
236 | .set reorder | ||
237 | /* Add the passed partial csum. */ | ||
238 | ADDC(sum, a2) | ||
260 | jr ra | 239 | jr ra |
261 | move v0, sum | 240 | .set noreorder |
262 | END(csum_partial) | 241 | END(csum_partial) |