aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author Atsushi Nemoto <anemo@mba.ocn.ne.jp> 2006-12-07 11:04:45 -0500
committer Ralf Baechle <ralf@linux-mips.org> 2006-12-08 20:03:59 -0500
commit 773ff78838ca3c07245e45c06235e0baaa5f710a (patch)
tree c238920f34ab310a7a3d426cefbf9ebb1d5ea78c /arch
parent 52ffe760ea9ec407292d093c3f06c1cda5187228 (diff)
[MIPS] Optimize flow of csum_partial
Delete dead code at the end of the function and move small_csumcpy there. This makes some labels (maybe_end_cruft, small_memcpy, end_bytes, out) unnecessary and eliminates some branches.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/mips/lib/csum_partial.S 129
1 files changed, 54 insertions, 75 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 3bffdbb1c1f9..b04475d76f3c 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -65,64 +65,6 @@
65 65
66 .text 66 .text
67 .set noreorder 67 .set noreorder
68
69/* unknown src alignment and < 8 bytes to go */
70small_csumcpy:
71 move a1, t2
72
73 andi t0, a1, 4
74 beqz t0, 1f
75 andi t0, a1, 2
76
77 /* Still a full word to go */
78 ulw t1, (src)
79 PTR_ADDIU src, 4
80 ADDC(sum, t1)
81
821: move t1, zero
83 beqz t0, 1f
84 andi t0, a1, 1
85
86 /* Still a halfword to go */
87 ulhu t1, (src)
88 PTR_ADDIU src, 2
89
901: beqz t0, 1f
91 sll t1, t1, 16
92
93 lbu t2, (src)
94 nop
95
96#ifdef __MIPSEB__
97 sll t2, t2, 8
98#endif
99 or t1, t2
100
1011: ADDC(sum, t1)
102
103 /* fold checksum */
104 sll v1, sum, 16
105 addu sum, v1
106 sltu v1, sum, v1
107 srl sum, sum, 16
108 addu sum, v1
109
110 /* odd buffer alignment? */
111 beqz t7, 1f
112 nop
113 sll v1, sum, 8
114 srl sum, sum, 8
115 or sum, v1
116 andi sum, 0xffff
1171:
118 .set reorder
119 /* Add the passed partial csum. */
120 ADDC(sum, a2)
121 jr ra
122 .set noreorder
123
124/* ------------------------------------------------------------------------- */
125
126 .align 5 68 .align 5
127LEAF(csum_partial) 69LEAF(csum_partial)
128 move sum, zero 70 move sum, zero
@@ -132,8 +74,7 @@ LEAF(csum_partial)
132 bnez t8, small_csumcpy /* < 8 bytes to copy */ 74 bnez t8, small_csumcpy /* < 8 bytes to copy */
133 move t2, a1 75 move t2, a1
134 76
135 beqz a1, out 77 andi t7, src, 0x1 /* odd buffer? */
136 andi t7, src, 0x1 /* odd buffer? */
137 78
138hword_align: 79hword_align:
139 beqz t7, word_align 80 beqz t7, word_align
@@ -232,8 +173,9 @@ move_32bytes:
232 PTR_ADDU src, src, 0x20 173 PTR_ADDU src, src, 0x20
233 174
234do_end_words: 175do_end_words:
235 beqz t8, maybe_end_cruft 176 beqz t8, small_csumcpy
236 LONG_SRL t8, t8, 0x2 177 andi t2, a1, 0x3
178 LONG_SRL t8, t8, 0x2
237 179
238end_words: 180end_words:
239 lw t0, (src) 181 lw t0, (src)
@@ -242,21 +184,58 @@ end_words:
242 bnez t8, end_words 184 bnez t8, end_words
243 PTR_ADDU src, src, 0x4 185 PTR_ADDU src, src, 0x4
244 186
245maybe_end_cruft: 187/* unknown src alignment and < 8 bytes to go */
246 andi t2, a1, 0x3 188small_csumcpy:
189 move a1, t2
247 190
248small_memcpy: 191 andi t0, a1, 4
249 j small_csumcpy; move a1, t2 /* XXX ??? */ 192 beqz t0, 1f
250 beqz t2, out 193 andi t0, a1, 2
251 move a1, t2
252 194
253end_bytes: 195 /* Still a full word to go */
254 lb t0, (src) 196 ulw t1, (src)
255 LONG_SUBU a1, a1, 0x1 197 PTR_ADDIU src, 4
256 bnez a2, end_bytes 198 ADDC(sum, t1)
257 PTR_ADDU src, src, 0x1 199
2001: move t1, zero
201 beqz t0, 1f
202 andi t0, a1, 1
203
204 /* Still a halfword to go */
205 ulhu t1, (src)
206 PTR_ADDIU src, 2
207
2081: beqz t0, 1f
209 sll t1, t1, 16
210
211 lbu t2, (src)
212 nop
213
214#ifdef __MIPSEB__
215 sll t2, t2, 8
216#endif
217 or t1, t2
218
2191: ADDC(sum, t1)
258 220
259out: 221 /* fold checksum */
222 sll v1, sum, 16
223 addu sum, v1
224 sltu v1, sum, v1
225 srl sum, sum, 16
226 addu sum, v1
227
228 /* odd buffer alignment? */
229 beqz t7, 1f
230 nop
231 sll v1, sum, 8
232 srl sum, sum, 8
233 or sum, v1
234 andi sum, 0xffff
2351:
236 .set reorder
237 /* Add the passed partial csum. */
238 ADDC(sum, a2)
260 jr ra 239 jr ra
261 move v0, sum 240 .set noreorder
262 END(csum_partial) 241 END(csum_partial)