diff options
Diffstat (limited to 'arch/arm/lib/csumpartialcopygeneric.S')
-rw-r--r-- | arch/arm/lib/csumpartialcopygeneric.S | 70 |
1 file changed, 36 insertions, 34 deletions
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d3a2f4667db4..4a4609c19095 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -22,7 +22,7 @@ dst .req r1 | |||
22 | len .req r2 | 22 | len .req r2 |
23 | sum .req r3 | 23 | sum .req r3 |
24 | 24 | ||
25 | .zero: mov r0, sum | 25 | .Lzero: mov r0, sum |
26 | load_regs ea | 26 | load_regs ea |
27 | 27 | ||
28 | /* | 28 | /* |
@@ -31,8 +31,9 @@ sum .req r3 | |||
31 | * the length. Note that the source pointer hasn't been | 31 | * the length. Note that the source pointer hasn't been |
32 | * aligned yet. | 32 | * aligned yet. |
33 | */ | 33 | */ |
34 | .dst_unaligned: tst dst, #1 | 34 | .Ldst_unaligned: |
35 | beq .dst_16bit | 35 | tst dst, #1 |
36 | beq .Ldst_16bit | ||
36 | 37 | ||
37 | load1b ip | 38 | load1b ip |
38 | sub len, len, #1 | 39 | sub len, len, #1 |
@@ -41,7 +42,7 @@ sum .req r3 | |||
41 | tst dst, #2 | 42 | tst dst, #2 |
42 | moveq pc, lr @ dst is now 32bit aligned | 43 | moveq pc, lr @ dst is now 32bit aligned |
43 | 44 | ||
44 | .dst_16bit: load2b r8, ip | 45 | .Ldst_16bit: load2b r8, ip |
45 | sub len, len, #2 | 46 | sub len, len, #2 |
46 | adcs sum, sum, r8, put_byte_0 | 47 | adcs sum, sum, r8, put_byte_0 |
47 | strb r8, [dst], #1 | 48 | strb r8, [dst], #1 |
@@ -53,12 +54,12 @@ sum .req r3 | |||
53 | * Handle 0 to 7 bytes, with any alignment of source and | 54 | * Handle 0 to 7 bytes, with any alignment of source and |
54 | * destination pointers. Note that when we get here, C = 0 | 55 | * destination pointers. Note that when we get here, C = 0 |
55 | */ | 56 | */ |
56 | .less8: teq len, #0 @ check for zero count | 57 | .Lless8: teq len, #0 @ check for zero count |
57 | beq .zero | 58 | beq .Lzero |
58 | 59 | ||
59 | /* we must have at least one byte. */ | 60 | /* we must have at least one byte. */ |
60 | tst dst, #1 @ dst 16-bit aligned | 61 | tst dst, #1 @ dst 16-bit aligned |
61 | beq .less8_aligned | 62 | beq .Lless8_aligned |
62 | 63 | ||
63 | /* Align dst */ | 64 | /* Align dst */ |
64 | load1b ip | 65 | load1b ip |
@@ -66,7 +67,7 @@ sum .req r3 | |||
66 | adcs sum, sum, ip, put_byte_1 @ update checksum | 67 | adcs sum, sum, ip, put_byte_1 @ update checksum |
67 | strb ip, [dst], #1 | 68 | strb ip, [dst], #1 |
68 | tst len, #6 | 69 | tst len, #6 |
69 | beq .less8_byteonly | 70 | beq .Lless8_byteonly |
70 | 71 | ||
71 | 1: load2b r8, ip | 72 | 1: load2b r8, ip |
72 | sub len, len, #2 | 73 | sub len, len, #2 |
@@ -74,15 +75,16 @@ sum .req r3 | |||
74 | strb r8, [dst], #1 | 75 | strb r8, [dst], #1 |
75 | adcs sum, sum, ip, put_byte_1 | 76 | adcs sum, sum, ip, put_byte_1 |
76 | strb ip, [dst], #1 | 77 | strb ip, [dst], #1 |
77 | .less8_aligned: tst len, #6 | 78 | .Lless8_aligned: |
79 | tst len, #6 | ||
78 | bne 1b | 80 | bne 1b |
79 | .less8_byteonly: | 81 | .Lless8_byteonly: |
80 | tst len, #1 | 82 | tst len, #1 |
81 | beq .done | 83 | beq .Ldone |
82 | load1b r8 | 84 | load1b r8 |
83 | adcs sum, sum, r8, put_byte_0 @ update checksum | 85 | adcs sum, sum, r8, put_byte_0 @ update checksum |
84 | strb r8, [dst], #1 | 86 | strb r8, [dst], #1 |
85 | b .done | 87 | b .Ldone |
86 | 88 | ||
87 | FN_ENTRY | 89 | FN_ENTRY |
88 | mov ip, sp | 90 | mov ip, sp |
@@ -90,11 +92,11 @@ FN_ENTRY | |||
90 | sub fp, ip, #4 | 92 | sub fp, ip, #4 |
91 | 93 | ||
92 | cmp len, #8 @ Ensure that we have at least | 94 | cmp len, #8 @ Ensure that we have at least |
93 | blo .less8 @ 8 bytes to copy. | 95 | blo .Lless8 @ 8 bytes to copy. |
94 | 96 | ||
95 | adds sum, sum, #0 @ C = 0 | 97 | adds sum, sum, #0 @ C = 0 |
96 | tst dst, #3 @ Test destination alignment | 98 | tst dst, #3 @ Test destination alignment |
97 | blne .dst_unaligned @ align destination, return here | 99 | blne .Ldst_unaligned @ align destination, return here |
98 | 100 | ||
99 | /* | 101 | /* |
100 | * Ok, the dst pointer is now 32bit aligned, and we know | 102 | * Ok, the dst pointer is now 32bit aligned, and we know |
@@ -103,7 +105,7 @@ FN_ENTRY | |||
103 | */ | 105 | */ |
104 | 106 | ||
105 | tst src, #3 @ Test source alignment | 107 | tst src, #3 @ Test source alignment |
106 | bne .src_not_aligned | 108 | bne .Lsrc_not_aligned |
107 | 109 | ||
108 | /* Routine for src & dst aligned */ | 110 | /* Routine for src & dst aligned */ |
109 | 111 | ||
@@ -136,17 +138,17 @@ FN_ENTRY | |||
136 | adcs sum, sum, r4 | 138 | adcs sum, sum, r4 |
137 | 139 | ||
138 | 4: ands len, len, #3 | 140 | 4: ands len, len, #3 |
139 | beq .done | 141 | beq .Ldone |
140 | load1l r4 | 142 | load1l r4 |
141 | tst len, #2 | 143 | tst len, #2 |
142 | mov r5, r4, get_byte_0 | 144 | mov r5, r4, get_byte_0 |
143 | beq .exit | 145 | beq .Lexit |
144 | adcs sum, sum, r4, push #16 | 146 | adcs sum, sum, r4, push #16 |
145 | strb r5, [dst], #1 | 147 | strb r5, [dst], #1 |
146 | mov r5, r4, get_byte_1 | 148 | mov r5, r4, get_byte_1 |
147 | strb r5, [dst], #1 | 149 | strb r5, [dst], #1 |
148 | mov r5, r4, get_byte_2 | 150 | mov r5, r4, get_byte_2 |
149 | .exit: tst len, #1 | 151 | .Lexit: tst len, #1 |
150 | strneb r5, [dst], #1 | 152 | strneb r5, [dst], #1 |
151 | andne r5, r5, #255 | 153 | andne r5, r5, #255 |
152 | adcnes sum, sum, r5, put_byte_0 | 154 | adcnes sum, sum, r5, put_byte_0 |
@@ -157,20 +159,20 @@ FN_ENTRY | |||
157 | * the inefficient byte manipulations in the | 159 | * the inefficient byte manipulations in the |
158 | * architecture independent code. | 160 | * architecture independent code. |
159 | */ | 161 | */ |
160 | .done: adc r0, sum, #0 | 162 | .Ldone: adc r0, sum, #0 |
161 | ldr sum, [sp, #0] @ dst | 163 | ldr sum, [sp, #0] @ dst |
162 | tst sum, #1 | 164 | tst sum, #1 |
163 | movne r0, r0, ror #8 | 165 | movne r0, r0, ror #8 |
164 | load_regs ea | 166 | load_regs ea |
165 | 167 | ||
166 | .src_not_aligned: | 168 | .Lsrc_not_aligned: |
167 | adc sum, sum, #0 @ include C from dst alignment | 169 | adc sum, sum, #0 @ include C from dst alignment |
168 | and ip, src, #3 | 170 | and ip, src, #3 |
169 | bic src, src, #3 | 171 | bic src, src, #3 |
170 | load1l r5 | 172 | load1l r5 |
171 | cmp ip, #2 | 173 | cmp ip, #2 |
172 | beq .src2_aligned | 174 | beq .Lsrc2_aligned |
173 | bhi .src3_aligned | 175 | bhi .Lsrc3_aligned |
174 | mov r4, r5, pull #8 @ C = 0 | 176 | mov r4, r5, pull #8 @ C = 0 |
175 | bics ip, len, #15 | 177 | bics ip, len, #15 |
176 | beq 2f | 178 | beq 2f |
@@ -211,18 +213,18 @@ FN_ENTRY | |||
211 | adcs sum, sum, r4 | 213 | adcs sum, sum, r4 |
212 | mov r4, r5, pull #8 | 214 | mov r4, r5, pull #8 |
213 | 4: ands len, len, #3 | 215 | 4: ands len, len, #3 |
214 | beq .done | 216 | beq .Ldone |
215 | mov r5, r4, get_byte_0 | 217 | mov r5, r4, get_byte_0 |
216 | tst len, #2 | 218 | tst len, #2 |
217 | beq .exit | 219 | beq .Lexit |
218 | adcs sum, sum, r4, push #16 | 220 | adcs sum, sum, r4, push #16 |
219 | strb r5, [dst], #1 | 221 | strb r5, [dst], #1 |
220 | mov r5, r4, get_byte_1 | 222 | mov r5, r4, get_byte_1 |
221 | strb r5, [dst], #1 | 223 | strb r5, [dst], #1 |
222 | mov r5, r4, get_byte_2 | 224 | mov r5, r4, get_byte_2 |
223 | b .exit | 225 | b .Lexit |
224 | 226 | ||
225 | .src2_aligned: mov r4, r5, pull #16 | 227 | .Lsrc2_aligned: mov r4, r5, pull #16 |
226 | adds sum, sum, #0 | 228 | adds sum, sum, #0 |
227 | bics ip, len, #15 | 229 | bics ip, len, #15 |
228 | beq 2f | 230 | beq 2f |
@@ -263,20 +265,20 @@ FN_ENTRY | |||
263 | adcs sum, sum, r4 | 265 | adcs sum, sum, r4 |
264 | mov r4, r5, pull #16 | 266 | mov r4, r5, pull #16 |
265 | 4: ands len, len, #3 | 267 | 4: ands len, len, #3 |
266 | beq .done | 268 | beq .Ldone |
267 | mov r5, r4, get_byte_0 | 269 | mov r5, r4, get_byte_0 |
268 | tst len, #2 | 270 | tst len, #2 |
269 | beq .exit | 271 | beq .Lexit |
270 | adcs sum, sum, r4 | 272 | adcs sum, sum, r4 |
271 | strb r5, [dst], #1 | 273 | strb r5, [dst], #1 |
272 | mov r5, r4, get_byte_1 | 274 | mov r5, r4, get_byte_1 |
273 | strb r5, [dst], #1 | 275 | strb r5, [dst], #1 |
274 | tst len, #1 | 276 | tst len, #1 |
275 | beq .done | 277 | beq .Ldone |
276 | load1b r5 | 278 | load1b r5 |
277 | b .exit | 279 | b .Lexit |
278 | 280 | ||
279 | .src3_aligned: mov r4, r5, pull #24 | 281 | .Lsrc3_aligned: mov r4, r5, pull #24 |
280 | adds sum, sum, #0 | 282 | adds sum, sum, #0 |
281 | bics ip, len, #15 | 283 | bics ip, len, #15 |
282 | beq 2f | 284 | beq 2f |
@@ -317,10 +319,10 @@ FN_ENTRY | |||
317 | adcs sum, sum, r4 | 319 | adcs sum, sum, r4 |
318 | mov r4, r5, pull #24 | 320 | mov r4, r5, pull #24 |
319 | 4: ands len, len, #3 | 321 | 4: ands len, len, #3 |
320 | beq .done | 322 | beq .Ldone |
321 | mov r5, r4, get_byte_0 | 323 | mov r5, r4, get_byte_0 |
322 | tst len, #2 | 324 | tst len, #2 |
323 | beq .exit | 325 | beq .Lexit |
324 | strb r5, [dst], #1 | 326 | strb r5, [dst], #1 |
325 | adcs sum, sum, r4 | 327 | adcs sum, sum, r4 |
326 | load1l r4 | 328 | load1l r4 |
@@ -328,4 +330,4 @@ FN_ENTRY | |||
328 | strb r5, [dst], #1 | 330 | strb r5, [dst], #1 |
329 | adcs sum, sum, r4, push #24 | 331 | adcs sum, sum, r4, push #24 |
330 | mov r5, r4, get_byte_1 | 332 | mov r5, r4, get_byte_1 |
331 | b .exit | 333 | b .Lexit |