aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/lib/csumpartialcopygeneric.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/lib/csumpartialcopygeneric.S')
-rw-r--r--arch/arm/lib/csumpartialcopygeneric.S70
1 files changed, 36 insertions, 34 deletions
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d3a2f4667db4..4a4609c19095 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -22,7 +22,7 @@ dst .req r1
22len .req r2 22len .req r2
23sum .req r3 23sum .req r3
24 24
25.zero: mov r0, sum 25.Lzero: mov r0, sum
26 load_regs ea 26 load_regs ea
27 27
28 /* 28 /*
@@ -31,8 +31,9 @@ sum .req r3
31 * the length. Note that the source pointer hasn't been 31 * the length. Note that the source pointer hasn't been
32 * aligned yet. 32 * aligned yet.
33 */ 33 */
34.dst_unaligned: tst dst, #1 34.Ldst_unaligned:
35 beq .dst_16bit 35 tst dst, #1
36 beq .Ldst_16bit
36 37
37 load1b ip 38 load1b ip
38 sub len, len, #1 39 sub len, len, #1
@@ -41,7 +42,7 @@ sum .req r3
41 tst dst, #2 42 tst dst, #2
42 moveq pc, lr @ dst is now 32bit aligned 43 moveq pc, lr @ dst is now 32bit aligned
43 44
44.dst_16bit: load2b r8, ip 45.Ldst_16bit: load2b r8, ip
45 sub len, len, #2 46 sub len, len, #2
46 adcs sum, sum, r8, put_byte_0 47 adcs sum, sum, r8, put_byte_0
47 strb r8, [dst], #1 48 strb r8, [dst], #1
@@ -53,12 +54,12 @@ sum .req r3
53 * Handle 0 to 7 bytes, with any alignment of source and 54 * Handle 0 to 7 bytes, with any alignment of source and
54 * destination pointers. Note that when we get here, C = 0 55 * destination pointers. Note that when we get here, C = 0
55 */ 56 */
56.less8: teq len, #0 @ check for zero count 57.Lless8: teq len, #0 @ check for zero count
57 beq .zero 58 beq .Lzero
58 59
59 /* we must have at least one byte. */ 60 /* we must have at least one byte. */
60 tst dst, #1 @ dst 16-bit aligned 61 tst dst, #1 @ dst 16-bit aligned
61 beq .less8_aligned 62 beq .Lless8_aligned
62 63
63 /* Align dst */ 64 /* Align dst */
64 load1b ip 65 load1b ip
@@ -66,7 +67,7 @@ sum .req r3
66 adcs sum, sum, ip, put_byte_1 @ update checksum 67 adcs sum, sum, ip, put_byte_1 @ update checksum
67 strb ip, [dst], #1 68 strb ip, [dst], #1
68 tst len, #6 69 tst len, #6
69 beq .less8_byteonly 70 beq .Lless8_byteonly
70 71
711: load2b r8, ip 721: load2b r8, ip
72 sub len, len, #2 73 sub len, len, #2
@@ -74,15 +75,16 @@ sum .req r3
74 strb r8, [dst], #1 75 strb r8, [dst], #1
75 adcs sum, sum, ip, put_byte_1 76 adcs sum, sum, ip, put_byte_1
76 strb ip, [dst], #1 77 strb ip, [dst], #1
77.less8_aligned: tst len, #6 78.Lless8_aligned:
79 tst len, #6
78 bne 1b 80 bne 1b
79.less8_byteonly: 81.Lless8_byteonly:
80 tst len, #1 82 tst len, #1
81 beq .done 83 beq .Ldone
82 load1b r8 84 load1b r8
83 adcs sum, sum, r8, put_byte_0 @ update checksum 85 adcs sum, sum, r8, put_byte_0 @ update checksum
84 strb r8, [dst], #1 86 strb r8, [dst], #1
85 b .done 87 b .Ldone
86 88
87FN_ENTRY 89FN_ENTRY
88 mov ip, sp 90 mov ip, sp
@@ -90,11 +92,11 @@ FN_ENTRY
90 sub fp, ip, #4 92 sub fp, ip, #4
91 93
92 cmp len, #8 @ Ensure that we have at least 94 cmp len, #8 @ Ensure that we have at least
93 blo .less8 @ 8 bytes to copy. 95 blo .Lless8 @ 8 bytes to copy.
94 96
95 adds sum, sum, #0 @ C = 0 97 adds sum, sum, #0 @ C = 0
96 tst dst, #3 @ Test destination alignment 98 tst dst, #3 @ Test destination alignment
97 blne .dst_unaligned @ align destination, return here 99 blne .Ldst_unaligned @ align destination, return here
98 100
99 /* 101 /*
100 * Ok, the dst pointer is now 32bit aligned, and we know 102 * Ok, the dst pointer is now 32bit aligned, and we know
@@ -103,7 +105,7 @@ FN_ENTRY
103 */ 105 */
104 106
105 tst src, #3 @ Test source alignment 107 tst src, #3 @ Test source alignment
106 bne .src_not_aligned 108 bne .Lsrc_not_aligned
107 109
108 /* Routine for src & dst aligned */ 110 /* Routine for src & dst aligned */
109 111
@@ -136,17 +138,17 @@ FN_ENTRY
136 adcs sum, sum, r4 138 adcs sum, sum, r4
137 139
1384: ands len, len, #3 1404: ands len, len, #3
139 beq .done 141 beq .Ldone
140 load1l r4 142 load1l r4
141 tst len, #2 143 tst len, #2
142 mov r5, r4, get_byte_0 144 mov r5, r4, get_byte_0
143 beq .exit 145 beq .Lexit
144 adcs sum, sum, r4, push #16 146 adcs sum, sum, r4, push #16
145 strb r5, [dst], #1 147 strb r5, [dst], #1
146 mov r5, r4, get_byte_1 148 mov r5, r4, get_byte_1
147 strb r5, [dst], #1 149 strb r5, [dst], #1
148 mov r5, r4, get_byte_2 150 mov r5, r4, get_byte_2
149.exit: tst len, #1 151.Lexit: tst len, #1
150 strneb r5, [dst], #1 152 strneb r5, [dst], #1
151 andne r5, r5, #255 153 andne r5, r5, #255
152 adcnes sum, sum, r5, put_byte_0 154 adcnes sum, sum, r5, put_byte_0
@@ -157,20 +159,20 @@ FN_ENTRY
157 * the inefficient byte manipulations in the 159 * the inefficient byte manipulations in the
158 * architecture independent code. 160 * architecture independent code.
159 */ 161 */
160.done: adc r0, sum, #0 162.Ldone: adc r0, sum, #0
161 ldr sum, [sp, #0] @ dst 163 ldr sum, [sp, #0] @ dst
162 tst sum, #1 164 tst sum, #1
163 movne r0, r0, ror #8 165 movne r0, r0, ror #8
164 load_regs ea 166 load_regs ea
165 167
166.src_not_aligned: 168.Lsrc_not_aligned:
167 adc sum, sum, #0 @ include C from dst alignment 169 adc sum, sum, #0 @ include C from dst alignment
168 and ip, src, #3 170 and ip, src, #3
169 bic src, src, #3 171 bic src, src, #3
170 load1l r5 172 load1l r5
171 cmp ip, #2 173 cmp ip, #2
172 beq .src2_aligned 174 beq .Lsrc2_aligned
173 bhi .src3_aligned 175 bhi .Lsrc3_aligned
174 mov r4, r5, pull #8 @ C = 0 176 mov r4, r5, pull #8 @ C = 0
175 bics ip, len, #15 177 bics ip, len, #15
176 beq 2f 178 beq 2f
@@ -211,18 +213,18 @@ FN_ENTRY
211 adcs sum, sum, r4 213 adcs sum, sum, r4
212 mov r4, r5, pull #8 214 mov r4, r5, pull #8
2134: ands len, len, #3 2154: ands len, len, #3
214 beq .done 216 beq .Ldone
215 mov r5, r4, get_byte_0 217 mov r5, r4, get_byte_0
216 tst len, #2 218 tst len, #2
217 beq .exit 219 beq .Lexit
218 adcs sum, sum, r4, push #16 220 adcs sum, sum, r4, push #16
219 strb r5, [dst], #1 221 strb r5, [dst], #1
220 mov r5, r4, get_byte_1 222 mov r5, r4, get_byte_1
221 strb r5, [dst], #1 223 strb r5, [dst], #1
222 mov r5, r4, get_byte_2 224 mov r5, r4, get_byte_2
223 b .exit 225 b .Lexit
224 226
225.src2_aligned: mov r4, r5, pull #16 227.Lsrc2_aligned: mov r4, r5, pull #16
226 adds sum, sum, #0 228 adds sum, sum, #0
227 bics ip, len, #15 229 bics ip, len, #15
228 beq 2f 230 beq 2f
@@ -263,20 +265,20 @@ FN_ENTRY
263 adcs sum, sum, r4 265 adcs sum, sum, r4
264 mov r4, r5, pull #16 266 mov r4, r5, pull #16
2654: ands len, len, #3 2674: ands len, len, #3
266 beq .done 268 beq .Ldone
267 mov r5, r4, get_byte_0 269 mov r5, r4, get_byte_0
268 tst len, #2 270 tst len, #2
269 beq .exit 271 beq .Lexit
270 adcs sum, sum, r4 272 adcs sum, sum, r4
271 strb r5, [dst], #1 273 strb r5, [dst], #1
272 mov r5, r4, get_byte_1 274 mov r5, r4, get_byte_1
273 strb r5, [dst], #1 275 strb r5, [dst], #1
274 tst len, #1 276 tst len, #1
275 beq .done 277 beq .Ldone
276 load1b r5 278 load1b r5
277 b .exit 279 b .Lexit
278 280
279.src3_aligned: mov r4, r5, pull #24 281.Lsrc3_aligned: mov r4, r5, pull #24
280 adds sum, sum, #0 282 adds sum, sum, #0
281 bics ip, len, #15 283 bics ip, len, #15
282 beq 2f 284 beq 2f
@@ -317,10 +319,10 @@ FN_ENTRY
317 adcs sum, sum, r4 319 adcs sum, sum, r4
318 mov r4, r5, pull #24 320 mov r4, r5, pull #24
3194: ands len, len, #3 3214: ands len, len, #3
320 beq .done 322 beq .Ldone
321 mov r5, r4, get_byte_0 323 mov r5, r4, get_byte_0
322 tst len, #2 324 tst len, #2
323 beq .exit 325 beq .Lexit
324 strb r5, [dst], #1 326 strb r5, [dst], #1
325 adcs sum, sum, r4 327 adcs sum, sum, r4
326 load1l r4 328 load1l r4
@@ -328,4 +330,4 @@ FN_ENTRY
328 strb r5, [dst], #1 330 strb r5, [dst], #1
329 adcs sum, sum, r4, push #24 331 adcs sum, sum, r4, push #24
330 mov r5, r4, get_byte_1 332 mov r5, r4, get_byte_1
331 b .exit 333 b .Lexit