-rw-r--r--  arch/tile/lib/Makefile                                              |   4
-rw-r--r--  arch/tile/lib/memcpy_32.S                                           | 206
-rw-r--r--  arch/tile/lib/memmove.c (renamed from arch/tile/lib/memmove_32.c)   |   0
-rw-r--r--  arch/tile/lib/memset_32.c                                           |   1
-rw-r--r--  arch/tile/lib/strlen_32.c                                           |   2
5 files changed, 109 insertions, 104 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 746dc81ed3c4..93122d5b1558 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -3,8 +3,8 @@
 #
 
 lib-y = cacheflush.o checksum.o cpumask.o delay.o \
-mb_incoherent.o uaccess.o \
-memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \
+mb_incoherent.o uaccess.o memmove.o \
+memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \
 strchr_$(BITS).o strlen_$(BITS).o
 
 ifeq ($(CONFIG_TILEGX),y)
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index 30c3b7ebb55d..2a419a6122db 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -10,14 +10,16 @@
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
- *
- * This file shares the implementation of the userspace memcpy and
- * the kernel's memcpy, copy_to_user and copy_from_user.
 */
 
 #include <arch/chip.h>
 
 
+/*
+ * This file shares the implementation of the userspace memcpy and
+ * the kernel's memcpy, copy_to_user and copy_from_user.
+ */
+
 #include <linux/linkage.h>
 
 /* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */
@@ -53,9 +55,9 @@
 */
 ENTRY(__copy_from_user_inatomic)
 .type __copy_from_user_inatomic, @function
 FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \
 .text.memcpy_common, \
 .Lend_memcpy_common - __copy_from_user_inatomic)
 { movei r29, IS_COPY_FROM_USER; j memcpy_common }
 .size __copy_from_user_inatomic, . - __copy_from_user_inatomic
 
@@ -64,7 +66,7 @@ ENTRY(__copy_from_user_inatomic)
 */
 ENTRY(__copy_from_user_zeroing)
 .type __copy_from_user_zeroing, @function
 FEEDBACK_REENTER(__copy_from_user_inatomic)
 { movei r29, IS_COPY_FROM_USER_ZEROING; j memcpy_common }
 .size __copy_from_user_zeroing, . - __copy_from_user_zeroing
 
@@ -74,13 +76,13 @@ ENTRY(__copy_from_user_zeroing)
 */
 ENTRY(__copy_to_user_inatomic)
 .type __copy_to_user_inatomic, @function
 FEEDBACK_REENTER(__copy_from_user_inatomic)
 { movei r29, IS_COPY_TO_USER; j memcpy_common }
 .size __copy_to_user_inatomic, . - __copy_to_user_inatomic
 
 ENTRY(memcpy)
 .type memcpy, @function
 FEEDBACK_REENTER(__copy_from_user_inatomic)
 { movei r29, IS_MEMCPY }
 .size memcpy, . - memcpy
 /* Fall through */
@@ -157,35 +159,35 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
 { addi r3, r1, 60; andi r9, r9, -64 }
 
 #if CHIP_HAS_WH64()
 /* No need to prefetch dst, we'll just do the wh64
 * right before we copy a line.
 */
 #endif
 
 EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 }
 /* Intentionally stall for a few cycles to leave L2 cache alone. */
 { bnzt zero, .; move r27, lr }
 EX: { lw r6, r3; addi r3, r3, 64 }
 /* Intentionally stall for a few cycles to leave L2 cache alone. */
 { bnzt zero, . }
 EX: { lw r7, r3; addi r3, r3, 64 }
 #if !CHIP_HAS_WH64()
 /* Prefetch the dest */
 /* Intentionally stall for a few cycles to leave L2 cache alone. */
 { bnzt zero, . }
 /* Use a real load to cause a TLB miss if necessary. We aren't using
 * r28, so this should be fine.
 */
 EX: { lw r28, r9; addi r9, r9, 64 }
 /* Intentionally stall for a few cycles to leave L2 cache alone. */
 { bnzt zero, . }
 { prefetch r9; addi r9, r9, 64 }
 /* Intentionally stall for a few cycles to leave L2 cache alone. */
 { bnzt zero, . }
 { prefetch r9; addi r9, r9, 64 }
 #endif
 /* Intentionally stall for a few cycles to leave L2 cache alone. */
 { bz zero, .Lbig_loop2 }
 
 /* On entry to this loop:
 * - r0 points to the start of dst line 0
@@ -197,7 +199,7 @@ EX: { lw r28, r9; addi r9, r9, 64 }
 * to some "safe" recently loaded address.
 * - r5 contains *(r1 + 60) [i.e. last word of source line 0]
 * - r6 contains *(r1 + 64 + 60) [i.e. last word of source line 1]
 * - r9 contains ((r0 + 63) & -64)
 * [start of next dst cache line.]
 */
 
@@ -208,137 +210,137 @@ EX: { lw r28, r9; addi r9, r9, 64 }
 /* Copy line 0, first stalling until r5 is ready. */
 EX: { move r12, r5; lw r16, r1 }
 { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
 /* Prefetch several lines ahead. */
 EX: { lw r5, r3; addi r3, r3, 64 }
 { jal .Lcopy_line }
 
 /* Copy line 1, first stalling until r6 is ready. */
 EX: { move r12, r6; lw r16, r1 }
 { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
 /* Prefetch several lines ahead. */
 EX: { lw r6, r3; addi r3, r3, 64 }
 { jal .Lcopy_line }
 
 /* Copy line 2, first stalling until r7 is ready. */
 EX: { move r12, r7; lw r16, r1 }
 { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 }
 /* Prefetch several lines ahead. */
 EX: { lw r7, r3; addi r3, r3, 64 }
 /* Use up a caches-busy cycle by jumping back to the top of the
 * loop. Might as well get it out of the way now.
 */
 { j .Lbig_loop }
 
 
 /* On entry:
 * - r0 points to the destination line.
 * - r1 points to the source line.
 * - r3 is the next prefetch address.
 * - r9 holds the last address used for wh64.
 * - r12 = WORD_15
 * - r16 = WORD_0.
 * - r17 == r1 + 16.
 * - r27 holds saved lr to restore.
 *
 * On exit:
 * - r0 is incremented by 64.
 * - r1 is incremented by 64, unless that would point to a word
 * beyond the end of the source array, in which case it is redirected
 * to point to an arbitrary word already in the cache.
 * - r2 is decremented by 64.
 * - r3 is unchanged, unless it points to a word beyond the
 * end of the source array, in which case it is redirected
 * to point to an arbitrary word already in the cache.
 * Redirecting is OK since if we are that close to the end
 * of the array we will not come back to this subroutine
 * and use the contents of the prefetched address.
 * - r4 is nonzero iff r2 >= 64.
 * - r9 is incremented by 64, unless it points beyond the
 * end of the last full destination cache line, in which
 * case it is redirected to a "safe address" that can be
 * clobbered (sp - 64)
 * - lr contains the value in r27.
 */
 
 /* r26 unused */
 
 .Lcopy_line:
 /* TODO: when r3 goes past the end, we would like to redirect it
 * to prefetch the last partial cache line (if any) just once, for the
 * benefit of the final cleanup loop. But we don't want to
 * prefetch that line more than once, or subsequent prefetches
 * will go into the RTF. But then .Lbig_loop should unconditionally
 * branch to top of loop to execute final prefetch, and its
 * nop should become a conditional branch.
 */
 
 /* We need two non-memory cycles here to cover the resources
 * used by the loads initiated by the caller.
 */
 { add r15, r1, r2 }
 .Lcopy_line2:
 { slt_u r13, r3, r15; addi r17, r1, 16 }
 
 /* NOTE: this will stall for one cycle as L1 is busy. */
 
 /* Fill second L1D line. */
 EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
 
 #if CHIP_HAS_WH64()
 /* Prepare destination line for writing. */
 EX: { wh64 r9; addi r9, r9, 64 }
 #else
 /* Prefetch dest line */
 { prefetch r9; addi r9, r9, 64 }
 #endif
 /* Load seven words that are L1D hits to cover wh64 L2 usage. */
 
 /* Load the three remaining words from the last L1D line, which
 * we know has already filled the L1D.
 */
 EX: { lw r4, r1; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */
 EX: { lw r8, r1; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */
 EX: { lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */
 
 /* Load the three remaining words from the first L1D line, first
 * stalling until it has filled by "looking at" r16.
 */
 EX: { lw r13, r1; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */
 EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_2 */
 EX: { lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */
 
 /* Load second word from the second L1D line, first
 * stalling until it has filled by "looking at" r17.
 */
 EX: { lw r19, r1; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */
 
 /* Store last word to the destination line, potentially dirtying it
 * for the first time, which keeps the L2 busy for two cycles.
 */
 EX: { sw r10, r12 } /* store(WORD_15) */
 
 /* Use two L1D hits to cover the sw L2 access above. */
 EX: { lw r10, r1; addi r1, r1, 4 } /* r10 = WORD_6 */
 EX: { lw r12, r1; addi r1, r1, 4 } /* r12 = WORD_7 */
 
 /* Fill third L1D line. */
 EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
 
 /* Store first L1D line. */
 EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
 EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
 EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
 #if CHIP_HAS_WH64()
 EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
 #else
 /* Back up the r9 to a cache line we are already storing to
 * if it gets past the end of the dest vector. Strictly speaking,
 * we don't need to back up to the start of a cache line, but it's free
 * and tidy, so why not?
 */
 EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */
 #endif
 /* Store second L1D line. */
 EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
 EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */
 EX: { sw r0, r10; addi r0, r0, 4 } /* store(WORD_6) */
@@ -348,30 +350,30 @@ EX: { lw r13, r1; addi r1, r1, 4; move zero, r18 } /* r13 = WORD_9 */
 EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_10 */
 EX: { lw r15, r1; move r1, r20 } /* r15 = WORD_11 */
 
 /* Store third L1D line. */
 EX: { sw r0, r18; addi r0, r0, 4 } /* store(WORD_8) */
 EX: { sw r0, r13; addi r0, r0, 4 } /* store(WORD_9) */
 EX: { sw r0, r14; addi r0, r0, 4 } /* store(WORD_10) */
 EX: { sw r0, r15; addi r0, r0, 4 } /* store(WORD_11) */
 
 /* Store rest of fourth L1D line. */
 EX: { sw r0, r4; addi r0, r0, 4 } /* store(WORD_12) */
 {
 EX: sw r0, r8 /* store(WORD_13) */
 addi r0, r0, 4
 /* Will r2 be > 64 after we subtract 64 below? */
 shri r4, r2, 7
 }
 {
 EX: sw r0, r11 /* store(WORD_14) */
 addi r0, r0, 8
 /* Record 64 bytes successfully copied. */
 addi r2, r2, -64
 }
 
 { jrp lr; move lr, r27 }
 
 /* Convey to the backtrace library that the stack frame is size
 * zero, and the real return address is on the stack rather than
 * in 'lr'.
 */
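
The comments in the hunks above spell out the core technique of memcpy_common: copy one 64-byte cache line per iteration, keep the source prefetched a few lines ahead, claim each destination line with wh64 (where the chip supports it) so it is never read before being overwritten, and interleave loads and stores so the L1/L2 pipelines stay busy. The C sketch below only illustrates that overall structure; it assumes word-aligned pointers (as the big loop does after its alignment preamble), CACHE_LINE, prefetch_line() and prepare_dst_line() are illustrative stand-ins rather than real Tile intrinsics, and neither the cycle-level scheduling nor the end-of-buffer pointer redirection described in the .Lcopy_line comment is modeled.

#include <stddef.h>
#include <stdint.h>

#define CACHE_LINE 64   /* cache line size assumed by the loop above */

/* Illustrative stand-ins for the hardware prefetch and wh64 operations. */
static inline void prefetch_line(const void *p) { (void)p; }
static inline void prepare_dst_line(void *p)    { (void)p; }

void *memcpy_sketch(void *dst, const void *src, size_t n)
{
	uint32_t *d = dst;             /* assumes word-aligned dst */
	const uint32_t *s = src;       /* assumes word-aligned src */

	while (n >= CACHE_LINE) {
		/* Stay a few lines ahead of the copy, like the r3 prefetch loads. */
		prefetch_line((const char *)s + 3 * CACHE_LINE);
		/* Claim the destination line without fetching it (wh64). */
		prepare_dst_line(d);
		/* 16 words per line; the assembly schedules these by hand. */
		for (int i = 0; i < CACHE_LINE / 4; i++)
			d[i] = s[i];
		d += CACHE_LINE / 4;
		s += CACHE_LINE / 4;
		n -= CACHE_LINE;
	}

	/* Tail: the assembly has word and byte cleanup loops; bytes suffice here. */
	unsigned char *db = (unsigned char *)d;
	const unsigned char *sb = (const unsigned char *)s;
	while (n--)
		*db++ = *sb++;
	return dst;
}
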
diff --git a/arch/tile/lib/memmove_32.c b/arch/tile/lib/memmove.c
index fd615ae6ade7..fd615ae6ade7 100644
--- a/arch/tile/lib/memmove_32.c
+++ b/arch/tile/lib/memmove.c
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
index d014c1fbcbc2..57dbb3a5bff8 100644
--- a/arch/tile/lib/memset_32.c
+++ b/arch/tile/lib/memset_32.c
@@ -18,6 +18,7 @@
 #include <linux/string.h>
 #include <linux/module.h>
 
+#undef memset
 
 void *memset(void *s, int c, size_t n)
 {
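
The only change to memset_32.c is the new #undef. Presumably (the diff itself does not say) a kernel header can define memset as a macro, for example one wrapping a builtin, while this file supplies the real out-of-line symbol; the macro has to be dropped before the definition, or the function-like macro would expand right where the function is being defined. A minimal sketch of the pattern, with a purely illustrative macro:

#include <stddef.h>

/* Illustrative only: a header might remap the name so ordinary
 * callers get an inline/builtin form. */
#define memset(s, c, n) __builtin_memset((s), (c), (n))

/* The file that owns the real out-of-line definition drops the macro
 * first, so the line below declares the actual function instead of
 * expanding the macro. */
#undef memset

void *memset(void *s, int c, size_t n)
{
	unsigned char *p = s;

	while (n--)
		*p++ = (unsigned char)c;
	return s;
}
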
diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c
index f26f88e11e4a..4974292a5534 100644
--- a/arch/tile/lib/strlen_32.c
+++ b/arch/tile/lib/strlen_32.c
@@ -16,6 +16,8 @@
 #include <linux/string.h>
 #include <linux/module.h>
 
+#undef strlen
+
 size_t strlen(const char *s)
 {
 /* Get an aligned pointer. */
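
strlen_32.c gets the same treatment: #undef strlen ahead of the real definition. The one body line visible in the context, "Get an aligned pointer," points at the usual strategy of aligning first and then scanning a word at a time. A hedged C sketch of that general idea follows; the zero-byte test is the classic portable bit trick, not necessarily the instruction the Tile implementation actually uses.

#include <stddef.h>
#include <stdint.h>

size_t strlen_sketch(const char *s)
{
	const char *p = s;

	/* Get an aligned pointer, checking the leading bytes one at a time. */
	while ((uintptr_t)p % sizeof(uint32_t) != 0) {
		if (*p == '\0')
			return (size_t)(p - s);
		p++;
	}

	/* Scan a 32-bit word at a time; the expression below is nonzero
	 * exactly when some byte in the word is zero. */
	const uint32_t *w = (const uint32_t *)p;
	while ((((*w - 0x01010101u) & ~*w) & 0x80808080u) == 0)
		w++;

	/* A zero byte is somewhere in *w; finish byte by byte. */
	p = (const char *)w;
	while (*p != '\0')
		p++;
	return (size_t)(p - s);
}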