diff options
author | Chris Metcalf <cmetcalf@tilera.com> | 2010-10-14 16:39:42 -0400 |
---|---|---|
committer | Chris Metcalf <cmetcalf@tilera.com> | 2010-10-15 15:38:54 -0400 |
commit | 29507663dfa2590647a1ef66f3652a0cac033eca (patch) | |
tree | 80509f81ce8e6b6846f78e62cb0cbf1ac03321be /arch/tile/lib | |
parent | 233325b94999d4bb8df227bb39904a57509e4995 (diff) |
arch/tile: minor whitespace/naming changes for string support files
Our internal process shares memcpy, memset, etc., with libc, and
we did some minor tweaking as part of moving from uclibc to glibc,
which is now reflected in the kernel versions of these files.
There are no semantic changes in this commit, just whitespace
(memcpy_32.S now properly uses tabs), naming (memmove.c instead
of memmove_32.c, since TILE-Gx shares the file with TILEPro),
and a couple of other minor tweaks.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/lib')
-rw-r--r-- | arch/tile/lib/Makefile | 4 | ||||
-rw-r--r-- | arch/tile/lib/memcpy_32.S | 206 | ||||
-rw-r--r-- | arch/tile/lib/memmove.c (renamed from arch/tile/lib/memmove_32.c) | 0 | ||||
-rw-r--r-- | arch/tile/lib/memset_32.c | 1 | ||||
-rw-r--r-- | arch/tile/lib/strlen_32.c | 2 |
5 files changed, 109 insertions, 104 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 746dc81ed3c4..93122d5b1558 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile | |||
@@ -3,8 +3,8 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y = cacheflush.o checksum.o cpumask.o delay.o \ | 5 | lib-y = cacheflush.o checksum.o cpumask.o delay.o \ |
6 | mb_incoherent.o uaccess.o \ | 6 | mb_incoherent.o uaccess.o memmove.o \ |
7 | memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \ | 7 | memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ |
8 | strchr_$(BITS).o strlen_$(BITS).o | 8 | strchr_$(BITS).o strlen_$(BITS).o |
9 | 9 | ||
10 | ifeq ($(CONFIG_TILEGX),y) | 10 | ifeq ($(CONFIG_TILEGX),y) |
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S index 30c3b7ebb55d..2a419a6122db 100644 --- a/arch/tile/lib/memcpy_32.S +++ b/arch/tile/lib/memcpy_32.S | |||
@@ -10,14 +10,16 @@ | |||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | 10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or |
11 | * NON INFRINGEMENT. See the GNU General Public License for | 11 | * NON INFRINGEMENT. See the GNU General Public License for |
12 | * more details. | 12 | * more details. |
13 | * | ||
14 | * This file shares the implementation of the userspace memcpy and | ||
15 | * the kernel's memcpy, copy_to_user and copy_from_user. | ||
16 | */ | 13 | */ |
17 | 14 | ||
18 | #include <arch/chip.h> | 15 | #include <arch/chip.h> |
19 | 16 | ||
20 | 17 | ||
18 | /* | ||
19 | * This file shares the implementation of the userspace memcpy and | ||
20 | * the kernel's memcpy, copy_to_user and copy_from_user. | ||
21 | */ | ||
22 | |||
21 | #include <linux/linkage.h> | 23 | #include <linux/linkage.h> |
22 | 24 | ||
23 | /* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */ | 25 | /* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */ |
@@ -53,9 +55,9 @@ | |||
53 | */ | 55 | */ |
54 | ENTRY(__copy_from_user_inatomic) | 56 | ENTRY(__copy_from_user_inatomic) |
55 | .type __copy_from_user_inatomic, @function | 57 | .type __copy_from_user_inatomic, @function |
56 | FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \ | 58 | FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \ |
57 | .text.memcpy_common, \ | 59 | .text.memcpy_common, \ |
58 | .Lend_memcpy_common - __copy_from_user_inatomic) | 60 | .Lend_memcpy_common - __copy_from_user_inatomic) |
59 | { movei r29, IS_COPY_FROM_USER; j memcpy_common } | 61 | { movei r29, IS_COPY_FROM_USER; j memcpy_common } |
60 | .size __copy_from_user_inatomic, . - __copy_from_user_inatomic | 62 | .size __copy_from_user_inatomic, . - __copy_from_user_inatomic |
61 | 63 | ||
@@ -64,7 +66,7 @@ ENTRY(__copy_from_user_inatomic) | |||
64 | */ | 66 | */ |
65 | ENTRY(__copy_from_user_zeroing) | 67 | ENTRY(__copy_from_user_zeroing) |
66 | .type __copy_from_user_zeroing, @function | 68 | .type __copy_from_user_zeroing, @function |
67 | FEEDBACK_REENTER(__copy_from_user_inatomic) | 69 | FEEDBACK_REENTER(__copy_from_user_inatomic) |
68 | { movei r29, IS_COPY_FROM_USER_ZEROING; j memcpy_common } | 70 | { movei r29, IS_COPY_FROM_USER_ZEROING; j memcpy_common } |
69 | .size __copy_from_user_zeroing, . - __copy_from_user_zeroing | 71 | .size __copy_from_user_zeroing, . - __copy_from_user_zeroing |
70 | 72 | ||
@@ -74,13 +76,13 @@ ENTRY(__copy_from_user_zeroing) | |||
74 | */ | 76 | */ |
75 | ENTRY(__copy_to_user_inatomic) | 77 | ENTRY(__copy_to_user_inatomic) |
76 | .type __copy_to_user_inatomic, @function | 78 | .type __copy_to_user_inatomic, @function |
77 | FEEDBACK_REENTER(__copy_from_user_inatomic) | 79 | FEEDBACK_REENTER(__copy_from_user_inatomic) |
78 | { movei r29, IS_COPY_TO_USER; j memcpy_common } | 80 | { movei r29, IS_COPY_TO_USER; j memcpy_common } |
79 | .size __copy_to_user_inatomic, . - __copy_to_user_inatomic | 81 | .size __copy_to_user_inatomic, . - __copy_to_user_inatomic |
80 | 82 | ||
81 | ENTRY(memcpy) | 83 | ENTRY(memcpy) |
82 | .type memcpy, @function | 84 | .type memcpy, @function |
83 | FEEDBACK_REENTER(__copy_from_user_inatomic) | 85 | FEEDBACK_REENTER(__copy_from_user_inatomic) |
84 | { movei r29, IS_MEMCPY } | 86 | { movei r29, IS_MEMCPY } |
85 | .size memcpy, . - memcpy | 87 | .size memcpy, . - memcpy |
86 | /* Fall through */ | 88 | /* Fall through */ |
@@ -157,35 +159,35 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } | |||
157 | { addi r3, r1, 60; andi r9, r9, -64 } | 159 | { addi r3, r1, 60; andi r9, r9, -64 } |
158 | 160 | ||
159 | #if CHIP_HAS_WH64() | 161 | #if CHIP_HAS_WH64() |
160 | /* No need to prefetch dst, we'll just do the wh64 | 162 | /* No need to prefetch dst, we'll just do the wh64 |
161 | * right before we copy a line. | 163 | * right before we copy a line. |
162 | */ | 164 | */ |
163 | #endif | 165 | #endif |
164 | 166 | ||
165 | EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 } | 167 | EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 } |
166 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ | 168 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ |
167 | { bnzt zero, .; move r27, lr } | 169 | { bnzt zero, .; move r27, lr } |
168 | EX: { lw r6, r3; addi r3, r3, 64 } | 170 | EX: { lw r6, r3; addi r3, r3, 64 } |
169 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ | 171 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ |
170 | { bnzt zero, . } | 172 | { bnzt zero, . } |
171 | EX: { lw r7, r3; addi r3, r3, 64 } | 173 | EX: { lw r7, r3; addi r3, r3, 64 } |
172 | #if !CHIP_HAS_WH64() | 174 | #if !CHIP_HAS_WH64() |
173 | /* Prefetch the dest */ | 175 | /* Prefetch the dest */ |
174 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ | 176 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ |
175 | { bnzt zero, . } | 177 | { bnzt zero, . } |
176 | /* Use a real load to cause a TLB miss if necessary. We aren't using | 178 | /* Use a real load to cause a TLB miss if necessary. We aren't using |
177 | * r28, so this should be fine. | 179 | * r28, so this should be fine. |
178 | */ | 180 | */ |
179 | EX: { lw r28, r9; addi r9, r9, 64 } | 181 | EX: { lw r28, r9; addi r9, r9, 64 } |
180 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ | 182 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ |
181 | { bnzt zero, . } | 183 | { bnzt zero, . } |
182 | { prefetch r9; addi r9, r9, 64 } | 184 | { prefetch r9; addi r9, r9, 64 } |
183 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ | 185 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ |
184 | { bnzt zero, . } | 186 | { bnzt zero, . } |
185 | { prefetch r9; addi r9, r9, 64 } | 187 | { prefetch r9; addi r9, r9, 64 } |
186 | #endif | 188 | #endif |
187 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ | 189 | /* Intentionally stall for a few cycles to leave L2 cache alone. */ |
188 | { bz zero, .Lbig_loop2 } | 190 | { bz zero, .Lbig_loop2 } |
189 | 191 | ||
190 | /* On entry to this loop: | 192 | /* On entry to this loop: |
191 | * - r0 points to the start of dst line 0 | 193 | * - r0 points to the start of dst line 0 |
@@ -197,7 +199,7 @@ EX: { lw r28, r9; addi r9, r9, 64 } | |||
197 | * to some "safe" recently loaded address. | 199 | * to some "safe" recently loaded address. |
198 | * - r5 contains *(r1 + 60) [i.e. last word of source line 0] | 200 | * - r5 contains *(r1 + 60) [i.e. last word of source line 0] |
199 | * - r6 contains *(r1 + 64 + 60) [i.e. last word of source line 1] | 201 | * - r6 contains *(r1 + 64 + 60) [i.e. last word of source line 1] |
200 | * - r9 contains ((r0 + 63) & -64) | 202 | * - r9 contains ((r0 + 63) & -64) |
201 | * [start of next dst cache line.] | 203 | * [start of next dst cache line.] |
202 | */ | 204 | */ |
203 | 205 | ||
@@ -208,137 +210,137 @@ EX: { lw r28, r9; addi r9, r9, 64 } | |||
208 | /* Copy line 0, first stalling until r5 is ready. */ | 210 | /* Copy line 0, first stalling until r5 is ready. */ |
209 | EX: { move r12, r5; lw r16, r1 } | 211 | EX: { move r12, r5; lw r16, r1 } |
210 | { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } | 212 | { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } |
211 | /* Prefetch several lines ahead. */ | 213 | /* Prefetch several lines ahead. */ |
212 | EX: { lw r5, r3; addi r3, r3, 64 } | 214 | EX: { lw r5, r3; addi r3, r3, 64 } |
213 | { jal .Lcopy_line } | 215 | { jal .Lcopy_line } |
214 | 216 | ||
215 | /* Copy line 1, first stalling until r6 is ready. */ | 217 | /* Copy line 1, first stalling until r6 is ready. */ |
216 | EX: { move r12, r6; lw r16, r1 } | 218 | EX: { move r12, r6; lw r16, r1 } |
217 | { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } | 219 | { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } |
218 | /* Prefetch several lines ahead. */ | 220 | /* Prefetch several lines ahead. */ |
219 | EX: { lw r6, r3; addi r3, r3, 64 } | 221 | EX: { lw r6, r3; addi r3, r3, 64 } |
220 | { jal .Lcopy_line } | 222 | { jal .Lcopy_line } |
221 | 223 | ||
222 | /* Copy line 2, first stalling until r7 is ready. */ | 224 | /* Copy line 2, first stalling until r7 is ready. */ |
223 | EX: { move r12, r7; lw r16, r1 } | 225 | EX: { move r12, r7; lw r16, r1 } |
224 | { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } | 226 | { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } |
225 | /* Prefetch several lines ahead. */ | 227 | /* Prefetch several lines ahead. */ |
226 | EX: { lw r7, r3; addi r3, r3, 64 } | 228 | EX: { lw r7, r3; addi r3, r3, 64 } |
227 | /* Use up a caches-busy cycle by jumping back to the top of the | 229 | /* Use up a caches-busy cycle by jumping back to the top of the |
228 | * loop. Might as well get it out of the way now. | 230 | * loop. Might as well get it out of the way now. |
229 | */ | 231 | */ |
230 | { j .Lbig_loop } | 232 | { j .Lbig_loop } |
231 | 233 | ||
232 | 234 | ||
233 | /* On entry: | 235 | /* On entry: |
234 | * - r0 points to the destination line. | 236 | * - r0 points to the destination line. |
235 | * - r1 points to the source line. | 237 | * - r1 points to the source line. |
236 | * - r3 is the next prefetch address. | 238 | * - r3 is the next prefetch address. |
237 | * - r9 holds the last address used for wh64. | 239 | * - r9 holds the last address used for wh64. |
238 | * - r12 = WORD_15 | 240 | * - r12 = WORD_15 |
239 | * - r16 = WORD_0. | 241 | * - r16 = WORD_0. |
240 | * - r17 == r1 + 16. | 242 | * - r17 == r1 + 16. |
241 | * - r27 holds saved lr to restore. | 243 | * - r27 holds saved lr to restore. |
242 | * | 244 | * |
243 | * On exit: | 245 | * On exit: |
244 | * - r0 is incremented by 64. | 246 | * - r0 is incremented by 64. |
245 | * - r1 is incremented by 64, unless that would point to a word | 247 | * - r1 is incremented by 64, unless that would point to a word |
246 | * beyond the end of the source array, in which case it is redirected | 248 | * beyond the end of the source array, in which case it is redirected |
247 | * to point to an arbitrary word already in the cache. | 249 | * to point to an arbitrary word already in the cache. |
248 | * - r2 is decremented by 64. | 250 | * - r2 is decremented by 64. |
249 | * - r3 is unchanged, unless it points to a word beyond the | 251 | * - r3 is unchanged, unless it points to a word beyond the |
250 | * end of the source array, in which case it is redirected | 252 | * end of the source array, in which case it is redirected |
251 | * to point to an arbitrary word already in the cache. | 253 | * to point to an arbitrary word already in the cache. |
252 | * Redirecting is OK since if we are that close to the end | 254 | * Redirecting is OK since if we are that close to the end |
253 | * of the array we will not come back to this subroutine | 255 | * of the array we will not come back to this subroutine |
254 | * and use the contents of the prefetched address. | 256 | * and use the contents of the prefetched address. |
255 | * - r4 is nonzero iff r2 >= 64. | 257 | * - r4 is nonzero iff r2 >= 64. |
256 | * - r9 is incremented by 64, unless it points beyond the | 258 | * - r9 is incremented by 64, unless it points beyond the |
257 | * end of the last full destination cache line, in which | 259 | * end of the last full destination cache line, in which |
258 | * case it is redirected to a "safe address" that can be | 260 | * case it is redirected to a "safe address" that can be |
259 | * clobbered (sp - 64) | 261 | * clobbered (sp - 64) |
260 | * - lr contains the value in r27. | 262 | * - lr contains the value in r27. |
261 | */ | 263 | */ |
262 | 264 | ||
263 | /* r26 unused */ | 265 | /* r26 unused */ |
264 | 266 | ||
265 | .Lcopy_line: | 267 | .Lcopy_line: |
266 | /* TODO: when r3 goes past the end, we would like to redirect it | 268 | /* TODO: when r3 goes past the end, we would like to redirect it |
267 | * to prefetch the last partial cache line (if any) just once, for the | 269 | * to prefetch the last partial cache line (if any) just once, for the |
268 | * benefit of the final cleanup loop. But we don't want to | 270 | * benefit of the final cleanup loop. But we don't want to |
269 | * prefetch that line more than once, or subsequent prefetches | 271 | * prefetch that line more than once, or subsequent prefetches |
270 | * will go into the RTF. But then .Lbig_loop should unconditionally | 272 | * will go into the RTF. But then .Lbig_loop should unconditionally |
271 | * branch to top of loop to execute final prefetch, and its | 273 | * branch to top of loop to execute final prefetch, and its |
272 | * nop should become a conditional branch. | 274 | * nop should become a conditional branch. |
273 | */ | 275 | */ |
274 | 276 | ||
275 | /* We need two non-memory cycles here to cover the resources | 277 | /* We need two non-memory cycles here to cover the resources |
276 | * used by the loads initiated by the caller. | 278 | * used by the loads initiated by the caller. |
277 | */ | 279 | */ |
278 | { add r15, r1, r2 } | 280 | { add r15, r1, r2 } |
279 | .Lcopy_line2: | 281 | .Lcopy_line2: |
280 | { slt_u r13, r3, r15; addi r17, r1, 16 } | 282 | { slt_u r13, r3, r15; addi r17, r1, 16 } |
281 | 283 | ||
282 | /* NOTE: this will stall for one cycle as L1 is busy. */ | 284 | /* NOTE: this will stall for one cycle as L1 is busy. */ |
283 | 285 | ||
284 | /* Fill second L1D line. */ | 286 | /* Fill second L1D line. */ |
285 | EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ | 287 | EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ |
286 | 288 | ||
287 | #if CHIP_HAS_WH64() | 289 | #if CHIP_HAS_WH64() |
288 | /* Prepare destination line for writing. */ | 290 | /* Prepare destination line for writing. */ |
289 | EX: { wh64 r9; addi r9, r9, 64 } | 291 | EX: { wh64 r9; addi r9, r9, 64 } |
290 | #else | 292 | #else |
291 | /* Prefetch dest line */ | 293 | /* Prefetch dest line */ |
292 | { prefetch r9; addi r9, r9, 64 } | 294 | { prefetch r9; addi r9, r9, 64 } |
293 | #endif | 295 | #endif |
294 | /* Load seven words that are L1D hits to cover wh64 L2 usage. */ | 296 | /* Load seven words that are L1D hits to cover wh64 L2 usage. */ |
295 | 297 | ||
296 | /* Load the three remaining words from the last L1D line, which | 298 | /* Load the three remaining words from the last L1D line, which |
297 | * we know has already filled the L1D. | 299 | * we know has already filled the L1D. |
298 | */ | 300 | */ |
299 | EX: { lw r4, r1; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */ | 301 | EX: { lw r4, r1; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */ |
300 | EX: { lw r8, r1; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */ | 302 | EX: { lw r8, r1; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */ |
301 | EX: { lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */ | 303 | EX: { lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */ |
302 | 304 | ||
303 | /* Load the three remaining words from the first L1D line, first | 305 | /* Load the three remaining words from the first L1D line, first |
304 | * stalling until it has filled by "looking at" r16. | 306 | * stalling until it has filled by "looking at" r16. |
305 | */ | 307 | */ |
306 | EX: { lw r13, r1; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */ | 308 | EX: { lw r13, r1; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */ |
307 | EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_2 */ | 309 | EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_2 */ |
308 | EX: { lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */ | 310 | EX: { lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */ |
309 | 311 | ||
310 | /* Load second word from the second L1D line, first | 312 | /* Load second word from the second L1D line, first |
311 | * stalling until it has filled by "looking at" r17. | 313 | * stalling until it has filled by "looking at" r17. |
312 | */ | 314 | */ |
313 | EX: { lw r19, r1; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */ | 315 | EX: { lw r19, r1; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */ |
314 | 316 | ||
315 | /* Store last word to the destination line, potentially dirtying it | 317 | /* Store last word to the destination line, potentially dirtying it |
316 | * for the first time, which keeps the L2 busy for two cycles. | 318 | * for the first time, which keeps the L2 busy for two cycles. |
317 | */ | 319 | */ |
318 | EX: { sw r10, r12 } /* store(WORD_15) */ | 320 | EX: { sw r10, r12 } /* store(WORD_15) */ |
319 | 321 | ||
320 | /* Use two L1D hits to cover the sw L2 access above. */ | 322 | /* Use two L1D hits to cover the sw L2 access above. */ |
321 | EX: { lw r10, r1; addi r1, r1, 4 } /* r10 = WORD_6 */ | 323 | EX: { lw r10, r1; addi r1, r1, 4 } /* r10 = WORD_6 */ |
322 | EX: { lw r12, r1; addi r1, r1, 4 } /* r12 = WORD_7 */ | 324 | EX: { lw r12, r1; addi r1, r1, 4 } /* r12 = WORD_7 */ |
323 | 325 | ||
324 | /* Fill third L1D line. */ | 326 | /* Fill third L1D line. */ |
325 | EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ | 327 | EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ |
326 | 328 | ||
327 | /* Store first L1D line. */ | 329 | /* Store first L1D line. */ |
328 | EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ | 330 | EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ |
329 | EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ | 331 | EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ |
330 | EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ | 332 | EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ |
331 | #if CHIP_HAS_WH64() | 333 | #if CHIP_HAS_WH64() |
332 | EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ | 334 | EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ |
333 | #else | 335 | #else |
334 | /* Back up r9 to a cache line we are already storing to | 336 | /* Back up r9 to a cache line we are already storing to |
335 | * if it gets past the end of the dest vector. Strictly speaking, | 337 | * if it gets past the end of the dest vector. Strictly speaking, |
336 | * we don't need to back up to the start of a cache line, but it's free | 338 | * we don't need to back up to the start of a cache line, but it's free |
337 | * and tidy, so why not? | 339 | * and tidy, so why not? |
338 | */ | 340 | */ |
339 | EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */ | 341 | EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */ |
340 | #endif | 342 | #endif |
341 | /* Store second L1D line. */ | 343 | /* Store second L1D line. */ |
342 | EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */ | 344 | EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */ |
343 | EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */ | 345 | EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */ |
344 | EX: { sw r0, r10; addi r0, r0, 4 } /* store(WORD_6) */ | 346 | EX: { sw r0, r10; addi r0, r0, 4 } /* store(WORD_6) */ |
@@ -348,30 +350,30 @@ EX: { lw r13, r1; addi r1, r1, 4; move zero, r18 } /* r13 = WORD_9 */ | |||
348 | EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_10 */ | 350 | EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_10 */ |
349 | EX: { lw r15, r1; move r1, r20 } /* r15 = WORD_11 */ | 351 | EX: { lw r15, r1; move r1, r20 } /* r15 = WORD_11 */ |
350 | 352 | ||
351 | /* Store third L1D line. */ | 353 | /* Store third L1D line. */ |
352 | EX: { sw r0, r18; addi r0, r0, 4 } /* store(WORD_8) */ | 354 | EX: { sw r0, r18; addi r0, r0, 4 } /* store(WORD_8) */ |
353 | EX: { sw r0, r13; addi r0, r0, 4 } /* store(WORD_9) */ | 355 | EX: { sw r0, r13; addi r0, r0, 4 } /* store(WORD_9) */ |
354 | EX: { sw r0, r14; addi r0, r0, 4 } /* store(WORD_10) */ | 356 | EX: { sw r0, r14; addi r0, r0, 4 } /* store(WORD_10) */ |
355 | EX: { sw r0, r15; addi r0, r0, 4 } /* store(WORD_11) */ | 357 | EX: { sw r0, r15; addi r0, r0, 4 } /* store(WORD_11) */ |
356 | 358 | ||
357 | /* Store rest of fourth L1D line. */ | 359 | /* Store rest of fourth L1D line. */ |
358 | EX: { sw r0, r4; addi r0, r0, 4 } /* store(WORD_12) */ | 360 | EX: { sw r0, r4; addi r0, r0, 4 } /* store(WORD_12) */ |
359 | { | 361 | { |
360 | EX: sw r0, r8 /* store(WORD_13) */ | 362 | EX: sw r0, r8 /* store(WORD_13) */ |
361 | addi r0, r0, 4 | 363 | addi r0, r0, 4 |
362 | /* Will r2 be >= 64 after we subtract 64 below? */ | 364 | /* Will r2 be >= 64 after we subtract 64 below? */ |
363 | shri r4, r2, 7 | 365 | shri r4, r2, 7 |
364 | } | 366 | } |
365 | { | 367 | { |
366 | EX: sw r0, r11 /* store(WORD_14) */ | 368 | EX: sw r0, r11 /* store(WORD_14) */ |
367 | addi r0, r0, 8 | 369 | addi r0, r0, 8 |
368 | /* Record 64 bytes successfully copied. */ | 370 | /* Record 64 bytes successfully copied. */ |
369 | addi r2, r2, -64 | 371 | addi r2, r2, -64 |
370 | } | 372 | } |
371 | 373 | ||
372 | { jrp lr; move lr, r27 } | 374 | { jrp lr; move lr, r27 } |
373 | 375 | ||
374 | /* Convey to the backtrace library that the stack frame is size | 376 | /* Convey to the backtrace library that the stack frame is size |
375 | * zero, and the real return address is on the stack rather than | 377 | * zero, and the real return address is on the stack rather than |
376 | * in 'lr'. | 378 | * in 'lr'. |
377 | */ | 379 | */ |
diff --git a/arch/tile/lib/memmove_32.c b/arch/tile/lib/memmove.c index fd615ae6ade7..fd615ae6ade7 100644 --- a/arch/tile/lib/memmove_32.c +++ b/arch/tile/lib/memmove.c | |||
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c index d014c1fbcbc2..57dbb3a5bff8 100644 --- a/arch/tile/lib/memset_32.c +++ b/arch/tile/lib/memset_32.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | 20 | ||
21 | #undef memset | ||
21 | 22 | ||
22 | void *memset(void *s, int c, size_t n) | 23 | void *memset(void *s, int c, size_t n) |
23 | { | 24 | { |
diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c index f26f88e11e4a..4974292a5534 100644 --- a/arch/tile/lib/strlen_32.c +++ b/arch/tile/lib/strlen_32.c | |||
@@ -16,6 +16,8 @@ | |||
16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | 18 | ||
19 | #undef strlen | ||
20 | |||
19 | size_t strlen(const char *s) | 21 | size_t strlen(const char *s) |
20 | { | 22 | { |
21 | /* Get an aligned pointer. */ | 23 | /* Get an aligned pointer. */ |