aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc64/lib/U1memcpy.S
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2005-06-27 18:42:04 -0400
committerDavid S. Miller <davem@davemloft.net>2005-06-27 18:42:04 -0400
commitb445e26cbf784cdba10f2b6c3e2cd3ee7bab360a (patch)
tree8c8c377a7e5cbf608d730ec45e091e4f2b826a82 /arch/sparc64/lib/U1memcpy.S
parent020f46a39eb7b99a575b9f4d105fce2b142acdf1 (diff)
[SPARC64]: Avoid membar instructions in delay slots.
In particular, avoid membar instructions in the delay slot of a jmpl instruction. UltraSPARC-I, II, IIi, and IIe have a bug, documented in the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51 The long and short of it is that if the IMU unit misses on a branch or jmpl, and there is a store buffer synchronizing membar in the delay slot, the chip can stop fetching instructions. If interrupts are enabled or some other trap is enabled, the chip will unwedge itself, but performance will suffer. We already had a workaround for this bug in a few spots, but it's better to have the entire tree sanitized for this rule. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/lib/U1memcpy.S')
-rw-r--r--arch/sparc64/lib/U1memcpy.S103
1 files changed, 53 insertions, 50 deletions
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S
index da9b520c7189..bafd2fc07acb 100644
--- a/arch/sparc64/lib/U1memcpy.S
+++ b/arch/sparc64/lib/U1memcpy.S
@@ -87,14 +87,17 @@
87#define LOOP_CHUNK3(src, dest, len, branch_dest) \ 87#define LOOP_CHUNK3(src, dest, len, branch_dest) \
88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) 88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
89 89
90#define DO_SYNC membar #Sync;
90#define STORE_SYNC(dest, fsrc) \ 91#define STORE_SYNC(dest, fsrc) \
91 EX_ST(STORE_BLK(%fsrc, %dest)); \ 92 EX_ST(STORE_BLK(%fsrc, %dest)); \
92 add %dest, 0x40, %dest; 93 add %dest, 0x40, %dest; \
94 DO_SYNC
93 95
94#define STORE_JUMP(dest, fsrc, target) \ 96#define STORE_JUMP(dest, fsrc, target) \
95 EX_ST(STORE_BLK(%fsrc, %dest)); \ 97 EX_ST(STORE_BLK(%fsrc, %dest)); \
96 add %dest, 0x40, %dest; \ 98 add %dest, 0x40, %dest; \
97 ba,pt %xcc, target; 99 ba,pt %xcc, target; \
100 nop;
98 101
99#define FINISH_VISCHUNK(dest, f0, f1, left) \ 102#define FINISH_VISCHUNK(dest, f0, f1, left) \
100 subcc %left, 8, %left;\ 103 subcc %left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
239 ba,pt %xcc, 1b+4 242 ba,pt %xcc, 1b+4
240 faligndata %f0, %f2, %f48 243 faligndata %f0, %f2, %f48
2411: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 2441: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
242 STORE_SYNC(o0, f48) membar #Sync 245 STORE_SYNC(o0, f48)
243 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 246 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
244 STORE_JUMP(o0, f48, 40f) membar #Sync 247 STORE_JUMP(o0, f48, 40f)
2452: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 2482: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
246 STORE_SYNC(o0, f48) membar #Sync 249 STORE_SYNC(o0, f48)
247 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 250 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
248 STORE_JUMP(o0, f48, 48f) membar #Sync 251 STORE_JUMP(o0, f48, 48f)
2493: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 2523: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
250 STORE_SYNC(o0, f48) membar #Sync 253 STORE_SYNC(o0, f48)
251 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 254 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
252 STORE_JUMP(o0, f48, 56f) membar #Sync 255 STORE_JUMP(o0, f48, 56f)
253 256
2541: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 2571: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
255 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 258 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
260 ba,pt %xcc, 1b+4 263 ba,pt %xcc, 1b+4
261 faligndata %f2, %f4, %f48 264 faligndata %f2, %f4, %f48
2621: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 2651: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
263 STORE_SYNC(o0, f48) membar #Sync 266 STORE_SYNC(o0, f48)
264 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 267 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
265 STORE_JUMP(o0, f48, 41f) membar #Sync 268 STORE_JUMP(o0, f48, 41f)
2662: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 2692: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
267 STORE_SYNC(o0, f48) membar #Sync 270 STORE_SYNC(o0, f48)
268 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 271 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
269 STORE_JUMP(o0, f48, 49f) membar #Sync 272 STORE_JUMP(o0, f48, 49f)
2703: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 2733: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
271 STORE_SYNC(o0, f48) membar #Sync 274 STORE_SYNC(o0, f48)
272 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 275 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
273 STORE_JUMP(o0, f48, 57f) membar #Sync 276 STORE_JUMP(o0, f48, 57f)
274 277
2751: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 2781: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
276 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 279 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
281 ba,pt %xcc, 1b+4 284 ba,pt %xcc, 1b+4
282 faligndata %f4, %f6, %f48 285 faligndata %f4, %f6, %f48
2831: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 2861: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
284 STORE_SYNC(o0, f48) membar #Sync 287 STORE_SYNC(o0, f48)
285 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 288 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
286 STORE_JUMP(o0, f48, 42f) membar #Sync 289 STORE_JUMP(o0, f48, 42f)
2872: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 2902: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
288 STORE_SYNC(o0, f48) membar #Sync 291 STORE_SYNC(o0, f48)
289 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 292 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
290 STORE_JUMP(o0, f48, 50f) membar #Sync 293 STORE_JUMP(o0, f48, 50f)
2913: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 2943: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
292 STORE_SYNC(o0, f48) membar #Sync 295 STORE_SYNC(o0, f48)
293 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 296 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
294 STORE_JUMP(o0, f48, 58f) membar #Sync 297 STORE_JUMP(o0, f48, 58f)
295 298
2961: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 2991: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
297 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 300 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
302 ba,pt %xcc, 1b+4 305 ba,pt %xcc, 1b+4
303 faligndata %f6, %f8, %f48 306 faligndata %f6, %f8, %f48
3041: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 3071: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
305 STORE_SYNC(o0, f48) membar #Sync 308 STORE_SYNC(o0, f48)
306 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 309 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
307 STORE_JUMP(o0, f48, 43f) membar #Sync 310 STORE_JUMP(o0, f48, 43f)
3082: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 3112: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
309 STORE_SYNC(o0, f48) membar #Sync 312 STORE_SYNC(o0, f48)
310 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 313 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
311 STORE_JUMP(o0, f48, 51f) membar #Sync 314 STORE_JUMP(o0, f48, 51f)
3123: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 3153: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
313 STORE_SYNC(o0, f48) membar #Sync 316 STORE_SYNC(o0, f48)
314 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 317 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
315 STORE_JUMP(o0, f48, 59f) membar #Sync 318 STORE_JUMP(o0, f48, 59f)
316 319
3171: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 3201: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
318 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 321 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
323 ba,pt %xcc, 1b+4 326 ba,pt %xcc, 1b+4
324 faligndata %f8, %f10, %f48 327 faligndata %f8, %f10, %f48
3251: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 3281: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
326 STORE_SYNC(o0, f48) membar #Sync 329 STORE_SYNC(o0, f48)
327 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 330 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
328 STORE_JUMP(o0, f48, 44f) membar #Sync 331 STORE_JUMP(o0, f48, 44f)
3292: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 3322: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
330 STORE_SYNC(o0, f48) membar #Sync 333 STORE_SYNC(o0, f48)
331 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 334 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
332 STORE_JUMP(o0, f48, 52f) membar #Sync 335 STORE_JUMP(o0, f48, 52f)
3333: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 3363: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
334 STORE_SYNC(o0, f48) membar #Sync 337 STORE_SYNC(o0, f48)
335 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 338 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
336 STORE_JUMP(o0, f48, 60f) membar #Sync 339 STORE_JUMP(o0, f48, 60f)
337 340
3381: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 3411: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
339 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 342 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
344 ba,pt %xcc, 1b+4 347 ba,pt %xcc, 1b+4
345 faligndata %f10, %f12, %f48 348 faligndata %f10, %f12, %f48
3461: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 3491: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
347 STORE_SYNC(o0, f48) membar #Sync 350 STORE_SYNC(o0, f48)
348 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 351 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
349 STORE_JUMP(o0, f48, 45f) membar #Sync 352 STORE_JUMP(o0, f48, 45f)
3502: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 3532: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
351 STORE_SYNC(o0, f48) membar #Sync 354 STORE_SYNC(o0, f48)
352 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 355 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
353 STORE_JUMP(o0, f48, 53f) membar #Sync 356 STORE_JUMP(o0, f48, 53f)
3543: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 3573: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
355 STORE_SYNC(o0, f48) membar #Sync 358 STORE_SYNC(o0, f48)
356 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 359 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
357 STORE_JUMP(o0, f48, 61f) membar #Sync 360 STORE_JUMP(o0, f48, 61f)
358 361
3591: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 3621: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
360 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 363 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
365 ba,pt %xcc, 1b+4 368 ba,pt %xcc, 1b+4
366 faligndata %f12, %f14, %f48 369 faligndata %f12, %f14, %f48
3671: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 3701: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
368 STORE_SYNC(o0, f48) membar #Sync 371 STORE_SYNC(o0, f48)
369 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 372 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
370 STORE_JUMP(o0, f48, 46f) membar #Sync 373 STORE_JUMP(o0, f48, 46f)
3712: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 3742: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
372 STORE_SYNC(o0, f48) membar #Sync 375 STORE_SYNC(o0, f48)
373 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 376 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
374 STORE_JUMP(o0, f48, 54f) membar #Sync 377 STORE_JUMP(o0, f48, 54f)
3753: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 3783: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
376 STORE_SYNC(o0, f48) membar #Sync 379 STORE_SYNC(o0, f48)
377 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 380 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
378 STORE_JUMP(o0, f48, 62f) membar #Sync 381 STORE_JUMP(o0, f48, 62f)
379 382
3801: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 3831: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
381 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 384 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
386 ba,pt %xcc, 1b+4 389 ba,pt %xcc, 1b+4
387 faligndata %f14, %f16, %f48 390 faligndata %f14, %f16, %f48
3881: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 3911: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
389 STORE_SYNC(o0, f48) membar #Sync 392 STORE_SYNC(o0, f48)
390 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 393 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
391 STORE_JUMP(o0, f48, 47f) membar #Sync 394 STORE_JUMP(o0, f48, 47f)
3922: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 3952: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
393 STORE_SYNC(o0, f48) membar #Sync 396 STORE_SYNC(o0, f48)
394 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 397 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
395 STORE_JUMP(o0, f48, 55f) membar #Sync 398 STORE_JUMP(o0, f48, 55f)
3963: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 3993: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
397 STORE_SYNC(o0, f48) membar #Sync 400 STORE_SYNC(o0, f48)
398 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 401 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
399 STORE_JUMP(o0, f48, 63f) membar #Sync 402 STORE_JUMP(o0, f48, 63f)
400 403
40140: FINISH_VISCHUNK(o0, f0, f2, g3) 40440: FINISH_VISCHUNK(o0, f0, f2, g3)
40241: FINISH_VISCHUNK(o0, f2, f4, g3) 40541: FINISH_VISCHUNK(o0, f2, f4, g3)