aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc64/lib/U1memcpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc64/lib/U1memcpy.S')
-rw-r--r--arch/sparc64/lib/U1memcpy.S103
1 files changed, 53 insertions, 50 deletions
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S
index da9b520c7189..bafd2fc07acb 100644
--- a/arch/sparc64/lib/U1memcpy.S
+++ b/arch/sparc64/lib/U1memcpy.S
@@ -87,14 +87,17 @@
87#define LOOP_CHUNK3(src, dest, len, branch_dest) \ 87#define LOOP_CHUNK3(src, dest, len, branch_dest) \
88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) 88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
89 89
90#define DO_SYNC membar #Sync;
90#define STORE_SYNC(dest, fsrc) \ 91#define STORE_SYNC(dest, fsrc) \
91 EX_ST(STORE_BLK(%fsrc, %dest)); \ 92 EX_ST(STORE_BLK(%fsrc, %dest)); \
92 add %dest, 0x40, %dest; 93 add %dest, 0x40, %dest; \
94 DO_SYNC
93 95
94#define STORE_JUMP(dest, fsrc, target) \ 96#define STORE_JUMP(dest, fsrc, target) \
95 EX_ST(STORE_BLK(%fsrc, %dest)); \ 97 EX_ST(STORE_BLK(%fsrc, %dest)); \
96 add %dest, 0x40, %dest; \ 98 add %dest, 0x40, %dest; \
97 ba,pt %xcc, target; 99 ba,pt %xcc, target; \
100 nop;
98 101
99#define FINISH_VISCHUNK(dest, f0, f1, left) \ 102#define FINISH_VISCHUNK(dest, f0, f1, left) \
100 subcc %left, 8, %left;\ 103 subcc %left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
239 ba,pt %xcc, 1b+4 242 ba,pt %xcc, 1b+4
240 faligndata %f0, %f2, %f48 243 faligndata %f0, %f2, %f48
2411: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 2441: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
242 STORE_SYNC(o0, f48) membar #Sync 245 STORE_SYNC(o0, f48)
243 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 246 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
244 STORE_JUMP(o0, f48, 40f) membar #Sync 247 STORE_JUMP(o0, f48, 40f)
2452: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 2482: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
246 STORE_SYNC(o0, f48) membar #Sync 249 STORE_SYNC(o0, f48)
247 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 250 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
248 STORE_JUMP(o0, f48, 48f) membar #Sync 251 STORE_JUMP(o0, f48, 48f)
2493: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 2523: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
250 STORE_SYNC(o0, f48) membar #Sync 253 STORE_SYNC(o0, f48)
251 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 254 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
252 STORE_JUMP(o0, f48, 56f) membar #Sync 255 STORE_JUMP(o0, f48, 56f)
253 256
2541: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 2571: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
255 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 258 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
260 ba,pt %xcc, 1b+4 263 ba,pt %xcc, 1b+4
261 faligndata %f2, %f4, %f48 264 faligndata %f2, %f4, %f48
2621: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 2651: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
263 STORE_SYNC(o0, f48) membar #Sync 266 STORE_SYNC(o0, f48)
264 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 267 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
265 STORE_JUMP(o0, f48, 41f) membar #Sync 268 STORE_JUMP(o0, f48, 41f)
2662: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 2692: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
267 STORE_SYNC(o0, f48) membar #Sync 270 STORE_SYNC(o0, f48)
268 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 271 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
269 STORE_JUMP(o0, f48, 49f) membar #Sync 272 STORE_JUMP(o0, f48, 49f)
2703: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 2733: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
271 STORE_SYNC(o0, f48) membar #Sync 274 STORE_SYNC(o0, f48)
272 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 275 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
273 STORE_JUMP(o0, f48, 57f) membar #Sync 276 STORE_JUMP(o0, f48, 57f)
274 277
2751: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 2781: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
276 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 279 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
281 ba,pt %xcc, 1b+4 284 ba,pt %xcc, 1b+4
282 faligndata %f4, %f6, %f48 285 faligndata %f4, %f6, %f48
2831: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 2861: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
284 STORE_SYNC(o0, f48) membar #Sync 287 STORE_SYNC(o0, f48)
285 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 288 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
286 STORE_JUMP(o0, f48, 42f) membar #Sync 289 STORE_JUMP(o0, f48, 42f)
2872: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 2902: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
288 STORE_SYNC(o0, f48) membar #Sync 291 STORE_SYNC(o0, f48)
289 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 292 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
290 STORE_JUMP(o0, f48, 50f) membar #Sync 293 STORE_JUMP(o0, f48, 50f)
2913: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 2943: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
292 STORE_SYNC(o0, f48) membar #Sync 295 STORE_SYNC(o0, f48)
293 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 296 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
294 STORE_JUMP(o0, f48, 58f) membar #Sync 297 STORE_JUMP(o0, f48, 58f)
295 298
2961: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 2991: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
297 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 300 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
302 ba,pt %xcc, 1b+4 305 ba,pt %xcc, 1b+4
303 faligndata %f6, %f8, %f48 306 faligndata %f6, %f8, %f48
3041: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 3071: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
305 STORE_SYNC(o0, f48) membar #Sync 308 STORE_SYNC(o0, f48)
306 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 309 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
307 STORE_JUMP(o0, f48, 43f) membar #Sync 310 STORE_JUMP(o0, f48, 43f)
3082: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 3112: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
309 STORE_SYNC(o0, f48) membar #Sync 312 STORE_SYNC(o0, f48)
310 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 313 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
311 STORE_JUMP(o0, f48, 51f) membar #Sync 314 STORE_JUMP(o0, f48, 51f)
3123: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 3153: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
313 STORE_SYNC(o0, f48) membar #Sync 316 STORE_SYNC(o0, f48)
314 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 317 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
315 STORE_JUMP(o0, f48, 59f) membar #Sync 318 STORE_JUMP(o0, f48, 59f)
316 319
3171: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 3201: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
318 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 321 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
323 ba,pt %xcc, 1b+4 326 ba,pt %xcc, 1b+4
324 faligndata %f8, %f10, %f48 327 faligndata %f8, %f10, %f48
3251: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 3281: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
326 STORE_SYNC(o0, f48) membar #Sync 329 STORE_SYNC(o0, f48)
327 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 330 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
328 STORE_JUMP(o0, f48, 44f) membar #Sync 331 STORE_JUMP(o0, f48, 44f)
3292: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 3322: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
330 STORE_SYNC(o0, f48) membar #Sync 333 STORE_SYNC(o0, f48)
331 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 334 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
332 STORE_JUMP(o0, f48, 52f) membar #Sync 335 STORE_JUMP(o0, f48, 52f)
3333: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 3363: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
334 STORE_SYNC(o0, f48) membar #Sync 337 STORE_SYNC(o0, f48)
335 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 338 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
336 STORE_JUMP(o0, f48, 60f) membar #Sync 339 STORE_JUMP(o0, f48, 60f)
337 340
3381: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 3411: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
339 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 342 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
344 ba,pt %xcc, 1b+4 347 ba,pt %xcc, 1b+4
345 faligndata %f10, %f12, %f48 348 faligndata %f10, %f12, %f48
3461: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 3491: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
347 STORE_SYNC(o0, f48) membar #Sync 350 STORE_SYNC(o0, f48)
348 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 351 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
349 STORE_JUMP(o0, f48, 45f) membar #Sync 352 STORE_JUMP(o0, f48, 45f)
3502: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 3532: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
351 STORE_SYNC(o0, f48) membar #Sync 354 STORE_SYNC(o0, f48)
352 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 355 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
353 STORE_JUMP(o0, f48, 53f) membar #Sync 356 STORE_JUMP(o0, f48, 53f)
3543: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 3573: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
355 STORE_SYNC(o0, f48) membar #Sync 358 STORE_SYNC(o0, f48)
356 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 359 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
357 STORE_JUMP(o0, f48, 61f) membar #Sync 360 STORE_JUMP(o0, f48, 61f)
358 361
3591: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 3621: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
360 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 363 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
365 ba,pt %xcc, 1b+4 368 ba,pt %xcc, 1b+4
366 faligndata %f12, %f14, %f48 369 faligndata %f12, %f14, %f48
3671: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 3701: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
368 STORE_SYNC(o0, f48) membar #Sync 371 STORE_SYNC(o0, f48)
369 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 372 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
370 STORE_JUMP(o0, f48, 46f) membar #Sync 373 STORE_JUMP(o0, f48, 46f)
3712: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 3742: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
372 STORE_SYNC(o0, f48) membar #Sync 375 STORE_SYNC(o0, f48)
373 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 376 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
374 STORE_JUMP(o0, f48, 54f) membar #Sync 377 STORE_JUMP(o0, f48, 54f)
3753: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 3783: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
376 STORE_SYNC(o0, f48) membar #Sync 379 STORE_SYNC(o0, f48)
377 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 380 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
378 STORE_JUMP(o0, f48, 62f) membar #Sync 381 STORE_JUMP(o0, f48, 62f)
379 382
3801: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 3831: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
381 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 384 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
386 ba,pt %xcc, 1b+4 389 ba,pt %xcc, 1b+4
387 faligndata %f14, %f16, %f48 390 faligndata %f14, %f16, %f48
3881: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 3911: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
389 STORE_SYNC(o0, f48) membar #Sync 392 STORE_SYNC(o0, f48)
390 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 393 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
391 STORE_JUMP(o0, f48, 47f) membar #Sync 394 STORE_JUMP(o0, f48, 47f)
3922: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 3952: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
393 STORE_SYNC(o0, f48) membar #Sync 396 STORE_SYNC(o0, f48)
394 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 397 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
395 STORE_JUMP(o0, f48, 55f) membar #Sync 398 STORE_JUMP(o0, f48, 55f)
3963: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 3993: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
397 STORE_SYNC(o0, f48) membar #Sync 400 STORE_SYNC(o0, f48)
398 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 401 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
399 STORE_JUMP(o0, f48, 63f) membar #Sync 402 STORE_JUMP(o0, f48, 63f)
400 403
40140: FINISH_VISCHUNK(o0, f0, f2, g3) 40440: FINISH_VISCHUNK(o0, f0, f2, g3)
40241: FINISH_VISCHUNK(o0, f2, f4, g3) 40541: FINISH_VISCHUNK(o0, f2, f4, g3)