diff options
author | David S. Miller <davem@davemloft.net> | 2012-09-27 04:06:43 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-09-27 04:06:43 -0400 |
commit | 1b62ca7bf5775bed048032b7e779561e1fe66aa0 (patch) | |
tree | dac210049b36fe6a8b57a8a98a4bc2687fcdcc7c /arch/sparc/lib | |
parent | a9e8d1a6b87167116e5779378f1d25ffed2e833b (diff) |
sparc64: Fix return value of Niagara-2 memcpy.
The saved return value (the original destination pointer) gets clobbered by
the kernel's VISEntryHalf, so we have to keep it in a register outside the
set clobbered by that macro.
The instance in glibc is OK and doesn't have this problem.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/lib')
-rw-r--r-- | arch/sparc/lib/NG2memcpy.S | 46 |
1 file changed, 23 insertions, 23 deletions
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index 03eadf66b0d3..2c20ad63ddbf 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -14,7 +14,7 @@ | |||
14 | #define FPRS_FEF 0x04 | 14 | #define FPRS_FEF 0x04 |
15 | #ifdef MEMCPY_DEBUG | 15 | #ifdef MEMCPY_DEBUG |
16 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ | 16 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ |
17 | clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; | 17 | clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; |
18 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | 18 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs |
19 | #else | 19 | #else |
20 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs | 20 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs |
@@ -182,13 +182,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
182 | cmp %g2, 0 | 182 | cmp %g2, 0 |
183 | tne %xcc, 5 | 183 | tne %xcc, 5 |
184 | PREAMBLE | 184 | PREAMBLE |
185 | mov %o0, GLOBAL_SPARE | 185 | mov %o0, %o3 |
186 | cmp %o2, 0 | 186 | cmp %o2, 0 |
187 | be,pn %XCC, 85f | 187 | be,pn %XCC, 85f |
188 | or %o0, %o1, %o3 | 188 | or %o0, %o1, GLOBAL_SPARE |
189 | cmp %o2, 16 | 189 | cmp %o2, 16 |
190 | blu,a,pn %XCC, 80f | 190 | blu,a,pn %XCC, 80f |
191 | or %o3, %o2, %o3 | 191 | or GLOBAL_SPARE, %o2, GLOBAL_SPARE |
192 | 192 | ||
193 | /* 2 blocks (128 bytes) is the minimum we can do the block | 193 | /* 2 blocks (128 bytes) is the minimum we can do the block |
194 | * copy with. We need to ensure that we'll iterate at least | 194 | * copy with. We need to ensure that we'll iterate at least |
@@ -202,7 +202,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
202 | */ | 202 | */ |
203 | cmp %o2, (4 * 64) | 203 | cmp %o2, (4 * 64) |
204 | blu,pt %XCC, 75f | 204 | blu,pt %XCC, 75f |
205 | andcc %o3, 0x7, %g0 | 205 | andcc GLOBAL_SPARE, 0x7, %g0 |
206 | 206 | ||
207 | /* %o0: dst | 207 | /* %o0: dst |
208 | * %o1: src | 208 | * %o1: src |
@@ -404,13 +404,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
404 | * over. If anything is left, we copy it one byte at a time. | 404 | * over. If anything is left, we copy it one byte at a time. |
405 | */ | 405 | */ |
406 | brz,pt %o2, 85f | 406 | brz,pt %o2, 85f |
407 | sub %o0, %o1, %o3 | 407 | sub %o0, %o1, GLOBAL_SPARE |
408 | ba,a,pt %XCC, 90f | 408 | ba,a,pt %XCC, 90f |
409 | 409 | ||
410 | .align 64 | 410 | .align 64 |
411 | 75: /* 16 < len <= 64 */ | 411 | 75: /* 16 < len <= 64 */ |
412 | bne,pn %XCC, 75f | 412 | bne,pn %XCC, 75f |
413 | sub %o0, %o1, %o3 | 413 | sub %o0, %o1, GLOBAL_SPARE |
414 | 414 | ||
415 | 72: | 415 | 72: |
416 | andn %o2, 0xf, %o4 | 416 | andn %o2, 0xf, %o4 |
@@ -420,9 +420,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
420 | add %o1, 0x08, %o1 | 420 | add %o1, 0x08, %o1 |
421 | EX_LD(LOAD(ldx, %o1, %g1)) | 421 | EX_LD(LOAD(ldx, %o1, %g1)) |
422 | sub %o1, 0x08, %o1 | 422 | sub %o1, 0x08, %o1 |
423 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | 423 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) |
424 | add %o1, 0x8, %o1 | 424 | add %o1, 0x8, %o1 |
425 | EX_ST(STORE(stx, %g1, %o1 + %o3)) | 425 | EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) |
426 | bgu,pt %XCC, 1b | 426 | bgu,pt %XCC, 1b |
427 | add %o1, 0x8, %o1 | 427 | add %o1, 0x8, %o1 |
428 | 73: andcc %o2, 0x8, %g0 | 428 | 73: andcc %o2, 0x8, %g0 |
@@ -430,14 +430,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
430 | nop | 430 | nop |
431 | sub %o2, 0x8, %o2 | 431 | sub %o2, 0x8, %o2 |
432 | EX_LD(LOAD(ldx, %o1, %o5)) | 432 | EX_LD(LOAD(ldx, %o1, %o5)) |
433 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | 433 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) |
434 | add %o1, 0x8, %o1 | 434 | add %o1, 0x8, %o1 |
435 | 1: andcc %o2, 0x4, %g0 | 435 | 1: andcc %o2, 0x4, %g0 |
436 | be,pt %XCC, 1f | 436 | be,pt %XCC, 1f |
437 | nop | 437 | nop |
438 | sub %o2, 0x4, %o2 | 438 | sub %o2, 0x4, %o2 |
439 | EX_LD(LOAD(lduw, %o1, %o5)) | 439 | EX_LD(LOAD(lduw, %o1, %o5)) |
440 | EX_ST(STORE(stw, %o5, %o1 + %o3)) | 440 | EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) |
441 | add %o1, 0x4, %o1 | 441 | add %o1, 0x4, %o1 |
442 | 1: cmp %o2, 0 | 442 | 1: cmp %o2, 0 |
443 | be,pt %XCC, 85f | 443 | be,pt %XCC, 85f |
@@ -454,11 +454,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
454 | 454 | ||
455 | 1: subcc %g1, 1, %g1 | 455 | 1: subcc %g1, 1, %g1 |
456 | EX_LD(LOAD(ldub, %o1, %o5)) | 456 | EX_LD(LOAD(ldub, %o1, %o5)) |
457 | EX_ST(STORE(stb, %o5, %o1 + %o3)) | 457 | EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) |
458 | bgu,pt %icc, 1b | 458 | bgu,pt %icc, 1b |
459 | add %o1, 1, %o1 | 459 | add %o1, 1, %o1 |
460 | 460 | ||
461 | 2: add %o1, %o3, %o0 | 461 | 2: add %o1, GLOBAL_SPARE, %o0 |
462 | andcc %o1, 0x7, %g1 | 462 | andcc %o1, 0x7, %g1 |
463 | bne,pt %icc, 8f | 463 | bne,pt %icc, 8f |
464 | sll %g1, 3, %g1 | 464 | sll %g1, 3, %g1 |
@@ -468,16 +468,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
468 | nop | 468 | nop |
469 | ba,a,pt %xcc, 73b | 469 | ba,a,pt %xcc, 73b |
470 | 470 | ||
471 | 8: mov 64, %o3 | 471 | 8: mov 64, GLOBAL_SPARE |
472 | andn %o1, 0x7, %o1 | 472 | andn %o1, 0x7, %o1 |
473 | EX_LD(LOAD(ldx, %o1, %g2)) | 473 | EX_LD(LOAD(ldx, %o1, %g2)) |
474 | sub %o3, %g1, %o3 | 474 | sub GLOBAL_SPARE, %g1, GLOBAL_SPARE |
475 | andn %o2, 0x7, %o4 | 475 | andn %o2, 0x7, %o4 |
476 | sllx %g2, %g1, %g2 | 476 | sllx %g2, %g1, %g2 |
477 | 1: add %o1, 0x8, %o1 | 477 | 1: add %o1, 0x8, %o1 |
478 | EX_LD(LOAD(ldx, %o1, %g3)) | 478 | EX_LD(LOAD(ldx, %o1, %g3)) |
479 | subcc %o4, 0x8, %o4 | 479 | subcc %o4, 0x8, %o4 |
480 | srlx %g3, %o3, %o5 | 480 | srlx %g3, GLOBAL_SPARE, %o5 |
481 | or %o5, %g2, %o5 | 481 | or %o5, %g2, %o5 |
482 | EX_ST(STORE(stx, %o5, %o0)) | 482 | EX_ST(STORE(stx, %o5, %o0)) |
483 | add %o0, 0x8, %o0 | 483 | add %o0, 0x8, %o0 |
@@ -489,32 +489,32 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
489 | be,pn %icc, 85f | 489 | be,pn %icc, 85f |
490 | add %o1, %g1, %o1 | 490 | add %o1, %g1, %o1 |
491 | ba,pt %xcc, 90f | 491 | ba,pt %xcc, 90f |
492 | sub %o0, %o1, %o3 | 492 | sub %o0, %o1, GLOBAL_SPARE |
493 | 493 | ||
494 | .align 64 | 494 | .align 64 |
495 | 80: /* 0 < len <= 16 */ | 495 | 80: /* 0 < len <= 16 */ |
496 | andcc %o3, 0x3, %g0 | 496 | andcc GLOBAL_SPARE, 0x3, %g0 |
497 | bne,pn %XCC, 90f | 497 | bne,pn %XCC, 90f |
498 | sub %o0, %o1, %o3 | 498 | sub %o0, %o1, GLOBAL_SPARE |
499 | 499 | ||
500 | 1: | 500 | 1: |
501 | subcc %o2, 4, %o2 | 501 | subcc %o2, 4, %o2 |
502 | EX_LD(LOAD(lduw, %o1, %g1)) | 502 | EX_LD(LOAD(lduw, %o1, %g1)) |
503 | EX_ST(STORE(stw, %g1, %o1 + %o3)) | 503 | EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) |
504 | bgu,pt %XCC, 1b | 504 | bgu,pt %XCC, 1b |
505 | add %o1, 4, %o1 | 505 | add %o1, 4, %o1 |
506 | 506 | ||
507 | 85: retl | 507 | 85: retl |
508 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | 508 | mov EX_RETVAL(%o3), %o0 |
509 | 509 | ||
510 | .align 32 | 510 | .align 32 |
511 | 90: | 511 | 90: |
512 | subcc %o2, 1, %o2 | 512 | subcc %o2, 1, %o2 |
513 | EX_LD(LOAD(ldub, %o1, %g1)) | 513 | EX_LD(LOAD(ldub, %o1, %g1)) |
514 | EX_ST(STORE(stb, %g1, %o1 + %o3)) | 514 | EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) |
515 | bgu,pt %XCC, 90b | 515 | bgu,pt %XCC, 90b |
516 | add %o1, 1, %o1 | 516 | add %o1, 1, %o1 |
517 | retl | 517 | retl |
518 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | 518 | mov EX_RETVAL(%o3), %o0 |
519 | 519 | ||
520 | .size FUNC_NAME, .-FUNC_NAME | 520 | .size FUNC_NAME, .-FUNC_NAME |