diff options
author | Babu Moger <babu.moger@oracle.com> | 2017-03-17 16:52:21 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-03-28 00:51:40 -0400 |
commit | 0ae2d26ffe70c32d4a7fe77593f0a55ce416c09e (patch) | |
tree | 56fd33ac22298ef2ae162366727ada11c719323e | |
parent | adfae8a5d833fa2b46577a8081f350e408851f5b (diff) |
arch/sparc: Avoid DCTI Couples
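Avoid unintended DCTI couples; use of DCTI couples is deprecated.
Also follow the "Programming Note" quoted below for optimal performance: place a nop after each annulled branch-always (ba,a) so that no DCTI occupies the instruction word immediately following it.
Here is the complete text from the Oracle SPARC Architecture specification: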
6.3.4.7 DCTI Couples
"A delayed control transfer instruction (DCTI) in the delay slot of
another DCTI is referred to as a “DCTI couple”. The use of DCTI couples
is deprecated in the Oracle SPARC Architecture; no new software should
place a DCTI in the delay slot of another DCTI, because on future Oracle
SPARC Architecture implementations DCTI couples may execute either
slowly or differently than the programmer assumes it will.
SPARC V8 and SPARC V9 Compatibility Note
The SPARC V8 architecture left behavior undefined for a DCTI couple. The
SPARC V9 architecture defined behavior in that case, but as of
UltraSPARC Architecture 2005, use of DCTI couples was deprecated.
Software should not expect high performance from DCTI couples, and
performance of DCTI couples should be expected to decline further in
future processors.
Programming Note
As noted in TABLE 6-5 on page 115, an annulled branch-always
(branch-always with a = 1) instruction is not architecturally a DCTI.
However, since not all implementations make that distinction, for
optimal performance, a DCTI should not be placed in the instruction word
immediately following an annulled branch-always instruction (BA,A or
BPA,A)."
Signed-off-by: Babu Moger <babu.moger@oracle.com>
Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc/kernel/head_64.S | 4 | ||||
-rw-r--r-- | arch/sparc/kernel/misctrap.S | 1 | ||||
-rw-r--r-- | arch/sparc/kernel/rtrap_64.S | 1 | ||||
-rw-r--r-- | arch/sparc/kernel/spiterrs.S | 1 | ||||
-rw-r--r-- | arch/sparc/kernel/sun4v_tlb_miss.S | 1 | ||||
-rw-r--r-- | arch/sparc/kernel/urtt_fill.S | 1 | ||||
-rw-r--r-- | arch/sparc/kernel/winfixup.S | 2 | ||||
-rw-r--r-- | arch/sparc/lib/NG2memcpy.S | 4 | ||||
-rw-r--r-- | arch/sparc/lib/NG4memcpy.S | 1 | ||||
-rw-r--r-- | arch/sparc/lib/NG4memset.S | 1 | ||||
-rw-r--r-- | arch/sparc/lib/NGmemcpy.S | 1 |
11 files changed, 18 insertions, 0 deletions
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 6aa3da152c20..44101196d02b 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S | |||
@@ -96,6 +96,7 @@ sparc64_boot: | |||
96 | andn %g1, PSTATE_AM, %g1 | 96 | andn %g1, PSTATE_AM, %g1 |
97 | wrpr %g1, 0x0, %pstate | 97 | wrpr %g1, 0x0, %pstate |
98 | ba,a,pt %xcc, 1f | 98 | ba,a,pt %xcc, 1f |
99 | nop | ||
99 | 100 | ||
100 | .globl prom_finddev_name, prom_chosen_path, prom_root_node | 101 | .globl prom_finddev_name, prom_chosen_path, prom_root_node |
101 | .globl prom_getprop_name, prom_mmu_name, prom_peer_name | 102 | .globl prom_getprop_name, prom_mmu_name, prom_peer_name |
@@ -613,6 +614,7 @@ niagara_tlb_fixup: | |||
613 | nop | 614 | nop |
614 | 615 | ||
615 | ba,a,pt %xcc, 80f | 616 | ba,a,pt %xcc, 80f |
617 | nop | ||
616 | niagara4_patch: | 618 | niagara4_patch: |
617 | call niagara4_patch_copyops | 619 | call niagara4_patch_copyops |
618 | nop | 620 | nop |
@@ -622,6 +624,7 @@ niagara4_patch: | |||
622 | nop | 624 | nop |
623 | 625 | ||
624 | ba,a,pt %xcc, 80f | 626 | ba,a,pt %xcc, 80f |
627 | nop | ||
625 | 628 | ||
626 | niagara2_patch: | 629 | niagara2_patch: |
627 | call niagara2_patch_copyops | 630 | call niagara2_patch_copyops |
@@ -632,6 +635,7 @@ niagara2_patch: | |||
632 | nop | 635 | nop |
633 | 636 | ||
634 | ba,a,pt %xcc, 80f | 637 | ba,a,pt %xcc, 80f |
638 | nop | ||
635 | 639 | ||
636 | niagara_patch: | 640 | niagara_patch: |
637 | call niagara_patch_copyops | 641 | call niagara_patch_copyops |
diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S index 34b4933900bf..9276d2f0dd86 100644 --- a/arch/sparc/kernel/misctrap.S +++ b/arch/sparc/kernel/misctrap.S | |||
@@ -82,6 +82,7 @@ do_stdfmna: | |||
82 | call handle_stdfmna | 82 | call handle_stdfmna |
83 | add %sp, PTREGS_OFF, %o0 | 83 | add %sp, PTREGS_OFF, %o0 |
84 | ba,a,pt %xcc, rtrap | 84 | ba,a,pt %xcc, rtrap |
85 | nop | ||
85 | .size do_stdfmna,.-do_stdfmna | 86 | .size do_stdfmna,.-do_stdfmna |
86 | 87 | ||
87 | .type breakpoint_trap,#function | 88 | .type breakpoint_trap,#function |
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 216948ca4382..709a82ebd294 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S | |||
@@ -237,6 +237,7 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 | |||
237 | bne,pt %xcc, user_rtt_fill_32bit | 237 | bne,pt %xcc, user_rtt_fill_32bit |
238 | wrpr %g1, %cwp | 238 | wrpr %g1, %cwp |
239 | ba,a,pt %xcc, user_rtt_fill_64bit | 239 | ba,a,pt %xcc, user_rtt_fill_64bit |
240 | nop | ||
240 | 241 | ||
241 | user_rtt_fill_fixup_dax: | 242 | user_rtt_fill_fixup_dax: |
242 | ba,pt %xcc, user_rtt_fill_fixup_common | 243 | ba,pt %xcc, user_rtt_fill_fixup_common |
diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S index 4a73009f66a5..d7e540842809 100644 --- a/arch/sparc/kernel/spiterrs.S +++ b/arch/sparc/kernel/spiterrs.S | |||
@@ -86,6 +86,7 @@ __spitfire_cee_trap_continue: | |||
86 | rd %pc, %g7 | 86 | rd %pc, %g7 |
87 | 87 | ||
88 | ba,a,pt %xcc, 2f | 88 | ba,a,pt %xcc, 2f |
89 | nop | ||
89 | 90 | ||
90 | 1: ba,pt %xcc, etrap_irq | 91 | 1: ba,pt %xcc, etrap_irq |
91 | rd %pc, %g7 | 92 | rd %pc, %g7 |
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index 6179e19bc9b9..c19f352f46c7 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S | |||
@@ -352,6 +352,7 @@ sun4v_mna: | |||
352 | call sun4v_do_mna | 352 | call sun4v_do_mna |
353 | add %sp, PTREGS_OFF, %o0 | 353 | add %sp, PTREGS_OFF, %o0 |
354 | ba,a,pt %xcc, rtrap | 354 | ba,a,pt %xcc, rtrap |
355 | nop | ||
355 | 356 | ||
356 | /* Privileged Action. */ | 357 | /* Privileged Action. */ |
357 | sun4v_privact: | 358 | sun4v_privact: |
diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S index 5604a2b051d4..364af3250646 100644 --- a/arch/sparc/kernel/urtt_fill.S +++ b/arch/sparc/kernel/urtt_fill.S | |||
@@ -92,6 +92,7 @@ user_rtt_fill_fixup_common: | |||
92 | call sun4v_data_access_exception | 92 | call sun4v_data_access_exception |
93 | nop | 93 | nop |
94 | ba,a,pt %xcc, rtrap | 94 | ba,a,pt %xcc, rtrap |
95 | nop | ||
95 | 96 | ||
96 | 1: call spitfire_data_access_exception | 97 | 1: call spitfire_data_access_exception |
97 | nop | 98 | nop |
diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S index 855019a8590e..1ee173cc3c39 100644 --- a/arch/sparc/kernel/winfixup.S +++ b/arch/sparc/kernel/winfixup.S | |||
@@ -152,6 +152,8 @@ fill_fixup_dax: | |||
152 | call sun4v_data_access_exception | 152 | call sun4v_data_access_exception |
153 | nop | 153 | nop |
154 | ba,a,pt %xcc, rtrap | 154 | ba,a,pt %xcc, rtrap |
155 | nop | ||
155 | 1: call spitfire_data_access_exception | 156 | 1: call spitfire_data_access_exception |
156 | nop | 157 | nop |
157 | ba,a,pt %xcc, rtrap | 158 | ba,a,pt %xcc, rtrap |
159 | nop | ||
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index c629dbd121b6..64dcd6cdb606 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S | |||
@@ -326,11 +326,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
326 | blu 170f | 326 | blu 170f |
327 | nop | 327 | nop |
328 | ba,a,pt %xcc, 180f | 328 | ba,a,pt %xcc, 180f |
329 | nop | ||
329 | 330 | ||
330 | 4: /* 32 <= low bits < 48 */ | 331 | 4: /* 32 <= low bits < 48 */ |
331 | blu 150f | 332 | blu 150f |
332 | nop | 333 | nop |
333 | ba,a,pt %xcc, 160f | 334 | ba,a,pt %xcc, 160f |
335 | nop | ||
334 | 5: /* 0 < low bits < 32 */ | 336 | 5: /* 0 < low bits < 32 */ |
335 | blu,a 6f | 337 | blu,a 6f |
336 | cmp %g2, 8 | 338 | cmp %g2, 8 |
@@ -338,6 +340,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
338 | blu 130f | 340 | blu 130f |
339 | nop | 341 | nop |
340 | ba,a,pt %xcc, 140f | 342 | ba,a,pt %xcc, 140f |
343 | nop | ||
341 | 6: /* 0 < low bits < 16 */ | 344 | 6: /* 0 < low bits < 16 */ |
342 | bgeu 120f | 345 | bgeu 120f |
343 | nop | 346 | nop |
@@ -475,6 +478,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
475 | brz,pt %o2, 85f | 478 | brz,pt %o2, 85f |
476 | sub %o0, %o1, GLOBAL_SPARE | 479 | sub %o0, %o1, GLOBAL_SPARE |
477 | ba,a,pt %XCC, 90f | 480 | ba,a,pt %XCC, 90f |
481 | nop | ||
478 | 482 | ||
479 | .align 64 | 483 | .align 64 |
480 | 75: /* 16 < len <= 64 */ | 484 | 75: /* 16 < len <= 64 */ |
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 75bb93b1437f..78ea962edcbe 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S | |||
@@ -530,4 +530,5 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
530 | bne,pt %icc, 1b | 530 | bne,pt %icc, 1b |
531 | EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) | 531 | EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) |
532 | ba,a,pt %icc, .Lexit | 532 | ba,a,pt %icc, .Lexit |
533 | nop | ||
533 | .size FUNC_NAME, .-FUNC_NAME | 534 | .size FUNC_NAME, .-FUNC_NAME |
diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S index 41da4bdd95cb..7c0c81f18837 100644 --- a/arch/sparc/lib/NG4memset.S +++ b/arch/sparc/lib/NG4memset.S | |||
@@ -102,4 +102,5 @@ NG4bzero: | |||
102 | bne,pt %icc, 1b | 102 | bne,pt %icc, 1b |
103 | add %o0, 0x30, %o0 | 103 | add %o0, 0x30, %o0 |
104 | ba,a,pt %icc, .Lpostloop | 104 | ba,a,pt %icc, .Lpostloop |
105 | nop | ||
105 | .size NG4bzero,.-NG4bzero | 106 | .size NG4bzero,.-NG4bzero |
diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S index d88c4ed50a00..cd654a719b27 100644 --- a/arch/sparc/lib/NGmemcpy.S +++ b/arch/sparc/lib/NGmemcpy.S | |||
@@ -394,6 +394,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ | |||
394 | brz,pt %i2, 85f | 394 | brz,pt %i2, 85f |
395 | sub %o0, %i1, %i3 | 395 | sub %o0, %i1, %i3 |
396 | ba,a,pt %XCC, 90f | 396 | ba,a,pt %XCC, 90f |
397 | nop | ||
397 | 398 | ||
398 | .align 64 | 399 | .align 64 |
399 | 70: /* 16 < len <= 64 */ | 400 | 70: /* 16 < len <= 64 */ |