author	Olof Johansson <olof@lixom.net>	2007-03-22 10:34:13 -0400
committer	Paul Mackerras <paulus@samba.org>	2007-04-12 13:55:13 -0400
commit	3467bfd340f9ad48f3732415533a2e9c18240b62 (patch)
tree	91f57918199d9508868aa0889a5b2aca4cc1da13
parent	569975591c5530fdc9c7a3c45122e5e46f075a74 (diff)
[POWERPC] Use mtocrf instruction in asm when CONFIG_POWER4_ONLY=y
mtocrf is a faster, single-field variant of mtcrf (move to condition
register fields), available on POWER4 and later processors. It can make
quite a difference in performance on some implementations, so use it
for CONFIG_POWER4_ONLY builds.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r--	arch/powerpc/lib/copyuser_64.S	6
-rw-r--r--	arch/powerpc/lib/mem_64.S	6
-rw-r--r--	arch/powerpc/lib/memcpy_64.S	6
-rw-r--r--	include/asm-powerpc/asm-compat.h	10
4 files changed, 19 insertions(+), 9 deletions(-)
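The two mnemonics differ only in what the 8-bit FXM mask may contain: mtcrf may set several mask bits and update several 4-bit CR fields in one instruction, which forces the core to treat the whole condition register as a single rename target, while mtocrf requires exactly one bit set, letting POWER4 and later rename that one field independently. A minimal sketch, not part of this patch, assuming GCC-style ppc64 inline asm:

/* Sketch only: contrasts the two forms this patch switches between.
 * FXM mask bit 0 selects cr0 ... bit 7 selects cr7.
 */
void cr_mask_demo(unsigned long v)
{
	/* two fields at once: legal for mtcrf, never for mtocrf */
	asm volatile("mtcrf 0x03,%0" : : "r" (v) : "cr6", "cr7");
	/* exactly one field: the mtocrf encoding, POWER4 and later */
	asm volatile("mtocrf 0x01,%0" : : "r" (v) : "cr7");
}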
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index a6b54cb97c49..25ec5378afa4 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
 	dcbt	0,r4
 	beq	.Lcopy_page_4K
 	andi.	r6,r6,7
-	mtcrf	0x01,r5
+	PPC_MTOCRF	0x01,r5
 	blt	cr1,.Lshort_copy
 	bne	.Ldst_unaligned
 .Ldst_aligned:
@@ -135,7 +135,7 @@ _GLOBAL(__copy_tofrom_user)
 	b	.Ldo_tail

 .Ldst_unaligned:
-	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
+	PPC_MTOCRF	0x01,r6	/* put #bytes to 8B bdry into cr7 */
 	subf	r5,r6,r5
 	li	r7,0
 	cmpldi	r1,r5,16
@@ -150,7 +150,7 @@ _GLOBAL(__copy_tofrom_user)
 2:	bf	cr7*4+1,3f
 37:	lwzx	r0,r7,r4
 83:	stwx	r0,r7,r3
-3:	mtcrf	0x01,r5
+3:	PPC_MTOCRF	0x01,r5
 	add	r4,r6,r4
 	add	r3,r6,r3
 	b	.Ldst_aligned
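For context on why these sites matter: __copy_tofrom_user (and memcpy below) move the low bits of a byte count into cr7, then branch on individual bits (bf cr7*4+n) to handle the 1-, 2- and 4-byte pieces around an 8-byte-aligned middle loop, so every unaligned copy executes the move-to-CR on its critical path. A loose C rendering, not the kernel's code, of the .Ldst_unaligned prologue:

/* Loose C rendering, for illustration only: 'to_boundary' plays the
 * role of r6 (bytes needed to reach an 8-byte destination boundary),
 * whose low bits PPC_MTOCRF 0x01 places in cr7.
 */
static void align_dst_to_8(unsigned char **dst, const unsigned char **src,
			   unsigned long to_boundary)
{
	if (to_boundary & 1)		/* bf cr7*4+3,1f in the asm */
		*(*dst)++ = *(*src)++;
	if (to_boundary & 2) {		/* bf cr7*4+2,2f */
		*(unsigned short *)*dst = *(const unsigned short *)*src;
		*dst += 2;
		*src += 2;
	}
	if (to_boundary & 4) {		/* bf cr7*4+1,3f, visible above */
		*(unsigned int *)*dst = *(const unsigned int *)*src;
		*dst += 4;
		*src += 4;
	}
}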
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 68df20283ff5..11ce045e21fd 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -19,7 +19,7 @@ _GLOBAL(memset)
 	rlwimi	r4,r4,16,0,15
 	cmplw	cr1,r5,r0	/* do we get that far? */
 	rldimi	r4,r4,32,0
-	mtcrf	1,r0
+	PPC_MTOCRF	1,r0
 	mr	r6,r3
 	blt	cr1,8f
 	beq+	3f	/* if already 8-byte aligned */
@@ -49,7 +49,7 @@ _GLOBAL(memset)
 	bdnz	4b
 5:	srwi.	r0,r5,3
 	clrlwi	r5,r5,29
-	mtcrf	1,r0
+	PPC_MTOCRF	1,r0
 	beq	8f
 	bf	29,6f
 	std	r4,0(r6)
@@ -65,7 +65,7 @@ _GLOBAL(memset)
 	std	r4,0(r6)
 	addi	r6,r6,8
 8:	cmpwi	r5,0
-	mtcrf	1,r5
+	PPC_MTOCRF	1,r5
 	beqlr+
 	bf	29,9f
 	stw	r4,0(r6)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 7173ba98f427..3f131129d1c1 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,7 +12,7 @@
 	.align	7
 _GLOBAL(memcpy)
 	std	r3,48(r1)	/* save destination pointer for return value */
-	mtcrf	0x01,r5
+	PPC_MTOCRF	0x01,r5
 	cmpldi	cr1,r5,16
 	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
 	andi.	r6,r6,7
@@ -128,7 +128,7 @@ _GLOBAL(memcpy)
 	b	.Ldo_tail

 .Ldst_unaligned:
-	mtcrf	0x01,r6		# put #bytes to 8B bdry into cr7
+	PPC_MTOCRF	0x01,r6	# put #bytes to 8B bdry into cr7
 	subf	r5,r6,r5
 	li	r7,0
 	cmpldi	r1,r5,16
@@ -143,7 +143,7 @@ _GLOBAL(memcpy)
 2:	bf	cr7*4+1,3f
 	lwzx	r0,r7,r4
 	stwx	r0,r7,r3
-3:	mtcrf	0x01,r5
+3:	PPC_MTOCRF	0x01,r5
 	add	r4,r6,r4
 	add	r3,r6,r3
 	b	.Ldst_aligned
diff --git a/include/asm-powerpc/asm-compat.h b/include/asm-powerpc/asm-compat.h
index c89bd58ee283..c19e7367fce6 100644
--- a/include/asm-powerpc/asm-compat.h
+++ b/include/asm-powerpc/asm-compat.h
@@ -78,6 +78,15 @@
 #define PPC_STLCX	stringify_in_c(stdcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzd)

+/* Move to CR, single-entry optimized version. Only available
+ * on POWER4 and later.
+ */
+#ifdef CONFIG_POWER4_ONLY
+#define PPC_MTOCRF	stringify_in_c(mtocrf)
+#else
+#define PPC_MTOCRF	stringify_in_c(mtcrf)
+#endif
+
 #else /* 32-bit */

 /* operations for longs and pointers */
@@ -89,6 +98,7 @@
 #define PPC_LLARX	stringify_in_c(lwarx)
 #define PPC_STLCX	stringify_in_c(stwcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzw)
+#define PPC_MTOCRF	stringify_in_c(mtcrf)

 #endif
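The asm-compat.h hunks are what let the three .S files above share one spelling: stringify_in_c() expands to the bare token under __ASSEMBLY__ and to a string literal usable in C inline asm otherwise, so PPC_MTOCRF resolves to mtocrf only when CONFIG_POWER4_ONLY=y and falls back to mtcrf everywhere else, including all 32-bit builds. A hedged usage sketch with a hypothetical C caller:

/* Hypothetical caller, not in this patch: shows the same macro
 * working from C.  Assumes the kernel's <asm/asm-compat.h>, whose
 * stringify_in_c() yields the mnemonic plus a trailing space.
 */
#include <asm/asm-compat.h>

static inline void len_bits_to_cr7(unsigned long len)
{
	/* expands to "mtocrf 0x01,%0" or "mtcrf 0x01,%0" at build time */
	asm volatile(PPC_MTOCRF "0x01,%0" : : "r" (len) : "cr7");
}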