diff options
author | Olof Johansson <olof@lixom.net> | 2007-03-22 10:34:13 -0400 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2007-04-12 13:55:13 -0400 |
commit | 3467bfd340f9ad48f3732415533a2e9c18240b62 (patch) | |
tree | 91f57918199d9508868aa0889a5b2aca4cc1da13 | |
parent | 569975591c5530fdc9c7a3c45122e5e46f075a74 (diff) |
[POWERPC] Use mtocrf instruction in asm when CONFIG_POWER4_ONLY=y

mtocrf is a faster single-field mtcrf (move to condition register
fields) instruction available in POWER4 and later processors. It can
make quite a difference in performance on some implementations, so use
it for CONFIG_POWER4_ONLY builds.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r-- | arch/powerpc/lib/copyuser_64.S | 6 |
-rw-r--r-- | arch/powerpc/lib/mem_64.S | 6 |
-rw-r--r-- | arch/powerpc/lib/memcpy_64.S | 6 |
-rw-r--r-- | include/asm-powerpc/asm-compat.h | 10 |
4 files changed, 19 insertions, 9 deletions
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index a6b54cb97c49..25ec5378afa4 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user) | |||
24 | dcbt 0,r4 | 24 | dcbt 0,r4 |
25 | beq .Lcopy_page_4K | 25 | beq .Lcopy_page_4K |
26 | andi. r6,r6,7 | 26 | andi. r6,r6,7 |
27 | mtcrf 0x01,r5 | 27 | PPC_MTOCRF 0x01,r5 |
28 | blt cr1,.Lshort_copy | 28 | blt cr1,.Lshort_copy |
29 | bne .Ldst_unaligned | 29 | bne .Ldst_unaligned |
30 | .Ldst_aligned: | 30 | .Ldst_aligned: |
@@ -135,7 +135,7 @@ _GLOBAL(__copy_tofrom_user) | |||
135 | b .Ldo_tail | 135 | b .Ldo_tail |
136 | 136 | ||
137 | .Ldst_unaligned: | 137 | .Ldst_unaligned: |
138 | mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */ | 138 | PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */ |
139 | subf r5,r6,r5 | 139 | subf r5,r6,r5 |
140 | li r7,0 | 140 | li r7,0 |
141 | cmpldi r1,r5,16 | 141 | cmpldi r1,r5,16 |
@@ -150,7 +150,7 @@ _GLOBAL(__copy_tofrom_user) | |||
150 | 2: bf cr7*4+1,3f | 150 | 2: bf cr7*4+1,3f |
151 | 37: lwzx r0,r7,r4 | 151 | 37: lwzx r0,r7,r4 |
152 | 83: stwx r0,r7,r3 | 152 | 83: stwx r0,r7,r3 |
153 | 3: mtcrf 0x01,r5 | 153 | 3: PPC_MTOCRF 0x01,r5 |
154 | add r4,r6,r4 | 154 | add r4,r6,r4 |
155 | add r3,r6,r3 | 155 | add r3,r6,r3 |
156 | b .Ldst_aligned | 156 | b .Ldst_aligned |
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 68df20283ff5..11ce045e21fd 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -19,7 +19,7 @@ _GLOBAL(memset) | |||
19 | rlwimi r4,r4,16,0,15 | 19 | rlwimi r4,r4,16,0,15 |
20 | cmplw cr1,r5,r0 /* do we get that far? */ | 20 | cmplw cr1,r5,r0 /* do we get that far? */ |
21 | rldimi r4,r4,32,0 | 21 | rldimi r4,r4,32,0 |
22 | mtcrf 1,r0 | 22 | PPC_MTOCRF 1,r0 |
23 | mr r6,r3 | 23 | mr r6,r3 |
24 | blt cr1,8f | 24 | blt cr1,8f |
25 | beq+ 3f /* if already 8-byte aligned */ | 25 | beq+ 3f /* if already 8-byte aligned */ |
@@ -49,7 +49,7 @@ _GLOBAL(memset) | |||
49 | bdnz 4b | 49 | bdnz 4b |
50 | 5: srwi. r0,r5,3 | 50 | 5: srwi. r0,r5,3 |
51 | clrlwi r5,r5,29 | 51 | clrlwi r5,r5,29 |
52 | mtcrf 1,r0 | 52 | PPC_MTOCRF 1,r0 |
53 | beq 8f | 53 | beq 8f |
54 | bf 29,6f | 54 | bf 29,6f |
55 | std r4,0(r6) | 55 | std r4,0(r6) |
@@ -65,7 +65,7 @@ _GLOBAL(memset) | |||
65 | std r4,0(r6) | 65 | std r4,0(r6) |
66 | addi r6,r6,8 | 66 | addi r6,r6,8 |
67 | 8: cmpwi r5,0 | 67 | 8: cmpwi r5,0 |
68 | mtcrf 1,r5 | 68 | PPC_MTOCRF 1,r5 |
69 | beqlr+ | 69 | beqlr+ |
70 | bf 29,9f | 70 | bf 29,9f |
71 | stw r4,0(r6) | 71 | stw r4,0(r6) |
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 7173ba98f427..3f131129d1c1 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,7 +12,7 @@ | |||
12 | .align 7 | 12 | .align 7 |
13 | _GLOBAL(memcpy) | 13 | _GLOBAL(memcpy) |
14 | std r3,48(r1) /* save destination pointer for return value */ | 14 | std r3,48(r1) /* save destination pointer for return value */ |
15 | mtcrf 0x01,r5 | 15 | PPC_MTOCRF 0x01,r5 |
16 | cmpldi cr1,r5,16 | 16 | cmpldi cr1,r5,16 |
17 | neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry | 17 | neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry |
18 | andi. r6,r6,7 | 18 | andi. r6,r6,7 |
@@ -128,7 +128,7 @@ _GLOBAL(memcpy) | |||
128 | b .Ldo_tail | 128 | b .Ldo_tail |
129 | 129 | ||
130 | .Ldst_unaligned: | 130 | .Ldst_unaligned: |
131 | mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7 | 131 | PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7 |
132 | subf r5,r6,r5 | 132 | subf r5,r6,r5 |
133 | li r7,0 | 133 | li r7,0 |
134 | cmpldi r1,r5,16 | 134 | cmpldi r1,r5,16 |
@@ -143,7 +143,7 @@ _GLOBAL(memcpy) | |||
143 | 2: bf cr7*4+1,3f | 143 | 2: bf cr7*4+1,3f |
144 | lwzx r0,r7,r4 | 144 | lwzx r0,r7,r4 |
145 | stwx r0,r7,r3 | 145 | stwx r0,r7,r3 |
146 | 3: mtcrf 0x01,r5 | 146 | 3: PPC_MTOCRF 0x01,r5 |
147 | add r4,r6,r4 | 147 | add r4,r6,r4 |
148 | add r3,r6,r3 | 148 | add r3,r6,r3 |
149 | b .Ldst_aligned | 149 | b .Ldst_aligned |
diff --git a/include/asm-powerpc/asm-compat.h b/include/asm-powerpc/asm-compat.h
index c89bd58ee283..c19e7367fce6 100644
--- a/include/asm-powerpc/asm-compat.h
+++ b/include/asm-powerpc/asm-compat.h
@@ -78,6 +78,15 @@ | |||
78 | #define PPC_STLCX stringify_in_c(stdcx.) | 78 | #define PPC_STLCX stringify_in_c(stdcx.) |
79 | #define PPC_CNTLZL stringify_in_c(cntlzd) | 79 | #define PPC_CNTLZL stringify_in_c(cntlzd) |
80 | 80 | ||
81 | /* Move to CR, single-entry optimized version. Only available | ||
82 | * on POWER4 and later. | ||
83 | */ | ||
84 | #ifdef CONFIG_POWER4_ONLY | ||
85 | #define PPC_MTOCRF stringify_in_c(mtocrf) | ||
86 | #else | ||
87 | #define PPC_MTOCRF stringify_in_c(mtcrf) | ||
88 | #endif | ||
89 | |||
81 | #else /* 32-bit */ | 90 | #else /* 32-bit */ |
82 | 91 | ||
83 | /* operations for longs and pointers */ | 92 | /* operations for longs and pointers */ |
@@ -89,6 +98,7 @@ | |||
89 | #define PPC_LLARX stringify_in_c(lwarx) | 98 | #define PPC_LLARX stringify_in_c(lwarx) |
90 | #define PPC_STLCX stringify_in_c(stwcx.) | 99 | #define PPC_STLCX stringify_in_c(stwcx.) |
91 | #define PPC_CNTLZL stringify_in_c(cntlzw) | 100 | #define PPC_CNTLZL stringify_in_c(cntlzw) |
101 | #define PPC_MTOCRF stringify_in_c(mtcrf) | ||
92 | 102 | ||
93 | #endif | 103 | #endif |
94 | 104 | ||