Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--  arch/powerpc/lib/Makefile                                           |   5
-rw-r--r--  arch/powerpc/lib/checksum_64.S                                      |  27
-rw-r--r--  arch/powerpc/lib/code-patching.c                                    |  14
-rw-r--r--  arch/powerpc/lib/copypage_64.S                                      |   4
-rw-r--r--  arch/powerpc/lib/copypage_power7.S                                  | 165
-rw-r--r--  arch/powerpc/lib/copyuser_power7.S                                  | 157
-rw-r--r--  arch/powerpc/lib/crtsavres.S                                        |   5
-rw-r--r--  arch/powerpc/lib/hweight_64.S                                       |  14
-rw-r--r--  arch/powerpc/lib/ldstfp.S                                           |  12
-rw-r--r--  arch/powerpc/lib/memcpy_64.S                                        |   4
-rw-r--r--  arch/powerpc/lib/memcpy_power7.S                                    | 647
-rw-r--r--  arch/powerpc/lib/string.S                                           |   2
-rw-r--r--  arch/powerpc/lib/string_64.S                                        | 202
-rw-r--r--  arch/powerpc/lib/vmx-helper.c (renamed from arch/powerpc/lib/copyuser_power7_vmx.c) |  27
14 files changed, 1199 insertions, 86 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 7735a2c2e6d9..746e0c895cd7 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -17,14 +17,15 @@ obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o \
 			   checksum_wrappers_64.o hweight_64.o \
-			   copyuser_power7.o
+			   copyuser_power7.o string_64.o copypage_power7.o \
+			   memcpy_power7.o
 obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
 obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
 
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
-obj-$(CONFIG_ALTIVEC)	+= copyuser_power7_vmx.o
+obj-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 endif
 
 obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index 18245af38aea..167f72555d60 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -65,9 +65,6 @@ _GLOBAL(csum_tcpudp_magic)
 	srwi	r3,r3,16
 	blr
 
-#define STACKFRAMESIZE 256
-#define STK_REG(i)	(112 + ((i)-14)*8)
-
 /*
  * Computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit).
@@ -114,9 +111,9 @@ _GLOBAL(csum_partial)
 	mtctr	r6
 
 	stdu	r1,-STACKFRAMESIZE(r1)
-	std	r14,STK_REG(r14)(r1)
-	std	r15,STK_REG(r15)(r1)
-	std	r16,STK_REG(r16)(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
 
 	ld	r6,0(r3)
 	ld	r9,8(r3)
@@ -175,9 +172,9 @@ _GLOBAL(csum_partial)
 	adde	r0,r0,r15
 	adde	r0,r0,r16
 
-	ld	r14,STK_REG(r14)(r1)
-	ld	r15,STK_REG(r15)(r1)
-	ld	r16,STK_REG(r16)(r1)
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
 	addi	r1,r1,STACKFRAMESIZE
 
 	andi.	r4,r4,63
@@ -299,9 +296,9 @@ dest;	sth	r6,0(r4)
 	mtctr	r6
 
 	stdu	r1,-STACKFRAMESIZE(r1)
-	std	r14,STK_REG(r14)(r1)
-	std	r15,STK_REG(r15)(r1)
-	std	r16,STK_REG(r16)(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
 
 source;	ld	r6,0(r3)
 source;	ld	r9,8(r3)
@@ -382,9 +379,9 @@ dest;	std	r16,56(r4)
 	adde	r0,r0,r15
 	adde	r0,r0,r16
 
-	ld	r14,STK_REG(r14)(r1)
-	ld	r15,STK_REG(r15)(r1)
-	ld	r16,STK_REG(r16)(r1)
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
 	addi	r1,r1,STACKFRAMESIZE
 
 	andi.	r5,r5,63
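
The STK_REG()/STACKFRAMESIZE definitions this file used to carry locally now come from a shared header (asm/ppc_asm.h), which is why the local copies are deleted and the argument spelling changes from r14 to R14. For reference, the save-slot arithmetic the removed macro performed is, as a C sketch (not kernel code):

	/* Offset of the save slot for non-volatile register rN (N >= 14)
	 * inside the 256-byte stack frame; mirrors the removed STK_REG(). */
	static inline int stk_reg_offset(int n)
	{
		return 112 + (n - 14) * 8;	/* r14 -> 112, r15 -> 120, ... */
	}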
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 7c975d43e3f3..dd223b3eb333 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -13,17 +13,23 @@
 #include <linux/mm.h>
 #include <asm/page.h>
 #include <asm/code-patching.h>
+#include <asm/uaccess.h>
 
 
-void patch_instruction(unsigned int *addr, unsigned int instr)
+int patch_instruction(unsigned int *addr, unsigned int instr)
 {
-	*addr = instr;
+	int err;
+
+	err = __put_user(instr, addr);
+	if (err)
+		return err;
 	asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
+	return 0;
 }
 
-void patch_branch(unsigned int *addr, unsigned long target, int flags)
+int patch_branch(unsigned int *addr, unsigned long target, int flags)
 {
-	patch_instruction(addr, create_branch(addr, target, flags));
+	return patch_instruction(addr, create_branch(addr, target, flags));
 }
 
 unsigned int create_branch(const unsigned int *addr,
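
patch_instruction() now writes through __put_user() and reports failure instead of assuming the store succeeds, so callers can propagate the error. A hypothetical caller might look like this (PPC_INST_NOP is the kernel's nop encoding; the error handling shown is illustrative, not from this patch):

	#include <linux/kernel.h>
	#include <asm/code-patching.h>
	#include <asm/ppc-opcode.h>

	/* Illustrative only: patch in a nop and propagate failure. */
	static int patch_nop_checked(unsigned int *addr)
	{
		int err = patch_instruction(addr, PPC_INST_NOP);

		if (err)
			pr_err("code patching failed at %p\n", addr);
		return err;
	}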
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 53dcb6b1b708..9f9434a85264 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -17,7 +17,11 @@ PPC64_CACHES:
 	.section	".text"
 
 _GLOBAL(copy_page)
+BEGIN_FTR_SECTION
 	lis	r5,PAGE_SIZE@h
+FTR_SECTION_ELSE
+	b	.copypage_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
 	ld	r10,PPC64_CACHES@toc(r2)
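
The BEGIN_FTR_SECTION/ALT_FTR_SECTION_END_IFCLR pair is resolved once at boot: on CPUs with CPU_FTR_VMX_COPY the generic body is patched out in favour of the branch to .copypage_power7, so there is no per-call feature test. As a rough C analogue (the helper names are illustrative; the real selection happens by instruction patching, not a branch):

	/* Sketch of what the feature section amounts to at runtime. */
	void copy_page_dispatch(void *to, void *from)
	{
		if (cpu_has_feature(CPU_FTR_VMX_COPY))	/* decided once at boot */
			copypage_power7(to, from);
		else
			copypage_generic(to, from);	/* hypothetical name for the original body */
	}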
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
new file mode 100644
index 000000000000..0ef75bf0695c
--- /dev/null
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -0,0 +1,165 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copypage_power7)
+	/*
+	 * We prefetch both the source and destination using enhanced touch
+	 * instructions. We use a stream ID of 0 for the load side and
+	 * 1 for the store side. Since source and destination are page
+	 * aligned we don't need to clear the bottom 7 bits of either
+	 * address.
+	 */
+	ori	r9,r3,1		/* stream=1 */
+
+#ifdef CONFIG_PPC_64K_PAGES
+	lis	r7,0x0E01	/* depth=7, units=512 */
+#else
+	lis	r7,0x0E00	/* depth=7 */
+	ori	r7,r7,0x1000	/* units=32 */
+#endif
+	ori	r10,r7,1	/* stream=1 */
+
+	lis	r8,0x8000	/* GO=1 */
+	clrldi	r8,r8,32
+
+.machine push
+.machine "power4"
+	dcbt	r0,r4,0b01000
+	dcbt	r0,r7,0b01010
+	dcbtst	r0,r9,0b01000
+	dcbtst	r0,r10,0b01010
+	eieio
+	dcbt	r0,r8,0b01010	/* GO */
+.machine pop
+
+#ifdef CONFIG_ALTIVEC
+	mflr	r0
+	std	r3,48(r1)
+	std	r4,56(r1)
+	std	r0,16(r1)
+	stdu	r1,-STACKFRAMESIZE(r1)
+	bl	.enter_vmx_copy
+	cmpwi	r3,0
+	ld	r0,STACKFRAMESIZE+16(r1)
+	ld	r3,STACKFRAMESIZE+48(r1)
+	ld	r4,STACKFRAMESIZE+56(r1)
+	mtlr	r0
+
+	li	r0,(PAGE_SIZE/128)
+	mtctr	r0
+
+	beq	.Lnonvmx_copy
+
+	addi	r1,r1,STACKFRAMESIZE
+
+	li	r6,16
+	li	r7,32
+	li	r8,48
+	li	r9,64
+	li	r10,80
+	li	r11,96
+	li	r12,112
+
+	.align	5
+1:	lvx	vr7,r0,r4
+	lvx	vr6,r4,r6
+	lvx	vr5,r4,r7
+	lvx	vr4,r4,r8
+	lvx	vr3,r4,r9
+	lvx	vr2,r4,r10
+	lvx	vr1,r4,r11
+	lvx	vr0,r4,r12
+	addi	r4,r4,128
+	stvx	vr7,r0,r3
+	stvx	vr6,r3,r6
+	stvx	vr5,r3,r7
+	stvx	vr4,r3,r8
+	stvx	vr3,r3,r9
+	stvx	vr2,r3,r10
+	stvx	vr1,r3,r11
+	stvx	vr0,r3,r12
+	addi	r3,r3,128
+	bdnz	1b
+
+	b	.exit_vmx_copy		/* tail call optimise */
+
+#else
+	li	r0,(PAGE_SIZE/128)
+	mtctr	r0
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+#endif
+
+.Lnonvmx_copy:
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+	std	r17,STK_REG(R17)(r1)
+	std	r18,STK_REG(R18)(r1)
+	std	r19,STK_REG(R19)(r1)
+	std	r20,STK_REG(R20)(r1)
+
+1:	ld	r0,0(r4)
+	ld	r5,8(r4)
+	ld	r6,16(r4)
+	ld	r7,24(r4)
+	ld	r8,32(r4)
+	ld	r9,40(r4)
+	ld	r10,48(r4)
+	ld	r11,56(r4)
+	ld	r12,64(r4)
+	ld	r14,72(r4)
+	ld	r15,80(r4)
+	ld	r16,88(r4)
+	ld	r17,96(r4)
+	ld	r18,104(r4)
+	ld	r19,112(r4)
+	ld	r20,120(r4)
+	addi	r4,r4,128
+	std	r0,0(r3)
+	std	r5,8(r3)
+	std	r6,16(r3)
+	std	r7,24(r3)
+	std	r8,32(r3)
+	std	r9,40(r3)
+	std	r10,48(r3)
+	std	r11,56(r3)
+	std	r12,64(r3)
+	std	r14,72(r3)
+	std	r15,80(r3)
+	std	r16,88(r3)
+	std	r17,96(r3)
+	std	r18,104(r3)
+	std	r19,112(r3)
+	std	r20,120(r3)
+	addi	r3,r3,128
+	bdnz	1b
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+	blr
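
The fallback loop in .Lnonvmx_copy moves the page one 128-byte cacheline per iteration, issuing sixteen 64-bit loads back to back and then sixteen stores. A minimal C sketch of the same structure, assuming 64-bit, page-aligned buffers (illustrative only):

	#include <asm/page.h>

	/* Sketch of the scalar path: one cacheline (16 x 8 bytes) per iteration. */
	static void copypage_scalar_sketch(void *to, const void *from)
	{
		unsigned long *d = to;
		const unsigned long *s = from;
		long i, j;

		for (i = 0; i < PAGE_SIZE / 128; i++) {
			unsigned long t[16];

			for (j = 0; j < 16; j++)	/* 16 loads first... */
				t[j] = s[j];
			for (j = 0; j < 16; j++)	/* ...then 16 stores */
				d[j] = t[j];
			s += 16;
			d += 16;
		}
	}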
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 497db7b23bb1..f9ede7c6606e 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -19,9 +19,6 @@
  */
 #include <asm/ppc_asm.h>
 
-#define STACKFRAMESIZE 256
-#define STK_REG(i)	(112 + ((i)-14)*8)
-
 .macro err1
 100:
 	.section __ex_table,"a"
@@ -57,26 +54,26 @@
 
 
 .Ldo_err4:
-	ld	r16,STK_REG(r16)(r1)
-	ld	r15,STK_REG(r15)(r1)
-	ld	r14,STK_REG(r14)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r14,STK_REG(R14)(r1)
 .Ldo_err3:
-	bl	.exit_vmx_copy
+	bl	.exit_vmx_usercopy
 	ld	r0,STACKFRAMESIZE+16(r1)
 	mtlr	r0
 	b	.Lexit
 #endif /* CONFIG_ALTIVEC */
 
 .Ldo_err2:
-	ld	r22,STK_REG(r22)(r1)
-	ld	r21,STK_REG(r21)(r1)
-	ld	r20,STK_REG(r20)(r1)
-	ld	r19,STK_REG(r19)(r1)
-	ld	r18,STK_REG(r18)(r1)
-	ld	r17,STK_REG(r17)(r1)
-	ld	r16,STK_REG(r16)(r1)
-	ld	r15,STK_REG(r15)(r1)
-	ld	r14,STK_REG(r14)(r1)
+	ld	r22,STK_REG(R22)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r14,STK_REG(R14)(r1)
 .Lexit:
 	addi	r1,r1,STACKFRAMESIZE
 .Ldo_err1:
@@ -137,15 +134,15 @@ err1;	stw	r0,0(r3)
 
 	mflr	r0
 	stdu	r1,-STACKFRAMESIZE(r1)
-	std	r14,STK_REG(r14)(r1)
-	std	r15,STK_REG(r15)(r1)
-	std	r16,STK_REG(r16)(r1)
-	std	r17,STK_REG(r17)(r1)
-	std	r18,STK_REG(r18)(r1)
-	std	r19,STK_REG(r19)(r1)
-	std	r20,STK_REG(r20)(r1)
-	std	r21,STK_REG(r21)(r1)
-	std	r22,STK_REG(r22)(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+	std	r17,STK_REG(R17)(r1)
+	std	r18,STK_REG(R18)(r1)
+	std	r19,STK_REG(R19)(r1)
+	std	r20,STK_REG(R20)(r1)
+	std	r21,STK_REG(R21)(r1)
+	std	r22,STK_REG(R22)(r1)
 	std	r0,STACKFRAMESIZE+16(r1)
 
 	srdi	r6,r5,7
@@ -192,15 +189,15 @@ err2;	std	r21,120(r3)
 
 	clrldi	r5,r5,(64-7)
 
-	ld	r14,STK_REG(r14)(r1)
-	ld	r15,STK_REG(r15)(r1)
-	ld	r16,STK_REG(r16)(r1)
-	ld	r17,STK_REG(r17)(r1)
-	ld	r18,STK_REG(r18)(r1)
-	ld	r19,STK_REG(r19)(r1)
-	ld	r20,STK_REG(r20)(r1)
-	ld	r21,STK_REG(r21)(r1)
-	ld	r22,STK_REG(r22)(r1)
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r22,STK_REG(R22)(r1)
 	addi	r1,r1,STACKFRAMESIZE
 
 	/* Up to 127B to go */
@@ -290,7 +287,7 @@ err1;	stb	r0,0(r3)
 	mflr	r0
 	std	r0,16(r1)
 	stdu	r1,-STACKFRAMESIZE(r1)
-	bl	.enter_vmx_copy
+	bl	.enter_vmx_usercopy
 	cmpwi	r3,0
 	ld	r0,STACKFRAMESIZE+16(r1)
 	ld	r3,STACKFRAMESIZE+48(r1)
@@ -298,6 +295,68 @@ err1;	stb	r0,0(r3)
 	ld	r5,STACKFRAMESIZE+64(r1)
 	mtlr	r0
 
+	/*
+	 * We prefetch both the source and destination using enhanced touch
+	 * instructions. We use a stream ID of 0 for the load side and
+	 * 1 for the store side.
+	 */
+	clrrdi	r6,r4,7
+	clrrdi	r9,r3,7
+	ori	r9,r9,1		/* stream=1 */
+
+	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
+	cmpldi	r7,0x3FF
+	ble	1f
+	li	r7,0x3FF
+1:	lis	r0,0x0E00	/* depth=7 */
+	sldi	r7,r7,7
+	or	r7,r7,r0
+	ori	r10,r7,1	/* stream=1 */
+
+	lis	r8,0x8000	/* GO=1 */
+	clrldi	r8,r8,32
+
+.machine push
+.machine "power4"
+	dcbt	r0,r6,0b01000
+	dcbt	r0,r7,0b01010
+	dcbtst	r0,r9,0b01000
+	dcbtst	r0,r10,0b01010
+	eieio
+	dcbt	r0,r8,0b01010	/* GO */
+.machine pop
+
+	/*
+	 * We prefetch both the source and destination using enhanced touch
+	 * instructions. We use a stream ID of 0 for the load side and
+	 * 1 for the store side.
+	 */
+	clrrdi	r6,r4,7
+	clrrdi	r9,r3,7
+	ori	r9,r9,1		/* stream=1 */
+
+	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
+	cmpldi	cr1,r7,0x3FF
+	ble	cr1,1f
+	li	r7,0x3FF
+1:	lis	r0,0x0E00	/* depth=7 */
+	sldi	r7,r7,7
+	or	r7,r7,r0
+	ori	r10,r7,1	/* stream=1 */
+
+	lis	r8,0x8000	/* GO=1 */
+	clrldi	r8,r8,32
+
+.machine push
+.machine "power4"
+	dcbt	r0,r6,0b01000
+	dcbt	r0,r7,0b01010
+	dcbtst	r0,r9,0b01000
+	dcbtst	r0,r10,0b01010
+	eieio
+	dcbt	r0,r8,0b01010	/* GO */
+.machine pop
+
 	beq	.Lunwind_stack_nonvmx_copy
 
 	/*
@@ -378,9 +437,9 @@ err3;	stvx	vr0,r3,r11
 7:	sub	r5,r5,r6
 	srdi	r6,r5,7
 
-	std	r14,STK_REG(r14)(r1)
-	std	r15,STK_REG(r15)(r1)
-	std	r16,STK_REG(r16)(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
 
 	li	r12,64
 	li	r14,80
@@ -415,9 +474,9 @@ err4;	stvx	vr0,r3,r16
 	addi	r3,r3,128
 	bdnz	8b
 
-	ld	r14,STK_REG(r14)(r1)
-	ld	r15,STK_REG(r15)(r1)
-	ld	r16,STK_REG(r16)(r1)
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
 
 	/* Up to 127B to go */
 	clrldi	r5,r5,(64-7)
@@ -476,7 +535,7 @@ err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	b	.exit_vmx_copy		/* tail call optimise */
+	b	.exit_vmx_usercopy	/* tail call optimise */
 
 .Lvmx_unaligned_copy:
 	/* Get the destination 16B aligned */
@@ -563,9 +622,9 @@ err3;	stvx	vr11,r3,r11
 7:	sub	r5,r5,r6
 	srdi	r6,r5,7
 
-	std	r14,STK_REG(r14)(r1)
-	std	r15,STK_REG(r15)(r1)
-	std	r16,STK_REG(r16)(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
 
 	li	r12,64
 	li	r14,80
@@ -608,9 +667,9 @@ err4;	stvx	vr15,r3,r16
 	addi	r3,r3,128
 	bdnz	8b
 
-	ld	r14,STK_REG(r14)(r1)
-	ld	r15,STK_REG(r15)(r1)
-	ld	r16,STK_REG(r16)(r1)
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
 
 	/* Up to 127B to go */
 	clrldi	r5,r5,(64-7)
@@ -679,5 +738,5 @@ err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	b	.exit_vmx_copy		/* tail call optimise */
+	b	.exit_vmx_usercopy	/* tail call optimise */
 #endif /* CONFIG_ALTIVEC */
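
The prefetch setup added above encodes a stream descriptor for the enhanced-touch forms of dcbt/dcbtst: the transfer length is expressed in 128-byte cachelines and capped at 0x3FF before being merged with the depth field. The value built in r7 is, as a C sketch of the same arithmetic (srdi; cap; lis 0x0E00; sldi; or):

	/* Mirror of the stream-descriptor computation in the hunk above. */
	static unsigned long stream_descriptor(unsigned long len)
	{
		unsigned long cachelines = len >> 7;	/* length in 128B lines */

		if (cachelines > 0x3FF)			/* cap at the field width */
			cachelines = 0x3FF;
		return (cachelines << 7) | 0x0E000000UL;	/* depth=7 */
	}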
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
index 1c893f05d224..b2c68ce139ae 100644
--- a/arch/powerpc/lib/crtsavres.S
+++ b/arch/powerpc/lib/crtsavres.S
@@ -41,12 +41,13 @@
 #include <asm/ppc_asm.h>
 
 	.file	"crtsavres.S"
-	.section ".text"
 
 #ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 
 #ifndef CONFIG_PPC64
 
+	.section ".text"
+
 /* Routines for saving integer registers, called by the compiler. */
 /* Called with r11 pointing to the stack header word of the caller of the */
 /* function, just beyond the end of the integer save area. */
@@ -232,6 +233,8 @@ _GLOBAL(_rest32gpr_31_x)
 
 #else /* CONFIG_PPC64 */
 
+	.section ".text.save.restore","ax",@progbits
+
 	.globl	_savegpr0_14
 _savegpr0_14:
 	std	r14,-144(r1)
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
index fda27868cf8c..9b96ff2ecd4d 100644
--- a/arch/powerpc/lib/hweight_64.S
+++ b/arch/powerpc/lib/hweight_64.S
@@ -28,7 +28,7 @@ BEGIN_FTR_SECTION
 	nop
 	nop
 FTR_SECTION_ELSE
-	PPC_POPCNTB(r3,r3)
+	PPC_POPCNTB(R3,R3)
 	clrldi	r3,r3,64-8
 	blr
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
@@ -42,14 +42,14 @@ BEGIN_FTR_SECTION
 	nop
 FTR_SECTION_ELSE
 	BEGIN_FTR_SECTION_NESTED(50)
-	PPC_POPCNTB(r3,r3)
+	PPC_POPCNTB(R3,R3)
 	srdi	r4,r3,8
 	add	r3,r4,r3
 	clrldi	r3,r3,64-8
 	blr
 	FTR_SECTION_ELSE_NESTED(50)
 	clrlwi	r3,r3,16
-	PPC_POPCNTW(r3,r3)
+	PPC_POPCNTW(R3,R3)
 	clrldi	r3,r3,64-8
 	blr
 	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
@@ -66,7 +66,7 @@ BEGIN_FTR_SECTION
 	nop
 FTR_SECTION_ELSE
 	BEGIN_FTR_SECTION_NESTED(51)
-	PPC_POPCNTB(r3,r3)
+	PPC_POPCNTB(R3,R3)
 	srdi	r4,r3,16
 	add	r3,r4,r3
 	srdi	r4,r3,8
@@ -74,7 +74,7 @@ FTR_SECTION_ELSE
 	clrldi	r3,r3,64-8
 	blr
 	FTR_SECTION_ELSE_NESTED(51)
-	PPC_POPCNTW(r3,r3)
+	PPC_POPCNTW(R3,R3)
 	clrldi	r3,r3,64-8
 	blr
 	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
@@ -93,7 +93,7 @@ BEGIN_FTR_SECTION
 	nop
 FTR_SECTION_ELSE
 	BEGIN_FTR_SECTION_NESTED(52)
-	PPC_POPCNTB(r3,r3)
+	PPC_POPCNTB(R3,R3)
 	srdi	r4,r3,32
 	add	r3,r4,r3
 	srdi	r4,r3,16
@@ -103,7 +103,7 @@ FTR_SECTION_ELSE
 	clrldi	r3,r3,64-8
 	blr
 	FTR_SECTION_ELSE_NESTED(52)
-	PPC_POPCNTD(r3,r3)
+	PPC_POPCNTD(R3,R3)
 	clrldi	r3,r3,64-8
 	blr
 	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
index 6a85380520b6..85aec08ab234 100644
--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
@@ -330,13 +330,13 @@ _GLOBAL(do_lxvd2x)
 	MTMSRD(r7)
 	isync
 	beq	cr7,1f
-	STXVD2X(0,r1,r8)
+	STXVD2X(0,R1,R8)
 1:	li	r9,-EFAULT
-2:	LXVD2X(0,0,r4)
+2:	LXVD2X(0,R0,R4)
 	li	r9,0
 3:	beq	cr7,4f
 	bl	put_vsr
-	LXVD2X(0,r1,r8)
+	LXVD2X(0,R1,R8)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
 	MTMSRD(r6)
@@ -358,13 +358,13 @@ _GLOBAL(do_stxvd2x)
 	MTMSRD(r7)
 	isync
 	beq	cr7,1f
-	STXVD2X(0,r1,r8)
+	STXVD2X(0,R1,R8)
 	bl	get_vsr
 1:	li	r9,-EFAULT
-2:	STXVD2X(0,0,r4)
+2:	STXVD2X(0,R0,R4)
 	li	r9,0
 3:	beq	cr7,4f
-	LXVD2X(0,r1,r8)
+	LXVD2X(0,R1,R8)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
 	MTMSRD(r6)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 82fea3963e15..d2bbbc8d7dc0 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -11,7 +11,11 @@
 
 	.align	7
 _GLOBAL(memcpy)
+BEGIN_FTR_SECTION
 	std	r3,48(r1)	/* save destination pointer for return value */
+FTR_SECTION_ELSE
+	b	memcpy_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 	PPC_MTOCRF(0x01,r5)
 	cmpldi	cr1,r5,16
 	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
new file mode 100644
index 000000000000..0efdc51bc716
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -0,0 +1,647 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+
+_GLOBAL(memcpy_power7)
+#ifdef CONFIG_ALTIVEC
+	cmpldi	r5,16
+	cmpldi	cr1,r5,4096
+
+	std	r3,48(r1)
+
+	blt	.Lshort_copy
+	bgt	cr1,.Lvmx_copy
+#else
+	cmpldi	r5,16
+
+	std	r3,48(r1)
+
+	blt	.Lshort_copy
+#endif
+
+.Lnonvmx_copy:
+	/* Get the source 8B aligned */
+	neg	r6,r4
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-3)
+
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	addi	r4,r4,1
+	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	sub	r5,r5,r6
+	cmpldi	r5,128
+	blt	5f
+
+	mflr	r0
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+	std	r17,STK_REG(R17)(r1)
+	std	r18,STK_REG(R18)(r1)
+	std	r19,STK_REG(R19)(r1)
+	std	r20,STK_REG(R20)(r1)
+	std	r21,STK_REG(R21)(r1)
+	std	r22,STK_REG(R22)(r1)
+	std	r0,STACKFRAMESIZE+16(r1)
+
+	srdi	r6,r5,7
+	mtctr	r6
+
+	/* Now do cacheline (128B) sized loads and stores. */
+	.align	5
+4:
+	ld	r0,0(r4)
+	ld	r6,8(r4)
+	ld	r7,16(r4)
+	ld	r8,24(r4)
+	ld	r9,32(r4)
+	ld	r10,40(r4)
+	ld	r11,48(r4)
+	ld	r12,56(r4)
+	ld	r14,64(r4)
+	ld	r15,72(r4)
+	ld	r16,80(r4)
+	ld	r17,88(r4)
+	ld	r18,96(r4)
+	ld	r19,104(r4)
+	ld	r20,112(r4)
+	ld	r21,120(r4)
+	addi	r4,r4,128
+	std	r0,0(r3)
+	std	r6,8(r3)
+	std	r7,16(r3)
+	std	r8,24(r3)
+	std	r9,32(r3)
+	std	r10,40(r3)
+	std	r11,48(r3)
+	std	r12,56(r3)
+	std	r14,64(r3)
+	std	r15,72(r3)
+	std	r16,80(r3)
+	std	r17,88(r3)
+	std	r18,96(r3)
+	std	r19,104(r3)
+	std	r20,112(r3)
+	std	r21,120(r3)
+	addi	r3,r3,128
+	bdnz	4b
+
+	clrldi	r5,r5,(64-7)
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r22,STK_REG(R22)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	/* Up to 127B to go */
+5:	srdi	r6,r5,4
+	mtocrf	0x01,r6
+
+6:	bf	cr7*4+1,7f
+	ld	r0,0(r4)
+	ld	r6,8(r4)
+	ld	r7,16(r4)
+	ld	r8,24(r4)
+	ld	r9,32(r4)
+	ld	r10,40(r4)
+	ld	r11,48(r4)
+	ld	r12,56(r4)
+	addi	r4,r4,64
+	std	r0,0(r3)
+	std	r6,8(r3)
+	std	r7,16(r3)
+	std	r8,24(r3)
+	std	r9,32(r3)
+	std	r10,40(r3)
+	std	r11,48(r3)
+	std	r12,56(r3)
+	addi	r3,r3,64
+
+	/* Up to 63B to go */
+7:	bf	cr7*4+2,8f
+	ld	r0,0(r4)
+	ld	r6,8(r4)
+	ld	r7,16(r4)
+	ld	r8,24(r4)
+	addi	r4,r4,32
+	std	r0,0(r3)
+	std	r6,8(r3)
+	std	r7,16(r3)
+	std	r8,24(r3)
+	addi	r3,r3,32
+
+	/* Up to 31B to go */
+8:	bf	cr7*4+3,9f
+	ld	r0,0(r4)
+	ld	r6,8(r4)
+	addi	r4,r4,16
+	std	r0,0(r3)
+	std	r6,8(r3)
+	addi	r3,r3,16
+
+9:	clrldi	r5,r5,(64-4)
+
+	/* Up to 15B to go */
+.Lshort_copy:
+	mtocrf	0x01,r5
+	bf	cr7*4+0,12f
+	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+	lwz	r6,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r6,4(r3)
+	addi	r3,r3,8
+
+12:	bf	cr7*4+1,13f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+13:	bf	cr7*4+2,14f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+14:	bf	cr7*4+3,15f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+
+15:	ld	r3,48(r1)
+	blr
+
+.Lunwind_stack_nonvmx_copy:
+	addi	r1,r1,STACKFRAMESIZE
+	b	.Lnonvmx_copy
+
+#ifdef CONFIG_ALTIVEC
+.Lvmx_copy:
+	mflr	r0
+	std	r4,56(r1)
+	std	r5,64(r1)
+	std	r0,16(r1)
+	stdu	r1,-STACKFRAMESIZE(r1)
+	bl	.enter_vmx_copy
+	cmpwi	r3,0
+	ld	r0,STACKFRAMESIZE+16(r1)
+	ld	r3,STACKFRAMESIZE+48(r1)
+	ld	r4,STACKFRAMESIZE+56(r1)
+	ld	r5,STACKFRAMESIZE+64(r1)
+	mtlr	r0
+
+	/*
+	 * We prefetch both the source and destination using enhanced touch
+	 * instructions. We use a stream ID of 0 for the load side and
+	 * 1 for the store side.
+	 */
+	clrrdi	r6,r4,7
+	clrrdi	r9,r3,7
+	ori	r9,r9,1		/* stream=1 */
+
+	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
+	cmpldi	cr1,r7,0x3FF
+	ble	cr1,1f
+	li	r7,0x3FF
+1:	lis	r0,0x0E00	/* depth=7 */
+	sldi	r7,r7,7
+	or	r7,r7,r0
+	ori	r10,r7,1	/* stream=1 */
+
+	lis	r8,0x8000	/* GO=1 */
+	clrldi	r8,r8,32
+
+.machine push
+.machine "power4"
+	dcbt	r0,r6,0b01000
+	dcbt	r0,r7,0b01010
+	dcbtst	r0,r9,0b01000
+	dcbtst	r0,r10,0b01010
+	eieio
+	dcbt	r0,r8,0b01010	/* GO */
+.machine pop
+
+	beq	.Lunwind_stack_nonvmx_copy
+
+	/*
+	 * If source and destination are not relatively aligned we use a
+	 * slower permute loop.
+	 */
+	xor	r6,r4,r3
+	rldicl.	r6,r6,0,(64-4)
+	bne	.Lvmx_unaligned_copy
+
+	/* Get the destination 16B aligned */
+	neg	r6,r3
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-4)
+
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	addi	r4,r4,1
+	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	bf	cr7*4+0,4f
+	ld	r0,0(r4)
+	addi	r4,r4,8
+	std	r0,0(r3)
+	addi	r3,r3,8
+
+4:	sub	r5,r5,r6
+
+	/* Get the destination 128B aligned */
+	neg	r6,r3
+	srdi	r7,r6,4
+	mtocrf	0x01,r7
+	clrldi	r6,r6,(64-7)
+
+	li	r9,16
+	li	r10,32
+	li	r11,48
+
+	bf	cr7*4+3,5f
+	lvx	vr1,r0,r4
+	addi	r4,r4,16
+	stvx	vr1,r0,r3
+	addi	r3,r3,16
+
+5:	bf	cr7*4+2,6f
+	lvx	vr1,r0,r4
+	lvx	vr0,r4,r9
+	addi	r4,r4,32
+	stvx	vr1,r0,r3
+	stvx	vr0,r3,r9
+	addi	r3,r3,32
+
+6:	bf	cr7*4+1,7f
+	lvx	vr3,r0,r4
+	lvx	vr2,r4,r9
+	lvx	vr1,r4,r10
+	lvx	vr0,r4,r11
+	addi	r4,r4,64
+	stvx	vr3,r0,r3
+	stvx	vr2,r3,r9
+	stvx	vr1,r3,r10
+	stvx	vr0,r3,r11
+	addi	r3,r3,64
+
+7:	sub	r5,r5,r6
+	srdi	r6,r5,7
+
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+
+	li	r12,64
+	li	r14,80
+	li	r15,96
+	li	r16,112
+
+	mtctr	r6
+
+	/*
+	 * Now do cacheline sized loads and stores. By this stage the
+	 * cacheline stores are also cacheline aligned.
+	 */
+	.align	5
+8:
+	lvx	vr7,r0,r4
+	lvx	vr6,r4,r9
+	lvx	vr5,r4,r10
+	lvx	vr4,r4,r11
+	lvx	vr3,r4,r12
+	lvx	vr2,r4,r14
+	lvx	vr1,r4,r15
+	lvx	vr0,r4,r16
+	addi	r4,r4,128
+	stvx	vr7,r0,r3
+	stvx	vr6,r3,r9
+	stvx	vr5,r3,r10
+	stvx	vr4,r3,r11
+	stvx	vr3,r3,r12
+	stvx	vr2,r3,r14
+	stvx	vr1,r3,r15
+	stvx	vr0,r3,r16
+	addi	r3,r3,128
+	bdnz	8b
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+
+	/* Up to 127B to go */
+	clrldi	r5,r5,(64-7)
+	srdi	r6,r5,4
+	mtocrf	0x01,r6
+
+	bf	cr7*4+1,9f
+	lvx	vr3,r0,r4
+	lvx	vr2,r4,r9
+	lvx	vr1,r4,r10
+	lvx	vr0,r4,r11
+	addi	r4,r4,64
+	stvx	vr3,r0,r3
+	stvx	vr2,r3,r9
+	stvx	vr1,r3,r10
+	stvx	vr0,r3,r11
+	addi	r3,r3,64
+
+9:	bf	cr7*4+2,10f
+	lvx	vr1,r0,r4
+	lvx	vr0,r4,r9
+	addi	r4,r4,32
+	stvx	vr1,r0,r3
+	stvx	vr0,r3,r9
+	addi	r3,r3,32
+
+10:	bf	cr7*4+3,11f
+	lvx	vr1,r0,r4
+	addi	r4,r4,16
+	stvx	vr1,r0,r3
+	addi	r3,r3,16
+
+	/* Up to 15B to go */
+11:	clrldi	r5,r5,(64-4)
+	mtocrf	0x01,r5
+	bf	cr7*4+0,12f
+	ld	r0,0(r4)
+	addi	r4,r4,8
+	std	r0,0(r3)
+	addi	r3,r3,8
+
+12:	bf	cr7*4+1,13f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+13:	bf	cr7*4+2,14f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+14:	bf	cr7*4+3,15f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+
+15:	addi	r1,r1,STACKFRAMESIZE
+	ld	r3,48(r1)
+	b	.exit_vmx_copy		/* tail call optimise */
+
+.Lvmx_unaligned_copy:
+	/* Get the destination 16B aligned */
+	neg	r6,r3
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-4)
+
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	addi	r4,r4,1
+	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	bf	cr7*4+0,4f
+	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+	lwz	r7,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r7,4(r3)
+	addi	r3,r3,8
+
+4:	sub	r5,r5,r6
+
+	/* Get the destination 128B aligned */
+	neg	r6,r3
+	srdi	r7,r6,4
+	mtocrf	0x01,r7
+	clrldi	r6,r6,(64-7)
+
+	li	r9,16
+	li	r10,32
+	li	r11,48
+
+	lvsl	vr16,0,r4	/* Setup permute control vector */
+	lvx	vr0,0,r4
+	addi	r4,r4,16
+
+	bf	cr7*4+3,5f
+	lvx	vr1,r0,r4
+	vperm	vr8,vr0,vr1,vr16
+	addi	r4,r4,16
+	stvx	vr8,r0,r3
+	addi	r3,r3,16
+	vor	vr0,vr1,vr1
+
+5:	bf	cr7*4+2,6f
+	lvx	vr1,r0,r4
+	vperm	vr8,vr0,vr1,vr16
+	lvx	vr0,r4,r9
+	vperm	vr9,vr1,vr0,vr16
+	addi	r4,r4,32
+	stvx	vr8,r0,r3
+	stvx	vr9,r3,r9
+	addi	r3,r3,32
+
+6:	bf	cr7*4+1,7f
+	lvx	vr3,r0,r4
+	vperm	vr8,vr0,vr3,vr16
+	lvx	vr2,r4,r9
+	vperm	vr9,vr3,vr2,vr16
+	lvx	vr1,r4,r10
+	vperm	vr10,vr2,vr1,vr16
+	lvx	vr0,r4,r11
+	vperm	vr11,vr1,vr0,vr16
+	addi	r4,r4,64
+	stvx	vr8,r0,r3
+	stvx	vr9,r3,r9
+	stvx	vr10,r3,r10
+	stvx	vr11,r3,r11
+	addi	r3,r3,64
+
+7:	sub	r5,r5,r6
+	srdi	r6,r5,7
+
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+
+	li	r12,64
+	li	r14,80
+	li	r15,96
+	li	r16,112
+
+	mtctr	r6
+
+	/*
+	 * Now do cacheline sized loads and stores. By this stage the
+	 * cacheline stores are also cacheline aligned.
+	 */
+	.align	5
+8:
+	lvx	vr7,r0,r4
+	vperm	vr8,vr0,vr7,vr16
+	lvx	vr6,r4,r9
+	vperm	vr9,vr7,vr6,vr16
+	lvx	vr5,r4,r10
+	vperm	vr10,vr6,vr5,vr16
+	lvx	vr4,r4,r11
+	vperm	vr11,vr5,vr4,vr16
+	lvx	vr3,r4,r12
+	vperm	vr12,vr4,vr3,vr16
+	lvx	vr2,r4,r14
+	vperm	vr13,vr3,vr2,vr16
+	lvx	vr1,r4,r15
+	vperm	vr14,vr2,vr1,vr16
+	lvx	vr0,r4,r16
+	vperm	vr15,vr1,vr0,vr16
+	addi	r4,r4,128
+	stvx	vr8,r0,r3
+	stvx	vr9,r3,r9
+	stvx	vr10,r3,r10
+	stvx	vr11,r3,r11
+	stvx	vr12,r3,r12
+	stvx	vr13,r3,r14
+	stvx	vr14,r3,r15
+	stvx	vr15,r3,r16
+	addi	r3,r3,128
+	bdnz	8b
+
+	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+
+	/* Up to 127B to go */
+	clrldi	r5,r5,(64-7)
+	srdi	r6,r5,4
+	mtocrf	0x01,r6
+
+	bf	cr7*4+1,9f
+	lvx	vr3,r0,r4
+	vperm	vr8,vr0,vr3,vr16
+	lvx	vr2,r4,r9
+	vperm	vr9,vr3,vr2,vr16
+	lvx	vr1,r4,r10
+	vperm	vr10,vr2,vr1,vr16
+	lvx	vr0,r4,r11
+	vperm	vr11,vr1,vr0,vr16
+	addi	r4,r4,64
+	stvx	vr8,r0,r3
+	stvx	vr9,r3,r9
+	stvx	vr10,r3,r10
+	stvx	vr11,r3,r11
+	addi	r3,r3,64
+
+9:	bf	cr7*4+2,10f
+	lvx	vr1,r0,r4
+	vperm	vr8,vr0,vr1,vr16
+	lvx	vr0,r4,r9
+	vperm	vr9,vr1,vr0,vr16
+	addi	r4,r4,32
+	stvx	vr8,r0,r3
+	stvx	vr9,r3,r9
+	addi	r3,r3,32
+
+10:	bf	cr7*4+3,11f
+	lvx	vr1,r0,r4
+	vperm	vr8,vr0,vr1,vr16
+	addi	r4,r4,16
+	stvx	vr8,r0,r3
+	addi	r3,r3,16
+
+	/* Up to 15B to go */
+11:	clrldi	r5,r5,(64-4)
+	addi	r4,r4,-16	/* Unwind the +16 load offset */
+	mtocrf	0x01,r5
+	bf	cr7*4+0,12f
+	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+	lwz	r6,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r6,4(r3)
+	addi	r3,r3,8
+
+12:	bf	cr7*4+1,13f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+
+13:	bf	cr7*4+2,14f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+
+14:	bf	cr7*4+3,15f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+
+15:	addi	r1,r1,STACKFRAMESIZE
+	ld	r3,48(r1)
+	b	.exit_vmx_copy		/* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
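
memcpy_power7 only takes the straight lvx/stvx loop when source and destination share the same offset within a 16-byte quadword; otherwise it drops into .Lvmx_unaligned_copy, where lvsl builds a permute control vector and each stored quadword is assembled from two adjacent aligned loads via vperm. The alignment test itself (xor r6,r4,r3; rldicl. r6,r6,0,(64-4)) is, in C:

	#include <stdbool.h>

	/* True if src and dst are "relatively aligned": same low 4 address bits. */
	static bool relatively_aligned(const void *dst, const void *src)
	{
		return (((unsigned long)dst ^ (unsigned long)src) & 0xf) == 0;
	}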
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 093d6316435c..1b5a0a09d609 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -119,6 +119,7 @@ _GLOBAL(memchr)
 2:	li	r3,0
 	blr
 
+#ifdef CONFIG_PPC32
 _GLOBAL(__clear_user)
 	addi	r6,r3,-4
 	li	r3,0
@@ -160,3 +161,4 @@ _GLOBAL(__clear_user)
 	PPC_LONG	1b,91b
 	PPC_LONG	8b,92b
 	.text
+#endif
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
new file mode 100644
index 000000000000..3b1e48049faf
--- /dev/null
+++ b/arch/powerpc/lib/string_64.S
@@ -0,0 +1,202 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+	.section	".toc","aw"
+PPC64_CACHES:
+	.tc		ppc64_caches[TC],ppc64_caches
+	.section	".text"
+
+/**
+ * __clear_user: - Zero a block of memory in user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @n:    Number of bytes to zero.
+ *
+ * Zero a block of memory in user space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+
+.macro err1
+100:
+	.section __ex_table,"a"
+	.align 3
+	.llong 100b,.Ldo_err1
+	.previous
+.endm
+
+.macro err2
+200:
+	.section __ex_table,"a"
+	.align 3
+	.llong 200b,.Ldo_err2
+	.previous
+.endm
+
+.macro err3
+300:
+	.section __ex_table,"a"
+	.align 3
+	.llong 300b,.Ldo_err3
+	.previous
+.endm
+
+.Ldo_err1:
+	mr	r3,r8
+
+.Ldo_err2:
+	mtctr	r4
+1:
+err3;	stb	r0,0(r3)
+	addi	r3,r3,1
+	addi	r4,r4,-1
+	bdnz	1b
+
+.Ldo_err3:
+	mr	r3,r4
+	blr
+
+_GLOBAL(__clear_user)
+	cmpdi	r4,32
+	neg	r6,r3
+	li	r0,0
+	blt	.Lshort_clear
+	mr	r8,r3
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-3)
+
+	/* Get the destination 8 byte aligned */
+	bf	cr7*4+3,1f
+err1;	stb	r0,0(r3)
+	addi	r3,r3,1
+
+1:	bf	cr7*4+2,2f
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+2:	bf	cr7*4+1,3f
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+3:	sub	r4,r4,r6
+
+	cmpdi	r4,32
+	cmpdi	cr1,r4,512
+	blt	.Lshort_clear
+	bgt	cr1,.Llong_clear
+
+.Lmedium_clear:
+	srdi	r6,r4,5
+	mtctr	r6
+
+	/* Do 32 byte chunks */
+4:
+err2;	std	r0,0(r3)
+err2;	std	r0,8(r3)
+err2;	std	r0,16(r3)
+err2;	std	r0,24(r3)
+	addi	r3,r3,32
+	addi	r4,r4,-32
+	bdnz	4b
+
+.Lshort_clear:
+	/* up to 31 bytes to go */
+	cmpdi	r4,16
+	blt	6f
+err2;	std	r0,0(r3)
+err2;	std	r0,8(r3)
+	addi	r3,r3,16
+	addi	r4,r4,-16
+
+	/* Up to 15 bytes to go */
+6:	mr	r8,r3
+	clrldi	r4,r4,(64-4)
+	mtocrf	0x01,r4
+	bf	cr7*4+0,7f
+err1;	std	r0,0(r3)
+	addi	r3,r3,8
+
+7:	bf	cr7*4+1,8f
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+
+8:	bf	cr7*4+2,9f
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+
+9:	bf	cr7*4+3,10f
+err1;	stb	r0,0(r3)
+
+10:	li	r3,0
+	blr
+
+.Llong_clear:
+	ld	r5,PPC64_CACHES@toc(r2)
+
+	bf	cr7*4+0,11f
+err2;	std	r0,0(r3)
+	addi	r3,r3,8
+	addi	r4,r4,-8
+
+	/* Destination is 16 byte aligned, need to get it cacheline aligned */
+11:	lwz	r7,DCACHEL1LOGLINESIZE(r5)
+	lwz	r9,DCACHEL1LINESIZE(r5)
+
+	/*
+	 * With worst case alignment the long clear loop takes a minimum
+	 * of 1 byte less than 2 cachelines.
+	 */
+	sldi	r10,r9,2
+	cmpd	r4,r10
+	blt	.Lmedium_clear
+
+	neg	r6,r3
+	addi	r10,r9,-1
+	and.	r5,r6,r10
+	beq	13f
+
+	srdi	r6,r5,4
+	mtctr	r6
+	mr	r8,r3
+12:
+err1;	std	r0,0(r3)
+err1;	std	r0,8(r3)
+	addi	r3,r3,16
+	bdnz	12b
+
+	sub	r4,r4,r5
+
+13:	srd	r6,r4,r7
+	mtctr	r6
+	mr	r8,r3
+14:
+err1;	dcbz	r0,r3
+	add	r3,r3,r9
+	bdnz	14b
+
+	and	r4,r4,r10
+
+	cmpdi	r4,32
+	blt	.Lshort_clear
+	b	.Lmedium_clear
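
As the kerneldoc above states, this 64-bit __clear_user() returns the number of bytes that could not be cleared rather than an errno, so a typical caller converts a non-zero result into -EFAULT. A usage sketch (the surrounding function is illustrative):

	/* Illustrative caller: access_ok() must already have been checked. */
	static long zero_user_buffer(void __user *buf, unsigned long len)
	{
		if (__clear_user(buf, len))	/* non-zero => bytes left uncleared */
			return -EFAULT;
		return 0;
	}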
diff --git a/arch/powerpc/lib/copyuser_power7_vmx.c b/arch/powerpc/lib/vmx-helper.c
index bf2654f2b68e..3cf529ceec5b 100644
--- a/arch/powerpc/lib/copyuser_power7_vmx.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -22,7 +22,7 @@
 #include <linux/hardirq.h>
 #include <asm/switch_to.h>
 
-int enter_vmx_copy(void)
+int enter_vmx_usercopy(void)
 {
 	if (in_interrupt())
 		return 0;
@@ -44,8 +44,31 @@ int enter_vmx_copy(void)
  * This function must return 0 because we tail call optimise when calling
  * from __copy_tofrom_user_power7 which returns 0 on success.
  */
-int exit_vmx_copy(void)
+int exit_vmx_usercopy(void)
 {
 	pagefault_enable();
 	return 0;
 }
+
+int enter_vmx_copy(void)
+{
+	if (in_interrupt())
+		return 0;
+
+	preempt_disable();
+
+	enable_kernel_altivec();
+
+	return 1;
+}
+
+/*
+ * All calls to this function will be optimised into tail calls. We are
+ * passed a pointer to the destination which we return as required by a
+ * memcpy implementation.
+ */
+void *exit_vmx_copy(void *dest)
+{
+	preempt_enable();
+	return dest;
+}
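
enter_vmx_copy()/exit_vmx_copy() bracket the VMX loop in memcpy_power7: entry fails cleanly from interrupt context (where the Altivec state cannot be borrowed), and exit hands back the destination pointer so the assembly can tail-call it while keeping memcpy's return-value contract. The calling pattern, sketched in C (the two loop helpers are hypothetical names):

	/* Sketch of how memcpy_power7 uses the helpers above. */
	void *memcpy_vmx_sketch(void *dst, const void *src, unsigned long n)
	{
		if (!enter_vmx_copy())			/* in_interrupt(): no VMX */
			return scalar_copy(dst, src, n);	/* hypothetical fallback */

		vmx_copy_loop(dst, src, n);		/* hypothetical VMX loop */
		return exit_vmx_copy(dst);		/* re-enables preemption, returns dst */
	}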