diff options
Diffstat (limited to 'arch/powerpc/lib/string_64.S')
-rw-r--r-- | arch/powerpc/lib/string_64.S | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S new file mode 100644 index 000000000000..3b1e48049faf --- /dev/null +++ b/arch/powerpc/lib/string_64.S | |||
@@ -0,0 +1,202 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) IBM Corporation, 2012 | ||
17 | * | ||
18 | * Author: Anton Blanchard <anton@au.ibm.com> | ||
19 | */ | ||
20 | |||
21 | #include <asm/ppc_asm.h> | ||
22 | #include <asm/asm-offsets.h> | ||
23 | |||
24 | .section ".toc","aw" /* emit the following entry into the TOC section */ | ||
25 | PPC64_CACHES: /* label read with PPC64_CACHES@toc(r2) in .Llong_clear */ | ||
26 | .tc ppc64_caches[TC],ppc64_caches /* TOC entry holding the address of ppc64_caches */ | ||
27 | .section ".text" /* back to code for everything that follows */ | ||
28 | |||
29 | /** | ||
30 | * __clear_user: - Zero a block of memory in user space, with less checking. | ||
31 | * @to: Destination address, in user space. | ||
32 | * @n: Number of bytes to zero. | ||
33 | * | ||
34 | * Zero a block of memory in user space. Caller must check | ||
35 | * the specified block with access_ok() before calling this function. | ||
36 | * | ||
37 | * Returns number of bytes that could not be cleared. | ||
38 | * On success, this will be zero. | ||
39 | */ | ||
40 | |||
41 | .macro err1 /* prefix for an instruction that may fault; pairs it with the fixup .Ldo_err1 */ | ||
42 | 100: /* numeric local label: the address of the instruction written right after the macro */ | ||
43 | .section __ex_table,"a" | ||
44 | .align 3 | ||
45 | .llong 100b,.Ldo_err1 /* table entry: (address of tagged instruction, address of its fixup) */ | ||
46 | .previous /* return to the section that was active before the macro */ | ||
47 | .endm | ||
48 | |||
49 | .macro err2 /* prefix for an instruction that may fault; pairs it with the fixup .Ldo_err2 */ | ||
50 | 200: /* numeric local label: the address of the instruction written right after the macro */ | ||
51 | .section __ex_table,"a" | ||
52 | .align 3 | ||
53 | .llong 200b,.Ldo_err2 /* table entry: (address of tagged instruction, address of its fixup) */ | ||
54 | .previous /* return to the section that was active before the macro */ | ||
55 | .endm | ||
56 | |||
57 | .macro err3 /* prefix for an instruction that may fault; pairs it with the fixup .Ldo_err3 */ | ||
58 | 300: /* numeric local label: the address of the instruction written right after the macro */ | ||
59 | .section __ex_table,"a" | ||
60 | .align 3 | ||
61 | .llong 300b,.Ldo_err3 /* table entry: (address of tagged instruction, address of its fixup) */ | ||
62 | .previous /* return to the section that was active before the macro */ | ||
63 | .endm | ||
64 | |||
65 | .Ldo_err1: /* fixup for err1-tagged instructions: r4 counts from the base saved in r8, so rewind r3 to it */ | ||
66 | mr r3,r8 | ||
67 | |||
68 | .Ldo_err2: /* fixup for err2-tagged instructions (r3/r4 already consistent); .Ldo_err1 falls through to here */ | ||
69 | mtctr r4 /* retry the remaining r4 bytes one byte at a time */ | ||
70 | 1: | ||
71 | err3; stb r0,0(r3) /* r0 is the zero loaded by __clear_user; a fault here goes to .Ldo_err3 */ | ||
72 | addi r3,r3,1 | ||
73 | addi r4,r4,-1 /* r4 = bytes still not cleared */ | ||
74 | bdnz 1b | ||
75 | |||
76 | .Ldo_err3: /* return the uncleared byte count; also reached by fall-through once the byte loop finishes (r4 == 0) */ | ||
77 | mr r3,r4 | ||
78 | blr | ||
79 | |||
80 | _GLOBAL(__clear_user) /* in: r3 = to, r4 = n; out: r3 = bytes not cleared (0 on success) */ | ||
81 | cmpdi r4,32 | ||
82 | neg r6,r3 /* low bits of -to = byte count up to the next aligned boundary */ | ||
83 | li r0,0 /* r0 = the zero value written by every store below */ | ||
84 | blt .Lshort_clear /* fewer than 32 bytes: skip the alignment work */ | ||
85 | mr r8,r3 /* r8 = restart address used by .Ldo_err1 */ | ||
86 | mtocrf 0x01,r6 /* cr7 = low 4 bits of -to */ | ||
87 | clrldi r6,r6,(64-3) /* r6 = bytes needed to reach 8 byte alignment (0-7) */ | ||
88 | |||
89 | /* Get the destination 8 byte aligned */ | ||
90 | bf cr7*4+3,1f | ||
91 | err1; stb r0,0(r3) | ||
92 | addi r3,r3,1 | ||
93 | |||
94 | 1: bf cr7*4+2,2f | ||
95 | err1; sth r0,0(r3) | ||
96 | addi r3,r3,2 | ||
97 | |||
98 | 2: bf cr7*4+1,3f | ||
99 | err1; stw r0,0(r3) | ||
100 | addi r3,r3,4 | ||
101 | |||
102 | 3: sub r4,r4,r6 /* account for the alignment bytes just cleared */ | ||
103 | |||
104 | cmpdi r4,32 | ||
105 | cmpdi cr1,r4,512 | ||
106 | blt .Lshort_clear | ||
107 | bgt cr1,.Llong_clear /* more than 512 bytes left: try the dcbz path */ | ||
108 | |||
109 | .Lmedium_clear: | ||
110 | srdi r6,r4,5 /* r6 = number of whole 32 byte chunks */ | ||
111 | mtctr r6 | ||
112 | |||
113 | /* Do 32 byte chunks */ | ||
114 | 4: | ||
115 | err2; std r0,0(r3) | ||
116 | err2; std r0,8(r3) | ||
117 | err2; std r0,16(r3) | ||
118 | err2; std r0,24(r3) | ||
119 | addi r3,r3,32 | ||
120 | addi r4,r4,-32 /* keep r4 = bytes remaining at r3 so .Ldo_err2 can resume from here */ | ||
121 | bdnz 4b | ||
122 | |||
123 | .Lshort_clear: | ||
124 | /* up to 31 bytes to go */ | ||
125 | cmpdi r4,16 | ||
126 | blt 6f | ||
127 | err2; std r0,0(r3) | ||
128 | err2; std r0,8(r3) | ||
129 | addi r3,r3,16 | ||
130 | addi r4,r4,-16 | ||
131 | |||
132 | /* Up to 15 bytes to go */ | ||
133 | 6: mr r8,r3 /* new restart address for the err1 stores in this tail */ | ||
134 | clrldi r4,r4,(64-4) /* keep only the low 4 bits of the count */ | ||
135 | mtocrf 0x01,r4 /* cr7 = those 4 bits, one per store size (8/4/2/1) */ | ||
136 | bf cr7*4+0,7f | ||
137 | err1; std r0,0(r3) | ||
138 | addi r3,r3,8 | ||
139 | |||
140 | 7: bf cr7*4+1,8f | ||
141 | err1; stw r0,0(r3) | ||
142 | addi r3,r3,4 | ||
143 | |||
144 | 8: bf cr7*4+2,9f | ||
145 | err1; sth r0,0(r3) | ||
146 | addi r3,r3,2 | ||
147 | |||
148 | 9: bf cr7*4+3,10f | ||
149 | err1; stb r0,0(r3) | ||
150 | |||
151 | 10: li r3,0 /* everything cleared: return 0 */ | ||
152 | blr | ||
153 | |||
154 | .Llong_clear: | ||
155 | ld r5,PPC64_CACHES@toc(r2) /* r5 = address of ppc64_caches, read from the TOC */ | ||
156 | |||
157 | bf cr7*4+0,11f /* cr7 still holds the low bits of -to from function entry; bit 0 is the 8s bit */ | ||
158 | err2; std r0,0(r3) | ||
159 | addi r3,r3,8 | ||
160 | addi r4,r4,-8 | ||
161 | |||
162 | /* Destination is 16 byte aligned, need to get it cacheline aligned */ | ||
163 | 11: lwz r7,DCACHEL1LOGLINESIZE(r5) /* r7 = shift count used below to turn bytes into cachelines */ | ||
164 | lwz r9,DCACHEL1LINESIZE(r5) /* r9 = cacheline size in bytes (the dcbz stride below) */ | ||
165 | |||
166 | /* | ||
167 | * With worst case alignment the long clear loop takes a minimum | ||
168 | * of 1 byte less than 2 cachelines. The check below requires at least 4 cachelines (r9 << 2) and otherwise uses the 32 byte loop. | ||
169 | */ | ||
170 | sldi r10,r9,2 | ||
171 | cmpd r4,r10 | ||
172 | blt .Lmedium_clear | ||
173 | |||
174 | neg r6,r3 | ||
175 | addi r10,r9,-1 /* r10 = line size - 1, used as a mask here and after the dcbz loop */ | ||
176 | and. r5,r6,r10 /* r5 = bytes up to the next cacheline boundary */ | ||
177 | beq 13f /* already cacheline aligned */ | ||
178 | |||
179 | srdi r6,r5,4 /* r6 = number of 16 byte steps to the boundary */ | ||
180 | mtctr r6 | ||
181 | mr r8,r3 /* restart address for the err1 stores in loop 12 */ | ||
182 | 12: | ||
183 | err1; std r0,0(r3) | ||
184 | err1; std r0,8(r3) | ||
185 | addi r3,r3,16 | ||
186 | bdnz 12b | ||
187 | |||
188 | sub r4,r4,r5 /* r4 is only updated after the loop; a fault inside it restarts from r8 with the old count */ | ||
189 | |||
190 | 13: srd r6,r4,r7 /* r6 = number of whole cachelines left */ | ||
191 | mtctr r6 | ||
192 | mr r8,r3 /* restart address for a faulting dcbz */ | ||
193 | 14: | ||
194 | err1; dcbz 0,r3 /* RA operand is the literal 0 (no base register), not GPR r0: zero the line at r3 */ | ||
195 | add r3,r3,r9 | ||
196 | bdnz 14b | ||
197 | |||
198 | and r4,r4,r10 /* r4 = bytes left over after the whole cachelines */ | ||
199 | |||
200 | cmpdi r4,32 | ||
201 | blt .Lshort_clear | ||
202 | b .Lmedium_clear | ||