diff options
Diffstat (limited to 'include/asm-x86/string_32.h')
-rw-r--r-- | include/asm-x86/string_32.h | 323 |
1 files changed, 188 insertions, 135 deletions
diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h
index b49369ad9a61..193578cd1fd9 100644
--- a/include/asm-x86/string_32.h
+++ b/include/asm-x86/string_32.h | |||
@@ -29,81 +29,116 @@ extern char *strchr(const char *s, int c); | |||
29 | #define __HAVE_ARCH_STRLEN | 29 | #define __HAVE_ARCH_STRLEN |
30 | extern size_t strlen(const char *s); | 30 | extern size_t strlen(const char *s); |
31 | 31 | ||
32 | static __always_inline void * __memcpy(void * to, const void * from, size_t n) | 32 | static __always_inline void *__memcpy(void *to, const void *from, size_t n) |
33 | { | 33 | { |
34 | int d0, d1, d2; | 34 | int d0, d1, d2; |
35 | __asm__ __volatile__( | 35 | asm volatile("rep ; movsl\n\t" |
36 | "rep ; movsl\n\t" | 36 | "movl %4,%%ecx\n\t" |
37 | "movl %4,%%ecx\n\t" | 37 | "andl $3,%%ecx\n\t" |
38 | "andl $3,%%ecx\n\t" | 38 | "jz 1f\n\t" |
39 | "jz 1f\n\t" | 39 | "rep ; movsb\n\t" |
40 | "rep ; movsb\n\t" | 40 | "1:" |
41 | "1:" | 41 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) |
42 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) | 42 | : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) |
43 | : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from) | 43 | : "memory"); |
44 | : "memory"); | 44 | return to; |
45 | return (to); | ||
46 | } | 45 | } |
47 | 46 | ||
48 | /* | 47 | /* |
49 | * This looks ugly, but the compiler can optimize it totally, | 48 | * This looks ugly, but the compiler can optimize it totally, |
50 | * as the count is constant. | 49 | * as the count is constant. |
51 | */ | 50 | */ |
52 | static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n) | 51 | static __always_inline void *__constant_memcpy(void *to, const void *from, |
52 | size_t n) | ||
53 | { | 53 | { |
54 | long esi, edi; | 54 | long esi, edi; |
55 | if (!n) return to; | 55 | if (!n) |
56 | #if 1 /* want to do small copies with non-string ops? */ | 56 | return to; |
57 | |||
57 | switch (n) { | 58 | switch (n) { |
58 | case 1: *(char*)to = *(char*)from; return to; | 59 | case 1: |
59 | case 2: *(short*)to = *(short*)from; return to; | 60 | *(char *)to = *(char *)from; |
60 | case 4: *(int*)to = *(int*)from; return to; | 61 | return to; |
61 | #if 1 /* including those doable with two moves? */ | 62 | case 2: |
62 | case 3: *(short*)to = *(short*)from; | 63 | *(short *)to = *(short *)from; |
63 | *((char*)to+2) = *((char*)from+2); return to; | 64 | return to; |
64 | case 5: *(int*)to = *(int*)from; | 65 | case 4: |
65 | *((char*)to+4) = *((char*)from+4); return to; | 66 | *(int *)to = *(int *)from; |
66 | case 6: *(int*)to = *(int*)from; | 67 | return to; |
67 | *((short*)to+2) = *((short*)from+2); return to; | 68 | |
68 | case 8: *(int*)to = *(int*)from; | 69 | case 3: |
69 | *((int*)to+1) = *((int*)from+1); return to; | 70 | *(short *)to = *(short *)from; |
70 | #endif | 71 | *((char *)to + 2) = *((char *)from + 2); |
72 | return to; | ||
73 | case 5: | ||
74 | *(int *)to = *(int *)from; | ||
75 | *((char *)to + 4) = *((char *)from + 4); | ||
76 | return to; | ||
77 | case 6: | ||
78 | *(int *)to = *(int *)from; | ||
79 | *((short *)to + 2) = *((short *)from + 2); | ||
80 | return to; | ||
81 | case 8: | ||
82 | *(int *)to = *(int *)from; | ||
83 | *((int *)to + 1) = *((int *)from + 1); | ||
84 | return to; | ||
71 | } | 85 | } |
72 | #endif | 86 | |
73 | esi = (long) from; | 87 | esi = (long)from; |
74 | edi = (long) to; | 88 | edi = (long)to; |
75 | if (n >= 5*4) { | 89 | if (n >= 5 * 4) { |
76 | /* large block: use rep prefix */ | 90 | /* large block: use rep prefix */ |
77 | int ecx; | 91 | int ecx; |
78 | __asm__ __volatile__( | 92 | asm volatile("rep ; movsl" |
79 | "rep ; movsl" | 93 | : "=&c" (ecx), "=&D" (edi), "=&S" (esi) |
80 | : "=&c" (ecx), "=&D" (edi), "=&S" (esi) | 94 | : "0" (n / 4), "1" (edi), "2" (esi) |
81 | : "0" (n/4), "1" (edi),"2" (esi) | 95 | : "memory" |
82 | : "memory" | ||
83 | ); | 96 | ); |
84 | } else { | 97 | } else { |
85 | /* small block: don't clobber ecx + smaller code */ | 98 | /* small block: don't clobber ecx + smaller code */ |
86 | if (n >= 4*4) __asm__ __volatile__("movsl" | 99 | if (n >= 4 * 4) |
87 | :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 100 | asm volatile("movsl" |
88 | if (n >= 3*4) __asm__ __volatile__("movsl" | 101 | : "=&D"(edi), "=&S"(esi) |
89 | :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 102 | : "0"(edi), "1"(esi) |
90 | if (n >= 2*4) __asm__ __volatile__("movsl" | 103 | : "memory"); |
91 | :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 104 | if (n >= 3 * 4) |
92 | if (n >= 1*4) __asm__ __volatile__("movsl" | 105 | asm volatile("movsl" |
93 | :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 106 | : "=&D"(edi), "=&S"(esi) |
107 | : "0"(edi), "1"(esi) | ||
108 | : "memory"); | ||
109 | if (n >= 2 * 4) | ||
110 | asm volatile("movsl" | ||
111 | : "=&D"(edi), "=&S"(esi) | ||
112 | : "0"(edi), "1"(esi) | ||
113 | : "memory"); | ||
114 | if (n >= 1 * 4) | ||
115 | asm volatile("movsl" | ||
116 | : "=&D"(edi), "=&S"(esi) | ||
117 | : "0"(edi), "1"(esi) | ||
118 | : "memory"); | ||
94 | } | 119 | } |
95 | switch (n % 4) { | 120 | switch (n % 4) { |
96 | /* tail */ | 121 | /* tail */ |
97 | case 0: return to; | 122 | case 0: |
98 | case 1: __asm__ __volatile__("movsb" | 123 | return to; |
99 | :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 124 | case 1: |
100 | return to; | 125 | asm volatile("movsb" |
101 | case 2: __asm__ __volatile__("movsw" | 126 | : "=&D"(edi), "=&S"(esi) |
102 | :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 127 | : "0"(edi), "1"(esi) |
103 | return to; | 128 | : "memory"); |
104 | default: __asm__ __volatile__("movsw\n\tmovsb" | 129 | return to; |
105 | :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory"); | 130 | case 2: |
106 | return to; | 131 | asm volatile("movsw" |
132 | : "=&D"(edi), "=&S"(esi) | ||
133 | : "0"(edi), "1"(esi) | ||
134 | : "memory"); | ||
135 | return to; | ||
136 | default: | ||
137 | asm volatile("movsw\n\tmovsb" | ||
138 | : "=&D"(edi), "=&S"(esi) | ||
139 | : "0"(edi), "1"(esi) | ||
140 | : "memory"); | ||
141 | return to; | ||
107 | } | 142 | } |
108 | } | 143 | } |
109 | 144 | ||
@@ -117,87 +152,86 @@ static __always_inline void * __constant_memcpy(void * to, const void * from, si | |||
117 | * This CPU favours 3DNow strongly (eg AMD Athlon) | 152 | * This CPU favours 3DNow strongly (eg AMD Athlon) |
118 | */ | 153 | */ |
119 | 154 | ||
120 | static inline void * __constant_memcpy3d(void * to, const void * from, size_t len) | 155 | static inline void *__constant_memcpy3d(void *to, const void *from, size_t len) |
121 | { | 156 | { |
122 | if (len < 512) | 157 | if (len < 512) |
123 | return __constant_memcpy(to, from, len); | 158 | return __constant_memcpy(to, from, len); |
124 | return _mmx_memcpy(to, from, len); | 159 | return _mmx_memcpy(to, from, len); |
125 | } | 160 | } |
126 | 161 | ||
127 | static __inline__ void *__memcpy3d(void *to, const void *from, size_t len) | 162 | static inline void *__memcpy3d(void *to, const void *from, size_t len) |
128 | { | 163 | { |
129 | if (len < 512) | 164 | if (len < 512) |
130 | return __memcpy(to, from, len); | 165 | return __memcpy(to, from, len); |
131 | return _mmx_memcpy(to, from, len); | 166 | return _mmx_memcpy(to, from, len); |
132 | } | 167 | } |
133 | 168 | ||
134 | #define memcpy(t, f, n) \ | 169 | #define memcpy(t, f, n) \ |
135 | (__builtin_constant_p(n) ? \ | 170 | (__builtin_constant_p((n)) \ |
136 | __constant_memcpy3d((t),(f),(n)) : \ | 171 | ? __constant_memcpy3d((t), (f), (n)) \ |
137 | __memcpy3d((t),(f),(n))) | 172 | : __memcpy3d((t), (f), (n))) |
138 | 173 | ||
139 | #else | 174 | #else |
140 | 175 | ||
141 | /* | 176 | /* |
142 | * No 3D Now! | 177 | * No 3D Now! |
143 | */ | 178 | */ |
144 | 179 | ||
145 | #define memcpy(t, f, n) \ | 180 | #define memcpy(t, f, n) \ |
146 | (__builtin_constant_p(n) ? \ | 181 | (__builtin_constant_p((n)) \ |
147 | __constant_memcpy((t),(f),(n)) : \ | 182 | ? __constant_memcpy((t), (f), (n)) \ |
148 | __memcpy((t),(f),(n))) | 183 | : __memcpy((t), (f), (n))) |
149 | 184 | ||
150 | #endif | 185 | #endif |
151 | 186 | ||
152 | #define __HAVE_ARCH_MEMMOVE | 187 | #define __HAVE_ARCH_MEMMOVE |
153 | void *memmove(void * dest,const void * src, size_t n); | 188 | void *memmove(void *dest, const void *src, size_t n); |
154 | 189 | ||
155 | #define memcmp __builtin_memcmp | 190 | #define memcmp __builtin_memcmp |
156 | 191 | ||
157 | #define __HAVE_ARCH_MEMCHR | 192 | #define __HAVE_ARCH_MEMCHR |
158 | extern void *memchr(const void * cs,int c,size_t count); | 193 | extern void *memchr(const void *cs, int c, size_t count); |
159 | 194 | ||
160 | static inline void * __memset_generic(void * s, char c,size_t count) | 195 | static inline void *__memset_generic(void *s, char c, size_t count) |
161 | { | 196 | { |
162 | int d0, d1; | 197 | int d0, d1; |
163 | __asm__ __volatile__( | 198 | asm volatile("rep\n\t" |
164 | "rep\n\t" | 199 | "stosb" |
165 | "stosb" | 200 | : "=&c" (d0), "=&D" (d1) |
166 | : "=&c" (d0), "=&D" (d1) | 201 | : "a" (c), "1" (s), "0" (count) |
167 | :"a" (c),"1" (s),"0" (count) | 202 | : "memory"); |
168 | :"memory"); | 203 | return s; |
169 | return s; | ||
170 | } | 204 | } |
171 | 205 | ||
172 | /* we might want to write optimized versions of these later */ | 206 | /* we might want to write optimized versions of these later */ |
173 | #define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count)) | 207 | #define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count)) |
174 | 208 | ||
175 | /* | 209 | /* |
176 | * memset(x,0,y) is a reasonably common thing to do, so we want to fill | 210 | * memset(x, 0, y) is a reasonably common thing to do, so we want to fill |
177 | * things 32 bits at a time even when we don't know the size of the | 211 | * things 32 bits at a time even when we don't know the size of the |
178 | * area at compile-time. | 212 | * area at compile-time. |
179 | */ | 213 | */ |
180 | static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count) | 214 | static __always_inline |
215 | void *__constant_c_memset(void *s, unsigned long c, size_t count) | ||
181 | { | 216 | { |
182 | int d0, d1; | 217 | int d0, d1; |
183 | __asm__ __volatile__( | 218 | asm volatile("rep ; stosl\n\t" |
184 | "rep ; stosl\n\t" | 219 | "testb $2,%b3\n\t" |
185 | "testb $2,%b3\n\t" | 220 | "je 1f\n\t" |
186 | "je 1f\n\t" | 221 | "stosw\n" |
187 | "stosw\n" | 222 | "1:\ttestb $1,%b3\n\t" |
188 | "1:\ttestb $1,%b3\n\t" | 223 | "je 2f\n\t" |
189 | "je 2f\n\t" | 224 | "stosb\n" |
190 | "stosb\n" | 225 | "2:" |
191 | "2:" | 226 | : "=&c" (d0), "=&D" (d1) |
192 | :"=&c" (d0), "=&D" (d1) | 227 | : "a" (c), "q" (count), "0" (count/4), "1" ((long)s) |
193 | :"a" (c), "q" (count), "0" (count/4), "1" ((long) s) | 228 | : "memory"); |
194 | :"memory"); | 229 | return s; |
195 | return (s); | ||
196 | } | 230 | } |
197 | 231 | ||
198 | /* Added by Gertjan van Wingerde to make minix and sysv module work */ | 232 | /* Added by Gertjan van Wingerde to make minix and sysv module work */ |
199 | #define __HAVE_ARCH_STRNLEN | 233 | #define __HAVE_ARCH_STRNLEN |
200 | extern size_t strnlen(const char * s, size_t count); | 234 | extern size_t strnlen(const char *s, size_t count); |
201 | /* end of additional stuff */ | 235 | /* end of additional stuff */ |
202 | 236 | ||
203 | #define __HAVE_ARCH_STRSTR | 237 | #define __HAVE_ARCH_STRSTR |
@@ -207,66 +241,85 @@ extern char *strstr(const char *cs, const char *ct); | |||
207 | * This looks horribly ugly, but the compiler can optimize it totally, | 241 | * This looks horribly ugly, but the compiler can optimize it totally, |
208 | * as we by now know that both pattern and count are constant. | 242 | * as we by now know that both pattern and count are constant. |
209 | */ | 243 | */ |
210 | static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count) | 244 | static __always_inline |
245 | void *__constant_c_and_count_memset(void *s, unsigned long pattern, | ||
246 | size_t count) | ||
211 | { | 247 | { |
212 | switch (count) { | 248 | switch (count) { |
249 | case 0: | ||
250 | return s; | ||
251 | case 1: | ||
252 | *(unsigned char *)s = pattern & 0xff; | ||
253 | return s; | ||
254 | case 2: | ||
255 | *(unsigned short *)s = pattern & 0xffff; | ||
256 | return s; | ||
257 | case 3: | ||
258 | *(unsigned short *)s = pattern & 0xffff; | ||
259 | *((unsigned char *)s + 2) = pattern & 0xff; | ||
260 | return s; | ||
261 | case 4: | ||
262 | *(unsigned long *)s = pattern; | ||
263 | return s; | ||
264 | } | ||
265 | |||
266 | #define COMMON(x) \ | ||
267 | asm volatile("rep ; stosl" \ | ||
268 | x \ | ||
269 | : "=&c" (d0), "=&D" (d1) \ | ||
270 | : "a" (eax), "0" (count/4), "1" ((long)s) \ | ||
271 | : "memory") | ||
272 | |||
273 | { | ||
274 | int d0, d1; | ||
275 | #if __GNUC__ == 4 && __GNUC_MINOR__ == 0 | ||
276 | /* Workaround for broken gcc 4.0 */ | ||
277 | register unsigned long eax asm("%eax") = pattern; | ||
278 | #else | ||
279 | unsigned long eax = pattern; | ||
280 | #endif | ||
281 | |||
282 | switch (count % 4) { | ||
213 | case 0: | 283 | case 0: |
284 | COMMON(""); | ||
214 | return s; | 285 | return s; |
215 | case 1: | 286 | case 1: |
216 | *(unsigned char *)s = pattern & 0xff; | 287 | COMMON("\n\tstosb"); |
217 | return s; | 288 | return s; |
218 | case 2: | 289 | case 2: |
219 | *(unsigned short *)s = pattern & 0xffff; | 290 | COMMON("\n\tstosw"); |
220 | return s; | 291 | return s; |
221 | case 3: | 292 | default: |
222 | *(unsigned short *)s = pattern & 0xffff; | 293 | COMMON("\n\tstosw\n\tstosb"); |
223 | *(2+(unsigned char *)s) = pattern & 0xff; | ||
224 | return s; | ||
225 | case 4: | ||
226 | *(unsigned long *)s = pattern; | ||
227 | return s; | 294 | return s; |
295 | } | ||
228 | } | 296 | } |
229 | #define COMMON(x) \ | 297 | |
230 | __asm__ __volatile__( \ | ||
231 | "rep ; stosl" \ | ||
232 | x \ | ||
233 | : "=&c" (d0), "=&D" (d1) \ | ||
234 | : "a" (pattern),"0" (count/4),"1" ((long) s) \ | ||
235 | : "memory") | ||
236 | { | ||
237 | int d0, d1; | ||
238 | switch (count % 4) { | ||
239 | case 0: COMMON(""); return s; | ||
240 | case 1: COMMON("\n\tstosb"); return s; | ||
241 | case 2: COMMON("\n\tstosw"); return s; | ||
242 | default: COMMON("\n\tstosw\n\tstosb"); return s; | ||
243 | } | ||
244 | } | ||
245 | |||
246 | #undef COMMON | 298 | #undef COMMON |
247 | } | 299 | } |
248 | 300 | ||
249 | #define __constant_c_x_memset(s, c, count) \ | 301 | #define __constant_c_x_memset(s, c, count) \ |
250 | (__builtin_constant_p(count) ? \ | 302 | (__builtin_constant_p(count) \ |
251 | __constant_c_and_count_memset((s),(c),(count)) : \ | 303 | ? __constant_c_and_count_memset((s), (c), (count)) \ |
252 | __constant_c_memset((s),(c),(count))) | 304 | : __constant_c_memset((s), (c), (count))) |
253 | 305 | ||
254 | #define __memset(s, c, count) \ | 306 | #define __memset(s, c, count) \ |
255 | (__builtin_constant_p(count) ? \ | 307 | (__builtin_constant_p(count) \ |
256 | __constant_count_memset((s),(c),(count)) : \ | 308 | ? __constant_count_memset((s), (c), (count)) \ |
257 | __memset_generic((s),(c),(count))) | 309 | : __memset_generic((s), (c), (count))) |
258 | 310 | ||
259 | #define __HAVE_ARCH_MEMSET | 311 | #define __HAVE_ARCH_MEMSET |
260 | #define memset(s, c, count) \ | 312 | #define memset(s, c, count) \ |
261 | (__builtin_constant_p(c) ? \ | 313 | (__builtin_constant_p(c) \ |
262 | __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \ | 314 | ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ |
263 | __memset((s),(c),(count))) | 315 | (count)) \ |
316 | : __memset((s), (c), (count))) | ||
264 | 317 | ||
265 | /* | 318 | /* |
266 | * find the first occurrence of byte 'c', or 1 past the area if none | 319 | * find the first occurrence of byte 'c', or 1 past the area if none |
267 | */ | 320 | */ |
268 | #define __HAVE_ARCH_MEMSCAN | 321 | #define __HAVE_ARCH_MEMSCAN |
269 | extern void *memscan(void * addr, int c, size_t size); | 322 | extern void *memscan(void *addr, int c, size_t size); |
270 | 323 | ||
271 | #endif /* __KERNEL__ */ | 324 | #endif /* __KERNEL__ */ |
272 | 325 | ||