diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2008-08-17 21:05:42 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2008-10-23 01:55:20 -0400 |
commit | bb8985586b7a906e116db835c64773b7a7d51663 (patch) | |
tree | de93ae58e88cc563d95cc124a73f3930594c6100 /arch/x86/include/asm/string_32.h | |
parent | 8ede0bdb63305d3353efd97e9af6210afb05734e (diff) |
x86, um: ... and asm-x86 move
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/include/asm/string_32.h')
-rw-r--r-- | arch/x86/include/asm/string_32.h | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h new file mode 100644 index 000000000000..487843ed245a --- /dev/null +++ b/arch/x86/include/asm/string_32.h | |||
@@ -0,0 +1,326 @@ | |||
1 | #ifndef ASM_X86__STRING_32_H | ||
2 | #define ASM_X86__STRING_32_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | |||
/*
 * String primitives that have an architecture-specific implementation.
 *
 * Only prototypes are given here, so gcc decides at each call site whether
 * to expand the operation inline or to call the out-of-line function.
 * Each __HAVE_ARCH_* macro announces that the architecture supplies the
 * routine (presumably so that no generic fallback is built as well --
 * confirm against the generic string code, which is not part of this
 * header).
 */

#define __HAVE_ARCH_STRCPY
extern char *strcpy(char *dest, const char *src);

#define __HAVE_ARCH_STRNCPY
extern char *strncpy(char *dest, const char *src, size_t count);

#define __HAVE_ARCH_STRCAT
extern char *strcat(char *dest, const char *src);

#define __HAVE_ARCH_STRNCAT
extern char *strncat(char *dest, const char *src, size_t count);

#define __HAVE_ARCH_STRCMP
extern int strcmp(const char *cs, const char *ct);

#define __HAVE_ARCH_STRNCMP
extern int strncmp(const char *cs, const char *ct, size_t count);

#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *s, int c);

#define __HAVE_ARCH_STRLEN
extern size_t strlen(const char *s);
31 | |||
32 | static __always_inline void *__memcpy(void *to, const void *from, size_t n) | ||
33 | { | ||
34 | int d0, d1, d2; | ||
35 | asm volatile("rep ; movsl\n\t" | ||
36 | "movl %4,%%ecx\n\t" | ||
37 | "andl $3,%%ecx\n\t" | ||
38 | "jz 1f\n\t" | ||
39 | "rep ; movsb\n\t" | ||
40 | "1:" | ||
41 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) | ||
42 | : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) | ||
43 | : "memory"); | ||
44 | return to; | ||
45 | } | ||
46 | |||
47 | /* | ||
48 | * This looks ugly, but the compiler can optimize it totally, | ||
49 | * as the count is constant. | ||
50 | */ | ||
51 | static __always_inline void *__constant_memcpy(void *to, const void *from, | ||
52 | size_t n) | ||
53 | { | ||
54 | long esi, edi; | ||
55 | if (!n) | ||
56 | return to; | ||
57 | |||
58 | switch (n) { | ||
59 | case 1: | ||
60 | *(char *)to = *(char *)from; | ||
61 | return to; | ||
62 | case 2: | ||
63 | *(short *)to = *(short *)from; | ||
64 | return to; | ||
65 | case 4: | ||
66 | *(int *)to = *(int *)from; | ||
67 | return to; | ||
68 | |||
69 | case 3: | ||
70 | *(short *)to = *(short *)from; | ||
71 | *((char *)to + 2) = *((char *)from + 2); | ||
72 | return to; | ||
73 | case 5: | ||
74 | *(int *)to = *(int *)from; | ||
75 | *((char *)to + 4) = *((char *)from + 4); | ||
76 | return to; | ||
77 | case 6: | ||
78 | *(int *)to = *(int *)from; | ||
79 | *((short *)to + 2) = *((short *)from + 2); | ||
80 | return to; | ||
81 | case 8: | ||
82 | *(int *)to = *(int *)from; | ||
83 | *((int *)to + 1) = *((int *)from + 1); | ||
84 | return to; | ||
85 | } | ||
86 | |||
87 | esi = (long)from; | ||
88 | edi = (long)to; | ||
89 | if (n >= 5 * 4) { | ||
90 | /* large block: use rep prefix */ | ||
91 | int ecx; | ||
92 | asm volatile("rep ; movsl" | ||
93 | : "=&c" (ecx), "=&D" (edi), "=&S" (esi) | ||
94 | : "0" (n / 4), "1" (edi), "2" (esi) | ||
95 | : "memory" | ||
96 | ); | ||
97 | } else { | ||
98 | /* small block: don't clobber ecx + smaller code */ | ||
99 | if (n >= 4 * 4) | ||
100 | asm volatile("movsl" | ||
101 | : "=&D"(edi), "=&S"(esi) | ||
102 | : "0"(edi), "1"(esi) | ||
103 | : "memory"); | ||
104 | if (n >= 3 * 4) | ||
105 | asm volatile("movsl" | ||
106 | : "=&D"(edi), "=&S"(esi) | ||
107 | : "0"(edi), "1"(esi) | ||
108 | : "memory"); | ||
109 | if (n >= 2 * 4) | ||
110 | asm volatile("movsl" | ||
111 | : "=&D"(edi), "=&S"(esi) | ||
112 | : "0"(edi), "1"(esi) | ||
113 | : "memory"); | ||
114 | if (n >= 1 * 4) | ||
115 | asm volatile("movsl" | ||
116 | : "=&D"(edi), "=&S"(esi) | ||
117 | : "0"(edi), "1"(esi) | ||
118 | : "memory"); | ||
119 | } | ||
120 | switch (n % 4) { | ||
121 | /* tail */ | ||
122 | case 0: | ||
123 | return to; | ||
124 | case 1: | ||
125 | asm volatile("movsb" | ||
126 | : "=&D"(edi), "=&S"(esi) | ||
127 | : "0"(edi), "1"(esi) | ||
128 | : "memory"); | ||
129 | return to; | ||
130 | case 2: | ||
131 | asm volatile("movsw" | ||
132 | : "=&D"(edi), "=&S"(esi) | ||
133 | : "0"(edi), "1"(esi) | ||
134 | : "memory"); | ||
135 | return to; | ||
136 | default: | ||
137 | asm volatile("movsw\n\tmovsb" | ||
138 | : "=&D"(edi), "=&S"(esi) | ||
139 | : "0"(edi), "1"(esi) | ||
140 | : "memory"); | ||
141 | return to; | ||
142 | } | ||
143 | } | ||
144 | |||
145 | #define __HAVE_ARCH_MEMCPY | ||
146 | |||
147 | #ifdef CONFIG_X86_USE_3DNOW | ||
148 | |||
149 | #include <asm/mmx.h> | ||
150 | |||
151 | /* | ||
152 | * This CPU favours 3DNow strongly (eg AMD Athlon) | ||
153 | */ | ||
154 | |||
/*
 * Constant-length memcpy() for CONFIG_X86_USE_3DNOW builds.
 *
 * Copies of 512 bytes or more are handed to _mmx_memcpy() (not defined in
 * this header); anything shorter goes through __constant_memcpy().
 * Returns whatever the selected routine returns.
 */
static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
{
	return (len >= 512) ? _mmx_memcpy(to, from, len)
			    : __constant_memcpy(to, from, len);
}
161 | |||
/*
 * Variable-length memcpy() for CONFIG_X86_USE_3DNOW builds.
 *
 * Copies of 512 bytes or more are handed to _mmx_memcpy() (not defined in
 * this header); anything shorter goes through __memcpy().
 * Returns whatever the selected routine returns.
 */
static inline void *__memcpy3d(void *to, const void *from, size_t len)
{
	return (len >= 512) ? _mmx_memcpy(to, from, len)
			    : __memcpy(to, from, len);
}
168 | |||
/*
 * memcpy() entry point when 3DNow! is in use: choose the helper by whether
 * the length is known at compile time.
 */
#define memcpy(t, f, n)					\
	(__builtin_constant_p((n))			\
	 ? __constant_memcpy3d((t), (f), (n))		\
	 : __memcpy3d((t), (f), (n)))
173 | |||
174 | #else | ||
175 | |||
/*
 * No 3DNow!: memcpy() picks __constant_memcpy() when the length is known at
 * compile time and __memcpy() otherwise.
 */

#define memcpy(t, f, n)					\
	(__builtin_constant_p((n))			\
	 ? __constant_memcpy((t), (f), (n))		\
	 : __memcpy((t), (f), (n)))
184 | |||
185 | #endif | ||
186 | |||
/* memmove() and memchr() are only declared here, not defined. */
#define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *dest, const void *src, size_t n);

/* memcmp() is left entirely to the compiler's builtin. */
#define memcmp __builtin_memcmp

#define __HAVE_ARCH_MEMCHR
extern void *memchr(const void *cs, int c, size_t count);
194 | |||
195 | static inline void *__memset_generic(void *s, char c, size_t count) | ||
196 | { | ||
197 | int d0, d1; | ||
198 | asm volatile("rep\n\t" | ||
199 | "stosb" | ||
200 | : "=&c" (d0), "=&D" (d1) | ||
201 | : "a" (c), "1" (s), "0" (count) | ||
202 | : "memory"); | ||
203 | return s; | ||
204 | } | ||
205 | |||
/*
 * Constant count, non-constant fill value.  No specialised version exists
 * (yet), so this is simply __memset_generic().
 */
#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count))
208 | |||
209 | /* | ||
210 | * memset(x, 0, y) is a reasonably common thing to do, so we want to fill | ||
211 | * things 32 bits at a time even when we don't know the size of the | ||
212 | * area at compile-time.. | ||
213 | */ | ||
214 | static __always_inline | ||
215 | void *__constant_c_memset(void *s, unsigned long c, size_t count) | ||
216 | { | ||
217 | int d0, d1; | ||
218 | asm volatile("rep ; stosl\n\t" | ||
219 | "testb $2,%b3\n\t" | ||
220 | "je 1f\n\t" | ||
221 | "stosw\n" | ||
222 | "1:\ttestb $1,%b3\n\t" | ||
223 | "je 2f\n\t" | ||
224 | "stosb\n" | ||
225 | "2:" | ||
226 | : "=&c" (d0), "=&D" (d1) | ||
227 | : "a" (c), "q" (count), "0" (count/4), "1" ((long)s) | ||
228 | : "memory"); | ||
229 | return s; | ||
230 | } | ||
231 | |||
/* Added by Gertjan van Wingerde to make minix and sysv module work */
#define __HAVE_ARCH_STRNLEN
extern size_t strnlen(const char *s, size_t count);
/* end of additional stuff */

/* strstr() is likewise only declared here. */
#define __HAVE_ARCH_STRSTR
extern char *strstr(const char *cs, const char *ct);
239 | |||
240 | /* | ||
241 | * This looks horribly ugly, but the compiler can optimize it totally, | ||
242 | * as we by now know that both pattern and count is constant.. | ||
243 | */ | ||
244 | static __always_inline | ||
245 | void *__constant_c_and_count_memset(void *s, unsigned long pattern, | ||
246 | size_t count) | ||
247 | { | ||
248 | switch (count) { | ||
249 | case 0: | ||
250 | return s; | ||
251 | case 1: | ||
252 | *(unsigned char *)s = pattern & 0xff; | ||
253 | return s; | ||
254 | case 2: | ||
255 | *(unsigned short *)s = pattern & 0xffff; | ||
256 | return s; | ||
257 | case 3: | ||
258 | *(unsigned short *)s = pattern & 0xffff; | ||
259 | *((unsigned char *)s + 2) = pattern & 0xff; | ||
260 | return s; | ||
261 | case 4: | ||
262 | *(unsigned long *)s = pattern; | ||
263 | return s; | ||
264 | } | ||
265 | |||
266 | #define COMMON(x) \ | ||
267 | asm volatile("rep ; stosl" \ | ||
268 | x \ | ||
269 | : "=&c" (d0), "=&D" (d1) \ | ||
270 | : "a" (eax), "0" (count/4), "1" ((long)s) \ | ||
271 | : "memory") | ||
272 | |||
273 | { | ||
274 | int d0, d1; | ||
275 | #if __GNUC__ == 4 && __GNUC_MINOR__ == 0 | ||
276 | /* Workaround for broken gcc 4.0 */ | ||
277 | register unsigned long eax asm("%eax") = pattern; | ||
278 | #else | ||
279 | unsigned long eax = pattern; | ||
280 | #endif | ||
281 | |||
282 | switch (count % 4) { | ||
283 | case 0: | ||
284 | COMMON(""); | ||
285 | return s; | ||
286 | case 1: | ||
287 | COMMON("\n\tstosb"); | ||
288 | return s; | ||
289 | case 2: | ||
290 | COMMON("\n\tstosw"); | ||
291 | return s; | ||
292 | default: | ||
293 | COMMON("\n\tstosw\n\tstosb"); | ||
294 | return s; | ||
295 | } | ||
296 | } | ||
297 | |||
298 | #undef COMMON | ||
299 | } | ||
300 | |||
/*
 * Constant fill pattern: use the fully constant variant when the count is a
 * compile-time constant as well, the run-time count variant otherwise.
 */
#define __constant_c_x_memset(s, c, count)			\
	(__builtin_constant_p(count)				\
	 ? __constant_c_and_count_memset((s), (c), (count))	\
	 : __constant_c_memset((s), (c), (count)))
305 | |||
/*
 * Non-constant fill value: dispatch on whether the count is a compile-time
 * constant.  Both arms currently end up in __memset_generic(), because
 * __constant_count_memset() is defined as exactly that.
 */
#define __memset(s, c, count)					\
	(__builtin_constant_p(count)				\
	 ? __constant_count_memset((s), (c), (count))		\
	 : __memset_generic((s), (c), (count)))
310 | |||
/*
 * memset() entry point.
 *
 * A compile-time constant fill value is first widened to a 32-bit pattern
 * with the byte repeated in all four positions (0x01010101UL * the value
 * cast to unsigned char) and passed to __constant_c_x_memset().  Any other
 * fill value goes through __memset().
 */
#define __HAVE_ARCH_MEMSET
#define memset(s, c, count)						\
	(__builtin_constant_p(c)					\
	 ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
				 (count))				\
	 : __memset((s), (c), (count)))
317 | |||
/*
 * memscan(): find the first occurrence of byte 'c' in the area; the result
 * is 1 past the area if there is none.  Only declared here.
 */
#define __HAVE_ARCH_MEMSCAN
extern void *memscan(void *addr, int c, size_t size);
323 | |||
324 | #endif /* __KERNEL__ */ | ||
325 | |||
326 | #endif /* ASM_X86__STRING_32_H */ | ||