aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andi Kleen <ak@suse.de> 2007-07-21 11:09:59 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-21 21:37:08 -0400
commit: b520b85a963bf7b14b9614579aff14558d7ee264 (patch)
tree: a254ee00655f042c3dfaa2486a7ce8caa0767806
parent: aac57f81eb16d56afb5bd5a31ff325b5d1615580 (diff)
i386: Move all simple string operations out of line
The compiler generally generates reasonable inline code for the simple cases and for the rest it's better for code size for them to be out of line. Also there they can be potentially optimized more in the future.

In fact they probably should be in a .S file because they're all pure assembly, but that's for another day.

Also some code style cleanup on them while I was on it (this seems to be the last untouched really early Linux code).

This saves ~12k text for a defconfig kernel with gcc 4.1.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- arch/i386/lib/Makefile | 2
-rw-r--r-- arch/i386/lib/string.c | 257
-rw-r--r-- include/asm-i386/string.h | 243
3 files changed, 271 insertions, 231 deletions
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
index 22d8ac5815f0..4d105fdfe817 100644
--- a/arch/i386/lib/Makefile
+++ b/arch/i386/lib/Makefile
@@ -4,7 +4,7 @@
4 4
5 5
6lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \ 6lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
7 bitops.o semaphore.o 7 bitops.o semaphore.o string.o
8 8
9lib-$(CONFIG_X86_USE_3DNOW) += mmx.o 9lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
10 10
diff --git a/arch/i386/lib/string.c b/arch/i386/lib/string.c
new file mode 100644
index 000000000000..2c773fefa3dd
--- /dev/null
+++ b/arch/i386/lib/string.c
@@ -0,0 +1,257 @@
1/*
2 * Most of the string-functions are rather heavily hand-optimized,
3 * see especially strsep,strstr,str[c]spn. They should work, but are not
4 * very easy to understand. Everything is done entirely within the register
5 * set, making the functions fast and clean. String instructions have been
6 * used through-out, making for "slightly" unclear code :-)
7 *
8 * AK: On P4 and K7 using non string instruction implementations might be faster
9 * for large memory blocks. But most of them are unlikely to be used on large
10 * strings.
11 */
12
13#include <linux/string.h>
14#include <linux/module.h>
15
16#ifdef __HAVE_ARCH_STRCPY
17char *strcpy(char * dest,const char *src)
18{
19 int d0, d1, d2;
20 asm volatile( "1:\tlodsb\n\t"
21 "stosb\n\t"
22 "testb %%al,%%al\n\t"
23 "jne 1b"
24 : "=&S" (d0), "=&D" (d1), "=&a" (d2)
25 :"0" (src),"1" (dest) : "memory");
26 return dest;
27}
28EXPORT_SYMBOL(strcpy);
29#endif
30
31#ifdef __HAVE_ARCH_STRNCPY
32char *strncpy(char * dest,const char *src,size_t count)
33{
34 int d0, d1, d2, d3;
35 asm volatile( "1:\tdecl %2\n\t"
36 "js 2f\n\t"
37 "lodsb\n\t"
38 "stosb\n\t"
39 "testb %%al,%%al\n\t"
40 "jne 1b\n\t"
41 "rep\n\t"
42 "stosb\n"
43 "2:"
44 : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
45 :"0" (src),"1" (dest),"2" (count) : "memory");
46 return dest;
47}
48EXPORT_SYMBOL(strncpy);
49#endif
50
51#ifdef __HAVE_ARCH_STRCAT
52char *strcat(char * dest,const char * src)
53{
54 int d0, d1, d2, d3;
55 asm volatile( "repne\n\t"
56 "scasb\n\t"
57 "decl %1\n"
58 "1:\tlodsb\n\t"
59 "stosb\n\t"
60 "testb %%al,%%al\n\t"
61 "jne 1b"
62 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
63 : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory");
64 return dest;
65}
66EXPORT_SYMBOL(strcat);
67#endif
68
69#ifdef __HAVE_ARCH_STRNCAT
70char *strncat(char * dest,const char * src,size_t count)
71{
72 int d0, d1, d2, d3;
73 asm volatile( "repne\n\t"
74 "scasb\n\t"
75 "decl %1\n\t"
76 "movl %8,%3\n"
77 "1:\tdecl %3\n\t"
78 "js 2f\n\t"
79 "lodsb\n\t"
80 "stosb\n\t"
81 "testb %%al,%%al\n\t"
82 "jne 1b\n"
83 "2:\txorl %2,%2\n\t"
84 "stosb"
85 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
86 : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count)
87 : "memory");
88 return dest;
89}
90EXPORT_SYMBOL(strncat);
91#endif
92
93#ifdef __HAVE_ARCH_STRCMP
94int strcmp(const char * cs,const char * ct)
95{
96 int d0, d1;
97 int res;
98 asm volatile( "1:\tlodsb\n\t"
99 "scasb\n\t"
100 "jne 2f\n\t"
101 "testb %%al,%%al\n\t"
102 "jne 1b\n\t"
103 "xorl %%eax,%%eax\n\t"
104 "jmp 3f\n"
105 "2:\tsbbl %%eax,%%eax\n\t"
106 "orb $1,%%al\n"
107 "3:"
108 :"=a" (res), "=&S" (d0), "=&D" (d1)
109 :"1" (cs),"2" (ct)
110 :"memory");
111 return res;
112}
113EXPORT_SYMBOL(strcmp);
114#endif
115
116#ifdef __HAVE_ARCH_STRNCMP
117int strncmp(const char * cs,const char * ct,size_t count)
118{
119 int res;
120 int d0, d1, d2;
121 asm volatile( "1:\tdecl %3\n\t"
122 "js 2f\n\t"
123 "lodsb\n\t"
124 "scasb\n\t"
125 "jne 3f\n\t"
126 "testb %%al,%%al\n\t"
127 "jne 1b\n"
128 "2:\txorl %%eax,%%eax\n\t"
129 "jmp 4f\n"
130 "3:\tsbbl %%eax,%%eax\n\t"
131 "orb $1,%%al\n"
132 "4:"
133 :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
134 :"1" (cs),"2" (ct),"3" (count)
135 :"memory");
136 return res;
137}
138EXPORT_SYMBOL(strncmp);
139#endif
140
141#ifdef __HAVE_ARCH_STRCHR
142char *strchr(const char * s, int c)
143{
144 int d0;
145 char * res;
146 asm volatile( "movb %%al,%%ah\n"
147 "1:\tlodsb\n\t"
148 "cmpb %%ah,%%al\n\t"
149 "je 2f\n\t"
150 "testb %%al,%%al\n\t"
151 "jne 1b\n\t"
152 "movl $1,%1\n"
153 "2:\tmovl %1,%0\n\t"
154 "decl %0"
155 :"=a" (res), "=&S" (d0)
156 :"1" (s),"0" (c)
157 :"memory");
158 return res;
159}
160EXPORT_SYMBOL(strchr);
161#endif
162
163#ifdef __HAVE_ARCH_STRRCHR
164char *strrchr(const char * s, int c)
165{
166 int d0, d1;
167 char * res;
168 asm volatile( "movb %%al,%%ah\n"
169 "1:\tlodsb\n\t"
170 "cmpb %%ah,%%al\n\t"
171 "jne 2f\n\t"
172 "leal -1(%%esi),%0\n"
173 "2:\ttestb %%al,%%al\n\t"
174 "jne 1b"
175 :"=g" (res), "=&S" (d0), "=&a" (d1)
176 :"0" (0),"1" (s),"2" (c)
177 :"memory");
178 return res;
179}
180EXPORT_SYMBOL(strrchr);
181#endif
182
183#ifdef __HAVE_ARCH_STRLEN
184size_t strlen(const char * s)
185{
186 int d0;
187 int res;
188 asm volatile( "repne\n\t"
189 "scasb\n\t"
190 "notl %0\n\t"
191 "decl %0"
192 :"=c" (res), "=&D" (d0)
193 :"1" (s),"a" (0), "0" (0xffffffffu)
194 :"memory");
195 return res;
196}
197EXPORT_SYMBOL(strlen);
198#endif
199
200#ifdef __HAVE_ARCH_MEMCHR
201void *memchr(const void *cs,int c,size_t count)
202{
203 int d0;
204 void *res;
205 if (!count)
206 return NULL;
207 asm volatile( "repne\n\t"
208 "scasb\n\t"
209 "je 1f\n\t"
210 "movl $1,%0\n"
211 "1:\tdecl %0"
212 :"=D" (res), "=&c" (d0)
213 :"a" (c),"0" (cs),"1" (count)
214 :"memory");
215 return res;
216}
217EXPORT_SYMBOL(memchr);
218#endif
219
220#ifdef __HAVE_ARCH_MEMSCAN
221void *memscan(void * addr, int c, size_t size)
222{
223 if (!size)
224 return addr;
225 asm volatile("repnz; scasb\n\t"
226 "jnz 1f\n\t"
227 "dec %%edi\n"
228 "1:"
229 : "=D" (addr), "=c" (size)
230 : "0" (addr), "1" (size), "a" (c)
231 : "memory");
232 return addr;
233}
234EXPORT_SYMBOL(memscan);
235#endif
236
237#ifdef __HAVE_ARCH_STRNLEN
238size_t strnlen(const char *s, size_t count)
239{
240 int d0;
241 int res;
242 asm volatile( "movl %2,%0\n\t"
243 "jmp 2f\n"
244 "1:\tcmpb $0,(%0)\n\t"
245 "je 3f\n\t"
246 "incl %0\n"
247 "2:\tdecl %1\n\t"
248 "cmpl $-1,%1\n\t"
249 "jne 1b\n"
250 "3:\tsubl %2,%0"
251 :"=a" (res), "=&d" (d0)
252 :"c" (s),"1" (count)
253 :"memory");
254 return res;
255}
256EXPORT_SYMBOL(strnlen);
257#endif
diff --git a/include/asm-i386/string.h b/include/asm-i386/string.h
index b9277361954b..a9b64453bdf5 100644
--- a/include/asm-i386/string.h
+++ b/include/asm-i386/string.h
@@ -2,203 +2,35 @@
2#define _I386_STRING_H_ 2#define _I386_STRING_H_
3 3
4#ifdef __KERNEL__ 4#ifdef __KERNEL__
5/*
6 * On a 486 or Pentium, we are better off not using the
7 * byte string operations. But on a 386 or a PPro the
8 * byte string ops are faster than doing it by hand
9 * (MUCH faster on a Pentium).
10 */
11
12/*
13 * This string-include defines all string functions as inline
14 * functions. Use gcc. It also assumes ds=es=data space, this should be
15 * normal. Most of the string-functions are rather heavily hand-optimized,
16 * see especially strsep,strstr,str[c]spn. They should work, but are not
17 * very easy to understand. Everything is done entirely within the register
18 * set, making the functions fast and clean. String instructions have been
19 * used through-out, making for "slightly" unclear code :-)
20 *
21 * NO Copyright (C) 1991, 1992 Linus Torvalds,
22 * consider these trivial functions to be PD.
23 */
24 5
25/* AK: in fact I bet it would be better to move this stuff all out of line. 6/* Let gcc decide whether to inline or use the out of line functions */
26 */
27 7
28#define __HAVE_ARCH_STRCPY 8#define __HAVE_ARCH_STRCPY
29static inline char * strcpy(char * dest,const char *src) 9extern char *strcpy(char *dest, const char *src);
30{
31int d0, d1, d2;
32__asm__ __volatile__(
33 "1:\tlodsb\n\t"
34 "stosb\n\t"
35 "testb %%al,%%al\n\t"
36 "jne 1b"
37 : "=&S" (d0), "=&D" (d1), "=&a" (d2)
38 :"0" (src),"1" (dest) : "memory");
39return dest;
40}
41 10
42#define __HAVE_ARCH_STRNCPY 11#define __HAVE_ARCH_STRNCPY
43static inline char * strncpy(char * dest,const char *src,size_t count) 12extern char *strncpy(char *dest, const char *src, size_t count);
44{
45int d0, d1, d2, d3;
46__asm__ __volatile__(
47 "1:\tdecl %2\n\t"
48 "js 2f\n\t"
49 "lodsb\n\t"
50 "stosb\n\t"
51 "testb %%al,%%al\n\t"
52 "jne 1b\n\t"
53 "rep\n\t"
54 "stosb\n"
55 "2:"
56 : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
57 :"0" (src),"1" (dest),"2" (count) : "memory");
58return dest;
59}
60 13
61#define __HAVE_ARCH_STRCAT 14#define __HAVE_ARCH_STRCAT
62static inline char * strcat(char * dest,const char * src) 15extern char *strcat(char *dest, const char *src);
63{
64int d0, d1, d2, d3;
65__asm__ __volatile__(
66 "repne\n\t"
67 "scasb\n\t"
68 "decl %1\n"
69 "1:\tlodsb\n\t"
70 "stosb\n\t"
71 "testb %%al,%%al\n\t"
72 "jne 1b"
73 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
74 : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu):"memory");
75return dest;
76}
77 16
78#define __HAVE_ARCH_STRNCAT 17#define __HAVE_ARCH_STRNCAT
79static inline char * strncat(char * dest,const char * src,size_t count) 18extern char *strncat(char *dest, const char *src, size_t count);
80{
81int d0, d1, d2, d3;
82__asm__ __volatile__(
83 "repne\n\t"
84 "scasb\n\t"
85 "decl %1\n\t"
86 "movl %8,%3\n"
87 "1:\tdecl %3\n\t"
88 "js 2f\n\t"
89 "lodsb\n\t"
90 "stosb\n\t"
91 "testb %%al,%%al\n\t"
92 "jne 1b\n"
93 "2:\txorl %2,%2\n\t"
94 "stosb"
95 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
96 : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count)
97 : "memory");
98return dest;
99}
100 19
101#define __HAVE_ARCH_STRCMP 20#define __HAVE_ARCH_STRCMP
102static inline int strcmp(const char * cs,const char * ct) 21extern int strcmp(const char *cs, const char *ct);
103{
104int d0, d1;
105register int __res;
106__asm__ __volatile__(
107 "1:\tlodsb\n\t"
108 "scasb\n\t"
109 "jne 2f\n\t"
110 "testb %%al,%%al\n\t"
111 "jne 1b\n\t"
112 "xorl %%eax,%%eax\n\t"
113 "jmp 3f\n"
114 "2:\tsbbl %%eax,%%eax\n\t"
115 "orb $1,%%al\n"
116 "3:"
117 :"=a" (__res), "=&S" (d0), "=&D" (d1)
118 :"1" (cs),"2" (ct)
119 :"memory");
120return __res;
121}
122 22
123#define __HAVE_ARCH_STRNCMP 23#define __HAVE_ARCH_STRNCMP
124static inline int strncmp(const char * cs,const char * ct,size_t count) 24extern int strncmp(const char *cs, const char *ct, size_t count);
125{
126register int __res;
127int d0, d1, d2;
128__asm__ __volatile__(
129 "1:\tdecl %3\n\t"
130 "js 2f\n\t"
131 "lodsb\n\t"
132 "scasb\n\t"
133 "jne 3f\n\t"
134 "testb %%al,%%al\n\t"
135 "jne 1b\n"
136 "2:\txorl %%eax,%%eax\n\t"
137 "jmp 4f\n"
138 "3:\tsbbl %%eax,%%eax\n\t"
139 "orb $1,%%al\n"
140 "4:"
141 :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
142 :"1" (cs),"2" (ct),"3" (count)
143 :"memory");
144return __res;
145}
146 25
147#define __HAVE_ARCH_STRCHR 26#define __HAVE_ARCH_STRCHR
148static inline char * strchr(const char * s, int c) 27extern char *strchr(const char *s, int c);
149{
150int d0;
151register char * __res;
152__asm__ __volatile__(
153 "movb %%al,%%ah\n"
154 "1:\tlodsb\n\t"
155 "cmpb %%ah,%%al\n\t"
156 "je 2f\n\t"
157 "testb %%al,%%al\n\t"
158 "jne 1b\n\t"
159 "movl $1,%1\n"
160 "2:\tmovl %1,%0\n\t"
161 "decl %0"
162 :"=a" (__res), "=&S" (d0)
163 :"1" (s),"0" (c)
164 :"memory");
165return __res;
166}
167 28
168#define __HAVE_ARCH_STRRCHR 29#define __HAVE_ARCH_STRRCHR
169static inline char * strrchr(const char * s, int c) 30extern char *strrchr(const char *s, int c);
170{
171int d0, d1;
172register char * __res;
173__asm__ __volatile__(
174 "movb %%al,%%ah\n"
175 "1:\tlodsb\n\t"
176 "cmpb %%ah,%%al\n\t"
177 "jne 2f\n\t"
178 "leal -1(%%esi),%0\n"
179 "2:\ttestb %%al,%%al\n\t"
180 "jne 1b"
181 :"=g" (__res), "=&S" (d0), "=&a" (d1)
182 :"0" (0),"1" (s),"2" (c)
183 :"memory");
184return __res;
185}
186 31
187#define __HAVE_ARCH_STRLEN 32#define __HAVE_ARCH_STRLEN
188static inline size_t strlen(const char * s) 33extern size_t strlen(const char *s);
189{
190int d0;
191register int __res;
192__asm__ __volatile__(
193 "repne\n\t"
194 "scasb\n\t"
195 "notl %0\n\t"
196 "decl %0"
197 :"=c" (__res), "=&D" (d0)
198 :"1" (s),"a" (0), "0" (0xffffffffu)
199 :"memory");
200return __res;
201}
202 34
203static __always_inline void * __memcpy(void * to, const void * from, size_t n) 35static __always_inline void * __memcpy(void * to, const void * from, size_t n)
204{ 36{
@@ -207,9 +39,7 @@ __asm__ __volatile__(
207 "rep ; movsl\n\t" 39 "rep ; movsl\n\t"
208 "movl %4,%%ecx\n\t" 40 "movl %4,%%ecx\n\t"
209 "andl $3,%%ecx\n\t" 41 "andl $3,%%ecx\n\t"
210#if 1 /* want to pay 2 byte penalty for a chance to skip microcoded rep? */
211 "jz 1f\n\t" 42 "jz 1f\n\t"
212#endif
213 "rep ; movsb\n\t" 43 "rep ; movsb\n\t"
214 "1:" 44 "1:"
215 : "=&c" (d0), "=&D" (d1), "=&S" (d2) 45 : "=&c" (d0), "=&D" (d1), "=&S" (d2)
@@ -328,23 +158,7 @@ void *memmove(void * dest,const void * src, size_t n);
328#define memcmp __builtin_memcmp 158#define memcmp __builtin_memcmp
329 159
330#define __HAVE_ARCH_MEMCHR 160#define __HAVE_ARCH_MEMCHR
331static inline void * memchr(const void * cs,int c,size_t count) 161extern void *memchr(const void * cs,int c,size_t count);
332{
333int d0;
334register void * __res;
335if (!count)
336 return NULL;
337__asm__ __volatile__(
338 "repne\n\t"
339 "scasb\n\t"
340 "je 1f\n\t"
341 "movl $1,%0\n"
342 "1:\tdecl %0"
343 :"=D" (__res), "=&c" (d0)
344 :"a" (c),"0" (cs),"1" (count)
345 :"memory");
346return __res;
347}
348 162
349static inline void * __memset_generic(void * s, char c,size_t count) 163static inline void * __memset_generic(void * s, char c,size_t count)
350{ 164{
@@ -386,29 +200,10 @@ return (s);
386 200
387/* Added by Gertjan van Wingerde to make minix and sysv module work */ 201/* Added by Gertjan van Wingerde to make minix and sysv module work */
388#define __HAVE_ARCH_STRNLEN 202#define __HAVE_ARCH_STRNLEN
389static inline size_t strnlen(const char * s, size_t count) 203extern size_t strnlen(const char * s, size_t count);
390{
391int d0;
392register int __res;
393__asm__ __volatile__(
394 "movl %2,%0\n\t"
395 "jmp 2f\n"
396 "1:\tcmpb $0,(%0)\n\t"
397 "je 3f\n\t"
398 "incl %0\n"
399 "2:\tdecl %1\n\t"
400 "cmpl $-1,%1\n\t"
401 "jne 1b\n"
402 "3:\tsubl %2,%0"
403 :"=a" (__res), "=&d" (d0)
404 :"c" (s),"1" (count)
405 :"memory");
406return __res;
407}
408/* end of additional stuff */ 204/* end of additional stuff */
409 205
410#define __HAVE_ARCH_STRSTR 206#define __HAVE_ARCH_STRSTR
411
412extern char *strstr(const char *cs, const char *ct); 207extern char *strstr(const char *cs, const char *ct);
413 208
414/* 209/*
@@ -474,19 +269,7 @@ __asm__ __volatile__( \
474 * find the first occurrence of byte 'c', or 1 past the area if none 269 * find the first occurrence of byte 'c', or 1 past the area if none
475 */ 270 */
476#define __HAVE_ARCH_MEMSCAN 271#define __HAVE_ARCH_MEMSCAN
477static inline void * memscan(void * addr, int c, size_t size) 272extern void *memscan(void * addr, int c, size_t size);
478{
479 if (!size)
480 return addr;
481 __asm__("repnz; scasb\n\t"
482 "jnz 1f\n\t"
483 "dec %%edi\n"
484 "1:"
485 : "=D" (addr), "=c" (size)
486 : "0" (addr), "1" (size), "a" (c)
487 : "memory");
488 return addr;
489}
490 273
491#endif /* __KERNEL__ */ 274#endif /* __KERNEL__ */
492 275