diff options
author | Roman Zippel <zippel@linux-m68k.org> | 2005-09-03 18:57:10 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@evo.osdl.org> | 2005-09-05 03:06:19 -0400 |
commit | 072dffda1d35c391fe893ec9b1d098145e668fef (patch) | |
tree | 2eee1530619abb7cf751db022216ab483e6fc823 /include | |
parent | 2855b97020f6d4a4dfb005fb77c0b79c8cb9d13f (diff) |
[PATCH] m68k: cleanup inline mem functions
Use the builtin functions for memset/memclr/memcpy; special optimizations for
page operations have dedicated functions now. Uninline memmove/memchr and
move all functions into a single file and clean it up a little.
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-m68k/string.h | 403 |
1 files changed, 7 insertions, 396 deletions
diff --git a/include/asm-m68k/string.h b/include/asm-m68k/string.h index 44def078132a..6c59215b285e 100644 --- a/include/asm-m68k/string.h +++ b/include/asm-m68k/string.h | |||
@@ -80,43 +80,6 @@ static inline char * strchr(const char * s, int c) | |||
80 | return( (char *) s); | 80 | return( (char *) s); |
81 | } | 81 | } |
82 | 82 | ||
83 | #if 0 | ||
84 | #define __HAVE_ARCH_STRPBRK | ||
85 | static inline char *strpbrk(const char *cs,const char *ct) | ||
86 | { | ||
87 | const char *sc1,*sc2; | ||
88 | |||
89 | for( sc1 = cs; *sc1 != '\0'; ++sc1) | ||
90 | for( sc2 = ct; *sc2 != '\0'; ++sc2) | ||
91 | if (*sc1 == *sc2) | ||
92 | return((char *) sc1); | ||
93 | return( NULL ); | ||
94 | } | ||
95 | #endif | ||
96 | |||
97 | #if 0 | ||
98 | #define __HAVE_ARCH_STRSPN | ||
99 | static inline size_t strspn(const char *s, const char *accept) | ||
100 | { | ||
101 | const char *p; | ||
102 | const char *a; | ||
103 | size_t count = 0; | ||
104 | |||
105 | for (p = s; *p != '\0'; ++p) | ||
106 | { | ||
107 | for (a = accept; *a != '\0'; ++a) | ||
108 | if (*p == *a) | ||
109 | break; | ||
110 | if (*a == '\0') | ||
111 | return count; | ||
112 | else | ||
113 | ++count; | ||
114 | } | ||
115 | |||
116 | return count; | ||
117 | } | ||
118 | #endif | ||
119 | |||
120 | /* strstr !! */ | 83 | /* strstr !! */ |
121 | 84 | ||
122 | #define __HAVE_ARCH_STRLEN | 85 | #define __HAVE_ARCH_STRLEN |
@@ -173,370 +136,18 @@ static inline int strncmp(const char * cs,const char * ct,size_t count) | |||
173 | } | 136 | } |
174 | 137 | ||
175 | #define __HAVE_ARCH_MEMSET | 138 | #define __HAVE_ARCH_MEMSET |
176 | /* | 139 | extern void *memset(void *, int, __kernel_size_t); |
177 | * This is really ugly, but it's highly optimizable by the | 140 | #define memset(d, c, n) __builtin_memset(d, c, n) |
178 | * compiler and is meant as compensation for gcc's missing | ||
179 | * __builtin_memset(). For the 680[23]0 it might be worth considering | ||
180 | * the optimal number of misaligned writes compared to the number of | ||
181 | * tests'n'branches needed to align the destination address. The | ||
182 | * 680[46]0 doesn't really care due to their copy-back caches. | ||
183 | * 10/09/96 - Jes Sorensen | ||
184 | */ | ||
185 | static inline void * __memset_g(void * s, int c, size_t count) | ||
186 | { | ||
187 | void *xs = s; | ||
188 | size_t temp; | ||
189 | |||
190 | if (!count) | ||
191 | return xs; | ||
192 | |||
193 | c &= 0xff; | ||
194 | c |= c << 8; | ||
195 | c |= c << 16; | ||
196 | |||
197 | if (count < 36){ | ||
198 | long *ls = s; | ||
199 | |||
200 | switch(count){ | ||
201 | case 32: case 33: case 34: case 35: | ||
202 | *ls++ = c; | ||
203 | case 28: case 29: case 30: case 31: | ||
204 | *ls++ = c; | ||
205 | case 24: case 25: case 26: case 27: | ||
206 | *ls++ = c; | ||
207 | case 20: case 21: case 22: case 23: | ||
208 | *ls++ = c; | ||
209 | case 16: case 17: case 18: case 19: | ||
210 | *ls++ = c; | ||
211 | case 12: case 13: case 14: case 15: | ||
212 | *ls++ = c; | ||
213 | case 8: case 9: case 10: case 11: | ||
214 | *ls++ = c; | ||
215 | case 4: case 5: case 6: case 7: | ||
216 | *ls++ = c; | ||
217 | break; | ||
218 | default: | ||
219 | break; | ||
220 | } | ||
221 | s = ls; | ||
222 | if (count & 0x02){ | ||
223 | short *ss = s; | ||
224 | *ss++ = c; | ||
225 | s = ss; | ||
226 | } | ||
227 | if (count & 0x01){ | ||
228 | char *cs = s; | ||
229 | *cs++ = c; | ||
230 | s = cs; | ||
231 | } | ||
232 | return xs; | ||
233 | } | ||
234 | |||
235 | if ((long) s & 1) | ||
236 | { | ||
237 | char *cs = s; | ||
238 | *cs++ = c; | ||
239 | s = cs; | ||
240 | count--; | ||
241 | } | ||
242 | if (count > 2 && (long) s & 2) | ||
243 | { | ||
244 | short *ss = s; | ||
245 | *ss++ = c; | ||
246 | s = ss; | ||
247 | count -= 2; | ||
248 | } | ||
249 | temp = count >> 2; | ||
250 | if (temp) | ||
251 | { | ||
252 | long *ls = s; | ||
253 | temp--; | ||
254 | do | ||
255 | *ls++ = c; | ||
256 | while (temp--); | ||
257 | s = ls; | ||
258 | } | ||
259 | if (count & 2) | ||
260 | { | ||
261 | short *ss = s; | ||
262 | *ss++ = c; | ||
263 | s = ss; | ||
264 | } | ||
265 | if (count & 1) | ||
266 | { | ||
267 | char *cs = s; | ||
268 | *cs = c; | ||
269 | } | ||
270 | return xs; | ||
271 | } | ||
272 | |||
273 | /* | ||
274 | * __memset_page assumes that data is longword aligned. Most, if not | ||
275 | * all, of these page sized memsets are performed on page aligned | ||
276 | * areas, thus we do not need to check if the destination is longword | ||
277 | * aligned. Of course we suffer a serious performance loss if this is | ||
278 | * not the case but I think the risk of this ever happening is | ||
279 | * extremely small. We spend a lot of time clearing pages in | ||
280 | * get_empty_page() so I think it is worth it anyway. Besides, the | ||
281 | * 680[46]0 do not really care about misaligned writes due to their | ||
282 | * copy-back cache. | ||
283 | * | ||
284 | * The optimized case for the 680[46]0 is implemented using the move16 | ||
285 | * instruction. My tests showed that this implementation is 35-45% | ||
286 | * faster than the original implementation using movel, the only | ||
287 | * caveat is that the destination address must be 16-byte aligned. | ||
288 | * 01/09/96 - Jes Sorensen | ||
289 | */ | ||
290 | static inline void * __memset_page(void * s,int c,size_t count) | ||
291 | { | ||
292 | unsigned long data, tmp; | ||
293 | void *xs = s; | ||
294 | |||
295 | c = c & 255; | ||
296 | data = c | (c << 8); | ||
297 | data |= data << 16; | ||
298 | |||
299 | #ifdef CPU_M68040_OR_M68060_ONLY | ||
300 | |||
301 | if (((unsigned long) s) & 0x0f) | ||
302 | __memset_g(s, c, count); | ||
303 | else{ | ||
304 | unsigned long *sp = s; | ||
305 | *sp++ = data; | ||
306 | *sp++ = data; | ||
307 | *sp++ = data; | ||
308 | *sp++ = data; | ||
309 | |||
310 | __asm__ __volatile__("1:\t" | ||
311 | ".chip 68040\n\t" | ||
312 | "move16 %2@+,%0@+\n\t" | ||
313 | ".chip 68k\n\t" | ||
314 | "subqw #8,%2\n\t" | ||
315 | "subqw #8,%2\n\t" | ||
316 | "dbra %1,1b\n\t" | ||
317 | : "=a" (sp), "=d" (tmp) | ||
318 | : "a" (s), "0" (sp), "1" ((count - 16) / 16 - 1) | ||
319 | ); | ||
320 | } | ||
321 | |||
322 | #else | ||
323 | __asm__ __volatile__("1:\t" | ||
324 | "movel %2,%0@+\n\t" | ||
325 | "movel %2,%0@+\n\t" | ||
326 | "movel %2,%0@+\n\t" | ||
327 | "movel %2,%0@+\n\t" | ||
328 | "movel %2,%0@+\n\t" | ||
329 | "movel %2,%0@+\n\t" | ||
330 | "movel %2,%0@+\n\t" | ||
331 | "movel %2,%0@+\n\t" | ||
332 | "dbra %1,1b\n\t" | ||
333 | : "=a" (s), "=d" (tmp) | ||
334 | : "d" (data), "0" (s), "1" (count / 32 - 1) | ||
335 | ); | ||
336 | #endif | ||
337 | |||
338 | return xs; | ||
339 | } | ||
340 | |||
341 | extern void *memset(void *,int,__kernel_size_t); | ||
342 | |||
343 | #define __memset_const(s,c,count) \ | ||
344 | ((count==PAGE_SIZE) ? \ | ||
345 | __memset_page((s),(c),(count)) : \ | ||
346 | __memset_g((s),(c),(count))) | ||
347 | |||
348 | #define memset(s, c, count) \ | ||
349 | (__builtin_constant_p(count) ? \ | ||
350 | __memset_const((s),(c),(count)) : \ | ||
351 | __memset_g((s),(c),(count))) | ||
352 | 141 | ||
353 | #define __HAVE_ARCH_MEMCPY | 142 | #define __HAVE_ARCH_MEMCPY |
354 | extern void * memcpy(void *, const void *, size_t ); | 143 | extern void *memcpy(void *, const void *, __kernel_size_t); |
355 | /* | 144 | #define memcpy(d, s, n) __builtin_memcpy(d, s, n) |
356 | * __builtin_memcpy() does not handle page-sized memcpys very well, | ||
357 | * thus following the same assumptions as for page-sized memsets, this | ||
358 | * function copies page-sized areas using an unrolled loop, without | ||
359 | * considering alignment. | ||
360 | * | ||
361 | * For the 680[46]0 only kernels we use the move16 instruction instead | ||
362 | * as it writes through the data-cache, invalidating the cache-lines | ||
363 | * touched. In this way we do not use up the entire data-cache (well, | ||
364 | * half of it on the 68060) by copying a page. An unrolled loop of two | ||
365 | * move16 instructions seem to the fastest. The only caveat is that | ||
366 | * both source and destination must be 16-byte aligned, if not we fall | ||
367 | * back to the generic memcpy function. - Jes | ||
368 | */ | ||
369 | static inline void * __memcpy_page(void * to, const void * from, size_t count) | ||
370 | { | ||
371 | unsigned long tmp; | ||
372 | void *xto = to; | ||
373 | |||
374 | #ifdef CPU_M68040_OR_M68060_ONLY | ||
375 | |||
376 | if (((unsigned long) to | (unsigned long) from) & 0x0f) | ||
377 | return memcpy(to, from, count); | ||
378 | |||
379 | __asm__ __volatile__("1:\t" | ||
380 | ".chip 68040\n\t" | ||
381 | "move16 %1@+,%0@+\n\t" | ||
382 | "move16 %1@+,%0@+\n\t" | ||
383 | ".chip 68k\n\t" | ||
384 | "dbra %2,1b\n\t" | ||
385 | : "=a" (to), "=a" (from), "=d" (tmp) | ||
386 | : "0" (to), "1" (from) , "2" (count / 32 - 1) | ||
387 | ); | ||
388 | #else | ||
389 | __asm__ __volatile__("1:\t" | ||
390 | "movel %1@+,%0@+\n\t" | ||
391 | "movel %1@+,%0@+\n\t" | ||
392 | "movel %1@+,%0@+\n\t" | ||
393 | "movel %1@+,%0@+\n\t" | ||
394 | "movel %1@+,%0@+\n\t" | ||
395 | "movel %1@+,%0@+\n\t" | ||
396 | "movel %1@+,%0@+\n\t" | ||
397 | "movel %1@+,%0@+\n\t" | ||
398 | "dbra %2,1b\n\t" | ||
399 | : "=a" (to), "=a" (from), "=d" (tmp) | ||
400 | : "0" (to), "1" (from) , "2" (count / 32 - 1) | ||
401 | ); | ||
402 | #endif | ||
403 | return xto; | ||
404 | } | ||
405 | |||
406 | #define __memcpy_const(to, from, n) \ | ||
407 | ((n==PAGE_SIZE) ? \ | ||
408 | __memcpy_page((to),(from),(n)) : \ | ||
409 | __builtin_memcpy((to),(from),(n))) | ||
410 | |||
411 | #define memcpy(to, from, n) \ | ||
412 | (__builtin_constant_p(n) ? \ | ||
413 | __memcpy_const((to),(from),(n)) : \ | ||
414 | memcpy((to),(from),(n))) | ||
415 | 145 | ||
416 | #define __HAVE_ARCH_MEMMOVE | 146 | #define __HAVE_ARCH_MEMMOVE |
417 | static inline void * memmove(void * dest,const void * src, size_t n) | 147 | extern void *memmove(void *, const void *, __kernel_size_t); |
418 | { | ||
419 | void *xdest = dest; | ||
420 | size_t temp; | ||
421 | |||
422 | if (!n) | ||
423 | return xdest; | ||
424 | |||
425 | if (dest < src) | ||
426 | { | ||
427 | if ((long) dest & 1) | ||
428 | { | ||
429 | char *cdest = dest; | ||
430 | const char *csrc = src; | ||
431 | *cdest++ = *csrc++; | ||
432 | dest = cdest; | ||
433 | src = csrc; | ||
434 | n--; | ||
435 | } | ||
436 | if (n > 2 && (long) dest & 2) | ||
437 | { | ||
438 | short *sdest = dest; | ||
439 | const short *ssrc = src; | ||
440 | *sdest++ = *ssrc++; | ||
441 | dest = sdest; | ||
442 | src = ssrc; | ||
443 | n -= 2; | ||
444 | } | ||
445 | temp = n >> 2; | ||
446 | if (temp) | ||
447 | { | ||
448 | long *ldest = dest; | ||
449 | const long *lsrc = src; | ||
450 | temp--; | ||
451 | do | ||
452 | *ldest++ = *lsrc++; | ||
453 | while (temp--); | ||
454 | dest = ldest; | ||
455 | src = lsrc; | ||
456 | } | ||
457 | if (n & 2) | ||
458 | { | ||
459 | short *sdest = dest; | ||
460 | const short *ssrc = src; | ||
461 | *sdest++ = *ssrc++; | ||
462 | dest = sdest; | ||
463 | src = ssrc; | ||
464 | } | ||
465 | if (n & 1) | ||
466 | { | ||
467 | char *cdest = dest; | ||
468 | const char *csrc = src; | ||
469 | *cdest = *csrc; | ||
470 | } | ||
471 | } | ||
472 | else | ||
473 | { | ||
474 | dest = (char *) dest + n; | ||
475 | src = (const char *) src + n; | ||
476 | if ((long) dest & 1) | ||
477 | { | ||
478 | char *cdest = dest; | ||
479 | const char *csrc = src; | ||
480 | *--cdest = *--csrc; | ||
481 | dest = cdest; | ||
482 | src = csrc; | ||
483 | n--; | ||
484 | } | ||
485 | if (n > 2 && (long) dest & 2) | ||
486 | { | ||
487 | short *sdest = dest; | ||
488 | const short *ssrc = src; | ||
489 | *--sdest = *--ssrc; | ||
490 | dest = sdest; | ||
491 | src = ssrc; | ||
492 | n -= 2; | ||
493 | } | ||
494 | temp = n >> 2; | ||
495 | if (temp) | ||
496 | { | ||
497 | long *ldest = dest; | ||
498 | const long *lsrc = src; | ||
499 | temp--; | ||
500 | do | ||
501 | *--ldest = *--lsrc; | ||
502 | while (temp--); | ||
503 | dest = ldest; | ||
504 | src = lsrc; | ||
505 | } | ||
506 | if (n & 2) | ||
507 | { | ||
508 | short *sdest = dest; | ||
509 | const short *ssrc = src; | ||
510 | *--sdest = *--ssrc; | ||
511 | dest = sdest; | ||
512 | src = ssrc; | ||
513 | } | ||
514 | if (n & 1) | ||
515 | { | ||
516 | char *cdest = dest; | ||
517 | const char *csrc = src; | ||
518 | *--cdest = *--csrc; | ||
519 | } | ||
520 | } | ||
521 | return xdest; | ||
522 | } | ||
523 | 148 | ||
524 | #define __HAVE_ARCH_MEMCMP | 149 | #define __HAVE_ARCH_MEMCMP |
525 | extern int memcmp(const void * ,const void * ,size_t ); | 150 | extern int memcmp(const void *, const void *, __kernel_size_t); |
526 | #define memcmp(cs, ct, n) \ | 151 | #define memcmp(d, s, n) __builtin_memcmp(d, s, n) |
527 | (__builtin_constant_p(n) ? \ | ||
528 | __builtin_memcmp((cs),(ct),(n)) : \ | ||
529 | memcmp((cs),(ct),(n))) | ||
530 | |||
531 | #define __HAVE_ARCH_MEMCHR | ||
532 | static inline void *memchr(const void *cs, int c, size_t count) | ||
533 | { | ||
534 | /* Someone else can optimize this, I don't care - tonym@mac.linux-m68k.org */ | ||
535 | unsigned char *ret = (unsigned char *)cs; | ||
536 | for(;count>0;count--,ret++) | ||
537 | if(*ret == c) return ret; | ||
538 | |||
539 | return NULL; | ||
540 | } | ||
541 | 152 | ||
542 | #endif /* _M68K_STRING_H_ */ | 153 | #endif /* _M68K_STRING_H_ */ |