aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author Roman Zippel <zippel@linux-m68k.org> 2005-09-03 18:57:10 -0400
committer Linus Torvalds <torvalds@evo.osdl.org> 2005-09-05 03:06:19 -0400
commit 072dffda1d35c391fe893ec9b1d098145e668fef (patch)
tree 2eee1530619abb7cf751db022216ab483e6fc823 /include
parent 2855b97020f6d4a4dfb005fb77c0b79c8cb9d13f (diff)
[PATCH] m68k: cleanup inline mem functions
Use the builtin functions for memset/memclr/memcpy, special optimizations for page operations have dedicated functions now. Uninline memmove/memchr and move all functions into a single file and clean it up a little.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include')
-rw-r--r-- include/asm-m68k/string.h 403
1 files changed, 7 insertions, 396 deletions
diff --git a/include/asm-m68k/string.h b/include/asm-m68k/string.h
index 44def078132a..6c59215b285e 100644
--- a/include/asm-m68k/string.h
+++ b/include/asm-m68k/string.h
@@ -80,43 +80,6 @@ static inline char * strchr(const char * s, int c)
80 return( (char *) s); 80 return( (char *) s);
81} 81}
82 82
83#if 0
84#define __HAVE_ARCH_STRPBRK
85static inline char *strpbrk(const char *cs,const char *ct)
86{
87 const char *sc1,*sc2;
88
89 for( sc1 = cs; *sc1 != '\0'; ++sc1)
90 for( sc2 = ct; *sc2 != '\0'; ++sc2)
91 if (*sc1 == *sc2)
92 return((char *) sc1);
93 return( NULL );
94}
95#endif
96
97#if 0
98#define __HAVE_ARCH_STRSPN
99static inline size_t strspn(const char *s, const char *accept)
100{
101 const char *p;
102 const char *a;
103 size_t count = 0;
104
105 for (p = s; *p != '\0'; ++p)
106 {
107 for (a = accept; *a != '\0'; ++a)
108 if (*p == *a)
109 break;
110 if (*a == '\0')
111 return count;
112 else
113 ++count;
114 }
115
116 return count;
117}
118#endif
119
120/* strstr !! */ 83/* strstr !! */
121 84
122#define __HAVE_ARCH_STRLEN 85#define __HAVE_ARCH_STRLEN
@@ -173,370 +136,18 @@ static inline int strncmp(const char * cs,const char * ct,size_t count)
173} 136}
174 137
175#define __HAVE_ARCH_MEMSET 138#define __HAVE_ARCH_MEMSET
176/* 139extern void *memset(void *, int, __kernel_size_t);
177 * This is really ugly, but its highly optimizatiable by the 140#define memset(d, c, n) __builtin_memset(d, c, n)
178 * compiler and is meant as compensation for gcc's missing
179 * __builtin_memset(). For the 680[23]0 it might be worth considering
180 * the optimal number of misaligned writes compared to the number of
181 * tests'n'branches needed to align the destination address. The
182 * 680[46]0 doesn't really care due to their copy-back caches.
183 * 10/09/96 - Jes Sorensen
184 */
185static inline void * __memset_g(void * s, int c, size_t count)
186{
187 void *xs = s;
188 size_t temp;
189
190 if (!count)
191 return xs;
192
193 c &= 0xff;
194 c |= c << 8;
195 c |= c << 16;
196
197 if (count < 36){
198 long *ls = s;
199
200 switch(count){
201 case 32: case 33: case 34: case 35:
202 *ls++ = c;
203 case 28: case 29: case 30: case 31:
204 *ls++ = c;
205 case 24: case 25: case 26: case 27:
206 *ls++ = c;
207 case 20: case 21: case 22: case 23:
208 *ls++ = c;
209 case 16: case 17: case 18: case 19:
210 *ls++ = c;
211 case 12: case 13: case 14: case 15:
212 *ls++ = c;
213 case 8: case 9: case 10: case 11:
214 *ls++ = c;
215 case 4: case 5: case 6: case 7:
216 *ls++ = c;
217 break;
218 default:
219 break;
220 }
221 s = ls;
222 if (count & 0x02){
223 short *ss = s;
224 *ss++ = c;
225 s = ss;
226 }
227 if (count & 0x01){
228 char *cs = s;
229 *cs++ = c;
230 s = cs;
231 }
232 return xs;
233 }
234
235 if ((long) s & 1)
236 {
237 char *cs = s;
238 *cs++ = c;
239 s = cs;
240 count--;
241 }
242 if (count > 2 && (long) s & 2)
243 {
244 short *ss = s;
245 *ss++ = c;
246 s = ss;
247 count -= 2;
248 }
249 temp = count >> 2;
250 if (temp)
251 {
252 long *ls = s;
253 temp--;
254 do
255 *ls++ = c;
256 while (temp--);
257 s = ls;
258 }
259 if (count & 2)
260 {
261 short *ss = s;
262 *ss++ = c;
263 s = ss;
264 }
265 if (count & 1)
266 {
267 char *cs = s;
268 *cs = c;
269 }
270 return xs;
271}
272
273/*
274 * __memset_page assumes that data is longword aligned. Most, if not
275 * all, of these page sized memsets are performed on page aligned
276 * areas, thus we do not need to check if the destination is longword
277 * aligned. Of course we suffer a serious performance loss if this is
278 * not the case but I think the risk of this ever happening is
279 * extremely small. We spend a lot of time clearing pages in
280 * get_empty_page() so I think it is worth it anyway. Besides, the
281 * 680[46]0 do not really care about misaligned writes due to their
282 * copy-back cache.
283 *
284 * The optimized case for the 680[46]0 is implemented using the move16
285 * instruction. My tests showed that this implementation is 35-45%
286 * faster than the original implementation using movel, the only
287 * caveat is that the destination address must be 16-byte aligned.
288 * 01/09/96 - Jes Sorensen
289 */
290static inline void * __memset_page(void * s,int c,size_t count)
291{
292 unsigned long data, tmp;
293 void *xs = s;
294
295 c = c & 255;
296 data = c | (c << 8);
297 data |= data << 16;
298
299#ifdef CPU_M68040_OR_M68060_ONLY
300
301 if (((unsigned long) s) & 0x0f)
302 __memset_g(s, c, count);
303 else{
304 unsigned long *sp = s;
305 *sp++ = data;
306 *sp++ = data;
307 *sp++ = data;
308 *sp++ = data;
309
310 __asm__ __volatile__("1:\t"
311 ".chip 68040\n\t"
312 "move16 %2@+,%0@+\n\t"
313 ".chip 68k\n\t"
314 "subqw #8,%2\n\t"
315 "subqw #8,%2\n\t"
316 "dbra %1,1b\n\t"
317 : "=a" (sp), "=d" (tmp)
318 : "a" (s), "0" (sp), "1" ((count - 16) / 16 - 1)
319 );
320 }
321
322#else
323 __asm__ __volatile__("1:\t"
324 "movel %2,%0@+\n\t"
325 "movel %2,%0@+\n\t"
326 "movel %2,%0@+\n\t"
327 "movel %2,%0@+\n\t"
328 "movel %2,%0@+\n\t"
329 "movel %2,%0@+\n\t"
330 "movel %2,%0@+\n\t"
331 "movel %2,%0@+\n\t"
332 "dbra %1,1b\n\t"
333 : "=a" (s), "=d" (tmp)
334 : "d" (data), "0" (s), "1" (count / 32 - 1)
335 );
336#endif
337
338 return xs;
339}
340
341extern void *memset(void *,int,__kernel_size_t);
342
343#define __memset_const(s,c,count) \
344((count==PAGE_SIZE) ? \
345 __memset_page((s),(c),(count)) : \
346 __memset_g((s),(c),(count)))
347
348#define memset(s, c, count) \
349(__builtin_constant_p(count) ? \
350 __memset_const((s),(c),(count)) : \
351 __memset_g((s),(c),(count)))
352 141
353#define __HAVE_ARCH_MEMCPY 142#define __HAVE_ARCH_MEMCPY
354extern void * memcpy(void *, const void *, size_t ); 143extern void *memcpy(void *, const void *, __kernel_size_t);
355/* 144#define memcpy(d, s, n) __builtin_memcpy(d, s, n)
356 * __builtin_memcpy() does not handle page-sized memcpys very well,
357 * thus following the same assumptions as for page-sized memsets, this
358 * function copies page-sized areas using an unrolled loop, without
359 * considering alignment.
360 *
361 * For the 680[46]0 only kernels we use the move16 instruction instead
362 * as it writes through the data-cache, invalidating the cache-lines
363 * touched. In this way we do not use up the entire data-cache (well,
364 * half of it on the 68060) by copying a page. An unrolled loop of two
365 * move16 instructions seem to the fastest. The only caveat is that
366 * both source and destination must be 16-byte aligned, if not we fall
367 * back to the generic memcpy function. - Jes
368 */
369static inline void * __memcpy_page(void * to, const void * from, size_t count)
370{
371 unsigned long tmp;
372 void *xto = to;
373
374#ifdef CPU_M68040_OR_M68060_ONLY
375
376 if (((unsigned long) to | (unsigned long) from) & 0x0f)
377 return memcpy(to, from, count);
378
379 __asm__ __volatile__("1:\t"
380 ".chip 68040\n\t"
381 "move16 %1@+,%0@+\n\t"
382 "move16 %1@+,%0@+\n\t"
383 ".chip 68k\n\t"
384 "dbra %2,1b\n\t"
385 : "=a" (to), "=a" (from), "=d" (tmp)
386 : "0" (to), "1" (from) , "2" (count / 32 - 1)
387 );
388#else
389 __asm__ __volatile__("1:\t"
390 "movel %1@+,%0@+\n\t"
391 "movel %1@+,%0@+\n\t"
392 "movel %1@+,%0@+\n\t"
393 "movel %1@+,%0@+\n\t"
394 "movel %1@+,%0@+\n\t"
395 "movel %1@+,%0@+\n\t"
396 "movel %1@+,%0@+\n\t"
397 "movel %1@+,%0@+\n\t"
398 "dbra %2,1b\n\t"
399 : "=a" (to), "=a" (from), "=d" (tmp)
400 : "0" (to), "1" (from) , "2" (count / 32 - 1)
401 );
402#endif
403 return xto;
404}
405
406#define __memcpy_const(to, from, n) \
407((n==PAGE_SIZE) ? \
408 __memcpy_page((to),(from),(n)) : \
409 __builtin_memcpy((to),(from),(n)))
410
411#define memcpy(to, from, n) \
412(__builtin_constant_p(n) ? \
413 __memcpy_const((to),(from),(n)) : \
414 memcpy((to),(from),(n)))
415 145
416#define __HAVE_ARCH_MEMMOVE 146#define __HAVE_ARCH_MEMMOVE
417static inline void * memmove(void * dest,const void * src, size_t n) 147extern void *memmove(void *, const void *, __kernel_size_t);
418{
419 void *xdest = dest;
420 size_t temp;
421
422 if (!n)
423 return xdest;
424
425 if (dest < src)
426 {
427 if ((long) dest & 1)
428 {
429 char *cdest = dest;
430 const char *csrc = src;
431 *cdest++ = *csrc++;
432 dest = cdest;
433 src = csrc;
434 n--;
435 }
436 if (n > 2 && (long) dest & 2)
437 {
438 short *sdest = dest;
439 const short *ssrc = src;
440 *sdest++ = *ssrc++;
441 dest = sdest;
442 src = ssrc;
443 n -= 2;
444 }
445 temp = n >> 2;
446 if (temp)
447 {
448 long *ldest = dest;
449 const long *lsrc = src;
450 temp--;
451 do
452 *ldest++ = *lsrc++;
453 while (temp--);
454 dest = ldest;
455 src = lsrc;
456 }
457 if (n & 2)
458 {
459 short *sdest = dest;
460 const short *ssrc = src;
461 *sdest++ = *ssrc++;
462 dest = sdest;
463 src = ssrc;
464 }
465 if (n & 1)
466 {
467 char *cdest = dest;
468 const char *csrc = src;
469 *cdest = *csrc;
470 }
471 }
472 else
473 {
474 dest = (char *) dest + n;
475 src = (const char *) src + n;
476 if ((long) dest & 1)
477 {
478 char *cdest = dest;
479 const char *csrc = src;
480 *--cdest = *--csrc;
481 dest = cdest;
482 src = csrc;
483 n--;
484 }
485 if (n > 2 && (long) dest & 2)
486 {
487 short *sdest = dest;
488 const short *ssrc = src;
489 *--sdest = *--ssrc;
490 dest = sdest;
491 src = ssrc;
492 n -= 2;
493 }
494 temp = n >> 2;
495 if (temp)
496 {
497 long *ldest = dest;
498 const long *lsrc = src;
499 temp--;
500 do
501 *--ldest = *--lsrc;
502 while (temp--);
503 dest = ldest;
504 src = lsrc;
505 }
506 if (n & 2)
507 {
508 short *sdest = dest;
509 const short *ssrc = src;
510 *--sdest = *--ssrc;
511 dest = sdest;
512 src = ssrc;
513 }
514 if (n & 1)
515 {
516 char *cdest = dest;
517 const char *csrc = src;
518 *--cdest = *--csrc;
519 }
520 }
521 return xdest;
522}
523 148
524#define __HAVE_ARCH_MEMCMP 149#define __HAVE_ARCH_MEMCMP
525extern int memcmp(const void * ,const void * ,size_t ); 150extern int memcmp(const void *, const void *, __kernel_size_t);
526#define memcmp(cs, ct, n) \ 151#define memcmp(d, s, n) __builtin_memcmp(d, s, n)
527(__builtin_constant_p(n) ? \
528 __builtin_memcmp((cs),(ct),(n)) : \
529 memcmp((cs),(ct),(n)))
530
531#define __HAVE_ARCH_MEMCHR
532static inline void *memchr(const void *cs, int c, size_t count)
533{
534 /* Someone else can optimize this, I don't care - tonym@mac.linux-m68k.org */
535 unsigned char *ret = (unsigned char *)cs;
536 for(;count>0;count--,ret++)
537 if(*ret == c) return ret;
538
539 return NULL;
540}
541 152
542#endif /* _M68K_STRING_H_ */ 153#endif /* _M68K_STRING_H_ */