diff options
Diffstat (limited to 'arch/sh/lib64/memset.S')
-rw-r--r-- | arch/sh/lib64/memset.S | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/arch/sh/lib64/memset.S b/arch/sh/lib64/memset.S new file mode 100644 index 000000000000..2d37b0488552 --- /dev/null +++ b/arch/sh/lib64/memset.S | |||
@@ -0,0 +1,91 @@ | |||
1 | /* Cloned and hacked for uClibc by Paul Mundt, December 2003 */ | ||
2 | /* Modified by SuperH, Inc. September 2003 */ | ||
3 | ! | ||
4 | ! Fast SH memset | ||
5 | ! | ||
6 | ! by Toshiyasu Morita (tm@netcom.com) | ||
7 | ! | ||
8 | ! SH5 code by J"orn Rennecke (joern.rennecke@superh.com) | ||
9 | ! Copyright 2002 SuperH Ltd. | ||
10 | ! | ||
11 | |||
// SHHI / SHLO pick between the shlld and shlrd shift instructions by byte order:
// little-endian maps SHHI to shlld and SHLO to shlrd, big-endian swaps them.
// NOTE(review): the names suggest "shift toward higher / lower memory addresses";
// confirm against the SHmedia instruction reference.
// Only SHHI is referenced by the memset code visible in this file.
12 | #if __BYTE_ORDER == __LITTLE_ENDIAN | ||
13 | #define SHHI shlld | ||
14 | #define SHLO shlrd | ||
15 | #else | ||
16 | #define SHHI shlrd | ||
17 | #define SHLO shlld | ||
18 | #endif | ||
19 | |||
20 | .section .text..SHmedia32,"ax" | ||
21 | .globl memset | ||
22 | .type memset, @function | ||
23 | |||
24 | .align 5 | ||
25 | |||
// memset: fill a byte range with a single value.
//
// Register use, as read from the code below:
//   r2  - destination address; never written here, so it is unchanged at every exit
//   r3  - fill value; replicated into every byte of r3 before any store
//   r4  - byte count (overwritten on the short path)
//   r18 - copied into tr2, which every exit branches through (blink tr2, r63)
// Scratch: r5, r7, r8, r9, r20, r22, r23, r24, r25, tr0, tr1, tr2.
// NOTE(review): (r2, r3, r4) = (dst, c, n), r18 as the return address and r2 as the
// return value are assumed from the SH-5 calling convention; confirm against the ABI.
//
// Derived values:
//   r22 = r2 & 7        misalignment of dst inside its 8-byte quadword
//   r23 = r4 + r22      offset of the end of the region from the aligned base
//   r9  = 8             quadword size; reused later as the "8 quadwords" threshold
26 | memset: | ||
27 | pta/l multiquad, tr0 | ||
28 | andi r2, 7, r22 | ||
29 | ptabs r18, tr2 | ||
30 | mshflo.b r3,r3,r3 | ||
31 | add r4, r22, r23 | ||
32 | mperm.w r3, r63, r3 // Fill pattern now in every byte of r3 | ||
33 | |||
// r23 > 8 (unsigned) means the region extends past the first aligned quadword.
34 | movi 8, r9 | ||
35 | bgtu/u r23, r9, tr0 // multiquad | ||
36 | |||
// Short path: the whole region lies inside one aligned quadword.
// The existing bytes are loaded into r7, a mask is built in r8 from all-ones
// shifted by SHHI, the pattern and the old bytes are merged with mcmv, and the
// result is stored back.
// r4 is scaled by 4 and the shift is applied twice, for a total of count * 8 bits.
// NOTE(review): the split into two shifts is presumably so that count == 8
// (a 64-bit total shift) still clears the mask; confirm against the ISA manual.
// NOTE(review): assumes mcmv takes r7 where r8 is set and keeps r3 elsewhere,
// so bytes past the requested count keep their old contents; confirm.
37 | beqi/u r4, 0, tr2 // Return with size 0 - ensures no mem accesses | ||
38 | ldlo.q r2, 0, r7 | ||
39 | shlli r4, 2, r4 | ||
40 | movi -1, r8 | ||
41 | SHHI r8, r4, r8 | ||
42 | SHHI r8, r4, r8 | ||
43 | mcmv r7, r8, r3 | ||
44 | stlo.q r2, 0, r3 | ||
45 | blink tr2, r63 | ||
46 | |||
// Long path: the region crosses at least one quadword boundary.
//   stlo.q r2, 0     writes the head, starting at dst
//   r24 = r23 >> 3   count of whole quadwords between the aligned base and the end
//   r5  = r2 + r4    end address (one past the last byte)
//   r25 = r2 - r22   dst rounded down to a quadword boundary
//   r20 = r5 & -8    end address rounded down to a quadword boundary
//   r8  = r20 - 56   loop bound used by the bulk loop
// Every exit finishes with sthi.q r5, -1, which writes the tail ending at r5 - 1.
// NOTE(review): stlo.q / sthi.q are taken to be the partial (unaligned) quadword
// stores of SHmedia; confirm the exact byte ranges in the instruction reference.
//
// r24 == 1: head and tail stores alone cover the region.
// r24 in 2..7: aligned stores are issued in pairs, one counted up from r25 and one
// counted down from r20. After the first pair r24 is halved, so the checks against
// 1 and 2 stop after one pair (r24 was 2 or 3) or two pairs (4 or 5); three pairs
// handle 6 or 7. Pairs may hit the same address, which is harmless because all
// stores write the same pattern.
// r24 >= 8: branch to the bulk loop.
47 | multiquad: | ||
48 | pta/l lastquad, tr0 | ||
49 | stlo.q r2, 0, r3 | ||
50 | shlri r23, 3, r24 | ||
51 | add r2, r4, r5 | ||
52 | beqi/u r24, 1, tr0 // lastquad | ||
53 | pta/l loop, tr1 | ||
54 | sub r2, r22, r25 | ||
55 | andi r5, -8, r20 // calculate end address and | ||
56 | addi r20, -7*8, r8 // loop end address; This might overflow, so we need | ||
57 | // to use a different test before we start the loop | ||
58 | bge/u r24, r9, tr1 // loop | ||
59 | st.q r25, 8, r3 | ||
60 | st.q r20, -8, r3 | ||
61 | shlri r24, 1, r24 | ||
62 | beqi/u r24, 1, tr0 // lastquad | ||
63 | st.q r25, 16, r3 | ||
64 | st.q r20, -16, r3 | ||
65 | beqi/u r24, 2, tr0 // lastquad | ||
66 | st.q r25, 24, r3 | ||
67 | st.q r20, -24, r3 | ||
// Common exit for the unrolled cases: write the tail and return.
68 | lastquad: | ||
69 | sthi.q r5, -1, r3 | ||
70 | blink tr2,r63 | ||
71 | |||
// Bulk loop: each pass stores four aligned quadwords at r25 + 8 .. r25 + 32 and
// then advances r25 by 32. It repeats while r8 >= r25 (unsigned), that is while
// r25 is at most r20 - 56.
72 | loop: | ||
73 | !!! alloco r25, 32 // QQQ comment out for short-term fix to SHUK #3895. | ||
74 | // QQQ commenting out is logically correct, but sub-optimal | ||
75 | // QQQ Sean McGoogan - 4th April 2003. | ||
76 | st.q r25, 8, r3 | ||
77 | st.q r25, 16, r3 | ||
78 | st.q r25, 24, r3 | ||
79 | st.q r25, 32, r3 | ||
80 | addi r25, 32, r25 | ||
81 | bgeu/l r8, r25, tr1 // loop | ||
82 | |||
// After the loop at most five aligned quadwords remain below r20. The five stores
// counted down from r20 cover them (some may repeat quadwords the loop already
// wrote), then the tail store finishes the region.
83 | st.q r20, -40, r3 | ||
84 | st.q r20, -32, r3 | ||
85 | st.q r20, -24, r3 | ||
86 | st.q r20, -16, r3 | ||
87 | st.q r20, -8, r3 | ||
88 | sthi.q r5, -1, r3 | ||
89 | blink tr2,r63 | ||
90 | |||
91 | .size memset,.-memset | ||