diff options
Diffstat (limited to 'arch/arc/lib')
-rw-r--r-- | arch/arc/lib/memcmp.S | 124 | ||||
-rw-r--r-- | arch/arc/lib/memcpy-700.S | 66 | ||||
-rw-r--r-- | arch/arc/lib/memset.S | 59 | ||||
-rw-r--r-- | arch/arc/lib/strchr-700.S | 123 | ||||
-rw-r--r-- | arch/arc/lib/strcmp.S | 96 | ||||
-rw-r--r-- | arch/arc/lib/strcpy-700.S | 70 | ||||
-rw-r--r-- | arch/arc/lib/strlen.S | 83 |
7 files changed, 621 insertions, 0 deletions
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S new file mode 100644 index 000000000000..bc813d55b6c3 --- /dev/null +++ b/arch/arc/lib/memcmp.S | |||
@@ -0,0 +1,124 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <asm/linkage.h> | ||
10 | |||
; Endian-dependent register aliases for memcmp: WORD2 receives the second word
; loaded from the first buffer in each loop iteration, SHIFT is the register
; that is scaled by 8 before the final partial-word comparison.
11 | #ifdef __LITTLE_ENDIAN__ | ||
12 | #define WORD2 r2 | ||
13 | #define SHIFT r3 | ||
14 | #else /* BIG ENDIAN */ | ||
15 | #define WORD2 r3 | ||
16 | #define SHIFT r2 | ||
17 | #endif | ||
18 | |||
; memcmp: compare two memory areas.
;   In:      r0, r1 = addresses of the two areas, r2 = length in bytes
;   Out:     r0 = 0 when no difference is found, otherwise a non-zero value
;            computed from the first differing position
;   Scratch: r1-r5, r12, lp_count (which of them depends on the path taken)
19 | ARC_ENTRY memcmp | ||
; r12 = (r0 | r1) << 30: non-zero iff either address has one of its low two bits set.
20 | or r12,r0,r1 | ||
21 | asl_s r12,r12,30 | ||
; r3 = length - 1, the basis of both loop counts below.
22 | sub r3,r2,1 | ||
; Unsigned length <= r12 selects the byte loop: always for length 0, and for
; unaligned addresses unless the length exceeds the shifted alignment bits.
23 | brls r2,r12,.Lbytewise | ||
; Word path: r4/r5 hold the next pair of words to be compared.
24 | ld r4,[r0,0] | ||
25 | ld r5,[r1,0] | ||
; lp_count = (length - 1) / 8; lpne skips the hardware loop when that is zero.
26 | lsr.f lp_count,r3,3 | ||
27 | lpne .Loop_end | ||
; Each iteration covers 8 bytes: the even word pair (r4/r5) and the odd word
; pair (WORD2/r12); the next even pair is fetched with address write-back.
28 | ld_s WORD2,[r0,4] | ||
29 | ld_s r12,[r1,4] | ||
30 | brne r4,r5,.Leven | ||
31 | ld.a r4,[r0,8] | ||
32 | ld.a r5,[r1,8] | ||
33 | brne WORD2,r12,.Lodd | ||
34 | .Loop_end: | ||
; NOTE(review): bhs_s presumably tests the carry left by the lsr.f above (the last
; bit shifted out, i.e. bit 2 of length - 1), assuming asl_s leaves the flags
; untouched - confirm against the ISA manual.
35 | asl_s SHIFT,SHIFT,3 | ||
36 | bhs_s .Last_cmp | ||
; Otherwise one more complete word remains: compare r4/r5 in full, then fetch
; the final pair of words.
37 | brne r4,r5,.Leven | ||
38 | ld r4,[r0,4] | ||
39 | ld r5,[r1,4] | ||
40 | #ifdef __LITTLE_ENDIAN__ | ||
41 | nop_s | ||
42 | ; one more load latency cycle | ||
43 | .Last_cmp: | ||
; Final, possibly partial word: r0 = difference bits, with an extra bit forced
; on at position SHIFT so that nothing above that bit can be picked as the
; first difference.
44 | xor r0,r4,r5 | ||
45 | bset r0,r0,SHIFT | ||
; (r0 - 1) & ~r0 keeps only the bits below the lowest set bit of r0; norm and
; the mask with 24 turn that into a byte-granular shift count in r1.
46 | sub_s r1,r0,1 | ||
47 | bic_s r1,r1,r0 | ||
48 | norm r1,r1 | ||
; The and below executes in the delay slot of b.d.
49 | b.d .Leven_cmp | ||
50 | and r1,r1,24 | ||
51 | .Leven: | ||
; Difference inside a complete even word pair: same shift-count computation
; as above, without the forced limit bit.
52 | xor r0,r4,r5 | ||
53 | sub_s r1,r0,1 | ||
54 | bic_s r1,r1,r0 | ||
55 | norm r1,r1 | ||
56 | ; slow track insn | ||
57 | and r1,r1,24 | ||
58 | .Leven_cmp: | ||
; Shift both words left by r1, then right by one so both values fit in 31 bits
; before they are subtracted; the subtraction runs in the delay slot of the return.
59 | asl r2,r4,r1 | ||
60 | asl r12,r5,r1 | ||
61 | lsr_s r2,r2,1 | ||
62 | lsr_s r12,r12,1 | ||
63 | j_s.d [blink] | ||
64 | sub r0,r2,r12 | ||
65 | .balign 4 | ||
66 | .Lodd: | ||
; Difference inside the odd word pair (WORD2 is r2 in this configuration and
; r12 holds the word from the second buffer): same procedure as .Leven.
67 | xor r0,WORD2,r12 | ||
68 | sub_s r1,r0,1 | ||
69 | bic_s r1,r1,r0 | ||
70 | norm r1,r1 | ||
71 | ; slow track insn | ||
72 | and r1,r1,24 | ||
73 | asl_s r2,r2,r1 | ||
74 | asl_s r12,r12,r1 | ||
75 | lsr_s r2,r2,1 | ||
76 | lsr_s r12,r12,1 | ||
77 | j_s.d [blink] | ||
78 | sub r0,r2,r12 | ||
79 | #else /* BIG ENDIAN */ | ||
80 | .Last_cmp: | ||
; Final, possibly partial word: shift both words right by the negated SHIFT
; so that only the leading bytes still covered by the length take part.
81 | neg_s SHIFT,SHIFT | ||
82 | lsr r4,r4,SHIFT | ||
83 | lsr r5,r5,SHIFT | ||
84 | ; slow track insn | ||
85 | .Leven: | ||
; r0 = 0 when equal, otherwise 1, with bit 31 additionally set (in the delay
; slot) when the subtraction produced a carry/borrow.
86 | sub.f r0,r4,r5 | ||
87 | mov.ne r0,1 | ||
88 | j_s.d [blink] | ||
89 | bset.cs r0,r0,31 | ||
90 | .Lodd: | ||
; The odd words are known to differ here: return 1, with bit 31 set on carry.
91 | cmp_s WORD2,r12 | ||
92 | |||
93 | mov_s r0,1 | ||
94 | j_s.d [blink] | ||
95 | bset.cs r0,r0,31 | ||
96 | #endif /* ENDIAN */ | ||
97 | .balign 4 | ||
98 | .Lbytewise: | ||
; Byte path: same structure as the word loop, two bytes per iteration.
; A zero length returns 0 immediately.
99 | breq r2,0,.Lnil | ||
100 | ldb r4,[r0,0] | ||
101 | ldb r5,[r1,0] | ||
; lp_count = (length - 1) / 2; the bit shifted out decides, after the loop,
; whether one or two bytes remain.
102 | lsr.f lp_count,r3 | ||
103 | lpne .Lbyte_end | ||
104 | ldb_s r3,[r0,1] | ||
105 | ldb r12,[r1,1] | ||
106 | brne r4,r5,.Lbyte_even | ||
107 | ldb.a r4,[r0,2] | ||
108 | ldb.a r5,[r1,2] | ||
109 | brne r3,r12,.Lbyte_odd | ||
110 | .Lbyte_end: | ||
; Carry clear: r4/r5 is the last byte pair. Otherwise compare it and then
; load the one remaining pair into r3/r12.
111 | bcc .Lbyte_even | ||
112 | brne r4,r5,.Lbyte_even | ||
113 | ldb_s r3,[r0,1] | ||
114 | ldb_s r12,[r1,1] | ||
115 | .Lbyte_odd: | ||
; Result = difference of the odd byte pair (computed in the delay slot).
116 | j_s.d [blink] | ||
117 | sub r0,r3,r12 | ||
118 | .Lbyte_even: | ||
; Result = difference of the even byte pair (computed in the delay slot).
119 | j_s.d [blink] | ||
120 | sub r0,r4,r5 | ||
121 | .Lnil: | ||
; Zero length: the areas compare equal.
122 | j_s.d [blink] | ||
123 | mov r0,0 | ||
124 | ARC_EXIT memcmp | ||
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S new file mode 100644 index 000000000000..b64cc10ac918 --- /dev/null +++ b/arch/arc/lib/memcpy-700.S | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <asm/linkage.h> | ||
10 | |||
; memcpy: copy r2 bytes from [r1] to [r0].
;   In:      r0 = destination, r1 = source, r2 = length in bytes
;   Out:     r0 is never written, so it still holds the destination on return
;   Scratch: r1, r2, r3, r5, r12, lp_count
11 | ARC_ENTRY memcpy | ||
; r3 = (r0 | r1) << 30: non-zero iff either address has one of its low two bits set.
12 | or r3,r0,r1 | ||
13 | asl_s r3,r3,30 | ||
; r5 is the running destination pointer.
14 | mov_s r5,r0 | ||
; Unsigned length <= r3 selects the byte loop (always for length 0).  The
; delay slot computes r3 = length - 1 on both paths and sets the flags that
; .Lcopy_bytewise tests.
15 | brls.d r2,r3,.Lcopy_bytewise | ||
16 | sub.f r3,r2,1 | ||
; Word path: r12 always holds the word loaded ahead of the next store.
17 | ld_s r12,[r1,0] | ||
; lp_count = (length - 1) >> 3 = number of 8-byte iterations.
18 | asr.f lp_count,r3,3 | ||
; When bit 2 of (length - 1) is set, copy one word before the loop.  The
; delay slot reduces r2 to length & 3 on both paths.
19 | bbit0.d r3,2,.Lnox4 | ||
20 | bmsk_s r2,r2,1 | ||
21 | st.ab r12,[r5,4] | ||
22 | ld.a r12,[r1,4] | ||
23 | .Lnox4: | ||
; Hardware loop (skipped when lp_count is zero): two loads and two stores
; per iteration, keeping one word of read-ahead in r12.
24 | lppnz .Lendloop | ||
25 | ld_s r3,[r1,4] | ||
26 | st.ab r12,[r5,4] | ||
27 | ld.a r12,[r1,8] | ||
28 | st.ab r3,[r5,4] | ||
29 | .Lendloop: | ||
; length & 3 == 0: the last word is stored whole.  Otherwise merge it with
; the word already at the destination so bytes past the end are preserved.
30 | breq r2,0,.Last_store | ||
31 | ld r3,[r5,0] | ||
32 | #ifdef __LITTLE_ENDIAN__ | ||
; r2 = 8 * (length & 3) - 1 = index of the highest bit taken from the source.
33 | add3 r2,-1,r2 | ||
34 | ; uses long immediate | ||
; r12 = ((src ^ dst) & low_mask) ^ dst: low bytes from src, the rest from dst.
35 | xor_s r12,r12,r3 | ||
36 | bmsk r12,r12,r2 | ||
37 | xor_s r12,r12,r3 | ||
38 | #else /* BIG ENDIAN */ | ||
; r2 = 31 - 8 * (length & 3) = index of the highest bit kept from the destination.
39 | sub3 r2,31,r2 | ||
40 | ; uses long immediate | ||
; r12 = src ^ ((dst ^ src) & low_mask): low bytes from dst, the rest from src.
41 | xor_s r3,r3,r12 | ||
42 | bmsk r3,r3,r2 | ||
43 | xor_s r12,r12,r3 | ||
44 | #endif /* ENDIAN */ | ||
45 | .Last_store: | ||
; The final store executes in the delay slot of the return.
46 | j_s.d [blink] | ||
47 | st r12,[r5,0] | ||
48 | |||
49 | .balign 4 | ||
50 | .Lcopy_bytewise: | ||
; Carry from the sub.f in the delay slot above is set when length was 0:
; nothing to copy.
51 | jcs [blink] | ||
52 | ldb_s r12,[r1,0] | ||
; lp_count = (length - 1) / 2; when the bit shifted out is set, one byte is
; copied ahead of the loop.
53 | lsr.f lp_count,r3 | ||
54 | bhs_s .Lnox1 | ||
55 | stb.ab r12,[r5,1] | ||
56 | ldb.a r12,[r1,1] | ||
57 | .Lnox1: | ||
; Byte loop with the same read-ahead scheme as the word loop, two bytes per iteration.
58 | lppnz .Lendbloop | ||
59 | ldb_s r3,[r1,1] | ||
60 | stb.ab r12,[r5,1] | ||
61 | ldb.a r12,[r1,2] | ||
62 | stb.ab r3,[r5,1] | ||
63 | .Lendbloop: | ||
; Store the last byte in the delay slot of the return.
64 | j_s.d [blink] | ||
65 | stb r12,[r5,0] | ||
66 | ARC_EXIT memcpy | ||
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S new file mode 100644 index 000000000000..9b2d88d2e141 --- /dev/null +++ b/arch/arc/lib/memset.S | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <asm/linkage.h> | ||
10 | |||
; SMALL: unaligned requests of at most this many bytes are filled one byte at
; a time (see .Ltiny below).
11 | #define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ | ||
12 | |||
; memset: fill r2 bytes at [r0] with the low byte of r1.
;   In:      r0 = mem, r1 = char, r2 = size
;   Out:     r0 is never written, so it still holds mem on return
;   Scratch: r1, r2, r3, r4, r12, lp_count
13 | ARC_ENTRY memset | ||
; r4 is the running store pointer.
14 | mov_s r4,r0 | ||
; Z is set iff the low two bits of both the address and the size are zero.
15 | or r12,r0,r2 | ||
16 | bmsk.f r12,r12,1 | ||
; Zero-extend the fill byte and duplicate it into both bytes of a halfword;
; the or_s runs in the delay slot, so it happens on both paths.
17 | extb_s r1,r1 | ||
18 | asl r3,r1,8 | ||
19 | beq.d .Laligned | ||
20 | or_s r1,r1,r3 | ||
; Unaligned and short: plain byte loop.
21 | brls r2,SMALL,.Ltiny | ||
; Unaligned and long: write the tail first.  r3 = end address; store the
; last byte, then a halfword ending at the end address rounded down to even.
22 | add r3,r2,r0 | ||
23 | stb r1,[r3,-1] | ||
24 | bclr_s r3,r3,0 | ||
25 | stw r1,[r3,-2] | ||
; Adjust the remaining count for the start misalignment (r12 = r0 & 3):
; size += r12, minus 4 when r12 is non-zero.
26 | bmsk.f r12,r0,1 | ||
27 | add_s r2,r2,r12 | ||
28 | sub.ne r2,r2,4 | ||
; Write the head: one byte, then a halfword at the pointer rounded down to
; even, then round the pointer down to a word boundary for the word loop.
29 | stb.ab r1,[r4,1] | ||
30 | and r4,r4,-2 | ||
31 | stw.ab r1,[r4,2] | ||
32 | and r4,r4,-4 | ||
33 | .Laligned: ; This code address should be aligned for speed. | ||
; Widen the fill pattern to a full word and store size / 4 words with a
; hardware loop (skipped when the count is zero).
34 | asl r3,r1,16 | ||
35 | lsr.f lp_count,r2,2 | ||
36 | or_s r1,r1,r3 | ||
37 | lpne .Loop_end | ||
38 | st.ab r1,[r4,4] | ||
39 | .Loop_end: | ||
40 | j_s [blink] | ||
41 | |||
42 | .balign 4 | ||
43 | .Ltiny: | ||
; One byte per iteration, size iterations; skipped entirely when size is zero.
44 | mov.f lp_count,r2 | ||
45 | lpne .Ltiny_end | ||
46 | stb.ab r1,[r4,1] | ||
47 | .Ltiny_end: | ||
48 | j_s [blink] | ||
49 | ARC_EXIT memset | ||
50 | |||
; memzero: zero-fill a memory area by re-arranging the arguments and
; branching into memset.
51 | ; memzero: @r0 = mem, @r1 = size_t | ||
52 | ; memset: @r0 = mem, @r1 = char, @r2 = size_t | ||
53 | |||
54 | ARC_ENTRY memzero | ||
55 | ; adjust bzero args to memset args | ||
; The size moves from r1 to r2 and the fill value in r1 becomes 0.
56 | mov r2, r1 | ||
57 | mov r1, 0 | ||
; A plain branch (not a branch-and-link) leaves blink untouched, so memset
; returns straight to the caller of memzero.
58 | b memset ;tail call so no need to tinker with blink | ||
59 | ARC_EXIT memzero | ||
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S new file mode 100644 index 000000000000..99c10475d477 --- /dev/null +++ b/arch/arc/lib/strchr-700.S | |||
@@ -0,0 +1,123 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | /* ARC700 has a relatively long pipeline and branch prediction, so we want | ||
10 | to avoid branches that are hard to predict. On the other hand, the | ||
11 | presence of the norm instruction makes it easier to operate on whole | ||
12 | words branch-free. */ | ||
13 | |||
14 | #include <asm/linkage.h> | ||
15 | |||
; strchr: find the first occurrence of a character in a NUL-terminated string.
;   In:      r0 = string, r1 = character (only the low byte is used)
;   Out:     r0 = address of the match, or 0 when the terminating NUL comes first
;   Scratch: r1-r7, r12
; The string is scanned a word at a time.  For a word x and m = 0x01010101,
; (x - m) & ~x & 0x80808080 is non-zero when x contains a zero byte; the same
; test on x ^ (character replicated into every byte) detects a match.
16 | ARC_ENTRY strchr | ||
; r5 = character in the two low bytes; r4 (set in the delay slot below) is the
; same pair shifted into the two high bytes.
17 | extb_s r1,r1 | ||
18 | asl r5,r1,8 | ||
; r2 = low two bits of the address.
19 | bmsk r2,r0,1 | ||
20 | or r5,r5,r1 | ||
21 | mov_s r3,0x01010101 | ||
; NOTE(review): this compares (r0 & 3) with r0 itself, so the branch is only
; taken for addresses below 4.  The fall-through path still handles aligned
; addresses, because the shift count computed there is then 0 - confirm that
; this is intended.
22 | breq.d r2,r0,.Laligned | ||
23 | asl r4,r5,16 | ||
; Round the address down to a word boundary and build r7: 0x01010101 shifted
; by 8 * (address & 3), so that bytes before the start of the string do not
; trigger the detectors on the first word.
24 | sub_s r0,r0,r2 | ||
25 | asl r7,r2,3 | ||
26 | ld_s r2,[r0] | ||
27 | #ifdef __LITTLE_ENDIAN__ | ||
28 | asl r7,r3,r7 | ||
29 | #else | ||
30 | lsr r7,r3,r7 | ||
31 | #endif | ||
; r5 = character replicated into all four bytes, r4 = 0x80808080.
32 | or r5,r5,r4 | ||
33 | ror r4,r3 | ||
; Zero-byte test on the first word using the masked constant r7; r6 (delay
; slot) = word ^ replicated character.
34 | sub r12,r2,r7 | ||
35 | bic_s r12,r12,r2 | ||
36 | and r12,r12,r4 | ||
37 | brne.d r12,0,.Lfound0_ua | ||
38 | xor r6,r2,r5 | ||
; Fetch the next word (r0 advances by 4), then run the match test on r6.
39 | ld.a r2,[r0,4] | ||
40 | sub r12,r6,r7 | ||
41 | bic r12,r12,r6 | ||
42 | and r7,r12,r4 | ||
43 | breq r7,0,.Loop ; For speed, we want this branch to be unaligned. | ||
44 | b .Lfound_char ; Likewise this one. | ||
45 | ; /* We require this code address to be unaligned for speed... */ | ||
46 | .Laligned: | ||
47 | ld_s r2,[r0] | ||
48 | or r5,r5,r4 | ||
49 | ror r4,r3 | ||
50 | ; /* ... so that this code address is aligned, for itself and ... */ | ||
51 | .Loop: | ||
; Main loop: zero-byte test on the current word r2, then match test on
; r6 = r2 ^ r5 while the next word is already being loaded.
52 | sub r12,r2,r3 | ||
53 | bic_s r12,r12,r2 | ||
54 | and r12,r12,r4 | ||
55 | brne.d r12,0,.Lfound0 | ||
56 | xor r6,r2,r5 | ||
57 | ld.a r2,[r0,4] | ||
58 | sub r12,r6,r3 | ||
59 | bic r12,r12,r6 | ||
60 | and r7,r12,r4 | ||
61 | breq r7,0,.Loop /* ... so that this branch is unaligned. */ | ||
62 | ; Found searched-for character. r0 has already advanced to next word. | ||
63 | #ifdef __LITTLE_ENDIAN__ | ||
64 | /* We only need the information about the first matching byte | ||
65 | (i.e. the least significant matching byte) to be exact, | ||
66 | hence there is no problem with carry effects. */ | ||
67 | .Lfound_char: | ||
; r7 = match indicator bits.  (r7 - 1) & ~r7 keeps the bits below the lowest
; indicator; norm >> 3 turns that into a byte offset that is subtracted from
; r0 - 1 (the last subtraction runs in the delay slot of the return).
68 | sub r3,r7,1 | ||
69 | bic r3,r3,r7 | ||
70 | norm r2,r3 | ||
71 | sub_s r0,r0,1 | ||
72 | asr_s r2,r2,3 | ||
73 | j.d [blink] | ||
74 | sub_s r0,r0,r2 | ||
75 | |||
76 | .balign 4 | ||
77 | .Lfound0_ua: | ||
; First (unaligned) word: use the masked constant r7 in place of 0x01010101.
78 | mov r3,r7 | ||
79 | .Lfound0: | ||
; A zero byte was seen in the current word; r0 still addresses that word.
; r2 = match indicators for the same word, r12 = zero indicators | match
; indicators; the lowest set bit of r12 marks whichever comes first.
80 | sub r3,r6,r3 | ||
81 | bic r3,r3,r6 | ||
82 | and r2,r3,r4 | ||
83 | or_s r12,r12,r2 | ||
84 | sub_s r3,r12,1 | ||
85 | bic_s r3,r3,r12 | ||
86 | norm r3,r3 | ||
87 | add_s r0,r0,3 | ||
88 | asr_s r12,r3,3 | ||
; asl.f only sets the flags: the sign flag tells whether the first indicator
; belongs to a match.  If not (.pl), the result is replaced by 0 in the delay slot.
89 | asl.f 0,r2,r3 | ||
90 | sub_s r0,r0,r12 | ||
91 | j_s.d [blink] | ||
92 | mov.pl r0,0 | ||
93 | #else /* BIG ENDIAN */ | ||
94 | .Lfound_char: | ||
; r7 = match indicator bits, moved from bit 7 to bit 0 of each byte and
; cleared wherever r6 has bits set; norm >> 3 gives the byte offset from the
; start of the word, which is r0 - 4 because r0 has already advanced.
95 | lsr r7,r7,7 | ||
96 | |||
97 | bic r2,r7,r6 | ||
98 | norm r2,r2 | ||
99 | sub_s r0,r0,4 | ||
100 | asr_s r2,r2,3 | ||
101 | j.d [blink] | ||
102 | add_s r0,r0,r2 | ||
103 | |||
104 | .Lfound0_ua: | ||
; First (unaligned) word: use the masked constant r7 in place of 0x01010101.
105 | mov_s r3,r7 | ||
106 | .Lfound0: | ||
; A zero byte was seen in the current word; r0 still addresses that word.
; NOTE(review): the sequence below appears to refine the zero and match
; indicators so that the most significant hit can be located with norm, and
; returns 0 (mov.mi in the delay slot) when the flag test says that hit is
; not a match - verify the details against the ISA manual.
107 | asl_s r2,r2,7 | ||
108 | or r7,r6,r4 | ||
109 | bic_s r12,r12,r2 | ||
110 | sub r2,r7,r3 | ||
111 | or r2,r2,r6 | ||
112 | bic r12,r2,r12 | ||
113 | bic.f r3,r4,r12 | ||
114 | norm r3,r3 | ||
115 | |||
116 | add.pl r3,r3,1 | ||
117 | asr_s r12,r3,3 | ||
118 | asl.f 0,r2,r3 | ||
119 | add_s r0,r0,r12 | ||
120 | j_s.d [blink] | ||
121 | mov.mi r0,0 | ||
122 | #endif /* ENDIAN */ | ||
123 | ARC_EXIT strchr | ||
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S new file mode 100644 index 000000000000..5dc802b45cf3 --- /dev/null +++ b/arch/arc/lib/strcmp.S | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | /* This is optimized primarily for the ARC700. | ||
10 | It would be possible to speed up the loops by one cycle / word | ||
11 | respective one cycle / byte by forcing double source 1 alignment, unrolling | ||
12 | by a factor of two, and speculatively loading the second word / byte of | ||
13 | source 1; however, that would increase the overhead for loop setup / finish, | ||
14 | and strcmp might often terminate early. */ | ||
15 | |||
16 | #include <asm/linkage.h> | ||
17 | |||
; strcmp: compare two NUL-terminated strings.
;   In:      r0, r1 = the two strings
;   Out:     r0 = 0 when equal, otherwise a non-zero value whose sign reflects
;            the ordering (the word paths return 1 or 1 with bit 31 set, the
;            byte path returns the byte difference)
;   Scratch: r1-r5, r12
18 | ARC_ENTRY strcmp | ||
; If either address has one of its low two bits set, use the byte loop.
19 | or r2,r0,r1 | ||
20 | bmsk_s r2,r2,1 | ||
21 | brne r2,0,.Lcharloop | ||
; r12 = 0x01010101, r5 = 0x80808080: constants for the zero-byte test.
22 | mov_s r12,0x01010101 | ||
23 | ror r5,r12 | ||
24 | .Lwordloop: | ||
; Load one word from each string with post-increment.
25 | ld.ab r2,[r0,4] | ||
26 | ld.ab r3,[r1,4] | ||
27 | nop_s | ||
; r4 = (r2 - 0x01010101) & ~r2 & 0x80808080: non-zero when r2 contains a zero byte.
28 | sub r4,r2,r12 | ||
29 | bic r4,r4,r2 | ||
30 | and r4,r4,r5 | ||
31 | brne r4,0,.Lfound0 | ||
32 | breq r2,r3,.Lwordloop | ||
; The words differ and r2 has no zero byte.
33 | #ifdef __LITTLE_ENDIAN__ | ||
34 | xor r0,r2,r3 ; mask for difference | ||
35 | sub_s r1,r0,1 | ||
36 | bic_s r0,r0,r1 ; mask for least significant difference bit | ||
37 | sub r1,r5,r0 | ||
38 | xor r0,r5,r1 ; mask for least significant difference byte | ||
39 | and_s r2,r2,r0 | ||
40 | and_s r3,r3,r0 | ||
41 | #endif /* LITTLE ENDIAN */ | ||
; Unsigned compare: return 1, with bit 31 set (in the delay slot) when r2 is lower.
42 | cmp_s r2,r3 | ||
43 | mov_s r0,1 | ||
44 | j_s.d [blink] | ||
45 | bset.lo r0,r0,31 | ||
46 | |||
47 | .balign 4 | ||
48 | #ifdef __LITTLE_ENDIAN__ | ||
49 | .Lfound0: | ||
; r2 contains a zero byte: treat the zero indicator like a difference so that
; only bytes up to and including the first of the two are compared.
50 | xor r0,r2,r3 ; mask for difference | ||
51 | or r0,r0,r4 ; or in zero indicator | ||
52 | sub_s r1,r0,1 | ||
53 | bic_s r0,r0,r1 ; mask for least significant difference bit | ||
54 | sub r1,r5,r0 | ||
55 | xor r0,r5,r1 ; mask for least significant difference byte | ||
56 | and_s r2,r2,r0 | ||
57 | and_s r3,r3,r0 | ||
; r0 = 0 when equal, 1 when r2 is higher, and bit 31 is set when r2 is lower.
58 | sub.f r0,r2,r3 | ||
59 | mov.hi r0,1 | ||
60 | j_s.d [blink] | ||
61 | bset.lo r0,r0,31 | ||
62 | #else /* BIG ENDIAN */ | ||
63 | /* The zero-detection above can mis-detect 0x01 bytes as zeroes | ||
64 | because of carry-propagation from a lower significant zero byte. | ||
65 | We can compensate for this by checking that bit0 is zero. | ||
66 | This compensation is not necessary in the step where we | ||
67 | get a low estimate for r2, because in any affected bytes | ||
68 | we already have 0x00 or 0x01, which will remain unchanged | ||
69 | when bit 7 is cleared. */ | ||
70 | .balign 4 | ||
71 | .Lfound0: | ||
72 | lsr r0,r4,8 | ||
73 | lsr_s r1,r2 | ||
74 | bic_s r2,r2,r0 ; get low estimate for r2 and get ... | ||
75 | bic_s r0,r0,r1 ; <this is the adjusted mask for zeros> | ||
76 | or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ... | ||
77 | cmp_s r3,r2 ; ... be independent of trailing garbage | ||
78 | or_s r2,r2,r0 ; likewise for r3 > r2 | ||
79 | bic_s r3,r3,r0 | ||
80 | rlc r0,0 ; r0 := r2 > r3 ? 1 : 0 | ||
81 | cmp_s r2,r3 | ||
82 | j_s.d [blink] | ||
83 | bset.lo r0,r0,31 | ||
84 | #endif /* ENDIAN */ | ||
85 | |||
86 | .balign 4 | ||
87 | .Lcharloop: | ||
; Byte loop for unaligned strings: stop at a zero byte in the first string or
; at the first difference.
88 | ldb.ab r2,[r0,1] | ||
89 | ldb.ab r3,[r1,1] | ||
90 | nop_s | ||
91 | breq r2,0,.Lcmpend | ||
92 | breq r2,r3,.Lcharloop | ||
93 | .Lcmpend: | ||
; Result = difference of the last byte pair (computed in the delay slot).
94 | j_s.d [blink] | ||
95 | sub r0,r2,r3 | ||
96 | ARC_EXIT strcmp | ||
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S new file mode 100644 index 000000000000..b7ca4ae81d88 --- /dev/null +++ b/arch/arc/lib/strcpy-700.S | |||
@@ -0,0 +1,70 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | /* If dst and src are 4 byte aligned, copy 8 bytes at a time. | ||
10 | If the src is 4, but not 8 byte aligned, we first read 4 bytes to get | ||
11 | it 8 byte aligned. Thus, we can do a little read-ahead, without | ||
12 | dereferencing a cache line that we should not touch. | ||
13 | Note that short and long instructions have been scheduled to avoid | ||
14 | branch stalls. | ||
15 | The beq_s to r3z could be made unaligned & long to avoid a stall | ||
16 | there, but it is not likely to be taken often, and it | ||
17 | would also be likely to cost an unaligned mispredict at the next call. */ | ||
18 | |||
19 | #include <asm/linkage.h> | ||
20 | |||
; strcpy: copy a NUL-terminated string, including the terminator.
;   In:      r0 = destination, r1 = source
;   Out:     r0 is never written, so it still holds the destination on return
;   Scratch: r1, r2, r3, r4, r8, r10, r12
21 | ARC_ENTRY strcpy | ||
; If either address has one of its low two bits set, copy byte by byte.
; r10 (set in the delay slot, so on both paths) is the running destination pointer.
22 | or r2,r0,r1 | ||
23 | bmsk_s r2,r2,1 | ||
24 | brne.d r2,0,charloop | ||
25 | mov_s r10,r0 | ||
; Word path: r3 = first source word, r8 = 0x01010101, r12 = 0x80808080
; (set in the delay slot of bbit0.d).
26 | ld_s r3,[r1,0] | ||
27 | mov r8,0x01010101 | ||
; Source bit 2 clear (source is 8-byte aligned): enter the loop at loop_start.
28 | bbit0.d r1,2,loop_start | ||
29 | ror r12,r8 | ||
; Otherwise test the first word alone: (r3 - 0x01010101) & ~r3 & 0x80808080
; is non-zero when r3 contains a zero byte.
30 | sub r2,r3,r8 | ||
31 | bic_s r2,r2,r3 | ||
32 | tst_s r2,r12 | ||
33 | bne r3z | ||
; No terminator yet: hand the word over as r4, which the loop stores first.
34 | mov_s r4,r3 | ||
35 | .balign 4 | ||
36 | loop: | ||
; Two words per iteration: r3 and r4 alternate as the word being tested and
; stored while the other one is loaded ahead.
37 | ld.a r3,[r1,4] | ||
38 | st.ab r4,[r10,4] | ||
39 | loop_start: | ||
40 | ld.a r4,[r1,4] | ||
41 | sub r2,r3,r8 | ||
42 | bic_s r2,r2,r3 | ||
43 | tst_s r2,r12 | ||
44 | bne_s r3z | ||
45 | st.ab r3,[r10,4] | ||
46 | sub r2,r4,r8 | ||
47 | bic r2,r2,r4 | ||
48 | tst r2,r12 | ||
49 | beq loop | ||
; r4 contains the terminator: move it to r3 for the byte-wise tail below.
50 | mov_s r3,r4 | ||
; Tail: r3 contains a zero byte.  Peel off one byte per pass in memory order
; and store it (in the delay slot of bne.d); the loop ends after the zero
; byte itself has been stored.
51 | #ifdef __LITTLE_ENDIAN__ | ||
52 | r3z: bmsk.f r1,r3,7 | ||
53 | lsr_s r3,r3,8 | ||
54 | #else | ||
55 | r3z: lsr.f r1,r3,24 | ||
56 | asl_s r3,r3,8 | ||
57 | #endif | ||
58 | bne.d r3z | ||
59 | stb.ab r1,[r10,1] | ||
60 | j_s [blink] | ||
61 | |||
62 | .balign 4 | ||
63 | charloop: | ||
; Byte loop: the store sits in the delay slot, so the terminating zero byte
; is written before the loop exits.
64 | ldb.ab r3,[r1,1] | ||
65 | |||
66 | |||
67 | brne.d r3,0,charloop | ||
68 | stb.ab r3,[r10,1] | ||
69 | j [blink] | ||
70 | ARC_EXIT strcpy | ||
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S new file mode 100644 index 000000000000..39759e099696 --- /dev/null +++ b/arch/arc/lib/strlen.S | |||
@@ -0,0 +1,83 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <asm/linkage.h> | ||
10 | |||
; strlen: length of a NUL-terminated string.
;   In:      r0 = string
;   Out:     r0 = number of bytes before the terminating zero byte
;   Scratch: r1-r7, r12
; The string is read as pairs of words starting at the 8-byte boundary at or
; below r0; bytes in front of the string are excluded from the first test.
11 | ARC_ENTRY strlen | ||
; r3 = r0 | 7, so [r3,-7] and [r3,-3] are the two words of the 8-byte block
; containing r0.  The second load writes r3 - 3 back into r3.
12 | or r3,r0,7 | ||
13 | ld r2,[r3,-7] | ||
14 | ld.a r6,[r3,-3] | ||
; r4 = 0x01010101; r5 (set below) = 0x80808080.  For a word x,
; (x - r4) & ~x & r5 is non-zero when x contains a zero byte.
15 | mov r4,0x01010101 | ||
16 | ; uses long immediate | ||
17 | #ifdef __LITTLE_ENDIAN__ | ||
; r7 = 0x01010101 shifted left by 8 * r0, used in place of r4 for the word
; that contains the start of the string.
; btst_s sets Z when bit 2 of r0 is clear, i.e. the string starts in the first word.
18 | asl_s r1,r0,3 | ||
19 | btst_s r0,2 | ||
20 | asl r7,r4,r1 | ||
21 | ror r5,r4 | ||
; r1 = zero-byte candidates of the first word (masked constant).
22 | sub r1,r2,r7 | ||
23 | bic_s r1,r1,r2 | ||
; String starts in the first word (.eq): the second word is tested with the
; full constant and the first word result is merged in.  Otherwise only the
; second word is tested, with the masked constant.
24 | mov.eq r7,r4 | ||
25 | sub r12,r6,r7 | ||
26 | bic r12,r12,r6 | ||
27 | or.eq r12,r12,r1 | ||
28 | and r12,r12,r5 | ||
29 | brne r12,0,.Learly_end | ||
30 | #else /* BIG ENDIAN */ | ||
31 | ror r5,r4 | ||
; btst_s sets Z when bit 2 of r0 is clear, i.e. the string starts in the first word.
32 | btst_s r0,2 | ||
; r7 = 31 - 8 * r0, used as a bmsk bit index so that candidates belonging to
; bytes in front of the string are dropped.
33 | mov_s r1,31 | ||
34 | sub3 r7,r1,r0 | ||
35 | sub r1,r2,r4 | ||
36 | bic_s r1,r1,r2 | ||
37 | bmsk r1,r1,r7 | ||
38 | sub r12,r6,r4 | ||
39 | bic r12,r12,r6 | ||
; String starts in the second word (.ne): mask the second word result.
; Otherwise (.eq) merge in the first word result.
40 | bmsk.ne r12,r12,r7 | ||
41 | or.eq r12,r12,r1 | ||
42 | and r12,r12,r5 | ||
43 | brne r12,0,.Learly_end | ||
44 | #endif /* ENDIAN */ | ||
45 | |||
46 | .Loop: | ||
; Main loop: test two words per iteration; r3 advances by 8 and is left
; addressing the second word of the pair.
47 | ld_s r2,[r3,4] | ||
48 | ld.a r6,[r3,8] | ||
49 | ; stall for load result | ||
50 | sub r1,r2,r4 | ||
51 | bic_s r1,r1,r2 | ||
52 | sub r12,r6,r4 | ||
53 | bic r12,r12,r6 | ||
54 | or r12,r12,r1 | ||
55 | and r12,r12,r5 | ||
56 | breq r12,0,.Loop | ||
57 | .Lend: | ||
; If the first word of the pair has a zero indicator (.ne), step r3 back to
; that word; otherwise (.eq) continue with the combined indicators in r12.
58 | and.f r1,r1,r5 | ||
59 | sub.ne r3,r3,4 | ||
60 | mov.eq r1,r12 | ||
61 | #ifdef __LITTLE_ENDIAN__ | ||
; (r1 - 1) & ~r1 keeps the bits below the lowest indicator; norm >> 3 converts
; that to a byte count, and the result is r3 - (r0 - 3) minus that count
; (last subtraction in the delay slot of the return).
62 | sub_s r2,r1,1 | ||
63 | bic_s r2,r2,r1 | ||
64 | norm r1,r2 | ||
65 | sub_s r0,r0,3 | ||
66 | lsr_s r1,r1,3 | ||
67 | sub r0,r3,r0 | ||
68 | j_s.d [blink] | ||
69 | sub r0,r0,r1 | ||
70 | #else /* BIG ENDIAN */ | ||
; Move the indicators from bit 7 to bit 0 of each byte and clear those whose
; source byte has bit 0 set; norm >> 3 then gives the byte offset of the first
; remaining indicator, added to r3 - r0 in the delay slot of the return.
71 | lsr_s r1,r1,7 | ||
72 | mov.eq r2,r6 | ||
73 | bic_s r1,r1,r2 | ||
74 | norm r1,r1 | ||
75 | sub r0,r3,r0 | ||
76 | lsr_s r1,r1,3 | ||
77 | j_s.d [blink] | ||
78 | add r0,r0,r1 | ||
79 | #endif /* ENDIAN */ | ||
80 | .Learly_end: | ||
; A zero byte was found in the first pair of words.
; NOTE(review): the .ne condition presumably still reflects the btst_s on bit 2
; of r0 above; in that case r1 is cleared (in the delay slot) so the first
; word is ignored when the string starts in the second word - confirm that no
; intervening instruction updates the flags.
81 | b.d .Lend | ||
82 | sub_s.ne r1,r1,r1 | ||
83 | ARC_EXIT strlen | ||