diff options
author | Vineet Gupta <vgupta@synopsys.com> | 2013-01-18 04:42:18 -0500 |
---|---|---|
committer | Vineet Gupta <vgupta@synopsys.com> | 2013-02-11 09:30:35 -0500 |
commit | 5210d1e6889c8183ecad269e86e2d9c524015b5f (patch) | |
tree | 77fcc0cfb1853c553eaf58a271256f13b860a528 /arch/arc/lib/strchr-700.S | |
parent | 6e35fa2d430538cd0609e499c6f789beea9e9798 (diff) |
ARC: String library
Hand optimised asm code for ARC700 pipeline.
Originally written/optimized by Joern Rennecke
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Joern Rennecke <joern.rennecke@embecosm.com>
Diffstat (limited to 'arch/arc/lib/strchr-700.S')
-rw-r--r-- | arch/arc/lib/strchr-700.S | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S new file mode 100644 index 000000000000..99c10475d477 --- /dev/null +++ b/arch/arc/lib/strchr-700.S | |||
@@ -0,0 +1,123 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | /* ARC700 has a relatively long pipeline and branch prediction, so we want | ||
10 | to avoid branches that are hard to predict. On the other hand, the | ||
11 | presence of the norm instruction makes it easier to operate on whole | ||
12 | words branch-free. */ | ||
13 | |||
14 | #include <asm/linkage.h> | ||
15 | |||
16 | /* | |
17 | * strchr: find the first occurrence of a byte in a NUL-terminated string, | |
18 | * examining one 32-bit word per step. | |
19 | * | |
20 | * In: r0 = address to start scanning at, r1 = byte to find (only the low | |
21 | * 8 bits are used). Presumably the s and c arguments of the C strchr; | |
22 | * confirm against the ARC calling convention. | |
23 | * Out: r0 = address of the first matching byte, or 0 when a NUL byte that | |
24 | * is not itself a match comes first. Returns through blink. | |
25 | * Writes r2-r7, r12 and the status flags. | |
26 | * | |
27 | * r3 = 0x01010101 and r4 = 0x80808080 (r3 rotated right by one bit). | |
28 | * r5 = search byte replicated into all four byte lanes. | |
29 | * r2 = word loaded from the string, r6 = r2 ^ r5. | |
30 | * r7 = copy of r3 for the first word, with the lanes of the bytes that | |
31 | * precede the start cleared. | |
32 | * | |
33 | * For a word w, (w - r3) & ~w & r4 is non-zero when w has a zero byte. | |
34 | * The test is applied to r2 to detect NUL and to r6 to detect a match. | |
35 | */ | |
36 | ARC_ENTRY strchr | |
37 | extb_s r1,r1 /* zero-extend the search byte */ | |
38 | asl r5,r1,8 | |
39 | bmsk r2,r0,1 /* r2 = r0 & 3, the byte offset inside the word */ | |
40 | or r5,r5,r1 /* search byte in the two low lanes */ | |
41 | mov_s r3,0x01010101 | |
42 | /* NOTE(review): this compares r2 with r0 rather than with 0, so as written | |
43 | the branch is taken only when r0 < 4. Word-aligned pointers fall through | |
44 | and are handled below with a zero shift. Confirm this is intended. */ | |
45 | breq.d r2,r0,.Laligned | |
46 | asl r4,r5,16 /* delay slot: same two bytes for the upper lanes */ | |
47 | sub_s r0,r0,r2 /* round r0 down to the containing word */ | |
48 | asl r7,r2,3 /* byte offset converted to a bit count */ | |
49 | ld_s r2,[r0] | |
50 | /* r7 = r3 with the lanes of the bytes before the start shifted out. */ | |
51 | #ifdef __LITTLE_ENDIAN__ | |
52 | asl r7,r3,r7 | |
53 | #else | |
54 | lsr r7,r3,r7 | |
55 | #endif | |
56 | or r5,r5,r4 /* search byte in all four lanes */ | |
57 | ror r4,r3 /* r4 = 0x80808080 */ | |
58 | sub r12,r2,r7 | |
59 | bic_s r12,r12,r2 | |
60 | and r12,r12,r4 /* NUL flags of the first word */ | |
61 | brne.d r12,0,.Lfound0_ua | |
62 | xor r6,r2,r5 /* delay slot: a matching byte becomes a zero lane */ | |
63 | ld.a r2,[r0,4] /* load the next word; r0 advances by 4 */ | |
64 | sub r12,r6,r7 | |
65 | bic r12,r12,r6 | |
66 | #ifdef __LITTLE_ENDIAN__ | |
67 | and r7,r12,r4 /* match flags of the first word */ | |
68 | breq r7,0,.Loop ; For speed, we want this branch to be unaligned. | |
69 | b .Lfound_char ; Likewise this one. | |
70 | #else | |
71 | /* Big endian: the borrow of the subtraction travels towards the bytes | |
72 | before the start, whose mask lanes in r7 are zero, and can flag a byte | |
73 | there; .Lfound_char would then return an address in front of the | |
74 | string. Keep r7 intact and accept a flag only in a lane where r7 has | |
75 | its mask bit and bit 0 of r6 is clear. */ | |
76 | and r12,r12,r4 | |
77 | breq r12,0,.Loop ; For speed, we want this branch to be unaligned. | |
78 | lsr_s r12,r12,7 /* flags moved down to bit 0 of each lane */ | |
79 | bic r2,r7,r6 | |
80 | b.d .Lfound_char_b | |
81 | and_s r2,r2,r12 /* delay slot */ | |
82 | #endif | |
83 | ; /* We require this code address to be unaligned for speed... */ | |
84 | .Laligned: | |
85 | ld_s r2,[r0] | |
86 | or r5,r5,r4 | |
87 | ror r4,r3 | |
88 | ; /* ... so that this code address is aligned, for itself and ... */ | |
89 | .Loop: | |
90 | sub r12,r2,r3 | |
91 | bic_s r12,r12,r2 | |
92 | and r12,r12,r4 /* NUL flags */ | |
93 | brne.d r12,0,.Lfound0 | |
94 | xor r6,r2,r5 | |
95 | ld.a r2,[r0,4] | |
96 | sub r12,r6,r3 | |
97 | bic r12,r12,r6 | |
98 | and r7,r12,r4 /* match flags */ | |
99 | breq r7,0,.Loop /* ... so that this branch is unaligned. */ | |
100 | ; Found searched-for character. r0 has already advanced to next word. | |
101 | #ifdef __LITTLE_ENDIAN__ | |
102 | /* We only need the information about the first matching byte | |
103 | (i.e. the least significant matching byte) to be exact, | |
104 | hence there is no problem with carry effects. */ | |
105 | .Lfound_char: | |
106 | sub r3,r7,1 | |
107 | bic r3,r3,r7 /* ones in every bit below the lowest match flag */ | |
108 | norm r2,r3 | |
109 | sub_s r0,r0,1 | |
110 | asr_s r2,r2,3 /* bit count to byte count */ | |
111 | j.d [blink] | |
112 | sub_s r0,r0,r2 /* delay slot */ | |
113 | |||
114 | .balign 4 | |
115 | .Lfound0_ua: | |
116 | mov r3,r7 /* first word: use the reduced mask */ | |
117 | .Lfound0: | |
118 | sub r3,r6,r3 | |
119 | bic r3,r3,r6 | |
120 | and r2,r3,r4 /* match flags of this word */ | |
121 | or_s r12,r12,r2 /* NUL flags or match flags */ | |
122 | sub_s r3,r12,1 | |
123 | bic_s r3,r3,r12 /* ones in every bit below the lowest flag */ | |
124 | norm r3,r3 | |
125 | add_s r0,r0,3 | |
126 | asr_s r12,r3,3 | |
127 | asl.f 0,r2,r3 /* N is set when the first flagged byte is a match */ | |
128 | sub_s r0,r0,r12 | |
129 | j_s.d [blink] | |
130 | mov.pl r0,0 /* delay slot: no match before NUL, return 0 */ | |
131 | #else /* BIG ENDIAN */ | |
132 | .Lfound_char: | |
133 | lsr r7,r7,7 /* flags moved down to bit 0 of each lane */ | |
134 | |||
135 | bic r2,r7,r6 /* drop flags in lanes where bit 0 of r6 is set */ | |
136 | .Lfound_char_b: | |
137 | norm r2,r2 | |
138 | sub_s r0,r0,4 /* back to the word the flags belong to */ | |
139 | asr_s r2,r2,3 | |
140 | j.d [blink] | |
141 | add_s r0,r0,r2 /* delay slot */ | |
142 | |||
143 | /* NOTE(review): when entered from the first, unaligned word, the NUL flags | |
144 | in r12 were computed with the reduced mask and are not limited to the | |
145 | lanes at or after the start. Confirm that a zero byte just before the | |
146 | string cannot be flagged here. */ | |
147 | .Lfound0_ua: | |
148 | mov_s r3,r7 | |
149 | .Lfound0: | |
150 | asl_s r2,r2,7 /* bit 0 of each byte moved up to the flag position */ | |
151 | or r7,r6,r4 | |
152 | bic_s r12,r12,r2 /* discard NUL flags of bytes whose bit 0 is set */ | |
153 | sub r2,r7,r3 | |
154 | or r2,r2,r6 /* flag bit stays clear only in a matching lane */ | |
155 | bic r12,r2,r12 /* flag bit set = neither a match nor NUL */ | |
156 | bic.f r3,r4,r12 /* flags of the lanes that are a match or NUL */ | |
157 | norm r3,r3 | |
158 | |||
159 | add.pl r3,r3,1 | |
160 | asr_s r12,r3,3 /* byte index of the first flagged lane */ | |
161 | asl.f 0,r2,r3 /* N is set when that lane is not a match */ | |
162 | add_s r0,r0,r12 | |
163 | j_s.d [blink] | |
164 | mov.mi r0,0 /* delay slot: return 0 in that case */ | |
165 | #endif /* ENDIAN */ | |
166 | ARC_EXIT strchr | |