diff options
author | Vineet Gupta <vgupta@synopsys.com> | 2015-10-29 10:06:03 -0400 |
---|---|---|
committer | Vineet Gupta <vgupta@synopsys.com> | 2015-11-03 07:03:00 -0500 |
commit | ac506b7f2233b35f17172304255e08cabc072aad (patch) | |
tree | 55ceb2afc4fb46a9d65b82f7acfd9b231cd51cf2 | |
parent | 5a364c2a1762e8a78721fafc93144509c0b6cb84 (diff) |
ARCv2: lib: memcpy: use local symbols
Otherwise perf profiles don't charge time to memcpy
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
-rw-r--r-- | arch/arc/lib/memcpy-archs.S | 52 |
1 files changed, 26 insertions, 26 deletions
diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S index 0cab0b8a57c5..f96c75edf30a 100644 --- a/arch/arc/lib/memcpy-archs.S +++ b/arch/arc/lib/memcpy-archs.S | |||
@@ -50,26 +50,26 @@ ENTRY(memcpy) | |||
50 | 50 | ||
51 | ;;; if size <= 8 | 51 | ;;; if size <= 8 |
52 | cmp r2, 8 | 52 | cmp r2, 8 |
53 | bls.d @smallchunk | 53 | bls.d @.Lsmallchunk |
54 | mov.f lp_count, r2 | 54 | mov.f lp_count, r2 |
55 | 55 | ||
56 | and.f r4, r0, 0x03 | 56 | and.f r4, r0, 0x03 |
57 | rsub lp_count, r4, 4 | 57 | rsub lp_count, r4, 4 |
58 | lpnz @aligndestination | 58 | lpnz @.Laligndestination |
59 | ;; LOOP BEGIN | 59 | ;; LOOP BEGIN |
60 | ldb.ab r5, [r1,1] | 60 | ldb.ab r5, [r1,1] |
61 | sub r2, r2, 1 | 61 | sub r2, r2, 1 |
62 | stb.ab r5, [r3,1] | 62 | stb.ab r5, [r3,1] |
63 | aligndestination: | 63 | .Laligndestination: |
64 | 64 | ||
65 | ;;; Check the alignment of the source | 65 | ;;; Check the alignment of the source |
66 | and.f r4, r1, 0x03 | 66 | and.f r4, r1, 0x03 |
67 | bnz.d @sourceunaligned | 67 | bnz.d @.Lsourceunaligned |
68 | 68 | ||
69 | ;;; CASE 0: Both source and destination are 32bit aligned | 69 | ;;; CASE 0: Both source and destination are 32bit aligned |
70 | ;;; Convert len to Dwords, unfold x4 | 70 | ;;; Convert len to Dwords, unfold x4 |
71 | lsr.f lp_count, r2, ZOLSHFT | 71 | lsr.f lp_count, r2, ZOLSHFT |
72 | lpnz @copy32_64bytes | 72 | lpnz @.Lcopy32_64bytes |
73 | ;; LOOP START | 73 | ;; LOOP START |
74 | LOADX (r6, r1) | 74 | LOADX (r6, r1) |
75 | PREFETCH_READ (r1) | 75 | PREFETCH_READ (r1) |
@@ -81,25 +81,25 @@ aligndestination: | |||
81 | STOREX (r8, r3) | 81 | STOREX (r8, r3) |
82 | STOREX (r10, r3) | 82 | STOREX (r10, r3) |
83 | STOREX (r4, r3) | 83 | STOREX (r4, r3) |
84 | copy32_64bytes: | 84 | .Lcopy32_64bytes: |
85 | 85 | ||
86 | and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes | 86 | and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes |
87 | smallchunk: | 87 | .Lsmallchunk: |
88 | lpnz @copyremainingbytes | 88 | lpnz @.Lcopyremainingbytes |
89 | ;; LOOP START | 89 | ;; LOOP START |
90 | ldb.ab r5, [r1,1] | 90 | ldb.ab r5, [r1,1] |
91 | stb.ab r5, [r3,1] | 91 | stb.ab r5, [r3,1] |
92 | copyremainingbytes: | 92 | .Lcopyremainingbytes: |
93 | 93 | ||
94 | j [blink] | 94 | j [blink] |
95 | ;;; END CASE 0 | 95 | ;;; END CASE 0 |
96 | 96 | ||
97 | sourceunaligned: | 97 | .Lsourceunaligned: |
98 | cmp r4, 2 | 98 | cmp r4, 2 |
99 | beq.d @unalignedOffby2 | 99 | beq.d @.LunalignedOffby2 |
100 | sub r2, r2, 1 | 100 | sub r2, r2, 1 |
101 | 101 | ||
102 | bhi.d @unalignedOffby3 | 102 | bhi.d @.LunalignedOffby3 |
103 | ldb.ab r5, [r1, 1] | 103 | ldb.ab r5, [r1, 1] |
104 | 104 | ||
105 | ;;; CASE 1: The source is unaligned, off by 1 | 105 | ;;; CASE 1: The source is unaligned, off by 1 |
@@ -114,7 +114,7 @@ sourceunaligned: | |||
114 | or r5, r5, r6 | 114 | or r5, r5, r6 |
115 | 115 | ||
116 | ;; Both src and dst are aligned | 116 | ;; Both src and dst are aligned |
117 | lpnz @copy8bytes_1 | 117 | lpnz @.Lcopy8bytes_1 |
118 | ;; LOOP START | 118 | ;; LOOP START |
119 | ld.ab r6, [r1, 4] | 119 | ld.ab r6, [r1, 4] |
120 | prefetch [r1, 28] ;Prefetch the next read location | 120 | prefetch [r1, 28] ;Prefetch the next read location |
@@ -131,7 +131,7 @@ sourceunaligned: | |||
131 | 131 | ||
132 | st.ab r7, [r3, 4] | 132 | st.ab r7, [r3, 4] |
133 | st.ab r9, [r3, 4] | 133 | st.ab r9, [r3, 4] |
134 | copy8bytes_1: | 134 | .Lcopy8bytes_1: |
135 | 135 | ||
136 | ;; Write back the remaining 16bits | 136 | ;; Write back the remaining 16bits |
137 | EXTRACT_1 (r6, r5, 16) | 137 | EXTRACT_1 (r6, r5, 16) |
@@ -141,14 +141,14 @@ copy8bytes_1: | |||
141 | stb.ab r5, [r3, 1] | 141 | stb.ab r5, [r3, 1] |
142 | 142 | ||
143 | and.f lp_count, r2, 0x07 ;Last 8bytes | 143 | and.f lp_count, r2, 0x07 ;Last 8bytes |
144 | lpnz @copybytewise_1 | 144 | lpnz @.Lcopybytewise_1 |
145 | ;; LOOP START | 145 | ;; LOOP START |
146 | ldb.ab r6, [r1,1] | 146 | ldb.ab r6, [r1,1] |
147 | stb.ab r6, [r3,1] | 147 | stb.ab r6, [r3,1] |
148 | copybytewise_1: | 148 | .Lcopybytewise_1: |
149 | j [blink] | 149 | j [blink] |
150 | 150 | ||
151 | unalignedOffby2: | 151 | .LunalignedOffby2: |
152 | ;;; CASE 2: The source is unaligned, off by 2 | 152 | ;;; CASE 2: The source is unaligned, off by 2 |
153 | ldh.ab r5, [r1, 2] | 153 | ldh.ab r5, [r1, 2] |
154 | sub r2, r2, 1 | 154 | sub r2, r2, 1 |
@@ -159,7 +159,7 @@ unalignedOffby2: | |||
159 | #ifdef __BIG_ENDIAN__ | 159 | #ifdef __BIG_ENDIAN__ |
160 | asl.nz r5, r5, 16 | 160 | asl.nz r5, r5, 16 |
161 | #endif | 161 | #endif |
162 | lpnz @copy8bytes_2 | 162 | lpnz @.Lcopy8bytes_2 |
163 | ;; LOOP START | 163 | ;; LOOP START |
164 | ld.ab r6, [r1, 4] | 164 | ld.ab r6, [r1, 4] |
165 | prefetch [r1, 28] ;Prefetch the next read location | 165 | prefetch [r1, 28] ;Prefetch the next read location |
@@ -176,7 +176,7 @@ unalignedOffby2: | |||
176 | 176 | ||
177 | st.ab r7, [r3, 4] | 177 | st.ab r7, [r3, 4] |
178 | st.ab r9, [r3, 4] | 178 | st.ab r9, [r3, 4] |
179 | copy8bytes_2: | 179 | .Lcopy8bytes_2: |
180 | 180 | ||
181 | #ifdef __BIG_ENDIAN__ | 181 | #ifdef __BIG_ENDIAN__ |
182 | lsr.nz r5, r5, 16 | 182 | lsr.nz r5, r5, 16 |
@@ -184,14 +184,14 @@ copy8bytes_2: | |||
184 | sth.ab r5, [r3, 2] | 184 | sth.ab r5, [r3, 2] |
185 | 185 | ||
186 | and.f lp_count, r2, 0x07 ;Last 8bytes | 186 | and.f lp_count, r2, 0x07 ;Last 8bytes |
187 | lpnz @copybytewise_2 | 187 | lpnz @.Lcopybytewise_2 |
188 | ;; LOOP START | 188 | ;; LOOP START |
189 | ldb.ab r6, [r1,1] | 189 | ldb.ab r6, [r1,1] |
190 | stb.ab r6, [r3,1] | 190 | stb.ab r6, [r3,1] |
191 | copybytewise_2: | 191 | .Lcopybytewise_2: |
192 | j [blink] | 192 | j [blink] |
193 | 193 | ||
194 | unalignedOffby3: | 194 | .LunalignedOffby3: |
195 | ;;; CASE 3: The source is unaligned, off by 3 | 195 | ;;; CASE 3: The source is unaligned, off by 3 |
196 | ;;; Hence, I need to read 1byte for achieve the 32bit alignment | 196 | ;;; Hence, I need to read 1byte for achieve the 32bit alignment |
197 | 197 | ||
@@ -201,7 +201,7 @@ unalignedOffby3: | |||
201 | #ifdef __BIG_ENDIAN__ | 201 | #ifdef __BIG_ENDIAN__ |
202 | asl.ne r5, r5, 24 | 202 | asl.ne r5, r5, 24 |
203 | #endif | 203 | #endif |
204 | lpnz @copy8bytes_3 | 204 | lpnz @.Lcopy8bytes_3 |
205 | ;; LOOP START | 205 | ;; LOOP START |
206 | ld.ab r6, [r1, 4] | 206 | ld.ab r6, [r1, 4] |
207 | prefetch [r1, 28] ;Prefetch the next read location | 207 | prefetch [r1, 28] ;Prefetch the next read location |
@@ -218,7 +218,7 @@ unalignedOffby3: | |||
218 | 218 | ||
219 | st.ab r7, [r3, 4] | 219 | st.ab r7, [r3, 4] |
220 | st.ab r9, [r3, 4] | 220 | st.ab r9, [r3, 4] |
221 | copy8bytes_3: | 221 | .Lcopy8bytes_3: |
222 | 222 | ||
223 | #ifdef __BIG_ENDIAN__ | 223 | #ifdef __BIG_ENDIAN__ |
224 | lsr.nz r5, r5, 24 | 224 | lsr.nz r5, r5, 24 |
@@ -226,11 +226,11 @@ copy8bytes_3: | |||
226 | stb.ab r5, [r3, 1] | 226 | stb.ab r5, [r3, 1] |
227 | 227 | ||
228 | and.f lp_count, r2, 0x07 ;Last 8bytes | 228 | and.f lp_count, r2, 0x07 ;Last 8bytes |
229 | lpnz @copybytewise_3 | 229 | lpnz @.Lcopybytewise_3 |
230 | ;; LOOP START | 230 | ;; LOOP START |
231 | ldb.ab r6, [r1,1] | 231 | ldb.ab r6, [r1,1] |
232 | stb.ab r6, [r3,1] | 232 | stb.ab r6, [r3,1] |
233 | copybytewise_3: | 233 | .Lcopybytewise_3: |
234 | j [blink] | 234 | j [blink] |
235 | 235 | ||
236 | END(memcpy) | 236 | END(memcpy) |