diff options
Diffstat (limited to 'arch/sparc/lib/NG2memcpy.S')
-rw-r--r-- | arch/sparc/lib/NG2memcpy.S | 228 |
1 file changed, 145 insertions, 83 deletions
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index d5f585df2f3f..c629dbd121b6 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -4,6 +4,7 @@ | |||
4 | */ | 4 | */ |
5 | 5 | ||
6 | #ifdef __KERNEL__ | 6 | #ifdef __KERNEL__ |
7 | #include <linux/linkage.h> | ||
7 | #include <asm/visasm.h> | 8 | #include <asm/visasm.h> |
8 | #include <asm/asi.h> | 9 | #include <asm/asi.h> |
9 | #define GLOBAL_SPARE %g7 | 10 | #define GLOBAL_SPARE %g7 |
@@ -32,21 +33,17 @@ | |||
32 | #endif | 33 | #endif |
33 | 34 | ||
34 | #ifndef EX_LD | 35 | #ifndef EX_LD |
35 | #define EX_LD(x) x | 36 | #define EX_LD(x,y) x |
36 | #endif | 37 | #endif |
37 | #ifndef EX_LD_FP | 38 | #ifndef EX_LD_FP |
38 | #define EX_LD_FP(x) x | 39 | #define EX_LD_FP(x,y) x |
39 | #endif | 40 | #endif |
40 | 41 | ||
41 | #ifndef EX_ST | 42 | #ifndef EX_ST |
42 | #define EX_ST(x) x | 43 | #define EX_ST(x,y) x |
43 | #endif | 44 | #endif |
44 | #ifndef EX_ST_FP | 45 | #ifndef EX_ST_FP |
45 | #define EX_ST_FP(x) x | 46 | #define EX_ST_FP(x,y) x |
46 | #endif | ||
47 | |||
48 | #ifndef EX_RETVAL | ||
49 | #define EX_RETVAL(x) x | ||
50 | #endif | 47 | #endif |
51 | 48 | ||
52 | #ifndef LOAD | 49 | #ifndef LOAD |
@@ -140,45 +137,110 @@ | |||
140 | fsrc2 %x6, %f12; \ | 137 | fsrc2 %x6, %f12; \ |
141 | fsrc2 %x7, %f14; | 138 | fsrc2 %x7, %f14; |
142 | #define FREG_LOAD_1(base, x0) \ | 139 | #define FREG_LOAD_1(base, x0) \ |
143 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) | 140 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) |
144 | #define FREG_LOAD_2(base, x0, x1) \ | 141 | #define FREG_LOAD_2(base, x0, x1) \ |
145 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | 142 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ |
146 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); | 143 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); |
147 | #define FREG_LOAD_3(base, x0, x1, x2) \ | 144 | #define FREG_LOAD_3(base, x0, x1, x2) \ |
148 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | 145 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ |
149 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | 146 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ |
150 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); | 147 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); |
151 | #define FREG_LOAD_4(base, x0, x1, x2, x3) \ | 148 | #define FREG_LOAD_4(base, x0, x1, x2, x3) \ |
152 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | 149 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ |
153 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | 150 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ |
154 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ | 151 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ |
155 | EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); | 152 | EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); |
156 | #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ | 153 | #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ |
157 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | 154 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ |
158 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | 155 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ |
159 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ | 156 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ |
160 | EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ | 157 | EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ |
161 | EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); | 158 | EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); |
162 | #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ | 159 | #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ |
163 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | 160 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ |
164 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | 161 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ |
165 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ | 162 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ |
166 | EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ | 163 | EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ |
167 | EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ | 164 | EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ |
168 | EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); | 165 | EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); |
169 | #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ | 166 | #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ |
170 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ | 167 | EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ |
171 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ | 168 | EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ |
172 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ | 169 | EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ |
173 | EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ | 170 | EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ |
174 | EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ | 171 | EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ |
175 | EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ | 172 | EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ |
176 | EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); | 173 | EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); |
177 | 174 | ||
178 | .register %g2,#scratch | 175 | .register %g2,#scratch |
179 | .register %g3,#scratch | 176 | .register %g3,#scratch |
180 | 177 | ||
181 | .text | 178 | .text |
179 | #ifndef EX_RETVAL | ||
180 | #define EX_RETVAL(x) x | ||
181 | __restore_fp: | ||
182 | VISExitHalf | ||
183 | __restore_asi: | ||
184 | retl | ||
185 | wr %g0, ASI_AIUS, %asi | ||
186 | ENTRY(NG2_retl_o2) | ||
187 | ba,pt %xcc, __restore_asi | ||
188 | mov %o2, %o0 | ||
189 | ENDPROC(NG2_retl_o2) | ||
190 | ENTRY(NG2_retl_o2_plus_1) | ||
191 | ba,pt %xcc, __restore_asi | ||
192 | add %o2, 1, %o0 | ||
193 | ENDPROC(NG2_retl_o2_plus_1) | ||
194 | ENTRY(NG2_retl_o2_plus_4) | ||
195 | ba,pt %xcc, __restore_asi | ||
196 | add %o2, 4, %o0 | ||
197 | ENDPROC(NG2_retl_o2_plus_4) | ||
198 | ENTRY(NG2_retl_o2_plus_8) | ||
199 | ba,pt %xcc, __restore_asi | ||
200 | add %o2, 8, %o0 | ||
201 | ENDPROC(NG2_retl_o2_plus_8) | ||
202 | ENTRY(NG2_retl_o2_plus_o4_plus_1) | ||
203 | add %o4, 1, %o4 | ||
204 | ba,pt %xcc, __restore_asi | ||
205 | add %o2, %o4, %o0 | ||
206 | ENDPROC(NG2_retl_o2_plus_o4_plus_1) | ||
207 | ENTRY(NG2_retl_o2_plus_o4_plus_8) | ||
208 | add %o4, 8, %o4 | ||
209 | ba,pt %xcc, __restore_asi | ||
210 | add %o2, %o4, %o0 | ||
211 | ENDPROC(NG2_retl_o2_plus_o4_plus_8) | ||
212 | ENTRY(NG2_retl_o2_plus_o4_plus_16) | ||
213 | add %o4, 16, %o4 | ||
214 | ba,pt %xcc, __restore_asi | ||
215 | add %o2, %o4, %o0 | ||
216 | ENDPROC(NG2_retl_o2_plus_o4_plus_16) | ||
217 | ENTRY(NG2_retl_o2_plus_g1_fp) | ||
218 | ba,pt %xcc, __restore_fp | ||
219 | add %o2, %g1, %o0 | ||
220 | ENDPROC(NG2_retl_o2_plus_g1_fp) | ||
221 | ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) | ||
222 | add %g1, 64, %g1 | ||
223 | ba,pt %xcc, __restore_fp | ||
224 | add %o2, %g1, %o0 | ||
225 | ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) | ||
226 | ENTRY(NG2_retl_o2_plus_g1_plus_1) | ||
227 | add %g1, 1, %g1 | ||
228 | ba,pt %xcc, __restore_asi | ||
229 | add %o2, %g1, %o0 | ||
230 | ENDPROC(NG2_retl_o2_plus_g1_plus_1) | ||
231 | ENTRY(NG2_retl_o2_and_7_plus_o4) | ||
232 | and %o2, 7, %o2 | ||
233 | ba,pt %xcc, __restore_asi | ||
234 | add %o2, %o4, %o0 | ||
235 | ENDPROC(NG2_retl_o2_and_7_plus_o4) | ||
236 | ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) | ||
237 | and %o2, 7, %o2 | ||
238 | add %o4, 8, %o4 | ||
239 | ba,pt %xcc, __restore_asi | ||
240 | add %o2, %o4, %o0 | ||
241 | ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) | ||
242 | #endif | ||
243 | |||
182 | .align 64 | 244 | .align 64 |
183 | 245 | ||
184 | .globl FUNC_NAME | 246 | .globl FUNC_NAME |
@@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
230 | sub %g0, %o4, %o4 ! bytes to align dst | 292 | sub %g0, %o4, %o4 ! bytes to align dst |
231 | sub %o2, %o4, %o2 | 293 | sub %o2, %o4, %o2 |
232 | 1: subcc %o4, 1, %o4 | 294 | 1: subcc %o4, 1, %o4 |
233 | EX_LD(LOAD(ldub, %o1, %g1)) | 295 | EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) |
234 | EX_ST(STORE(stb, %g1, %o0)) | 296 | EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) |
235 | add %o1, 1, %o1 | 297 | add %o1, 1, %o1 |
236 | bne,pt %XCC, 1b | 298 | bne,pt %XCC, 1b |
237 | add %o0, 1, %o0 | 299 | add %o0, 1, %o0 |
@@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
281 | nop | 343 | nop |
282 | /* fall through for 0 < low bits < 8 */ | 344 | /* fall through for 0 < low bits < 8 */ |
283 | 110: sub %o4, 64, %g2 | 345 | 110: sub %o4, 64, %g2 |
284 | EX_LD_FP(LOAD_BLK(%g2, %f0)) | 346 | EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) |
285 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | 347 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) |
286 | EX_LD_FP(LOAD_BLK(%o4, %f16)) | 348 | EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) |
287 | FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) | 349 | FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) |
288 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | 350 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) |
289 | FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) | 351 | FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) |
290 | subcc %g1, 64, %g1 | 352 | subcc %g1, 64, %g1 |
291 | add %o4, 64, %o4 | 353 | add %o4, 64, %o4 |
@@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
296 | 358 | ||
297 | 120: sub %o4, 56, %g2 | 359 | 120: sub %o4, 56, %g2 |
298 | FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) | 360 | FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) |
299 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | 361 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) |
300 | EX_LD_FP(LOAD_BLK(%o4, %f16)) | 362 | EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) |
301 | FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) | 363 | FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) |
302 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | 364 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) |
303 | FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) | 365 | FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) |
304 | subcc %g1, 64, %g1 | 366 | subcc %g1, 64, %g1 |
305 | add %o4, 64, %o4 | 367 | add %o4, 64, %o4 |
@@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
310 | 372 | ||
311 | 130: sub %o4, 48, %g2 | 373 | 130: sub %o4, 48, %g2 |
312 | FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) | 374 | FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) |
313 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | 375 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) |
314 | EX_LD_FP(LOAD_BLK(%o4, %f16)) | 376 | EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) |
315 | FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) | 377 | FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) |
316 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | 378 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) |
317 | FREG_MOVE_6(f20, f22, f24, f26, f28, f30) | 379 | FREG_MOVE_6(f20, f22, f24, f26, f28, f30) |
318 | subcc %g1, 64, %g1 | 380 | subcc %g1, 64, %g1 |
319 | add %o4, 64, %o4 | 381 | add %o4, 64, %o4 |
@@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
324 | 386 | ||
325 | 140: sub %o4, 40, %g2 | 387 | 140: sub %o4, 40, %g2 |
326 | FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) | 388 | FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) |
327 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | 389 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) |
328 | EX_LD_FP(LOAD_BLK(%o4, %f16)) | 390 | EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) |
329 | FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) | 391 | FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) |
330 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | 392 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) |
331 | FREG_MOVE_5(f22, f24, f26, f28, f30) | 393 | FREG_MOVE_5(f22, f24, f26, f28, f30) |
332 | subcc %g1, 64, %g1 | 394 | subcc %g1, 64, %g1 |
333 | add %o4, 64, %o4 | 395 | add %o4, 64, %o4 |
@@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
338 | 400 | ||
339 | 150: sub %o4, 32, %g2 | 401 | 150: sub %o4, 32, %g2 |
340 | FREG_LOAD_4(%g2, f0, f2, f4, f6) | 402 | FREG_LOAD_4(%g2, f0, f2, f4, f6) |
341 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | 403 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) |
342 | EX_LD_FP(LOAD_BLK(%o4, %f16)) | 404 | EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) |
343 | FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) | 405 | FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) |
344 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | 406 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) |
345 | FREG_MOVE_4(f24, f26, f28, f30) | 407 | FREG_MOVE_4(f24, f26, f28, f30) |
346 | subcc %g1, 64, %g1 | 408 | subcc %g1, 64, %g1 |
347 | add %o4, 64, %o4 | 409 | add %o4, 64, %o4 |
@@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
352 | 414 | ||
353 | 160: sub %o4, 24, %g2 | 415 | 160: sub %o4, 24, %g2 |
354 | FREG_LOAD_3(%g2, f0, f2, f4) | 416 | FREG_LOAD_3(%g2, f0, f2, f4) |
355 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | 417 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) |
356 | EX_LD_FP(LOAD_BLK(%o4, %f16)) | 418 | EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) |
357 | FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) | 419 | FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) |
358 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | 420 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) |
359 | FREG_MOVE_3(f26, f28, f30) | 421 | FREG_MOVE_3(f26, f28, f30) |
360 | subcc %g1, 64, %g1 | 422 | subcc %g1, 64, %g1 |
361 | add %o4, 64, %o4 | 423 | add %o4, 64, %o4 |
@@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
366 | 428 | ||
367 | 170: sub %o4, 16, %g2 | 429 | 170: sub %o4, 16, %g2 |
368 | FREG_LOAD_2(%g2, f0, f2) | 430 | FREG_LOAD_2(%g2, f0, f2) |
369 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | 431 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) |
370 | EX_LD_FP(LOAD_BLK(%o4, %f16)) | 432 | EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) |
371 | FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) | 433 | FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) |
372 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | 434 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) |
373 | FREG_MOVE_2(f28, f30) | 435 | FREG_MOVE_2(f28, f30) |
374 | subcc %g1, 64, %g1 | 436 | subcc %g1, 64, %g1 |
375 | add %o4, 64, %o4 | 437 | add %o4, 64, %o4 |
@@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
380 | 442 | ||
381 | 180: sub %o4, 8, %g2 | 443 | 180: sub %o4, 8, %g2 |
382 | FREG_LOAD_1(%g2, f0) | 444 | FREG_LOAD_1(%g2, f0) |
383 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | 445 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) |
384 | EX_LD_FP(LOAD_BLK(%o4, %f16)) | 446 | EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) |
385 | FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) | 447 | FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) |
386 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | 448 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) |
387 | FREG_MOVE_1(f30) | 449 | FREG_MOVE_1(f30) |
388 | subcc %g1, 64, %g1 | 450 | subcc %g1, 64, %g1 |
389 | add %o4, 64, %o4 | 451 | add %o4, 64, %o4 |
@@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
393 | nop | 455 | nop |
394 | 456 | ||
395 | 190: | 457 | 190: |
396 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) | 458 | 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) |
397 | subcc %g1, 64, %g1 | 459 | subcc %g1, 64, %g1 |
398 | EX_LD_FP(LOAD_BLK(%o4, %f0)) | 460 | EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) |
399 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) | 461 | EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) |
400 | add %o4, 64, %o4 | 462 | add %o4, 64, %o4 |
401 | bne,pt %xcc, 1b | 463 | bne,pt %xcc, 1b |
402 | LOAD(prefetch, %o4 + 64, #one_read) | 464 | LOAD(prefetch, %o4 + 64, #one_read) |
@@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
423 | andn %o2, 0xf, %o4 | 485 | andn %o2, 0xf, %o4 |
424 | and %o2, 0xf, %o2 | 486 | and %o2, 0xf, %o2 |
425 | 1: subcc %o4, 0x10, %o4 | 487 | 1: subcc %o4, 0x10, %o4 |
426 | EX_LD(LOAD(ldx, %o1, %o5)) | 488 | EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) |
427 | add %o1, 0x08, %o1 | 489 | add %o1, 0x08, %o1 |
428 | EX_LD(LOAD(ldx, %o1, %g1)) | 490 | EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) |
429 | sub %o1, 0x08, %o1 | 491 | sub %o1, 0x08, %o1 |
430 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) | 492 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16) |
431 | add %o1, 0x8, %o1 | 493 | add %o1, 0x8, %o1 |
432 | EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) | 494 | EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) |
433 | bgu,pt %XCC, 1b | 495 | bgu,pt %XCC, 1b |
434 | add %o1, 0x8, %o1 | 496 | add %o1, 0x8, %o1 |
435 | 73: andcc %o2, 0x8, %g0 | 497 | 73: andcc %o2, 0x8, %g0 |
436 | be,pt %XCC, 1f | 498 | be,pt %XCC, 1f |
437 | nop | 499 | nop |
438 | sub %o2, 0x8, %o2 | 500 | sub %o2, 0x8, %o2 |
439 | EX_LD(LOAD(ldx, %o1, %o5)) | 501 | EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) |
440 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) | 502 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) |
441 | add %o1, 0x8, %o1 | 503 | add %o1, 0x8, %o1 |
442 | 1: andcc %o2, 0x4, %g0 | 504 | 1: andcc %o2, 0x4, %g0 |
443 | be,pt %XCC, 1f | 505 | be,pt %XCC, 1f |
444 | nop | 506 | nop |
445 | sub %o2, 0x4, %o2 | 507 | sub %o2, 0x4, %o2 |
446 | EX_LD(LOAD(lduw, %o1, %o5)) | 508 | EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) |
447 | EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) | 509 | EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) |
448 | add %o1, 0x4, %o1 | 510 | add %o1, 0x4, %o1 |
449 | 1: cmp %o2, 0 | 511 | 1: cmp %o2, 0 |
450 | be,pt %XCC, 85f | 512 | be,pt %XCC, 85f |
@@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
460 | sub %o2, %g1, %o2 | 522 | sub %o2, %g1, %o2 |
461 | 523 | ||
462 | 1: subcc %g1, 1, %g1 | 524 | 1: subcc %g1, 1, %g1 |
463 | EX_LD(LOAD(ldub, %o1, %o5)) | 525 | EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) |
464 | EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) | 526 | EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) |
465 | bgu,pt %icc, 1b | 527 | bgu,pt %icc, 1b |
466 | add %o1, 1, %o1 | 528 | add %o1, 1, %o1 |
467 | 529 | ||
@@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
477 | 539 | ||
478 | 8: mov 64, GLOBAL_SPARE | 540 | 8: mov 64, GLOBAL_SPARE |
479 | andn %o1, 0x7, %o1 | 541 | andn %o1, 0x7, %o1 |
480 | EX_LD(LOAD(ldx, %o1, %g2)) | 542 | EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) |
481 | sub GLOBAL_SPARE, %g1, GLOBAL_SPARE | 543 | sub GLOBAL_SPARE, %g1, GLOBAL_SPARE |
482 | andn %o2, 0x7, %o4 | 544 | andn %o2, 0x7, %o4 |
483 | sllx %g2, %g1, %g2 | 545 | sllx %g2, %g1, %g2 |
484 | 1: add %o1, 0x8, %o1 | 546 | 1: add %o1, 0x8, %o1 |
485 | EX_LD(LOAD(ldx, %o1, %g3)) | 547 | EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) |
486 | subcc %o4, 0x8, %o4 | 548 | subcc %o4, 0x8, %o4 |
487 | srlx %g3, GLOBAL_SPARE, %o5 | 549 | srlx %g3, GLOBAL_SPARE, %o5 |
488 | or %o5, %g2, %o5 | 550 | or %o5, %g2, %o5 |
489 | EX_ST(STORE(stx, %o5, %o0)) | 551 | EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) |
490 | add %o0, 0x8, %o0 | 552 | add %o0, 0x8, %o0 |
491 | bgu,pt %icc, 1b | 553 | bgu,pt %icc, 1b |
492 | sllx %g3, %g1, %g2 | 554 | sllx %g3, %g1, %g2 |
@@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
506 | 568 | ||
507 | 1: | 569 | 1: |
508 | subcc %o2, 4, %o2 | 570 | subcc %o2, 4, %o2 |
509 | EX_LD(LOAD(lduw, %o1, %g1)) | 571 | EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) |
510 | EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) | 572 | EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) |
511 | bgu,pt %XCC, 1b | 573 | bgu,pt %XCC, 1b |
512 | add %o1, 4, %o1 | 574 | add %o1, 4, %o1 |
513 | 575 | ||
@@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
517 | .align 32 | 579 | .align 32 |
518 | 90: | 580 | 90: |
519 | subcc %o2, 1, %o2 | 581 | subcc %o2, 1, %o2 |
520 | EX_LD(LOAD(ldub, %o1, %g1)) | 582 | EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) |
521 | EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) | 583 | EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) |
522 | bgu,pt %XCC, 90b | 584 | bgu,pt %XCC, 90b |
523 | add %o1, 1, %o1 | 585 | add %o1, 1, %o1 |
524 | retl | 586 | retl |