aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/lib/U1memcpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc/lib/U1memcpy.S')
-rw-r--r-- arch/sparc/lib/U1memcpy.S 345
1 files changed, 229 insertions, 116 deletions
diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
index 97e1b211090c..4f0d50b33a72 100644
--- a/arch/sparc/lib/U1memcpy.S
+++ b/arch/sparc/lib/U1memcpy.S
@@ -5,6 +5,7 @@
5 */ 5 */
6 6
7#ifdef __KERNEL__ 7#ifdef __KERNEL__
8#include <linux/linkage.h>
8#include <asm/visasm.h> 9#include <asm/visasm.h>
9#include <asm/asi.h> 10#include <asm/asi.h>
10#include <asm/export.h> 11#include <asm/export.h>
@@ -24,21 +25,17 @@
24#endif 25#endif
25 26
/*
 * Fallback definitions of the memory-access wrappers.  Each wrapper is
 * only defined here when nothing has defined it earlier.  In the old
 * (left-hand) text a wrapper takes one argument; in the new (right-hand)
 * text it takes two, (x,y), and still expands to x alone, so these
 * defaults discard y.  Call sites in this diff pass a U1_* label as y.
 * The rows that carry only a left-hand column are the old EX_RETVAL
 * default, which has no right-hand counterpart at this position.
 */
26#ifndef EX_LD 27#ifndef EX_LD
27#define EX_LD(x) x 28#define EX_LD(x,y) x
28#endif 29#endif
29#ifndef EX_LD_FP 30#ifndef EX_LD_FP
30#define EX_LD_FP(x) x 31#define EX_LD_FP(x,y) x
31#endif 32#endif
32 33
33#ifndef EX_ST 34#ifndef EX_ST
34#define EX_ST(x) x 35#define EX_ST(x,y) x
35#endif 36#endif
36#ifndef EX_ST_FP 37#ifndef EX_ST_FP
37#define EX_ST_FP(x) x 38#define EX_ST_FP(x,y) x
38#endif
39
40#ifndef EX_RETVAL
41#define EX_RETVAL(x) x
42#endif 39#endif
43 40
44#ifndef LOAD 41#ifndef LOAD
@@ -79,53 +76,169 @@
79 faligndata %f7, %f8, %f60; \ 76 faligndata %f7, %f8, %f60; \
80 faligndata %f8, %f9, %f62; 77 faligndata %f8, %f9, %f62;
81 78
/*
 * MAIN_LOOP_CHUNK: one 0x40-byte step of the block-copy loop.
 *   - block-loads from %src into the FP registers starting at %fdest
 *   - block-stores the FP registers starting at %fsrc to %dest
 *   - advances %src by 0x40 and subtracts 0x40 from the byte counter
 *   - branches to jmptgt when the counter reaches zero; the add that
 *     advances %dest by 0x40 follows the branch (its delay slot)
 * The old text takes the counter as the 'len' parameter; the new text
 * drops that parameter and names %GLOBAL_SPARE directly.  Both accesses
 * pass U1_gs_80_fp as the second wrapper argument.
 */
82#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ 79#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
83 EX_LD_FP(LOAD_BLK(%src, %fdest)); \ 80 EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
84 EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ 81 EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
85 add %src, 0x40, %src; \ 82 add %src, 0x40, %src; \
86 subcc %len, 0x40, %len; \ 83 subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
87 be,pn %xcc, jmptgt; \ 84 be,pn %xcc, jmptgt; \
88 add %dest, 0x40, %dest; \ 85 add %dest, 0x40, %dest; \
89 86
/* LOOP_CHUNK1: MAIN_LOOP_CHUNK loading into f0, storing from f48. */
90#define LOOP_CHUNK1(src, dest, len, branch_dest) \ 87#define LOOP_CHUNK1(src, dest, branch_dest) \
91 MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) 88 MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
/* LOOP_CHUNK2: MAIN_LOOP_CHUNK loading into f16, storing from f48. */
92#define LOOP_CHUNK2(src, dest, len, branch_dest) \ 89#define LOOP_CHUNK2(src, dest, branch_dest) \
93 MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) 90 MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
/* LOOP_CHUNK3: MAIN_LOOP_CHUNK loading into f32, storing from f48. */
94#define LOOP_CHUNK3(src, dest, len, branch_dest) \ 91#define LOOP_CHUNK3(src, dest, branch_dest) \
95 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) 92 MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
96 93
/* DO_SYNC: expands to a single membar #Sync. */
97#define DO_SYNC membar #Sync; 94#define DO_SYNC membar #Sync;
/*
 * STORE_SYNC: block-store the FP registers starting at %fsrc to %dest,
 * advance %dest by 0x40, then issue DO_SYNC.  The new text tags the
 * store with U1_gs_80_fp.
 */
98#define STORE_SYNC(dest, fsrc) \ 95#define STORE_SYNC(dest, fsrc) \
99 EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ 96 EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
100 add %dest, 0x40, %dest; \ 97 add %dest, 0x40, %dest; \
101 DO_SYNC 98 DO_SYNC
102 99
/*
 * STORE_JUMP: block-store the FP registers starting at %fsrc to %dest,
 * advance %dest by 0x40, then branch unconditionally to 'target' with a
 * nop after the branch.  The new text tags the store with U1_gs_40_fp
 * (0x40 lower than the label used by STORE_SYNC).
 */
103#define STORE_JUMP(dest, fsrc, target) \ 100#define STORE_JUMP(dest, fsrc, target) \
104 EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ 101 EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
105 add %dest, 0x40, %dest; \ 102 add %dest, 0x40, %dest; \
106 ba,pt %xcc, target; \ 103 ba,pt %xcc, target; \
107 nop; 104 nop;
108 105
/*
 * FINISH_VISCHUNK: emit one 8-byte piece from already-loaded FP data.
 *   - subtracts 8 from the counter and branches to local label 95 when
 *     the result is negative
 *   - the faligndata of %f0,%f1 into %f48 follows the branch (delay slot)
 *   - stores %f48 to %dest with std, then advances %dest by 8
 * The old text takes the counter as the 'left' parameter; the new text
 * hard-codes %g3.  The store is issued after the subtract and is tagged
 * U1_g3_8_fp.
 */
109#define FINISH_VISCHUNK(dest, f0, f1, left) \ 106#define FINISH_VISCHUNK(dest, f0, f1) \
110 subcc %left, 8, %left;\ 107 subcc %g3, 8, %g3; \
111 bl,pn %xcc, 95f; \ 108 bl,pn %xcc, 95f; \
112 faligndata %f0, %f1, %f48; \ 109 faligndata %f0, %f1, %f48; \
113 EX_ST_FP(STORE(std, %f48, %dest)); \ 110 EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
114 add %dest, 8, %dest; 111 add %dest, 8, %dest;
115 112
/*
 * UNEVEN_VISCHUNK_LAST: subtracts 8 from the counter, branches to local
 * label 95 when the result is negative, and copies %f0 to %f1 with fsrc2
 * after the branch.  It performs no store; 'dest' is unused in the body.
 */
116#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ 113#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
117 subcc %left, 8, %left; \ 114 subcc %g3, 8, %g3; \
118 bl,pn %xcc, 95f; \ 115 bl,pn %xcc, 95f; \
119 fsrc2 %f0, %f1; 116 fsrc2 %f0, %f1;
120 117
/*
 * UNEVEN_VISCHUNK: UNEVEN_VISCHUNK_LAST followed by an annulled
 * unconditional branch to local label 93.
 */
121#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ 118#define UNEVEN_VISCHUNK(dest, f0, f1) \
122 UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ 119 UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
123 ba,a,pt %xcc, 93f; 120 ba,a,pt %xcc, 93f;
124 121
125 .register %g2,#scratch 122 .register %g2,#scratch
126 .register %g3,#scratch 123 .register %g3,#scratch
127 124
128 .text 125 .text
/*
 * Everything from here to the matching #endif is assembled only when
 * EX_RETVAL has not been defined beforehand.  It supplies the default
 * EX_RETVAL and the U1_* routines that the EX_LD/EX_ST wrappers name as
 * their second argument.  Each routine computes a sum of counter
 * registers into %o0 and returns with retl; the instruction written
 * after retl is in its delay slot.
 * Presumably %o0 is the number of bytes left uncopied when the tagged
 * access faults -- confirm against the wrapper definitions used by the
 * including files, which are not visible here.
 * The *_fp routines below all begin with VISExitHalf (defined outside
 * this view).
 */
126#ifndef EX_RETVAL
127#define EX_RETVAL(x) x
/* %o0 = (%g1 + 1) + %g2 + %o2 */
128ENTRY(U1_g1_1_fp)
129 VISExitHalf
130 add %g1, 1, %g1
131 add %g1, %g2, %g1
132 retl
133 add %g1, %o2, %o0
134ENDPROC(U1_g1_1_fp)
/* %o0 = %g2 + %o2 */
135ENTRY(U1_g2_0_fp)
136 VISExitHalf
137 retl
138 add %g2, %o2, %o0
139ENDPROC(U1_g2_0_fp)
/* %o0 = (%g2 + 8) + %o2 */
140ENTRY(U1_g2_8_fp)
141 VISExitHalf
142 add %g2, 8, %g2
143 retl
144 add %g2, %o2, %o0
145ENDPROC(U1_g2_8_fp)
/* %o0 = %GLOBAL_SPARE + %g3 + %o2 */
146ENTRY(U1_gs_0_fp)
147 VISExitHalf
148 add %GLOBAL_SPARE, %g3, %o0
149 retl
150 add %o0, %o2, %o0
151ENDPROC(U1_gs_0_fp)
/* %o0 = (%GLOBAL_SPARE + 0x80) + %g3 + %o2 */
152ENTRY(U1_gs_80_fp)
153 VISExitHalf
154 add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
155 add %GLOBAL_SPARE, %g3, %o0
156 retl
157 add %o0, %o2, %o0
158ENDPROC(U1_gs_80_fp)
/* %o0 = (%GLOBAL_SPARE + 0x40) + %g3 + %o2 */
159ENTRY(U1_gs_40_fp)
160 VISExitHalf
161 add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
162 add %GLOBAL_SPARE, %g3, %o0
163 retl
164 add %o0, %o2, %o0
165ENDPROC(U1_gs_40_fp)
/* %o0 = %g3 + %o2 */
166ENTRY(U1_g3_0_fp)
167 VISExitHalf
168 retl
169 add %g3, %o2, %o0
170ENDPROC(U1_g3_0_fp)
/* %o0 = (%g3 + 8) + %o2 */
171ENTRY(U1_g3_8_fp)
172 VISExitHalf
173 add %g3, 8, %g3
174 retl
175 add %g3, %o2, %o0
176ENDPROC(U1_g3_8_fp)
/* %o0 = %o2 */
177ENTRY(U1_o2_0_fp)
178 VISExitHalf
179 retl
180 mov %o2, %o0
181ENDPROC(U1_o2_0_fp)
/* %o0 = %o2 + 1 */
182ENTRY(U1_o2_1_fp)
183 VISExitHalf
184 retl
185 add %o2, 1, %o0
186ENDPROC(U1_o2_1_fp)
/*
 * Routines named without the _fp suffix.  Each leaves a sum of counter
 * registers in %o0 and returns with retl (the instruction after retl is
 * in its delay slot).
 * NOTE(review): U1_gs_0, U1_gs_8 and U1_gs_10 begin with VISExitHalf,
 * while the remaining routines in this group do not.  Elsewhere in this
 * diff they are referenced from EX_LD/EX_ST (the non-_FP wrappers) in
 * the loop at label 72 -- confirm that running VISExitHalf on that path
 * is intended.
 */
/* %o0 = %GLOBAL_SPARE + %o2 */
187ENTRY(U1_gs_0)
188 VISExitHalf
189 retl
190 add %GLOBAL_SPARE, %o2, %o0
191ENDPROC(U1_gs_0)
/* %o0 = %GLOBAL_SPARE + %o2 + 0x8 */
192ENTRY(U1_gs_8)
193 VISExitHalf
194 add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
195 retl
196 add %GLOBAL_SPARE, 0x8, %o0
197ENDPROC(U1_gs_8)
/* %o0 = %GLOBAL_SPARE + %o2 + 0x10 */
198ENTRY(U1_gs_10)
199 VISExitHalf
200 add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
201 retl
202 add %GLOBAL_SPARE, 0x10, %o0
203ENDPROC(U1_gs_10)
/* %o0 = %o2 */
204ENTRY(U1_o2_0)
205 retl
206 mov %o2, %o0
207ENDPROC(U1_o2_0)
/* %o0 = %o2 + 8 */
208ENTRY(U1_o2_8)
209 retl
210 add %o2, 8, %o0
211ENDPROC(U1_o2_8)
/* %o0 = %o2 + 4 */
212ENTRY(U1_o2_4)
213 retl
214 add %o2, 4, %o0
215ENDPROC(U1_o2_4)
/* %o0 = %o2 + 1 */
216ENTRY(U1_o2_1)
217 retl
218 add %o2, 1, %o0
219ENDPROC(U1_o2_1)
/* %o0 = %g1 + %o2 */
220ENTRY(U1_g1_0)
221 retl
222 add %g1, %o2, %o0
223ENDPROC(U1_g1_0)
/* %o0 = (%g1 + 1) + %o2 */
224ENTRY(U1_g1_1)
225 add %g1, 1, %g1
226 retl
227 add %g1, %o2, %o0
228ENDPROC(U1_g1_1)
/*
 * The *_o2_adj routines first reduce %o2 to its low three bits
 * (and %o2, 7) and then add it to the %GLOBAL_SPARE-based count.
 * Elsewhere in this diff they tag the loop at label 8, where
 * %GLOBAL_SPARE is set to %o2 with its low three bits cleared.
 */
/* %o0 = %GLOBAL_SPARE + (%o2 & 7) */
229ENTRY(U1_gs_0_o2_adj)
230 and %o2, 7, %o2
231 retl
232 add %GLOBAL_SPARE, %o2, %o0
233ENDPROC(U1_gs_0_o2_adj)
/* %o0 = (%GLOBAL_SPARE + 8) + (%o2 & 7) */
234ENTRY(U1_gs_8_o2_adj)
235 and %o2, 7, %o2
236 add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
237 retl
238 add %GLOBAL_SPARE, %o2, %o0
239ENDPROC(U1_gs_8_o2_adj)
/* Closes the #ifndef EX_RETVAL region that holds the U1_* routines. */
240#endif
241
129 .align 64 242 .align 64
130 243
131 .globl FUNC_NAME 244 .globl FUNC_NAME
@@ -167,8 +280,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
167 and %g2, 0x38, %g2 280 and %g2, 0x38, %g2
168 281
1691: subcc %g1, 0x1, %g1 2821: subcc %g1, 0x1, %g1
170 EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) 283 EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
171 EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) 284 EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
172 bgu,pt %XCC, 1b 285 bgu,pt %XCC, 1b
173 add %o1, 0x1, %o1 286 add %o1, 0x1, %o1
174 287
@@ -179,20 +292,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
179 be,pt %icc, 3f 292 be,pt %icc, 3f
180 alignaddr %o1, %g0, %o1 293 alignaddr %o1, %g0, %o1
181 294
182 EX_LD_FP(LOAD(ldd, %o1, %f4)) 295 EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
1831: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) 2961: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
184 add %o1, 0x8, %o1 297 add %o1, 0x8, %o1
185 subcc %g2, 0x8, %g2 298 subcc %g2, 0x8, %g2
186 faligndata %f4, %f6, %f0 299 faligndata %f4, %f6, %f0
187 EX_ST_FP(STORE(std, %f0, %o0)) 300 EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
188 be,pn %icc, 3f 301 be,pn %icc, 3f
189 add %o0, 0x8, %o0 302 add %o0, 0x8, %o0
190 303
191 EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) 304 EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
192 add %o1, 0x8, %o1 305 add %o1, 0x8, %o1
193 subcc %g2, 0x8, %g2 306 subcc %g2, 0x8, %g2
194 faligndata %f6, %f4, %f0 307 faligndata %f6, %f4, %f0
195 EX_ST_FP(STORE(std, %f0, %o0)) 308 EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
196 bne,pt %icc, 1b 309 bne,pt %icc, 1b
197 add %o0, 0x8, %o0 310 add %o0, 0x8, %o0
198 311
@@ -215,13 +328,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
215 add %g1, %GLOBAL_SPARE, %g1 328 add %g1, %GLOBAL_SPARE, %g1
216 subcc %o2, %g3, %o2 329 subcc %o2, %g3, %o2
217 330
218 EX_LD_FP(LOAD_BLK(%o1, %f0)) 331 EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
219 add %o1, 0x40, %o1 332 add %o1, 0x40, %o1
220 add %g1, %g3, %g1 333 add %g1, %g3, %g1
221 EX_LD_FP(LOAD_BLK(%o1, %f16)) 334 EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
222 add %o1, 0x40, %o1 335 add %o1, 0x40, %o1
223 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE 336 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
224 EX_LD_FP(LOAD_BLK(%o1, %f32)) 337 EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
225 add %o1, 0x40, %o1 338 add %o1, 0x40, %o1
226 339
227 /* There are 8 instances of the unrolled loop, 340 /* There are 8 instances of the unrolled loop,
@@ -241,11 +354,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
241 354
242 .align 64 355 .align 64
2431: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 3561: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
244 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 357 LOOP_CHUNK1(o1, o0, 1f)
245 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 358 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
246 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 359 LOOP_CHUNK2(o1, o0, 2f)
247 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 360 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
248 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 361 LOOP_CHUNK3(o1, o0, 3f)
249 ba,pt %xcc, 1b+4 362 ba,pt %xcc, 1b+4
250 faligndata %f0, %f2, %f48 363 faligndata %f0, %f2, %f48
2511: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 3641: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
@@ -262,11 +375,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
262 STORE_JUMP(o0, f48, 56f) 375 STORE_JUMP(o0, f48, 56f)
263 376
2641: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 3771: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
265 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 378 LOOP_CHUNK1(o1, o0, 1f)
266 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 379 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
267 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 380 LOOP_CHUNK2(o1, o0, 2f)
268 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 381 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
269 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 382 LOOP_CHUNK3(o1, o0, 3f)
270 ba,pt %xcc, 1b+4 383 ba,pt %xcc, 1b+4
271 faligndata %f2, %f4, %f48 384 faligndata %f2, %f4, %f48
2721: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 3851: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
@@ -283,11 +396,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
283 STORE_JUMP(o0, f48, 57f) 396 STORE_JUMP(o0, f48, 57f)
284 397
2851: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 3981: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
286 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 399 LOOP_CHUNK1(o1, o0, 1f)
287 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 400 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
288 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 401 LOOP_CHUNK2(o1, o0, 2f)
289 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 402 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
290 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 403 LOOP_CHUNK3(o1, o0, 3f)
291 ba,pt %xcc, 1b+4 404 ba,pt %xcc, 1b+4
292 faligndata %f4, %f6, %f48 405 faligndata %f4, %f6, %f48
2931: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 4061: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
@@ -304,11 +417,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
304 STORE_JUMP(o0, f48, 58f) 417 STORE_JUMP(o0, f48, 58f)
305 418
3061: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 4191: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
307 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 420 LOOP_CHUNK1(o1, o0, 1f)
308 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 421 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
309 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 422 LOOP_CHUNK2(o1, o0, 2f)
310 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 423 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
311 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 424 LOOP_CHUNK3(o1, o0, 3f)
312 ba,pt %xcc, 1b+4 425 ba,pt %xcc, 1b+4
313 faligndata %f6, %f8, %f48 426 faligndata %f6, %f8, %f48
3141: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 4271: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
@@ -325,11 +438,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
325 STORE_JUMP(o0, f48, 59f) 438 STORE_JUMP(o0, f48, 59f)
326 439
3271: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 4401: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
328 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 441 LOOP_CHUNK1(o1, o0, 1f)
329 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 442 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
330 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 443 LOOP_CHUNK2(o1, o0, 2f)
331 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 444 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
332 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 445 LOOP_CHUNK3(o1, o0, 3f)
333 ba,pt %xcc, 1b+4 446 ba,pt %xcc, 1b+4
334 faligndata %f8, %f10, %f48 447 faligndata %f8, %f10, %f48
3351: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 4481: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
@@ -346,11 +459,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
346 STORE_JUMP(o0, f48, 60f) 459 STORE_JUMP(o0, f48, 60f)
347 460
3481: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 4611: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
349 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 462 LOOP_CHUNK1(o1, o0, 1f)
350 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 463 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
351 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 464 LOOP_CHUNK2(o1, o0, 2f)
352 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 465 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
353 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 466 LOOP_CHUNK3(o1, o0, 3f)
354 ba,pt %xcc, 1b+4 467 ba,pt %xcc, 1b+4
355 faligndata %f10, %f12, %f48 468 faligndata %f10, %f12, %f48
3561: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 4691: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
@@ -367,11 +480,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
367 STORE_JUMP(o0, f48, 61f) 480 STORE_JUMP(o0, f48, 61f)
368 481
3691: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 4821: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
370 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 483 LOOP_CHUNK1(o1, o0, 1f)
371 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 484 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
372 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 485 LOOP_CHUNK2(o1, o0, 2f)
373 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 486 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
374 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 487 LOOP_CHUNK3(o1, o0, 3f)
375 ba,pt %xcc, 1b+4 488 ba,pt %xcc, 1b+4
376 faligndata %f12, %f14, %f48 489 faligndata %f12, %f14, %f48
3771: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 4901: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
@@ -388,11 +501,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
388 STORE_JUMP(o0, f48, 62f) 501 STORE_JUMP(o0, f48, 62f)
389 502
3901: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 5031: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
391 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 504 LOOP_CHUNK1(o1, o0, 1f)
392 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 505 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
393 LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) 506 LOOP_CHUNK2(o1, o0, 2f)
394 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 507 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
395 LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) 508 LOOP_CHUNK3(o1, o0, 3f)
396 ba,pt %xcc, 1b+4 509 ba,pt %xcc, 1b+4
397 faligndata %f14, %f16, %f48 510 faligndata %f14, %f16, %f48
3981: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 5111: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
@@ -408,53 +521,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
408 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 521 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
409 STORE_JUMP(o0, f48, 63f) 522 STORE_JUMP(o0, f48, 63f)
410 523
41140: FINISH_VISCHUNK(o0, f0, f2, g3) 52440: FINISH_VISCHUNK(o0, f0, f2)
41241: FINISH_VISCHUNK(o0, f2, f4, g3) 52541: FINISH_VISCHUNK(o0, f2, f4)
41342: FINISH_VISCHUNK(o0, f4, f6, g3) 52642: FINISH_VISCHUNK(o0, f4, f6)
41443: FINISH_VISCHUNK(o0, f6, f8, g3) 52743: FINISH_VISCHUNK(o0, f6, f8)
41544: FINISH_VISCHUNK(o0, f8, f10, g3) 52844: FINISH_VISCHUNK(o0, f8, f10)
41645: FINISH_VISCHUNK(o0, f10, f12, g3) 52945: FINISH_VISCHUNK(o0, f10, f12)
41746: FINISH_VISCHUNK(o0, f12, f14, g3) 53046: FINISH_VISCHUNK(o0, f12, f14)
41847: UNEVEN_VISCHUNK(o0, f14, f0, g3) 53147: UNEVEN_VISCHUNK(o0, f14, f0)
41948: FINISH_VISCHUNK(o0, f16, f18, g3) 53248: FINISH_VISCHUNK(o0, f16, f18)
42049: FINISH_VISCHUNK(o0, f18, f20, g3) 53349: FINISH_VISCHUNK(o0, f18, f20)
42150: FINISH_VISCHUNK(o0, f20, f22, g3) 53450: FINISH_VISCHUNK(o0, f20, f22)
42251: FINISH_VISCHUNK(o0, f22, f24, g3) 53551: FINISH_VISCHUNK(o0, f22, f24)
42352: FINISH_VISCHUNK(o0, f24, f26, g3) 53652: FINISH_VISCHUNK(o0, f24, f26)
42453: FINISH_VISCHUNK(o0, f26, f28, g3) 53753: FINISH_VISCHUNK(o0, f26, f28)
42554: FINISH_VISCHUNK(o0, f28, f30, g3) 53854: FINISH_VISCHUNK(o0, f28, f30)
42655: UNEVEN_VISCHUNK(o0, f30, f0, g3) 53955: UNEVEN_VISCHUNK(o0, f30, f0)
42756: FINISH_VISCHUNK(o0, f32, f34, g3) 54056: FINISH_VISCHUNK(o0, f32, f34)
42857: FINISH_VISCHUNK(o0, f34, f36, g3) 54157: FINISH_VISCHUNK(o0, f34, f36)
42958: FINISH_VISCHUNK(o0, f36, f38, g3) 54258: FINISH_VISCHUNK(o0, f36, f38)
43059: FINISH_VISCHUNK(o0, f38, f40, g3) 54359: FINISH_VISCHUNK(o0, f38, f40)
43160: FINISH_VISCHUNK(o0, f40, f42, g3) 54460: FINISH_VISCHUNK(o0, f40, f42)
43261: FINISH_VISCHUNK(o0, f42, f44, g3) 54561: FINISH_VISCHUNK(o0, f42, f44)
43362: FINISH_VISCHUNK(o0, f44, f46, g3) 54662: FINISH_VISCHUNK(o0, f44, f46)
43463: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) 54763: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
435 548
/*
 * Label 93: 8-byte tail loop, unrolled twice.  Each half loads 8 bytes
 * from %o1 (alternating between %f2 and %f0), advances %o1, subtracts 8
 * from %g3, merges the previous and new doublewords with faligndata
 * into %f8, stores %f8 to %o0, and advances %o0 in the branch delay
 * slot.  The first half leaves for label 95 when %g3 has gone negative;
 * the second half loops back to 93 while %g3 is still >= 0.
 *
 * NOTE(review): this loop is entered from UNEVEN_VISCHUNK /
 * UNEVEN_VISCHUNK_LAST, which already subtract 8 from %g3 without
 * storing anything, and the loop subtracts 8 again before its first
 * store.  FINISH_VISCHUNK tags a store issued after ONE subtract with
 * U1_g3_8_fp; here the stores come after one more subtract than stores
 * completed, yet use the same U1_g3_8_fp, and the loads use U1_g3_0_fp.
 * That looks like the fixup value is 8 too small for every access in
 * this loop (loads would want +8, stores +16) -- confirm against the
 * %g3 setup code, which is not visible in this view.
 */
43693: EX_LD_FP(LOAD(ldd, %o1, %f2)) 54993: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
437 add %o1, 8, %o1 550 add %o1, 8, %o1
438 subcc %g3, 8, %g3 551 subcc %g3, 8, %g3
439 faligndata %f0, %f2, %f8 552 faligndata %f0, %f2, %f8
440 EX_ST_FP(STORE(std, %f8, %o0)) 553 EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
441 bl,pn %xcc, 95f 554 bl,pn %xcc, 95f
442 add %o0, 8, %o0 555 add %o0, 8, %o0
443 EX_LD_FP(LOAD(ldd, %o1, %f0)) 556 EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
444 add %o1, 8, %o1 557 add %o1, 8, %o1
445 subcc %g3, 8, %g3 558 subcc %g3, 8, %g3
446 faligndata %f2, %f0, %f8 559 faligndata %f2, %f0, %f8
447 EX_ST_FP(STORE(std, %f8, %o0)) 560 EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
448 bge,pt %xcc, 93b 561 bge,pt %xcc, 93b
449 add %o0, 8, %o0 562 add %o0, 8, %o0
450 563
45195: brz,pt %o2, 2f 56495: brz,pt %o2, 2f
452 mov %g1, %o1 565 mov %g1, %o1
453 566
4541: EX_LD_FP(LOAD(ldub, %o1, %o3)) 5671: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
455 add %o1, 1, %o1 568 add %o1, 1, %o1
456 subcc %o2, 1, %o2 569 subcc %o2, 1, %o2
457 EX_ST_FP(STORE(stb, %o3, %o0)) 570 EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
458 bne,pt %xcc, 1b 571 bne,pt %xcc, 1b
459 add %o0, 1, %o0 572 add %o0, 1, %o0
460 573
@@ -470,27 +583,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
470 583
47172: andn %o2, 0xf, %GLOBAL_SPARE 58472: andn %o2, 0xf, %GLOBAL_SPARE
472 and %o2, 0xf, %o2 585 and %o2, 0xf, %o2
4731: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) 5861: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
474 EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) 587 EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
475 subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE 588 subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
476 EX_ST(STORE(stx, %o5, %o1 + %o3)) 589 EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
477 add %o1, 0x8, %o1 590 add %o1, 0x8, %o1
478 EX_ST(STORE(stx, %g1, %o1 + %o3)) 591 EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
479 bgu,pt %XCC, 1b 592 bgu,pt %XCC, 1b
480 add %o1, 0x8, %o1 593 add %o1, 0x8, %o1
48173: andcc %o2, 0x8, %g0 59473: andcc %o2, 0x8, %g0
482 be,pt %XCC, 1f 595 be,pt %XCC, 1f
483 nop 596 nop
484 EX_LD(LOAD(ldx, %o1, %o5)) 597 EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
485 sub %o2, 0x8, %o2 598 sub %o2, 0x8, %o2
486 EX_ST(STORE(stx, %o5, %o1 + %o3)) 599 EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
487 add %o1, 0x8, %o1 600 add %o1, 0x8, %o1
4881: andcc %o2, 0x4, %g0 6011: andcc %o2, 0x4, %g0
489 be,pt %XCC, 1f 602 be,pt %XCC, 1f
490 nop 603 nop
491 EX_LD(LOAD(lduw, %o1, %o5)) 604 EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
492 sub %o2, 0x4, %o2 605 sub %o2, 0x4, %o2
493 EX_ST(STORE(stw, %o5, %o1 + %o3)) 606 EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
494 add %o1, 0x4, %o1 607 add %o1, 0x4, %o1
4951: cmp %o2, 0 6081: cmp %o2, 0
496 be,pt %XCC, 85f 609 be,pt %XCC, 85f
@@ -504,9 +617,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
504 sub %g0, %g1, %g1 617 sub %g0, %g1, %g1
505 sub %o2, %g1, %o2 618 sub %o2, %g1, %o2
506 619
5071: EX_LD(LOAD(ldub, %o1, %o5)) 6201: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
508 subcc %g1, 1, %g1 621 subcc %g1, 1, %g1
509 EX_ST(STORE(stb, %o5, %o1 + %o3)) 622 EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
510 bgu,pt %icc, 1b 623 bgu,pt %icc, 1b
511 add %o1, 1, %o1 624 add %o1, 1, %o1
512 625
@@ -522,16 +635,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
522 635
5238: mov 64, %o3 6368: mov 64, %o3
524 andn %o1, 0x7, %o1 637 andn %o1, 0x7, %o1
525 EX_LD(LOAD(ldx, %o1, %g2)) 638 EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
526 sub %o3, %g1, %o3 639 sub %o3, %g1, %o3
527 andn %o2, 0x7, %GLOBAL_SPARE 640 andn %o2, 0x7, %GLOBAL_SPARE
528 sllx %g2, %g1, %g2 641 sllx %g2, %g1, %g2
5291: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) 6421: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
530 subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE 643 subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
531 add %o1, 0x8, %o1 644 add %o1, 0x8, %o1
532 srlx %g3, %o3, %o5 645 srlx %g3, %o3, %o5
533 or %o5, %g2, %o5 646 or %o5, %g2, %o5
534 EX_ST(STORE(stx, %o5, %o0)) 647 EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
535 add %o0, 0x8, %o0 648 add %o0, 0x8, %o0
536 bgu,pt %icc, 1b 649 bgu,pt %icc, 1b
537 sllx %g3, %g1, %g2 650 sllx %g3, %g1, %g2
@@ -549,9 +662,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
549 bne,pn %XCC, 90f 662 bne,pn %XCC, 90f
550 sub %o0, %o1, %o3 663 sub %o0, %o1, %o3
551 664
5521: EX_LD(LOAD(lduw, %o1, %g1)) 6651: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
553 subcc %o2, 4, %o2 666 subcc %o2, 4, %o2
554 EX_ST(STORE(stw, %g1, %o1 + %o3)) 667 EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
555 bgu,pt %XCC, 1b 668 bgu,pt %XCC, 1b
556 add %o1, 4, %o1 669 add %o1, 4, %o1
557 670
@@ -559,9 +672,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
559 mov EX_RETVAL(%o4), %o0 672 mov EX_RETVAL(%o4), %o0
560 673
561 .align 32 674 .align 32
56290: EX_LD(LOAD(ldub, %o1, %g1)) 67590: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
563 subcc %o2, 1, %o2 676 subcc %o2, 1, %o2
564 EX_ST(STORE(stb, %g1, %o1 + %o3)) 677 EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
565 bgu,pt %XCC, 90b 678 bgu,pt %XCC, 90b
566 add %o1, 1, %o1 679 add %o1, 1, %o1
567 retl 680 retl