aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/lib
diff options
context:
space:
mode:
author Rob Gardner <rob.gardner@oracle.com> 2015-12-23 01:24:49 -0500
committer David S. Miller <davem@davemloft.net> 2015-12-24 12:13:18 -0500
commit a7c5724b5c17775ca8ea2fd9906d8a7e37337cce (patch)
tree 802877bf40b98d95256551b97ac42492705291d3 /arch/sparc/lib
parent 833526941f5945cf0b22a595bb8f3525b512f654 (diff)
sparc64: fix FP corruption in user copy functions
Short story: Exception handlers used by some copy_to_user() and copy_from_user() functions do not diligently clean up floating point register usage, and this can result in a user process seeing invalid values in floating point registers. This sometimes makes the process fail.

Long story: Several cpu-specific (NG4, NG2, U1, U3) memcpy functions use floating point registers and VIS alignaddr/faligndata to accelerate data copying when source and dest addresses don't align well. Linux uses a lazy scheme for saving floating point registers; it is not done upon entering the kernel since it's a very expensive operation. Rather, it is done only when needed. If the kernel ends up not using FP regs during the course of some trap or system call, then it can return to user space without saving or restoring them.

The various memcpy functions begin their FP code with VISEntry (or a variation thereof), which saves the FP regs. They conclude their FP code with VISExit (or a variation) which essentially marks the FP regs "clean", i.e., they contain no unsaved values. fprs.FPRS_FEF is turned off so that a lazy restore will be triggered when/if the user process accesses floating point regs again.

The bug is that the user copy variants of memcpy, copy_from_user() and copy_to_user(), employ an exception handling mechanism to detect faults when accessing user space addresses, and when this handler is invoked, an immediate return from the function is forced, and VISExit is not executed, thus leaving the fprs register in an indeterminate state, but often with fprs.FPRS_FEF set and one or more dirty bits. This results in a return to user space with invalid values in the FP regs, and since fprs.FPRS_FEF is on, no lazy restore occurs.

This bug affects copy_to_user() and copy_from_user() for NG4, NG2, U3, and U1. All are fixed by using a new exception handler for those loads and stores that are done during the time between VISEntry and VISExit.

N.B. In NG4memcpy, the problematic code can be triggered by a copy size greater than 128 bytes and an unaligned source address. This bug is known to be the cause of random user process memory corruptions while perf is running with the callgraph option (i.e., perf record -g). This occurs because perf uses copy_from_user() to read user stacks, and may fault when it follows a stack frame pointer off to an invalid page. Validation checks on the stack address just obscure the underlying problem.

Signed-off-by: Rob Gardner <rob.gardner@oracle.com>
Signed-off-by: Dave Aldridge <david.j.aldridge@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/lib')
-rw-r--r-- arch/sparc/lib/NG2copy_from_user.S 8
-rw-r--r-- arch/sparc/lib/NG2copy_to_user.S 8
-rw-r--r-- arch/sparc/lib/NG2memcpy.S 118
-rw-r--r-- arch/sparc/lib/NG4copy_from_user.S 8
-rw-r--r-- arch/sparc/lib/NG4copy_to_user.S 8
-rw-r--r-- arch/sparc/lib/NG4memcpy.S 40
-rw-r--r-- arch/sparc/lib/U1copy_from_user.S 8
-rw-r--r-- arch/sparc/lib/U1copy_to_user.S 8
-rw-r--r-- arch/sparc/lib/U1memcpy.S 48
-rw-r--r-- arch/sparc/lib/U3copy_from_user.S 8
-rw-r--r-- arch/sparc/lib/U3copy_to_user.S 8
-rw-r--r-- arch/sparc/lib/U3memcpy.S 86
12 files changed, 222 insertions, 134 deletions
diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
index 119ccb9a54f4..d5242b8c4f94 100644
--- a/arch/sparc/lib/NG2copy_from_user.S
+++ b/arch/sparc/lib/NG2copy_from_user.S
@@ -11,6 +11,14 @@
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_LD_FP(x) \
1598: x; \
16 .section __ex_table,"a";\
17 .align 4; \
18 .word 98b, __retl_one_asi_fp;\
19 .text; \
20 .align 4;
21
14#ifndef ASI_AIUS 22#ifndef ASI_AIUS
15#define ASI_AIUS 0x11 23#define ASI_AIUS 0x11
16#endif 24#endif
diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
index 7fe1ccefd9d0..4e962d993b10 100644
--- a/arch/sparc/lib/NG2copy_to_user.S
+++ b/arch/sparc/lib/NG2copy_to_user.S
@@ -11,6 +11,14 @@
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_ST_FP(x) \
1598: x; \
16 .section __ex_table,"a";\
17 .align 4; \
18 .word 98b, __retl_one_asi_fp;\
19 .text; \
20 .align 4;
21
14#ifndef ASI_AIUS 22#ifndef ASI_AIUS
15#define ASI_AIUS 0x11 23#define ASI_AIUS 0x11
16#endif 24#endif
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index 30eee6e8a81b..d5f585df2f3f 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -34,10 +34,16 @@
34#ifndef EX_LD 34#ifndef EX_LD
35#define EX_LD(x) x 35#define EX_LD(x) x
36#endif 36#endif
37#ifndef EX_LD_FP
38#define EX_LD_FP(x) x
39#endif
37 40
38#ifndef EX_ST 41#ifndef EX_ST
39#define EX_ST(x) x 42#define EX_ST(x) x
40#endif 43#endif
44#ifndef EX_ST_FP
45#define EX_ST_FP(x) x
46#endif
41 47
42#ifndef EX_RETVAL 48#ifndef EX_RETVAL
43#define EX_RETVAL(x) x 49#define EX_RETVAL(x) x
@@ -134,40 +140,40 @@
134 fsrc2 %x6, %f12; \ 140 fsrc2 %x6, %f12; \
135 fsrc2 %x7, %f14; 141 fsrc2 %x7, %f14;
136#define FREG_LOAD_1(base, x0) \ 142#define FREG_LOAD_1(base, x0) \
137 EX_LD(LOAD(ldd, base + 0x00, %x0)) 143 EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
138#define FREG_LOAD_2(base, x0, x1) \ 144#define FREG_LOAD_2(base, x0, x1) \
139 EX_LD(LOAD(ldd, base + 0x00, %x0)); \ 145 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
140 EX_LD(LOAD(ldd, base + 0x08, %x1)); 146 EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
141#define FREG_LOAD_3(base, x0, x1, x2) \ 147#define FREG_LOAD_3(base, x0, x1, x2) \
142 EX_LD(LOAD(ldd, base + 0x00, %x0)); \ 148 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
143 EX_LD(LOAD(ldd, base + 0x08, %x1)); \ 149 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
144 EX_LD(LOAD(ldd, base + 0x10, %x2)); 150 EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
145#define FREG_LOAD_4(base, x0, x1, x2, x3) \ 151#define FREG_LOAD_4(base, x0, x1, x2, x3) \
146 EX_LD(LOAD(ldd, base + 0x00, %x0)); \ 152 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
147 EX_LD(LOAD(ldd, base + 0x08, %x1)); \ 153 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
148 EX_LD(LOAD(ldd, base + 0x10, %x2)); \ 154 EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
149 EX_LD(LOAD(ldd, base + 0x18, %x3)); 155 EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
150#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ 156#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
151 EX_LD(LOAD(ldd, base + 0x00, %x0)); \ 157 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
152 EX_LD(LOAD(ldd, base + 0x08, %x1)); \ 158 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
153 EX_LD(LOAD(ldd, base + 0x10, %x2)); \ 159 EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
154 EX_LD(LOAD(ldd, base + 0x18, %x3)); \ 160 EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
155 EX_LD(LOAD(ldd, base + 0x20, %x4)); 161 EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
156#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ 162#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
157 EX_LD(LOAD(ldd, base + 0x00, %x0)); \ 163 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
158 EX_LD(LOAD(ldd, base + 0x08, %x1)); \ 164 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
159 EX_LD(LOAD(ldd, base + 0x10, %x2)); \ 165 EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
160 EX_LD(LOAD(ldd, base + 0x18, %x3)); \ 166 EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
161 EX_LD(LOAD(ldd, base + 0x20, %x4)); \ 167 EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
162 EX_LD(LOAD(ldd, base + 0x28, %x5)); 168 EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
163#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ 169#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
164 EX_LD(LOAD(ldd, base + 0x00, %x0)); \ 170 EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
165 EX_LD(LOAD(ldd, base + 0x08, %x1)); \ 171 EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
166 EX_LD(LOAD(ldd, base + 0x10, %x2)); \ 172 EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
167 EX_LD(LOAD(ldd, base + 0x18, %x3)); \ 173 EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
168 EX_LD(LOAD(ldd, base + 0x20, %x4)); \ 174 EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
169 EX_LD(LOAD(ldd, base + 0x28, %x5)); \ 175 EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
170 EX_LD(LOAD(ldd, base + 0x30, %x6)); 176 EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
171 177
172 .register %g2,#scratch 178 .register %g2,#scratch
173 .register %g3,#scratch 179 .register %g3,#scratch
@@ -275,11 +281,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
275 nop 281 nop
276 /* fall through for 0 < low bits < 8 */ 282 /* fall through for 0 < low bits < 8 */
277110: sub %o4, 64, %g2 283110: sub %o4, 64, %g2
278 EX_LD(LOAD_BLK(%g2, %f0)) 284 EX_LD_FP(LOAD_BLK(%g2, %f0))
2791: EX_ST(STORE_INIT(%g0, %o4 + %g3)) 2851: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
280 EX_LD(LOAD_BLK(%o4, %f16)) 286 EX_LD_FP(LOAD_BLK(%o4, %f16))
281 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) 287 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
282 EX_ST(STORE_BLK(%f0, %o4 + %g3)) 288 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
283 FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) 289 FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
284 subcc %g1, 64, %g1 290 subcc %g1, 64, %g1
285 add %o4, 64, %o4 291 add %o4, 64, %o4
@@ -290,10 +296,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
290 296
291120: sub %o4, 56, %g2 297120: sub %o4, 56, %g2
292 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) 298 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
2931: EX_ST(STORE_INIT(%g0, %o4 + %g3)) 2991: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
294 EX_LD(LOAD_BLK(%o4, %f16)) 300 EX_LD_FP(LOAD_BLK(%o4, %f16))
295 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) 301 FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
296 EX_ST(STORE_BLK(%f0, %o4 + %g3)) 302 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
297 FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) 303 FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
298 subcc %g1, 64, %g1 304 subcc %g1, 64, %g1
299 add %o4, 64, %o4 305 add %o4, 64, %o4
@@ -304,10 +310,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
304 310
305130: sub %o4, 48, %g2 311130: sub %o4, 48, %g2
306 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) 312 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
3071: EX_ST(STORE_INIT(%g0, %o4 + %g3)) 3131: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
308 EX_LD(LOAD_BLK(%o4, %f16)) 314 EX_LD_FP(LOAD_BLK(%o4, %f16))
309 FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) 315 FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
310 EX_ST(STORE_BLK(%f0, %o4 + %g3)) 316 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
311 FREG_MOVE_6(f20, f22, f24, f26, f28, f30) 317 FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
312 subcc %g1, 64, %g1 318 subcc %g1, 64, %g1
313 add %o4, 64, %o4 319 add %o4, 64, %o4
@@ -318,10 +324,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
318 324
319140: sub %o4, 40, %g2 325140: sub %o4, 40, %g2
320 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) 326 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
3211: EX_ST(STORE_INIT(%g0, %o4 + %g3)) 3271: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
322 EX_LD(LOAD_BLK(%o4, %f16)) 328 EX_LD_FP(LOAD_BLK(%o4, %f16))
323 FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) 329 FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
324 EX_ST(STORE_BLK(%f0, %o4 + %g3)) 330 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
325 FREG_MOVE_5(f22, f24, f26, f28, f30) 331 FREG_MOVE_5(f22, f24, f26, f28, f30)
326 subcc %g1, 64, %g1 332 subcc %g1, 64, %g1
327 add %o4, 64, %o4 333 add %o4, 64, %o4
@@ -332,10 +338,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
332 338
333150: sub %o4, 32, %g2 339150: sub %o4, 32, %g2
334 FREG_LOAD_4(%g2, f0, f2, f4, f6) 340 FREG_LOAD_4(%g2, f0, f2, f4, f6)
3351: EX_ST(STORE_INIT(%g0, %o4 + %g3)) 3411: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
336 EX_LD(LOAD_BLK(%o4, %f16)) 342 EX_LD_FP(LOAD_BLK(%o4, %f16))
337 FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) 343 FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
338 EX_ST(STORE_BLK(%f0, %o4 + %g3)) 344 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
339 FREG_MOVE_4(f24, f26, f28, f30) 345 FREG_MOVE_4(f24, f26, f28, f30)
340 subcc %g1, 64, %g1 346 subcc %g1, 64, %g1
341 add %o4, 64, %o4 347 add %o4, 64, %o4
@@ -346,10 +352,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
346 352
347160: sub %o4, 24, %g2 353160: sub %o4, 24, %g2
348 FREG_LOAD_3(%g2, f0, f2, f4) 354 FREG_LOAD_3(%g2, f0, f2, f4)
3491: EX_ST(STORE_INIT(%g0, %o4 + %g3)) 3551: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
350 EX_LD(LOAD_BLK(%o4, %f16)) 356 EX_LD_FP(LOAD_BLK(%o4, %f16))
351 FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) 357 FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
352 EX_ST(STORE_BLK(%f0, %o4 + %g3)) 358 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
353 FREG_MOVE_3(f26, f28, f30) 359 FREG_MOVE_3(f26, f28, f30)
354 subcc %g1, 64, %g1 360 subcc %g1, 64, %g1
355 add %o4, 64, %o4 361 add %o4, 64, %o4
@@ -360,10 +366,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
360 366
361170: sub %o4, 16, %g2 367170: sub %o4, 16, %g2
362 FREG_LOAD_2(%g2, f0, f2) 368 FREG_LOAD_2(%g2, f0, f2)
3631: EX_ST(STORE_INIT(%g0, %o4 + %g3)) 3691: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
364 EX_LD(LOAD_BLK(%o4, %f16)) 370 EX_LD_FP(LOAD_BLK(%o4, %f16))
365 FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) 371 FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
366 EX_ST(STORE_BLK(%f0, %o4 + %g3)) 372 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
367 FREG_MOVE_2(f28, f30) 373 FREG_MOVE_2(f28, f30)
368 subcc %g1, 64, %g1 374 subcc %g1, 64, %g1
369 add %o4, 64, %o4 375 add %o4, 64, %o4
@@ -374,10 +380,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
374 380
375180: sub %o4, 8, %g2 381180: sub %o4, 8, %g2
376 FREG_LOAD_1(%g2, f0) 382 FREG_LOAD_1(%g2, f0)
3771: EX_ST(STORE_INIT(%g0, %o4 + %g3)) 3831: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
378 EX_LD(LOAD_BLK(%o4, %f16)) 384 EX_LD_FP(LOAD_BLK(%o4, %f16))
379 FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) 385 FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
380 EX_ST(STORE_BLK(%f0, %o4 + %g3)) 386 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
381 FREG_MOVE_1(f30) 387 FREG_MOVE_1(f30)
382 subcc %g1, 64, %g1 388 subcc %g1, 64, %g1
383 add %o4, 64, %o4 389 add %o4, 64, %o4
@@ -387,10 +393,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
387 nop 393 nop
388 394
389190: 395190:
3901: EX_ST(STORE_INIT(%g0, %o4 + %g3)) 3961: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
391 subcc %g1, 64, %g1 397 subcc %g1, 64, %g1
392 EX_LD(LOAD_BLK(%o4, %f0)) 398 EX_LD_FP(LOAD_BLK(%o4, %f0))
393 EX_ST(STORE_BLK(%f0, %o4 + %g3)) 399 EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
394 add %o4, 64, %o4 400 add %o4, 64, %o4
395 bne,pt %xcc, 1b 401 bne,pt %xcc, 1b
396 LOAD(prefetch, %o4 + 64, #one_read) 402 LOAD(prefetch, %o4 + 64, #one_read)
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
index fd9f903ffa32..2e8ee7ad07a9 100644
--- a/arch/sparc/lib/NG4copy_from_user.S
+++ b/arch/sparc/lib/NG4copy_from_user.S
@@ -11,6 +11,14 @@
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_LD_FP(x) \
1598: x; \
16 .section __ex_table,"a";\
17 .align 4; \
18 .word 98b, __retl_one_asi_fp;\
19 .text; \
20 .align 4;
21
14#ifndef ASI_AIUS 22#ifndef ASI_AIUS
15#define ASI_AIUS 0x11 23#define ASI_AIUS 0x11
16#endif 24#endif
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
index 9744c4540a8d..be0bf4590df8 100644
--- a/arch/sparc/lib/NG4copy_to_user.S
+++ b/arch/sparc/lib/NG4copy_to_user.S
@@ -11,6 +11,14 @@
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_ST_FP(x) \
1598: x; \
16 .section __ex_table,"a";\
17 .align 4; \
18 .word 98b, __retl_one_asi_fp;\
19 .text; \
20 .align 4;
21
14#ifndef ASI_AIUS 22#ifndef ASI_AIUS
15#define ASI_AIUS 0x11 23#define ASI_AIUS 0x11
16#endif 24#endif
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 83aeeb1dffdb..8e13ee1f4454 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -48,10 +48,16 @@
48#ifndef EX_LD 48#ifndef EX_LD
49#define EX_LD(x) x 49#define EX_LD(x) x
50#endif 50#endif
51#ifndef EX_LD_FP
52#define EX_LD_FP(x) x
53#endif
51 54
52#ifndef EX_ST 55#ifndef EX_ST
53#define EX_ST(x) x 56#define EX_ST(x) x
54#endif 57#endif
58#ifndef EX_ST_FP
59#define EX_ST_FP(x) x
60#endif
55 61
56#ifndef EX_RETVAL 62#ifndef EX_RETVAL
57#define EX_RETVAL(x) x 63#define EX_RETVAL(x) x
@@ -210,17 +216,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
210 sub %o2, %o4, %o2 216 sub %o2, %o4, %o2
211 alignaddr %o1, %g0, %g1 217 alignaddr %o1, %g0, %g1
212 add %o1, %o4, %o1 218 add %o1, %o4, %o1
213 EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) 219 EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
2141: EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) 2201: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
215 subcc %o4, 0x40, %o4 221 subcc %o4, 0x40, %o4
216 EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) 222 EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
217 EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) 223 EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
218 EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) 224 EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
219 EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) 225 EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
220 EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) 226 EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
221 EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) 227 EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
222 faligndata %f0, %f2, %f16 228 faligndata %f0, %f2, %f16
223 EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) 229 EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
224 faligndata %f2, %f4, %f18 230 faligndata %f2, %f4, %f18
225 add %g1, 0x40, %g1 231 add %g1, 0x40, %g1
226 faligndata %f4, %f6, %f20 232 faligndata %f4, %f6, %f20
@@ -229,14 +235,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
229 faligndata %f10, %f12, %f26 235 faligndata %f10, %f12, %f26
230 faligndata %f12, %f14, %f28 236 faligndata %f12, %f14, %f28
231 faligndata %f14, %f0, %f30 237 faligndata %f14, %f0, %f30
232 EX_ST(STORE(std, %f16, %o0 + 0x00)) 238 EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
233 EX_ST(STORE(std, %f18, %o0 + 0x08)) 239 EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
234 EX_ST(STORE(std, %f20, %o0 + 0x10)) 240 EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
235 EX_ST(STORE(std, %f22, %o0 + 0x18)) 241 EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
236 EX_ST(STORE(std, %f24, %o0 + 0x20)) 242 EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
237 EX_ST(STORE(std, %f26, %o0 + 0x28)) 243 EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
238 EX_ST(STORE(std, %f28, %o0 + 0x30)) 244 EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
239 EX_ST(STORE(std, %f30, %o0 + 0x38)) 245 EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
240 add %o0, 0x40, %o0 246 add %o0, 0x40, %o0
241 bne,pt %icc, 1b 247 bne,pt %icc, 1b
242 LOAD(prefetch, %g1 + 0x200, #n_reads_strong) 248 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
index a6ae2ea04bf5..ecc5692fa2b4 100644
--- a/arch/sparc/lib/U1copy_from_user.S
+++ b/arch/sparc/lib/U1copy_from_user.S
@@ -11,6 +11,14 @@
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_LD_FP(x) \
1598: x; \
16 .section __ex_table,"a";\
17 .align 4; \
18 .word 98b, __retl_one_fp;\
19 .text; \
20 .align 4;
21
14#define FUNC_NAME ___copy_from_user 22#define FUNC_NAME ___copy_from_user
15#define LOAD(type,addr,dest) type##a [addr] %asi, dest 23#define LOAD(type,addr,dest) type##a [addr] %asi, dest
16#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_AIUS, dest 24#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_AIUS, dest
diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
index f4b970eeb485..9eea392e44d4 100644
--- a/arch/sparc/lib/U1copy_to_user.S
+++ b/arch/sparc/lib/U1copy_to_user.S
@@ -11,6 +11,14 @@
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_ST_FP(x) \
1598: x; \
16 .section __ex_table,"a";\
17 .align 4; \
18 .word 98b, __retl_one_fp;\
19 .text; \
20 .align 4;
21
14#define FUNC_NAME ___copy_to_user 22#define FUNC_NAME ___copy_to_user
15#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS 23#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
16#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS 24#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS
diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
index b67142b7768e..3e6209ebb7d7 100644
--- a/arch/sparc/lib/U1memcpy.S
+++ b/arch/sparc/lib/U1memcpy.S
@@ -25,10 +25,16 @@
25#ifndef EX_LD 25#ifndef EX_LD
26#define EX_LD(x) x 26#define EX_LD(x) x
27#endif 27#endif
28#ifndef EX_LD_FP
29#define EX_LD_FP(x) x
30#endif
28 31
29#ifndef EX_ST 32#ifndef EX_ST
30#define EX_ST(x) x 33#define EX_ST(x) x
31#endif 34#endif
35#ifndef EX_ST_FP
36#define EX_ST_FP(x) x
37#endif
32 38
33#ifndef EX_RETVAL 39#ifndef EX_RETVAL
34#define EX_RETVAL(x) x 40#define EX_RETVAL(x) x
@@ -73,8 +79,8 @@
73 faligndata %f8, %f9, %f62; 79 faligndata %f8, %f9, %f62;
74 80
75#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ 81#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
76 EX_LD(LOAD_BLK(%src, %fdest)); \ 82 EX_LD_FP(LOAD_BLK(%src, %fdest)); \
77 EX_ST(STORE_BLK(%fsrc, %dest)); \ 83 EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
78 add %src, 0x40, %src; \ 84 add %src, 0x40, %src; \
79 subcc %len, 0x40, %len; \ 85 subcc %len, 0x40, %len; \
80 be,pn %xcc, jmptgt; \ 86 be,pn %xcc, jmptgt; \
@@ -89,12 +95,12 @@
89 95
90#define DO_SYNC membar #Sync; 96#define DO_SYNC membar #Sync;
91#define STORE_SYNC(dest, fsrc) \ 97#define STORE_SYNC(dest, fsrc) \
92 EX_ST(STORE_BLK(%fsrc, %dest)); \ 98 EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
93 add %dest, 0x40, %dest; \ 99 add %dest, 0x40, %dest; \
94 DO_SYNC 100 DO_SYNC
95 101
96#define STORE_JUMP(dest, fsrc, target) \ 102#define STORE_JUMP(dest, fsrc, target) \
97 EX_ST(STORE_BLK(%fsrc, %dest)); \ 103 EX_ST_FP(STORE_BLK(%fsrc, %dest)); \
98 add %dest, 0x40, %dest; \ 104 add %dest, 0x40, %dest; \
99 ba,pt %xcc, target; \ 105 ba,pt %xcc, target; \
100 nop; 106 nop;
@@ -103,7 +109,7 @@
103 subcc %left, 8, %left;\ 109 subcc %left, 8, %left;\
104 bl,pn %xcc, 95f; \ 110 bl,pn %xcc, 95f; \
105 faligndata %f0, %f1, %f48; \ 111 faligndata %f0, %f1, %f48; \
106 EX_ST(STORE(std, %f48, %dest)); \ 112 EX_ST_FP(STORE(std, %f48, %dest)); \
107 add %dest, 8, %dest; 113 add %dest, 8, %dest;
108 114
109#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ 115#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \
@@ -160,8 +166,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
160 and %g2, 0x38, %g2 166 and %g2, 0x38, %g2
161 167
1621: subcc %g1, 0x1, %g1 1681: subcc %g1, 0x1, %g1
163 EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) 169 EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
164 EX_ST(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) 170 EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
165 bgu,pt %XCC, 1b 171 bgu,pt %XCC, 1b
166 add %o1, 0x1, %o1 172 add %o1, 0x1, %o1
167 173
@@ -172,20 +178,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
172 be,pt %icc, 3f 178 be,pt %icc, 3f
173 alignaddr %o1, %g0, %o1 179 alignaddr %o1, %g0, %o1
174 180
175 EX_LD(LOAD(ldd, %o1, %f4)) 181 EX_LD_FP(LOAD(ldd, %o1, %f4))
1761: EX_LD(LOAD(ldd, %o1 + 0x8, %f6)) 1821: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
177 add %o1, 0x8, %o1 183 add %o1, 0x8, %o1
178 subcc %g2, 0x8, %g2 184 subcc %g2, 0x8, %g2
179 faligndata %f4, %f6, %f0 185 faligndata %f4, %f6, %f0
180 EX_ST(STORE(std, %f0, %o0)) 186 EX_ST_FP(STORE(std, %f0, %o0))
181 be,pn %icc, 3f 187 be,pn %icc, 3f
182 add %o0, 0x8, %o0 188 add %o0, 0x8, %o0
183 189
184 EX_LD(LOAD(ldd, %o1 + 0x8, %f4)) 190 EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
185 add %o1, 0x8, %o1 191 add %o1, 0x8, %o1
186 subcc %g2, 0x8, %g2 192 subcc %g2, 0x8, %g2
187 faligndata %f6, %f4, %f0 193 faligndata %f6, %f4, %f0
188 EX_ST(STORE(std, %f0, %o0)) 194 EX_ST_FP(STORE(std, %f0, %o0))
189 bne,pt %icc, 1b 195 bne,pt %icc, 1b
190 add %o0, 0x8, %o0 196 add %o0, 0x8, %o0
191 197
@@ -208,13 +214,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
208 add %g1, %GLOBAL_SPARE, %g1 214 add %g1, %GLOBAL_SPARE, %g1
209 subcc %o2, %g3, %o2 215 subcc %o2, %g3, %o2
210 216
211 EX_LD(LOAD_BLK(%o1, %f0)) 217 EX_LD_FP(LOAD_BLK(%o1, %f0))
212 add %o1, 0x40, %o1 218 add %o1, 0x40, %o1
213 add %g1, %g3, %g1 219 add %g1, %g3, %g1
214 EX_LD(LOAD_BLK(%o1, %f16)) 220 EX_LD_FP(LOAD_BLK(%o1, %f16))
215 add %o1, 0x40, %o1 221 add %o1, 0x40, %o1
216 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE 222 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
217 EX_LD(LOAD_BLK(%o1, %f32)) 223 EX_LD_FP(LOAD_BLK(%o1, %f32))
218 add %o1, 0x40, %o1 224 add %o1, 0x40, %o1
219 225
220 /* There are 8 instances of the unrolled loop, 226 /* There are 8 instances of the unrolled loop,
@@ -426,28 +432,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
42662: FINISH_VISCHUNK(o0, f44, f46, g3) 43262: FINISH_VISCHUNK(o0, f44, f46, g3)
42763: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) 43363: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3)
428 434
42993: EX_LD(LOAD(ldd, %o1, %f2)) 43593: EX_LD_FP(LOAD(ldd, %o1, %f2))
430 add %o1, 8, %o1 436 add %o1, 8, %o1
431 subcc %g3, 8, %g3 437 subcc %g3, 8, %g3
432 faligndata %f0, %f2, %f8 438 faligndata %f0, %f2, %f8
433 EX_ST(STORE(std, %f8, %o0)) 439 EX_ST_FP(STORE(std, %f8, %o0))
434 bl,pn %xcc, 95f 440 bl,pn %xcc, 95f
435 add %o0, 8, %o0 441 add %o0, 8, %o0
436 EX_LD(LOAD(ldd, %o1, %f0)) 442 EX_LD_FP(LOAD(ldd, %o1, %f0))
437 add %o1, 8, %o1 443 add %o1, 8, %o1
438 subcc %g3, 8, %g3 444 subcc %g3, 8, %g3
439 faligndata %f2, %f0, %f8 445 faligndata %f2, %f0, %f8
440 EX_ST(STORE(std, %f8, %o0)) 446 EX_ST_FP(STORE(std, %f8, %o0))
441 bge,pt %xcc, 93b 447 bge,pt %xcc, 93b
442 add %o0, 8, %o0 448 add %o0, 8, %o0
443 449
44495: brz,pt %o2, 2f 45095: brz,pt %o2, 2f
445 mov %g1, %o1 451 mov %g1, %o1
446 452
4471: EX_LD(LOAD(ldub, %o1, %o3)) 4531: EX_LD_FP(LOAD(ldub, %o1, %o3))
448 add %o1, 1, %o1 454 add %o1, 1, %o1
449 subcc %o2, 1, %o2 455 subcc %o2, 1, %o2
450 EX_ST(STORE(stb, %o3, %o0)) 456 EX_ST_FP(STORE(stb, %o3, %o0))
451 bne,pt %xcc, 1b 457 bne,pt %xcc, 1b
452 add %o0, 1, %o0 458 add %o0, 1, %o0
453 459
diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
index b1acd1331c33..88ad73d86fe4 100644
--- a/arch/sparc/lib/U3copy_from_user.S
+++ b/arch/sparc/lib/U3copy_from_user.S
@@ -11,6 +11,14 @@
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_LD_FP(x) \
1598: x; \
16 .section __ex_table,"a";\
17 .align 4; \
18 .word 98b, __retl_one_fp;\
19 .text; \
20 .align 4;
21
14#define FUNC_NAME U3copy_from_user 22#define FUNC_NAME U3copy_from_user
15#define LOAD(type,addr,dest) type##a [addr] %asi, dest 23#define LOAD(type,addr,dest) type##a [addr] %asi, dest
16#define EX_RETVAL(x) 0 24#define EX_RETVAL(x) 0
diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
index ef1e493afdfa..845139d75537 100644
--- a/arch/sparc/lib/U3copy_to_user.S
+++ b/arch/sparc/lib/U3copy_to_user.S
@@ -11,6 +11,14 @@
11 .text; \ 11 .text; \
12 .align 4; 12 .align 4;
13 13
14#define EX_ST_FP(x) \
1598: x; \
16 .section __ex_table,"a";\
17 .align 4; \
18 .word 98b, __retl_one_fp;\
19 .text; \
20 .align 4;
21
14#define FUNC_NAME U3copy_to_user 22#define FUNC_NAME U3copy_to_user
15#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS 23#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
16#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS 24#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS
diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
index 7cae9cc6a204..491ee69e4995 100644
--- a/arch/sparc/lib/U3memcpy.S
+++ b/arch/sparc/lib/U3memcpy.S
@@ -24,10 +24,16 @@
24#ifndef EX_LD 24#ifndef EX_LD
25#define EX_LD(x) x 25#define EX_LD(x) x
26#endif 26#endif
27#ifndef EX_LD_FP
28#define EX_LD_FP(x) x
29#endif
27 30
28#ifndef EX_ST 31#ifndef EX_ST
29#define EX_ST(x) x 32#define EX_ST(x) x
30#endif 33#endif
34#ifndef EX_ST_FP
35#define EX_ST_FP(x) x
36#endif
31 37
32#ifndef EX_RETVAL 38#ifndef EX_RETVAL
33#define EX_RETVAL(x) x 39#define EX_RETVAL(x) x
@@ -120,8 +126,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
120 and %g2, 0x38, %g2 126 and %g2, 0x38, %g2
121 127
1221: subcc %g1, 0x1, %g1 1281: subcc %g1, 0x1, %g1
123 EX_LD(LOAD(ldub, %o1 + 0x00, %o3)) 129 EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
124 EX_ST(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) 130 EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
125 bgu,pt %XCC, 1b 131 bgu,pt %XCC, 1b
126 add %o1, 0x1, %o1 132 add %o1, 0x1, %o1
127 133
@@ -132,20 +138,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
132 be,pt %icc, 3f 138 be,pt %icc, 3f
133 alignaddr %o1, %g0, %o1 139 alignaddr %o1, %g0, %o1
134 140
135 EX_LD(LOAD(ldd, %o1, %f4)) 141 EX_LD_FP(LOAD(ldd, %o1, %f4))
1361: EX_LD(LOAD(ldd, %o1 + 0x8, %f6)) 1421: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
137 add %o1, 0x8, %o1 143 add %o1, 0x8, %o1
138 subcc %g2, 0x8, %g2 144 subcc %g2, 0x8, %g2
139 faligndata %f4, %f6, %f0 145 faligndata %f4, %f6, %f0
140 EX_ST(STORE(std, %f0, %o0)) 146 EX_ST_FP(STORE(std, %f0, %o0))
141 be,pn %icc, 3f 147 be,pn %icc, 3f
142 add %o0, 0x8, %o0 148 add %o0, 0x8, %o0
143 149
144 EX_LD(LOAD(ldd, %o1 + 0x8, %f4)) 150 EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
145 add %o1, 0x8, %o1 151 add %o1, 0x8, %o1
146 subcc %g2, 0x8, %g2 152 subcc %g2, 0x8, %g2
147 faligndata %f6, %f4, %f2 153 faligndata %f6, %f4, %f2
148 EX_ST(STORE(std, %f2, %o0)) 154 EX_ST_FP(STORE(std, %f2, %o0))
149 bne,pt %icc, 1b 155 bne,pt %icc, 1b
150 add %o0, 0x8, %o0 156 add %o0, 0x8, %o0
151 157
@@ -155,25 +161,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
155 LOAD(prefetch, %o1 + 0x080, #one_read) 161 LOAD(prefetch, %o1 + 0x080, #one_read)
156 LOAD(prefetch, %o1 + 0x0c0, #one_read) 162 LOAD(prefetch, %o1 + 0x0c0, #one_read)
157 LOAD(prefetch, %o1 + 0x100, #one_read) 163 LOAD(prefetch, %o1 + 0x100, #one_read)
158 EX_LD(LOAD(ldd, %o1 + 0x000, %f0)) 164 EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
159 LOAD(prefetch, %o1 + 0x140, #one_read) 165 LOAD(prefetch, %o1 + 0x140, #one_read)
160 EX_LD(LOAD(ldd, %o1 + 0x008, %f2)) 166 EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
161 LOAD(prefetch, %o1 + 0x180, #one_read) 167 LOAD(prefetch, %o1 + 0x180, #one_read)
162 EX_LD(LOAD(ldd, %o1 + 0x010, %f4)) 168 EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
163 LOAD(prefetch, %o1 + 0x1c0, #one_read) 169 LOAD(prefetch, %o1 + 0x1c0, #one_read)
164 faligndata %f0, %f2, %f16 170 faligndata %f0, %f2, %f16
165 EX_LD(LOAD(ldd, %o1 + 0x018, %f6)) 171 EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
166 faligndata %f2, %f4, %f18 172 faligndata %f2, %f4, %f18
167 EX_LD(LOAD(ldd, %o1 + 0x020, %f8)) 173 EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
168 faligndata %f4, %f6, %f20 174 faligndata %f4, %f6, %f20
169 EX_LD(LOAD(ldd, %o1 + 0x028, %f10)) 175 EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
170 faligndata %f6, %f8, %f22 176 faligndata %f6, %f8, %f22
171 177
172 EX_LD(LOAD(ldd, %o1 + 0x030, %f12)) 178 EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
173 faligndata %f8, %f10, %f24 179 faligndata %f8, %f10, %f24
174 EX_LD(LOAD(ldd, %o1 + 0x038, %f14)) 180 EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
175 faligndata %f10, %f12, %f26 181 faligndata %f10, %f12, %f26
176 EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) 182 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
177 183
178 subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE 184 subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
179 add %o1, 0x40, %o1 185 add %o1, 0x40, %o1
@@ -184,26 +190,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
184 190
185 .align 64 191 .align 64
1861: 1921:
187 EX_LD(LOAD(ldd, %o1 + 0x008, %f2)) 193 EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
188 faligndata %f12, %f14, %f28 194 faligndata %f12, %f14, %f28
189 EX_LD(LOAD(ldd, %o1 + 0x010, %f4)) 195 EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
190 faligndata %f14, %f0, %f30 196 faligndata %f14, %f0, %f30
191 EX_ST(STORE_BLK(%f16, %o0)) 197 EX_ST_FP(STORE_BLK(%f16, %o0))
192 EX_LD(LOAD(ldd, %o1 + 0x018, %f6)) 198 EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
193 faligndata %f0, %f2, %f16 199 faligndata %f0, %f2, %f16
194 add %o0, 0x40, %o0 200 add %o0, 0x40, %o0
195 201
196 EX_LD(LOAD(ldd, %o1 + 0x020, %f8)) 202 EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
197 faligndata %f2, %f4, %f18 203 faligndata %f2, %f4, %f18
198 EX_LD(LOAD(ldd, %o1 + 0x028, %f10)) 204 EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
199 faligndata %f4, %f6, %f20 205 faligndata %f4, %f6, %f20
200 EX_LD(LOAD(ldd, %o1 + 0x030, %f12)) 206 EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
201 subcc %o3, 0x01, %o3 207 subcc %o3, 0x01, %o3
202 faligndata %f6, %f8, %f22 208 faligndata %f6, %f8, %f22
203 EX_LD(LOAD(ldd, %o1 + 0x038, %f14)) 209 EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
204 210
205 faligndata %f8, %f10, %f24 211 faligndata %f8, %f10, %f24
206 EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) 212 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
207 LOAD(prefetch, %o1 + 0x1c0, #one_read) 213 LOAD(prefetch, %o1 + 0x1c0, #one_read)
208 faligndata %f10, %f12, %f26 214 faligndata %f10, %f12, %f26
209 bg,pt %XCC, 1b 215 bg,pt %XCC, 1b
@@ -211,29 +217,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
211 217
212 /* Finally we copy the last full 64-byte block. */ 218 /* Finally we copy the last full 64-byte block. */
2132: 2192:
214 EX_LD(LOAD(ldd, %o1 + 0x008, %f2)) 220 EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
215 faligndata %f12, %f14, %f28 221 faligndata %f12, %f14, %f28
216 EX_LD(LOAD(ldd, %o1 + 0x010, %f4)) 222 EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
217 faligndata %f14, %f0, %f30 223 faligndata %f14, %f0, %f30
218 EX_ST(STORE_BLK(%f16, %o0)) 224 EX_ST_FP(STORE_BLK(%f16, %o0))
219 EX_LD(LOAD(ldd, %o1 + 0x018, %f6)) 225 EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
220 faligndata %f0, %f2, %f16 226 faligndata %f0, %f2, %f16
221 EX_LD(LOAD(ldd, %o1 + 0x020, %f8)) 227 EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
222 faligndata %f2, %f4, %f18 228 faligndata %f2, %f4, %f18
223 EX_LD(LOAD(ldd, %o1 + 0x028, %f10)) 229 EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
224 faligndata %f4, %f6, %f20 230 faligndata %f4, %f6, %f20
225 EX_LD(LOAD(ldd, %o1 + 0x030, %f12)) 231 EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
226 faligndata %f6, %f8, %f22 232 faligndata %f6, %f8, %f22
227 EX_LD(LOAD(ldd, %o1 + 0x038, %f14)) 233 EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
228 faligndata %f8, %f10, %f24 234 faligndata %f8, %f10, %f24
229 cmp %g1, 0 235 cmp %g1, 0
230 be,pt %XCC, 1f 236 be,pt %XCC, 1f
231 add %o0, 0x40, %o0 237 add %o0, 0x40, %o0
232 EX_LD(LOAD(ldd, %o1 + 0x040, %f0)) 238 EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
2331: faligndata %f10, %f12, %f26 2391: faligndata %f10, %f12, %f26
234 faligndata %f12, %f14, %f28 240 faligndata %f12, %f14, %f28
235 faligndata %f14, %f0, %f30 241 faligndata %f14, %f0, %f30
236 EX_ST(STORE_BLK(%f16, %o0)) 242 EX_ST_FP(STORE_BLK(%f16, %o0))
237 add %o0, 0x40, %o0 243 add %o0, 0x40, %o0
238 add %o1, 0x40, %o1 244 add %o1, 0x40, %o1
239 membar #Sync 245 membar #Sync
@@ -253,20 +259,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
253 259
254 sub %o2, %g2, %o2 260 sub %o2, %g2, %o2
255 be,a,pt %XCC, 1f 261 be,a,pt %XCC, 1f
256 EX_LD(LOAD(ldd, %o1 + 0x00, %f0)) 262 EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
257 263
2581: EX_LD(LOAD(ldd, %o1 + 0x08, %f2)) 2641: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
259 add %o1, 0x8, %o1 265 add %o1, 0x8, %o1
260 subcc %g2, 0x8, %g2 266 subcc %g2, 0x8, %g2
261 faligndata %f0, %f2, %f8 267 faligndata %f0, %f2, %f8
262 EX_ST(STORE(std, %f8, %o0)) 268 EX_ST_FP(STORE(std, %f8, %o0))
263 be,pn %XCC, 2f 269 be,pn %XCC, 2f
264 add %o0, 0x8, %o0 270 add %o0, 0x8, %o0
265 EX_LD(LOAD(ldd, %o1 + 0x08, %f0)) 271 EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
266 add %o1, 0x8, %o1 272 add %o1, 0x8, %o1
267 subcc %g2, 0x8, %g2 273 subcc %g2, 0x8, %g2
268 faligndata %f2, %f0, %f8 274 faligndata %f2, %f0, %f8
269 EX_ST(STORE(std, %f8, %o0)) 275 EX_ST_FP(STORE(std, %f8, %o0))
270 bne,pn %XCC, 1b 276 bne,pn %XCC, 1b
271 add %o0, 0x8, %o0 277 add %o0, 0x8, %o0
272 278