diff options
author | David S. Miller <davem@davemloft.net> | 2016-10-24 21:58:05 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-10-24 21:58:05 -0400 |
commit | 95707704800988093a9b9a27e0f2f67f5b4bf2fa (patch) | |
tree | e910ba46194a2fbb5d5b6693b2c17514d8fbd8b3 | |
parent | cb736fdbb208eb3420f1a2eb2bfc024a6e9dcada (diff) |
sparc64: Convert NG4copy_{from,to}_user to accurate exception reporting.
Report the exact number of bytes which have not been successfully
copied when an exception occurs, using the running remaining length.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc/lib/NG4copy_from_user.S | 8 | ||||
-rw-r--r-- | arch/sparc/lib/NG4copy_to_user.S | 8 | ||||
-rw-r--r-- | arch/sparc/lib/NG4memcpy.S | 294 |
3 files changed, 231 insertions, 79 deletions
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S index f9746e7cf25e..16a286c1a528 100644 --- a/arch/sparc/lib/NG4copy_from_user.S +++ b/arch/sparc/lib/NG4copy_from_user.S | |||
@@ -3,19 +3,19 @@ | |||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | 3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #define EX_LD(x) \ | 6 | #define EX_LD(x, y) \ |
7 | 98: x; \ | 7 | 98: x; \ |
8 | .section __ex_table,"a";\ | 8 | .section __ex_table,"a";\ |
9 | .align 4; \ | 9 | .align 4; \ |
10 | .word 98b, __retl_mone_asi;\ | 10 | .word 98b, y; \ |
11 | .text; \ | 11 | .text; \ |
12 | .align 4; | 12 | .align 4; |
13 | 13 | ||
14 | #define EX_LD_FP(x) \ | 14 | #define EX_LD_FP(x,y) \ |
15 | 98: x; \ | 15 | 98: x; \ |
16 | .section __ex_table,"a";\ | 16 | .section __ex_table,"a";\ |
17 | .align 4; \ | 17 | .align 4; \ |
18 | .word 98b, __retl_mone_asi_fp;\ | 18 | .word 98b, y##_fp; \ |
19 | .text; \ | 19 | .text; \ |
20 | .align 4; | 20 | .align 4; |
21 | 21 | ||
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S index 5fa44349adde..6b0276ffc858 100644 --- a/arch/sparc/lib/NG4copy_to_user.S +++ b/arch/sparc/lib/NG4copy_to_user.S | |||
@@ -3,19 +3,19 @@ | |||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | 3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #define EX_ST(x) \ | 6 | #define EX_ST(x,y) \ |
7 | 98: x; \ | 7 | 98: x; \ |
8 | .section __ex_table,"a";\ | 8 | .section __ex_table,"a";\ |
9 | .align 4; \ | 9 | .align 4; \ |
10 | .word 98b, __retl_mone_asi;\ | 10 | .word 98b, y; \ |
11 | .text; \ | 11 | .text; \ |
12 | .align 4; | 12 | .align 4; |
13 | 13 | ||
14 | #define EX_ST_FP(x) \ | 14 | #define EX_ST_FP(x,y) \ |
15 | 98: x; \ | 15 | 98: x; \ |
16 | .section __ex_table,"a";\ | 16 | .section __ex_table,"a";\ |
17 | .align 4; \ | 17 | .align 4; \ |
18 | .word 98b, __retl_mone_asi_fp;\ | 18 | .word 98b, y##_fp; \ |
19 | .text; \ | 19 | .text; \ |
20 | .align 4; | 20 | .align 4; |
21 | 21 | ||
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 8e13ee1f4454..75bb93b1437f 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S | |||
@@ -4,6 +4,7 @@ | |||
4 | */ | 4 | */ |
5 | 5 | ||
6 | #ifdef __KERNEL__ | 6 | #ifdef __KERNEL__ |
7 | #include <linux/linkage.h> | ||
7 | #include <asm/visasm.h> | 8 | #include <asm/visasm.h> |
8 | #include <asm/asi.h> | 9 | #include <asm/asi.h> |
9 | #define GLOBAL_SPARE %g7 | 10 | #define GLOBAL_SPARE %g7 |
@@ -46,22 +47,19 @@ | |||
46 | #endif | 47 | #endif |
47 | 48 | ||
48 | #ifndef EX_LD | 49 | #ifndef EX_LD |
49 | #define EX_LD(x) x | 50 | #define EX_LD(x,y) x |
50 | #endif | 51 | #endif |
51 | #ifndef EX_LD_FP | 52 | #ifndef EX_LD_FP |
52 | #define EX_LD_FP(x) x | 53 | #define EX_LD_FP(x,y) x |
53 | #endif | 54 | #endif |
54 | 55 | ||
55 | #ifndef EX_ST | 56 | #ifndef EX_ST |
56 | #define EX_ST(x) x | 57 | #define EX_ST(x,y) x |
57 | #endif | 58 | #endif |
58 | #ifndef EX_ST_FP | 59 | #ifndef EX_ST_FP |
59 | #define EX_ST_FP(x) x | 60 | #define EX_ST_FP(x,y) x |
60 | #endif | 61 | #endif |
61 | 62 | ||
62 | #ifndef EX_RETVAL | ||
63 | #define EX_RETVAL(x) x | ||
64 | #endif | ||
65 | 63 | ||
66 | #ifndef LOAD | 64 | #ifndef LOAD |
67 | #define LOAD(type,addr,dest) type [addr], dest | 65 | #define LOAD(type,addr,dest) type [addr], dest |
@@ -94,6 +92,158 @@ | |||
94 | .register %g3,#scratch | 92 | .register %g3,#scratch |
95 | 93 | ||
96 | .text | 94 | .text |
95 | #ifndef EX_RETVAL | ||
96 | #define EX_RETVAL(x) x | ||
97 | __restore_asi_fp: | ||
98 | VISExitHalf | ||
99 | __restore_asi: | ||
100 | retl | ||
101 | wr %g0, ASI_AIUS, %asi | ||
102 | |||
103 | ENTRY(NG4_retl_o2) | ||
104 | ba,pt %xcc, __restore_asi | ||
105 | mov %o2, %o0 | ||
106 | ENDPROC(NG4_retl_o2) | ||
107 | ENTRY(NG4_retl_o2_plus_1) | ||
108 | ba,pt %xcc, __restore_asi | ||
109 | add %o2, 1, %o0 | ||
110 | ENDPROC(NG4_retl_o2_plus_1) | ||
111 | ENTRY(NG4_retl_o2_plus_4) | ||
112 | ba,pt %xcc, __restore_asi | ||
113 | add %o2, 4, %o0 | ||
114 | ENDPROC(NG4_retl_o2_plus_4) | ||
115 | ENTRY(NG4_retl_o2_plus_o5) | ||
116 | ba,pt %xcc, __restore_asi | ||
117 | add %o2, %o5, %o0 | ||
118 | ENDPROC(NG4_retl_o2_plus_o5) | ||
119 | ENTRY(NG4_retl_o2_plus_o5_plus_4) | ||
120 | add %o5, 4, %o5 | ||
121 | ba,pt %xcc, __restore_asi | ||
122 | add %o2, %o5, %o0 | ||
123 | ENDPROC(NG4_retl_o2_plus_o5_plus_4) | ||
124 | ENTRY(NG4_retl_o2_plus_o5_plus_8) | ||
125 | add %o5, 8, %o5 | ||
126 | ba,pt %xcc, __restore_asi | ||
127 | add %o2, %o5, %o0 | ||
128 | ENDPROC(NG4_retl_o2_plus_o5_plus_8) | ||
129 | ENTRY(NG4_retl_o2_plus_o5_plus_16) | ||
130 | add %o5, 16, %o5 | ||
131 | ba,pt %xcc, __restore_asi | ||
132 | add %o2, %o5, %o0 | ||
133 | ENDPROC(NG4_retl_o2_plus_o5_plus_16) | ||
134 | ENTRY(NG4_retl_o2_plus_o5_plus_24) | ||
135 | add %o5, 24, %o5 | ||
136 | ba,pt %xcc, __restore_asi | ||
137 | add %o2, %o5, %o0 | ||
138 | ENDPROC(NG4_retl_o2_plus_o5_plus_24) | ||
139 | ENTRY(NG4_retl_o2_plus_o5_plus_32) | ||
140 | add %o5, 32, %o5 | ||
141 | ba,pt %xcc, __restore_asi | ||
142 | add %o2, %o5, %o0 | ||
143 | ENDPROC(NG4_retl_o2_plus_o5_plus_32) | ||
144 | ENTRY(NG4_retl_o2_plus_g1) | ||
145 | ba,pt %xcc, __restore_asi | ||
146 | add %o2, %g1, %o0 | ||
147 | ENDPROC(NG4_retl_o2_plus_g1) | ||
148 | ENTRY(NG4_retl_o2_plus_g1_plus_1) | ||
149 | add %g1, 1, %g1 | ||
150 | ba,pt %xcc, __restore_asi | ||
151 | add %o2, %g1, %o0 | ||
152 | ENDPROC(NG4_retl_o2_plus_g1_plus_1) | ||
153 | ENTRY(NG4_retl_o2_plus_g1_plus_8) | ||
154 | add %g1, 8, %g1 | ||
155 | ba,pt %xcc, __restore_asi | ||
156 | add %o2, %g1, %o0 | ||
157 | ENDPROC(NG4_retl_o2_plus_g1_plus_8) | ||
158 | ENTRY(NG4_retl_o2_plus_o4) | ||
159 | ba,pt %xcc, __restore_asi | ||
160 | add %o2, %o4, %o0 | ||
161 | ENDPROC(NG4_retl_o2_plus_o4) | ||
162 | ENTRY(NG4_retl_o2_plus_o4_plus_8) | ||
163 | add %o4, 8, %o4 | ||
164 | ba,pt %xcc, __restore_asi | ||
165 | add %o2, %o4, %o0 | ||
166 | ENDPROC(NG4_retl_o2_plus_o4_plus_8) | ||
167 | ENTRY(NG4_retl_o2_plus_o4_plus_16) | ||
168 | add %o4, 16, %o4 | ||
169 | ba,pt %xcc, __restore_asi | ||
170 | add %o2, %o4, %o0 | ||
171 | ENDPROC(NG4_retl_o2_plus_o4_plus_16) | ||
172 | ENTRY(NG4_retl_o2_plus_o4_plus_24) | ||
173 | add %o4, 24, %o4 | ||
174 | ba,pt %xcc, __restore_asi | ||
175 | add %o2, %o4, %o0 | ||
176 | ENDPROC(NG4_retl_o2_plus_o4_plus_24) | ||
177 | ENTRY(NG4_retl_o2_plus_o4_plus_32) | ||
178 | add %o4, 32, %o4 | ||
179 | ba,pt %xcc, __restore_asi | ||
180 | add %o2, %o4, %o0 | ||
181 | ENDPROC(NG4_retl_o2_plus_o4_plus_32) | ||
182 | ENTRY(NG4_retl_o2_plus_o4_plus_40) | ||
183 | add %o4, 40, %o4 | ||
184 | ba,pt %xcc, __restore_asi | ||
185 | add %o2, %o4, %o0 | ||
186 | ENDPROC(NG4_retl_o2_plus_o4_plus_40) | ||
187 | ENTRY(NG4_retl_o2_plus_o4_plus_48) | ||
188 | add %o4, 48, %o4 | ||
189 | ba,pt %xcc, __restore_asi | ||
190 | add %o2, %o4, %o0 | ||
191 | ENDPROC(NG4_retl_o2_plus_o4_plus_48) | ||
192 | ENTRY(NG4_retl_o2_plus_o4_plus_56) | ||
193 | add %o4, 56, %o4 | ||
194 | ba,pt %xcc, __restore_asi | ||
195 | add %o2, %o4, %o0 | ||
196 | ENDPROC(NG4_retl_o2_plus_o4_plus_56) | ||
197 | ENTRY(NG4_retl_o2_plus_o4_plus_64) | ||
198 | add %o4, 64, %o4 | ||
199 | ba,pt %xcc, __restore_asi | ||
200 | add %o2, %o4, %o0 | ||
201 | ENDPROC(NG4_retl_o2_plus_o4_plus_64) | ||
202 | ENTRY(NG4_retl_o2_plus_o4_fp) | ||
203 | ba,pt %xcc, __restore_asi_fp | ||
204 | add %o2, %o4, %o0 | ||
205 | ENDPROC(NG4_retl_o2_plus_o4_fp) | ||
206 | ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) | ||
207 | add %o4, 8, %o4 | ||
208 | ba,pt %xcc, __restore_asi_fp | ||
209 | add %o2, %o4, %o0 | ||
210 | ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) | ||
211 | ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) | ||
212 | add %o4, 16, %o4 | ||
213 | ba,pt %xcc, __restore_asi_fp | ||
214 | add %o2, %o4, %o0 | ||
215 | ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) | ||
216 | ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) | ||
217 | add %o4, 24, %o4 | ||
218 | ba,pt %xcc, __restore_asi_fp | ||
219 | add %o2, %o4, %o0 | ||
220 | ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) | ||
221 | ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) | ||
222 | add %o4, 32, %o4 | ||
223 | ba,pt %xcc, __restore_asi_fp | ||
224 | add %o2, %o4, %o0 | ||
225 | ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) | ||
226 | ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) | ||
227 | add %o4, 40, %o4 | ||
228 | ba,pt %xcc, __restore_asi_fp | ||
229 | add %o2, %o4, %o0 | ||
230 | ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) | ||
231 | ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) | ||
232 | add %o4, 48, %o4 | ||
233 | ba,pt %xcc, __restore_asi_fp | ||
234 | add %o2, %o4, %o0 | ||
235 | ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) | ||
236 | ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) | ||
237 | add %o4, 56, %o4 | ||
238 | ba,pt %xcc, __restore_asi_fp | ||
239 | add %o2, %o4, %o0 | ||
240 | ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) | ||
241 | ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) | ||
242 | add %o4, 64, %o4 | ||
243 | ba,pt %xcc, __restore_asi_fp | ||
244 | add %o2, %o4, %o0 | ||
245 | ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) | ||
246 | #endif | ||
97 | .align 64 | 247 | .align 64 |
98 | 248 | ||
99 | .globl FUNC_NAME | 249 | .globl FUNC_NAME |
@@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
124 | brz,pt %g1, 51f | 274 | brz,pt %g1, 51f |
125 | sub %o2, %g1, %o2 | 275 | sub %o2, %g1, %o2 |
126 | 276 | ||
127 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | 277 | |
278 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) | ||
128 | add %o1, 1, %o1 | 279 | add %o1, 1, %o1 |
129 | subcc %g1, 1, %g1 | 280 | subcc %g1, 1, %g1 |
130 | add %o0, 1, %o0 | 281 | add %o0, 1, %o0 |
131 | bne,pt %icc, 1b | 282 | bne,pt %icc, 1b |
132 | EX_ST(STORE(stb, %g2, %o0 - 0x01)) | 283 | EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) |
133 | 284 | ||
134 | 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) | 285 | 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) |
135 | LOAD(prefetch, %o1 + 0x080, #n_reads_strong) | 286 | LOAD(prefetch, %o1 + 0x080, #n_reads_strong) |
@@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
154 | brz,pt %g1, .Llarge_aligned | 305 | brz,pt %g1, .Llarge_aligned |
155 | sub %o2, %g1, %o2 | 306 | sub %o2, %g1, %o2 |
156 | 307 | ||
157 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) | 308 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) |
158 | add %o1, 8, %o1 | 309 | add %o1, 8, %o1 |
159 | subcc %g1, 8, %g1 | 310 | subcc %g1, 8, %g1 |
160 | add %o0, 8, %o0 | 311 | add %o0, 8, %o0 |
161 | bne,pt %icc, 1b | 312 | bne,pt %icc, 1b |
162 | EX_ST(STORE(stx, %g2, %o0 - 0x08)) | 313 | EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) |
163 | 314 | ||
164 | .Llarge_aligned: | 315 | .Llarge_aligned: |
165 | /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ | 316 | /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ |
166 | andn %o2, 0x3f, %o4 | 317 | andn %o2, 0x3f, %o4 |
167 | sub %o2, %o4, %o2 | 318 | sub %o2, %o4, %o2 |
168 | 319 | ||
169 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | 320 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) |
170 | add %o1, 0x40, %o1 | 321 | add %o1, 0x40, %o1 |
171 | EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) | 322 | EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) |
172 | subcc %o4, 0x40, %o4 | 323 | subcc %o4, 0x40, %o4 |
173 | EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) | 324 | EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) |
174 | EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) | 325 | EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) |
175 | EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) | 326 | EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) |
176 | EX_ST(STORE_INIT(%g1, %o0)) | 327 | EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) |
177 | add %o0, 0x08, %o0 | 328 | add %o0, 0x08, %o0 |
178 | EX_ST(STORE_INIT(%g2, %o0)) | 329 | EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) |
179 | add %o0, 0x08, %o0 | 330 | add %o0, 0x08, %o0 |
180 | EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) | 331 | EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) |
181 | EX_ST(STORE_INIT(%g3, %o0)) | 332 | EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) |
182 | add %o0, 0x08, %o0 | 333 | add %o0, 0x08, %o0 |
183 | EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) | 334 | EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) |
184 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | 335 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) |
185 | add %o0, 0x08, %o0 | 336 | add %o0, 0x08, %o0 |
186 | EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) | 337 | EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) |
187 | EX_ST(STORE_INIT(%o5, %o0)) | 338 | EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) |
188 | add %o0, 0x08, %o0 | 339 | add %o0, 0x08, %o0 |
189 | EX_ST(STORE_INIT(%g2, %o0)) | 340 | EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) |
190 | add %o0, 0x08, %o0 | 341 | add %o0, 0x08, %o0 |
191 | EX_ST(STORE_INIT(%g3, %o0)) | 342 | EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) |
192 | add %o0, 0x08, %o0 | 343 | add %o0, 0x08, %o0 |
193 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | 344 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) |
194 | add %o0, 0x08, %o0 | 345 | add %o0, 0x08, %o0 |
195 | bne,pt %icc, 1b | 346 | bne,pt %icc, 1b |
196 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | 347 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) |
@@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
216 | sub %o2, %o4, %o2 | 367 | sub %o2, %o4, %o2 |
217 | alignaddr %o1, %g0, %g1 | 368 | alignaddr %o1, %g0, %g1 |
218 | add %o1, %o4, %o1 | 369 | add %o1, %o4, %o1 |
219 | EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) | 370 | EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) |
220 | 1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) | 371 | 1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) |
221 | subcc %o4, 0x40, %o4 | 372 | subcc %o4, 0x40, %o4 |
222 | EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) | 373 | EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) |
223 | EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) | 374 | EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) |
224 | EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) | 375 | EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) |
225 | EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) | 376 | EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) |
226 | EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) | 377 | EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) |
227 | EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) | 378 | EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) |
228 | faligndata %f0, %f2, %f16 | 379 | faligndata %f0, %f2, %f16 |
229 | EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) | 380 | EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) |
230 | faligndata %f2, %f4, %f18 | 381 | faligndata %f2, %f4, %f18 |
231 | add %g1, 0x40, %g1 | 382 | add %g1, 0x40, %g1 |
232 | faligndata %f4, %f6, %f20 | 383 | faligndata %f4, %f6, %f20 |
@@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
235 | faligndata %f10, %f12, %f26 | 386 | faligndata %f10, %f12, %f26 |
236 | faligndata %f12, %f14, %f28 | 387 | faligndata %f12, %f14, %f28 |
237 | faligndata %f14, %f0, %f30 | 388 | faligndata %f14, %f0, %f30 |
238 | EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) | 389 | EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) |
239 | EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) | 390 | EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) |
240 | EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) | 391 | EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) |
241 | EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) | 392 | EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40) |
242 | EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) | 393 | EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) |
243 | EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) | 394 | EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) |
244 | EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) | 395 | EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) |
245 | EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) | 396 | EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) |
246 | add %o0, 0x40, %o0 | 397 | add %o0, 0x40, %o0 |
247 | bne,pt %icc, 1b | 398 | bne,pt %icc, 1b |
248 | LOAD(prefetch, %g1 + 0x200, #n_reads_strong) | 399 | LOAD(prefetch, %g1 + 0x200, #n_reads_strong) |
@@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
270 | andncc %o2, 0x20 - 1, %o5 | 421 | andncc %o2, 0x20 - 1, %o5 |
271 | be,pn %icc, 2f | 422 | be,pn %icc, 2f |
272 | sub %o2, %o5, %o2 | 423 | sub %o2, %o5, %o2 |
273 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | 424 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) |
274 | EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) | 425 | EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) |
275 | EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) | 426 | EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) |
276 | EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) | 427 | EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) |
277 | add %o1, 0x20, %o1 | 428 | add %o1, 0x20, %o1 |
278 | subcc %o5, 0x20, %o5 | 429 | subcc %o5, 0x20, %o5 |
279 | EX_ST(STORE(stx, %g1, %o0 + 0x00)) | 430 | EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) |
280 | EX_ST(STORE(stx, %g2, %o0 + 0x08)) | 431 | EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) |
281 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) | 432 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_16) |
282 | EX_ST(STORE(stx, %o4, %o0 + 0x18)) | 433 | EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) |
283 | bne,pt %icc, 1b | 434 | bne,pt %icc, 1b |
284 | add %o0, 0x20, %o0 | 435 | add %o0, 0x20, %o0 |
285 | 2: andcc %o2, 0x18, %o5 | 436 | 2: andcc %o2, 0x18, %o5 |
286 | be,pt %icc, 3f | 437 | be,pt %icc, 3f |
287 | sub %o2, %o5, %o2 | 438 | sub %o2, %o5, %o2 |
288 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | 439 | |
440 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) | ||
289 | add %o1, 0x08, %o1 | 441 | add %o1, 0x08, %o1 |
290 | add %o0, 0x08, %o0 | 442 | add %o0, 0x08, %o0 |
291 | subcc %o5, 0x08, %o5 | 443 | subcc %o5, 0x08, %o5 |
292 | bne,pt %icc, 1b | 444 | bne,pt %icc, 1b |
293 | EX_ST(STORE(stx, %g1, %o0 - 0x08)) | 445 | EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) |
294 | 3: brz,pt %o2, .Lexit | 446 | 3: brz,pt %o2, .Lexit |
295 | cmp %o2, 0x04 | 447 | cmp %o2, 0x04 |
296 | bl,pn %icc, .Ltiny | 448 | bl,pn %icc, .Ltiny |
297 | nop | 449 | nop |
298 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | 450 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) |
299 | add %o1, 0x04, %o1 | 451 | add %o1, 0x04, %o1 |
300 | add %o0, 0x04, %o0 | 452 | add %o0, 0x04, %o0 |
301 | subcc %o2, 0x04, %o2 | 453 | subcc %o2, 0x04, %o2 |
302 | bne,pn %icc, .Ltiny | 454 | bne,pn %icc, .Ltiny |
303 | EX_ST(STORE(stw, %g1, %o0 - 0x04)) | 455 | EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) |
304 | ba,a,pt %icc, .Lexit | 456 | ba,a,pt %icc, .Lexit |
305 | .Lmedium_unaligned: | 457 | .Lmedium_unaligned: |
306 | /* First get dest 8 byte aligned. */ | 458 | /* First get dest 8 byte aligned. */ |
@@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
309 | brz,pt %g1, 2f | 461 | brz,pt %g1, 2f |
310 | sub %o2, %g1, %o2 | 462 | sub %o2, %g1, %o2 |
311 | 463 | ||
312 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | 464 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) |
313 | add %o1, 1, %o1 | 465 | add %o1, 1, %o1 |
314 | subcc %g1, 1, %g1 | 466 | subcc %g1, 1, %g1 |
315 | add %o0, 1, %o0 | 467 | add %o0, 1, %o0 |
316 | bne,pt %icc, 1b | 468 | bne,pt %icc, 1b |
317 | EX_ST(STORE(stb, %g2, %o0 - 0x01)) | 469 | EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) |
318 | 2: | 470 | 2: |
319 | and %o1, 0x7, %g1 | 471 | and %o1, 0x7, %g1 |
320 | brz,pn %g1, .Lmedium_noprefetch | 472 | brz,pn %g1, .Lmedium_noprefetch |
@@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
322 | mov 64, %g2 | 474 | mov 64, %g2 |
323 | sub %g2, %g1, %g2 | 475 | sub %g2, %g1, %g2 |
324 | andn %o1, 0x7, %o1 | 476 | andn %o1, 0x7, %o1 |
325 | EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) | 477 | EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) |
326 | sllx %o4, %g1, %o4 | 478 | sllx %o4, %g1, %o4 |
327 | andn %o2, 0x08 - 1, %o5 | 479 | andn %o2, 0x08 - 1, %o5 |
328 | sub %o2, %o5, %o2 | 480 | sub %o2, %o5, %o2 |
329 | 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) | 481 | 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) |
330 | add %o1, 0x08, %o1 | 482 | add %o1, 0x08, %o1 |
331 | subcc %o5, 0x08, %o5 | 483 | subcc %o5, 0x08, %o5 |
332 | srlx %g3, %g2, GLOBAL_SPARE | 484 | srlx %g3, %g2, GLOBAL_SPARE |
333 | or GLOBAL_SPARE, %o4, GLOBAL_SPARE | 485 | or GLOBAL_SPARE, %o4, GLOBAL_SPARE |
334 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) | 486 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) |
335 | add %o0, 0x08, %o0 | 487 | add %o0, 0x08, %o0 |
336 | bne,pt %icc, 1b | 488 | bne,pt %icc, 1b |
337 | sllx %g3, %g1, %o4 | 489 | sllx %g3, %g1, %o4 |
@@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
342 | ba,pt %icc, .Lsmall_unaligned | 494 | ba,pt %icc, .Lsmall_unaligned |
343 | 495 | ||
344 | .Ltiny: | 496 | .Ltiny: |
345 | EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | 497 | EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) |
346 | subcc %o2, 1, %o2 | 498 | subcc %o2, 1, %o2 |
347 | be,pn %icc, .Lexit | 499 | be,pn %icc, .Lexit |
348 | EX_ST(STORE(stb, %g1, %o0 + 0x00)) | 500 | EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) |
349 | EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) | 501 | EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) |
350 | subcc %o2, 1, %o2 | 502 | subcc %o2, 1, %o2 |
351 | be,pn %icc, .Lexit | 503 | be,pn %icc, .Lexit |
352 | EX_ST(STORE(stb, %g1, %o0 + 0x01)) | 504 | EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) |
353 | EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) | 505 | EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) |
354 | ba,pt %icc, .Lexit | 506 | ba,pt %icc, .Lexit |
355 | EX_ST(STORE(stb, %g1, %o0 + 0x02)) | 507 | EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) |
356 | 508 | ||
357 | .Lsmall: | 509 | .Lsmall: |
358 | andcc %g2, 0x3, %g0 | 510 | andcc %g2, 0x3, %g0 |
@@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
360 | andn %o2, 0x4 - 1, %o5 | 512 | andn %o2, 0x4 - 1, %o5 |
361 | sub %o2, %o5, %o2 | 513 | sub %o2, %o5, %o2 |
362 | 1: | 514 | 1: |
363 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | 515 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) |
364 | add %o1, 0x04, %o1 | 516 | add %o1, 0x04, %o1 |
365 | subcc %o5, 0x04, %o5 | 517 | subcc %o5, 0x04, %o5 |
366 | add %o0, 0x04, %o0 | 518 | add %o0, 0x04, %o0 |
367 | bne,pt %icc, 1b | 519 | bne,pt %icc, 1b |
368 | EX_ST(STORE(stw, %g1, %o0 - 0x04)) | 520 | EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) |
369 | brz,pt %o2, .Lexit | 521 | brz,pt %o2, .Lexit |
370 | nop | 522 | nop |
371 | ba,a,pt %icc, .Ltiny | 523 | ba,a,pt %icc, .Ltiny |
372 | 524 | ||
373 | .Lsmall_unaligned: | 525 | .Lsmall_unaligned: |
374 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | 526 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) |
375 | add %o1, 1, %o1 | 527 | add %o1, 1, %o1 |
376 | add %o0, 1, %o0 | 528 | add %o0, 1, %o0 |
377 | subcc %o2, 1, %o2 | 529 | subcc %o2, 1, %o2 |
378 | bne,pt %icc, 1b | 530 | bne,pt %icc, 1b |
379 | EX_ST(STORE(stb, %g1, %o0 - 0x01)) | 531 | EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) |
380 | ba,a,pt %icc, .Lexit | 532 | ba,a,pt %icc, .Lexit |
381 | .size FUNC_NAME, .-FUNC_NAME | 533 | .size FUNC_NAME, .-FUNC_NAME |