diff options
author | David S. Miller <davem@davemloft.net> | 2005-06-27 18:42:04 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-06-27 18:42:04 -0400 |
commit | b445e26cbf784cdba10f2b6c3e2cd3ee7bab360a (patch) | |
tree | 8c8c377a7e5cbf608d730ec45e091e4f2b826a82 /arch/sparc64/lib | |
parent | 020f46a39eb7b99a575b9f4d105fce2b142acdf1 (diff) |
[SPARC64]: Avoid membar instructions in delay slots.
In particular, avoid membar instructions in the delay
slot of a jmpl instruction.
UltraSPARC-I, II, IIi, and IIe have a bug, documented in
the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51
The long and short of it is that if the IMU unit misses
on a branch or jmpl, and there is a store buffer synchronizing
membar in the delay slot, the chip can stop fetching instructions.
If interrupts are enabled or some other trap is enabled, the
chip will unwedge itself, but performance will suffer.
We already had a workaround for this bug in a few spots, but
it's better to have the entire tree sanitized for this rule.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/lib')
-rw-r--r-- | arch/sparc64/lib/U1memcpy.S | 103 | ||||
-rw-r--r-- | arch/sparc64/lib/VISsave.S | 15 | ||||
-rw-r--r-- | arch/sparc64/lib/atomic.S | 42 | ||||
-rw-r--r-- | arch/sparc64/lib/bitops.S | 31 | ||||
-rw-r--r-- | arch/sparc64/lib/debuglocks.c | 6 | ||||
-rw-r--r-- | arch/sparc64/lib/dec_and_lock.S | 6 | ||||
-rw-r--r-- | arch/sparc64/lib/rwsem.S | 15 |
7 files changed, 130 insertions, 88 deletions
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S index da9b520c7189..bafd2fc07acb 100644 --- a/arch/sparc64/lib/U1memcpy.S +++ b/arch/sparc64/lib/U1memcpy.S | |||
@@ -87,14 +87,17 @@ | |||
87 | #define LOOP_CHUNK3(src, dest, len, branch_dest) \ | 87 | #define LOOP_CHUNK3(src, dest, len, branch_dest) \ |
88 | MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) | 88 | MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) |
89 | 89 | ||
90 | #define DO_SYNC membar #Sync; | ||
90 | #define STORE_SYNC(dest, fsrc) \ | 91 | #define STORE_SYNC(dest, fsrc) \ |
91 | EX_ST(STORE_BLK(%fsrc, %dest)); \ | 92 | EX_ST(STORE_BLK(%fsrc, %dest)); \ |
92 | add %dest, 0x40, %dest; | 93 | add %dest, 0x40, %dest; \ |
94 | DO_SYNC | ||
93 | 95 | ||
94 | #define STORE_JUMP(dest, fsrc, target) \ | 96 | #define STORE_JUMP(dest, fsrc, target) \ |
95 | EX_ST(STORE_BLK(%fsrc, %dest)); \ | 97 | EX_ST(STORE_BLK(%fsrc, %dest)); \ |
96 | add %dest, 0x40, %dest; \ | 98 | add %dest, 0x40, %dest; \ |
97 | ba,pt %xcc, target; | 99 | ba,pt %xcc, target; \ |
100 | nop; | ||
98 | 101 | ||
99 | #define FINISH_VISCHUNK(dest, f0, f1, left) \ | 102 | #define FINISH_VISCHUNK(dest, f0, f1, left) \ |
100 | subcc %left, 8, %left;\ | 103 | subcc %left, 8, %left;\ |
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
239 | ba,pt %xcc, 1b+4 | 242 | ba,pt %xcc, 1b+4 |
240 | faligndata %f0, %f2, %f48 | 243 | faligndata %f0, %f2, %f48 |
241 | 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) | 244 | 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) |
242 | STORE_SYNC(o0, f48) membar #Sync | 245 | STORE_SYNC(o0, f48) |
243 | FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) | 246 | FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) |
244 | STORE_JUMP(o0, f48, 40f) membar #Sync | 247 | STORE_JUMP(o0, f48, 40f) |
245 | 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) | 248 | 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) |
246 | STORE_SYNC(o0, f48) membar #Sync | 249 | STORE_SYNC(o0, f48) |
247 | FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) | 250 | FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) |
248 | STORE_JUMP(o0, f48, 48f) membar #Sync | 251 | STORE_JUMP(o0, f48, 48f) |
249 | 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) | 252 | 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) |
250 | STORE_SYNC(o0, f48) membar #Sync | 253 | STORE_SYNC(o0, f48) |
251 | FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) | 254 | FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) |
252 | STORE_JUMP(o0, f48, 56f) membar #Sync | 255 | STORE_JUMP(o0, f48, 56f) |
253 | 256 | ||
254 | 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) | 257 | 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) |
255 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 258 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
260 | ba,pt %xcc, 1b+4 | 263 | ba,pt %xcc, 1b+4 |
261 | faligndata %f2, %f4, %f48 | 264 | faligndata %f2, %f4, %f48 |
262 | 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) | 265 | 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) |
263 | STORE_SYNC(o0, f48) membar #Sync | 266 | STORE_SYNC(o0, f48) |
264 | FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) | 267 | FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) |
265 | STORE_JUMP(o0, f48, 41f) membar #Sync | 268 | STORE_JUMP(o0, f48, 41f) |
266 | 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) | 269 | 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) |
267 | STORE_SYNC(o0, f48) membar #Sync | 270 | STORE_SYNC(o0, f48) |
268 | FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) | 271 | FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) |
269 | STORE_JUMP(o0, f48, 49f) membar #Sync | 272 | STORE_JUMP(o0, f48, 49f) |
270 | 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) | 273 | 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) |
271 | STORE_SYNC(o0, f48) membar #Sync | 274 | STORE_SYNC(o0, f48) |
272 | FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) | 275 | FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) |
273 | STORE_JUMP(o0, f48, 57f) membar #Sync | 276 | STORE_JUMP(o0, f48, 57f) |
274 | 277 | ||
275 | 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) | 278 | 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) |
276 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 279 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
281 | ba,pt %xcc, 1b+4 | 284 | ba,pt %xcc, 1b+4 |
282 | faligndata %f4, %f6, %f48 | 285 | faligndata %f4, %f6, %f48 |
283 | 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) | 286 | 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) |
284 | STORE_SYNC(o0, f48) membar #Sync | 287 | STORE_SYNC(o0, f48) |
285 | FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) | 288 | FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) |
286 | STORE_JUMP(o0, f48, 42f) membar #Sync | 289 | STORE_JUMP(o0, f48, 42f) |
287 | 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) | 290 | 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) |
288 | STORE_SYNC(o0, f48) membar #Sync | 291 | STORE_SYNC(o0, f48) |
289 | FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) | 292 | FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) |
290 | STORE_JUMP(o0, f48, 50f) membar #Sync | 293 | STORE_JUMP(o0, f48, 50f) |
291 | 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) | 294 | 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) |
292 | STORE_SYNC(o0, f48) membar #Sync | 295 | STORE_SYNC(o0, f48) |
293 | FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) | 296 | FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) |
294 | STORE_JUMP(o0, f48, 58f) membar #Sync | 297 | STORE_JUMP(o0, f48, 58f) |
295 | 298 | ||
296 | 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) | 299 | 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) |
297 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 300 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
302 | ba,pt %xcc, 1b+4 | 305 | ba,pt %xcc, 1b+4 |
303 | faligndata %f6, %f8, %f48 | 306 | faligndata %f6, %f8, %f48 |
304 | 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) | 307 | 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) |
305 | STORE_SYNC(o0, f48) membar #Sync | 308 | STORE_SYNC(o0, f48) |
306 | FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) | 309 | FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) |
307 | STORE_JUMP(o0, f48, 43f) membar #Sync | 310 | STORE_JUMP(o0, f48, 43f) |
308 | 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) | 311 | 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) |
309 | STORE_SYNC(o0, f48) membar #Sync | 312 | STORE_SYNC(o0, f48) |
310 | FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) | 313 | FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) |
311 | STORE_JUMP(o0, f48, 51f) membar #Sync | 314 | STORE_JUMP(o0, f48, 51f) |
312 | 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) | 315 | 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) |
313 | STORE_SYNC(o0, f48) membar #Sync | 316 | STORE_SYNC(o0, f48) |
314 | FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) | 317 | FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) |
315 | STORE_JUMP(o0, f48, 59f) membar #Sync | 318 | STORE_JUMP(o0, f48, 59f) |
316 | 319 | ||
317 | 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) | 320 | 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) |
318 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 321 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
323 | ba,pt %xcc, 1b+4 | 326 | ba,pt %xcc, 1b+4 |
324 | faligndata %f8, %f10, %f48 | 327 | faligndata %f8, %f10, %f48 |
325 | 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) | 328 | 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) |
326 | STORE_SYNC(o0, f48) membar #Sync | 329 | STORE_SYNC(o0, f48) |
327 | FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) | 330 | FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) |
328 | STORE_JUMP(o0, f48, 44f) membar #Sync | 331 | STORE_JUMP(o0, f48, 44f) |
329 | 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) | 332 | 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) |
330 | STORE_SYNC(o0, f48) membar #Sync | 333 | STORE_SYNC(o0, f48) |
331 | FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) | 334 | FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) |
332 | STORE_JUMP(o0, f48, 52f) membar #Sync | 335 | STORE_JUMP(o0, f48, 52f) |
333 | 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) | 336 | 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) |
334 | STORE_SYNC(o0, f48) membar #Sync | 337 | STORE_SYNC(o0, f48) |
335 | FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) | 338 | FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) |
336 | STORE_JUMP(o0, f48, 60f) membar #Sync | 339 | STORE_JUMP(o0, f48, 60f) |
337 | 340 | ||
338 | 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) | 341 | 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) |
339 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 342 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
344 | ba,pt %xcc, 1b+4 | 347 | ba,pt %xcc, 1b+4 |
345 | faligndata %f10, %f12, %f48 | 348 | faligndata %f10, %f12, %f48 |
346 | 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) | 349 | 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) |
347 | STORE_SYNC(o0, f48) membar #Sync | 350 | STORE_SYNC(o0, f48) |
348 | FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) | 351 | FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) |
349 | STORE_JUMP(o0, f48, 45f) membar #Sync | 352 | STORE_JUMP(o0, f48, 45f) |
350 | 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) | 353 | 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) |
351 | STORE_SYNC(o0, f48) membar #Sync | 354 | STORE_SYNC(o0, f48) |
352 | FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) | 355 | FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) |
353 | STORE_JUMP(o0, f48, 53f) membar #Sync | 356 | STORE_JUMP(o0, f48, 53f) |
354 | 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) | 357 | 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) |
355 | STORE_SYNC(o0, f48) membar #Sync | 358 | STORE_SYNC(o0, f48) |
356 | FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) | 359 | FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) |
357 | STORE_JUMP(o0, f48, 61f) membar #Sync | 360 | STORE_JUMP(o0, f48, 61f) |
358 | 361 | ||
359 | 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) | 362 | 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) |
360 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 363 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
365 | ba,pt %xcc, 1b+4 | 368 | ba,pt %xcc, 1b+4 |
366 | faligndata %f12, %f14, %f48 | 369 | faligndata %f12, %f14, %f48 |
367 | 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) | 370 | 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) |
368 | STORE_SYNC(o0, f48) membar #Sync | 371 | STORE_SYNC(o0, f48) |
369 | FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) | 372 | FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) |
370 | STORE_JUMP(o0, f48, 46f) membar #Sync | 373 | STORE_JUMP(o0, f48, 46f) |
371 | 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) | 374 | 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) |
372 | STORE_SYNC(o0, f48) membar #Sync | 375 | STORE_SYNC(o0, f48) |
373 | FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) | 376 | FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) |
374 | STORE_JUMP(o0, f48, 54f) membar #Sync | 377 | STORE_JUMP(o0, f48, 54f) |
375 | 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) | 378 | 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) |
376 | STORE_SYNC(o0, f48) membar #Sync | 379 | STORE_SYNC(o0, f48) |
377 | FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) | 380 | FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) |
378 | STORE_JUMP(o0, f48, 62f) membar #Sync | 381 | STORE_JUMP(o0, f48, 62f) |
379 | 382 | ||
380 | 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) | 383 | 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) |
381 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 384 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
386 | ba,pt %xcc, 1b+4 | 389 | ba,pt %xcc, 1b+4 |
387 | faligndata %f14, %f16, %f48 | 390 | faligndata %f14, %f16, %f48 |
388 | 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) | 391 | 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) |
389 | STORE_SYNC(o0, f48) membar #Sync | 392 | STORE_SYNC(o0, f48) |
390 | FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) | 393 | FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) |
391 | STORE_JUMP(o0, f48, 47f) membar #Sync | 394 | STORE_JUMP(o0, f48, 47f) |
392 | 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) | 395 | 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) |
393 | STORE_SYNC(o0, f48) membar #Sync | 396 | STORE_SYNC(o0, f48) |
394 | FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) | 397 | FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) |
395 | STORE_JUMP(o0, f48, 55f) membar #Sync | 398 | STORE_JUMP(o0, f48, 55f) |
396 | 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) | 399 | 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) |
397 | STORE_SYNC(o0, f48) membar #Sync | 400 | STORE_SYNC(o0, f48) |
398 | FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) | 401 | FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) |
399 | STORE_JUMP(o0, f48, 63f) membar #Sync | 402 | STORE_JUMP(o0, f48, 63f) |
400 | 403 | ||
401 | 40: FINISH_VISCHUNK(o0, f0, f2, g3) | 404 | 40: FINISH_VISCHUNK(o0, f0, f2, g3) |
402 | 41: FINISH_VISCHUNK(o0, f2, f4, g3) | 405 | 41: FINISH_VISCHUNK(o0, f2, f4, g3) |
diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S index 65e328d600a8..4e18989bd602 100644 --- a/arch/sparc64/lib/VISsave.S +++ b/arch/sparc64/lib/VISsave.S | |||
@@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 | |||
72 | 72 | ||
73 | stda %f48, [%g3 + %g1] ASI_BLK_P | 73 | stda %f48, [%g3 + %g1] ASI_BLK_P |
74 | 5: membar #Sync | 74 | 5: membar #Sync |
75 | jmpl %g7 + %g0, %g0 | 75 | ba,pt %xcc, 80f |
76 | nop | ||
77 | |||
78 | .align 32 | ||
79 | 80: jmpl %g7 + %g0, %g0 | ||
76 | nop | 80 | nop |
77 | 81 | ||
78 | 6: ldub [%g3 + TI_FPSAVED], %o5 | 82 | 6: ldub [%g3 + TI_FPSAVED], %o5 |
@@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 | |||
87 | stda %f32, [%g2 + %g1] ASI_BLK_P | 91 | stda %f32, [%g2 + %g1] ASI_BLK_P |
88 | stda %f48, [%g3 + %g1] ASI_BLK_P | 92 | stda %f48, [%g3 + %g1] ASI_BLK_P |
89 | membar #Sync | 93 | membar #Sync |
90 | jmpl %g7 + %g0, %g0 | 94 | ba,pt %xcc, 80f |
95 | nop | ||
91 | 96 | ||
97 | .align 32 | ||
98 | 80: jmpl %g7 + %g0, %g0 | ||
92 | nop | 99 | nop |
93 | 100 | ||
94 | .align 32 | 101 | .align 32 |
@@ -126,6 +133,10 @@ VISenterhalf: | |||
126 | stda %f0, [%g2 + %g1] ASI_BLK_P | 133 | stda %f0, [%g2 + %g1] ASI_BLK_P |
127 | stda %f16, [%g3 + %g1] ASI_BLK_P | 134 | stda %f16, [%g3 + %g1] ASI_BLK_P |
128 | membar #Sync | 135 | membar #Sync |
136 | ba,pt %xcc, 4f | ||
137 | nop | ||
138 | |||
139 | .align 32 | ||
129 | 4: and %o5, FPRS_DU, %o5 | 140 | 4: and %o5, FPRS_DU, %o5 |
130 | jmpl %g7 + %g0, %g0 | 141 | jmpl %g7 + %g0, %g0 |
131 | wr %o5, FPRS_FEF, %fprs | 142 | wr %o5, FPRS_FEF, %fprs |
diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S index e528b8d1a3e6..faf87c31598b 100644 --- a/arch/sparc64/lib/atomic.S +++ b/arch/sparc64/lib/atomic.S | |||
@@ -7,18 +7,6 @@ | |||
7 | #include <linux/config.h> | 7 | #include <linux/config.h> |
8 | #include <asm/asi.h> | 8 | #include <asm/asi.h> |
9 | 9 | ||
10 | /* On SMP we need to use memory barriers to ensure | ||
11 | * correct memory operation ordering, nop these out | ||
12 | * for uniprocessor. | ||
13 | */ | ||
14 | #ifdef CONFIG_SMP | ||
15 | #define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad | ||
16 | #define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore | ||
17 | #else | ||
18 | #define ATOMIC_PRE_BARRIER nop | ||
19 | #define ATOMIC_POST_BARRIER nop | ||
20 | #endif | ||
21 | |||
22 | .text | 10 | .text |
23 | 11 | ||
24 | /* Two versions of the atomic routines, one that | 12 | /* Two versions of the atomic routines, one that |
@@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ | |||
52 | nop | 40 | nop |
53 | .size atomic_sub, .-atomic_sub | 41 | .size atomic_sub, .-atomic_sub |
54 | 42 | ||
43 | /* On SMP we need to use memory barriers to ensure | ||
44 | * correct memory operation ordering, nop these out | ||
45 | * for uniprocessor. | ||
46 | */ | ||
47 | #ifdef CONFIG_SMP | ||
48 | |||
49 | #define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad; | ||
50 | #define ATOMIC_POST_BARRIER \ | ||
51 | ba,pt %xcc, 80b; \ | ||
52 | membar #StoreLoad | #StoreStore | ||
53 | |||
54 | 80: retl | ||
55 | nop | ||
56 | #else | ||
57 | #define ATOMIC_PRE_BARRIER | ||
58 | #define ATOMIC_POST_BARRIER | ||
59 | #endif | ||
60 | |||
55 | .globl atomic_add_ret | 61 | .globl atomic_add_ret |
56 | .type atomic_add_ret,#function | 62 | .type atomic_add_ret,#function |
57 | atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ | 63 | atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ |
@@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ | |||
62 | cmp %g1, %g7 | 68 | cmp %g1, %g7 |
63 | bne,pn %icc, 1b | 69 | bne,pn %icc, 1b |
64 | add %g7, %o0, %g7 | 70 | add %g7, %o0, %g7 |
71 | sra %g7, 0, %o0 | ||
65 | ATOMIC_POST_BARRIER | 72 | ATOMIC_POST_BARRIER |
66 | retl | 73 | retl |
67 | sra %g7, 0, %o0 | 74 | nop |
68 | .size atomic_add_ret, .-atomic_add_ret | 75 | .size atomic_add_ret, .-atomic_add_ret |
69 | 76 | ||
70 | .globl atomic_sub_ret | 77 | .globl atomic_sub_ret |
@@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ | |||
77 | cmp %g1, %g7 | 84 | cmp %g1, %g7 |
78 | bne,pn %icc, 1b | 85 | bne,pn %icc, 1b |
79 | sub %g7, %o0, %g7 | 86 | sub %g7, %o0, %g7 |
87 | sra %g7, 0, %o0 | ||
80 | ATOMIC_POST_BARRIER | 88 | ATOMIC_POST_BARRIER |
81 | retl | 89 | retl |
82 | sra %g7, 0, %o0 | 90 | nop |
83 | .size atomic_sub_ret, .-atomic_sub_ret | 91 | .size atomic_sub_ret, .-atomic_sub_ret |
84 | 92 | ||
85 | .globl atomic64_add | 93 | .globl atomic64_add |
@@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ | |||
118 | cmp %g1, %g7 | 126 | cmp %g1, %g7 |
119 | bne,pn %xcc, 1b | 127 | bne,pn %xcc, 1b |
120 | add %g7, %o0, %g7 | 128 | add %g7, %o0, %g7 |
129 | mov %g7, %o0 | ||
121 | ATOMIC_POST_BARRIER | 130 | ATOMIC_POST_BARRIER |
122 | retl | 131 | retl |
123 | mov %g7, %o0 | 132 | nop |
124 | .size atomic64_add_ret, .-atomic64_add_ret | 133 | .size atomic64_add_ret, .-atomic64_add_ret |
125 | 134 | ||
126 | .globl atomic64_sub_ret | 135 | .globl atomic64_sub_ret |
@@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ | |||
133 | cmp %g1, %g7 | 142 | cmp %g1, %g7 |
134 | bne,pn %xcc, 1b | 143 | bne,pn %xcc, 1b |
135 | sub %g7, %o0, %g7 | 144 | sub %g7, %o0, %g7 |
145 | mov %g7, %o0 | ||
136 | ATOMIC_POST_BARRIER | 146 | ATOMIC_POST_BARRIER |
137 | retl | 147 | retl |
138 | mov %g7, %o0 | 148 | nop |
139 | .size atomic64_sub_ret, .-atomic64_sub_ret | 149 | .size atomic64_sub_ret, .-atomic64_sub_ret |
diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S index 886dcd2b376a..31afbfe6c1e8 100644 --- a/arch/sparc64/lib/bitops.S +++ b/arch/sparc64/lib/bitops.S | |||
@@ -7,20 +7,26 @@ | |||
7 | #include <linux/config.h> | 7 | #include <linux/config.h> |
8 | #include <asm/asi.h> | 8 | #include <asm/asi.h> |
9 | 9 | ||
10 | .text | ||
11 | |||
10 | /* On SMP we need to use memory barriers to ensure | 12 | /* On SMP we need to use memory barriers to ensure |
11 | * correct memory operation ordering, nop these out | 13 | * correct memory operation ordering, nop these out |
12 | * for uniprocessor. | 14 | * for uniprocessor. |
13 | */ | 15 | */ |
16 | |||
14 | #ifdef CONFIG_SMP | 17 | #ifdef CONFIG_SMP |
15 | #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad | 18 | #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad |
16 | #define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore | 19 | #define BITOP_POST_BARRIER \ |
20 | ba,pt %xcc, 80b; \ | ||
21 | membar #StoreLoad | #StoreStore | ||
22 | |||
23 | 80: retl | ||
24 | nop | ||
17 | #else | 25 | #else |
18 | #define BITOP_PRE_BARRIER nop | 26 | #define BITOP_PRE_BARRIER |
19 | #define BITOP_POST_BARRIER nop | 27 | #define BITOP_POST_BARRIER |
20 | #endif | 28 | #endif |
21 | 29 | ||
22 | .text | ||
23 | |||
24 | .globl test_and_set_bit | 30 | .globl test_and_set_bit |
25 | .type test_and_set_bit,#function | 31 | .type test_and_set_bit,#function |
26 | test_and_set_bit: /* %o0=nr, %o1=addr */ | 32 | test_and_set_bit: /* %o0=nr, %o1=addr */ |
@@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */ | |||
37 | cmp %g7, %g1 | 43 | cmp %g7, %g1 |
38 | bne,pn %xcc, 1b | 44 | bne,pn %xcc, 1b |
39 | and %g7, %o2, %g2 | 45 | and %g7, %o2, %g2 |
40 | BITOP_POST_BARRIER | ||
41 | clr %o0 | 46 | clr %o0 |
47 | movrne %g2, 1, %o0 | ||
48 | BITOP_POST_BARRIER | ||
42 | retl | 49 | retl |
43 | movrne %g2, 1, %o0 | 50 | nop |
44 | .size test_and_set_bit, .-test_and_set_bit | 51 | .size test_and_set_bit, .-test_and_set_bit |
45 | 52 | ||
46 | .globl test_and_clear_bit | 53 | .globl test_and_clear_bit |
@@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */ | |||
59 | cmp %g7, %g1 | 66 | cmp %g7, %g1 |
60 | bne,pn %xcc, 1b | 67 | bne,pn %xcc, 1b |
61 | and %g7, %o2, %g2 | 68 | and %g7, %o2, %g2 |
62 | BITOP_POST_BARRIER | ||
63 | clr %o0 | 69 | clr %o0 |
70 | movrne %g2, 1, %o0 | ||
71 | BITOP_POST_BARRIER | ||
64 | retl | 72 | retl |
65 | movrne %g2, 1, %o0 | 73 | nop |
66 | .size test_and_clear_bit, .-test_and_clear_bit | 74 | .size test_and_clear_bit, .-test_and_clear_bit |
67 | 75 | ||
68 | .globl test_and_change_bit | 76 | .globl test_and_change_bit |
@@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */ | |||
81 | cmp %g7, %g1 | 89 | cmp %g7, %g1 |
82 | bne,pn %xcc, 1b | 90 | bne,pn %xcc, 1b |
83 | and %g7, %o2, %g2 | 91 | and %g7, %o2, %g2 |
84 | BITOP_POST_BARRIER | ||
85 | clr %o0 | 92 | clr %o0 |
93 | movrne %g2, 1, %o0 | ||
94 | BITOP_POST_BARRIER | ||
86 | retl | 95 | retl |
87 | movrne %g2, 1, %o0 | 96 | nop |
88 | .size test_and_change_bit, .-test_and_change_bit | 97 | .size test_and_change_bit, .-test_and_change_bit |
89 | 98 | ||
90 | .globl set_bit | 99 | .globl set_bit |
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index c421e0c65325..f03344cf784e 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c | |||
@@ -252,8 +252,9 @@ wlock_again: | |||
252 | " andn %%g1, %%g3, %%g7\n" | 252 | " andn %%g1, %%g3, %%g7\n" |
253 | " casx [%0], %%g1, %%g7\n" | 253 | " casx [%0], %%g1, %%g7\n" |
254 | " cmp %%g1, %%g7\n" | 254 | " cmp %%g1, %%g7\n" |
255 | " membar #StoreLoad | #StoreStore\n" | ||
255 | " bne,pn %%xcc, 1b\n" | 256 | " bne,pn %%xcc, 1b\n" |
256 | " membar #StoreLoad | #StoreStore" | 257 | " nop" |
257 | : /* no outputs */ | 258 | : /* no outputs */ |
258 | : "r" (&(rw->lock)) | 259 | : "r" (&(rw->lock)) |
259 | : "g3", "g1", "g7", "cc", "memory"); | 260 | : "g3", "g1", "g7", "cc", "memory"); |
@@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str) | |||
351 | " andn %%g1, %%g3, %%g7\n" | 352 | " andn %%g1, %%g3, %%g7\n" |
352 | " casx [%0], %%g1, %%g7\n" | 353 | " casx [%0], %%g1, %%g7\n" |
353 | " cmp %%g1, %%g7\n" | 354 | " cmp %%g1, %%g7\n" |
355 | " membar #StoreLoad | #StoreStore\n" | ||
354 | " bne,pn %%xcc, 1b\n" | 356 | " bne,pn %%xcc, 1b\n" |
355 | " membar #StoreLoad | #StoreStore" | 357 | " nop" |
356 | : /* no outputs */ | 358 | : /* no outputs */ |
357 | : "r" (&(rw->lock)) | 359 | : "r" (&(rw->lock)) |
358 | : "g3", "g1", "g7", "cc", "memory"); | 360 | : "g3", "g1", "g7", "cc", "memory"); |
diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S index 7e6fdaebedba..8ee288dd0afc 100644 --- a/arch/sparc64/lib/dec_and_lock.S +++ b/arch/sparc64/lib/dec_and_lock.S | |||
@@ -48,8 +48,9 @@ start_to_zero: | |||
48 | #endif | 48 | #endif |
49 | to_zero: | 49 | to_zero: |
50 | ldstub [%o1], %g3 | 50 | ldstub [%o1], %g3 |
51 | membar #StoreLoad | #StoreStore | ||
51 | brnz,pn %g3, spin_on_lock | 52 | brnz,pn %g3, spin_on_lock |
52 | membar #StoreLoad | #StoreStore | 53 | nop |
53 | loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ | 54 | loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ |
54 | cmp %g2, %g7 | 55 | cmp %g2, %g7 |
55 | 56 | ||
@@ -71,8 +72,9 @@ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ | |||
71 | nop | 72 | nop |
72 | spin_on_lock: | 73 | spin_on_lock: |
73 | ldub [%o1], %g3 | 74 | ldub [%o1], %g3 |
75 | membar #LoadLoad | ||
74 | brnz,pt %g3, spin_on_lock | 76 | brnz,pt %g3, spin_on_lock |
75 | membar #LoadLoad | 77 | nop |
76 | ba,pt %xcc, to_zero | 78 | ba,pt %xcc, to_zero |
77 | nop | 79 | nop |
78 | nop | 80 | nop |
diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S index 174ff7b9164c..75f0e6b951d6 100644 --- a/arch/sparc64/lib/rwsem.S +++ b/arch/sparc64/lib/rwsem.S | |||
@@ -17,8 +17,9 @@ __down_read: | |||
17 | bne,pn %icc, 1b | 17 | bne,pn %icc, 1b |
18 | add %g7, 1, %g7 | 18 | add %g7, 1, %g7 |
19 | cmp %g7, 0 | 19 | cmp %g7, 0 |
20 | membar #StoreLoad | #StoreStore | ||
20 | bl,pn %icc, 3f | 21 | bl,pn %icc, 3f |
21 | membar #StoreLoad | #StoreStore | 22 | nop |
22 | 2: | 23 | 2: |
23 | retl | 24 | retl |
24 | nop | 25 | nop |
@@ -57,8 +58,9 @@ __down_write: | |||
57 | cmp %g3, %g7 | 58 | cmp %g3, %g7 |
58 | bne,pn %icc, 1b | 59 | bne,pn %icc, 1b |
59 | cmp %g7, 0 | 60 | cmp %g7, 0 |
61 | membar #StoreLoad | #StoreStore | ||
60 | bne,pn %icc, 3f | 62 | bne,pn %icc, 3f |
61 | membar #StoreLoad | #StoreStore | 63 | nop |
62 | 2: retl | 64 | 2: retl |
63 | nop | 65 | nop |
64 | 3: | 66 | 3: |
@@ -97,8 +99,9 @@ __up_read: | |||
97 | cmp %g1, %g7 | 99 | cmp %g1, %g7 |
98 | bne,pn %icc, 1b | 100 | bne,pn %icc, 1b |
99 | cmp %g7, 0 | 101 | cmp %g7, 0 |
102 | membar #StoreLoad | #StoreStore | ||
100 | bl,pn %icc, 3f | 103 | bl,pn %icc, 3f |
101 | membar #StoreLoad | #StoreStore | 104 | nop |
102 | 2: retl | 105 | 2: retl |
103 | nop | 106 | nop |
104 | 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 | 107 | 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 |
@@ -126,8 +129,9 @@ __up_write: | |||
126 | bne,pn %icc, 1b | 129 | bne,pn %icc, 1b |
127 | sub %g7, %g1, %g7 | 130 | sub %g7, %g1, %g7 |
128 | cmp %g7, 0 | 131 | cmp %g7, 0 |
132 | membar #StoreLoad | #StoreStore | ||
129 | bl,pn %icc, 3f | 133 | bl,pn %icc, 3f |
130 | membar #StoreLoad | #StoreStore | 134 | nop |
131 | 2: | 135 | 2: |
132 | retl | 136 | retl |
133 | nop | 137 | nop |
@@ -151,8 +155,9 @@ __downgrade_write: | |||
151 | bne,pn %icc, 1b | 155 | bne,pn %icc, 1b |
152 | sub %g7, %g1, %g7 | 156 | sub %g7, %g1, %g7 |
153 | cmp %g7, 0 | 157 | cmp %g7, 0 |
158 | membar #StoreLoad | #StoreStore | ||
154 | bl,pn %icc, 3f | 159 | bl,pn %icc, 3f |
155 | membar #StoreLoad | #StoreStore | 160 | nop |
156 | 2: | 161 | 2: |
157 | retl | 162 | retl |
158 | nop | 163 | nop |