about | summary | refs | log | tree | commit | diff | stats
path: root/arch/sparc64/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc64/lib')
-rw-r--r-- arch/sparc64/lib/U1memcpy.S 103
-rw-r--r-- arch/sparc64/lib/VISsave.S 15
-rw-r--r-- arch/sparc64/lib/atomic.S 42
-rw-r--r-- arch/sparc64/lib/bitops.S 31
-rw-r--r-- arch/sparc64/lib/debuglocks.c 6
-rw-r--r-- arch/sparc64/lib/dec_and_lock.S 6
-rw-r--r-- arch/sparc64/lib/rwsem.S 15
7 files changed, 130 insertions, 88 deletions
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S
index da9b520c7189..bafd2fc07acb 100644
--- a/arch/sparc64/lib/U1memcpy.S
+++ b/arch/sparc64/lib/U1memcpy.S
@@ -87,14 +87,17 @@
87#define LOOP_CHUNK3(src, dest, len, branch_dest) \ 87#define LOOP_CHUNK3(src, dest, len, branch_dest) \
88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) 88 MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
89 89
90#define DO_SYNC membar #Sync;
90#define STORE_SYNC(dest, fsrc) \ 91#define STORE_SYNC(dest, fsrc) \
91 EX_ST(STORE_BLK(%fsrc, %dest)); \ 92 EX_ST(STORE_BLK(%fsrc, %dest)); \
92 add %dest, 0x40, %dest; 93 add %dest, 0x40, %dest; \
94 DO_SYNC
93 95
94#define STORE_JUMP(dest, fsrc, target) \ 96#define STORE_JUMP(dest, fsrc, target) \
95 EX_ST(STORE_BLK(%fsrc, %dest)); \ 97 EX_ST(STORE_BLK(%fsrc, %dest)); \
96 add %dest, 0x40, %dest; \ 98 add %dest, 0x40, %dest; \
97 ba,pt %xcc, target; 99 ba,pt %xcc, target; \
100 nop;
98 101
99#define FINISH_VISCHUNK(dest, f0, f1, left) \ 102#define FINISH_VISCHUNK(dest, f0, f1, left) \
100 subcc %left, 8, %left;\ 103 subcc %left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
239 ba,pt %xcc, 1b+4 242 ba,pt %xcc, 1b+4
240 faligndata %f0, %f2, %f48 243 faligndata %f0, %f2, %f48
2411: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 2441: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
242 STORE_SYNC(o0, f48) membar #Sync 245 STORE_SYNC(o0, f48)
243 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 246 FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
244 STORE_JUMP(o0, f48, 40f) membar #Sync 247 STORE_JUMP(o0, f48, 40f)
2452: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) 2482: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
246 STORE_SYNC(o0, f48) membar #Sync 249 STORE_SYNC(o0, f48)
247 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 250 FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
248 STORE_JUMP(o0, f48, 48f) membar #Sync 251 STORE_JUMP(o0, f48, 48f)
2493: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 2523: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
250 STORE_SYNC(o0, f48) membar #Sync 253 STORE_SYNC(o0, f48)
251 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 254 FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
252 STORE_JUMP(o0, f48, 56f) membar #Sync 255 STORE_JUMP(o0, f48, 56f)
253 256
2541: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 2571: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
255 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 258 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
260 ba,pt %xcc, 1b+4 263 ba,pt %xcc, 1b+4
261 faligndata %f2, %f4, %f48 264 faligndata %f2, %f4, %f48
2621: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 2651: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
263 STORE_SYNC(o0, f48) membar #Sync 266 STORE_SYNC(o0, f48)
264 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 267 FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
265 STORE_JUMP(o0, f48, 41f) membar #Sync 268 STORE_JUMP(o0, f48, 41f)
2662: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) 2692: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
267 STORE_SYNC(o0, f48) membar #Sync 270 STORE_SYNC(o0, f48)
268 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 271 FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
269 STORE_JUMP(o0, f48, 49f) membar #Sync 272 STORE_JUMP(o0, f48, 49f)
2703: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 2733: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
271 STORE_SYNC(o0, f48) membar #Sync 274 STORE_SYNC(o0, f48)
272 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 275 FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
273 STORE_JUMP(o0, f48, 57f) membar #Sync 276 STORE_JUMP(o0, f48, 57f)
274 277
2751: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 2781: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
276 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 279 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
281 ba,pt %xcc, 1b+4 284 ba,pt %xcc, 1b+4
282 faligndata %f4, %f6, %f48 285 faligndata %f4, %f6, %f48
2831: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 2861: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
284 STORE_SYNC(o0, f48) membar #Sync 287 STORE_SYNC(o0, f48)
285 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 288 FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
286 STORE_JUMP(o0, f48, 42f) membar #Sync 289 STORE_JUMP(o0, f48, 42f)
2872: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) 2902: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
288 STORE_SYNC(o0, f48) membar #Sync 291 STORE_SYNC(o0, f48)
289 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 292 FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
290 STORE_JUMP(o0, f48, 50f) membar #Sync 293 STORE_JUMP(o0, f48, 50f)
2913: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 2943: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
292 STORE_SYNC(o0, f48) membar #Sync 295 STORE_SYNC(o0, f48)
293 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 296 FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
294 STORE_JUMP(o0, f48, 58f) membar #Sync 297 STORE_JUMP(o0, f48, 58f)
295 298
2961: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 2991: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
297 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 300 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
302 ba,pt %xcc, 1b+4 305 ba,pt %xcc, 1b+4
303 faligndata %f6, %f8, %f48 306 faligndata %f6, %f8, %f48
3041: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 3071: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
305 STORE_SYNC(o0, f48) membar #Sync 308 STORE_SYNC(o0, f48)
306 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 309 FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
307 STORE_JUMP(o0, f48, 43f) membar #Sync 310 STORE_JUMP(o0, f48, 43f)
3082: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 3112: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
309 STORE_SYNC(o0, f48) membar #Sync 312 STORE_SYNC(o0, f48)
310 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 313 FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
311 STORE_JUMP(o0, f48, 51f) membar #Sync 314 STORE_JUMP(o0, f48, 51f)
3123: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 3153: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
313 STORE_SYNC(o0, f48) membar #Sync 316 STORE_SYNC(o0, f48)
314 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 317 FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
315 STORE_JUMP(o0, f48, 59f) membar #Sync 318 STORE_JUMP(o0, f48, 59f)
316 319
3171: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 3201: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
318 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 321 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
323 ba,pt %xcc, 1b+4 326 ba,pt %xcc, 1b+4
324 faligndata %f8, %f10, %f48 327 faligndata %f8, %f10, %f48
3251: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 3281: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
326 STORE_SYNC(o0, f48) membar #Sync 329 STORE_SYNC(o0, f48)
327 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 330 FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
328 STORE_JUMP(o0, f48, 44f) membar #Sync 331 STORE_JUMP(o0, f48, 44f)
3292: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) 3322: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
330 STORE_SYNC(o0, f48) membar #Sync 333 STORE_SYNC(o0, f48)
331 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 334 FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
332 STORE_JUMP(o0, f48, 52f) membar #Sync 335 STORE_JUMP(o0, f48, 52f)
3333: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 3363: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
334 STORE_SYNC(o0, f48) membar #Sync 337 STORE_SYNC(o0, f48)
335 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 338 FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
336 STORE_JUMP(o0, f48, 60f) membar #Sync 339 STORE_JUMP(o0, f48, 60f)
337 340
3381: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 3411: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
339 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 342 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
344 ba,pt %xcc, 1b+4 347 ba,pt %xcc, 1b+4
345 faligndata %f10, %f12, %f48 348 faligndata %f10, %f12, %f48
3461: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 3491: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
347 STORE_SYNC(o0, f48) membar #Sync 350 STORE_SYNC(o0, f48)
348 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 351 FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
349 STORE_JUMP(o0, f48, 45f) membar #Sync 352 STORE_JUMP(o0, f48, 45f)
3502: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) 3532: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
351 STORE_SYNC(o0, f48) membar #Sync 354 STORE_SYNC(o0, f48)
352 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 355 FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
353 STORE_JUMP(o0, f48, 53f) membar #Sync 356 STORE_JUMP(o0, f48, 53f)
3543: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 3573: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
355 STORE_SYNC(o0, f48) membar #Sync 358 STORE_SYNC(o0, f48)
356 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 359 FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
357 STORE_JUMP(o0, f48, 61f) membar #Sync 360 STORE_JUMP(o0, f48, 61f)
358 361
3591: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 3621: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
360 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 363 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
365 ba,pt %xcc, 1b+4 368 ba,pt %xcc, 1b+4
366 faligndata %f12, %f14, %f48 369 faligndata %f12, %f14, %f48
3671: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 3701: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
368 STORE_SYNC(o0, f48) membar #Sync 371 STORE_SYNC(o0, f48)
369 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 372 FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
370 STORE_JUMP(o0, f48, 46f) membar #Sync 373 STORE_JUMP(o0, f48, 46f)
3712: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) 3742: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
372 STORE_SYNC(o0, f48) membar #Sync 375 STORE_SYNC(o0, f48)
373 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 376 FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
374 STORE_JUMP(o0, f48, 54f) membar #Sync 377 STORE_JUMP(o0, f48, 54f)
3753: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 3783: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
376 STORE_SYNC(o0, f48) membar #Sync 379 STORE_SYNC(o0, f48)
377 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 380 FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
378 STORE_JUMP(o0, f48, 62f) membar #Sync 381 STORE_JUMP(o0, f48, 62f)
379 382
3801: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 3831: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
381 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) 384 LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
386 ba,pt %xcc, 1b+4 389 ba,pt %xcc, 1b+4
387 faligndata %f14, %f16, %f48 390 faligndata %f14, %f16, %f48
3881: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 3911: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
389 STORE_SYNC(o0, f48) membar #Sync 392 STORE_SYNC(o0, f48)
390 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 393 FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
391 STORE_JUMP(o0, f48, 47f) membar #Sync 394 STORE_JUMP(o0, f48, 47f)
3922: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) 3952: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
393 STORE_SYNC(o0, f48) membar #Sync 396 STORE_SYNC(o0, f48)
394 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 397 FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
395 STORE_JUMP(o0, f48, 55f) membar #Sync 398 STORE_JUMP(o0, f48, 55f)
3963: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 3993: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
397 STORE_SYNC(o0, f48) membar #Sync 400 STORE_SYNC(o0, f48)
398 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 401 FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
399 STORE_JUMP(o0, f48, 63f) membar #Sync 402 STORE_JUMP(o0, f48, 63f)
400 403
40140: FINISH_VISCHUNK(o0, f0, f2, g3) 40440: FINISH_VISCHUNK(o0, f0, f2, g3)
40241: FINISH_VISCHUNK(o0, f2, f4, g3) 40541: FINISH_VISCHUNK(o0, f2, f4, g3)
diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S
index 65e328d600a8..4e18989bd602 100644
--- a/arch/sparc64/lib/VISsave.S
+++ b/arch/sparc64/lib/VISsave.S
@@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
72 72
73 stda %f48, [%g3 + %g1] ASI_BLK_P 73 stda %f48, [%g3 + %g1] ASI_BLK_P
745: membar #Sync 745: membar #Sync
75 jmpl %g7 + %g0, %g0 75 ba,pt %xcc, 80f
76 nop
77
78 .align 32
7980: jmpl %g7 + %g0, %g0
76 nop 80 nop
77 81
786: ldub [%g3 + TI_FPSAVED], %o5 826: ldub [%g3 + TI_FPSAVED], %o5
@@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
87 stda %f32, [%g2 + %g1] ASI_BLK_P 91 stda %f32, [%g2 + %g1] ASI_BLK_P
88 stda %f48, [%g3 + %g1] ASI_BLK_P 92 stda %f48, [%g3 + %g1] ASI_BLK_P
89 membar #Sync 93 membar #Sync
90 jmpl %g7 + %g0, %g0 94 ba,pt %xcc, 80f
95 nop
91 96
97 .align 32
9880: jmpl %g7 + %g0, %g0
92 nop 99 nop
93 100
94 .align 32 101 .align 32
@@ -126,6 +133,10 @@ VISenterhalf:
126 stda %f0, [%g2 + %g1] ASI_BLK_P 133 stda %f0, [%g2 + %g1] ASI_BLK_P
127 stda %f16, [%g3 + %g1] ASI_BLK_P 134 stda %f16, [%g3 + %g1] ASI_BLK_P
128 membar #Sync 135 membar #Sync
136 ba,pt %xcc, 4f
137 nop
138
139 .align 32
1294: and %o5, FPRS_DU, %o5 1404: and %o5, FPRS_DU, %o5
130 jmpl %g7 + %g0, %g0 141 jmpl %g7 + %g0, %g0
131 wr %o5, FPRS_FEF, %fprs 142 wr %o5, FPRS_FEF, %fprs
diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S
index e528b8d1a3e6..faf87c31598b 100644
--- a/arch/sparc64/lib/atomic.S
+++ b/arch/sparc64/lib/atomic.S
@@ -7,18 +7,6 @@
7#include <linux/config.h> 7#include <linux/config.h>
8#include <asm/asi.h> 8#include <asm/asi.h>
9 9
10 /* On SMP we need to use memory barriers to ensure
11 * correct memory operation ordering, nop these out
12 * for uniprocessor.
13 */
14#ifdef CONFIG_SMP
15#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad
16#define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore
17#else
18#define ATOMIC_PRE_BARRIER nop
19#define ATOMIC_POST_BARRIER nop
20#endif
21
22 .text 10 .text
23 11
24 /* Two versions of the atomic routines, one that 12 /* Two versions of the atomic routines, one that
@@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
52 nop 40 nop
53 .size atomic_sub, .-atomic_sub 41 .size atomic_sub, .-atomic_sub
54 42
43 /* On SMP we need to use memory barriers to ensure
44 * correct memory operation ordering, nop these out
45 * for uniprocessor.
46 */
47#ifdef CONFIG_SMP
48
49#define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad;
50#define ATOMIC_POST_BARRIER \
51 ba,pt %xcc, 80b; \
52 membar #StoreLoad | #StoreStore
53
5480: retl
55 nop
56#else
57#define ATOMIC_PRE_BARRIER
58#define ATOMIC_POST_BARRIER
59#endif
60
55 .globl atomic_add_ret 61 .globl atomic_add_ret
56 .type atomic_add_ret,#function 62 .type atomic_add_ret,#function
57atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ 63atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
@@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
62 cmp %g1, %g7 68 cmp %g1, %g7
63 bne,pn %icc, 1b 69 bne,pn %icc, 1b
64 add %g7, %o0, %g7 70 add %g7, %o0, %g7
71 sra %g7, 0, %o0
65 ATOMIC_POST_BARRIER 72 ATOMIC_POST_BARRIER
66 retl 73 retl
67 sra %g7, 0, %o0 74 nop
68 .size atomic_add_ret, .-atomic_add_ret 75 .size atomic_add_ret, .-atomic_add_ret
69 76
70 .globl atomic_sub_ret 77 .globl atomic_sub_ret
@@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
77 cmp %g1, %g7 84 cmp %g1, %g7
78 bne,pn %icc, 1b 85 bne,pn %icc, 1b
79 sub %g7, %o0, %g7 86 sub %g7, %o0, %g7
87 sra %g7, 0, %o0
80 ATOMIC_POST_BARRIER 88 ATOMIC_POST_BARRIER
81 retl 89 retl
82 sra %g7, 0, %o0 90 nop
83 .size atomic_sub_ret, .-atomic_sub_ret 91 .size atomic_sub_ret, .-atomic_sub_ret
84 92
85 .globl atomic64_add 93 .globl atomic64_add
@@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
118 cmp %g1, %g7 126 cmp %g1, %g7
119 bne,pn %xcc, 1b 127 bne,pn %xcc, 1b
120 add %g7, %o0, %g7 128 add %g7, %o0, %g7
129 mov %g7, %o0
121 ATOMIC_POST_BARRIER 130 ATOMIC_POST_BARRIER
122 retl 131 retl
123 mov %g7, %o0 132 nop
124 .size atomic64_add_ret, .-atomic64_add_ret 133 .size atomic64_add_ret, .-atomic64_add_ret
125 134
126 .globl atomic64_sub_ret 135 .globl atomic64_sub_ret
@@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
133 cmp %g1, %g7 142 cmp %g1, %g7
134 bne,pn %xcc, 1b 143 bne,pn %xcc, 1b
135 sub %g7, %o0, %g7 144 sub %g7, %o0, %g7
145 mov %g7, %o0
136 ATOMIC_POST_BARRIER 146 ATOMIC_POST_BARRIER
137 retl 147 retl
138 mov %g7, %o0 148 nop
139 .size atomic64_sub_ret, .-atomic64_sub_ret 149 .size atomic64_sub_ret, .-atomic64_sub_ret
diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S
index 886dcd2b376a..31afbfe6c1e8 100644
--- a/arch/sparc64/lib/bitops.S
+++ b/arch/sparc64/lib/bitops.S
@@ -7,20 +7,26 @@
7#include <linux/config.h> 7#include <linux/config.h>
8#include <asm/asi.h> 8#include <asm/asi.h>
9 9
10 .text
11
10 /* On SMP we need to use memory barriers to ensure 12 /* On SMP we need to use memory barriers to ensure
11 * correct memory operation ordering, nop these out 13 * correct memory operation ordering, nop these out
12 * for uniprocessor. 14 * for uniprocessor.
13 */ 15 */
16
14#ifdef CONFIG_SMP 17#ifdef CONFIG_SMP
15#define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad 18#define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad
16#define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore 19#define BITOP_POST_BARRIER \
20 ba,pt %xcc, 80b; \
21 membar #StoreLoad | #StoreStore
22
2380: retl
24 nop
17#else 25#else
18#define BITOP_PRE_BARRIER nop 26#define BITOP_PRE_BARRIER
19#define BITOP_POST_BARRIER nop 27#define BITOP_POST_BARRIER
20#endif 28#endif
21 29
22 .text
23
24 .globl test_and_set_bit 30 .globl test_and_set_bit
25 .type test_and_set_bit,#function 31 .type test_and_set_bit,#function
26test_and_set_bit: /* %o0=nr, %o1=addr */ 32test_and_set_bit: /* %o0=nr, %o1=addr */
@@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */
37 cmp %g7, %g1 43 cmp %g7, %g1
38 bne,pn %xcc, 1b 44 bne,pn %xcc, 1b
39 and %g7, %o2, %g2 45 and %g7, %o2, %g2
40 BITOP_POST_BARRIER
41 clr %o0 46 clr %o0
47 movrne %g2, 1, %o0
48 BITOP_POST_BARRIER
42 retl 49 retl
43 movrne %g2, 1, %o0 50 nop
44 .size test_and_set_bit, .-test_and_set_bit 51 .size test_and_set_bit, .-test_and_set_bit
45 52
46 .globl test_and_clear_bit 53 .globl test_and_clear_bit
@@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */
59 cmp %g7, %g1 66 cmp %g7, %g1
60 bne,pn %xcc, 1b 67 bne,pn %xcc, 1b
61 and %g7, %o2, %g2 68 and %g7, %o2, %g2
62 BITOP_POST_BARRIER
63 clr %o0 69 clr %o0
70 movrne %g2, 1, %o0
71 BITOP_POST_BARRIER
64 retl 72 retl
65 movrne %g2, 1, %o0 73 nop
66 .size test_and_clear_bit, .-test_and_clear_bit 74 .size test_and_clear_bit, .-test_and_clear_bit
67 75
68 .globl test_and_change_bit 76 .globl test_and_change_bit
@@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */
81 cmp %g7, %g1 89 cmp %g7, %g1
82 bne,pn %xcc, 1b 90 bne,pn %xcc, 1b
83 and %g7, %o2, %g2 91 and %g7, %o2, %g2
84 BITOP_POST_BARRIER
85 clr %o0 92 clr %o0
93 movrne %g2, 1, %o0
94 BITOP_POST_BARRIER
86 retl 95 retl
87 movrne %g2, 1, %o0 96 nop
88 .size test_and_change_bit, .-test_and_change_bit 97 .size test_and_change_bit, .-test_and_change_bit
89 98
90 .globl set_bit 99 .globl set_bit
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c
index c421e0c65325..f03344cf784e 100644
--- a/arch/sparc64/lib/debuglocks.c
+++ b/arch/sparc64/lib/debuglocks.c
@@ -252,8 +252,9 @@ wlock_again:
252" andn %%g1, %%g3, %%g7\n" 252" andn %%g1, %%g3, %%g7\n"
253" casx [%0], %%g1, %%g7\n" 253" casx [%0], %%g1, %%g7\n"
254" cmp %%g1, %%g7\n" 254" cmp %%g1, %%g7\n"
255" membar #StoreLoad | #StoreStore\n"
255" bne,pn %%xcc, 1b\n" 256" bne,pn %%xcc, 1b\n"
256" membar #StoreLoad | #StoreStore" 257" nop"
257 : /* no outputs */ 258 : /* no outputs */
258 : "r" (&(rw->lock)) 259 : "r" (&(rw->lock))
259 : "g3", "g1", "g7", "cc", "memory"); 260 : "g3", "g1", "g7", "cc", "memory");
@@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str)
351" andn %%g1, %%g3, %%g7\n" 352" andn %%g1, %%g3, %%g7\n"
352" casx [%0], %%g1, %%g7\n" 353" casx [%0], %%g1, %%g7\n"
353" cmp %%g1, %%g7\n" 354" cmp %%g1, %%g7\n"
355" membar #StoreLoad | #StoreStore\n"
354" bne,pn %%xcc, 1b\n" 356" bne,pn %%xcc, 1b\n"
355" membar #StoreLoad | #StoreStore" 357" nop"
356 : /* no outputs */ 358 : /* no outputs */
357 : "r" (&(rw->lock)) 359 : "r" (&(rw->lock))
358 : "g3", "g1", "g7", "cc", "memory"); 360 : "g3", "g1", "g7", "cc", "memory");
diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S
index 7e6fdaebedba..8ee288dd0afc 100644
--- a/arch/sparc64/lib/dec_and_lock.S
+++ b/arch/sparc64/lib/dec_and_lock.S
@@ -48,8 +48,9 @@ start_to_zero:
48#endif 48#endif
49to_zero: 49to_zero:
50 ldstub [%o1], %g3 50 ldstub [%o1], %g3
51 membar #StoreLoad | #StoreStore
51 brnz,pn %g3, spin_on_lock 52 brnz,pn %g3, spin_on_lock
52 membar #StoreLoad | #StoreStore 53 nop
53loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ 54loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */
54 cmp %g2, %g7 55 cmp %g2, %g7
55 56
@@ -71,8 +72,9 @@ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */
71 nop 72 nop
72spin_on_lock: 73spin_on_lock:
73 ldub [%o1], %g3 74 ldub [%o1], %g3
75 membar #LoadLoad
74 brnz,pt %g3, spin_on_lock 76 brnz,pt %g3, spin_on_lock
75 membar #LoadLoad 77 nop
76 ba,pt %xcc, to_zero 78 ba,pt %xcc, to_zero
77 nop 79 nop
78 nop 80 nop
diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S
index 174ff7b9164c..75f0e6b951d6 100644
--- a/arch/sparc64/lib/rwsem.S
+++ b/arch/sparc64/lib/rwsem.S
@@ -17,8 +17,9 @@ __down_read:
17 bne,pn %icc, 1b 17 bne,pn %icc, 1b
18 add %g7, 1, %g7 18 add %g7, 1, %g7
19 cmp %g7, 0 19 cmp %g7, 0
20 membar #StoreLoad | #StoreStore
20 bl,pn %icc, 3f 21 bl,pn %icc, 3f
21 membar #StoreLoad | #StoreStore 22 nop
222: 232:
23 retl 24 retl
24 nop 25 nop
@@ -57,8 +58,9 @@ __down_write:
57 cmp %g3, %g7 58 cmp %g3, %g7
58 bne,pn %icc, 1b 59 bne,pn %icc, 1b
59 cmp %g7, 0 60 cmp %g7, 0
61 membar #StoreLoad | #StoreStore
60 bne,pn %icc, 3f 62 bne,pn %icc, 3f
61 membar #StoreLoad | #StoreStore 63 nop
622: retl 642: retl
63 nop 65 nop
643: 663:
@@ -97,8 +99,9 @@ __up_read:
97 cmp %g1, %g7 99 cmp %g1, %g7
98 bne,pn %icc, 1b 100 bne,pn %icc, 1b
99 cmp %g7, 0 101 cmp %g7, 0
102 membar #StoreLoad | #StoreStore
100 bl,pn %icc, 3f 103 bl,pn %icc, 3f
101 membar #StoreLoad | #StoreStore 104 nop
1022: retl 1052: retl
103 nop 106 nop
1043: sethi %hi(RWSEM_ACTIVE_MASK), %g1 1073: sethi %hi(RWSEM_ACTIVE_MASK), %g1
@@ -126,8 +129,9 @@ __up_write:
126 bne,pn %icc, 1b 129 bne,pn %icc, 1b
127 sub %g7, %g1, %g7 130 sub %g7, %g1, %g7
128 cmp %g7, 0 131 cmp %g7, 0
132 membar #StoreLoad | #StoreStore
129 bl,pn %icc, 3f 133 bl,pn %icc, 3f
130 membar #StoreLoad | #StoreStore 134 nop
1312: 1352:
132 retl 136 retl
133 nop 137 nop
@@ -151,8 +155,9 @@ __downgrade_write:
151 bne,pn %icc, 1b 155 bne,pn %icc, 1b
152 sub %g7, %g1, %g7 156 sub %g7, %g1, %g7
153 cmp %g7, 0 157 cmp %g7, 0
158 membar #StoreLoad | #StoreStore
154 bl,pn %icc, 3f 159 bl,pn %icc, 3f
155 membar #StoreLoad | #StoreStore 160 nop
1562: 1612:
157 retl 162 retl
158 nop 163 nop