 arch/sparc64/kernel/entry.S      |   6
 arch/sparc64/kernel/semaphore.c  |  12
 arch/sparc64/kernel/trampoline.S |   3
 arch/sparc64/lib/U1memcpy.S      | 103
 arch/sparc64/lib/VISsave.S       |  15
 arch/sparc64/lib/atomic.S        |  42
 arch/sparc64/lib/bitops.S        |  31
 arch/sparc64/lib/debuglocks.c    |   6
 arch/sparc64/lib/dec_and_lock.S  |   6
 arch/sparc64/lib/rwsem.S         |  15
 arch/sparc64/mm/init.c           |   6
 arch/sparc64/mm/ultra.S          |   3
 include/asm-sparc64/rwsem.h      |   3
 include/asm-sparc64/spinlock.h   |  29
 include/asm-sparc64/spitfire.h   |   1
 15 files changed, 171 insertions(+), 110 deletions(-)
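
Every hunk below applies the same transformation: a membar that used to sit in the delay slot of a conditional branch is hoisted to just before the branch, and a plain nop takes its place in the slot (the jmpl/retl return paths get a slightly different treatment, noted at the files concerned). The page itself does not state the motivation; it matches the published workaround for an instruction-fetch erratum on early UltraSPARC chips that can hang when a store-synchronizing membar occupies a branch delay slot, but that attribution is my assumption, not something this diff confirms. Schematically, using the spin-loop from trampoline.S below:

	! Before: the membar rides in the brnz delay slot.
	1:	ldstub	[%g2 + %lo(prom_entry_lock)], %g1
		brnz,pn	%g1, 1b
		 membar	#StoreLoad | #StoreStore

	! After: the membar issues first; the delay slot holds a harmless nop.
	1:	ldstub	[%g2 + %lo(prom_entry_lock)], %g1
		membar	#StoreLoad | #StoreStore
		brnz,pn	%g1, 1b
		 nop

The memory-ordering guarantee is unchanged: the barrier still executes on every iteration, just one slot earlier.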
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index a47f2d0b1a29..ffe717ab7f83 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -271,8 +271,9 @@ cplus_fptrap_insn_1:
 	fmuld		%f0, %f2, %f26
 	faddd		%f0, %f2, %f28
 	fmuld		%f0, %f2, %f30
+	membar		#Sync
 	b,pt		%xcc, fpdis_exit
-	 membar		#Sync
+	 nop
 2:	andcc		%g5, FPRS_DU, %g0
 	bne,pt		%icc, 3f
 	 fzero		%f32
@@ -301,8 +302,9 @@ cplus_fptrap_insn_2:
 	fmuld		%f32, %f34, %f58
 	faddd		%f32, %f34, %f60
 	fmuld		%f32, %f34, %f62
+	membar		#Sync
 	ba,pt		%xcc, fpdis_exit
-	 membar		#Sync
+	 nop
 3:	mov		SECONDARY_CONTEXT, %g3
 	add		%g6, TI_FPREGS, %g1
 	ldxa		[%g3] ASI_DMMU, %g5
diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c
index 63496c43fe17..a809e63f03ef 100644
--- a/arch/sparc64/kernel/semaphore.c
+++ b/arch/sparc64/kernel/semaphore.c
@@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr)
 "	add	%1, %4, %1\n"
 "	cas	[%3], %0, %1\n"
 "	cmp	%0, %1\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bne,pn	%%icc, 1b\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
 	: "r" (&sem->count), "r" (incr), "m" (sem->count)
 	: "cc");
@@ -71,8 +72,9 @@ void up(struct semaphore *sem)
 "	cmp	%%g1, %%g7\n"
 "	bne,pn	%%icc, 1b\n"
 "	 addcc	%%g7, 1, %%g0\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	ble,pn	%%icc, 3f\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "2:\n"
 "	.subsection 2\n"
 "3:	mov	%0, %%g1\n"
@@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem)
 "	cmp	%%g1, %%g7\n"
 "	bne,pn	%%icc, 1b\n"
 "	 cmp	%%g7, 1\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bl,pn	%%icc, 3f\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "2:\n"
 "	.subsection 2\n"
 "3:	mov	%0, %%g1\n"
@@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem)
 "	cmp	%%g1, %%g7\n"
 "	bne,pn	%%icc, 1b\n"
 "	 cmp	%%g7, 1\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bl,pn	%%icc, 3f\n"
-"	 membar	#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "2:\n"
 "	.subsection 2\n"
 "3:	mov	%2, %%g1\n"
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 2c8f9344b4ee..3a145fc39cf2 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -98,8 +98,9 @@ startup_continue:
 
 	sethi		%hi(prom_entry_lock), %g2
 1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
+	membar		#StoreLoad | #StoreStore
 	brnz,pn		%g1, 1b
-	 membar		#StoreLoad | #StoreStore
+	 nop
 
 	sethi		%hi(p1275buf), %g2
 	or		%g2, %lo(p1275buf), %g2
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S
index da9b520c7189..bafd2fc07acb 100644
--- a/arch/sparc64/lib/U1memcpy.S
+++ b/arch/sparc64/lib/U1memcpy.S
@@ -87,14 +87,17 @@
 #define LOOP_CHUNK3(src, dest, len, branch_dest) \
 	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
 
+#define DO_SYNC			membar	#Sync;
 #define STORE_SYNC(dest, fsrc) \
 	EX_ST(STORE_BLK(%fsrc, %dest)); \
-	add			%dest, 0x40, %dest;
+	add			%dest, 0x40, %dest; \
+	DO_SYNC
 
 #define STORE_JUMP(dest, fsrc, target) \
 	EX_ST(STORE_BLK(%fsrc, %dest)); \
 	add			%dest, 0x40, %dest; \
-	ba,pt			%xcc, target;
+	ba,pt			%xcc, target; \
+	nop;
 
 #define FINISH_VISCHUNK(dest, f0, f1, left) \
 	subcc			%left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f0, %f2, %f48
 1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
-	STORE_JUMP(o0, f48, 40f) membar #Sync
+	STORE_JUMP(o0, f48, 40f)
 2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
-	STORE_JUMP(o0, f48, 48f) membar #Sync
+	STORE_JUMP(o0, f48, 48f)
 3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
-	STORE_JUMP(o0, f48, 56f) membar #Sync
+	STORE_JUMP(o0, f48, 56f)
 
 1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f2, %f4, %f48
 1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
-	STORE_JUMP(o0, f48, 41f) membar #Sync
+	STORE_JUMP(o0, f48, 41f)
 2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
-	STORE_JUMP(o0, f48, 49f) membar #Sync
+	STORE_JUMP(o0, f48, 49f)
 3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
-	STORE_JUMP(o0, f48, 57f) membar #Sync
+	STORE_JUMP(o0, f48, 57f)
 
 1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f4, %f6, %f48
 1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
-	STORE_JUMP(o0, f48, 42f) membar #Sync
+	STORE_JUMP(o0, f48, 42f)
 2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
-	STORE_JUMP(o0, f48, 50f) membar #Sync
+	STORE_JUMP(o0, f48, 50f)
 3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
-	STORE_JUMP(o0, f48, 58f) membar #Sync
+	STORE_JUMP(o0, f48, 58f)
 
 1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f6, %f8, %f48
 1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
-	STORE_JUMP(o0, f48, 43f) membar #Sync
+	STORE_JUMP(o0, f48, 43f)
 2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
-	STORE_JUMP(o0, f48, 51f) membar #Sync
+	STORE_JUMP(o0, f48, 51f)
 3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
-	STORE_JUMP(o0, f48, 59f) membar #Sync
+	STORE_JUMP(o0, f48, 59f)
 
 1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f8, %f10, %f48
 1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
-	STORE_JUMP(o0, f48, 44f) membar #Sync
+	STORE_JUMP(o0, f48, 44f)
 2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
-	STORE_JUMP(o0, f48, 52f) membar #Sync
+	STORE_JUMP(o0, f48, 52f)
 3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
-	STORE_JUMP(o0, f48, 60f) membar #Sync
+	STORE_JUMP(o0, f48, 60f)
 
 1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f10, %f12, %f48
 1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
-	STORE_JUMP(o0, f48, 45f) membar #Sync
+	STORE_JUMP(o0, f48, 45f)
 2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
-	STORE_JUMP(o0, f48, 53f) membar #Sync
+	STORE_JUMP(o0, f48, 53f)
 3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
-	STORE_JUMP(o0, f48, 61f) membar #Sync
+	STORE_JUMP(o0, f48, 61f)
 
 1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f12, %f14, %f48
 1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
-	STORE_JUMP(o0, f48, 46f) membar #Sync
+	STORE_JUMP(o0, f48, 46f)
 2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
-	STORE_JUMP(o0, f48, 54f) membar #Sync
+	STORE_JUMP(o0, f48, 54f)
 3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
-	STORE_JUMP(o0, f48, 62f) membar #Sync
+	STORE_JUMP(o0, f48, 62f)
 
 1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
 	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
 	ba,pt		%xcc, 1b+4
 	 faligndata	%f14, %f16, %f48
 1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
-	STORE_JUMP(o0, f48, 47f) membar #Sync
+	STORE_JUMP(o0, f48, 47f)
 2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
-	STORE_JUMP(o0, f48, 55f) membar #Sync
+	STORE_JUMP(o0, f48, 55f)
 3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
-	STORE_SYNC(o0, f48) membar #Sync
+	STORE_SYNC(o0, f48)
 	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
-	STORE_JUMP(o0, f48, 63f) membar #Sync
+	STORE_JUMP(o0, f48, 63f)
 
 40:	FINISH_VISCHUNK(o0, f0, f2, g3)
 41:	FINISH_VISCHUNK(o0, f2, f4, g3)
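
U1memcpy.S reaches the same end state through its macros: the membar #Sync that every call site used to append by hand, where it landed in the delay slot of the ba,pt emitted by STORE_JUMP, is folded into the macros themselves, so all the invocation-site membars above simply disappear. Roughly, eliding the EX_ST exception-table wrapper and assuming STORE_BLK is the usual block-store stda (both are defined elsewhere in this file, outside the visible hunks):

	! Old: "STORE_JUMP(o0, f48, 40f) membar #Sync" expanded to
		stda	%f48, [%o0] ASI_BLK_P
		add	%o0, 0x40, %o0
		ba,pt	%xcc, 40f
		 membar	#Sync		! barrier stuck in the delay slot

	! New: "STORE_JUMP(o0, f48, 40f)" expands to
		stda	%f48, [%o0] ASI_BLK_P
		add	%o0, 0x40, %o0
		ba,pt	%xcc, 40f
		 nop			! delay slot now harmless

STORE_SYNC, meanwhile, performs its membar #Sync inline via DO_SYNC, after the add and outside any delay slot.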
diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S
index 65e328d600a8..4e18989bd602 100644
--- a/arch/sparc64/lib/VISsave.S
+++ b/arch/sparc64/lib/VISsave.S
@@ -72,7 +72,11 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 
 	stda		%f48, [%g3 + %g1] ASI_BLK_P
 5:	membar		#Sync
-	jmpl		%g7 + %g0, %g0
+	ba,pt		%xcc, 80f
+	 nop
+
+	.align		32
+80:	jmpl		%g7 + %g0, %g0
 	 nop
 
 6:	ldub		[%g3 + TI_FPSAVED], %o5
@@ -87,8 +91,11 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 	stda		%f32, [%g2 + %g1] ASI_BLK_P
 	stda		%f48, [%g3 + %g1] ASI_BLK_P
 	membar		#Sync
-	jmpl		%g7 + %g0, %g0
+	ba,pt		%xcc, 80f
+	 nop
 
+	.align		32
+80:	jmpl		%g7 + %g0, %g0
 	 nop
 
 	.align		32
@@ -126,6 +133,10 @@ VISenterhalf:
 	stda		%f0, [%g2 + %g1] ASI_BLK_P
 	stda		%f16, [%g3 + %g1] ASI_BLK_P
 	membar		#Sync
+	ba,pt		%xcc, 4f
+	 nop
+
+	.align		32
 4:	and		%o5, FPRS_DU, %o5
 	jmpl		%g7 + %g0, %g0
 	 wr		%o5, FPRS_FEF, %fprs
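
VISsave.S is handled differently: the membar #Sync must stay where it is, since it orders the block stores just issued, so instead the jmpl return that used to follow it is moved out of line. The return now takes an unconditional ba,pt to a 32-byte-aligned stub:

		membar	#Sync
		ba,pt	%xcc, 80f	! hop to an out-of-line return stub
		 nop

		.align	32
	80:	jmpl	%g7 + %g0, %g0	! the actual return
		 nop

The diff does not say why the alignment matters; my reading is that it keeps the jmpl and its delay slot inside one aligned fetch group, away from the membar, but treat that as a guess rather than documented fact.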
diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S
index e528b8d1a3e6..faf87c31598b 100644
--- a/arch/sparc64/lib/atomic.S
+++ b/arch/sparc64/lib/atomic.S
@@ -7,18 +7,6 @@
 #include <linux/config.h>
 #include <asm/asi.h>
 
-	/* On SMP we need to use memory barriers to ensure
-	 * correct memory operation ordering, nop these out
-	 * for uniprocessor.
-	 */
-#ifdef CONFIG_SMP
-#define ATOMIC_PRE_BARRIER	membar #StoreLoad | #LoadLoad
-#define ATOMIC_POST_BARRIER	membar #StoreLoad | #StoreStore
-#else
-#define ATOMIC_PRE_BARRIER	nop
-#define ATOMIC_POST_BARRIER	nop
-#endif
-
 	.text
 
 	/* Two versions of the atomic routines, one that
@@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
 	 nop
 	.size	atomic_sub, .-atomic_sub
 
+	/* On SMP we need to use memory barriers to ensure
+	 * correct memory operation ordering, nop these out
+	 * for uniprocessor.
+	 */
+#ifdef CONFIG_SMP
+
+#define ATOMIC_PRE_BARRIER	membar #StoreLoad | #LoadLoad;
+#define ATOMIC_POST_BARRIER	\
+	ba,pt %xcc, 80b;	\
+	membar #StoreLoad | #StoreStore
+
+80:	retl
+	 nop
+#else
+#define ATOMIC_PRE_BARRIER
+#define ATOMIC_POST_BARRIER
+#endif
+
 	.globl	atomic_add_ret
 	.type	atomic_add_ret,#function
 atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
@@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%icc, 1b
 	 add	%g7, %o0, %g7
+	sra	%g7, 0, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 sra	%g7, 0, %o0
+	 nop
 	.size	atomic_add_ret, .-atomic_add_ret
 
 	.globl	atomic_sub_ret
@@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%icc, 1b
 	 sub	%g7, %o0, %g7
+	sra	%g7, 0, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 sra	%g7, 0, %o0
+	 nop
 	.size	atomic_sub_ret, .-atomic_sub_ret
 
 	.globl	atomic64_add
@@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%xcc, 1b
 	 add	%g7, %o0, %g7
+	mov	%g7, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 mov	%g7, %o0
+	 nop
 	.size	atomic64_add_ret, .-atomic64_add_ret
 
 	.globl	atomic64_sub_ret
@@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	cmp	%g1, %g7
 	bne,pn	%xcc, 1b
 	 sub	%g7, %o0, %g7
+	mov	%g7, %o0
 	ATOMIC_POST_BARRIER
 	retl
-	 mov	%g7, %o0
+	 nop
 	.size	atomic64_sub_ret, .-atomic64_sub_ret
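
atomic.S gets the most involved version of the fix. The barrier macro block moves from the top of the file to just after atomic_sub because, on SMP, ATOMIC_POST_BARRIER is no longer a bare membar: it branches to a shared return stub, and that stub must exist in .text before its users so the 80b backward reference resolves. This is also why each *_ret routine now computes its result before the barrier. With CONFIG_SMP, the tail of atomic_add_ret effectively becomes:

	! Shared return stub, emitted once where the macro block now lives:
	80:	retl
		 nop

	! Tail of atomic_add_ret after macro expansion:
		sra	%g7, 0, %o0			! result fixed up first
		ba,pt	%xcc, 80b			! hop back to the stub
		 membar	#StoreLoad | #StoreStore	! barrier rides the ba delay slot
		retl					! unreachable on SMP; this pair
		 nop					! runs only when the macros are empty

The trailing retl/nop pair is reached only in uniprocessor builds, where both barrier macros expand to nothing.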
diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S
index 886dcd2b376a..31afbfe6c1e8 100644
--- a/arch/sparc64/lib/bitops.S
+++ b/arch/sparc64/lib/bitops.S
@@ -7,20 +7,26 @@
 #include <linux/config.h>
 #include <asm/asi.h>
 
+	.text
+
 	/* On SMP we need to use memory barriers to ensure
 	 * correct memory operation ordering, nop these out
 	 * for uniprocessor.
 	 */
+
 #ifdef CONFIG_SMP
 #define BITOP_PRE_BARRIER	membar #StoreLoad | #LoadLoad
-#define BITOP_POST_BARRIER	membar #StoreLoad | #StoreStore
+#define BITOP_POST_BARRIER	\
+	ba,pt %xcc, 80b;	\
+	membar #StoreLoad | #StoreStore
+
+80:	retl
+	 nop
 #else
-#define BITOP_PRE_BARRIER	nop
-#define BITOP_POST_BARRIER	nop
+#define BITOP_PRE_BARRIER
+#define BITOP_POST_BARRIER
 #endif
 
-	.text
-
 	.globl	test_and_set_bit
 	.type	test_and_set_bit,#function
 test_and_set_bit: /* %o0=nr, %o1=addr */
@@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */
 	cmp	%g7, %g1
 	bne,pn	%xcc, 1b
 	 and	%g7, %o2, %g2
-	BITOP_POST_BARRIER
 	clr	%o0
+	movrne	%g2, 1, %o0
+	BITOP_POST_BARRIER
 	retl
-	 movrne	%g2, 1, %o0
+	 nop
 	.size	test_and_set_bit, .-test_and_set_bit
 
 	.globl	test_and_clear_bit
@@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */
 	cmp	%g7, %g1
 	bne,pn	%xcc, 1b
 	 and	%g7, %o2, %g2
-	BITOP_POST_BARRIER
 	clr	%o0
+	movrne	%g2, 1, %o0
+	BITOP_POST_BARRIER
 	retl
-	 movrne	%g2, 1, %o0
+	 nop
 	.size	test_and_clear_bit, .-test_and_clear_bit
 
 	.globl	test_and_change_bit
@@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */
 	cmp	%g7, %g1
 	bne,pn	%xcc, 1b
 	 and	%g7, %o2, %g2
-	BITOP_POST_BARRIER
 	clr	%o0
+	movrne	%g2, 1, %o0
+	BITOP_POST_BARRIER
 	retl
-	 movrne	%g2, 1, %o0
+	 nop
 	.size	test_and_change_bit, .-test_and_change_bit
 
 	.globl	set_bit
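
bitops.S uses the same shared-stub BITOP_POST_BARRIER, which is why each test_and_*_bit now computes its return value in full before the barrier runs; once the SMP barrier branches off through 80b, control never comes back to the function body. The new tail of test_and_set_bit:

		and	%g7, %o2, %g2		! isolate the old bit value
		clr	%o0
		movrne	%g2, 1, %o0		! %o0 = old bit set ? 1 : 0
		BITOP_POST_BARRIER		! SMP: ba,pt 80b, membar in its slot
		retl				! reached only on !SMP
		 nop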
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c
index c421e0c65325..f03344cf784e 100644
--- a/arch/sparc64/lib/debuglocks.c
+++ b/arch/sparc64/lib/debuglocks.c
@@ -252,8 +252,9 @@ wlock_again:
 "	andn	%%g1, %%g3, %%g7\n"
 "	casx	[%0], %%g1, %%g7\n"
 "	cmp	%%g1, %%g7\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bne,pn	%%xcc, 1b\n"
-"	 membar	#StoreLoad | #StoreStore"
+"	 nop"
 	: /* no outputs */
 	: "r" (&(rw->lock))
 	: "g3", "g1", "g7", "cc", "memory");
@@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str)
 "	andn	%%g1, %%g3, %%g7\n"
 "	casx	[%0], %%g1, %%g7\n"
 "	cmp	%%g1, %%g7\n"
+"	membar	#StoreLoad | #StoreStore\n"
 "	bne,pn	%%xcc, 1b\n"
-"	 membar	#StoreLoad | #StoreStore"
+"	 nop"
 	: /* no outputs */
 	: "r" (&(rw->lock))
 	: "g3", "g1", "g7", "cc", "memory");
diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S
index 7e6fdaebedba..8ee288dd0afc 100644
--- a/arch/sparc64/lib/dec_and_lock.S
+++ b/arch/sparc64/lib/dec_and_lock.S
@@ -48,8 +48,9 @@ start_to_zero:
 #endif
 to_zero:
 	ldstub	[%o1], %g3
+	membar	#StoreLoad | #StoreStore
 	brnz,pn	%g3, spin_on_lock
-	 membar	#StoreLoad | #StoreStore
+	 nop
 loop2:	cas	[%o0], %g2, %g7	/* ASSERT(g7 == 0) */
 	cmp	%g2, %g7
 
@@ -71,8 +72,9 @@ loop2:	cas	[%o0], %g2, %g7	/* ASSERT(g7 == 0) */
 	 nop
 spin_on_lock:
 	ldub	[%o1], %g3
+	membar	#LoadLoad
 	brnz,pt	%g3, spin_on_lock
-	 membar	#LoadLoad
+	 nop
 	ba,pt	%xcc, to_zero
 	 nop
 	nop
diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S
index 174ff7b9164c..75f0e6b951d6 100644
--- a/arch/sparc64/lib/rwsem.S
+++ b/arch/sparc64/lib/rwsem.S
@@ -17,8 +17,9 @@ __down_read:
 	bne,pn	%icc, 1b
 	 add	%g7, 1, %g7
 	cmp	%g7, 0
+	membar	#StoreLoad | #StoreStore
 	bl,pn	%icc, 3f
-	 membar	#StoreLoad | #StoreStore
+	 nop
 2:
 	retl
 	 nop
@@ -57,8 +58,9 @@ __down_write:
 	cmp	%g3, %g7
 	bne,pn	%icc, 1b
 	 cmp	%g7, 0
+	membar	#StoreLoad | #StoreStore
 	bne,pn	%icc, 3f
-	 membar	#StoreLoad | #StoreStore
+	 nop
 2:	retl
 	 nop
 3:
@@ -97,8 +99,9 @@ __up_read:
 	cmp	%g1, %g7
 	bne,pn	%icc, 1b
 	 cmp	%g7, 0
+	membar	#StoreLoad | #StoreStore
 	bl,pn	%icc, 3f
-	 membar	#StoreLoad | #StoreStore
+	 nop
 2:	retl
 	 nop
 3:	sethi	%hi(RWSEM_ACTIVE_MASK), %g1
@@ -126,8 +129,9 @@ __up_write:
 	bne,pn	%icc, 1b
 	 sub	%g7, %g1, %g7
 	cmp	%g7, 0
+	membar	#StoreLoad | #StoreStore
 	bl,pn	%icc, 3f
-	 membar	#StoreLoad | #StoreStore
+	 nop
 2:
 	retl
 	 nop
@@ -151,8 +155,9 @@ __downgrade_write:
 	bne,pn	%icc, 1b
 	 sub	%g7, %g1, %g7
 	cmp	%g7, 0
+	membar	#StoreLoad | #StoreStore
 	bl,pn	%icc, 3f
-	 membar	#StoreLoad | #StoreStore
+	 nop
 2:
 	retl
 	 nop
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 9c5222075da9..8fc413cb6acd 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -136,8 +136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu)
 			     "or	%%g1, %0, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
+			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
-			     " membar	#StoreLoad | #StoreStore"
+			     " nop"
 			     : /* no outputs */
 			     : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
 			     : "g1", "g7");
@@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
 			     " andn	%%g7, %1, %%g1\n\t"
 			     "casx	[%2], %%g7, %%g1\n\t"
 			     "cmp	%%g7, %%g1\n\t"
+			     "membar	#StoreLoad | #StoreStore\n\t"
 			     "bne,pn	%%xcc, 1b\n\t"
-			     " membar	#StoreLoad | #StoreStore\n"
+			     " nop\n"
 			     "2:"
 			     : /* no outputs */
 			     : "r" (cpu), "r" (mask), "r" (&page->flags),
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 7a0934321010..7a2431d3abc7 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending: /* 22 insns */
 	 andn	%o3, 1, %o3
 	stxa	%g0, [%o3] ASI_IMMU_DEMAP
 2:	stxa	%g0, [%o3] ASI_DMMU_DEMAP
+	membar	#Sync
 	brnz,pt	%o1, 1b
-	 membar	#Sync
+	 nop
 	stxa	%g2, [%o4] ASI_DMMU
 	flush	%g6
 	wrpr	%g0, 0, %tl
diff --git a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h
index bf2ae90ed3df..a1cc94f95984 100644
--- a/include/asm-sparc64/rwsem.h
+++ b/include/asm-sparc64/rwsem.h
@@ -55,8 +55,9 @@ static __inline__ int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 	"add	%%g1, %1, %%g7\n\t"
 	"cas	[%2], %%g1, %%g7\n\t"
 	"cmp	%%g1, %%g7\n\t"
+	"membar	#StoreLoad | #StoreStore\n\t"
 	"bne,pn	%%icc, 1b\n\t"
-	" membar #StoreLoad | #StoreStore\n\t"
+	" nop\n\t"
 	"mov	%%g7, %0\n\t"
 	: "=&r" (tmp)
 	: "0" (tmp), "r" (sem)
diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h
index db7581bdb531..9cb93a5c2b4f 100644
--- a/include/asm-sparc64/spinlock.h
+++ b/include/asm-sparc64/spinlock.h
@@ -52,12 +52,14 @@ static inline void _raw_spin_lock(spinlock_t *lock)
 
 	__asm__ __volatile__(
 "1:	ldstub		[%1], %0\n"
+"	membar		#StoreLoad | #StoreStore\n"
 "	brnz,pn		%0, 2f\n"
-"	 membar		#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "	.subsection	2\n"
 "2:	ldub		[%1], %0\n"
+"	membar		#LoadLoad\n"
 "	brnz,pt		%0, 2b\n"
-"	 membar		#LoadLoad\n"
+"	 nop\n"
 "	ba,a,pt		%%xcc, 1b\n"
 "	.previous"
 	: "=&r" (tmp)
@@ -95,16 +97,18 @@ static inline void _raw_spin_lock_flags(spinlock_t *lock, unsigned long flags)
 
 	__asm__ __volatile__(
 "1:	ldstub		[%2], %0\n"
-"	brnz,pn		%0, 2f\n"
 "	membar		#StoreLoad | #StoreStore\n"
+"	brnz,pn		%0, 2f\n"
+"	 nop\n"
 "	.subsection	2\n"
 "2:	rdpr		%%pil, %1\n"
 "	wrpr		%3, %%pil\n"
 "3:	ldub		[%2], %0\n"
-"	brnz,pt		%0, 3b\n"
 "	membar		#LoadLoad\n"
+"	brnz,pt		%0, 3b\n"
+"	 nop\n"
 "	ba,pt		%%xcc, 1b\n"
 "	 wrpr		%1, %%pil\n"
 "	.previous"
 	: "=&r" (tmp1), "=&r" (tmp2)
 	: "r"(lock), "r"(flags)
@@ -162,12 +166,14 @@ static void inline __read_lock(rwlock_t *lock)
 "4:	add		%0, 1, %1\n"
 "	cas		[%2], %0, %1\n"
 "	cmp		%0, %1\n"
+"	membar		#StoreLoad | #StoreStore\n"
 "	bne,pn		%%icc, 1b\n"
-"	 membar		#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "	.subsection	2\n"
 "2:	ldsw		[%2], %0\n"
+"	membar		#LoadLoad\n"
 "	brlz,pt		%0, 2b\n"
-"	 membar		#LoadLoad\n"
+"	 nop\n"
 "	ba,a,pt		%%xcc, 4b\n"
 "	.previous"
 	: "=&r" (tmp1), "=&r" (tmp2)
@@ -204,12 +210,14 @@ static void inline __write_lock(rwlock_t *lock)
 "4:	or		%0, %3, %1\n"
 "	cas		[%2], %0, %1\n"
 "	cmp		%0, %1\n"
+"	membar		#StoreLoad | #StoreStore\n"
 "	bne,pn		%%icc, 1b\n"
-"	 membar		#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "	.subsection	2\n"
 "2:	lduw		[%2], %0\n"
+"	membar		#LoadLoad\n"
 "	brnz,pt		%0, 2b\n"
-"	 membar		#LoadLoad\n"
+"	 nop\n"
 "	ba,a,pt		%%xcc, 4b\n"
 "	.previous"
 	: "=&r" (tmp1), "=&r" (tmp2)
@@ -240,8 +248,9 @@ static int inline __write_trylock(rwlock_t *lock)
 "	or		%0, %4, %1\n"
 "	cas		[%3], %0, %1\n"
 "	cmp		%0, %1\n"
+"	membar		#StoreLoad | #StoreStore\n"
 "	bne,pn		%%icc, 1b\n"
-"	 membar		#StoreLoad | #StoreStore\n"
+"	 nop\n"
 "	mov		1, %2\n"
 "2:"
 	: "=&r" (tmp1), "=&r" (tmp2), "=&r" (result)
diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
index 9d7613eea812..1aa932773af8 100644
--- a/include/asm-sparc64/spitfire.h
+++ b/include/asm-sparc64/spitfire.h
@@ -111,7 +111,6 @@ static __inline__ void spitfire_put_dcache_tag(unsigned long addr, unsigned long
 			     "membar	#Sync"
 			     : /* No outputs */
 			     : "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG));
-	__asm__ __volatile__ ("membar #Sync" : : : "memory");
 }
 
 /* The instruction cache lines are flushed with this, but note that
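
The spitfire.h hunk is a simple cleanup: the inline asm statement already ends with a membar #Sync, so the separate trailing membar statement is dropped as redundant. The resulting helper, reconstructed for context (the first stxa line sits just above the visible hunk, so its exact text here is inferred, not quoted from the diff):

	static __inline__ void spitfire_put_dcache_tag(unsigned long addr,
						       unsigned long tag)
	{
		__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
				     "membar	#Sync"
				     : /* No outputs */
				     : "r" (tag), "r" (addr),
				       "i" (ASI_DCACHE_TAG));
	}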
