diff options
-rw-r--r-- | arch/sparc64/kernel/entry.S | 6 | ||||
-rw-r--r-- | arch/sparc64/kernel/semaphore.c | 12 | ||||
-rw-r--r-- | arch/sparc64/kernel/trampoline.S | 3 | ||||
-rw-r--r-- | arch/sparc64/lib/U1memcpy.S | 103 | ||||
-rw-r--r-- | arch/sparc64/lib/VISsave.S | 15 | ||||
-rw-r--r-- | arch/sparc64/lib/atomic.S | 42 | ||||
-rw-r--r-- | arch/sparc64/lib/bitops.S | 31 | ||||
-rw-r--r-- | arch/sparc64/lib/debuglocks.c | 6 | ||||
-rw-r--r-- | arch/sparc64/lib/dec_and_lock.S | 6 | ||||
-rw-r--r-- | arch/sparc64/lib/rwsem.S | 15 | ||||
-rw-r--r-- | arch/sparc64/mm/init.c | 6 | ||||
-rw-r--r-- | arch/sparc64/mm/ultra.S | 3 | ||||
-rw-r--r-- | include/asm-sparc64/rwsem.h | 3 | ||||
-rw-r--r-- | include/asm-sparc64/spinlock.h | 29 | ||||
-rw-r--r-- | include/asm-sparc64/spitfire.h | 1 |
15 files changed, 171 insertions, 110 deletions
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index a47f2d0b1a29..ffe717ab7f83 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S | |||
@@ -271,8 +271,9 @@ cplus_fptrap_insn_1: | |||
271 | fmuld %f0, %f2, %f26 | 271 | fmuld %f0, %f2, %f26 |
272 | faddd %f0, %f2, %f28 | 272 | faddd %f0, %f2, %f28 |
273 | fmuld %f0, %f2, %f30 | 273 | fmuld %f0, %f2, %f30 |
274 | membar #Sync | ||
274 | b,pt %xcc, fpdis_exit | 275 | b,pt %xcc, fpdis_exit |
275 | membar #Sync | 276 | nop |
276 | 2: andcc %g5, FPRS_DU, %g0 | 277 | 2: andcc %g5, FPRS_DU, %g0 |
277 | bne,pt %icc, 3f | 278 | bne,pt %icc, 3f |
278 | fzero %f32 | 279 | fzero %f32 |
@@ -301,8 +302,9 @@ cplus_fptrap_insn_2: | |||
301 | fmuld %f32, %f34, %f58 | 302 | fmuld %f32, %f34, %f58 |
302 | faddd %f32, %f34, %f60 | 303 | faddd %f32, %f34, %f60 |
303 | fmuld %f32, %f34, %f62 | 304 | fmuld %f32, %f34, %f62 |
305 | membar #Sync | ||
304 | ba,pt %xcc, fpdis_exit | 306 | ba,pt %xcc, fpdis_exit |
305 | membar #Sync | 307 | nop |
306 | 3: mov SECONDARY_CONTEXT, %g3 | 308 | 3: mov SECONDARY_CONTEXT, %g3 |
307 | add %g6, TI_FPREGS, %g1 | 309 | add %g6, TI_FPREGS, %g1 |
308 | ldxa [%g3] ASI_DMMU, %g5 | 310 | ldxa [%g3] ASI_DMMU, %g5 |
diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c index 63496c43fe17..a809e63f03ef 100644 --- a/arch/sparc64/kernel/semaphore.c +++ b/arch/sparc64/kernel/semaphore.c | |||
@@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr) | |||
32 | " add %1, %4, %1\n" | 32 | " add %1, %4, %1\n" |
33 | " cas [%3], %0, %1\n" | 33 | " cas [%3], %0, %1\n" |
34 | " cmp %0, %1\n" | 34 | " cmp %0, %1\n" |
35 | " membar #StoreLoad | #StoreStore\n" | ||
35 | " bne,pn %%icc, 1b\n" | 36 | " bne,pn %%icc, 1b\n" |
36 | " membar #StoreLoad | #StoreStore\n" | 37 | " nop\n" |
37 | : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) | 38 | : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) |
38 | : "r" (&sem->count), "r" (incr), "m" (sem->count) | 39 | : "r" (&sem->count), "r" (incr), "m" (sem->count) |
39 | : "cc"); | 40 | : "cc"); |
@@ -71,8 +72,9 @@ void up(struct semaphore *sem) | |||
71 | " cmp %%g1, %%g7\n" | 72 | " cmp %%g1, %%g7\n" |
72 | " bne,pn %%icc, 1b\n" | 73 | " bne,pn %%icc, 1b\n" |
73 | " addcc %%g7, 1, %%g0\n" | 74 | " addcc %%g7, 1, %%g0\n" |
75 | " membar #StoreLoad | #StoreStore\n" | ||
74 | " ble,pn %%icc, 3f\n" | 76 | " ble,pn %%icc, 3f\n" |
75 | " membar #StoreLoad | #StoreStore\n" | 77 | " nop\n" |
76 | "2:\n" | 78 | "2:\n" |
77 | " .subsection 2\n" | 79 | " .subsection 2\n" |
78 | "3: mov %0, %%g1\n" | 80 | "3: mov %0, %%g1\n" |
@@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem) | |||
128 | " cmp %%g1, %%g7\n" | 130 | " cmp %%g1, %%g7\n" |
129 | " bne,pn %%icc, 1b\n" | 131 | " bne,pn %%icc, 1b\n" |
130 | " cmp %%g7, 1\n" | 132 | " cmp %%g7, 1\n" |
133 | " membar #StoreLoad | #StoreStore\n" | ||
131 | " bl,pn %%icc, 3f\n" | 134 | " bl,pn %%icc, 3f\n" |
132 | " membar #StoreLoad | #StoreStore\n" | 135 | " nop\n" |
133 | "2:\n" | 136 | "2:\n" |
134 | " .subsection 2\n" | 137 | " .subsection 2\n" |
135 | "3: mov %0, %%g1\n" | 138 | "3: mov %0, %%g1\n" |
@@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem) | |||
233 | " cmp %%g1, %%g7\n" | 236 | " cmp %%g1, %%g7\n" |
234 | " bne,pn %%icc, 1b\n" | 237 | " bne,pn %%icc, 1b\n" |
235 | " cmp %%g7, 1\n" | 238 | " cmp %%g7, 1\n" |
239 | " membar #StoreLoad | #StoreStore\n" | ||
236 | " bl,pn %%icc, 3f\n" | 240 | " bl,pn %%icc, 3f\n" |
237 | " membar #StoreLoad | #StoreStore\n" | 241 | " nop\n" |
238 | "2:\n" | 242 | "2:\n" |
239 | " .subsection 2\n" | 243 | " .subsection 2\n" |
240 | "3: mov %2, %%g1\n" | 244 | "3: mov %2, %%g1\n" |
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 2c8f9344b4ee..3a145fc39cf2 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S | |||
@@ -98,8 +98,9 @@ startup_continue: | |||
98 | 98 | ||
99 | sethi %hi(prom_entry_lock), %g2 | 99 | sethi %hi(prom_entry_lock), %g2 |
100 | 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 | 100 | 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 |
101 | membar #StoreLoad | #StoreStore | ||
101 | brnz,pn %g1, 1b | 102 | brnz,pn %g1, 1b |
102 | membar #StoreLoad | #StoreStore | 103 | nop |
103 | 104 | ||
104 | sethi %hi(p1275buf), %g2 | 105 | sethi %hi(p1275buf), %g2 |
105 | or %g2, %lo(p1275buf), %g2 | 106 | or %g2, %lo(p1275buf), %g2 |
diff --git a/arch/sparc64/lib/U1memcpy.S b/arch/sparc64/lib/U1memcpy.S index da9b520c7189..bafd2fc07acb 100644 --- a/arch/sparc64/lib/U1memcpy.S +++ b/arch/sparc64/lib/U1memcpy.S | |||
@@ -87,14 +87,17 @@ | |||
87 | #define LOOP_CHUNK3(src, dest, len, branch_dest) \ | 87 | #define LOOP_CHUNK3(src, dest, len, branch_dest) \ |
88 | MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) | 88 | MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) |
89 | 89 | ||
90 | #define DO_SYNC membar #Sync; | ||
90 | #define STORE_SYNC(dest, fsrc) \ | 91 | #define STORE_SYNC(dest, fsrc) \ |
91 | EX_ST(STORE_BLK(%fsrc, %dest)); \ | 92 | EX_ST(STORE_BLK(%fsrc, %dest)); \ |
92 | add %dest, 0x40, %dest; | 93 | add %dest, 0x40, %dest; \ |
94 | DO_SYNC | ||
93 | 95 | ||
94 | #define STORE_JUMP(dest, fsrc, target) \ | 96 | #define STORE_JUMP(dest, fsrc, target) \ |
95 | EX_ST(STORE_BLK(%fsrc, %dest)); \ | 97 | EX_ST(STORE_BLK(%fsrc, %dest)); \ |
96 | add %dest, 0x40, %dest; \ | 98 | add %dest, 0x40, %dest; \ |
97 | ba,pt %xcc, target; | 99 | ba,pt %xcc, target; \ |
100 | nop; | ||
98 | 101 | ||
99 | #define FINISH_VISCHUNK(dest, f0, f1, left) \ | 102 | #define FINISH_VISCHUNK(dest, f0, f1, left) \ |
100 | subcc %left, 8, %left;\ | 103 | subcc %left, 8, %left;\ |
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
239 | ba,pt %xcc, 1b+4 | 242 | ba,pt %xcc, 1b+4 |
240 | faligndata %f0, %f2, %f48 | 243 | faligndata %f0, %f2, %f48 |
241 | 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) | 244 | 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) |
242 | STORE_SYNC(o0, f48) membar #Sync | 245 | STORE_SYNC(o0, f48) |
243 | FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) | 246 | FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) |
244 | STORE_JUMP(o0, f48, 40f) membar #Sync | 247 | STORE_JUMP(o0, f48, 40f) |
245 | 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) | 248 | 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) |
246 | STORE_SYNC(o0, f48) membar #Sync | 249 | STORE_SYNC(o0, f48) |
247 | FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) | 250 | FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) |
248 | STORE_JUMP(o0, f48, 48f) membar #Sync | 251 | STORE_JUMP(o0, f48, 48f) |
249 | 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) | 252 | 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) |
250 | STORE_SYNC(o0, f48) membar #Sync | 253 | STORE_SYNC(o0, f48) |
251 | FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) | 254 | FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) |
252 | STORE_JUMP(o0, f48, 56f) membar #Sync | 255 | STORE_JUMP(o0, f48, 56f) |
253 | 256 | ||
254 | 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) | 257 | 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) |
255 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 258 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
260 | ba,pt %xcc, 1b+4 | 263 | ba,pt %xcc, 1b+4 |
261 | faligndata %f2, %f4, %f48 | 264 | faligndata %f2, %f4, %f48 |
262 | 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) | 265 | 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) |
263 | STORE_SYNC(o0, f48) membar #Sync | 266 | STORE_SYNC(o0, f48) |
264 | FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) | 267 | FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) |
265 | STORE_JUMP(o0, f48, 41f) membar #Sync | 268 | STORE_JUMP(o0, f48, 41f) |
266 | 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) | 269 | 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) |
267 | STORE_SYNC(o0, f48) membar #Sync | 270 | STORE_SYNC(o0, f48) |
268 | FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) | 271 | FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) |
269 | STORE_JUMP(o0, f48, 49f) membar #Sync | 272 | STORE_JUMP(o0, f48, 49f) |
270 | 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) | 273 | 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) |
271 | STORE_SYNC(o0, f48) membar #Sync | 274 | STORE_SYNC(o0, f48) |
272 | FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) | 275 | FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) |
273 | STORE_JUMP(o0, f48, 57f) membar #Sync | 276 | STORE_JUMP(o0, f48, 57f) |
274 | 277 | ||
275 | 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) | 278 | 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) |
276 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 279 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
281 | ba,pt %xcc, 1b+4 | 284 | ba,pt %xcc, 1b+4 |
282 | faligndata %f4, %f6, %f48 | 285 | faligndata %f4, %f6, %f48 |
283 | 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) | 286 | 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) |
284 | STORE_SYNC(o0, f48) membar #Sync | 287 | STORE_SYNC(o0, f48) |
285 | FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) | 288 | FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) |
286 | STORE_JUMP(o0, f48, 42f) membar #Sync | 289 | STORE_JUMP(o0, f48, 42f) |
287 | 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) | 290 | 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) |
288 | STORE_SYNC(o0, f48) membar #Sync | 291 | STORE_SYNC(o0, f48) |
289 | FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) | 292 | FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) |
290 | STORE_JUMP(o0, f48, 50f) membar #Sync | 293 | STORE_JUMP(o0, f48, 50f) |
291 | 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) | 294 | 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) |
292 | STORE_SYNC(o0, f48) membar #Sync | 295 | STORE_SYNC(o0, f48) |
293 | FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) | 296 | FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) |
294 | STORE_JUMP(o0, f48, 58f) membar #Sync | 297 | STORE_JUMP(o0, f48, 58f) |
295 | 298 | ||
296 | 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) | 299 | 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) |
297 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 300 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
302 | ba,pt %xcc, 1b+4 | 305 | ba,pt %xcc, 1b+4 |
303 | faligndata %f6, %f8, %f48 | 306 | faligndata %f6, %f8, %f48 |
304 | 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) | 307 | 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) |
305 | STORE_SYNC(o0, f48) membar #Sync | 308 | STORE_SYNC(o0, f48) |
306 | FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) | 309 | FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) |
307 | STORE_JUMP(o0, f48, 43f) membar #Sync | 310 | STORE_JUMP(o0, f48, 43f) |
308 | 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) | 311 | 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) |
309 | STORE_SYNC(o0, f48) membar #Sync | 312 | STORE_SYNC(o0, f48) |
310 | FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) | 313 | FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) |
311 | STORE_JUMP(o0, f48, 51f) membar #Sync | 314 | STORE_JUMP(o0, f48, 51f) |
312 | 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) | 315 | 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) |
313 | STORE_SYNC(o0, f48) membar #Sync | 316 | STORE_SYNC(o0, f48) |
314 | FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) | 317 | FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) |
315 | STORE_JUMP(o0, f48, 59f) membar #Sync | 318 | STORE_JUMP(o0, f48, 59f) |
316 | 319 | ||
317 | 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) | 320 | 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) |
318 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 321 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
323 | ba,pt %xcc, 1b+4 | 326 | ba,pt %xcc, 1b+4 |
324 | faligndata %f8, %f10, %f48 | 327 | faligndata %f8, %f10, %f48 |
325 | 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) | 328 | 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) |
326 | STORE_SYNC(o0, f48) membar #Sync | 329 | STORE_SYNC(o0, f48) |
327 | FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) | 330 | FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) |
328 | STORE_JUMP(o0, f48, 44f) membar #Sync | 331 | STORE_JUMP(o0, f48, 44f) |
329 | 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) | 332 | 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) |
330 | STORE_SYNC(o0, f48) membar #Sync | 333 | STORE_SYNC(o0, f48) |
331 | FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) | 334 | FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) |
332 | STORE_JUMP(o0, f48, 52f) membar #Sync | 335 | STORE_JUMP(o0, f48, 52f) |
333 | 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) | 336 | 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) |
334 | STORE_SYNC(o0, f48) membar #Sync | 337 | STORE_SYNC(o0, f48) |
335 | FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) | 338 | FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) |
336 | STORE_JUMP(o0, f48, 60f) membar #Sync | 339 | STORE_JUMP(o0, f48, 60f) |
337 | 340 | ||
338 | 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) | 341 | 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) |
339 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 342 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
344 | ba,pt %xcc, 1b+4 | 347 | ba,pt %xcc, 1b+4 |
345 | faligndata %f10, %f12, %f48 | 348 | faligndata %f10, %f12, %f48 |
346 | 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) | 349 | 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) |
347 | STORE_SYNC(o0, f48) membar #Sync | 350 | STORE_SYNC(o0, f48) |
348 | FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) | 351 | FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) |
349 | STORE_JUMP(o0, f48, 45f) membar #Sync | 352 | STORE_JUMP(o0, f48, 45f) |
350 | 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) | 353 | 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) |
351 | STORE_SYNC(o0, f48) membar #Sync | 354 | STORE_SYNC(o0, f48) |
352 | FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) | 355 | FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) |
353 | STORE_JUMP(o0, f48, 53f) membar #Sync | 356 | STORE_JUMP(o0, f48, 53f) |
354 | 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) | 357 | 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) |
355 | STORE_SYNC(o0, f48) membar #Sync | 358 | STORE_SYNC(o0, f48) |
356 | FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) | 359 | FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) |
357 | STORE_JUMP(o0, f48, 61f) membar #Sync | 360 | STORE_JUMP(o0, f48, 61f) |
358 | 361 | ||
359 | 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) | 362 | 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) |
360 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 363 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
365 | ba,pt %xcc, 1b+4 | 368 | ba,pt %xcc, 1b+4 |
366 | faligndata %f12, %f14, %f48 | 369 | faligndata %f12, %f14, %f48 |
367 | 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) | 370 | 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) |
368 | STORE_SYNC(o0, f48) membar #Sync | 371 | STORE_SYNC(o0, f48) |
369 | FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) | 372 | FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) |
370 | STORE_JUMP(o0, f48, 46f) membar #Sync | 373 | STORE_JUMP(o0, f48, 46f) |
371 | 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) | 374 | 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) |
372 | STORE_SYNC(o0, f48) membar #Sync | 375 | STORE_SYNC(o0, f48) |
373 | FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) | 376 | FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) |
374 | STORE_JUMP(o0, f48, 54f) membar #Sync | 377 | STORE_JUMP(o0, f48, 54f) |
375 | 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) | 378 | 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) |
376 | STORE_SYNC(o0, f48) membar #Sync | 379 | STORE_SYNC(o0, f48) |
377 | FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) | 380 | FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) |
378 | STORE_JUMP(o0, f48, 62f) membar #Sync | 381 | STORE_JUMP(o0, f48, 62f) |
379 | 382 | ||
380 | 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) | 383 | 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) |
381 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) | 384 | LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) |
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
386 | ba,pt %xcc, 1b+4 | 389 | ba,pt %xcc, 1b+4 |
387 | faligndata %f14, %f16, %f48 | 390 | faligndata %f14, %f16, %f48 |
388 | 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) | 391 | 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) |
389 | STORE_SYNC(o0, f48) membar #Sync | 392 | STORE_SYNC(o0, f48) |
390 | FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) | 393 | FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) |
391 | STORE_JUMP(o0, f48, 47f) membar #Sync | 394 | STORE_JUMP(o0, f48, 47f) |
392 | 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) | 395 | 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) |
393 | STORE_SYNC(o0, f48) membar #Sync | 396 | STORE_SYNC(o0, f48) |
394 | FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) | 397 | FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) |
395 | STORE_JUMP(o0, f48, 55f) membar #Sync | 398 | STORE_JUMP(o0, f48, 55f) |
396 | 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) | 399 | 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) |
397 | STORE_SYNC(o0, f48) membar #Sync | 400 | STORE_SYNC(o0, f48) |
398 | FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) | 401 | FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) |
399 | STORE_JUMP(o0, f48, 63f) membar #Sync | 402 | STORE_JUMP(o0, f48, 63f) |
400 | 403 | ||
401 | 40: FINISH_VISCHUNK(o0, f0, f2, g3) | 404 | 40: FINISH_VISCHUNK(o0, f0, f2, g3) |
402 | 41: FINISH_VISCHUNK(o0, f2, f4, g3) | 405 | 41: FINISH_VISCHUNK(o0, f2, f4, g3) |
diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S index 65e328d600a8..4e18989bd602 100644 --- a/arch/sparc64/lib/VISsave.S +++ b/arch/sparc64/lib/VISsave.S | |||
@@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 | |||
72 | 72 | ||
73 | stda %f48, [%g3 + %g1] ASI_BLK_P | 73 | stda %f48, [%g3 + %g1] ASI_BLK_P |
74 | 5: membar #Sync | 74 | 5: membar #Sync |
75 | jmpl %g7 + %g0, %g0 | 75 | ba,pt %xcc, 80f |
76 | nop | ||
77 | |||
78 | .align 32 | ||
79 | 80: jmpl %g7 + %g0, %g0 | ||
76 | nop | 80 | nop |
77 | 81 | ||
78 | 6: ldub [%g3 + TI_FPSAVED], %o5 | 82 | 6: ldub [%g3 + TI_FPSAVED], %o5 |
@@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 | |||
87 | stda %f32, [%g2 + %g1] ASI_BLK_P | 91 | stda %f32, [%g2 + %g1] ASI_BLK_P |
88 | stda %f48, [%g3 + %g1] ASI_BLK_P | 92 | stda %f48, [%g3 + %g1] ASI_BLK_P |
89 | membar #Sync | 93 | membar #Sync |
90 | jmpl %g7 + %g0, %g0 | 94 | ba,pt %xcc, 80f |
95 | nop | ||
91 | 96 | ||
97 | .align 32 | ||
98 | 80: jmpl %g7 + %g0, %g0 | ||
92 | nop | 99 | nop |
93 | 100 | ||
94 | .align 32 | 101 | .align 32 |
@@ -126,6 +133,10 @@ VISenterhalf: | |||
126 | stda %f0, [%g2 + %g1] ASI_BLK_P | 133 | stda %f0, [%g2 + %g1] ASI_BLK_P |
127 | stda %f16, [%g3 + %g1] ASI_BLK_P | 134 | stda %f16, [%g3 + %g1] ASI_BLK_P |
128 | membar #Sync | 135 | membar #Sync |
136 | ba,pt %xcc, 4f | ||
137 | nop | ||
138 | |||
139 | .align 32 | ||
129 | 4: and %o5, FPRS_DU, %o5 | 140 | 4: and %o5, FPRS_DU, %o5 |
130 | jmpl %g7 + %g0, %g0 | 141 | jmpl %g7 + %g0, %g0 |
131 | wr %o5, FPRS_FEF, %fprs | 142 | wr %o5, FPRS_FEF, %fprs |
diff --git a/arch/sparc64/lib/atomic.S b/arch/sparc64/lib/atomic.S index e528b8d1a3e6..faf87c31598b 100644 --- a/arch/sparc64/lib/atomic.S +++ b/arch/sparc64/lib/atomic.S | |||
@@ -7,18 +7,6 @@ | |||
7 | #include <linux/config.h> | 7 | #include <linux/config.h> |
8 | #include <asm/asi.h> | 8 | #include <asm/asi.h> |
9 | 9 | ||
10 | /* On SMP we need to use memory barriers to ensure | ||
11 | * correct memory operation ordering, nop these out | ||
12 | * for uniprocessor. | ||
13 | */ | ||
14 | #ifdef CONFIG_SMP | ||
15 | #define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad | ||
16 | #define ATOMIC_POST_BARRIER membar #StoreLoad | #StoreStore | ||
17 | #else | ||
18 | #define ATOMIC_PRE_BARRIER nop | ||
19 | #define ATOMIC_POST_BARRIER nop | ||
20 | #endif | ||
21 | |||
22 | .text | 10 | .text |
23 | 11 | ||
24 | /* Two versions of the atomic routines, one that | 12 | /* Two versions of the atomic routines, one that |
@@ -52,6 +40,24 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ | |||
52 | nop | 40 | nop |
53 | .size atomic_sub, .-atomic_sub | 41 | .size atomic_sub, .-atomic_sub |
54 | 42 | ||
43 | /* On SMP we need to use memory barriers to ensure | ||
44 | * correct memory operation ordering, nop these out | ||
45 | * for uniprocessor. | ||
46 | */ | ||
47 | #ifdef CONFIG_SMP | ||
48 | |||
49 | #define ATOMIC_PRE_BARRIER membar #StoreLoad | #LoadLoad; | ||
50 | #define ATOMIC_POST_BARRIER \ | ||
51 | ba,pt %xcc, 80b; \ | ||
52 | membar #StoreLoad | #StoreStore | ||
53 | |||
54 | 80: retl | ||
55 | nop | ||
56 | #else | ||
57 | #define ATOMIC_PRE_BARRIER | ||
58 | #define ATOMIC_POST_BARRIER | ||
59 | #endif | ||
60 | |||
55 | .globl atomic_add_ret | 61 | .globl atomic_add_ret |
56 | .type atomic_add_ret,#function | 62 | .type atomic_add_ret,#function |
57 | atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ | 63 | atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ |
@@ -62,9 +68,10 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ | |||
62 | cmp %g1, %g7 | 68 | cmp %g1, %g7 |
63 | bne,pn %icc, 1b | 69 | bne,pn %icc, 1b |
64 | add %g7, %o0, %g7 | 70 | add %g7, %o0, %g7 |
71 | sra %g7, 0, %o0 | ||
65 | ATOMIC_POST_BARRIER | 72 | ATOMIC_POST_BARRIER |
66 | retl | 73 | retl |
67 | sra %g7, 0, %o0 | 74 | nop |
68 | .size atomic_add_ret, .-atomic_add_ret | 75 | .size atomic_add_ret, .-atomic_add_ret |
69 | 76 | ||
70 | .globl atomic_sub_ret | 77 | .globl atomic_sub_ret |
@@ -77,9 +84,10 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ | |||
77 | cmp %g1, %g7 | 84 | cmp %g1, %g7 |
78 | bne,pn %icc, 1b | 85 | bne,pn %icc, 1b |
79 | sub %g7, %o0, %g7 | 86 | sub %g7, %o0, %g7 |
87 | sra %g7, 0, %o0 | ||
80 | ATOMIC_POST_BARRIER | 88 | ATOMIC_POST_BARRIER |
81 | retl | 89 | retl |
82 | sra %g7, 0, %o0 | 90 | nop |
83 | .size atomic_sub_ret, .-atomic_sub_ret | 91 | .size atomic_sub_ret, .-atomic_sub_ret |
84 | 92 | ||
85 | .globl atomic64_add | 93 | .globl atomic64_add |
@@ -118,9 +126,10 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ | |||
118 | cmp %g1, %g7 | 126 | cmp %g1, %g7 |
119 | bne,pn %xcc, 1b | 127 | bne,pn %xcc, 1b |
120 | add %g7, %o0, %g7 | 128 | add %g7, %o0, %g7 |
129 | mov %g7, %o0 | ||
121 | ATOMIC_POST_BARRIER | 130 | ATOMIC_POST_BARRIER |
122 | retl | 131 | retl |
123 | mov %g7, %o0 | 132 | nop |
124 | .size atomic64_add_ret, .-atomic64_add_ret | 133 | .size atomic64_add_ret, .-atomic64_add_ret |
125 | 134 | ||
126 | .globl atomic64_sub_ret | 135 | .globl atomic64_sub_ret |
@@ -133,7 +142,8 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ | |||
133 | cmp %g1, %g7 | 142 | cmp %g1, %g7 |
134 | bne,pn %xcc, 1b | 143 | bne,pn %xcc, 1b |
135 | sub %g7, %o0, %g7 | 144 | sub %g7, %o0, %g7 |
145 | mov %g7, %o0 | ||
136 | ATOMIC_POST_BARRIER | 146 | ATOMIC_POST_BARRIER |
137 | retl | 147 | retl |
138 | mov %g7, %o0 | 148 | nop |
139 | .size atomic64_sub_ret, .-atomic64_sub_ret | 149 | .size atomic64_sub_ret, .-atomic64_sub_ret |
diff --git a/arch/sparc64/lib/bitops.S b/arch/sparc64/lib/bitops.S index 886dcd2b376a..31afbfe6c1e8 100644 --- a/arch/sparc64/lib/bitops.S +++ b/arch/sparc64/lib/bitops.S | |||
@@ -7,20 +7,26 @@ | |||
7 | #include <linux/config.h> | 7 | #include <linux/config.h> |
8 | #include <asm/asi.h> | 8 | #include <asm/asi.h> |
9 | 9 | ||
10 | .text | ||
11 | |||
10 | /* On SMP we need to use memory barriers to ensure | 12 | /* On SMP we need to use memory barriers to ensure |
11 | * correct memory operation ordering, nop these out | 13 | * correct memory operation ordering, nop these out |
12 | * for uniprocessor. | 14 | * for uniprocessor. |
13 | */ | 15 | */ |
16 | |||
14 | #ifdef CONFIG_SMP | 17 | #ifdef CONFIG_SMP |
15 | #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad | 18 | #define BITOP_PRE_BARRIER membar #StoreLoad | #LoadLoad |
16 | #define BITOP_POST_BARRIER membar #StoreLoad | #StoreStore | 19 | #define BITOP_POST_BARRIER \ |
20 | ba,pt %xcc, 80b; \ | ||
21 | membar #StoreLoad | #StoreStore | ||
22 | |||
23 | 80: retl | ||
24 | nop | ||
17 | #else | 25 | #else |
18 | #define BITOP_PRE_BARRIER nop | 26 | #define BITOP_PRE_BARRIER |
19 | #define BITOP_POST_BARRIER nop | 27 | #define BITOP_POST_BARRIER |
20 | #endif | 28 | #endif |
21 | 29 | ||
22 | .text | ||
23 | |||
24 | .globl test_and_set_bit | 30 | .globl test_and_set_bit |
25 | .type test_and_set_bit,#function | 31 | .type test_and_set_bit,#function |
26 | test_and_set_bit: /* %o0=nr, %o1=addr */ | 32 | test_and_set_bit: /* %o0=nr, %o1=addr */ |
@@ -37,10 +43,11 @@ test_and_set_bit: /* %o0=nr, %o1=addr */ | |||
37 | cmp %g7, %g1 | 43 | cmp %g7, %g1 |
38 | bne,pn %xcc, 1b | 44 | bne,pn %xcc, 1b |
39 | and %g7, %o2, %g2 | 45 | and %g7, %o2, %g2 |
40 | BITOP_POST_BARRIER | ||
41 | clr %o0 | 46 | clr %o0 |
47 | movrne %g2, 1, %o0 | ||
48 | BITOP_POST_BARRIER | ||
42 | retl | 49 | retl |
43 | movrne %g2, 1, %o0 | 50 | nop |
44 | .size test_and_set_bit, .-test_and_set_bit | 51 | .size test_and_set_bit, .-test_and_set_bit |
45 | 52 | ||
46 | .globl test_and_clear_bit | 53 | .globl test_and_clear_bit |
@@ -59,10 +66,11 @@ test_and_clear_bit: /* %o0=nr, %o1=addr */ | |||
59 | cmp %g7, %g1 | 66 | cmp %g7, %g1 |
60 | bne,pn %xcc, 1b | 67 | bne,pn %xcc, 1b |
61 | and %g7, %o2, %g2 | 68 | and %g7, %o2, %g2 |
62 | BITOP_POST_BARRIER | ||
63 | clr %o0 | 69 | clr %o0 |
70 | movrne %g2, 1, %o0 | ||
71 | BITOP_POST_BARRIER | ||
64 | retl | 72 | retl |
65 | movrne %g2, 1, %o0 | 73 | nop |
66 | .size test_and_clear_bit, .-test_and_clear_bit | 74 | .size test_and_clear_bit, .-test_and_clear_bit |
67 | 75 | ||
68 | .globl test_and_change_bit | 76 | .globl test_and_change_bit |
@@ -81,10 +89,11 @@ test_and_change_bit: /* %o0=nr, %o1=addr */ | |||
81 | cmp %g7, %g1 | 89 | cmp %g7, %g1 |
82 | bne,pn %xcc, 1b | 90 | bne,pn %xcc, 1b |
83 | and %g7, %o2, %g2 | 91 | and %g7, %o2, %g2 |
84 | BITOP_POST_BARRIER | ||
85 | clr %o0 | 92 | clr %o0 |
93 | movrne %g2, 1, %o0 | ||
94 | BITOP_POST_BARRIER | ||
86 | retl | 95 | retl |
87 | movrne %g2, 1, %o0 | 96 | nop |
88 | .size test_and_change_bit, .-test_and_change_bit | 97 | .size test_and_change_bit, .-test_and_change_bit |
89 | 98 | ||
90 | .globl set_bit | 99 | .globl set_bit |
diff --git a/arch/sparc64/lib/debuglocks.c b/arch/sparc64/lib/debuglocks.c index c421e0c65325..f03344cf784e 100644 --- a/arch/sparc64/lib/debuglocks.c +++ b/arch/sparc64/lib/debuglocks.c | |||
@@ -252,8 +252,9 @@ wlock_again: | |||
252 | " andn %%g1, %%g3, %%g7\n" | 252 | " andn %%g1, %%g3, %%g7\n" |
253 | " casx [%0], %%g1, %%g7\n" | 253 | " casx [%0], %%g1, %%g7\n" |
254 | " cmp %%g1, %%g7\n" | 254 | " cmp %%g1, %%g7\n" |
255 | " membar #StoreLoad | #StoreStore\n" | ||
255 | " bne,pn %%xcc, 1b\n" | 256 | " bne,pn %%xcc, 1b\n" |
256 | " membar #StoreLoad | #StoreStore" | 257 | " nop" |
257 | : /* no outputs */ | 258 | : /* no outputs */ |
258 | : "r" (&(rw->lock)) | 259 | : "r" (&(rw->lock)) |
259 | : "g3", "g1", "g7", "cc", "memory"); | 260 | : "g3", "g1", "g7", "cc", "memory"); |
@@ -351,8 +352,9 @@ int _do_write_trylock (rwlock_t *rw, char *str) | |||
351 | " andn %%g1, %%g3, %%g7\n" | 352 | " andn %%g1, %%g3, %%g7\n" |
352 | " casx [%0], %%g1, %%g7\n" | 353 | " casx [%0], %%g1, %%g7\n" |
353 | " cmp %%g1, %%g7\n" | 354 | " cmp %%g1, %%g7\n" |
355 | " membar #StoreLoad | #StoreStore\n" | ||
354 | " bne,pn %%xcc, 1b\n" | 356 | " bne,pn %%xcc, 1b\n" |
355 | " membar #StoreLoad | #StoreStore" | 357 | " nop" |
356 | : /* no outputs */ | 358 | : /* no outputs */ |
357 | : "r" (&(rw->lock)) | 359 | : "r" (&(rw->lock)) |
358 | : "g3", "g1", "g7", "cc", "memory"); | 360 | : "g3", "g1", "g7", "cc", "memory"); |
diff --git a/arch/sparc64/lib/dec_and_lock.S b/arch/sparc64/lib/dec_and_lock.S index 7e6fdaebedba..8ee288dd0afc 100644 --- a/arch/sparc64/lib/dec_and_lock.S +++ b/arch/sparc64/lib/dec_and_lock.S | |||
@@ -48,8 +48,9 @@ start_to_zero: | |||
48 | #endif | 48 | #endif |
49 | to_zero: | 49 | to_zero: |
50 | ldstub [%o1], %g3 | 50 | ldstub [%o1], %g3 |
51 | membar #StoreLoad | #StoreStore | ||
51 | brnz,pn %g3, spin_on_lock | 52 | brnz,pn %g3, spin_on_lock |
52 | membar #StoreLoad | #StoreStore | 53 | nop |
53 | loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ | 54 | loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ |
54 | cmp %g2, %g7 | 55 | cmp %g2, %g7 |
55 | 56 | ||
@@ -71,8 +72,9 @@ loop2: cas [%o0], %g2, %g7 /* ASSERT(g7 == 0) */ | |||
71 | nop | 72 | nop |
72 | spin_on_lock: | 73 | spin_on_lock: |
73 | ldub [%o1], %g3 | 74 | ldub [%o1], %g3 |
75 | membar #LoadLoad | ||
74 | brnz,pt %g3, spin_on_lock | 76 | brnz,pt %g3, spin_on_lock |
75 | membar #LoadLoad | 77 | nop |
76 | ba,pt %xcc, to_zero | 78 | ba,pt %xcc, to_zero |
77 | nop | 79 | nop |
78 | nop | 80 | nop |
diff --git a/arch/sparc64/lib/rwsem.S b/arch/sparc64/lib/rwsem.S index 174ff7b9164c..75f0e6b951d6 100644 --- a/arch/sparc64/lib/rwsem.S +++ b/arch/sparc64/lib/rwsem.S | |||
@@ -17,8 +17,9 @@ __down_read: | |||
17 | bne,pn %icc, 1b | 17 | bne,pn %icc, 1b |
18 | add %g7, 1, %g7 | 18 | add %g7, 1, %g7 |
19 | cmp %g7, 0 | 19 | cmp %g7, 0 |
20 | membar #StoreLoad | #StoreStore | ||
20 | bl,pn %icc, 3f | 21 | bl,pn %icc, 3f |
21 | membar #StoreLoad | #StoreStore | 22 | nop |
22 | 2: | 23 | 2: |
23 | retl | 24 | retl |
24 | nop | 25 | nop |
@@ -57,8 +58,9 @@ __down_write: | |||
57 | cmp %g3, %g7 | 58 | cmp %g3, %g7 |
58 | bne,pn %icc, 1b | 59 | bne,pn %icc, 1b |
59 | cmp %g7, 0 | 60 | cmp %g7, 0 |
61 | membar #StoreLoad | #StoreStore | ||
60 | bne,pn %icc, 3f | 62 | bne,pn %icc, 3f |
61 | membar #StoreLoad | #StoreStore | 63 | nop |
62 | 2: retl | 64 | 2: retl |
63 | nop | 65 | nop |
64 | 3: | 66 | 3: |
@@ -97,8 +99,9 @@ __up_read: | |||
97 | cmp %g1, %g7 | 99 | cmp %g1, %g7 |
98 | bne,pn %icc, 1b | 100 | bne,pn %icc, 1b |
99 | cmp %g7, 0 | 101 | cmp %g7, 0 |
102 | membar #StoreLoad | #StoreStore | ||
100 | bl,pn %icc, 3f | 103 | bl,pn %icc, 3f |
101 | membar #StoreLoad | #StoreStore | 104 | nop |
102 | 2: retl | 105 | 2: retl |
103 | nop | 106 | nop |
104 | 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 | 107 | 3: sethi %hi(RWSEM_ACTIVE_MASK), %g1 |
@@ -126,8 +129,9 @@ __up_write: | |||
126 | bne,pn %icc, 1b | 129 | bne,pn %icc, 1b |
127 | sub %g7, %g1, %g7 | 130 | sub %g7, %g1, %g7 |
128 | cmp %g7, 0 | 131 | cmp %g7, 0 |
132 | membar #StoreLoad | #StoreStore | ||
129 | bl,pn %icc, 3f | 133 | bl,pn %icc, 3f |
130 | membar #StoreLoad | #StoreStore | 134 | nop |
131 | 2: | 135 | 2: |
132 | retl | 136 | retl |
133 | nop | 137 | nop |
@@ -151,8 +155,9 @@ __downgrade_write: | |||
151 | bne,pn %icc, 1b | 155 | bne,pn %icc, 1b |
152 | sub %g7, %g1, %g7 | 156 | sub %g7, %g1, %g7 |
153 | cmp %g7, 0 | 157 | cmp %g7, 0 |
158 | membar #StoreLoad | #StoreStore | ||
154 | bl,pn %icc, 3f | 159 | bl,pn %icc, 3f |
155 | membar #StoreLoad | #StoreStore | 160 | nop |
156 | 2: | 161 | 2: |
157 | retl | 162 | retl |
158 | nop | 163 | nop |
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 9c5222075da9..8fc413cb6acd 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c | |||
@@ -136,8 +136,9 @@ static __inline__ void set_dcache_dirty(struct page *page, int this_cpu) | |||
136 | "or %%g1, %0, %%g1\n\t" | 136 | "or %%g1, %0, %%g1\n\t" |
137 | "casx [%2], %%g7, %%g1\n\t" | 137 | "casx [%2], %%g7, %%g1\n\t" |
138 | "cmp %%g7, %%g1\n\t" | 138 | "cmp %%g7, %%g1\n\t" |
139 | "membar #StoreLoad | #StoreStore\n\t" | ||
139 | "bne,pn %%xcc, 1b\n\t" | 140 | "bne,pn %%xcc, 1b\n\t" |
140 | " membar #StoreLoad | #StoreStore" | 141 | " nop" |
141 | : /* no outputs */ | 142 | : /* no outputs */ |
142 | : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) | 143 | : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) |
143 | : "g1", "g7"); | 144 | : "g1", "g7"); |
@@ -157,8 +158,9 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c | |||
157 | " andn %%g7, %1, %%g1\n\t" | 158 | " andn %%g7, %1, %%g1\n\t" |
158 | "casx [%2], %%g7, %%g1\n\t" | 159 | "casx [%2], %%g7, %%g1\n\t" |
159 | "cmp %%g7, %%g1\n\t" | 160 | "cmp %%g7, %%g1\n\t" |
161 | "membar #StoreLoad | #StoreStore\n\t" | ||
160 | "bne,pn %%xcc, 1b\n\t" | 162 | "bne,pn %%xcc, 1b\n\t" |
161 | " membar #StoreLoad | #StoreStore\n" | 163 | " nop\n" |
162 | "2:" | 164 | "2:" |
163 | : /* no outputs */ | 165 | : /* no outputs */ |
164 | : "r" (cpu), "r" (mask), "r" (&page->flags), | 166 | : "r" (cpu), "r" (mask), "r" (&page->flags), |
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 7a0934321010..7a2431d3abc7 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S | |||
@@ -266,8 +266,9 @@ __cheetah_flush_tlb_pending: /* 22 insns */ | |||
266 | andn %o3, 1, %o3 | 266 | andn %o3, 1, %o3 |
267 | stxa %g0, [%o3] ASI_IMMU_DEMAP | 267 | stxa %g0, [%o3] ASI_IMMU_DEMAP |
268 | 2: stxa %g0, [%o3] ASI_DMMU_DEMAP | 268 | 2: stxa %g0, [%o3] ASI_DMMU_DEMAP |
269 | membar #Sync | ||
269 | brnz,pt %o1, 1b | 270 | brnz,pt %o1, 1b |
270 | membar #Sync | 271 | nop |
271 | stxa %g2, [%o4] ASI_DMMU | 272 | stxa %g2, [%o4] ASI_DMMU |
272 | flush %g6 | 273 | flush %g6 |
273 | wrpr %g0, 0, %tl | 274 | wrpr %g0, 0, %tl |
diff --git a/include/asm-sparc64/rwsem.h b/include/asm-sparc64/rwsem.h index bf2ae90ed3df..a1cc94f95984 100644 --- a/include/asm-sparc64/rwsem.h +++ b/include/asm-sparc64/rwsem.h | |||
@@ -55,8 +55,9 @@ static __inline__ int rwsem_atomic_update(int delta, struct rw_semaphore *sem) | |||
55 | "add %%g1, %1, %%g7\n\t" | 55 | "add %%g1, %1, %%g7\n\t" |
56 | "cas [%2], %%g1, %%g7\n\t" | 56 | "cas [%2], %%g1, %%g7\n\t" |
57 | "cmp %%g1, %%g7\n\t" | 57 | "cmp %%g1, %%g7\n\t" |
58 | "membar #StoreLoad | #StoreStore\n\t" | ||
58 | "bne,pn %%icc, 1b\n\t" | 59 | "bne,pn %%icc, 1b\n\t" |
59 | " membar #StoreLoad | #StoreStore\n\t" | 60 | " nop\n\t" |
60 | "mov %%g7, %0\n\t" | 61 | "mov %%g7, %0\n\t" |
61 | : "=&r" (tmp) | 62 | : "=&r" (tmp) |
62 | : "0" (tmp), "r" (sem) | 63 | : "0" (tmp), "r" (sem) |
diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h index db7581bdb531..9cb93a5c2b4f 100644 --- a/include/asm-sparc64/spinlock.h +++ b/include/asm-sparc64/spinlock.h | |||
@@ -52,12 +52,14 @@ static inline void _raw_spin_lock(spinlock_t *lock) | |||
52 | 52 | ||
53 | __asm__ __volatile__( | 53 | __asm__ __volatile__( |
54 | "1: ldstub [%1], %0\n" | 54 | "1: ldstub [%1], %0\n" |
55 | " membar #StoreLoad | #StoreStore\n" | ||
55 | " brnz,pn %0, 2f\n" | 56 | " brnz,pn %0, 2f\n" |
56 | " membar #StoreLoad | #StoreStore\n" | 57 | " nop\n" |
57 | " .subsection 2\n" | 58 | " .subsection 2\n" |
58 | "2: ldub [%1], %0\n" | 59 | "2: ldub [%1], %0\n" |
60 | " membar #LoadLoad\n" | ||
59 | " brnz,pt %0, 2b\n" | 61 | " brnz,pt %0, 2b\n" |
60 | " membar #LoadLoad\n" | 62 | " nop\n" |
61 | " ba,a,pt %%xcc, 1b\n" | 63 | " ba,a,pt %%xcc, 1b\n" |
62 | " .previous" | 64 | " .previous" |
63 | : "=&r" (tmp) | 65 | : "=&r" (tmp) |
@@ -95,16 +97,18 @@ static inline void _raw_spin_lock_flags(spinlock_t *lock, unsigned long flags) | |||
95 | 97 | ||
96 | __asm__ __volatile__( | 98 | __asm__ __volatile__( |
97 | "1: ldstub [%2], %0\n" | 99 | "1: ldstub [%2], %0\n" |
98 | " brnz,pn %0, 2f\n" | ||
99 | " membar #StoreLoad | #StoreStore\n" | 100 | " membar #StoreLoad | #StoreStore\n" |
101 | " brnz,pn %0, 2f\n" | ||
102 | " nop\n" | ||
100 | " .subsection 2\n" | 103 | " .subsection 2\n" |
101 | "2: rdpr %%pil, %1\n" | 104 | "2: rdpr %%pil, %1\n" |
102 | " wrpr %3, %%pil\n" | 105 | " wrpr %3, %%pil\n" |
103 | "3: ldub [%2], %0\n" | 106 | "3: ldub [%2], %0\n" |
104 | " brnz,pt %0, 3b\n" | ||
105 | " membar #LoadLoad\n" | 107 | " membar #LoadLoad\n" |
108 | " brnz,pt %0, 3b\n" | ||
109 | " nop\n" | ||
106 | " ba,pt %%xcc, 1b\n" | 110 | " ba,pt %%xcc, 1b\n" |
107 | " wrpr %1, %%pil\n" | 111 | " wrpr %1, %%pil\n" |
108 | " .previous" | 112 | " .previous" |
109 | : "=&r" (tmp1), "=&r" (tmp2) | 113 | : "=&r" (tmp1), "=&r" (tmp2) |
110 | : "r"(lock), "r"(flags) | 114 | : "r"(lock), "r"(flags) |
@@ -162,12 +166,14 @@ static void inline __read_lock(rwlock_t *lock) | |||
162 | "4: add %0, 1, %1\n" | 166 | "4: add %0, 1, %1\n" |
163 | " cas [%2], %0, %1\n" | 167 | " cas [%2], %0, %1\n" |
164 | " cmp %0, %1\n" | 168 | " cmp %0, %1\n" |
169 | " membar #StoreLoad | #StoreStore\n" | ||
165 | " bne,pn %%icc, 1b\n" | 170 | " bne,pn %%icc, 1b\n" |
166 | " membar #StoreLoad | #StoreStore\n" | 171 | " nop\n" |
167 | " .subsection 2\n" | 172 | " .subsection 2\n" |
168 | "2: ldsw [%2], %0\n" | 173 | "2: ldsw [%2], %0\n" |
174 | " membar #LoadLoad\n" | ||
169 | " brlz,pt %0, 2b\n" | 175 | " brlz,pt %0, 2b\n" |
170 | " membar #LoadLoad\n" | 176 | " nop\n" |
171 | " ba,a,pt %%xcc, 4b\n" | 177 | " ba,a,pt %%xcc, 4b\n" |
172 | " .previous" | 178 | " .previous" |
173 | : "=&r" (tmp1), "=&r" (tmp2) | 179 | : "=&r" (tmp1), "=&r" (tmp2) |
@@ -204,12 +210,14 @@ static void inline __write_lock(rwlock_t *lock) | |||
204 | "4: or %0, %3, %1\n" | 210 | "4: or %0, %3, %1\n" |
205 | " cas [%2], %0, %1\n" | 211 | " cas [%2], %0, %1\n" |
206 | " cmp %0, %1\n" | 212 | " cmp %0, %1\n" |
213 | " membar #StoreLoad | #StoreStore\n" | ||
207 | " bne,pn %%icc, 1b\n" | 214 | " bne,pn %%icc, 1b\n" |
208 | " membar #StoreLoad | #StoreStore\n" | 215 | " nop\n" |
209 | " .subsection 2\n" | 216 | " .subsection 2\n" |
210 | "2: lduw [%2], %0\n" | 217 | "2: lduw [%2], %0\n" |
218 | " membar #LoadLoad\n" | ||
211 | " brnz,pt %0, 2b\n" | 219 | " brnz,pt %0, 2b\n" |
212 | " membar #LoadLoad\n" | 220 | " nop\n" |
213 | " ba,a,pt %%xcc, 4b\n" | 221 | " ba,a,pt %%xcc, 4b\n" |
214 | " .previous" | 222 | " .previous" |
215 | : "=&r" (tmp1), "=&r" (tmp2) | 223 | : "=&r" (tmp1), "=&r" (tmp2) |
@@ -240,8 +248,9 @@ static int inline __write_trylock(rwlock_t *lock) | |||
240 | " or %0, %4, %1\n" | 248 | " or %0, %4, %1\n" |
241 | " cas [%3], %0, %1\n" | 249 | " cas [%3], %0, %1\n" |
242 | " cmp %0, %1\n" | 250 | " cmp %0, %1\n" |
251 | " membar #StoreLoad | #StoreStore\n" | ||
243 | " bne,pn %%icc, 1b\n" | 252 | " bne,pn %%icc, 1b\n" |
244 | " membar #StoreLoad | #StoreStore\n" | 253 | " nop\n" |
245 | " mov 1, %2\n" | 254 | " mov 1, %2\n" |
246 | "2:" | 255 | "2:" |
247 | : "=&r" (tmp1), "=&r" (tmp2), "=&r" (result) | 256 | : "=&r" (tmp1), "=&r" (tmp2), "=&r" (result) |
diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h index 9d7613eea812..1aa932773af8 100644 --- a/include/asm-sparc64/spitfire.h +++ b/include/asm-sparc64/spitfire.h | |||
@@ -111,7 +111,6 @@ static __inline__ void spitfire_put_dcache_tag(unsigned long addr, unsigned long | |||
111 | "membar #Sync" | 111 | "membar #Sync" |
112 | : /* No outputs */ | 112 | : /* No outputs */ |
113 | : "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG)); | 113 | : "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG)); |
114 | __asm__ __volatile__ ("membar #Sync" : : : "memory"); | ||
115 | } | 114 | } |
116 | 115 | ||
117 | /* The instruction cache lines are flushed with this, but note that | 116 | /* The instruction cache lines are flushed with this, but note that |