diff options
author | Paul Mackerras <paulus@samba.org> | 2006-09-13 08:08:26 -0400 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2006-09-13 08:08:26 -0400 |
commit | f007cacffc8870702a1473d83ba5e4922d54e17c (patch) | |
tree | 7faa1dbd7ccd2c4536f29852e0fedf7499d90508 /include/asm-powerpc/io.h | |
parent | 2e8e8dacc566cc91cd8707cb092e76c7bbfab178 (diff) |
[POWERPC] Fix MMIO ops to provide expected barrier behaviour
This changes the writeX family of functions to have a sync instruction
before the MMIO store rather than after, because the generally expected
behaviour is that the device receiving the MMIO store can be guaranteed
to see the effects of any preceding writes to normal memory.
To preserve ordering between writeX and readX, and to preserve ordering
between preceding stores and the readX, the readX family of functions
have had an sync added before the load.
Although writeX followed by spin_unlock is not officially guaranteed
to keep the writeX inside the spin-locked region unless an mmiowb()
is used, there are currently drivers that depend on the previous
behaviour on powerpc, which was that the mmiowb wasn't actually required.
Therefore we have a per-cpu flag that is set by writeX, cleared by
__raw_spin_lock and mmiowb, and tested by __raw_spin_unlock. If it is
set, __raw_spin_unlock does a sync and clears it.
This changes both 32-bit and 64-bit readX/writeX. 32-bit already has a
sync in __raw_spin_unlock (since lwsync doesn't exist on 32-bit), and thus
doesn't need the per-cpu flag.
Tested on G5 (PPC970) and POWER5.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'include/asm-powerpc/io.h')
-rw-r--r-- | include/asm-powerpc/io.h | 43 |
1 files changed, 28 insertions, 15 deletions
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 36c4c34bf565..212428db0d8b 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h | |||
@@ -19,6 +19,7 @@ extern int check_legacy_ioport(unsigned long base_port); | |||
19 | #include <linux/compiler.h> | 19 | #include <linux/compiler.h> |
20 | #include <asm/page.h> | 20 | #include <asm/page.h> |
21 | #include <asm/byteorder.h> | 21 | #include <asm/byteorder.h> |
22 | #include <asm/paca.h> | ||
22 | #ifdef CONFIG_PPC_ISERIES | 23 | #ifdef CONFIG_PPC_ISERIES |
23 | #include <asm/iseries/iseries_io.h> | 24 | #include <asm/iseries/iseries_io.h> |
24 | #endif | 25 | #endif |
@@ -162,7 +163,11 @@ extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns); | |||
162 | extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); | 163 | extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); |
163 | extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl); | 164 | extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl); |
164 | 165 | ||
165 | #define mmiowb() | 166 | static inline void mmiowb(void) |
167 | { | ||
168 | __asm__ __volatile__ ("sync" : : : "memory"); | ||
169 | get_paca()->io_sync = 0; | ||
170 | } | ||
166 | 171 | ||
167 | /* | 172 | /* |
168 | * output pause versions need a delay at least for the | 173 | * output pause versions need a delay at least for the |
@@ -278,22 +283,23 @@ static inline int in_8(const volatile unsigned char __iomem *addr) | |||
278 | { | 283 | { |
279 | int ret; | 284 | int ret; |
280 | 285 | ||
281 | __asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync" | 286 | __asm__ __volatile__("sync; lbz%U1%X1 %0,%1; twi 0,%0,0; isync" |
282 | : "=r" (ret) : "m" (*addr)); | 287 | : "=r" (ret) : "m" (*addr)); |
283 | return ret; | 288 | return ret; |
284 | } | 289 | } |
285 | 290 | ||
286 | static inline void out_8(volatile unsigned char __iomem *addr, int val) | 291 | static inline void out_8(volatile unsigned char __iomem *addr, int val) |
287 | { | 292 | { |
288 | __asm__ __volatile__("stb%U0%X0 %1,%0; sync" | 293 | __asm__ __volatile__("sync; stb%U0%X0 %1,%0" |
289 | : "=m" (*addr) : "r" (val)); | 294 | : "=m" (*addr) : "r" (val)); |
295 | get_paca()->io_sync = 1; | ||
290 | } | 296 | } |
291 | 297 | ||
292 | static inline int in_le16(const volatile unsigned short __iomem *addr) | 298 | static inline int in_le16(const volatile unsigned short __iomem *addr) |
293 | { | 299 | { |
294 | int ret; | 300 | int ret; |
295 | 301 | ||
296 | __asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync" | 302 | __asm__ __volatile__("sync; lhbrx %0,0,%1; twi 0,%0,0; isync" |
297 | : "=r" (ret) : "r" (addr), "m" (*addr)); | 303 | : "=r" (ret) : "r" (addr), "m" (*addr)); |
298 | return ret; | 304 | return ret; |
299 | } | 305 | } |
@@ -302,28 +308,30 @@ static inline int in_be16(const volatile unsigned short __iomem *addr) | |||
302 | { | 308 | { |
303 | int ret; | 309 | int ret; |
304 | 310 | ||
305 | __asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync" | 311 | __asm__ __volatile__("sync; lhz%U1%X1 %0,%1; twi 0,%0,0; isync" |
306 | : "=r" (ret) : "m" (*addr)); | 312 | : "=r" (ret) : "m" (*addr)); |
307 | return ret; | 313 | return ret; |
308 | } | 314 | } |
309 | 315 | ||
310 | static inline void out_le16(volatile unsigned short __iomem *addr, int val) | 316 | static inline void out_le16(volatile unsigned short __iomem *addr, int val) |
311 | { | 317 | { |
312 | __asm__ __volatile__("sthbrx %1,0,%2; sync" | 318 | __asm__ __volatile__("sync; sthbrx %1,0,%2" |
313 | : "=m" (*addr) : "r" (val), "r" (addr)); | 319 | : "=m" (*addr) : "r" (val), "r" (addr)); |
320 | get_paca()->io_sync = 1; | ||
314 | } | 321 | } |
315 | 322 | ||
316 | static inline void out_be16(volatile unsigned short __iomem *addr, int val) | 323 | static inline void out_be16(volatile unsigned short __iomem *addr, int val) |
317 | { | 324 | { |
318 | __asm__ __volatile__("sth%U0%X0 %1,%0; sync" | 325 | __asm__ __volatile__("sync; sth%U0%X0 %1,%0" |
319 | : "=m" (*addr) : "r" (val)); | 326 | : "=m" (*addr) : "r" (val)); |
327 | get_paca()->io_sync = 1; | ||
320 | } | 328 | } |
321 | 329 | ||
322 | static inline unsigned in_le32(const volatile unsigned __iomem *addr) | 330 | static inline unsigned in_le32(const volatile unsigned __iomem *addr) |
323 | { | 331 | { |
324 | unsigned ret; | 332 | unsigned ret; |
325 | 333 | ||
326 | __asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync" | 334 | __asm__ __volatile__("sync; lwbrx %0,0,%1; twi 0,%0,0; isync" |
327 | : "=r" (ret) : "r" (addr), "m" (*addr)); | 335 | : "=r" (ret) : "r" (addr), "m" (*addr)); |
328 | return ret; | 336 | return ret; |
329 | } | 337 | } |
@@ -332,21 +340,23 @@ static inline unsigned in_be32(const volatile unsigned __iomem *addr) | |||
332 | { | 340 | { |
333 | unsigned ret; | 341 | unsigned ret; |
334 | 342 | ||
335 | __asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync" | 343 | __asm__ __volatile__("sync; lwz%U1%X1 %0,%1; twi 0,%0,0; isync" |
336 | : "=r" (ret) : "m" (*addr)); | 344 | : "=r" (ret) : "m" (*addr)); |
337 | return ret; | 345 | return ret; |
338 | } | 346 | } |
339 | 347 | ||
340 | static inline void out_le32(volatile unsigned __iomem *addr, int val) | 348 | static inline void out_le32(volatile unsigned __iomem *addr, int val) |
341 | { | 349 | { |
342 | __asm__ __volatile__("stwbrx %1,0,%2; sync" : "=m" (*addr) | 350 | __asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr) |
343 | : "r" (val), "r" (addr)); | 351 | : "r" (val), "r" (addr)); |
352 | get_paca()->io_sync = 1; | ||
344 | } | 353 | } |
345 | 354 | ||
346 | static inline void out_be32(volatile unsigned __iomem *addr, int val) | 355 | static inline void out_be32(volatile unsigned __iomem *addr, int val) |
347 | { | 356 | { |
348 | __asm__ __volatile__("stw%U0%X0 %1,%0; sync" | 357 | __asm__ __volatile__("sync; stw%U0%X0 %1,%0" |
349 | : "=m" (*addr) : "r" (val)); | 358 | : "=m" (*addr) : "r" (val)); |
359 | get_paca()->io_sync = 1; | ||
350 | } | 360 | } |
351 | 361 | ||
352 | static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) | 362 | static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) |
@@ -354,6 +364,7 @@ static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) | |||
354 | unsigned long tmp, ret; | 364 | unsigned long tmp, ret; |
355 | 365 | ||
356 | __asm__ __volatile__( | 366 | __asm__ __volatile__( |
367 | "sync\n" | ||
357 | "ld %1,0(%2)\n" | 368 | "ld %1,0(%2)\n" |
358 | "twi 0,%1,0\n" | 369 | "twi 0,%1,0\n" |
359 | "isync\n" | 370 | "isync\n" |
@@ -372,7 +383,7 @@ static inline unsigned long in_be64(const volatile unsigned long __iomem *addr) | |||
372 | { | 383 | { |
373 | unsigned long ret; | 384 | unsigned long ret; |
374 | 385 | ||
375 | __asm__ __volatile__("ld%U1%X1 %0,%1; twi 0,%0,0; isync" | 386 | __asm__ __volatile__("sync; ld%U1%X1 %0,%1; twi 0,%0,0; isync" |
376 | : "=r" (ret) : "m" (*addr)); | 387 | : "=r" (ret) : "m" (*addr)); |
377 | return ret; | 388 | return ret; |
378 | } | 389 | } |
@@ -389,14 +400,16 @@ static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long | |||
389 | "rldicl %1,%1,32,0\n" | 400 | "rldicl %1,%1,32,0\n" |
390 | "rlwimi %0,%1,8,8,31\n" | 401 | "rlwimi %0,%1,8,8,31\n" |
391 | "rlwimi %0,%1,24,16,23\n" | 402 | "rlwimi %0,%1,24,16,23\n" |
392 | "std %0,0(%3)\n" | 403 | "sync\n" |
393 | "sync" | 404 | "std %0,0(%3)" |
394 | : "=&r" (tmp) , "=&r" (val) : "1" (val) , "b" (addr) , "m" (*addr)); | 405 | : "=&r" (tmp) , "=&r" (val) : "1" (val) , "b" (addr) , "m" (*addr)); |
406 | get_paca()->io_sync = 1; | ||
395 | } | 407 | } |
396 | 408 | ||
397 | static inline void out_be64(volatile unsigned long __iomem *addr, unsigned long val) | 409 | static inline void out_be64(volatile unsigned long __iomem *addr, unsigned long val) |
398 | { | 410 | { |
399 | __asm__ __volatile__("std%U0%X0 %1,%0; sync" : "=m" (*addr) : "r" (val)); | 411 | __asm__ __volatile__("sync; std%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); |
412 | get_paca()->io_sync = 1; | ||
400 | } | 413 | } |
401 | 414 | ||
402 | #ifndef CONFIG_PPC_ISERIES | 415 | #ifndef CONFIG_PPC_ISERIES |