From 69588298188b40ed7f75c98a6fd328d82f23ca21 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 4 Sep 2006 21:53:14 -0700 Subject: [POWERPC] Implement PowerPC futex_atomic_cmpxchg_inatomic(). The sys_[gs]et_robust_list() syscalls were wired up on PowerPC but didn't work correctly because futex_atomic_cmpxchg_inatomic() wasn't implemented. Implement it, based on __cmpxchg_u32(). Signed-off-by: David Woodhouse Signed-off-by: Paul Mackerras --- include/asm-powerpc/futex.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include/asm-powerpc') diff --git a/include/asm-powerpc/futex.h b/include/asm-powerpc/futex.h index f1b3c00bc1ce..936422e54891 100644 --- a/include/asm-powerpc/futex.h +++ b/include/asm-powerpc/futex.h @@ -84,7 +84,33 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) { - return -ENOSYS; + int prev; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + __asm__ __volatile__ ( + LWSYNC_ON_SMP +"1: lwarx %0,0,%2 # futex_atomic_cmpxchg_inatomic\n\ + cmpw 0,%0,%3\n\ + bne- 3f\n" + PPC405_ERR77(0,%2) +"2: stwcx. %4,0,%2\n\ + bne- 1b\n" + ISYNC_ON_SMP +"3: .section .fixup,\"ax\"\n\ +4: li %0,%5\n\ + b 3b\n\ + .previous\n\ + .section __ex_table,\"a\"\n\ + .align 3\n\ + " PPC_LONG "1b,4b,2b,4b\n\ + .previous" \ + : "=&r" (prev), "+m" (*uaddr) + : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) + : "cc", "memory"); + + return prev; } #endif /* __KERNEL__ */ -- cgit v1.2.2 From e269d269e0b53a7a6cb1d04290f8174bf0488cb4 Mon Sep 17 00:00:00 2001 From: "Sachin P. Sant" Date: Fri, 8 Sep 2006 07:59:52 +0530 Subject: [POWERPC] kdump: Support kernels having 64k page size. This is required to generate proper core files using kdump on ppc64. Create a backup region of 64K size irrespective of the PAGE SIZE. At present 32K was used as backup size. 
In the case of 64K page size, the second PT_LOAD segment starts at 32K and the first one is not page aligned. __ioremap() (crash_dump.c) fails if pfn = 0 which is the case for the second PT_LOAD segment. This is not an issue for 4K page size because the first page (32K backup) is copied to second kernel memory and is thus referenced with the second kernel pfn. Signed-off-by: Sachin Sant Signed-off-by: Paul Mackerras --- include/asm-powerpc/kdump.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-powerpc') diff --git a/include/asm-powerpc/kdump.h b/include/asm-powerpc/kdump.h index dc1574c945f8..10e8eb1e6f4f 100644 --- a/include/asm-powerpc/kdump.h +++ b/include/asm-powerpc/kdump.h @@ -7,7 +7,7 @@ /* How many bytes to reserve at zero for kdump. The reserve limit should * be greater or equal to the trampoline's end address. * Reserve to the end of the FWNMI area, see head_64.S */ -#define KDUMP_RESERVE_LIMIT 0x8000 +#define KDUMP_RESERVE_LIMIT 0x10000 /* 64K */ #ifdef CONFIG_CRASH_DUMP -- cgit v1.2.2 From f007cacffc8870702a1473d83ba5e4922d54e17c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 13 Sep 2006 22:08:26 +1000 Subject: [POWERPC] Fix MMIO ops to provide expected barrier behaviour This changes the writeX family of functions to have a sync instruction before the MMIO store rather than after, because the generally expected behaviour is that the device receiving the MMIO store can be guaranteed to see the effects of any preceding writes to normal memory. To preserve ordering between writeX and readX, and to preserve ordering between preceding stores and the readX, the readX family of functions have had a sync added before the load. Although writeX followed by spin_unlock is not officially guaranteed to keep the writeX inside the spin-locked region unless an mmiowb() is used, there are currently drivers that depend on the previous behaviour on powerpc, which was that the mmiowb wasn't actually required. 
Therefore we have a per-cpu flag that is set by writeX, cleared by __raw_spin_lock and mmiowb, and tested by __raw_spin_unlock. If it is set, __raw_spin_unlock does a sync and clears it. This changes both 32-bit and 64-bit readX/writeX. 32-bit already has a sync in __raw_spin_unlock (since lwsync doesn't exist on 32-bit), and thus doesn't need the per-cpu flag. Tested on G5 (PPC970) and POWER5. Signed-off-by: Paul Mackerras --- include/asm-powerpc/eeh.h | 3 +++ include/asm-powerpc/io.h | 43 +++++++++++++++++++++++++++--------------- include/asm-powerpc/paca.h | 1 + include/asm-powerpc/spinlock.h | 17 +++++++++++++++++ 4 files changed, 49 insertions(+), 15 deletions(-) (limited to 'include/asm-powerpc') diff --git a/include/asm-powerpc/eeh.h b/include/asm-powerpc/eeh.h index 4df3e80118f4..6a784396660b 100644 --- a/include/asm-powerpc/eeh.h +++ b/include/asm-powerpc/eeh.h @@ -205,6 +205,7 @@ static inline void eeh_memset_io(volatile void __iomem *addr, int c, lc |= lc << 8; lc |= lc << 16; + __asm__ __volatile__ ("sync" : : : "memory"); while(n && !EEH_CHECK_ALIGN(p, 4)) { *((volatile u8 *)p) = c; p++; @@ -229,6 +230,7 @@ static inline void eeh_memcpy_fromio(void *dest, const volatile void __iomem *sr void *destsave = dest; unsigned long nsave = n; + __asm__ __volatile__ ("sync" : : : "memory"); while(n && (!EEH_CHECK_ALIGN(vsrc, 4) || !EEH_CHECK_ALIGN(dest, 4))) { *((u8 *)dest) = *((volatile u8 *)vsrc); __asm__ __volatile__ ("eieio" : : : "memory"); @@ -266,6 +268,7 @@ static inline void eeh_memcpy_toio(volatile void __iomem *dest, const void *src, { void *vdest = (void __force *) dest; + __asm__ __volatile__ ("sync" : : : "memory"); while(n && (!EEH_CHECK_ALIGN(vdest, 4) || !EEH_CHECK_ALIGN(src, 4))) { *((volatile u8 *)vdest) = *((u8 *)src); src++; diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 36c4c34bf565..212428db0d8b 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -19,6 +19,7 @@ extern int 
check_legacy_ioport(unsigned long base_port); #include #include #include +#include #ifdef CONFIG_PPC_ISERIES #include #endif @@ -162,7 +163,11 @@ extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns); extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl); extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl); -#define mmiowb() +static inline void mmiowb(void) +{ + __asm__ __volatile__ ("sync" : : : "memory"); + get_paca()->io_sync = 0; +} /* * output pause versions need a delay at least for the @@ -278,22 +283,23 @@ static inline int in_8(const volatile unsigned char __iomem *addr) { int ret; - __asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lbz%U1%X1 %0,%1; twi 0,%0,0; isync" : "=r" (ret) : "m" (*addr)); return ret; } static inline void out_8(volatile unsigned char __iomem *addr, int val) { - __asm__ __volatile__("stb%U0%X0 %1,%0; sync" + __asm__ __volatile__("sync; stb%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + get_paca()->io_sync = 1; } static inline int in_le16(const volatile unsigned short __iomem *addr) { int ret; - __asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lhbrx %0,0,%1; twi 0,%0,0; isync" : "=r" (ret) : "r" (addr), "m" (*addr)); return ret; } @@ -302,28 +308,30 @@ static inline int in_be16(const volatile unsigned short __iomem *addr) { int ret; - __asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lhz%U1%X1 %0,%1; twi 0,%0,0; isync" : "=r" (ret) : "m" (*addr)); return ret; } static inline void out_le16(volatile unsigned short __iomem *addr, int val) { - __asm__ __volatile__("sthbrx %1,0,%2; sync" + __asm__ __volatile__("sync; sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); + get_paca()->io_sync = 1; } static inline void out_be16(volatile unsigned short __iomem *addr, int val) { - __asm__ __volatile__("sth%U0%X0 %1,%0; sync" + __asm__ __volatile__("sync; sth%U0%X0 %1,%0" : 
"=m" (*addr) : "r" (val)); + get_paca()->io_sync = 1; } static inline unsigned in_le32(const volatile unsigned __iomem *addr) { unsigned ret; - __asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lwbrx %0,0,%1; twi 0,%0,0; isync" : "=r" (ret) : "r" (addr), "m" (*addr)); return ret; } @@ -332,21 +340,23 @@ static inline unsigned in_be32(const volatile unsigned __iomem *addr) { unsigned ret; - __asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; lwz%U1%X1 %0,%1; twi 0,%0,0; isync" : "=r" (ret) : "m" (*addr)); return ret; } static inline void out_le32(volatile unsigned __iomem *addr, int val) { - __asm__ __volatile__("stwbrx %1,0,%2; sync" : "=m" (*addr) + __asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); + get_paca()->io_sync = 1; } static inline void out_be32(volatile unsigned __iomem *addr, int val) { - __asm__ __volatile__("stw%U0%X0 %1,%0; sync" + __asm__ __volatile__("sync; stw%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + get_paca()->io_sync = 1; } static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) @@ -354,6 +364,7 @@ static inline unsigned long in_le64(const volatile unsigned long __iomem *addr) unsigned long tmp, ret; __asm__ __volatile__( + "sync\n" "ld %1,0(%2)\n" "twi 0,%1,0\n" "isync\n" @@ -372,7 +383,7 @@ static inline unsigned long in_be64(const volatile unsigned long __iomem *addr) { unsigned long ret; - __asm__ __volatile__("ld%U1%X1 %0,%1; twi 0,%0,0; isync" + __asm__ __volatile__("sync; ld%U1%X1 %0,%1; twi 0,%0,0; isync" : "=r" (ret) : "m" (*addr)); return ret; } @@ -389,14 +400,16 @@ static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long "rldicl %1,%1,32,0\n" "rlwimi %0,%1,8,8,31\n" "rlwimi %0,%1,24,16,23\n" - "std %0,0(%3)\n" - "sync" + "sync\n" + "std %0,0(%3)" : "=&r" (tmp) , "=&r" (val) : "1" (val) , "b" (addr) , "m" (*addr)); + get_paca()->io_sync = 1; } static inline void out_be64(volatile 
unsigned long __iomem *addr, unsigned long val) { - __asm__ __volatile__("std%U0%X0 %1,%0; sync" : "=m" (*addr) : "r" (val)); + __asm__ __volatile__("sync; std%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + get_paca()->io_sync = 1; } #ifndef CONFIG_PPC_ISERIES diff --git a/include/asm-powerpc/paca.h b/include/asm-powerpc/paca.h index 2d4585f06209..3d5d590bc4b0 100644 --- a/include/asm-powerpc/paca.h +++ b/include/asm-powerpc/paca.h @@ -93,6 +93,7 @@ struct paca_struct { u64 saved_r1; /* r1 save for RTAS calls */ u64 saved_msr; /* MSR saved here by enter_rtas */ u8 proc_enabled; /* irq soft-enable flag */ + u8 io_sync; /* writel() needs spin_unlock sync */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/include/asm-powerpc/spinlock.h b/include/asm-powerpc/spinlock.h index 895cb6d3a42a..c31e4382a775 100644 --- a/include/asm-powerpc/spinlock.h +++ b/include/asm-powerpc/spinlock.h @@ -36,6 +36,19 @@ #define LOCK_TOKEN 1 #endif +#if defined(CONFIG_PPC64) && defined(CONFIG_SMP) +#define CLEAR_IO_SYNC (get_paca()->io_sync = 0) +#define SYNC_IO do { \ + if (unlikely(get_paca()->io_sync)) { \ + mb(); \ + get_paca()->io_sync = 0; \ + } \ + } while (0) +#else +#define CLEAR_IO_SYNC +#define SYNC_IO +#endif + /* * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. 
@@ -61,6 +74,7 @@ static __inline__ unsigned long __spin_trylock(raw_spinlock_t *lock) static int __inline__ __raw_spin_trylock(raw_spinlock_t *lock) { + CLEAR_IO_SYNC; return __spin_trylock(lock) == 0; } @@ -91,6 +105,7 @@ extern void __rw_yield(raw_rwlock_t *lock); static void __inline__ __raw_spin_lock(raw_spinlock_t *lock) { + CLEAR_IO_SYNC; while (1) { if (likely(__spin_trylock(lock) == 0)) break; @@ -107,6 +122,7 @@ static void __inline__ __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long { unsigned long flags_dis; + CLEAR_IO_SYNC; while (1) { if (likely(__spin_trylock(lock) == 0)) break; @@ -124,6 +140,7 @@ static void __inline__ __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long static __inline__ void __raw_spin_unlock(raw_spinlock_t *lock) { + SYNC_IO; __asm__ __volatile__("# __raw_spin_unlock\n\t" LWSYNC_ON_SMP: : :"memory"); lock->slock = 0; -- cgit v1.2.2