[PATCH] powerpc: Merge bitops.h

Here's a revised version. This re-introduces the set_bits() function from ppc64, which I removed because I thought it was unused (it exists on no other arch). In fact it is used in the powermac interrupt code (but not on pSeries). - We use LARXL/STCXL macros to generate the right (32 or 64 bit) instructions, similar to LDL/STL from ppc_asm.h, used in fpu.S - ppc32 previously used a full "sync" barrier at the end of test_and_*_bit(), whereas ppc64 used an "isync". The merged version uses "isync", since I believe that's sufficient. - The ppc64 versions of the minix_*() bitmap functions have changed semantics. Previously on ppc64, these functions were big-endian (that is, bit 0 was the LSB in the first 64-bit, big-endian word). On ppc32 (and x86, for that matter), they were little-endian. As far as I can tell, the big-endian usage was simply wrong - I guess no-one ever tried to use minixfs on ppc64. - On ppc32 find_next_bit() and find_next_zero_bit() are no longer inline (they were already out-of-line on ppc64). - For ppc64, sched_find_first_bit() has moved from mmu_context.h to the merged bitops. What it was doing in mmu_context.h in the first place, I have no idea. - The fls() function is now implemented using the cntlzw instruction on ppc64, instead of generic_fls(), as it already was on ppc32. - For ARCH=ppc, this patch requires adding arch/powerpc/lib to the arch/ppc/Makefile. This in turn requires some changes to arch/powerpc/lib/Makefile which didn't correctly handle ARCH=ppc. Built and running on G5. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Paul Mackerras <paulus@samba.org>
author: David Gibson <david@gibson.dropbear.id.au> 2005-11-01 01:28:10 -0500
committer: Paul Mackerras <paulus@samba.org> 2005-11-01 05:49:02 -0500
commit: a0e60b2033b30a6bb8479629001cf98e58e4079a (patch)
tree: 6386eeca340a25c4ae1876f2f9663f94628c8cc3 /include/asm-powerpc
parent: 031ef0a72aa8f7ee63ae9f307c1bcff92b3ccc2c (diff)
1 files changed, 437 insertions, 0 deletions
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
new file mode 100644
index 000000000000..dc25c53704d5
--- /dev/null
+++ b/include/asm-powerpc/bitops.h
@@ -0,0 +1,437 @@
+/*
+ * PowerPC atomic bit operations.
+ *
+ * Merged version by David Gibson <david@gibson.dropbear.id.au>.
+ * Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don
+ * Reed, Pat McCarthy, Peter Bergner, Anton Blanchard.  They
+ * originally took it from the ppc32 code.
+ *
+ * Within a word, bits are numbered LSB first.  Lots of places make
+ * this assumption by directly testing bits with (val & (1<<nr)).
+ * This can cause confusion for large (> 1 word) bitmaps on a
+ * big-endian system because, unlike little endian, the number of each
+ * bit depends on the word size.
+ *
+ * The bitop functions are defined to work on unsigned longs, so for a
+ * ppc64 system the bits end up numbered:
+ *   |63..............0|127............64|191...........128|255...........192|
+ * and on ppc32:
+ *   |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
+ *
+ * There are a few little-endian macros used mostly for filesystem
+ * bitmaps, these work on similar bit arrays layouts, but
+ * byte-oriented:
+ *   |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
+ *
+ * The main difference is that bits 3-5 (64b) or 3-4 (32b) in the bit
+ * number field need to be reversed compared to the big-endian bit
+ * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_POWERPC_BITOPS_H
+#define _ASM_POWERPC_BITOPS_H
+#ifdef __KERNEL__
+#include <linux/compiler.h>
+#include <asm/atomic.h>
+#include <asm/synch.h>
+/*
+ * clear_bit doesn't imply a memory barrier, so callers that need
+ * ordering around it use these two helpers, which expand to smp_mb().
+ *
+ * BITOP_MASK(nr) is the single-bit mask for bit nr inside its
+ * unsigned long, BITOP_WORD(nr) is the index of that unsigned long,
+ * and BITOP_LE_SWIZZLE is the value XORed into a bit number by the
+ * *_le_bit macros in this file to get little-endian bit numbering.
+ *
+ * LARXL, STCXL and CNTLZL hold the instruction mnemonics pasted into
+ * the inline asm in this file; CONFIG_PPC64 selects the doubleword
+ * forms, otherwise the word forms are used.
+ */
+#define smp_mb__before_clear_bit()      smp_mb()
+#define smp_mb__after_clear_bit()       smp_mb()
+#define BITOP_MASK(nr)          (1UL << ((nr) % BITS_PER_LONG))
+#define BITOP_WORD(nr)          ((nr) / BITS_PER_LONG)
+#define BITOP_LE_SWIZZLE        ((BITS_PER_LONG-1) & ~0x7)
+#ifdef CONFIG_PPC64
+#define LARXL           "ldarx"
+#define STCXL           "stdcx."
+#define CNTLZL          "cntlzd"
+#else
+#define LARXL           "lwarx"
+#define STCXL           "stwcx."
+#define CNTLZL          "cntlzw"
+#endif
+/*
+ * Atomically set bit @nr of the bitmap at @addr.  The word holding
+ * the bit is updated in a LARXL/STCXL loop that retries until the
+ * conditional store succeeds.  The asm contains no barrier macros.
+ */
+static __inline__ void set_bit(int nr, volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        unsigned long bit = BITOP_MASK(nr);
+        unsigned long tmp;
+        __asm__ __volatile__(
+"1:"    LARXL " %0,0,%3 # set_bit\n"
+        "or     %0,%0,%2\n"
+        PPC405_ERR77(0,%3)
+        STCXL " %0,0,%3\n"
+        "bne-   1b"
+        : "=&r"(tmp), "=m"(*word)
+        : "r"(bit), "r"(word), "m"(*word)
+        : "cc" );
+}
+/*
+ * Atomically clear bit @nr of the bitmap at @addr.  The word holding
+ * the bit is updated in a LARXL/STCXL loop that retries until the
+ * conditional store succeeds.  No memory barrier is implied; use
+ * smp_mb__before_clear_bit()/smp_mb__after_clear_bit() when ordering
+ * is required.
+ */
+static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
+{
+        unsigned long old;
+        unsigned long mask = BITOP_MASK(nr);
+        unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+        __asm__ __volatile__(
+"1:"    LARXL " %0,0,%3 # clear_bit\n"
+        "andc   %0,%0,%2\n"
+        PPC405_ERR77(0,%3)
+        STCXL " %0,0,%3\n"
+        "bne-   1b"
+        : "=&r"(old), "=m"(*p)
+        : "r"(mask), "r"(p), "m"(*p)
+        : "cc" );
+}
+/*
+ * Atomically toggle bit @nr of the bitmap at @addr.  The word holding
+ * the bit is updated in a LARXL/STCXL loop that retries until the
+ * conditional store succeeds.  The asm contains no barrier macros.
+ */
+static __inline__ void change_bit(int nr, volatile unsigned long *addr)
+{
+        unsigned long old;
+        unsigned long mask = BITOP_MASK(nr);
+        unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+        __asm__ __volatile__(
+"1:"    LARXL " %0,0,%3 # change_bit\n"
+        "xor    %0,%0,%2\n"
+        PPC405_ERR77(0,%3)
+        STCXL " %0,0,%3\n"
+        "bne-   1b"
+        : "=&r"(old), "=m"(*p)
+        : "r"(mask), "r"(p), "m"(*p)
+        : "cc" );
+}
+/*
+ * Atomically set bit @nr of the bitmap at @addr and return 1 if it
+ * was already set, 0 if it was clear.  The retry loop is bracketed
+ * by EIEIO_ON_SMP and ISYNC_ON_SMP, and the asm clobbers "memory".
+ */
+static __inline__ int test_and_set_bit(unsigned long nr,
+                                       volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        unsigned long bit = BITOP_MASK(nr);
+        unsigned long prev, next;
+        __asm__ __volatile__(
+        EIEIO_ON_SMP
+"1:"    LARXL " %0,0,%3         # test_and_set_bit\n"
+        "or     %1,%0,%2 \n"
+        PPC405_ERR77(0,%3)
+        STCXL " %1,0,%3 \n"
+        "bne-   1b"
+        ISYNC_ON_SMP
+        : "=&r" (prev), "=&r" (next)
+        : "r" (bit), "r" (word)
+        : "cc", "memory");
+        /* prev is the word as loaded before the update */
+        return (prev & bit) ? 1 : 0;
+}
+/*
+ * Atomically clear bit @nr of the bitmap at @addr and return 1 if it
+ * was set beforehand, 0 if it was already clear.  The retry loop is
+ * bracketed by EIEIO_ON_SMP and ISYNC_ON_SMP, and the asm clobbers
+ * "memory".
+ */
+static __inline__ int test_and_clear_bit(unsigned long nr,
+                                         volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        unsigned long bit = BITOP_MASK(nr);
+        unsigned long prev, next;
+        __asm__ __volatile__(
+        EIEIO_ON_SMP
+"1:"    LARXL " %0,0,%3         # test_and_clear_bit\n"
+        "andc   %1,%0,%2 \n"
+        PPC405_ERR77(0,%3)
+        STCXL " %1,0,%3 \n"
+        "bne-   1b"
+        ISYNC_ON_SMP
+        : "=&r" (prev), "=&r" (next)
+        : "r" (bit), "r" (word)
+        : "cc", "memory");
+        /* prev is the word as loaded before the update */
+        return (prev & bit) ? 1 : 0;
+}
+/*
+ * Atomically toggle bit @nr of the bitmap at @addr and return 1 if
+ * it was set beforehand, 0 if it was clear.  The retry loop is
+ * bracketed by EIEIO_ON_SMP and ISYNC_ON_SMP, and the asm clobbers
+ * "memory".
+ */
+static __inline__ int test_and_change_bit(unsigned long nr,
+                                          volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        unsigned long bit = BITOP_MASK(nr);
+        unsigned long prev, next;
+        __asm__ __volatile__(
+        EIEIO_ON_SMP
+"1:"    LARXL " %0,0,%3         # test_and_change_bit\n"
+        "xor    %1,%0,%2 \n"
+        PPC405_ERR77(0,%3)
+        STCXL " %1,0,%3 \n"
+        "bne-   1b"
+        ISYNC_ON_SMP
+        : "=&r" (prev), "=&r" (next)
+        : "r" (bit), "r" (word)
+        : "cc", "memory");
+        /* prev is the word as loaded before the update */
+        return (prev & bit) ? 1 : 0;
+}
+/*
+ * Atomically OR every bit of @mask into the single unsigned long at
+ * @addr.  Unlike set_bit() this takes a mask rather than a bit
+ * number and only ever touches *addr.  The asm contains no barrier
+ * macros.
+ */
+static __inline__ void set_bits(unsigned long mask, unsigned long *addr)
+{
+        unsigned long old;
+        __asm__ __volatile__(
+"1:"    LARXL " %0,0,%3         # set_bits\n"
+        "or     %0,%0,%2\n"
+        /* same PPC405_ERR77 hook as the other LARXL/STCXL loops here */
+        PPC405_ERR77(0,%3)
+        STCXL " %0,0,%3\n"
+        "bne-   1b"
+        : "=&r" (old), "=m" (*addr)
+        : "r" (mask), "r" (addr), "m" (*addr)
+        : "cc");
+}
+/* Non-atomic versions */
+/*
+ * Report whether bit @nr of the bitmap at @addr is set: returns 1 if
+ * it is and 0 otherwise.  This is a plain C read of the word.
+ */
+static __inline__ int test_bit(unsigned long nr,
+                               __const__ volatile unsigned long *addr)
+{
+        __const__ volatile unsigned long *word = addr + BITOP_WORD(nr);
+        unsigned long shift = nr % BITS_PER_LONG;
+        return (*word >> shift) & 1UL;
+}
+/*
+ * Non-atomic set of bit @nr in the bitmap at @addr: an ordinary
+ * read-modify-write of the containing word.
+ */
+static __inline__ void __set_bit(unsigned long nr,
+                                 volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        *word = *word | BITOP_MASK(nr);
+}
+/*
+ * Non-atomic clear of bit @nr in the bitmap at @addr: an ordinary
+ * read-modify-write of the containing word.
+ */
+static __inline__ void __clear_bit(unsigned long nr,
+                                   volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        *word = *word & ~BITOP_MASK(nr);
+}
+/*
+ * Non-atomic toggle of bit @nr in the bitmap at @addr: an ordinary
+ * read-modify-write of the containing word.
+ */
+static __inline__ void __change_bit(unsigned long nr,
+                                    volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        *word = *word ^ BITOP_MASK(nr);
+}
+/*
+ * Non-atomic: set bit @nr in the bitmap at @addr and return 1 if it
+ * was already set, 0 if it was clear.
+ */
+static __inline__ int __test_and_set_bit(unsigned long nr,
+                                         volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        unsigned long bit = BITOP_MASK(nr);
+        unsigned long before = *word;
+        *word = before | bit;
+        return (before & bit) ? 1 : 0;
+}
+/*
+ * Non-atomic: clear bit @nr in the bitmap at @addr and return 1 if
+ * it was set beforehand, 0 if it was already clear.
+ */
+static __inline__ int __test_and_clear_bit(unsigned long nr,
+                                           volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        unsigned long bit = BITOP_MASK(nr);
+        unsigned long before = *word;
+        *word = before & ~bit;
+        return (before & bit) ? 1 : 0;
+}
+/*
+ * Non-atomic: toggle bit @nr in the bitmap at @addr and return 1 if
+ * it was set beforehand, 0 if it was clear.
+ */
+static __inline__ int __test_and_change_bit(unsigned long nr,
+                                            volatile unsigned long *addr)
+{
+        unsigned long *word = ((unsigned long *)addr) + BITOP_WORD(nr);
+        unsigned long bit = BITOP_MASK(nr);
+        unsigned long before = *word;
+        *word = before ^ bit;
+        return (before & bit) ? 1 : 0;
+}
+/*
+ * Return the zero-based position (LSB is bit 0, i.e. not IBM bit
+ * numbering) of the most significant 1-bit in an unsigned long.  It
+ * is derived from the leading-zero count produced by the CNTLZL
+ * instruction.
+ */
+static __inline__ int __ilog2(unsigned long x)
+{
+        int leading_zeros;
+        asm (CNTLZL " %0,%1" : "=r" (leading_zeros) : "r" (x));
+        return (BITS_PER_LONG - 1) - leading_zeros;
+}
+/*
+ * Find the least significant 0 bit of @x.  The result is zero-based,
+ * counted from the least significant end, or BITS_PER_LONG when @x
+ * contains no zero bit at all.
+ */
+static __inline__ unsigned long ffz(unsigned long x)
+{
+        unsigned long inverted = ~x;
+        /* every bit of x was set, so there is no zero to report */
+        if (inverted == 0)
+                return BITS_PER_LONG;
+        /*
+         * A 0 in x is a 1 in inverted; (inverted & -inverted) keeps
+         * only the lowest such 1, whose position __ilog2() reports.
+         */
+        return __ilog2(inverted & -inverted);
+}
+/*
+ * Zero-based position of the least significant 1-bit of @x, found by
+ * isolating that bit with (x & -x) and passing it to __ilog2().
+ */
+static __inline__ int __ffs(unsigned long x)
+{
+        unsigned long lowest = x & -x;
+        return __ilog2(lowest);
+}
+/*
+ * ffs: find first bit set.  Follows the libc / compiler builtin ffs
+ * convention (see man ffs), so the result is one greater than what
+ * __ilog2() reports for the lowest set bit; this differs in spirit
+ * from ffz above.
+ */
+static __inline__ int ffs(int x)
+{
+        unsigned long widened = (unsigned long)x;
+        unsigned long lowest = widened & -widened;
+        return 1 + __ilog2(lowest);
+}
+/*
+ * fls: find last (most-significant) bit set in a 32-bit value,
+ * computed as 32 minus the cntlzw leading-zero count.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static __inline__ int fls(unsigned int x)
+{
+        int leading_zeros;
+        asm ("cntlzw %0,%1" : "=r" (leading_zeros) : "r" (x));
+        return 32 - leading_zeros;
+}
+/*
+ * hweightN: returns the hamming weight (i.e. the number
+ * of bits set) of an N-bit word.  Each one simply expands to the
+ * generic_hweightN() helper of the same width.
+ *
+ * find_first_zero_bit() is find_next_zero_bit() started at offset 0;
+ * find_next_zero_bit() itself is only declared in this header.
+ */
+#define hweight64(x) generic_hweight64(x)
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
+unsigned long find_next_zero_bit(const unsigned long *addr,
+                                 unsigned long size, unsigned long offset);
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
+ *
+ * Expands to find_next_bit() with an offset of 0; find_next_bit() is
+ * only declared in this header.
+ */
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+unsigned long find_next_bit(const unsigned long *addr,
+                            unsigned long size, unsigned long offset);
+/* Little-endian versions */
+/*
+ * Test bit @nr of a byte-oriented (little-endian) bitmap at @addr:
+ * bit nr lives in byte nr / 8 at position nr % 8 within that byte.
+ * Returns 1 if the bit is set, 0 otherwise.
+ */
+static __inline__ int test_le_bit(unsigned long nr,
+                                  __const__ unsigned long *addr)
+{
+        __const__ unsigned char *bytes = (__const__ unsigned char *) addr;
+        unsigned char byte = bytes[nr / 8];
+        return (byte >> (nr % 8)) & 1;
+}
+#define __set_le_bit(nr, addr) \
+        __set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))      /* XOR remaps the LE bit number; non-atomic */
+#define __clear_le_bit(nr, addr) \
+        __clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))    /* non-atomic */
+#define test_and_set_le_bit(nr, addr) \
+        test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))       /* atomic */
+#define test_and_clear_le_bit(nr, addr) \
+        test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))     /* atomic */
+#define __test_and_set_le_bit(nr, addr) \
+        __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))     /* non-atomic */
+#define __test_and_clear_le_bit(nr, addr) \
+        __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))   /* non-atomic */
+#define find_first_zero_le_bit(addr, size) find_next_zero_le_bit((addr), (size), 0)     /* search from offset 0 */
+unsigned long find_next_zero_le_bit(const unsigned long *addr,
+                                    unsigned long size, unsigned long offset); /* declared only; defined elsewhere */
+/*
+ * Bitmap functions for the ext2 filesystem, all built on the
+ * little-endian bit helpers.  ext2_set_bit()/ext2_clear_bit() use the
+ * non-atomic test-and-modify helpers; the *_atomic variants use the
+ * atomic ones and ignore their lock argument.  Every macro argument
+ * is parenthesised so that an expression such as (p + 1) passed as
+ * addr is cast as a whole.
+ */
+#define ext2_set_bit(nr,addr) \
+        __test_and_set_le_bit((nr), (unsigned long *)(addr))
+#define ext2_clear_bit(nr, addr) \
+        __test_and_clear_le_bit((nr), (unsigned long *)(addr))
+#define ext2_set_bit_atomic(lock, nr, addr) \
+        test_and_set_le_bit((nr), (unsigned long *)(addr))
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+        test_and_clear_le_bit((nr), (unsigned long *)(addr))
+#define ext2_test_bit(nr, addr)      test_le_bit((nr), (unsigned long *)(addr))
+#define ext2_find_first_zero_bit(addr, size) \
+        find_first_zero_le_bit((unsigned long *)(addr), (size))
+#define ext2_find_next_zero_bit(addr, size, off) \
+        find_next_zero_le_bit((unsigned long *)(addr), (size), (off))
+/*
+ * Bitmap functions for the minix filesystem, all built on the
+ * non-atomic little-endian bit helpers.  Every macro argument is
+ * parenthesised so that an expression such as (p + 1) passed as addr
+ * is cast as a whole.
+ */
+#define minix_test_and_set_bit(nr,addr) \
+        __test_and_set_le_bit((nr), (unsigned long *)(addr))
+#define minix_set_bit(nr,addr) \
+        __set_le_bit((nr), (unsigned long *)(addr))
+#define minix_test_and_clear_bit(nr,addr) \
+        __test_and_clear_le_bit((nr), (unsigned long *)(addr))
+#define minix_test_bit(nr,addr) \
+        test_le_bit((nr), (unsigned long *)(addr))
+#define minix_find_first_zero_bit(addr,size) \
+        find_first_zero_le_bit((unsigned long *)(addr), (size))
+/*
+ * Every architecture must define this function. It's the fastest
+ * way of searching a 140-bit bitmap where the first 100 bits are
+ * unlikely to be set. At least one of the 140 bits must be set,
+ * because the last word is handed to __ffs() without a zero check.
+ *
+ * The bitmap is read as three unsigned longs on CONFIG_PPC64 and as
+ * five otherwise; the return value is the index of the lowest set
+ * bit across the whole bitmap.
+ */
+static inline int sched_find_first_bit(const unsigned long *b)
+{
+#ifdef CONFIG_PPC64
+        if (unlikely(b[0]))
+                return __ffs(b[0]);
+        if (unlikely(b[1]))
+                return __ffs(b[1]) + 64;
+        return __ffs(b[2]) + 128;
+#else
+        if (unlikely(b[0]))
+                return __ffs(b[0]);
+        if (unlikely(b[1]))
+                return __ffs(b[1]) + 32;
+        if (unlikely(b[2]))
+                return __ffs(b[2]) + 64;
+        if (b[3])
+                return __ffs(b[3]) + 96;
+        return __ffs(b[4]) + 128;
+#endif
+}
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_BITOPS_H */
author	David Gibson <david@gibson.dropbear.id.au>	2005-11-01 01:28:10 -0500
committer	Paul Mackerras <paulus@samba.org>	2005-11-01 05:49:02 -0500
commit	a0e60b2033b30a6bb8479629001cf98e58e4079a (patch)
tree	6386eeca340a25c4ae1876f2f9663f94628c8cc3 /include/asm-powerpc
parent	031ef0a72aa8f7ee63ae9f307c1bcff92b3ccc2c (diff)

diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h new file mode 100644 index 000000000000..dc25c53704d5 --- /dev/null +++ b/include/asm-powerpc/bitops.h
@@ -0,0 +1,437 @@
	1	/*
	2	* PowerPC atomic bit operations.
	3	*
	4	* Merged version by David Gibson <david@gibson.dropbear.id.au>.
	5	* Based on ppc64 versions by: Dave Engebretsen, Todd Inglett, Don
	6	* Reed, Pat McCarthy, Peter Bergner, Anton Blanchard. They
	7	* originally took it from the ppc32 code.
	8	*
	9	* Within a word, bits are numbered LSB first. Lots of places make
	10	* this assumption by directly testing bits with (val & (1<<nr)).
	11	* This can cause confusion for large (> 1 word) bitmaps on a
	12	* big-endian system because, unlike little endian, the number of each
	13	* bit depends on the word size.
	14	*
	15	* The bitop functions are defined to work on unsigned longs, so for a
	16	* ppc64 system the bits end up numbered:
	17	* \|63..............0\|127............64\|191...........128\|255...........192\|
	18	* and on ppc32:
	19	* \|31.....0\|63....32\|95....64\|127...96\|159..128\|191..160\|223..192\|255..224\|
	20	*
	21	* There are a few little-endian macros used mostly for filesystem
	22	* bitmaps, these work on similar bit arrays layouts, but
	23	* byte-oriented:
	24	* \|7...0\|15...8\|23...16\|31...24\|39...32\|47...40\|55...48\|63...56\|
	25	*
	26	* The main difference is that bits 3-5 (64b) or 3-4 (32b) in the bit
	27	* number field need to be reversed compared to the big-endian bit
	28	* fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
	29	*
	30	* This program is free software; you can redistribute it and/or
	31	* modify it under the terms of the GNU General Public License
	32	* as published by the Free Software Foundation; either version
	33	* 2 of the License, or (at your option) any later version.
	34	*/
	35
	36	#ifndef _ASM_POWERPC_BITOPS_H
	37	#define _ASM_POWERPC_BITOPS_H
	38
	39	#ifdef __KERNEL__
	40
	41	#include <linux/compiler.h>
	42	#include <asm/atomic.h>
	43	#include <asm/synch.h>
	44
	45	/*
	46	* clear_bit doesn't imply a memory barrier
	47	*/
	48	#define smp_mb__before_clear_bit() smp_mb()
	49	#define smp_mb__after_clear_bit() smp_mb()
	50
	51	#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
	52	#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
	53	#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
	54
	55	#ifdef CONFIG_PPC64
	56	#define LARXL "ldarx"
	57	#define STCXL "stdcx."
	58	#define CNTLZL "cntlzd"
	59	#else
	60	#define LARXL "lwarx"
	61	#define STCXL "stwcx."
	62	#define CNTLZL "cntlzw"
	63	#endif
	64
	65	static __inline__ void set_bit(int nr, volatile unsigned long *addr)
	66	{
	67	unsigned long old;
	68	unsigned long mask = BITOP_MASK(nr);
	69	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	70
	71	__asm__ __volatile__(
	72	"1:" LARXL " %0,0,%3 # set_bit\n"
	73	"or %0,%0,%2\n"
	74	PPC405_ERR77(0,%3)
	75	STCXL " %0,0,%3\n"
	76	"bne- 1b"
	77	: "=&r"(old), "=m"(*p)
	78	: "r"(mask), "r"(p), "m"(*p)
	79	: "cc" );
	80	}
	81
	82	static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
	83	{
	84	unsigned long old;
	85	unsigned long mask = BITOP_MASK(nr);
	86	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	87
	88	__asm__ __volatile__(
	89	"1:" LARXL " %0,0,%3 # set_bit\n"
	90	"andc %0,%0,%2\n"
	91	PPC405_ERR77(0,%3)
	92	STCXL " %0,0,%3\n"
	93	"bne- 1b"
	94	: "=&r"(old), "=m"(*p)
	95	: "r"(mask), "r"(p), "m"(*p)
	96	: "cc" );
	97	}
	98
	99	static __inline__ void change_bit(int nr, volatile unsigned long *addr)
	100	{
	101	unsigned long old;
	102	unsigned long mask = BITOP_MASK(nr);
	103	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	104
	105	__asm__ __volatile__(
	106	"1:" LARXL " %0,0,%3 # set_bit\n"
	107	"xor %0,%0,%2\n"
	108	PPC405_ERR77(0,%3)
	109	STCXL " %0,0,%3\n"
	110	"bne- 1b"
	111	: "=&r"(old), "=m"(*p)
	112	: "r"(mask), "r"(p), "m"(*p)
	113	: "cc" );
	114	}
	115
	116	static __inline__ int test_and_set_bit(unsigned long nr,
	117	volatile unsigned long *addr)
	118	{
	119	unsigned long old, t;
	120	unsigned long mask = BITOP_MASK(nr);
	121	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	122
	123	__asm__ __volatile__(
	124	EIEIO_ON_SMP
	125	"1:" LARXL " %0,0,%3 # test_and_set_bit\n"
	126	"or %1,%0,%2 \n"
	127	PPC405_ERR77(0,%3)
	128	STCXL " %1,0,%3 \n"
	129	"bne- 1b"
	130	ISYNC_ON_SMP
	131	: "=&r" (old), "=&r" (t)
	132	: "r" (mask), "r" (p)
	133	: "cc", "memory");
	134
	135	return (old & mask) != 0;
	136	}
	137
	138	static __inline__ int test_and_clear_bit(unsigned long nr,
	139	volatile unsigned long *addr)
	140	{
	141	unsigned long old, t;
	142	unsigned long mask = BITOP_MASK(nr);
	143	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	144
	145	__asm__ __volatile__(
	146	EIEIO_ON_SMP
	147	"1:" LARXL " %0,0,%3 # test_and_clear_bit\n"
	148	"andc %1,%0,%2 \n"
	149	PPC405_ERR77(0,%3)
	150	STCXL " %1,0,%3 \n"
	151	"bne- 1b"
	152	ISYNC_ON_SMP
	153	: "=&r" (old), "=&r" (t)
	154	: "r" (mask), "r" (p)
	155	: "cc", "memory");
	156
	157	return (old & mask) != 0;
	158	}
	159
	160	static __inline__ int test_and_change_bit(unsigned long nr,
	161	volatile unsigned long *addr)
	162	{
	163	unsigned long old, t;
	164	unsigned long mask = BITOP_MASK(nr);
	165	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	166
	167	__asm__ __volatile__(
	168	EIEIO_ON_SMP
	169	"1:" LARXL " %0,0,%3 # test_and_change_bit\n"
	170	"xor %1,%0,%2 \n"
	171	PPC405_ERR77(0,%3)
	172	STCXL " %1,0,%3 \n"
	173	"bne- 1b"
	174	ISYNC_ON_SMP
	175	: "=&r" (old), "=&r" (t)
	176	: "r" (mask), "r" (p)
	177	: "cc", "memory");
	178
	179	return (old & mask) != 0;
	180	}
	181
	182	static __inline__ void set_bits(unsigned long mask, unsigned long *addr)
	183	{
	184	unsigned long old;
	185
	186	__asm__ __volatile__(
	187	"1:" LARXL " %0,0,%3 # set_bit\n"
	188	"or %0,%0,%2\n"
	189	STCXL " %0,0,%3\n"
	190	"bne- 1b"
	191	: "=&r" (old), "=m" (*addr)
	192	: "r" (mask), "r" (addr), "m" (*addr)
	193	: "cc");
	194	}
	195
	196	/* Non-atomic versions */
	197	static __inline__ int test_bit(unsigned long nr,
	198	__const__ volatile unsigned long *addr)
	199	{
	200	return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
	201	}
	202
	203	static __inline__ void __set_bit(unsigned long nr,
	204	volatile unsigned long *addr)
	205	{
	206	unsigned long mask = BITOP_MASK(nr);
	207	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	208
	209	*p \|= mask;
	210	}
	211
	212	static __inline__ void __clear_bit(unsigned long nr,
	213	volatile unsigned long *addr)
	214	{
	215	unsigned long mask = BITOP_MASK(nr);
	216	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	217
	218	*p &= ~mask;
	219	}
	220
	221	static __inline__ void __change_bit(unsigned long nr,
	222	volatile unsigned long *addr)
	223	{
	224	unsigned long mask = BITOP_MASK(nr);
	225	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	226
	227	*p ^= mask;
	228	}
	229
	230	static __inline__ int __test_and_set_bit(unsigned long nr,
	231	volatile unsigned long *addr)
	232	{
	233	unsigned long mask = BITOP_MASK(nr);
	234	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	235	unsigned long old = *p;
	236
	237	*p = old \| mask;
	238	return (old & mask) != 0;
	239	}
	240
	241	static __inline__ int __test_and_clear_bit(unsigned long nr,
	242	volatile unsigned long *addr)
	243	{
	244	unsigned long mask = BITOP_MASK(nr);
	245	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	246	unsigned long old = *p;
	247
	248	*p = old & ~mask;
	249	return (old & mask) != 0;
	250	}
	251
	252	static __inline__ int __test_and_change_bit(unsigned long nr,
	253	volatile unsigned long *addr)
	254	{
	255	unsigned long mask = BITOP_MASK(nr);
	256	unsigned long p = ((unsigned long )addr) + BITOP_WORD(nr);
	257	unsigned long old = *p;
	258
	259	*p = old ^ mask;
	260	return (old & mask) != 0;
	261	}
	262
	263	/*
	264	* Return the zero-based bit position (LE, not IBM bit numbering) of
	265	* the most significant 1-bit in a double word.
	266	*/
	267	static __inline__ int __ilog2(unsigned long x)
	268	{
	269	int lz;
	270
	271	asm (CNTLZL " %0,%1" : "=r" (lz) : "r" (x));
	272	return BITS_PER_LONG - 1 - lz;
	273	}
	274
	275	/*
	276	* Determines the bit position of the least significant 0 bit in the
	277	* specified double word. The returned bit position will be
	278	* zero-based, starting from the right side (63/31 - 0).
	279	*/
	280	static __inline__ unsigned long ffz(unsigned long x)
	281	{
	282	/* no zero exists anywhere in the 8 byte area. */
	283	if ((x = ~x) == 0)
	284	return BITS_PER_LONG;
	285
	286	/*
	287	* Calculate the bit position of the least significant '1' bit in x
	288	* (since x has been changed this will actually be the least significant
	289	* '0' bit in the original x). Note: (x & -x) gives us a mask that
	290	* is the least significant (RIGHT-most) 1-bit of the value in x.
	291	*/
	292	return __ilog2(x & -x);
	293	}
	294
	295	static __inline__ int __ffs(unsigned long x)
	296	{
	297	return __ilog2(x & -x);
	298	}
	299
	300	/*
	301	* ffs: find first bit set. This is defined the same way as
	302	* the libc and compiler builtin ffs routines, therefore
	303	* differs in spirit from the above ffz (man ffs).
	304	*/
	305	static __inline__ int ffs(int x)
	306	{
	307	unsigned long i = (unsigned long)x;
	308	return __ilog2(i & -i) + 1;
	309	}
	310
	311	/*
	312	* fls: find last (most-significant) bit set.
	313	* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
	314	*/
	315	static __inline__ int fls(unsigned int x)
	316	{
	317	int lz;
	318
	319	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
	320	return 32 - lz;
	321	}
	322
	323	/*
	324	* hweightN: returns the hamming weight (i.e. the number
	325	* of bits set) of a N-bit word
	326	*/
	327	#define hweight64(x) generic_hweight64(x)
	328	#define hweight32(x) generic_hweight32(x)
	329	#define hweight16(x) generic_hweight16(x)
	330	#define hweight8(x) generic_hweight8(x)
	331
	332	#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
	333	unsigned long find_next_zero_bit(const unsigned long *addr,
	334	unsigned long size, unsigned long offset);
	335	/**
	336	* find_first_bit - find the first set bit in a memory region
	337	* @addr: The address to start the search at
	338	* @size: The maximum size to search
	339	*
	340	* Returns the bit-number of the first set bit, not the number of the byte
	341	* containing a bit.
	342	*/
	343	#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
	344	unsigned long find_next_bit(const unsigned long *addr,
	345	unsigned long size, unsigned long offset);
	346
	347	/* Little-endian versions */
	348
	349	static __inline__ int test_le_bit(unsigned long nr,
	350	__const__ unsigned long *addr)
	351	{
	352	__const__ unsigned char tmp = (__const__ unsigned char ) addr;
	353	return (tmp[nr >> 3] >> (nr & 7)) & 1;
	354	}
	355
	356	#define __set_le_bit(nr, addr) \
	357	__set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
	358	#define __clear_le_bit(nr, addr) \
	359	__clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
	360
	361	#define test_and_set_le_bit(nr, addr) \
	362	test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
	363	#define test_and_clear_le_bit(nr, addr) \
	364	test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
	365
	366	#define __test_and_set_le_bit(nr, addr) \
	367	__test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
	368	#define __test_and_clear_le_bit(nr, addr) \
	369	__test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
	370
	371	#define find_first_zero_le_bit(addr, size) find_next_zero_le_bit((addr), (size), 0)
	372	unsigned long find_next_zero_le_bit(const unsigned long *addr,
	373	unsigned long size, unsigned long offset);
	374
	375	/* Bitmap functions for the ext2 filesystem */
	376
	377	#define ext2_set_bit(nr,addr) \
	378	__test_and_set_le_bit((nr), (unsigned long*)addr)
	379	#define ext2_clear_bit(nr, addr) \
	380	__test_and_clear_le_bit((nr), (unsigned long*)addr)
	381
	382	#define ext2_set_bit_atomic(lock, nr, addr) \
	383	test_and_set_le_bit((nr), (unsigned long*)addr)
	384	#define ext2_clear_bit_atomic(lock, nr, addr) \
	385	test_and_clear_le_bit((nr), (unsigned long*)addr)
	386
	387	#define ext2_test_bit(nr, addr) test_le_bit((nr),(unsigned long*)addr)
	388
	389	#define ext2_find_first_zero_bit(addr, size) \
	390	find_first_zero_le_bit((unsigned long*)addr, size)
	391	#define ext2_find_next_zero_bit(addr, size, off) \
	392	find_next_zero_le_bit((unsigned long*)addr, size, off)
	393
	394	/* Bitmap functions for the minix filesystem. */
	395
	396	#define minix_test_and_set_bit(nr,addr) \
	397	__test_and_set_le_bit(nr, (unsigned long *)addr)
	398	#define minix_set_bit(nr,addr) \
	399	__set_le_bit(nr, (unsigned long *)addr)
	400	#define minix_test_and_clear_bit(nr,addr) \
	401	__test_and_clear_le_bit(nr, (unsigned long *)addr)
	402	#define minix_test_bit(nr,addr) \
	403	test_le_bit(nr, (unsigned long *)addr)
	404
	405	#define minix_find_first_zero_bit(addr,size) \
	406	find_first_zero_le_bit((unsigned long *)addr, size)
	407
	408	/*
	409	* Every architecture must define this function. It's the fastest
	410	* way of searching a 140-bit bitmap where the first 100 bits are
	411	* unlikely to be set. It's guaranteed that at least one of the 140
	412	* bits is set.
	413	*/
	414	static inline int sched_find_first_bit(const unsigned long *b)
	415	{
	416	#ifdef CONFIG_PPC64
	417	if (unlikely(b[0]))
	418	return __ffs(b[0]);
	419	if (unlikely(b[1]))
	420	return __ffs(b[1]) + 64;
	421	return __ffs(b[2]) + 128;
	422	#else
	423	if (unlikely(b[0]))
	424	return __ffs(b[0]);
	425	if (unlikely(b[1]))
	426	return __ffs(b[1]) + 32;
	427	if (unlikely(b[2]))
	428	return __ffs(b[2]) + 64;
	429	if (b[3])
	430	return __ffs(b[3]) + 96;
	431	return __ffs(b[4]) + 128;
	432	#endif
	433	}
	434
	435	#endif /* __KERNEL__ */
	436
	437	#endif /* _ASM_POWERPC_BITOPS_H */