Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-generic-bitops-v3

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-generic-bitops-v3:
  x86, bitops: select the generic bitmap search functions
  x86: include/asm-x86/pgalloc.h/bitops.h: checkpatch cleanups - formatting only
  x86: finalize bitops unification
  x86, UML: remove x86-specific implementations of find_first_bit
  x86: optimize find_first_bit for small bitmaps
  x86: switch 64-bit to generic find_first_bit
  x86: generic versions of find_first_(zero_)bit, convert i386
  bitops: use __fls for fls64 on 64-bit archs
  generic: implement __fls on all 64-bit archs
  generic: introduce a generic __fls implementation
  x86: merge the simple bitops and move them to bitops.h
  x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps
  x86, uml: fix uml with generic find_next_bit for x86
  x86: change x86 to use generic find_next_bit
  uml: Kconfig cleanup
  uml: fix build error
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-26 16:46:11 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-26 16:46:11 -0400
commit: 9b79ed952bd7344d40152f8a560ad8a0d93f3886 (patch)
tree: 0cdf72321a9eeb2a766b7b98d5a87ad3d46ad620 /arch
parent: a52b0d25a722e84da999005b75f972aa4824253c (diff)
parent: 19870def587554c4055df3e74a21508e3647fb7e (diff)
8 files changed, 17 insertions, 250 deletions
diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64
index 3fbe69e359ed..5696e7b374b3 100644
--- a/arch/um/Kconfig.x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -1,3 +1,10 @@
+menu "Host processor type and features"
+source "arch/x86/Kconfig.cpu"
+endmenu
 config UML_X86
        bool
        default y
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index f4bd349d4412..f25c29a12d00 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -14,6 +14,7 @@
 #include "os.h"
 #include "um_malloc.h"
 #include "user.h"
+#include <linux/limits.h>
 struct helper_data {
        void (*pre_exec)(void*);
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 964dc1a04c37..598b5c1903af 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -6,7 +6,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
        ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
        sys_call_table.o tls.o
-subarch-obj-y = lib/bitops_32.o lib/semaphore_32.o lib/string_32.o
+subarch-obj-y = lib/semaphore_32.o lib/string_32.o
 subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
 subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index 3c22de532088..c8b4cce9cfe1 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -10,7 +10,7 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
 obj-$(CONFIG_MODULES) += um_module.o
-subarch-obj-y = lib/bitops_64.o lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
+subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
 subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o
 ldt-y = ../sys-i386/ldt.o
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 4da3cdb9c1b1..7ef18b01f0bc 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -278,6 +278,11 @@ config GENERIC_CPU
 endchoice
+config X86_CPU
+        def_bool y
+        select GENERIC_FIND_FIRST_BIT
+        select GENERIC_FIND_NEXT_BIT
 config X86_GENERIC
        bool "Generic x86 support"
        depends on X86_32
@@ -398,7 +403,7 @@ config X86_TSC
 # generates cmov.
 config X86_CMOV
        def_bool y
-        depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)
+        depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64)
 config X86_MINIMUM_CPU_FAMILY
        int
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 25df1c1989fe..76f60f52a885 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -11,7 +11,7 @@ lib-y += memcpy_$(BITS).o
 ifeq ($(CONFIG_X86_32),y)
        lib-y += checksum_32.o
        lib-y += strstr_32.o
-        lib-y += bitops_32.o semaphore_32.o string_32.o
+        lib-y += semaphore_32.o string_32.o
        lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else
@@ -21,7 +21,6 @@ else
        lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
        lib-y += thunk_64.o clear_page_64.o copy_page_64.o
-        lib-y += bitops_64.o
        lib-y += memmove_64.o memset_64.o
        lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
 endif
diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c
deleted file mode 100644
index b65440459859..000000000000
--- a/arch/x86/lib/bitops_32.c
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <linux/bitops.h>
-#include <linux/module.h>
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_bit(const unsigned long *addr, int size, int offset)
-{
-        const unsigned long *p = addr + (offset >> 5);
-        int set = 0, bit = offset & 31, res;
-        if (bit) {
-                /*
-                 * Look for nonzero in the first 32 bits:
-                 */
-                __asm__("bsfl %1,%0\n\t"
-                        "jne 1f\n\t"
-                        "movl $32, %0\n"
-                        "1:"
-                        : "=r" (set)
-                        : "r" (*p >> bit));
-                if (set < (32 - bit))
-                        return set + offset;
-                set = 32 - bit;
-                p++;
-        }
-        /*
-         * No set bit yet, search remaining full words for a bit
-         */
-        res = find_first_bit (p, size - 32 * (p - addr));
-        return (offset + set + res);
-}
-EXPORT_SYMBOL(find_next_bit);
-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_zero_bit(const unsigned long *addr, int size, int offset)
-{
-        const unsigned long *p = addr + (offset >> 5);
-        int set = 0, bit = offset & 31, res;
-        if (bit) {
-                /*
-                 * Look for zero in the first 32 bits.
-                 */
-                __asm__("bsfl %1,%0\n\t"
-                        "jne 1f\n\t"
-                        "movl $32, %0\n"
-                        "1:"
-                        : "=r" (set)
-                        : "r" (~(*p >> bit)));
-                if (set < (32 - bit))
-                        return set + offset;
-                set = 32 - bit;
-                p++;
-        }
-        /*
-         * No zero yet, search remaining full bytes for a zero
-         */
-        res = find_first_zero_bit(p, size - 32 * (p - addr));
-        return (offset + set + res);
-}
-EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
deleted file mode 100644
index 0e8f491e6ccc..000000000000
--- a/arch/x86/lib/bitops_64.c
+++ /dev/null
@@ -1,175 +0,0 @@
-#include <linux/bitops.h>
-#undef find_first_zero_bit
-#undef find_next_zero_bit
-#undef find_first_bit
-#undef find_next_bit
-static inline long
-__find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
-        long d0, d1, d2;
-        long res;
-        /*
-         * We must test the size in words, not in bits, because
-         * otherwise incoming sizes in the range -63..-1 will not run
-         * any scasq instructions, and then the flags used by the je
-         * instruction will have whatever random value was in place
-         * before.  Nobody should call us like that, but
-         * find_next_zero_bit() does when offset and size are at the
-         * same word and it fails to find a zero itself.
-         */
-        size += 63;
-        size >>= 6;
-        if (!size)
-                return 0;
-        asm volatile(
-                "  repe; scasq\n"
-                "  je 1f\n"
-                "  xorq -8(%%rdi),%%rax\n"
-                "  subq $8,%%rdi\n"
-                "  bsfq %%rax,%%rdx\n"
-                "1:  subq %[addr],%%rdi\n"
-                "  shlq $3,%%rdi\n"
-                "  addq %%rdi,%%rdx"
-                :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
-                :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
-                 [addr] "S" (addr) : "memory");
-        /*
-         * Any register would do for [addr] above, but GCC tends to
-         * prefer rbx over rsi, even though rsi is readily available
-         * and doesn't have to be saved.
-         */
-        return res;
-}
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit.
- */
-long find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
-        return __find_first_zero_bit (addr, size);
-}
-/**
- * find_next_zero_bit - find the next zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_zero_bit (const unsigned long * addr, long size, long offset)
-{
-        const unsigned long * p = addr + (offset >> 6);
-        unsigned long set = 0;
-        unsigned long res, bit = offset&63;
-        if (bit) {
-                /*
-                 * Look for zero in first word
-                 */
-                asm("bsfq %1,%0\n\t"
-                    "cmoveq %2,%0"
-                    : "=r" (set)
-                    : "r" (~(*p >> bit)), "r"(64L));
-                if (set < (64 - bit))
-                        return set + offset;
-                set = 64 - bit;
-                p++;
-        }
-        /*
-         * No zero yet, search remaining full words for a zero
-         */
-        res = __find_first_zero_bit (p, size - 64 * (p - addr));
-        return (offset + set + res);
-}
-static inline long
-__find_first_bit(const unsigned long * addr, unsigned long size)
-{
-        long d0, d1;
-        long res;
-        /*
-         * We must test the size in words, not in bits, because
-         * otherwise incoming sizes in the range -63..-1 will not run
-         * any scasq instructions, and then the flags used by the jz
-         * instruction will have whatever random value was in place
-         * before.  Nobody should call us like that, but
-         * find_next_bit() does when offset and size are at the same
-         * word and it fails to find a one itself.
-         */
-        size += 63;
-        size >>= 6;
-        if (!size)
-                return 0;
-        asm volatile(
-                "   repe; scasq\n"
-                "   jz 1f\n"
-                "   subq $8,%%rdi\n"
-                "   bsfq (%%rdi),%%rax\n"
-                "1: subq %[addr],%%rdi\n"
-                "   shlq $3,%%rdi\n"
-                "   addq %%rdi,%%rax"
-                :"=a" (res), "=&c" (d0), "=&D" (d1)
-                :"0" (0ULL), "1" (size), "2" (addr),
-                 [addr] "r" (addr) : "memory");
-        return res;
-}
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first set bit, not the number of the byte
- * containing a bit.
- */
-long find_first_bit(const unsigned long * addr, unsigned long size)
-{
-        return __find_first_bit(addr,size);
-}
-/**
- * find_next_bit - find the first set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_bit(const unsigned long * addr, long size, long offset)
-{
-        const unsigned long * p = addr + (offset >> 6);
-        unsigned long set = 0, bit = offset & 63, res;
-        if (bit) {
-                /*
-                 * Look for nonzero in the first 64 bits:
-                 */
-                asm("bsfq %1,%0\n\t"
-                    "cmoveq %2,%0\n\t"
-                    : "=r" (set)
-                    : "r" (*p >> bit), "r" (64L));
-                if (set < (64 - bit))
-                        return set + offset;
-                set = 64 - bit;
-                p++;
-        }
-        /*
-         * No set bit yet, search remaining full words for a bit
-         */
-        res = __find_first_bit (p, size - 64 * (p - addr));
-        return (offset + set + res);
-}
-#include <linux/module.h>
-EXPORT_SYMBOL(find_next_bit);
-EXPORT_SYMBOL(find_first_bit);
-EXPORT_SYMBOL(find_first_zero_bit);
-EXPORT_SYMBOL(find_next_zero_bit);
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-26 16:46:11 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-26 16:46:11 -0400
commit	9b79ed952bd7344d40152f8a560ad8a0d93f3886 (patch)
tree	0cdf72321a9eeb2a766b7b98d5a87ad3d46ad620 /arch
parent	a52b0d25a722e84da999005b75f972aa4824253c (diff)
parent	19870def587554c4055df3e74a21508e3647fb7e (diff)

diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64 index 3fbe69e359ed..5696e7b374b3 100644 --- a/arch/um/Kconfig.x86_64 +++ b/arch/um/Kconfig.x86_64
@@ -1,3 +1,10 @@
		1
		2	menu "Host processor type and features"
		3
		4	source "arch/x86/Kconfig.cpu"
		5
		6	endmenu
		7
1	config UML_X86	8	config UML_X86
2	bool	9	bool
3	default y	10	default y


diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index f4bd349d4412..f25c29a12d00 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c
@@ -14,6 +14,7 @@
14	#include "os.h"	14	#include "os.h"
15	#include "um_malloc.h"	15	#include "um_malloc.h"
16	#include "user.h"	16	#include "user.h"
		17	#include <linux/limits.h>
17		18
18	struct helper_data {	19	struct helper_data {
19	void (*pre_exec)(void*);	20	void (*pre_exec)(void*);


diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 964dc1a04c37..598b5c1903af 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile
@@ -6,7 +6,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
6	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \	6	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
7	sys_call_table.o tls.o	7	sys_call_table.o tls.o
8		8
9	subarch-obj-y = lib/bitops_32.o lib/semaphore_32.o lib/string_32.o	9	subarch-obj-y = lib/semaphore_32.o lib/string_32.o
10	subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o	10	subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
11	subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o	11	subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o
12		12


diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 3c22de532088..c8b4cce9cfe1 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile
@@ -10,7 +10,7 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
10		10
11	obj-$(CONFIG_MODULES) += um_module.o	11	obj-$(CONFIG_MODULES) += um_module.o
12		12
13	subarch-obj-y = lib/bitops_64.o lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o	13	subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
14	subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o	14	subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o
15		15
16	ldt-y = ../sys-i386/ldt.o	16	ldt-y = ../sys-i386/ldt.o


diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 4da3cdb9c1b1..7ef18b01f0bc 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu
@@ -278,6 +278,11 @@ config GENERIC_CPU
278		278
279	endchoice	279	endchoice
280		280
		281	config X86_CPU
		282	def_bool y
		283	select GENERIC_FIND_FIRST_BIT
		284	select GENERIC_FIND_NEXT_BIT
		285
281	config X86_GENERIC	286	config X86_GENERIC
282	bool "Generic x86 support"	287	bool "Generic x86 support"
283	depends on X86_32	288	depends on X86_32
@@ -398,7 +403,7 @@ config X86_TSC
398	# generates cmov.	403	# generates cmov.
399	config X86_CMOV	404	config X86_CMOV
400	def_bool y	405	def_bool y
401	depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)	406	depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64)
402		407
403	config X86_MINIMUM_CPU_FAMILY	408	config X86_MINIMUM_CPU_FAMILY
404	int	409	int


diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 25df1c1989fe..76f60f52a885 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile
@@ -11,7 +11,7 @@ lib-y += memcpy_$(BITS).o
11	ifeq ($(CONFIG_X86_32),y)	11	ifeq ($(CONFIG_X86_32),y)
12	lib-y += checksum_32.o	12	lib-y += checksum_32.o
13	lib-y += strstr_32.o	13	lib-y += strstr_32.o
14	lib-y += bitops_32.o semaphore_32.o string_32.o	14	lib-y += semaphore_32.o string_32.o
15		15
16	lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o	16	lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
17	else	17	else
@@ -21,7 +21,6 @@ else
21		21
22	lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o	22	lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
23	lib-y += thunk_64.o clear_page_64.o copy_page_64.o	23	lib-y += thunk_64.o clear_page_64.o copy_page_64.o
24	lib-y += bitops_64.o
25	lib-y += memmove_64.o memset_64.o	24	lib-y += memmove_64.o memset_64.o
26	lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o	25	lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
27	endif	26	endif


diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c deleted file mode 100644 index b65440459859..000000000000 --- a/arch/x86/lib/bitops_32.c +++ /dev/null
@@ -1,70 +0,0 @@
1	#include <linux/bitops.h>
2	#include <linux/module.h>
3
4	/**
5	* find_next_bit - find the next set bit in a memory region
6	* @addr: The address to base the search on
7	* @offset: The bitnumber to start searching at
8	* @size: The maximum size to search
9	*/
10	int find_next_bit(const unsigned long *addr, int size, int offset)
11	{
12	const unsigned long *p = addr + (offset >> 5);
13	int set = 0, bit = offset & 31, res;
14
15	if (bit) {
16	/*
17	* Look for nonzero in the first 32 bits:
18	*/
19	__asm__("bsfl %1,%0\n\t"
20	"jne 1f\n\t"
21	"movl $32, %0\n"
22	"1:"
23	: "=r" (set)
24	: "r" (*p >> bit));
25	if (set < (32 - bit))
26	return set + offset;
27	set = 32 - bit;
28	p++;
29	}
30	/*
31	* No set bit yet, search remaining full words for a bit
32	*/
33	res = find_first_bit (p, size - 32 * (p - addr));
34	return (offset + set + res);
35	}
36	EXPORT_SYMBOL(find_next_bit);
37
38	/**
39	* find_next_zero_bit - find the first zero bit in a memory region
40	* @addr: The address to base the search on
41	* @offset: The bitnumber to start searching at
42	* @size: The maximum size to search
43	*/
44	int find_next_zero_bit(const unsigned long *addr, int size, int offset)
45	{
46	const unsigned long *p = addr + (offset >> 5);
47	int set = 0, bit = offset & 31, res;
48
49	if (bit) {
50	/*
51	* Look for zero in the first 32 bits.
52	*/
53	__asm__("bsfl %1,%0\n\t"
54	"jne 1f\n\t"
55	"movl $32, %0\n"
56	"1:"
57	: "=r" (set)
58	: "r" (~(*p >> bit)));
59	if (set < (32 - bit))
60	return set + offset;
61	set = 32 - bit;
62	p++;
63	}
64	/*
65	* No zero yet, search remaining full bytes for a zero
66	*/
67	res = find_first_zero_bit(p, size - 32 * (p - addr));
68	return (offset + set + res);
69	}
70	EXPORT_SYMBOL(find_next_zero_bit);


diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c deleted file mode 100644 index 0e8f491e6ccc..000000000000 --- a/arch/x86/lib/bitops_64.c +++ /dev/null
@@ -1,175 +0,0 @@
1	#include <linux/bitops.h>
2
3	#undef find_first_zero_bit
4	#undef find_next_zero_bit
5	#undef find_first_bit
6	#undef find_next_bit
7
8	static inline long
9	__find_first_zero_bit(const unsigned long * addr, unsigned long size)
10	{
11	long d0, d1, d2;
12	long res;
13
14	/*
15	* We must test the size in words, not in bits, because
16	* otherwise incoming sizes in the range -63..-1 will not run
17	* any scasq instructions, and then the flags used by the je
18	* instruction will have whatever random value was in place
19	* before. Nobody should call us like that, but
20	* find_next_zero_bit() does when offset and size are at the
21	* same word and it fails to find a zero itself.
22	*/
23	size += 63;
24	size >>= 6;
25	if (!size)
26	return 0;
27	asm volatile(
28	" repe; scasq\n"
29	" je 1f\n"
30	" xorq -8(%%rdi),%%rax\n"
31	" subq $8,%%rdi\n"
32	" bsfq %%rax,%%rdx\n"
33	"1: subq %[addr],%%rdi\n"
34	" shlq $3,%%rdi\n"
35	" addq %%rdi,%%rdx"
36	:"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
37	:"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
38	[addr] "S" (addr) : "memory");
39	/*
40	* Any register would do for [addr] above, but GCC tends to
41	* prefer rbx over rsi, even though rsi is readily available
42	* and doesn't have to be saved.
43	*/
44	return res;
45	}
46
47	/**
48	* find_first_zero_bit - find the first zero bit in a memory region
49	* @addr: The address to start the search at
50	* @size: The maximum size to search
51	*
52	* Returns the bit-number of the first zero bit, not the number of the byte
53	* containing a bit.
54	*/
55	long find_first_zero_bit(const unsigned long * addr, unsigned long size)
56	{
57	return __find_first_zero_bit (addr, size);
58	}
59
60	/**
61	* find_next_zero_bit - find the next zero bit in a memory region
62	* @addr: The address to base the search on
63	* @offset: The bitnumber to start searching at
64	* @size: The maximum size to search
65	*/
66	long find_next_zero_bit (const unsigned long * addr, long size, long offset)
67	{
68	const unsigned long * p = addr + (offset >> 6);
69	unsigned long set = 0;
70	unsigned long res, bit = offset&63;
71
72	if (bit) {
73	/*
74	* Look for zero in first word
75	*/
76	asm("bsfq %1,%0\n\t"
77	"cmoveq %2,%0"
78	: "=r" (set)
79	: "r" (~(*p >> bit)), "r"(64L));
80	if (set < (64 - bit))
81	return set + offset;
82	set = 64 - bit;
83	p++;
84	}
85	/*
86	* No zero yet, search remaining full words for a zero
87	*/
88	res = __find_first_zero_bit (p, size - 64 * (p - addr));
89
90	return (offset + set + res);
91	}
92
93	static inline long
94	__find_first_bit(const unsigned long * addr, unsigned long size)
95	{
96	long d0, d1;
97	long res;
98
99	/*
100	* We must test the size in words, not in bits, because
101	* otherwise incoming sizes in the range -63..-1 will not run
102	* any scasq instructions, and then the flags used by the jz
103	* instruction will have whatever random value was in place
104	* before. Nobody should call us like that, but
105	* find_next_bit() does when offset and size are at the same
106	* word and it fails to find a one itself.
107	*/
108	size += 63;
109	size >>= 6;
110	if (!size)
111	return 0;
112	asm volatile(
113	" repe; scasq\n"
114	" jz 1f\n"
115	" subq $8,%%rdi\n"
116	" bsfq (%%rdi),%%rax\n"
117	"1: subq %[addr],%%rdi\n"
118	" shlq $3,%%rdi\n"
119	" addq %%rdi,%%rax"
120	:"=a" (res), "=&c" (d0), "=&D" (d1)
121	:"0" (0ULL), "1" (size), "2" (addr),
122	[addr] "r" (addr) : "memory");
123	return res;
124	}
125
126	/**
127	* find_first_bit - find the first set bit in a memory region
128	* @addr: The address to start the search at
129	* @size: The maximum size to search
130	*
131	* Returns the bit-number of the first set bit, not the number of the byte
132	* containing a bit.
133	*/
134	long find_first_bit(const unsigned long * addr, unsigned long size)
135	{
136	return __find_first_bit(addr,size);
137	}
138
139	/**
140	* find_next_bit - find the first set bit in a memory region
141	* @addr: The address to base the search on
142	* @offset: The bitnumber to start searching at
143	* @size: The maximum size to search
144	*/
145	long find_next_bit(const unsigned long * addr, long size, long offset)
146	{
147	const unsigned long * p = addr + (offset >> 6);
148	unsigned long set = 0, bit = offset & 63, res;
149
150	if (bit) {
151	/*
152	* Look for nonzero in the first 64 bits:
153	*/
154	asm("bsfq %1,%0\n\t"
155	"cmoveq %2,%0\n\t"
156	: "=r" (set)
157	: "r" (*p >> bit), "r" (64L));
158	if (set < (64 - bit))
159	return set + offset;
160	set = 64 - bit;
161	p++;
162	}
163	/*
164	* No set bit yet, search remaining full words for a bit
165	*/
166	res = __find_first_bit (p, size - 64 * (p - addr));
167	return (offset + set + res);
168	}
169
170	#include <linux/module.h>
171
172	EXPORT_SYMBOL(find_next_bit);
173	EXPORT_SYMBOL(find_first_bit);
174	EXPORT_SYMBOL(find_first_zero_bit);
175	EXPORT_SYMBOL(find_next_zero_bit);