Make the "word-at-a-time" helper functions more commonly usable

I have a new optimized x86 "strncpy_from_user()" that will use these same helper functions for all the same reasons the name lookup code uses them. This is preparation for that.

This moves them into an architecture-specific header file. It's architecture-specific for two reasons:

 - some of the functions are likely to want architecture-specific implementations. Even if the current code happens to be "generic" in the sense that it should work on any little-endian machine, it's likely that the "multiply by a big constant and shift" implementation is less than optimal for an architecture that has a guaranteed fast bit count instruction, for example.

 - I expect that if architectures like sparc want to start playing around with this, we'll need to abstract out a few more details (in particular the actual unaligned accesses). So we're likely to have more architecture-specific stuff if non-x86 architectures start using this.

   (and if it turns out that non-x86 architectures don't start using this, then having it in an architecture-specific header is still the right thing to do, of course)

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-04-06 16:54:56 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-04-06 16:54:56 -0400
commit: f68e556e23d1a4176b563bcb25d8baf2c5313f91 (patch)
tree: 4c43c375dd0c608ed506953d80ebfedacca37161 /arch/x86/include/asm
parent: 23f347ef63aa36b5a001b6791f657cd0e2a04de3 (diff)
1 files changed, 46 insertions, 0 deletions
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..6fe6767b7124
--- /dev/null
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -0,0 +1,46 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+/*
+ * This is largely generic for little-endian machines, but the
+ * optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+#ifdef CONFIG_64BIT
+/*
+ * Count the 0xff bytes in a byte mask (64-bit variant).
+ *
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ *
+ * The multiply accumulates the count in the top byte of the
+ * product and the shift by 56 extracts it: a mask of 0xff
+ * yields 1, a mask of 0xffff yields 2.
+ *
+ * NOTE(review): presumably only ever called with masks whose
+ * low N bytes are 0xff and whose remaining bytes are zero;
+ * confirm against the callers, which are not in this header.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+        return mask*0x0001020304050608ul >> 56;
+}
+#else   /* 32-bit case */
+/*
+ * Carl Chatfield / Jan Achrenius G+ version for 32-bit.
+ *
+ * Maps the byte masks 0, 0xff, 0xffff and 0xffffff to
+ * 0, 1, 2 and 3 respectively.
+ */
+static inline long count_masked_bytes(long mask)
+{
+        /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+        long a = (0x0ff0001+mask) >> 23;
+        /* Fix the 1 for 00 case: 1 & 0 is 0, the other results pass through */
+        return a & mask;
+}
+#endif
+#define REPEAT_BYTE(x)  ((~0ul / 0xff) * (x)) /* byte x in every byte */
+/*
+ * Return the high bit set in the first byte that is a zero.
+ *
+ * The result is zero when no byte of 'a' is zero. Otherwise the
+ * lowest set 0x80 marker identifies the least significant zero
+ * byte. Bytes above it may be flagged as well, because the borrow
+ * from the subtraction propagates into them (a 0x01 byte directly
+ * above a zero byte, for example), so only the lowest marker is
+ * reliable.
+ */
+static inline unsigned long has_zero(unsigned long a)
+{
+        return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
+}
+#endif /* _ASM_WORD_AT_A_TIME_H */
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-04-06 16:54:56 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-04-06 16:54:56 -0400
commit	f68e556e23d1a4176b563bcb25d8baf2c5313f91 (patch)
tree	4c43c375dd0c608ed506953d80ebfedacca37161 /arch/x86/include/asm
parent	23f347ef63aa36b5a001b6791f657cd0e2a04de3 (diff)

diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..6fe6767b7124 --- /dev/null +++ b/arch/x86/include/asm/word-at-a-time.h
@@ -0,0 +1,46 @@
	1	#ifndef _ASM_WORD_AT_A_TIME_H
	2	#define _ASM_WORD_AT_A_TIME_H
	3
	4	/*
	5	* This is largely generic for little-endian machines, but the
	6	* optimal byte mask counting is probably going to be something
	7	* that is architecture-specific. If you have a reliably fast
	8	* bit count instruction, that might be better than the multiply
	9	* and shift, for example.
	10	*/
	11
	12	#ifdef CONFIG_64BIT
	13
	14	/*
	15	* Jan Achrenius on G+: microoptimized version of
	16	* the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
	17	* that works for the bytemasks without having to
	18	* mask them first.
	19	*/
	20	static inline long count_masked_bytes(unsigned long mask)
	21	{
	22	return mask*0x0001020304050608ul >> 56;
	23	}
	24
	25	#else /* 32-bit case */
	26
	27	/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
	28	static inline long count_masked_bytes(long mask)
	29	{
	30	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	31	long a = (0x0ff0001+mask) >> 23;
	32	/* Fix the 1 for 00 case */
	33	return a & mask;
	34	}
	35
	36	#endif
	37
	38	#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
	39
	40	/* Return the high bit set in the first byte that is a zero */
	41	static inline unsigned long has_zero(unsigned long a)
	42	{
	43	return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
	44	}
	45
	46	#endif /* _ASM_WORD_AT_A_TIME_H */