author    Trent Piepho <xyzzy@speakeasy.org>    2007-05-08 03:24:05 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org>    2007-05-08 14:14:59 -0400
commit    8e2c20023f34b652605a5fb7c68bb843d2b100a8 (patch)
tree      bd041c1762724dbbc91f4b2da3fc0716165784e5 /include/linux/byteorder
parent    02fb6149f7a64c62934c035e7635321cb9a8cf2e (diff)
Fix constant folding and poor optimization in byte swapping code
Constant folding does not work for the swabXX() byte swapping functions,
and the C versions optimize poorly.  Attempting to initialize a global
variable to swab16(0x1234), or to put something like "case swab32(42):"
in a switch statement, will not compile.  It can work; swab.h just isn't
doing it correctly.  This patch fixes that.

Contrary to the comment in asm-i386/byteorder.h, gcc does not recognize
the "C" version of swab16 and turn it into efficient code.  gcc can do
this, just not with the current code.  The simple function:

        u16 foo(u16 x) { return swab16(x); }

would compile to:

        movzwl  %ax, %eax
        movl    %eax, %edx
        shrl    $8, %eax
        sall    $8, %edx
        orl     %eax, %edx

With this patch, it will compile to:

        rolw    $8, %ax

I also attempted to document the maze of different macros/inline
functions that are used to create the final product.

Signed-off-by: Trent Piepho <xyzzy@speakeasy.org>
Cc: Francois-Rene Rideau <fare@tunes.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
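[Editorial illustration, not part of the commit: a minimal sketch of the two constant contexts named above. Assuming the patched swab.h under gcc, both uses compile because __swab16()/__swab32() reduce to the pure ___constant_swabXX() macros for literal arguments; the names wire_magic and is_answer are invented for this sketch.]

        /* hypothetical user code; assumes the patched swab.h and gcc */
        #include <linux/byteorder/swab.h>

        static __u16 wire_magic = __swab16(0x1234); /* initializer must be constant */

        static int is_answer(__u32 v)
        {
                switch (v) {
                case __swab32(42):      /* case label must be constant */
                        return 1;
                default:
                        return 0;
                }
        }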
Diffstat (limited to 'include/linux/byteorder')
-rw-r--r--    include/linux/byteorder/swab.h    108
1 file changed, 69 insertions, 39 deletions
diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h
index 25f7f32883ec..142134ff1645 100644
--- a/include/linux/byteorder/swab.h
+++ b/include/linux/byteorder/swab.h
@@ -10,6 +10,10 @@
  * separated swab functions from cpu_to_XX,
  * to clean up support for bizarre-endian architectures.
  *
+ * Trent Piepho <xyzzy@speakeasy.org> 2007114
+ *	make constant-folding work, provide C versions that
+ *	gcc can optimize better, explain different versions
+ *
  * See asm-i386/byteorder.h and suches for examples of how to provide
  * architecture-dependent optimized versions
  *
@@ -17,40 +21,66 @@
 
 #include <linux/compiler.h>
 
+/* Functions/macros defined, there are a lot:
+ *
+ * ___swabXX
+ *	Generic C versions of the swab functions.
+ *
+ * ___constant_swabXX
+ *	C versions that gcc can fold into a compile-time constant when
+ *	the argument is a compile-time constant.
+ *
+ * __arch__swabXX[sp]?
+ *	Architecture optimized versions of all the swab functions
+ *	(including the s and p versions).  These can be defined in
+ *	asm-arch/byteorder.h.  Any which are not, are defined here.
+ *	__arch__swabXXs() is defined in terms of __arch__swabXXp(), which
+ *	is defined in terms of __arch__swabXX(), which is in turn defined
+ *	in terms of ___swabXX(x).
+ *	These must be macros.  They may be unsafe for arguments with
+ *	side-effects.
+ *
+ * __fswabXX
+ *	Inline function versions of the __arch__ macros.  These _are_ safe
+ *	if the arguments have side-effects.  Note there are no s and p
+ *	versions of these.
+ *
+ * __swabXX[sb]
+ *	There are the ones you should actually use.  The __swabXX versions
+ *	will be a constant given a constant argument and use the arch
+ *	specific code (if any) for non-constant arguments.  The s and p
+ *	versions always use the arch specific code (constant folding
+ *	doesn't apply).  They are safe to use with arguments with
+ *	side-effects.
+ *
+ * swabXX[sb]
+ *	Nicknames for __swabXX[sb] to use in the kernel.
+ */
+
 /* casts are necessary for constants, because we never know how for sure
  * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
  */
-#define ___swab16(x) \
-({ \
-	__u16 __x = (x); \
-	((__u16)( \
-		(((__u16)(__x) & (__u16)0x00ffU) << 8) | \
-		(((__u16)(__x) & (__u16)0xff00U) >> 8) )); \
-})
 
-#define ___swab32(x) \
-({ \
-	__u32 __x = (x); \
-	((__u32)( \
-		(((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
-		(((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \
-		(((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \
-		(((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
-})
-
-#define ___swab64(x) \
-({ \
-	__u64 __x = (x); \
-	((__u64)( \
-		(__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \
-		(__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \
-		(__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \
-		(__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \
-		(__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \
-		(__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
-		(__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
-		(__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
-})
+static __inline__ __attribute_const__ __u16 ___swab16(__u16 x)
+{
+	return x<<8 | x>>8;
+}
+static __inline__ __attribute_const__ __u32 ___swab32(__u32 x)
+{
+	return x<<24 | x>>24 |
+		(x & (__u32)0x0000ff00UL)<<8 |
+		(x & (__u32)0x00ff0000UL)>>8;
+}
+static __inline__ __attribute_const__ __u64 ___swab64(__u64 x)
+{
+	return x<<56 | x>>56 |
+		(x & (__u64)0x000000000000ff00ULL)<<40 |
+		(x & (__u64)0x0000000000ff0000ULL)<<24 |
+		(x & (__u64)0x00000000ff000000ULL)<< 8 |
+		(x & (__u64)0x000000ff00000000ULL)>> 8 |
+		(x & (__u64)0x0000ff0000000000ULL)>>24 |
+		(x & (__u64)0x00ff000000000000ULL)>>40;
+}
 
 #define ___constant_swab16(x) \
 	((__u16)( \
@@ -77,13 +107,13 @@
  * provide defaults when no architecture-specific optimization is detected
  */
 #ifndef __arch__swab16
-# define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); })
+# define __arch__swab16(x) ___swab16(x)
 #endif
 #ifndef __arch__swab32
-# define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); })
+# define __arch__swab32(x) ___swab32(x)
 #endif
 #ifndef __arch__swab64
-# define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); })
+# define __arch__swab64(x) ___swab64(x)
 #endif
 
 #ifndef __arch__swab16p
@@ -97,13 +127,13 @@
 #endif
 
 #ifndef __arch__swab16s
-# define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0)
+# define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x)))
 #endif
 #ifndef __arch__swab32s
-# define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0)
+# define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x)))
 #endif
 #ifndef __arch__swab64s
-# define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0)
+# define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x)))
 #endif
 
 
@@ -113,15 +143,15 @@
 #if defined(__GNUC__) && defined(__OPTIMIZE__)
 # define __swab16(x) \
 (__builtin_constant_p((__u16)(x)) ? \
-	___swab16((x)) : \
+	___constant_swab16((x)) : \
 	__fswab16((x)))
 # define __swab32(x) \
 (__builtin_constant_p((__u32)(x)) ? \
-	___swab32((x)) : \
+	___constant_swab32((x)) : \
 	__fswab32((x)))
 # define __swab64(x) \
 (__builtin_constant_p((__u64)(x)) ? \
-	___swab64((x)) : \
+	___constant_swab64((x)) : \
 	__fswab64((x)))
 #else
 # define __swab16(x) __fswab16(x)
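[Editorial illustration, not part of the commit: the last hunk above is the dispatch point. Here is the same pattern as a standalone userspace sketch; the names bswap16, constant_swab16 and fswab16 are invented, but the mechanism (a __builtin_constant_p() ternary steering literal arguments to a foldable macro and everything else to an inline function) is the one the patch installs.]

        #include <stdint.h>

        /* Runtime path: with optimization, gcc compiles this shape to a
         * rotate (rolw on x86), as the commit message shows. */
        static inline uint16_t fswab16(uint16_t x)
        {
                return (uint16_t)(x << 8 | x >> 8);
        }

        /* Compile-time path: only casts, masks and shifts, so the result
         * remains an integer constant expression when x is one. */
        #define constant_swab16(x) \
                ((uint16_t)((((uint16_t)(x) & 0x00ffU) << 8) | \
                            (((uint16_t)(x) & 0xff00U) >> 8)))

        #define bswap16(x) \
                (__builtin_constant_p((uint16_t)(x)) ? \
                 constant_swab16(x) : fswab16(x))

Under gcc, bswap16(0x1234) folds to 0x3412 and can appear in an initializer or case label, while bswap16(n) for a runtime n becomes the inline function body.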