author     Trent Piepho <xyzzy@speakeasy.org>                    2007-05-08 03:24:05 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-05-08 14:14:59 -0400
commit     8e2c20023f34b652605a5fb7c68bb843d2b100a8
tree       bd041c1762724dbbc91f4b2da3fc0716165784e5 /include
parent     02fb6149f7a64c62934c035e7635321cb9a8cf2e
Fix constant folding and poor optimization in byte swapping code
Constant folding does not work for the swabXX() byte swapping functions,
and the C versions optimize poorly.
Attempting to initialize a global variable to swab16(0x1234), or to put
something like "case swab32(42):" in a switch statement, will not compile.
It can work; swab.h just isn't doing it correctly. This patch fixes that.
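The underlying problem is that the generic ___swabXX() macros were GNU C
statement expressions, i.e. ({ ... }) blocks, and a statement expression is
never a valid constant expression, so it cannot appear in a global
initializer or a case label. For illustration, the two uses this patch makes
work (variable and function names here are made up):

	/* Both need swab16()/swab32() to reduce to integer constant
	 * expressions when their arguments are compile-time constants. */
	static __u16 magic = swab16(0x1234);	/* global initializer */

	int is_magic(__u32 v)
	{
		switch (v) {
		case swab32(42):	/* case label: constant required */
			return 1;
		default:
			return 0;
		}
	}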
Contrary to the comment in asm-i386/byteorder.h, gcc does not recognize the
"C" version of swab16 and turn it into efficient code. gcc can do this,
just not with the current code. The simple function:
	u16 foo(u16 x) { return swab16(x); }

would compile to:

	movzwl  %ax, %eax
	movl    %eax, %edx
	shrl    $8, %eax
	sall    $8, %edx
	orl     %eax, %edx

With this patch, it will compile to:

	rolw    $8, %ax
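What gcc does recognize is the plain rotate idiom this patch switches
___swab16() to; as a minimal standalone sketch (with a local typedef
standing in for the kernel's __u16):

	typedef unsigned short u16;

	u16 swab16_rot(u16 x)
	{
		/* with optimization enabled, gcc emits rolw $8 for this */
		return x << 8 | x >> 8;
	}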
I also attempted to document the maze of different macros/inline functions
that are used to create the final product.
Signed-off-by: Trent Piepho <xyzzy@speakeasy.org>
Cc: Francois-Rene Rideau <fare@tunes.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--  include/linux/byteorder/swab.h  108
1 file changed, 69 insertions(+), 39 deletions(-)
diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h
index 25f7f32883ec..142134ff1645 100644
--- a/include/linux/byteorder/swab.h
+++ b/include/linux/byteorder/swab.h
@@ -10,6 +10,10 @@
  *    separated swab functions from cpu_to_XX,
  *    to clean up support for bizarre-endian architectures.
  *
+ * Trent Piepho <xyzzy@speakeasy.org> 2007114
+ *    make constant-folding work, provide C versions that
+ *    gcc can optimize better, explain different versions
+ *
  * See asm-i386/byteorder.h and suches for examples of how to provide
  * architecture-dependent optimized versions
  *
@@ -17,40 +21,66 @@
 
 #include <linux/compiler.h>
 
+/* Functions/macros defined, there are a lot:
+ *
+ * ___swabXX
+ *    Generic C versions of the swab functions.
+ *
+ * ___constant_swabXX
+ *    C versions that gcc can fold into a compile-time constant when
+ *    the argument is a compile-time constant.
+ *
+ * __arch__swabXX[sp]?
+ *    Architecture optimized versions of all the swab functions
+ *    (including the s and p versions).  These can be defined in
+ *    asm-arch/byteorder.h.  Any which are not, are defined here.
+ *    __arch__swabXXs() is defined in terms of __arch__swabXXp(), which
+ *    is defined in terms of __arch__swabXX(), which is in turn defined
+ *    in terms of ___swabXX(x).
+ *    These must be macros.  They may be unsafe for arguments with
+ *    side-effects.
+ *
+ * __fswabXX
+ *    Inline function versions of the __arch__ macros.  These _are_ safe
+ *    if the arguments have side-effects.  Note there are no s and p
+ *    versions of these.
+ *
+ * __swabXX[sb]
+ *    There are the ones you should actually use.  The __swabXX versions
+ *    will be a constant given a constant argument and use the arch
+ *    specific code (if any) for non-constant arguments.  The s and p
+ *    versions always use the arch specific code (constant folding
+ *    doesn't apply).  They are safe to use with arguments with
+ *    side-effects.
+ *
+ * swabXX[sb]
+ *    Nicknames for __swabXX[sb] to use in the kernel.
+ */
+
 /* casts are necessary for constants, because we never know how for sure
  * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
  */
-#define ___swab16(x) \
-({ \
-        __u16 __x = (x); \
-        ((__u16)( \
-                (((__u16)(__x) & (__u16)0x00ffU) << 8) | \
-                (((__u16)(__x) & (__u16)0xff00U) >> 8) )); \
-})
 
-#define ___swab32(x) \
-({ \
-        __u32 __x = (x); \
-        ((__u32)( \
-                (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
-                (((__u32)(__x) & (__u32)0x0000ff00UL) <<  8) | \
-                (((__u32)(__x) & (__u32)0x00ff0000UL) >>  8) | \
-                (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
-})
-
-#define ___swab64(x) \
-({ \
-        __u64 __x = (x); \
-        ((__u64)( \
-                (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \
-                (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \
-                (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \
-                (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) <<  8) | \
-                (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >>  8) | \
-                (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
-                (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
-                (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
-})
+static __inline__ __attribute_const__ __u16 ___swab16(__u16 x)
+{
+        return x<<8 | x>>8;
+}
+static __inline__ __attribute_const__ __u32 ___swab32(__u32 x)
+{
+        return x<<24 | x>>24 |
+                (x & (__u32)0x0000ff00UL)<<8 |
+                (x & (__u32)0x00ff0000UL)>>8;
+}
+static __inline__ __attribute_const__ __u64 ___swab64(__u64 x)
+{
+        return x<<56 | x>>56 |
+                (x & (__u64)0x000000000000ff00ULL)<<40 |
+                (x & (__u64)0x0000000000ff0000ULL)<<24 |
+                (x & (__u64)0x00000000ff000000ULL)<< 8 |
+                (x & (__u64)0x000000ff00000000ULL)>> 8 |
+                (x & (__u64)0x0000ff0000000000ULL)>>24 |
+                (x & (__u64)0x00ff000000000000ULL)>>40;
+}
 
 #define ___constant_swab16(x) \
         ((__u16)( \
@@ -77,13 +107,13 @@
  * provide defaults when no architecture-specific optimization is detected
  */
 #ifndef __arch__swab16
-#  define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); })
+#  define __arch__swab16(x) ___swab16(x)
 #endif
 #ifndef __arch__swab32
-#  define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); })
+#  define __arch__swab32(x) ___swab32(x)
 #endif
 #ifndef __arch__swab64
-#  define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); })
+#  define __arch__swab64(x) ___swab64(x)
 #endif
 
 #ifndef __arch__swab16p
@@ -97,13 +127,13 @@
 #endif
 
 #ifndef __arch__swab16s
-#  define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0)
+#  define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x)))
 #endif
 #ifndef __arch__swab32s
-#  define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0)
+#  define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x)))
 #endif
 #ifndef __arch__swab64s
-#  define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0)
+#  define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x)))
 #endif
 
 
@@ -113,15 +143,15 @@
 #if defined(__GNUC__) && defined(__OPTIMIZE__)
 #  define __swab16(x) \
 (__builtin_constant_p((__u16)(x)) ? \
- ___swab16((x)) : \
+ ___constant_swab16((x)) : \
  __fswab16((x)))
 #  define __swab32(x) \
 (__builtin_constant_p((__u32)(x)) ? \
- ___swab32((x)) : \
+ ___constant_swab32((x)) : \
  __fswab32((x)))
 #  define __swab64(x) \
 (__builtin_constant_p((__u64)(x)) ? \
- ___swab64((x)) : \
+ ___constant_swab64((x)) : \
  __fswab64((x)))
 #else
 #  define __swab16(x) __fswab16(x)
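To make the shape of the final mechanism concrete, here is a standalone
sketch of the same constant-folding dispatch (the my_* names are invented
for illustration; the kernel versions additionally route non-constant
arguments through the __arch__ overrides):

	#include <stdint.h>

	/* Pure expression: a valid integer constant expression when x is
	 * constant, so it folds in initializers and case labels. */
	#define MY_CONST_SWAB16(x) \
		((uint16_t)((((uint16_t)(x) & 0x00ffU) << 8) | \
			    (((uint16_t)(x) & 0xff00U) >> 8)))

	/* Inline function: evaluates its argument exactly once, so it is
	 * safe for arguments with side-effects, and gcc recognizes the
	 * rotate idiom in the body. */
	static inline uint16_t my_fswab16(uint16_t x)
	{
		return x << 8 | x >> 8;
	}

	/* Constant arguments take the foldable branch; everything else
	 * goes through the inline function (GNU C, mirroring the hunk
	 * above). */
	#define my_swab16(x) \
		(__builtin_constant_p((uint16_t)(x)) ? \
		 MY_CONST_SWAB16(x) : my_fswab16(x))

	/* gcc folds this to 0x3412 at compile time. */
	static uint16_t folded = my_swab16(0x1234);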