| author | Trent Piepho <xyzzy@speakeasy.org> | 2007-05-08 03:24:05 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-08 14:14:59 -0400 |
| commit | 8e2c20023f34b652605a5fb7c68bb843d2b100a8 (patch) | |
| tree | bd041c1762724dbbc91f4b2da3fc0716165784e5 /include/linux/byteorder | |
| parent | 02fb6149f7a64c62934c035e7635321cb9a8cf2e (diff) | |
Fix constant folding and poor optimization in byte swapping code
Constant folding does not work for the swabXX() byte swapping functions,
and the C versions optimize poorly.
Attempting to initialize a global variable to swab16(0x1234), or to put
something like "case swab32(42):" in a switch statement, will not compile.
It can work; swab.h just isn't doing it correctly. This patch fixes that.
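For illustration, a minimal sketch of those two constant contexts, assuming the usual `<asm/byteorder.h>` include chain; the identifiers `boot_sig` and `match_tag` are invented for this example:

```c
#include <linux/types.h>
#include <asm/byteorder.h>	/* pulls in linux/byteorder/swab.h */

/* A static initializer must be a compile-time constant, so this
 * line only compiles if swab16() constant-folds. */
static const __u16 boot_sig = swab16(0x1234);

/* A case label must be an integer constant expression as well. */
static int match_tag(__u32 tag)
{
	switch (tag) {
	case swab32(42):	/* 42 as written by an opposite-endian peer */
		return 1;
	default:
		return 0;
	}
}
```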
Contrary to the comment in asm-i386/byteorder.h, gcc does not recognize the
"C" version of swab16 and turn it into efficient code. gcc can do this,
just not with the current code. The simple function:

	u16 foo(u16 x) { return swab16(x); }

would compile to:

	movzwl %ax, %eax
	movl   %eax, %edx
	shrl   $8, %eax
	sall   $8, %edx
	orl    %eax, %edx

With this patch, it will compile to:

	rolw $8, %ax
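The comparison is easy to reproduce outside the kernel. Below is a user-space approximation of the patched `___swab16()`; `swab16_demo`, `foo`, and the `<stdint.h>` types are stand-ins, not the kernel's names:

```c
#include <stdint.h>

/* Same shape as the patched ___swab16(): gcc recognizes the
 * shift/or pair as a 16-bit rotate. */
static inline uint16_t swab16_demo(uint16_t x)
{
	return x << 8 | x >> 8;
}

uint16_t foo(uint16_t x)
{
	return swab16_demo(x);	/* gcc -O2 on i386: rolw $8, %ax */
}
```

Compiling this with `gcc -O2 -S` shows the single rotate instruction.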
I also attempted to document the maze of different macros/inline functions
that are used to create the final product.
Signed-off-by: Trent Piepho <xyzzy@speakeasy.org>
Cc: Francois-Rene Rideau <fare@tunes.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/byteorder')
 -rw-r--r--  include/linux/byteorder/swab.h | 108
 1 files changed, 69 insertions, 39 deletions
```diff
diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h
index 25f7f32883e..142134ff164 100644
--- a/include/linux/byteorder/swab.h
+++ b/include/linux/byteorder/swab.h
@@ -10,6 +10,10 @@
  *    separated swab functions from cpu_to_XX,
  *    to clean up support for bizarre-endian architectures.
  *
+ * Trent Piepho <xyzzy@speakeasy.org> 2007114
+ *    make constant-folding work, provide C versions that
+ *    gcc can optimize better, explain different versions
+ *
  * See asm-i386/byteorder.h and suches for examples of how to provide
  * architecture-dependent optimized versions
  *
@@ -17,40 +21,66 @@
 
 #include <linux/compiler.h>
 
+/* Functions/macros defined, there are a lot:
+ *
+ * ___swabXX
+ *	Generic C versions of the swab functions.
+ *
+ * ___constant_swabXX
+ *	C versions that gcc can fold into a compile-time constant when
+ *	the argument is a compile-time constant.
+ *
+ * __arch__swabXX[sp]?
+ *	Architecture optimized versions of all the swab functions
+ *	(including the s and p versions).  These can be defined in
+ *	asm-arch/byteorder.h.  Any which are not, are defined here.
+ *	__arch__swabXXs() is defined in terms of __arch__swabXXp(), which
+ *	is defined in terms of __arch__swabXX(), which is in turn defined
+ *	in terms of ___swabXX(x).
+ *	These must be macros.  They may be unsafe for arguments with
+ *	side-effects.
+ *
+ * __fswabXX
+ *	Inline function versions of the __arch__ macros.  These _are_ safe
+ *	if the arguments have side-effects.  Note there are no s and p
+ *	versions of these.
+ *
+ * __swabXX[sb]
+ *	There are the ones you should actually use.  The __swabXX versions
+ *	will be a constant given a constant argument and use the arch
+ *	specific code (if any) for non-constant arguments.  The s and p
+ *	versions always use the arch specific code (constant folding
+ *	doesn't apply).  They are safe to use with arguments with
+ *	side-effects.
+ *
+ * swabXX[sb]
+ *	Nicknames for __swabXX[sb] to use in the kernel.
+ */
+
 /* casts are necessary for constants, because we never know how for sure
  * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
  */
-#define ___swab16(x) \
-({ \
-	__u16 __x = (x); \
-	((__u16)( \
-		(((__u16)(__x) & (__u16)0x00ffU) << 8) | \
-		(((__u16)(__x) & (__u16)0xff00U) >> 8) )); \
-})
 
-#define ___swab32(x) \
-({ \
-	__u32 __x = (x); \
-	((__u32)( \
-		(((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
-		(((__u32)(__x) & (__u32)0x0000ff00UL) <<  8) | \
-		(((__u32)(__x) & (__u32)0x00ff0000UL) >>  8) | \
-		(((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
-})
-
-#define ___swab64(x) \
-({ \
-	__u64 __x = (x); \
-	((__u64)( \
-		(__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \
-		(__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \
-		(__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \
-		(__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) <<  8) | \
-		(__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >>  8) | \
-		(__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
-		(__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
-		(__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
-})
+static __inline__ __attribute_const__ __u16 ___swab16(__u16 x)
+{
+	return x<<8 | x>>8;
+}
+static __inline__ __attribute_const__ __u32 ___swab32(__u32 x)
+{
+	return x<<24 | x>>24 |
+		(x & (__u32)0x0000ff00UL)<<8 |
+		(x & (__u32)0x00ff0000UL)>>8;
+}
+static __inline__ __attribute_const__ __u64 ___swab64(__u64 x)
+{
+	return x<<56 | x>>56 |
+		(x & (__u64)0x000000000000ff00ULL)<<40 |
+		(x & (__u64)0x0000000000ff0000ULL)<<24 |
+		(x & (__u64)0x00000000ff000000ULL)<< 8 |
+		(x & (__u64)0x000000ff00000000ULL)>> 8 |
+		(x & (__u64)0x0000ff0000000000ULL)>>24 |
+		(x & (__u64)0x00ff000000000000ULL)>>40;
+}
 
 #define ___constant_swab16(x) \
 	((__u16)( \
@@ -77,13 +107,13 @@
  * provide defaults when no architecture-specific optimization is detected
  */
 #ifndef __arch__swab16
-#  define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); })
+#  define __arch__swab16(x) ___swab16(x)
 #endif
 #ifndef __arch__swab32
-#  define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); })
+#  define __arch__swab32(x) ___swab32(x)
 #endif
 #ifndef __arch__swab64
-#  define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); })
+#  define __arch__swab64(x) ___swab64(x)
 #endif
 
 #ifndef __arch__swab16p
@@ -97,13 +127,13 @@
 #endif
 
 #ifndef __arch__swab16s
-#  define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0)
+#  define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x)))
 #endif
 #ifndef __arch__swab32s
-#  define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0)
+#  define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x)))
 #endif
 #ifndef __arch__swab64s
-#  define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0)
+#  define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x)))
 #endif
 
 
@@ -113,15 +143,15 @@
 #if defined(__GNUC__) && defined(__OPTIMIZE__)
 #  define __swab16(x) \
 (__builtin_constant_p((__u16)(x)) ? \
- ___swab16((x)) : \
+ ___constant_swab16((x)) : \
  __fswab16((x)))
 #  define __swab32(x) \
 (__builtin_constant_p((__u32)(x)) ? \
- ___swab32((x)) : \
+ ___constant_swab32((x)) : \
  __fswab32((x)))
 #  define __swab64(x) \
 (__builtin_constant_p((__u64)(x)) ? \
- ___swab64((x)) : \
+ ___constant_swab64((x)) : \
  __fswab64((x)))
 #else
 #  define __swab16(x) __fswab16(x)
```
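As a self-contained illustration of the dispatch idiom in the last hunk, here is a sketch under stated assumptions: all names (`CONST_BSWAP32`, `bswap32_rt`, `BSWAP32`, `wire_tag`) are hypothetical rather than the kernel's, and gcc is assumed, since `__builtin_constant_p` is a GNU extension:

```c
#include <stdint.h>

/* Foldable version: a pure integer-constant expression whenever
 * v is one, so it works in initializers and case labels. */
#define CONST_BSWAP32(v) ((uint32_t)(			\
	(((uint32_t)(v) & 0x000000ffUL) << 24) |	\
	(((uint32_t)(v) & 0x0000ff00UL) <<  8) |	\
	(((uint32_t)(v) & 0x00ff0000UL) >>  8) |	\
	(((uint32_t)(v) & 0xff000000UL) >> 24)))

/* Runtime version: an inline function, so an argument with
 * side-effects is evaluated exactly once. */
static inline uint32_t bswap32_rt(uint32_t x)
{
	return x << 24 | x >> 24 |
	       (x & 0x0000ff00UL) << 8 |
	       (x & 0x00ff0000UL) >> 8;
}

/* Dispatch, as in the patched __swab32(): constants take the
 * foldable macro, everything else the inline function. */
#define BSWAP32(x) (__builtin_constant_p((uint32_t)(x)) ? \
		    CONST_BSWAP32(x) : bswap32_rt(x))

/* Now usable where a constant is required, which is the
 * property the patch restores for swab32(). */
static const uint32_t wire_tag = BSWAP32(42);
```

The kernel guards the same construct with `#if defined(__GNUC__) && defined(__OPTIMIZE__)` and falls back to the plain function form otherwise; the sketch omits that fallback.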
