author    Trent Piepho <xyzzy@speakeasy.org>    2007-05-08 03:24:05 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org>    2007-05-08 14:14:59 -0400
commit    8e2c20023f34b652605a5fb7c68bb843d2b100a8 (patch)
tree      bd041c1762724dbbc91f4b2da3fc0716165784e5
parent    02fb6149f7a64c62934c035e7635321cb9a8cf2e (diff)
Fix constant folding and poor optimization in byte swapping code
Constant folding does not work for the swabXX() byte swapping functions, and the C versions optimize poorly.

Attempting to initialize a global variable to swab16(0x1234) or put something like "case swab32(42):" in a switch statement will not compile.  It can work, swab.h just isn't doing it correctly.  This patch fixes that.

Contrary to the comment in asm-i386/byteorder.h, gcc does not recognize the "C" version of swab16 and turn it into efficient code.  gcc can do this, just not with the current code.  The simple function:

    u16 foo(u16 x) { return swab16(x); }

Would compile to:

    movzwl  %ax, %eax
    movl    %eax, %edx
    shrl    $8, %eax
    sall    $8, %edx
    orl     %eax, %edx

With this patch, it will compile to:

    rolw    $8, %ax

I also attempted to document the maze of different macros/inline functions that are used to create the final product.

Signed-off-by: Trent Piepho <xyzzy@speakeasy.org>
Cc: Francois-Rene Rideau <fare@tunes.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--    include/linux/byteorder/swab.h    108
1 file changed, 69 insertions(+), 39 deletions(-)
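To make the commit message's claim concrete before the patch itself: below is a minimal userspace sketch of the dispatch technique the patch adopts (simplified names and <stdint.h> types, not the kernel header).  It relies on gcc's documented extension that a ?: whose condition is __builtin_constant_p and whose selected arm is constant can itself be treated as a constant expression; that is what lets swab16() appear in a static initializer and a case label.

    /* swab_fold.c: minimal sketch of the constant-folding dispatch
     * (userspace names/types; not the kernel header).
     * Build: gcc -O2 -o swab_fold swab_fold.c */
    #include <stdint.h>
    #include <stdio.h>

    /* Pure expression: usable where C requires a constant expression. */
    #define ___constant_swab16(x) \
            ((uint16_t)((((uint16_t)(x) & 0x00ffU) << 8) | \
                        (((uint16_t)(x) & 0xff00U) >> 8)))

    /* Inline function: safe if the argument has side effects; gcc turns
     * the shift/or pair into a single rotate at -O2. */
    static inline uint16_t __fswab16(uint16_t x)
    {
            return (uint16_t)(x << 8 | x >> 8);
    }

    /* Dispatch: constant arguments fold at compile time, everything
     * else goes through the inline function. */
    #define swab16(x) \
            (__builtin_constant_p((uint16_t)(x)) ? \
             ___constant_swab16(x) : __fswab16(x))

    /* Both uses below are rejected if swab16 is a ({ ... }) statement
     * expression, which is never a constant expression. */
    static uint16_t folded = swab16(0x1234);  /* 0x3412 at compile time */

    int main(int argc, char **argv)
    {
            (void)argv;
            uint16_t v = (uint16_t)argc;      /* runtime: __fswab16 path */

            switch (swab16(v)) {
            case swab16(42):                  /* case label: constant path */
                    puts("argc was 42");
                    break;
            default:
                    printf("folded = 0x%04x\n", folded);
            }
            return 0;
    }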
diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h
index 25f7f32883ec..142134ff1645 100644
--- a/include/linux/byteorder/swab.h
+++ b/include/linux/byteorder/swab.h
@@ -10,6 +10,10 @@
  * separated swab functions from cpu_to_XX,
  * to clean up support for bizarre-endian architectures.
  *
+ * Trent Piepho <xyzzy@speakeasy.org> 2007114
+ *    make constant-folding work, provide C versions that
+ *    gcc can optimize better, explain different versions
+ *
  * See asm-i386/byteorder.h and suches for examples of how to provide
  * architecture-dependent optimized versions
  *
@@ -17,40 +21,66 @@
 
 #include <linux/compiler.h>
 
+/* Functions/macros defined, there are a lot:
+ *
+ * ___swabXX
+ *    Generic C versions of the swab functions.
+ *
+ * ___constant_swabXX
+ *    C versions that gcc can fold into a compile-time constant when
+ *    the argument is a compile-time constant.
+ *
+ * __arch__swabXX[sp]?
+ *    Architecture optimized versions of all the swab functions
+ *    (including the s and p versions).  These can be defined in
+ *    asm-arch/byteorder.h.  Any which are not, are defined here.
+ *    __arch__swabXXs() is defined in terms of __arch__swabXXp(), which
+ *    is defined in terms of __arch__swabXX(), which is in turn defined
+ *    in terms of ___swabXX(x).
+ *    These must be macros.  They may be unsafe for arguments with
+ *    side-effects.
+ *
+ * __fswabXX
+ *    Inline function versions of the __arch__ macros.  These _are_ safe
+ *    if the arguments have side-effects.  Note there are no s and p
+ *    versions of these.
+ *
+ * __swabXX[sb]
+ *    There are the ones you should actually use.  The __swabXX versions
+ *    will be a constant given a constant argument and use the arch
+ *    specific code (if any) for non-constant arguments.  The s and p
+ *    versions always use the arch specific code (constant folding
+ *    doesn't apply).  They are safe to use with arguments with
+ *    side-effects.
+ *
+ * swabXX[sb]
+ *    Nicknames for __swabXX[sb] to use in the kernel.
+ */
+
 /* casts are necessary for constants, because we never know how for sure
  * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
  */
-#define ___swab16(x) \
-({ \
-	__u16 __x = (x); \
-	((__u16)( \
-		(((__u16)(__x) & (__u16)0x00ffU) << 8) | \
-		(((__u16)(__x) & (__u16)0xff00U) >> 8) )); \
-})
 
-#define ___swab32(x) \
-({ \
-	__u32 __x = (x); \
-	((__u32)( \
-		(((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
-		(((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \
-		(((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \
-		(((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
-})
-
-#define ___swab64(x) \
-({ \
-	__u64 __x = (x); \
-	((__u64)( \
-		(__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \
-		(__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \
-		(__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \
-		(__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \
-		(__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \
-		(__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
-		(__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
-		(__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
-})
+static __inline__ __attribute_const__ __u16 ___swab16(__u16 x)
+{
+	return x<<8 | x>>8;
+}
+static __inline__ __attribute_const__ __u32 ___swab32(__u32 x)
+{
+	return x<<24 | x>>24 |
+		(x & (__u32)0x0000ff00UL)<<8 |
+		(x & (__u32)0x00ff0000UL)>>8;
+}
+static __inline__ __attribute_const__ __u64 ___swab64(__u64 x)
+{
+	return x<<56 | x>>56 |
+		(x & (__u64)0x000000000000ff00ULL)<<40 |
+		(x & (__u64)0x0000000000ff0000ULL)<<24 |
+		(x & (__u64)0x00000000ff000000ULL)<< 8 |
+		(x & (__u64)0x000000ff00000000ULL)>> 8 |
+		(x & (__u64)0x0000ff0000000000ULL)>>24 |
+		(x & (__u64)0x00ff000000000000ULL)>>40;
+}
 
 #define ___constant_swab16(x) \
 	((__u16)( \
@@ -77,13 +107,13 @@
  * provide defaults when no architecture-specific optimization is detected
  */
 #ifndef __arch__swab16
-# define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); })
+# define __arch__swab16(x) ___swab16(x)
 #endif
 #ifndef __arch__swab32
-# define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); })
+# define __arch__swab32(x) ___swab32(x)
 #endif
 #ifndef __arch__swab64
-# define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); })
+# define __arch__swab64(x) ___swab64(x)
 #endif
 
 #ifndef __arch__swab16p
@@ -97,13 +127,13 @@
 #endif
 
 #ifndef __arch__swab16s
-# define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0)
+# define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x)))
 #endif
 #ifndef __arch__swab32s
-# define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0)
+# define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x)))
 #endif
 #ifndef __arch__swab64s
-# define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0)
+# define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x)))
 #endif
 
 
@@ -113,15 +143,15 @@
 #if defined(__GNUC__) && defined(__OPTIMIZE__)
 # define __swab16(x) \
 (__builtin_constant_p((__u16)(x)) ? \
- ___swab16((x)) : \
+ ___constant_swab16((x)) : \
  __fswab16((x)))
 # define __swab32(x) \
 (__builtin_constant_p((__u32)(x)) ? \
- ___swab32((x)) : \
+ ___constant_swab32((x)) : \
  __fswab32((x)))
 # define __swab64(x) \
 (__builtin_constant_p((__u64)(x)) ? \
- ___swab64((x)) : \
+ ___constant_swab64((x)) : \
  __fswab64((x)))
 #else
 # define __swab16(x) __fswab16(x)