Skip to content

Commit 8e2c200

Browse files
Trent Piepho authored and Linus Torvalds committed
Fix constant folding and poor optimization in byte swapping code
Constant folding does not work for the swabXX() byte swapping functions, and the C versions optimize poorly. Attempting to initialize a global variable to swab16(0x1234) or put something like "case swab32(42):" in a switch statement will not compile. It can work, swab.h just isn't doing it correctly. This patch fixes that. Contrary to the comment in asm-i386/byteorder.h, gcc does not recognize the "C" version of swab16 and turn it into efficient code. gcc can do this, just not with the current code. The simple function: u16 foo(u16 x) { return swab16(x); } Would compile to: movzwl %ax, %eax movl %eax, %edx shrl $8, %eax sall $8, %edx orl %eax, %edx With this patch, it will compile to: rolw $8, %ax I also attempted to document the maze of different macros/inline functions that are used to create the final product. Signed-off-by: Trent Piepho <xyzzy@speakeasy.org> Cc: Francois-Rene Rideau <fare@tunes.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 02fb614 commit 8e2c200

File tree

1 file changed

+69
-39
lines changed

1 file changed

+69
-39
lines changed

include/linux/byteorder/swab.h

Lines changed: 69 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -10,47 +10,77 @@
1010
* separated swab functions from cpu_to_XX,
1111
* to clean up support for bizarre-endian architectures.
1212
*
13+
* Trent Piepho <xyzzy@speakeasy.org> 2007114
14+
* make constant-folding work, provide C versions that
15+
* gcc can optimize better, explain different versions
16+
*
1317
* See asm-i386/byteorder.h and suches for examples of how to provide
1418
* architecture-dependent optimized versions
1519
*
1620
*/
1721

1822
#include <linux/compiler.h>
1923

24+
/* Functions/macros defined, there are a lot:
25+
*
26+
* ___swabXX
27+
* Generic C versions of the swab functions.
28+
*
29+
* ___constant_swabXX
30+
* C versions that gcc can fold into a compile-time constant when
31+
* the argument is a compile-time constant.
32+
*
33+
* __arch__swabXX[sp]?
34+
* Architecture optimized versions of all the swab functions
35+
* (including the s and p versions). These can be defined in
36+
* asm-arch/byteorder.h. Any which are not, are defined here.
37+
* __arch__swabXXs() is defined in terms of __arch__swabXXp(), which
38+
* is defined in terms of __arch__swabXX(), which is in turn defined
39+
* in terms of ___swabXX(x).
40+
* These must be macros. They may be unsafe for arguments with
41+
* side-effects.
42+
*
43+
* __fswabXX
44+
* Inline function versions of the __arch__ macros. These _are_ safe
45+
* if the arguments have side-effects. Note there are no s and p
46+
* versions of these.
47+
*
48+
* __swabXX[sb]
49+
* There are the ones you should actually use. The __swabXX versions
50+
* will be a constant given a constant argument and use the arch
51+
* specific code (if any) for non-constant arguments. The s and p
52+
* versions always use the arch specific code (constant folding
53+
* doesn't apply). They are safe to use with arguments with
54+
* side-effects.
55+
*
56+
* swabXX[sb]
57+
* Nicknames for __swabXX[sb] to use in the kernel.
58+
*/
59+
2060
/* casts are necessary for constants, because we never know how for sure
2161
* how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
2262
*/
23-
#define ___swab16(x) \
24-
({ \
25-
__u16 __x = (x); \
26-
((__u16)( \
27-
(((__u16)(__x) & (__u16)0x00ffU) << 8) | \
28-
(((__u16)(__x) & (__u16)0xff00U) >> 8) )); \
29-
})
3063

31-
#define ___swab32(x) \
32-
({ \
33-
__u32 __x = (x); \
34-
((__u32)( \
35-
(((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \
36-
(((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \
37-
(((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \
38-
(((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \
39-
})
40-
41-
#define ___swab64(x) \
42-
({ \
43-
__u64 __x = (x); \
44-
((__u64)( \
45-
(__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \
46-
(__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \
47-
(__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \
48-
(__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \
49-
(__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \
50-
(__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
51-
(__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
52-
(__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
53-
})
64+
static __inline__ __attribute_const__ __u16 ___swab16(__u16 x)
65+
{
66+
return x<<8 | x>>8;
67+
}
68+
static __inline__ __attribute_const__ __u32 ___swab32(__u32 x)
69+
{
70+
return x<<24 | x>>24 |
71+
(x & (__u32)0x0000ff00UL)<<8 |
72+
(x & (__u32)0x00ff0000UL)>>8;
73+
}
74+
static __inline__ __attribute_const__ __u64 ___swab64(__u64 x)
75+
{
76+
return x<<56 | x>>56 |
77+
(x & (__u64)0x000000000000ff00ULL)<<40 |
78+
(x & (__u64)0x0000000000ff0000ULL)<<24 |
79+
(x & (__u64)0x00000000ff000000ULL)<< 8 |
80+
(x & (__u64)0x000000ff00000000ULL)>> 8 |
81+
(x & (__u64)0x0000ff0000000000ULL)>>24 |
82+
(x & (__u64)0x00ff000000000000ULL)>>40;
83+
}
5484

5585
#define ___constant_swab16(x) \
5686
((__u16)( \
@@ -77,13 +107,13 @@
77107
* provide defaults when no architecture-specific optimization is detected
78108
*/
79109
#ifndef __arch__swab16
80-
# define __arch__swab16(x) ({ __u16 __tmp = (x) ; ___swab16(__tmp); })
110+
# define __arch__swab16(x) ___swab16(x)
81111
#endif
82112
#ifndef __arch__swab32
83-
# define __arch__swab32(x) ({ __u32 __tmp = (x) ; ___swab32(__tmp); })
113+
# define __arch__swab32(x) ___swab32(x)
84114
#endif
85115
#ifndef __arch__swab64
86-
# define __arch__swab64(x) ({ __u64 __tmp = (x) ; ___swab64(__tmp); })
116+
# define __arch__swab64(x) ___swab64(x)
87117
#endif
88118

89119
#ifndef __arch__swab16p
@@ -97,13 +127,13 @@
97127
#endif
98128

99129
#ifndef __arch__swab16s
100-
# define __arch__swab16s(x) do { *(x) = __arch__swab16p((x)); } while (0)
130+
# define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x)))
101131
#endif
102132
#ifndef __arch__swab32s
103-
# define __arch__swab32s(x) do { *(x) = __arch__swab32p((x)); } while (0)
133+
# define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x)))
104134
#endif
105135
#ifndef __arch__swab64s
106-
# define __arch__swab64s(x) do { *(x) = __arch__swab64p((x)); } while (0)
136+
# define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x)))
107137
#endif
108138

109139

@@ -113,15 +143,15 @@
113143
#if defined(__GNUC__) && defined(__OPTIMIZE__)
114144
# define __swab16(x) \
115145
(__builtin_constant_p((__u16)(x)) ? \
116-
___swab16((x)) : \
146+
___constant_swab16((x)) : \
117147
__fswab16((x)))
118148
# define __swab32(x) \
119149
(__builtin_constant_p((__u32)(x)) ? \
120-
___swab32((x)) : \
150+
___constant_swab32((x)) : \
121151
__fswab32((x)))
122152
# define __swab64(x) \
123153
(__builtin_constant_p((__u64)(x)) ? \
124-
___swab64((x)) : \
154+
___constant_swab64((x)) : \
125155
__fswab64((x)))
126156
#else
127157
# define __swab16(x) __fswab16(x)

0 commit comments

Comments
 (0)