aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndi Kleen <andi@firstfloor.org>2009-01-12 17:01:15 -0500
committerIngo Molnar <mingo@elte.hu>2009-01-13 12:56:30 -0500
commitc8399943bdb70fef78798b97f975506ecc99e039 (patch)
tree3d7cdf853bdfc012b9ea8513ab775238b94d6f75
parent4a922a969cb0190ce4580d4b064e2ac35f3ac9bf (diff)
x86, generic: mark complex bitops.h inlines as __always_inline
Impact: reduce kernel image size Hugh Dickins noticed that older gcc versions when the kernel is built for code size didn't inline some of the bitops. Mark all complex x86 bitops that have more than a single asm statement or two as always inline to avoid this problem. Probably should be done for other architectures too. Ingo then found a better fix that only requires a single line change, but it unfortunately only works on gcc 4.3. On older gccs the original patch still makes a ~0.3% defconfig difference with CONFIG_OPTIMIZE_INLINING=y. With gcc 4.1 and a defconfig like build: 6116998 1138540 883788 8139326 7c323e vmlinux-oi-with-patch 6137043 1138540 883788 8159371 7c808b vmlinux-optimize-inlining ~20k / 0.3% difference. Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/include/asm/bitops.h14
-rw-r--r--include/asm-generic/bitops/__ffs.h2
-rw-r--r--include/asm-generic/bitops/__fls.h2
-rw-r--r--include/asm-generic/bitops/fls.h2
-rw-r--r--include/asm-generic/bitops/fls64.h4
5 files changed, 15 insertions, 9 deletions
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index e02a359d2aa5..02b47a603fc8 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -3,6 +3,9 @@
3 3
4/* 4/*
5 * Copyright 1992, Linus Torvalds. 5 * Copyright 1992, Linus Torvalds.
6 *
7 * Note: inlines with more than a single statement should be marked
8 * __always_inline to avoid problems with older gcc's inlining heuristics.
6 */ 9 */
7 10
8#ifndef _LINUX_BITOPS_H 11#ifndef _LINUX_BITOPS_H
@@ -53,7 +56,8 @@
53 * Note that @nr may be almost arbitrarily large; this function is not 56 * Note that @nr may be almost arbitrarily large; this function is not
54 * restricted to acting on a single-word quantity. 57 * restricted to acting on a single-word quantity.
55 */ 58 */
56static inline void set_bit(unsigned int nr, volatile unsigned long *addr) 59static __always_inline void
60set_bit(unsigned int nr, volatile unsigned long *addr)
57{ 61{
58 if (IS_IMMEDIATE(nr)) { 62 if (IS_IMMEDIATE(nr)) {
59 asm volatile(LOCK_PREFIX "orb %1,%0" 63 asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
90 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() 94 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
91 * in order to ensure changes are visible on other processors. 95 * in order to ensure changes are visible on other processors.
92 */ 96 */
93static inline void clear_bit(int nr, volatile unsigned long *addr) 97static __always_inline void
98clear_bit(int nr, volatile unsigned long *addr)
94{ 99{
95 if (IS_IMMEDIATE(nr)) { 100 if (IS_IMMEDIATE(nr)) {
96 asm volatile(LOCK_PREFIX "andb %1,%0" 101 asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -204,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
204 * 209 *
205 * This is the same as test_and_set_bit on x86. 210 * This is the same as test_and_set_bit on x86.
206 */ 211 */
207static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) 212static __always_inline int
213test_and_set_bit_lock(int nr, volatile unsigned long *addr)
208{ 214{
209 return test_and_set_bit(nr, addr); 215 return test_and_set_bit(nr, addr);
210} 216}
@@ -300,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
300 return oldbit; 306 return oldbit;
301} 307}
302 308
303static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) 309static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
304{ 310{
305 return ((1UL << (nr % BITS_PER_LONG)) & 311 return ((1UL << (nr % BITS_PER_LONG)) &
306 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; 312 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
diff --git a/include/asm-generic/bitops/__ffs.h b/include/asm-generic/bitops/__ffs.h
index 9a3274aecf83..937d7c435575 100644
--- a/include/asm-generic/bitops/__ffs.h
+++ b/include/asm-generic/bitops/__ffs.h
@@ -9,7 +9,7 @@
9 * 9 *
10 * Undefined if no bit exists, so code should check against 0 first. 10 * Undefined if no bit exists, so code should check against 0 first.
11 */ 11 */
12static inline unsigned long __ffs(unsigned long word) 12static __always_inline unsigned long __ffs(unsigned long word)
13{ 13{
14 int num = 0; 14 int num = 0;
15 15
diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h
index be24465403d6..a60a7ccb6782 100644
--- a/include/asm-generic/bitops/__fls.h
+++ b/include/asm-generic/bitops/__fls.h
@@ -9,7 +9,7 @@
9 * 9 *
10 * Undefined if no set bit exists, so code should check against 0 first. 10 * Undefined if no set bit exists, so code should check against 0 first.
11 */ 11 */
12static inline unsigned long __fls(unsigned long word) 12static __always_inline unsigned long __fls(unsigned long word)
13{ 13{
14 int num = BITS_PER_LONG - 1; 14 int num = BITS_PER_LONG - 1;
15 15
diff --git a/include/asm-generic/bitops/fls.h b/include/asm-generic/bitops/fls.h
index 850859bc5069..0576d1f42f43 100644
--- a/include/asm-generic/bitops/fls.h
+++ b/include/asm-generic/bitops/fls.h
@@ -9,7 +9,7 @@
9 * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. 9 * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
10 */ 10 */
11 11
12static inline int fls(int x) 12static __always_inline int fls(int x)
13{ 13{
14 int r = 32; 14 int r = 32;
15 15
diff --git a/include/asm-generic/bitops/fls64.h b/include/asm-generic/bitops/fls64.h
index 86d403f8b256..b097cf8444e3 100644
--- a/include/asm-generic/bitops/fls64.h
+++ b/include/asm-generic/bitops/fls64.h
@@ -15,7 +15,7 @@
15 * at position 64. 15 * at position 64.
16 */ 16 */
17#if BITS_PER_LONG == 32 17#if BITS_PER_LONG == 32
18static inline int fls64(__u64 x) 18static __always_inline int fls64(__u64 x)
19{ 19{
20 __u32 h = x >> 32; 20 __u32 h = x >> 32;
21 if (h) 21 if (h)
@@ -23,7 +23,7 @@ static inline int fls64(__u64 x)
23 return fls(x); 23 return fls(x);
24} 24}
25#elif BITS_PER_LONG == 64 25#elif BITS_PER_LONG == 64
26static inline int fls64(__u64 x) 26static __always_inline int fls64(__u64 x)
27{ 27{
28 if (x == 0) 28 if (x == 0)
29 return 0; 29 return 0;