author     Gerd Hoffmann <kraxel@suse.de>          2006-06-26 07:56:16 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-06-26 13:48:14 -0400
commit     d167a51877e94dda73dd656c51f363502309f713
tree       eb02c2974b61777f575dfdc07d4c2adf83bde434 /include
parent     240cd6a80642da528bfa382ec2ae4e3cb8991ea7
[PATCH] x86_64: x86_64 version of the smp alternative patch.
Changes are largely identical to the i386 version:
* alternative #defines are moved to the new alternative.h file.
* one new elf section with pointers to the lock prefixes which can be
  nop'ed out for non-smp (see the sketch after this list).
* two new elf sections similar to the "classic" alternatives to
  replace SMP code with simpler UP code.
* fixup headers to use alternative.h instead of defining their own
LOCK / LOCK_PREFIX macros.
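The first of those new sections is conceptually just an address list: every
LOCK prefix emitted through the new LOCK_PREFIX macro records a pointer to
its 0xf0 prefix byte in .smp_locks, and on a uniprocessor machine the kernel
can walk that list and overwrite each prefix with a NOP. A rough sketch of
that walk (function name and signature are illustrative only, not taken from
this patch):

static void nop_out_lock_prefixes(u8 **start, u8 **end,
                                  u8 *text, u8 *text_end)
{
        u8 **ptr;

        for (ptr = start; ptr < end; ptr++) {
                /* only touch entries that point into the given text range */
                if (*ptr < text || *ptr > text_end)
                        continue;
                **ptr = 0x90;   /* rewrite the 0xf0 lock prefix to a nop */
        }
}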
The patch reuses the i386 version of the alternatives code to avoid code
duplication. The code in alternatives.c was shuffled around a bit to
reduce the number of #ifdefs needed. It also got some tweaks needed for
x86_64 (vsyscall page handling) and new features (noreplacement option
which was x86_64 only up to now). Debug printk's are changed from
compile-time to runtime.
Loosely based on an early version from Bastian Blank <waldi@debian.org>
Signed-off-by: Gerd Hoffmann <kraxel@suse.de>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
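For reference, one of the #defines that moves into the new <asm/alternative.h>
is alternative_input(); a minimal, hypothetical caller would look like the
sketch below. The prefetch use case and the X86_FEATURE_3DNOW bit (from
<asm/cpufeature.h>) are only an illustration and are not part of this patch:

#include <asm/alternative.h>
#include <asm/cpufeature.h>

/* Use prefetchw on CPUs that advertise 3DNow!, plain prefetcht0 otherwise. */
static inline void prefetchw_sketch(const void *x)
{
        alternative_input("prefetcht0 (%1)",
                          "prefetchw (%1)",
                          X86_FEATURE_3DNOW,
                          "r" (x));
}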
Diffstat (limited to 'include')
-rw-r--r--  include/asm-i386/alternative.h    |   2
-rw-r--r--  include/asm-x86_64/alternative.h  | 146
-rw-r--r--  include/asm-x86_64/atomic.h       |  42
-rw-r--r--  include/asm-x86_64/bitops.h       |   7
-rw-r--r--  include/asm-x86_64/cpufeature.h   |   2
-rw-r--r--  include/asm-x86_64/mutex.h        |   4
-rw-r--r--  include/asm-x86_64/rwlock.h       |   8
-rw-r--r--  include/asm-x86_64/semaphore.h    |   8
-rw-r--r--  include/asm-x86_64/spinlock.h     |  10
-rw-r--r--  include/asm-x86_64/system.h       |  86
10 files changed, 192 insertions, 123 deletions
diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h
index d79e9ee10fd7..c61bd1a17f37 100644
--- a/include/asm-i386/alternative.h
+++ b/include/asm-i386/alternative.h
@@ -5,6 +5,8 @@
 
 #include <asm/types.h>
 
+#include <linux/types.h>
+
 struct alt_instr {
         u8 *instr;              /* original instruction */
         u8 *replacement;
diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h
new file mode 100644
index 000000000000..387c8f66af7d
--- /dev/null
+++ b/include/asm-x86_64/alternative.h
@@ -0,0 +1,146 @@
+#ifndef _X86_64_ALTERNATIVE_H
+#define _X86_64_ALTERNATIVE_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+
+struct alt_instr {
+        u8 *instr;              /* original instruction */
+        u8 *replacement;
+        u8  cpuid;              /* cpuid bit set for replacement */
+        u8  instrlen;           /* length of original instruction */
+        u8  replacementlen;     /* length of new instruction, <= instrlen */
+        u8  pad[5];
+};
+
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+
+struct module;
+extern void alternatives_smp_module_add(struct module *mod, char *name,
+                                        void *locks, void *locks_end,
+                                        void *text, void *text_end);
+extern void alternatives_smp_module_del(struct module *mod);
+extern void alternatives_smp_switch(int smp);
+
+#endif
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows to use optimized instructions even on generic binary
+ * kernels.
+ *
+ * length of oldinstr must be longer or equal the length of newinstr
+ * It can be padded with nops as needed.
+ *
+ * For non barrier like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, newinstr, feature) \
+        asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+                      ".section .altinstructions,\"a\"\n" \
+                      " .align 8\n" \
+                      " .quad 661b\n" /* label */ \
+                      " .quad 663f\n" /* new instruction */ \
+                      " .byte %c0\n" /* feature bit */ \
+                      " .byte 662b-661b\n" /* sourcelen */ \
+                      " .byte 664f-663f\n" /* replacementlen */ \
+                      ".previous\n" \
+                      ".section .altinstr_replacement,\"ax\"\n" \
+                      "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+                      ".previous" :: "i" (feature) : "memory")
+
+/*
+ * Alternative inline assembly with input.
+ *
+ * Pecularities:
+ * No memory clobber here.
+ * Argument numbers start with 1.
+ * Best is to use constraints that are fixed size (like (%1) ... "r")
+ * If you use variable sized constraints like "m" or "g" in the
+ * replacement make sure to pad to the worst case length.
+ */
+#define alternative_input(oldinstr, newinstr, feature, input...) \
+        asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+                      ".section .altinstructions,\"a\"\n" \
+                      " .align 8\n" \
+                      " .quad 661b\n" /* label */ \
+                      " .quad 663f\n" /* new instruction */ \
+                      " .byte %c0\n" /* feature bit */ \
+                      " .byte 662b-661b\n" /* sourcelen */ \
+                      " .byte 664f-663f\n" /* replacementlen */ \
+                      ".previous\n" \
+                      ".section .altinstr_replacement,\"ax\"\n" \
+                      "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+                      ".previous" :: "i" (feature), ##input)
+
+/* Like alternative_input, but with a single output argument */
+#define alternative_io(oldinstr, newinstr, feature, output, input...) \
+        asm volatile ("661:\n\t" oldinstr "\n662:\n" \
+                      ".section .altinstructions,\"a\"\n" \
+                      " .align 8\n" \
+                      " .quad 661b\n" /* label */ \
+                      " .quad 663f\n" /* new instruction */ \
+                      " .byte %c[feat]\n" /* feature bit */ \
+                      " .byte 662b-661b\n" /* sourcelen */ \
+                      " .byte 664f-663f\n" /* replacementlen */ \
+                      ".previous\n" \
+                      ".section .altinstr_replacement,\"ax\"\n" \
+                      "663:\n\t" newinstr "\n664:\n" /* replacement */ \
+                      ".previous" : output : [feat] "i" (feature), ##input)
+
+/*
+ * Alternative inline assembly for SMP.
+ *
+ * alternative_smp() takes two versions (SMP first, UP second) and is
+ * for more complex stuff such as spinlocks.
+ *
+ * The LOCK_PREFIX macro defined here replaces the LOCK and
+ * LOCK_PREFIX macros used everywhere in the source tree.
+ *
+ * SMP alternatives use the same data structures as the other
+ * alternatives and the X86_FEATURE_UP flag to indicate the case of a
+ * UP system running a SMP kernel. The existing apply_alternatives()
+ * works fine for patching a SMP kernel for UP.
+ *
+ * The SMP alternative tables can be kept after boot and contain both
+ * UP and SMP versions of the instructions to allow switching back to
+ * SMP at runtime, when hotplugging in a new CPU, which is especially
+ * useful in virtualized environments.
+ *
+ * The very common lock prefix is handled as special case in a
+ * separate table which is a pure address list without replacement ptr
+ * and size information. That keeps the table sizes small.
+ */
+
+#ifdef CONFIG_SMP
+#define alternative_smp(smpinstr, upinstr, args...) \
+        asm volatile ("661:\n\t" smpinstr "\n662:\n" \
+                      ".section .smp_altinstructions,\"a\"\n" \
+                      " .align 8\n" \
+                      " .quad 661b\n" /* label */ \
+                      " .quad 663f\n" /* new instruction */ \
+                      " .byte 0x66\n" /* X86_FEATURE_UP */ \
+                      " .byte 662b-661b\n" /* sourcelen */ \
+                      " .byte 664f-663f\n" /* replacementlen */ \
+                      ".previous\n" \
+                      ".section .smp_altinstr_replacement,\"awx\"\n" \
+                      "663:\n\t" upinstr "\n" /* replacement */ \
+                      "664:\n\t.fill 662b-661b,1,0x42\n" /* space for original */ \
+                      ".previous" : args)
+
+#define LOCK_PREFIX \
+                ".section .smp_locks,\"a\"\n" \
+                " .align 8\n" \
+                " .quad 661f\n" /* address */ \
+                ".previous\n" \
+                "661:\n\tlock; "
+
+#else /* ! CONFIG_SMP */
+#define alternative_smp(smpinstr, upinstr, args...) \
+        asm volatile (upinstr : args)
+#define LOCK_PREFIX ""
+#endif
+
+#endif /* _X86_64_ALTERNATIVE_H */
diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h
index bd3fa67ed835..007e88d6d43f 100644
--- a/include/asm-x86_64/atomic.h
+++ b/include/asm-x86_64/atomic.h
@@ -1,7 +1,7 @@
 #ifndef __ARCH_X86_64_ATOMIC__
 #define __ARCH_X86_64_ATOMIC__
 
-#include <asm/types.h>
+#include <asm/alternative.h>
 
 /* atomic_t should be 32 bit signed type */
 
@@ -52,7 +52,7 @@ typedef struct { volatile int counter; } atomic_t;
 static __inline__ void atomic_add(int i, atomic_t *v)
 {
         __asm__ __volatile__(
-                LOCK "addl %1,%0"
+                LOCK_PREFIX "addl %1,%0"
                 :"=m" (v->counter)
                 :"ir" (i), "m" (v->counter));
 }
@@ -67,7 +67,7 @@ static __inline__ void atomic_add(int i, atomic_t *v)
 static __inline__ void atomic_sub(int i, atomic_t *v)
 {
         __asm__ __volatile__(
-                LOCK "subl %1,%0"
+                LOCK_PREFIX "subl %1,%0"
                 :"=m" (v->counter)
                 :"ir" (i), "m" (v->counter));
 }
@@ -86,7 +86,7 @@ static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
         unsigned char c;
 
         __asm__ __volatile__(
-                LOCK "subl %2,%0; sete %1"
+                LOCK_PREFIX "subl %2,%0; sete %1"
                 :"=m" (v->counter), "=qm" (c)
                 :"ir" (i), "m" (v->counter) : "memory");
         return c;
@@ -101,7 +101,7 @@ static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
 static __inline__ void atomic_inc(atomic_t *v)
 {
         __asm__ __volatile__(
-                LOCK "incl %0"
+                LOCK_PREFIX "incl %0"
                 :"=m" (v->counter)
                 :"m" (v->counter));
 }
@@ -115,7 +115,7 @@ static __inline__ void atomic_inc(atomic_t *v)
 static __inline__ void atomic_dec(atomic_t *v)
 {
         __asm__ __volatile__(
-                LOCK "decl %0"
+                LOCK_PREFIX "decl %0"
                 :"=m" (v->counter)
                 :"m" (v->counter));
 }
@@ -133,7 +133,7 @@ static __inline__ int atomic_dec_and_test(atomic_t *v)
         unsigned char c;
 
         __asm__ __volatile__(
-                LOCK "decl %0; sete %1"
+                LOCK_PREFIX "decl %0; sete %1"
                 :"=m" (v->counter), "=qm" (c)
                 :"m" (v->counter) : "memory");
         return c != 0;
@@ -152,7 +152,7 @@ static __inline__ int atomic_inc_and_test(atomic_t *v)
         unsigned char c;
 
         __asm__ __volatile__(
-                LOCK "incl %0; sete %1"
+                LOCK_PREFIX "incl %0; sete %1"
                 :"=m" (v->counter), "=qm" (c)
                 :"m" (v->counter) : "memory");
         return c != 0;
@@ -172,7 +172,7 @@ static __inline__ int atomic_add_negative(int i, atomic_t *v)
         unsigned char c;
 
         __asm__ __volatile__(
-                LOCK "addl %2,%0; sets %1"
+                LOCK_PREFIX "addl %2,%0; sets %1"
                 :"=m" (v->counter), "=qm" (c)
                 :"ir" (i), "m" (v->counter) : "memory");
         return c;
@@ -189,7 +189,7 @@ static __inline__ int atomic_add_return(int i, atomic_t *v)
 {
         int __i = i;
         __asm__ __volatile__(
-                LOCK "xaddl %0, %1;"
+                LOCK_PREFIX "xaddl %0, %1;"
                 :"=r"(i)
                 :"m"(v->counter), "0"(i));
         return i + __i;
@@ -237,7 +237,7 @@ typedef struct { volatile long counter; } atomic64_t;
 static __inline__ void atomic64_add(long i, atomic64_t *v)
 {
         __asm__ __volatile__(
-                LOCK "addq %1,%0"
+                LOCK_PREFIX "addq %1,%0"
                 :"=m" (v->counter)
                 :"ir" (i), "m" (v->counter));
 }
@@ -252,7 +252,7 @@ static __inline__ void atomic64_add(long i, atomic64_t *v)
 static __inline__ void atomic64_sub(long i, atomic64_t *v)
 {
         __asm__ __volatile__(
-                LOCK "subq %1,%0"
+                LOCK_PREFIX "subq %1,%0"
                 :"=m" (v->counter)
                 :"ir" (i), "m" (v->counter));
 }
@@ -271,7 +271,7 @@ static __inline__ int atomic64_sub_and_test(long i, atomic64_t *v)
         unsigned char c;
 
         __asm__ __volatile__(
-                LOCK "subq %2,%0; sete %1"
+                LOCK_PREFIX "subq %2,%0; sete %1"
                 :"=m" (v->counter), "=qm" (c)
                 :"ir" (i), "m" (v->counter) : "memory");
         return c;
@@ -286,7 +286,7 @@ static __inline__ int atomic64_sub_and_test(long i, atomic64_t *v)
 static __inline__ void atomic64_inc(atomic64_t *v)
 {
         __asm__ __volatile__(
-                LOCK "incq %0"
+                LOCK_PREFIX "incq %0"
                 :"=m" (v->counter)
                 :"m" (v->counter));
 }
@@ -300,7 +300,7 @@ static __inline__ void atomic64_inc(atomic64_t *v)
 static __inline__ void atomic64_dec(atomic64_t *v)
 {
         __asm__ __volatile__(
-                LOCK "decq %0"
+                LOCK_PREFIX "decq %0"
                 :"=m" (v->counter)
                 :"m" (v->counter));
 }
@@ -318,7 +318,7 @@ static __inline__ int atomic64_dec_and_test(atomic64_t *v)
         unsigned char c;
 
         __asm__ __volatile__(
-                LOCK "decq %0; sete %1"
+                LOCK_PREFIX "decq %0; sete %1"
                 :"=m" (v->counter), "=qm" (c)
                 :"m" (v->counter) : "memory");
         return c != 0;
@@ -337,7 +337,7 @@ static __inline__ int atomic64_inc_and_test(atomic64_t *v)
         unsigned char c;
 
         __asm__ __volatile__(
-                LOCK "incq %0; sete %1"
+                LOCK_PREFIX "incq %0; sete %1"
                 :"=m" (v->counter), "=qm" (c)
                 :"m" (v->counter) : "memory");
         return c != 0;
@@ -357,7 +357,7 @@ static __inline__ int atomic64_add_negative(long i, atomic64_t *v)
         unsigned char c;
 
         __asm__ __volatile__(
-                LOCK "addq %2,%0; sets %1"
+                LOCK_PREFIX "addq %2,%0; sets %1"
                 :"=m" (v->counter), "=qm" (c)
                 :"ir" (i), "m" (v->counter) : "memory");
         return c;
@@ -374,7 +374,7 @@ static __inline__ long atomic64_add_return(long i, atomic64_t *v)
 {
         long __i = i;
         __asm__ __volatile__(
-                LOCK "xaddq %0, %1;"
+                LOCK_PREFIX "xaddq %0, %1;"
                 :"=r"(i)
                 :"m"(v->counter), "0"(i));
         return i + __i;
@@ -418,11 +418,11 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t *v)
 
 /* These are x86-specific, used by some header files */
 #define atomic_clear_mask(mask, addr) \
-__asm__ __volatile__(LOCK "andl %0,%1" \
+__asm__ __volatile__(LOCK_PREFIX "andl %0,%1" \
 : : "r" (~(mask)),"m" (*addr) : "memory")
 
 #define atomic_set_mask(mask, addr) \
-__asm__ __volatile__(LOCK "orl %0,%1" \
+__asm__ __volatile__(LOCK_PREFIX "orl %0,%1" \
 : : "r" ((unsigned)mask),"m" (*(addr)) : "memory")
 
 /* Atomic operations are already serializing on x86 */
diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h
index e9bf933d25d0..f7ba57b1cc08 100644
--- a/include/asm-x86_64/bitops.h
+++ b/include/asm-x86_64/bitops.h
@@ -5,12 +5,7 @@
  * Copyright 1992, Linus Torvalds.
  */
 
-
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX "lock ; "
-#else
-#define LOCK_PREFIX ""
-#endif
+#include <asm/alternative.h>
 
 #define ADDR (*(volatile long *) addr)
 
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
index 662964b74e34..afc44e557400 100644
--- a/include/asm-x86_64/cpufeature.h
+++ b/include/asm-x86_64/cpufeature.h
@@ -65,6 +65,8 @@
 #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
 #define X86_FEATURE_SYNC_RDTSC  (3*32+6)  /* RDTSC syncs CPU core */
 #define X86_FEATURE_FXSAVE_LEAK (3*32+7)  /* FIP/FOP/FDP leaks through FXSAVE */
+#define X86_FEATURE_UP          (3*32+8)  /* SMP kernel running on UP */
+
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3        (4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/include/asm-x86_64/mutex.h b/include/asm-x86_64/mutex.h
index 11fbee2bd6c0..06fab6de2a88 100644
--- a/include/asm-x86_64/mutex.h
+++ b/include/asm-x86_64/mutex.h
@@ -24,7 +24,7 @@ do { \
         typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
         \
         __asm__ __volatile__( \
-                LOCK " decl (%%rdi) \n" \
+                LOCK_PREFIX " decl (%%rdi) \n" \
                 " js 2f \n" \
                 "1: \n" \
         \
@@ -74,7 +74,7 @@ do { \
         typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
         \
         __asm__ __volatile__( \
-                LOCK " incl (%%rdi) \n" \
+                LOCK_PREFIX " incl (%%rdi) \n" \
                 " jle 2f \n" \
                 "1: \n" \
         \
diff --git a/include/asm-x86_64/rwlock.h b/include/asm-x86_64/rwlock.h
index 9942cc393064..dea0e9459264 100644
--- a/include/asm-x86_64/rwlock.h
+++ b/include/asm-x86_64/rwlock.h
@@ -24,7 +24,7 @@
 #define RW_LOCK_BIAS_STR "0x01000000"
 
 #define __build_read_lock_ptr(rw, helper) \
-        asm volatile(LOCK "subl $1,(%0)\n\t" \
+        asm volatile(LOCK_PREFIX "subl $1,(%0)\n\t" \
                      "js 2f\n" \
                      "1:\n" \
                      LOCK_SECTION_START("") \
@@ -34,7 +34,7 @@
                      ::"a" (rw) : "memory")
 
 #define __build_read_lock_const(rw, helper) \
-        asm volatile(LOCK "subl $1,%0\n\t" \
+        asm volatile(LOCK_PREFIX "subl $1,%0\n\t" \
                      "js 2f\n" \
                      "1:\n" \
                      LOCK_SECTION_START("") \
@@ -54,7 +54,7 @@
 } while (0)
 
 #define __build_write_lock_ptr(rw, helper) \
-        asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+        asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
                      "jnz 2f\n" \
                      "1:\n" \
                      LOCK_SECTION_START("") \
@@ -64,7 +64,7 @@
                      ::"a" (rw) : "memory")
 
 #define __build_write_lock_const(rw, helper) \
-        asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
+        asm volatile(LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \
                      "jnz 2f\n" \
                      "1:\n" \
                      LOCK_SECTION_START("") \
diff --git a/include/asm-x86_64/semaphore.h b/include/asm-x86_64/semaphore.h
index a389aa6fe80f..064df08b9a0f 100644
--- a/include/asm-x86_64/semaphore.h
+++ b/include/asm-x86_64/semaphore.h
@@ -106,7 +106,7 @@ static inline void down(struct semaphore * sem)
 
         __asm__ __volatile__(
                 "# atomic down operation\n\t"
-                LOCK "decl %0\n\t" /* --sem->count */
+                LOCK_PREFIX "decl %0\n\t" /* --sem->count */
                 "js 2f\n"
                 "1:\n"
                 LOCK_SECTION_START("")
@@ -130,7 +130,7 @@ static inline int down_interruptible(struct semaphore * sem)
 
         __asm__ __volatile__(
                 "# atomic interruptible down operation\n\t"
-                LOCK "decl %1\n\t" /* --sem->count */
+                LOCK_PREFIX "decl %1\n\t" /* --sem->count */
                 "js 2f\n\t"
                 "xorl %0,%0\n"
                 "1:\n"
@@ -154,7 +154,7 @@ static inline int down_trylock(struct semaphore * sem)
 
         __asm__ __volatile__(
                 "# atomic interruptible down operation\n\t"
-                LOCK "decl %1\n\t" /* --sem->count */
+                LOCK_PREFIX "decl %1\n\t" /* --sem->count */
                 "js 2f\n\t"
                 "xorl %0,%0\n"
                 "1:\n"
@@ -178,7 +178,7 @@ static inline void up(struct semaphore * sem)
 {
         __asm__ __volatile__(
                 "# atomic up operation\n\t"
-                LOCK "incl %0\n\t" /* ++sem->count */
+                LOCK_PREFIX "incl %0\n\t" /* ++sem->count */
                 "jle 2f\n"
                 "1:\n"
                 LOCK_SECTION_START("")
diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h
index 5d8a5e3589ff..8d3421996f94 100644
--- a/include/asm-x86_64/spinlock.h
+++ b/include/asm-x86_64/spinlock.h
@@ -31,15 +31,19 @@
                 "jmp 1b\n" \
                 LOCK_SECTION_END
 
+#define __raw_spin_lock_string_up \
+        "\n\tdecl %0"
+
 #define __raw_spin_unlock_string \
         "movl $1,%0" \
                 :"=m" (lock->slock) : : "memory"
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
-        __asm__ __volatile__(
-                __raw_spin_lock_string
-                :"=m" (lock->slock) : : "memory");
+        alternative_smp(
+                __raw_spin_lock_string,
+                __raw_spin_lock_string_up,
+                "=m" (lock->slock) : : "memory");
 }
 
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index f48e0dad8b3d..68e559f3631c 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -3,15 +3,10 @@
 
 #include <linux/kernel.h>
 #include <asm/segment.h>
+#include <asm/alternative.h>
 
 #ifdef __KERNEL__
 
-#ifdef CONFIG_SMP
-#define LOCK_PREFIX "lock ; "
-#else
-#define LOCK_PREFIX ""
-#endif
-
 #define __STR(x) #x
 #define STR(x) __STR(x)
 
@@ -34,7 +29,7 @@
         "thread_return:\n\t" \
         "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
         "movq %P[thread_info](%%rsi),%%r8\n\t" \
-        LOCK "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
+        LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
         "movq %%rax,%%rdi\n\t" \
         "jc ret_from_fork\n\t" \
         RESTORE_CONTEXT \
@@ -69,82 +64,6 @@ extern void load_gs_index(unsigned);
                 ".previous" \
                 : :"r" (value), "r" (0))
 
-#ifdef __KERNEL__
-struct alt_instr {
-        __u8 *instr;            /* original instruction */
-        __u8 *replacement;
-        __u8  cpuid;            /* cpuid bit set for replacement */
-        __u8  instrlen;         /* length of original instruction */
-        __u8  replacementlen;   /* length of new instruction, <= instrlen */
-        __u8  pad[5];
-};
-#endif
-
-/*
- * Alternative instructions for different CPU types or capabilities.
- *
- * This allows to use optimized instructions even on generic binary
- * kernels.
- *
- * length of oldinstr must be longer or equal the length of newinstr
- * It can be padded with nops as needed.
- *
- * For non barrier like inlines please define new variants
- * without volatile and memory clobber.
- */
-#define alternative(oldinstr, newinstr, feature) \
-        asm volatile ("661:\n\t" oldinstr "\n662:\n" \
-                      ".section .altinstructions,\"a\"\n" \
-                      " .align 8\n" \
-                      " .quad 661b\n" /* label */ \
-                      " .quad 663f\n" /* new instruction */ \
-                      " .byte %c0\n" /* feature bit */ \
-                      " .byte 662b-661b\n" /* sourcelen */ \
-                      " .byte 664f-663f\n" /* replacementlen */ \
-                      ".previous\n" \
-                      ".section .altinstr_replacement,\"ax\"\n" \
-                      "663:\n\t" newinstr "\n664:\n" /* replacement */ \
-                      ".previous" :: "i" (feature) : "memory")
-
-/*
- * Alternative inline assembly with input.
- *
- * Peculiarities:
- * No memory clobber here.
- * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
- */
-#define alternative_input(oldinstr, newinstr, feature, input...) \
-        asm volatile ("661:\n\t" oldinstr "\n662:\n" \
-                      ".section .altinstructions,\"a\"\n" \
-                      " .align 8\n" \
-                      " .quad 661b\n" /* label */ \
-                      " .quad 663f\n" /* new instruction */ \
-                      " .byte %c0\n" /* feature bit */ \
-                      " .byte 662b-661b\n" /* sourcelen */ \
-                      " .byte 664f-663f\n" /* replacementlen */ \
-                      ".previous\n" \
-                      ".section .altinstr_replacement,\"ax\"\n" \
-                      "663:\n\t" newinstr "\n664:\n" /* replacement */ \
-                      ".previous" :: "i" (feature), ##input)
-
-/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...) \
-        asm volatile ("661:\n\t" oldinstr "\n662:\n" \
-                      ".section .altinstructions,\"a\"\n" \
-                      " .align 8\n" \
-                      " .quad 661b\n" /* label */ \
-                      " .quad 663f\n" /* new instruction */ \
-                      " .byte %c[feat]\n" /* feature bit */ \
-                      " .byte 662b-661b\n" /* sourcelen */ \
-                      " .byte 664f-663f\n" /* replacementlen */ \
-                      ".previous\n" \
-                      ".section .altinstr_replacement,\"ax\"\n" \
-                      "663:\n\t" newinstr "\n664:\n" /* replacement */ \
-                      ".previous" : output : [feat] "i" (feature), ##input)
-
 /*
  * Clear and set 'TS' bit respectively
  */
@@ -366,5 +285,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 void cpu_idle_wait(void);
 
 extern unsigned long arch_align_stack(unsigned long sp);
+extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 #endif