author     Linus Torvalds <torvalds@linux-foundation.org>  2013-09-04 11:39:38 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-09-04 11:39:38 -0400
commit     3d7e5fc37f91c3ad4974262e173d9ba36139652a
tree       471484562a2c0341dbee0b4d2e10fdfd5d049c58
parent     6924a4672dd07dbe11d76fe597d17a092434232f
parent     f69fa9a91f60fff6f2d8b658b7d84d235d9d89b7
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/asm changes from Ingo Molnar:
"Main changes:
- Apply low level mutex optimization on x86-64, by Wedson Almeida
Filho.
- Change bitops to be naturally 'long', by H. Peter Anvin.
- Add TSX-NI opcodes support to the x86 (instrumentation) decoder, by
Masami Hiramatsu.
- Add clang compatibility adjustments/workarounds, by Jan-Simon
Möller"
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86, doc: Update uaccess.h comment to reflect clang changes
x86, asm: Fix a compilation issue with clang
x86, asm: Extend definitions of _ASM_* with a raw format
x86, insn: Add new opcodes as of June, 2013
x86/ia32/asm: Remove unused argument in macro
x86, bitops: Change bitops to be native operand size
x86: Use asm-goto to implement mutex fast path on x86-64
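
The asm-goto mutex change in the list above works because "asm goto" lets inline assembly branch straight to a C label, so the failure call can be pushed off the straight-line path (the real kernel code is in the mutex_64.h hunks further down). A minimal stand-alone sketch of that pattern, assuming x86-64 and a GCC/Clang build with asm-goto support; the names here are illustrative, not the kernel's:

    /* Stand-alone illustration of the asm-goto fast path pattern
     * (x86-64 with asm-goto support assumed; not kernel code). */
    #include <stdio.h>

    static int counter = 1;

    static void slowpath(void)
    {
            puts("slow path");
    }

    static void fastpath_lock(void)
    {
            /* Decrement the count; if it stayed non-negative, jump
             * directly to the C label and skip the call entirely. */
            asm volatile goto("lock decl %0\n\t"
                              "jns %l[done]"
                              : : "m" (counter)
                              : "memory", "cc"
                              : done);
            slowpath();     /* only reached when the count went negative */
    done:
            return;
    }

    int main(void)
    {
            fastpath_lock();        /* 1 -> 0: fast path, no call */
            fastpath_lock();        /* 0 -> -1: slow path */
            return 0;
    }

Built with gcc -O2, the first call takes the jns branch and never reaches slowpath(); only the second call, where the count goes negative, falls through to it.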
-rw-r--r--  arch/x86/ia32/ia32entry.S            |  2
-rw-r--r--  arch/x86/include/asm/asm.h           |  6
-rw-r--r--  arch/x86/include/asm/bitops.h        | 46
-rw-r--r--  arch/x86/include/asm/mutex_64.h      | 30
-rw-r--r--  arch/x86/include/asm/sync_bitops.h   | 24
-rw-r--r--  arch/x86/include/asm/uaccess.h       |  7
-rw-r--r--  arch/x86/lib/x86-opcode-map.txt      | 42
-rw-r--r--  arch/x86/tools/gen-insn-attr-x86.awk |  4
8 files changed, 107 insertions(+), 54 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 474dc1b59f72..4299eb05023c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -452,7 +452,7 @@ ia32_badsys:
 
 	CFI_ENDPROC
 
-	.macro PTREGSCALL label, func, arg
+	.macro PTREGSCALL label, func
 	ALIGN
 GLOBAL(\label)
 	leaq \func(%rip),%rax
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 1c2d247f65ce..4582e8e1cd1a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,21 +3,25 @@
 
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)	x
+# define __ASM_FORM_RAW(x)	x
 # define __ASM_FORM_COMMA(x) x,
 #else
 # define __ASM_FORM(x)	" " #x " "
+# define __ASM_FORM_RAW(x)	#x
 # define __ASM_FORM_COMMA(x) " " #x ","
 #endif
 
 #ifdef CONFIG_X86_32
 # define __ASM_SEL(a,b) __ASM_FORM(a)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
 #else
 # define __ASM_SEL(a,b) __ASM_FORM(b)
+# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
 #endif
 
 #define __ASM_SIZE(inst, ...)	__ASM_SEL(inst##l##__VA_ARGS__, \
 				      inst##q##__VA_ARGS__)
-#define __ASM_REG(reg)		__ASM_SEL(e##reg, r##reg)
+#define __ASM_REG(reg)		__ASM_SEL_RAW(e##reg, r##reg)
 
 #define _ASM_PTR	__ASM_SEL(.long, .quad)
 #define _ASM_ALIGN	__ASM_SEL(.balign 4, .balign 8)
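
The new RAW variants exist because the C-side __ASM_FORM() wraps its token in quoted spaces, which is fine inside an instruction string but breaks string-pasting a register name such as "%" _ASM_DX (used by the uaccess.h hunk later in this series). A stand-alone sketch of the difference, assuming the usual _ASM_DX = __ASM_REG(dx) definition from the same header; only the 64-bit branch of the selectors is shown, and this is a demonstration, not kernel code:

    /* Shows why __ASM_REG() switched to the RAW selector: the padded
     * form would leave stray spaces inside the register name. */
    #include <stdio.h>

    #define __ASM_FORM(x)           " " #x " "
    #define __ASM_FORM_RAW(x)       #x

    /* 64-bit branch of the selectors */
    #define __ASM_SEL(a, b)         __ASM_FORM(b)
    #define __ASM_SEL_RAW(a, b)     __ASM_FORM_RAW(b)

    #define __ASM_REG(reg)          __ASM_SEL_RAW(e##reg, r##reg)
    #define _ASM_DX                 __ASM_REG(dx)   /* assumed, as in asm.h */

    int main(void)
    {
            printf("[%s]\n", "%" _ASM_DX);                  /* [%rdx]   */
            printf("[%s]\n", "%" __ASM_SEL(edx, rdx));      /* [% rdx ] */
            return 0;
    }

With these pieces, get_user() can spell asm("%"_ASM_DX) and end up with "%edx" on 32-bit and "%rdx" on 64-bit, with no whitespace embedded in the register name.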
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 6dfd0195bb55..41639ce8fd63 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -15,6 +15,14 @@
 #include <linux/compiler.h>
 #include <asm/alternative.h>
 
+#if BITS_PER_LONG == 32
+# define _BITOPS_LONG_SHIFT 5
+#elif BITS_PER_LONG == 64
+# define _BITOPS_LONG_SHIFT 6
+#else
+# error "Unexpected BITS_PER_LONG"
+#endif
+
 #define BIT_64(n)			(U64_C(1) << (n))
 
 /*
@@ -59,7 +67,7 @@
  * restricted to acting on a single-word quantity.
  */
 static __always_inline void
-set_bit(unsigned int nr, volatile unsigned long *addr)
+set_bit(long nr, volatile unsigned long *addr)
 {
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -81,7 +89,7 @@ set_bit(unsigned int nr, volatile unsigned long *addr)
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __set_bit(int nr, volatile unsigned long *addr)
+static inline void __set_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
 }
@@ -97,7 +105,7 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
  * in order to ensure changes are visible on other processors.
  */
 static __always_inline void
-clear_bit(int nr, volatile unsigned long *addr)
+clear_bit(long nr, volatile unsigned long *addr)
 {
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -118,13 +126,13 @@ clear_bit(int nr, volatile unsigned long *addr)
  * clear_bit() is atomic and implies release semantics before the memory
  * operation. It can be used for an unlock.
  */
-static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
 	barrier();
 	clear_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
+static inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
 }
@@ -141,7 +149,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
  * No memory barrier is required here, because x86 cannot reorder stores past
  * older loads. Same principle as spin_unlock.
  */
-static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
 	barrier();
 	__clear_bit(nr, addr);
@@ -159,7 +167,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __change_bit(int nr, volatile unsigned long *addr)
+static inline void __change_bit(long nr, volatile unsigned long *addr)
 {
 	asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
 }
@@ -173,7 +181,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void change_bit(int nr, volatile unsigned long *addr)
+static inline void change_bit(long nr, volatile unsigned long *addr)
 {
 	if (IS_IMMEDIATE(nr)) {
 		asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -194,7 +202,7 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -212,7 +220,7 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
  * This is the same as test_and_set_bit on x86.
  */
 static __always_inline int
-test_and_set_bit_lock(int nr, volatile unsigned long *addr)
+test_and_set_bit_lock(long nr, volatile unsigned long *addr)
 {
 	return test_and_set_bit(nr, addr);
 }
@@ -226,7 +234,7 @@ test_and_set_bit_lock(int nr, volatile unsigned long *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail. You must protect multiple accesses with a lock.
 */
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -245,7 +253,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -272,7 +280,7 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
  * accessed from a hypervisor on the same CPU if running in a VM: don't change
  * this without also updating arch/x86/kernel/kvm.c
  */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -284,7 +292,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 }
 
 /* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -304,7 +312,7 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
 */
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
@@ -315,13 +323,13 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 	return oldbit;
 }
 
-static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
+static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
 {
-	return ((1UL << (nr % BITS_PER_LONG)) &
-		(addr[nr / BITS_PER_LONG])) != 0;
+	return ((1UL << (nr & (BITS_PER_LONG-1))) &
+		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
 }
 
-static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
 {
 	int oldbit;
 
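
Taking the bitops.h hunks together: nr is widened from int/unsigned int to long so the bit index is carried at the native operand width of bts/btr/btc on 64-bit kernels, which makes bit offsets past 2^31-1 representable, and _BITOPS_LONG_SHIFT is simply log2(BITS_PER_LONG), used to locate the word that holds a given bit. The arithmetic the new constant_test_bit() uses, redone as a plain-C sketch hard-coded for a 64-bit long (illustrative only, not kernel code):

    /* Word/mask arithmetic behind the new constant_test_bit(). */
    #include <stdio.h>

    #define BITS_PER_LONG           64
    #define _BITOPS_LONG_SHIFT      6       /* log2(BITS_PER_LONG) */

    static int test_bit_c(long nr, const unsigned long *addr)
    {
            /* nr & (BITS_PER_LONG-1): position within the word;
             * nr >> _BITOPS_LONG_SHIFT: which word holds the bit. */
            return ((1UL << (nr & (BITS_PER_LONG - 1))) &
                    addr[nr >> _BITOPS_LONG_SHIFT]) != 0;
    }

    int main(void)
    {
            unsigned long map[2] = { 0, 1UL << 3 };

            /* bit 67 lives in map[1], bit 3 -> set; bit 3 of map[0] -> clear */
            printf("%d %d\n", test_bit_c(67, map), test_bit_c(3, map));
            return 0;
    }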
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 2c543fff241b..e7e6751648ed 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -16,6 +16,20 @@
  *
  * Atomically decrements @v and calls <fail_fn> if the result is negative.
  */
+#ifdef CC_HAVE_ASM_GOTO
+static inline void __mutex_fastpath_lock(atomic_t *v,
+					 void (*fail_fn)(atomic_t *))
+{
+	asm volatile goto(LOCK_PREFIX " decl %0\n"
+			  " jns %l[exit]\n"
+			  : : "m" (v->counter)
+			  : "memory", "cc"
+			  : exit);
+	fail_fn(v);
+exit:
+	return;
+}
+#else
 #define __mutex_fastpath_lock(v, fail_fn)			\
 do {								\
 	unsigned long dummy;					\
@@ -32,6 +46,7 @@ do {								\
 		     : "rax", "rsi", "rdx", "rcx",		\
 		       "r8", "r9", "r10", "r11", "memory");	\
 } while (0)
+#endif
 
 /**
  * __mutex_fastpath_lock_retval - try to take the lock by moving the count
@@ -56,6 +71,20 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
  *
  * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
  */
+#ifdef CC_HAVE_ASM_GOTO
+static inline void __mutex_fastpath_unlock(atomic_t *v,
+					   void (*fail_fn)(atomic_t *))
+{
+	asm volatile goto(LOCK_PREFIX " incl %0\n"
+			  " jg %l[exit]\n"
+			  : : "m" (v->counter)
+			  : "memory", "cc"
+			  : exit);
+	fail_fn(v);
+exit:
+	return;
+}
+#else
 #define __mutex_fastpath_unlock(v, fail_fn)			\
 do {								\
 	unsigned long dummy;					\
@@ -72,6 +101,7 @@ do {								\
 		     : "rax", "rsi", "rdx", "rcx",		\
 		       "r8", "r9", "r10", "r11", "memory");	\
 } while (0)
+#endif
 
 #define __mutex_slowpath_needs_to_unlock()	1
 
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index 9d09b4073b60..05af3b31d522 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -26,9 +26,9 @@
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void sync_set_bit(int nr, volatile unsigned long *addr)
+static inline void sync_set_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btsl %1,%0"
+	asm volatile("lock; bts %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -44,9 +44,9 @@ static inline void sync_set_bit(int nr, volatile unsigned long *addr)
  * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
  * in order to ensure changes are visible on other processors.
  */
-static inline void sync_clear_bit(int nr, volatile unsigned long *addr)
+static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btrl %1,%0"
+	asm volatile("lock; btr %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -61,9 +61,9 @@ static inline void sync_clear_bit(int nr, volatile unsigned long *addr)
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void sync_change_bit(int nr, volatile unsigned long *addr)
+static inline void sync_change_bit(long nr, volatile unsigned long *addr)
 {
-	asm volatile("lock; btcl %1,%0"
+	asm volatile("lock; btc %1,%0"
 		     : "+m" (ADDR)
 		     : "Ir" (nr)
 		     : "memory");
@@ -77,11 +77,11 @@ static inline void sync_change_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
 */
-static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
-	asm volatile("lock; btsl %2,%1\n\tsbbl %0,%0"
+	asm volatile("lock; bts %2,%1\n\tsbbl %0,%0"
 		     : "=r" (oldbit), "+m" (ADDR)
 		     : "Ir" (nr) : "memory");
 	return oldbit;
@@ -95,11 +95,11 @@ static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
 */
-static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
-	asm volatile("lock; btrl %2,%1\n\tsbbl %0,%0"
+	asm volatile("lock; btr %2,%1\n\tsbbl %0,%0"
 		     : "=r" (oldbit), "+m" (ADDR)
 		     : "Ir" (nr) : "memory");
 	return oldbit;
@@ -113,11 +113,11 @@ static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
 */
-static inline int sync_test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
 {
 	int oldbit;
 
-	asm volatile("lock; btcl %2,%1\n\tsbbl %0,%0"
+	asm volatile("lock; btc %2,%1\n\tsbbl %0,%0"
 		     : "=r" (oldbit), "+m" (ADDR)
 		     : "Ir" (nr) : "memory");
 	return oldbit;
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5ee26875baea..5838fa911aa0 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -153,16 +153,19 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
  * Careful: we have to cast the result to the type of the pointer
  * for sign reasons.
  *
- * The use of %edx as the register specifier is a bit of a
+ * The use of _ASM_DX as the register specifier is a bit of a
  * simplification, as gcc only cares about it as the starting point
  * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits
  * (%ecx being the next register in gcc's x86 register sequence), and
  * %rdx on 64 bits.
+ *
+ * Clang/LLVM cares about the size of the register, but still wants
+ * the base register for something that ends up being a pair.
 */
 #define get_user(x, ptr)						\
 ({									\
 	int __ret_gu;							\
-	register __inttype(*(ptr)) __val_gu asm("%edx");		\
+	register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX);		\
 	__chk_user_ptr(ptr);						\
 	might_fault();							\
 	asm volatile("call __get_user_%P3"				\
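
The get_user() hunk leans on explicit register variables: asm("%edx") pinned __val_gu to the DX register by name, and the patch now spells that name via "%" _ASM_DX so clang sees the size-correct register on each configuration. A stand-alone illustration of the underlying mechanism, assuming a 64-bit x86 target; the variable name is mine, not the kernel's:

    /* Pins a local to %rdx the way get_user() pins __val_gu;
     * a 32-bit build would name "edx" instead. */
    #include <stdio.h>

    int main(void)
    {
            register unsigned long val asm("rdx") = 42;

            /* The empty asm uses the pinned register as an operand so
             * the compiler honours the binding. */
            asm volatile("" : "+r" (val));
            printf("%lu\n", val);   /* 42, carried in %rdx */
            return 0;
    }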
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 5d7e51f3fd28..533a85e3a07e 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1,10 +1,8 @@
 # x86 Opcode Maps
 #
 # This is (mostly) based on following documentations.
-# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2
-#   (#325383-040US, October 2011)
-# - Intel(R) Advanced Vector Extensions Programming Reference
-#   (#319433-011,JUNE 2011).
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
+#   (#326018-047US, June 2013)
 #
 #<Opcode maps>
 # Table: table-name
@@ -29,6 +27,7 @@
 # - (F3): the last prefix is 0xF3
 # - (F2): the last prefix is 0xF2
 # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
+# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
 
 Table: one byte opcode
 Referrer:
@@ -246,8 +245,8 @@ c2: RETN Iw (f64)
 c3: RETN
 c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
 c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
-c6: Grp11 Eb,Ib (1A)
-c7: Grp11 Ev,Iz (1A)
+c6: Grp11A Eb,Ib (1A)
+c7: Grp11B Ev,Iz (1A)
 c8: ENTER Iw,Ib
 c9: LEAVE (d64)
 ca: RETF Iw
@@ -293,8 +292,8 @@ ef: OUT DX,eAX
 # 0xf0 - 0xff
 f0: LOCK (Prefix)
 f1:
-f2: REPNE (Prefix)
-f3: REP/REPE (Prefix)
+f2: REPNE (Prefix) | XACQUIRE (Prefix)
+f3: REP/REPE (Prefix) | XRELEASE (Prefix)
 f4: HLT
 f5: CMC
 f6: Grp3_1 Eb (1A)
@@ -326,7 +325,8 @@ AVXcode: 1
 0a:
 0b: UD2 (1B)
 0c:
-0d: NOP Ev | GrpP
+# AMD's prefetch group. Intel supports prefetchw(/1) only.
+0d: GrpP
 0e: FEMMS
 # 3DNow! uses the last imm byte as opcode extension.
 0f: 3DNow! Pq,Qq,Ib
@@ -729,12 +729,12 @@ dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
 dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
 de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
 df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
-f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
-f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
 f2: ANDN Gy,By,Ey (v)
 f3: Grp17 (1A)
 f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
-f6: MULX By,Gy,rDX,Ey (F2),(v)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
 f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
 EndTable
 
@@ -861,8 +861,8 @@ EndTable
 
 GrpTable: Grp7
 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
-1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
-2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
 3: LIDT Ms
 4: SMSW Mw/Rv
 5:
@@ -880,15 +880,21 @@ EndTable
 GrpTable: Grp9
 1: CMPXCHG8B/16B Mq/Mdq
 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
-7: VMPTRST Mq | VMPTRST Mq (F3)
+7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
 EndTable
 
 GrpTable: Grp10
 EndTable
 
-GrpTable: Grp11
-# Note: the operands are given by group opcode
-0: MOV
+# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+GrpTable: Grp11A
+0: MOV Eb,Ib
+7: XABORT Ib (000),(11B)
+EndTable
+
+GrpTable: Grp11B
+0: MOV Eb,Iz
+7: XBEGIN Jz (000),(11B)
 EndTable
 
 GrpTable: Grp12
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index e6773dc8ac41..093a892026f9 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -68,7 +68,7 @@ BEGIN {
 
 	lprefix1_expr = "\\((66|!F3)\\)"
 	lprefix2_expr = "\\(F3\\)"
-	lprefix3_expr = "\\((F2|!F3)\\)"
+	lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
 	lprefix_expr = "\\((66|F2|F3)\\)"
 	max_lprefix = 4
 
@@ -83,6 +83,8 @@ BEGIN {
 	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
 	prefix_num["REPNE"] = "INAT_PFX_REPNE"
 	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
+	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
 	prefix_num["LOCK"] = "INAT_PFX_LOCK"
 	prefix_num["SEG=CS"] = "INAT_PFX_CS"
 	prefix_num["SEG=DS"] = "INAT_PFX_DS"