aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 11:39:38 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-09-04 11:39:38 -0400
commit 3d7e5fc37f91c3ad4974262e173d9ba36139652a (patch)
tree 471484562a2c0341dbee0b4d2e10fdfd5d049c58
parent 6924a4672dd07dbe11d76fe597d17a092434232f (diff)
parent f69fa9a91f60fff6f2d8b658b7d84d235d9d89b7 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/asm changes from Ingo Molnar:
 "Main changes:

   - Apply low level mutex optimization on x86-64, by Wedson Almeida Filho.

   - Change bitops to be naturally 'long', by H Peter Anvin.

   - Add TSX-NI opcodes support to the x86 (instrumentation) decoder, by Masami Hiramatsu.

   - Add clang compatibility adjustments/workarounds, by Jan-Simon Möller"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, doc: Update uaccess.h comment to reflect clang changes
  x86, asm: Fix a compilation issue with clang
  x86, asm: Extend definitions of _ASM_* with a raw format
  x86, insn: Add new opcodes as of June, 2013
  x86/ia32/asm: Remove unused argument in macro
  x86, bitops: Change bitops to be native operand size
  x86: Use asm-goto to implement mutex fast path on x86-64
-rw-r--r-- arch/x86/ia32/ia32entry.S 2
-rw-r--r-- arch/x86/include/asm/asm.h 6
-rw-r--r-- arch/x86/include/asm/bitops.h 46
-rw-r--r-- arch/x86/include/asm/mutex_64.h 30
-rw-r--r-- arch/x86/include/asm/sync_bitops.h 24
-rw-r--r-- arch/x86/include/asm/uaccess.h 7
-rw-r--r-- arch/x86/lib/x86-opcode-map.txt 42
-rw-r--r-- arch/x86/tools/gen-insn-attr-x86.awk 4
8 files changed, 107 insertions, 54 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 474dc1b59f72..4299eb05023c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -452,7 +452,7 @@ ia32_badsys:
452 452
453 CFI_ENDPROC 453 CFI_ENDPROC
454 454
455 .macro PTREGSCALL label, func, arg 455 .macro PTREGSCALL label, func
456 ALIGN 456 ALIGN
457GLOBAL(\label) 457GLOBAL(\label)
458 leaq \func(%rip),%rax 458 leaq \func(%rip),%rax
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 1c2d247f65ce..4582e8e1cd1a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,21 +3,25 @@
3 3
4#ifdef __ASSEMBLY__ 4#ifdef __ASSEMBLY__
5# define __ASM_FORM(x) x 5# define __ASM_FORM(x) x
6# define __ASM_FORM_RAW(x) x
6# define __ASM_FORM_COMMA(x) x, 7# define __ASM_FORM_COMMA(x) x,
7#else 8#else
8# define __ASM_FORM(x) " " #x " " 9# define __ASM_FORM(x) " " #x " "
10# define __ASM_FORM_RAW(x) #x
9# define __ASM_FORM_COMMA(x) " " #x "," 11# define __ASM_FORM_COMMA(x) " " #x ","
10#endif 12#endif
11 13
12#ifdef CONFIG_X86_32 14#ifdef CONFIG_X86_32
13# define __ASM_SEL(a,b) __ASM_FORM(a) 15# define __ASM_SEL(a,b) __ASM_FORM(a)
16# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
14#else 17#else
15# define __ASM_SEL(a,b) __ASM_FORM(b) 18# define __ASM_SEL(a,b) __ASM_FORM(b)
19# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
16#endif 20#endif
17 21
18#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ 22#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \
19 inst##q##__VA_ARGS__) 23 inst##q##__VA_ARGS__)
20#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg) 24#define __ASM_REG(reg) __ASM_SEL_RAW(e##reg, r##reg)
21 25
22#define _ASM_PTR __ASM_SEL(.long, .quad) 26#define _ASM_PTR __ASM_SEL(.long, .quad)
23#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) 27#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 6dfd0195bb55..41639ce8fd63 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -15,6 +15,14 @@
15#include <linux/compiler.h> 15#include <linux/compiler.h>
16#include <asm/alternative.h> 16#include <asm/alternative.h>
17 17
18#if BITS_PER_LONG == 32
19# define _BITOPS_LONG_SHIFT 5
20#elif BITS_PER_LONG == 64
21# define _BITOPS_LONG_SHIFT 6
22#else
23# error "Unexpected BITS_PER_LONG"
24#endif
25
18#define BIT_64(n) (U64_C(1) << (n)) 26#define BIT_64(n) (U64_C(1) << (n))
19 27
20/* 28/*
@@ -59,7 +67,7 @@
59 * restricted to acting on a single-word quantity. 67 * restricted to acting on a single-word quantity.
60 */ 68 */
61static __always_inline void 69static __always_inline void
62set_bit(unsigned int nr, volatile unsigned long *addr) 70set_bit(long nr, volatile unsigned long *addr)
63{ 71{
64 if (IS_IMMEDIATE(nr)) { 72 if (IS_IMMEDIATE(nr)) {
65 asm volatile(LOCK_PREFIX "orb %1,%0" 73 asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -81,7 +89,7 @@ set_bit(unsigned int nr, volatile unsigned long *addr)
81 * If it's called on the same region of memory simultaneously, the effect 89 * If it's called on the same region of memory simultaneously, the effect
82 * may be that only one operation succeeds. 90 * may be that only one operation succeeds.
83 */ 91 */
84static inline void __set_bit(int nr, volatile unsigned long *addr) 92static inline void __set_bit(long nr, volatile unsigned long *addr)
85{ 93{
86 asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); 94 asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
87} 95}
@@ -97,7 +105,7 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
97 * in order to ensure changes are visible on other processors. 105 * in order to ensure changes are visible on other processors.
98 */ 106 */
99static __always_inline void 107static __always_inline void
100clear_bit(int nr, volatile unsigned long *addr) 108clear_bit(long nr, volatile unsigned long *addr)
101{ 109{
102 if (IS_IMMEDIATE(nr)) { 110 if (IS_IMMEDIATE(nr)) {
103 asm volatile(LOCK_PREFIX "andb %1,%0" 111 asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -118,13 +126,13 @@ clear_bit(int nr, volatile unsigned long *addr)
118 * clear_bit() is atomic and implies release semantics before the memory 126 * clear_bit() is atomic and implies release semantics before the memory
119 * operation. It can be used for an unlock. 127 * operation. It can be used for an unlock.
120 */ 128 */
121static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) 129static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
122{ 130{
123 barrier(); 131 barrier();
124 clear_bit(nr, addr); 132 clear_bit(nr, addr);
125} 133}
126 134
127static inline void __clear_bit(int nr, volatile unsigned long *addr) 135static inline void __clear_bit(long nr, volatile unsigned long *addr)
128{ 136{
129 asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); 137 asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
130} 138}
@@ -141,7 +149,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
141 * No memory barrier is required here, because x86 cannot reorder stores past 149 * No memory barrier is required here, because x86 cannot reorder stores past
142 * older loads. Same principle as spin_unlock. 150 * older loads. Same principle as spin_unlock.
143 */ 151 */
144static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) 152static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
145{ 153{
146 barrier(); 154 barrier();
147 __clear_bit(nr, addr); 155 __clear_bit(nr, addr);
@@ -159,7 +167,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
159 * If it's called on the same region of memory simultaneously, the effect 167 * If it's called on the same region of memory simultaneously, the effect
160 * may be that only one operation succeeds. 168 * may be that only one operation succeeds.
161 */ 169 */
162static inline void __change_bit(int nr, volatile unsigned long *addr) 170static inline void __change_bit(long nr, volatile unsigned long *addr)
163{ 171{
164 asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); 172 asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
165} 173}
@@ -173,7 +181,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
173 * Note that @nr may be almost arbitrarily large; this function is not 181 * Note that @nr may be almost arbitrarily large; this function is not
174 * restricted to acting on a single-word quantity. 182 * restricted to acting on a single-word quantity.
175 */ 183 */
176static inline void change_bit(int nr, volatile unsigned long *addr) 184static inline void change_bit(long nr, volatile unsigned long *addr)
177{ 185{
178 if (IS_IMMEDIATE(nr)) { 186 if (IS_IMMEDIATE(nr)) {
179 asm volatile(LOCK_PREFIX "xorb %1,%0" 187 asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -194,7 +202,7 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
194 * This operation is atomic and cannot be reordered. 202 * This operation is atomic and cannot be reordered.
195 * It also implies a memory barrier. 203 * It also implies a memory barrier.
196 */ 204 */
197static inline int test_and_set_bit(int nr, volatile unsigned long *addr) 205static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
198{ 206{
199 int oldbit; 207 int oldbit;
200 208
@@ -212,7 +220,7 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
212 * This is the same as test_and_set_bit on x86. 220 * This is the same as test_and_set_bit on x86.
213 */ 221 */
214static __always_inline int 222static __always_inline int
215test_and_set_bit_lock(int nr, volatile unsigned long *addr) 223test_and_set_bit_lock(long nr, volatile unsigned long *addr)
216{ 224{
217 return test_and_set_bit(nr, addr); 225 return test_and_set_bit(nr, addr);
218} 226}
@@ -226,7 +234,7 @@ test_and_set_bit_lock(int nr, volatile unsigned long *addr)
226 * If two examples of this operation race, one can appear to succeed 234 * If two examples of this operation race, one can appear to succeed
227 * but actually fail. You must protect multiple accesses with a lock. 235 * but actually fail. You must protect multiple accesses with a lock.
228 */ 236 */
229static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) 237static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
230{ 238{
231 int oldbit; 239 int oldbit;
232 240
@@ -245,7 +253,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
245 * This operation is atomic and cannot be reordered. 253 * This operation is atomic and cannot be reordered.
246 * It also implies a memory barrier. 254 * It also implies a memory barrier.
247 */ 255 */
248static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) 256static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
249{ 257{
250 int oldbit; 258 int oldbit;
251 259
@@ -272,7 +280,7 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
272 * accessed from a hypervisor on the same CPU if running in a VM: don't change 280 * accessed from a hypervisor on the same CPU if running in a VM: don't change
273 * this without also updating arch/x86/kernel/kvm.c 281 * this without also updating arch/x86/kernel/kvm.c
274 */ 282 */
275static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) 283static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
276{ 284{
277 int oldbit; 285 int oldbit;
278 286
@@ -284,7 +292,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
284} 292}
285 293
286/* WARNING: non atomic and it can be reordered! */ 294/* WARNING: non atomic and it can be reordered! */
287static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) 295static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
288{ 296{
289 int oldbit; 297 int oldbit;
290 298
@@ -304,7 +312,7 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
304 * This operation is atomic and cannot be reordered. 312 * This operation is atomic and cannot be reordered.
305 * It also implies a memory barrier. 313 * It also implies a memory barrier.
306 */ 314 */
307static inline int test_and_change_bit(int nr, volatile unsigned long *addr) 315static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
308{ 316{
309 int oldbit; 317 int oldbit;
310 318
@@ -315,13 +323,13 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
315 return oldbit; 323 return oldbit;
316} 324}
317 325
318static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) 326static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
319{ 327{
320 return ((1UL << (nr % BITS_PER_LONG)) & 328 return ((1UL << (nr & (BITS_PER_LONG-1))) &
321 (addr[nr / BITS_PER_LONG])) != 0; 329 (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
322} 330}
323 331
324static inline int variable_test_bit(int nr, volatile const unsigned long *addr) 332static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
325{ 333{
326 int oldbit; 334 int oldbit;
327 335
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 2c543fff241b..e7e6751648ed 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -16,6 +16,20 @@
16 * 16 *
17 * Atomically decrements @v and calls <fail_fn> if the result is negative. 17 * Atomically decrements @v and calls <fail_fn> if the result is negative.
18 */ 18 */
19#ifdef CC_HAVE_ASM_GOTO
20static inline void __mutex_fastpath_lock(atomic_t *v,
21 void (*fail_fn)(atomic_t *))
22{
23 asm volatile goto(LOCK_PREFIX " decl %0\n"
24 " jns %l[exit]\n"
25 : : "m" (v->counter)
26 : "memory", "cc"
27 : exit);
28 fail_fn(v);
29exit:
30 return;
31}
32#else
19#define __mutex_fastpath_lock(v, fail_fn) \ 33#define __mutex_fastpath_lock(v, fail_fn) \
20do { \ 34do { \
21 unsigned long dummy; \ 35 unsigned long dummy; \
@@ -32,6 +46,7 @@ do { \
32 : "rax", "rsi", "rdx", "rcx", \ 46 : "rax", "rsi", "rdx", "rcx", \
33 "r8", "r9", "r10", "r11", "memory"); \ 47 "r8", "r9", "r10", "r11", "memory"); \
34} while (0) 48} while (0)
49#endif
35 50
36/** 51/**
37 * __mutex_fastpath_lock_retval - try to take the lock by moving the count 52 * __mutex_fastpath_lock_retval - try to take the lock by moving the count
@@ -56,6 +71,20 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
56 * 71 *
57 * Atomically increments @v and calls <fail_fn> if the result is nonpositive. 72 * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
58 */ 73 */
74#ifdef CC_HAVE_ASM_GOTO
75static inline void __mutex_fastpath_unlock(atomic_t *v,
76 void (*fail_fn)(atomic_t *))
77{
78 asm volatile goto(LOCK_PREFIX " incl %0\n"
79 " jg %l[exit]\n"
80 : : "m" (v->counter)
81 : "memory", "cc"
82 : exit);
83 fail_fn(v);
84exit:
85 return;
86}
87#else
59#define __mutex_fastpath_unlock(v, fail_fn) \ 88#define __mutex_fastpath_unlock(v, fail_fn) \
60do { \ 89do { \
61 unsigned long dummy; \ 90 unsigned long dummy; \
@@ -72,6 +101,7 @@ do { \
72 : "rax", "rsi", "rdx", "rcx", \ 101 : "rax", "rsi", "rdx", "rcx", \
73 "r8", "r9", "r10", "r11", "memory"); \ 102 "r8", "r9", "r10", "r11", "memory"); \
74} while (0) 103} while (0)
104#endif
75 105
76#define __mutex_slowpath_needs_to_unlock() 1 106#define __mutex_slowpath_needs_to_unlock() 1
77 107
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index 9d09b4073b60..05af3b31d522 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -26,9 +26,9 @@
26 * Note that @nr may be almost arbitrarily large; this function is not 26 * Note that @nr may be almost arbitrarily large; this function is not
27 * restricted to acting on a single-word quantity. 27 * restricted to acting on a single-word quantity.
28 */ 28 */
29static inline void sync_set_bit(int nr, volatile unsigned long *addr) 29static inline void sync_set_bit(long nr, volatile unsigned long *addr)
30{ 30{
31 asm volatile("lock; btsl %1,%0" 31 asm volatile("lock; bts %1,%0"
32 : "+m" (ADDR) 32 : "+m" (ADDR)
33 : "Ir" (nr) 33 : "Ir" (nr)
34 : "memory"); 34 : "memory");
@@ -44,9 +44,9 @@ static inline void sync_set_bit(int nr, volatile unsigned long *addr)
44 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() 44 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
45 * in order to ensure changes are visible on other processors. 45 * in order to ensure changes are visible on other processors.
46 */ 46 */
47static inline void sync_clear_bit(int nr, volatile unsigned long *addr) 47static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
48{ 48{
49 asm volatile("lock; btrl %1,%0" 49 asm volatile("lock; btr %1,%0"
50 : "+m" (ADDR) 50 : "+m" (ADDR)
51 : "Ir" (nr) 51 : "Ir" (nr)
52 : "memory"); 52 : "memory");
@@ -61,9 +61,9 @@ static inline void sync_clear_bit(int nr, volatile unsigned long *addr)
61 * Note that @nr may be almost arbitrarily large; this function is not 61 * Note that @nr may be almost arbitrarily large; this function is not
62 * restricted to acting on a single-word quantity. 62 * restricted to acting on a single-word quantity.
63 */ 63 */
64static inline void sync_change_bit(int nr, volatile unsigned long *addr) 64static inline void sync_change_bit(long nr, volatile unsigned long *addr)
65{ 65{
66 asm volatile("lock; btcl %1,%0" 66 asm volatile("lock; btc %1,%0"
67 : "+m" (ADDR) 67 : "+m" (ADDR)
68 : "Ir" (nr) 68 : "Ir" (nr)
69 : "memory"); 69 : "memory");
@@ -77,11 +77,11 @@ static inline void sync_change_bit(int nr, volatile unsigned long *addr)
77 * This operation is atomic and cannot be reordered. 77 * This operation is atomic and cannot be reordered.
78 * It also implies a memory barrier. 78 * It also implies a memory barrier.
79 */ 79 */
80static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr) 80static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
81{ 81{
82 int oldbit; 82 int oldbit;
83 83
84 asm volatile("lock; btsl %2,%1\n\tsbbl %0,%0" 84 asm volatile("lock; bts %2,%1\n\tsbbl %0,%0"
85 : "=r" (oldbit), "+m" (ADDR) 85 : "=r" (oldbit), "+m" (ADDR)
86 : "Ir" (nr) : "memory"); 86 : "Ir" (nr) : "memory");
87 return oldbit; 87 return oldbit;
@@ -95,11 +95,11 @@ static inline int sync_test_and_set_bit(int nr, volatile unsigned long *addr)
95 * This operation is atomic and cannot be reordered. 95 * This operation is atomic and cannot be reordered.
96 * It also implies a memory barrier. 96 * It also implies a memory barrier.
97 */ 97 */
98static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr) 98static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
99{ 99{
100 int oldbit; 100 int oldbit;
101 101
102 asm volatile("lock; btrl %2,%1\n\tsbbl %0,%0" 102 asm volatile("lock; btr %2,%1\n\tsbbl %0,%0"
103 : "=r" (oldbit), "+m" (ADDR) 103 : "=r" (oldbit), "+m" (ADDR)
104 : "Ir" (nr) : "memory"); 104 : "Ir" (nr) : "memory");
105 return oldbit; 105 return oldbit;
@@ -113,11 +113,11 @@ static inline int sync_test_and_clear_bit(int nr, volatile unsigned long *addr)
113 * This operation is atomic and cannot be reordered. 113 * This operation is atomic and cannot be reordered.
114 * It also implies a memory barrier. 114 * It also implies a memory barrier.
115 */ 115 */
116static inline int sync_test_and_change_bit(int nr, volatile unsigned long *addr) 116static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
117{ 117{
118 int oldbit; 118 int oldbit;
119 119
120 asm volatile("lock; btcl %2,%1\n\tsbbl %0,%0" 120 asm volatile("lock; btc %2,%1\n\tsbbl %0,%0"
121 : "=r" (oldbit), "+m" (ADDR) 121 : "=r" (oldbit), "+m" (ADDR)
122 : "Ir" (nr) : "memory"); 122 : "Ir" (nr) : "memory");
123 return oldbit; 123 return oldbit;
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5ee26875baea..5838fa911aa0 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -153,16 +153,19 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
153 * Careful: we have to cast the result to the type of the pointer 153 * Careful: we have to cast the result to the type of the pointer
154 * for sign reasons. 154 * for sign reasons.
155 * 155 *
156 * The use of %edx as the register specifier is a bit of a 156 * The use of _ASM_DX as the register specifier is a bit of a
157 * simplification, as gcc only cares about it as the starting point 157 * simplification, as gcc only cares about it as the starting point
158 * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits 158 * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits
159 * (%ecx being the next register in gcc's x86 register sequence), and 159 * (%ecx being the next register in gcc's x86 register sequence), and
160 * %rdx on 64 bits. 160 * %rdx on 64 bits.
161 *
162 * Clang/LLVM cares about the size of the register, but still wants
163 * the base register for something that ends up being a pair.
161 */ 164 */
162#define get_user(x, ptr) \ 165#define get_user(x, ptr) \
163({ \ 166({ \
164 int __ret_gu; \ 167 int __ret_gu; \
165 register __inttype(*(ptr)) __val_gu asm("%edx"); \ 168 register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
166 __chk_user_ptr(ptr); \ 169 __chk_user_ptr(ptr); \
167 might_fault(); \ 170 might_fault(); \
168 asm volatile("call __get_user_%P3" \ 171 asm volatile("call __get_user_%P3" \
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 5d7e51f3fd28..533a85e3a07e 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1,10 +1,8 @@
1# x86 Opcode Maps 1# x86 Opcode Maps
2# 2#
3# This is (mostly) based on following documentations. 3# This is (mostly) based on following documentations.
4# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2 4# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
5# (#325383-040US, October 2011) 5# (#326018-047US, June 2013)
6# - Intel(R) Advanced Vector Extensions Programming Reference
7# (#319433-011,JUNE 2011).
8# 6#
9#<Opcode maps> 7#<Opcode maps>
10# Table: table-name 8# Table: table-name
@@ -29,6 +27,7 @@
29# - (F3): the last prefix is 0xF3 27# - (F3): the last prefix is 0xF3
30# - (F2): the last prefix is 0xF2 28# - (F2): the last prefix is 0xF2
31# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) 29# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
30# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
32 31
33Table: one byte opcode 32Table: one byte opcode
34Referrer: 33Referrer:
@@ -246,8 +245,8 @@ c2: RETN Iw (f64)
246c3: RETN 245c3: RETN
247c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) 246c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
248c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) 247c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
249c6: Grp11 Eb,Ib (1A) 248c6: Grp11A Eb,Ib (1A)
250c7: Grp11 Ev,Iz (1A) 249c7: Grp11B Ev,Iz (1A)
251c8: ENTER Iw,Ib 250c8: ENTER Iw,Ib
252c9: LEAVE (d64) 251c9: LEAVE (d64)
253ca: RETF Iw 252ca: RETF Iw
@@ -293,8 +292,8 @@ ef: OUT DX,eAX
293# 0xf0 - 0xff 292# 0xf0 - 0xff
294f0: LOCK (Prefix) 293f0: LOCK (Prefix)
295f1: 294f1:
296f2: REPNE (Prefix) 295f2: REPNE (Prefix) | XACQUIRE (Prefix)
297f3: REP/REPE (Prefix) 296f3: REP/REPE (Prefix) | XRELEASE (Prefix)
298f4: HLT 297f4: HLT
299f5: CMC 298f5: CMC
300f6: Grp3_1 Eb (1A) 299f6: Grp3_1 Eb (1A)
@@ -326,7 +325,8 @@ AVXcode: 1
3260a: 3250a:
3270b: UD2 (1B) 3260b: UD2 (1B)
3280c: 3270c:
3290d: NOP Ev | GrpP 328# AMD's prefetch group. Intel supports prefetchw(/1) only.
3290d: GrpP
3300e: FEMMS 3300e: FEMMS
331# 3DNow! uses the last imm byte as opcode extension. 331# 3DNow! uses the last imm byte as opcode extension.
3320f: 3DNow! Pq,Qq,Ib 3320f: 3DNow! Pq,Qq,Ib
@@ -729,12 +729,12 @@ dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
729dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) 729dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
730de: VAESDEC Vdq,Hdq,Wdq (66),(v1) 730de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
731df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) 731df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
732f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) 732f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
733f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) 733f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
734f2: ANDN Gy,By,Ey (v) 734f2: ANDN Gy,By,Ey (v)
735f3: Grp17 (1A) 735f3: Grp17 (1A)
736f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) 736f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
737f6: MULX By,Gy,rDX,Ey (F2),(v) 737f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
738f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) 738f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
739EndTable 739EndTable
740 740
@@ -861,8 +861,8 @@ EndTable
861 861
862GrpTable: Grp7 862GrpTable: Grp7
8630: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) 8630: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
8641: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) 8641: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
8652: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) 8652: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
8663: LIDT Ms 8663: LIDT Ms
8674: SMSW Mw/Rv 8674: SMSW Mw/Rv
8685: 8685:
@@ -880,15 +880,21 @@ EndTable
880GrpTable: Grp9 880GrpTable: Grp9
8811: CMPXCHG8B/16B Mq/Mdq 8811: CMPXCHG8B/16B Mq/Mdq
8826: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) 8826: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
8837: VMPTRST Mq | VMPTRST Mq (F3) 8837: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
884EndTable 884EndTable
885 885
886GrpTable: Grp10 886GrpTable: Grp10
887EndTable 887EndTable
888 888
889GrpTable: Grp11 889# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
890# Note: the operands are given by group opcode 890GrpTable: Grp11A
8910: MOV 8910: MOV Eb,Ib
8927: XABORT Ib (000),(11B)
893EndTable
894
895GrpTable: Grp11B
8960: MOV Eb,Iz
8977: XBEGIN Jz (000),(11B)
892EndTable 898EndTable
893 899
894GrpTable: Grp12 900GrpTable: Grp12
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index e6773dc8ac41..093a892026f9 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -68,7 +68,7 @@ BEGIN {
68 68
69 lprefix1_expr = "\\((66|!F3)\\)" 69 lprefix1_expr = "\\((66|!F3)\\)"
70 lprefix2_expr = "\\(F3\\)" 70 lprefix2_expr = "\\(F3\\)"
71 lprefix3_expr = "\\((F2|!F3)\\)" 71 lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
72 lprefix_expr = "\\((66|F2|F3)\\)" 72 lprefix_expr = "\\((66|F2|F3)\\)"
73 max_lprefix = 4 73 max_lprefix = 4
74 74
@@ -83,6 +83,8 @@ BEGIN {
83 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" 83 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
84 prefix_num["REPNE"] = "INAT_PFX_REPNE" 84 prefix_num["REPNE"] = "INAT_PFX_REPNE"
85 prefix_num["REP/REPE"] = "INAT_PFX_REPE" 85 prefix_num["REP/REPE"] = "INAT_PFX_REPE"
86 prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
87 prefix_num["XRELEASE"] = "INAT_PFX_REPE"
86 prefix_num["LOCK"] = "INAT_PFX_LOCK" 88 prefix_num["LOCK"] = "INAT_PFX_LOCK"
87 prefix_num["SEG=CS"] = "INAT_PFX_CS" 89 prefix_num["SEG=CS"] = "INAT_PFX_CS"
88 prefix_num["SEG=DS"] = "INAT_PFX_DS" 90 prefix_num["SEG=DS"] = "INAT_PFX_DS"