author     Linus Torvalds <torvalds@linux-foundation.org>   2012-01-06 16:59:14 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-01-06 16:59:14 -0500
commit     69734b644bf19f20d2989e1a8e5bf59c837ee5c1 (patch)
tree       b1afd22d6e84db04516e466c223d67c1c340e6d9 /arch/x86/include/asm
parent     67b0243131150391125d8d0beb5359d7aec78b55 (diff)
parent     ceb7b40b65539a771d1bfaf47660ac0ee57e0c4f (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
x86: Fix atomic64_xxx_cx8() functions
x86: Fix and improve cmpxchg_double{,_local}()
x86_64, asm: Optimise fls(), ffs() and fls64()
x86, bitops: Move fls64.h inside __KERNEL__
x86: Fix and improve percpu_cmpxchg{8,16}b_double()
x86: Report cpb and eff_freq_ro flags correctly
x86/i386: Use less assembly in strlen(), speed things up a bit
x86: Use the same node_distance for 32 and 64-bit
x86: Fix rflags in FAKE_STACK_FRAME
x86: Clean up and extend do_int3()
x86: Call do_notify_resume() with interrupts enabled
x86/div64: Add a micro-optimization shortcut if base is power of two
x86-64: Cleanup some assembly entry points
x86-64: Slightly shorten line system call entry and exit paths
x86-64: Reduce amount of redundant code generated for invalidate_interruptNN
x86-64: Slightly shorten int_ret_from_sys_call
x86, efi: Convert efi_phys_get_time() args to physical addresses
x86: Default to vsyscall=emulate
x86-64: Set siginfo and context on vsyscall emulation faults
x86: consolidate xchg and xadd macros
...
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r--  arch/x86/include/asm/alternative-asm.h   |   4
-rw-r--r--  arch/x86/include/asm/bitops.h            |  76
-rw-r--r--  arch/x86/include/asm/cmpxchg.h           | 163
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h        |  46
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h        |  43
-rw-r--r--  arch/x86/include/asm/div64.h             |  22
-rw-r--r--  arch/x86/include/asm/percpu.h            |  53
-rw-r--r--  arch/x86/include/asm/processor-flags.h   |   1
-rw-r--r--  arch/x86/include/asm/spinlock.h          |  15
-rw-r--r--  arch/x86/include/asm/thread_info.h       |   9
-rw-r--r--  arch/x86/include/asm/topology.h          |   2
-rw-r--r--  arch/x86/include/asm/uaccess.h           |   2
12 files changed, 204 insertions(+), 232 deletions(-)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 091508b533b4..952bd0100c5c 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@
4 | 4 | ||
5 | #ifdef CONFIG_SMP | 5 | #ifdef CONFIG_SMP |
6 | .macro LOCK_PREFIX | 6 | .macro LOCK_PREFIX |
7 | 1: lock | 7 | 672: lock |
8 | .section .smp_locks,"a" | 8 | .section .smp_locks,"a" |
9 | .balign 4 | 9 | .balign 4 |
10 | .long 1b - . | 10 | .long 672b - . |
11 | .previous | 11 | .previous |
12 | .endm | 12 | .endm |
13 | #else | 13 | #else |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 1775d6e5920e..b97596e2b68c 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word)
380 | return word; | 380 | return word; |
381 | } | 381 | } |
382 | 382 | ||
383 | #undef ADDR | ||
384 | |||
383 | #ifdef __KERNEL__ | 385 | #ifdef __KERNEL__ |
384 | /** | 386 | /** |
385 | * ffs - find first set bit in word | 387 | * ffs - find first set bit in word |
@@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word)
395 | static inline int ffs(int x) | 397 | static inline int ffs(int x) |
396 | { | 398 | { |
397 | int r; | 399 | int r; |
398 | #ifdef CONFIG_X86_CMOV | 400 | |
401 | #ifdef CONFIG_X86_64 | ||
402 | /* | ||
403 | * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the | ||
404 | * dest reg is undefined if x==0, but their CPU architect says its | ||
405 | * value is written to set it to the same as before, except that the | ||
406 | * top 32 bits will be cleared. | ||
407 | * | ||
408 | * We cannot do this on 32 bits because at the very least some | ||
409 | * 486 CPUs did not behave this way. | ||
410 | */ | ||
411 | long tmp = -1; | ||
412 | asm("bsfl %1,%0" | ||
413 | : "=r" (r) | ||
414 | : "rm" (x), "0" (tmp)); | ||
415 | #elif defined(CONFIG_X86_CMOV) | ||
399 | asm("bsfl %1,%0\n\t" | 416 | asm("bsfl %1,%0\n\t" |
400 | "cmovzl %2,%0" | 417 | "cmovzl %2,%0" |
401 | : "=r" (r) : "rm" (x), "r" (-1)); | 418 | : "=&r" (r) : "rm" (x), "r" (-1)); |
402 | #else | 419 | #else |
403 | asm("bsfl %1,%0\n\t" | 420 | asm("bsfl %1,%0\n\t" |
404 | "jnz 1f\n\t" | 421 | "jnz 1f\n\t" |
@@ -422,7 +439,22 @@ static inline int ffs(int x)
422 | static inline int fls(int x) | 439 | static inline int fls(int x) |
423 | { | 440 | { |
424 | int r; | 441 | int r; |
425 | #ifdef CONFIG_X86_CMOV | 442 | |
443 | #ifdef CONFIG_X86_64 | ||
444 | /* | ||
445 | * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the | ||
446 | * dest reg is undefined if x==0, but their CPU architect says its | ||
447 | * value is written to set it to the same as before, except that the | ||
448 | * top 32 bits will be cleared. | ||
449 | * | ||
450 | * We cannot do this on 32 bits because at the very least some | ||
451 | * 486 CPUs did not behave this way. | ||
452 | */ | ||
453 | long tmp = -1; | ||
454 | asm("bsrl %1,%0" | ||
455 | : "=r" (r) | ||
456 | : "rm" (x), "0" (tmp)); | ||
457 | #elif defined(CONFIG_X86_CMOV) | ||
426 | asm("bsrl %1,%0\n\t" | 458 | asm("bsrl %1,%0\n\t" |
427 | "cmovzl %2,%0" | 459 | "cmovzl %2,%0" |
428 | : "=&r" (r) : "rm" (x), "rm" (-1)); | 460 | : "=&r" (r) : "rm" (x), "rm" (-1)); |
@@ -434,11 +466,35 @@ static inline int fls(int x)
434 | #endif | 466 | #endif |
435 | return r + 1; | 467 | return r + 1; |
436 | } | 468 | } |
437 | #endif /* __KERNEL__ */ | ||
438 | |||
439 | #undef ADDR | ||
440 | 469 | ||
441 | #ifdef __KERNEL__ | 470 | /** |
471 | * fls64 - find last set bit in a 64-bit word | ||
472 | * @x: the word to search | ||
473 | * | ||
474 | * This is defined in a similar way as the libc and compiler builtin | ||
475 | * ffsll, but returns the position of the most significant set bit. | ||
476 | * | ||
477 | * fls64(value) returns 0 if value is 0 or the position of the last | ||
478 | * set bit if value is nonzero. The last (most significant) bit is | ||
479 | * at position 64. | ||
480 | */ | ||
481 | #ifdef CONFIG_X86_64 | ||
482 | static __always_inline int fls64(__u64 x) | ||
483 | { | ||
484 | long bitpos = -1; | ||
485 | /* | ||
486 | * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the | ||
487 | * dest reg is undefined if x==0, but their CPU architect says its | ||
488 | * value is written to set it to the same as before. | ||
489 | */ | ||
490 | asm("bsrq %1,%0" | ||
491 | : "+r" (bitpos) | ||
492 | : "rm" (x)); | ||
493 | return bitpos + 1; | ||
494 | } | ||
495 | #else | ||
496 | #include <asm-generic/bitops/fls64.h> | ||
497 | #endif | ||
442 | 498 | ||
443 | #include <asm-generic/bitops/find.h> | 499 | #include <asm-generic/bitops/find.h> |
444 | 500 | ||
@@ -450,12 +506,6 @@ static inline int fls(int x)
450 | 506 | ||
451 | #include <asm-generic/bitops/const_hweight.h> | 507 | #include <asm-generic/bitops/const_hweight.h> |
452 | 508 | ||
453 | #endif /* __KERNEL__ */ | ||
454 | |||
455 | #include <asm-generic/bitops/fls64.h> | ||
456 | |||
457 | #ifdef __KERNEL__ | ||
458 | |||
459 | #include <asm-generic/bitops/le.h> | 509 | #include <asm-generic/bitops/le.h> |
460 | 510 | ||
461 | #include <asm-generic/bitops/ext2-atomic-setbit.h> | 511 | #include <asm-generic/bitops/ext2-atomic-setbit.h> |
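
Note: the fls64() comment above documents the return convention (1-based bit positions, 0 for an input of 0, most significant bit at position 64). A minimal standalone sketch of that convention, plain userspace C using a compiler builtin instead of the BSRQ instruction, purely for illustration:

#include <stdio.h>
#include <stdint.h>

/* Same result convention as the kernel's fls64(), computed with a
 * GCC/Clang builtin rather than BSRQ; not kernel code. */
static int sketch_fls64(uint64_t x)
{
        return x ? 64 - __builtin_clzll(x) : 0;
}

int main(void)
{
        printf("fls64(0)        = %d\n", sketch_fls64(0));            /* 0  */
        printf("fls64(1)        = %d\n", sketch_fls64(1));            /* 1  */
        printf("fls64(1ULL<<63) = %d\n", sketch_fls64(1ULL << 63));   /* 64 */
        return 0;
}
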
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 5d3acdf5a7a6..0c9fa2745f13 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void)
14 | __compiletime_error("Bad argument size for cmpxchg"); | 14 | __compiletime_error("Bad argument size for cmpxchg"); |
15 | extern void __xadd_wrong_size(void) | 15 | extern void __xadd_wrong_size(void) |
16 | __compiletime_error("Bad argument size for xadd"); | 16 | __compiletime_error("Bad argument size for xadd"); |
17 | extern void __add_wrong_size(void) | ||
18 | __compiletime_error("Bad argument size for add"); | ||
17 | 19 | ||
18 | /* | 20 | /* |
19 | * Constants for operation sizes. On 32-bit, the 64-bit size it set to | 21 | * Constants for operation sizes. On 32-bit, the 64-bit size it set to |
@@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void)
31 | #define __X86_CASE_Q -1 /* sizeof will never return -1 */ | 33 | #define __X86_CASE_Q -1 /* sizeof will never return -1 */ |
32 | #endif | 34 | #endif |
33 | 35 | ||
36 | /* | ||
37 | * An exchange-type operation, which takes a value and a pointer, and | ||
38 | * returns a the old value. | ||
39 | */ | ||
40 | #define __xchg_op(ptr, arg, op, lock) \ | ||
41 | ({ \ | ||
42 | __typeof__ (*(ptr)) __ret = (arg); \ | ||
43 | switch (sizeof(*(ptr))) { \ | ||
44 | case __X86_CASE_B: \ | ||
45 | asm volatile (lock #op "b %b0, %1\n" \ | ||
46 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
47 | : : "memory", "cc"); \ | ||
48 | break; \ | ||
49 | case __X86_CASE_W: \ | ||
50 | asm volatile (lock #op "w %w0, %1\n" \ | ||
51 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
52 | : : "memory", "cc"); \ | ||
53 | break; \ | ||
54 | case __X86_CASE_L: \ | ||
55 | asm volatile (lock #op "l %0, %1\n" \ | ||
56 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
57 | : : "memory", "cc"); \ | ||
58 | break; \ | ||
59 | case __X86_CASE_Q: \ | ||
60 | asm volatile (lock #op "q %q0, %1\n" \ | ||
61 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
62 | : : "memory", "cc"); \ | ||
63 | break; \ | ||
64 | default: \ | ||
65 | __ ## op ## _wrong_size(); \ | ||
66 | } \ | ||
67 | __ret; \ | ||
68 | }) | ||
69 | |||
34 | /* | 70 | /* |
35 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. | 71 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. |
36 | * Since this is generally used to protect other memory information, we | 72 | * Since this is generally used to protect other memory information, we |
37 | * use "asm volatile" and "memory" clobbers to prevent gcc from moving | 73 | * use "asm volatile" and "memory" clobbers to prevent gcc from moving |
38 | * information around. | 74 | * information around. |
39 | */ | 75 | */ |
40 | #define __xchg(x, ptr, size) \ | 76 | #define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "") |
41 | ({ \ | ||
42 | __typeof(*(ptr)) __x = (x); \ | ||
43 | switch (size) { \ | ||
44 | case __X86_CASE_B: \ | ||
45 | { \ | ||
46 | volatile u8 *__ptr = (volatile u8 *)(ptr); \ | ||
47 | asm volatile("xchgb %0,%1" \ | ||
48 | : "=q" (__x), "+m" (*__ptr) \ | ||
49 | : "0" (__x) \ | ||
50 | : "memory"); \ | ||
51 | break; \ | ||
52 | } \ | ||
53 | case __X86_CASE_W: \ | ||
54 | { \ | ||
55 | volatile u16 *__ptr = (volatile u16 *)(ptr); \ | ||
56 | asm volatile("xchgw %0,%1" \ | ||
57 | : "=r" (__x), "+m" (*__ptr) \ | ||
58 | : "0" (__x) \ | ||
59 | : "memory"); \ | ||
60 | break; \ | ||
61 | } \ | ||
62 | case __X86_CASE_L: \ | ||
63 | { \ | ||
64 | volatile u32 *__ptr = (volatile u32 *)(ptr); \ | ||
65 | asm volatile("xchgl %0,%1" \ | ||
66 | : "=r" (__x), "+m" (*__ptr) \ | ||
67 | : "0" (__x) \ | ||
68 | : "memory"); \ | ||
69 | break; \ | ||
70 | } \ | ||
71 | case __X86_CASE_Q: \ | ||
72 | { \ | ||
73 | volatile u64 *__ptr = (volatile u64 *)(ptr); \ | ||
74 | asm volatile("xchgq %0,%1" \ | ||
75 | : "=r" (__x), "+m" (*__ptr) \ | ||
76 | : "0" (__x) \ | ||
77 | : "memory"); \ | ||
78 | break; \ | ||
79 | } \ | ||
80 | default: \ | ||
81 | __xchg_wrong_size(); \ | ||
82 | } \ | ||
83 | __x; \ | ||
84 | }) | ||
85 | |||
86 | #define xchg(ptr, v) \ | ||
87 | __xchg((v), (ptr), sizeof(*ptr)) | ||
88 | 77 | ||
89 | /* | 78 | /* |
90 | * Atomic compare and exchange. Compare OLD with MEM, if identical, | 79 | * Atomic compare and exchange. Compare OLD with MEM, if identical, |
@@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void)
165 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) | 154 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) |
166 | #endif | 155 | #endif |
167 | 156 | ||
168 | #define __xadd(ptr, inc, lock) \ | 157 | /* |
158 | * xadd() adds "inc" to "*ptr" and atomically returns the previous | ||
159 | * value of "*ptr". | ||
160 | * | ||
161 | * xadd() is locked when multiple CPUs are online | ||
162 | * xadd_sync() is always locked | ||
163 | * xadd_local() is never locked | ||
164 | */ | ||
165 | #define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) | ||
166 | #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) | ||
167 | #define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") | ||
168 | #define xadd_local(ptr, inc) __xadd((ptr), (inc), "") | ||
169 | |||
170 | #define __add(ptr, inc, lock) \ | ||
169 | ({ \ | 171 | ({ \ |
170 | __typeof__ (*(ptr)) __ret = (inc); \ | 172 | __typeof__ (*(ptr)) __ret = (inc); \ |
171 | switch (sizeof(*(ptr))) { \ | 173 | switch (sizeof(*(ptr))) { \ |
172 | case __X86_CASE_B: \ | 174 | case __X86_CASE_B: \ |
173 | asm volatile (lock "xaddb %b0, %1\n" \ | 175 | asm volatile (lock "addb %b1, %0\n" \ |
174 | : "+r" (__ret), "+m" (*(ptr)) \ | 176 | : "+m" (*(ptr)) : "ri" (inc) \ |
175 | : : "memory", "cc"); \ | 177 | : "memory", "cc"); \ |
176 | break; \ | 178 | break; \ |
177 | case __X86_CASE_W: \ | 179 | case __X86_CASE_W: \ |
178 | asm volatile (lock "xaddw %w0, %1\n" \ | 180 | asm volatile (lock "addw %w1, %0\n" \ |
179 | : "+r" (__ret), "+m" (*(ptr)) \ | 181 | : "+m" (*(ptr)) : "ri" (inc) \ |
180 | : : "memory", "cc"); \ | 182 | : "memory", "cc"); \ |
181 | break; \ | 183 | break; \ |
182 | case __X86_CASE_L: \ | 184 | case __X86_CASE_L: \ |
183 | asm volatile (lock "xaddl %0, %1\n" \ | 185 | asm volatile (lock "addl %1, %0\n" \ |
184 | : "+r" (__ret), "+m" (*(ptr)) \ | 186 | : "+m" (*(ptr)) : "ri" (inc) \ |
185 | : : "memory", "cc"); \ | 187 | : "memory", "cc"); \ |
186 | break; \ | 188 | break; \ |
187 | case __X86_CASE_Q: \ | 189 | case __X86_CASE_Q: \ |
188 | asm volatile (lock "xaddq %q0, %1\n" \ | 190 | asm volatile (lock "addq %1, %0\n" \ |
189 | : "+r" (__ret), "+m" (*(ptr)) \ | 191 | : "+m" (*(ptr)) : "ri" (inc) \ |
190 | : : "memory", "cc"); \ | 192 | : "memory", "cc"); \ |
191 | break; \ | 193 | break; \ |
192 | default: \ | 194 | default: \ |
193 | __xadd_wrong_size(); \ | 195 | __add_wrong_size(); \ |
194 | } \ | 196 | } \ |
195 | __ret; \ | 197 | __ret; \ |
196 | }) | 198 | }) |
197 | 199 | ||
198 | /* | 200 | /* |
199 | * xadd() adds "inc" to "*ptr" and atomically returns the previous | 201 | * add_*() adds "inc" to "*ptr" |
200 | * value of "*ptr". | ||
201 | * | 202 | * |
202 | * xadd() is locked when multiple CPUs are online | 203 | * __add() takes a lock prefix |
203 | * xadd_sync() is always locked | 204 | * add_smp() is locked when multiple CPUs are online |
204 | * xadd_local() is never locked | 205 | * add_sync() is always locked |
205 | */ | 206 | */ |
206 | #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) | 207 | #define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX) |
207 | #define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") | 208 | #define add_sync(ptr, inc) __add((ptr), (inc), "lock; ") |
208 | #define xadd_local(ptr, inc) __xadd((ptr), (inc), "") | 209 | |
210 | #define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ | ||
211 | ({ \ | ||
212 | bool __ret; \ | ||
213 | __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ | ||
214 | __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ | ||
215 | BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ | ||
216 | BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ | ||
217 | VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ | ||
218 | VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ | ||
219 | asm volatile(pfx "cmpxchg%c4b %2; sete %0" \ | ||
220 | : "=a" (__ret), "+d" (__old2), \ | ||
221 | "+m" (*(p1)), "+m" (*(p2)) \ | ||
222 | : "i" (2 * sizeof(long)), "a" (__old1), \ | ||
223 | "b" (__new1), "c" (__new2)); \ | ||
224 | __ret; \ | ||
225 | }) | ||
226 | |||
227 | #define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ | ||
228 | __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) | ||
229 | |||
230 | #define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ | ||
231 | __cmpxchg_double(, p1, p2, o1, o2, n1, n2) | ||
209 | 232 | ||
210 | #endif /* ASM_X86_CMPXCHG_H */ | 233 | #endif /* ASM_X86_CMPXCHG_H */ |
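
Note: as a quick illustration of the xadd semantics documented above ("adds inc to *ptr and atomically returns the previous value of *ptr"), here is a hedged standalone sketch of the same instruction pattern. It is not the kernel macro, builds only on x86-64 with a GNU-style compiler, and exists purely to show the old-value-left-in-the-source-register behaviour:

#include <stdio.h>

/* Open-coded equivalent of xadd() for a long: XADD adds the register to
 * memory and leaves the previous memory value in the source register. */
static long sketch_xadd(long *ptr, long inc)
{
        asm volatile("lock xaddq %0, %1"
                     : "+r" (inc), "+m" (*ptr)
                     : : "memory", "cc");
        return inc;
}

int main(void)
{
        long counter = 40;
        long old = sketch_xadd(&counter, 2);

        printf("old=%ld new=%ld\n", old, counter);   /* old=40 new=42 */
        return 0;
}
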
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fbebb07dd80b..53f4b219336b 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
166 | 166 | ||
167 | #endif | 167 | #endif |
168 | 168 | ||
169 | #define cmpxchg8b(ptr, o1, o2, n1, n2) \ | ||
170 | ({ \ | ||
171 | char __ret; \ | ||
172 | __typeof__(o2) __dummy; \ | ||
173 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
174 | __typeof__(o2) __old2 = (o2); \ | ||
175 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
176 | __typeof__(o2) __new2 = (n2); \ | ||
177 | asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \ | ||
178 | : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\ | ||
179 | : "a" (__old1), "d"(__old2), \ | ||
180 | "b" (__new1), "c" (__new2) \ | ||
181 | : "memory"); \ | ||
182 | __ret; }) | ||
183 | |||
184 | |||
185 | #define cmpxchg8b_local(ptr, o1, o2, n1, n2) \ | ||
186 | ({ \ | ||
187 | char __ret; \ | ||
188 | __typeof__(o2) __dummy; \ | ||
189 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
190 | __typeof__(o2) __old2 = (o2); \ | ||
191 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
192 | __typeof__(o2) __new2 = (n2); \ | ||
193 | asm volatile("cmpxchg8b %2; setz %1" \ | ||
194 | : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\ | ||
195 | : "a" (__old), "d"(__old2), \ | ||
196 | "b" (__new1), "c" (__new2), \ | ||
197 | : "memory"); \ | ||
198 | __ret; }) | ||
199 | |||
200 | |||
201 | #define cmpxchg_double(ptr, o1, o2, n1, n2) \ | ||
202 | ({ \ | ||
203 | BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ | ||
204 | VM_BUG_ON((unsigned long)(ptr) % 8); \ | ||
205 | cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \ | ||
206 | }) | ||
207 | |||
208 | #define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ | ||
209 | ({ \ | ||
210 | BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ | ||
211 | VM_BUG_ON((unsigned long)(ptr) % 8); \ | ||
212 | cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ | ||
213 | }) | ||
214 | |||
215 | #define system_has_cmpxchg_double() cpu_has_cx8 | 169 | #define system_has_cmpxchg_double() cpu_has_cx8 |
216 | 170 | ||
217 | #endif /* _ASM_X86_CMPXCHG_32_H */ | 171 | #endif /* _ASM_X86_CMPXCHG_32_H */ |
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 285da02c38fa..614be87f1a9b 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
20 | cmpxchg_local((ptr), (o), (n)); \ | 20 | cmpxchg_local((ptr), (o), (n)); \ |
21 | }) | 21 | }) |
22 | 22 | ||
23 | #define cmpxchg16b(ptr, o1, o2, n1, n2) \ | ||
24 | ({ \ | ||
25 | char __ret; \ | ||
26 | __typeof__(o2) __junk; \ | ||
27 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
28 | __typeof__(o2) __old2 = (o2); \ | ||
29 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
30 | __typeof__(o2) __new2 = (n2); \ | ||
31 | asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \ | ||
32 | : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ | ||
33 | : "b"(__new1), "c"(__new2), \ | ||
34 | "a"(__old1), "d"(__old2)); \ | ||
35 | __ret; }) | ||
36 | |||
37 | |||
38 | #define cmpxchg16b_local(ptr, o1, o2, n1, n2) \ | ||
39 | ({ \ | ||
40 | char __ret; \ | ||
41 | __typeof__(o2) __junk; \ | ||
42 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
43 | __typeof__(o2) __old2 = (o2); \ | ||
44 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
45 | __typeof__(o2) __new2 = (n2); \ | ||
46 | asm volatile("cmpxchg16b %2;setz %1" \ | ||
47 | : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ | ||
48 | : "b"(__new1), "c"(__new2), \ | ||
49 | "a"(__old1), "d"(__old2)); \ | ||
50 | __ret; }) | ||
51 | |||
52 | #define cmpxchg_double(ptr, o1, o2, n1, n2) \ | ||
53 | ({ \ | ||
54 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | ||
55 | VM_BUG_ON((unsigned long)(ptr) % 16); \ | ||
56 | cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \ | ||
57 | }) | ||
58 | |||
59 | #define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ | ||
60 | ({ \ | ||
61 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | ||
62 | VM_BUG_ON((unsigned long)(ptr) % 16); \ | ||
63 | cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ | ||
64 | }) | ||
65 | |||
66 | #define system_has_cmpxchg_double() cpu_has_cx16 | 23 | #define system_has_cmpxchg_double() cpu_has_cx16 |
67 | 24 | ||
68 | #endif /* _ASM_X86_CMPXCHG_64_H */ | 25 | #endif /* _ASM_X86_CMPXCHG_64_H */ |
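
Note: the cmpxchg16b()/cmpxchg_double() wrappers deleted here are superseded by the generic __cmpxchg_double() added to cmpxchg.h above. For intuition about the semantics (two adjacent words compared and replaced as one atomic unit), a hedged standalone sketch using GCC's 128-bit atomics rather than the kernel macros; building it may need -mcx16 and possibly -latomic:

#include <stdio.h>

int main(void)
{
        /* Two adjacent 64-bit words packed into one 16-byte value. */
        unsigned __int128 pair     = ((unsigned __int128)2 << 64) | 1;
        unsigned __int128 expected = pair;
        unsigned __int128 desired  = ((unsigned __int128)20 << 64) | 10;

        /* Succeeds only if *both* halves still hold their expected values. */
        int ok = __atomic_compare_exchange_n(&pair, &expected, desired, 0,
                                             __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);

        printf("swap %s, low word now %llu\n",
               ok ? "succeeded" : "failed",
               (unsigned long long)pair);   /* cast keeps only the low 64 bits */
        return 0;
}
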
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9a2d644c08ef..ced283ac79df 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -4,6 +4,7 @@
4 | #ifdef CONFIG_X86_32 | 4 | #ifdef CONFIG_X86_32 |
5 | 5 | ||
6 | #include <linux/types.h> | 6 | #include <linux/types.h> |
7 | #include <linux/log2.h> | ||
7 | 8 | ||
8 | /* | 9 | /* |
9 | * do_div() is NOT a C function. It wants to return | 10 | * do_div() is NOT a C function. It wants to return |
@@ -21,15 +22,20 @@
21 | ({ \ | 22 | ({ \ |
22 | unsigned long __upper, __low, __high, __mod, __base; \ | 23 | unsigned long __upper, __low, __high, __mod, __base; \ |
23 | __base = (base); \ | 24 | __base = (base); \ |
24 | asm("":"=a" (__low), "=d" (__high) : "A" (n)); \ | 25 | if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \ |
25 | __upper = __high; \ | 26 | __mod = n & (__base - 1); \ |
26 | if (__high) { \ | 27 | n >>= ilog2(__base); \ |
27 | __upper = __high % (__base); \ | 28 | } else { \ |
28 | __high = __high / (__base); \ | 29 | asm("" : "=a" (__low), "=d" (__high) : "A" (n));\ |
30 | __upper = __high; \ | ||
31 | if (__high) { \ | ||
32 | __upper = __high % (__base); \ | ||
33 | __high = __high / (__base); \ | ||
34 | } \ | ||
35 | asm("divl %2" : "=a" (__low), "=d" (__mod) \ | ||
36 | : "rm" (__base), "0" (__low), "1" (__upper)); \ | ||
37 | asm("" : "=A" (n) : "a" (__low), "d" (__high)); \ | ||
29 | } \ | 38 | } \ |
30 | asm("divl %2":"=a" (__low), "=d" (__mod) \ | ||
31 | : "rm" (__base), "0" (__low), "1" (__upper)); \ | ||
32 | asm("":"=A" (n) : "a" (__low), "d" (__high)); \ | ||
33 | __mod; \ | 39 | __mod; \ |
34 | }) | 40 | }) |
35 | 41 | ||
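
Note: a standalone illustration of the shortcut added to do_div() above; when the base is a compile-time power of two, the remainder is a mask and the quotient is a shift, so the divl sequence can be skipped. Plain C, not the kernel macro:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t n    = 0x123456789abULL;
        uint32_t base = 4096;                       /* power of two */
        unsigned int shift = __builtin_ctz(base);   /* ilog2(base) == 12 */

        uint32_t mod  = n & (base - 1);   /* remainder via mask */
        uint64_t quot = n >> shift;       /* quotient via shift */

        printf("n %% %u = %u, n / %u = %llu\n",
               base, mod, base, (unsigned long long)quot);
        return 0;
}
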
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3470c9d0ebba..529bf07e8067 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,23 +451,20 @@ do { \
451 | #endif /* !CONFIG_M386 */ | 451 | #endif /* !CONFIG_M386 */ |
452 | 452 | ||
453 | #ifdef CONFIG_X86_CMPXCHG64 | 453 | #ifdef CONFIG_X86_CMPXCHG64 |
454 | #define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ | 454 | #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ |
455 | ({ \ | 455 | ({ \ |
456 | char __ret; \ | 456 | bool __ret; \ |
457 | typeof(o1) __o1 = o1; \ | 457 | typeof(pcp1) __o1 = (o1), __n1 = (n1); \ |
458 | typeof(o1) __n1 = n1; \ | 458 | typeof(pcp2) __o2 = (o2), __n2 = (n2); \ |
459 | typeof(o2) __o2 = o2; \ | ||
460 | typeof(o2) __n2 = n2; \ | ||
461 | typeof(o2) __dummy = n2; \ | ||
462 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ | 459 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ |
463 | : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ | 460 | : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \ |
464 | : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ | 461 | : "b" (__n1), "c" (__n2), "a" (__o1)); \ |
465 | __ret; \ | 462 | __ret; \ |
466 | }) | 463 | }) |
467 | 464 | ||
468 | #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | 465 | #define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
469 | #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | 466 | #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
470 | #define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | 467 | #define irqsafe_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
471 | #endif /* CONFIG_X86_CMPXCHG64 */ | 468 | #endif /* CONFIG_X86_CMPXCHG64 */ |
472 | 469 | ||
473 | /* | 470 | /* |
@@ -508,31 +505,23 @@ do { \
508 | * it in software. The address used in the cmpxchg16 instruction must be | 505 | * it in software. The address used in the cmpxchg16 instruction must be |
509 | * aligned to a 16 byte boundary. | 506 | * aligned to a 16 byte boundary. |
510 | */ | 507 | */ |
511 | #ifdef CONFIG_SMP | 508 | #define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ |
512 | #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3 | ||
513 | #else | ||
514 | #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2 | ||
515 | #endif | ||
516 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ | ||
517 | ({ \ | 509 | ({ \ |
518 | char __ret; \ | 510 | bool __ret; \ |
519 | typeof(o1) __o1 = o1; \ | 511 | typeof(pcp1) __o1 = (o1), __n1 = (n1); \ |
520 | typeof(o1) __n1 = n1; \ | 512 | typeof(pcp2) __o2 = (o2), __n2 = (n2); \ |
521 | typeof(o2) __o2 = o2; \ | 513 | alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ |
522 | typeof(o2) __n2 = n2; \ | 514 | "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ |
523 | typeof(o2) __dummy; \ | ||
524 | alternative_io(CMPXCHG16B_EMU_CALL, \ | ||
525 | "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ | ||
526 | X86_FEATURE_CX16, \ | 515 | X86_FEATURE_CX16, \ |
527 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | 516 | ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ |
528 | "S" (&pcp1), "b"(__n1), "c"(__n2), \ | 517 | "+m" (pcp2), "+d" (__o2)), \ |
529 | "a"(__o1), "d"(__o2) : "memory"); \ | 518 | "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ |
530 | __ret; \ | 519 | __ret; \ |
531 | }) | 520 | }) |
532 | 521 | ||
533 | #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | 522 | #define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
534 | #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | 523 | #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
535 | #define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | 524 | #define irqsafe_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
536 | 525 | ||
537 | #endif | 526 | #endif |
538 | 527 | ||
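
Note: a hedged, kernel-style sketch of how the reworked percpu_cmpxchg16b_double() ends up being reached through the generic this_cpu_cmpxchg_double() wrapper. The struct and field names here are hypothetical (the pattern loosely resembles pairing a pointer with a sequence count); this is an illustration only, not code from this series, and it is not runnable outside a kernel tree:

#include <linux/types.h>
#include <linux/percpu.h>

/* Hypothetical per-cpu pair; the two members must be adjacent and
 * suitably aligned, as the comment above requires for cmpxchg16b. */
struct pcpu_pair {
        void *ptr;            /* hypothetical */
        unsigned long seq;    /* hypothetical */
};

static DEFINE_PER_CPU_ALIGNED(struct pcpu_pair, pcpu_pair);

static bool update_pair(void *old_ptr, unsigned long old_seq,
                        void *new_ptr, unsigned long new_seq)
{
        /* Both per-cpu words are compared and replaced as one atomic unit;
         * true is returned only if both matched their expected values. */
        return this_cpu_cmpxchg_double(pcpu_pair.ptr, pcpu_pair.seq,
                                       old_ptr, old_seq,
                                       new_ptr, new_seq);
}
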
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 2dddb317bb39..f8ab3eaad128 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -6,6 +6,7 @@
6 | * EFLAGS bits | 6 | * EFLAGS bits |
7 | */ | 7 | */ |
8 | #define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ | 8 | #define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ |
9 | #define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */ | ||
9 | #define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ | 10 | #define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ |
10 | #define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ | 11 | #define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ |
11 | #define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ | 12 | #define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ |
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 972c260919a3..a82c2bf504b6 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
79 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; | 79 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; |
80 | } | 80 | } |
81 | 81 | ||
82 | #if (NR_CPUS < 256) | ||
83 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | 82 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) |
84 | { | 83 | { |
85 | asm volatile(UNLOCK_LOCK_PREFIX "incb %0" | 84 | __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); |
86 | : "+m" (lock->head_tail) | ||
87 | : | ||
88 | : "memory", "cc"); | ||
89 | } | 85 | } |
90 | #else | ||
91 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | ||
92 | { | ||
93 | asm volatile(UNLOCK_LOCK_PREFIX "incw %0" | ||
94 | : "+m" (lock->head_tail) | ||
95 | : | ||
96 | : "memory", "cc"); | ||
97 | } | ||
98 | #endif | ||
99 | 86 | ||
100 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) | 87 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) |
101 | { | 88 | { |
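
Note: the rewritten __ticket_spin_unlock() above now just does __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX), which works for either ticket width. As background, a hedged standalone sketch of the ticket-lock shape this relies on, using C11 atomics rather than the kernel's arch_spinlock_t; unlocking really is just "add 1 to head":

#include <stdio.h>
#include <stdatomic.h>

struct ticket_lock {
        atomic_uint head;   /* now-serving ticket */
        atomic_uint tail;   /* next free ticket   */
};

static void ticket_lock(struct ticket_lock *l)
{
        unsigned int me = atomic_fetch_add(&l->tail, 1);   /* xadd: take a ticket */

        while (atomic_load(&l->head) != me)
                ;                                          /* spin until served */
}

static void ticket_unlock(struct ticket_lock *l)
{
        atomic_fetch_add(&l->head, 1);   /* the "head += 1" the patch expresses via __add() */
}

int main(void)
{
        struct ticket_lock l = { 0, 0 };

        ticket_lock(&l);
        ticket_unlock(&l);
        printf("head=%u tail=%u\n",
               atomic_load(&l.head), atomic_load(&l.tail));   /* 1 1 */
        return 0;
}
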
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1fe5c127b52..185b719ec61a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,7 +40,8 @@ struct thread_info {
40 | */ | 40 | */ |
41 | __u8 supervisor_stack[0]; | 41 | __u8 supervisor_stack[0]; |
42 | #endif | 42 | #endif |
43 | int uaccess_err; | 43 | int sig_on_uaccess_error:1; |
44 | int uaccess_err:1; /* uaccess failed */ | ||
44 | }; | 45 | }; |
45 | 46 | ||
46 | #define INIT_THREAD_INFO(tsk) \ | 47 | #define INIT_THREAD_INFO(tsk) \ |
@@ -231,6 +232,12 @@ static inline struct thread_info *current_thread_info(void)
231 | movq PER_CPU_VAR(kernel_stack),reg ; \ | 232 | movq PER_CPU_VAR(kernel_stack),reg ; \ |
232 | subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg | 233 | subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg |
233 | 234 | ||
235 | /* | ||
236 | * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in | ||
237 | * a certain register (to be used in assembler memory operands). | ||
238 | */ | ||
239 | #define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) | ||
240 | |||
234 | #endif | 241 | #endif |
235 | 242 | ||
236 | #endif /* !X86_32 */ | 243 | #endif /* !X86_32 */ |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index c00692476e9f..800f77c60051 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void);
130 | .balance_interval = 1, \ | 130 | .balance_interval = 1, \ |
131 | } | 131 | } |
132 | 132 | ||
133 | #ifdef CONFIG_X86_64 | ||
134 | extern int __node_distance(int, int); | 133 | extern int __node_distance(int, int); |
135 | #define node_distance(a, b) __node_distance(a, b) | 134 | #define node_distance(a, b) __node_distance(a, b) |
136 | #endif | ||
137 | 135 | ||
138 | #else /* !CONFIG_NUMA */ | 136 | #else /* !CONFIG_NUMA */ |
139 | 137 | ||
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 36361bf6fdd1..8be5f54d9360 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; };
462 | barrier(); | 462 | barrier(); |
463 | 463 | ||
464 | #define uaccess_catch(err) \ | 464 | #define uaccess_catch(err) \ |
465 | (err) |= current_thread_info()->uaccess_err; \ | 465 | (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ |
466 | current_thread_info()->uaccess_err = prev_err; \ | 466 | current_thread_info()->uaccess_err = prev_err; \ |
467 | } while (0) | 467 | } while (0) |
468 | 468 | ||
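
Note: this hunk follows from the thread_info.h change earlier in the series: uaccess_err is now a one-bit flag rather than a value the fixup path could set to -EFAULT, so uaccess_catch() has to translate "flag set" into -EFAULT itself. A hedged standalone sketch of the difference (plain C, EFAULT hard-coded as 14, and GCC's signed treatment of plain int bitfields assumed):

#include <stdio.h>

#define EFAULT 14

struct ti {
        int sig_on_uaccess_error:1;
        int uaccess_err:1;   /* one-bit flag: "a uaccess faulted", nothing more */
};

int main(void)
{
        struct ti t = { 0, 0 };
        int err_old = 0, err_new = 0;

        t.uaccess_err = -1;   /* the fixup path can only mark the flag now */

        err_old |= t.uaccess_err;                   /* old style: err ends up -1  */
        err_new |= (t.uaccess_err ? -EFAULT : 0);   /* new style: err ends up -14 */

        printf("flag=%d old=%d new=%d\n", t.uaccess_err, err_old, err_new);
        return 0;
}
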