26 files changed, 345 insertions, 306 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0293fc8daca3..e229769606f2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2755,11 +2755,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2755 | functions are at fixed addresses, they make nice | 2755 | functions are at fixed addresses, they make nice |
2756 | targets for exploits that can control RIP. | 2756 | targets for exploits that can control RIP. |
2757 | 2757 | ||
2758 | emulate Vsyscalls turn into traps and are emulated | 2758 | emulate [default] Vsyscalls turn into traps and are |
2759 | reasonably safely. | 2759 | emulated reasonably safely. |
2760 | 2760 | ||
2761 | native [default] Vsyscalls are native syscall | 2761 | native Vsyscalls are native syscall instructions. |
2762 | instructions. | ||
2763 | This is a little bit faster than trapping | 2762 | This is a little bit faster than trapping |
2764 | and makes a few dynamic recompilers work | 2763 | and makes a few dynamic recompilers work |
2765 | better than they would in emulation mode. | 2764 | better than they would in emulation mode. |
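For context, a legacy vsyscall is just a call through a fixed address in the vsyscall page, so the same userspace binary exercises whichever mode is selected. A hypothetical illustration, not part of this patch, using the historical x86-64 vsyscall address for time():

/* Hypothetical demo, not from this patch: call the legacy time() vsyscall
 * through its fixed address.  With vsyscall=native this executes the real
 * syscall instruction in the vsyscall page; with vsyscall=emulate the
 * call faults and the kernel emulates it. */
#include <stdio.h>
#include <time.h>

int main(void)
{
	time_t (*vtime)(time_t *) = (time_t (*)(time_t *))0xffffffffff600400UL;
	time_t t = vtime(NULL);

	printf("time() via the vsyscall page: %ld\n", (long)t);
	return 0;
}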
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a6253ec1b284..3e274564f6bf 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -134,7 +134,7 @@ ENTRY(ia32_sysenter_target) | |||
134 | CFI_REL_OFFSET rsp,0 | 134 | CFI_REL_OFFSET rsp,0 |
135 | pushfq_cfi | 135 | pushfq_cfi |
136 | /*CFI_REL_OFFSET rflags,0*/ | 136 | /*CFI_REL_OFFSET rflags,0*/ |
137 | movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d | 137 | movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d |
138 | CFI_REGISTER rip,r10 | 138 | CFI_REGISTER rip,r10 |
139 | pushq_cfi $__USER32_CS | 139 | pushq_cfi $__USER32_CS |
140 | /*CFI_REL_OFFSET cs,0*/ | 140 | /*CFI_REL_OFFSET cs,0*/ |
@@ -150,9 +150,8 @@ ENTRY(ia32_sysenter_target) | |||
150 | .section __ex_table,"a" | 150 | .section __ex_table,"a" |
151 | .quad 1b,ia32_badarg | 151 | .quad 1b,ia32_badarg |
152 | .previous | 152 | .previous |
153 | GET_THREAD_INFO(%r10) | 153 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
154 | orl $TS_COMPAT,TI_status(%r10) | 154 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
155 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | ||
156 | CFI_REMEMBER_STATE | 155 | CFI_REMEMBER_STATE |
157 | jnz sysenter_tracesys | 156 | jnz sysenter_tracesys |
158 | cmpq $(IA32_NR_syscalls-1),%rax | 157 | cmpq $(IA32_NR_syscalls-1),%rax |
@@ -162,13 +161,12 @@ sysenter_do_call: | |||
162 | sysenter_dispatch: | 161 | sysenter_dispatch: |
163 | call *ia32_sys_call_table(,%rax,8) | 162 | call *ia32_sys_call_table(,%rax,8) |
164 | movq %rax,RAX-ARGOFFSET(%rsp) | 163 | movq %rax,RAX-ARGOFFSET(%rsp) |
165 | GET_THREAD_INFO(%r10) | ||
166 | DISABLE_INTERRUPTS(CLBR_NONE) | 164 | DISABLE_INTERRUPTS(CLBR_NONE) |
167 | TRACE_IRQS_OFF | 165 | TRACE_IRQS_OFF |
168 | testl $_TIF_ALLWORK_MASK,TI_flags(%r10) | 166 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
169 | jnz sysexit_audit | 167 | jnz sysexit_audit |
170 | sysexit_from_sys_call: | 168 | sysexit_from_sys_call: |
171 | andl $~TS_COMPAT,TI_status(%r10) | 169 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
172 | /* clear IF, that popfq doesn't enable interrupts early */ | 170 | /* clear IF, that popfq doesn't enable interrupts early */ |
173 | andl $~0x200,EFLAGS-R11(%rsp) | 171 | andl $~0x200,EFLAGS-R11(%rsp) |
174 | movl RIP-R11(%rsp),%edx /* User %eip */ | 172 | movl RIP-R11(%rsp),%edx /* User %eip */ |
@@ -205,7 +203,7 @@ sysexit_from_sys_call: | |||
205 | .endm | 203 | .endm |
206 | 204 | ||
207 | .macro auditsys_exit exit | 205 | .macro auditsys_exit exit |
208 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | 206 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
209 | jnz ia32_ret_from_sys_call | 207 | jnz ia32_ret_from_sys_call |
210 | TRACE_IRQS_ON | 208 | TRACE_IRQS_ON |
211 | sti | 209 | sti |
@@ -215,12 +213,11 @@ sysexit_from_sys_call: | |||
215 | movzbl %al,%edi /* zero-extend that into %edi */ | 213 | movzbl %al,%edi /* zero-extend that into %edi */ |
216 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | 214 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ |
217 | call audit_syscall_exit | 215 | call audit_syscall_exit |
218 | GET_THREAD_INFO(%r10) | ||
219 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ | 216 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ |
220 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 217 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
221 | cli | 218 | cli |
222 | TRACE_IRQS_OFF | 219 | TRACE_IRQS_OFF |
223 | testl %edi,TI_flags(%r10) | 220 | testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
224 | jz \exit | 221 | jz \exit |
225 | CLEAR_RREGS -ARGOFFSET | 222 | CLEAR_RREGS -ARGOFFSET |
226 | jmp int_with_check | 223 | jmp int_with_check |
@@ -238,7 +235,7 @@ sysexit_audit: | |||
238 | 235 | ||
239 | sysenter_tracesys: | 236 | sysenter_tracesys: |
240 | #ifdef CONFIG_AUDITSYSCALL | 237 | #ifdef CONFIG_AUDITSYSCALL |
241 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | 238 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
242 | jz sysenter_auditsys | 239 | jz sysenter_auditsys |
243 | #endif | 240 | #endif |
244 | SAVE_REST | 241 | SAVE_REST |
@@ -309,9 +306,8 @@ ENTRY(ia32_cstar_target) | |||
309 | .section __ex_table,"a" | 306 | .section __ex_table,"a" |
310 | .quad 1b,ia32_badarg | 307 | .quad 1b,ia32_badarg |
311 | .previous | 308 | .previous |
312 | GET_THREAD_INFO(%r10) | 309 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
313 | orl $TS_COMPAT,TI_status(%r10) | 310 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
314 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | ||
315 | CFI_REMEMBER_STATE | 311 | CFI_REMEMBER_STATE |
316 | jnz cstar_tracesys | 312 | jnz cstar_tracesys |
317 | cmpq $IA32_NR_syscalls-1,%rax | 313 | cmpq $IA32_NR_syscalls-1,%rax |
@@ -321,13 +317,12 @@ cstar_do_call: | |||
321 | cstar_dispatch: | 317 | cstar_dispatch: |
322 | call *ia32_sys_call_table(,%rax,8) | 318 | call *ia32_sys_call_table(,%rax,8) |
323 | movq %rax,RAX-ARGOFFSET(%rsp) | 319 | movq %rax,RAX-ARGOFFSET(%rsp) |
324 | GET_THREAD_INFO(%r10) | ||
325 | DISABLE_INTERRUPTS(CLBR_NONE) | 320 | DISABLE_INTERRUPTS(CLBR_NONE) |
326 | TRACE_IRQS_OFF | 321 | TRACE_IRQS_OFF |
327 | testl $_TIF_ALLWORK_MASK,TI_flags(%r10) | 322 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
328 | jnz sysretl_audit | 323 | jnz sysretl_audit |
329 | sysretl_from_sys_call: | 324 | sysretl_from_sys_call: |
330 | andl $~TS_COMPAT,TI_status(%r10) | 325 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
331 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 | 326 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 |
332 | movl RIP-ARGOFFSET(%rsp),%ecx | 327 | movl RIP-ARGOFFSET(%rsp),%ecx |
333 | CFI_REGISTER rip,rcx | 328 | CFI_REGISTER rip,rcx |
@@ -355,7 +350,7 @@ sysretl_audit: | |||
355 | 350 | ||
356 | cstar_tracesys: | 351 | cstar_tracesys: |
357 | #ifdef CONFIG_AUDITSYSCALL | 352 | #ifdef CONFIG_AUDITSYSCALL |
358 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | 353 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
359 | jz cstar_auditsys | 354 | jz cstar_auditsys |
360 | #endif | 355 | #endif |
361 | xchgl %r9d,%ebp | 356 | xchgl %r9d,%ebp |
@@ -420,9 +415,8 @@ ENTRY(ia32_syscall) | |||
420 | /* note the registers are not zero extended to the sf. | 415 | /* note the registers are not zero extended to the sf. |
421 | this could be a problem. */ | 416 | this could be a problem. */ |
422 | SAVE_ARGS 0,1,0 | 417 | SAVE_ARGS 0,1,0 |
423 | GET_THREAD_INFO(%r10) | 418 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
424 | orl $TS_COMPAT,TI_status(%r10) | 419 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
425 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) | ||
426 | jnz ia32_tracesys | 420 | jnz ia32_tracesys |
427 | cmpq $(IA32_NR_syscalls-1),%rax | 421 | cmpq $(IA32_NR_syscalls-1),%rax |
428 | ja ia32_badsys | 422 | ja ia32_badsys |
@@ -459,8 +453,8 @@ quiet_ni_syscall: | |||
459 | CFI_ENDPROC | 453 | CFI_ENDPROC |
460 | 454 | ||
461 | .macro PTREGSCALL label, func, arg | 455 | .macro PTREGSCALL label, func, arg |
462 | .globl \label | 456 | ALIGN |
463 | \label: | 457 | GLOBAL(\label) |
464 | leaq \func(%rip),%rax | 458 | leaq \func(%rip),%rax |
465 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | 459 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ |
466 | jmp ia32_ptregs_common | 460 | jmp ia32_ptregs_common |
@@ -477,7 +471,8 @@ quiet_ni_syscall: | |||
477 | PTREGSCALL stub32_vfork, sys_vfork, %rdi | 471 | PTREGSCALL stub32_vfork, sys_vfork, %rdi |
478 | PTREGSCALL stub32_iopl, sys_iopl, %rsi | 472 | PTREGSCALL stub32_iopl, sys_iopl, %rsi |
479 | 473 | ||
480 | ENTRY(ia32_ptregs_common) | 474 | ALIGN |
475 | ia32_ptregs_common: | ||
481 | popq %r11 | 476 | popq %r11 |
482 | CFI_ENDPROC | 477 | CFI_ENDPROC |
483 | CFI_STARTPROC32 simple | 478 | CFI_STARTPROC32 simple |
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 091508b533b4..952bd0100c5c 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@ | |||
4 | 4 | ||
5 | #ifdef CONFIG_SMP | 5 | #ifdef CONFIG_SMP |
6 | .macro LOCK_PREFIX | 6 | .macro LOCK_PREFIX |
7 | 1: lock | 7 | 672: lock |
8 | .section .smp_locks,"a" | 8 | .section .smp_locks,"a" |
9 | .balign 4 | 9 | .balign 4 |
10 | .long 1b - . | 10 | .long 672b - . |
11 | .previous | 11 | .previous |
12 | .endm | 12 | .endm |
13 | #else | 13 | #else |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 1775d6e5920e..b97596e2b68c 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word) | |||
380 | return word; | 380 | return word; |
381 | } | 381 | } |
382 | 382 | ||
383 | #undef ADDR | ||
384 | |||
383 | #ifdef __KERNEL__ | 385 | #ifdef __KERNEL__ |
384 | /** | 386 | /** |
385 | * ffs - find first set bit in word | 387 | * ffs - find first set bit in word |
@@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word) | |||
395 | static inline int ffs(int x) | 397 | static inline int ffs(int x) |
396 | { | 398 | { |
397 | int r; | 399 | int r; |
398 | #ifdef CONFIG_X86_CMOV | 400 | |
401 | #ifdef CONFIG_X86_64 | ||
402 | /* | ||
403 | * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the | ||
404 | * dest reg is undefined if x==0, but their CPU architect says its | ||
405 | * value is written to set it to the same as before, except that the | ||
406 | * top 32 bits will be cleared. | ||
407 | * | ||
408 | * We cannot do this on 32 bits because at the very least some | ||
409 | * 486 CPUs did not behave this way. | ||
410 | */ | ||
411 | long tmp = -1; | ||
412 | asm("bsfl %1,%0" | ||
413 | : "=r" (r) | ||
414 | : "rm" (x), "0" (tmp)); | ||
415 | #elif defined(CONFIG_X86_CMOV) | ||
399 | asm("bsfl %1,%0\n\t" | 416 | asm("bsfl %1,%0\n\t" |
400 | "cmovzl %2,%0" | 417 | "cmovzl %2,%0" |
401 | : "=r" (r) : "rm" (x), "r" (-1)); | 418 | : "=&r" (r) : "rm" (x), "r" (-1)); |
402 | #else | 419 | #else |
403 | asm("bsfl %1,%0\n\t" | 420 | asm("bsfl %1,%0\n\t" |
404 | "jnz 1f\n\t" | 421 | "jnz 1f\n\t" |
@@ -422,7 +439,22 @@ static inline int ffs(int x) | |||
422 | static inline int fls(int x) | 439 | static inline int fls(int x) |
423 | { | 440 | { |
424 | int r; | 441 | int r; |
425 | #ifdef CONFIG_X86_CMOV | 442 | |
443 | #ifdef CONFIG_X86_64 | ||
444 | /* | ||
445 | * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the | ||
446 | * dest reg is undefined if x==0, but their CPU architect says its | ||
447 | * value is written to set it to the same as before, except that the | ||
448 | * top 32 bits will be cleared. | ||
449 | * | ||
450 | * We cannot do this on 32 bits because at the very least some | ||
451 | * 486 CPUs did not behave this way. | ||
452 | */ | ||
453 | long tmp = -1; | ||
454 | asm("bsrl %1,%0" | ||
455 | : "=r" (r) | ||
456 | : "rm" (x), "0" (tmp)); | ||
457 | #elif defined(CONFIG_X86_CMOV) | ||
426 | asm("bsrl %1,%0\n\t" | 458 | asm("bsrl %1,%0\n\t" |
427 | "cmovzl %2,%0" | 459 | "cmovzl %2,%0" |
428 | : "=&r" (r) : "rm" (x), "rm" (-1)); | 460 | : "=&r" (r) : "rm" (x), "rm" (-1)); |
@@ -434,11 +466,35 @@ static inline int fls(int x) | |||
434 | #endif | 466 | #endif |
435 | return r + 1; | 467 | return r + 1; |
436 | } | 468 | } |
437 | #endif /* __KERNEL__ */ | ||
438 | |||
439 | #undef ADDR | ||
440 | 469 | ||
441 | #ifdef __KERNEL__ | 470 | /** |
471 | * fls64 - find last set bit in a 64-bit word | ||
472 | * @x: the word to search | ||
473 | * | ||
474 | * This is defined in a similar way as the libc and compiler builtin | ||
475 | * ffsll, but returns the position of the most significant set bit. | ||
476 | * | ||
477 | * fls64(value) returns 0 if value is 0 or the position of the last | ||
478 | * set bit if value is nonzero. The last (most significant) bit is | ||
479 | * at position 64. | ||
480 | */ | ||
481 | #ifdef CONFIG_X86_64 | ||
482 | static __always_inline int fls64(__u64 x) | ||
483 | { | ||
484 | long bitpos = -1; | ||
485 | /* | ||
486 | * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the | ||
487 | * dest reg is undefined if x==0, but their CPU architect says its | ||
488 | * value is written to set it to the same as before. | ||
489 | */ | ||
490 | asm("bsrq %1,%0" | ||
491 | : "+r" (bitpos) | ||
492 | : "rm" (x)); | ||
493 | return bitpos + 1; | ||
494 | } | ||
495 | #else | ||
496 | #include <asm-generic/bitops/fls64.h> | ||
497 | #endif | ||
442 | 498 | ||
443 | #include <asm-generic/bitops/find.h> | 499 | #include <asm-generic/bitops/find.h> |
444 | 500 | ||
@@ -450,12 +506,6 @@ static inline int fls(int x) | |||
450 | 506 | ||
451 | #include <asm-generic/bitops/const_hweight.h> | 507 | #include <asm-generic/bitops/const_hweight.h> |
452 | 508 | ||
453 | #endif /* __KERNEL__ */ | ||
454 | |||
455 | #include <asm-generic/bitops/fls64.h> | ||
456 | |||
457 | #ifdef __KERNEL__ | ||
458 | |||
459 | #include <asm-generic/bitops/le.h> | 509 | #include <asm-generic/bitops/le.h> |
460 | 510 | ||
461 | #include <asm-generic/bitops/ext2-atomic-setbit.h> | 511 | #include <asm-generic/bitops/ext2-atomic-setbit.h> |
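The BSF/BSR notes above are easier to follow next to the documented return conventions of these helpers. A plain-C reference sketch, illustrative only and not the kernel implementation:

/* Sketch of the expected results (plain C, no asm):
 *   ffs(x)   -> index of the least significant set bit, 1-based; 0 if x == 0
 *   fls(x)   -> index of the most  significant set bit, 1-based; 0 if x == 0
 *   fls64(x) -> same as fls(), but over a 64-bit word (1..64)
 */
#include <assert.h>
#include <stdint.h>

static int ref_fls64(uint64_t x)
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;		/* 0 for x == 0, 64 when the top bit is set */
}

int main(void)
{
	assert(ref_fls64(0) == 0);
	assert(ref_fls64(1) == 1);
	assert(ref_fls64(0x8000000000000000ULL) == 64);
	return 0;
}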
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 5d3acdf5a7a6..0c9fa2745f13 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void) | |||
14 | __compiletime_error("Bad argument size for cmpxchg"); | 14 | __compiletime_error("Bad argument size for cmpxchg"); |
15 | extern void __xadd_wrong_size(void) | 15 | extern void __xadd_wrong_size(void) |
16 | __compiletime_error("Bad argument size for xadd"); | 16 | __compiletime_error("Bad argument size for xadd"); |
17 | extern void __add_wrong_size(void) | ||
18 | __compiletime_error("Bad argument size for add"); | ||
17 | 19 | ||
18 | /* | 20 | /* |
19 | * Constants for operation sizes. On 32-bit, the 64-bit size it set to | 21 | * Constants for operation sizes. On 32-bit, the 64-bit size it set to |
@@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void) | |||
31 | #define __X86_CASE_Q -1 /* sizeof will never return -1 */ | 33 | #define __X86_CASE_Q -1 /* sizeof will never return -1 */ |
32 | #endif | 34 | #endif |
33 | 35 | ||
36 | /* | ||
37 | * An exchange-type operation, which takes a value and a pointer, and | ||
38 | * returns a the old value. | ||
39 | */ | ||
40 | #define __xchg_op(ptr, arg, op, lock) \ | ||
41 | ({ \ | ||
42 | __typeof__ (*(ptr)) __ret = (arg); \ | ||
43 | switch (sizeof(*(ptr))) { \ | ||
44 | case __X86_CASE_B: \ | ||
45 | asm volatile (lock #op "b %b0, %1\n" \ | ||
46 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
47 | : : "memory", "cc"); \ | ||
48 | break; \ | ||
49 | case __X86_CASE_W: \ | ||
50 | asm volatile (lock #op "w %w0, %1\n" \ | ||
51 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
52 | : : "memory", "cc"); \ | ||
53 | break; \ | ||
54 | case __X86_CASE_L: \ | ||
55 | asm volatile (lock #op "l %0, %1\n" \ | ||
56 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
57 | : : "memory", "cc"); \ | ||
58 | break; \ | ||
59 | case __X86_CASE_Q: \ | ||
60 | asm volatile (lock #op "q %q0, %1\n" \ | ||
61 | : "+r" (__ret), "+m" (*(ptr)) \ | ||
62 | : : "memory", "cc"); \ | ||
63 | break; \ | ||
64 | default: \ | ||
65 | __ ## op ## _wrong_size(); \ | ||
66 | } \ | ||
67 | __ret; \ | ||
68 | }) | ||
69 | |||
34 | /* | 70 | /* |
35 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. | 71 | * Note: no "lock" prefix even on SMP: xchg always implies lock anyway. |
36 | * Since this is generally used to protect other memory information, we | 72 | * Since this is generally used to protect other memory information, we |
37 | * use "asm volatile" and "memory" clobbers to prevent gcc from moving | 73 | * use "asm volatile" and "memory" clobbers to prevent gcc from moving |
38 | * information around. | 74 | * information around. |
39 | */ | 75 | */ |
40 | #define __xchg(x, ptr, size) \ | 76 | #define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "") |
41 | ({ \ | ||
42 | __typeof(*(ptr)) __x = (x); \ | ||
43 | switch (size) { \ | ||
44 | case __X86_CASE_B: \ | ||
45 | { \ | ||
46 | volatile u8 *__ptr = (volatile u8 *)(ptr); \ | ||
47 | asm volatile("xchgb %0,%1" \ | ||
48 | : "=q" (__x), "+m" (*__ptr) \ | ||
49 | : "0" (__x) \ | ||
50 | : "memory"); \ | ||
51 | break; \ | ||
52 | } \ | ||
53 | case __X86_CASE_W: \ | ||
54 | { \ | ||
55 | volatile u16 *__ptr = (volatile u16 *)(ptr); \ | ||
56 | asm volatile("xchgw %0,%1" \ | ||
57 | : "=r" (__x), "+m" (*__ptr) \ | ||
58 | : "0" (__x) \ | ||
59 | : "memory"); \ | ||
60 | break; \ | ||
61 | } \ | ||
62 | case __X86_CASE_L: \ | ||
63 | { \ | ||
64 | volatile u32 *__ptr = (volatile u32 *)(ptr); \ | ||
65 | asm volatile("xchgl %0,%1" \ | ||
66 | : "=r" (__x), "+m" (*__ptr) \ | ||
67 | : "0" (__x) \ | ||
68 | : "memory"); \ | ||
69 | break; \ | ||
70 | } \ | ||
71 | case __X86_CASE_Q: \ | ||
72 | { \ | ||
73 | volatile u64 *__ptr = (volatile u64 *)(ptr); \ | ||
74 | asm volatile("xchgq %0,%1" \ | ||
75 | : "=r" (__x), "+m" (*__ptr) \ | ||
76 | : "0" (__x) \ | ||
77 | : "memory"); \ | ||
78 | break; \ | ||
79 | } \ | ||
80 | default: \ | ||
81 | __xchg_wrong_size(); \ | ||
82 | } \ | ||
83 | __x; \ | ||
84 | }) | ||
85 | |||
86 | #define xchg(ptr, v) \ | ||
87 | __xchg((v), (ptr), sizeof(*ptr)) | ||
88 | 77 | ||
89 | /* | 78 | /* |
90 | * Atomic compare and exchange. Compare OLD with MEM, if identical, | 79 | * Atomic compare and exchange. Compare OLD with MEM, if identical, |
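The rewritten xchg() keeps the old (ptr, value) interface; only the size dispatch is now shared with xadd() via __xchg_op(). A hedged usage sketch, kernel context assumed and identifiers invented:

/* Sketch only: xchg() atomically stores the new value and returns the old
 * one, for 1-, 2-, 4- and 8-byte operands; any other size now fails the
 * build through __xchg_wrong_size(). */
static inline bool demo_try_claim(unsigned long *owner, unsigned long me)
{
	return xchg(owner, me) == 0;	/* previous owner was "nobody" */
}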
@@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void) | |||
165 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) | 154 | __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) |
166 | #endif | 155 | #endif |
167 | 156 | ||
168 | #define __xadd(ptr, inc, lock) \ | 157 | /* |
158 | * xadd() adds "inc" to "*ptr" and atomically returns the previous | ||
159 | * value of "*ptr". | ||
160 | * | ||
161 | * xadd() is locked when multiple CPUs are online | ||
162 | * xadd_sync() is always locked | ||
163 | * xadd_local() is never locked | ||
164 | */ | ||
165 | #define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) | ||
166 | #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) | ||
167 | #define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") | ||
168 | #define xadd_local(ptr, inc) __xadd((ptr), (inc), "") | ||
169 | |||
170 | #define __add(ptr, inc, lock) \ | ||
169 | ({ \ | 171 | ({ \ |
170 | __typeof__ (*(ptr)) __ret = (inc); \ | 172 | __typeof__ (*(ptr)) __ret = (inc); \ |
171 | switch (sizeof(*(ptr))) { \ | 173 | switch (sizeof(*(ptr))) { \ |
172 | case __X86_CASE_B: \ | 174 | case __X86_CASE_B: \ |
173 | asm volatile (lock "xaddb %b0, %1\n" \ | 175 | asm volatile (lock "addb %b1, %0\n" \ |
174 | : "+r" (__ret), "+m" (*(ptr)) \ | 176 | : "+m" (*(ptr)) : "ri" (inc) \ |
175 | : : "memory", "cc"); \ | 177 | : "memory", "cc"); \ |
176 | break; \ | 178 | break; \ |
177 | case __X86_CASE_W: \ | 179 | case __X86_CASE_W: \ |
178 | asm volatile (lock "xaddw %w0, %1\n" \ | 180 | asm volatile (lock "addw %w1, %0\n" \ |
179 | : "+r" (__ret), "+m" (*(ptr)) \ | 181 | : "+m" (*(ptr)) : "ri" (inc) \ |
180 | : : "memory", "cc"); \ | 182 | : "memory", "cc"); \ |
181 | break; \ | 183 | break; \ |
182 | case __X86_CASE_L: \ | 184 | case __X86_CASE_L: \ |
183 | asm volatile (lock "xaddl %0, %1\n" \ | 185 | asm volatile (lock "addl %1, %0\n" \ |
184 | : "+r" (__ret), "+m" (*(ptr)) \ | 186 | : "+m" (*(ptr)) : "ri" (inc) \ |
185 | : : "memory", "cc"); \ | 187 | : "memory", "cc"); \ |
186 | break; \ | 188 | break; \ |
187 | case __X86_CASE_Q: \ | 189 | case __X86_CASE_Q: \ |
188 | asm volatile (lock "xaddq %q0, %1\n" \ | 190 | asm volatile (lock "addq %1, %0\n" \ |
189 | : "+r" (__ret), "+m" (*(ptr)) \ | 191 | : "+m" (*(ptr)) : "ri" (inc) \ |
190 | : : "memory", "cc"); \ | 192 | : "memory", "cc"); \ |
191 | break; \ | 193 | break; \ |
192 | default: \ | 194 | default: \ |
193 | __xadd_wrong_size(); \ | 195 | __add_wrong_size(); \ |
194 | } \ | 196 | } \ |
195 | __ret; \ | 197 | __ret; \ |
196 | }) | 198 | }) |
197 | 199 | ||
198 | /* | 200 | /* |
199 | * xadd() adds "inc" to "*ptr" and atomically returns the previous | 201 | * add_*() adds "inc" to "*ptr" |
200 | * value of "*ptr". | ||
201 | * | 202 | * |
202 | * xadd() is locked when multiple CPUs are online | 203 | * __add() takes a lock prefix |
203 | * xadd_sync() is always locked | 204 | * add_smp() is locked when multiple CPUs are online |
204 | * xadd_local() is never locked | 205 | * add_sync() is always locked |
205 | */ | 206 | */ |
206 | #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) | 207 | #define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX) |
207 | #define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") | 208 | #define add_sync(ptr, inc) __add((ptr), (inc), "lock; ") |
208 | #define xadd_local(ptr, inc) __xadd((ptr), (inc), "") | 209 | |
210 | #define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ | ||
211 | ({ \ | ||
212 | bool __ret; \ | ||
213 | __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ | ||
214 | __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ | ||
215 | BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ | ||
216 | BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ | ||
217 | VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ | ||
218 | VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ | ||
219 | asm volatile(pfx "cmpxchg%c4b %2; sete %0" \ | ||
220 | : "=a" (__ret), "+d" (__old2), \ | ||
221 | "+m" (*(p1)), "+m" (*(p2)) \ | ||
222 | : "i" (2 * sizeof(long)), "a" (__old1), \ | ||
223 | "b" (__new1), "c" (__new2)); \ | ||
224 | __ret; \ | ||
225 | }) | ||
226 | |||
227 | #define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ | ||
228 | __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) | ||
229 | |||
230 | #define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ | ||
231 | __cmpxchg_double(, p1, p2, o1, o2, n1, n2) | ||
209 | 232 | ||
210 | #endif /* ASM_X86_CMPXCHG_H */ | 233 | #endif /* ASM_X86_CMPXCHG_H */ |
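The generic cmpxchg_double() introduced here takes both target words as separate, adjacent pointers plus the expected and new values, returning true on success; the BUILD_BUG_ON/VM_BUG_ON lines encode the adjacency and alignment rules. A rough sketch with invented names, kernel context assumed:

/* Rough sketch only: the two longs must be adjacent and the pair aligned
 * to 2*sizeof(long), exactly what the VM_BUG_ON checks above enforce. */
struct demo_pair {
	unsigned long first;
	unsigned long second;
} __attribute__((__aligned__(2 * sizeof(unsigned long))));

static bool demo_pair_update(struct demo_pair *p,
			     unsigned long old1, unsigned long old2,
			     unsigned long new1, unsigned long new2)
{
	return cmpxchg_double(&p->first, &p->second,
			      old1, old2, new1, new2);
}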
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fbebb07dd80b..53f4b219336b 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, | |||
166 | 166 | ||
167 | #endif | 167 | #endif |
168 | 168 | ||
169 | #define cmpxchg8b(ptr, o1, o2, n1, n2) \ | ||
170 | ({ \ | ||
171 | char __ret; \ | ||
172 | __typeof__(o2) __dummy; \ | ||
173 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
174 | __typeof__(o2) __old2 = (o2); \ | ||
175 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
176 | __typeof__(o2) __new2 = (n2); \ | ||
177 | asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \ | ||
178 | : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\ | ||
179 | : "a" (__old1), "d"(__old2), \ | ||
180 | "b" (__new1), "c" (__new2) \ | ||
181 | : "memory"); \ | ||
182 | __ret; }) | ||
183 | |||
184 | |||
185 | #define cmpxchg8b_local(ptr, o1, o2, n1, n2) \ | ||
186 | ({ \ | ||
187 | char __ret; \ | ||
188 | __typeof__(o2) __dummy; \ | ||
189 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
190 | __typeof__(o2) __old2 = (o2); \ | ||
191 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
192 | __typeof__(o2) __new2 = (n2); \ | ||
193 | asm volatile("cmpxchg8b %2; setz %1" \ | ||
194 | : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\ | ||
195 | : "a" (__old), "d"(__old2), \ | ||
196 | "b" (__new1), "c" (__new2), \ | ||
197 | : "memory"); \ | ||
198 | __ret; }) | ||
199 | |||
200 | |||
201 | #define cmpxchg_double(ptr, o1, o2, n1, n2) \ | ||
202 | ({ \ | ||
203 | BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ | ||
204 | VM_BUG_ON((unsigned long)(ptr) % 8); \ | ||
205 | cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \ | ||
206 | }) | ||
207 | |||
208 | #define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ | ||
209 | ({ \ | ||
210 | BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ | ||
211 | VM_BUG_ON((unsigned long)(ptr) % 8); \ | ||
212 | cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ | ||
213 | }) | ||
214 | |||
215 | #define system_has_cmpxchg_double() cpu_has_cx8 | 169 | #define system_has_cmpxchg_double() cpu_has_cx8 |
216 | 170 | ||
217 | #endif /* _ASM_X86_CMPXCHG_32_H */ | 171 | #endif /* _ASM_X86_CMPXCHG_32_H */ |
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 285da02c38fa..614be87f1a9b 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) | |||
20 | cmpxchg_local((ptr), (o), (n)); \ | 20 | cmpxchg_local((ptr), (o), (n)); \ |
21 | }) | 21 | }) |
22 | 22 | ||
23 | #define cmpxchg16b(ptr, o1, o2, n1, n2) \ | ||
24 | ({ \ | ||
25 | char __ret; \ | ||
26 | __typeof__(o2) __junk; \ | ||
27 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
28 | __typeof__(o2) __old2 = (o2); \ | ||
29 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
30 | __typeof__(o2) __new2 = (n2); \ | ||
31 | asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \ | ||
32 | : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ | ||
33 | : "b"(__new1), "c"(__new2), \ | ||
34 | "a"(__old1), "d"(__old2)); \ | ||
35 | __ret; }) | ||
36 | |||
37 | |||
38 | #define cmpxchg16b_local(ptr, o1, o2, n1, n2) \ | ||
39 | ({ \ | ||
40 | char __ret; \ | ||
41 | __typeof__(o2) __junk; \ | ||
42 | __typeof__(*(ptr)) __old1 = (o1); \ | ||
43 | __typeof__(o2) __old2 = (o2); \ | ||
44 | __typeof__(*(ptr)) __new1 = (n1); \ | ||
45 | __typeof__(o2) __new2 = (n2); \ | ||
46 | asm volatile("cmpxchg16b %2;setz %1" \ | ||
47 | : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ | ||
48 | : "b"(__new1), "c"(__new2), \ | ||
49 | "a"(__old1), "d"(__old2)); \ | ||
50 | __ret; }) | ||
51 | |||
52 | #define cmpxchg_double(ptr, o1, o2, n1, n2) \ | ||
53 | ({ \ | ||
54 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | ||
55 | VM_BUG_ON((unsigned long)(ptr) % 16); \ | ||
56 | cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \ | ||
57 | }) | ||
58 | |||
59 | #define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ | ||
60 | ({ \ | ||
61 | BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ | ||
62 | VM_BUG_ON((unsigned long)(ptr) % 16); \ | ||
63 | cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ | ||
64 | }) | ||
65 | |||
66 | #define system_has_cmpxchg_double() cpu_has_cx16 | 23 | #define system_has_cmpxchg_double() cpu_has_cx16 |
67 | 24 | ||
68 | #endif /* _ASM_X86_CMPXCHG_64_H */ | 25 | #endif /* _ASM_X86_CMPXCHG_64_H */ |
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9a2d644c08ef..ced283ac79df 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -4,6 +4,7 @@ | |||
4 | #ifdef CONFIG_X86_32 | 4 | #ifdef CONFIG_X86_32 |
5 | 5 | ||
6 | #include <linux/types.h> | 6 | #include <linux/types.h> |
7 | #include <linux/log2.h> | ||
7 | 8 | ||
8 | /* | 9 | /* |
9 | * do_div() is NOT a C function. It wants to return | 10 | * do_div() is NOT a C function. It wants to return |
@@ -21,15 +22,20 @@ | |||
21 | ({ \ | 22 | ({ \ |
22 | unsigned long __upper, __low, __high, __mod, __base; \ | 23 | unsigned long __upper, __low, __high, __mod, __base; \ |
23 | __base = (base); \ | 24 | __base = (base); \ |
24 | asm("":"=a" (__low), "=d" (__high) : "A" (n)); \ | 25 | if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \ |
25 | __upper = __high; \ | 26 | __mod = n & (__base - 1); \ |
26 | if (__high) { \ | 27 | n >>= ilog2(__base); \ |
27 | __upper = __high % (__base); \ | 28 | } else { \ |
28 | __high = __high / (__base); \ | 29 | asm("" : "=a" (__low), "=d" (__high) : "A" (n));\ |
30 | __upper = __high; \ | ||
31 | if (__high) { \ | ||
32 | __upper = __high % (__base); \ | ||
33 | __high = __high / (__base); \ | ||
34 | } \ | ||
35 | asm("divl %2" : "=a" (__low), "=d" (__mod) \ | ||
36 | : "rm" (__base), "0" (__low), "1" (__upper)); \ | ||
37 | asm("" : "=A" (n) : "a" (__low), "d" (__high)); \ | ||
29 | } \ | 38 | } \ |
30 | asm("divl %2":"=a" (__low), "=d" (__mod) \ | ||
31 | : "rm" (__base), "0" (__low), "1" (__upper)); \ | ||
32 | asm("":"=A" (n) : "a" (__low), "d" (__high)); \ | ||
33 | __mod; \ | 39 | __mod; \ |
34 | }) | 40 | }) |
35 | 41 | ||
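do_div(n, base) divides the 64-bit n in place and evaluates to the remainder, so for a constant power-of-two base the new branch reduces to a mask and a shift. A small worked example, values invented for illustration and kernel context assumed:

/* Worked example, illustration only: with base == 8 the fast path is
 *     remainder = n & (8 - 1);
 *     n       >>= ilog2(8);        i.e. n >>= 3;
 * so for n == 1000:  remainder == 0 and n becomes 125. */
static u32 demo_div(void)
{
	u64 n = 1000;
	u32 rem = do_div(n, 8);		/* rem == 0, n == 125 afterwards */

	return rem + (u32)n;
}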
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3470c9d0ebba..529bf07e8067 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,23 +451,20 @@ do { \ | |||
451 | #endif /* !CONFIG_M386 */ | 451 | #endif /* !CONFIG_M386 */ |
452 | 452 | ||
453 | #ifdef CONFIG_X86_CMPXCHG64 | 453 | #ifdef CONFIG_X86_CMPXCHG64 |
454 | #define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ | 454 | #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ |
455 | ({ \ | 455 | ({ \ |
456 | char __ret; \ | 456 | bool __ret; \ |
457 | typeof(o1) __o1 = o1; \ | 457 | typeof(pcp1) __o1 = (o1), __n1 = (n1); \ |
458 | typeof(o1) __n1 = n1; \ | 458 | typeof(pcp2) __o2 = (o2), __n2 = (n2); \ |
459 | typeof(o2) __o2 = o2; \ | ||
460 | typeof(o2) __n2 = n2; \ | ||
461 | typeof(o2) __dummy = n2; \ | ||
462 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ | 459 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ |
463 | : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ | 460 | : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \ |
464 | : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ | 461 | : "b" (__n1), "c" (__n2), "a" (__o1)); \ |
465 | __ret; \ | 462 | __ret; \ |
466 | }) | 463 | }) |
467 | 464 | ||
468 | #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | 465 | #define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
469 | #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | 466 | #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
470 | #define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | 467 | #define irqsafe_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
471 | #endif /* CONFIG_X86_CMPXCHG64 */ | 468 | #endif /* CONFIG_X86_CMPXCHG64 */ |
472 | 469 | ||
473 | /* | 470 | /* |
@@ -508,31 +505,23 @@ do { \ | |||
508 | * it in software. The address used in the cmpxchg16 instruction must be | 505 | * it in software. The address used in the cmpxchg16 instruction must be |
509 | * aligned to a 16 byte boundary. | 506 | * aligned to a 16 byte boundary. |
510 | */ | 507 | */ |
511 | #ifdef CONFIG_SMP | 508 | #define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ |
512 | #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3 | ||
513 | #else | ||
514 | #define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2 | ||
515 | #endif | ||
516 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ | ||
517 | ({ \ | 509 | ({ \ |
518 | char __ret; \ | 510 | bool __ret; \ |
519 | typeof(o1) __o1 = o1; \ | 511 | typeof(pcp1) __o1 = (o1), __n1 = (n1); \ |
520 | typeof(o1) __n1 = n1; \ | 512 | typeof(pcp2) __o2 = (o2), __n2 = (n2); \ |
521 | typeof(o2) __o2 = o2; \ | 513 | alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ |
522 | typeof(o2) __n2 = n2; \ | 514 | "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ |
523 | typeof(o2) __dummy; \ | ||
524 | alternative_io(CMPXCHG16B_EMU_CALL, \ | ||
525 | "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ | ||
526 | X86_FEATURE_CX16, \ | 515 | X86_FEATURE_CX16, \ |
527 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | 516 | ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ |
528 | "S" (&pcp1), "b"(__n1), "c"(__n2), \ | 517 | "+m" (pcp2), "+d" (__o2)), \ |
529 | "a"(__o1), "d"(__o2) : "memory"); \ | 518 | "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ |
530 | __ret; \ | 519 | __ret; \ |
531 | }) | 520 | }) |
532 | 521 | ||
533 | #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | 522 | #define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
534 | #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | 523 | #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
535 | #define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | 524 | #define irqsafe_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
536 | 525 | ||
537 | #endif | 526 | #endif |
538 | 527 | ||
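After this cleanup the _4 and _8 per-cpu double-compare-exchange operations are straight aliases, so callers use the common six-argument this_cpu_cmpxchg_double() form with both per-cpu words named explicitly. A hedged sketch, variable names invented and kernel context assumed:

/* Sketch only: the two per-cpu words sit next to each other and the pair
 * is suitably aligned, as cmpxchg8b/cmpxchg16b require. */
struct demo_pcp {
	unsigned long a;
	unsigned long b;
} __attribute__((__aligned__(2 * sizeof(unsigned long))));

static DEFINE_PER_CPU(struct demo_pcp, demo_pcp);

static bool demo_pcp_update(unsigned long oa, unsigned long ob,
			    unsigned long na, unsigned long nb)
{
	return this_cpu_cmpxchg_double(demo_pcp.a, demo_pcp.b,
				       oa, ob, na, nb);
}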
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 2dddb317bb39..f8ab3eaad128 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -6,6 +6,7 @@ | |||
6 | * EFLAGS bits | 6 | * EFLAGS bits |
7 | */ | 7 | */ |
8 | #define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ | 8 | #define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ |
9 | #define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */ | ||
9 | #define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ | 10 | #define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ |
10 | #define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ | 11 | #define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ |
11 | #define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ | 12 | #define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ |
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 972c260919a3..a82c2bf504b6 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) | |||
79 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; | 79 | return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail; |
80 | } | 80 | } |
81 | 81 | ||
82 | #if (NR_CPUS < 256) | ||
83 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | 82 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) |
84 | { | 83 | { |
85 | asm volatile(UNLOCK_LOCK_PREFIX "incb %0" | 84 | __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX); |
86 | : "+m" (lock->head_tail) | ||
87 | : | ||
88 | : "memory", "cc"); | ||
89 | } | 85 | } |
90 | #else | ||
91 | static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) | ||
92 | { | ||
93 | asm volatile(UNLOCK_LOCK_PREFIX "incw %0" | ||
94 | : "+m" (lock->head_tail) | ||
95 | : | ||
96 | : "memory", "cc"); | ||
97 | } | ||
98 | #endif | ||
99 | 86 | ||
100 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) | 87 | static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) |
101 | { | 88 | { |
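The unlock fast path now expresses "serve the next ticket" with the size-generic __add() instead of two hand-coded incb/incw variants. A conceptual plain-C sketch of the protocol it relies on, not the kernel types:

/* Conceptual sketch only: a ticket lock hands out "tail" numbers to
 * waiters and serves them in "head" order.  Unlock is simply head++,
 * which is what __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX)
 * emits as a single addb or addw depending on NR_CPUS. */
struct demo_ticket_lock {
	unsigned short head;		/* ticket currently being served */
	unsigned short tail;		/* next ticket to hand out */
};

static void demo_ticket_unlock(struct demo_ticket_lock *lock)
{
	lock->head++;			/* release: let the next waiter in */
}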
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1fe5c127b52..185b719ec61a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -40,7 +40,8 @@ struct thread_info { | |||
40 | */ | 40 | */ |
41 | __u8 supervisor_stack[0]; | 41 | __u8 supervisor_stack[0]; |
42 | #endif | 42 | #endif |
43 | int uaccess_err; | 43 | int sig_on_uaccess_error:1; |
44 | int uaccess_err:1; /* uaccess failed */ | ||
44 | }; | 45 | }; |
45 | 46 | ||
46 | #define INIT_THREAD_INFO(tsk) \ | 47 | #define INIT_THREAD_INFO(tsk) \ |
@@ -231,6 +232,12 @@ static inline struct thread_info *current_thread_info(void) | |||
231 | movq PER_CPU_VAR(kernel_stack),reg ; \ | 232 | movq PER_CPU_VAR(kernel_stack),reg ; \ |
232 | subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg | 233 | subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg |
233 | 234 | ||
235 | /* | ||
236 | * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in | ||
237 | * a certain register (to be used in assembler memory operands). | ||
238 | */ | ||
239 | #define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) | ||
240 | |||
234 | #endif | 241 | #endif |
235 | 242 | ||
236 | #endif /* !X86_32 */ | 243 | #endif /* !X86_32 */ |
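GET_THREAD_INFO() loads PER_CPU_VAR(kernel_stack) and subtracts THREAD_SIZE-KERNEL_STACK_OFFSET; THREAD_INFO(reg, off) does the same arithmetic inside a memory operand when the register already holds kernel_stack minus a known offset. A short derivation, in comment form, for illustration:

/*
 * Illustration only.  If reg currently holds (kernel_stack - off), then
 * the operand  THREAD_INFO(reg, off)  addresses:
 *
 *     reg + KERNEL_STACK_OFFSET + off - THREAD_SIZE
 *   = kernel_stack + KERNEL_STACK_OFFSET - THREAD_SIZE
 *
 * which is the bottom of the current kernel stack, i.e. the same
 * struct thread_info pointer that GET_THREAD_INFO(reg) would have
 * produced, without clobbering a register to hold it.
 */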
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index c00692476e9f..800f77c60051 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void); | |||
130 | .balance_interval = 1, \ | 130 | .balance_interval = 1, \ |
131 | } | 131 | } |
132 | 132 | ||
133 | #ifdef CONFIG_X86_64 | ||
134 | extern int __node_distance(int, int); | 133 | extern int __node_distance(int, int); |
135 | #define node_distance(a, b) __node_distance(a, b) | 134 | #define node_distance(a, b) __node_distance(a, b) |
136 | #endif | ||
137 | 135 | ||
138 | #else /* !CONFIG_NUMA */ | 136 | #else /* !CONFIG_NUMA */ |
139 | 137 | ||
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 36361bf6fdd1..8be5f54d9360 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; }; | |||
462 | barrier(); | 462 | barrier(); |
463 | 463 | ||
464 | #define uaccess_catch(err) \ | 464 | #define uaccess_catch(err) \ |
465 | (err) |= current_thread_info()->uaccess_err; \ | 465 | (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ |
466 | current_thread_info()->uaccess_err = prev_err; \ | 466 | current_thread_info()->uaccess_err = prev_err; \ |
467 | } while (0) | 467 | } while (0) |
468 | 468 | ||
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 5abbea297e0c..7b3fe56b1c21 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = { | |||
16 | "100mhzsteps", | 16 | "100mhzsteps", |
17 | "hwpstate", | 17 | "hwpstate", |
18 | "", /* tsc invariant mapped to constant_tsc */ | 18 | "", /* tsc invariant mapped to constant_tsc */ |
19 | /* nothing */ | 19 | "cpb", /* core performance boost */ |
20 | "eff_freq_ro", /* Readonly aperf/mperf */ | ||
20 | }; | 21 | }; |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f3f6f5344001..22d0e21b4dd7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -625,6 +625,8 @@ work_notifysig: # deal with pending signals and | |||
625 | movl %esp, %eax | 625 | movl %esp, %eax |
626 | jne work_notifysig_v86 # returning to kernel-space or | 626 | jne work_notifysig_v86 # returning to kernel-space or |
627 | # vm86-space | 627 | # vm86-space |
628 | TRACE_IRQS_ON | ||
629 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
628 | xorl %edx, %edx | 630 | xorl %edx, %edx |
629 | call do_notify_resume | 631 | call do_notify_resume |
630 | jmp resume_userspace_sig | 632 | jmp resume_userspace_sig |
@@ -638,6 +640,8 @@ work_notifysig_v86: | |||
638 | #else | 640 | #else |
639 | movl %esp, %eax | 641 | movl %esp, %eax |
640 | #endif | 642 | #endif |
643 | TRACE_IRQS_ON | ||
644 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
641 | xorl %edx, %edx | 645 | xorl %edx, %edx |
642 | call do_notify_resume | 646 | call do_notify_resume |
643 | jmp resume_userspace_sig | 647 | jmp resume_userspace_sig |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index faf8d5e74b0b..a20e1cb9dc87 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64) | |||
221 | /*CFI_REL_OFFSET ss,0*/ | 221 | /*CFI_REL_OFFSET ss,0*/ |
222 | pushq_cfi %rax /* rsp */ | 222 | pushq_cfi %rax /* rsp */ |
223 | CFI_REL_OFFSET rsp,0 | 223 | CFI_REL_OFFSET rsp,0 |
224 | pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ | 224 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ |
225 | /*CFI_REL_OFFSET rflags,0*/ | 225 | /*CFI_REL_OFFSET rflags,0*/ |
226 | pushq_cfi $__KERNEL_CS /* cs */ | 226 | pushq_cfi $__KERNEL_CS /* cs */ |
227 | /*CFI_REL_OFFSET cs,0*/ | 227 | /*CFI_REL_OFFSET cs,0*/ |
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork) | |||
411 | RESTORE_REST | 411 | RESTORE_REST |
412 | 412 | ||
413 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | 413 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? |
414 | je int_ret_from_sys_call | 414 | jz retint_restore_args |
415 | 415 | ||
416 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | 416 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET |
417 | jnz int_ret_from_sys_call | 417 | jnz int_ret_from_sys_call |
@@ -465,7 +465,7 @@ ENTRY(system_call) | |||
465 | * after the swapgs, so that it can do the swapgs | 465 | * after the swapgs, so that it can do the swapgs |
466 | * for the guest and jump here on syscall. | 466 | * for the guest and jump here on syscall. |
467 | */ | 467 | */ |
468 | ENTRY(system_call_after_swapgs) | 468 | GLOBAL(system_call_after_swapgs) |
469 | 469 | ||
470 | movq %rsp,PER_CPU_VAR(old_rsp) | 470 | movq %rsp,PER_CPU_VAR(old_rsp) |
471 | movq PER_CPU_VAR(kernel_stack),%rsp | 471 | movq PER_CPU_VAR(kernel_stack),%rsp |
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs) | |||
478 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 478 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
479 | movq %rcx,RIP-ARGOFFSET(%rsp) | 479 | movq %rcx,RIP-ARGOFFSET(%rsp) |
480 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 480 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
481 | GET_THREAD_INFO(%rcx) | 481 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
482 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) | ||
483 | jnz tracesys | 482 | jnz tracesys |
484 | system_call_fastpath: | 483 | system_call_fastpath: |
485 | cmpq $__NR_syscall_max,%rax | 484 | cmpq $__NR_syscall_max,%rax |
@@ -496,10 +495,9 @@ ret_from_sys_call: | |||
496 | /* edi: flagmask */ | 495 | /* edi: flagmask */ |
497 | sysret_check: | 496 | sysret_check: |
498 | LOCKDEP_SYS_EXIT | 497 | LOCKDEP_SYS_EXIT |
499 | GET_THREAD_INFO(%rcx) | ||
500 | DISABLE_INTERRUPTS(CLBR_NONE) | 498 | DISABLE_INTERRUPTS(CLBR_NONE) |
501 | TRACE_IRQS_OFF | 499 | TRACE_IRQS_OFF |
502 | movl TI_flags(%rcx),%edx | 500 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx |
503 | andl %edi,%edx | 501 | andl %edi,%edx |
504 | jnz sysret_careful | 502 | jnz sysret_careful |
505 | CFI_REMEMBER_STATE | 503 | CFI_REMEMBER_STATE |
@@ -583,7 +581,7 @@ sysret_audit: | |||
583 | /* Do syscall tracing */ | 581 | /* Do syscall tracing */ |
584 | tracesys: | 582 | tracesys: |
585 | #ifdef CONFIG_AUDITSYSCALL | 583 | #ifdef CONFIG_AUDITSYSCALL |
586 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 584 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
587 | jz auditsys | 585 | jz auditsys |
588 | #endif | 586 | #endif |
589 | SAVE_REST | 587 | SAVE_REST |
@@ -612,8 +610,6 @@ tracesys: | |||
612 | GLOBAL(int_ret_from_sys_call) | 610 | GLOBAL(int_ret_from_sys_call) |
613 | DISABLE_INTERRUPTS(CLBR_NONE) | 611 | DISABLE_INTERRUPTS(CLBR_NONE) |
614 | TRACE_IRQS_OFF | 612 | TRACE_IRQS_OFF |
615 | testl $3,CS-ARGOFFSET(%rsp) | ||
616 | je retint_restore_args | ||
617 | movl $_TIF_ALLWORK_MASK,%edi | 613 | movl $_TIF_ALLWORK_MASK,%edi |
618 | /* edi: mask to check */ | 614 | /* edi: mask to check */ |
619 | GLOBAL(int_with_check) | 615 | GLOBAL(int_with_check) |
@@ -953,6 +949,7 @@ END(common_interrupt) | |||
953 | ENTRY(\sym) | 949 | ENTRY(\sym) |
954 | INTR_FRAME | 950 | INTR_FRAME |
955 | pushq_cfi $~(\num) | 951 | pushq_cfi $~(\num) |
952 | .Lcommon_\sym: | ||
956 | interrupt \do_sym | 953 | interrupt \do_sym |
957 | jmp ret_from_intr | 954 | jmp ret_from_intr |
958 | CFI_ENDPROC | 955 | CFI_ENDPROC |
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ | |||
976 | x86_platform_ipi smp_x86_platform_ipi | 973 | x86_platform_ipi smp_x86_platform_ipi |
977 | 974 | ||
978 | #ifdef CONFIG_SMP | 975 | #ifdef CONFIG_SMP |
979 | .irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | 976 | ALIGN |
977 | INTR_FRAME | ||
978 | .irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
980 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | 979 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 |
981 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | 980 | .if NUM_INVALIDATE_TLB_VECTORS > \idx |
982 | apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ | 981 | ENTRY(invalidate_interrupt\idx) |
983 | invalidate_interrupt\idx smp_invalidate_interrupt | 982 | pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) |
983 | jmp .Lcommon_invalidate_interrupt0 | ||
984 | CFI_ADJUST_CFA_OFFSET -8 | ||
985 | END(invalidate_interrupt\idx) | ||
984 | .endif | 986 | .endif |
985 | .endr | 987 | .endr |
988 | CFI_ENDPROC | ||
989 | apicinterrupt INVALIDATE_TLB_VECTOR_START, \ | ||
990 | invalidate_interrupt0, smp_invalidate_interrupt | ||
986 | #endif | 991 | #endif |
987 | 992 | ||
988 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 993 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ee5d4fbd53b4..15763af7bfe3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | |||
293 | regs.orig_ax = -1; | 293 | regs.orig_ax = -1; |
294 | regs.ip = (unsigned long) kernel_thread_helper; | 294 | regs.ip = (unsigned long) kernel_thread_helper; |
295 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | 295 | regs.cs = __KERNEL_CS | get_kernel_rpl(); |
296 | regs.flags = X86_EFLAGS_IF | 0x2; | 296 | regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; |
297 | 297 | ||
298 | /* Ok, create the new process.. */ | 298 | /* Ok, create the new process.. */ |
299 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); | 299 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a8e3eb83466c..fa1191fb679d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
306 | == NOTIFY_STOP) | 306 | == NOTIFY_STOP) |
307 | return; | 307 | return; |
308 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | 308 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ |
309 | #ifdef CONFIG_KPROBES | 309 | |
310 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | 310 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) |
311 | == NOTIFY_STOP) | 311 | == NOTIFY_STOP) |
312 | return; | 312 | return; |
313 | #else | ||
314 | if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) | ||
315 | == NOTIFY_STOP) | ||
316 | return; | ||
317 | #endif | ||
318 | 313 | ||
319 | preempt_conditional_sti(regs); | 314 | preempt_conditional_sti(regs); |
320 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | 315 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e4d4a22e8b94..b07ba9393564 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | |||
57 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), | 57 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
58 | }; | 58 | }; |
59 | 59 | ||
60 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; | 60 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; |
61 | 61 | ||
62 | static int __init vsyscall_setup(char *str) | 62 | static int __init vsyscall_setup(char *str) |
63 | { | 63 | { |
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
140 | return nr; | 140 | return nr; |
141 | } | 141 | } |
142 | 142 | ||
143 | static bool write_ok_or_segv(unsigned long ptr, size_t size) | ||
144 | { | ||
145 | /* | ||
146 | * XXX: if access_ok, get_user, and put_user handled | ||
147 | * sig_on_uaccess_error, this could go away. | ||
148 | */ | ||
149 | |||
150 | if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { | ||
151 | siginfo_t info; | ||
152 | struct thread_struct *thread = ¤t->thread; | ||
153 | |||
154 | thread->error_code = 6; /* user fault, no page, write */ | ||
155 | thread->cr2 = ptr; | ||
156 | thread->trap_no = 14; | ||
157 | |||
158 | memset(&info, 0, sizeof(info)); | ||
159 | info.si_signo = SIGSEGV; | ||
160 | info.si_errno = 0; | ||
161 | info.si_code = SEGV_MAPERR; | ||
162 | info.si_addr = (void __user *)ptr; | ||
163 | |||
164 | force_sig_info(SIGSEGV, &info, current); | ||
165 | return false; | ||
166 | } else { | ||
167 | return true; | ||
168 | } | ||
169 | } | ||
170 | |||
143 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | 171 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) |
144 | { | 172 | { |
145 | struct task_struct *tsk; | 173 | struct task_struct *tsk; |
146 | unsigned long caller; | 174 | unsigned long caller; |
147 | int vsyscall_nr; | 175 | int vsyscall_nr; |
176 | int prev_sig_on_uaccess_error; | ||
148 | long ret; | 177 | long ret; |
149 | 178 | ||
150 | /* | 179 | /* |
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
180 | if (seccomp_mode(&tsk->seccomp)) | 209 | if (seccomp_mode(&tsk->seccomp)) |
181 | do_exit(SIGKILL); | 210 | do_exit(SIGKILL); |
182 | 211 | ||
212 | /* | ||
213 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
214 | * preserve that behavior to make writing exploits harder. | ||
215 | */ | ||
216 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
217 | current_thread_info()->sig_on_uaccess_error = 1; | ||
218 | |||
219 | /* | ||
220 | * 0 is a valid user pointer (in the access_ok sense) on 32-bit and | ||
221 | * 64-bit, so we don't need to special-case it here. For all the | ||
222 | * vsyscalls, 0 means "don't write anything" not "write it at | ||
223 | * address 0". | ||
224 | */ | ||
225 | ret = -EFAULT; | ||
183 | switch (vsyscall_nr) { | 226 | switch (vsyscall_nr) { |
184 | case 0: | 227 | case 0: |
228 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || | ||
229 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) | ||
230 | break; | ||
231 | |||
185 | ret = sys_gettimeofday( | 232 | ret = sys_gettimeofday( |
186 | (struct timeval __user *)regs->di, | 233 | (struct timeval __user *)regs->di, |
187 | (struct timezone __user *)regs->si); | 234 | (struct timezone __user *)regs->si); |
188 | break; | 235 | break; |
189 | 236 | ||
190 | case 1: | 237 | case 1: |
238 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) | ||
239 | break; | ||
240 | |||
191 | ret = sys_time((time_t __user *)regs->di); | 241 | ret = sys_time((time_t __user *)regs->di); |
192 | break; | 242 | break; |
193 | 243 | ||
194 | case 2: | 244 | case 2: |
245 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
246 | !write_ok_or_segv(regs->si, sizeof(unsigned))) | ||
247 | break; | ||
248 | |||
195 | ret = sys_getcpu((unsigned __user *)regs->di, | 249 | ret = sys_getcpu((unsigned __user *)regs->di, |
196 | (unsigned __user *)regs->si, | 250 | (unsigned __user *)regs->si, |
197 | 0); | 251 | 0); |
198 | break; | 252 | break; |
199 | } | 253 | } |
200 | 254 | ||
255 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; | ||
256 | |||
201 | if (ret == -EFAULT) { | 257 | if (ret == -EFAULT) { |
202 | /* | 258 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ |
203 | * Bad news -- userspace fed a bad pointer to a vsyscall. | ||
204 | * | ||
205 | * With a real vsyscall, that would have caused SIGSEGV. | ||
206 | * To make writing reliable exploits using the emulated | ||
207 | * vsyscalls harder, generate SIGSEGV here as well. | ||
208 | */ | ||
209 | warn_bad_vsyscall(KERN_INFO, regs, | 259 | warn_bad_vsyscall(KERN_INFO, regs, |
210 | "vsyscall fault (exploit attempt?)"); | 260 | "vsyscall fault (exploit attempt?)"); |
211 | goto sigsegv; | 261 | |
262 | /* | ||
263 | * If we failed to generate a signal for any reason, | ||
264 | * generate one here. (This should be impossible.) | ||
265 | */ | ||
266 | if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && | ||
267 | !sigismember(&tsk->pending.signal, SIGSEGV))) | ||
268 | goto sigsegv; | ||
269 | |||
270 | return true; /* Don't emulate the ret. */ | ||
212 | } | 271 | } |
213 | 272 | ||
214 | regs->ax = ret; | 273 | regs->ax = ret; |
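The visible effect of write_ok_or_segv() plus sig_on_uaccess_error is that a bad pointer handed to an emulated vsyscall now raises SIGSEGV, as a real vsyscall would have, instead of returning -EFAULT. A hypothetical userspace test, assuming the historical fixed vsyscall addresses:

/* Hypothetical test, not part of the patch: hand an unmapped pointer to
 * the legacy gettimeofday vsyscall (historical fixed address
 * 0xffffffffff600000).  With this change the emulated path delivers
 * SIGSEGV instead of returning -EFAULT. */
#include <signal.h>
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

typedef long (*vgtod_t)(struct timeval *, struct timezone *);

static void on_segv(int sig)
{
	static const char msg[] = "SIGSEGV delivered\n";

	write(STDERR_FILENO, msg, sizeof(msg) - 1);
	_exit(0);
}

int main(void)
{
	vgtod_t vgtod = (vgtod_t)0xffffffffff600000UL;

	signal(SIGSEGV, on_segv);
	vgtod((struct timeval *)1, NULL);	/* bogus, unmapped pointer */
	printf("no signal; the emulation returned instead\n");
	return 1;
}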
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index 82004d2bf05e..bd59090825db 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -164,15 +164,13 @@ EXPORT_SYMBOL(strchr); | |||
164 | size_t strlen(const char *s) | 164 | size_t strlen(const char *s) |
165 | { | 165 | { |
166 | int d0; | 166 | int d0; |
167 | int res; | 167 | size_t res; |
168 | asm volatile("repne\n\t" | 168 | asm volatile("repne\n\t" |
169 | "scasb\n\t" | 169 | "scasb" |
170 | "notl %0\n\t" | ||
171 | "decl %0" | ||
172 | : "=c" (res), "=&D" (d0) | 170 | : "=c" (res), "=&D" (d0) |
173 | : "1" (s), "a" (0), "0" (0xffffffffu) | 171 | : "1" (s), "a" (0), "0" (0xffffffffu) |
174 | : "memory"); | 172 | : "memory"); |
175 | return res; | 173 | return ~res - 1; |
176 | } | 174 | } |
177 | EXPORT_SYMBOL(strlen); | 175 | EXPORT_SYMBOL(strlen); |
178 | #endif | 176 | #endif |
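The notl/decl pair moves out of the asm and becomes `~res - 1` in C; the remaining asm only counts bytes. A quick check of the arithmetic, illustration only, using 32-bit quantities as in string_32.c:

/* Arithmetic check, illustration only.  After "repne scasb" with %ecx
 * preloaded to 0xffffffff, %ecx has been decremented once per byte
 * scanned, including the terminating NUL.  For a string of length n:
 *
 *     res      = 0xffffffff - (n + 1)
 *     ~res     = n + 1
 *     ~res - 1 = n        (exactly what notl + decl used to compute)
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t n = 5;				/* e.g. strlen("hello") */
	uint32_t res = 0xffffffffu - (n + 1);	/* %ecx after the scan */

	assert((uint32_t)(~res - 1) == n);
	return 0;
}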
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index d0474ad2a6e5..1fb85dbe390a 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -25,7 +25,7 @@ int fixup_exception(struct pt_regs *regs) | |||
25 | if (fixup) { | 25 | if (fixup) { |
26 | /* If fixup is less than 16, it means uaccess error */ | 26 | /* If fixup is less than 16, it means uaccess error */ |
27 | if (fixup->fixup < 16) { | 27 | if (fixup->fixup < 16) { |
28 | current_thread_info()->uaccess_err = -EFAULT; | 28 | current_thread_info()->uaccess_err = 1; |
29 | regs->ip += fixup->fixup; | 29 | regs->ip += fixup->fixup; |
30 | return 1; | 30 | return 1; |
31 | } | 31 | } |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 5db0490deb07..9d74824a708d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -626,7 +626,7 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code, | |||
626 | 626 | ||
627 | static noinline void | 627 | static noinline void |
628 | no_context(struct pt_regs *regs, unsigned long error_code, | 628 | no_context(struct pt_regs *regs, unsigned long error_code, |
629 | unsigned long address) | 629 | unsigned long address, int signal, int si_code) |
630 | { | 630 | { |
631 | struct task_struct *tsk = current; | 631 | struct task_struct *tsk = current; |
632 | unsigned long *stackend; | 632 | unsigned long *stackend; |
@@ -634,8 +634,17 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
634 | int sig; | 634 | int sig; |
635 | 635 | ||
636 | /* Are we prepared to handle this kernel fault? */ | 636 | /* Are we prepared to handle this kernel fault? */ |
637 | if (fixup_exception(regs)) | 637 | if (fixup_exception(regs)) { |
638 | if (current_thread_info()->sig_on_uaccess_error && signal) { | ||
639 | tsk->thread.trap_no = 14; | ||
640 | tsk->thread.error_code = error_code | PF_USER; | ||
641 | tsk->thread.cr2 = address; | ||
642 | |||
643 | /* XXX: hwpoison faults will set the wrong code. */ | ||
644 | force_sig_info_fault(signal, si_code, address, tsk, 0); | ||
645 | } | ||
638 | return; | 646 | return; |
647 | } | ||
639 | 648 | ||
640 | /* | 649 | /* |
641 | * 32-bit: | 650 | * 32-bit: |
@@ -755,7 +764,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | |||
755 | if (is_f00f_bug(regs, address)) | 764 | if (is_f00f_bug(regs, address)) |
756 | return; | 765 | return; |
757 | 766 | ||
758 | no_context(regs, error_code, address); | 767 | no_context(regs, error_code, address, SIGSEGV, si_code); |
759 | } | 768 | } |
760 | 769 | ||
761 | static noinline void | 770 | static noinline void |
@@ -819,7 +828,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, | |||
819 | 828 | ||
820 | /* Kernel mode? Handle exceptions or die: */ | 829 | /* Kernel mode? Handle exceptions or die: */ |
821 | if (!(error_code & PF_USER)) { | 830 | if (!(error_code & PF_USER)) { |
822 | no_context(regs, error_code, address); | 831 | no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); |
823 | return; | 832 | return; |
824 | } | 833 | } |
825 | 834 | ||
@@ -854,7 +863,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | |||
854 | if (!(fault & VM_FAULT_RETRY)) | 863 | if (!(fault & VM_FAULT_RETRY)) |
855 | up_read(¤t->mm->mmap_sem); | 864 | up_read(¤t->mm->mmap_sem); |
856 | if (!(error_code & PF_USER)) | 865 | if (!(error_code & PF_USER)) |
857 | no_context(regs, error_code, address); | 866 | no_context(regs, error_code, address, 0, 0); |
858 | return 1; | 867 | return 1; |
859 | } | 868 | } |
860 | if (!(fault & VM_FAULT_ERROR)) | 869 | if (!(fault & VM_FAULT_ERROR)) |
@@ -864,7 +873,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | |||
864 | /* Kernel mode? Handle exceptions or die: */ | 873 | /* Kernel mode? Handle exceptions or die: */ |
865 | if (!(error_code & PF_USER)) { | 874 | if (!(error_code & PF_USER)) { |
866 | up_read(¤t->mm->mmap_sem); | 875 | up_read(¤t->mm->mmap_sem); |
867 | no_context(regs, error_code, address); | 876 | no_context(regs, error_code, address, |
877 | SIGSEGV, SEGV_MAPERR); | ||
868 | return 1; | 878 | return 1; |
869 | } | 879 | } |
870 | 880 | ||
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 4a01967f02e7..4cf9bd0a1653 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -238,7 +238,8 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm, | |||
238 | 238 | ||
239 | spin_lock_irqsave(&rtc_lock, flags); | 239 | spin_lock_irqsave(&rtc_lock, flags); |
240 | efi_call_phys_prelog(); | 240 | efi_call_phys_prelog(); |
241 | status = efi_call_phys2(efi_phys.get_time, tm, tc); | 241 | status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), |
242 | virt_to_phys(tc)); | ||
242 | efi_call_phys_epilog(); | 243 | efi_call_phys_epilog(); |
243 | spin_unlock_irqrestore(&rtc_lock, flags); | 244 | spin_unlock_irqrestore(&rtc_lock, flags); |
244 | return status; | 245 | return status; |
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 65af42f2d593..39809035320a 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -697,7 +697,7 @@ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) | |||
697 | * interrupts are enabled. We always leave interrupts enabled while | 697 | * interrupts are enabled. We always leave interrupts enabled while |
698 | * running the Guest. | 698 | * running the Guest. |
699 | */ | 699 | */ |
700 | regs->eflags = X86_EFLAGS_IF | 0x2; | 700 | regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; |
701 | 701 | ||
702 | /* | 702 | /* |
703 | * The "Extended Instruction Pointer" register says where the Guest is | 703 | * The "Extended Instruction Pointer" register says where the Guest is |
diff --git a/mm/slub.c b/mm/slub.c
@@ -368,7 +368,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page | |||
368 | VM_BUG_ON(!irqs_disabled()); | 368 | VM_BUG_ON(!irqs_disabled()); |
369 | #ifdef CONFIG_CMPXCHG_DOUBLE | 369 | #ifdef CONFIG_CMPXCHG_DOUBLE |
370 | if (s->flags & __CMPXCHG_DOUBLE) { | 370 | if (s->flags & __CMPXCHG_DOUBLE) { |
371 | if (cmpxchg_double(&page->freelist, | 371 | if (cmpxchg_double(&page->freelist, &page->counters, |
372 | freelist_old, counters_old, | 372 | freelist_old, counters_old, |
373 | freelist_new, counters_new)) | 373 | freelist_new, counters_new)) |
374 | return 1; | 374 | return 1; |
@@ -402,7 +402,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, | |||
402 | { | 402 | { |
403 | #ifdef CONFIG_CMPXCHG_DOUBLE | 403 | #ifdef CONFIG_CMPXCHG_DOUBLE |
404 | if (s->flags & __CMPXCHG_DOUBLE) { | 404 | if (s->flags & __CMPXCHG_DOUBLE) { |
405 | if (cmpxchg_double(&page->freelist, | 405 | if (cmpxchg_double(&page->freelist, &page->counters, |
406 | freelist_old, counters_old, | 406 | freelist_old, counters_old, |
407 | freelist_new, counters_new)) | 407 | freelist_new, counters_new)) |
408 | return 1; | 408 | return 1; |