 arch/x86_64/Kconfig.debug     |   4
 arch/x86_64/ia32/ia32entry.S  |  19
 arch/x86_64/kernel/entry.S    | 188
 arch/x86_64/kernel/irq.c      |   4
 include/asm-x86_64/irqflags.h |  61
 include/asm-x86_64/system.h   |  38
 6 files changed, 223 insertions(+), 91 deletions(-)
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 1d92ab56c0f9..775d211a5cf9 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -1,5 +1,9 @@
 menu "Kernel hacking"
 
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
 source "lib/Kconfig.debug"
 
 config DEBUG_RODATA
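
TRACE_IRQFLAGS_SUPPORT is the architecture's opt-in: it tells the generic irq-flags tracing code (used by lockdep) that x86_64 now provides the raw_local_irq_*() primitives that the generic <linux/irqflags.h> layer wraps. A minimal sketch of what that generic layer does with them when CONFIG_TRACE_IRQFLAGS is enabled - the shape follows the companion generic-irqflags patch, with trace_hardirqs_on()/trace_hardirqs_off() as the tracer entry points:

	#ifdef CONFIG_TRACE_IRQFLAGS
	/* announce first, then really enable: */
	# define local_irq_enable() \
		do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
	/* really disable first, then announce: */
	# define local_irq_disable() \
		do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
	#else
	# define local_irq_enable()	raw_local_irq_enable()
	# define local_irq_disable()	raw_local_irq_disable()
	#endif

Both orderings ensure the tracer's software flag only changes while hardware interrupts are off, so an ordinary interrupt can never observe the two flags mid-update. (An NMI still can - see the paranoidexit changes below.)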
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index c536fa98ea37..9b5bb413a6e9 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -13,6 +13,7 @@
 #include <asm/thread_info.h>
 #include <asm/segment.h>
 #include <asm/vsyscall32.h>
+#include <asm/irqflags.h>
 #include <linux/linkage.h>
 
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target)
 	swapgs
 	movq	%gs:pda_kernelstack, %rsp
 	addq	$(PDA_STACKOFFSET),%rsp
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs, here we enable it straight after entry:
+	 */
 	sti
 	movl	%ebp,%ebp		/* zero extension */
 	pushq	$__USER32_DS
@@ -118,6 +123,7 @@ sysenter_do_call:
 	movq	%rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	cli
+	TRACE_IRQS_OFF
 	testl	$_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
 	jnz	int_ret_from_sys_call
 	andl	$~TS_COMPAT,threadinfo_status(%r10)
@@ -132,6 +138,7 @@ sysenter_do_call:
 	CFI_REGISTER rsp,rcx
 	movl	$VSYSCALL32_SYSEXIT,%edx	/* User %eip */
 	CFI_REGISTER rip,rdx
+	TRACE_IRQS_ON
 	swapgs
 	sti		/* sti only takes effect after the next instruction */
 	/* sysexit */
@@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target)
 	movl	%esp,%r8d
 	CFI_REGISTER	rsp,r8
 	movq	%gs:pda_kernelstack,%rsp
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs and here we enable it straight after entry:
+	 */
 	sti
 	SAVE_ARGS 8,1,1
 	movl	%eax,%eax	/* zero extension */
@@ -220,6 +231,7 @@ cstar_do_call:
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	cli
+	TRACE_IRQS_OFF
 	testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
 	jnz int_ret_from_sys_call
 	andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -228,6 +240,7 @@ cstar_do_call:
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d
 	/*CFI_REGISTER rflags,r11*/
+	TRACE_IRQS_ON
 	movl RSP-ARGOFFSET(%rsp),%esp
 	CFI_RESTORE rsp
 	swapgs
@@ -286,7 +299,11 @@ ENTRY(ia32_syscall)
 	/*CFI_REL_OFFSET	rflags,EFLAGS-RIP*/
 	/*CFI_REL_OFFSET	cs,CS-RIP*/
 	CFI_REL_OFFSET	rip,RIP-RIP
 	swapgs
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs and here we enable it straight after entry:
+	 */
 	sti
 	movl	%eax,%eax
 	pushq	%rax
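
Every annotation in this file follows one pairing rule: TRACE_IRQS_OFF goes directly after a cli, TRACE_IRQS_ON directly before an sti (the entry-path sti instructions are exempted by the comments above, since the hardware entered with interrupts off and no traced code runs in between). A runnable C model of why that ordering is the safe one - cli/sti/trace_* here are stand-in stubs, not the kernel symbols:

	#include <assert.h>

	static int hw_irqs_on = 1;	/* what the CPU flag says */
	static int sw_irqs_on = 1;	/* what the tracer thinks */

	static void cli(void) { hw_irqs_on = 0; }
	static void sti(void) { hw_irqs_on = 1; }
	/* the tracer state must only change while hard irqs are off: */
	static void trace_irqs_off(void) { assert(!hw_irqs_on); sw_irqs_on = 0; }
	static void trace_irqs_on(void)  { assert(!hw_irqs_on); sw_irqs_on = 1; }

	int main(void)
	{
		cli();			/* disable: hardware first...	*/
		trace_irqs_off();	/* ...then the tracer		*/
		trace_irqs_on();	/* enable: tracer first...	*/
		sti();			/* ...then the hardware		*/
		return 0;
	}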
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed92c2983606..d464dded68c0 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,13 +41,24 @@
 #include <asm/thread_info.h>
 #include <asm/hw_irq.h>
 #include <asm/page.h>
+#include <asm/irqflags.h>
 
 	.code64
 
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
 #endif
 
+
+.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	jnc  1f
+	TRACE_IRQS_ON
+1:
+#endif
+.endm
+
 /*
  * C code is not supposed to know about undefined top of stack. Every time
  * a C function with an pt_regs argument is called from the SYSCALL based
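
TRACE_IRQS_IRETQ covers the return-with-iretq case: there is no sti, interrupts come back on (or not) according to the IF bit - bit 9 - of the EFLAGS image saved in the stack frame. The macro's test, rendered as C (a sketch; trace_hardirqs_on() is the tracer entry point that TRACE_IRQS_ON ultimately invokes):

	#define X86_EFLAGS_IF (1UL << 9)	/* the interrupt-enable flag */

	/* bt $9,EFLAGS-\offset(%rsp); jnc 1f; TRACE_IRQS_ON; 1: */
	static inline void trace_irqs_iretq(unsigned long saved_eflags)
	{
		if (saved_eflags & X86_EFLAGS_IF)
			trace_hardirqs_on();	/* iretq is about to re-enable irqs */
	}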
@@ -194,6 +205,10 @@ ENTRY(system_call)
 	swapgs
 	movq	%rsp,%gs:pda_oldrsp
 	movq	%gs:pda_kernelstack,%rsp
+	/*
+	 * No need to follow this irqs off/on section - it's straight
+	 * and short:
+	 */
 	sti
 	SAVE_ARGS 8,1
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
@@ -219,10 +234,15 @@ ret_from_sys_call:
 sysret_check:
 	GET_THREAD_INFO(%rcx)
 	cli
+	TRACE_IRQS_OFF
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
 	CFI_REMEMBER_STATE
 	jnz  sysret_careful
+	/*
+	 * sysretq will re-enable interrupts:
+	 */
+	TRACE_IRQS_ON
 	movq RIP-ARGOFFSET(%rsp),%rcx
 	CFI_REGISTER rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
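
The TRACE_IRQS_ON on the fast path looks unpaired - there is no sti - but sysretq itself re-enables interrupts: it reloads RFLAGS from %r11, and RESTORE_ARGS has just refilled %r11 with the saved user flags, IF included. A compact model of the fast-path tail (hypothetical helper names, illustration only):

	/* sysret_check at C level (sketch): */
	static void sysret_tail(struct pt_regs *regs)
	{
		local_irq_disable();		/* cli			*/
		trace_hardirqs_off();		/* TRACE_IRQS_OFF	*/
		if (no_work_pending()) {
			trace_hardirqs_on();	/* sysretq restores IF from %r11 */
			return_with_sysretq(regs);
		}
		/* otherwise: the sysret_careful / sysret_signal slow paths */
	}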
@@ -237,6 +257,7 @@ sysret_careful:
 	CFI_RESTORE_STATE
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
+	TRACE_IRQS_ON
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
@@ -247,6 +268,7 @@ sysret_careful:
 
 	/* Handle a signal */
 sysret_signal:
+	TRACE_IRQS_ON
 	sti
 	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    1f
@@ -261,6 +283,7 @@ sysret_signal:
 	/* Use IRET because user could have changed frame. This
 	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
 	cli
+	TRACE_IRQS_OFF
 	jmp int_with_check
 	
 badsys:
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call)
 	CFI_REL_OFFSET	r10,R10-ARGOFFSET
 	CFI_REL_OFFSET	r11,R11-ARGOFFSET
 	cli
+	TRACE_IRQS_OFF
 	testl $3,CS-ARGOFFSET(%rsp)
 	je retint_restore_args
 	movl $_TIF_ALLWORK_MASK,%edi
@@ -327,6 +351,7 @@ int_with_check:
 int_careful:
 	bt $TIF_NEED_RESCHED,%edx
 	jnc  int_very_careful
+	TRACE_IRQS_ON
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
@@ -334,10 +359,12 @@ int_careful:
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
 	cli
+	TRACE_IRQS_OFF
 	jmp int_with_check
 
 	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
+	TRACE_IRQS_ON
 	sti
 	SAVE_REST
 	/* Check for syscall exit trace */
@@ -351,6 +378,7 @@ int_very_careful:
 	CFI_ADJUST_CFA_OFFSET -8
 	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
 	cli
+	TRACE_IRQS_OFF
 	jmp int_restore_rest
 	
int_signal:
@@ -363,6 +391,7 @@ int_signal:
int_restore_rest:
 	RESTORE_REST
 	cli
+	TRACE_IRQS_OFF
 	jmp int_with_check
 	CFI_ENDPROC
 END(int_ret_from_sys_call)
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn)
 	swapgs
1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
 	cmoveq %gs:pda_irqstackptr,%rsp
+	/*
+	 * We entered an interrupt context - irqs are off:
+	 */
+	TRACE_IRQS_OFF
 	call \func
 	.endm
 
@@ -493,6 +526,7 @@ ENTRY(common_interrupt)
 	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
 	cli
+	TRACE_IRQS_OFF
 	decl %gs:pda_irqcount
 	leaveq
 	CFI_DEF_CFA_REGISTER	rsp
@@ -515,9 +549,21 @@ retint_check:
 	CFI_REMEMBER_STATE
 	jnz  retint_careful
retint_swapgs:
+	/*
+	 * The iretq could re-enable interrupts:
+	 */
+	cli
+	TRACE_IRQS_IRETQ
 	swapgs
+	jmp restore_args
+
retint_restore_args:
 	cli
+	/*
+	 * The iretq could re-enable interrupts:
+	 */
+	TRACE_IRQS_IRETQ
+restore_args:
 	RESTORE_ARGS 0,8,0
iret_label:
 	iretq
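
Both interrupt-return paths now disable interrupts, log the possible IF restore, and fall into a shared restore_args tail; retint_swapgs additionally switches back to the user %gs first. The structure as C pseudocode (helper names hypothetical, illustration only):

	static void retint_tail(struct pt_regs *regs, bool to_user)
	{
		local_irq_disable();			/* cli			*/
		if (regs->eflags & X86_EFLAGS_IF)	/* TRACE_IRQS_IRETQ	*/
			trace_hardirqs_on();
		if (to_user)
			swapgs();			/* retint_swapgs only	*/
		restore_args_and_iretq(regs);		/* shared restore_args	*/
	}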
@@ -530,6 +576,7 @@ iret_label:
 	/* running with kernel gs */
bad_iret:
 	movq $11,%rdi	/* SIGSEGV */
+	TRACE_IRQS_ON
 	sti
 	jmp do_exit
 	.previous
@@ -539,6 +586,7 @@ retint_careful:
 	CFI_RESTORE_STATE
 	bt    $TIF_NEED_RESCHED,%edx
 	jnc   retint_signal
+	TRACE_IRQS_ON
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET	8
@@ -547,11 +595,13 @@ retint_careful:
 	CFI_ADJUST_CFA_OFFSET	-8
 	GET_THREAD_INFO(%rcx)
 	cli
+	TRACE_IRQS_OFF
 	jmp retint_check
 	
retint_signal:
 	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    retint_swapgs
+	TRACE_IRQS_ON
 	sti
 	SAVE_REST
 	movq $-1,ORIG_RAX(%rsp)
@@ -560,6 +610,7 @@ retint_signal:
 	call do_notify_resume
 	RESTORE_REST
 	cli
+	TRACE_IRQS_OFF
 	movl $_TIF_NEED_RESCHED,%edi
 	GET_THREAD_INFO(%rcx)
 	jmp retint_check
@@ -666,7 +717,7 @@ END(spurious_interrupt)
 
 	/* error code is on the stack already */
 	/* handle NMI like exceptions that can happen everywhere */
-	.macro paranoidentry sym, ist=0
+	.macro paranoidentry sym, ist=0, irqtrace=1
 	SAVE_ALL
 	cld
 	movl $1,%ebx
@@ -691,8 +742,73 @@ END(spurious_interrupt)
 	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
 	.endif
 	cli
+	.if \irqtrace
+	TRACE_IRQS_OFF
+	.endif
 	.endm
 
+/*
+ * "Paranoid" exit path from exception stack.
+ * Paranoid because this is used by NMIs and cannot take
+ * any kernel state for granted.
+ * We don't do kernel preemption checks here, because only
+ * NMI should be common and it does not enable IRQs and
+ * cannot get reschedule ticks.
+ *
+ * "trace" is 0 for the NMI handler only, because irq-tracing
+ * is fundamentally NMI-unsafe. (we cannot change the soft and
+ * hard flags at once, atomically)
+ */
+	.macro paranoidexit trace=1
+	/* ebx:	no swapgs flag */
+paranoid_exit\trace:
+	testl %ebx,%ebx				/* swapgs needed? */
+	jnz paranoid_restore\trace
+	testl $3,CS(%rsp)
+	jnz   paranoid_userspace\trace
+paranoid_swapgs\trace:
+	TRACE_IRQS_IRETQ 0
+	swapgs
+paranoid_restore\trace:
+	RESTORE_ALL 8
+	iretq
+paranoid_userspace\trace:
+	GET_THREAD_INFO(%rcx)
+	movl threadinfo_flags(%rcx),%ebx
+	andl $_TIF_WORK_MASK,%ebx
+	jz paranoid_swapgs\trace
+	movq %rsp,%rdi			/* &pt_regs */
+	call sync_regs
+	movq %rax,%rsp			/* switch stack for scheduling */
+	testl $_TIF_NEED_RESCHED,%ebx
+	jnz paranoid_schedule\trace
+	movl %ebx,%edx			/* arg3: thread flags */
+	.if \trace
+	TRACE_IRQS_ON
+	.endif
+	sti
+	xorl %esi,%esi			/* arg2: oldset */
+	movq %rsp,%rdi			/* arg1: &pt_regs */
+	call do_notify_resume
+	cli
+	.if \trace
+	TRACE_IRQS_OFF
+	.endif
+	jmp paranoid_userspace\trace
+paranoid_schedule\trace:
+	.if \trace
+	TRACE_IRQS_ON
+	.endif
+	sti
+	call schedule
+	cli
+	.if \trace
+	TRACE_IRQS_OFF
+	.endif
+	jmp paranoid_userspace\trace
+	CFI_ENDPROC
+	.endm
+
 /*
  * Exception entry point. This expects an error code/orig_rax on the stack
  * and the exception handler in %rax.
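
Every label inside paranoidexit carries the \trace suffix, so the default trace=1 instantiation (in the debug handler below) emits the paranoid_exit1/paranoid_swapgs1/... family that int3, double_fault, stack_segment and machine_check jump into, while `paranoidexit 0` in the NMI path emits an untraced paranoid_exit0 twin. The exit decision itself, modeled in C (illustrative only, hypothetical helpers):

	static void paranoid_exit_model(struct pt_regs *regs, int no_swapgs,
					int trace)
	{
		if (!no_swapgs) {			/* %ebx == 0?		*/
			if (user_mode(regs))		/* testl $3,CS(%rsp)	*/
				handle_user_work(regs);	/* notify/resched loop	*/
			if (trace && (regs->eflags & X86_EFLAGS_IF))
				trace_hardirqs_on();	/* TRACE_IRQS_IRETQ 0	*/
			swapgs();
		}
		restore_all_and_iretq(regs);		/* RESTORE_ALL 8; iretq	*/
	}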
@@ -748,6 +864,7 @@ error_exit:
 	movl %ebx,%eax
 	RESTORE_REST
 	cli
+	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
 	testl %eax,%eax
 	jne  retint_kernel
@@ -755,6 +872,10 @@ error_exit:
 	movl  $_TIF_WORK_MASK,%edi
 	andl  %edi,%edx
 	jnz  retint_careful
+	/*
+	 * The iret might restore flags:
+	 */
+	TRACE_IRQS_IRETQ
 	swapgs
 	RESTORE_ARGS 0,8,0
 	jmp iret_label
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug)
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8
 	paranoidentry do_debug, DEBUG_STACK
-	jmp paranoid_exit
-	CFI_ENDPROC
+	paranoidexit
 END(debug)
 	.previous .text
 
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi)
 	INTR_FRAME
 	pushq $-1
 	CFI_ADJUST_CFA_OFFSET 8
-	paranoidentry do_nmi
-	/*
-	 * "Paranoid" exit path from exception stack.
-	 * Paranoid because this is used by NMIs and cannot take
-	 * any kernel state for granted.
-	 * We don't do kernel preemption checks here, because only
-	 * NMI should be common and it does not enable IRQs and
-	 * cannot get reschedule ticks.
-	 */
-	/* ebx:	no swapgs flag */
-paranoid_exit:
-	testl %ebx,%ebx				/* swapgs needed? */
-	jnz paranoid_restore
-	testl $3,CS(%rsp)
-	jnz   paranoid_userspace
-paranoid_swapgs:
-	swapgs
-paranoid_restore:
-	RESTORE_ALL 8
-	iretq
-paranoid_userspace:
-	GET_THREAD_INFO(%rcx)
-	movl threadinfo_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz paranoid_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz paranoid_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	sti
-	xorl %esi,%esi			/* arg2: oldset */
-	movq %rsp,%rdi			/* arg1: &pt_regs */
-	call do_notify_resume
-	cli
-	jmp paranoid_userspace
-paranoid_schedule:
-	sti
-	call schedule
-	cli
-	jmp paranoid_userspace
-	CFI_ENDPROC
+	paranoidentry do_nmi, 0, 0
+#ifdef CONFIG_TRACE_IRQFLAGS
+	paranoidexit 0
+#else
+	jmp paranoid_exit1
+	CFI_ENDPROC
+#endif
 END(nmi)
 	.previous .text
 
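
The NMI handler is the one consumer that must stay untraced: paranoidentry gets irqtrace=0, and the exit is either a private untraced `paranoidexit 0` expansion or - when CONFIG_TRACE_IRQFLAGS is off and the TRACE_IRQS_* macros are empty anyway - a plain jump into the shared paranoid_exit1. The reason is the non-atomicity noted in the macro comment; a sketch of the window (kernel-context fragment, for illustration):

	static void why_nmi_is_untraced(void)
	{
		raw_local_irq_disable();	/* hardware flag: off		*/
		/*
		 * An NMI arriving right here sees hw=off while the tracer
		 * still says on - the two flags cannot be flipped in one
		 * atomic step, so NMI paths must neither consult nor
		 * update the tracer state.
		 */
		trace_hardirqs_off();		/* software flag catches up	*/
	}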
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3)
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8
 	paranoidentry do_int3, DEBUG_STACK
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(int3)
 	.previous .text
@@ -1006,7 +1090,7 @@ END(reserved)
 ENTRY(double_fault)
 	XCPT_FRAME
 	paranoidentry do_double_fault
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(double_fault)
 
@@ -1022,7 +1106,7 @@ END(segment_not_present)
 ENTRY(stack_segment)
 	XCPT_FRAME
 	paranoidentry do_stack_segment
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(stack_segment)
 
@@ -1050,7 +1134,7 @@ ENTRY(machine_check)
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8
 	paranoidentry do_machine_check
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(machine_check)
 #endif
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index a1f1df5f7bfc..5221a53e90c1 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void)
 	local_irq_save(flags);
 	pending = local_softirq_pending();
 	/* Switch to interrupt stack */
-	if (pending)
+	if (pending) {
 		call_softirq();
+		WARN_ON_ONCE(softirq_count());
+	}
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(do_softirq);
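
softirq_count() extracts the softirq nesting bits from preempt_count(); once call_softirq() has unwound, they must be zero again. A handler that leaks the count would leave this CPU looking permanently in-softirq, which would also poison the irq-state bookkeeping added above, so it is now flagged once. A hypothetical bug the check would catch (illustration only, not from the patch):

	static void leaky_tasklet(unsigned long data)
	{
		local_bh_disable();
		if (some_error_condition())
			return;	/* forgot local_bh_enable(): softirq_count()
				   stays nonzero and WARN_ON_ONCE() fires */
		local_bh_enable();
	}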
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
new file mode 100644
index 000000000000..22f3c06b247e
--- /dev/null
+++ b/include/asm-x86_64/irqflags.h
@@ -0,0 +1,61 @@
+/*
+ * include/asm-x86_64/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+/* interrupt control.. */
+#define raw_local_save_flags(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
+#define raw_local_irq_restore(x)	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
+
+#ifdef CONFIG_X86_VSMP
+/* Interrupt control for VSMP architecture */
+#define raw_local_irq_disable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
+#define raw_local_irq_enable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
+
+#define raw_irqs_disabled_flags(flags)	\
+({					\
+	(flags & (1<<18)) || !(flags & (1<<9));	\
+})
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x)	do { raw_local_save_flags(x); raw_local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
+#else /* CONFIG_X86_VSMP */
+#define raw_local_irq_disable()	__asm__ __volatile__("cli": : :"memory")
+#define raw_local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
+
+#define raw_irqs_disabled_flags(flags)	\
+({					\
+	!(flags & (1<<9));		\
+})
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# raw_local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
+#endif
+
+#define raw_irqs_disabled()		\
+({					\
+	unsigned long flags;		\
+	raw_local_save_flags(flags);	\
+	raw_irqs_disabled_flags(flags);	\
+})
+
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define raw_safe_halt()	__asm__ __volatile__("sti; hlt": : :"memory")
+/* used when interrupts are already enabled or to shutdown the processor */
+#define halt()		__asm__ __volatile__("hlt": : :"memory")
+
+#else /* __ASSEMBLY__: */
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif
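
The magic numbers in the VSMP variant are EFLAGS bits: bit 9 is IF, the ordinary interrupt-enable flag, and bit 18 is AC, which the vSMP platform additionally uses as an interrupt-disable indicator - so there "disabled" means AC set or IF clear, and disabling sets AC rather than executing cli. Spelled out (sketch):

	#define X86_EFLAGS_IF	(1UL <<  9)	/* interrupt enable		*/
	#define X86_EFLAGS_AC	(1UL << 18)	/* alignment check; doubles	*/
						/* as vSMP's irq-off marker	*/

	/* the same predicate as raw_irqs_disabled_flags() above: */
	static inline int vsmp_flags_say_disabled(unsigned long flags)
	{
		return (flags & X86_EFLAGS_AC) || !(flags & X86_EFLAGS_IF);
	}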
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 68e559f3631c..f67f2873a922 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 
 #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
 
-/* interrupt control.. */
-#define local_save_flags(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define local_irq_restore(x)	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
-
-#ifdef CONFIG_X86_VSMP
-/* Interrupt control for VSMP architecture */
-#define local_irq_disable()	do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
-#define local_irq_enable()	do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
-
-#define irqs_disabled()			\
-({					\
-	unsigned long flags;		\
-	local_save_flags(flags);	\
-	(flags & (1<<18)) || !(flags & (1<<9));	\
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x)	do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
-#else /* CONFIG_X86_VSMP */
-#define local_irq_disable()	__asm__ __volatile__("cli": : :"memory")
-#define local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
-
-#define irqs_disabled()			\
-({					\
-	unsigned long flags;		\
-	local_save_flags(flags);	\
-	!(flags & (1<<9));		\
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
-#endif
-
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()			__asm__ __volatile__("hlt": : :"memory")
+#include <linux/irqflags.h>
 
 void cpu_idle_wait(void);
 
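
Nothing changes for callers: local_irq_save() and friends still exist, they just come from <linux/irqflags.h> now, which layers the tracer calls on top of the raw_* primitives above when CONFIG_TRACE_IRQFLAGS is enabled. Typical usage keeps working unmodified (a sketch of what the wrapped macros now do underneath):

	unsigned long flags;

	local_irq_save(flags);		/* raw pushfq/cli, then trace_hardirqs_off()	*/
	/* ... critical section ... */
	local_irq_restore(flags);	/* trace_hardirqs_on() first if the saved	*/
					/* flags had IF set, then the raw popfq		*/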
