diff options
-rw-r--r-- | arch/x86_64/Kconfig.debug | 4 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32entry.S | 19 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 188 | ||||
-rw-r--r-- | arch/x86_64/kernel/irq.c | 4 | ||||
-rw-r--r-- | include/asm-x86_64/irqflags.h | 61 | ||||
-rw-r--r-- | include/asm-x86_64/system.h | 38 |
6 files changed, 223 insertions, 91 deletions
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 1d92ab56c0f9..775d211a5cf9 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -1,5 +1,9 @@ | |||
1 | menu "Kernel hacking" | 1 | menu "Kernel hacking" |
2 | 2 | ||
3 | config TRACE_IRQFLAGS_SUPPORT | ||
4 | bool | ||
5 | default y | ||
6 | |||
3 | source "lib/Kconfig.debug" | 7 | source "lib/Kconfig.debug" |
4 | 8 | ||
5 | config DEBUG_RODATA | 9 | config DEBUG_RODATA |
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index c536fa98ea37..9b5bb413a6e9 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/thread_info.h> | 13 | #include <asm/thread_info.h> |
14 | #include <asm/segment.h> | 14 | #include <asm/segment.h> |
15 | #include <asm/vsyscall32.h> | 15 | #include <asm/vsyscall32.h> |
16 | #include <asm/irqflags.h> | ||
16 | #include <linux/linkage.h> | 17 | #include <linux/linkage.h> |
17 | 18 | ||
18 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | 19 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) |
@@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target) | |||
75 | swapgs | 76 | swapgs |
76 | movq %gs:pda_kernelstack, %rsp | 77 | movq %gs:pda_kernelstack, %rsp |
77 | addq $(PDA_STACKOFFSET),%rsp | 78 | addq $(PDA_STACKOFFSET),%rsp |
79 | /* | ||
80 | * No need to follow this irqs on/off section: the syscall | ||
81 | * disabled irqs, here we enable it straight after entry: | ||
82 | */ | ||
78 | sti | 83 | sti |
79 | movl %ebp,%ebp /* zero extension */ | 84 | movl %ebp,%ebp /* zero extension */ |
80 | pushq $__USER32_DS | 85 | pushq $__USER32_DS |
@@ -118,6 +123,7 @@ sysenter_do_call: | |||
118 | movq %rax,RAX-ARGOFFSET(%rsp) | 123 | movq %rax,RAX-ARGOFFSET(%rsp) |
119 | GET_THREAD_INFO(%r10) | 124 | GET_THREAD_INFO(%r10) |
120 | cli | 125 | cli |
126 | TRACE_IRQS_OFF | ||
121 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) | 127 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) |
122 | jnz int_ret_from_sys_call | 128 | jnz int_ret_from_sys_call |
123 | andl $~TS_COMPAT,threadinfo_status(%r10) | 129 | andl $~TS_COMPAT,threadinfo_status(%r10) |
@@ -132,6 +138,7 @@ sysenter_do_call: | |||
132 | CFI_REGISTER rsp,rcx | 138 | CFI_REGISTER rsp,rcx |
133 | movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ | 139 | movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ |
134 | CFI_REGISTER rip,rdx | 140 | CFI_REGISTER rip,rdx |
141 | TRACE_IRQS_ON | ||
135 | swapgs | 142 | swapgs |
136 | sti /* sti only takes effect after the next instruction */ | 143 | sti /* sti only takes effect after the next instruction */ |
137 | /* sysexit */ | 144 | /* sysexit */ |
@@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target) | |||
186 | movl %esp,%r8d | 193 | movl %esp,%r8d |
187 | CFI_REGISTER rsp,r8 | 194 | CFI_REGISTER rsp,r8 |
188 | movq %gs:pda_kernelstack,%rsp | 195 | movq %gs:pda_kernelstack,%rsp |
196 | /* | ||
197 | * No need to follow this irqs on/off section: the syscall | ||
198 | * disabled irqs and here we enable it straight after entry: | ||
199 | */ | ||
189 | sti | 200 | sti |
190 | SAVE_ARGS 8,1,1 | 201 | SAVE_ARGS 8,1,1 |
191 | movl %eax,%eax /* zero extension */ | 202 | movl %eax,%eax /* zero extension */ |
@@ -220,6 +231,7 @@ cstar_do_call: | |||
220 | movq %rax,RAX-ARGOFFSET(%rsp) | 231 | movq %rax,RAX-ARGOFFSET(%rsp) |
221 | GET_THREAD_INFO(%r10) | 232 | GET_THREAD_INFO(%r10) |
222 | cli | 233 | cli |
234 | TRACE_IRQS_OFF | ||
223 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) | 235 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) |
224 | jnz int_ret_from_sys_call | 236 | jnz int_ret_from_sys_call |
225 | andl $~TS_COMPAT,threadinfo_status(%r10) | 237 | andl $~TS_COMPAT,threadinfo_status(%r10) |
@@ -228,6 +240,7 @@ cstar_do_call: | |||
228 | CFI_REGISTER rip,rcx | 240 | CFI_REGISTER rip,rcx |
229 | movl EFLAGS-ARGOFFSET(%rsp),%r11d | 241 | movl EFLAGS-ARGOFFSET(%rsp),%r11d |
230 | /*CFI_REGISTER rflags,r11*/ | 242 | /*CFI_REGISTER rflags,r11*/ |
243 | TRACE_IRQS_ON | ||
231 | movl RSP-ARGOFFSET(%rsp),%esp | 244 | movl RSP-ARGOFFSET(%rsp),%esp |
232 | CFI_RESTORE rsp | 245 | CFI_RESTORE rsp |
233 | swapgs | 246 | swapgs |
@@ -286,7 +299,11 @@ ENTRY(ia32_syscall) | |||
286 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ | 299 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ |
287 | /*CFI_REL_OFFSET cs,CS-RIP*/ | 300 | /*CFI_REL_OFFSET cs,CS-RIP*/ |
288 | CFI_REL_OFFSET rip,RIP-RIP | 301 | CFI_REL_OFFSET rip,RIP-RIP |
289 | swapgs | 302 | swapgs |
303 | /* | ||
304 | * No need to follow this irqs on/off section: the syscall | ||
305 | * disabled irqs and here we enable it straight after entry: | ||
306 | */ | ||
290 | sti | 307 | sti |
291 | movl %eax,%eax | 308 | movl %eax,%eax |
292 | pushq %rax | 309 | pushq %rax |
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed92c2983606..d464dded68c0 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,13 +41,24 @@ | |||
41 | #include <asm/thread_info.h> | 41 | #include <asm/thread_info.h> |
42 | #include <asm/hw_irq.h> | 42 | #include <asm/hw_irq.h> |
43 | #include <asm/page.h> | 43 | #include <asm/page.h> |
44 | #include <asm/irqflags.h> | ||
44 | 45 | ||
45 | .code64 | 46 | .code64 |
46 | 47 | ||
47 | #ifndef CONFIG_PREEMPT | 48 | #ifndef CONFIG_PREEMPT |
48 | #define retint_kernel retint_restore_args | 49 | #define retint_kernel retint_restore_args |
49 | #endif | 50 | #endif |
50 | 51 | ||
52 | |||
53 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | ||
54 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
55 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | ||
56 | jnc 1f | ||
57 | TRACE_IRQS_ON | ||
58 | 1: | ||
59 | #endif | ||
60 | .endm | ||
61 | |||
51 | /* | 62 | /* |
52 | * C code is not supposed to know about undefined top of stack. Every time | 63 | * C code is not supposed to know about undefined top of stack. Every time |
53 | * a C function with an pt_regs argument is called from the SYSCALL based | 64 | * a C function with an pt_regs argument is called from the SYSCALL based |
@@ -194,6 +205,10 @@ ENTRY(system_call) | |||
194 | swapgs | 205 | swapgs |
195 | movq %rsp,%gs:pda_oldrsp | 206 | movq %rsp,%gs:pda_oldrsp |
196 | movq %gs:pda_kernelstack,%rsp | 207 | movq %gs:pda_kernelstack,%rsp |
208 | /* | ||
209 | * No need to follow this irqs off/on section - it's straight | ||
210 | * and short: | ||
211 | */ | ||
197 | sti | 212 | sti |
198 | SAVE_ARGS 8,1 | 213 | SAVE_ARGS 8,1 |
199 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 214 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
@@ -219,10 +234,15 @@ ret_from_sys_call: | |||
219 | sysret_check: | 234 | sysret_check: |
220 | GET_THREAD_INFO(%rcx) | 235 | GET_THREAD_INFO(%rcx) |
221 | cli | 236 | cli |
237 | TRACE_IRQS_OFF | ||
222 | movl threadinfo_flags(%rcx),%edx | 238 | movl threadinfo_flags(%rcx),%edx |
223 | andl %edi,%edx | 239 | andl %edi,%edx |
224 | CFI_REMEMBER_STATE | 240 | CFI_REMEMBER_STATE |
225 | jnz sysret_careful | 241 | jnz sysret_careful |
242 | /* | ||
243 | * sysretq will re-enable interrupts: | ||
244 | */ | ||
245 | TRACE_IRQS_ON | ||
226 | movq RIP-ARGOFFSET(%rsp),%rcx | 246 | movq RIP-ARGOFFSET(%rsp),%rcx |
227 | CFI_REGISTER rip,rcx | 247 | CFI_REGISTER rip,rcx |
228 | RESTORE_ARGS 0,-ARG_SKIP,1 | 248 | RESTORE_ARGS 0,-ARG_SKIP,1 |
@@ -237,6 +257,7 @@ sysret_careful: | |||
237 | CFI_RESTORE_STATE | 257 | CFI_RESTORE_STATE |
238 | bt $TIF_NEED_RESCHED,%edx | 258 | bt $TIF_NEED_RESCHED,%edx |
239 | jnc sysret_signal | 259 | jnc sysret_signal |
260 | TRACE_IRQS_ON | ||
240 | sti | 261 | sti |
241 | pushq %rdi | 262 | pushq %rdi |
242 | CFI_ADJUST_CFA_OFFSET 8 | 263 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -247,6 +268,7 @@ sysret_careful: | |||
247 | 268 | ||
248 | /* Handle a signal */ | 269 | /* Handle a signal */ |
249 | sysret_signal: | 270 | sysret_signal: |
271 | TRACE_IRQS_ON | ||
250 | sti | 272 | sti |
251 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | 273 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx |
252 | jz 1f | 274 | jz 1f |
@@ -261,6 +283,7 @@ sysret_signal: | |||
261 | /* Use IRET because user could have changed frame. This | 283 | /* Use IRET because user could have changed frame. This |
262 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 284 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
263 | cli | 285 | cli |
286 | TRACE_IRQS_OFF | ||
264 | jmp int_with_check | 287 | jmp int_with_check |
265 | 288 | ||
266 | badsys: | 289 | badsys: |
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call) | |||
309 | CFI_REL_OFFSET r10,R10-ARGOFFSET | 332 | CFI_REL_OFFSET r10,R10-ARGOFFSET |
310 | CFI_REL_OFFSET r11,R11-ARGOFFSET | 333 | CFI_REL_OFFSET r11,R11-ARGOFFSET |
311 | cli | 334 | cli |
335 | TRACE_IRQS_OFF | ||
312 | testl $3,CS-ARGOFFSET(%rsp) | 336 | testl $3,CS-ARGOFFSET(%rsp) |
313 | je retint_restore_args | 337 | je retint_restore_args |
314 | movl $_TIF_ALLWORK_MASK,%edi | 338 | movl $_TIF_ALLWORK_MASK,%edi |
@@ -327,6 +351,7 @@ int_with_check: | |||
327 | int_careful: | 351 | int_careful: |
328 | bt $TIF_NEED_RESCHED,%edx | 352 | bt $TIF_NEED_RESCHED,%edx |
329 | jnc int_very_careful | 353 | jnc int_very_careful |
354 | TRACE_IRQS_ON | ||
330 | sti | 355 | sti |
331 | pushq %rdi | 356 | pushq %rdi |
332 | CFI_ADJUST_CFA_OFFSET 8 | 357 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -334,10 +359,12 @@ int_careful: | |||
334 | popq %rdi | 359 | popq %rdi |
335 | CFI_ADJUST_CFA_OFFSET -8 | 360 | CFI_ADJUST_CFA_OFFSET -8 |
336 | cli | 361 | cli |
362 | TRACE_IRQS_OFF | ||
337 | jmp int_with_check | 363 | jmp int_with_check |
338 | 364 | ||
339 | /* handle signals and tracing -- both require a full stack frame */ | 365 | /* handle signals and tracing -- both require a full stack frame */ |
340 | int_very_careful: | 366 | int_very_careful: |
367 | TRACE_IRQS_ON | ||
341 | sti | 368 | sti |
342 | SAVE_REST | 369 | SAVE_REST |
343 | /* Check for syscall exit trace */ | 370 | /* Check for syscall exit trace */ |
@@ -351,6 +378,7 @@ int_very_careful: | |||
351 | CFI_ADJUST_CFA_OFFSET -8 | 378 | CFI_ADJUST_CFA_OFFSET -8 |
352 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | 379 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi |
353 | cli | 380 | cli |
381 | TRACE_IRQS_OFF | ||
354 | jmp int_restore_rest | 382 | jmp int_restore_rest |
355 | 383 | ||
356 | int_signal: | 384 | int_signal: |
@@ -363,6 +391,7 @@ int_signal: | |||
363 | int_restore_rest: | 391 | int_restore_rest: |
364 | RESTORE_REST | 392 | RESTORE_REST |
365 | cli | 393 | cli |
394 | TRACE_IRQS_OFF | ||
366 | jmp int_with_check | 395 | jmp int_with_check |
367 | CFI_ENDPROC | 396 | CFI_ENDPROC |
368 | END(int_ret_from_sys_call) | 397 | END(int_ret_from_sys_call) |
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn) | |||
484 | swapgs | 513 | swapgs |
485 | 1: incl %gs:pda_irqcount # RED-PEN should check preempt count | 514 | 1: incl %gs:pda_irqcount # RED-PEN should check preempt count |
486 | cmoveq %gs:pda_irqstackptr,%rsp | 515 | cmoveq %gs:pda_irqstackptr,%rsp |
516 | /* | ||
517 | * We entered an interrupt context - irqs are off: | ||
518 | */ | ||
519 | TRACE_IRQS_OFF | ||
487 | call \func | 520 | call \func |
488 | .endm | 521 | .endm |
489 | 522 | ||
@@ -493,6 +526,7 @@ ENTRY(common_interrupt) | |||
493 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 526 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
494 | ret_from_intr: | 527 | ret_from_intr: |
495 | cli | 528 | cli |
529 | TRACE_IRQS_OFF | ||
496 | decl %gs:pda_irqcount | 530 | decl %gs:pda_irqcount |
497 | leaveq | 531 | leaveq |
498 | CFI_DEF_CFA_REGISTER rsp | 532 | CFI_DEF_CFA_REGISTER rsp |
@@ -515,9 +549,21 @@ retint_check: | |||
515 | CFI_REMEMBER_STATE | 549 | CFI_REMEMBER_STATE |
516 | jnz retint_careful | 550 | jnz retint_careful |
517 | retint_swapgs: | 551 | retint_swapgs: |
552 | /* | ||
553 | * The iretq could re-enable interrupts: | ||
554 | */ | ||
555 | cli | ||
556 | TRACE_IRQS_IRETQ | ||
518 | swapgs | 557 | swapgs |
558 | jmp restore_args | ||
559 | |||
519 | retint_restore_args: | 560 | retint_restore_args: |
520 | cli | 561 | cli |
562 | /* | ||
563 | * The iretq could re-enable interrupts: | ||
564 | */ | ||
565 | TRACE_IRQS_IRETQ | ||
566 | restore_args: | ||
521 | RESTORE_ARGS 0,8,0 | 567 | RESTORE_ARGS 0,8,0 |
522 | iret_label: | 568 | iret_label: |
523 | iretq | 569 | iretq |
@@ -530,6 +576,7 @@ iret_label: | |||
530 | /* running with kernel gs */ | 576 | /* running with kernel gs */ |
531 | bad_iret: | 577 | bad_iret: |
532 | movq $11,%rdi /* SIGSEGV */ | 578 | movq $11,%rdi /* SIGSEGV */ |
579 | TRACE_IRQS_ON | ||
533 | sti | 580 | sti |
534 | jmp do_exit | 581 | jmp do_exit |
535 | .previous | 582 | .previous |
@@ -539,6 +586,7 @@ retint_careful: | |||
539 | CFI_RESTORE_STATE | 586 | CFI_RESTORE_STATE |
540 | bt $TIF_NEED_RESCHED,%edx | 587 | bt $TIF_NEED_RESCHED,%edx |
541 | jnc retint_signal | 588 | jnc retint_signal |
589 | TRACE_IRQS_ON | ||
542 | sti | 590 | sti |
543 | pushq %rdi | 591 | pushq %rdi |
544 | CFI_ADJUST_CFA_OFFSET 8 | 592 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -547,11 +595,13 @@ retint_careful: | |||
547 | CFI_ADJUST_CFA_OFFSET -8 | 595 | CFI_ADJUST_CFA_OFFSET -8 |
548 | GET_THREAD_INFO(%rcx) | 596 | GET_THREAD_INFO(%rcx) |
549 | cli | 597 | cli |
598 | TRACE_IRQS_OFF | ||
550 | jmp retint_check | 599 | jmp retint_check |
551 | 600 | ||
552 | retint_signal: | 601 | retint_signal: |
553 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | 602 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx |
554 | jz retint_swapgs | 603 | jz retint_swapgs |
604 | TRACE_IRQS_ON | ||
555 | sti | 605 | sti |
556 | SAVE_REST | 606 | SAVE_REST |
557 | movq $-1,ORIG_RAX(%rsp) | 607 | movq $-1,ORIG_RAX(%rsp) |
@@ -560,6 +610,7 @@ retint_signal: | |||
560 | call do_notify_resume | 610 | call do_notify_resume |
561 | RESTORE_REST | 611 | RESTORE_REST |
562 | cli | 612 | cli |
613 | TRACE_IRQS_OFF | ||
563 | movl $_TIF_NEED_RESCHED,%edi | 614 | movl $_TIF_NEED_RESCHED,%edi |
564 | GET_THREAD_INFO(%rcx) | 615 | GET_THREAD_INFO(%rcx) |
565 | jmp retint_check | 616 | jmp retint_check |
@@ -666,7 +717,7 @@ END(spurious_interrupt) | |||
666 | 717 | ||
667 | /* error code is on the stack already */ | 718 | /* error code is on the stack already */ |
668 | /* handle NMI like exceptions that can happen everywhere */ | 719 | /* handle NMI like exceptions that can happen everywhere */ |
669 | .macro paranoidentry sym, ist=0 | 720 | .macro paranoidentry sym, ist=0, irqtrace=1 |
670 | SAVE_ALL | 721 | SAVE_ALL |
671 | cld | 722 | cld |
672 | movl $1,%ebx | 723 | movl $1,%ebx |
@@ -691,8 +742,73 @@ END(spurious_interrupt) | |||
691 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | 742 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) |
692 | .endif | 743 | .endif |
693 | cli | 744 | cli |
745 | .if \irqtrace | ||
746 | TRACE_IRQS_OFF | ||
747 | .endif | ||
694 | .endm | 748 | .endm |
695 | 749 | ||
750 | /* | ||
751 | * "Paranoid" exit path from exception stack. | ||
752 | * Paranoid because this is used by NMIs and cannot take | ||
753 | * any kernel state for granted. | ||
754 | * We don't do kernel preemption checks here, because only | ||
755 | * NMI should be common and it does not enable IRQs and | ||
756 | * cannot get reschedule ticks. | ||
757 | * | ||
758 | * "trace" is 0 for the NMI handler only, because irq-tracing | ||
759 | * is fundamentally NMI-unsafe. (we cannot change the soft and | ||
760 | * hard flags at once, atomically) | ||
761 | */ | ||
762 | .macro paranoidexit trace=1 | ||
763 | /* ebx: no swapgs flag */ | ||
764 | paranoid_exit\trace: | ||
765 | testl %ebx,%ebx /* swapgs needed? */ | ||
766 | jnz paranoid_restore\trace | ||
767 | testl $3,CS(%rsp) | ||
768 | jnz paranoid_userspace\trace | ||
769 | paranoid_swapgs\trace: | ||
770 | TRACE_IRQS_IRETQ 0 | ||
771 | swapgs | ||
772 | paranoid_restore\trace: | ||
773 | RESTORE_ALL 8 | ||
774 | iretq | ||
775 | paranoid_userspace\trace: | ||
776 | GET_THREAD_INFO(%rcx) | ||
777 | movl threadinfo_flags(%rcx),%ebx | ||
778 | andl $_TIF_WORK_MASK,%ebx | ||
779 | jz paranoid_swapgs\trace | ||
780 | movq %rsp,%rdi /* &pt_regs */ | ||
781 | call sync_regs | ||
782 | movq %rax,%rsp /* switch stack for scheduling */ | ||
783 | testl $_TIF_NEED_RESCHED,%ebx | ||
784 | jnz paranoid_schedule\trace | ||
785 | movl %ebx,%edx /* arg3: thread flags */ | ||
786 | .if \trace | ||
787 | TRACE_IRQS_ON | ||
788 | .endif | ||
789 | sti | ||
790 | xorl %esi,%esi /* arg2: oldset */ | ||
791 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
792 | call do_notify_resume | ||
793 | cli | ||
794 | .if \trace | ||
795 | TRACE_IRQS_OFF | ||
796 | .endif | ||
797 | jmp paranoid_userspace\trace | ||
798 | paranoid_schedule\trace: | ||
799 | .if \trace | ||
800 | TRACE_IRQS_ON | ||
801 | .endif | ||
802 | sti | ||
803 | call schedule | ||
804 | cli | ||
805 | .if \trace | ||
806 | TRACE_IRQS_OFF | ||
807 | .endif | ||
808 | jmp paranoid_userspace\trace | ||
809 | CFI_ENDPROC | ||
810 | .endm | ||
811 | |||
696 | /* | 812 | /* |
697 | * Exception entry point. This expects an error code/orig_rax on the stack | 813 | * Exception entry point. This expects an error code/orig_rax on the stack |
698 | * and the exception handler in %rax. | 814 | * and the exception handler in %rax. |
@@ -748,6 +864,7 @@ error_exit: | |||
748 | movl %ebx,%eax | 864 | movl %ebx,%eax |
749 | RESTORE_REST | 865 | RESTORE_REST |
750 | cli | 866 | cli |
867 | TRACE_IRQS_OFF | ||
751 | GET_THREAD_INFO(%rcx) | 868 | GET_THREAD_INFO(%rcx) |
752 | testl %eax,%eax | 869 | testl %eax,%eax |
753 | jne retint_kernel | 870 | jne retint_kernel |
@@ -755,6 +872,10 @@ error_exit: | |||
755 | movl $_TIF_WORK_MASK,%edi | 872 | movl $_TIF_WORK_MASK,%edi |
756 | andl %edi,%edx | 873 | andl %edi,%edx |
757 | jnz retint_careful | 874 | jnz retint_careful |
875 | /* | ||
876 | * The iret might restore flags: | ||
877 | */ | ||
878 | TRACE_IRQS_IRETQ | ||
758 | swapgs | 879 | swapgs |
759 | RESTORE_ARGS 0,8,0 | 880 | RESTORE_ARGS 0,8,0 |
760 | jmp iret_label | 881 | jmp iret_label |
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug) | |||
916 | pushq $0 | 1037 | pushq $0 |
917 | CFI_ADJUST_CFA_OFFSET 8 | 1038 | CFI_ADJUST_CFA_OFFSET 8 |
918 | paranoidentry do_debug, DEBUG_STACK | 1039 | paranoidentry do_debug, DEBUG_STACK |
919 | jmp paranoid_exit | 1040 | paranoidexit |
920 | CFI_ENDPROC | ||
921 | END(debug) | 1041 | END(debug) |
922 | .previous .text | 1042 | .previous .text |
923 | 1043 | ||
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi) | |||
926 | INTR_FRAME | 1046 | INTR_FRAME |
927 | pushq $-1 | 1047 | pushq $-1 |
928 | CFI_ADJUST_CFA_OFFSET 8 | 1048 | CFI_ADJUST_CFA_OFFSET 8 |
929 | paranoidentry do_nmi | 1049 | paranoidentry do_nmi, 0, 0 |
930 | /* | 1050 | #ifdef CONFIG_TRACE_IRQFLAGS |
931 | * "Paranoid" exit path from exception stack. | 1051 | paranoidexit 0 |
932 | * Paranoid because this is used by NMIs and cannot take | 1052 | #else |
933 | * any kernel state for granted. | 1053 | jmp paranoid_exit1 |
934 | * We don't do kernel preemption checks here, because only | 1054 | CFI_ENDPROC |
935 | * NMI should be common and it does not enable IRQs and | 1055 | #endif |
936 | * cannot get reschedule ticks. | ||
937 | */ | ||
938 | /* ebx: no swapgs flag */ | ||
939 | paranoid_exit: | ||
940 | testl %ebx,%ebx /* swapgs needed? */ | ||
941 | jnz paranoid_restore | ||
942 | testl $3,CS(%rsp) | ||
943 | jnz paranoid_userspace | ||
944 | paranoid_swapgs: | ||
945 | swapgs | ||
946 | paranoid_restore: | ||
947 | RESTORE_ALL 8 | ||
948 | iretq | ||
949 | paranoid_userspace: | ||
950 | GET_THREAD_INFO(%rcx) | ||
951 | movl threadinfo_flags(%rcx),%ebx | ||
952 | andl $_TIF_WORK_MASK,%ebx | ||
953 | jz paranoid_swapgs | ||
954 | movq %rsp,%rdi /* &pt_regs */ | ||
955 | call sync_regs | ||
956 | movq %rax,%rsp /* switch stack for scheduling */ | ||
957 | testl $_TIF_NEED_RESCHED,%ebx | ||
958 | jnz paranoid_schedule | ||
959 | movl %ebx,%edx /* arg3: thread flags */ | ||
960 | sti | ||
961 | xorl %esi,%esi /* arg2: oldset */ | ||
962 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
963 | call do_notify_resume | ||
964 | cli | ||
965 | jmp paranoid_userspace | ||
966 | paranoid_schedule: | ||
967 | sti | ||
968 | call schedule | ||
969 | cli | ||
970 | jmp paranoid_userspace | ||
971 | CFI_ENDPROC | ||
972 | END(nmi) | 1056 | END(nmi) |
973 | .previous .text | 1057 | .previous .text |
974 | 1058 | ||
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3) | |||
977 | pushq $0 | 1061 | pushq $0 |
978 | CFI_ADJUST_CFA_OFFSET 8 | 1062 | CFI_ADJUST_CFA_OFFSET 8 |
979 | paranoidentry do_int3, DEBUG_STACK | 1063 | paranoidentry do_int3, DEBUG_STACK |
980 | jmp paranoid_exit | 1064 | jmp paranoid_exit1 |
981 | CFI_ENDPROC | 1065 | CFI_ENDPROC |
982 | END(int3) | 1066 | END(int3) |
983 | .previous .text | 1067 | .previous .text |
@@ -1006,7 +1090,7 @@ END(reserved) | |||
1006 | ENTRY(double_fault) | 1090 | ENTRY(double_fault) |
1007 | XCPT_FRAME | 1091 | XCPT_FRAME |
1008 | paranoidentry do_double_fault | 1092 | paranoidentry do_double_fault |
1009 | jmp paranoid_exit | 1093 | jmp paranoid_exit1 |
1010 | CFI_ENDPROC | 1094 | CFI_ENDPROC |
1011 | END(double_fault) | 1095 | END(double_fault) |
1012 | 1096 | ||
@@ -1022,7 +1106,7 @@ END(segment_not_present) | |||
1022 | ENTRY(stack_segment) | 1106 | ENTRY(stack_segment) |
1023 | XCPT_FRAME | 1107 | XCPT_FRAME |
1024 | paranoidentry do_stack_segment | 1108 | paranoidentry do_stack_segment |
1025 | jmp paranoid_exit | 1109 | jmp paranoid_exit1 |
1026 | CFI_ENDPROC | 1110 | CFI_ENDPROC |
1027 | END(stack_segment) | 1111 | END(stack_segment) |
1028 | 1112 | ||
@@ -1050,7 +1134,7 @@ ENTRY(machine_check) | |||
1050 | pushq $0 | 1134 | pushq $0 |
1051 | CFI_ADJUST_CFA_OFFSET 8 | 1135 | CFI_ADJUST_CFA_OFFSET 8 |
1052 | paranoidentry do_machine_check | 1136 | paranoidentry do_machine_check |
1053 | jmp paranoid_exit | 1137 | jmp paranoid_exit1 |
1054 | CFI_ENDPROC | 1138 | CFI_ENDPROC |
1055 | END(machine_check) | 1139 | END(machine_check) |
1056 | #endif | 1140 | #endif |
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index a1f1df5f7bfc..5221a53e90c1 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void) | |||
177 | local_irq_save(flags); | 177 | local_irq_save(flags); |
178 | pending = local_softirq_pending(); | 178 | pending = local_softirq_pending(); |
179 | /* Switch to interrupt stack */ | 179 | /* Switch to interrupt stack */ |
180 | if (pending) | 180 | if (pending) { |
181 | call_softirq(); | 181 | call_softirq(); |
182 | WARN_ON_ONCE(softirq_count()); | ||
183 | } | ||
182 | local_irq_restore(flags); | 184 | local_irq_restore(flags); |
183 | } | 185 | } |
184 | EXPORT_SYMBOL(do_softirq); | 186 | EXPORT_SYMBOL(do_softirq); |
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
new file mode 100644
index 000000000000..22f3c06b247e
--- /dev/null
+++ b/include/asm-x86_64/irqflags.h
@@ -0,0 +1,61 @@ | |||
1 | /* | ||
2 | * include/asm-x86_64/irqflags.h | ||
3 | * | ||
4 | * IRQ flags handling | ||
5 | * | ||
6 | * This file gets included from lowlevel asm headers too, to provide | ||
7 | * wrapped versions of the local_irq_*() APIs, based on the | ||
8 | * raw_local_irq_*() macros from the lowlevel headers. | ||
9 | */ | ||
10 | #ifndef _ASM_IRQFLAGS_H | ||
11 | #define _ASM_IRQFLAGS_H | ||
12 | |||
13 | #ifndef __ASSEMBLY__ | ||
14 | |||
15 | /* interrupt control.. */ | ||
16 | #define raw_local_save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0) | ||
17 | #define raw_local_irq_restore(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc") | ||
18 | |||
19 | #ifdef CONFIG_X86_VSMP | ||
20 | /* Interrupt control for VSMP architecture */ | ||
21 | #define raw_local_irq_disable() do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0) | ||
22 | #define raw_local_irq_enable() do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0) | ||
23 | |||
24 | #define raw_irqs_disabled_flags(flags) \ | ||
25 | ({ \ | ||
26 | (flags & (1<<18)) || !(flags & (1<<9)); \ | ||
27 | }) | ||
28 | |||
29 | /* For spinlocks etc */ | ||
30 | #define raw_local_irq_save(x) do { raw_local_save_flags(x); raw_local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0) | ||
31 | #else /* CONFIG_X86_VSMP */ | ||
32 | #define raw_local_irq_disable() __asm__ __volatile__("cli": : :"memory") | ||
33 | #define raw_local_irq_enable() __asm__ __volatile__("sti": : :"memory") | ||
34 | |||
35 | #define raw_irqs_disabled_flags(flags) \ | ||
36 | ({ \ | ||
37 | !(flags & (1<<9)); \ | ||
38 | }) | ||
39 | |||
40 | /* For spinlocks etc */ | ||
41 | #define raw_local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# raw_local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0) | ||
42 | #endif | ||
43 | |||
44 | #define raw_irqs_disabled() \ | ||
45 | ({ \ | ||
46 | unsigned long flags; \ | ||
47 | raw_local_save_flags(flags); \ | ||
48 | raw_irqs_disabled_flags(flags); \ | ||
49 | }) | ||
50 | |||
51 | /* used in the idle loop; sti takes one instruction cycle to complete */ | ||
52 | #define raw_safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") | ||
53 | /* used when interrupts are already enabled or to shutdown the processor */ | ||
54 | #define halt() __asm__ __volatile__("hlt": : :"memory") | ||
55 | |||
56 | #else /* __ASSEMBLY__: */ | ||
57 | # define TRACE_IRQS_ON | ||
58 | # define TRACE_IRQS_OFF | ||
59 | #endif | ||
60 | |||
61 | #endif | ||
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 68e559f3631c..f67f2873a922 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, | |||
244 | 244 | ||
245 | #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) | 245 | #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0) |
246 | 246 | ||
247 | /* interrupt control.. */ | 247 | #include <linux/irqflags.h> |
248 | #define local_save_flags(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0) | ||
249 | #define local_irq_restore(x) __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc") | ||
250 | |||
251 | #ifdef CONFIG_X86_VSMP | ||
252 | /* Interrupt control for VSMP architecture */ | ||
253 | #define local_irq_disable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0) | ||
254 | #define local_irq_enable() do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0) | ||
255 | |||
256 | #define irqs_disabled() \ | ||
257 | ({ \ | ||
258 | unsigned long flags; \ | ||
259 | local_save_flags(flags); \ | ||
260 | (flags & (1<<18)) || !(flags & (1<<9)); \ | ||
261 | }) | ||
262 | |||
263 | /* For spinlocks etc */ | ||
264 | #define local_irq_save(x) do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0) | ||
265 | #else /* CONFIG_X86_VSMP */ | ||
266 | #define local_irq_disable() __asm__ __volatile__("cli": : :"memory") | ||
267 | #define local_irq_enable() __asm__ __volatile__("sti": : :"memory") | ||
268 | |||
269 | #define irqs_disabled() \ | ||
270 | ({ \ | ||
271 | unsigned long flags; \ | ||
272 | local_save_flags(flags); \ | ||
273 | !(flags & (1<<9)); \ | ||
274 | }) | ||
275 | |||
276 | /* For spinlocks etc */ | ||
277 | #define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0) | ||
278 | #endif | ||
279 | |||
280 | /* used in the idle loop; sti takes one instruction cycle to complete */ | ||
281 | #define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory") | ||
282 | /* used when interrupts are already enabled or to shutdown the processor */ | ||
283 | #define halt() __asm__ __volatile__("hlt": : :"memory") | ||
284 | 248 | ||
285 | void cpu_idle_wait(void); | 249 | void cpu_idle_wait(void); |
286 | 250 | ||