diff options
author | Glauber de Oliveira Costa <gcosta@redhat.com> | 2008-01-30 07:32:08 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-30 07:32:08 -0500 |
commit | 72fe4858544292ad64600765cb78bc02298c6b1c (patch) | |
tree | a3876c02ee67a98f0d0f9606fcb943af627acea6 /arch/x86/kernel | |
parent | e801f864ec7e5b149bd05337800e419f408523bb (diff) |
x86: replace privileged instructions with paravirt macros
The assembly code in entry_64.S issues a number of privileged instructions,
such as cli, sti, and swapgs. Paravirt guests are forbidden to execute these
directly, so we replace them with macros that will do the right thing.
Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/entry_64.S | 101 |
1 file changed, 59 insertions, 42 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e70f3881d7e4..bea8474744ff 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <asm/hw_irq.h> | 50 | #include <asm/hw_irq.h> |
51 | #include <asm/page.h> | 51 | #include <asm/page.h> |
52 | #include <asm/irqflags.h> | 52 | #include <asm/irqflags.h> |
53 | #include <asm/paravirt.h> | ||
53 | 54 | ||
54 | .code64 | 55 | .code64 |
55 | 56 | ||
@@ -57,6 +58,13 @@ | |||
57 | #define retint_kernel retint_restore_args | 58 | #define retint_kernel retint_restore_args |
58 | #endif | 59 | #endif |
59 | 60 | ||
61 | #ifdef CONFIG_PARAVIRT | ||
62 | ENTRY(native_irq_enable_syscall_ret) | ||
63 | movq %gs:pda_oldrsp,%rsp | ||
64 | swapgs | ||
65 | sysretq | ||
66 | #endif /* CONFIG_PARAVIRT */ | ||
67 | |||
60 | 68 | ||
61 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | 69 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET |
62 | #ifdef CONFIG_TRACE_IRQFLAGS | 70 | #ifdef CONFIG_TRACE_IRQFLAGS |
@@ -216,14 +224,21 @@ ENTRY(system_call) | |||
216 | CFI_DEF_CFA rsp,PDA_STACKOFFSET | 224 | CFI_DEF_CFA rsp,PDA_STACKOFFSET |
217 | CFI_REGISTER rip,rcx | 225 | CFI_REGISTER rip,rcx |
218 | /*CFI_REGISTER rflags,r11*/ | 226 | /*CFI_REGISTER rflags,r11*/ |
219 | swapgs | 227 | SWAPGS_UNSAFE_STACK |
228 | /* | ||
229 | * A hypervisor implementation might want to use a label | ||
230 | * after the swapgs, so that it can do the swapgs | ||
231 | * for the guest and jump here on syscall. | ||
232 | */ | ||
233 | ENTRY(system_call_after_swapgs) | ||
234 | |||
220 | movq %rsp,%gs:pda_oldrsp | 235 | movq %rsp,%gs:pda_oldrsp |
221 | movq %gs:pda_kernelstack,%rsp | 236 | movq %gs:pda_kernelstack,%rsp |
222 | /* | 237 | /* |
223 | * No need to follow this irqs off/on section - it's straight | 238 | * No need to follow this irqs off/on section - it's straight |
224 | * and short: | 239 | * and short: |
225 | */ | 240 | */ |
226 | sti | 241 | ENABLE_INTERRUPTS(CLBR_NONE) |
227 | SAVE_ARGS 8,1 | 242 | SAVE_ARGS 8,1 |
228 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 243 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
229 | movq %rcx,RIP-ARGOFFSET(%rsp) | 244 | movq %rcx,RIP-ARGOFFSET(%rsp) |
@@ -246,7 +261,7 @@ ret_from_sys_call: | |||
246 | sysret_check: | 261 | sysret_check: |
247 | LOCKDEP_SYS_EXIT | 262 | LOCKDEP_SYS_EXIT |
248 | GET_THREAD_INFO(%rcx) | 263 | GET_THREAD_INFO(%rcx) |
249 | cli | 264 | DISABLE_INTERRUPTS(CLBR_NONE) |
250 | TRACE_IRQS_OFF | 265 | TRACE_IRQS_OFF |
251 | movl threadinfo_flags(%rcx),%edx | 266 | movl threadinfo_flags(%rcx),%edx |
252 | andl %edi,%edx | 267 | andl %edi,%edx |
@@ -260,9 +275,7 @@ sysret_check: | |||
260 | CFI_REGISTER rip,rcx | 275 | CFI_REGISTER rip,rcx |
261 | RESTORE_ARGS 0,-ARG_SKIP,1 | 276 | RESTORE_ARGS 0,-ARG_SKIP,1 |
262 | /*CFI_REGISTER rflags,r11*/ | 277 | /*CFI_REGISTER rflags,r11*/ |
263 | movq %gs:pda_oldrsp,%rsp | 278 | ENABLE_INTERRUPTS_SYSCALL_RET |
264 | swapgs | ||
265 | sysretq | ||
266 | 279 | ||
267 | CFI_RESTORE_STATE | 280 | CFI_RESTORE_STATE |
268 | /* Handle reschedules */ | 281 | /* Handle reschedules */ |
@@ -271,7 +284,7 @@ sysret_careful: | |||
271 | bt $TIF_NEED_RESCHED,%edx | 284 | bt $TIF_NEED_RESCHED,%edx |
272 | jnc sysret_signal | 285 | jnc sysret_signal |
273 | TRACE_IRQS_ON | 286 | TRACE_IRQS_ON |
274 | sti | 287 | ENABLE_INTERRUPTS(CLBR_NONE) |
275 | pushq %rdi | 288 | pushq %rdi |
276 | CFI_ADJUST_CFA_OFFSET 8 | 289 | CFI_ADJUST_CFA_OFFSET 8 |
277 | call schedule | 290 | call schedule |
@@ -282,7 +295,7 @@ sysret_careful: | |||
282 | /* Handle a signal */ | 295 | /* Handle a signal */ |
283 | sysret_signal: | 296 | sysret_signal: |
284 | TRACE_IRQS_ON | 297 | TRACE_IRQS_ON |
285 | sti | 298 | ENABLE_INTERRUPTS(CLBR_NONE) |
286 | testl $_TIF_DO_NOTIFY_MASK,%edx | 299 | testl $_TIF_DO_NOTIFY_MASK,%edx |
287 | jz 1f | 300 | jz 1f |
288 | 301 | ||
@@ -295,7 +308,7 @@ sysret_signal: | |||
295 | 1: movl $_TIF_NEED_RESCHED,%edi | 308 | 1: movl $_TIF_NEED_RESCHED,%edi |
296 | /* Use IRET because user could have changed frame. This | 309 | /* Use IRET because user could have changed frame. This |
297 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 310 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
298 | cli | 311 | DISABLE_INTERRUPTS(CLBR_NONE) |
299 | TRACE_IRQS_OFF | 312 | TRACE_IRQS_OFF |
300 | jmp int_with_check | 313 | jmp int_with_check |
301 | 314 | ||
@@ -327,7 +340,7 @@ tracesys: | |||
327 | */ | 340 | */ |
328 | .globl int_ret_from_sys_call | 341 | .globl int_ret_from_sys_call |
329 | int_ret_from_sys_call: | 342 | int_ret_from_sys_call: |
330 | cli | 343 | DISABLE_INTERRUPTS(CLBR_NONE) |
331 | TRACE_IRQS_OFF | 344 | TRACE_IRQS_OFF |
332 | testl $3,CS-ARGOFFSET(%rsp) | 345 | testl $3,CS-ARGOFFSET(%rsp) |
333 | je retint_restore_args | 346 | je retint_restore_args |
@@ -349,20 +362,20 @@ int_careful: | |||
349 | bt $TIF_NEED_RESCHED,%edx | 362 | bt $TIF_NEED_RESCHED,%edx |
350 | jnc int_very_careful | 363 | jnc int_very_careful |
351 | TRACE_IRQS_ON | 364 | TRACE_IRQS_ON |
352 | sti | 365 | ENABLE_INTERRUPTS(CLBR_NONE) |
353 | pushq %rdi | 366 | pushq %rdi |
354 | CFI_ADJUST_CFA_OFFSET 8 | 367 | CFI_ADJUST_CFA_OFFSET 8 |
355 | call schedule | 368 | call schedule |
356 | popq %rdi | 369 | popq %rdi |
357 | CFI_ADJUST_CFA_OFFSET -8 | 370 | CFI_ADJUST_CFA_OFFSET -8 |
358 | cli | 371 | DISABLE_INTERRUPTS(CLBR_NONE) |
359 | TRACE_IRQS_OFF | 372 | TRACE_IRQS_OFF |
360 | jmp int_with_check | 373 | jmp int_with_check |
361 | 374 | ||
362 | /* handle signals and tracing -- both require a full stack frame */ | 375 | /* handle signals and tracing -- both require a full stack frame */ |
363 | int_very_careful: | 376 | int_very_careful: |
364 | TRACE_IRQS_ON | 377 | TRACE_IRQS_ON |
365 | sti | 378 | ENABLE_INTERRUPTS(CLBR_NONE) |
366 | SAVE_REST | 379 | SAVE_REST |
367 | /* Check for syscall exit trace */ | 380 | /* Check for syscall exit trace */ |
368 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | 381 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx |
@@ -385,7 +398,7 @@ int_signal: | |||
385 | 1: movl $_TIF_NEED_RESCHED,%edi | 398 | 1: movl $_TIF_NEED_RESCHED,%edi |
386 | int_restore_rest: | 399 | int_restore_rest: |
387 | RESTORE_REST | 400 | RESTORE_REST |
388 | cli | 401 | DISABLE_INTERRUPTS(CLBR_NONE) |
389 | TRACE_IRQS_OFF | 402 | TRACE_IRQS_OFF |
390 | jmp int_with_check | 403 | jmp int_with_check |
391 | CFI_ENDPROC | 404 | CFI_ENDPROC |
@@ -506,7 +519,7 @@ END(stub_rt_sigreturn) | |||
506 | CFI_DEF_CFA_REGISTER rbp | 519 | CFI_DEF_CFA_REGISTER rbp |
507 | testl $3,CS(%rdi) | 520 | testl $3,CS(%rdi) |
508 | je 1f | 521 | je 1f |
509 | swapgs | 522 | SWAPGS |
510 | /* irqcount is used to check if a CPU is already on an interrupt | 523 | /* irqcount is used to check if a CPU is already on an interrupt |
511 | stack or not. While this is essentially redundant with preempt_count | 524 | stack or not. While this is essentially redundant with preempt_count |
512 | it is a little cheaper to use a separate counter in the PDA | 525 | it is a little cheaper to use a separate counter in the PDA |
@@ -527,7 +540,7 @@ ENTRY(common_interrupt) | |||
527 | interrupt do_IRQ | 540 | interrupt do_IRQ |
528 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 541 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
529 | ret_from_intr: | 542 | ret_from_intr: |
530 | cli | 543 | DISABLE_INTERRUPTS(CLBR_NONE) |
531 | TRACE_IRQS_OFF | 544 | TRACE_IRQS_OFF |
532 | decl %gs:pda_irqcount | 545 | decl %gs:pda_irqcount |
533 | leaveq | 546 | leaveq |
@@ -556,13 +569,13 @@ retint_swapgs: /* return to user-space */ | |||
556 | /* | 569 | /* |
557 | * The iretq could re-enable interrupts: | 570 | * The iretq could re-enable interrupts: |
558 | */ | 571 | */ |
559 | cli | 572 | DISABLE_INTERRUPTS(CLBR_ANY) |
560 | TRACE_IRQS_IRETQ | 573 | TRACE_IRQS_IRETQ |
561 | swapgs | 574 | SWAPGS |
562 | jmp restore_args | 575 | jmp restore_args |
563 | 576 | ||
564 | retint_restore_args: /* return to kernel space */ | 577 | retint_restore_args: /* return to kernel space */ |
565 | cli | 578 | DISABLE_INTERRUPTS(CLBR_ANY) |
566 | /* | 579 | /* |
567 | * The iretq could re-enable interrupts: | 580 | * The iretq could re-enable interrupts: |
568 | */ | 581 | */ |
@@ -570,10 +583,14 @@ retint_restore_args: /* return to kernel space */ | |||
570 | restore_args: | 583 | restore_args: |
571 | RESTORE_ARGS 0,8,0 | 584 | RESTORE_ARGS 0,8,0 |
572 | iret_label: | 585 | iret_label: |
586 | #ifdef CONFIG_PARAVIRT | ||
587 | INTERRUPT_RETURN | ||
588 | #endif | ||
589 | ENTRY(native_iret) | ||
573 | iretq | 590 | iretq |
574 | 591 | ||
575 | .section __ex_table,"a" | 592 | .section __ex_table,"a" |
576 | .quad iret_label,bad_iret | 593 | .quad native_iret, bad_iret |
577 | .previous | 594 | .previous |
578 | .section .fixup,"ax" | 595 | .section .fixup,"ax" |
579 | /* force a signal here? this matches i386 behaviour */ | 596 | /* force a signal here? this matches i386 behaviour */ |
@@ -581,24 +598,24 @@ iret_label: | |||
581 | bad_iret: | 598 | bad_iret: |
582 | movq $11,%rdi /* SIGSEGV */ | 599 | movq $11,%rdi /* SIGSEGV */ |
583 | TRACE_IRQS_ON | 600 | TRACE_IRQS_ON |
584 | sti | 601 | ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) |
585 | jmp do_exit | 602 | jmp do_exit |
586 | .previous | 603 | .previous |
587 | 604 | ||
588 | /* edi: workmask, edx: work */ | 605 | /* edi: workmask, edx: work */ |
589 | retint_careful: | 606 | retint_careful: |
590 | CFI_RESTORE_STATE | 607 | CFI_RESTORE_STATE |
591 | bt $TIF_NEED_RESCHED,%edx | 608 | bt $TIF_NEED_RESCHED,%edx |
592 | jnc retint_signal | 609 | jnc retint_signal |
593 | TRACE_IRQS_ON | 610 | TRACE_IRQS_ON |
594 | sti | 611 | ENABLE_INTERRUPTS(CLBR_NONE) |
595 | pushq %rdi | 612 | pushq %rdi |
596 | CFI_ADJUST_CFA_OFFSET 8 | 613 | CFI_ADJUST_CFA_OFFSET 8 |
597 | call schedule | 614 | call schedule |
598 | popq %rdi | 615 | popq %rdi |
599 | CFI_ADJUST_CFA_OFFSET -8 | 616 | CFI_ADJUST_CFA_OFFSET -8 |
600 | GET_THREAD_INFO(%rcx) | 617 | GET_THREAD_INFO(%rcx) |
601 | cli | 618 | DISABLE_INTERRUPTS(CLBR_NONE) |
602 | TRACE_IRQS_OFF | 619 | TRACE_IRQS_OFF |
603 | jmp retint_check | 620 | jmp retint_check |
604 | 621 | ||
@@ -606,14 +623,14 @@ retint_signal: | |||
606 | testl $_TIF_DO_NOTIFY_MASK,%edx | 623 | testl $_TIF_DO_NOTIFY_MASK,%edx |
607 | jz retint_swapgs | 624 | jz retint_swapgs |
608 | TRACE_IRQS_ON | 625 | TRACE_IRQS_ON |
609 | sti | 626 | ENABLE_INTERRUPTS(CLBR_NONE) |
610 | SAVE_REST | 627 | SAVE_REST |
611 | movq $-1,ORIG_RAX(%rsp) | 628 | movq $-1,ORIG_RAX(%rsp) |
612 | xorl %esi,%esi # oldset | 629 | xorl %esi,%esi # oldset |
613 | movq %rsp,%rdi # &pt_regs | 630 | movq %rsp,%rdi # &pt_regs |
614 | call do_notify_resume | 631 | call do_notify_resume |
615 | RESTORE_REST | 632 | RESTORE_REST |
616 | cli | 633 | DISABLE_INTERRUPTS(CLBR_NONE) |
617 | TRACE_IRQS_OFF | 634 | TRACE_IRQS_OFF |
618 | movl $_TIF_NEED_RESCHED,%edi | 635 | movl $_TIF_NEED_RESCHED,%edi |
619 | GET_THREAD_INFO(%rcx) | 636 | GET_THREAD_INFO(%rcx) |
@@ -731,7 +748,7 @@ END(spurious_interrupt) | |||
731 | rdmsr | 748 | rdmsr |
732 | testl %edx,%edx | 749 | testl %edx,%edx |
733 | js 1f | 750 | js 1f |
734 | swapgs | 751 | SWAPGS |
735 | xorl %ebx,%ebx | 752 | xorl %ebx,%ebx |
736 | 1: | 753 | 1: |
737 | .if \ist | 754 | .if \ist |
@@ -747,7 +764,7 @@ END(spurious_interrupt) | |||
747 | .if \ist | 764 | .if \ist |
748 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | 765 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) |
749 | .endif | 766 | .endif |
750 | cli | 767 | DISABLE_INTERRUPTS(CLBR_NONE) |
751 | .if \irqtrace | 768 | .if \irqtrace |
752 | TRACE_IRQS_OFF | 769 | TRACE_IRQS_OFF |
753 | .endif | 770 | .endif |
@@ -776,10 +793,10 @@ paranoid_swapgs\trace: | |||
776 | .if \trace | 793 | .if \trace |
777 | TRACE_IRQS_IRETQ 0 | 794 | TRACE_IRQS_IRETQ 0 |
778 | .endif | 795 | .endif |
779 | swapgs | 796 | SWAPGS_UNSAFE_STACK |
780 | paranoid_restore\trace: | 797 | paranoid_restore\trace: |
781 | RESTORE_ALL 8 | 798 | RESTORE_ALL 8 |
782 | iretq | 799 | INTERRUPT_RETURN |
783 | paranoid_userspace\trace: | 800 | paranoid_userspace\trace: |
784 | GET_THREAD_INFO(%rcx) | 801 | GET_THREAD_INFO(%rcx) |
785 | movl threadinfo_flags(%rcx),%ebx | 802 | movl threadinfo_flags(%rcx),%ebx |
@@ -794,11 +811,11 @@ paranoid_userspace\trace: | |||
794 | .if \trace | 811 | .if \trace |
795 | TRACE_IRQS_ON | 812 | TRACE_IRQS_ON |
796 | .endif | 813 | .endif |
797 | sti | 814 | ENABLE_INTERRUPTS(CLBR_NONE) |
798 | xorl %esi,%esi /* arg2: oldset */ | 815 | xorl %esi,%esi /* arg2: oldset */ |
799 | movq %rsp,%rdi /* arg1: &pt_regs */ | 816 | movq %rsp,%rdi /* arg1: &pt_regs */ |
800 | call do_notify_resume | 817 | call do_notify_resume |
801 | cli | 818 | DISABLE_INTERRUPTS(CLBR_NONE) |
802 | .if \trace | 819 | .if \trace |
803 | TRACE_IRQS_OFF | 820 | TRACE_IRQS_OFF |
804 | .endif | 821 | .endif |
@@ -807,9 +824,9 @@ paranoid_schedule\trace: | |||
807 | .if \trace | 824 | .if \trace |
808 | TRACE_IRQS_ON | 825 | TRACE_IRQS_ON |
809 | .endif | 826 | .endif |
810 | sti | 827 | ENABLE_INTERRUPTS(CLBR_ANY) |
811 | call schedule | 828 | call schedule |
812 | cli | 829 | DISABLE_INTERRUPTS(CLBR_ANY) |
813 | .if \trace | 830 | .if \trace |
814 | TRACE_IRQS_OFF | 831 | TRACE_IRQS_OFF |
815 | .endif | 832 | .endif |
@@ -862,7 +879,7 @@ KPROBE_ENTRY(error_entry) | |||
862 | testl $3,CS(%rsp) | 879 | testl $3,CS(%rsp) |
863 | je error_kernelspace | 880 | je error_kernelspace |
864 | error_swapgs: | 881 | error_swapgs: |
865 | swapgs | 882 | SWAPGS |
866 | error_sti: | 883 | error_sti: |
867 | movq %rdi,RDI(%rsp) | 884 | movq %rdi,RDI(%rsp) |
868 | CFI_REL_OFFSET rdi,RDI | 885 | CFI_REL_OFFSET rdi,RDI |
@@ -874,7 +891,7 @@ error_sti: | |||
874 | error_exit: | 891 | error_exit: |
875 | movl %ebx,%eax | 892 | movl %ebx,%eax |
876 | RESTORE_REST | 893 | RESTORE_REST |
877 | cli | 894 | DISABLE_INTERRUPTS(CLBR_NONE) |
878 | TRACE_IRQS_OFF | 895 | TRACE_IRQS_OFF |
879 | GET_THREAD_INFO(%rcx) | 896 | GET_THREAD_INFO(%rcx) |
880 | testl %eax,%eax | 897 | testl %eax,%eax |
@@ -911,12 +928,12 @@ ENTRY(load_gs_index) | |||
911 | CFI_STARTPROC | 928 | CFI_STARTPROC |
912 | pushf | 929 | pushf |
913 | CFI_ADJUST_CFA_OFFSET 8 | 930 | CFI_ADJUST_CFA_OFFSET 8 |
914 | cli | 931 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) |
915 | swapgs | 932 | SWAPGS |
916 | gs_change: | 933 | gs_change: |
917 | movl %edi,%gs | 934 | movl %edi,%gs |
918 | 2: mfence /* workaround */ | 935 | 2: mfence /* workaround */ |
919 | swapgs | 936 | SWAPGS |
920 | popf | 937 | popf |
921 | CFI_ADJUST_CFA_OFFSET -8 | 938 | CFI_ADJUST_CFA_OFFSET -8 |
922 | ret | 939 | ret |
@@ -930,7 +947,7 @@ ENDPROC(load_gs_index) | |||
930 | .section .fixup,"ax" | 947 | .section .fixup,"ax" |
931 | /* running with kernelgs */ | 948 | /* running with kernelgs */ |
932 | bad_gs: | 949 | bad_gs: |
933 | swapgs /* switch back to user gs */ | 950 | SWAPGS /* switch back to user gs */ |
934 | xorl %eax,%eax | 951 | xorl %eax,%eax |
935 | movl %eax,%gs | 952 | movl %eax,%gs |
936 | jmp 2b | 953 | jmp 2b |