diff options
Diffstat (limited to 'arch/i386/kernel/vmi.c')
-rw-r--r-- | arch/i386/kernel/vmi.c | 164 |
1 files changed, 91 insertions, 73 deletions
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index 30e8253e6eef..af8c54245f9a 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c | |||
@@ -54,6 +54,7 @@ static int disable_sep; | |||
54 | static int disable_tsc; | 54 | static int disable_tsc; |
55 | static int disable_mtrr; | 55 | static int disable_mtrr; |
56 | static int disable_noidle; | 56 | static int disable_noidle; |
57 | static int disable_vmi_timer; | ||
57 | 58 | ||
58 | /* Cached VMI operations */ | 59 | /* Cached VMI operations */ |
59 | struct { | 60 | struct { |
@@ -661,12 +662,12 @@ static inline int __init probe_vmi_rom(void) | |||
661 | void vmi_bringup(void) | 662 | void vmi_bringup(void) |
662 | { | 663 | { |
663 | /* We must establish the lowmem mapping for MMU ops to work */ | 664 | /* We must establish the lowmem mapping for MMU ops to work */ |
664 | if (vmi_rom) | 665 | if (vmi_ops.set_linear_mapping) |
665 | vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); | 666 | vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); |
666 | } | 667 | } |
667 | 668 | ||
668 | /* | 669 | /* |
669 | * Return a pointer to the VMI function or a NOP stub | 670 | * Return a pointer to a VMI function or NULL if unimplemented |
670 | */ | 671 | */ |
671 | static void *vmi_get_function(int vmicall) | 672 | static void *vmi_get_function(int vmicall) |
672 | { | 673 | { |
@@ -677,12 +678,13 @@ static void *vmi_get_function(int vmicall) | |||
677 | if (rel->type == VMI_RELOCATION_CALL_REL) | 678 | if (rel->type == VMI_RELOCATION_CALL_REL) |
678 | return (void *)rel->eip; | 679 | return (void *)rel->eip; |
679 | else | 680 | else |
680 | return (void *)vmi_nop; | 681 | return NULL; |
681 | } | 682 | } |
682 | 683 | ||
683 | /* | 684 | /* |
684 | * Helper macro for making the VMI paravirt-ops fill code readable. | 685 | * Helper macro for making the VMI paravirt-ops fill code readable. |
685 | * For unimplemented operations, fall back to default. | 686 | * For unimplemented operations, fall back to default, unless nop |
687 | * is returned by the ROM. | ||
686 | */ | 688 | */ |
687 | #define para_fill(opname, vmicall) \ | 689 | #define para_fill(opname, vmicall) \ |
688 | do { \ | 690 | do { \ |
@@ -691,9 +693,29 @@ do { \ | |||
691 | if (rel->type != VMI_RELOCATION_NONE) { \ | 693 | if (rel->type != VMI_RELOCATION_NONE) { \ |
692 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \ | 694 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \ |
693 | paravirt_ops.opname = (void *)rel->eip; \ | 695 | paravirt_ops.opname = (void *)rel->eip; \ |
696 | } else if (rel->type == VMI_RELOCATION_NOP) \ | ||
697 | paravirt_ops.opname = (void *)vmi_nop; \ | ||
698 | } while (0) | ||
699 | |||
700 | /* | ||
701 | * Helper macro for making the VMI paravirt-ops fill code readable. | ||
702 | * For cached operations which do not match the VMI ROM ABI and must | ||
703 | * go through a translation stub. Ignore NOPs, since it is not clear | ||
704 | * that a NOP VMI function corresponds to a NOP paravirt-op when the | ||
705 | * functions are not in 1-1 correspondence. | ||
706 | */ | ||
707 | #define para_wrap(opname, wrapper, cache, vmicall) \ | ||
708 | do { \ | ||
709 | reloc = call_vrom_long_func(vmi_rom, get_reloc, \ | ||
710 | VMI_CALL_##vmicall); \ | ||
711 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ | ||
712 | if (rel->type == VMI_RELOCATION_CALL_REL) { \ | ||
713 | paravirt_ops.opname = wrapper; \ | ||
714 | vmi_ops.cache = (void *)rel->eip; \ | ||
694 | } \ | 715 | } \ |
695 | } while (0) | 716 | } while (0) |
696 | 717 | ||
718 | |||
697 | /* | 719 | /* |
698 | * Activate the VMI interface and switch into paravirtualized mode | 720 | * Activate the VMI interface and switch into paravirtualized mode |
699 | */ | 721 | */ |
@@ -730,13 +752,8 @@ static inline int __init activate_vmi(void) | |||
730 | * rdpmc is not yet used in Linux | 752 | * rdpmc is not yet used in Linux |
731 | */ | 753 | */ |
732 | 754 | ||
733 | /* CPUID is special, so very special */ | 755 | /* CPUID is special, so very special it gets wrapped like a present */ |
734 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID); | 756 | para_wrap(cpuid, vmi_cpuid, cpuid, CPUID); |
735 | if (rel->type != VMI_RELOCATION_NONE) { | ||
736 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
737 | vmi_ops.cpuid = (void *)rel->eip; | ||
738 | paravirt_ops.cpuid = vmi_cpuid; | ||
739 | } | ||
740 | 757 | ||
741 | para_fill(clts, CLTS); | 758 | para_fill(clts, CLTS); |
742 | para_fill(get_debugreg, GetDR); | 759 | para_fill(get_debugreg, GetDR); |
@@ -753,6 +770,7 @@ static inline int __init activate_vmi(void) | |||
753 | para_fill(restore_fl, SetInterruptMask); | 770 | para_fill(restore_fl, SetInterruptMask); |
754 | para_fill(irq_disable, DisableInterrupts); | 771 | para_fill(irq_disable, DisableInterrupts); |
755 | para_fill(irq_enable, EnableInterrupts); | 772 | para_fill(irq_enable, EnableInterrupts); |
773 | |||
756 | /* irq_save_disable !!! sheer pain */ | 774 | /* irq_save_disable !!! sheer pain */ |
757 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK], | 775 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK], |
758 | (char *)paravirt_ops.save_fl); | 776 | (char *)paravirt_ops.save_fl); |
@@ -760,26 +778,18 @@ static inline int __init activate_vmi(void) | |||
760 | (char *)paravirt_ops.irq_disable); | 778 | (char *)paravirt_ops.irq_disable); |
761 | 779 | ||
762 | para_fill(wbinvd, WBINVD); | 780 | para_fill(wbinvd, WBINVD); |
781 | para_fill(read_tsc, RDTSC); | ||
782 | |||
783 | /* The following are handled by trap-and-emulate for now */ | ||
763 | /* paravirt_ops.read_msr = vmi_rdmsr */ | 784 | /* paravirt_ops.read_msr = vmi_rdmsr */ |
764 | /* paravirt_ops.write_msr = vmi_wrmsr */ | 785 | /* paravirt_ops.write_msr = vmi_wrmsr */ |
765 | para_fill(read_tsc, RDTSC); | ||
766 | /* paravirt_ops.rdpmc = vmi_rdpmc */ | 786 | /* paravirt_ops.rdpmc = vmi_rdpmc */ |
767 | 787 | ||
768 | /* TR interface doesn't pass TR value */ | 788 | /* TR interface doesn't pass TR value, wrap */ |
769 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR); | 789 | para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR); |
770 | if (rel->type != VMI_RELOCATION_NONE) { | ||
771 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
772 | vmi_ops.set_tr = (void *)rel->eip; | ||
773 | paravirt_ops.load_tr_desc = vmi_set_tr; | ||
774 | } | ||
775 | 790 | ||
776 | /* LDT is special, too */ | 791 | /* LDT is special, too */ |
777 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT); | 792 | para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT); |
778 | if (rel->type != VMI_RELOCATION_NONE) { | ||
779 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
780 | vmi_ops._set_ldt = (void *)rel->eip; | ||
781 | paravirt_ops.set_ldt = vmi_set_ldt; | ||
782 | } | ||
783 | 793 | ||
784 | para_fill(load_gdt, SetGDT); | 794 | para_fill(load_gdt, SetGDT); |
785 | para_fill(load_idt, SetIDT); | 795 | para_fill(load_idt, SetIDT); |
@@ -790,25 +800,14 @@ static inline int __init activate_vmi(void) | |||
790 | para_fill(write_ldt_entry, WriteLDTEntry); | 800 | para_fill(write_ldt_entry, WriteLDTEntry); |
791 | para_fill(write_gdt_entry, WriteGDTEntry); | 801 | para_fill(write_gdt_entry, WriteGDTEntry); |
792 | para_fill(write_idt_entry, WriteIDTEntry); | 802 | para_fill(write_idt_entry, WriteIDTEntry); |
793 | reloc = call_vrom_long_func(vmi_rom, get_reloc, | 803 | para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); |
794 | VMI_CALL_UpdateKernelStack); | ||
795 | if (rel->type != VMI_RELOCATION_NONE) { | ||
796 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
797 | vmi_ops.set_kernel_stack = (void *)rel->eip; | ||
798 | paravirt_ops.load_esp0 = vmi_load_esp0; | ||
799 | } | ||
800 | |||
801 | para_fill(set_iopl_mask, SetIOPLMask); | 804 | para_fill(set_iopl_mask, SetIOPLMask); |
802 | paravirt_ops.io_delay = (void *)vmi_nop; | 805 | para_fill(io_delay, IODelay); |
803 | |||
804 | para_fill(set_lazy_mode, SetLazyMode); | 806 | para_fill(set_lazy_mode, SetLazyMode); |
805 | 807 | ||
806 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB); | 808 | /* user and kernel flush are just handled with different flags to FlushTLB */ |
807 | if (rel->type != VMI_RELOCATION_NONE) { | 809 | para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB); |
808 | vmi_ops.flush_tlb = (void *)rel->eip; | 810 | para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB); |
809 | paravirt_ops.flush_tlb_user = vmi_flush_tlb_user; | ||
810 | paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel; | ||
811 | } | ||
812 | para_fill(flush_tlb_single, InvalPage); | 811 | para_fill(flush_tlb_single, InvalPage); |
813 | 812 | ||
814 | /* | 813 | /* |
@@ -823,28 +822,40 @@ static inline int __init activate_vmi(void) | |||
823 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE); | 822 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE); |
824 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE); | 823 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE); |
825 | #endif | 824 | #endif |
826 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | ||
827 | vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); | ||
828 | vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); | ||
829 | 825 | ||
830 | paravirt_ops.map_pt_hook = vmi_map_pt_hook; | 826 | if (vmi_ops.set_pte) { |
831 | paravirt_ops.alloc_pt = vmi_allocate_pt; | 827 | paravirt_ops.set_pte = vmi_set_pte; |
832 | paravirt_ops.alloc_pd = vmi_allocate_pd; | 828 | paravirt_ops.set_pte_at = vmi_set_pte_at; |
833 | paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; | 829 | paravirt_ops.set_pmd = vmi_set_pmd; |
834 | paravirt_ops.release_pt = vmi_release_pt; | ||
835 | paravirt_ops.release_pd = vmi_release_pd; | ||
836 | paravirt_ops.set_pte = vmi_set_pte; | ||
837 | paravirt_ops.set_pte_at = vmi_set_pte_at; | ||
838 | paravirt_ops.set_pmd = vmi_set_pmd; | ||
839 | paravirt_ops.pte_update = vmi_update_pte; | ||
840 | paravirt_ops.pte_update_defer = vmi_update_pte_defer; | ||
841 | #ifdef CONFIG_X86_PAE | 830 | #ifdef CONFIG_X86_PAE |
842 | paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; | 831 | paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; |
843 | paravirt_ops.set_pte_present = vmi_set_pte_present; | 832 | paravirt_ops.set_pte_present = vmi_set_pte_present; |
844 | paravirt_ops.set_pud = vmi_set_pud; | 833 | paravirt_ops.set_pud = vmi_set_pud; |
845 | paravirt_ops.pte_clear = vmi_pte_clear; | 834 | paravirt_ops.pte_clear = vmi_pte_clear; |
846 | paravirt_ops.pmd_clear = vmi_pmd_clear; | 835 | paravirt_ops.pmd_clear = vmi_pmd_clear; |
847 | #endif | 836 | #endif |
837 | } | ||
838 | |||
839 | if (vmi_ops.update_pte) { | ||
840 | paravirt_ops.pte_update = vmi_update_pte; | ||
841 | paravirt_ops.pte_update_defer = vmi_update_pte_defer; | ||
842 | } | ||
843 | |||
844 | vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); | ||
845 | if (vmi_ops.allocate_page) { | ||
846 | paravirt_ops.alloc_pt = vmi_allocate_pt; | ||
847 | paravirt_ops.alloc_pd = vmi_allocate_pd; | ||
848 | paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; | ||
849 | } | ||
850 | |||
851 | vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); | ||
852 | if (vmi_ops.release_page) { | ||
853 | paravirt_ops.release_pt = vmi_release_pt; | ||
854 | paravirt_ops.release_pd = vmi_release_pd; | ||
855 | } | ||
856 | para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping, | ||
857 | SetLinearMapping); | ||
858 | |||
848 | /* | 859 | /* |
849 | * These MUST always be patched. Don't support indirect jumps | 860 | * These MUST always be patched. Don't support indirect jumps |
850 | * through these operations, as the VMI interface may use either | 861 | * through these operations, as the VMI interface may use either |
@@ -856,21 +867,20 @@ static inline int __init activate_vmi(void) | |||
856 | paravirt_ops.iret = (void *)0xbadbab0; | 867 | paravirt_ops.iret = (void *)0xbadbab0; |
857 | 868 | ||
858 | #ifdef CONFIG_SMP | 869 | #ifdef CONFIG_SMP |
859 | paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook; | 870 | para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); |
860 | vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState); | ||
861 | #endif | 871 | #endif |
862 | 872 | ||
863 | #ifdef CONFIG_X86_LOCAL_APIC | 873 | #ifdef CONFIG_X86_LOCAL_APIC |
864 | paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead); | 874 | para_fill(apic_read, APICRead); |
865 | paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite); | 875 | para_fill(apic_write, APICWrite); |
866 | paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite); | 876 | para_fill(apic_write_atomic, APICWrite); |
867 | #endif | 877 | #endif |
868 | 878 | ||
869 | /* | 879 | /* |
870 | * Check for VMI timer functionality by probing for a cycle frequency method | 880 | * Check for VMI timer functionality by probing for a cycle frequency method |
871 | */ | 881 | */ |
872 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency); | 882 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency); |
873 | if (rel->type != VMI_RELOCATION_NONE) { | 883 | if (!disable_vmi_timer && rel->type != VMI_RELOCATION_NONE) { |
874 | vmi_timer_ops.get_cycle_frequency = (void *)rel->eip; | 884 | vmi_timer_ops.get_cycle_frequency = (void *)rel->eip; |
875 | vmi_timer_ops.get_cycle_counter = | 885 | vmi_timer_ops.get_cycle_counter = |
876 | vmi_get_function(VMI_CALL_GetCycleCounter); | 886 | vmi_get_function(VMI_CALL_GetCycleCounter); |
@@ -890,13 +900,19 @@ static inline int __init activate_vmi(void) | |||
890 | #endif | 900 | #endif |
891 | paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; | 901 | paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; |
892 | paravirt_ops.get_cpu_khz = vmi_cpu_khz; | 902 | paravirt_ops.get_cpu_khz = vmi_cpu_khz; |
903 | |||
904 | /* We have true wallclock functions; disable CMOS clock sync */ | ||
905 | no_sync_cmos_clock = 1; | ||
906 | } else { | ||
907 | disable_noidle = 1; | ||
908 | disable_vmi_timer = 1; | ||
893 | } | 909 | } |
894 | if (!disable_noidle) | 910 | |
911 | /* No-idle-HZ mode only works if both the VMI timer and no-idle are enabled */ | ||
912 | if (disable_noidle || disable_vmi_timer) | ||
895 | para_fill(safe_halt, Halt); | 913 | para_fill(safe_halt, Halt); |
896 | else { | 914 | else |
897 | vmi_ops.halt = vmi_get_function(VMI_CALL_Halt); | 915 | para_wrap(safe_halt, vmi_safe_halt, halt, Halt); |
898 | paravirt_ops.safe_halt = vmi_safe_halt; | ||
899 | } | ||
900 | 916 | ||
901 | /* | 917 | /* |
902 | * Alternative instruction rewriting doesn't happen soon enough | 918 | * Alternative instruction rewriting doesn't happen soon enough |
@@ -932,10 +948,9 @@ void __init vmi_init(void) | |||
932 | activate_vmi(); | 948 | activate_vmi(); |
933 | 949 | ||
934 | #ifdef CONFIG_X86_IO_APIC | 950 | #ifdef CONFIG_X86_IO_APIC |
951 | /* This is virtual hardware; timer routing is wired correctly */ | ||
935 | no_timer_check = 1; | 952 | no_timer_check = 1; |
936 | #endif | 953 | #endif |
937 | no_sync_cmos_clock = 1; | ||
938 | |||
939 | local_irq_restore(flags & X86_EFLAGS_IF); | 954 | local_irq_restore(flags & X86_EFLAGS_IF); |
940 | } | 955 | } |
941 | 956 | ||
@@ -959,6 +974,9 @@ static int __init parse_vmi(char *arg) | |||
959 | } else if (!strcmp(arg, "disable_mtrr")) { | 974 | } else if (!strcmp(arg, "disable_mtrr")) { |
960 | clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability); | 975 | clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability); |
961 | disable_mtrr = 1; | 976 | disable_mtrr = 1; |
977 | } else if (!strcmp(arg, "disable_timer")) { | ||
978 | disable_vmi_timer = 1; | ||
979 | disable_noidle = 1; | ||
962 | } else if (!strcmp(arg, "disable_noidle")) | 980 | } else if (!strcmp(arg, "disable_noidle")) |
963 | disable_noidle = 1; | 981 | disable_noidle = 1; |
964 | return 0; | 982 | return 0; |