author     Linus Torvalds <torvalds@linux-foundation.org>    2010-05-21 20:16:21 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2010-05-21 20:16:21 -0400
commit     98edb6ca4174f17a64890a02f44c211c8b44fb3c (patch)
tree       033bc5f7da410046d28dd1cefcd2d63cda33d25b
parent     a8251096b427283c47e7d8f9568be6b388dd68ec (diff)
parent     8fbf065d625617bbbf6b72d5f78f84ad13c8b547 (diff)
Merge branch 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.35' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (269 commits)
KVM: x86: Add missing locking to arch specific vcpu ioctls
KVM: PPC: Add missing vcpu_load()/vcpu_put() in vcpu ioctls
KVM: MMU: Segregate shadow pages with different cr0.wp
KVM: x86: Check LMA bit before set_efer
KVM: Don't allow lmsw to clear cr0.pe
KVM: Add cpuid.txt file
KVM: x86: Tell the guest we'll warn it about tsc stability
x86, paravirt: don't compute pvclock adjustments if we trust the tsc
x86: KVM guest: Try using new kvm clock msrs
KVM: x86: export paravirtual cpuid flags in KVM_GET_SUPPORTED_CPUID
KVM: x86: add new KVMCLOCK cpuid feature
KVM: x86: change msr numbers for kvmclock
x86, paravirt: Add a global synchronization point for pvclock
x86, paravirt: Enable pvclock flags in vcpu_time_info structure
KVM: x86: Inject #GP with the right rip on efer writes
KVM: SVM: Don't allow nested guest to VMMCALL into host
KVM: x86: Fix exception reinjection forced to true
KVM: Fix wallclock version writing race
KVM: MMU: Don't read pdptrs with mmu spinlock held in mmu_alloc_roots
KVM: VMX: enable VMXON check with SMX enabled (Intel TXT)
...
81 files changed, 7826 insertions, 2811 deletions
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index c6416a398163..a237518e51b9 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -656,6 +656,7 @@ struct kvm_clock_data { | |||
656 | 4.29 KVM_GET_VCPU_EVENTS | 656 | 4.29 KVM_GET_VCPU_EVENTS |
657 | 657 | ||
658 | Capability: KVM_CAP_VCPU_EVENTS | 658 | Capability: KVM_CAP_VCPU_EVENTS |
659 | Extended by: KVM_CAP_INTR_SHADOW | ||
659 | Architectures: x86 | 660 | Architectures: x86 |
660 | Type: vm ioctl | 661 | Type: vm ioctl |
661 | Parameters: struct kvm_vcpu_event (out) | 662 | Parameters: struct kvm_vcpu_event (out) |
@@ -676,7 +677,7 @@ struct kvm_vcpu_events { | |||
676 | __u8 injected; | 677 | __u8 injected; |
677 | __u8 nr; | 678 | __u8 nr; |
678 | __u8 soft; | 679 | __u8 soft; |
679 | __u8 pad; | 680 | __u8 shadow; |
680 | } interrupt; | 681 | } interrupt; |
681 | struct { | 682 | struct { |
682 | __u8 injected; | 683 | __u8 injected; |
@@ -688,9 +689,13 @@ struct kvm_vcpu_events { | |||
688 | __u32 flags; | 689 | __u32 flags; |
689 | }; | 690 | }; |
690 | 691 | ||
692 | KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that | ||
693 | interrupt.shadow contains a valid state. Otherwise, this field is undefined. | ||
694 | |||
691 | 4.30 KVM_SET_VCPU_EVENTS | 695 | 4.30 KVM_SET_VCPU_EVENTS |
692 | 696 | ||
693 | Capability: KVM_CAP_VCPU_EVENTS | 697 | Capability: KVM_CAP_VCPU_EVENTS |
698 | Extended by: KVM_CAP_INTR_SHADOW | ||
694 | Architectures: x86 | 699 | Architectures: x86 |
695 | Type: vm ioctl | 700 | Type: vm ioctl |
696 | Parameters: struct kvm_vcpu_event (in) | 701 | Parameters: struct kvm_vcpu_event (in) |
@@ -709,6 +714,183 @@ current in-kernel state. The bits are: | |||
709 | KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel | 714 | KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel |
710 | KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector | 715 | KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector |
711 | 716 | ||
717 | If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in | ||
718 | the flags field to signal that interrupt.shadow contains a valid state and | ||
719 | shall be written into the VCPU. | ||
720 | |||
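As an illustration (not part of the patch): a minimal userspace sketch that carries the interrupt shadow along with the rest of the event state. It assumes vcpu_fd is the file descriptor the calls are issued on, the host reports KVM_CAP_INTR_SHADOW, and <linux/kvm.h>, <sys/ioctl.h> and <err.h> are included.

	struct kvm_vcpu_events events;

	if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
		err(1, "KVM_GET_VCPU_EVENTS");

	/* ... migrate or inspect the state ... */

	events.flags |= KVM_VCPUEVENT_VALID_SHADOW;	/* interrupt.shadow is valid */
	if (ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events) < 0)
		err(1, "KVM_SET_VCPU_EVENTS");
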
721 | 4.32 KVM_GET_DEBUGREGS | ||
722 | |||
723 | Capability: KVM_CAP_DEBUGREGS | ||
724 | Architectures: x86 | ||
725 | Type: vm ioctl | ||
726 | Parameters: struct kvm_debugregs (out) | ||
727 | Returns: 0 on success, -1 on error | ||
728 | |||
729 | Reads debug registers from the vcpu. | ||
730 | |||
731 | struct kvm_debugregs { | ||
732 | __u64 db[4]; | ||
733 | __u64 dr6; | ||
734 | __u64 dr7; | ||
735 | __u64 flags; | ||
736 | __u64 reserved[9]; | ||
737 | }; | ||
738 | |||
739 | 4.33 KVM_SET_DEBUGREGS | ||
740 | |||
741 | Capability: KVM_CAP_DEBUGREGS | ||
742 | Architectures: x86 | ||
743 | Type: vm ioctl | ||
744 | Parameters: struct kvm_debugregs (in) | ||
745 | Returns: 0 on success, -1 on error | ||
746 | |||
747 | Writes debug registers into the vcpu. | ||
748 | |||
749 | See KVM_GET_DEBUGREGS for the data structure. The flags field is not used | ||
750 | yet and must be cleared on entry. | ||
751 | |||
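As an illustration (not part of the patch), assuming the host reports KVM_CAP_DEBUGREGS and vcpu_fd is the file descriptor the calls are issued on, a round trip that clears stale debug status looks like this:

	struct kvm_debugregs dbg;

	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0)
		err(1, "KVM_GET_DEBUGREGS");

	dbg.dr6 = 0;		/* drop stale debug status bits */
	dbg.flags = 0;		/* must be cleared for KVM_SET_DEBUGREGS */

	if (ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg) < 0)
		err(1, "KVM_SET_DEBUGREGS");
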
752 | 4.34 KVM_SET_USER_MEMORY_REGION | ||
753 | |||
754 | Capability: KVM_CAP_USER_MEM | ||
755 | Architectures: all | ||
756 | Type: vm ioctl | ||
757 | Parameters: struct kvm_userspace_memory_region (in) | ||
758 | Returns: 0 on success, -1 on error | ||
759 | |||
760 | struct kvm_userspace_memory_region { | ||
761 | __u32 slot; | ||
762 | __u32 flags; | ||
763 | __u64 guest_phys_addr; | ||
764 | __u64 memory_size; /* bytes */ | ||
765 | __u64 userspace_addr; /* start of the userspace allocated memory */ | ||
766 | }; | ||
767 | |||
768 | /* for kvm_memory_region::flags */ | ||
769 | #define KVM_MEM_LOG_DIRTY_PAGES 1UL | ||
770 | |||
771 | This ioctl allows the user to create or modify a guest physical memory | ||
772 | slot. When changing an existing slot, it may be moved in the guest | ||
773 | physical memory space, or its flags may be modified. It may not be | ||
774 | resized. Slots may not overlap in guest physical address space. | ||
775 | |||
776 | Memory for the region is taken starting at the address denoted by the | ||
777 | field userspace_addr, which must point at user addressable memory for | ||
778 | the entire memory slot size. Any object may back this memory, including | ||
779 | anonymous memory, ordinary files, and hugetlbfs. | ||
780 | |||
781 | It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr | ||
782 | be identical. This allows large pages in the guest to be backed by large | ||
783 | pages in the host. | ||
784 | |||
785 | The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which | ||
786 | instructs kvm to keep track of writes to memory within the slot. See | ||
787 | the KVM_GET_DIRTY_LOG ioctl. | ||
788 | |||
789 | When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of the memory | ||
790 | region are automatically reflected into the guest. For example, an mmap() | ||
791 | that affects the region will be made visible immediately. Another example | ||
792 | is madvise(MADV_DROP). | ||
793 | |||
794 | It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl. | ||
795 | The KVM_SET_MEMORY_REGION ioctl does not allow fine-grained control over memory | ||
796 | allocation and is deprecated. | ||
797 | |||
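A minimal sketch of typical usage (not part of the patch): install 128MB of anonymous memory as guest RAM in slot 0 with dirty logging enabled. vm_fd is assumed to be the VM file descriptor, and <sys/mman.h>, <sys/ioctl.h>, <linux/kvm.h> and <err.h> are assumed to be included.

	size_t size = 128 << 20;
	struct kvm_userspace_memory_region region;
	void *ram = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (ram == MAP_FAILED)
		err(1, "mmap");

	region.slot            = 0;
	region.flags           = KVM_MEM_LOG_DIRTY_PAGES;	/* see KVM_GET_DIRTY_LOG */
	region.guest_phys_addr = 0;
	region.memory_size     = size;
	region.userspace_addr  = (unsigned long)ram;

	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0)
		err(1, "KVM_SET_USER_MEMORY_REGION");

Aligning both guest_phys_addr and userspace_addr on a 2MB boundary (lower 21 bits identical, as recommended above) is what allows the host to back the slot with large pages.
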
798 | 4.35 KVM_SET_TSS_ADDR | ||
799 | |||
800 | Capability: KVM_CAP_SET_TSS_ADDR | ||
801 | Architectures: x86 | ||
802 | Type: vm ioctl | ||
803 | Parameters: unsigned long tss_address (in) | ||
804 | Returns: 0 on success, -1 on error | ||
805 | |||
806 | This ioctl defines the physical address of a three-page region in the guest | ||
807 | physical address space. The region must be within the first 4GB of the | ||
808 | guest physical address space and must not conflict with any memory slot | ||
809 | or any mmio address. The guest may malfunction if it accesses this memory | ||
810 | region. | ||
811 | |||
812 | This ioctl is required on Intel-based hosts. This is needed on Intel hardware | ||
813 | because of a quirk in the virtualization implementation (see the internals | ||
814 | documentation when it pops into existence). | ||
815 | |||
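For illustration (not part of the patch), with vm_fd the VM file descriptor; the address below is only an example of an otherwise unused range in the first 4GB:

	if (ioctl(vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000UL) < 0)
		err(1, "KVM_SET_TSS_ADDR");
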
816 | 4.36 KVM_ENABLE_CAP | ||
817 | |||
818 | Capability: KVM_CAP_ENABLE_CAP | ||
819 | Architectures: ppc | ||
820 | Type: vcpu ioctl | ||
821 | Parameters: struct kvm_enable_cap (in) | ||
822 | Returns: 0 on success; -1 on error | ||
823 | |||
824 | Not all extensions are enabled by default. Using this ioctl the application | ||
825 | can enable an extension, making it available to the guest. | ||
826 | |||
827 | On systems that do not support this ioctl, it always fails. On systems that | ||
828 | do support it, it only works for extensions that are supported for enablement. | ||
829 | |||
830 | To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should | ||
831 | be used. | ||
832 | |||
833 | struct kvm_enable_cap { | ||
834 | /* in */ | ||
835 | __u32 cap; | ||
836 | |||
837 | The capability that is supposed to get enabled. | ||
838 | |||
839 | __u32 flags; | ||
840 | |||
841 | A bitfield indicating future enhancements. Has to be 0 for now. | ||
842 | |||
843 | __u64 args[4]; | ||
844 | |||
845 | Arguments for enabling a feature. If a feature needs initial values to | ||
846 | function properly, this is the place to put them. | ||
847 | |||
848 | __u8 pad[64]; | ||
849 | }; | ||
850 | |||
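An illustrative sketch (not part of the patch) of enabling a capability on a vcpu. KVM_CAP_PPC_OSI is used here only as an example of an extension that supports enablement on ppc; probe it with KVM_CHECK_EXTENSION first.

	struct kvm_enable_cap cap = {
		.cap   = KVM_CAP_PPC_OSI,	/* example capability */
		.flags = 0,			/* must be zero */
	};

	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
		err(1, "KVM_ENABLE_CAP");
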
851 | 4.37 KVM_GET_MP_STATE | ||
852 | |||
853 | Capability: KVM_CAP_MP_STATE | ||
854 | Architectures: x86, ia64 | ||
855 | Type: vcpu ioctl | ||
856 | Parameters: struct kvm_mp_state (out) | ||
857 | Returns: 0 on success; -1 on error | ||
858 | |||
859 | struct kvm_mp_state { | ||
860 | __u32 mp_state; | ||
861 | }; | ||
862 | |||
863 | Returns the vcpu's current "multiprocessing state" (though also valid on | ||
864 | uniprocessor guests). | ||
865 | |||
866 | Possible values are: | ||
867 | |||
868 | - KVM_MP_STATE_RUNNABLE: the vcpu is currently running | ||
869 | - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) | ||
870 | which has not yet received an INIT signal | ||
871 | - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is | ||
872 | now ready for a SIPI | ||
873 | - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and | ||
874 | is waiting for an interrupt | ||
875 | - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector | ||
876 | accessible via KVM_GET_VCPU_EVENTS) | ||
877 | |||
878 | This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel | ||
879 | irqchip, the multiprocessing state must be maintained by userspace. | ||
880 | |||
881 | 4.38 KVM_SET_MP_STATE | ||
882 | |||
883 | Capability: KVM_CAP_MP_STATE | ||
884 | Architectures: x86, ia64 | ||
885 | Type: vcpu ioctl | ||
886 | Parameters: struct kvm_mp_state (in) | ||
887 | Returns: 0 on success; -1 on error | ||
888 | |||
889 | Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for | ||
890 | arguments. | ||
891 | |||
892 | This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel | ||
893 | irqchip, the multiprocessing state must be maintained by userspace. | ||
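An illustrative round trip (not part of the patch), assuming an in-kernel irqchip has been created, vcpu_fd refers to an application processor, and <stdio.h> is included for the diagnostic:

	struct kvm_mp_state mp;

	if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0)
		err(1, "KVM_GET_MP_STATE");

	if (mp.mp_state == KVM_MP_STATE_UNINITIALIZED)
		printf("AP is still waiting for INIT\n");

	/* restoring the same struct later puts the vcpu back in this state */
	if (ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp) < 0)
		err(1, "KVM_SET_MP_STATE");
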
712 | 894 | ||
713 | 5. The kvm_run structure | 895 | 5. The kvm_run structure |
714 | 896 | ||
@@ -820,6 +1002,13 @@ executed a memory-mapped I/O instruction which could not be satisfied | |||
820 | by kvm. The 'data' member contains the written data if 'is_write' is | 1002 | by kvm. The 'data' member contains the written data if 'is_write' is |
821 | true, and should be filled by application code otherwise. | 1003 | true, and should be filled by application code otherwise. |
822 | 1004 | ||
1005 | NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding | ||
1006 | operations are complete (and guest state is consistent) only after userspace | ||
1007 | has re-entered the kernel with KVM_RUN. The kernel side will first finish | ||
1008 | incomplete operations and then check for pending signals. Userspace | ||
1009 | can re-enter the guest with an unmasked signal pending to complete | ||
1010 | pending operations. | ||
1011 | |||
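To make the re-entry requirement concrete, here is a minimal vcpu loop sketch (not part of the patch). run is assumed to be the mmap'ed struct kvm_run for vcpu_fd, MMIO reads are simply stubbed with zeroes, and <errno.h>, <string.h> and <err.h> are assumed to be included; the emulated access only completes once KVM_RUN is entered again.

static void vcpu_loop(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
			err(1, "KVM_RUN");

		switch (run->exit_reason) {
		case KVM_EXIT_MMIO:
			if (!run->mmio.is_write)
				memset(run->mmio.data, 0, run->mmio.len);
			break;	/* completed on the next KVM_RUN */
		case KVM_EXIT_HLT:
			return;
		default:
			errx(1, "unhandled exit %d", run->exit_reason);
		}
	}
}
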
823 | /* KVM_EXIT_HYPERCALL */ | 1012 | /* KVM_EXIT_HYPERCALL */ |
824 | struct { | 1013 | struct { |
825 | __u64 nr; | 1014 | __u64 nr; |
@@ -829,7 +1018,9 @@ true, and should be filled by application code otherwise. | |||
829 | __u32 pad; | 1018 | __u32 pad; |
830 | } hypercall; | 1019 | } hypercall; |
831 | 1020 | ||
832 | Unused. | 1021 | Unused. This was once used for 'hypercall to userspace'. To implement |
1022 | such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390). | ||
1023 | Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO. | ||
833 | 1024 | ||
834 | /* KVM_EXIT_TPR_ACCESS */ | 1025 | /* KVM_EXIT_TPR_ACCESS */ |
835 | struct { | 1026 | struct { |
@@ -870,6 +1061,19 @@ s390 specific. | |||
870 | 1061 | ||
871 | powerpc specific. | 1062 | powerpc specific. |
872 | 1063 | ||
1064 | /* KVM_EXIT_OSI */ | ||
1065 | struct { | ||
1066 | __u64 gprs[32]; | ||
1067 | } osi; | ||
1068 | |||
1069 | MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch | ||
1070 | hypercalls and exit with this exit struct that contains all the guest gprs. | ||
1071 | |||
1072 | If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall. | ||
1073 | Userspace can now handle the hypercall and, when it is done, modify the gprs as | ||
1074 | necessary. Upon guest entry all guest GPRs will then be replaced by the values | ||
1075 | in this struct. | ||
1076 | |||
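An illustrative userspace handler (not part of the patch); the convention of passing the call number and return value in gpr 3 is an assumption of this sketch, not something KVM mandates:

static void handle_osi(struct kvm_run *run)
{
	__u64 *gprs = run->osi.gprs;	/* snapshot of all 32 guest gprs */

	/* decode gprs[3..], perform the requested service, ... */
	gprs[3] = 0;	/* loaded back into the guest on the next KVM_RUN */
}
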
873 | /* Fix the size of the union. */ | 1077 | /* Fix the size of the union. */ |
874 | char padding[256]; | 1078 | char padding[256]; |
875 | }; | 1079 | }; |
diff --git a/Documentation/kvm/cpuid.txt b/Documentation/kvm/cpuid.txt
new file mode 100644
index 000000000000..14a12ea92b7f
--- /dev/null
+++ b/Documentation/kvm/cpuid.txt
@@ -0,0 +1,42 @@ | |||
1 | KVM CPUID bits | ||
2 | Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010 | ||
3 | ===================================================== | ||
4 | |||
5 | A guest running on a kvm host can check some of its features using | ||
6 | cpuid. This is not always guaranteed to work, since userspace can | ||
7 | mask out some, or even all, KVM-related cpuid features before launching | ||
8 | a guest. | ||
9 | |||
10 | KVM cpuid functions are: | ||
11 | |||
12 | function: KVM_CPUID_SIGNATURE (0x40000000) | ||
13 | returns : eax = 0, | ||
14 | ebx = 0x4b4d564b, | ||
15 | ecx = 0x564b4d56, | ||
16 | edx = 0x4d. | ||
17 | Note that the values in ebx, ecx and edx correspond to the string "KVMKVMKVM". | ||
18 | This function queries the presence of KVM cpuid leaves. | ||
19 | |||
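For illustration only, a guest-side probe for this leaf might look like the sketch below (GCC-style inline asm and <string.h> assumed; a careful guest would first check the hypervisor bit, cpuid leaf 1 ecx bit 31, before touching the 0x4000000x range):

static int running_on_kvm(void)
{
	unsigned int eax = 0x40000000, ebx, ecx, edx;	/* KVM_CPUID_SIGNATURE */
	char sig[13];

	asm volatile("cpuid"
		     : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));

	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';

	return strcmp(sig, "KVMKVMKVM") == 0;
}
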
20 | |||
21 | function: KVM_CPUID_FEATURES (0x40000001) | ||
22 | returns : ebx, ecx, edx = 0 | ||
23 | eax = an OR'ed group of (1 << flag), where each flag is: | ||
24 | |||
25 | |||
26 | flag || value || meaning | ||
27 | ============================================================================= | ||
28 | KVM_FEATURE_CLOCKSOURCE || 0 || kvmclock available at msrs | ||
29 | || || 0x11 and 0x12. | ||
30 | ------------------------------------------------------------------------------ | ||
31 | KVM_FEATURE_NOP_IO_DELAY || 1 || not necessary to perform delays | ||
32 | || || on PIO operations. | ||
33 | ------------------------------------------------------------------------------ | ||
34 | KVM_FEATURE_MMU_OP || 2 || deprecated. | ||
35 | ------------------------------------------------------------------------------ | ||
36 | KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs | ||
37 | || || 0x4b564d00 and 0x4b564d01 | ||
38 | ------------------------------------------------------------------------------ | ||
39 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side | ||
40 | || || per-cpu warps are expected in | ||
41 | || || kvmclock. | ||
42 | ------------------------------------------------------------------------------ | ||
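As a hedged illustration of how a guest might consume the table above (not part of the patch), checking for the new kvmclock MSR pair amounts to testing bit 3 of eax from the features leaf:

static int have_new_kvmclock(void)
{
	unsigned int eax = 0x40000001, ebx, ecx, edx;	/* KVM_CPUID_FEATURES */

	asm volatile("cpuid"
		     : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));

	return !!(eax & (1 << 3));	/* KVM_FEATURE_CLOCKSOURCE2 */
}
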
diff --git a/Documentation/kvm/mmu.txt b/Documentation/kvm/mmu.txt
new file mode 100644
index 000000000000..aaed6ab9d7ab
--- /dev/null
+++ b/Documentation/kvm/mmu.txt
@@ -0,0 +1,304 @@ | |||
1 | The x86 kvm shadow mmu | ||
2 | ====================== | ||
3 | |||
4 | The mmu (in arch/x86/kvm, files mmu.[ch] and paging_tmpl.h) is responsible | ||
5 | for presenting a standard x86 mmu to the guest, while translating guest | ||
6 | physical addresses to host physical addresses. | ||
7 | |||
8 | The mmu code attempts to satisfy the following requirements: | ||
9 | |||
10 | - correctness: the guest should not be able to determine that it is running | ||
11 | on an emulated mmu except for timing (we attempt to comply | ||
12 | with the specification, not emulate the characteristics of | ||
13 | a particular implementation such as tlb size) | ||
14 | - security: the guest must not be able to touch host memory not assigned | ||
15 | to it | ||
16 | - performance: minimize the performance penalty imposed by the mmu | ||
17 | - scaling: need to scale to large memory and large vcpu guests | ||
18 | - hardware: support the full range of x86 virtualization hardware | ||
19 | - integration: Linux memory management code must be in control of guest memory | ||
20 | so that swapping, page migration, page merging, transparent | ||
21 | hugepages, and similar features work without change | ||
22 | - dirty tracking: report writes to guest memory to enable live migration | ||
23 | and framebuffer-based displays | ||
24 | - footprint: keep the amount of pinned kernel memory low (most memory | ||
25 | should be shrinkable) | ||
26 | - reliability: avoid multipage or GFP_ATOMIC allocations | ||
27 | |||
28 | Acronyms | ||
29 | ======== | ||
30 | |||
31 | pfn host page frame number | ||
32 | hpa host physical address | ||
33 | hva host virtual address | ||
34 | gfn guest frame number | ||
35 | gpa guest physical address | ||
36 | gva guest virtual address | ||
37 | ngpa nested guest physical address | ||
38 | ngva nested guest virtual address | ||
39 | pte page table entry (used also to refer generically to paging structure | ||
40 | entries) | ||
41 | gpte guest pte (referring to gfns) | ||
42 | spte shadow pte (referring to pfns) | ||
43 | tdp two dimensional paging (vendor neutral term for NPT and EPT) | ||
44 | |||
45 | Virtual and real hardware supported | ||
46 | =================================== | ||
47 | |||
48 | The mmu supports first-generation mmu hardware, which allows an atomic switch | ||
49 | of the current paging mode and cr3 during guest entry, as well as | ||
50 | two-dimensional paging (AMD's NPT and Intel's EPT). The emulated hardware | ||
51 | it exposes is the traditional 2/3/4 level x86 mmu, with support for global | ||
52 | pages, pae, pse, pse36, cr0.wp, and 1GB pages. Work is in progress to support | ||
53 | exposing NPT capable hardware on NPT capable hosts. | ||
54 | |||
55 | Translation | ||
56 | =========== | ||
57 | |||
58 | The primary job of the mmu is to program the processor's mmu to translate | ||
59 | addresses for the guest. Different translations are required at different | ||
60 | times: | ||
61 | |||
62 | - when guest paging is disabled, we translate guest physical addresses to | ||
63 | host physical addresses (gpa->hpa) | ||
64 | - when guest paging is enabled, we translate guest virtual addresses, to | ||
65 | guest physical addresses, to host physical addresses (gva->gpa->hpa) | ||
66 | - when the guest launches a guest of its own, we translate nested guest | ||
67 | virtual addresses, to nested guest physical addresses, to guest physical | ||
68 | addresses, to host physical addresses (ngva->ngpa->gpa->hpa) | ||
69 | |||
70 | The primary challenge is to encode between 1 and 3 translations into hardware | ||
71 | that supports only 1 (traditional) and 2 (tdp) translations. When the | ||
72 | number of required translations matches the hardware, the mmu operates in | ||
73 | direct mode; otherwise it operates in shadow mode (see below). | ||
74 | |||
75 | Memory | ||
76 | ====== | ||
77 | |||
78 | Guest memory (gpa) is part of the user address space of the process that is | ||
79 | using kvm. Userspace defines the translation between guest addresses and user | ||
80 | addresses (gpa->hva); note that two gpas may alias to the same hva, but not | ||
81 | vice versa. | ||
82 | |||
83 | These hvas may be backed using any method available to the host: anonymous | ||
84 | memory, file backed memory, and device memory. Memory might be paged by the | ||
85 | host at any time. | ||
86 | |||
87 | Events | ||
88 | ====== | ||
89 | |||
90 | The mmu is driven by events, some from the guest, some from the host. | ||
91 | |||
92 | Guest generated events: | ||
93 | - writes to control registers (especially cr3) | ||
94 | - invlpg/invlpga instruction execution | ||
95 | - access to missing or protected translations | ||
96 | |||
97 | Host generated events: | ||
98 | - changes in the gpa->hpa translation (either through gpa->hva changes or | ||
99 | through hva->hpa changes) | ||
100 | - memory pressure (the shrinker) | ||
101 | |||
102 | Shadow pages | ||
103 | ============ | ||
104 | |||
105 | The principal data structure is the shadow page, 'struct kvm_mmu_page'. A | ||
106 | shadow page contains 512 sptes, which can be either leaf or nonleaf sptes. A | ||
107 | shadow page may contain a mix of leaf and nonleaf sptes. | ||
108 | |||
109 | A nonleaf spte allows the hardware mmu to reach the leaf pages and | ||
110 | is not related to a translation directly. It points to other shadow pages. | ||
111 | |||
112 | A leaf spte corresponds to either one or two translations encoded into | ||
113 | one paging structure entry. These are always the lowest level of the | ||
114 | translation stack, with optional higher level translations left to NPT/EPT. | ||
115 | Leaf ptes point at guest pages. | ||
116 | |||
117 | The following table shows translations encoded by leaf ptes, with higher-level | ||
118 | translations in parentheses: | ||
119 | |||
120 | Non-nested guests: | ||
121 | nonpaging: gpa->hpa | ||
122 | paging: gva->gpa->hpa | ||
123 | paging, tdp: (gva->)gpa->hpa | ||
124 | Nested guests: | ||
125 | non-tdp: ngva->gpa->hpa (*) | ||
126 | tdp: (ngva->)ngpa->gpa->hpa | ||
127 | |||
128 | (*) the guest hypervisor will encode the ngva->gpa translation into its page | ||
129 | tables if npt is not present | ||
130 | |||
131 | Shadow pages contain the following information: | ||
132 | role.level: | ||
133 | The level in the shadow paging hierarchy that this shadow page belongs to. | ||
134 | 1=4k sptes, 2=2M sptes, 3=1G sptes, etc. | ||
135 | role.direct: | ||
136 | If set, leaf sptes reachable from this page are for a linear range. | ||
137 | Examples include real mode translation, large guest pages backed by small | ||
138 | host pages, and gpa->hpa translations when NPT or EPT is active. | ||
139 | The linear range starts at (gfn << PAGE_SHIFT) and its size is determined | ||
140 | by role.level (2MB for first level, 1GB for second level, 0.5TB for third | ||
141 | level, 256TB for fourth level) | ||
142 | If clear, this page corresponds to a guest page table denoted by the gfn | ||
143 | field. | ||
144 | role.quadrant: | ||
145 | When role.cr4_pae=0, the guest uses 32-bit gptes while the host uses 64-bit | ||
146 | sptes. That means a guest page table contains more ptes than the host, | ||
147 | so multiple shadow pages are needed to shadow one guest page. | ||
148 | For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the | ||
149 | first or second 512-gpte block in the guest page table. For second-level | ||
150 | page tables, each 32-bit gpte is converted to two 64-bit sptes | ||
151 | (since each first-level guest page is shadowed by two first-level | ||
152 | shadow pages) so role.quadrant takes values in the range 0..3. Each | ||
153 | quadrant maps 1GB virtual address space. | ||
154 | role.access: | ||
155 | Inherited guest access permissions in the form uwx. Note execute | ||
156 | permission is positive, not negative. | ||
157 | role.invalid: | ||
158 | The page is invalid and should not be used. It is a root page that is | ||
159 | currently pinned (by a cpu hardware register pointing to it); once it is | ||
160 | unpinned it will be destroyed. | ||
161 | role.cr4_pae: | ||
162 | Contains the value of cr4.pae for which the page is valid (e.g. whether | ||
163 | 32-bit or 64-bit gptes are in use). | ||
164 | role.cr4_nxe: | ||
165 | Contains the value of efer.nxe for which the page is valid. | ||
166 | role.cr0_wp: | ||
167 | Contains the value of cr0.wp for which the page is valid. | ||
168 | gfn: | ||
169 | Either the guest page table containing the translations shadowed by this | ||
170 | page, or the base page frame for linear translations. See role.direct. | ||
171 | spt: | ||
172 | A pageful of 64-bit sptes containing the translations for this page. | ||
173 | Accessed by both kvm and hardware. | ||
174 | The page pointed to by spt will have its page->private pointing back | ||
175 | at the shadow page structure. | ||
176 | sptes in spt point either at guest pages, or at lower-level shadow pages. | ||
177 | Specifically, if sp1 and sp2 are shadow pages, then sp1->spt[n] may point | ||
178 | at __pa(sp2->spt). sp2 will point back at sp1 through parent_pte. | ||
179 | The spt array forms a DAG structure with the shadow page as a node, and | ||
180 | guest pages as leaves. | ||
181 | gfns: | ||
182 | An array of 512 guest frame numbers, one for each present pte. Used to | ||
183 | perform a reverse map from a pte to a gfn. | ||
184 | slot_bitmap: | ||
185 | A bitmap containing one bit per memory slot. If the page contains a pte | ||
186 | mapping a page from memory slot n, then bit n of slot_bitmap will be set | ||
187 | (if a page is aliased among several slots, then it is not guaranteed that | ||
188 | all slots will be marked). | ||
189 | Used during dirty logging to avoid scanning a shadow page if none of its | ||
190 | pages need tracking. | ||
191 | root_count: | ||
192 | A counter keeping track of how many hardware registers (guest cr3 or | ||
193 | pdptrs) are now pointing at the page. While this counter is nonzero, the | ||
194 | page cannot be destroyed. See role.invalid. | ||
195 | multimapped: | ||
196 | Whether there exist multiple sptes pointing at this page. | ||
197 | parent_pte/parent_ptes: | ||
198 | If multimapped is zero, parent_pte points at the single spte that points at | ||
199 | this page's spt. Otherwise, parent_ptes points at a data structure | ||
200 | with a list of parent_ptes. | ||
201 | unsync: | ||
202 | If true, then the translations in this page may not match the guest's | ||
203 | translation. This is equivalent to the state of the tlb when a pte is | ||
204 | changed but before the tlb entry is flushed. Accordingly, unsync ptes | ||
205 | are synchronized when the guest executes invlpg or flushes its tlb by | ||
206 | other means. Valid for leaf pages. | ||
207 | unsync_children: | ||
208 | How many sptes in the page point at pages that are unsync (or have | ||
209 | unsynchronized children). | ||
210 | unsync_child_bitmap: | ||
211 | A bitmap indicating which sptes in spt point (directly or indirectly) at | ||
212 | pages that may be unsynchronized. Used to quickly locate all unsynchronized | ||
213 | pages reachable from a given page. | ||
214 | |||
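For orientation, the fields above roughly amount to the following C shape. This is a simplified sketch drawn from this document only, not the exact definition in the kernel sources:

union kvm_mmu_page_role {
	unsigned word;
	struct {
		unsigned level:4;	/* 1=4k, 2=2M, 3=1G sptes, ... */
		unsigned cr4_pae:1;	/* 64-bit (1) or 32-bit (0) gptes */
		unsigned quadrant:2;	/* slice of a 32-bit guest table */
		unsigned direct:1;	/* linear range, no guest page table */
		unsigned access:3;	/* inherited uwx permissions */
		unsigned invalid:1;	/* pinned root awaiting destruction */
		unsigned cr4_nxe:1;	/* efer.nxe this page was built for */
		unsigned cr0_wp:1;	/* cr0.wp this page was built for */
	};
};

struct kvm_mmu_page {
	union kvm_mmu_page_role role;
	u64 gfn;		/* shadowed guest table, or base of the linear range */
	u64 *spt;		/* the 512 sptes; page->private points back here */
	u64 *gfns;		/* gfn for each present spte */
	unsigned long slot_bitmap;	/* one bit per memory slot (simplified) */
	int root_count;		/* hardware references (cr3/pdptrs) */
	bool multimapped;	/* more than one parent spte */
	bool unsync;		/* leaf translations may lag the guest */
	unsigned int unsync_children;
	/* parent_pte/parent_ptes and unsync_child_bitmap omitted for brevity */
};
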
215 | Reverse map | ||
216 | =========== | ||
217 | |||
218 | The mmu maintains a reverse mapping whereby all ptes mapping a page can be | ||
219 | reached given its gfn. This is used, for example, when swapping out a page. | ||
220 | |||
221 | Synchronized and unsynchronized pages | ||
222 | ===================================== | ||
223 | |||
224 | The guest uses two events to synchronize its tlb and page tables: tlb flushes | ||
225 | and page invalidations (invlpg). | ||
226 | |||
227 | A tlb flush means that we need to synchronize all sptes reachable from the | ||
228 | guest's cr3. This is expensive, so we keep all guest page tables write | ||
229 | protected, and synchronize sptes to gptes when a gpte is written. | ||
230 | |||
231 | A special case is when a guest page table is reachable from the current | ||
232 | guest cr3. In this case, the guest is obliged to issue an invlpg instruction | ||
233 | before using the translation. We take advantage of that by removing write | ||
234 | protection from the guest page, and allowing the guest to modify it freely. | ||
235 | We synchronize modified gptes when the guest invokes invlpg. This reduces | ||
236 | the amount of emulation we have to do when the guest modifies multiple gptes, | ||
237 | or when a guest page is no longer used as a page table and is used for | ||
238 | random guest data. | ||
239 | |||
240 | As a side effect we have to resynchronize all reachable unsynchronized shadow | ||
241 | pages on a tlb flush. | ||
242 | |||
243 | |||
244 | Reaction to events | ||
245 | ================== | ||
246 | |||
247 | - guest page fault (or npt page fault, or ept violation) | ||
248 | |||
249 | This is the most complicated event. The cause of a page fault can be: | ||
250 | |||
251 | - a true guest fault (the guest translation won't allow the access) (*) | ||
252 | - access to a missing translation | ||
253 | - access to a protected translation | ||
254 | - when logging dirty pages, memory is write protected | ||
255 | - synchronized shadow pages are write protected (*) | ||
256 | - access to untranslatable memory (mmio) | ||
257 | |||
258 | (*) not applicable in direct mode | ||
259 | |||
260 | Handling a page fault is performed as follows: | ||
261 | |||
262 | - if needed, walk the guest page tables to determine the guest translation | ||
263 | (gva->gpa or ngpa->gpa) | ||
264 | - if permissions are insufficient, reflect the fault back to the guest | ||
265 | - determine the host page | ||
266 | - if this is an mmio request, there is no host page; call the emulator | ||
267 | to emulate the instruction instead | ||
268 | - walk the shadow page table to find the spte for the translation, | ||
269 | instantiating missing intermediate page tables as necessary | ||
270 | - try to unsynchronize the page | ||
271 | - if successful, we can let the guest continue and modify the gpte | ||
272 | - emulate the instruction | ||
273 | - if failed, unshadow the page and let the guest continue | ||
274 | - update any translations that were modified by the instruction | ||
275 | |||
276 | invlpg handling: | ||
277 | |||
278 | - walk the shadow page hierarchy and drop affected translations | ||
279 | - try to reinstantiate the indicated translation in the hope that the | ||
280 | guest will use it in the near future | ||
281 | |||
282 | Guest control register updates: | ||
283 | |||
284 | - mov to cr3 | ||
285 | - look up new shadow roots | ||
286 | - synchronize newly reachable shadow pages | ||
287 | |||
288 | - mov to cr0/cr4/efer | ||
289 | - set up mmu context for new paging mode | ||
290 | - look up new shadow roots | ||
291 | - synchronize newly reachable shadow pages | ||
292 | |||
293 | Host translation updates: | ||
294 | |||
295 | - mmu notifier called with updated hva | ||
296 | - look up affected sptes through reverse map | ||
297 | - drop (or update) translations | ||
298 | |||
299 | Further reading | ||
300 | =============== | ||
301 | |||
302 | - NPT presentation from KVM Forum 2008 | ||
303 | http://www.linux-kvm.org/wiki/images/c/c8/KvmForum2008%24kdf2008_21.pdf | ||
304 | |||
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 7f3c0a2e60cd..d5f4e9161201 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -979,11 +979,13 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
979 | r = -EFAULT; | 979 | r = -EFAULT; |
980 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | 980 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) |
981 | goto out; | 981 | goto out; |
982 | r = -ENXIO; | ||
982 | if (irqchip_in_kernel(kvm)) { | 983 | if (irqchip_in_kernel(kvm)) { |
983 | __s32 status; | 984 | __s32 status; |
984 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 985 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
985 | irq_event.irq, irq_event.level); | 986 | irq_event.irq, irq_event.level); |
986 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 987 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
988 | r = -EFAULT; | ||
987 | irq_event.status = status; | 989 | irq_event.status = status; |
988 | if (copy_to_user(argp, &irq_event, | 990 | if (copy_to_user(argp, &irq_event, |
989 | sizeof irq_event)) | 991 | sizeof irq_event)) |
@@ -1379,7 +1381,7 @@ static void kvm_release_vm_pages(struct kvm *kvm) | |||
1379 | int i, j; | 1381 | int i, j; |
1380 | unsigned long base_gfn; | 1382 | unsigned long base_gfn; |
1381 | 1383 | ||
1382 | slots = rcu_dereference(kvm->memslots); | 1384 | slots = kvm_memslots(kvm); |
1383 | for (i = 0; i < slots->nmemslots; i++) { | 1385 | for (i = 0; i < slots->nmemslots; i++) { |
1384 | memslot = &slots->memslots[i]; | 1386 | memslot = &slots->memslots[i]; |
1385 | base_gfn = memslot->base_gfn; | 1387 | base_gfn = memslot->base_gfn; |
@@ -1535,8 +1537,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1535 | goto out; | 1537 | goto out; |
1536 | 1538 | ||
1537 | if (copy_to_user(user_stack, stack, | 1539 | if (copy_to_user(user_stack, stack, |
1538 | sizeof(struct kvm_ia64_vcpu_stack))) | 1540 | sizeof(struct kvm_ia64_vcpu_stack))) { |
1541 | r = -EFAULT; | ||
1539 | goto out; | 1542 | goto out; |
1543 | } | ||
1540 | 1544 | ||
1541 | break; | 1545 | break; |
1542 | } | 1546 | } |
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 7a62f75778c5..f0b9cac82414 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -51,7 +51,7 @@ static int __init kvm_vmm_init(void) | |||
51 | vmm_fpswa_interface = fpswa_interface; | 51 | vmm_fpswa_interface = fpswa_interface; |
52 | 52 | ||
53 | /*Register vmm data to kvm side*/ | 53 | /*Register vmm data to kvm side*/ |
54 | return kvm_init(&vmm_info, 1024, THIS_MODULE); | 54 | return kvm_init(&vmm_info, 1024, 0, THIS_MODULE); |
55 | } | 55 | } |
56 | 56 | ||
57 | static void __exit kvm_vmm_exit(void) | 57 | static void __exit kvm_vmm_exit(void) |
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index a9b91ed3d4b9..2048a6aeea91 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -21,6 +21,7 @@ | |||
21 | /* operations for longs and pointers */ | 21 | /* operations for longs and pointers */ |
22 | #define PPC_LL stringify_in_c(ld) | 22 | #define PPC_LL stringify_in_c(ld) |
23 | #define PPC_STL stringify_in_c(std) | 23 | #define PPC_STL stringify_in_c(std) |
24 | #define PPC_STLU stringify_in_c(stdu) | ||
24 | #define PPC_LCMPI stringify_in_c(cmpdi) | 25 | #define PPC_LCMPI stringify_in_c(cmpdi) |
25 | #define PPC_LONG stringify_in_c(.llong) | 26 | #define PPC_LONG stringify_in_c(.llong) |
26 | #define PPC_LONG_ALIGN stringify_in_c(.balign 8) | 27 | #define PPC_LONG_ALIGN stringify_in_c(.balign 8) |
@@ -44,6 +45,7 @@ | |||
44 | /* operations for longs and pointers */ | 45 | /* operations for longs and pointers */ |
45 | #define PPC_LL stringify_in_c(lwz) | 46 | #define PPC_LL stringify_in_c(lwz) |
46 | #define PPC_STL stringify_in_c(stw) | 47 | #define PPC_STL stringify_in_c(stw) |
48 | #define PPC_STLU stringify_in_c(stwu) | ||
47 | #define PPC_LCMPI stringify_in_c(cmpwi) | 49 | #define PPC_LCMPI stringify_in_c(cmpwi) |
48 | #define PPC_LONG stringify_in_c(.long) | 50 | #define PPC_LONG stringify_in_c(.long) |
49 | #define PPC_LONG_ALIGN stringify_in_c(.balign 4) | 51 | #define PPC_LONG_ALIGN stringify_in_c(.balign 4) |
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 81f3b0b5601e..6c5547d82bbe 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -77,4 +77,14 @@ struct kvm_debug_exit_arch { | |||
77 | struct kvm_guest_debug_arch { | 77 | struct kvm_guest_debug_arch { |
78 | }; | 78 | }; |
79 | 79 | ||
80 | #define KVM_REG_MASK 0x001f | ||
81 | #define KVM_REG_EXT_MASK 0xffe0 | ||
82 | #define KVM_REG_GPR 0x0000 | ||
83 | #define KVM_REG_FPR 0x0020 | ||
84 | #define KVM_REG_QPR 0x0040 | ||
85 | #define KVM_REG_FQPR 0x0060 | ||
86 | |||
87 | #define KVM_INTERRUPT_SET -1U | ||
88 | #define KVM_INTERRUPT_UNSET -2U | ||
89 | |||
80 | #endif /* __LINUX_KVM_POWERPC_H */ | 90 | #endif /* __LINUX_KVM_POWERPC_H */ |
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index aadf2dd6f84e..c5ea4cda34b3 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -88,6 +88,8 @@ | |||
88 | 88 | ||
89 | #define BOOK3S_HFLAG_DCBZ32 0x1 | 89 | #define BOOK3S_HFLAG_DCBZ32 0x1 |
90 | #define BOOK3S_HFLAG_SLB 0x2 | 90 | #define BOOK3S_HFLAG_SLB 0x2 |
91 | #define BOOK3S_HFLAG_PAIRED_SINGLE 0x4 | ||
92 | #define BOOK3S_HFLAG_NATIVE_PS 0x8 | ||
91 | 93 | ||
92 | #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ | 94 | #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ |
93 | #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ | 95 | #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ |
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index db7db0a96967..6f74d93725a0 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -22,46 +22,47 @@ | |||
22 | 22 | ||
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
25 | #include <asm/kvm_book3s_64_asm.h> | 25 | #include <asm/kvm_book3s_asm.h> |
26 | 26 | ||
27 | struct kvmppc_slb { | 27 | struct kvmppc_slb { |
28 | u64 esid; | 28 | u64 esid; |
29 | u64 vsid; | 29 | u64 vsid; |
30 | u64 orige; | 30 | u64 orige; |
31 | u64 origv; | 31 | u64 origv; |
32 | bool valid; | 32 | bool valid : 1; |
33 | bool Ks; | 33 | bool Ks : 1; |
34 | bool Kp; | 34 | bool Kp : 1; |
35 | bool nx; | 35 | bool nx : 1; |
36 | bool large; /* PTEs are 16MB */ | 36 | bool large : 1; /* PTEs are 16MB */ |
37 | bool tb; /* 1TB segment */ | 37 | bool tb : 1; /* 1TB segment */ |
38 | bool class; | 38 | bool class : 1; |
39 | }; | 39 | }; |
40 | 40 | ||
41 | struct kvmppc_sr { | 41 | struct kvmppc_sr { |
42 | u32 raw; | 42 | u32 raw; |
43 | u32 vsid; | 43 | u32 vsid; |
44 | bool Ks; | 44 | bool Ks : 1; |
45 | bool Kp; | 45 | bool Kp : 1; |
46 | bool nx; | 46 | bool nx : 1; |
47 | bool valid : 1; | ||
47 | }; | 48 | }; |
48 | 49 | ||
49 | struct kvmppc_bat { | 50 | struct kvmppc_bat { |
50 | u64 raw; | 51 | u64 raw; |
51 | u32 bepi; | 52 | u32 bepi; |
52 | u32 bepi_mask; | 53 | u32 bepi_mask; |
53 | bool vs; | ||
54 | bool vp; | ||
55 | u32 brpn; | 54 | u32 brpn; |
56 | u8 wimg; | 55 | u8 wimg; |
57 | u8 pp; | 56 | u8 pp; |
57 | bool vs : 1; | ||
58 | bool vp : 1; | ||
58 | }; | 59 | }; |
59 | 60 | ||
60 | struct kvmppc_sid_map { | 61 | struct kvmppc_sid_map { |
61 | u64 guest_vsid; | 62 | u64 guest_vsid; |
62 | u64 guest_esid; | 63 | u64 guest_esid; |
63 | u64 host_vsid; | 64 | u64 host_vsid; |
64 | bool valid; | 65 | bool valid : 1; |
65 | }; | 66 | }; |
66 | 67 | ||
67 | #define SID_MAP_BITS 9 | 68 | #define SID_MAP_BITS 9 |
@@ -70,7 +71,7 @@ struct kvmppc_sid_map { | |||
70 | 71 | ||
71 | struct kvmppc_vcpu_book3s { | 72 | struct kvmppc_vcpu_book3s { |
72 | struct kvm_vcpu vcpu; | 73 | struct kvm_vcpu vcpu; |
73 | struct kvmppc_book3s_shadow_vcpu shadow_vcpu; | 74 | struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; |
74 | struct kvmppc_sid_map sid_map[SID_MAP_NUM]; | 75 | struct kvmppc_sid_map sid_map[SID_MAP_NUM]; |
75 | struct kvmppc_slb slb[64]; | 76 | struct kvmppc_slb slb[64]; |
76 | struct { | 77 | struct { |
@@ -82,9 +83,10 @@ struct kvmppc_vcpu_book3s { | |||
82 | struct kvmppc_bat ibat[8]; | 83 | struct kvmppc_bat ibat[8]; |
83 | struct kvmppc_bat dbat[8]; | 84 | struct kvmppc_bat dbat[8]; |
84 | u64 hid[6]; | 85 | u64 hid[6]; |
86 | u64 gqr[8]; | ||
85 | int slb_nr; | 87 | int slb_nr; |
88 | u32 dsisr; | ||
86 | u64 sdr1; | 89 | u64 sdr1; |
87 | u64 dsisr; | ||
88 | u64 hior; | 90 | u64 hior; |
89 | u64 msr_mask; | 91 | u64 msr_mask; |
90 | u64 vsid_first; | 92 | u64 vsid_first; |
@@ -98,15 +100,15 @@ struct kvmppc_vcpu_book3s { | |||
98 | #define CONTEXT_GUEST 1 | 100 | #define CONTEXT_GUEST 1 |
99 | #define CONTEXT_GUEST_END 2 | 101 | #define CONTEXT_GUEST_END 2 |
100 | 102 | ||
101 | #define VSID_REAL 0xfffffffffff00000 | 103 | #define VSID_REAL 0x1fffffffffc00000ULL |
102 | #define VSID_REAL_DR 0xffffffffffe00000 | 104 | #define VSID_BAT 0x1fffffffffb00000ULL |
103 | #define VSID_REAL_IR 0xffffffffffd00000 | 105 | #define VSID_REAL_DR 0x2000000000000000ULL |
104 | #define VSID_BAT 0xffffffffffc00000 | 106 | #define VSID_REAL_IR 0x4000000000000000ULL |
105 | #define VSID_PR 0x8000000000000000 | 107 | #define VSID_PR 0x8000000000000000ULL |
106 | 108 | ||
107 | extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 ea, u64 ea_mask); | 109 | extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask); |
108 | extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); | 110 | extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); |
109 | extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end); | 111 | extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end); |
110 | extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); | 112 | extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); |
111 | extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); | 113 | extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); |
112 | extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); | 114 | extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); |
@@ -114,11 +116,13 @@ extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); | |||
114 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); | 116 | extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); |
115 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); | 117 | extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); |
116 | extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); | 118 | extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); |
117 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, bool data); | 119 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
118 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr); | 120 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
119 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); | 121 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); |
120 | extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, | 122 | extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, |
121 | bool upper, u32 val); | 123 | bool upper, u32 val); |
124 | extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); | ||
125 | extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); | ||
122 | 126 | ||
123 | extern u32 kvmppc_trampoline_lowmem; | 127 | extern u32 kvmppc_trampoline_lowmem; |
124 | extern u32 kvmppc_trampoline_enter; | 128 | extern u32 kvmppc_trampoline_enter; |
@@ -126,6 +130,8 @@ extern void kvmppc_rmcall(ulong srr0, ulong srr1); | |||
126 | extern void kvmppc_load_up_fpu(void); | 130 | extern void kvmppc_load_up_fpu(void); |
127 | extern void kvmppc_load_up_altivec(void); | 131 | extern void kvmppc_load_up_altivec(void); |
128 | extern void kvmppc_load_up_vsx(void); | 132 | extern void kvmppc_load_up_vsx(void); |
133 | extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); | ||
134 | extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); | ||
129 | 135 | ||
130 | static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) | 136 | static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) |
131 | { | 137 | { |
@@ -140,7 +146,108 @@ static inline ulong dsisr(void) | |||
140 | } | 146 | } |
141 | 147 | ||
142 | extern void kvm_return_point(void); | 148 | extern void kvm_return_point(void); |
149 | static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu); | ||
150 | |||
151 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | ||
152 | { | ||
153 | if ( num < 14 ) { | ||
154 | to_svcpu(vcpu)->gpr[num] = val; | ||
155 | to_book3s(vcpu)->shadow_vcpu->gpr[num] = val; | ||
156 | } else | ||
157 | vcpu->arch.gpr[num] = val; | ||
158 | } | ||
159 | |||
160 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | ||
161 | { | ||
162 | if ( num < 14 ) | ||
163 | return to_svcpu(vcpu)->gpr[num]; | ||
164 | else | ||
165 | return vcpu->arch.gpr[num]; | ||
166 | } | ||
167 | |||
168 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | ||
169 | { | ||
170 | to_svcpu(vcpu)->cr = val; | ||
171 | to_book3s(vcpu)->shadow_vcpu->cr = val; | ||
172 | } | ||
173 | |||
174 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | ||
175 | { | ||
176 | return to_svcpu(vcpu)->cr; | ||
177 | } | ||
178 | |||
179 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | ||
180 | { | ||
181 | to_svcpu(vcpu)->xer = val; | ||
182 | to_book3s(vcpu)->shadow_vcpu->xer = val; | ||
183 | } | ||
184 | |||
185 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | ||
186 | { | ||
187 | return to_svcpu(vcpu)->xer; | ||
188 | } | ||
189 | |||
190 | static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) | ||
191 | { | ||
192 | to_svcpu(vcpu)->ctr = val; | ||
193 | } | ||
194 | |||
195 | static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) | ||
196 | { | ||
197 | return to_svcpu(vcpu)->ctr; | ||
198 | } | ||
199 | |||
200 | static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) | ||
201 | { | ||
202 | to_svcpu(vcpu)->lr = val; | ||
203 | } | ||
204 | |||
205 | static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) | ||
206 | { | ||
207 | return to_svcpu(vcpu)->lr; | ||
208 | } | ||
209 | |||
210 | static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) | ||
211 | { | ||
212 | to_svcpu(vcpu)->pc = val; | ||
213 | } | ||
214 | |||
215 | static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) | ||
216 | { | ||
217 | return to_svcpu(vcpu)->pc; | ||
218 | } | ||
219 | |||
220 | static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) | ||
221 | { | ||
222 | ulong pc = kvmppc_get_pc(vcpu); | ||
223 | struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); | ||
224 | |||
225 | /* Load the instruction manually if it failed to do so in the | ||
226 | * exit path */ | ||
227 | if (svcpu->last_inst == KVM_INST_FETCH_FAILED) | ||
228 | kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false); | ||
229 | |||
230 | return svcpu->last_inst; | ||
231 | } | ||
232 | |||
233 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) | ||
234 | { | ||
235 | return to_svcpu(vcpu)->fault_dar; | ||
236 | } | ||
237 | |||
238 | /* Magic register values loaded into r3 and r4 before the 'sc' assembly | ||
239 | * instruction for the OSI hypercalls */ | ||
240 | #define OSI_SC_MAGIC_R3 0x113724FA | ||
241 | #define OSI_SC_MAGIC_R4 0x77810F9B | ||
143 | 242 | ||
144 | #define INS_DCBZ 0x7c0007ec | 243 | #define INS_DCBZ 0x7c0007ec |
145 | 244 | ||
245 | /* Also add subarch specific defines */ | ||
246 | |||
247 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
248 | #include <asm/kvm_book3s_32.h> | ||
249 | #else | ||
250 | #include <asm/kvm_book3s_64.h> | ||
251 | #endif | ||
252 | |||
146 | #endif /* __ASM_KVM_BOOK3S_H__ */ | 253 | #endif /* __ASM_KVM_BOOK3S_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h
new file mode 100644
index 000000000000..de604db135f5
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_32.h
@@ -0,0 +1,42 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright SUSE Linux Products GmbH 2010 | ||
16 | * | ||
17 | * Authors: Alexander Graf <agraf@suse.de> | ||
18 | */ | ||
19 | |||
20 | #ifndef __ASM_KVM_BOOK3S_32_H__ | ||
21 | #define __ASM_KVM_BOOK3S_32_H__ | ||
22 | |||
23 | static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) | ||
24 | { | ||
25 | return to_book3s(vcpu)->shadow_vcpu; | ||
26 | } | ||
27 | |||
28 | #define PTE_SIZE 12 | ||
29 | #define VSID_ALL 0 | ||
30 | #define SR_INVALID 0x00000001 /* VSID 1 should always be unused */ | ||
31 | #define SR_KP 0x20000000 | ||
32 | #define PTE_V 0x80000000 | ||
33 | #define PTE_SEC 0x00000040 | ||
34 | #define PTE_M 0x00000010 | ||
35 | #define PTE_R 0x00000100 | ||
36 | #define PTE_C 0x00000080 | ||
37 | |||
38 | #define SID_SHIFT 28 | ||
39 | #define ESID_MASK 0xf0000000 | ||
40 | #define VSID_MASK 0x00fffffff0000000ULL | ||
41 | |||
42 | #endif /* __ASM_KVM_BOOK3S_32_H__ */ | ||
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
new file mode 100644
index 000000000000..4cadd612d575
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -0,0 +1,28 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright SUSE Linux Products GmbH 2010 | ||
16 | * | ||
17 | * Authors: Alexander Graf <agraf@suse.de> | ||
18 | */ | ||
19 | |||
20 | #ifndef __ASM_KVM_BOOK3S_64_H__ | ||
21 | #define __ASM_KVM_BOOK3S_64_H__ | ||
22 | |||
23 | static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu) | ||
24 | { | ||
25 | return &get_paca()->shadow_vcpu; | ||
26 | } | ||
27 | |||
28 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ | ||
diff --git a/arch/powerpc/include/asm/kvm_book3s_64_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 183461b48407..36fdb3aff30b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -22,7 +22,7 @@ | |||
22 | 22 | ||
23 | #ifdef __ASSEMBLY__ | 23 | #ifdef __ASSEMBLY__ |
24 | 24 | ||
25 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 25 | #ifdef CONFIG_KVM_BOOK3S_HANDLER |
26 | 26 | ||
27 | #include <asm/kvm_asm.h> | 27 | #include <asm/kvm_asm.h> |
28 | 28 | ||
@@ -55,7 +55,7 @@ kvmppc_resume_\intno: | |||
55 | .macro DO_KVM intno | 55 | .macro DO_KVM intno |
56 | .endm | 56 | .endm |
57 | 57 | ||
58 | #endif /* CONFIG_KVM_BOOK3S_64_HANDLER */ | 58 | #endif /* CONFIG_KVM_BOOK3S_HANDLER */ |
59 | 59 | ||
60 | #else /*__ASSEMBLY__ */ | 60 | #else /*__ASSEMBLY__ */ |
61 | 61 | ||
@@ -63,12 +63,33 @@ struct kvmppc_book3s_shadow_vcpu { | |||
63 | ulong gpr[14]; | 63 | ulong gpr[14]; |
64 | u32 cr; | 64 | u32 cr; |
65 | u32 xer; | 65 | u32 xer; |
66 | |||
67 | u32 fault_dsisr; | ||
68 | u32 last_inst; | ||
69 | ulong ctr; | ||
70 | ulong lr; | ||
71 | ulong pc; | ||
72 | ulong shadow_srr1; | ||
73 | ulong fault_dar; | ||
74 | |||
66 | ulong host_r1; | 75 | ulong host_r1; |
67 | ulong host_r2; | 76 | ulong host_r2; |
68 | ulong handler; | 77 | ulong handler; |
69 | ulong scratch0; | 78 | ulong scratch0; |
70 | ulong scratch1; | 79 | ulong scratch1; |
71 | ulong vmhandler; | 80 | ulong vmhandler; |
81 | u8 in_guest; | ||
82 | |||
83 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
84 | u32 sr[16]; /* Guest SRs */ | ||
85 | #endif | ||
86 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
87 | u8 slb_max; /* highest used guest slb entry */ | ||
88 | struct { | ||
89 | u64 esid; | ||
90 | u64 vsid; | ||
91 | } slb[64]; /* guest SLB */ | ||
92 | #endif | ||
72 | }; | 93 | }; |
73 | 94 | ||
74 | #endif /*__ASSEMBLY__ */ | 95 | #endif /*__ASSEMBLY__ */ |
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
new file mode 100644
index 000000000000..9c9ba3d59b1b
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright SUSE Linux Products GmbH 2010 | ||
16 | * | ||
17 | * Authors: Alexander Graf <agraf@suse.de> | ||
18 | */ | ||
19 | |||
20 | #ifndef __ASM_KVM_BOOKE_H__ | ||
21 | #define __ASM_KVM_BOOKE_H__ | ||
22 | |||
23 | #include <linux/types.h> | ||
24 | #include <linux/kvm_host.h> | ||
25 | |||
26 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | ||
27 | { | ||
28 | vcpu->arch.gpr[num] = val; | ||
29 | } | ||
30 | |||
31 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | ||
32 | { | ||
33 | return vcpu->arch.gpr[num]; | ||
34 | } | ||
35 | |||
36 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | ||
37 | { | ||
38 | vcpu->arch.cr = val; | ||
39 | } | ||
40 | |||
41 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | ||
42 | { | ||
43 | return vcpu->arch.cr; | ||
44 | } | ||
45 | |||
46 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | ||
47 | { | ||
48 | vcpu->arch.xer = val; | ||
49 | } | ||
50 | |||
51 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | ||
52 | { | ||
53 | return vcpu->arch.xer; | ||
54 | } | ||
55 | |||
56 | static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) | ||
57 | { | ||
58 | return vcpu->arch.last_inst; | ||
59 | } | ||
60 | |||
61 | static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) | ||
62 | { | ||
63 | vcpu->arch.ctr = val; | ||
64 | } | ||
65 | |||
66 | static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu) | ||
67 | { | ||
68 | return vcpu->arch.ctr; | ||
69 | } | ||
70 | |||
71 | static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val) | ||
72 | { | ||
73 | vcpu->arch.lr = val; | ||
74 | } | ||
75 | |||
76 | static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu) | ||
77 | { | ||
78 | return vcpu->arch.lr; | ||
79 | } | ||
80 | |||
81 | static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val) | ||
82 | { | ||
83 | vcpu->arch.pc = val; | ||
84 | } | ||
85 | |||
86 | static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) | ||
87 | { | ||
88 | return vcpu->arch.pc; | ||
89 | } | ||
90 | |||
91 | static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) | ||
92 | { | ||
93 | return vcpu->arch.fault_dear; | ||
94 | } | ||
95 | |||
96 | #endif /* __ASM_KVM_BOOKE_H__ */ | ||
diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h
new file mode 100644
index 000000000000..94f05de9ad04
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_fpu.h
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright Novell Inc. 2010 | ||
16 | * | ||
17 | * Authors: Alexander Graf <agraf@suse.de> | ||
18 | */ | ||
19 | |||
20 | #ifndef __ASM_KVM_FPU_H__ | ||
21 | #define __ASM_KVM_FPU_H__ | ||
22 | |||
23 | #include <linux/types.h> | ||
24 | |||
25 | extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1); | ||
26 | extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1); | ||
27 | extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1); | ||
28 | |||
29 | extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | ||
30 | extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | ||
31 | extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | ||
32 | extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); | ||
33 | |||
34 | extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
35 | u32 *src3); | ||
36 | extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
37 | u32 *src3); | ||
38 | extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
39 | u32 *src3); | ||
40 | extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
41 | u32 *src3); | ||
42 | extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, | ||
43 | u32 *src3); | ||
44 | |||
45 | #define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ | ||
46 | u64 *dst, u64 *src1); | ||
47 | #define FPD_TWO_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ | ||
48 | u64 *dst, u64 *src1, u64 *src2); | ||
49 | #define FPD_THREE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ | ||
50 | u64 *dst, u64 *src1, u64 *src2, u64 *src3); | ||
51 | |||
52 | extern void fpd_fcmpu(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); | ||
53 | extern void fpd_fcmpo(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); | ||
54 | |||
55 | FPD_ONE_IN(fsqrts) | ||
56 | FPD_ONE_IN(frsqrtes) | ||
57 | FPD_ONE_IN(fres) | ||
58 | FPD_ONE_IN(frsp) | ||
59 | FPD_ONE_IN(fctiw) | ||
60 | FPD_ONE_IN(fctiwz) | ||
61 | FPD_ONE_IN(fsqrt) | ||
62 | FPD_ONE_IN(fre) | ||
63 | FPD_ONE_IN(frsqrte) | ||
64 | FPD_ONE_IN(fneg) | ||
65 | FPD_ONE_IN(fabs) | ||
66 | FPD_TWO_IN(fadds) | ||
67 | FPD_TWO_IN(fsubs) | ||
68 | FPD_TWO_IN(fdivs) | ||
69 | FPD_TWO_IN(fmuls) | ||
70 | FPD_TWO_IN(fcpsgn) | ||
71 | FPD_TWO_IN(fdiv) | ||
72 | FPD_TWO_IN(fadd) | ||
73 | FPD_TWO_IN(fmul) | ||
74 | FPD_TWO_IN(fsub) | ||
75 | FPD_THREE_IN(fmsubs) | ||
76 | FPD_THREE_IN(fmadds) | ||
77 | FPD_THREE_IN(fnmsubs) | ||
78 | FPD_THREE_IN(fnmadds) | ||
79 | FPD_THREE_IN(fsel) | ||
80 | FPD_THREE_IN(fmsub) | ||
81 | FPD_THREE_IN(fmadd) | ||
82 | FPD_THREE_IN(fnmsub) | ||
83 | FPD_THREE_IN(fnmadd) | ||
84 | |||
85 | #endif | ||
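The FPD_ONE_IN/FPD_TWO_IN/FPD_THREE_IN macros only stamp out extern prototypes for the double-precision helpers implemented elsewhere (the Makefile change further down adds fpu.o). A compile-only sketch of what one invocation expands to, with uint64_t/uint32_t standing in for the kernel's u64/u32:

	/* Compile-check sketch of the FPD_* declaration macros. */
	#include <stdint.h>
	typedef uint64_t u64;
	typedef uint32_t u32;

	#define FPD_TWO_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \
					u64 *dst, u64 *src1, u64 *src2);

	FPD_TWO_IN(fadd)
	/* ...expands to:
	 * extern void fpd_fadd(u64 *fpscr, u32 *cr, u64 *dst, u64 *src1, u64 *src2);
	 *
	 * i.e. every helper takes the FPSCR and CR by reference plus the operand
	 * slots, so the emulator can hand raw 64-bit register images to the
	 * implementation and get the result and status flags back. */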
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5e5bae7e152f..0c9ad869decd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -66,7 +66,7 @@ struct kvm_vcpu_stat { | |||
66 | u32 dec_exits; | 66 | u32 dec_exits; |
67 | u32 ext_intr_exits; | 67 | u32 ext_intr_exits; |
68 | u32 halt_wakeup; | 68 | u32 halt_wakeup; |
69 | #ifdef CONFIG_PPC64 | 69 | #ifdef CONFIG_PPC_BOOK3S |
70 | u32 pf_storage; | 70 | u32 pf_storage; |
71 | u32 pf_instruc; | 71 | u32 pf_instruc; |
72 | u32 sp_storage; | 72 | u32 sp_storage; |
@@ -124,12 +124,12 @@ struct kvm_arch { | |||
124 | }; | 124 | }; |
125 | 125 | ||
126 | struct kvmppc_pte { | 126 | struct kvmppc_pte { |
127 | u64 eaddr; | 127 | ulong eaddr; |
128 | u64 vpage; | 128 | u64 vpage; |
129 | u64 raddr; | 129 | ulong raddr; |
130 | bool may_read; | 130 | bool may_read : 1; |
131 | bool may_write; | 131 | bool may_write : 1; |
132 | bool may_execute; | 132 | bool may_execute : 1; |
133 | }; | 133 | }; |
134 | 134 | ||
135 | struct kvmppc_mmu { | 135 | struct kvmppc_mmu { |
@@ -145,7 +145,7 @@ struct kvmppc_mmu { | |||
145 | int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); | 145 | int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); |
146 | void (*reset_msr)(struct kvm_vcpu *vcpu); | 146 | void (*reset_msr)(struct kvm_vcpu *vcpu); |
147 | void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); | 147 | void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); |
148 | int (*esid_to_vsid)(struct kvm_vcpu *vcpu, u64 esid, u64 *vsid); | 148 | int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); |
149 | u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data); | 149 | u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data); |
150 | bool (*is_dcbz32)(struct kvm_vcpu *vcpu); | 150 | bool (*is_dcbz32)(struct kvm_vcpu *vcpu); |
151 | }; | 151 | }; |
@@ -160,7 +160,7 @@ struct hpte_cache { | |||
160 | struct kvm_vcpu_arch { | 160 | struct kvm_vcpu_arch { |
161 | ulong host_stack; | 161 | ulong host_stack; |
162 | u32 host_pid; | 162 | u32 host_pid; |
163 | #ifdef CONFIG_PPC64 | 163 | #ifdef CONFIG_PPC_BOOK3S |
164 | ulong host_msr; | 164 | ulong host_msr; |
165 | ulong host_r2; | 165 | ulong host_r2; |
166 | void *host_retip; | 166 | void *host_retip; |
@@ -175,7 +175,7 @@ struct kvm_vcpu_arch { | |||
175 | ulong gpr[32]; | 175 | ulong gpr[32]; |
176 | 176 | ||
177 | u64 fpr[32]; | 177 | u64 fpr[32]; |
178 | u32 fpscr; | 178 | u64 fpscr; |
179 | 179 | ||
180 | #ifdef CONFIG_ALTIVEC | 180 | #ifdef CONFIG_ALTIVEC |
181 | vector128 vr[32]; | 181 | vector128 vr[32]; |
@@ -186,19 +186,23 @@ struct kvm_vcpu_arch { | |||
186 | u64 vsr[32]; | 186 | u64 vsr[32]; |
187 | #endif | 187 | #endif |
188 | 188 | ||
189 | #ifdef CONFIG_PPC_BOOK3S | ||
190 | /* For Gekko paired singles */ | ||
191 | u32 qpr[32]; | ||
192 | #endif | ||
193 | |||
194 | #ifdef CONFIG_BOOKE | ||
189 | ulong pc; | 195 | ulong pc; |
190 | ulong ctr; | 196 | ulong ctr; |
191 | ulong lr; | 197 | ulong lr; |
192 | 198 | ||
193 | #ifdef CONFIG_BOOKE | ||
194 | ulong xer; | 199 | ulong xer; |
195 | u32 cr; | 200 | u32 cr; |
196 | #endif | 201 | #endif |
197 | 202 | ||
198 | ulong msr; | 203 | ulong msr; |
199 | #ifdef CONFIG_PPC64 | 204 | #ifdef CONFIG_PPC_BOOK3S |
200 | ulong shadow_msr; | 205 | ulong shadow_msr; |
201 | ulong shadow_srr1; | ||
202 | ulong hflags; | 206 | ulong hflags; |
203 | ulong guest_owned_ext; | 207 | ulong guest_owned_ext; |
204 | #endif | 208 | #endif |
@@ -253,20 +257,22 @@ struct kvm_vcpu_arch { | |||
253 | struct dentry *debugfs_exit_timing; | 257 | struct dentry *debugfs_exit_timing; |
254 | #endif | 258 | #endif |
255 | 259 | ||
260 | #ifdef CONFIG_BOOKE | ||
256 | u32 last_inst; | 261 | u32 last_inst; |
257 | #ifdef CONFIG_PPC64 | ||
258 | ulong fault_dsisr; | ||
259 | #endif | ||
260 | ulong fault_dear; | 262 | ulong fault_dear; |
261 | ulong fault_esr; | 263 | ulong fault_esr; |
262 | ulong queued_dear; | 264 | ulong queued_dear; |
263 | ulong queued_esr; | 265 | ulong queued_esr; |
266 | #endif | ||
264 | gpa_t paddr_accessed; | 267 | gpa_t paddr_accessed; |
265 | 268 | ||
266 | u8 io_gpr; /* GPR used as IO source/target */ | 269 | u8 io_gpr; /* GPR used as IO source/target */ |
267 | u8 mmio_is_bigendian; | 270 | u8 mmio_is_bigendian; |
271 | u8 mmio_sign_extend; | ||
268 | u8 dcr_needed; | 272 | u8 dcr_needed; |
269 | u8 dcr_is_write; | 273 | u8 dcr_is_write; |
274 | u8 osi_needed; | ||
275 | u8 osi_enabled; | ||
270 | 276 | ||
271 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ | 277 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ |
272 | 278 | ||
@@ -275,7 +281,7 @@ struct kvm_vcpu_arch { | |||
275 | u64 dec_jiffies; | 281 | u64 dec_jiffies; |
276 | unsigned long pending_exceptions; | 282 | unsigned long pending_exceptions; |
277 | 283 | ||
278 | #ifdef CONFIG_PPC64 | 284 | #ifdef CONFIG_PPC_BOOK3S |
279 | struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; | 285 | struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; |
280 | int hpte_cache_offset; | 286 | int hpte_cache_offset; |
281 | #endif | 287 | #endif |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e2642829e435..18d139ec2d22 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <linux/kvm_host.h> | 30 | #include <linux/kvm_host.h> |
31 | #ifdef CONFIG_PPC_BOOK3S | 31 | #ifdef CONFIG_PPC_BOOK3S |
32 | #include <asm/kvm_book3s.h> | 32 | #include <asm/kvm_book3s.h> |
33 | #else | ||
34 | #include <asm/kvm_booke.h> | ||
33 | #endif | 35 | #endif |
34 | 36 | ||
35 | enum emulation_result { | 37 | enum emulation_result { |
@@ -37,6 +39,7 @@ enum emulation_result { | |||
37 | EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ | 39 | EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ |
38 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ | 40 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ |
39 | EMULATE_FAIL, /* can't emulate this instruction */ | 41 | EMULATE_FAIL, /* can't emulate this instruction */ |
42 | EMULATE_AGAIN, /* something went wrong. go again */ | ||
40 | }; | 43 | }; |
41 | 44 | ||
42 | extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 45 | extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
@@ -48,8 +51,11 @@ extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); | |||
48 | extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | 51 | extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, |
49 | unsigned int rt, unsigned int bytes, | 52 | unsigned int rt, unsigned int bytes, |
50 | int is_bigendian); | 53 | int is_bigendian); |
54 | extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
55 | unsigned int rt, unsigned int bytes, | ||
56 | int is_bigendian); | ||
51 | extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | 57 | extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, |
52 | u32 val, unsigned int bytes, int is_bigendian); | 58 | u64 val, unsigned int bytes, int is_bigendian); |
53 | 59 | ||
54 | extern int kvmppc_emulate_instruction(struct kvm_run *run, | 60 | extern int kvmppc_emulate_instruction(struct kvm_run *run, |
55 | struct kvm_vcpu *vcpu); | 61 | struct kvm_vcpu *vcpu); |
@@ -63,6 +69,7 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, | |||
63 | extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); | 69 | extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); |
64 | extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); | 70 | extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); |
65 | extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); | 71 | extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); |
72 | extern int kvmppc_mmu_init(struct kvm_vcpu *vcpu); | ||
66 | extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); | 73 | extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); |
67 | extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); | 74 | extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); |
68 | extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, | 75 | extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, |
@@ -88,6 +95,8 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); | |||
88 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); | 95 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); |
89 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | 96 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, |
90 | struct kvm_interrupt *irq); | 97 | struct kvm_interrupt *irq); |
98 | extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | ||
99 | struct kvm_interrupt *irq); | ||
91 | 100 | ||
92 | extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | 101 | extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, |
93 | unsigned int op, int *advance); | 102 | unsigned int op, int *advance); |
@@ -99,81 +108,37 @@ extern void kvmppc_booke_exit(void); | |||
99 | 108 | ||
100 | extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); | 109 | extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); |
101 | 110 | ||
102 | #ifdef CONFIG_PPC_BOOK3S | 111 | /* |
103 | 112 | * Cuts out inst bits with ordering according to spec. | |
104 | /* We assume we're always acting on the current vcpu */ | 113 | * That means the leftmost bit is zero. All given bits are included. |
105 | 114 | */ | |
106 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | 115 | static inline u32 kvmppc_get_field(u64 inst, int msb, int lsb) |
107 | { | ||
108 | if ( num < 14 ) { | ||
109 | get_paca()->shadow_vcpu.gpr[num] = val; | ||
110 | to_book3s(vcpu)->shadow_vcpu.gpr[num] = val; | ||
111 | } else | ||
112 | vcpu->arch.gpr[num] = val; | ||
113 | } | ||
114 | |||
115 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | ||
116 | { | ||
117 | if ( num < 14 ) | ||
118 | return get_paca()->shadow_vcpu.gpr[num]; | ||
119 | else | ||
120 | return vcpu->arch.gpr[num]; | ||
121 | } | ||
122 | |||
123 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | ||
124 | { | ||
125 | get_paca()->shadow_vcpu.cr = val; | ||
126 | to_book3s(vcpu)->shadow_vcpu.cr = val; | ||
127 | } | ||
128 | |||
129 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | ||
130 | { | ||
131 | return get_paca()->shadow_vcpu.cr; | ||
132 | } | ||
133 | |||
134 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | ||
135 | { | ||
136 | get_paca()->shadow_vcpu.xer = val; | ||
137 | to_book3s(vcpu)->shadow_vcpu.xer = val; | ||
138 | } | ||
139 | |||
140 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | ||
141 | { | 116 | { |
142 | return get_paca()->shadow_vcpu.xer; | 117 | u32 r; |
143 | } | 118 | u32 mask; |
144 | 119 | ||
145 | #else | 120 | BUG_ON(msb > lsb); |
146 | 121 | ||
147 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | 122 | mask = (1 << (lsb - msb + 1)) - 1; |
148 | { | 123 | r = (inst >> (63 - lsb)) & mask; |
149 | vcpu->arch.gpr[num] = val; | ||
150 | } | ||
151 | 124 | ||
152 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | 125 | return r; |
153 | { | ||
154 | return vcpu->arch.gpr[num]; | ||
155 | } | 126 | } |
156 | 127 | ||
157 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | 128 | /* |
129 | * Replaces inst bits with ordering according to spec. | ||
130 | */ | ||
131 | static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) | ||
158 | { | 132 | { |
159 | vcpu->arch.cr = val; | 133 | u32 r; |
160 | } | 134 | u32 mask; |
161 | 135 | ||
162 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | 136 | BUG_ON(msb > lsb); |
163 | { | ||
164 | return vcpu->arch.cr; | ||
165 | } | ||
166 | 137 | ||
167 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | 138 | mask = ((1 << (lsb - msb + 1)) - 1) << (63 - lsb); |
168 | { | 139 | r = (inst & ~mask) | ((value << (63 - lsb)) & mask); |
169 | vcpu->arch.xer = val; | ||
170 | } | ||
171 | 140 | ||
172 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | 141 | return r; |
173 | { | ||
174 | return vcpu->arch.xer; | ||
175 | } | 142 | } |
176 | 143 | ||
177 | #endif | ||
178 | |||
179 | #endif /* __POWERPC_KVM_PPC_H__ */ | 144 | #endif /* __POWERPC_KVM_PPC_H__ */ |
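The new kvmppc_get_field()/kvmppc_set_field() helpers use the Power ISA bit numbering, where bit 0 is the most significant bit of the 64-bit value, so position b maps to a shift of 63 - b. The stand-alone sketch below restates them with assert() in place of BUG_ON() and with masks widened to 64 bits so the sketch stays well defined for any range (the in-kernel versions keep 32-bit masks), then extracts the primary opcode of a 32-bit instruction held in the low word, i.e. positions 32..37:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Bit 0 is the MSB of the 64-bit value, bit 63 the LSB; both ends of
	 * the msb..lsb range are included. */
	static uint32_t get_field(uint64_t inst, int msb, int lsb)
	{
		uint32_t mask;

		assert(msb <= lsb);
		mask = (1u << (lsb - msb + 1)) - 1;
		return (uint32_t)((inst >> (63 - lsb)) & mask);
	}

	static uint64_t set_field(uint64_t inst, int msb, int lsb, uint32_t value)
	{
		uint64_t mask;

		assert(msb <= lsb);
		mask = (uint64_t)((1u << (lsb - msb + 1)) - 1) << (63 - lsb);
		return (inst & ~mask) | (((uint64_t)value << (63 - lsb)) & mask);
	}

	int main(void)
	{
		/* A 32-bit instruction in the low word occupies positions 32..63,
		 * so its primary opcode (instruction bits 0..5) sits at 32..37:
		 * for 0x7c0802a6 (mflr r0) that field is 31. */
		uint64_t inst = 0x7c0802a6;

		assert(get_field(inst, 32, 37) == 31);

		/* Writing 0xabcd into positions 0..15 lands in the top halfword. */
		assert(set_field(0, 0, 15, 0xabcd) == 0xabcd000000000000ull);

		printf("primary opcode: %u\n", get_field(inst, 32, 37));
		return 0;
	}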
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 26383e0778aa..81fb41289d6c 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h | |||
@@ -27,6 +27,8 @@ extern int __init_new_context(void); | |||
27 | extern void __destroy_context(int context_id); | 27 | extern void __destroy_context(int context_id); |
28 | static inline void mmu_context_init(void) { } | 28 | static inline void mmu_context_init(void) { } |
29 | #else | 29 | #else |
30 | extern unsigned long __init_new_context(void); | ||
31 | extern void __destroy_context(unsigned long context_id); | ||
30 | extern void mmu_context_init(void); | 32 | extern void mmu_context_init(void); |
31 | #endif | 33 | #endif |
32 | 34 | ||
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 971dfa4815f0..8ce7963ad41d 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <asm/page.h> | 23 | #include <asm/page.h> |
24 | #include <asm/exception-64e.h> | 24 | #include <asm/exception-64e.h> |
25 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 25 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER |
26 | #include <asm/kvm_book3s_64_asm.h> | 26 | #include <asm/kvm_book3s_asm.h> |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | register struct paca_struct *local_paca asm("r13"); | 29 | register struct paca_struct *local_paca asm("r13"); |
@@ -137,15 +137,9 @@ struct paca_struct { | |||
137 | u64 startpurr; /* PURR/TB value snapshot */ | 137 | u64 startpurr; /* PURR/TB value snapshot */ |
138 | u64 startspurr; /* SPURR value snapshot */ | 138 | u64 startspurr; /* SPURR value snapshot */ |
139 | 139 | ||
140 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 140 | #ifdef CONFIG_KVM_BOOK3S_HANDLER |
141 | struct { | ||
142 | u64 esid; | ||
143 | u64 vsid; | ||
144 | } kvm_slb[64]; /* guest SLB */ | ||
145 | /* We use this to store guest state in */ | 141 | /* We use this to store guest state in */ |
146 | struct kvmppc_book3s_shadow_vcpu shadow_vcpu; | 142 | struct kvmppc_book3s_shadow_vcpu shadow_vcpu; |
147 | u8 kvm_slb_max; /* highest used guest slb entry */ | ||
148 | u8 kvm_in_guest; /* are we inside the guest? */ | ||
149 | #endif | 143 | #endif |
150 | }; | 144 | }; |
151 | 145 | ||
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 221ba6240464..7492fe8ad6e4 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h | |||
@@ -229,6 +229,9 @@ struct thread_struct { | |||
229 | unsigned long spefscr; /* SPE & eFP status */ | 229 | unsigned long spefscr; /* SPE & eFP status */ |
230 | int used_spe; /* set if process has used spe */ | 230 | int used_spe; /* set if process has used spe */ |
231 | #endif /* CONFIG_SPE */ | 231 | #endif /* CONFIG_SPE */ |
232 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | ||
233 | void* kvm_shadow_vcpu; /* KVM internal data */ | ||
234 | #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ | ||
232 | }; | 235 | }; |
233 | 236 | ||
234 | #define ARCH_MIN_TASKALIGN 16 | 237 | #define ARCH_MIN_TASKALIGN 16 |
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b68f025924a8..d62fdf4e504b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
@@ -293,10 +293,12 @@ | |||
293 | #define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ | 293 | #define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ |
294 | #define HID1_PS (1<<16) /* 750FX PLL selection */ | 294 | #define HID1_PS (1<<16) /* 750FX PLL selection */ |
295 | #define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ | 295 | #define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ |
296 | #define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */ | ||
296 | #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ | 297 | #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ |
297 | #define SPRN_IABR2 0x3FA /* 83xx */ | 298 | #define SPRN_IABR2 0x3FA /* 83xx */ |
298 | #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ | 299 | #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ |
299 | #define SPRN_HID4 0x3F4 /* 970 HID4 */ | 300 | #define SPRN_HID4 0x3F4 /* 970 HID4 */ |
301 | #define SPRN_HID4_GEKKO 0x3F3 /* Gekko HID4 */ | ||
300 | #define SPRN_HID5 0x3F6 /* 970 HID5 */ | 302 | #define SPRN_HID5 0x3F6 /* 970 HID5 */ |
301 | #define SPRN_HID6 0x3F9 /* BE HID 6 */ | 303 | #define SPRN_HID6 0x3F9 /* BE HID 6 */ |
302 | #define HID6_LB (0x0F<<12) /* Concurrent Large Page Modes */ | 304 | #define HID6_LB (0x0F<<12) /* Concurrent Large Page Modes */ |
@@ -465,6 +467,14 @@ | |||
465 | #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ | 467 | #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ |
466 | #define SPRN_XER 0x001 /* Fixed Point Exception Register */ | 468 | #define SPRN_XER 0x001 /* Fixed Point Exception Register */ |
467 | 469 | ||
470 | #define SPRN_MMCR0_GEKKO 0x3B8 /* Gekko Monitor Mode Control Register 0 */ | ||
471 | #define SPRN_MMCR1_GEKKO 0x3BC /* Gekko Monitor Mode Control Register 1 */ | ||
472 | #define SPRN_PMC1_GEKKO 0x3B9 /* Gekko Performance Monitor Control 1 */ | ||
473 | #define SPRN_PMC2_GEKKO 0x3BA /* Gekko Performance Monitor Control 2 */ | ||
474 | #define SPRN_PMC3_GEKKO 0x3BD /* Gekko Performance Monitor Control 3 */ | ||
475 | #define SPRN_PMC4_GEKKO 0x3BE /* Gekko Performance Monitor Control 4 */ | ||
476 | #define SPRN_WPAR_GEKKO 0x399 /* Gekko Write Pipe Address Register */ | ||
477 | |||
468 | #define SPRN_SCOMC 0x114 /* SCOM Access Control */ | 478 | #define SPRN_SCOMC 0x114 /* SCOM Access Control */ |
469 | #define SPRN_SCOMD 0x115 /* SCOM Access DATA */ | 479 | #define SPRN_SCOMD 0x115 /* SCOM Access DATA */ |
470 | 480 | ||
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 28a686fb269c..496cc5b3984f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -50,6 +50,9 @@ | |||
50 | #endif | 50 | #endif |
51 | #ifdef CONFIG_KVM | 51 | #ifdef CONFIG_KVM |
52 | #include <linux/kvm_host.h> | 52 | #include <linux/kvm_host.h> |
53 | #ifndef CONFIG_BOOKE | ||
54 | #include <asm/kvm_book3s.h> | ||
55 | #endif | ||
53 | #endif | 56 | #endif |
54 | 57 | ||
55 | #ifdef CONFIG_PPC32 | 58 | #ifdef CONFIG_PPC32 |
@@ -105,6 +108,9 @@ int main(void) | |||
105 | DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); | 108 | DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); |
106 | #endif /* CONFIG_SPE */ | 109 | #endif /* CONFIG_SPE */ |
107 | #endif /* CONFIG_PPC64 */ | 110 | #endif /* CONFIG_PPC64 */ |
111 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | ||
112 | DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); | ||
113 | #endif | ||
108 | 114 | ||
109 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | 115 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); |
110 | DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); | 116 | DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); |
@@ -191,33 +197,9 @@ int main(void) | |||
191 | DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); | 197 | DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); |
192 | DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); | 198 | DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); |
193 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 199 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER |
194 | DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); | 200 | DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); |
195 | DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); | 201 | DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb)); |
196 | DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); | 202 | DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max)); |
197 | DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr)); | ||
198 | DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer)); | ||
199 | DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0])); | ||
200 | DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1])); | ||
201 | DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2])); | ||
202 | DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3])); | ||
203 | DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4])); | ||
204 | DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5])); | ||
205 | DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6])); | ||
206 | DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7])); | ||
207 | DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8])); | ||
208 | DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9])); | ||
209 | DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10])); | ||
210 | DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11])); | ||
211 | DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12])); | ||
212 | DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13])); | ||
213 | DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1)); | ||
214 | DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2)); | ||
215 | DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct, | ||
216 | shadow_vcpu.vmhandler)); | ||
217 | DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct, | ||
218 | shadow_vcpu.scratch0)); | ||
219 | DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct, | ||
220 | shadow_vcpu.scratch1)); | ||
221 | #endif | 203 | #endif |
222 | #endif /* CONFIG_PPC64 */ | 204 | #endif /* CONFIG_PPC64 */ |
223 | 205 | ||
@@ -228,8 +210,8 @@ int main(void) | |||
228 | /* Interrupt register frame */ | 210 | /* Interrupt register frame */ |
229 | DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); | 211 | DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); |
230 | DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); | 212 | DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); |
231 | #ifdef CONFIG_PPC64 | ||
232 | DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); | 213 | DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); |
214 | #ifdef CONFIG_PPC64 | ||
233 | /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ | 215 | /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ |
234 | DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); | 216 | DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); |
235 | DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); | 217 | DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); |
@@ -412,9 +394,6 @@ int main(void) | |||
412 | DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); | 394 | DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); |
413 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); | 395 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); |
414 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); | 396 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); |
415 | DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); | ||
416 | DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); | ||
417 | DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); | ||
418 | DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); | 397 | DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); |
419 | DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); | 398 | DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); |
420 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); | 399 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); |
@@ -422,27 +401,68 @@ int main(void) | |||
422 | DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); | 401 | DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); |
423 | DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); | 402 | DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); |
424 | 403 | ||
425 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); | 404 | /* book3s */ |
426 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); | 405 | #ifdef CONFIG_PPC_BOOK3S |
427 | DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); | ||
428 | |||
429 | /* book3s_64 */ | ||
430 | #ifdef CONFIG_PPC64 | ||
431 | DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); | ||
432 | DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); | 406 | DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); |
433 | DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); | ||
434 | DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); | 407 | DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); |
435 | DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); | 408 | DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); |
436 | DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); | ||
437 | DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); | 409 | DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); |
438 | DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); | 410 | DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); |
439 | DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); | 411 | DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); |
440 | DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); | 412 | DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); |
441 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); | 413 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); |
414 | DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - | ||
415 | offsetof(struct kvmppc_vcpu_book3s, vcpu)); | ||
416 | DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr)); | ||
417 | DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer)); | ||
418 | DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr)); | ||
419 | DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr)); | ||
420 | DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc)); | ||
421 | DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0])); | ||
422 | DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1])); | ||
423 | DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2])); | ||
424 | DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3])); | ||
425 | DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4])); | ||
426 | DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5])); | ||
427 | DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6])); | ||
428 | DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7])); | ||
429 | DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8])); | ||
430 | DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9])); | ||
431 | DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10])); | ||
432 | DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11])); | ||
433 | DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12])); | ||
434 | DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13])); | ||
435 | DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1)); | ||
436 | DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2)); | ||
437 | DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
438 | vmhandler)); | ||
439 | DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
440 | scratch0)); | ||
441 | DEFINE(SVCPU_SCRATCH1, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
442 | scratch1)); | ||
443 | DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
444 | in_guest)); | ||
445 | DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
446 | fault_dsisr)); | ||
447 | DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
448 | fault_dar)); | ||
449 | DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
450 | last_inst)); | ||
451 | DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu, | ||
452 | shadow_srr1)); | ||
453 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
454 | DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr)); | ||
455 | #endif | ||
442 | #else | 456 | #else |
443 | DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); | 457 | DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); |
444 | DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); | 458 | DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); |
445 | #endif /* CONFIG_PPC64 */ | 459 | DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); |
460 | DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); | ||
461 | DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); | ||
462 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); | ||
463 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); | ||
464 | DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); | ||
465 | #endif /* CONFIG_PPC_BOOK3S */ | ||
446 | #endif | 466 | #endif |
447 | #ifdef CONFIG_44x | 467 | #ifdef CONFIG_44x |
448 | DEFINE(PGD_T_LOG2, PGD_T_LOG2); | 468 | DEFINE(PGD_T_LOG2, PGD_T_LOG2); |
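asm-offsets.c is never linked into the kernel; it exists so that each DEFINE() becomes a numeric constant in the generated asm-offsets.h, which the world-switch assembly then uses to address fields of the shadow vcpu by offset from a base register instead of hard-coded numbers. The sketch below uses a made-up mock_shadow_vcpu layout, not the real kvmppc_book3s_shadow_vcpu, just to show what a constant like SVCPU_R3 boils down to:

	/* Conceptual sketch only: the real offsets are emitted into
	 * asm-offsets.h by the kbuild machinery from the kernel's own
	 * structure layout; this struct is an illustrative stand-in. */
	#include <stddef.h>
	#include <stdio.h>

	struct mock_shadow_vcpu {
		unsigned long gpr[14];
		unsigned int cr;
		unsigned int xer;
		unsigned long ctr;
		unsigned long lr;
		unsigned long pc;
	};

	int main(void)
	{
		/* The assembly entry/exit code would load and store guest state
		 * at base-plus-offset using constants like these. */
		printf("SVCPU_R3 = %zu\n", offsetof(struct mock_shadow_vcpu, gpr[3]));
		printf("SVCPU_CR = %zu\n", offsetof(struct mock_shadow_vcpu, cr));
		printf("SVCPU_PC = %zu\n", offsetof(struct mock_shadow_vcpu, pc));
		return 0;
	}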
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e025e89fe93e..98c4b29a56f4 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/asm-offsets.h> | 33 | #include <asm/asm-offsets.h> |
34 | #include <asm/ptrace.h> | 34 | #include <asm/ptrace.h> |
35 | #include <asm/bug.h> | 35 | #include <asm/bug.h> |
36 | #include <asm/kvm_book3s_asm.h> | ||
36 | 37 | ||
37 | /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ | 38 | /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ |
38 | #define LOAD_BAT(n, reg, RA, RB) \ | 39 | #define LOAD_BAT(n, reg, RA, RB) \ |
@@ -303,6 +304,7 @@ __secondary_hold_acknowledge: | |||
303 | */ | 304 | */ |
304 | #define EXCEPTION(n, label, hdlr, xfer) \ | 305 | #define EXCEPTION(n, label, hdlr, xfer) \ |
305 | . = n; \ | 306 | . = n; \ |
307 | DO_KVM n; \ | ||
306 | label: \ | 308 | label: \ |
307 | EXCEPTION_PROLOG; \ | 309 | EXCEPTION_PROLOG; \ |
308 | addi r3,r1,STACK_FRAME_OVERHEAD; \ | 310 | addi r3,r1,STACK_FRAME_OVERHEAD; \ |
@@ -358,6 +360,7 @@ i##n: \ | |||
358 | * -- paulus. | 360 | * -- paulus. |
359 | */ | 361 | */ |
360 | . = 0x200 | 362 | . = 0x200 |
363 | DO_KVM 0x200 | ||
361 | mtspr SPRN_SPRG_SCRATCH0,r10 | 364 | mtspr SPRN_SPRG_SCRATCH0,r10 |
362 | mtspr SPRN_SPRG_SCRATCH1,r11 | 365 | mtspr SPRN_SPRG_SCRATCH1,r11 |
363 | mfcr r10 | 366 | mfcr r10 |
@@ -381,6 +384,7 @@ i##n: \ | |||
381 | 384 | ||
382 | /* Data access exception. */ | 385 | /* Data access exception. */ |
383 | . = 0x300 | 386 | . = 0x300 |
387 | DO_KVM 0x300 | ||
384 | DataAccess: | 388 | DataAccess: |
385 | EXCEPTION_PROLOG | 389 | EXCEPTION_PROLOG |
386 | mfspr r10,SPRN_DSISR | 390 | mfspr r10,SPRN_DSISR |
@@ -397,6 +401,7 @@ DataAccess: | |||
397 | 401 | ||
398 | /* Instruction access exception. */ | 402 | /* Instruction access exception. */ |
399 | . = 0x400 | 403 | . = 0x400 |
404 | DO_KVM 0x400 | ||
400 | InstructionAccess: | 405 | InstructionAccess: |
401 | EXCEPTION_PROLOG | 406 | EXCEPTION_PROLOG |
402 | andis. r0,r9,0x4000 /* no pte found? */ | 407 | andis. r0,r9,0x4000 /* no pte found? */ |
@@ -413,6 +418,7 @@ InstructionAccess: | |||
413 | 418 | ||
414 | /* Alignment exception */ | 419 | /* Alignment exception */ |
415 | . = 0x600 | 420 | . = 0x600 |
421 | DO_KVM 0x600 | ||
416 | Alignment: | 422 | Alignment: |
417 | EXCEPTION_PROLOG | 423 | EXCEPTION_PROLOG |
418 | mfspr r4,SPRN_DAR | 424 | mfspr r4,SPRN_DAR |
@@ -427,6 +433,7 @@ Alignment: | |||
427 | 433 | ||
428 | /* Floating-point unavailable */ | 434 | /* Floating-point unavailable */ |
429 | . = 0x800 | 435 | . = 0x800 |
436 | DO_KVM 0x800 | ||
430 | FPUnavailable: | 437 | FPUnavailable: |
431 | BEGIN_FTR_SECTION | 438 | BEGIN_FTR_SECTION |
432 | /* | 439 | /* |
@@ -450,6 +457,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) | |||
450 | 457 | ||
451 | /* System call */ | 458 | /* System call */ |
452 | . = 0xc00 | 459 | . = 0xc00 |
460 | DO_KVM 0xc00 | ||
453 | SystemCall: | 461 | SystemCall: |
454 | EXCEPTION_PROLOG | 462 | EXCEPTION_PROLOG |
455 | EXC_XFER_EE_LITE(0xc00, DoSyscall) | 463 | EXC_XFER_EE_LITE(0xc00, DoSyscall) |
@@ -467,9 +475,11 @@ SystemCall: | |||
467 | * by executing an altivec instruction. | 475 | * by executing an altivec instruction. |
468 | */ | 476 | */ |
469 | . = 0xf00 | 477 | . = 0xf00 |
478 | DO_KVM 0xf00 | ||
470 | b PerformanceMonitor | 479 | b PerformanceMonitor |
471 | 480 | ||
472 | . = 0xf20 | 481 | . = 0xf20 |
482 | DO_KVM 0xf20 | ||
473 | b AltiVecUnavailable | 483 | b AltiVecUnavailable |
474 | 484 | ||
475 | /* | 485 | /* |
@@ -882,6 +892,10 @@ __secondary_start: | |||
882 | RFI | 892 | RFI |
883 | #endif /* CONFIG_SMP */ | 893 | #endif /* CONFIG_SMP */ |
884 | 894 | ||
895 | #ifdef CONFIG_KVM_BOOK3S_HANDLER | ||
896 | #include "../kvm/book3s_rmhandlers.S" | ||
897 | #endif | ||
898 | |||
885 | /* | 899 | /* |
886 | * Those generic dummy functions are kept for CPUs not | 900 | * Those generic dummy functions are kept for CPUs not |
887 | * included in CONFIG_6xx | 901 | * included in CONFIG_6xx |
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index bed9a29ee383..844a44b64472 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S | |||
@@ -37,7 +37,7 @@ | |||
37 | #include <asm/firmware.h> | 37 | #include <asm/firmware.h> |
38 | #include <asm/page_64.h> | 38 | #include <asm/page_64.h> |
39 | #include <asm/irqflags.h> | 39 | #include <asm/irqflags.h> |
40 | #include <asm/kvm_book3s_64_asm.h> | 40 | #include <asm/kvm_book3s_asm.h> |
41 | 41 | ||
42 | /* The physical memory is layed out such that the secondary processor | 42 | /* The physical memory is layed out such that the secondary processor |
43 | * spin code sits at 0x0000...0x00ff. On server, the vectors follow | 43 | * spin code sits at 0x0000...0x00ff. On server, the vectors follow |
@@ -169,7 +169,7 @@ exception_marker: | |||
169 | /* KVM trampoline code needs to be close to the interrupt handlers */ | 169 | /* KVM trampoline code needs to be close to the interrupt handlers */ |
170 | 170 | ||
171 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 171 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER |
172 | #include "../kvm/book3s_64_rmhandlers.S" | 172 | #include "../kvm/book3s_rmhandlers.S" |
173 | #endif | 173 | #endif |
174 | 174 | ||
175 | _GLOBAL(generic_secondary_thread_init) | 175 | _GLOBAL(generic_secondary_thread_init) |
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ab3e392ac63c..bc9f39d2598b 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c | |||
@@ -101,6 +101,10 @@ EXPORT_SYMBOL(pci_dram_offset); | |||
101 | EXPORT_SYMBOL(start_thread); | 101 | EXPORT_SYMBOL(start_thread); |
102 | EXPORT_SYMBOL(kernel_thread); | 102 | EXPORT_SYMBOL(kernel_thread); |
103 | 103 | ||
104 | #ifndef CONFIG_BOOKE | ||
105 | EXPORT_SYMBOL_GPL(cvt_df); | ||
106 | EXPORT_SYMBOL_GPL(cvt_fd); | ||
107 | #endif | ||
104 | EXPORT_SYMBOL(giveup_fpu); | 108 | EXPORT_SYMBOL(giveup_fpu); |
105 | #ifdef CONFIG_ALTIVEC | 109 | #ifdef CONFIG_ALTIVEC |
106 | EXPORT_SYMBOL(giveup_altivec); | 110 | EXPORT_SYMBOL(giveup_altivec); |
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 689a57c2ac80..73c0a3f64ed1 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c | |||
@@ -147,7 +147,7 @@ static int __init kvmppc_44x_init(void) | |||
147 | if (r) | 147 | if (r) |
148 | return r; | 148 | return r; |
149 | 149 | ||
150 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); | 150 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); |
151 | } | 151 | } |
152 | 152 | ||
153 | static void __exit kvmppc_44x_exit(void) | 153 | static void __exit kvmppc_44x_exit(void) |
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 60624cc9f4d4..b7baff78f90c 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig | |||
@@ -22,12 +22,34 @@ config KVM | |||
22 | select ANON_INODES | 22 | select ANON_INODES |
23 | select KVM_MMIO | 23 | select KVM_MMIO |
24 | 24 | ||
25 | config KVM_BOOK3S_HANDLER | ||
26 | bool | ||
27 | |||
28 | config KVM_BOOK3S_32_HANDLER | ||
29 | bool | ||
30 | select KVM_BOOK3S_HANDLER | ||
31 | |||
25 | config KVM_BOOK3S_64_HANDLER | 32 | config KVM_BOOK3S_64_HANDLER |
26 | bool | 33 | bool |
34 | select KVM_BOOK3S_HANDLER | ||
35 | |||
36 | config KVM_BOOK3S_32 | ||
37 | tristate "KVM support for PowerPC book3s_32 processors" | ||
38 | depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT | ||
39 | select KVM | ||
40 | select KVM_BOOK3S_32_HANDLER | ||
41 | ---help--- | ||
42 | Support running unmodified book3s_32 guest kernels | ||
43 | in virtual machines on book3s_32 host processors. | ||
44 | |||
45 | This module provides access to the hardware capabilities through | ||
46 | a character device node named /dev/kvm. | ||
47 | |||
48 | If unsure, say N. | ||
27 | 49 | ||
28 | config KVM_BOOK3S_64 | 50 | config KVM_BOOK3S_64 |
29 | tristate "KVM support for PowerPC book3s_64 processors" | 51 | tristate "KVM support for PowerPC book3s_64 processors" |
30 | depends on EXPERIMENTAL && PPC64 | 52 | depends on EXPERIMENTAL && PPC_BOOK3S_64 |
31 | select KVM | 53 | select KVM |
32 | select KVM_BOOK3S_64_HANDLER | 54 | select KVM_BOOK3S_64_HANDLER |
33 | ---help--- | 55 | ---help--- |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 56484d652377..ff436066bf77 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
@@ -14,7 +14,7 @@ CFLAGS_emulate.o := -I. | |||
14 | 14 | ||
15 | common-objs-y += powerpc.o emulate.o | 15 | common-objs-y += powerpc.o emulate.o |
16 | obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o | 16 | obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o |
17 | obj-$(CONFIG_KVM_BOOK3S_64_HANDLER) += book3s_64_exports.o | 17 | obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o |
18 | 18 | ||
19 | AFLAGS_booke_interrupts.o := -I$(obj) | 19 | AFLAGS_booke_interrupts.o := -I$(obj) |
20 | 20 | ||
@@ -40,17 +40,31 @@ kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) | |||
40 | 40 | ||
41 | kvm-book3s_64-objs := \ | 41 | kvm-book3s_64-objs := \ |
42 | $(common-objs-y) \ | 42 | $(common-objs-y) \ |
43 | fpu.o \ | ||
44 | book3s_paired_singles.o \ | ||
43 | book3s.o \ | 45 | book3s.o \ |
44 | book3s_64_emulate.o \ | 46 | book3s_emulate.o \ |
45 | book3s_64_interrupts.o \ | 47 | book3s_interrupts.o \ |
46 | book3s_64_mmu_host.o \ | 48 | book3s_64_mmu_host.o \ |
47 | book3s_64_mmu.o \ | 49 | book3s_64_mmu.o \ |
48 | book3s_32_mmu.o | 50 | book3s_32_mmu.o |
49 | kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs) | 51 | kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs) |
50 | 52 | ||
53 | kvm-book3s_32-objs := \ | ||
54 | $(common-objs-y) \ | ||
55 | fpu.o \ | ||
56 | book3s_paired_singles.o \ | ||
57 | book3s.o \ | ||
58 | book3s_emulate.o \ | ||
59 | book3s_interrupts.o \ | ||
60 | book3s_32_mmu_host.o \ | ||
61 | book3s_32_mmu.o | ||
62 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) | ||
63 | |||
51 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) | 64 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) |
52 | 65 | ||
53 | obj-$(CONFIG_KVM_440) += kvm.o | 66 | obj-$(CONFIG_KVM_440) += kvm.o |
54 | obj-$(CONFIG_KVM_E500) += kvm.o | 67 | obj-$(CONFIG_KVM_E500) += kvm.o |
55 | obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o | 68 | obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o |
69 | obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o | ||
56 | 70 | ||
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 604af29b71ed..b998abf1a63d 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -16,6 +16,7 @@ | |||
16 | 16 | ||
17 | #include <linux/kvm_host.h> | 17 | #include <linux/kvm_host.h> |
18 | #include <linux/err.h> | 18 | #include <linux/err.h> |
19 | #include <linux/slab.h> | ||
19 | 20 | ||
20 | #include <asm/reg.h> | 21 | #include <asm/reg.h> |
21 | #include <asm/cputable.h> | 22 | #include <asm/cputable.h> |
@@ -29,6 +30,7 @@ | |||
29 | #include <linux/gfp.h> | 30 | #include <linux/gfp.h> |
30 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
31 | #include <linux/vmalloc.h> | 32 | #include <linux/vmalloc.h> |
33 | #include <linux/highmem.h> | ||
32 | 34 | ||
33 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 35 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
34 | 36 | ||
@@ -36,7 +38,15 @@ | |||
36 | /* #define EXIT_DEBUG_SIMPLE */ | 38 | /* #define EXIT_DEBUG_SIMPLE */ |
37 | /* #define DEBUG_EXT */ | 39 | /* #define DEBUG_EXT */ |
38 | 40 | ||
39 | static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); | 41 | static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, |
42 | ulong msr); | ||
43 | |||
44 | /* Some compatibility defines */ | ||
45 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
46 | #define MSR_USER32 MSR_USER | ||
47 | #define MSR_USER64 MSR_USER | ||
48 | #define HW_PAGE_SIZE PAGE_SIZE | ||
49 | #endif | ||
40 | 50 | ||
41 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 51 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
42 | { "exits", VCPU_STAT(sum_exits) }, | 52 | { "exits", VCPU_STAT(sum_exits) }, |
@@ -69,18 +79,26 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) | |||
69 | 79 | ||
70 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 80 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
71 | { | 81 | { |
72 | memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); | 82 | #ifdef CONFIG_PPC_BOOK3S_64 |
73 | memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu, | 83 | memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb)); |
84 | memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu, | ||
74 | sizeof(get_paca()->shadow_vcpu)); | 85 | sizeof(get_paca()->shadow_vcpu)); |
75 | get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; | 86 | to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max; |
87 | #endif | ||
88 | |||
89 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
90 | current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; | ||
91 | #endif | ||
76 | } | 92 | } |
77 | 93 | ||
78 | void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) | 94 | void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) |
79 | { | 95 | { |
80 | memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); | 96 | #ifdef CONFIG_PPC_BOOK3S_64 |
81 | memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, | 97 | memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb)); |
98 | memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, | ||
82 | sizeof(get_paca()->shadow_vcpu)); | 99 | sizeof(get_paca()->shadow_vcpu)); |
83 | to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; | 100 | to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max; |
101 | #endif | ||
84 | 102 | ||
85 | kvmppc_giveup_ext(vcpu, MSR_FP); | 103 | kvmppc_giveup_ext(vcpu, MSR_FP); |
86 | kvmppc_giveup_ext(vcpu, MSR_VEC); | 104 | kvmppc_giveup_ext(vcpu, MSR_VEC); |
@@ -131,18 +149,22 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) | |||
131 | } | 149 | } |
132 | } | 150 | } |
133 | 151 | ||
134 | if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) || | 152 | if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) != |
135 | (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) { | 153 | (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { |
136 | kvmppc_mmu_flush_segments(vcpu); | 154 | kvmppc_mmu_flush_segments(vcpu); |
137 | kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); | 155 | kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); |
138 | } | 156 | } |
157 | |||
158 | /* Preload FPU if it's enabled */ | ||
159 | if (vcpu->arch.msr & MSR_FP) | ||
160 | kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); | ||
139 | } | 161 | } |
140 | 162 | ||
141 | void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) | 163 | void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) |
142 | { | 164 | { |
143 | vcpu->arch.srr0 = vcpu->arch.pc; | 165 | vcpu->arch.srr0 = kvmppc_get_pc(vcpu); |
144 | vcpu->arch.srr1 = vcpu->arch.msr | flags; | 166 | vcpu->arch.srr1 = vcpu->arch.msr | flags; |
145 | vcpu->arch.pc = to_book3s(vcpu)->hior + vec; | 167 | kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); |
146 | vcpu->arch.mmu.reset_msr(vcpu); | 168 | vcpu->arch.mmu.reset_msr(vcpu); |
147 | } | 169 | } |
148 | 170 | ||
@@ -218,6 +240,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | |||
218 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); | 240 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); |
219 | } | 241 | } |
220 | 242 | ||
243 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | ||
244 | struct kvm_interrupt *irq) | ||
245 | { | ||
246 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); | ||
247 | } | ||
248 | |||
221 | int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) | 249 | int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) |
222 | { | 250 | { |
223 | int deliver = 1; | 251 | int deliver = 1; |
@@ -302,7 +330,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) | |||
302 | printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); | 330 | printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); |
303 | #endif | 331 | #endif |
304 | priority = __ffs(*pending); | 332 | priority = __ffs(*pending); |
305 | while (priority <= (sizeof(unsigned int) * 8)) { | 333 | while (priority < BOOK3S_IRQPRIO_MAX) { |
306 | if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && | 334 | if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && |
307 | (priority != BOOK3S_IRQPRIO_DECREMENTER)) { | 335 | (priority != BOOK3S_IRQPRIO_DECREMENTER)) { |
308 | /* DEC interrupts get cleared by mtdec */ | 336 | /* DEC interrupts get cleared by mtdec */ |
@@ -318,13 +346,18 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) | |||
318 | 346 | ||
319 | void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | 347 | void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) |
320 | { | 348 | { |
349 | u32 host_pvr; | ||
350 | |||
321 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; | 351 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; |
322 | vcpu->arch.pvr = pvr; | 352 | vcpu->arch.pvr = pvr; |
353 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
323 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { | 354 | if ((pvr >= 0x330000) && (pvr < 0x70330000)) { |
324 | kvmppc_mmu_book3s_64_init(vcpu); | 355 | kvmppc_mmu_book3s_64_init(vcpu); |
325 | to_book3s(vcpu)->hior = 0xfff00000; | 356 | to_book3s(vcpu)->hior = 0xfff00000; |
326 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; | 357 | to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; |
327 | } else { | 358 | } else |
359 | #endif | ||
360 | { | ||
328 | kvmppc_mmu_book3s_32_init(vcpu); | 361 | kvmppc_mmu_book3s_32_init(vcpu); |
329 | to_book3s(vcpu)->hior = 0; | 362 | to_book3s(vcpu)->hior = 0; |
330 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; | 363 | to_book3s(vcpu)->msr_mask = 0xffffffffULL; |
@@ -337,6 +370,32 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | |||
337 | !strcmp(cur_cpu_spec->platform, "ppc970")) | 370 | !strcmp(cur_cpu_spec->platform, "ppc970")) |
338 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; | 371 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; |
339 | 372 | ||
373 | /* Cell performs badly if MSR_FEx are set. So let's hope nobody | ||
374 | really needs them in a VM on Cell and force disable them. */ | ||
375 | if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) | ||
376 | to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); | ||
377 | |||
378 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
379 | /* 32 bit Book3S always has 32 byte dcbz */ | ||
380 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; | ||
381 | #endif | ||
382 | |||
383 | /* On some CPUs we can execute paired single operations natively */ | ||
384 | asm ( "mfpvr %0" : "=r"(host_pvr)); | ||
385 | switch (host_pvr) { | ||
386 | case 0x00080200: /* lonestar 2.0 */ | ||
387 | case 0x00088202: /* lonestar 2.2 */ | ||
388 | case 0x70000100: /* gekko 1.0 */ | ||
389 | case 0x00080100: /* gekko 2.0 */ | ||
390 | case 0x00083203: /* gekko 2.3a */ | ||
391 | case 0x00083213: /* gekko 2.3b */ | ||
392 | case 0x00083204: /* gekko 2.4 */ | ||
393 | case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ | ||
394 | case 0x00087200: /* broadway */ | ||
395 | vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS; | ||
396 | /* Enable HID2.PSE - in case we need it later */ | ||
397 | mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29)); | ||
398 | } | ||
340 | } | 399 | } |
341 | 400 | ||
342 | /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To | 401 | /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To |
@@ -350,34 +409,29 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) | |||
350 | */ | 409 | */ |
351 | static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) | 410 | static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) |
352 | { | 411 | { |
353 | bool touched = false; | 412 | struct page *hpage; |
354 | hva_t hpage; | 413 | u64 hpage_offset; |
355 | u32 *page; | 414 | u32 *page; |
356 | int i; | 415 | int i; |
357 | 416 | ||
358 | hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); | 417 | hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); |
359 | if (kvm_is_error_hva(hpage)) | 418 | if (is_error_page(hpage)) |
360 | return; | 419 | return; |
361 | 420 | ||
362 | hpage |= pte->raddr & ~PAGE_MASK; | 421 | hpage_offset = pte->raddr & ~PAGE_MASK; |
363 | hpage &= ~0xFFFULL; | 422 | hpage_offset &= ~0xFFFULL; |
364 | 423 | hpage_offset /= 4; | |
365 | page = vmalloc(HW_PAGE_SIZE); | ||
366 | |||
367 | if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE)) | ||
368 | goto out; | ||
369 | 424 | ||
370 | for (i=0; i < HW_PAGE_SIZE / 4; i++) | 425 | get_page(hpage); |
371 | if ((page[i] & 0xff0007ff) == INS_DCBZ) { | 426 | page = kmap_atomic(hpage, KM_USER0); |
372 | page[i] &= 0xfffffff7; // reserved instruction, so we trap | ||
373 | touched = true; | ||
374 | } | ||
375 | 427 | ||
376 | if (touched) | 428 | /* patch dcbz into reserved instruction, so we trap */ |
377 | copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); | 429 | for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) |
430 | if ((page[i] & 0xff0007ff) == INS_DCBZ) | ||
431 | page[i] &= 0xfffffff7; | ||
378 | 432 | ||
379 | out: | 433 | kunmap_atomic(page, KM_USER0); |
380 | vfree(page); | 434 | put_page(hpage); |
381 | } | 435 | } |
382 | 436 | ||
383 | static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, | 437 | static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, |
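The rewritten kvmppc_patch_dcbz() above maps the guest page with kmap_atomic() and patches it in place rather than bouncing the whole page through vmalloc() and copy_from_user()/copy_to_user(); the scan itself is unchanged: any word matching the dcbz encoding mask has one bit cleared so it no longer decodes as dcbz and traps into the emulator. Pulled out of the kernel context so it can run on an ordinary buffer, the scan-and-patch step looks roughly like this (the INS_DCBZ value here is inferred from the dcbz encoding, primary opcode 31 and extended opcode 1014, rather than quoted from the kernel header, which is not shown in this patch):

	#include <stdint.h>
	#include <stdio.h>

	#define INS_DCBZ	0x7c0007ec	/* dcbz encoding under the 0xff0007ff mask, illustrative */

	static void patch_dcbz_words(uint32_t *page, unsigned int nwords)
	{
		unsigned int i;

		/* Clear one low bit of every dcbz so the guest faults on it. */
		for (i = 0; i < nwords; i++)
			if ((page[i] & 0xff0007ff) == INS_DCBZ)
				page[i] &= 0xfffffff7;
	}

	int main(void)
	{
		/* li r3,1 ; dcbz r4,r4 ; nop ; blr */
		uint32_t buf[4] = { 0x38600001, 0x7c0427ec, 0x60000000, 0x4e800020 };

		patch_dcbz_words(buf, 4);
		printf("%08x\n", buf[1]);	/* the dcbz word, with the trap bit cleared */
		return 0;
	}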
@@ -391,15 +445,7 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, | |||
391 | } else { | 445 | } else { |
392 | pte->eaddr = eaddr; | 446 | pte->eaddr = eaddr; |
393 | pte->raddr = eaddr & 0xffffffff; | 447 | pte->raddr = eaddr & 0xffffffff; |
394 | pte->vpage = eaddr >> 12; | 448 | pte->vpage = VSID_REAL | eaddr >> 12; |
395 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | ||
396 | case 0: | ||
397 | pte->vpage |= VSID_REAL; | ||
398 | case MSR_DR: | ||
399 | pte->vpage |= VSID_REAL_DR; | ||
400 | case MSR_IR: | ||
401 | pte->vpage |= VSID_REAL_IR; | ||
402 | } | ||
403 | pte->may_read = true; | 449 | pte->may_read = true; |
404 | pte->may_write = true; | 450 | pte->may_write = true; |
405 | pte->may_execute = true; | 451 | pte->may_execute = true; |
@@ -434,55 +480,55 @@ err: | |||
434 | return kvmppc_bad_hva(); | 480 | return kvmppc_bad_hva(); |
435 | } | 481 | } |
436 | 482 | ||
437 | int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr) | 483 | int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, |
484 | bool data) | ||
438 | { | 485 | { |
439 | struct kvmppc_pte pte; | 486 | struct kvmppc_pte pte; |
440 | hva_t hva = eaddr; | ||
441 | 487 | ||
442 | vcpu->stat.st++; | 488 | vcpu->stat.st++; |
443 | 489 | ||
444 | if (kvmppc_xlate(vcpu, eaddr, false, &pte)) | 490 | if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) |
445 | goto err; | 491 | return -ENOENT; |
446 | 492 | ||
447 | hva = kvmppc_pte_to_hva(vcpu, &pte, false); | 493 | *eaddr = pte.raddr; |
448 | if (kvm_is_error_hva(hva)) | ||
449 | goto err; | ||
450 | 494 | ||
451 | if (copy_to_user((void __user *)hva, ptr, size)) { | 495 | if (!pte.may_write) |
452 | printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); | 496 | return -EPERM; |
453 | goto err; | ||
454 | } | ||
455 | 497 | ||
456 | return 0; | 498 | if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) |
499 | return EMULATE_DO_MMIO; | ||
457 | 500 | ||
458 | err: | 501 | return EMULATE_DONE; |
459 | return -ENOENT; | ||
460 | } | 502 | } |
461 | 503 | ||
462 | int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, | 504 | int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, |
463 | bool data) | 505 | bool data) |
464 | { | 506 | { |
465 | struct kvmppc_pte pte; | 507 | struct kvmppc_pte pte; |
466 | hva_t hva = eaddr; | 508 | hva_t hva = *eaddr; |
467 | 509 | ||
468 | vcpu->stat.ld++; | 510 | vcpu->stat.ld++; |
469 | 511 | ||
470 | if (kvmppc_xlate(vcpu, eaddr, data, &pte)) | 512 | if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) |
471 | goto err; | 513 | goto nopte; |
514 | |||
515 | *eaddr = pte.raddr; | ||
472 | 516 | ||
473 | hva = kvmppc_pte_to_hva(vcpu, &pte, true); | 517 | hva = kvmppc_pte_to_hva(vcpu, &pte, true); |
474 | if (kvm_is_error_hva(hva)) | 518 | if (kvm_is_error_hva(hva)) |
475 | goto err; | 519 | goto mmio; |
476 | 520 | ||
477 | if (copy_from_user(ptr, (void __user *)hva, size)) { | 521 | if (copy_from_user(ptr, (void __user *)hva, size)) { |
478 | printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); | 522 | printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); |
479 | goto err; | 523 | goto mmio; |
480 | } | 524 | } |
481 | 525 | ||
482 | return 0; | 526 | return EMULATE_DONE; |
483 | 527 | ||
484 | err: | 528 | nopte: |
485 | return -ENOENT; | 529 | return -ENOENT; |
530 | mmio: | ||
531 | return EMULATE_DO_MMIO; | ||
486 | } | 532 | } |
487 | 533 | ||
488 | static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | 534 | static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) |
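Note on the hunk above: kvmppc_st() and kvmppc_ld() change their contract here. The effective address is passed by reference and updated to the translated real address, and the return value is an emulation result (EMULATE_DONE, EMULATE_DO_MMIO) or -ENOENT when the guest MMU has no translation. A small self-contained sketch of that convention, with invented demo logic standing in for the real translation:

    #include <stdint.h>
    #include <stdio.h>

    enum emu_result { EMULATE_DONE = 0, EMULATE_DO_MMIO, EMULATE_AGAIN };

    /* Sketch only: address goes in by pointer, comes back translated,
     * result says done / needs userspace MMIO / no translation. */
    static int demo_ld(uint64_t *eaddr, uint32_t *val)
    {
            if (*eaddr > 0xffffffffull)
                    return -2;              /* -ENOENT: no translation */
            *eaddr &= 0xffffffffull;        /* "translated" real address */
            if (*eaddr >= 0x80000000ull)
                    return EMULATE_DO_MMIO; /* forward to userspace */
            *val = 0x60000000;              /* pretend we read a nop */
            return EMULATE_DONE;
    }

    int main(void)
    {
            uint64_t ea = 0x1000;
            uint32_t insn = 0;

            if (demo_ld(&ea, &insn) == EMULATE_DONE)
                    printf("read %#x from real address %#llx\n",
                           insn, (unsigned long long)ea);
            return 0;
    }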
@@ -499,12 +545,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
499 | int page_found = 0; | 545 | int page_found = 0; |
500 | struct kvmppc_pte pte; | 546 | struct kvmppc_pte pte; |
501 | bool is_mmio = false; | 547 | bool is_mmio = false; |
548 | bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; | ||
549 | bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; | ||
550 | u64 vsid; | ||
502 | 551 | ||
503 | if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) { | 552 | relocated = data ? dr : ir; |
504 | relocated = (vcpu->arch.msr & MSR_DR); | ||
505 | } else { | ||
506 | relocated = (vcpu->arch.msr & MSR_IR); | ||
507 | } | ||
508 | 553 | ||
509 | /* Resolve real address if translation turned on */ | 554 | /* Resolve real address if translation turned on */ |
510 | if (relocated) { | 555 | if (relocated) { |
@@ -516,14 +561,25 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
516 | pte.raddr = eaddr & 0xffffffff; | 561 | pte.raddr = eaddr & 0xffffffff; |
517 | pte.eaddr = eaddr; | 562 | pte.eaddr = eaddr; |
518 | pte.vpage = eaddr >> 12; | 563 | pte.vpage = eaddr >> 12; |
519 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | 564 | } |
520 | case 0: | 565 | |
521 | pte.vpage |= VSID_REAL; | 566 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { |
522 | case MSR_DR: | 567 | case 0: |
523 | pte.vpage |= VSID_REAL_DR; | 568 | pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); |
524 | case MSR_IR: | 569 | break; |
525 | pte.vpage |= VSID_REAL_IR; | 570 | case MSR_DR: |
526 | } | 571 | case MSR_IR: |
572 | vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); | ||
573 | |||
574 | if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR) | ||
575 | pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); | ||
576 | else | ||
577 | pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); | ||
578 | pte.vpage |= vsid; | ||
579 | |||
580 | if (vsid == -1) | ||
581 | page_found = -EINVAL; | ||
582 | break; | ||
527 | } | 583 | } |
528 | 584 | ||
529 | if (vcpu->arch.mmu.is_dcbz32(vcpu) && | 585 | if (vcpu->arch.mmu.is_dcbz32(vcpu) && |
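Note on the hunk above: the vpage tag for the real-mode cases is now built in the VSID field of the virtual page number. eaddr >> 12 supplies the 4k page index, everything from bit SID_SHIFT - 12 (i.e. 16) upward is VSID space, and the guest VSID is folded in for split real mode so different segments cannot alias. A sketch of that layout arithmetic (SID_SHIFT = 28 is assumed from the 256MB segment size):

    #include <stdint.h>

    #define SID_SHIFT 28

    /* A 256MB segment holds 2^16 4k pages, so bits 0-15 of vpage are the
     * page index and the VSID (or a VSID_REAL* tag) lives from bit 16 up. */
    static uint64_t make_vpage(uint64_t vsid, uint64_t eaddr)
    {
            uint64_t page_index = (eaddr >> 12) & ((1ull << (SID_SHIFT - 12)) - 1);

            return (vsid << (SID_SHIFT - 12)) | page_index;
    }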
@@ -538,20 +594,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
538 | 594 | ||
539 | if (page_found == -ENOENT) { | 595 | if (page_found == -ENOENT) { |
540 | /* Page not found in guest PTE entries */ | 596 | /* Page not found in guest PTE entries */ |
541 | vcpu->arch.dear = vcpu->arch.fault_dear; | 597 | vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); |
542 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; | 598 | to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; |
543 | vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); | 599 | vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); |
544 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 600 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
545 | } else if (page_found == -EPERM) { | 601 | } else if (page_found == -EPERM) { |
546 | /* Storage protection */ | 602 | /* Storage protection */ |
547 | vcpu->arch.dear = vcpu->arch.fault_dear; | 603 | vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); |
548 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; | 604 | to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; |
549 | to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; | 605 | to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; |
550 | vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); | 606 | vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); |
551 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 607 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
552 | } else if (page_found == -EINVAL) { | 608 | } else if (page_found == -EINVAL) { |
553 | /* Page not found in guest SLB */ | 609 | /* Page not found in guest SLB */ |
554 | vcpu->arch.dear = vcpu->arch.fault_dear; | 610 | vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); |
555 | kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); | 611 | kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); |
556 | } else if (!is_mmio && | 612 | } else if (!is_mmio && |
557 | kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { | 613 | kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { |
@@ -583,11 +639,13 @@ static inline int get_fpr_index(int i) | |||
583 | } | 639 | } |
584 | 640 | ||
585 | /* Give up external provider (FPU, Altivec, VSX) */ | 641 | /* Give up external provider (FPU, Altivec, VSX) */ |
586 | static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) | 642 | void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) |
587 | { | 643 | { |
588 | struct thread_struct *t = ¤t->thread; | 644 | struct thread_struct *t = ¤t->thread; |
589 | u64 *vcpu_fpr = vcpu->arch.fpr; | 645 | u64 *vcpu_fpr = vcpu->arch.fpr; |
646 | #ifdef CONFIG_VSX | ||
590 | u64 *vcpu_vsx = vcpu->arch.vsr; | 647 | u64 *vcpu_vsx = vcpu->arch.vsr; |
648 | #endif | ||
591 | u64 *thread_fpr = (u64*)t->fpr; | 649 | u64 *thread_fpr = (u64*)t->fpr; |
592 | int i; | 650 | int i; |
593 | 651 | ||
@@ -629,21 +687,65 @@ static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) | |||
629 | kvmppc_recalc_shadow_msr(vcpu); | 687 | kvmppc_recalc_shadow_msr(vcpu); |
630 | } | 688 | } |
631 | 689 | ||
690 | static int kvmppc_read_inst(struct kvm_vcpu *vcpu) | ||
691 | { | ||
692 | ulong srr0 = kvmppc_get_pc(vcpu); | ||
693 | u32 last_inst = kvmppc_get_last_inst(vcpu); | ||
694 | int ret; | ||
695 | |||
696 | ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); | ||
697 | if (ret == -ENOENT) { | ||
698 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); | ||
699 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); | ||
700 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); | ||
701 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); | ||
702 | return EMULATE_AGAIN; | ||
703 | } | ||
704 | |||
705 | return EMULATE_DONE; | ||
706 | } | ||
707 | |||
708 | static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) | ||
709 | { | ||
710 | |||
711 | /* Need to do paired single emulation? */ | ||
712 | if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) | ||
713 | return EMULATE_DONE; | ||
714 | |||
715 | /* Read out the instruction */ | ||
716 | if (kvmppc_read_inst(vcpu) == EMULATE_DONE) | ||
717 | /* Need to emulate */ | ||
718 | return EMULATE_FAIL; | ||
719 | |||
720 | return EMULATE_AGAIN; | ||
721 | } | ||
722 | |||
632 | /* Handle external providers (FPU, Altivec, VSX) */ | 723 | /* Handle external providers (FPU, Altivec, VSX) */ |
633 | static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, | 724 | static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, |
634 | ulong msr) | 725 | ulong msr) |
635 | { | 726 | { |
636 | struct thread_struct *t = ¤t->thread; | 727 | struct thread_struct *t = ¤t->thread; |
637 | u64 *vcpu_fpr = vcpu->arch.fpr; | 728 | u64 *vcpu_fpr = vcpu->arch.fpr; |
729 | #ifdef CONFIG_VSX | ||
638 | u64 *vcpu_vsx = vcpu->arch.vsr; | 730 | u64 *vcpu_vsx = vcpu->arch.vsr; |
731 | #endif | ||
639 | u64 *thread_fpr = (u64*)t->fpr; | 732 | u64 *thread_fpr = (u64*)t->fpr; |
640 | int i; | 733 | int i; |
641 | 734 | ||
735 | /* When we have paired singles, we emulate in software */ | ||
736 | if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) | ||
737 | return RESUME_GUEST; | ||
738 | |||
642 | if (!(vcpu->arch.msr & msr)) { | 739 | if (!(vcpu->arch.msr & msr)) { |
643 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 740 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
644 | return RESUME_GUEST; | 741 | return RESUME_GUEST; |
645 | } | 742 | } |
646 | 743 | ||
744 | /* We already own the ext */ | ||
745 | if (vcpu->arch.guest_owned_ext & msr) { | ||
746 | return RESUME_GUEST; | ||
747 | } | ||
748 | |||
647 | #ifdef DEBUG_EXT | 749 | #ifdef DEBUG_EXT |
648 | printk(KERN_INFO "Loading up ext 0x%lx\n", msr); | 750 | printk(KERN_INFO "Loading up ext 0x%lx\n", msr); |
649 | #endif | 751 | #endif |
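Note on the hunks above: kvmppc_read_inst() fetches the faulting instruction through the reworked kvmppc_ld(); if even that fetch cannot be translated it forges an instruction storage interrupt and reports EMULATE_AGAIN. kvmppc_check_ext() then uses it to decide what an FP/Altivec/VSX-unavailable exit means on a paired-single guest: emulate the instruction in software (EMULATE_FAIL), re-enter because a fault was already queued (EMULATE_AGAIN), or simply hand the unit over (EMULATE_DONE). The decision, sketched with illustrative names:

    /* Sketch of the kvmppc_check_ext() decision; names are stand-ins. */
    enum next_step { GIVE_GUEST_THE_UNIT, EMULATE_IN_SOFTWARE, REENTER_GUEST };

    static enum next_step on_ext_unavailable(int has_paired_singles,
                                             int inst_readable)
    {
            if (!has_paired_singles)
                    return GIVE_GUEST_THE_UNIT;     /* EMULATE_DONE path */
            if (inst_readable)
                    return EMULATE_IN_SOFTWARE;     /* EMULATE_FAIL -> program_interrupt */
            return REENTER_GUEST;                   /* EMULATE_AGAIN: ISI already queued */
    }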
@@ -696,21 +798,33 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
696 | run->ready_for_interrupt_injection = 1; | 798 | run->ready_for_interrupt_injection = 1; |
697 | #ifdef EXIT_DEBUG | 799 | #ifdef EXIT_DEBUG |
698 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", | 800 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", |
699 | exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, | 801 | exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), |
700 | kvmppc_get_dec(vcpu), vcpu->arch.msr); | 802 | kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1); |
701 | #elif defined (EXIT_DEBUG_SIMPLE) | 803 | #elif defined (EXIT_DEBUG_SIMPLE) |
702 | if ((exit_nr != 0x900) && (exit_nr != 0x500)) | 804 | if ((exit_nr != 0x900) && (exit_nr != 0x500)) |
703 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", | 805 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", |
704 | exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, | 806 | exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), |
705 | vcpu->arch.msr); | 807 | vcpu->arch.msr); |
706 | #endif | 808 | #endif |
707 | kvm_resched(vcpu); | 809 | kvm_resched(vcpu); |
708 | switch (exit_nr) { | 810 | switch (exit_nr) { |
709 | case BOOK3S_INTERRUPT_INST_STORAGE: | 811 | case BOOK3S_INTERRUPT_INST_STORAGE: |
710 | vcpu->stat.pf_instruc++; | 812 | vcpu->stat.pf_instruc++; |
813 | |||
814 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
815 | /* We set segments as unused segments when invalidating them. So | ||
816 | * treat the respective fault as segment fault. */ | ||
817 | if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] | ||
818 | == SR_INVALID) { | ||
819 | kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); | ||
820 | r = RESUME_GUEST; | ||
821 | break; | ||
822 | } | ||
823 | #endif | ||
824 | |||
711 | /* only care about PTEG not found errors, but leave NX alone */ | 825 | /* only care about PTEG not found errors, but leave NX alone */ |
712 | if (vcpu->arch.shadow_srr1 & 0x40000000) { | 826 | if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) { |
713 | r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); | 827 | r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); |
714 | vcpu->stat.sp_instruc++; | 828 | vcpu->stat.sp_instruc++; |
715 | } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && | 829 | } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && |
716 | (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { | 830 | (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { |
@@ -719,37 +833,52 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
719 | * so we can't use the NX bit inside the guest. Let's cross our fingers, | 833 | * so we can't use the NX bit inside the guest. Let's cross our fingers, |
720 | * that no guest that needs the dcbz hack does NX. | 834 | * that no guest that needs the dcbz hack does NX. |
721 | */ | 835 | */ |
722 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); | 836 | kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); |
837 | r = RESUME_GUEST; | ||
723 | } else { | 838 | } else { |
724 | vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; | 839 | vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000; |
725 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 840 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
726 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); | 841 | kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); |
727 | r = RESUME_GUEST; | 842 | r = RESUME_GUEST; |
728 | } | 843 | } |
729 | break; | 844 | break; |
730 | case BOOK3S_INTERRUPT_DATA_STORAGE: | 845 | case BOOK3S_INTERRUPT_DATA_STORAGE: |
846 | { | ||
847 | ulong dar = kvmppc_get_fault_dar(vcpu); | ||
731 | vcpu->stat.pf_storage++; | 848 | vcpu->stat.pf_storage++; |
849 | |||
850 | #ifdef CONFIG_PPC_BOOK3S_32 | ||
851 | /* We set segments as unused segments when invalidating them. So | ||
852 | * treat the respective fault as segment fault. */ | ||
853 | if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) { | ||
854 | kvmppc_mmu_map_segment(vcpu, dar); | ||
855 | r = RESUME_GUEST; | ||
856 | break; | ||
857 | } | ||
858 | #endif | ||
859 | |||
732 | /* The only case we need to handle is missing shadow PTEs */ | 860 | /* The only case we need to handle is missing shadow PTEs */ |
733 | if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) { | 861 | if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { |
734 | r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr); | 862 | r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); |
735 | } else { | 863 | } else { |
736 | vcpu->arch.dear = vcpu->arch.fault_dear; | 864 | vcpu->arch.dear = dar; |
737 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; | 865 | to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; |
738 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 866 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
739 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL); | 867 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL); |
740 | r = RESUME_GUEST; | 868 | r = RESUME_GUEST; |
741 | } | 869 | } |
742 | break; | 870 | break; |
871 | } | ||
743 | case BOOK3S_INTERRUPT_DATA_SEGMENT: | 872 | case BOOK3S_INTERRUPT_DATA_SEGMENT: |
744 | if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) { | 873 | if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { |
745 | vcpu->arch.dear = vcpu->arch.fault_dear; | 874 | vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); |
746 | kvmppc_book3s_queue_irqprio(vcpu, | 875 | kvmppc_book3s_queue_irqprio(vcpu, |
747 | BOOK3S_INTERRUPT_DATA_SEGMENT); | 876 | BOOK3S_INTERRUPT_DATA_SEGMENT); |
748 | } | 877 | } |
749 | r = RESUME_GUEST; | 878 | r = RESUME_GUEST; |
750 | break; | 879 | break; |
751 | case BOOK3S_INTERRUPT_INST_SEGMENT: | 880 | case BOOK3S_INTERRUPT_INST_SEGMENT: |
752 | if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) { | 881 | if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) { |
753 | kvmppc_book3s_queue_irqprio(vcpu, | 882 | kvmppc_book3s_queue_irqprio(vcpu, |
754 | BOOK3S_INTERRUPT_INST_SEGMENT); | 883 | BOOK3S_INTERRUPT_INST_SEGMENT); |
755 | } | 884 | } |
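Note on the hunks above: on 32-bit Book3S hosts, invalidated shadow segments are marked SR_INVALID (see the new book3s_32_mmu_host.c later in this diff), so an ISI or DSI that lands in such a segment is resolved by simply remapping the segment rather than by a page-table walk. The segment register index used for that check is just the top four bits of the effective address; a trivial sketch:

    #include <stdint.h>

    #define SID_SHIFT 28

    /* Each of the 16 segment registers covers a 256MB window. */
    static unsigned int sr_index(uint32_t eaddr)
    {
            return eaddr >> SID_SHIFT;      /* 0..15 */
    }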
@@ -764,18 +893,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
764 | vcpu->stat.ext_intr_exits++; | 893 | vcpu->stat.ext_intr_exits++; |
765 | r = RESUME_GUEST; | 894 | r = RESUME_GUEST; |
766 | break; | 895 | break; |
896 | case BOOK3S_INTERRUPT_PERFMON: | ||
897 | r = RESUME_GUEST; | ||
898 | break; | ||
767 | case BOOK3S_INTERRUPT_PROGRAM: | 899 | case BOOK3S_INTERRUPT_PROGRAM: |
768 | { | 900 | { |
769 | enum emulation_result er; | 901 | enum emulation_result er; |
770 | ulong flags; | 902 | ulong flags; |
771 | 903 | ||
772 | flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; | 904 | program_interrupt: |
905 | flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; | ||
773 | 906 | ||
774 | if (vcpu->arch.msr & MSR_PR) { | 907 | if (vcpu->arch.msr & MSR_PR) { |
775 | #ifdef EXIT_DEBUG | 908 | #ifdef EXIT_DEBUG |
776 | printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst); | 909 | printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); |
777 | #endif | 910 | #endif |
778 | if ((vcpu->arch.last_inst & 0xff0007ff) != | 911 | if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != |
779 | (INS_DCBZ & 0xfffffff7)) { | 912 | (INS_DCBZ & 0xfffffff7)) { |
780 | kvmppc_core_queue_program(vcpu, flags); | 913 | kvmppc_core_queue_program(vcpu, flags); |
781 | r = RESUME_GUEST; | 914 | r = RESUME_GUEST; |
@@ -789,33 +922,80 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
789 | case EMULATE_DONE: | 922 | case EMULATE_DONE: |
790 | r = RESUME_GUEST_NV; | 923 | r = RESUME_GUEST_NV; |
791 | break; | 924 | break; |
925 | case EMULATE_AGAIN: | ||
926 | r = RESUME_GUEST; | ||
927 | break; | ||
792 | case EMULATE_FAIL: | 928 | case EMULATE_FAIL: |
793 | printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", | 929 | printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", |
794 | __func__, vcpu->arch.pc, vcpu->arch.last_inst); | 930 | __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); |
795 | kvmppc_core_queue_program(vcpu, flags); | 931 | kvmppc_core_queue_program(vcpu, flags); |
796 | r = RESUME_GUEST; | 932 | r = RESUME_GUEST; |
797 | break; | 933 | break; |
934 | case EMULATE_DO_MMIO: | ||
935 | run->exit_reason = KVM_EXIT_MMIO; | ||
936 | r = RESUME_HOST_NV; | ||
937 | break; | ||
798 | default: | 938 | default: |
799 | BUG(); | 939 | BUG(); |
800 | } | 940 | } |
801 | break; | 941 | break; |
802 | } | 942 | } |
803 | case BOOK3S_INTERRUPT_SYSCALL: | 943 | case BOOK3S_INTERRUPT_SYSCALL: |
804 | #ifdef EXIT_DEBUG | 944 | // XXX make user settable |
805 | printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); | 945 | if (vcpu->arch.osi_enabled && |
806 | #endif | 946 | (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && |
807 | vcpu->stat.syscall_exits++; | 947 | (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { |
808 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 948 | u64 *gprs = run->osi.gprs; |
809 | r = RESUME_GUEST; | 949 | int i; |
950 | |||
951 | run->exit_reason = KVM_EXIT_OSI; | ||
952 | for (i = 0; i < 32; i++) | ||
953 | gprs[i] = kvmppc_get_gpr(vcpu, i); | ||
954 | vcpu->arch.osi_needed = 1; | ||
955 | r = RESUME_HOST_NV; | ||
956 | |||
957 | } else { | ||
958 | vcpu->stat.syscall_exits++; | ||
959 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
960 | r = RESUME_GUEST; | ||
961 | } | ||
810 | break; | 962 | break; |
811 | case BOOK3S_INTERRUPT_FP_UNAVAIL: | 963 | case BOOK3S_INTERRUPT_FP_UNAVAIL: |
812 | r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP); | ||
813 | break; | ||
814 | case BOOK3S_INTERRUPT_ALTIVEC: | 964 | case BOOK3S_INTERRUPT_ALTIVEC: |
815 | r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC); | ||
816 | break; | ||
817 | case BOOK3S_INTERRUPT_VSX: | 965 | case BOOK3S_INTERRUPT_VSX: |
818 | r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX); | 966 | { |
967 | int ext_msr = 0; | ||
968 | |||
969 | switch (exit_nr) { | ||
970 | case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; | ||
971 | case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; | ||
972 | case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; | ||
973 | } | ||
974 | |||
975 | switch (kvmppc_check_ext(vcpu, exit_nr)) { | ||
976 | case EMULATE_DONE: | ||
977 | /* everything ok - let's enable the ext */ | ||
978 | r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); | ||
979 | break; | ||
980 | case EMULATE_FAIL: | ||
981 | /* we need to emulate this instruction */ | ||
982 | goto program_interrupt; | ||
983 | break; | ||
984 | default: | ||
985 | /* nothing to worry about - go again */ | ||
986 | break; | ||
987 | } | ||
988 | break; | ||
989 | } | ||
990 | case BOOK3S_INTERRUPT_ALIGNMENT: | ||
991 | if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { | ||
992 | to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, | ||
993 | kvmppc_get_last_inst(vcpu)); | ||
994 | vcpu->arch.dear = kvmppc_alignment_dar(vcpu, | ||
995 | kvmppc_get_last_inst(vcpu)); | ||
996 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
997 | } | ||
998 | r = RESUME_GUEST; | ||
819 | break; | 999 | break; |
820 | case BOOK3S_INTERRUPT_MACHINE_CHECK: | 1000 | case BOOK3S_INTERRUPT_MACHINE_CHECK: |
821 | case BOOK3S_INTERRUPT_TRACE: | 1001 | case BOOK3S_INTERRUPT_TRACE: |
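Note on the hunks above: a guest sc instruction is now checked for the Mac-on-Linux OSI calling convention. If OSI is enabled and r3/r4 carry the OSI magic values, all 32 GPRs are copied into the run structure and the vcpu exits to userspace with KVM_EXIT_OSI instead of reflecting a system call into the guest. The FP/Altivec/VSX unavailable cases are funneled through kvmppc_check_ext() before the unit is handed over, and alignment interrupts compute DSISR/DAR from the faulting instruction. A hypothetical userspace-side handler for the OSI exit (assuming the uapi additions from this series; emulate_osi_call() is an invented helper):

    #include <linux/kvm.h>

    /* The kernel copied all 32 guest GPRs into run->osi.gprs; userspace
     * services the call and edits gprs[] in place, and the kernel writes
     * them back on the next KVM_RUN because osi_needed was set. */
    extern void emulate_osi_call(__u64 gprs[32]);

    static void handle_osi_exit(struct kvm_run *run)
    {
            if (run->exit_reason == KVM_EXIT_OSI)
                    emulate_osi_call(run->osi.gprs);
    }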
@@ -825,7 +1005,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
825 | default: | 1005 | default: |
826 | /* Ugh - bork here! What did we get? */ | 1006 | /* Ugh - bork here! What did we get? */ |
827 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", | 1007 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", |
828 | exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1); | 1008 | exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1); |
829 | r = RESUME_HOST; | 1009 | r = RESUME_HOST; |
830 | BUG(); | 1010 | BUG(); |
831 | break; | 1011 | break; |
@@ -852,7 +1032,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
852 | } | 1032 | } |
853 | 1033 | ||
854 | #ifdef EXIT_DEBUG | 1034 | #ifdef EXIT_DEBUG |
855 | printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r); | 1035 | printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r); |
856 | #endif | 1036 | #endif |
857 | 1037 | ||
858 | return r; | 1038 | return r; |
@@ -867,10 +1047,12 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
867 | { | 1047 | { |
868 | int i; | 1048 | int i; |
869 | 1049 | ||
870 | regs->pc = vcpu->arch.pc; | 1050 | vcpu_load(vcpu); |
1051 | |||
1052 | regs->pc = kvmppc_get_pc(vcpu); | ||
871 | regs->cr = kvmppc_get_cr(vcpu); | 1053 | regs->cr = kvmppc_get_cr(vcpu); |
872 | regs->ctr = vcpu->arch.ctr; | 1054 | regs->ctr = kvmppc_get_ctr(vcpu); |
873 | regs->lr = vcpu->arch.lr; | 1055 | regs->lr = kvmppc_get_lr(vcpu); |
874 | regs->xer = kvmppc_get_xer(vcpu); | 1056 | regs->xer = kvmppc_get_xer(vcpu); |
875 | regs->msr = vcpu->arch.msr; | 1057 | regs->msr = vcpu->arch.msr; |
876 | regs->srr0 = vcpu->arch.srr0; | 1058 | regs->srr0 = vcpu->arch.srr0; |
@@ -887,6 +1069,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
887 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 1069 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
888 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 1070 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
889 | 1071 | ||
1072 | vcpu_put(vcpu); | ||
1073 | |||
890 | return 0; | 1074 | return 0; |
891 | } | 1075 | } |
892 | 1076 | ||
@@ -894,10 +1078,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
894 | { | 1078 | { |
895 | int i; | 1079 | int i; |
896 | 1080 | ||
897 | vcpu->arch.pc = regs->pc; | 1081 | vcpu_load(vcpu); |
1082 | |||
1083 | kvmppc_set_pc(vcpu, regs->pc); | ||
898 | kvmppc_set_cr(vcpu, regs->cr); | 1084 | kvmppc_set_cr(vcpu, regs->cr); |
899 | vcpu->arch.ctr = regs->ctr; | 1085 | kvmppc_set_ctr(vcpu, regs->ctr); |
900 | vcpu->arch.lr = regs->lr; | 1086 | kvmppc_set_lr(vcpu, regs->lr); |
901 | kvmppc_set_xer(vcpu, regs->xer); | 1087 | kvmppc_set_xer(vcpu, regs->xer); |
902 | kvmppc_set_msr(vcpu, regs->msr); | 1088 | kvmppc_set_msr(vcpu, regs->msr); |
903 | vcpu->arch.srr0 = regs->srr0; | 1089 | vcpu->arch.srr0 = regs->srr0; |
@@ -913,6 +1099,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
913 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 1099 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
914 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 1100 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
915 | 1101 | ||
1102 | vcpu_put(vcpu); | ||
1103 | |||
916 | return 0; | 1104 | return 0; |
917 | } | 1105 | } |
918 | 1106 | ||
@@ -922,6 +1110,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
922 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | 1110 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); |
923 | int i; | 1111 | int i; |
924 | 1112 | ||
1113 | vcpu_load(vcpu); | ||
1114 | |||
925 | sregs->pvr = vcpu->arch.pvr; | 1115 | sregs->pvr = vcpu->arch.pvr; |
926 | 1116 | ||
927 | sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; | 1117 | sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; |
@@ -940,6 +1130,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
940 | sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; | 1130 | sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; |
941 | } | 1131 | } |
942 | } | 1132 | } |
1133 | |||
1134 | vcpu_put(vcpu); | ||
1135 | |||
943 | return 0; | 1136 | return 0; |
944 | } | 1137 | } |
945 | 1138 | ||
@@ -949,6 +1142,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
949 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | 1142 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); |
950 | int i; | 1143 | int i; |
951 | 1144 | ||
1145 | vcpu_load(vcpu); | ||
1146 | |||
952 | kvmppc_set_pvr(vcpu, sregs->pvr); | 1147 | kvmppc_set_pvr(vcpu, sregs->pvr); |
953 | 1148 | ||
954 | vcpu3s->sdr1 = sregs->u.s.sdr1; | 1149 | vcpu3s->sdr1 = sregs->u.s.sdr1; |
@@ -975,6 +1170,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
975 | 1170 | ||
976 | /* Flush the MMU after messing with the segments */ | 1171 | /* Flush the MMU after messing with the segments */ |
977 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 1172 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
1173 | |||
1174 | vcpu_put(vcpu); | ||
1175 | |||
978 | return 0; | 1176 | return 0; |
979 | } | 1177 | } |
980 | 1178 | ||
@@ -1042,24 +1240,33 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
1042 | { | 1240 | { |
1043 | struct kvmppc_vcpu_book3s *vcpu_book3s; | 1241 | struct kvmppc_vcpu_book3s *vcpu_book3s; |
1044 | struct kvm_vcpu *vcpu; | 1242 | struct kvm_vcpu *vcpu; |
1045 | int err; | 1243 | int err = -ENOMEM; |
1046 | 1244 | ||
1047 | vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, | 1245 | vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); |
1048 | get_order(sizeof(struct kvmppc_vcpu_book3s))); | 1246 | if (!vcpu_book3s) |
1049 | if (!vcpu_book3s) { | ||
1050 | err = -ENOMEM; | ||
1051 | goto out; | 1247 | goto out; |
1052 | } | 1248 | |
1249 | memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s)); | ||
1250 | |||
1251 | vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *) | ||
1252 | kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL); | ||
1253 | if (!vcpu_book3s->shadow_vcpu) | ||
1254 | goto free_vcpu; | ||
1053 | 1255 | ||
1054 | vcpu = &vcpu_book3s->vcpu; | 1256 | vcpu = &vcpu_book3s->vcpu; |
1055 | err = kvm_vcpu_init(vcpu, kvm, id); | 1257 | err = kvm_vcpu_init(vcpu, kvm, id); |
1056 | if (err) | 1258 | if (err) |
1057 | goto free_vcpu; | 1259 | goto free_shadow_vcpu; |
1058 | 1260 | ||
1059 | vcpu->arch.host_retip = kvm_return_point; | 1261 | vcpu->arch.host_retip = kvm_return_point; |
1060 | vcpu->arch.host_msr = mfmsr(); | 1262 | vcpu->arch.host_msr = mfmsr(); |
1263 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
1061 | /* default to book3s_64 (970fx) */ | 1264 | /* default to book3s_64 (970fx) */ |
1062 | vcpu->arch.pvr = 0x3C0301; | 1265 | vcpu->arch.pvr = 0x3C0301; |
1266 | #else | ||
1267 | /* default to book3s_32 (750) */ | ||
1268 | vcpu->arch.pvr = 0x84202; | ||
1269 | #endif | ||
1063 | kvmppc_set_pvr(vcpu, vcpu->arch.pvr); | 1270 | kvmppc_set_pvr(vcpu, vcpu->arch.pvr); |
1064 | vcpu_book3s->slb_nr = 64; | 1271 | vcpu_book3s->slb_nr = 64; |
1065 | 1272 | ||
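Note on the hunk above: vcpu allocation moves from __get_free_pages() to vmalloc() for the large book3s struct, gains a separately kzalloc'ed shadow_vcpu, and unwinds failures through the usual goto ladder; the default PVR is now chosen per host flavour (970FX on Book3S_64, 750 on Book3S_32). The unwind pattern, as a standalone sketch with malloc/calloc standing in for the kernel allocators:

    #include <stdlib.h>

    struct demo { void *shadow; };

    /* Allocate two pieces, undo in reverse order on failure. */
    static struct demo *demo_create(void)
    {
            struct demo *d = malloc(sizeof(*d));

            if (!d)
                    goto out;
            d->shadow = calloc(1, 64);
            if (!d->shadow)
                    goto free_d;
            return d;

    free_d:
            free(d);
    out:
            return NULL;
    }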
@@ -1067,23 +1274,24 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
1067 | vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; | 1274 | vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; |
1068 | vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; | 1275 | vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; |
1069 | vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; | 1276 | vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; |
1277 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
1070 | vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; | 1278 | vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; |
1279 | #else | ||
1280 | vcpu->arch.rmcall = (ulong)kvmppc_rmcall; | ||
1281 | #endif | ||
1071 | 1282 | ||
1072 | vcpu->arch.shadow_msr = MSR_USER64; | 1283 | vcpu->arch.shadow_msr = MSR_USER64; |
1073 | 1284 | ||
1074 | err = __init_new_context(); | 1285 | err = kvmppc_mmu_init(vcpu); |
1075 | if (err < 0) | 1286 | if (err < 0) |
1076 | goto free_vcpu; | 1287 | goto free_shadow_vcpu; |
1077 | vcpu_book3s->context_id = err; | ||
1078 | |||
1079 | vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1; | ||
1080 | vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS; | ||
1081 | vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; | ||
1082 | 1288 | ||
1083 | return vcpu; | 1289 | return vcpu; |
1084 | 1290 | ||
1291 | free_shadow_vcpu: | ||
1292 | kfree(vcpu_book3s->shadow_vcpu); | ||
1085 | free_vcpu: | 1293 | free_vcpu: |
1086 | free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); | 1294 | vfree(vcpu_book3s); |
1087 | out: | 1295 | out: |
1088 | return ERR_PTR(err); | 1296 | return ERR_PTR(err); |
1089 | } | 1297 | } |
@@ -1092,9 +1300,9 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) | |||
1092 | { | 1300 | { |
1093 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | 1301 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); |
1094 | 1302 | ||
1095 | __destroy_context(vcpu_book3s->context_id); | ||
1096 | kvm_vcpu_uninit(vcpu); | 1303 | kvm_vcpu_uninit(vcpu); |
1097 | free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); | 1304 | kfree(vcpu_book3s->shadow_vcpu); |
1305 | vfree(vcpu_book3s); | ||
1098 | } | 1306 | } |
1099 | 1307 | ||
1100 | extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 1308 | extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
@@ -1102,8 +1310,12 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1102 | { | 1310 | { |
1103 | int ret; | 1311 | int ret; |
1104 | struct thread_struct ext_bkp; | 1312 | struct thread_struct ext_bkp; |
1313 | #ifdef CONFIG_ALTIVEC | ||
1105 | bool save_vec = current->thread.used_vr; | 1314 | bool save_vec = current->thread.used_vr; |
1315 | #endif | ||
1316 | #ifdef CONFIG_VSX | ||
1106 | bool save_vsx = current->thread.used_vsr; | 1317 | bool save_vsx = current->thread.used_vsr; |
1318 | #endif | ||
1107 | ulong ext_msr; | 1319 | ulong ext_msr; |
1108 | 1320 | ||
1109 | /* No need to go into the guest when all we do is going out */ | 1321 | /* No need to go into the guest when all we do is going out */ |
@@ -1144,6 +1356,10 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1144 | /* XXX we get called with irq disabled - change that! */ | 1356 | /* XXX we get called with irq disabled - change that! */ |
1145 | local_irq_enable(); | 1357 | local_irq_enable(); |
1146 | 1358 | ||
1359 | /* Preload FPU if it's enabled */ | ||
1360 | if (vcpu->arch.msr & MSR_FP) | ||
1361 | kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); | ||
1362 | |||
1147 | ret = __kvmppc_vcpu_entry(kvm_run, vcpu); | 1363 | ret = __kvmppc_vcpu_entry(kvm_run, vcpu); |
1148 | 1364 | ||
1149 | local_irq_disable(); | 1365 | local_irq_disable(); |
@@ -1179,7 +1395,8 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1179 | 1395 | ||
1180 | static int kvmppc_book3s_init(void) | 1396 | static int kvmppc_book3s_init(void) |
1181 | { | 1397 | { |
1182 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE); | 1398 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0, |
1399 | THIS_MODULE); | ||
1183 | } | 1400 | } |
1184 | 1401 | ||
1185 | static void kvmppc_book3s_exit(void) | 1402 | static void kvmppc_book3s_exit(void) |
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index faf99f20d993..0b10503c8a4a 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c | |||
@@ -37,7 +37,7 @@ | |||
37 | #define dprintk(X...) do { } while(0) | 37 | #define dprintk(X...) do { } while(0) |
38 | #endif | 38 | #endif |
39 | 39 | ||
40 | #ifdef DEBUG_PTE | 40 | #ifdef DEBUG_MMU_PTE |
41 | #define dprintk_pte(X...) printk(KERN_INFO X) | 41 | #define dprintk_pte(X...) printk(KERN_INFO X) |
42 | #else | 42 | #else |
43 | #define dprintk_pte(X...) do { } while(0) | 43 | #define dprintk_pte(X...) do { } while(0) |
@@ -45,6 +45,9 @@ | |||
45 | 45 | ||
46 | #define PTEG_FLAG_ACCESSED 0x00000100 | 46 | #define PTEG_FLAG_ACCESSED 0x00000100 |
47 | #define PTEG_FLAG_DIRTY 0x00000080 | 47 | #define PTEG_FLAG_DIRTY 0x00000080 |
48 | #ifndef SID_SHIFT | ||
49 | #define SID_SHIFT 28 | ||
50 | #endif | ||
48 | 51 | ||
49 | static inline bool check_debug_ip(struct kvm_vcpu *vcpu) | 52 | static inline bool check_debug_ip(struct kvm_vcpu *vcpu) |
50 | { | 53 | { |
@@ -57,6 +60,8 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu) | |||
57 | 60 | ||
58 | static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, | 61 | static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, |
59 | struct kvmppc_pte *pte, bool data); | 62 | struct kvmppc_pte *pte, bool data); |
63 | static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, | ||
64 | u64 *vsid); | ||
60 | 65 | ||
61 | static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) | 66 | static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) |
62 | { | 67 | { |
@@ -66,13 +71,14 @@ static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t e | |||
66 | static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, | 71 | static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, |
67 | bool data) | 72 | bool data) |
68 | { | 73 | { |
69 | struct kvmppc_sr *sre = find_sr(to_book3s(vcpu), eaddr); | 74 | u64 vsid; |
70 | struct kvmppc_pte pte; | 75 | struct kvmppc_pte pte; |
71 | 76 | ||
72 | if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) | 77 | if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) |
73 | return pte.vpage; | 78 | return pte.vpage; |
74 | 79 | ||
75 | return (((u64)eaddr >> 12) & 0xffff) | (((u64)sre->vsid) << 16); | 80 | kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); |
81 | return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); | ||
76 | } | 82 | } |
77 | 83 | ||
78 | static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) | 84 | static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) |
@@ -142,8 +148,13 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
142 | bat->bepi_mask); | 148 | bat->bepi_mask); |
143 | } | 149 | } |
144 | if ((eaddr & bat->bepi_mask) == bat->bepi) { | 150 | if ((eaddr & bat->bepi_mask) == bat->bepi) { |
151 | u64 vsid; | ||
152 | kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, | ||
153 | eaddr >> SID_SHIFT, &vsid); | ||
154 | vsid <<= 16; | ||
155 | pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid; | ||
156 | |||
145 | pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask); | 157 | pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask); |
146 | pte->vpage = (eaddr >> 12) | VSID_BAT; | ||
147 | pte->may_read = bat->pp; | 158 | pte->may_read = bat->pp; |
148 | pte->may_write = bat->pp > 1; | 159 | pte->may_write = bat->pp > 1; |
149 | pte->may_execute = true; | 160 | pte->may_execute = true; |
@@ -172,7 +183,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
172 | struct kvmppc_sr *sre; | 183 | struct kvmppc_sr *sre; |
173 | hva_t ptegp; | 184 | hva_t ptegp; |
174 | u32 pteg[16]; | 185 | u32 pteg[16]; |
175 | u64 ptem = 0; | 186 | u32 ptem = 0; |
176 | int i; | 187 | int i; |
177 | int found = 0; | 188 | int found = 0; |
178 | 189 | ||
@@ -302,6 +313,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, | |||
302 | /* And then put in the new SR */ | 313 | /* And then put in the new SR */ |
303 | sre->raw = value; | 314 | sre->raw = value; |
304 | sre->vsid = (value & 0x0fffffff); | 315 | sre->vsid = (value & 0x0fffffff); |
316 | sre->valid = (value & 0x80000000) ? false : true; | ||
305 | sre->Ks = (value & 0x40000000) ? true : false; | 317 | sre->Ks = (value & 0x40000000) ? true : false; |
306 | sre->Kp = (value & 0x20000000) ? true : false; | 318 | sre->Kp = (value & 0x20000000) ? true : false; |
307 | sre->nx = (value & 0x10000000) ? true : false; | 319 | sre->nx = (value & 0x10000000) ? true : false; |
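Note on the hunk above: kvmppc_mmu_book3s_32_mtsrin() now also tracks whether a segment register is usable. A set top bit (architecturally the T bit selecting a direct-store segment) marks the entry invalid for KVM, and the remaining fields are decoded as before. A standalone decode sketch mirroring the masks used above:

    #include <stdint.h>
    #include <stdbool.h>

    struct sr_fields {
            bool valid, ks, kp, nx;
            uint32_t vsid;
    };

    static struct sr_fields decode_sr(uint32_t value)
    {
            struct sr_fields sr = {
                    .valid = !(value & 0x80000000), /* T=1 -> unusable for KVM */
                    .ks    =  value & 0x40000000,
                    .kp    =  value & 0x20000000,
                    .nx    =  value & 0x10000000,
                    .vsid  =  value & 0x0fffffff,
            };
            return sr;
    }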
@@ -312,36 +324,48 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, | |||
312 | 324 | ||
313 | static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) | 325 | static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) |
314 | { | 326 | { |
315 | kvmppc_mmu_pte_flush(vcpu, ea, ~0xFFFULL); | 327 | kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); |
316 | } | 328 | } |
317 | 329 | ||
318 | static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, | 330 | static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, |
319 | u64 *vsid) | 331 | u64 *vsid) |
320 | { | 332 | { |
333 | ulong ea = esid << SID_SHIFT; | ||
334 | struct kvmppc_sr *sr; | ||
335 | u64 gvsid = esid; | ||
336 | |||
337 | if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | ||
338 | sr = find_sr(to_book3s(vcpu), ea); | ||
339 | if (sr->valid) | ||
340 | gvsid = sr->vsid; | ||
341 | } | ||
342 | |||
321 | /* In case we only have one of MSR_IR or MSR_DR set, let's put | 343 | /* In case we only have one of MSR_IR or MSR_DR set, let's put |
322 | that in the real-mode context (and hope RM doesn't access | 344 | that in the real-mode context (and hope RM doesn't access |
323 | high memory) */ | 345 | high memory) */ |
324 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | 346 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { |
325 | case 0: | 347 | case 0: |
326 | *vsid = (VSID_REAL >> 16) | esid; | 348 | *vsid = VSID_REAL | esid; |
327 | break; | 349 | break; |
328 | case MSR_IR: | 350 | case MSR_IR: |
329 | *vsid = (VSID_REAL_IR >> 16) | esid; | 351 | *vsid = VSID_REAL_IR | gvsid; |
330 | break; | 352 | break; |
331 | case MSR_DR: | 353 | case MSR_DR: |
332 | *vsid = (VSID_REAL_DR >> 16) | esid; | 354 | *vsid = VSID_REAL_DR | gvsid; |
333 | break; | 355 | break; |
334 | case MSR_DR|MSR_IR: | 356 | case MSR_DR|MSR_IR: |
335 | { | 357 | if (!sr->valid) |
336 | ulong ea; | 358 | return -1; |
337 | ea = esid << SID_SHIFT; | 359 | |
338 | *vsid = find_sr(to_book3s(vcpu), ea)->vsid; | 360 | *vsid = sr->vsid; |
339 | break; | 361 | break; |
340 | } | ||
341 | default: | 362 | default: |
342 | BUG(); | 363 | BUG(); |
343 | } | 364 | } |
344 | 365 | ||
366 | if (vcpu->arch.msr & MSR_PR) | ||
367 | *vsid |= VSID_PR; | ||
368 | |||
345 | return 0; | 369 | return 0; |
346 | } | 370 | } |
347 | 371 | ||
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c new file mode 100644 index 000000000000..0bb66005338f --- /dev/null +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c | |||
@@ -0,0 +1,483 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. | ||
3 | * | ||
4 | * Authors: | ||
5 | * Alexander Graf <agraf@suse.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License, version 2, as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
19 | */ | ||
20 | |||
21 | #include <linux/kvm_host.h> | ||
22 | |||
23 | #include <asm/kvm_ppc.h> | ||
24 | #include <asm/kvm_book3s.h> | ||
25 | #include <asm/mmu-hash32.h> | ||
26 | #include <asm/machdep.h> | ||
27 | #include <asm/mmu_context.h> | ||
28 | #include <asm/hw_irq.h> | ||
29 | |||
30 | /* #define DEBUG_MMU */ | ||
31 | /* #define DEBUG_SR */ | ||
32 | |||
33 | #ifdef DEBUG_MMU | ||
34 | #define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) | ||
35 | #else | ||
36 | #define dprintk_mmu(a, ...) do { } while(0) | ||
37 | #endif | ||
38 | |||
39 | #ifdef DEBUG_SR | ||
40 | #define dprintk_sr(a, ...) printk(KERN_INFO a, __VA_ARGS__) | ||
41 | #else | ||
42 | #define dprintk_sr(a, ...) do { } while(0) | ||
43 | #endif | ||
44 | |||
45 | #if PAGE_SHIFT != 12 | ||
46 | #error Unknown page size | ||
47 | #endif | ||
48 | |||
49 | #ifdef CONFIG_SMP | ||
50 | #error XXX need to grab mmu_hash_lock | ||
51 | #endif | ||
52 | |||
53 | #ifdef CONFIG_PTE_64BIT | ||
54 | #error Only 32 bit pages are supported for now | ||
55 | #endif | ||
56 | |||
57 | static ulong htab; | ||
58 | static u32 htabmask; | ||
59 | |||
60 | static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) | ||
61 | { | ||
62 | volatile u32 *pteg; | ||
63 | |||
64 | dprintk_mmu("KVM: Flushing SPTE: 0x%llx (0x%llx) -> 0x%llx\n", | ||
65 | pte->pte.eaddr, pte->pte.vpage, pte->host_va); | ||
66 | |||
67 | pteg = (u32*)pte->slot; | ||
68 | |||
69 | pteg[0] = 0; | ||
70 | asm volatile ("sync"); | ||
71 | asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory"); | ||
72 | asm volatile ("sync"); | ||
73 | asm volatile ("tlbsync"); | ||
74 | |||
75 | pte->host_va = 0; | ||
76 | |||
77 | if (pte->pte.may_write) | ||
78 | kvm_release_pfn_dirty(pte->pfn); | ||
79 | else | ||
80 | kvm_release_pfn_clean(pte->pfn); | ||
81 | } | ||
82 | |||
83 | void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) | ||
84 | { | ||
85 | int i; | ||
86 | |||
87 | dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%x & 0x%x\n", | ||
88 | vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); | ||
89 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
90 | |||
91 | guest_ea &= ea_mask; | ||
92 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
93 | struct hpte_cache *pte; | ||
94 | |||
95 | pte = &vcpu->arch.hpte_cache[i]; | ||
96 | if (!pte->host_va) | ||
97 | continue; | ||
98 | |||
99 | if ((pte->pte.eaddr & ea_mask) == guest_ea) { | ||
100 | invalidate_pte(vcpu, pte); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | /* Doing a complete flush -> start from scratch */ | ||
105 | if (!ea_mask) | ||
106 | vcpu->arch.hpte_cache_offset = 0; | ||
107 | } | ||
108 | |||
109 | void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) | ||
110 | { | ||
111 | int i; | ||
112 | |||
113 | dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", | ||
114 | vcpu->arch.hpte_cache_offset, guest_vp, vp_mask); | ||
115 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
116 | |||
117 | guest_vp &= vp_mask; | ||
118 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
119 | struct hpte_cache *pte; | ||
120 | |||
121 | pte = &vcpu->arch.hpte_cache[i]; | ||
122 | if (!pte->host_va) | ||
123 | continue; | ||
124 | |||
125 | if ((pte->pte.vpage & vp_mask) == guest_vp) { | ||
126 | invalidate_pte(vcpu, pte); | ||
127 | } | ||
128 | } | ||
129 | } | ||
130 | |||
131 | void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) | ||
132 | { | ||
133 | int i; | ||
134 | |||
135 | dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", | ||
136 | vcpu->arch.hpte_cache_offset, pa_start, pa_end); | ||
137 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | ||
138 | |||
139 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | ||
140 | struct hpte_cache *pte; | ||
141 | |||
142 | pte = &vcpu->arch.hpte_cache[i]; | ||
143 | if (!pte->host_va) | ||
144 | continue; | ||
145 | |||
146 | if ((pte->pte.raddr >= pa_start) && | ||
147 | (pte->pte.raddr < pa_end)) { | ||
148 | invalidate_pte(vcpu, pte); | ||
149 | } | ||
150 | } | ||
151 | } | ||
152 | |||
153 | struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data) | ||
154 | { | ||
155 | int i; | ||
156 | u64 guest_vp; | ||
157 | |||
158 | guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false); | ||
159 | for (i=0; i<vcpu->arch.hpte_cache_offset; i++) { | ||
160 | struct hpte_cache *pte; | ||
161 | |||
162 | pte = &vcpu->arch.hpte_cache[i]; | ||
163 | if (!pte->host_va) | ||
164 | continue; | ||
165 | |||
166 | if (pte->pte.vpage == guest_vp) | ||
167 | return &pte->pte; | ||
168 | } | ||
169 | |||
170 | return NULL; | ||
171 | } | ||
172 | |||
173 | static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) | ||
174 | { | ||
175 | if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM) | ||
176 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
177 | |||
178 | return vcpu->arch.hpte_cache_offset++; | ||
179 | } | ||
180 | |||
181 | /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using | ||
182 | * a hash, so we don't waste cycles on looping */ | ||
183 | static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) | ||
184 | { | ||
185 | return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ | ||
186 | ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ | ||
187 | ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ | ||
188 | ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ | ||
189 | ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ | ||
190 | ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ | ||
191 | ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ | ||
192 | ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); | ||
193 | } | ||
194 | |||
195 | |||
196 | static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) | ||
197 | { | ||
198 | struct kvmppc_sid_map *map; | ||
199 | u16 sid_map_mask; | ||
200 | |||
201 | if (vcpu->arch.msr & MSR_PR) | ||
202 | gvsid |= VSID_PR; | ||
203 | |||
204 | sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); | ||
205 | map = &to_book3s(vcpu)->sid_map[sid_map_mask]; | ||
206 | if (map->guest_vsid == gvsid) { | ||
207 | dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n", | ||
208 | gvsid, map->host_vsid); | ||
209 | return map; | ||
210 | } | ||
211 | |||
212 | map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask]; | ||
213 | if (map->guest_vsid == gvsid) { | ||
214 | dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n", | ||
215 | gvsid, map->host_vsid); | ||
216 | return map; | ||
217 | } | ||
218 | |||
219 | dprintk_sr("SR: Searching 0x%llx -> not found\n", gvsid); | ||
220 | return NULL; | ||
221 | } | ||
222 | |||
223 | static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr, | ||
224 | bool primary) | ||
225 | { | ||
226 | u32 page, hash; | ||
227 | ulong pteg = htab; | ||
228 | |||
229 | page = (eaddr & ~ESID_MASK) >> 12; | ||
230 | |||
231 | hash = ((vsid ^ page) << 6); | ||
232 | if (!primary) | ||
233 | hash = ~hash; | ||
234 | |||
235 | hash &= htabmask; | ||
236 | |||
237 | pteg |= hash; | ||
238 | |||
239 | dprintk_mmu("htab: %lx | hash: %x | htabmask: %x | pteg: %lx\n", | ||
240 | htab, hash, htabmask, pteg); | ||
241 | |||
242 | return (u32*)pteg; | ||
243 | } | ||
244 | |||
245 | extern char etext[]; | ||
246 | |||
247 | int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | ||
248 | { | ||
249 | pfn_t hpaddr; | ||
250 | u64 va; | ||
251 | u64 vsid; | ||
252 | struct kvmppc_sid_map *map; | ||
253 | volatile u32 *pteg; | ||
254 | u32 eaddr = orig_pte->eaddr; | ||
255 | u32 pteg0, pteg1; | ||
256 | register int rr = 0; | ||
257 | bool primary = false; | ||
258 | bool evict = false; | ||
259 | int hpte_id; | ||
260 | struct hpte_cache *pte; | ||
261 | |||
262 | /* Get host physical address for gpa */ | ||
263 | hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); | ||
264 | if (kvm_is_error_hva(hpaddr)) { | ||
265 | printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", | ||
266 | orig_pte->eaddr); | ||
267 | return -EINVAL; | ||
268 | } | ||
269 | hpaddr <<= PAGE_SHIFT; | ||
270 | |||
271 | /* and write the mapping ea -> hpa into the pt */ | ||
272 | vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); | ||
273 | map = find_sid_vsid(vcpu, vsid); | ||
274 | if (!map) { | ||
275 | kvmppc_mmu_map_segment(vcpu, eaddr); | ||
276 | map = find_sid_vsid(vcpu, vsid); | ||
277 | } | ||
278 | BUG_ON(!map); | ||
279 | |||
280 | vsid = map->host_vsid; | ||
281 | va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK); | ||
282 | |||
283 | next_pteg: | ||
284 | if (rr == 16) { | ||
285 | primary = !primary; | ||
286 | evict = true; | ||
287 | rr = 0; | ||
288 | } | ||
289 | |||
290 | pteg = kvmppc_mmu_get_pteg(vcpu, vsid, eaddr, primary); | ||
291 | |||
292 | /* not evicting yet */ | ||
293 | if (!evict && (pteg[rr] & PTE_V)) { | ||
294 | rr += 2; | ||
295 | goto next_pteg; | ||
296 | } | ||
297 | |||
298 | dprintk_mmu("KVM: old PTEG: %p (%d)\n", pteg, rr); | ||
299 | dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]); | ||
300 | dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]); | ||
301 | dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]); | ||
302 | dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]); | ||
303 | dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]); | ||
304 | dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]); | ||
305 | dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]); | ||
306 | dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]); | ||
307 | |||
308 | pteg0 = ((eaddr & 0x0fffffff) >> 22) | (vsid << 7) | PTE_V | | ||
309 | (primary ? 0 : PTE_SEC); | ||
310 | pteg1 = hpaddr | PTE_M | PTE_R | PTE_C; | ||
311 | |||
312 | if (orig_pte->may_write) { | ||
313 | pteg1 |= PP_RWRW; | ||
314 | mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); | ||
315 | } else { | ||
316 | pteg1 |= PP_RWRX; | ||
317 | } | ||
318 | |||
319 | local_irq_disable(); | ||
320 | |||
321 | if (pteg[rr]) { | ||
322 | pteg[rr] = 0; | ||
323 | asm volatile ("sync"); | ||
324 | } | ||
325 | pteg[rr + 1] = pteg1; | ||
326 | pteg[rr] = pteg0; | ||
327 | asm volatile ("sync"); | ||
328 | |||
329 | local_irq_enable(); | ||
330 | |||
331 | dprintk_mmu("KVM: new PTEG: %p\n", pteg); | ||
332 | dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]); | ||
333 | dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]); | ||
334 | dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]); | ||
335 | dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]); | ||
336 | dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]); | ||
337 | dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]); | ||
338 | dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]); | ||
339 | dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]); | ||
340 | |||
341 | |||
342 | /* Now tell our Shadow PTE code about the new page */ | ||
343 | |||
344 | hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); | ||
345 | pte = &vcpu->arch.hpte_cache[hpte_id]; | ||
346 | |||
347 | dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", | ||
348 | orig_pte->may_write ? 'w' : '-', | ||
349 | orig_pte->may_execute ? 'x' : '-', | ||
350 | orig_pte->eaddr, (ulong)pteg, va, | ||
351 | orig_pte->vpage, hpaddr); | ||
352 | |||
353 | pte->slot = (ulong)&pteg[rr]; | ||
354 | pte->host_va = va; | ||
355 | pte->pte = *orig_pte; | ||
356 | pte->pfn = hpaddr >> PAGE_SHIFT; | ||
357 | |||
358 | return 0; | ||
359 | } | ||
360 | |||
361 | static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) | ||
362 | { | ||
363 | struct kvmppc_sid_map *map; | ||
364 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | ||
365 | u16 sid_map_mask; | ||
366 | static int backwards_map = 0; | ||
367 | |||
368 | if (vcpu->arch.msr & MSR_PR) | ||
369 | gvsid |= VSID_PR; | ||
370 | |||
371 | /* We might get collisions that trap in preceding order, so let's | ||
372 | map them differently */ | ||
373 | |||
374 | sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); | ||
375 | if (backwards_map) | ||
376 | sid_map_mask = SID_MAP_MASK - sid_map_mask; | ||
377 | |||
378 | map = &to_book3s(vcpu)->sid_map[sid_map_mask]; | ||
379 | |||
380 | /* Make sure we're taking the other map next time */ | ||
381 | backwards_map = !backwards_map; | ||
382 | |||
383 | /* Uh-oh ... out of mappings. Let's flush! */ | ||
384 | if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) { | ||
385 | vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; | ||
386 | memset(vcpu_book3s->sid_map, 0, | ||
387 | sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); | ||
388 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
389 | kvmppc_mmu_flush_segments(vcpu); | ||
390 | } | ||
391 | map->host_vsid = vcpu_book3s->vsid_next; | ||
392 | |||
393 | /* Would have to be 111 to be completely aligned with the rest of | ||
394 | Linux, but that is just way too little space! */ | ||
395 | vcpu_book3s->vsid_next+=1; | ||
396 | |||
397 | map->guest_vsid = gvsid; | ||
398 | map->valid = true; | ||
399 | |||
400 | return map; | ||
401 | } | ||
402 | |||
403 | int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | ||
404 | { | ||
405 | u32 esid = eaddr >> SID_SHIFT; | ||
406 | u64 gvsid; | ||
407 | u32 sr; | ||
408 | struct kvmppc_sid_map *map; | ||
409 | struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); | ||
410 | |||
411 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { | ||
412 | /* Invalidate an entry */ | ||
413 | svcpu->sr[esid] = SR_INVALID; | ||
414 | return -ENOENT; | ||
415 | } | ||
416 | |||
417 | map = find_sid_vsid(vcpu, gvsid); | ||
418 | if (!map) | ||
419 | map = create_sid_map(vcpu, gvsid); | ||
420 | |||
421 | map->guest_esid = esid; | ||
422 | sr = map->host_vsid | SR_KP; | ||
423 | svcpu->sr[esid] = sr; | ||
424 | |||
425 | dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr); | ||
426 | |||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) | ||
431 | { | ||
432 | int i; | ||
433 | struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu); | ||
434 | |||
435 | dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr)); | ||
436 | for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++) | ||
437 | svcpu->sr[i] = SR_INVALID; | ||
438 | } | ||
439 | |||
440 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) | ||
441 | { | ||
442 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | ||
443 | preempt_disable(); | ||
444 | __destroy_context(to_book3s(vcpu)->context_id); | ||
445 | preempt_enable(); | ||
446 | } | ||
447 | |||
448 | /* From mm/mmu_context_hash32.c */ | ||
449 | #define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff) | ||
450 | |||
451 | int kvmppc_mmu_init(struct kvm_vcpu *vcpu) | ||
452 | { | ||
453 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | ||
454 | int err; | ||
455 | ulong sdr1; | ||
456 | |||
457 | err = __init_new_context(); | ||
458 | if (err < 0) | ||
459 | return -1; | ||
460 | vcpu3s->context_id = err; | ||
461 | |||
462 | vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1; | ||
463 | vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id); | ||
464 | |||
465 | #if 0 /* XXX still doesn't guarantee uniqueness */ | ||
466 | /* We could collide with the Linux vsid space because the vsid | ||
467 | * wraps around at 24 bits. We're safe if we do our own space | ||
468 | * though, so let's always set the highest bit. */ | ||
469 | |||
470 | vcpu3s->vsid_max |= 0x00800000; | ||
471 | vcpu3s->vsid_first |= 0x00800000; | ||
472 | #endif | ||
473 | BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first); | ||
474 | |||
475 | vcpu3s->vsid_next = vcpu3s->vsid_first; | ||
476 | |||
477 | /* Remember where the HTAB is */ | ||
478 | asm ( "mfsdr1 %0" : "=r"(sdr1) ); | ||
479 | htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0; | ||
480 | htab = (ulong)__va(sdr1 & 0xffff0000); | ||
481 | |||
482 | return 0; | ||
483 | } | ||
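kvmppc_mmu_init() above reserves a private guest-VSID window out of a freshly allocated MMU context (CTX_TO_VSID mirrors the derivation used by mm/mmu_context_hash32.c) and captures the hash table location and PTEG address mask from SDR1. A hedged sketch of both computations, using a made-up context number and SDR1 value rather than anything read from hardware:

#include <stdio.h>

/* Same derivation as CTX_TO_VSID above: VSIDs are 24 bits wide on 32-bit. */
#define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff)

int main(void)
{
        int context_id = 5;     /* pretend __init_new_context() returned 5 */
        unsigned long vsid_first = CTX_TO_VSID(context_id);
        unsigned long vsid_max   = CTX_TO_VSID(context_id + 1) - 1;

        /* Made-up SDR1 value, only to exercise the mask arithmetic above. */
        unsigned long sdr1 = 0x00ff01ff;
        unsigned long htabmask  = ((sdr1 & 0x1FF) << 16) | 0xFFC0;
        unsigned long htab_phys = sdr1 & 0xffff0000;

        printf("vsid range: 0x%06lx .. 0x%06lx\n", vsid_first, vsid_max);
        printf("htab at phys 0x%08lx, PTEG address mask 0x%08lx\n",
               htab_phys, htabmask);
        return 0;
}
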
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S new file mode 100644 index 000000000000..3608471ad2d8 --- /dev/null +++ b/arch/powerpc/kvm/book3s_32_sr.S | |||
@@ -0,0 +1,143 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright SUSE Linux Products GmbH 2009 | ||
16 | * | ||
17 | * Authors: Alexander Graf <agraf@suse.de> | ||
18 | */ | ||
19 | |||
20 | /****************************************************************************** | ||
21 | * * | ||
22 | * Entry code * | ||
23 | * * | ||
24 | *****************************************************************************/ | ||
25 | |||
26 | .macro LOAD_GUEST_SEGMENTS | ||
27 | |||
28 | /* Required state: | ||
29 | * | ||
30 | * MSR = ~IR|DR | ||
31 | * R1 = host R1 | ||
32 | * R2 = host R2 | ||
33 | * R3 = shadow vcpu | ||
34 | * all other volatile GPRS = free | ||
35 | * SVCPU[CR] = guest CR | ||
36 | * SVCPU[XER] = guest XER | ||
37 | * SVCPU[CTR] = guest CTR | ||
38 | * SVCPU[LR] = guest LR | ||
39 | */ | ||
40 | |||
41 | #define XCHG_SR(n) lwz r9, (SVCPU_SR+(n*4))(r3); \ | ||
42 | mtsr n, r9 | ||
43 | |||
44 | XCHG_SR(0) | ||
45 | XCHG_SR(1) | ||
46 | XCHG_SR(2) | ||
47 | XCHG_SR(3) | ||
48 | XCHG_SR(4) | ||
49 | XCHG_SR(5) | ||
50 | XCHG_SR(6) | ||
51 | XCHG_SR(7) | ||
52 | XCHG_SR(8) | ||
53 | XCHG_SR(9) | ||
54 | XCHG_SR(10) | ||
55 | XCHG_SR(11) | ||
56 | XCHG_SR(12) | ||
57 | XCHG_SR(13) | ||
58 | XCHG_SR(14) | ||
59 | XCHG_SR(15) | ||
60 | |||
61 | /* Clear BATs. */ | ||
62 | |||
63 | #define KVM_KILL_BAT(n, reg) \ | ||
64 | mtspr SPRN_IBAT##n##U,reg; \ | ||
65 | mtspr SPRN_IBAT##n##L,reg; \ | ||
66 | mtspr SPRN_DBAT##n##U,reg; \ | ||
67 | mtspr SPRN_DBAT##n##L,reg; \ | ||
68 | |||
69 | li r9, 0 | ||
70 | KVM_KILL_BAT(0, r9) | ||
71 | KVM_KILL_BAT(1, r9) | ||
72 | KVM_KILL_BAT(2, r9) | ||
73 | KVM_KILL_BAT(3, r9) | ||
74 | |||
75 | .endm | ||
76 | |||
77 | /****************************************************************************** | ||
78 | * * | ||
79 | * Exit code * | ||
80 | * * | ||
81 | *****************************************************************************/ | ||
82 | |||
83 | .macro LOAD_HOST_SEGMENTS | ||
84 | |||
85 | /* Register usage at this point: | ||
86 | * | ||
87 | * R1 = host R1 | ||
88 | * R2 = host R2 | ||
89 | * R12 = exit handler id | ||
90 | * R13 = shadow vcpu - SHADOW_VCPU_OFF | ||
91 | * SVCPU.* = guest * | ||
92 | * SVCPU[CR] = guest CR | ||
93 | * SVCPU[XER] = guest XER | ||
94 | * SVCPU[CTR] = guest CTR | ||
95 | * SVCPU[LR] = guest LR | ||
96 | * | ||
97 | */ | ||
98 | |||
99 | /* Restore BATs */ | ||
100 | |||
101 | /* We only overwrite the upper part, so we only restore | ||
102 | the upper part. */ | ||
103 | #define KVM_LOAD_BAT(n, reg, RA, RB) \ | ||
104 | lwz RA,(n*16)+0(reg); \ | ||
105 | lwz RB,(n*16)+4(reg); \ | ||
106 | mtspr SPRN_IBAT##n##U,RA; \ | ||
107 | mtspr SPRN_IBAT##n##L,RB; \ | ||
108 | lwz RA,(n*16)+8(reg); \ | ||
109 | lwz RB,(n*16)+12(reg); \ | ||
110 | mtspr SPRN_DBAT##n##U,RA; \ | ||
111 | mtspr SPRN_DBAT##n##L,RB; \ | ||
112 | |||
113 | lis r9, BATS@ha | ||
114 | addi r9, r9, BATS@l | ||
115 | tophys(r9, r9) | ||
116 | KVM_LOAD_BAT(0, r9, r10, r11) | ||
117 | KVM_LOAD_BAT(1, r9, r10, r11) | ||
118 | KVM_LOAD_BAT(2, r9, r10, r11) | ||
119 | KVM_LOAD_BAT(3, r9, r10, r11) | ||
120 | |||
121 | /* Restore Segment Registers */ | ||
122 | |||
123 | /* 0xc - 0xf */ | ||
124 | |||
125 | li r0, 4 | ||
126 | mtctr r0 | ||
127 | LOAD_REG_IMMEDIATE(r3, 0x20000000 | (0x111 * 0xc)) | ||
128 | lis r4, 0xc000 | ||
129 | 3: mtsrin r3, r4 | ||
130 | addi r3, r3, 0x111 /* increment VSID */ | ||
131 | addis r4, r4, 0x1000 /* address of next segment */ | ||
132 | bdnz 3b | ||
133 | |||
134 | /* 0x0 - 0xb */ | ||
135 | |||
136 | /* 'current->mm' needs to be in r4 */ | ||
137 | tophys(r4, r2) | ||
138 | lwz r4, MM(r4) | ||
139 | tophys(r4, r4) | ||
140 | /* This only clobbers r0, r3, r4 and r5 */ | ||
141 | bl switch_mmu_context | ||
142 | |||
143 | .endm | ||
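LOAD_HOST_SEGMENTS above reloads the four kernel segments (0xc-0xf) with fixed values before switch_mmu_context() takes care of segments 0x0-0xb: the first SR is 0x20000000 | (0x111 * 0xc) and each following segment adds 0x111 to the VSID. A small sketch reproducing the values the loop writes, assuming nothing beyond what the assembly shows:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* Reproduce the SR values written by the 0xc-0xf loop above:
         * start at 0x20000000 | (0x111 * 0xc) and add 0x111 per segment. */
        uint32_t sr = 0x20000000 | (0x111 * 0xc);
        uint32_t ea = 0xc0000000u;

        for (int i = 0; i < 4; i++) {
                printf("segment 0x%x (ea 0x%08x): SR = 0x%08x\n",
                       ea >> 28, ea, sr);
                sr += 0x111;        /* next VSID */
                ea += 0x10000000u;  /* next 256 MB segment */
        }
        return 0;
}
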
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 512dcff77554..4025ea26b3c1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c | |||
@@ -232,7 +232,7 @@ do_second: | |||
232 | } | 232 | } |
233 | 233 | ||
234 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " | 234 | dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " |
235 | "-> 0x%llx\n", | 235 | "-> 0x%lx\n", |
236 | eaddr, avpn, gpte->vpage, gpte->raddr); | 236 | eaddr, avpn, gpte->vpage, gpte->raddr); |
237 | found = true; | 237 | found = true; |
238 | break; | 238 | break; |
@@ -383,7 +383,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) | |||
383 | 383 | ||
384 | if (vcpu->arch.msr & MSR_IR) { | 384 | if (vcpu->arch.msr & MSR_IR) { |
385 | kvmppc_mmu_flush_segments(vcpu); | 385 | kvmppc_mmu_flush_segments(vcpu); |
386 | kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); | 386 | kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); |
387 | } | 387 | } |
388 | } | 388 | } |
389 | 389 | ||
@@ -439,37 +439,43 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va, | |||
439 | kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); | 439 | kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); |
440 | } | 440 | } |
441 | 441 | ||
442 | static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, | 442 | static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, |
443 | u64 *vsid) | 443 | u64 *vsid) |
444 | { | 444 | { |
445 | ulong ea = esid << SID_SHIFT; | ||
446 | struct kvmppc_slb *slb; | ||
447 | u64 gvsid = esid; | ||
448 | |||
449 | if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | ||
450 | slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); | ||
451 | if (slb) | ||
452 | gvsid = slb->vsid; | ||
453 | } | ||
454 | |||
445 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { | 455 | switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { |
446 | case 0: | 456 | case 0: |
447 | *vsid = (VSID_REAL >> 16) | esid; | 457 | *vsid = VSID_REAL | esid; |
448 | break; | 458 | break; |
449 | case MSR_IR: | 459 | case MSR_IR: |
450 | *vsid = (VSID_REAL_IR >> 16) | esid; | 460 | *vsid = VSID_REAL_IR | gvsid; |
451 | break; | 461 | break; |
452 | case MSR_DR: | 462 | case MSR_DR: |
453 | *vsid = (VSID_REAL_DR >> 16) | esid; | 463 | *vsid = VSID_REAL_DR | gvsid; |
454 | break; | 464 | break; |
455 | case MSR_DR|MSR_IR: | 465 | case MSR_DR|MSR_IR: |
456 | { | 466 | if (!slb) |
457 | ulong ea; | ||
458 | struct kvmppc_slb *slb; | ||
459 | ea = esid << SID_SHIFT; | ||
460 | slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); | ||
461 | if (slb) | ||
462 | *vsid = slb->vsid; | ||
463 | else | ||
464 | return -ENOENT; | 467 | return -ENOENT; |
465 | 468 | ||
469 | *vsid = gvsid; | ||
466 | break; | 470 | break; |
467 | } | ||
468 | default: | 471 | default: |
469 | BUG(); | 472 | BUG(); |
470 | break; | 473 | break; |
471 | } | 474 | } |
472 | 475 | ||
476 | if (vcpu->arch.msr & MSR_PR) | ||
477 | *vsid |= VSID_PR; | ||
478 | |||
473 | return 0; | 479 | return 0; |
474 | } | 480 | } |
475 | 481 | ||
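The reworked esid_to_vsid() first consults the guest SLB whenever either relocation bit is on, then selects a VSID namespace according to MSR[IR]/MSR[DR], and finally ORs in VSID_PR for problem state. A hedged C model of that selection, with the MSR bits spelled out, the VSID_REAL* constants replaced by illustrative values and the SLB lookup stubbed:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins; the real constants live in asm/kvm_book3s.h. */
#define MSR_IR        (1UL << 5)
#define MSR_DR        (1UL << 4)
#define MSR_PR        (1UL << 14)
#define VSID_REAL     0x1000000000000000ULL
#define VSID_REAL_IR  0x2000000000000000ULL
#define VSID_REAL_DR  0x4000000000000000ULL
#define VSID_PR       0x8000000000000000ULL

/* Stub for the guest SLB lookup; returns 0 when no SLBE matches. */
static uint64_t find_slb_vsid(uint64_t ea) { (void)ea; return 0; }

static int esid_to_vsid(unsigned long msr, uint64_t esid, uint64_t *vsid)
{
        uint64_t gvsid = esid;
        int have_slb = 0;

        if (msr & (MSR_DR | MSR_IR)) {
                uint64_t slb_vsid = find_slb_vsid(esid << 28 /* SID_SHIFT */);
                if (slb_vsid) {
                        gvsid = slb_vsid;
                        have_slb = 1;
                }
        }

        switch (msr & (MSR_DR | MSR_IR)) {
        case 0:               *vsid = VSID_REAL | esid;      break;
        case MSR_IR:          *vsid = VSID_REAL_IR | gvsid;  break;
        case MSR_DR:          *vsid = VSID_REAL_DR | gvsid;  break;
        case MSR_DR | MSR_IR:
                if (!have_slb)
                        return -1;   /* -ENOENT in the kernel */
                *vsid = gvsid;
                break;
        }

        if (msr & MSR_PR)
                *vsid |= VSID_PR;
        return 0;
}

int main(void)
{
        uint64_t vsid;

        if (!esid_to_vsid(MSR_IR, 0x12, &vsid))
                printf("IR-only real mode vsid: 0x%llx\n",
                       (unsigned long long)vsid);
        return 0;
}
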
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index f2899b297ffd..e4b5744977f6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c | |||
@@ -48,21 +48,25 @@ | |||
48 | 48 | ||
49 | static void invalidate_pte(struct hpte_cache *pte) | 49 | static void invalidate_pte(struct hpte_cache *pte) |
50 | { | 50 | { |
51 | dprintk_mmu("KVM: Flushing SPT %d: 0x%llx (0x%llx) -> 0x%llx\n", | 51 | dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", |
52 | i, pte->pte.eaddr, pte->pte.vpage, pte->host_va); | 52 | pte->pte.eaddr, pte->pte.vpage, pte->host_va); |
53 | 53 | ||
54 | ppc_md.hpte_invalidate(pte->slot, pte->host_va, | 54 | ppc_md.hpte_invalidate(pte->slot, pte->host_va, |
55 | MMU_PAGE_4K, MMU_SEGSIZE_256M, | 55 | MMU_PAGE_4K, MMU_SEGSIZE_256M, |
56 | false); | 56 | false); |
57 | pte->host_va = 0; | 57 | pte->host_va = 0; |
58 | kvm_release_pfn_dirty(pte->pfn); | 58 | |
59 | if (pte->pte.may_write) | ||
60 | kvm_release_pfn_dirty(pte->pfn); | ||
61 | else | ||
62 | kvm_release_pfn_clean(pte->pfn); | ||
59 | } | 63 | } |
60 | 64 | ||
61 | void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 guest_ea, u64 ea_mask) | 65 | void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) |
62 | { | 66 | { |
63 | int i; | 67 | int i; |
64 | 68 | ||
65 | dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%llx & 0x%llx\n", | 69 | dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", |
66 | vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); | 70 | vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); |
67 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | 71 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); |
68 | 72 | ||
@@ -106,12 +110,12 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) | |||
106 | } | 110 | } |
107 | } | 111 | } |
108 | 112 | ||
109 | void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end) | 113 | void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) |
110 | { | 114 | { |
111 | int i; | 115 | int i; |
112 | 116 | ||
113 | dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", | 117 | dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx & 0x%lx\n", |
114 | vcpu->arch.hpte_cache_offset, guest_pa, pa_mask); | 118 | vcpu->arch.hpte_cache_offset, pa_start, pa_end); |
115 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); | 119 | BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); |
116 | 120 | ||
117 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { | 121 | for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { |
@@ -182,7 +186,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) | |||
182 | sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); | 186 | sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); |
183 | map = &to_book3s(vcpu)->sid_map[sid_map_mask]; | 187 | map = &to_book3s(vcpu)->sid_map[sid_map_mask]; |
184 | if (map->guest_vsid == gvsid) { | 188 | if (map->guest_vsid == gvsid) { |
185 | dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n", | 189 | dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n", |
186 | gvsid, map->host_vsid); | 190 | gvsid, map->host_vsid); |
187 | return map; | 191 | return map; |
188 | } | 192 | } |
@@ -194,7 +198,8 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) | |||
194 | return map; | 198 | return map; |
195 | } | 199 | } |
196 | 200 | ||
197 | dprintk_slb("SLB: Searching 0x%llx -> not found\n", gvsid); | 201 | dprintk_slb("SLB: Searching %d/%d: 0x%llx -> not found\n", |
202 | sid_map_mask, SID_MAP_MASK - sid_map_mask, gvsid); | ||
198 | return NULL; | 203 | return NULL; |
199 | } | 204 | } |
200 | 205 | ||
@@ -212,7 +217,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | |||
212 | /* Get host physical address for gpa */ | 217 | /* Get host physical address for gpa */ |
213 | hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); | 218 | hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); |
214 | if (kvm_is_error_hva(hpaddr)) { | 219 | if (kvm_is_error_hva(hpaddr)) { |
215 | printk(KERN_INFO "Couldn't get guest page for gfn %llx!\n", orig_pte->eaddr); | 220 | printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); |
216 | return -EINVAL; | 221 | return -EINVAL; |
217 | } | 222 | } |
218 | hpaddr <<= PAGE_SHIFT; | 223 | hpaddr <<= PAGE_SHIFT; |
@@ -227,10 +232,16 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) | |||
227 | vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); | 232 | vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); |
228 | map = find_sid_vsid(vcpu, vsid); | 233 | map = find_sid_vsid(vcpu, vsid); |
229 | if (!map) { | 234 | if (!map) { |
230 | kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr); | 235 | ret = kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr); |
236 | WARN_ON(ret < 0); | ||
231 | map = find_sid_vsid(vcpu, vsid); | 237 | map = find_sid_vsid(vcpu, vsid); |
232 | } | 238 | } |
233 | BUG_ON(!map); | 239 | if (!map) { |
240 | printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n", | ||
241 | vsid, orig_pte->eaddr); | ||
242 | WARN_ON(true); | ||
243 | return -EINVAL; | ||
244 | } | ||
234 | 245 | ||
235 | vsid = map->host_vsid; | 246 | vsid = map->host_vsid; |
236 | va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); | 247 | va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); |
@@ -257,26 +268,26 @@ map_again: | |||
257 | 268 | ||
258 | if (ret < 0) { | 269 | if (ret < 0) { |
259 | /* If we couldn't map a primary PTE, try a secondary */ | 270 | /* If we couldn't map a primary PTE, try a secondary */ |
260 | #ifdef USE_SECONDARY | ||
261 | hash = ~hash; | 271 | hash = ~hash; |
272 | vflags ^= HPTE_V_SECONDARY; | ||
262 | attempt++; | 273 | attempt++; |
263 | if (attempt % 2) | ||
264 | vflags = HPTE_V_SECONDARY; | ||
265 | else | ||
266 | vflags = 0; | ||
267 | #else | ||
268 | attempt = 2; | ||
269 | #endif | ||
270 | goto map_again; | 274 | goto map_again; |
271 | } else { | 275 | } else { |
272 | int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); | 276 | int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); |
273 | struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id]; | 277 | struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id]; |
274 | 278 | ||
275 | dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%lx (0x%llx) -> %lx\n", | 279 | dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", |
276 | ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', | 280 | ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', |
277 | (rflags & HPTE_R_N) ? '-' : 'x', | 281 | (rflags & HPTE_R_N) ? '-' : 'x', |
278 | orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); | 282 | orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); |
279 | 283 | ||
284 | /* The ppc_md code may give us a secondary entry even though we | ||
285 | asked for a primary. Fix up. */ | ||
286 | if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) { | ||
287 | hash = ~hash; | ||
288 | hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); | ||
289 | } | ||
290 | |||
280 | pte->slot = hpteg + (ret & 7); | 291 | pte->slot = hpteg + (ret & 7); |
281 | pte->host_va = va; | 292 | pte->host_va = va; |
282 | pte->pte = *orig_pte; | 293 | pte->pte = *orig_pte; |
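The retry path above now keeps vflags consistent with the hash being tried (hash = ~hash together with vflags ^= HPTE_V_SECONDARY) and, after a successful insert, re-derives the PTEG base when ppc_md placed the entry in the secondary group although a primary slot was requested. A compact sketch of that bookkeeping, with the insert helper mocked to force exactly that case:

#include <stdio.h>

#define HPTES_PER_GROUP   8
#define HPTE_V_SECONDARY  0x0002UL
#define _PTEIDX_SECONDARY 0x8

/* Mock insert: simulate the case the new fixup handles, i.e. we asked for a
 * primary slot but the helper placed the entry in the secondary group. */
static long hpte_insert_mock(unsigned long pteg, unsigned long vflags)
{
        (void)pteg;
        if (!(vflags & HPTE_V_SECONDARY))
                return 3 | _PTEIDX_SECONDARY;
        return 3;
}

int main(void)
{
        unsigned long htab_hash_mask = 0xfff;   /* made-up hash table geometry */
        unsigned long hash = 0x123, vflags = 0;
        unsigned long hpteg;
        long ret;

map_again:
        hpteg = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        ret = hpte_insert_mock(hpteg, vflags);
        if (ret < 0) {
                hash = ~hash;                   /* switch PTEG... */
                vflags ^= HPTE_V_SECONDARY;     /* ...and tell the insert so */
                goto map_again;
        }

        /* If the insert landed in the secondary group although we asked for
         * the primary one, recompute the PTEG base the slot refers to. */
        if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) {
                hash = ~hash;
                hpteg = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        }

        printf("slot = 0x%lx (pteg 0x%lx + %ld)\n",
               hpteg + (ret & 7), hpteg, ret & 7);
        return 0;
}
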
@@ -321,6 +332,9 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) | |||
321 | map->guest_vsid = gvsid; | 332 | map->guest_vsid = gvsid; |
322 | map->valid = true; | 333 | map->valid = true; |
323 | 334 | ||
335 | dprintk_slb("SLB: New mapping at %d: 0x%llx -> 0x%llx\n", | ||
336 | sid_map_mask, gvsid, map->host_vsid); | ||
337 | |||
324 | return map; | 338 | return map; |
325 | } | 339 | } |
326 | 340 | ||
@@ -331,14 +345,14 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) | |||
331 | int found_inval = -1; | 345 | int found_inval = -1; |
332 | int r; | 346 | int r; |
333 | 347 | ||
334 | if (!get_paca()->kvm_slb_max) | 348 | if (!to_svcpu(vcpu)->slb_max) |
335 | get_paca()->kvm_slb_max = 1; | 349 | to_svcpu(vcpu)->slb_max = 1; |
336 | 350 | ||
337 | /* Are we overwriting? */ | 351 | /* Are we overwriting? */ |
338 | for (i = 1; i < get_paca()->kvm_slb_max; i++) { | 352 | for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) { |
339 | if (!(get_paca()->kvm_slb[i].esid & SLB_ESID_V)) | 353 | if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V)) |
340 | found_inval = i; | 354 | found_inval = i; |
341 | else if ((get_paca()->kvm_slb[i].esid & ESID_MASK) == esid) | 355 | else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid) |
342 | return i; | 356 | return i; |
343 | } | 357 | } |
344 | 358 | ||
@@ -352,11 +366,11 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid) | |||
352 | max_slb_size = mmu_slb_size; | 366 | max_slb_size = mmu_slb_size; |
353 | 367 | ||
354 | /* Overflowing -> purge */ | 368 | /* Overflowing -> purge */ |
355 | if ((get_paca()->kvm_slb_max) == max_slb_size) | 369 | if ((to_svcpu(vcpu)->slb_max) == max_slb_size) |
356 | kvmppc_mmu_flush_segments(vcpu); | 370 | kvmppc_mmu_flush_segments(vcpu); |
357 | 371 | ||
358 | r = get_paca()->kvm_slb_max; | 372 | r = to_svcpu(vcpu)->slb_max; |
359 | get_paca()->kvm_slb_max++; | 373 | to_svcpu(vcpu)->slb_max++; |
360 | 374 | ||
361 | return r; | 375 | return r; |
362 | } | 376 | } |
@@ -374,7 +388,7 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | |||
374 | 388 | ||
375 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { | 389 | if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { |
376 | /* Invalidate an entry */ | 390 | /* Invalidate an entry */ |
377 | get_paca()->kvm_slb[slb_index].esid = 0; | 391 | to_svcpu(vcpu)->slb[slb_index].esid = 0; |
378 | return -ENOENT; | 392 | return -ENOENT; |
379 | } | 393 | } |
380 | 394 | ||
@@ -388,8 +402,8 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | |||
388 | slb_vsid &= ~SLB_VSID_KP; | 402 | slb_vsid &= ~SLB_VSID_KP; |
389 | slb_esid |= slb_index; | 403 | slb_esid |= slb_index; |
390 | 404 | ||
391 | get_paca()->kvm_slb[slb_index].esid = slb_esid; | 405 | to_svcpu(vcpu)->slb[slb_index].esid = slb_esid; |
392 | get_paca()->kvm_slb[slb_index].vsid = slb_vsid; | 406 | to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid; |
393 | 407 | ||
394 | dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid); | 408 | dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid); |
395 | 409 | ||
@@ -398,11 +412,29 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) | |||
398 | 412 | ||
399 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) | 413 | void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) |
400 | { | 414 | { |
401 | get_paca()->kvm_slb_max = 1; | 415 | to_svcpu(vcpu)->slb_max = 1; |
402 | get_paca()->kvm_slb[0].esid = 0; | 416 | to_svcpu(vcpu)->slb[0].esid = 0; |
403 | } | 417 | } |
404 | 418 | ||
405 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) | 419 | void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) |
406 | { | 420 | { |
407 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 421 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
422 | __destroy_context(to_book3s(vcpu)->context_id); | ||
423 | } | ||
424 | |||
425 | int kvmppc_mmu_init(struct kvm_vcpu *vcpu) | ||
426 | { | ||
427 | struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); | ||
428 | int err; | ||
429 | |||
430 | err = __init_new_context(); | ||
431 | if (err < 0) | ||
432 | return -1; | ||
433 | vcpu3s->context_id = err; | ||
434 | |||
435 | vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1; | ||
436 | vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; | ||
437 | vcpu3s->vsid_next = vcpu3s->vsid_first; | ||
438 | |||
439 | return 0; | ||
408 | } | 440 | } |
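The 64-bit host gets the same per-vcpu VSID carve-out as the 32-bit code earlier in this series, but derives it directly from the context number shifted by USER_ESID_BITS instead of CTX_TO_VSID. A sketch of the resulting range, assuming USER_ESID_BITS is 16 as on ppc64 of this era:

#include <stdio.h>

#define USER_ESID_BITS 16   /* assumed; see asm/mmu-hash64.h */

int main(void)
{
        int context_id = 5;  /* pretend __init_new_context() returned 5 */
        unsigned long vsid_first = (unsigned long)context_id << USER_ESID_BITS;
        unsigned long vsid_max =
                ((unsigned long)(context_id + 1) << USER_ESID_BITS) - 1;

        printf("vsid range: 0x%lx .. 0x%lx (%lu VSIDs)\n",
               vsid_first, vsid_max, vsid_max - vsid_first + 1);
        return 0;
}
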
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 35b762722187..04e7d3bbfe8b 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S | |||
@@ -44,8 +44,7 @@ slb_exit_skip_ ## num: | |||
44 | * * | 44 | * * |
45 | *****************************************************************************/ | 45 | *****************************************************************************/ |
46 | 46 | ||
47 | .global kvmppc_handler_trampoline_enter | 47 | .macro LOAD_GUEST_SEGMENTS |
48 | kvmppc_handler_trampoline_enter: | ||
49 | 48 | ||
50 | /* Required state: | 49 | /* Required state: |
51 | * | 50 | * |
@@ -53,20 +52,14 @@ kvmppc_handler_trampoline_enter: | |||
53 | * R13 = PACA | 52 | * R13 = PACA |
54 | * R1 = host R1 | 53 | * R1 = host R1 |
55 | * R2 = host R2 | 54 | * R2 = host R2 |
56 | * R9 = guest IP | 55 | * R3 = shadow vcpu |
57 | * R10 = guest MSR | 56 | * all other volatile GPRS = free |
58 | * all other GPRS = free | 57 | * SVCPU[CR] = guest CR |
59 | * PACA[KVM_CR] = guest CR | 58 | * SVCPU[XER] = guest XER |
60 | * PACA[KVM_XER] = guest XER | 59 | * SVCPU[CTR] = guest CTR |
60 | * SVCPU[LR] = guest LR | ||
61 | */ | 61 | */ |
62 | 62 | ||
63 | mtsrr0 r9 | ||
64 | mtsrr1 r10 | ||
65 | |||
66 | /* Activate guest mode, so faults get handled by KVM */ | ||
67 | li r11, KVM_GUEST_MODE_GUEST | ||
68 | stb r11, PACA_KVM_IN_GUEST(r13) | ||
69 | |||
70 | /* Remove LPAR shadow entries */ | 63 | /* Remove LPAR shadow entries */ |
71 | 64 | ||
72 | #if SLB_NUM_BOLTED == 3 | 65 | #if SLB_NUM_BOLTED == 3 |
@@ -101,14 +94,14 @@ kvmppc_handler_trampoline_enter: | |||
101 | 94 | ||
102 | /* Fill SLB with our shadow */ | 95 | /* Fill SLB with our shadow */ |
103 | 96 | ||
104 | lbz r12, PACA_KVM_SLB_MAX(r13) | 97 | lbz r12, SVCPU_SLB_MAX(r3) |
105 | mulli r12, r12, 16 | 98 | mulli r12, r12, 16 |
106 | addi r12, r12, PACA_KVM_SLB | 99 | addi r12, r12, SVCPU_SLB |
107 | add r12, r12, r13 | 100 | add r12, r12, r3 |
108 | 101 | ||
109 | /* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */ | 102 | /* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */ |
110 | li r11, PACA_KVM_SLB | 103 | li r11, SVCPU_SLB |
111 | add r11, r11, r13 | 104 | add r11, r11, r3 |
112 | 105 | ||
113 | slb_loop_enter: | 106 | slb_loop_enter: |
114 | 107 | ||
@@ -127,34 +120,7 @@ slb_loop_enter_skip: | |||
127 | 120 | ||
128 | slb_do_enter: | 121 | slb_do_enter: |
129 | 122 | ||
130 | /* Enter guest */ | 123 | .endm |
131 | |||
132 | ld r0, (PACA_KVM_R0)(r13) | ||
133 | ld r1, (PACA_KVM_R1)(r13) | ||
134 | ld r2, (PACA_KVM_R2)(r13) | ||
135 | ld r3, (PACA_KVM_R3)(r13) | ||
136 | ld r4, (PACA_KVM_R4)(r13) | ||
137 | ld r5, (PACA_KVM_R5)(r13) | ||
138 | ld r6, (PACA_KVM_R6)(r13) | ||
139 | ld r7, (PACA_KVM_R7)(r13) | ||
140 | ld r8, (PACA_KVM_R8)(r13) | ||
141 | ld r9, (PACA_KVM_R9)(r13) | ||
142 | ld r10, (PACA_KVM_R10)(r13) | ||
143 | ld r12, (PACA_KVM_R12)(r13) | ||
144 | |||
145 | lwz r11, (PACA_KVM_CR)(r13) | ||
146 | mtcr r11 | ||
147 | |||
148 | ld r11, (PACA_KVM_XER)(r13) | ||
149 | mtxer r11 | ||
150 | |||
151 | ld r11, (PACA_KVM_R11)(r13) | ||
152 | ld r13, (PACA_KVM_R13)(r13) | ||
153 | |||
154 | RFI | ||
155 | kvmppc_handler_trampoline_enter_end: | ||
156 | |||
157 | |||
158 | 124 | ||
159 | /****************************************************************************** | 125 | /****************************************************************************** |
160 | * * | 126 | * * |
@@ -162,99 +128,22 @@ kvmppc_handler_trampoline_enter_end: | |||
162 | * * | 128 | * * |
163 | *****************************************************************************/ | 129 | *****************************************************************************/ |
164 | 130 | ||
165 | .global kvmppc_handler_trampoline_exit | 131 | .macro LOAD_HOST_SEGMENTS |
166 | kvmppc_handler_trampoline_exit: | ||
167 | 132 | ||
168 | /* Register usage at this point: | 133 | /* Register usage at this point: |
169 | * | 134 | * |
170 | * SPRG_SCRATCH0 = guest R13 | 135 | * R1 = host R1 |
171 | * R12 = exit handler id | 136 | * R2 = host R2 |
172 | * R13 = PACA | 137 | * R12 = exit handler id |
173 | * PACA.KVM.SCRATCH0 = guest R12 | 138 | * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] |
174 | * PACA.KVM.SCRATCH1 = guest CR | 139 | * SVCPU.* = guest * |
140 | * SVCPU[CR] = guest CR | ||
141 | * SVCPU[XER] = guest XER | ||
142 | * SVCPU[CTR] = guest CTR | ||
143 | * SVCPU[LR] = guest LR | ||
175 | * | 144 | * |
176 | */ | 145 | */ |
177 | 146 | ||
178 | /* Save registers */ | ||
179 | |||
180 | std r0, PACA_KVM_R0(r13) | ||
181 | std r1, PACA_KVM_R1(r13) | ||
182 | std r2, PACA_KVM_R2(r13) | ||
183 | std r3, PACA_KVM_R3(r13) | ||
184 | std r4, PACA_KVM_R4(r13) | ||
185 | std r5, PACA_KVM_R5(r13) | ||
186 | std r6, PACA_KVM_R6(r13) | ||
187 | std r7, PACA_KVM_R7(r13) | ||
188 | std r8, PACA_KVM_R8(r13) | ||
189 | std r9, PACA_KVM_R9(r13) | ||
190 | std r10, PACA_KVM_R10(r13) | ||
191 | std r11, PACA_KVM_R11(r13) | ||
192 | |||
193 | /* Restore R1/R2 so we can handle faults */ | ||
194 | ld r1, PACA_KVM_HOST_R1(r13) | ||
195 | ld r2, PACA_KVM_HOST_R2(r13) | ||
196 | |||
197 | /* Save guest PC and MSR in GPRs */ | ||
198 | mfsrr0 r3 | ||
199 | mfsrr1 r4 | ||
200 | |||
201 | /* Get scratch'ed off registers */ | ||
202 | mfspr r9, SPRN_SPRG_SCRATCH0 | ||
203 | std r9, PACA_KVM_R13(r13) | ||
204 | |||
205 | ld r8, PACA_KVM_SCRATCH0(r13) | ||
206 | std r8, PACA_KVM_R12(r13) | ||
207 | |||
208 | lwz r7, PACA_KVM_SCRATCH1(r13) | ||
209 | stw r7, PACA_KVM_CR(r13) | ||
210 | |||
211 | /* Save more register state */ | ||
212 | |||
213 | mfxer r6 | ||
214 | stw r6, PACA_KVM_XER(r13) | ||
215 | |||
216 | mfdar r5 | ||
217 | mfdsisr r6 | ||
218 | |||
219 | /* | ||
220 | * In order for us to easily get the last instruction, | ||
221 | * we got the #vmexit at, we exploit the fact that the | ||
222 | * virtual layout is still the same here, so we can just | ||
223 | * ld from the guest's PC address | ||
224 | */ | ||
225 | |||
226 | /* We only load the last instruction when it's safe */ | ||
227 | cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE | ||
228 | beq ld_last_inst | ||
229 | cmpwi r12, BOOK3S_INTERRUPT_PROGRAM | ||
230 | beq ld_last_inst | ||
231 | |||
232 | b no_ld_last_inst | ||
233 | |||
234 | ld_last_inst: | ||
235 | /* Save off the guest instruction we're at */ | ||
236 | |||
237 | /* Set guest mode to 'jump over instruction' so if lwz faults | ||
238 | * we'll just continue at the next IP. */ | ||
239 | li r9, KVM_GUEST_MODE_SKIP | ||
240 | stb r9, PACA_KVM_IN_GUEST(r13) | ||
241 | |||
242 | /* 1) enable paging for data */ | ||
243 | mfmsr r9 | ||
244 | ori r11, r9, MSR_DR /* Enable paging for data */ | ||
245 | mtmsr r11 | ||
246 | /* 2) fetch the instruction */ | ||
247 | li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */ | ||
248 | lwz r0, 0(r3) | ||
249 | /* 3) disable paging again */ | ||
250 | mtmsr r9 | ||
251 | |||
252 | no_ld_last_inst: | ||
253 | |||
254 | /* Unset guest mode */ | ||
255 | li r9, KVM_GUEST_MODE_NONE | ||
256 | stb r9, PACA_KVM_IN_GUEST(r13) | ||
257 | |||
258 | /* Restore bolted entries from the shadow and fix it along the way */ | 147 | /* Restore bolted entries from the shadow and fix it along the way */ |
259 | 148 | ||
260 | /* We don't store anything in entry 0, so we don't need to take care of it */ | 149 | /* We don't store anything in entry 0, so we don't need to take care of it */ |
@@ -275,28 +164,4 @@ no_ld_last_inst: | |||
275 | 164 | ||
276 | slb_do_exit: | 165 | slb_do_exit: |
277 | 166 | ||
278 | /* Register usage at this point: | 167 | .endm |
279 | * | ||
280 | * R0 = guest last inst | ||
281 | * R1 = host R1 | ||
282 | * R2 = host R2 | ||
283 | * R3 = guest PC | ||
284 | * R4 = guest MSR | ||
285 | * R5 = guest DAR | ||
286 | * R6 = guest DSISR | ||
287 | * R12 = exit handler id | ||
288 | * R13 = PACA | ||
289 | * PACA.KVM.* = guest * | ||
290 | * | ||
291 | */ | ||
292 | |||
293 | /* RFI into the highmem handler */ | ||
294 | mfmsr r7 | ||
295 | ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */ | ||
296 | mtsrr1 r7 | ||
297 | ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */ | ||
298 | mtsrr0 r8 | ||
299 | |||
300 | RFI | ||
301 | kvmppc_handler_trampoline_exit_end: | ||
302 | |||
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 2b0ee7e040c9..c85f906038ce 100644 --- a/arch/powerpc/kvm/book3s_64_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
@@ -28,13 +28,16 @@ | |||
28 | #define OP_31_XOP_MFMSR 83 | 28 | #define OP_31_XOP_MFMSR 83 |
29 | #define OP_31_XOP_MTMSR 146 | 29 | #define OP_31_XOP_MTMSR 146 |
30 | #define OP_31_XOP_MTMSRD 178 | 30 | #define OP_31_XOP_MTMSRD 178 |
31 | #define OP_31_XOP_MTSR 210 | ||
31 | #define OP_31_XOP_MTSRIN 242 | 32 | #define OP_31_XOP_MTSRIN 242 |
32 | #define OP_31_XOP_TLBIEL 274 | 33 | #define OP_31_XOP_TLBIEL 274 |
33 | #define OP_31_XOP_TLBIE 306 | 34 | #define OP_31_XOP_TLBIE 306 |
34 | #define OP_31_XOP_SLBMTE 402 | 35 | #define OP_31_XOP_SLBMTE 402 |
35 | #define OP_31_XOP_SLBIE 434 | 36 | #define OP_31_XOP_SLBIE 434 |
36 | #define OP_31_XOP_SLBIA 498 | 37 | #define OP_31_XOP_SLBIA 498 |
38 | #define OP_31_XOP_MFSR 595 | ||
37 | #define OP_31_XOP_MFSRIN 659 | 39 | #define OP_31_XOP_MFSRIN 659 |
40 | #define OP_31_XOP_DCBA 758 | ||
38 | #define OP_31_XOP_SLBMFEV 851 | 41 | #define OP_31_XOP_SLBMFEV 851 |
39 | #define OP_31_XOP_EIOIO 854 | 42 | #define OP_31_XOP_EIOIO 854 |
40 | #define OP_31_XOP_SLBMFEE 915 | 43 | #define OP_31_XOP_SLBMFEE 915 |
@@ -42,6 +45,24 @@ | |||
42 | /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ | 45 | /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ |
43 | #define OP_31_XOP_DCBZ 1010 | 46 | #define OP_31_XOP_DCBZ 1010 |
44 | 47 | ||
48 | #define OP_LFS 48 | ||
49 | #define OP_LFD 50 | ||
50 | #define OP_STFS 52 | ||
51 | #define OP_STFD 54 | ||
52 | |||
53 | #define SPRN_GQR0 912 | ||
54 | #define SPRN_GQR1 913 | ||
55 | #define SPRN_GQR2 914 | ||
56 | #define SPRN_GQR3 915 | ||
57 | #define SPRN_GQR4 916 | ||
58 | #define SPRN_GQR5 917 | ||
59 | #define SPRN_GQR6 918 | ||
60 | #define SPRN_GQR7 919 | ||
61 | |||
62 | /* Book3S_32 defines mfsrin(v) - but that messes up our abstract | ||
63 | * function pointers, so let's just disable the define. */ | ||
64 | #undef mfsrin | ||
65 | |||
45 | int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | 66 | int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, |
46 | unsigned int inst, int *advance) | 67 | unsigned int inst, int *advance) |
47 | { | 68 | { |
@@ -52,7 +73,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
52 | switch (get_xop(inst)) { | 73 | switch (get_xop(inst)) { |
53 | case OP_19_XOP_RFID: | 74 | case OP_19_XOP_RFID: |
54 | case OP_19_XOP_RFI: | 75 | case OP_19_XOP_RFI: |
55 | vcpu->arch.pc = vcpu->arch.srr0; | 76 | kvmppc_set_pc(vcpu, vcpu->arch.srr0); |
56 | kvmppc_set_msr(vcpu, vcpu->arch.srr1); | 77 | kvmppc_set_msr(vcpu, vcpu->arch.srr1); |
57 | *advance = 0; | 78 | *advance = 0; |
58 | break; | 79 | break; |
@@ -80,6 +101,18 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
80 | case OP_31_XOP_MTMSR: | 101 | case OP_31_XOP_MTMSR: |
81 | kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); | 102 | kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); |
82 | break; | 103 | break; |
104 | case OP_31_XOP_MFSR: | ||
105 | { | ||
106 | int srnum; | ||
107 | |||
108 | srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32); | ||
109 | if (vcpu->arch.mmu.mfsrin) { | ||
110 | u32 sr; | ||
111 | sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); | ||
112 | kvmppc_set_gpr(vcpu, get_rt(inst), sr); | ||
113 | } | ||
114 | break; | ||
115 | } | ||
83 | case OP_31_XOP_MFSRIN: | 116 | case OP_31_XOP_MFSRIN: |
84 | { | 117 | { |
85 | int srnum; | 118 | int srnum; |
@@ -92,6 +125,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
92 | } | 125 | } |
93 | break; | 126 | break; |
94 | } | 127 | } |
128 | case OP_31_XOP_MTSR: | ||
129 | vcpu->arch.mmu.mtsrin(vcpu, | ||
130 | (inst >> 16) & 0xf, | ||
131 | kvmppc_get_gpr(vcpu, get_rs(inst))); | ||
132 | break; | ||
95 | case OP_31_XOP_MTSRIN: | 133 | case OP_31_XOP_MTSRIN: |
96 | vcpu->arch.mmu.mtsrin(vcpu, | 134 | vcpu->arch.mmu.mtsrin(vcpu, |
97 | (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, | 135 | (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, |
@@ -150,12 +188,17 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
150 | kvmppc_set_gpr(vcpu, get_rt(inst), t); | 188 | kvmppc_set_gpr(vcpu, get_rt(inst), t); |
151 | } | 189 | } |
152 | break; | 190 | break; |
191 | case OP_31_XOP_DCBA: | ||
192 | /* Gets treated as NOP */ | ||
193 | break; | ||
153 | case OP_31_XOP_DCBZ: | 194 | case OP_31_XOP_DCBZ: |
154 | { | 195 | { |
155 | ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); | 196 | ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); |
156 | ulong ra = 0; | 197 | ulong ra = 0; |
157 | ulong addr; | 198 | ulong addr, vaddr; |
158 | u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; | 199 | u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; |
200 | u32 dsisr; | ||
201 | int r; | ||
159 | 202 | ||
160 | if (get_ra(inst)) | 203 | if (get_ra(inst)) |
161 | ra = kvmppc_get_gpr(vcpu, get_ra(inst)); | 204 | ra = kvmppc_get_gpr(vcpu, get_ra(inst)); |
@@ -163,15 +206,25 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
163 | addr = (ra + rb) & ~31ULL; | 206 | addr = (ra + rb) & ~31ULL; |
164 | if (!(vcpu->arch.msr & MSR_SF)) | 207 | if (!(vcpu->arch.msr & MSR_SF)) |
165 | addr &= 0xffffffff; | 208 | addr &= 0xffffffff; |
209 | vaddr = addr; | ||
210 | |||
211 | r = kvmppc_st(vcpu, &addr, 32, zeros, true); | ||
212 | if ((r == -ENOENT) || (r == -EPERM)) { | ||
213 | *advance = 0; | ||
214 | vcpu->arch.dear = vaddr; | ||
215 | to_svcpu(vcpu)->fault_dar = vaddr; | ||
216 | |||
217 | dsisr = DSISR_ISSTORE; | ||
218 | if (r == -ENOENT) | ||
219 | dsisr |= DSISR_NOHPTE; | ||
220 | else if (r == -EPERM) | ||
221 | dsisr |= DSISR_PROTFAULT; | ||
222 | |||
223 | to_book3s(vcpu)->dsisr = dsisr; | ||
224 | to_svcpu(vcpu)->fault_dsisr = dsisr; | ||
166 | 225 | ||
167 | if (kvmppc_st(vcpu, addr, 32, zeros)) { | ||
168 | vcpu->arch.dear = addr; | ||
169 | vcpu->arch.fault_dear = addr; | ||
170 | to_book3s(vcpu)->dsisr = DSISR_PROTFAULT | | ||
171 | DSISR_ISSTORE; | ||
172 | kvmppc_book3s_queue_irqprio(vcpu, | 226 | kvmppc_book3s_queue_irqprio(vcpu, |
173 | BOOK3S_INTERRUPT_DATA_STORAGE); | 227 | BOOK3S_INTERRUPT_DATA_STORAGE); |
174 | kvmppc_mmu_pte_flush(vcpu, addr, ~0xFFFULL); | ||
175 | } | 228 | } |
176 | 229 | ||
177 | break; | 230 | break; |
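With the change above, a dcbz store that fails in kvmppc_st() is reported precisely: -ENOENT becomes DSISR_NOHPTE, -EPERM becomes DSISR_PROTFAULT, and either is combined with DSISR_ISSTORE before the data storage interrupt is queued, instead of always claiming a protection fault. The mapping is small enough to check in isolation; the DSISR_* values below are the usual Book3S ones but are shown only for illustration.

#include <errno.h>
#include <stdio.h>
#include <stdint.h>

/* Usual Book3S DSISR bits; treat as illustrative. */
#define DSISR_NOHPTE     0x40000000u
#define DSISR_PROTFAULT  0x08000000u
#define DSISR_ISSTORE    0x02000000u

/* Mirror of the fault reporting added to the dcbz emulation above. */
static uint32_t dcbz_fault_dsisr(int st_result)
{
        uint32_t dsisr = DSISR_ISSTORE;

        if (st_result == -ENOENT)
                dsisr |= DSISR_NOHPTE;      /* no HPTE for the target page */
        else if (st_result == -EPERM)
                dsisr |= DSISR_PROTFAULT;   /* mapped but not writable     */
        return dsisr;
}

int main(void)
{
        printf("-ENOENT -> DSISR 0x%08x\n", dcbz_fault_dsisr(-ENOENT));
        printf("-EPERM  -> DSISR 0x%08x\n", dcbz_fault_dsisr(-EPERM));
        return 0;
}
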
@@ -184,6 +237,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
184 | emulated = EMULATE_FAIL; | 237 | emulated = EMULATE_FAIL; |
185 | } | 238 | } |
186 | 239 | ||
240 | if (emulated == EMULATE_FAIL) | ||
241 | emulated = kvmppc_emulate_paired_single(run, vcpu); | ||
242 | |||
187 | return emulated; | 243 | return emulated; |
188 | } | 244 | } |
189 | 245 | ||
@@ -207,6 +263,34 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, | |||
207 | } | 263 | } |
208 | } | 264 | } |
209 | 265 | ||
266 | static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) | ||
267 | { | ||
268 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | ||
269 | struct kvmppc_bat *bat; | ||
270 | |||
271 | switch (sprn) { | ||
272 | case SPRN_IBAT0U ... SPRN_IBAT3L: | ||
273 | bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; | ||
274 | break; | ||
275 | case SPRN_IBAT4U ... SPRN_IBAT7L: | ||
276 | bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; | ||
277 | break; | ||
278 | case SPRN_DBAT0U ... SPRN_DBAT3L: | ||
279 | bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; | ||
280 | break; | ||
281 | case SPRN_DBAT4U ... SPRN_DBAT7L: | ||
282 | bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; | ||
283 | break; | ||
284 | default: | ||
285 | BUG(); | ||
286 | } | ||
287 | |||
288 | if (sprn % 2) | ||
289 | return bat->raw >> 32; | ||
290 | else | ||
291 | return bat->raw; | ||
292 | } | ||
293 | |||
210 | static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) | 294 | static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) |
211 | { | 295 | { |
212 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); | 296 | struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); |
@@ -217,13 +301,13 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) | |||
217 | bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; | 301 | bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; |
218 | break; | 302 | break; |
219 | case SPRN_IBAT4U ... SPRN_IBAT7L: | 303 | case SPRN_IBAT4U ... SPRN_IBAT7L: |
220 | bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT4U) / 2]; | 304 | bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; |
221 | break; | 305 | break; |
222 | case SPRN_DBAT0U ... SPRN_DBAT3L: | 306 | case SPRN_DBAT0U ... SPRN_DBAT3L: |
223 | bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; | 307 | bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; |
224 | break; | 308 | break; |
225 | case SPRN_DBAT4U ... SPRN_DBAT7L: | 309 | case SPRN_DBAT4U ... SPRN_DBAT7L: |
226 | bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT4U) / 2]; | 310 | bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; |
227 | break; | 311 | break; |
228 | default: | 312 | default: |
229 | BUG(); | 313 | BUG(); |
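kvmppc_read_bat() and the corrected kvmppc_write_bat() both turn an SPR number into a slot in the ibat[]/dbat[] arrays: BATs come in upper/lower pairs, the 4-7 group has its own SPR base and now lands at index 4 and up, and an odd SPR number selects the L (lower) BAT register. The index arithmetic can be exercised on its own; the SPR numbers below are the architected ones and only drive the calculation.

#include <stdio.h>

#define SPRN_IBAT0U 528
#define SPRN_IBAT4U 560
#define SPRN_DBAT0U 536
#define SPRN_DBAT4U 568

/* Which ibat[]/dbat[] slot an SPR refers to, mirroring kvmppc_read_bat(). */
static void decode_bat_spr(int sprn)
{
        const char *array;
        int index;

        if (sprn >= SPRN_DBAT4U) {
                array = "dbat";
                index = 4 + (sprn - SPRN_DBAT4U) / 2;
        } else if (sprn >= SPRN_IBAT4U) {
                array = "ibat";
                index = 4 + (sprn - SPRN_IBAT4U) / 2;
        } else if (sprn >= SPRN_DBAT0U) {
                array = "dbat";
                index = (sprn - SPRN_DBAT0U) / 2;
        } else {
                array = "ibat";
                index = (sprn - SPRN_IBAT0U) / 2;
        }

        /* Even SPR numbers are the U registers, odd ones the L registers. */
        printf("SPR %d -> %s[%d] (%c register)\n", sprn, array, index,
               (sprn % 2) ? 'L' : 'U');
}

int main(void)
{
        decode_bat_spr(SPRN_IBAT0U);      /* ibat[0], U */
        decode_bat_spr(SPRN_DBAT4U + 3);  /* dbat[5], L */
        return 0;
}
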
@@ -258,6 +342,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
258 | /* BAT writes happen so rarely that we're ok to flush | 342 | /* BAT writes happen so rarely that we're ok to flush |
259 | * everything here */ | 343 | * everything here */ |
260 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 344 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
345 | kvmppc_mmu_flush_segments(vcpu); | ||
261 | break; | 346 | break; |
262 | case SPRN_HID0: | 347 | case SPRN_HID0: |
263 | to_book3s(vcpu)->hid[0] = spr_val; | 348 | to_book3s(vcpu)->hid[0] = spr_val; |
@@ -268,7 +353,32 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
268 | case SPRN_HID2: | 353 | case SPRN_HID2: |
269 | to_book3s(vcpu)->hid[2] = spr_val; | 354 | to_book3s(vcpu)->hid[2] = spr_val; |
270 | break; | 355 | break; |
356 | case SPRN_HID2_GEKKO: | ||
357 | to_book3s(vcpu)->hid[2] = spr_val; | ||
358 | /* HID2.PSE controls paired single on gekko */ | ||
359 | switch (vcpu->arch.pvr) { | ||
360 | case 0x00080200: /* lonestar 2.0 */ | ||
361 | case 0x00088202: /* lonestar 2.2 */ | ||
362 | case 0x70000100: /* gekko 1.0 */ | ||
363 | case 0x00080100: /* gekko 2.0 */ | ||
364 | case 0x00083203: /* gekko 2.3a */ | ||
365 | case 0x00083213: /* gekko 2.3b */ | ||
366 | case 0x00083204: /* gekko 2.4 */ | ||
367 | case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ | ||
368 | case 0x00087200: /* broadway */ | ||
369 | if (vcpu->arch.hflags & BOOK3S_HFLAG_NATIVE_PS) { | ||
370 | /* Native paired singles */ | ||
371 | } else if (spr_val & (1 << 29)) { /* HID2.PSE */ | ||
372 | vcpu->arch.hflags |= BOOK3S_HFLAG_PAIRED_SINGLE; | ||
373 | kvmppc_giveup_ext(vcpu, MSR_FP); | ||
374 | } else { | ||
375 | vcpu->arch.hflags &= ~BOOK3S_HFLAG_PAIRED_SINGLE; | ||
376 | } | ||
377 | break; | ||
378 | } | ||
379 | break; | ||
271 | case SPRN_HID4: | 380 | case SPRN_HID4: |
381 | case SPRN_HID4_GEKKO: | ||
272 | to_book3s(vcpu)->hid[4] = spr_val; | 382 | to_book3s(vcpu)->hid[4] = spr_val; |
273 | break; | 383 | break; |
274 | case SPRN_HID5: | 384 | case SPRN_HID5: |
@@ -278,12 +388,30 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
278 | (mfmsr() & MSR_HV)) | 388 | (mfmsr() & MSR_HV)) |
279 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; | 389 | vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; |
280 | break; | 390 | break; |
391 | case SPRN_GQR0: | ||
392 | case SPRN_GQR1: | ||
393 | case SPRN_GQR2: | ||
394 | case SPRN_GQR3: | ||
395 | case SPRN_GQR4: | ||
396 | case SPRN_GQR5: | ||
397 | case SPRN_GQR6: | ||
398 | case SPRN_GQR7: | ||
399 | to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; | ||
400 | break; | ||
281 | case SPRN_ICTC: | 401 | case SPRN_ICTC: |
282 | case SPRN_THRM1: | 402 | case SPRN_THRM1: |
283 | case SPRN_THRM2: | 403 | case SPRN_THRM2: |
284 | case SPRN_THRM3: | 404 | case SPRN_THRM3: |
285 | case SPRN_CTRLF: | 405 | case SPRN_CTRLF: |
286 | case SPRN_CTRLT: | 406 | case SPRN_CTRLT: |
407 | case SPRN_L2CR: | ||
408 | case SPRN_MMCR0_GEKKO: | ||
409 | case SPRN_MMCR1_GEKKO: | ||
410 | case SPRN_PMC1_GEKKO: | ||
411 | case SPRN_PMC2_GEKKO: | ||
412 | case SPRN_PMC3_GEKKO: | ||
413 | case SPRN_PMC4_GEKKO: | ||
414 | case SPRN_WPAR_GEKKO: | ||
287 | break; | 415 | break; |
288 | default: | 416 | default: |
289 | printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); | 417 | printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); |
@@ -301,6 +429,12 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
301 | int emulated = EMULATE_DONE; | 429 | int emulated = EMULATE_DONE; |
302 | 430 | ||
303 | switch (sprn) { | 431 | switch (sprn) { |
432 | case SPRN_IBAT0U ... SPRN_IBAT3L: | ||
433 | case SPRN_IBAT4U ... SPRN_IBAT7L: | ||
434 | case SPRN_DBAT0U ... SPRN_DBAT3L: | ||
435 | case SPRN_DBAT4U ... SPRN_DBAT7L: | ||
436 | kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn)); | ||
437 | break; | ||
304 | case SPRN_SDR1: | 438 | case SPRN_SDR1: |
305 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); | 439 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); |
306 | break; | 440 | break; |
@@ -320,19 +454,40 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
320 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); | 454 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); |
321 | break; | 455 | break; |
322 | case SPRN_HID2: | 456 | case SPRN_HID2: |
457 | case SPRN_HID2_GEKKO: | ||
323 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); | 458 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); |
324 | break; | 459 | break; |
325 | case SPRN_HID4: | 460 | case SPRN_HID4: |
461 | case SPRN_HID4_GEKKO: | ||
326 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); | 462 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); |
327 | break; | 463 | break; |
328 | case SPRN_HID5: | 464 | case SPRN_HID5: |
329 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); | 465 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); |
330 | break; | 466 | break; |
467 | case SPRN_GQR0: | ||
468 | case SPRN_GQR1: | ||
469 | case SPRN_GQR2: | ||
470 | case SPRN_GQR3: | ||
471 | case SPRN_GQR4: | ||
472 | case SPRN_GQR5: | ||
473 | case SPRN_GQR6: | ||
474 | case SPRN_GQR7: | ||
475 | kvmppc_set_gpr(vcpu, rt, | ||
476 | to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]); | ||
477 | break; | ||
331 | case SPRN_THRM1: | 478 | case SPRN_THRM1: |
332 | case SPRN_THRM2: | 479 | case SPRN_THRM2: |
333 | case SPRN_THRM3: | 480 | case SPRN_THRM3: |
334 | case SPRN_CTRLF: | 481 | case SPRN_CTRLF: |
335 | case SPRN_CTRLT: | 482 | case SPRN_CTRLT: |
483 | case SPRN_L2CR: | ||
484 | case SPRN_MMCR0_GEKKO: | ||
485 | case SPRN_MMCR1_GEKKO: | ||
486 | case SPRN_PMC1_GEKKO: | ||
487 | case SPRN_PMC2_GEKKO: | ||
488 | case SPRN_PMC3_GEKKO: | ||
489 | case SPRN_PMC4_GEKKO: | ||
490 | case SPRN_WPAR_GEKKO: | ||
336 | kvmppc_set_gpr(vcpu, rt, 0); | 491 | kvmppc_set_gpr(vcpu, rt, 0); |
337 | break; | 492 | break; |
338 | default: | 493 | default: |
@@ -346,3 +501,73 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
346 | return emulated; | 501 | return emulated; |
347 | } | 502 | } |
348 | 503 | ||
504 | u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst) | ||
505 | { | ||
506 | u32 dsisr = 0; | ||
507 | |||
508 | /* | ||
509 | * This is what the spec says about DSISR bits (not mentioned = 0): | ||
510 | * | ||
511 | * 12:13 [DS] Set to bits 30:31 | ||
512 | * 15:16 [X] Set to bits 29:30 | ||
513 | * 17 [X] Set to bit 25 | ||
514 | * [D/DS] Set to bit 5 | ||
515 | * 18:21 [X] Set to bits 21:24 | ||
516 | * [D/DS] Set to bits 1:4 | ||
517 | * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS) | ||
518 | * 27:31 Set to bits 11:15 (RA) | ||
519 | */ | ||
520 | |||
521 | switch (get_op(inst)) { | ||
522 | /* D-form */ | ||
523 | case OP_LFS: | ||
524 | case OP_LFD: | ||
525 | case OP_STFD: | ||
526 | case OP_STFS: | ||
527 | dsisr |= (inst >> 12) & 0x4000; /* bit 17 */ | ||
528 | dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */ | ||
529 | break; | ||
530 | /* X-form */ | ||
531 | case 31: | ||
532 | dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */ | ||
533 | dsisr |= (inst << 8) & 0x04000; /* bit 17 */ | ||
534 | dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */ | ||
535 | break; | ||
536 | default: | ||
537 | printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); | ||
538 | break; | ||
539 | } | ||
540 | |||
541 | dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */ | ||
542 | |||
543 | return dsisr; | ||
544 | } | ||
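kvmppc_alignment_dsisr() rebuilds the DSISR an alignment interrupt would carry from the faulting instruction image, packing the opcode-dependent fields into the bit positions listed in the comment above. The shifts are easy to sanity-check on a concrete encoding; the sketch below runs them on a hypothetical stfd f1, 8(r3) (0xd8230008) and only illustrates where each field ends up.

#include <stdio.h>
#include <stdint.h>

#define OP_STFD 54

static uint32_t alignment_dsisr(uint32_t inst)
{
        uint32_t dsisr = 0;
        uint32_t op = inst >> 26;

        switch (op) {
        case 48: case 50: case 52: case OP_STFD:        /* D-form FP load/store */
                dsisr |= (inst >> 12) & 0x4000;          /* DSISR[17]    <- inst[5]     */
                dsisr |= (inst >> 17) & 0x3c00;          /* DSISR[18:21] <- inst[1:4]   */
                break;
        case 31:                                         /* X-form */
                dsisr |= (inst << 14) & 0x18000;         /* DSISR[15:16] <- inst[29:30] */
                dsisr |= (inst <<  8) & 0x04000;         /* DSISR[17]    <- inst[25]    */
                dsisr |= (inst <<  3) & 0x03c00;         /* DSISR[18:21] <- inst[21:24] */
                break;
        }
        dsisr |= (inst >> 16) & 0x03ff;                  /* DSISR[22:31] <- RT/RS, RA   */
        return dsisr;
}

int main(void)
{
        /* Hypothetical faulting instruction: stfd f1, 8(r3) == 0xd8230008. */
        printf("DSISR = 0x%05x\n", alignment_dsisr(0xd8230008));  /* 0x02c23 */
        return 0;
}
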
545 | |||
546 | ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) | ||
547 | { | ||
548 | ulong dar = 0; | ||
549 | ulong ra; | ||
550 | |||
551 | switch (get_op(inst)) { | ||
552 | case OP_LFS: | ||
553 | case OP_LFD: | ||
554 | case OP_STFD: | ||
555 | case OP_STFS: | ||
556 | ra = get_ra(inst); | ||
557 | if (ra) | ||
558 | dar = kvmppc_get_gpr(vcpu, ra); | ||
559 | dar += (s32)((s16)inst); | ||
560 | break; | ||
561 | case 31: | ||
562 | ra = get_ra(inst); | ||
563 | if (ra) | ||
564 | dar = kvmppc_get_gpr(vcpu, ra); | ||
565 | dar += kvmppc_get_gpr(vcpu, get_rb(inst)); | ||
566 | break; | ||
567 | default: | ||
568 | printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); | ||
569 | break; | ||
570 | } | ||
571 | |||
572 | return dar; | ||
573 | } | ||
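kvmppc_alignment_dar() recomputes the effective address the same way the load/store unit would have: (RA|0) plus the sign-extended 16-bit displacement for D-forms, or (RA|0) plus (RB) for X-forms. A worked example with an assumed register value:

#include <stdio.h>

int main(void)
{
        /* stfd f1, -16(r3) with a hypothetical guest r3 = 0x1000; the D field
         * is sign-extended exactly as in kvmppc_alignment_dar(). */
        unsigned int inst = 0xd823fff0;       /* stfd f1, -16(r3) */
        unsigned long ra_val = 0x1000;        /* assumed guest r3 */

        unsigned long dar = ra_val + (long)(short)inst;  /* (s32)((s16)inst) */
        printf("DAR = 0x%lx\n", dar);         /* 0xff0 */
        return 0;
}
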
diff --git a/arch/powerpc/kvm/book3s_64_exports.c b/arch/powerpc/kvm/book3s_exports.c index 1dd5a1ddfd0d..1dd5a1ddfd0d 100644 --- a/arch/powerpc/kvm/book3s_64_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c | |||
diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index c1584d0cbce8..2f0bc928b08a 100644 --- a/arch/powerpc/kvm/book3s_64_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S | |||
@@ -24,36 +24,56 @@ | |||
24 | #include <asm/asm-offsets.h> | 24 | #include <asm/asm-offsets.h> |
25 | #include <asm/exception-64s.h> | 25 | #include <asm/exception-64s.h> |
26 | 26 | ||
27 | #define KVMPPC_HANDLE_EXIT .kvmppc_handle_exit | 27 | #if defined(CONFIG_PPC_BOOK3S_64) |
28 | #define ULONG_SIZE 8 | ||
29 | #define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) | ||
30 | 28 | ||
31 | .macro DISABLE_INTERRUPTS | 29 | #define ULONG_SIZE 8 |
32 | mfmsr r0 | 30 | #define FUNC(name) GLUE(.,name) |
33 | rldicl r0,r0,48,1 | ||
34 | rotldi r0,r0,16 | ||
35 | mtmsrd r0,1 | ||
36 | .endm | ||
37 | 31 | ||
32 | #define GET_SHADOW_VCPU(reg) \ | ||
33 | addi reg, r13, PACA_KVM_SVCPU | ||
34 | |||
35 | #define DISABLE_INTERRUPTS \ | ||
36 | mfmsr r0; \ | ||
37 | rldicl r0,r0,48,1; \ | ||
38 | rotldi r0,r0,16; \ | ||
39 | mtmsrd r0,1; \ | ||
40 | |||
41 | #elif defined(CONFIG_PPC_BOOK3S_32) | ||
42 | |||
43 | #define ULONG_SIZE 4 | ||
44 | #define FUNC(name) name | ||
45 | |||
46 | #define GET_SHADOW_VCPU(reg) \ | ||
47 | lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) | ||
48 | |||
49 | #define DISABLE_INTERRUPTS \ | ||
50 | mfmsr r0; \ | ||
51 | rlwinm r0,r0,0,17,15; \ | ||
52 | mtmsr r0; \ | ||
53 | |||
54 | #endif /* CONFIG_PPC_BOOK3S_XX */ | ||
55 | |||
56 | |||
57 | #define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) | ||
38 | #define VCPU_LOAD_NVGPRS(vcpu) \ | 58 | #define VCPU_LOAD_NVGPRS(vcpu) \ |
39 | ld r14, VCPU_GPR(r14)(vcpu); \ | 59 | PPC_LL r14, VCPU_GPR(r14)(vcpu); \ |
40 | ld r15, VCPU_GPR(r15)(vcpu); \ | 60 | PPC_LL r15, VCPU_GPR(r15)(vcpu); \ |
41 | ld r16, VCPU_GPR(r16)(vcpu); \ | 61 | PPC_LL r16, VCPU_GPR(r16)(vcpu); \ |
42 | ld r17, VCPU_GPR(r17)(vcpu); \ | 62 | PPC_LL r17, VCPU_GPR(r17)(vcpu); \ |
43 | ld r18, VCPU_GPR(r18)(vcpu); \ | 63 | PPC_LL r18, VCPU_GPR(r18)(vcpu); \ |
44 | ld r19, VCPU_GPR(r19)(vcpu); \ | 64 | PPC_LL r19, VCPU_GPR(r19)(vcpu); \ |
45 | ld r20, VCPU_GPR(r20)(vcpu); \ | 65 | PPC_LL r20, VCPU_GPR(r20)(vcpu); \ |
46 | ld r21, VCPU_GPR(r21)(vcpu); \ | 66 | PPC_LL r21, VCPU_GPR(r21)(vcpu); \ |
47 | ld r22, VCPU_GPR(r22)(vcpu); \ | 67 | PPC_LL r22, VCPU_GPR(r22)(vcpu); \ |
48 | ld r23, VCPU_GPR(r23)(vcpu); \ | 68 | PPC_LL r23, VCPU_GPR(r23)(vcpu); \ |
49 | ld r24, VCPU_GPR(r24)(vcpu); \ | 69 | PPC_LL r24, VCPU_GPR(r24)(vcpu); \ |
50 | ld r25, VCPU_GPR(r25)(vcpu); \ | 70 | PPC_LL r25, VCPU_GPR(r25)(vcpu); \ |
51 | ld r26, VCPU_GPR(r26)(vcpu); \ | 71 | PPC_LL r26, VCPU_GPR(r26)(vcpu); \ |
52 | ld r27, VCPU_GPR(r27)(vcpu); \ | 72 | PPC_LL r27, VCPU_GPR(r27)(vcpu); \ |
53 | ld r28, VCPU_GPR(r28)(vcpu); \ | 73 | PPC_LL r28, VCPU_GPR(r28)(vcpu); \ |
54 | ld r29, VCPU_GPR(r29)(vcpu); \ | 74 | PPC_LL r29, VCPU_GPR(r29)(vcpu); \ |
55 | ld r30, VCPU_GPR(r30)(vcpu); \ | 75 | PPC_LL r30, VCPU_GPR(r30)(vcpu); \ |
56 | ld r31, VCPU_GPR(r31)(vcpu); \ | 76 | PPC_LL r31, VCPU_GPR(r31)(vcpu); \ |
57 | 77 | ||
58 | /***************************************************************************** | 78 | /***************************************************************************** |
59 | * * | 79 | * * |
@@ -69,11 +89,11 @@ _GLOBAL(__kvmppc_vcpu_entry) | |||
69 | 89 | ||
70 | kvm_start_entry: | 90 | kvm_start_entry: |
71 | /* Write correct stack frame */ | 91 | /* Write correct stack frame */ |
72 | mflr r0 | 92 | mflr r0 |
73 | std r0,16(r1) | 93 | PPC_STL r0,PPC_LR_STKOFF(r1) |
74 | 94 | ||
75 | /* Save host state to the stack */ | 95 | /* Save host state to the stack */ |
76 | stdu r1, -SWITCH_FRAME_SIZE(r1) | 96 | PPC_STLU r1, -SWITCH_FRAME_SIZE(r1) |
77 | 97 | ||
78 | /* Save r3 (kvm_run) and r4 (vcpu) */ | 98 | /* Save r3 (kvm_run) and r4 (vcpu) */ |
79 | SAVE_2GPRS(3, r1) | 99 | SAVE_2GPRS(3, r1) |
@@ -82,33 +102,28 @@ kvm_start_entry: | |||
82 | SAVE_NVGPRS(r1) | 102 | SAVE_NVGPRS(r1) |
83 | 103 | ||
84 | /* Save LR */ | 104 | /* Save LR */ |
85 | std r0, _LINK(r1) | 105 | PPC_STL r0, _LINK(r1) |
86 | 106 | ||
87 | /* Load non-volatile guest state from the vcpu */ | 107 | /* Load non-volatile guest state from the vcpu */ |
88 | VCPU_LOAD_NVGPRS(r4) | 108 | VCPU_LOAD_NVGPRS(r4) |
89 | 109 | ||
110 | GET_SHADOW_VCPU(r5) | ||
111 | |||
90 | /* Save R1/R2 in the PACA */ | 112 | /* Save R1/R2 in the PACA */ |
91 | std r1, PACA_KVM_HOST_R1(r13) | 113 | PPC_STL r1, SVCPU_HOST_R1(r5) |
92 | std r2, PACA_KVM_HOST_R2(r13) | 114 | PPC_STL r2, SVCPU_HOST_R2(r5) |
93 | 115 | ||
94 | /* XXX swap in/out on load? */ | 116 | /* XXX swap in/out on load? */ |
95 | ld r3, VCPU_HIGHMEM_HANDLER(r4) | 117 | PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4) |
96 | std r3, PACA_KVM_VMHANDLER(r13) | 118 | PPC_STL r3, SVCPU_VMHANDLER(r5) |
97 | 119 | ||
98 | kvm_start_lightweight: | 120 | kvm_start_lightweight: |
99 | 121 | ||
100 | ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */ | 122 | PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ |
101 | ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ | ||
102 | |||
103 | /* Load some guest state in the respective registers */ | ||
104 | ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */ | ||
105 | /* will be swapped in by rmcall */ | ||
106 | |||
107 | ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */ | ||
108 | mtlr r3 /* LR = r3 */ | ||
109 | 123 | ||
110 | DISABLE_INTERRUPTS | 124 | DISABLE_INTERRUPTS |
111 | 125 | ||
126 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
112 | /* Some guests may need to have dcbz set to 32 byte length. | 127 | /* Some guests may need to have dcbz set to 32 byte length. |
113 | * | 128 | * |
114 | * Usually we ensure that by patching the guest's instructions | 129 | * Usually we ensure that by patching the guest's instructions |
@@ -118,7 +133,7 @@ kvm_start_lightweight: | |||
118 | * because that's a lot faster. | 133 | * because that's a lot faster. |
119 | */ | 134 | */ |
120 | 135 | ||
121 | ld r3, VCPU_HFLAGS(r4) | 136 | PPC_LL r3, VCPU_HFLAGS(r4) |
122 | rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */ | 137 | rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */ |
123 | beq no_dcbz32_on | 138 | beq no_dcbz32_on |
124 | 139 | ||
@@ -128,13 +143,15 @@ kvm_start_lightweight: | |||
128 | 143 | ||
129 | no_dcbz32_on: | 144 | no_dcbz32_on: |
130 | 145 | ||
131 | ld r6, VCPU_RMCALL(r4) | 146 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
147 | |||
148 | PPC_LL r6, VCPU_RMCALL(r4) | ||
132 | mtctr r6 | 149 | mtctr r6 |
133 | 150 | ||
134 | ld r3, VCPU_TRAMPOLINE_ENTER(r4) | 151 | PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4) |
135 | LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | 152 | LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) |
136 | 153 | ||
137 | /* Jump to SLB patching handlder and into our guest */ | 154 | /* Jump to segment patching handler and into our guest */ |
138 | bctr | 155 | bctr |
139 | 156 | ||
140 | /* | 157 | /* |
@@ -149,31 +166,20 @@ kvmppc_handler_highmem: | |||
149 | /* | 166 | /* |
150 | * Register usage at this point: | 167 | * Register usage at this point: |
151 | * | 168 | * |
152 | * R0 = guest last inst | 169 | * R1 = host R1 |
153 | * R1 = host R1 | 170 | * R2 = host R2 |
154 | * R2 = host R2 | 171 | * R12 = exit handler id |
155 | * R3 = guest PC | 172 | * R13 = PACA |
156 | * R4 = guest MSR | 173 | * SVCPU.* = guest * |
157 | * R5 = guest DAR | ||
158 | * R6 = guest DSISR | ||
159 | * R13 = PACA | ||
160 | * PACA.KVM.* = guest * | ||
161 | * | 174 | * |
162 | */ | 175 | */ |
163 | 176 | ||
164 | /* R7 = vcpu */ | 177 | /* R7 = vcpu */ |
165 | ld r7, GPR4(r1) | 178 | PPC_LL r7, GPR4(r1) |
166 | 179 | ||
167 | /* Now save the guest state */ | 180 | #ifdef CONFIG_PPC_BOOK3S_64 |
168 | 181 | ||
169 | stw r0, VCPU_LAST_INST(r7) | 182 | PPC_LL r5, VCPU_HFLAGS(r7) |
170 | |||
171 | std r3, VCPU_PC(r7) | ||
172 | std r4, VCPU_SHADOW_SRR1(r7) | ||
173 | std r5, VCPU_FAULT_DEAR(r7) | ||
174 | std r6, VCPU_FAULT_DSISR(r7) | ||
175 | |||
176 | ld r5, VCPU_HFLAGS(r7) | ||
177 | rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ | 183 | rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ |
178 | beq no_dcbz32_off | 184 | beq no_dcbz32_off |
179 | 185 | ||
@@ -184,35 +190,29 @@ kvmppc_handler_highmem: | |||
184 | 190 | ||
185 | no_dcbz32_off: | 191 | no_dcbz32_off: |
186 | 192 | ||
187 | std r14, VCPU_GPR(r14)(r7) | 193 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
188 | std r15, VCPU_GPR(r15)(r7) | 194 | |
189 | std r16, VCPU_GPR(r16)(r7) | 195 | PPC_STL r14, VCPU_GPR(r14)(r7) |
190 | std r17, VCPU_GPR(r17)(r7) | 196 | PPC_STL r15, VCPU_GPR(r15)(r7) |
191 | std r18, VCPU_GPR(r18)(r7) | 197 | PPC_STL r16, VCPU_GPR(r16)(r7) |
192 | std r19, VCPU_GPR(r19)(r7) | 198 | PPC_STL r17, VCPU_GPR(r17)(r7) |
193 | std r20, VCPU_GPR(r20)(r7) | 199 | PPC_STL r18, VCPU_GPR(r18)(r7) |
194 | std r21, VCPU_GPR(r21)(r7) | 200 | PPC_STL r19, VCPU_GPR(r19)(r7) |
195 | std r22, VCPU_GPR(r22)(r7) | 201 | PPC_STL r20, VCPU_GPR(r20)(r7) |
196 | std r23, VCPU_GPR(r23)(r7) | 202 | PPC_STL r21, VCPU_GPR(r21)(r7) |
197 | std r24, VCPU_GPR(r24)(r7) | 203 | PPC_STL r22, VCPU_GPR(r22)(r7) |
198 | std r25, VCPU_GPR(r25)(r7) | 204 | PPC_STL r23, VCPU_GPR(r23)(r7) |
199 | std r26, VCPU_GPR(r26)(r7) | 205 | PPC_STL r24, VCPU_GPR(r24)(r7) |
200 | std r27, VCPU_GPR(r27)(r7) | 206 | PPC_STL r25, VCPU_GPR(r25)(r7) |
201 | std r28, VCPU_GPR(r28)(r7) | 207 | PPC_STL r26, VCPU_GPR(r26)(r7) |
202 | std r29, VCPU_GPR(r29)(r7) | 208 | PPC_STL r27, VCPU_GPR(r27)(r7) |
203 | std r30, VCPU_GPR(r30)(r7) | 209 | PPC_STL r28, VCPU_GPR(r28)(r7) |
204 | std r31, VCPU_GPR(r31)(r7) | 210 | PPC_STL r29, VCPU_GPR(r29)(r7) |
205 | 211 | PPC_STL r30, VCPU_GPR(r30)(r7) | |
206 | /* Save guest CTR */ | 212 | PPC_STL r31, VCPU_GPR(r31)(r7) |
207 | mfctr r5 | ||
208 | std r5, VCPU_CTR(r7) | ||
209 | |||
210 | /* Save guest LR */ | ||
211 | mflr r5 | ||
212 | std r5, VCPU_LR(r7) | ||
213 | 213 | ||
214 | /* Restore host msr -> SRR1 */ | 214 | /* Restore host msr -> SRR1 */ |
215 | ld r6, VCPU_HOST_MSR(r7) | 215 | PPC_LL r6, VCPU_HOST_MSR(r7) |
216 | 216 | ||
217 | /* | 217 | /* |
218 | * For some interrupts, we need to call the real Linux | 218 | * For some interrupts, we need to call the real Linux |
@@ -228,9 +228,12 @@ no_dcbz32_off: | |||
228 | beq call_linux_handler | 228 | beq call_linux_handler |
229 | cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER | 229 | cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER |
230 | beq call_linux_handler | 230 | beq call_linux_handler |
231 | cmpwi r12, BOOK3S_INTERRUPT_PERFMON | ||
232 | beq call_linux_handler | ||
231 | 233 | ||
232 | /* Back to EE=1 */ | 234 | /* Back to EE=1 */ |
233 | mtmsr r6 | 235 | mtmsr r6 |
236 | sync | ||
234 | b kvm_return_point | 237 | b kvm_return_point |
235 | 238 | ||
236 | call_linux_handler: | 239 | call_linux_handler: |
@@ -249,14 +252,14 @@ call_linux_handler: | |||
249 | */ | 252 | */ |
250 | 253 | ||
251 | /* Restore host IP -> SRR0 */ | 254 | /* Restore host IP -> SRR0 */ |
252 | ld r5, VCPU_HOST_RETIP(r7) | 255 | PPC_LL r5, VCPU_HOST_RETIP(r7) |
253 | 256 | ||
254 | /* XXX Better move to a safe function? | 257 | /* XXX Better move to a safe function? |
255 | * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */ | 258 | * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */ |
256 | 259 | ||
257 | mtlr r12 | 260 | mtlr r12 |
258 | 261 | ||
259 | ld r4, VCPU_TRAMPOLINE_LOWMEM(r7) | 262 | PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7) |
260 | mtsrr0 r4 | 263 | mtsrr0 r4 |
261 | LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | 264 | LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) |
262 | mtsrr1 r3 | 265 | mtsrr1 r3 |
@@ -274,7 +277,7 @@ kvm_return_point: | |||
274 | 277 | ||
275 | /* Restore r3 (kvm_run) and r4 (vcpu) */ | 278 | /* Restore r3 (kvm_run) and r4 (vcpu) */ |
276 | REST_2GPRS(3, r1) | 279 | REST_2GPRS(3, r1) |
277 | bl KVMPPC_HANDLE_EXIT | 280 | bl FUNC(kvmppc_handle_exit) |
278 | 281 | ||
279 | /* If RESUME_GUEST, get back in the loop */ | 282 | /* If RESUME_GUEST, get back in the loop */ |
280 | cmpwi r3, RESUME_GUEST | 283 | cmpwi r3, RESUME_GUEST |
@@ -285,7 +288,7 @@ kvm_return_point: | |||
285 | 288 | ||
286 | kvm_exit_loop: | 289 | kvm_exit_loop: |
287 | 290 | ||
288 | ld r4, _LINK(r1) | 291 | PPC_LL r4, _LINK(r1) |
289 | mtlr r4 | 292 | mtlr r4 |
290 | 293 | ||
291 | /* Restore non-volatile host registers (r14 - r31) */ | 294 | /* Restore non-volatile host registers (r14 - r31) */ |
@@ -296,8 +299,8 @@ kvm_exit_loop: | |||
296 | 299 | ||
297 | kvm_loop_heavyweight: | 300 | kvm_loop_heavyweight: |
298 | 301 | ||
299 | ld r4, _LINK(r1) | 302 | PPC_LL r4, _LINK(r1) |
300 | std r4, (16 + SWITCH_FRAME_SIZE)(r1) | 303 | PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1) |
301 | 304 | ||
302 | /* Load vcpu and cpu_run */ | 305 | /* Load vcpu and cpu_run */ |
303 | REST_2GPRS(3, r1) | 306 | REST_2GPRS(3, r1) |
@@ -315,4 +318,3 @@ kvm_loop_lightweight: | |||
315 | 318 | ||
316 | /* Jump back into the beginning of this function */ | 319 | /* Jump back into the beginning of this function */ |
317 | b kvm_start_lightweight | 320 | b kvm_start_lightweight |
318 | |||
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c new file mode 100644 index 000000000000..a9f66abafcb3 --- /dev/null +++ b/arch/powerpc/kvm/book3s_paired_singles.c | |||
@@ -0,0 +1,1289 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright Novell Inc 2010 | ||
16 | * | ||
17 | * Authors: Alexander Graf <agraf@suse.de> | ||
18 | */ | ||
19 | |||
20 | #include <asm/kvm.h> | ||
21 | #include <asm/kvm_ppc.h> | ||
22 | #include <asm/disassemble.h> | ||
23 | #include <asm/kvm_book3s.h> | ||
24 | #include <asm/kvm_fpu.h> | ||
25 | #include <asm/reg.h> | ||
26 | #include <asm/cacheflush.h> | ||
27 | #include <linux/vmalloc.h> | ||
28 | |||
29 | /* #define DEBUG */ | ||
30 | |||
31 | #ifdef DEBUG | ||
32 | #define dprintk printk | ||
33 | #else | ||
34 | #define dprintk(...) do { } while(0); | ||
35 | #endif | ||
36 | |||
37 | #define OP_LFS 48 | ||
38 | #define OP_LFSU 49 | ||
39 | #define OP_LFD 50 | ||
40 | #define OP_LFDU 51 | ||
41 | #define OP_STFS 52 | ||
42 | #define OP_STFSU 53 | ||
43 | #define OP_STFD 54 | ||
44 | #define OP_STFDU 55 | ||
45 | #define OP_PSQ_L 56 | ||
46 | #define OP_PSQ_LU 57 | ||
47 | #define OP_PSQ_ST 60 | ||
48 | #define OP_PSQ_STU 61 | ||
49 | |||
50 | #define OP_31_LFSX 535 | ||
51 | #define OP_31_LFSUX 567 | ||
52 | #define OP_31_LFDX 599 | ||
53 | #define OP_31_LFDUX 631 | ||
54 | #define OP_31_STFSX 663 | ||
55 | #define OP_31_STFSUX 695 | ||
56 | #define OP_31_STFX 727 | ||
57 | #define OP_31_STFUX 759 | ||
58 | #define OP_31_LWIZX 887 | ||
59 | #define OP_31_STFIWX 983 | ||
60 | |||
61 | #define OP_59_FADDS 21 | ||
62 | #define OP_59_FSUBS 20 | ||
63 | #define OP_59_FSQRTS 22 | ||
64 | #define OP_59_FDIVS 18 | ||
65 | #define OP_59_FRES 24 | ||
66 | #define OP_59_FMULS 25 | ||
67 | #define OP_59_FRSQRTES 26 | ||
68 | #define OP_59_FMSUBS 28 | ||
69 | #define OP_59_FMADDS 29 | ||
70 | #define OP_59_FNMSUBS 30 | ||
71 | #define OP_59_FNMADDS 31 | ||
72 | |||
73 | #define OP_63_FCMPU 0 | ||
74 | #define OP_63_FCPSGN 8 | ||
75 | #define OP_63_FRSP 12 | ||
76 | #define OP_63_FCTIW 14 | ||
77 | #define OP_63_FCTIWZ 15 | ||
78 | #define OP_63_FDIV 18 | ||
79 | #define OP_63_FADD 21 | ||
80 | #define OP_63_FSQRT 22 | ||
81 | #define OP_63_FSEL 23 | ||
82 | #define OP_63_FRE 24 | ||
83 | #define OP_63_FMUL 25 | ||
84 | #define OP_63_FRSQRTE 26 | ||
85 | #define OP_63_FMSUB 28 | ||
86 | #define OP_63_FMADD 29 | ||
87 | #define OP_63_FNMSUB 30 | ||
88 | #define OP_63_FNMADD 31 | ||
89 | #define OP_63_FCMPO 32 | ||
90 | #define OP_63_MTFSB1 38 // XXX | ||
91 | #define OP_63_FSUB 20 | ||
92 | #define OP_63_FNEG 40 | ||
93 | #define OP_63_MCRFS 64 | ||
94 | #define OP_63_MTFSB0 70 | ||
95 | #define OP_63_FMR 72 | ||
96 | #define OP_63_MTFSFI 134 | ||
97 | #define OP_63_FABS 264 | ||
98 | #define OP_63_MFFS 583 | ||
99 | #define OP_63_MTFSF 711 | ||
100 | |||
101 | #define OP_4X_PS_CMPU0 0 | ||
102 | #define OP_4X_PSQ_LX 6 | ||
103 | #define OP_4XW_PSQ_STX 7 | ||
104 | #define OP_4A_PS_SUM0 10 | ||
105 | #define OP_4A_PS_SUM1 11 | ||
106 | #define OP_4A_PS_MULS0 12 | ||
107 | #define OP_4A_PS_MULS1 13 | ||
108 | #define OP_4A_PS_MADDS0 14 | ||
109 | #define OP_4A_PS_MADDS1 15 | ||
110 | #define OP_4A_PS_DIV 18 | ||
111 | #define OP_4A_PS_SUB 20 | ||
112 | #define OP_4A_PS_ADD 21 | ||
113 | #define OP_4A_PS_SEL 23 | ||
114 | #define OP_4A_PS_RES 24 | ||
115 | #define OP_4A_PS_MUL 25 | ||
116 | #define OP_4A_PS_RSQRTE 26 | ||
117 | #define OP_4A_PS_MSUB 28 | ||
118 | #define OP_4A_PS_MADD 29 | ||
119 | #define OP_4A_PS_NMSUB 30 | ||
120 | #define OP_4A_PS_NMADD 31 | ||
121 | #define OP_4X_PS_CMPO0 32 | ||
122 | #define OP_4X_PSQ_LUX 38 | ||
123 | #define OP_4XW_PSQ_STUX 39 | ||
124 | #define OP_4X_PS_NEG 40 | ||
125 | #define OP_4X_PS_CMPU1 64 | ||
126 | #define OP_4X_PS_MR 72 | ||
127 | #define OP_4X_PS_CMPO1 96 | ||
128 | #define OP_4X_PS_NABS 136 | ||
129 | #define OP_4X_PS_ABS 264 | ||
130 | #define OP_4X_PS_MERGE00 528 | ||
131 | #define OP_4X_PS_MERGE01 560 | ||
132 | #define OP_4X_PS_MERGE10 592 | ||
133 | #define OP_4X_PS_MERGE11 624 | ||
134 | |||
135 | #define SCALAR_NONE 0 | ||
136 | #define SCALAR_HIGH (1 << 0) | ||
137 | #define SCALAR_LOW (1 << 1) | ||
138 | #define SCALAR_NO_PS0 (1 << 2) | ||
139 | #define SCALAR_NO_PS1 (1 << 3) | ||
140 | |||
141 | #define GQR_ST_TYPE_MASK 0x00000007 | ||
142 | #define GQR_ST_TYPE_SHIFT 0 | ||
143 | #define GQR_ST_SCALE_MASK 0x00003f00 | ||
144 | #define GQR_ST_SCALE_SHIFT 8 | ||
145 | #define GQR_LD_TYPE_MASK 0x00070000 | ||
146 | #define GQR_LD_TYPE_SHIFT 16 | ||
147 | #define GQR_LD_SCALE_MASK 0x3f000000 | ||
148 | #define GQR_LD_SCALE_SHIFT 24 | ||
149 | |||
150 | #define GQR_QUANTIZE_FLOAT 0 | ||
151 | #define GQR_QUANTIZE_U8 4 | ||
152 | #define GQR_QUANTIZE_U16 5 | ||
153 | #define GQR_QUANTIZE_S8 6 | ||
154 | #define GQR_QUANTIZE_S16 7 | ||
155 | |||
156 | #define FPU_LS_SINGLE 0 | ||
157 | #define FPU_LS_DOUBLE 1 | ||
158 | #define FPU_LS_SINGLE_LOW 2 | ||
159 | |||
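The GQR_* masks and shifts above describe the layout of the graphics quantization registers (GQRs) that steer the quantized psq_l/psq_st forms on paired-single capable cores. As a minimal stand-alone sketch of that layout — the helper and the register value below are invented for illustration, not taken from this patch — the fields can be pulled apart like this:

#include <stdint.h>
#include <stdio.h>

/* Illustrative decode of a GQR value using the masks defined above.
 * The sample value is made up; real values come from the guest. */
int main(void)
{
	uint32_t gqr = 0x00070004;	/* hypothetical guest GQR */

	unsigned st_type  =  gqr & 0x00000007;		/* GQR_ST_TYPE_MASK  */
	unsigned st_scale = (gqr & 0x00003f00) >> 8;	/* GQR_ST_SCALE_MASK */
	unsigned ld_type  = (gqr & 0x00070000) >> 16;	/* GQR_LD_TYPE_MASK  */
	unsigned ld_scale = (gqr & 0x3f000000) >> 24;	/* GQR_LD_SCALE_MASK */

	/* With 0x00070004: load type 7 (GQR_QUANTIZE_S16), store type 4
	 * (GQR_QUANTIZE_U8), both scales 0. */
	printf("ld type %u scale %u, st type %u scale %u\n",
	       ld_type, ld_scale, st_type, st_scale);
	return 0;
}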
160 | static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) | ||
161 | { | ||
162 | struct thread_struct t; | ||
163 | |||
164 | t.fpscr.val = vcpu->arch.fpscr; | ||
165 | cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t); | ||
166 | } | ||
167 | |||
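Throughout this file a guest paired-single register is modeled as two halves: PS0 lives in vcpu->arch.fpr[] as a host double, PS1 in vcpu->arch.qpr[] as the raw 32-bit single. kvmppc_sync_qpr() above keeps the two views coherent after a scalar FPU op has written only the fpr side. A host-side sketch of the same split, with invented names and plain C casts standing in for cvt_df():

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Illustrative model only: PS0 stored as a double, PS1 as a raw single. */
struct ps_reg {
	double   ps0;	/* like vcpu->arch.fpr[n] */
	uint32_t ps1;	/* like vcpu->arch.qpr[n] */
};

static void sync_ps1_from_ps0(struct ps_reg *r)
{
	float f = (float)r->ps0;	/* the kernel uses cvt_df() for this */

	memcpy(&r->ps1, &f, sizeof(r->ps1));
}

int main(void)
{
	struct ps_reg r = { .ps0 = 1.5, .ps1 = 0 };

	sync_ps1_from_ps0(&r);
	printf("ps1 bits: 0x%08x\n", r.ps1);	/* 0x3fc00000 == 1.5f */
	return 0;
}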
168 | static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) | ||
169 | { | ||
170 | u64 dsisr; | ||
171 | |||
172 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0); | ||
173 | vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); | ||
174 | vcpu->arch.dear = eaddr; | ||
175 | /* Page Fault */ | ||
176 | dsisr = kvmppc_set_field(0, 33, 33, 1); | ||
177 | if (is_store) | ||
178 | to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); | ||
179 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); | ||
180 | } | ||
181 | |||
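kvmppc_inject_pf() above builds the fault state with kvmppc_set_field(), which counts bits IBM-style (bit 0 = most significant) over a 64-bit value; bit 33 is thus bit 1 of the architected 32-bit DSISR (translation not found) and bit 38 is bit 6 (access was a store). A small sketch, using a hypothetical helper with the same numbering convention, shows the masks that result:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-alone equivalent of kvmppc_set_field(): msb/lsb use
 * IBM numbering over a 64-bit value, i.e. bit 0 is the most significant. */
static uint64_t set_field64(uint64_t v, int msb, int lsb, uint64_t val)
{
	uint64_t mask = (1ULL << (lsb - msb + 1)) - 1;

	return (v & ~(mask << (63 - lsb))) | ((val & mask) << (63 - lsb));
}

int main(void)
{
	uint64_t dsisr = set_field64(0, 33, 33, 1);	/* page fault       */
	uint64_t store = set_field64(dsisr, 38, 38, 1);	/* fault on a store */

	printf("load fault:  0x%08llx\n", (unsigned long long)dsisr); /* 0x40000000 */
	printf("store fault: 0x%08llx\n", (unsigned long long)store); /* 0x42000000 */
	return 0;
}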
182 | static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
183 | int rs, ulong addr, int ls_type) | ||
184 | { | ||
185 | int emulated = EMULATE_FAIL; | ||
186 | struct thread_struct t; | ||
187 | int r; | ||
188 | char tmp[8]; | ||
189 | int len = sizeof(u32); | ||
190 | |||
191 | if (ls_type == FPU_LS_DOUBLE) | ||
192 | len = sizeof(u64); | ||
193 | |||
194 | t.fpscr.val = vcpu->arch.fpscr; | ||
195 | |||
196 | /* read from memory */ | ||
197 | r = kvmppc_ld(vcpu, &addr, len, tmp, true); | ||
198 | vcpu->arch.paddr_accessed = addr; | ||
199 | |||
200 | if (r < 0) { | ||
201 | kvmppc_inject_pf(vcpu, addr, false); | ||
202 | goto done_load; | ||
203 | } else if (r == EMULATE_DO_MMIO) { | ||
204 | emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1); | ||
205 | goto done_load; | ||
206 | } | ||
207 | |||
208 | emulated = EMULATE_DONE; | ||
209 | |||
210 | /* put in registers */ | ||
211 | switch (ls_type) { | ||
212 | case FPU_LS_SINGLE: | ||
213 | cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t); | ||
214 | vcpu->arch.qpr[rs] = *((u32*)tmp); | ||
215 | break; | ||
216 | case FPU_LS_DOUBLE: | ||
217 | vcpu->arch.fpr[rs] = *((u64*)tmp); | ||
218 | break; | ||
219 | } | ||
220 | |||
221 | dprintk(KERN_INFO "KVM: FPR_LD [0x%llx] at 0x%lx (%d)\n", *(u64*)tmp, | ||
222 | addr, len); | ||
223 | |||
224 | done_load: | ||
225 | return emulated; | ||
226 | } | ||
227 | |||
228 | static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
229 | int rs, ulong addr, int ls_type) | ||
230 | { | ||
231 | int emulated = EMULATE_FAIL; | ||
232 | struct thread_struct t; | ||
233 | int r; | ||
234 | char tmp[8]; | ||
235 | u64 val; | ||
236 | int len; | ||
237 | |||
238 | t.fpscr.val = vcpu->arch.fpscr; | ||
239 | |||
240 | switch (ls_type) { | ||
241 | case FPU_LS_SINGLE: | ||
242 | cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t); | ||
243 | val = *((u32*)tmp); | ||
244 | len = sizeof(u32); | ||
245 | break; | ||
246 | case FPU_LS_SINGLE_LOW: | ||
247 | *((u32*)tmp) = vcpu->arch.fpr[rs]; | ||
248 | val = vcpu->arch.fpr[rs] & 0xffffffff; | ||
249 | len = sizeof(u32); | ||
250 | break; | ||
251 | case FPU_LS_DOUBLE: | ||
252 | *((u64*)tmp) = vcpu->arch.fpr[rs]; | ||
253 | val = vcpu->arch.fpr[rs]; | ||
254 | len = sizeof(u64); | ||
255 | break; | ||
256 | default: | ||
257 | val = 0; | ||
258 | len = 0; | ||
259 | } | ||
260 | |||
261 | r = kvmppc_st(vcpu, &addr, len, tmp, true); | ||
262 | vcpu->arch.paddr_accessed = addr; | ||
263 | if (r < 0) { | ||
264 | kvmppc_inject_pf(vcpu, addr, true); | ||
265 | } else if (r == EMULATE_DO_MMIO) { | ||
266 | emulated = kvmppc_handle_store(run, vcpu, val, len, 1); | ||
267 | } else { | ||
268 | emulated = EMULATE_DONE; | ||
269 | } | ||
270 | |||
271 | dprintk(KERN_INFO "KVM: FPR_ST [0x%llx] at 0x%lx (%d)\n", | ||
272 | val, addr, len); | ||
273 | |||
274 | return emulated; | ||
275 | } | ||
276 | |||
277 | static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
278 | int rs, ulong addr, bool w, int i) | ||
279 | { | ||
280 | int emulated = EMULATE_FAIL; | ||
281 | struct thread_struct t; | ||
282 | int r; | ||
283 | float one = 1.0; | ||
284 | u32 tmp[2]; | ||
285 | |||
286 | t.fpscr.val = vcpu->arch.fpscr; | ||
287 | |||
288 | /* read from memory */ | ||
289 | if (w) { | ||
290 | r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true); | ||
291 | memcpy(&tmp[1], &one, sizeof(u32)); | ||
292 | } else { | ||
293 | r = kvmppc_ld(vcpu, &addr, sizeof(u32) * 2, tmp, true); | ||
294 | } | ||
295 | vcpu->arch.paddr_accessed = addr; | ||
296 | if (r < 0) { | ||
297 | kvmppc_inject_pf(vcpu, addr, false); | ||
298 | goto done_load; | ||
299 | } else if ((r == EMULATE_DO_MMIO) && w) { | ||
300 | emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1); | ||
301 | vcpu->arch.qpr[rs] = tmp[1]; | ||
302 | goto done_load; | ||
303 | } else if (r == EMULATE_DO_MMIO) { | ||
304 | emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1); | ||
305 | goto done_load; | ||
306 | } | ||
307 | |||
308 | emulated = EMULATE_DONE; | ||
309 | |||
310 | /* put in registers */ | ||
311 | cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t); | ||
312 | vcpu->arch.qpr[rs] = tmp[1]; | ||
313 | |||
314 | dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], | ||
315 | tmp[1], addr, w ? 4 : 8); | ||
316 | |||
317 | done_load: | ||
318 | return emulated; | ||
319 | } | ||
320 | |||
321 | static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
322 | int rs, ulong addr, bool w, int i) | ||
323 | { | ||
324 | int emulated = EMULATE_FAIL; | ||
325 | struct thread_struct t; | ||
326 | int r; | ||
327 | u32 tmp[2]; | ||
328 | int len = w ? sizeof(u32) : sizeof(u64); | ||
329 | |||
330 | t.fpscr.val = vcpu->arch.fpscr; | ||
331 | |||
332 | cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t); | ||
333 | tmp[1] = vcpu->arch.qpr[rs]; | ||
334 | |||
335 | r = kvmppc_st(vcpu, &addr, len, tmp, true); | ||
336 | vcpu->arch.paddr_accessed = addr; | ||
337 | if (r < 0) { | ||
338 | kvmppc_inject_pf(vcpu, addr, true); | ||
339 | } else if ((r == EMULATE_DO_MMIO) && w) { | ||
340 | emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1); | ||
341 | } else if (r == EMULATE_DO_MMIO) { | ||
342 | u64 val = ((u64)tmp[0] << 32) | tmp[1]; | ||
343 | emulated = kvmppc_handle_store(run, vcpu, val, 8, 1); | ||
344 | } else { | ||
345 | emulated = EMULATE_DONE; | ||
346 | } | ||
347 | |||
348 | dprintk(KERN_INFO "KVM: PSQ_ST [0x%x, 0x%x] at 0x%lx (%d)\n", | ||
349 | tmp[0], tmp[1], addr, len); | ||
350 | |||
351 | return emulated; | ||
352 | } | ||
353 | |||
354 | /* | ||
355 | * Cuts out inst bits with ordering according to spec. | ||
356 | * That means the leftmost bit is zero. All given bits are included. | ||
357 | */ | ||
358 | static inline u32 inst_get_field(u32 inst, int msb, int lsb) | ||
359 | { | ||
360 | return kvmppc_get_field(inst, msb + 32, lsb + 32); | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * Replaces inst bits with ordering according to spec. | ||
365 | */ | ||
366 | static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) | ||
367 | { | ||
368 | return kvmppc_set_field(inst, msb + 32, lsb + 32, value); | ||
369 | } | ||
370 | |||
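The msb/lsb arguments of the two helpers above follow the Power ISA convention where bit 0 is the most significant bit of the 32-bit instruction word; adding 32 simply maps that onto the 64-bit numbering used by kvmppc_get_field()/kvmppc_set_field(). A minimal, self-contained sketch of the same extraction (the helper name and test values are illustrative only):

#include <assert.h>
#include <stdint.h>

/* Illustrative equivalent of inst_get_field(): msb/lsb count from the
 * most significant bit of the 32-bit instruction, as in the ISA manual. */
static uint32_t get_ppc_field(uint32_t inst, int msb, int lsb)
{
	uint32_t mask = (1u << (lsb - msb + 1)) - 1;

	return (inst >> (31 - lsb)) & mask;
}

int main(void)
{
	uint32_t inst = 0x7c0802a6;	/* mflr r0 */

	assert(get_ppc_field(inst, 0, 5) == 31);	/* primary opcode   */
	assert(get_ppc_field(inst, 21, 30) == 339);	/* X-form XO: mfspr */
	return 0;
}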
371 | bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) | ||
372 | { | ||
373 | if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) | ||
374 | return false; | ||
375 | |||
376 | switch (get_op(inst)) { | ||
377 | case OP_PSQ_L: | ||
378 | case OP_PSQ_LU: | ||
379 | case OP_PSQ_ST: | ||
380 | case OP_PSQ_STU: | ||
381 | case OP_LFS: | ||
382 | case OP_LFSU: | ||
383 | case OP_LFD: | ||
384 | case OP_LFDU: | ||
385 | case OP_STFS: | ||
386 | case OP_STFSU: | ||
387 | case OP_STFD: | ||
388 | case OP_STFDU: | ||
389 | return true; | ||
390 | case 4: | ||
391 | /* X form */ | ||
392 | switch (inst_get_field(inst, 21, 30)) { | ||
393 | case OP_4X_PS_CMPU0: | ||
394 | case OP_4X_PSQ_LX: | ||
395 | case OP_4X_PS_CMPO0: | ||
396 | case OP_4X_PSQ_LUX: | ||
397 | case OP_4X_PS_NEG: | ||
398 | case OP_4X_PS_CMPU1: | ||
399 | case OP_4X_PS_MR: | ||
400 | case OP_4X_PS_CMPO1: | ||
401 | case OP_4X_PS_NABS: | ||
402 | case OP_4X_PS_ABS: | ||
403 | case OP_4X_PS_MERGE00: | ||
404 | case OP_4X_PS_MERGE01: | ||
405 | case OP_4X_PS_MERGE10: | ||
406 | case OP_4X_PS_MERGE11: | ||
407 | return true; | ||
408 | } | ||
409 | /* XW form */ | ||
410 | switch (inst_get_field(inst, 25, 30)) { | ||
411 | case OP_4XW_PSQ_STX: | ||
412 | case OP_4XW_PSQ_STUX: | ||
413 | return true; | ||
414 | } | ||
415 | /* A form */ | ||
416 | switch (inst_get_field(inst, 26, 30)) { | ||
417 | case OP_4A_PS_SUM1: | ||
418 | case OP_4A_PS_SUM0: | ||
419 | case OP_4A_PS_MULS0: | ||
420 | case OP_4A_PS_MULS1: | ||
421 | case OP_4A_PS_MADDS0: | ||
422 | case OP_4A_PS_MADDS1: | ||
423 | case OP_4A_PS_DIV: | ||
424 | case OP_4A_PS_SUB: | ||
425 | case OP_4A_PS_ADD: | ||
426 | case OP_4A_PS_SEL: | ||
427 | case OP_4A_PS_RES: | ||
428 | case OP_4A_PS_MUL: | ||
429 | case OP_4A_PS_RSQRTE: | ||
430 | case OP_4A_PS_MSUB: | ||
431 | case OP_4A_PS_MADD: | ||
432 | case OP_4A_PS_NMSUB: | ||
433 | case OP_4A_PS_NMADD: | ||
434 | return true; | ||
435 | } | ||
436 | break; | ||
437 | case 59: | ||
438 | switch (inst_get_field(inst, 21, 30)) { | ||
439 | case OP_59_FADDS: | ||
440 | case OP_59_FSUBS: | ||
441 | case OP_59_FDIVS: | ||
442 | case OP_59_FRES: | ||
443 | case OP_59_FRSQRTES: | ||
444 | return true; | ||
445 | } | ||
446 | switch (inst_get_field(inst, 26, 30)) { | ||
447 | case OP_59_FMULS: | ||
448 | case OP_59_FMSUBS: | ||
449 | case OP_59_FMADDS: | ||
450 | case OP_59_FNMSUBS: | ||
451 | case OP_59_FNMADDS: | ||
452 | return true; | ||
453 | } | ||
454 | break; | ||
455 | case 63: | ||
456 | switch (inst_get_field(inst, 21, 30)) { | ||
457 | case OP_63_MTFSB0: | ||
458 | case OP_63_MTFSB1: | ||
459 | case OP_63_MTFSF: | ||
460 | case OP_63_MTFSFI: | ||
461 | case OP_63_MCRFS: | ||
462 | case OP_63_MFFS: | ||
463 | case OP_63_FCMPU: | ||
464 | case OP_63_FCMPO: | ||
465 | case OP_63_FNEG: | ||
466 | case OP_63_FMR: | ||
467 | case OP_63_FABS: | ||
468 | case OP_63_FRSP: | ||
469 | case OP_63_FDIV: | ||
470 | case OP_63_FADD: | ||
471 | case OP_63_FSUB: | ||
472 | case OP_63_FCTIW: | ||
473 | case OP_63_FCTIWZ: | ||
474 | case OP_63_FRSQRTE: | ||
475 | case OP_63_FCPSGN: | ||
476 | return true; | ||
477 | } | ||
478 | switch (inst_get_field(inst, 26, 30)) { | ||
479 | case OP_63_FMUL: | ||
480 | case OP_63_FSEL: | ||
481 | case OP_63_FMSUB: | ||
482 | case OP_63_FMADD: | ||
483 | case OP_63_FNMSUB: | ||
484 | case OP_63_FNMADD: | ||
485 | return true; | ||
486 | } | ||
487 | break; | ||
488 | case 31: | ||
489 | switch (inst_get_field(inst, 21, 30)) { | ||
490 | case OP_31_LFSX: | ||
491 | case OP_31_LFSUX: | ||
492 | case OP_31_LFDX: | ||
493 | case OP_31_LFDUX: | ||
494 | case OP_31_STFSX: | ||
495 | case OP_31_STFSUX: | ||
496 | case OP_31_STFX: | ||
497 | case OP_31_STFUX: | ||
498 | case OP_31_STFIWX: | ||
499 | return true; | ||
500 | } | ||
501 | break; | ||
502 | } | ||
503 | |||
504 | return false; | ||
505 | } | ||
506 | |||
507 | static int get_d_signext(u32 inst) | ||
508 | { | ||
509 | int d = inst & 0x8ff; | ||
510 | |||
511 | if (d & 0x800) | ||
512 | return -(d & 0x7ff); | ||
513 | |||
514 | return (d & 0x7ff); | ||
515 | } | ||
516 | |||
517 | static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | ||
518 | int reg_out, int reg_in1, int reg_in2, | ||
519 | int reg_in3, int scalar, | ||
520 | void (*func)(struct thread_struct *t, | ||
521 | u32 *dst, u32 *src1, | ||
522 | u32 *src2, u32 *src3)) | ||
523 | { | ||
524 | u32 *qpr = vcpu->arch.qpr; | ||
525 | u64 *fpr = vcpu->arch.fpr; | ||
526 | u32 ps0_out; | ||
527 | u32 ps0_in1, ps0_in2, ps0_in3; | ||
528 | u32 ps1_in1, ps1_in2, ps1_in3; | ||
529 | struct thread_struct t; | ||
530 | t.fpscr.val = vcpu->arch.fpscr; | ||
531 | |||
532 | /* RC */ | ||
533 | WARN_ON(rc); | ||
534 | |||
535 | /* PS0 */ | ||
536 | cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); | ||
537 | cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); | ||
538 | cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t); | ||
539 | |||
540 | if (scalar & SCALAR_LOW) | ||
541 | ps0_in2 = qpr[reg_in2]; | ||
542 | |||
543 | func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); | ||
544 | |||
545 | dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", | ||
546 | ps0_in1, ps0_in2, ps0_in3, ps0_out); | ||
547 | |||
548 | if (!(scalar & SCALAR_NO_PS0)) | ||
549 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | ||
550 | |||
551 | /* PS1 */ | ||
552 | ps1_in1 = qpr[reg_in1]; | ||
553 | ps1_in2 = qpr[reg_in2]; | ||
554 | ps1_in3 = qpr[reg_in3]; | ||
555 | |||
556 | if (scalar & SCALAR_HIGH) | ||
557 | ps1_in2 = ps0_in2; | ||
558 | |||
559 | if (!(scalar & SCALAR_NO_PS1)) | ||
560 | func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); | ||
561 | |||
562 | dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", | ||
563 | ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); | ||
564 | |||
565 | return EMULATE_DONE; | ||
566 | } | ||
567 | |||
568 | static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | ||
569 | int reg_out, int reg_in1, int reg_in2, | ||
570 | int scalar, | ||
571 | void (*func)(struct thread_struct *t, | ||
572 | u32 *dst, u32 *src1, | ||
573 | u32 *src2)) | ||
574 | { | ||
575 | u32 *qpr = vcpu->arch.qpr; | ||
576 | u64 *fpr = vcpu->arch.fpr; | ||
577 | u32 ps0_out; | ||
578 | u32 ps0_in1, ps0_in2; | ||
579 | u32 ps1_out; | ||
580 | u32 ps1_in1, ps1_in2; | ||
581 | struct thread_struct t; | ||
582 | t.fpscr.val = vcpu->arch.fpscr; | ||
583 | |||
584 | /* RC */ | ||
585 | WARN_ON(rc); | ||
586 | |||
587 | /* PS0 */ | ||
588 | cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); | ||
589 | |||
590 | if (scalar & SCALAR_LOW) | ||
591 | ps0_in2 = qpr[reg_in2]; | ||
592 | else | ||
593 | cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); | ||
594 | |||
595 | func(&t, &ps0_out, &ps0_in1, &ps0_in2); | ||
596 | |||
597 | if (!(scalar & SCALAR_NO_PS0)) { | ||
598 | dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", | ||
599 | ps0_in1, ps0_in2, ps0_out); | ||
600 | |||
601 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | ||
602 | } | ||
603 | |||
604 | /* PS1 */ | ||
605 | ps1_in1 = qpr[reg_in1]; | ||
606 | ps1_in2 = qpr[reg_in2]; | ||
607 | |||
608 | if (scalar & SCALAR_HIGH) | ||
609 | ps1_in2 = ps0_in2; | ||
610 | |||
611 | func(&t, &ps1_out, &ps1_in1, &ps1_in2); | ||
612 | |||
613 | if (!(scalar & SCALAR_NO_PS1)) { | ||
614 | qpr[reg_out] = ps1_out; | ||
615 | |||
616 | dprintk(KERN_INFO "PS2 ps1 -> f(0x%x, 0x%x) = 0x%x\n", | ||
617 | ps1_in1, ps1_in2, qpr[reg_out]); | ||
618 | } | ||
619 | |||
620 | return EMULATE_DONE; | ||
621 | } | ||
622 | |||
623 | static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, | ||
624 | int reg_out, int reg_in, | ||
625 | void (*func)(struct thread_struct *t, | ||
626 | u32 *dst, u32 *src1)) | ||
627 | { | ||
628 | u32 *qpr = vcpu->arch.qpr; | ||
629 | u64 *fpr = vcpu->arch.fpr; | ||
630 | u32 ps0_out, ps0_in; | ||
631 | u32 ps1_in; | ||
632 | struct thread_struct t; | ||
633 | t.fpscr.val = vcpu->arch.fpscr; | ||
634 | |||
635 | /* RC */ | ||
636 | WARN_ON(rc); | ||
637 | |||
638 | /* PS0 */ | ||
639 | cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t); | ||
640 | func(&t, &ps0_out, &ps0_in); | ||
641 | |||
642 | dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", | ||
643 | ps0_in, ps0_out); | ||
644 | |||
645 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | ||
646 | |||
647 | /* PS1 */ | ||
648 | ps1_in = qpr[reg_in]; | ||
649 | func(&t, &qpr[reg_out], &ps1_in); | ||
650 | |||
651 | dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", | ||
652 | ps1_in, qpr[reg_out]); | ||
653 | |||
654 | return EMULATE_DONE; | ||
655 | } | ||
656 | |||
657 | int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | ||
658 | { | ||
659 | u32 inst = kvmppc_get_last_inst(vcpu); | ||
660 | enum emulation_result emulated = EMULATE_DONE; | ||
661 | |||
662 | int ax_rd = inst_get_field(inst, 6, 10); | ||
663 | int ax_ra = inst_get_field(inst, 11, 15); | ||
664 | int ax_rb = inst_get_field(inst, 16, 20); | ||
665 | int ax_rc = inst_get_field(inst, 21, 25); | ||
666 | short full_d = inst_get_field(inst, 16, 31); | ||
667 | |||
668 | u64 *fpr_d = &vcpu->arch.fpr[ax_rd]; | ||
669 | u64 *fpr_a = &vcpu->arch.fpr[ax_ra]; | ||
670 | u64 *fpr_b = &vcpu->arch.fpr[ax_rb]; | ||
671 | u64 *fpr_c = &vcpu->arch.fpr[ax_rc]; | ||
672 | |||
673 | bool rcomp = (inst & 1) ? true : false; | ||
674 | u32 cr = kvmppc_get_cr(vcpu); | ||
675 | struct thread_struct t; | ||
676 | #ifdef DEBUG | ||
677 | int i; | ||
678 | #endif | ||
679 | |||
680 | t.fpscr.val = vcpu->arch.fpscr; | ||
681 | |||
682 | if (!kvmppc_inst_is_paired_single(vcpu, inst)) | ||
683 | return EMULATE_FAIL; | ||
684 | |||
685 | if (!(vcpu->arch.msr & MSR_FP)) { | ||
686 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); | ||
687 | return EMULATE_AGAIN; | ||
688 | } | ||
689 | |||
690 | kvmppc_giveup_ext(vcpu, MSR_FP); | ||
691 | preempt_disable(); | ||
692 | enable_kernel_fp(); | ||
693 | /* Do we need to clear FE0 / FE1 here? Don't think so. */ | ||
694 | |||
695 | #ifdef DEBUG | ||
696 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { | ||
697 | u32 f; | ||
698 | cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); | ||
699 | dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", | ||
700 | i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); | ||
701 | } | ||
702 | #endif | ||
703 | |||
704 | switch (get_op(inst)) { | ||
705 | case OP_PSQ_L: | ||
706 | { | ||
707 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
708 | bool w = inst_get_field(inst, 16, 16) ? true : false; | ||
709 | int i = inst_get_field(inst, 17, 19); | ||
710 | |||
711 | addr += get_d_signext(inst); | ||
712 | emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); | ||
713 | break; | ||
714 | } | ||
715 | case OP_PSQ_LU: | ||
716 | { | ||
717 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra); | ||
718 | bool w = inst_get_field(inst, 16, 16) ? true : false; | ||
719 | int i = inst_get_field(inst, 17, 19); | ||
720 | |||
721 | addr += get_d_signext(inst); | ||
722 | emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); | ||
723 | |||
724 | if (emulated == EMULATE_DONE) | ||
725 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
726 | break; | ||
727 | } | ||
728 | case OP_PSQ_ST: | ||
729 | { | ||
730 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
731 | bool w = inst_get_field(inst, 16, 16) ? true : false; | ||
732 | int i = inst_get_field(inst, 17, 19); | ||
733 | |||
734 | addr += get_d_signext(inst); | ||
735 | emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); | ||
736 | break; | ||
737 | } | ||
738 | case OP_PSQ_STU: | ||
739 | { | ||
740 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra); | ||
741 | bool w = inst_get_field(inst, 16, 16) ? true : false; | ||
742 | int i = inst_get_field(inst, 17, 19); | ||
743 | |||
744 | addr += get_d_signext(inst); | ||
745 | emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); | ||
746 | |||
747 | if (emulated == EMULATE_DONE) | ||
748 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
749 | break; | ||
750 | } | ||
751 | case 4: | ||
752 | /* X form */ | ||
753 | switch (inst_get_field(inst, 21, 30)) { | ||
754 | case OP_4X_PS_CMPU0: | ||
755 | /* XXX */ | ||
756 | emulated = EMULATE_FAIL; | ||
757 | break; | ||
758 | case OP_4X_PSQ_LX: | ||
759 | { | ||
760 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
761 | bool w = inst_get_field(inst, 21, 21) ? true : false; | ||
762 | int i = inst_get_field(inst, 22, 24); | ||
763 | |||
764 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
765 | emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); | ||
766 | break; | ||
767 | } | ||
768 | case OP_4X_PS_CMPO0: | ||
769 | /* XXX */ | ||
770 | emulated = EMULATE_FAIL; | ||
771 | break; | ||
772 | case OP_4X_PSQ_LUX: | ||
773 | { | ||
774 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra); | ||
775 | bool w = inst_get_field(inst, 21, 21) ? true : false; | ||
776 | int i = inst_get_field(inst, 22, 24); | ||
777 | |||
778 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
779 | emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); | ||
780 | |||
781 | if (emulated == EMULATE_DONE) | ||
782 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
783 | break; | ||
784 | } | ||
785 | case OP_4X_PS_NEG: | ||
786 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; | ||
787 | vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL; | ||
788 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
789 | vcpu->arch.qpr[ax_rd] ^= 0x80000000; | ||
790 | break; | ||
791 | case OP_4X_PS_CMPU1: | ||
792 | /* XXX */ | ||
793 | emulated = EMULATE_FAIL; | ||
794 | break; | ||
795 | case OP_4X_PS_MR: | ||
796 | WARN_ON(rcomp); | ||
797 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; | ||
798 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
799 | break; | ||
800 | case OP_4X_PS_CMPO1: | ||
801 | /* XXX */ | ||
802 | emulated = EMULATE_FAIL; | ||
803 | break; | ||
804 | case OP_4X_PS_NABS: | ||
805 | WARN_ON(rcomp); | ||
806 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; | ||
807 | vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL; | ||
808 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
809 | vcpu->arch.qpr[ax_rd] |= 0x80000000; | ||
810 | break; | ||
811 | case OP_4X_PS_ABS: | ||
812 | WARN_ON(rcomp); | ||
813 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; | ||
814 | vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL; | ||
815 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
816 | vcpu->arch.qpr[ax_rd] &= ~0x80000000; | ||
817 | break; | ||
818 | case OP_4X_PS_MERGE00: | ||
819 | WARN_ON(rcomp); | ||
820 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; | ||
821 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ | ||
822 | cvt_df((double*)&vcpu->arch.fpr[ax_rb], | ||
823 | (float*)&vcpu->arch.qpr[ax_rd], &t); | ||
824 | break; | ||
825 | case OP_4X_PS_MERGE01: | ||
826 | WARN_ON(rcomp); | ||
827 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; | ||
828 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
829 | break; | ||
830 | case OP_4X_PS_MERGE10: | ||
831 | WARN_ON(rcomp); | ||
832 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ | ||
833 | cvt_fd((float*)&vcpu->arch.qpr[ax_ra], | ||
834 | (double*)&vcpu->arch.fpr[ax_rd], &t); | ||
835 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ | ||
836 | cvt_df((double*)&vcpu->arch.fpr[ax_rb], | ||
837 | (float*)&vcpu->arch.qpr[ax_rd], &t); | ||
838 | break; | ||
839 | case OP_4X_PS_MERGE11: | ||
840 | WARN_ON(rcomp); | ||
841 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ | ||
842 | cvt_fd((float*)&vcpu->arch.qpr[ax_ra], | ||
843 | (double*)&vcpu->arch.fpr[ax_rd], &t); | ||
844 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | ||
845 | break; | ||
846 | } | ||
847 | /* XW form */ | ||
848 | switch (inst_get_field(inst, 25, 30)) { | ||
849 | case OP_4XW_PSQ_STX: | ||
850 | { | ||
851 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
852 | bool w = inst_get_field(inst, 21, 21) ? true : false; | ||
853 | int i = inst_get_field(inst, 22, 24); | ||
854 | |||
855 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
856 | emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); | ||
857 | break; | ||
858 | } | ||
859 | case OP_4XW_PSQ_STUX: | ||
860 | { | ||
861 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra); | ||
862 | bool w = inst_get_field(inst, 21, 21) ? true : false; | ||
863 | int i = inst_get_field(inst, 22, 24); | ||
864 | |||
865 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
866 | emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); | ||
867 | |||
868 | if (emulated == EMULATE_DONE) | ||
869 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
870 | break; | ||
871 | } | ||
872 | } | ||
873 | /* A form */ | ||
874 | switch (inst_get_field(inst, 26, 30)) { | ||
875 | case OP_4A_PS_SUM1: | ||
876 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
877 | ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds); | ||
878 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc]; | ||
879 | break; | ||
880 | case OP_4A_PS_SUM0: | ||
881 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
882 | ax_ra, ax_rb, SCALAR_NO_PS1 | SCALAR_LOW, fps_fadds); | ||
883 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rc]; | ||
884 | break; | ||
885 | case OP_4A_PS_MULS0: | ||
886 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
887 | ax_ra, ax_rc, SCALAR_HIGH, fps_fmuls); | ||
888 | break; | ||
889 | case OP_4A_PS_MULS1: | ||
890 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
891 | ax_ra, ax_rc, SCALAR_LOW, fps_fmuls); | ||
892 | break; | ||
893 | case OP_4A_PS_MADDS0: | ||
894 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
895 | ax_ra, ax_rc, ax_rb, SCALAR_HIGH, fps_fmadds); | ||
896 | break; | ||
897 | case OP_4A_PS_MADDS1: | ||
898 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
899 | ax_ra, ax_rc, ax_rb, SCALAR_LOW, fps_fmadds); | ||
900 | break; | ||
901 | case OP_4A_PS_DIV: | ||
902 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
903 | ax_ra, ax_rb, SCALAR_NONE, fps_fdivs); | ||
904 | break; | ||
905 | case OP_4A_PS_SUB: | ||
906 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
907 | ax_ra, ax_rb, SCALAR_NONE, fps_fsubs); | ||
908 | break; | ||
909 | case OP_4A_PS_ADD: | ||
910 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
911 | ax_ra, ax_rb, SCALAR_NONE, fps_fadds); | ||
912 | break; | ||
913 | case OP_4A_PS_SEL: | ||
914 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
915 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fsel); | ||
916 | break; | ||
917 | case OP_4A_PS_RES: | ||
918 | emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, | ||
919 | ax_rb, fps_fres); | ||
920 | break; | ||
921 | case OP_4A_PS_MUL: | ||
922 | emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, | ||
923 | ax_ra, ax_rc, SCALAR_NONE, fps_fmuls); | ||
924 | break; | ||
925 | case OP_4A_PS_RSQRTE: | ||
926 | emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, | ||
927 | ax_rb, fps_frsqrte); | ||
928 | break; | ||
929 | case OP_4A_PS_MSUB: | ||
930 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
931 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmsubs); | ||
932 | break; | ||
933 | case OP_4A_PS_MADD: | ||
934 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
935 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmadds); | ||
936 | break; | ||
937 | case OP_4A_PS_NMSUB: | ||
938 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
939 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmsubs); | ||
940 | break; | ||
941 | case OP_4A_PS_NMADD: | ||
942 | emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, | ||
943 | ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmadds); | ||
944 | break; | ||
945 | } | ||
946 | break; | ||
947 | |||
948 | /* Real FPU operations */ | ||
949 | |||
950 | case OP_LFS: | ||
951 | { | ||
952 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; | ||
953 | |||
954 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, | ||
955 | FPU_LS_SINGLE); | ||
956 | break; | ||
957 | } | ||
958 | case OP_LFSU: | ||
959 | { | ||
960 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; | ||
961 | |||
962 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, | ||
963 | FPU_LS_SINGLE); | ||
964 | |||
965 | if (emulated == EMULATE_DONE) | ||
966 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
967 | break; | ||
968 | } | ||
969 | case OP_LFD: | ||
970 | { | ||
971 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; | ||
972 | |||
973 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, | ||
974 | FPU_LS_DOUBLE); | ||
975 | break; | ||
976 | } | ||
977 | case OP_LFDU: | ||
978 | { | ||
979 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; | ||
980 | |||
981 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, | ||
982 | FPU_LS_DOUBLE); | ||
983 | |||
984 | if (emulated == EMULATE_DONE) | ||
985 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
986 | break; | ||
987 | } | ||
988 | case OP_STFS: | ||
989 | { | ||
990 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; | ||
991 | |||
992 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, | ||
993 | FPU_LS_SINGLE); | ||
994 | break; | ||
995 | } | ||
996 | case OP_STFSU: | ||
997 | { | ||
998 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; | ||
999 | |||
1000 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, | ||
1001 | FPU_LS_SINGLE); | ||
1002 | |||
1003 | if (emulated == EMULATE_DONE) | ||
1004 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
1005 | break; | ||
1006 | } | ||
1007 | case OP_STFD: | ||
1008 | { | ||
1009 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; | ||
1010 | |||
1011 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, | ||
1012 | FPU_LS_DOUBLE); | ||
1013 | break; | ||
1014 | } | ||
1015 | case OP_STFDU: | ||
1016 | { | ||
1017 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; | ||
1018 | |||
1019 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, | ||
1020 | FPU_LS_DOUBLE); | ||
1021 | |||
1022 | if (emulated == EMULATE_DONE) | ||
1023 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
1024 | break; | ||
1025 | } | ||
1026 | case 31: | ||
1027 | switch (inst_get_field(inst, 21, 30)) { | ||
1028 | case OP_31_LFSX: | ||
1029 | { | ||
1030 | ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; | ||
1031 | |||
1032 | addr += kvmppc_get_gpr(vcpu, ax_rb); | ||
1033 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, | ||
1034 | addr, FPU_LS_SINGLE); | ||
1035 | break; | ||
1036 | } | ||
1037 | case OP_31_LFSUX: | ||
1038 | { | ||
1039 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + | ||
1040 | kvmppc_get_gpr(vcpu, ax_rb); | ||
1041 | |||
1042 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, | ||
1043 | addr, FPU_LS_SINGLE); | ||
1044 | |||
1045 | if (emulated == EMULATE_DONE) | ||
1046 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
1047 | break; | ||
1048 | } | ||
1049 | case OP_31_LFDX: | ||
1050 | { | ||
1051 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + | ||
1052 | kvmppc_get_gpr(vcpu, ax_rb); | ||
1053 | |||
1054 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, | ||
1055 | addr, FPU_LS_DOUBLE); | ||
1056 | break; | ||
1057 | } | ||
1058 | case OP_31_LFDUX: | ||
1059 | { | ||
1060 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + | ||
1061 | kvmppc_get_gpr(vcpu, ax_rb); | ||
1062 | |||
1063 | emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, | ||
1064 | addr, FPU_LS_DOUBLE); | ||
1065 | |||
1066 | if (emulated == EMULATE_DONE) | ||
1067 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
1068 | break; | ||
1069 | } | ||
1070 | case OP_31_STFSX: | ||
1071 | { | ||
1072 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + | ||
1073 | kvmppc_get_gpr(vcpu, ax_rb); | ||
1074 | |||
1075 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
1076 | addr, FPU_LS_SINGLE); | ||
1077 | break; | ||
1078 | } | ||
1079 | case OP_31_STFSUX: | ||
1080 | { | ||
1081 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + | ||
1082 | kvmppc_get_gpr(vcpu, ax_rb); | ||
1083 | |||
1084 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
1085 | addr, FPU_LS_SINGLE); | ||
1086 | |||
1087 | if (emulated == EMULATE_DONE) | ||
1088 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
1089 | break; | ||
1090 | } | ||
1091 | case OP_31_STFX: | ||
1092 | { | ||
1093 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + | ||
1094 | kvmppc_get_gpr(vcpu, ax_rb); | ||
1095 | |||
1096 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
1097 | addr, FPU_LS_DOUBLE); | ||
1098 | break; | ||
1099 | } | ||
1100 | case OP_31_STFUX: | ||
1101 | { | ||
1102 | ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + | ||
1103 | kvmppc_get_gpr(vcpu, ax_rb); | ||
1104 | |||
1105 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
1106 | addr, FPU_LS_DOUBLE); | ||
1107 | |||
1108 | if (emulated == EMULATE_DONE) | ||
1109 | kvmppc_set_gpr(vcpu, ax_ra, addr); | ||
1110 | break; | ||
1111 | } | ||
1112 | case OP_31_STFIWX: | ||
1113 | { | ||
1114 | ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + | ||
1115 | kvmppc_get_gpr(vcpu, ax_rb); | ||
1116 | |||
1117 | emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, | ||
1118 | addr, | ||
1119 | FPU_LS_SINGLE_LOW); | ||
1120 | break; | ||
1121 | } | ||
1122 | break; | ||
1123 | } | ||
1124 | break; | ||
1125 | case 59: | ||
1126 | switch (inst_get_field(inst, 21, 30)) { | ||
1127 | case OP_59_FADDS: | ||
1128 | fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
1129 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1130 | break; | ||
1131 | case OP_59_FSUBS: | ||
1132 | fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
1133 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1134 | break; | ||
1135 | case OP_59_FDIVS: | ||
1136 | fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
1137 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1138 | break; | ||
1139 | case OP_59_FRES: | ||
1140 | fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
1141 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1142 | break; | ||
1143 | case OP_59_FRSQRTES: | ||
1144 | fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
1145 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1146 | break; | ||
1147 | } | ||
1148 | switch (inst_get_field(inst, 26, 30)) { | ||
1149 | case OP_59_FMULS: | ||
1150 | fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); | ||
1151 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1152 | break; | ||
1153 | case OP_59_FMSUBS: | ||
1154 | fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
1155 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1156 | break; | ||
1157 | case OP_59_FMADDS: | ||
1158 | fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
1159 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1160 | break; | ||
1161 | case OP_59_FNMSUBS: | ||
1162 | fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
1163 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1164 | break; | ||
1165 | case OP_59_FNMADDS: | ||
1166 | fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
1167 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1168 | break; | ||
1169 | } | ||
1170 | break; | ||
1171 | case 63: | ||
1172 | switch (inst_get_field(inst, 21, 30)) { | ||
1173 | case OP_63_MTFSB0: | ||
1174 | case OP_63_MTFSB1: | ||
1175 | case OP_63_MCRFS: | ||
1176 | case OP_63_MTFSFI: | ||
1177 | /* XXX need to implement */ | ||
1178 | break; | ||
1179 | case OP_63_MFFS: | ||
1180 | /* XXX missing CR */ | ||
1181 | *fpr_d = vcpu->arch.fpscr; | ||
1182 | break; | ||
1183 | case OP_63_MTFSF: | ||
1184 | /* XXX missing fm bits */ | ||
1185 | /* XXX missing CR */ | ||
1186 | vcpu->arch.fpscr = *fpr_b; | ||
1187 | break; | ||
1188 | case OP_63_FCMPU: | ||
1189 | { | ||
1190 | u32 tmp_cr; | ||
1191 | u32 cr0_mask = 0xf0000000; | ||
1192 | u32 cr_shift = inst_get_field(inst, 6, 8) * 4; | ||
1193 | |||
1194 | fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); | ||
1195 | cr &= ~(cr0_mask >> cr_shift); | ||
1196 | cr |= (cr & cr0_mask) >> cr_shift; | ||
1197 | break; | ||
1198 | } | ||
1199 | case OP_63_FCMPO: | ||
1200 | { | ||
1201 | u32 tmp_cr; | ||
1202 | u32 cr0_mask = 0xf0000000; | ||
1203 | u32 cr_shift = inst_get_field(inst, 6, 8) * 4; | ||
1204 | |||
1205 | fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); | ||
1206 | cr &= ~(cr0_mask >> cr_shift); | ||
1207 | cr |= (cr & cr0_mask) >> cr_shift; | ||
1208 | break; | ||
1209 | } | ||
1210 | case OP_63_FNEG: | ||
1211 | fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
1212 | break; | ||
1213 | case OP_63_FMR: | ||
1214 | *fpr_d = *fpr_b; | ||
1215 | break; | ||
1216 | case OP_63_FABS: | ||
1217 | fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
1218 | break; | ||
1219 | case OP_63_FCPSGN: | ||
1220 | fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
1221 | break; | ||
1222 | case OP_63_FDIV: | ||
1223 | fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
1224 | break; | ||
1225 | case OP_63_FADD: | ||
1226 | fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
1227 | break; | ||
1228 | case OP_63_FSUB: | ||
1229 | fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); | ||
1230 | break; | ||
1231 | case OP_63_FCTIW: | ||
1232 | fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
1233 | break; | ||
1234 | case OP_63_FCTIWZ: | ||
1235 | fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
1236 | break; | ||
1237 | case OP_63_FRSP: | ||
1238 | fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
1239 | kvmppc_sync_qpr(vcpu, ax_rd); | ||
1240 | break; | ||
1241 | case OP_63_FRSQRTE: | ||
1242 | { | ||
1243 | double one = 1.0f; | ||
1244 | |||
1245 | /* fD = sqrt(fB) */ | ||
1246 | fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); | ||
1247 | /* fD = 1.0f / fD */ | ||
1248 | fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); | ||
1249 | break; | ||
1250 | } | ||
1251 | } | ||
1252 | switch (inst_get_field(inst, 26, 30)) { | ||
1253 | case OP_63_FMUL: | ||
1254 | fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); | ||
1255 | break; | ||
1256 | case OP_63_FSEL: | ||
1257 | fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
1258 | break; | ||
1259 | case OP_63_FMSUB: | ||
1260 | fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
1261 | break; | ||
1262 | case OP_63_FMADD: | ||
1263 | fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
1264 | break; | ||
1265 | case OP_63_FNMSUB: | ||
1266 | fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
1267 | break; | ||
1268 | case OP_63_FNMADD: | ||
1269 | fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); | ||
1270 | break; | ||
1271 | } | ||
1272 | break; | ||
1273 | } | ||
1274 | |||
1275 | #ifdef DEBUG | ||
1276 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { | ||
1277 | u32 f; | ||
1278 | cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); | ||
1279 | dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); | ||
1280 | } | ||
1281 | #endif | ||
1282 | |||
1283 | if (rcomp) | ||
1284 | kvmppc_set_cr(vcpu, cr); | ||
1285 | |||
1286 | preempt_enable(); | ||
1287 | |||
1288 | return emulated; | ||
1289 | } | ||
diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index c83c60ad96c5..506d5c316c96 100644 --- a/arch/powerpc/kvm/book3s_64_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S | |||
@@ -22,7 +22,10 @@ | |||
22 | #include <asm/reg.h> | 22 | #include <asm/reg.h> |
23 | #include <asm/page.h> | 23 | #include <asm/page.h> |
24 | #include <asm/asm-offsets.h> | 24 | #include <asm/asm-offsets.h> |
25 | |||
26 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
25 | #include <asm/exception-64s.h> | 27 | #include <asm/exception-64s.h> |
28 | #endif | ||
26 | 29 | ||
27 | /***************************************************************************** | 30 | /***************************************************************************** |
28 | * * | 31 | * * |
@@ -30,6 +33,39 @@ | |||
30 | * * | 33 | * * |
31 | ****************************************************************************/ | 34 | ****************************************************************************/ |
32 | 35 | ||
36 | #if defined(CONFIG_PPC_BOOK3S_64) | ||
37 | |||
38 | #define LOAD_SHADOW_VCPU(reg) \ | ||
39 | mfspr reg, SPRN_SPRG_PACA | ||
40 | |||
41 | #define SHADOW_VCPU_OFF PACA_KVM_SVCPU | ||
42 | #define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR) | ||
43 | #define FUNC(name) GLUE(.,name) | ||
44 | |||
45 | #elif defined(CONFIG_PPC_BOOK3S_32) | ||
46 | |||
47 | #define LOAD_SHADOW_VCPU(reg) \ | ||
48 | mfspr reg, SPRN_SPRG_THREAD; \ | ||
49 | lwz reg, THREAD_KVM_SVCPU(reg); \ | ||
50 | /* PPC32 can have a NULL pointer - let's check for that */ \ | ||
51 | mtspr SPRN_SPRG_SCRATCH1, r12; /* Save r12 */ \ | ||
52 | mfcr r12; \ | ||
53 | cmpwi reg, 0; \ | ||
54 | bne 1f; \ | ||
55 | mfspr reg, SPRN_SPRG_SCRATCH0; \ | ||
56 | mtcr r12; \ | ||
57 | mfspr r12, SPRN_SPRG_SCRATCH1; \ | ||
58 | b kvmppc_resume_\intno; \ | ||
59 | 1:; \ | ||
60 | mtcr r12; \ | ||
61 | mfspr r12, SPRN_SPRG_SCRATCH1; \ | ||
62 | tophys(reg, reg) | ||
63 | |||
64 | #define SHADOW_VCPU_OFF 0 | ||
65 | #define MSR_NOIRQ MSR_KERNEL | ||
66 | #define FUNC(name) name | ||
67 | |||
68 | #endif | ||
33 | 69 | ||
34 | .macro INTERRUPT_TRAMPOLINE intno | 70 | .macro INTERRUPT_TRAMPOLINE intno |
35 | 71 | ||
@@ -42,19 +78,19 @@ kvmppc_trampoline_\intno: | |||
42 | * First thing to do is to find out if we're coming | 78 | * First thing to do is to find out if we're coming |
43 | * from a KVM guest or a Linux process. | 79 | * from a KVM guest or a Linux process. |
44 | * | 80 | * |
45 | * To distinguish, we check a magic byte in the PACA | 81 | * To distinguish, we check a magic byte in the PACA/current |
46 | */ | 82 | */ |
47 | mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */ | 83 | LOAD_SHADOW_VCPU(r13) |
48 | std r12, PACA_KVM_SCRATCH0(r13) | 84 | PPC_STL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) |
49 | mfcr r12 | 85 | mfcr r12 |
50 | stw r12, PACA_KVM_SCRATCH1(r13) | 86 | stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) |
51 | lbz r12, PACA_KVM_IN_GUEST(r13) | 87 | lbz r12, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) |
52 | cmpwi r12, KVM_GUEST_MODE_NONE | 88 | cmpwi r12, KVM_GUEST_MODE_NONE |
53 | bne ..kvmppc_handler_hasmagic_\intno | 89 | bne ..kvmppc_handler_hasmagic_\intno |
54 | /* No KVM guest? Then jump back to the Linux handler! */ | 90 | /* No KVM guest? Then jump back to the Linux handler! */ |
55 | lwz r12, PACA_KVM_SCRATCH1(r13) | 91 | lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) |
56 | mtcr r12 | 92 | mtcr r12 |
57 | ld r12, PACA_KVM_SCRATCH0(r13) | 93 | PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) |
58 | mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ | 94 | mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ |
59 | b kvmppc_resume_\intno /* Get back original handler */ | 95 | b kvmppc_resume_\intno /* Get back original handler */ |
60 | 96 | ||
@@ -76,9 +112,7 @@ kvmppc_trampoline_\intno: | |||
76 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSTEM_RESET | 112 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSTEM_RESET |
77 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK | 113 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK |
78 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE | 114 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE |
79 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT | ||
80 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE | 115 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE |
81 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT | ||
82 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL | 116 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL |
83 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT | 117 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT |
84 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM | 118 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM |
@@ -88,7 +122,14 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSCALL | |||
88 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE | 122 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE |
89 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON | 123 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON |
90 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC | 124 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC |
125 | |||
126 | /* Those are only available on 64 bit machines */ | ||
127 | |||
128 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
129 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT | ||
130 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT | ||
91 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX | 131 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX |
132 | #endif | ||
92 | 133 | ||
93 | /* | 134 | /* |
94 | * Bring us back to the faulting code, but skip the | 135 | * Bring us back to the faulting code, but skip the |
@@ -99,11 +140,11 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX | |||
99 | * | 140 | * |
100 | * Input Registers: | 141 | * Input Registers: |
101 | * | 142 | * |
102 | * R12 = free | 143 | * R12 = free |
103 | * R13 = PACA | 144 | * R13 = Shadow VCPU (PACA) |
104 | * PACA.KVM.SCRATCH0 = guest R12 | 145 | * SVCPU.SCRATCH0 = guest R12 |
105 | * PACA.KVM.SCRATCH1 = guest CR | 146 | * SVCPU.SCRATCH1 = guest CR |
106 | * SPRG_SCRATCH0 = guest R13 | 147 | * SPRG_SCRATCH0 = guest R13 |
107 | * | 148 | * |
108 | */ | 149 | */ |
109 | kvmppc_handler_skip_ins: | 150 | kvmppc_handler_skip_ins: |
@@ -114,9 +155,9 @@ kvmppc_handler_skip_ins: | |||
114 | mtsrr0 r12 | 155 | mtsrr0 r12 |
115 | 156 | ||
116 | /* Clean up all state */ | 157 | /* Clean up all state */ |
117 | lwz r12, PACA_KVM_SCRATCH1(r13) | 158 | lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) |
118 | mtcr r12 | 159 | mtcr r12 |
119 | ld r12, PACA_KVM_SCRATCH0(r13) | 160 | PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) |
120 | mfspr r13, SPRN_SPRG_SCRATCH0 | 161 | mfspr r13, SPRN_SPRG_SCRATCH0 |
121 | 162 | ||
122 | /* And get back into the code */ | 163 | /* And get back into the code */ |
@@ -147,41 +188,48 @@ kvmppc_handler_lowmem_trampoline_end: | |||
147 | * | 188 | * |
148 | * R3 = function | 189 | * R3 = function |
149 | * R4 = MSR | 190 | * R4 = MSR |
150 | * R5 = CTR | 191 | * R5 = scratch register |
151 | * | 192 | * |
152 | */ | 193 | */ |
153 | _GLOBAL(kvmppc_rmcall) | 194 | _GLOBAL(kvmppc_rmcall) |
154 | mtmsr r4 /* Disable relocation, so mtsrr | 195 | LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ) |
196 | mtmsr r5 /* Disable relocation and interrupts, so mtsrr | ||
155 | doesn't get interrupted */ | 197 | doesn't get interrupted */ |
156 | mtctr r5 | 198 | sync |
157 | mtsrr0 r3 | 199 | mtsrr0 r3 |
158 | mtsrr1 r4 | 200 | mtsrr1 r4 |
159 | RFI | 201 | RFI |
160 | 202 | ||
203 | #if defined(CONFIG_PPC_BOOK3S_32) | ||
204 | #define STACK_LR INT_FRAME_SIZE+4 | ||
205 | #elif defined(CONFIG_PPC_BOOK3S_64) | ||
206 | #define STACK_LR _LINK | ||
207 | #endif | ||
208 | |||
161 | /* | 209 | /* |
162 | * Activate current's external feature (FPU/Altivec/VSX) | 210 | * Activate current's external feature (FPU/Altivec/VSX) |
163 | */ | 211 | */ |
164 | #define define_load_up(what) \ | 212 | #define define_load_up(what) \ |
165 | \ | 213 | \ |
166 | _GLOBAL(kvmppc_load_up_ ## what); \ | 214 | _GLOBAL(kvmppc_load_up_ ## what); \ |
167 | subi r1, r1, INT_FRAME_SIZE; \ | 215 | PPC_STLU r1, -INT_FRAME_SIZE(r1); \ |
168 | mflr r3; \ | 216 | mflr r3; \ |
169 | std r3, _LINK(r1); \ | 217 | PPC_STL r3, STACK_LR(r1); \ |
170 | mfmsr r4; \ | 218 | PPC_STL r20, _NIP(r1); \ |
171 | std r31, GPR3(r1); \ | 219 | mfmsr r20; \ |
172 | mr r31, r4; \ | 220 | LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ |
173 | li r5, MSR_DR; \ | 221 | andc r3,r20,r3; /* Disable DR,EE */ \ |
174 | oris r5, r5, MSR_EE@h; \ | 222 | mtmsr r3; \ |
175 | andc r4, r4, r5; \ | 223 | sync; \ |
176 | mtmsr r4; \ | 224 | \ |
177 | \ | 225 | bl FUNC(load_up_ ## what); \ |
178 | bl .load_up_ ## what; \ | 226 | \ |
179 | \ | 227 | mtmsr r20; /* Enable DR,EE */ \ |
180 | mtmsr r31; \ | 228 | sync; \ |
181 | ld r3, _LINK(r1); \ | 229 | PPC_LL r3, STACK_LR(r1); \ |
182 | ld r31, GPR3(r1); \ | 230 | PPC_LL r20, _NIP(r1); \ |
183 | addi r1, r1, INT_FRAME_SIZE; \ | 231 | mtlr r3; \ |
184 | mtlr r3; \ | 232 | addi r1, r1, INT_FRAME_SIZE; \ |
185 | blr | 233 | blr |
186 | 234 | ||
187 | define_load_up(fpu) | 235 | define_load_up(fpu) |
@@ -194,11 +242,10 @@ define_load_up(vsx) | |||
194 | 242 | ||
195 | .global kvmppc_trampoline_lowmem | 243 | .global kvmppc_trampoline_lowmem |
196 | kvmppc_trampoline_lowmem: | 244 | kvmppc_trampoline_lowmem: |
197 | .long kvmppc_handler_lowmem_trampoline - _stext | 245 | .long kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START |
198 | 246 | ||
199 | .global kvmppc_trampoline_enter | 247 | .global kvmppc_trampoline_enter |
200 | kvmppc_trampoline_enter: | 248 | kvmppc_trampoline_enter: |
201 | .long kvmppc_handler_trampoline_enter - _stext | 249 | .long kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START |
202 | |||
203 | #include "book3s_64_slb.S" | ||
204 | 250 | ||
251 | #include "book3s_segment.S" | ||
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S new file mode 100644 index 000000000000..7c52ed0b7051 --- /dev/null +++ b/arch/powerpc/kvm/book3s_segment.S | |||
@@ -0,0 +1,259 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License, version 2, as | ||
4 | * published by the Free Software Foundation. | ||
5 | * | ||
6 | * This program is distributed in the hope that it will be useful, | ||
7 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | * GNU General Public License for more details. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * along with this program; if not, write to the Free Software | ||
13 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
14 | * | ||
15 | * Copyright SUSE Linux Products GmbH 2010 | ||
16 | * | ||
17 | * Authors: Alexander Graf <agraf@suse.de> | ||
18 | */ | ||
19 | |||
20 | /* Real mode helpers */ | ||
21 | |||
22 | #if defined(CONFIG_PPC_BOOK3S_64) | ||
23 | |||
24 | #define GET_SHADOW_VCPU(reg) \ | ||
25 | addi reg, r13, PACA_KVM_SVCPU | ||
26 | |||
27 | #elif defined(CONFIG_PPC_BOOK3S_32) | ||
28 | |||
29 | #define GET_SHADOW_VCPU(reg) \ | ||
30 | tophys(reg, r2); \ | ||
31 | lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \ | ||
32 | tophys(reg, reg) | ||
33 | |||
34 | #endif | ||
35 | |||
36 | /* Disable for nested KVM */ | ||
37 | #define USE_QUICK_LAST_INST | ||
38 | |||
39 | |||
40 | /* Get helper functions for subarch specific functionality */ | ||
41 | |||
42 | #if defined(CONFIG_PPC_BOOK3S_64) | ||
43 | #include "book3s_64_slb.S" | ||
44 | #elif defined(CONFIG_PPC_BOOK3S_32) | ||
45 | #include "book3s_32_sr.S" | ||
46 | #endif | ||
47 | |||
48 | /****************************************************************************** | ||
49 | * * | ||
50 | * Entry code * | ||
51 | * * | ||
52 | *****************************************************************************/ | ||
53 | |||
54 | .global kvmppc_handler_trampoline_enter | ||
55 | kvmppc_handler_trampoline_enter: | ||
56 | |||
57 | /* Required state: | ||
58 | * | ||
59 | * MSR = ~IR|DR | ||
60 | * R13 = PACA | ||
61 | * R1 = host R1 | ||
62 | * R2 = host R2 | ||
63 | * R10 = guest MSR | ||
64 | * all other volatile GPRS = free | ||
65 | * SVCPU[CR] = guest CR | ||
66 | * SVCPU[XER] = guest XER | ||
67 | * SVCPU[CTR] = guest CTR | ||
68 | * SVCPU[LR] = guest LR | ||
69 | */ | ||
70 | |||
71 | /* r3 = shadow vcpu */ | ||
72 | GET_SHADOW_VCPU(r3) | ||
73 | |||
74 | /* Move SRR0 and SRR1 into the respective regs */ | ||
75 | PPC_LL r9, SVCPU_PC(r3) | ||
76 | mtsrr0 r9 | ||
77 | mtsrr1 r10 | ||
78 | |||
79 | /* Activate guest mode, so faults get handled by KVM */ | ||
80 | li r11, KVM_GUEST_MODE_GUEST | ||
81 | stb r11, SVCPU_IN_GUEST(r3) | ||
82 | |||
83 | /* Switch to guest segment. This is subarch specific. */ | ||
84 | LOAD_GUEST_SEGMENTS | ||
85 | |||
86 | /* Enter guest */ | ||
87 | |||
88 | PPC_LL r4, (SVCPU_CTR)(r3) | ||
89 | PPC_LL r5, (SVCPU_LR)(r3) | ||
90 | lwz r6, (SVCPU_CR)(r3) | ||
91 | lwz r7, (SVCPU_XER)(r3) | ||
92 | |||
93 | mtctr r4 | ||
94 | mtlr r5 | ||
95 | mtcr r6 | ||
96 | mtxer r7 | ||
97 | |||
98 | PPC_LL r0, (SVCPU_R0)(r3) | ||
99 | PPC_LL r1, (SVCPU_R1)(r3) | ||
100 | PPC_LL r2, (SVCPU_R2)(r3) | ||
101 | PPC_LL r4, (SVCPU_R4)(r3) | ||
102 | PPC_LL r5, (SVCPU_R5)(r3) | ||
103 | PPC_LL r6, (SVCPU_R6)(r3) | ||
104 | PPC_LL r7, (SVCPU_R7)(r3) | ||
105 | PPC_LL r8, (SVCPU_R8)(r3) | ||
106 | PPC_LL r9, (SVCPU_R9)(r3) | ||
107 | PPC_LL r10, (SVCPU_R10)(r3) | ||
108 | PPC_LL r11, (SVCPU_R11)(r3) | ||
109 | PPC_LL r12, (SVCPU_R12)(r3) | ||
110 | PPC_LL r13, (SVCPU_R13)(r3) | ||
111 | |||
112 | PPC_LL r3, (SVCPU_R3)(r3) | ||
113 | |||
114 | RFI | ||
115 | kvmppc_handler_trampoline_enter_end: | ||
116 | |||
117 | |||
118 | |||
119 | /****************************************************************************** | ||
120 | * * | ||
121 | * Exit code * | ||
122 | * * | ||
123 | *****************************************************************************/ | ||
124 | |||
125 | .global kvmppc_handler_trampoline_exit | ||
126 | kvmppc_handler_trampoline_exit: | ||
127 | |||
128 | /* Register usage at this point: | ||
129 | * | ||
130 | * SPRG_SCRATCH0 = guest R13 | ||
131 | * R12 = exit handler id | ||
132 | * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] | ||
133 | * SVCPU.SCRATCH0 = guest R12 | ||
134 | * SVCPU.SCRATCH1 = guest CR | ||
135 | * | ||
136 | */ | ||
137 | |||
138 | /* Save registers */ | ||
139 | |||
140 | PPC_STL r0, (SHADOW_VCPU_OFF + SVCPU_R0)(r13) | ||
141 | PPC_STL r1, (SHADOW_VCPU_OFF + SVCPU_R1)(r13) | ||
142 | PPC_STL r2, (SHADOW_VCPU_OFF + SVCPU_R2)(r13) | ||
143 | PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_R3)(r13) | ||
144 | PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_R4)(r13) | ||
145 | PPC_STL r5, (SHADOW_VCPU_OFF + SVCPU_R5)(r13) | ||
146 | PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_R6)(r13) | ||
147 | PPC_STL r7, (SHADOW_VCPU_OFF + SVCPU_R7)(r13) | ||
148 | PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R8)(r13) | ||
149 | PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R9)(r13) | ||
150 | PPC_STL r10, (SHADOW_VCPU_OFF + SVCPU_R10)(r13) | ||
151 | PPC_STL r11, (SHADOW_VCPU_OFF + SVCPU_R11)(r13) | ||
152 | |||
153 | /* Restore R1/R2 so we can handle faults */ | ||
154 | PPC_LL r1, (SHADOW_VCPU_OFF + SVCPU_HOST_R1)(r13) | ||
155 | PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13) | ||
156 | |||
157 | /* Save guest PC and MSR */ | ||
158 | mfsrr0 r3 | ||
159 | mfsrr1 r4 | ||
160 | |||
161 | PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13) | ||
162 | PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13) | ||
163 | |||
164 | /* Get scratch'ed off registers */ | ||
165 | mfspr r9, SPRN_SPRG_SCRATCH0 | ||
166 | PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13) | ||
167 | lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13) | ||
168 | |||
169 | PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R13)(r13) | ||
170 | PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R12)(r13) | ||
171 | stw r7, (SHADOW_VCPU_OFF + SVCPU_CR)(r13) | ||
172 | |||
173 | /* Save more register state */ | ||
174 | |||
175 | mfxer r5 | ||
176 | mfdar r6 | ||
177 | mfdsisr r7 | ||
178 | mfctr r8 | ||
179 | mflr r9 | ||
180 | |||
181 | stw r5, (SHADOW_VCPU_OFF + SVCPU_XER)(r13) | ||
182 | PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_FAULT_DAR)(r13) | ||
183 | stw r7, (SHADOW_VCPU_OFF + SVCPU_FAULT_DSISR)(r13) | ||
184 | PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_CTR)(r13) | ||
185 | PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_LR)(r13) | ||
186 | |||
187 | /* | ||
188 | * In order to easily fetch the last instruction, the one | ||
189 | * we took the #vmexit at, we exploit the fact that the | ||
190 | * virtual layout is still the same here, so we can just | ||
191 | * load directly from the guest's PC address. | ||
192 | */ | ||
193 | |||
194 | /* We only load the last instruction when it's safe */ | ||
195 | cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE | ||
196 | beq ld_last_inst | ||
197 | cmpwi r12, BOOK3S_INTERRUPT_PROGRAM | ||
198 | beq ld_last_inst | ||
199 | cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT | ||
200 | beq- ld_last_inst | ||
201 | |||
202 | b no_ld_last_inst | ||
203 | |||
204 | ld_last_inst: | ||
205 | /* Save off the guest instruction we're at */ | ||
206 | |||
207 | /* In case lwz faults */ | ||
208 | li r0, KVM_INST_FETCH_FAILED | ||
209 | |||
210 | #ifdef USE_QUICK_LAST_INST | ||
211 | |||
212 | /* Set guest mode to 'jump over instruction' so if lwz faults | ||
213 | * we'll just continue at the next IP. */ | ||
214 | li r9, KVM_GUEST_MODE_SKIP | ||
215 | stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) | ||
216 | |||
217 | /* 1) enable paging for data */ | ||
218 | mfmsr r9 | ||
219 | ori r11, r9, MSR_DR /* Enable paging for data */ | ||
220 | mtmsr r11 | ||
221 | sync | ||
222 | /* 2) fetch the instruction */ | ||
223 | lwz r0, 0(r3) | ||
224 | /* 3) disable paging again */ | ||
225 | mtmsr r9 | ||
226 | sync | ||
227 | |||
228 | #endif | ||
229 | stw r0, (SHADOW_VCPU_OFF + SVCPU_LAST_INST)(r13) | ||
230 | |||
231 | no_ld_last_inst: | ||
232 | |||
233 | /* Unset guest mode */ | ||
234 | li r9, KVM_GUEST_MODE_NONE | ||
235 | stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13) | ||
236 | |||
237 | /* Switch back to host MMU */ | ||
238 | LOAD_HOST_SEGMENTS | ||
239 | |||
240 | /* Register usage at this point: | ||
241 | * | ||
242 | * R1 = host R1 | ||
243 | * R2 = host R2 | ||
244 | * R12 = exit handler id | ||
245 | * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64] | ||
246 | * SVCPU.* = guest * | ||
247 | * | ||
248 | */ | ||
249 | |||
250 | /* RFI into the highmem handler */ | ||
251 | mfmsr r7 | ||
252 | ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */ | ||
253 | mtsrr1 r7 | ||
254 | /* Load highmem handler address */ | ||
255 | PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_VMHANDLER)(r13) | ||
256 | mtsrr0 r8 | ||
257 | |||
258 | RFI | ||
259 | kvmppc_handler_trampoline_exit_end: | ||
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2a3a1953d4bd..a33ab8cc2ccc 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -133,6 +133,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | |||
133 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); | 133 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); |
134 | } | 134 | } |
135 | 135 | ||
136 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | ||
137 | struct kvm_interrupt *irq) | ||
138 | { | ||
139 | clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); | ||
140 | } | ||
141 | |||
136 | /* Deliver the interrupt of the corresponding priority, if possible. */ | 142 | /* Deliver the interrupt of the corresponding priority, if possible. */ |
137 | static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | 143 | static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, |
138 | unsigned int priority) | 144 | unsigned int priority) |
@@ -479,6 +485,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
479 | { | 485 | { |
480 | int i; | 486 | int i; |
481 | 487 | ||
488 | vcpu_load(vcpu); | ||
489 | |||
482 | regs->pc = vcpu->arch.pc; | 490 | regs->pc = vcpu->arch.pc; |
483 | regs->cr = kvmppc_get_cr(vcpu); | 491 | regs->cr = kvmppc_get_cr(vcpu); |
484 | regs->ctr = vcpu->arch.ctr; | 492 | regs->ctr = vcpu->arch.ctr; |
@@ -499,6 +507,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
499 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 507 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
500 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 508 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
501 | 509 | ||
510 | vcpu_put(vcpu); | ||
511 | |||
502 | return 0; | 512 | return 0; |
503 | } | 513 | } |
504 | 514 | ||
@@ -506,6 +516,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
506 | { | 516 | { |
507 | int i; | 517 | int i; |
508 | 518 | ||
519 | vcpu_load(vcpu); | ||
520 | |||
509 | vcpu->arch.pc = regs->pc; | 521 | vcpu->arch.pc = regs->pc; |
510 | kvmppc_set_cr(vcpu, regs->cr); | 522 | kvmppc_set_cr(vcpu, regs->cr); |
511 | vcpu->arch.ctr = regs->ctr; | 523 | vcpu->arch.ctr = regs->ctr; |
@@ -525,6 +537,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
525 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 537 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
526 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 538 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
527 | 539 | ||
540 | vcpu_put(vcpu); | ||
541 | |||
528 | return 0; | 542 | return 0; |
529 | } | 543 | } |
530 | 544 | ||
@@ -553,7 +567,12 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
553 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | 567 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, |
554 | struct kvm_translation *tr) | 568 | struct kvm_translation *tr) |
555 | { | 569 | { |
556 | return kvmppc_core_vcpu_translate(vcpu, tr); | 570 | int r; |
571 | |||
572 | vcpu_load(vcpu); | ||
573 | r = kvmppc_core_vcpu_translate(vcpu, tr); | ||
574 | vcpu_put(vcpu); | ||
575 | return r; | ||
557 | } | 576 | } |
558 | 577 | ||
559 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) | 578 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) |
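
The new kvmppc_core_dequeue_external() above is the BookE side of KVM_CAP_PPC_UNSET_IRQ; the dispatch from the KVM_INTERRUPT vcpu ioctl appears in the powerpc.c hunk further down. A hedged userspace sketch of clearing a previously raised external interrupt (assumes <linux/kvm.h> from a kernel with this series applied and an already-created vcpu file descriptor):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Drop a pending external interrupt on a PPC vcpu; KVM_INTERRUPT with
 * KVM_INTERRUPT_UNSET now reaches kvmppc_core_dequeue_external(). */
static int ppc_clear_external_irq(int vcpu_fd)
{
        struct kvm_interrupt irq = { .irq = KVM_INTERRUPT_UNSET };

        return ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
}
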
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 669a5c5fc7d7..bc2b4004eb26 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c | |||
@@ -161,7 +161,7 @@ static int __init kvmppc_e500_init(void) | |||
161 | flush_icache_range(kvmppc_booke_handlers, | 161 | flush_icache_range(kvmppc_booke_handlers, |
162 | kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); | 162 | kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); |
163 | 163 | ||
164 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE); | 164 | return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); |
165 | } | 165 | } |
166 | 166 | ||
167 | static void __init kvmppc_e500_exit(void) | 167 | static void __init kvmppc_e500_exit(void) |
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index cb72a65f4ecc..4568ec386c2a 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -38,10 +38,12 @@ | |||
38 | #define OP_31_XOP_LBZX 87 | 38 | #define OP_31_XOP_LBZX 87 |
39 | #define OP_31_XOP_STWX 151 | 39 | #define OP_31_XOP_STWX 151 |
40 | #define OP_31_XOP_STBX 215 | 40 | #define OP_31_XOP_STBX 215 |
41 | #define OP_31_XOP_LBZUX 119 | ||
41 | #define OP_31_XOP_STBUX 247 | 42 | #define OP_31_XOP_STBUX 247 |
42 | #define OP_31_XOP_LHZX 279 | 43 | #define OP_31_XOP_LHZX 279 |
43 | #define OP_31_XOP_LHZUX 311 | 44 | #define OP_31_XOP_LHZUX 311 |
44 | #define OP_31_XOP_MFSPR 339 | 45 | #define OP_31_XOP_MFSPR 339 |
46 | #define OP_31_XOP_LHAX 343 | ||
45 | #define OP_31_XOP_STHX 407 | 47 | #define OP_31_XOP_STHX 407 |
46 | #define OP_31_XOP_STHUX 439 | 48 | #define OP_31_XOP_STHUX 439 |
47 | #define OP_31_XOP_MTSPR 467 | 49 | #define OP_31_XOP_MTSPR 467 |
@@ -62,10 +64,12 @@ | |||
62 | #define OP_STBU 39 | 64 | #define OP_STBU 39 |
63 | #define OP_LHZ 40 | 65 | #define OP_LHZ 40 |
64 | #define OP_LHZU 41 | 66 | #define OP_LHZU 41 |
67 | #define OP_LHA 42 | ||
68 | #define OP_LHAU 43 | ||
65 | #define OP_STH 44 | 69 | #define OP_STH 44 |
66 | #define OP_STHU 45 | 70 | #define OP_STHU 45 |
67 | 71 | ||
68 | #ifdef CONFIG_PPC64 | 72 | #ifdef CONFIG_PPC_BOOK3S |
69 | static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) | 73 | static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu) |
70 | { | 74 | { |
71 | return 1; | 75 | return 1; |
@@ -82,7 +86,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) | |||
82 | unsigned long dec_nsec; | 86 | unsigned long dec_nsec; |
83 | 87 | ||
84 | pr_debug("mtDEC: %x\n", vcpu->arch.dec); | 88 | pr_debug("mtDEC: %x\n", vcpu->arch.dec); |
85 | #ifdef CONFIG_PPC64 | 89 | #ifdef CONFIG_PPC_BOOK3S |
86 | /* mtdec lowers the interrupt line when positive. */ | 90 | /* mtdec lowers the interrupt line when positive. */ |
87 | kvmppc_core_dequeue_dec(vcpu); | 91 | kvmppc_core_dequeue_dec(vcpu); |
88 | 92 | ||
@@ -128,7 +132,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) | |||
128 | * from opcode tables in the future. */ | 132 | * from opcode tables in the future. */ |
129 | int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | 133 | int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) |
130 | { | 134 | { |
131 | u32 inst = vcpu->arch.last_inst; | 135 | u32 inst = kvmppc_get_last_inst(vcpu); |
132 | u32 ea; | 136 | u32 ea; |
133 | int ra; | 137 | int ra; |
134 | int rb; | 138 | int rb; |
@@ -143,13 +147,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
143 | 147 | ||
144 | pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); | 148 | pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); |
145 | 149 | ||
146 | /* Try again next time */ | ||
147 | if (inst == KVM_INST_FETCH_FAILED) | ||
148 | return EMULATE_DONE; | ||
149 | |||
150 | switch (get_op(inst)) { | 150 | switch (get_op(inst)) { |
151 | case OP_TRAP: | 151 | case OP_TRAP: |
152 | #ifdef CONFIG_PPC64 | 152 | #ifdef CONFIG_PPC_BOOK3S |
153 | case OP_TRAP_64: | 153 | case OP_TRAP_64: |
154 | kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); | 154 | kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); |
155 | #else | 155 | #else |
@@ -171,6 +171,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
171 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); | 171 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); |
172 | break; | 172 | break; |
173 | 173 | ||
174 | case OP_31_XOP_LBZUX: | ||
175 | rt = get_rt(inst); | ||
176 | ra = get_ra(inst); | ||
177 | rb = get_rb(inst); | ||
178 | |||
179 | ea = kvmppc_get_gpr(vcpu, rb); | ||
180 | if (ra) | ||
181 | ea += kvmppc_get_gpr(vcpu, ra); | ||
182 | |||
183 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); | ||
184 | kvmppc_set_gpr(vcpu, ra, ea); | ||
185 | break; | ||
186 | |||
174 | case OP_31_XOP_STWX: | 187 | case OP_31_XOP_STWX: |
175 | rs = get_rs(inst); | 188 | rs = get_rs(inst); |
176 | emulated = kvmppc_handle_store(run, vcpu, | 189 | emulated = kvmppc_handle_store(run, vcpu, |
@@ -200,6 +213,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
200 | kvmppc_set_gpr(vcpu, rs, ea); | 213 | kvmppc_set_gpr(vcpu, rs, ea); |
201 | break; | 214 | break; |
202 | 215 | ||
216 | case OP_31_XOP_LHAX: | ||
217 | rt = get_rt(inst); | ||
218 | emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); | ||
219 | break; | ||
220 | |||
203 | case OP_31_XOP_LHZX: | 221 | case OP_31_XOP_LHZX: |
204 | rt = get_rt(inst); | 222 | rt = get_rt(inst); |
205 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); | 223 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); |
@@ -450,6 +468,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
450 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); | 468 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); |
451 | break; | 469 | break; |
452 | 470 | ||
471 | case OP_LHA: | ||
472 | rt = get_rt(inst); | ||
473 | emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); | ||
474 | break; | ||
475 | |||
476 | case OP_LHAU: | ||
477 | ra = get_ra(inst); | ||
478 | rt = get_rt(inst); | ||
479 | emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); | ||
480 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); | ||
481 | break; | ||
482 | |||
453 | case OP_STH: | 483 | case OP_STH: |
454 | rs = get_rs(inst); | 484 | rs = get_rs(inst); |
455 | emulated = kvmppc_handle_store(run, vcpu, | 485 | emulated = kvmppc_handle_store(run, vcpu, |
@@ -472,7 +502,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
472 | 502 | ||
473 | if (emulated == EMULATE_FAIL) { | 503 | if (emulated == EMULATE_FAIL) { |
474 | emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); | 504 | emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); |
475 | if (emulated == EMULATE_FAIL) { | 505 | if (emulated == EMULATE_AGAIN) { |
506 | advance = 0; | ||
507 | } else if (emulated == EMULATE_FAIL) { | ||
476 | advance = 0; | 508 | advance = 0; |
477 | printk(KERN_ERR "Couldn't emulate instruction 0x%08x " | 509 | printk(KERN_ERR "Couldn't emulate instruction 0x%08x " |
478 | "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); | 510 | "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); |
@@ -480,10 +512,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
480 | } | 512 | } |
481 | } | 513 | } |
482 | 514 | ||
483 | trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated); | 515 | trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); |
484 | 516 | ||
517 | /* Advance past emulated instruction. */ | ||
485 | if (advance) | 518 | if (advance) |
486 | vcpu->arch.pc += 4; /* Advance past emulated instruction. */ | 519 | kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); |
487 | 520 | ||
488 | return emulated; | 521 | return emulated; |
489 | } | 522 | } |
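
The new lha/lhau/lhax cases above go through kvmppc_handle_loads(), which only flags the pending MMIO as sign-extending; the extension itself is applied when the load completes in kvmppc_complete_mmio_load() (see the powerpc.c hunk below). A minimal, standalone illustration of that width-dependent extension:

#include <stdint.h>

/* Standalone illustration of the sign extension applied on MMIO
 * completion when mmio_sign_extend is set; mirrors the switch added
 * to kvmppc_complete_mmio_load(). */
static uint64_t mmio_sign_extend(uint64_t gpr, unsigned int len)
{
        switch (len) {
        case 4: return (uint64_t)(int64_t)(int32_t)gpr;
        case 2: return (uint64_t)(int64_t)(int16_t)gpr;  /* lha/lhau/lhax */
        case 1: return (uint64_t)(int64_t)(int8_t)gpr;
        default: return gpr;                             /* 8 bytes: nothing to do */
        }
}
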
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S new file mode 100644 index 000000000000..2b340a3eee90 --- /dev/null +++ b/arch/powerpc/kvm/fpu.S | |||
@@ -0,0 +1,273 @@ | |||
1 | /* | ||
2 | * FPU helper code to use FPU operations from inside the kernel | ||
3 | * | ||
4 | * Copyright (C) 2010 Alexander Graf (agraf@suse.de) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <asm/reg.h> | ||
14 | #include <asm/page.h> | ||
15 | #include <asm/mmu.h> | ||
16 | #include <asm/pgtable.h> | ||
17 | #include <asm/cputable.h> | ||
18 | #include <asm/cache.h> | ||
19 | #include <asm/thread_info.h> | ||
20 | #include <asm/ppc_asm.h> | ||
21 | #include <asm/asm-offsets.h> | ||
22 | |||
23 | /* Instructions operating on single parameters */ | ||
24 | |||
25 | /* | ||
26 | * Single operation with one input operand | ||
27 | * | ||
28 | * R3 = (double*)&fpscr | ||
29 | * R4 = (short*)&result | ||
30 | * R5 = (short*)&param1 | ||
31 | */ | ||
32 | #define FPS_ONE_IN(name) \ | ||
33 | _GLOBAL(fps_ ## name); \ | ||
34 | lfd 0,0(r3); /* load up fpscr value */ \ | ||
35 | MTFSF_L(0); \ | ||
36 | lfs 0,0(r5); \ | ||
37 | \ | ||
38 | name 0,0; \ | ||
39 | \ | ||
40 | stfs 0,0(r4); \ | ||
41 | mffs 0; \ | ||
42 | stfd 0,0(r3); /* save new fpscr value */ \ | ||
43 | blr | ||
44 | |||
45 | /* | ||
46 | * Single operation with two input operands | ||
47 | * | ||
48 | * R3 = (double*)&fpscr | ||
49 | * R4 = (short*)&result | ||
50 | * R5 = (short*)&param1 | ||
51 | * R6 = (short*)&param2 | ||
52 | */ | ||
53 | #define FPS_TWO_IN(name) \ | ||
54 | _GLOBAL(fps_ ## name); \ | ||
55 | lfd 0,0(r3); /* load up fpscr value */ \ | ||
56 | MTFSF_L(0); \ | ||
57 | lfs 0,0(r5); \ | ||
58 | lfs 1,0(r6); \ | ||
59 | \ | ||
60 | name 0,0,1; \ | ||
61 | \ | ||
62 | stfs 0,0(r4); \ | ||
63 | mffs 0; \ | ||
64 | stfd 0,0(r3); /* save new fpscr value */ \ | ||
65 | blr | ||
66 | |||
67 | /* | ||
68 | * Single operation with three input operands | ||
69 | * | ||
70 | * R3 = (double*)&fpscr | ||
71 | * R4 = (short*)&result | ||
72 | * R5 = (short*)&param1 | ||
73 | * R6 = (short*)&param2 | ||
74 | * R7 = (short*)&param3 | ||
75 | */ | ||
76 | #define FPS_THREE_IN(name) \ | ||
77 | _GLOBAL(fps_ ## name); \ | ||
78 | lfd 0,0(r3); /* load up fpscr value */ \ | ||
79 | MTFSF_L(0); \ | ||
80 | lfs 0,0(r5); \ | ||
81 | lfs 1,0(r6); \ | ||
82 | lfs 2,0(r7); \ | ||
83 | \ | ||
84 | name 0,0,1,2; \ | ||
85 | \ | ||
86 | stfs 0,0(r4); \ | ||
87 | mffs 0; \ | ||
88 | stfd 0,0(r3); /* save new fpscr value */ \ | ||
89 | blr | ||
90 | |||
91 | FPS_ONE_IN(fres) | ||
92 | FPS_ONE_IN(frsqrte) | ||
93 | FPS_ONE_IN(fsqrts) | ||
94 | FPS_TWO_IN(fadds) | ||
95 | FPS_TWO_IN(fdivs) | ||
96 | FPS_TWO_IN(fmuls) | ||
97 | FPS_TWO_IN(fsubs) | ||
98 | FPS_THREE_IN(fmadds) | ||
99 | FPS_THREE_IN(fmsubs) | ||
100 | FPS_THREE_IN(fnmadds) | ||
101 | FPS_THREE_IN(fnmsubs) | ||
102 | FPS_THREE_IN(fsel) | ||
103 | |||
104 | |||
105 | /* Instructions operating on double parameters */ | ||
106 | |||
107 | /* | ||
108 | * Beginning of double instruction processing | ||
109 | * | ||
110 | * R3 = (double*)&fpscr | ||
111 | * R4 = (u32*)&cr | ||
112 | * R5 = (double*)&result | ||
113 | * R6 = (double*)&param1 | ||
114 | * R7 = (double*)&param2 [load_two] | ||
115 | * R8 = (double*)&param3 [load_three] | ||
116 | * LR = instruction call function | ||
117 | */ | ||
118 | fpd_load_three: | ||
119 | lfd 2,0(r8) /* load param3 */ | ||
120 | fpd_load_two: | ||
121 | lfd 1,0(r7) /* load param2 */ | ||
122 | fpd_load_one: | ||
123 | lfd 0,0(r6) /* load param1 */ | ||
124 | fpd_load_none: | ||
125 | lfd 3,0(r3) /* load up fpscr value */ | ||
126 | MTFSF_L(3) | ||
127 | lwz r6, 0(r4) /* load cr */ | ||
128 | mtcr r6 | ||
129 | blr | ||
130 | |||
131 | /* | ||
132 | * End of double instruction processing | ||
133 | * | ||
134 | * R3 = (double*)&fpscr | ||
135 | * R4 = (u32*)&cr | ||
136 | * R5 = (double*)&result | ||
137 | * LR = caller of instruction call function | ||
138 | */ | ||
139 | fpd_return: | ||
140 | mfcr r6 | ||
141 | stfd 0,0(r5) /* save result */ | ||
142 | mffs 0 | ||
143 | stfd 0,0(r3) /* save new fpscr value */ | ||
144 | stw r6,0(r4) /* save new cr value */ | ||
145 | blr | ||
146 | |||
147 | /* | ||
148 | * Double operation with no input operand | ||
149 | * | ||
150 | * R3 = (double*)&fpscr | ||
151 | * R4 = (u32*)&cr | ||
152 | * R5 = (double*)&result | ||
153 | */ | ||
154 | #define FPD_NONE_IN(name) \ | ||
155 | _GLOBAL(fpd_ ## name); \ | ||
156 | mflr r12; \ | ||
157 | bl fpd_load_none; \ | ||
158 | mtlr r12; \ | ||
159 | \ | ||
160 | name. 0; /* call instruction */ \ | ||
161 | b fpd_return | ||
162 | |||
163 | /* | ||
164 | * Double operation with one input operand | ||
165 | * | ||
166 | * R3 = (double*)&fpscr | ||
167 | * R4 = (u32*)&cr | ||
168 | * R5 = (double*)&result | ||
169 | * R6 = (double*)&param1 | ||
170 | */ | ||
171 | #define FPD_ONE_IN(name) \ | ||
172 | _GLOBAL(fpd_ ## name); \ | ||
173 | mflr r12; \ | ||
174 | bl fpd_load_one; \ | ||
175 | mtlr r12; \ | ||
176 | \ | ||
177 | name. 0,0; /* call instruction */ \ | ||
178 | b fpd_return | ||
179 | |||
180 | /* | ||
181 | * Double operation with two input operands | ||
182 | * | ||
183 | * R3 = (double*)&fpscr | ||
184 | * R4 = (u32*)&cr | ||
185 | * R5 = (double*)&result | ||
186 | * R6 = (double*)&param1 | ||
187 | * R7 = (double*)&param2 | ||
188 | * R8 = (double*)&param3 | ||
189 | */ | ||
190 | #define FPD_TWO_IN(name) \ | ||
191 | _GLOBAL(fpd_ ## name); \ | ||
192 | mflr r12; \ | ||
193 | bl fpd_load_two; \ | ||
194 | mtlr r12; \ | ||
195 | \ | ||
196 | name. 0,0,1; /* call instruction */ \ | ||
197 | b fpd_return | ||
198 | |||
199 | /* | ||
200 | * CR Double operation with two input operands | ||
201 | * | ||
202 | * R3 = (double*)&fpscr | ||
203 | * R4 = (u32*)&cr | ||
204 | * R5 = (double*)&param1 | ||
205 | * R6 = (double*)&param2 | ||
206 | * R7 = (double*)&param3 | ||
207 | */ | ||
208 | #define FPD_TWO_IN_CR(name) \ | ||
209 | _GLOBAL(fpd_ ## name); \ | ||
210 | lfd 1,0(r6); /* load param2 */ \ | ||
211 | lfd 0,0(r5); /* load param1 */ \ | ||
212 | lfd 3,0(r3); /* load up fpscr value */ \ | ||
213 | MTFSF_L(3); \ | ||
214 | lwz r6, 0(r4); /* load cr */ \ | ||
215 | mtcr r6; \ | ||
216 | \ | ||
217 | name 0,0,1; /* call instruction */ \ | ||
218 | mfcr r6; \ | ||
219 | mffs 0; \ | ||
220 | stfd 0,0(r3); /* save new fpscr value */ \ | ||
221 | stw r6,0(r4); /* save new cr value */ \ | ||
222 | blr | ||
223 | |||
224 | /* | ||
225 | * Double operation with three input operands | ||
226 | * | ||
227 | * R3 = (double*)&fpscr | ||
228 | * R4 = (u32*)&cr | ||
229 | * R5 = (double*)&result | ||
230 | * R6 = (double*)&param1 | ||
231 | * R7 = (double*)&param2 | ||
232 | * R8 = (double*)&param3 | ||
233 | */ | ||
234 | #define FPD_THREE_IN(name) \ | ||
235 | _GLOBAL(fpd_ ## name); \ | ||
236 | mflr r12; \ | ||
237 | bl fpd_load_three; \ | ||
238 | mtlr r12; \ | ||
239 | \ | ||
240 | name. 0,0,1,2; /* call instruction */ \ | ||
241 | b fpd_return | ||
242 | |||
243 | FPD_ONE_IN(fsqrts) | ||
244 | FPD_ONE_IN(frsqrtes) | ||
245 | FPD_ONE_IN(fres) | ||
246 | FPD_ONE_IN(frsp) | ||
247 | FPD_ONE_IN(fctiw) | ||
248 | FPD_ONE_IN(fctiwz) | ||
249 | FPD_ONE_IN(fsqrt) | ||
250 | FPD_ONE_IN(fre) | ||
251 | FPD_ONE_IN(frsqrte) | ||
252 | FPD_ONE_IN(fneg) | ||
253 | FPD_ONE_IN(fabs) | ||
254 | FPD_TWO_IN(fadds) | ||
255 | FPD_TWO_IN(fsubs) | ||
256 | FPD_TWO_IN(fdivs) | ||
257 | FPD_TWO_IN(fmuls) | ||
258 | FPD_TWO_IN_CR(fcmpu) | ||
259 | FPD_TWO_IN(fcpsgn) | ||
260 | FPD_TWO_IN(fdiv) | ||
261 | FPD_TWO_IN(fadd) | ||
262 | FPD_TWO_IN(fmul) | ||
263 | FPD_TWO_IN_CR(fcmpo) | ||
264 | FPD_TWO_IN(fsub) | ||
265 | FPD_THREE_IN(fmsubs) | ||
266 | FPD_THREE_IN(fmadds) | ||
267 | FPD_THREE_IN(fnmsubs) | ||
268 | FPD_THREE_IN(fnmadds) | ||
269 | FPD_THREE_IN(fsel) | ||
270 | FPD_THREE_IN(fmsub) | ||
271 | FPD_THREE_IN(fmadd) | ||
272 | FPD_THREE_IN(fnmsub) | ||
273 | FPD_THREE_IN(fnmadd) | ||
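
The comments in fpu.S fix the calling convention by register (r3 = &fpscr, r4 = &result or &cr, r5 onward = operands). Under the usual PPC C ABI, where r3..r10 carry the first pointer arguments, the helpers would be callable from C with declarations roughly like the ones below; these prototypes are an assumption derived from the register comments, not copied from a header in this hunk:

#include <linux/types.h>

/* Hypothetical C declarations matching the documented register usage.
 * Single-precision: r3 = &fpscr, r4 = &result, r5..r7 = operands. */
extern void fps_fres(u64 *fpscr, u32 *result, u32 *param1);
extern void fps_fadds(u64 *fpscr, u32 *result, u32 *param1, u32 *param2);
extern void fps_fmadds(u64 *fpscr, u32 *result,
                       u32 *param1, u32 *param2, u32 *param3);

/* Double-precision: r3 = &fpscr, r4 = &cr, r5 = &result, r6.. = operands;
 * the compare variants skip the result pointer. */
extern void fpd_fadd(u64 *fpscr, u32 *cr, u64 *result,
                     u64 *param1, u64 *param2);
extern void fpd_fcmpu(u64 *fpscr, u32 *cr, u64 *param1, u64 *param2);
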
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 297fcd2ff7d0..9b8683f39e05 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -70,7 +70,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
70 | case EMULATE_FAIL: | 70 | case EMULATE_FAIL: |
71 | /* XXX Deliver Program interrupt to guest. */ | 71 | /* XXX Deliver Program interrupt to guest. */ |
72 | printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, | 72 | printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, |
73 | vcpu->arch.last_inst); | 73 | kvmppc_get_last_inst(vcpu)); |
74 | r = RESUME_HOST; | 74 | r = RESUME_HOST; |
75 | break; | 75 | break; |
76 | default: | 76 | default: |
@@ -148,6 +148,10 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
148 | 148 | ||
149 | switch (ext) { | 149 | switch (ext) { |
150 | case KVM_CAP_PPC_SEGSTATE: | 150 | case KVM_CAP_PPC_SEGSTATE: |
151 | case KVM_CAP_PPC_PAIRED_SINGLES: | ||
152 | case KVM_CAP_PPC_UNSET_IRQ: | ||
153 | case KVM_CAP_ENABLE_CAP: | ||
154 | case KVM_CAP_PPC_OSI: | ||
151 | r = 1; | 155 | r = 1; |
152 | break; | 156 | break; |
153 | case KVM_CAP_COALESCED_MMIO: | 157 | case KVM_CAP_COALESCED_MMIO: |
@@ -193,12 +197,17 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) | |||
193 | { | 197 | { |
194 | struct kvm_vcpu *vcpu; | 198 | struct kvm_vcpu *vcpu; |
195 | vcpu = kvmppc_core_vcpu_create(kvm, id); | 199 | vcpu = kvmppc_core_vcpu_create(kvm, id); |
196 | kvmppc_create_vcpu_debugfs(vcpu, id); | 200 | if (!IS_ERR(vcpu)) |
201 | kvmppc_create_vcpu_debugfs(vcpu, id); | ||
197 | return vcpu; | 202 | return vcpu; |
198 | } | 203 | } |
199 | 204 | ||
200 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 205 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
201 | { | 206 | { |
207 | /* Make sure we're not using the vcpu anymore */ | ||
208 | hrtimer_cancel(&vcpu->arch.dec_timer); | ||
209 | tasklet_kill(&vcpu->arch.tasklet); | ||
210 | |||
202 | kvmppc_remove_vcpu_debugfs(vcpu); | 211 | kvmppc_remove_vcpu_debugfs(vcpu); |
203 | kvmppc_core_vcpu_free(vcpu); | 212 | kvmppc_core_vcpu_free(vcpu); |
204 | } | 213 | } |
@@ -278,7 +287,7 @@ static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, | |||
278 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | 287 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, |
279 | struct kvm_run *run) | 288 | struct kvm_run *run) |
280 | { | 289 | { |
281 | ulong gpr; | 290 | u64 gpr; |
282 | 291 | ||
283 | if (run->mmio.len > sizeof(gpr)) { | 292 | if (run->mmio.len > sizeof(gpr)) { |
284 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); | 293 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); |
@@ -287,6 +296,7 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
287 | 296 | ||
288 | if (vcpu->arch.mmio_is_bigendian) { | 297 | if (vcpu->arch.mmio_is_bigendian) { |
289 | switch (run->mmio.len) { | 298 | switch (run->mmio.len) { |
299 | case 8: gpr = *(u64 *)run->mmio.data; break; | ||
290 | case 4: gpr = *(u32 *)run->mmio.data; break; | 300 | case 4: gpr = *(u32 *)run->mmio.data; break; |
291 | case 2: gpr = *(u16 *)run->mmio.data; break; | 301 | case 2: gpr = *(u16 *)run->mmio.data; break; |
292 | case 1: gpr = *(u8 *)run->mmio.data; break; | 302 | case 1: gpr = *(u8 *)run->mmio.data; break; |
@@ -300,7 +310,43 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
300 | } | 310 | } |
301 | } | 311 | } |
302 | 312 | ||
313 | if (vcpu->arch.mmio_sign_extend) { | ||
314 | switch (run->mmio.len) { | ||
315 | #ifdef CONFIG_PPC64 | ||
316 | case 4: | ||
317 | gpr = (s64)(s32)gpr; | ||
318 | break; | ||
319 | #endif | ||
320 | case 2: | ||
321 | gpr = (s64)(s16)gpr; | ||
322 | break; | ||
323 | case 1: | ||
324 | gpr = (s64)(s8)gpr; | ||
325 | break; | ||
326 | } | ||
327 | } | ||
328 | |||
303 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); | 329 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); |
330 | |||
331 | switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) { | ||
332 | case KVM_REG_GPR: | ||
333 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); | ||
334 | break; | ||
335 | case KVM_REG_FPR: | ||
336 | vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | ||
337 | break; | ||
338 | #ifdef CONFIG_PPC_BOOK3S | ||
339 | case KVM_REG_QPR: | ||
340 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | ||
341 | break; | ||
342 | case KVM_REG_FQPR: | ||
343 | vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | ||
344 | vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; | ||
345 | break; | ||
346 | #endif | ||
347 | default: | ||
348 | BUG(); | ||
349 | } | ||
304 | } | 350 | } |
305 | 351 | ||
306 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | 352 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, |
@@ -319,12 +365,25 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
319 | vcpu->arch.mmio_is_bigendian = is_bigendian; | 365 | vcpu->arch.mmio_is_bigendian = is_bigendian; |
320 | vcpu->mmio_needed = 1; | 366 | vcpu->mmio_needed = 1; |
321 | vcpu->mmio_is_write = 0; | 367 | vcpu->mmio_is_write = 0; |
368 | vcpu->arch.mmio_sign_extend = 0; | ||
322 | 369 | ||
323 | return EMULATE_DO_MMIO; | 370 | return EMULATE_DO_MMIO; |
324 | } | 371 | } |
325 | 372 | ||
373 | /* Same as above, but sign extends */ | ||
374 | int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, | ||
375 | unsigned int rt, unsigned int bytes, int is_bigendian) | ||
376 | { | ||
377 | int r; | ||
378 | |||
379 | r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); | ||
380 | vcpu->arch.mmio_sign_extend = 1; | ||
381 | |||
382 | return r; | ||
383 | } | ||
384 | |||
326 | int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | 385 | int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, |
327 | u32 val, unsigned int bytes, int is_bigendian) | 386 | u64 val, unsigned int bytes, int is_bigendian) |
328 | { | 387 | { |
329 | void *data = run->mmio.data; | 388 | void *data = run->mmio.data; |
330 | 389 | ||
@@ -342,6 +401,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
342 | /* Store the value at the lowest bytes in 'data'. */ | 401 | /* Store the value at the lowest bytes in 'data'. */ |
343 | if (is_bigendian) { | 402 | if (is_bigendian) { |
344 | switch (bytes) { | 403 | switch (bytes) { |
404 | case 8: *(u64 *)data = val; break; | ||
345 | case 4: *(u32 *)data = val; break; | 405 | case 4: *(u32 *)data = val; break; |
346 | case 2: *(u16 *)data = val; break; | 406 | case 2: *(u16 *)data = val; break; |
347 | case 1: *(u8 *)data = val; break; | 407 | case 1: *(u8 *)data = val; break; |
@@ -376,6 +436,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
376 | if (!vcpu->arch.dcr_is_write) | 436 | if (!vcpu->arch.dcr_is_write) |
377 | kvmppc_complete_dcr_load(vcpu, run); | 437 | kvmppc_complete_dcr_load(vcpu, run); |
378 | vcpu->arch.dcr_needed = 0; | 438 | vcpu->arch.dcr_needed = 0; |
439 | } else if (vcpu->arch.osi_needed) { | ||
440 | u64 *gprs = run->osi.gprs; | ||
441 | int i; | ||
442 | |||
443 | for (i = 0; i < 32; i++) | ||
444 | kvmppc_set_gpr(vcpu, i, gprs[i]); | ||
445 | vcpu->arch.osi_needed = 0; | ||
379 | } | 446 | } |
380 | 447 | ||
381 | kvmppc_core_deliver_interrupts(vcpu); | 448 | kvmppc_core_deliver_interrupts(vcpu); |
@@ -396,7 +463,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
396 | 463 | ||
397 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | 464 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) |
398 | { | 465 | { |
399 | kvmppc_core_queue_external(vcpu, irq); | 466 | if (irq->irq == KVM_INTERRUPT_UNSET) |
467 | kvmppc_core_dequeue_external(vcpu, irq); | ||
468 | else | ||
469 | kvmppc_core_queue_external(vcpu, irq); | ||
400 | 470 | ||
401 | if (waitqueue_active(&vcpu->wq)) { | 471 | if (waitqueue_active(&vcpu->wq)) { |
402 | wake_up_interruptible(&vcpu->wq); | 472 | wake_up_interruptible(&vcpu->wq); |
@@ -406,6 +476,27 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | |||
406 | return 0; | 476 | return 0; |
407 | } | 477 | } |
408 | 478 | ||
479 | static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | ||
480 | struct kvm_enable_cap *cap) | ||
481 | { | ||
482 | int r; | ||
483 | |||
484 | if (cap->flags) | ||
485 | return -EINVAL; | ||
486 | |||
487 | switch (cap->cap) { | ||
488 | case KVM_CAP_PPC_OSI: | ||
489 | r = 0; | ||
490 | vcpu->arch.osi_enabled = true; | ||
491 | break; | ||
492 | default: | ||
493 | r = -EINVAL; | ||
494 | break; | ||
495 | } | ||
496 | |||
497 | return r; | ||
498 | } | ||
499 | |||
409 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 500 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
410 | struct kvm_mp_state *mp_state) | 501 | struct kvm_mp_state *mp_state) |
411 | { | 502 | { |
@@ -434,6 +525,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
434 | r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); | 525 | r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); |
435 | break; | 526 | break; |
436 | } | 527 | } |
528 | case KVM_ENABLE_CAP: | ||
529 | { | ||
530 | struct kvm_enable_cap cap; | ||
531 | r = -EFAULT; | ||
532 | if (copy_from_user(&cap, argp, sizeof(cap))) | ||
533 | goto out; | ||
534 | r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); | ||
535 | break; | ||
536 | } | ||
437 | default: | 537 | default: |
438 | r = -EINVAL; | 538 | r = -EINVAL; |
439 | } | 539 | } |
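
KVM_CAP_PPC_OSI is advertised above and switched on per vcpu through the new KVM_ENABLE_CAP handler, which rejects any non-zero flags. A hedged userspace sketch of opting in (assumes <linux/kvm.h> matching this kernel and an open vcpu fd):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Enable OSI hypercall exits on one vcpu via KVM_ENABLE_CAP. */
static int enable_osi(int vcpu_fd)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));   /* flags must be zero */
        cap.cap = KVM_CAP_PPC_OSI;

        return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
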
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index 0dfba2bf7f31..d0ee554e86e4 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c | |||
@@ -60,11 +60,7 @@ | |||
60 | static unsigned long next_mmu_context; | 60 | static unsigned long next_mmu_context; |
61 | static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; | 61 | static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; |
62 | 62 | ||
63 | 63 | unsigned long __init_new_context(void) | |
64 | /* | ||
65 | * Set up the context for a new address space. | ||
66 | */ | ||
67 | int init_new_context(struct task_struct *t, struct mm_struct *mm) | ||
68 | { | 64 | { |
69 | unsigned long ctx = next_mmu_context; | 65 | unsigned long ctx = next_mmu_context; |
70 | 66 | ||
@@ -74,19 +70,38 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) | |||
74 | ctx = 0; | 70 | ctx = 0; |
75 | } | 71 | } |
76 | next_mmu_context = (ctx + 1) & LAST_CONTEXT; | 72 | next_mmu_context = (ctx + 1) & LAST_CONTEXT; |
77 | mm->context.id = ctx; | 73 | |
74 | return ctx; | ||
75 | } | ||
76 | EXPORT_SYMBOL_GPL(__init_new_context); | ||
77 | |||
78 | /* | ||
79 | * Set up the context for a new address space. | ||
80 | */ | ||
81 | int init_new_context(struct task_struct *t, struct mm_struct *mm) | ||
82 | { | ||
83 | mm->context.id = __init_new_context(); | ||
78 | 84 | ||
79 | return 0; | 85 | return 0; |
80 | } | 86 | } |
81 | 87 | ||
82 | /* | 88 | /* |
89 | * Free a context ID. Make sure to call this with preempt disabled! | ||
90 | */ | ||
91 | void __destroy_context(unsigned long ctx) | ||
92 | { | ||
93 | clear_bit(ctx, context_map); | ||
94 | } | ||
95 | EXPORT_SYMBOL_GPL(__destroy_context); | ||
96 | |||
97 | /* | ||
83 | * We're finished using the context for an address space. | 98 | * We're finished using the context for an address space. |
84 | */ | 99 | */ |
85 | void destroy_context(struct mm_struct *mm) | 100 | void destroy_context(struct mm_struct *mm) |
86 | { | 101 | { |
87 | preempt_disable(); | 102 | preempt_disable(); |
88 | if (mm->context.id != NO_CONTEXT) { | 103 | if (mm->context.id != NO_CONTEXT) { |
89 | clear_bit(mm->context.id, context_map); | 104 | __destroy_context(mm->context.id); |
90 | mm->context.id = NO_CONTEXT; | 105 | mm->context.id = NO_CONTEXT; |
91 | } | 106 | } |
92 | preempt_enable(); | 107 | preempt_enable(); |
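
The refactoring above exposes raw context-ID allocation and freeing to other in-kernel users; judging by the GPL exports, the 32-bit Book3S KVM host MMU code is the intended consumer. A rough sketch of how such a user might drive the two helpers (the function names around them are made up for illustration, and the prototypes are assumed to be visible via the mmu context header):

#include <linux/preempt.h>

extern unsigned long __init_new_context(void);
extern void __destroy_context(unsigned long ctx);

static unsigned long shadow_ctx;

static void shadow_ctx_alloc(void)
{
        /* grab a fresh MMU context ID for shadow translations */
        shadow_ctx = __init_new_context();
}

static void shadow_ctx_free(void)
{
        preempt_disable();              /* __destroy_context() requires it */
        __destroy_context(shadow_ctx);
        preempt_enable();
}
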
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 49292869a5cd..8093e6f47f49 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -341,11 +341,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
341 | 341 | ||
342 | rc = kvm_vcpu_init(vcpu, kvm, id); | 342 | rc = kvm_vcpu_init(vcpu, kvm, id); |
343 | if (rc) | 343 | if (rc) |
344 | goto out_free_cpu; | 344 | goto out_free_sie_block; |
345 | VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, | 345 | VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, |
346 | vcpu->arch.sie_block); | 346 | vcpu->arch.sie_block); |
347 | 347 | ||
348 | return vcpu; | 348 | return vcpu; |
349 | out_free_sie_block: | ||
350 | free_page((unsigned long)(vcpu->arch.sie_block)); | ||
349 | out_free_cpu: | 351 | out_free_cpu: |
350 | kfree(vcpu); | 352 | kfree(vcpu); |
351 | out_nomem: | 353 | out_nomem: |
@@ -750,7 +752,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | |||
750 | static int __init kvm_s390_init(void) | 752 | static int __init kvm_s390_init(void) |
751 | { | 753 | { |
752 | int ret; | 754 | int ret; |
753 | ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); | 755 | ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); |
754 | if (ret) | 756 | if (ret) |
755 | return ret; | 757 | return ret; |
756 | 758 | ||
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 60f09ab3672c..cfa9d1777457 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -72,7 +72,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) | |||
72 | struct kvm_memslots *memslots; | 72 | struct kvm_memslots *memslots; |
73 | 73 | ||
74 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 74 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
75 | memslots = rcu_dereference(vcpu->kvm->memslots); | 75 | memslots = kvm_memslots(vcpu->kvm); |
76 | 76 | ||
77 | mem = &memslots->memslots[0]; | 77 | mem = &memslots->memslots[0]; |
78 | 78 | ||
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f46b79f6c16c..ff90055c7f0b 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -21,6 +21,7 @@ | |||
21 | #define __KVM_HAVE_PIT_STATE2 | 21 | #define __KVM_HAVE_PIT_STATE2 |
22 | #define __KVM_HAVE_XEN_HVM | 22 | #define __KVM_HAVE_XEN_HVM |
23 | #define __KVM_HAVE_VCPU_EVENTS | 23 | #define __KVM_HAVE_VCPU_EVENTS |
24 | #define __KVM_HAVE_DEBUGREGS | ||
24 | 25 | ||
25 | /* Architectural interrupt line count. */ | 26 | /* Architectural interrupt line count. */ |
26 | #define KVM_NR_INTERRUPTS 256 | 27 | #define KVM_NR_INTERRUPTS 256 |
@@ -257,6 +258,11 @@ struct kvm_reinject_control { | |||
257 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ | 258 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ |
258 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 | 259 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 |
259 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 | 260 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 |
261 | #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 | ||
262 | |||
263 | /* Interrupt shadow states */ | ||
264 | #define KVM_X86_SHADOW_INT_MOV_SS 0x01 | ||
265 | #define KVM_X86_SHADOW_INT_STI 0x02 | ||
260 | 266 | ||
261 | /* for KVM_GET/SET_VCPU_EVENTS */ | 267 | /* for KVM_GET/SET_VCPU_EVENTS */ |
262 | struct kvm_vcpu_events { | 268 | struct kvm_vcpu_events { |
@@ -271,7 +277,7 @@ struct kvm_vcpu_events { | |||
271 | __u8 injected; | 277 | __u8 injected; |
272 | __u8 nr; | 278 | __u8 nr; |
273 | __u8 soft; | 279 | __u8 soft; |
274 | __u8 pad; | 280 | __u8 shadow; |
275 | } interrupt; | 281 | } interrupt; |
276 | struct { | 282 | struct { |
277 | __u8 injected; | 283 | __u8 injected; |
@@ -284,4 +290,13 @@ struct kvm_vcpu_events { | |||
284 | __u32 reserved[10]; | 290 | __u32 reserved[10]; |
285 | }; | 291 | }; |
286 | 292 | ||
293 | /* for KVM_GET/SET_DEBUGREGS */ | ||
294 | struct kvm_debugregs { | ||
295 | __u64 db[4]; | ||
296 | __u64 dr6; | ||
297 | __u64 dr7; | ||
298 | __u64 flags; | ||
299 | __u64 reserved[9]; | ||
300 | }; | ||
301 | |||
287 | #endif /* _ASM_X86_KVM_H */ | 302 | #endif /* _ASM_X86_KVM_H */ |
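
The new struct kvm_debugregs above carries the guest's db0-db3, dr6 and dr7, and the comment names the KVM_GET/SET_DEBUGREGS ioctls it belongs to. A hedged userspace sketch of reading it (assumes headers and a kernel from this series and an open vcpu fd):

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Dump the guest debug registers via the new KVM_GET_DEBUGREGS ioctl. */
static void dump_debugregs(int vcpu_fd)
{
        struct kvm_debugregs dbg;

        if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) == 0)
                printf("dr6=%llx dr7=%llx\n",
                       (unsigned long long)dbg.dr6,
                       (unsigned long long)dbg.dr7);
}
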
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7a6f54fa13ba..0b2729bf2070 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -11,6 +11,8 @@ | |||
11 | #ifndef _ASM_X86_KVM_X86_EMULATE_H | 11 | #ifndef _ASM_X86_KVM_X86_EMULATE_H |
12 | #define _ASM_X86_KVM_X86_EMULATE_H | 12 | #define _ASM_X86_KVM_X86_EMULATE_H |
13 | 13 | ||
14 | #include <asm/desc_defs.h> | ||
15 | |||
14 | struct x86_emulate_ctxt; | 16 | struct x86_emulate_ctxt; |
15 | 17 | ||
16 | /* | 18 | /* |
@@ -63,6 +65,15 @@ struct x86_emulate_ops { | |||
63 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | 65 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); |
64 | 66 | ||
65 | /* | 67 | /* |
68 | * write_std: Write bytes of standard (non-emulated/special) memory. | ||
69 | * Used for descriptor writing. | ||
70 | * @addr: [IN ] Linear address to which to write. | ||
71 | * @val: [IN ] Value to write to memory, zero-extended to 'u_long'. | ||
72 | * @bytes: [IN ] Number of bytes to write to memory. | ||
73 | */ | ||
74 | int (*write_std)(unsigned long addr, void *val, | ||
75 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | ||
76 | /* | ||
66 | * fetch: Read bytes of standard (non-emulated/special) memory. | 77 | * fetch: Read bytes of standard (non-emulated/special) memory. |
67 | * Used for instruction fetch. | 78 | * Used for instruction fetch. |
68 | * @addr: [IN ] Linear address from which to read. | 79 | * @addr: [IN ] Linear address from which to read. |
@@ -109,6 +120,23 @@ struct x86_emulate_ops { | |||
109 | unsigned int bytes, | 120 | unsigned int bytes, |
110 | struct kvm_vcpu *vcpu); | 121 | struct kvm_vcpu *vcpu); |
111 | 122 | ||
123 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | ||
124 | unsigned int count, struct kvm_vcpu *vcpu); | ||
125 | |||
126 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, | ||
127 | unsigned int count, struct kvm_vcpu *vcpu); | ||
128 | |||
129 | bool (*get_cached_descriptor)(struct desc_struct *desc, | ||
130 | int seg, struct kvm_vcpu *vcpu); | ||
131 | void (*set_cached_descriptor)(struct desc_struct *desc, | ||
132 | int seg, struct kvm_vcpu *vcpu); | ||
133 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | ||
134 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | ||
135 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | ||
136 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); | ||
137 | void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); | ||
138 | int (*cpl)(struct kvm_vcpu *vcpu); | ||
139 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | ||
112 | }; | 140 | }; |
113 | 141 | ||
114 | /* Type, address-of, and value of an instruction's operand. */ | 142 | /* Type, address-of, and value of an instruction's operand. */ |
@@ -124,6 +152,12 @@ struct fetch_cache { | |||
124 | unsigned long end; | 152 | unsigned long end; |
125 | }; | 153 | }; |
126 | 154 | ||
155 | struct read_cache { | ||
156 | u8 data[1024]; | ||
157 | unsigned long pos; | ||
158 | unsigned long end; | ||
159 | }; | ||
160 | |||
127 | struct decode_cache { | 161 | struct decode_cache { |
128 | u8 twobyte; | 162 | u8 twobyte; |
129 | u8 b; | 163 | u8 b; |
@@ -139,7 +173,7 @@ struct decode_cache { | |||
139 | u8 seg_override; | 173 | u8 seg_override; |
140 | unsigned int d; | 174 | unsigned int d; |
141 | unsigned long regs[NR_VCPU_REGS]; | 175 | unsigned long regs[NR_VCPU_REGS]; |
142 | unsigned long eip, eip_orig; | 176 | unsigned long eip; |
143 | /* modrm */ | 177 | /* modrm */ |
144 | u8 modrm; | 178 | u8 modrm; |
145 | u8 modrm_mod; | 179 | u8 modrm_mod; |
@@ -151,16 +185,15 @@ struct decode_cache { | |||
151 | void *modrm_ptr; | 185 | void *modrm_ptr; |
152 | unsigned long modrm_val; | 186 | unsigned long modrm_val; |
153 | struct fetch_cache fetch; | 187 | struct fetch_cache fetch; |
188 | struct read_cache io_read; | ||
154 | }; | 189 | }; |
155 | 190 | ||
156 | #define X86_SHADOW_INT_MOV_SS 1 | ||
157 | #define X86_SHADOW_INT_STI 2 | ||
158 | |||
159 | struct x86_emulate_ctxt { | 191 | struct x86_emulate_ctxt { |
160 | /* Register state before/after emulation. */ | 192 | /* Register state before/after emulation. */ |
161 | struct kvm_vcpu *vcpu; | 193 | struct kvm_vcpu *vcpu; |
162 | 194 | ||
163 | unsigned long eflags; | 195 | unsigned long eflags; |
196 | unsigned long eip; /* eip before instruction emulation */ | ||
164 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | 197 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ |
165 | int mode; | 198 | int mode; |
166 | u32 cs_base; | 199 | u32 cs_base; |
@@ -168,6 +201,7 @@ struct x86_emulate_ctxt { | |||
168 | /* interruptibility state, as a result of execution of STI or MOV SS */ | 201 | /* interruptibility state, as a result of execution of STI or MOV SS */ |
169 | int interruptibility; | 202 | int interruptibility; |
170 | 203 | ||
204 | bool restart; /* restart string instruction after writeback */ | ||
171 | /* decode cache */ | 205 | /* decode cache */ |
172 | struct decode_cache decode; | 206 | struct decode_cache decode; |
173 | }; | 207 | }; |
@@ -194,5 +228,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, | |||
194 | struct x86_emulate_ops *ops); | 228 | struct x86_emulate_ops *ops); |
195 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, | 229 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, |
196 | struct x86_emulate_ops *ops); | 230 | struct x86_emulate_ops *ops); |
231 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
232 | struct x86_emulate_ops *ops, | ||
233 | u16 tss_selector, int reason, | ||
234 | bool has_error_code, u32 error_code); | ||
197 | 235 | ||
198 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ | 236 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 06d9e79ca37d..76f5483cffec 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -171,15 +171,15 @@ struct kvm_pte_chain { | |||
171 | union kvm_mmu_page_role { | 171 | union kvm_mmu_page_role { |
172 | unsigned word; | 172 | unsigned word; |
173 | struct { | 173 | struct { |
174 | unsigned glevels:4; | ||
175 | unsigned level:4; | 174 | unsigned level:4; |
175 | unsigned cr4_pae:1; | ||
176 | unsigned quadrant:2; | 176 | unsigned quadrant:2; |
177 | unsigned pad_for_nice_hex_output:6; | 177 | unsigned pad_for_nice_hex_output:6; |
178 | unsigned direct:1; | 178 | unsigned direct:1; |
179 | unsigned access:3; | 179 | unsigned access:3; |
180 | unsigned invalid:1; | 180 | unsigned invalid:1; |
181 | unsigned cr4_pge:1; | ||
182 | unsigned nxe:1; | 181 | unsigned nxe:1; |
182 | unsigned cr0_wp:1; | ||
183 | }; | 183 | }; |
184 | }; | 184 | }; |
185 | 185 | ||
@@ -187,8 +187,6 @@ struct kvm_mmu_page { | |||
187 | struct list_head link; | 187 | struct list_head link; |
188 | struct hlist_node hash_link; | 188 | struct hlist_node hash_link; |
189 | 189 | ||
190 | struct list_head oos_link; | ||
191 | |||
192 | /* | 190 | /* |
193 | * The following two entries are used to key the shadow page in the | 191 | * The following two entries are used to key the shadow page in the |
194 | * hash table. | 192 | * hash table. |
@@ -204,9 +202,9 @@ struct kvm_mmu_page { | |||
204 | * in this shadow page. | 202 | * in this shadow page. |
205 | */ | 203 | */ |
206 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 204 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
207 | int multimapped; /* More than one parent_pte? */ | 205 | bool multimapped; /* More than one parent_pte? */ |
208 | int root_count; /* Currently serving as active root */ | ||
209 | bool unsync; | 206 | bool unsync; |
207 | int root_count; /* Currently serving as active root */ | ||
210 | unsigned int unsync_children; | 208 | unsigned int unsync_children; |
211 | union { | 209 | union { |
212 | u64 *parent_pte; /* !multimapped */ | 210 | u64 *parent_pte; /* !multimapped */ |
@@ -224,14 +222,9 @@ struct kvm_pv_mmu_op_buffer { | |||
224 | 222 | ||
225 | struct kvm_pio_request { | 223 | struct kvm_pio_request { |
226 | unsigned long count; | 224 | unsigned long count; |
227 | int cur_count; | ||
228 | gva_t guest_gva; | ||
229 | int in; | 225 | int in; |
230 | int port; | 226 | int port; |
231 | int size; | 227 | int size; |
232 | int string; | ||
233 | int down; | ||
234 | int rep; | ||
235 | }; | 228 | }; |
236 | 229 | ||
237 | /* | 230 | /* |
@@ -320,6 +313,7 @@ struct kvm_vcpu_arch { | |||
320 | struct kvm_queued_exception { | 313 | struct kvm_queued_exception { |
321 | bool pending; | 314 | bool pending; |
322 | bool has_error_code; | 315 | bool has_error_code; |
316 | bool reinject; | ||
323 | u8 nr; | 317 | u8 nr; |
324 | u32 error_code; | 318 | u32 error_code; |
325 | } exception; | 319 | } exception; |
@@ -362,8 +356,8 @@ struct kvm_vcpu_arch { | |||
362 | u64 *mce_banks; | 356 | u64 *mce_banks; |
363 | 357 | ||
364 | /* used for guest single stepping over the given code position */ | 358 | /* used for guest single stepping over the given code position */ |
365 | u16 singlestep_cs; | ||
366 | unsigned long singlestep_rip; | 359 | unsigned long singlestep_rip; |
360 | |||
367 | /* fields used by HYPER-V emulation */ | 361 | /* fields used by HYPER-V emulation */ |
368 | u64 hv_vapic; | 362 | u64 hv_vapic; |
369 | }; | 363 | }; |
@@ -389,6 +383,7 @@ struct kvm_arch { | |||
389 | unsigned int n_free_mmu_pages; | 383 | unsigned int n_free_mmu_pages; |
390 | unsigned int n_requested_mmu_pages; | 384 | unsigned int n_requested_mmu_pages; |
391 | unsigned int n_alloc_mmu_pages; | 385 | unsigned int n_alloc_mmu_pages; |
386 | atomic_t invlpg_counter; | ||
392 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 387 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
393 | /* | 388 | /* |
394 | * Hash table of struct kvm_mmu_page. | 389 | * Hash table of struct kvm_mmu_page. |
@@ -461,11 +456,6 @@ struct kvm_vcpu_stat { | |||
461 | u32 nmi_injections; | 456 | u32 nmi_injections; |
462 | }; | 457 | }; |
463 | 458 | ||
464 | struct descriptor_table { | ||
465 | u16 limit; | ||
466 | unsigned long base; | ||
467 | } __attribute__((packed)); | ||
468 | |||
469 | struct kvm_x86_ops { | 459 | struct kvm_x86_ops { |
470 | int (*cpu_has_kvm_support)(void); /* __init */ | 460 | int (*cpu_has_kvm_support)(void); /* __init */ |
471 | int (*disabled_by_bios)(void); /* __init */ | 461 | int (*disabled_by_bios)(void); /* __init */ |
@@ -503,12 +493,11 @@ struct kvm_x86_ops { | |||
503 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 493 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
504 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); | 494 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); |
505 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); | 495 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); |
506 | void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 496 | void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
507 | void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 497 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
508 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 498 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
509 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 499 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
510 | int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); | 500 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
511 | int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); | ||
512 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 501 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
513 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 502 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
514 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 503 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
@@ -527,7 +516,8 @@ struct kvm_x86_ops { | |||
527 | void (*set_irq)(struct kvm_vcpu *vcpu); | 516 | void (*set_irq)(struct kvm_vcpu *vcpu); |
528 | void (*set_nmi)(struct kvm_vcpu *vcpu); | 517 | void (*set_nmi)(struct kvm_vcpu *vcpu); |
529 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, | 518 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, |
530 | bool has_error_code, u32 error_code); | 519 | bool has_error_code, u32 error_code, |
520 | bool reinject); | ||
531 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); | 521 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); |
532 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 522 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
533 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | 523 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
@@ -541,6 +531,8 @@ struct kvm_x86_ops { | |||
541 | int (*get_lpage_level)(void); | 531 | int (*get_lpage_level)(void); |
542 | bool (*rdtscp_supported)(void); | 532 | bool (*rdtscp_supported)(void); |
543 | 533 | ||
534 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); | ||
535 | |||
544 | const struct trace_print_flags *exit_reasons_str; | 536 | const struct trace_print_flags *exit_reasons_str; |
545 | }; | 537 | }; |
546 | 538 | ||
@@ -587,23 +579,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
587 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | 579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); |
588 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
589 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 581 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
590 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
591 | unsigned long *rflags); | ||
592 | 582 | ||
593 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); | ||
594 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, | ||
595 | unsigned long *rflags); | ||
596 | void kvm_enable_efer_bits(u64); | 583 | void kvm_enable_efer_bits(u64); |
597 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 584 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); |
598 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 585 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
599 | 586 | ||
600 | struct x86_emulate_ctxt; | 587 | struct x86_emulate_ctxt; |
601 | 588 | ||
602 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, | 589 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); |
603 | int size, unsigned port); | ||
604 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | ||
605 | int size, unsigned long count, int down, | ||
606 | gva_t address, int rep, unsigned port); | ||
607 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 590 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
608 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 591 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
609 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | 592 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); |
@@ -616,12 +599,15 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | |||
616 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 599 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
617 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 600 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
618 | 601 | ||
619 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); | 602 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
603 | bool has_error_code, u32 error_code); | ||
620 | 604 | ||
621 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 605 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
622 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 606 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
623 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 607 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
624 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); | 608 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
609 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); | ||
610 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); | ||
625 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); | 611 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
626 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | 612 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); |
627 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | 613 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
@@ -634,6 +620,8 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | |||
634 | 620 | ||
635 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 621 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
636 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 622 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
623 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); | ||
624 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | ||
637 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, | 625 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, |
638 | u32 error_code); | 626 | u32 error_code); |
639 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); | 627 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
@@ -649,8 +637,6 @@ int emulator_write_emulated(unsigned long addr, | |||
649 | unsigned int bytes, | 637 | unsigned int bytes, |
650 | struct kvm_vcpu *vcpu); | 638 | struct kvm_vcpu *vcpu); |
651 | 639 | ||
652 | unsigned long segment_base(u16 selector); | ||
653 | |||
654 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 640 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
655 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 641 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
656 | const u8 *new, int bytes, | 642 | const u8 *new, int bytes, |
@@ -675,7 +661,6 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); | |||
675 | void kvm_enable_tdp(void); | 661 | void kvm_enable_tdp(void); |
676 | void kvm_disable_tdp(void); | 662 | void kvm_disable_tdp(void); |
677 | 663 | ||
678 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
679 | int complete_pio(struct kvm_vcpu *vcpu); | 664 | int complete_pio(struct kvm_vcpu *vcpu); |
680 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 665 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
681 | 666 | ||
@@ -724,23 +709,6 @@ static inline void kvm_load_ldt(u16 sel) | |||
724 | asm("lldt %0" : : "rm"(sel)); | 709 | asm("lldt %0" : : "rm"(sel)); |
725 | } | 710 | } |
726 | 711 | ||
727 | static inline void kvm_get_idt(struct descriptor_table *table) | ||
728 | { | ||
729 | asm("sidt %0" : "=m"(*table)); | ||
730 | } | ||
731 | |||
732 | static inline void kvm_get_gdt(struct descriptor_table *table) | ||
733 | { | ||
734 | asm("sgdt %0" : "=m"(*table)); | ||
735 | } | ||
736 | |||
737 | static inline unsigned long kvm_read_tr_base(void) | ||
738 | { | ||
739 | u16 tr; | ||
740 | asm("str %0" : "=g"(tr)); | ||
741 | return segment_base(tr); | ||
742 | } | ||
743 | |||
744 | #ifdef CONFIG_X86_64 | 712 | #ifdef CONFIG_X86_64 |
745 | static inline unsigned long read_msr(unsigned long msr) | 713 | static inline unsigned long read_msr(unsigned long msr) |
746 | { | 714 | { |
@@ -826,4 +794,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | |||
826 | void kvm_define_shared_msr(unsigned index, u32 msr); | 794 | void kvm_define_shared_msr(unsigned index, u32 msr); |
827 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | 795 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); |
828 | 796 | ||
797 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); | ||
798 | |||
829 | #endif /* _ASM_X86_KVM_HOST_H */ | 799 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index ffae1420e7d7..05eba5e9a8e8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -16,10 +16,23 @@ | |||
16 | #define KVM_FEATURE_CLOCKSOURCE 0 | 16 | #define KVM_FEATURE_CLOCKSOURCE 0 |
17 | #define KVM_FEATURE_NOP_IO_DELAY 1 | 17 | #define KVM_FEATURE_NOP_IO_DELAY 1 |
18 | #define KVM_FEATURE_MMU_OP 2 | 18 | #define KVM_FEATURE_MMU_OP 2 |
19 | /* This indicates that the new kvmclock msrs | ||
20 | * are available. The use of 0x11 and 0x12 is deprecated. | ||
21 | */ | ||
22 | #define KVM_FEATURE_CLOCKSOURCE2 3 | ||
23 | |||
24 | /* The last 8 bits are used to indicate how to interpret the flags field | ||
25 | * in the pvclock structure. If no bits are set, all flags are ignored. | ||
26 | */ | ||
27 | #define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 | ||
19 | 28 | ||
20 | #define MSR_KVM_WALL_CLOCK 0x11 | 29 | #define MSR_KVM_WALL_CLOCK 0x11 |
21 | #define MSR_KVM_SYSTEM_TIME 0x12 | 30 | #define MSR_KVM_SYSTEM_TIME 0x12 |
22 | 31 | ||
32 | /* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */ | ||
33 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 | ||
34 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 | ||
35 | |||
23 | #define KVM_MAX_MMU_OP_BATCH 32 | 36 | #define KVM_MAX_MMU_OP_BATCH 32 |
24 | 37 | ||
25 | /* Operations for KVM_HC_MMU_OP */ | 38 | /* Operations for KVM_HC_MMU_OP */ |
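The feature bits above are advertised through the KVM paravirt CPUID leaf, so a guest normally probes KVM_FEATURE_CLOCKSOURCE2 first and falls back to the legacy 0x11/0x12 MSRs only when just the old KVM_FEATURE_CLOCKSOURCE bit is set. A minimal guest-side sketch, assuming the kvm_para_has_feature() helper declared elsewhere in this header (the function name below is illustrative):

	/* Illustrative: pick the system-time MSR to program, preferring the
	 * new MSR range when KVM_FEATURE_CLOCKSOURCE2 is advertised. */
	static inline u32 kvm_pick_system_time_msr(void)
	{
		if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2))
			return MSR_KVM_SYSTEM_TIME_NEW;
		if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))
			return MSR_KVM_SYSTEM_TIME;
		return 0;	/* kvmclock not available */
	}

The kvmclock_init() change further down in this merge follows the same pattern when choosing msr_kvm_system_time and msr_kvm_wall_clock.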
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index bc473acfa7f9..f9324851eba0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -202,8 +202,9 @@ | |||
202 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a | 202 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a |
203 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a | 203 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a |
204 | 204 | ||
205 | #define FEATURE_CONTROL_LOCKED (1<<0) | 205 | #define FEATURE_CONTROL_LOCKED (1<<0) |
206 | #define FEATURE_CONTROL_VMXON_ENABLED (1<<2) | 206 | #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) |
207 | #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) | ||
207 | 208 | ||
208 | #define MSR_IA32_APICBASE 0x0000001b | 209 | #define MSR_IA32_APICBASE 0x0000001b |
209 | #define MSR_IA32_APICBASE_BSP (1<<8) | 210 | #define MSR_IA32_APICBASE_BSP (1<<8) |
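Splitting the old VMXON-enable bit into inside-SMX and outside-SMX variants is what lets the VMXON check work on Intel TXT launches: once the MSR is locked, VMXON is only legal if the bit matching the current SMX state is set. A rough sketch of the load-time check a hypervisor performs, assuming the existing rdmsrl() and tboot_enabled() helpers (the function below is illustrative, not the exact KVM code):

	/* Illustrative: returns 1 if the BIOS-programmed IA32_FEATURE_CONTROL
	 * MSR forbids VMXON for the current (SMX or non-SMX) environment. */
	static int vmx_locked_out_by_bios(void)
	{
		u64 msr;

		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
		if (!(msr & FEATURE_CONTROL_LOCKED))
			return 0;	/* unlocked: the kernel may set the bits itself */
		if (tboot_enabled())
			return !(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX);
		return !(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
	}

The EXPORT_SYMBOL(tboot) hunk below exists so that modular KVM can reach tboot_enabled() for exactly this kind of test.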
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 6d93508f2626..35f2d1948ada 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h | |||
@@ -29,7 +29,8 @@ struct pvclock_vcpu_time_info { | |||
29 | u64 system_time; | 29 | u64 system_time; |
30 | u32 tsc_to_system_mul; | 30 | u32 tsc_to_system_mul; |
31 | s8 tsc_shift; | 31 | s8 tsc_shift; |
32 | u8 pad[3]; | 32 | u8 flags; |
33 | u8 pad[2]; | ||
33 | } __attribute__((__packed__)); /* 32 bytes */ | 34 | } __attribute__((__packed__)); /* 32 bytes */ |
34 | 35 | ||
35 | struct pvclock_wall_clock { | 36 | struct pvclock_wall_clock { |
@@ -38,5 +39,6 @@ struct pvclock_wall_clock { | |||
38 | u32 nsec; | 39 | u32 nsec; |
39 | } __attribute__((__packed__)); | 40 | } __attribute__((__packed__)); |
40 | 41 | ||
42 | #define PVCLOCK_TSC_STABLE_BIT (1 << 0) | ||
41 | #endif /* __ASSEMBLY__ */ | 43 | #endif /* __ASSEMBLY__ */ |
42 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ | 44 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ |
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 53235fd5f8ce..cd02f324aa6b 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
@@ -6,6 +6,7 @@ | |||
6 | 6 | ||
7 | /* some helper functions for xen and kvm pv clock sources */ | 7 | /* some helper functions for xen and kvm pv clock sources */ |
8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); | 8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); |
9 | void pvclock_set_flags(u8 flags); | ||
9 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); | 10 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); |
10 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, | 11 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, |
11 | struct pvclock_vcpu_time_info *vcpu, | 12 | struct pvclock_vcpu_time_info *vcpu, |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 38638cd2fa4c..0e831059ac5a 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
@@ -81,7 +81,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
81 | u32 event_inj_err; | 81 | u32 event_inj_err; |
82 | u64 nested_cr3; | 82 | u64 nested_cr3; |
83 | u64 lbr_ctl; | 83 | u64 lbr_ctl; |
84 | u8 reserved_5[832]; | 84 | u64 reserved_5; |
85 | u64 next_rip; | ||
86 | u8 reserved_6[816]; | ||
85 | }; | 87 | }; |
86 | 88 | ||
87 | 89 | ||
@@ -115,6 +117,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
115 | #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) | 117 | #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) |
116 | #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) | 118 | #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) |
117 | 119 | ||
120 | #define SVM_VM_CR_VALID_MASK 0x001fULL | ||
121 | #define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL | ||
122 | #define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL | ||
123 | |||
118 | struct __attribute__ ((__packed__)) vmcb_seg { | 124 | struct __attribute__ ((__packed__)) vmcb_seg { |
119 | u16 selector; | 125 | u16 selector; |
120 | u16 attrib; | 126 | u16 attrib; |
@@ -238,6 +244,7 @@ struct __attribute__ ((__packed__)) vmcb { | |||
238 | 244 | ||
239 | #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 | 245 | #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 |
240 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 | 246 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 |
247 | #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 | ||
241 | 248 | ||
242 | #define SVM_EXIT_READ_CR0 0x000 | 249 | #define SVM_EXIT_READ_CR0 0x000 |
243 | #define SVM_EXIT_READ_CR3 0x003 | 250 | #define SVM_EXIT_READ_CR3 0x003 |
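The new next_rip field carries the next sequential instruction pointer that CPUs with next-RIP saving record for certain intercepts; when it is non-zero the host can skip the intercepted instruction without re-decoding it. A sketch of how such a field is typically consumed (struct vcpu_svm and its next_rip cache are illustrative names here, not part of this header):

	/* Illustrative: use the hardware-reported next RIP when available;
	 * a value of zero means the CPU did not populate the field and the
	 * instruction length must still be obtained from the emulator. */
	static void cache_next_rip(struct vcpu_svm *svm)
	{
		if (svm->vmcb->control.next_rip != 0)
			svm->next_rip = svm->vmcb->control.next_rip;
	}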
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index fb9a080740ec..9e6779f7cf2d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -25,6 +25,8 @@ | |||
25 | * | 25 | * |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #include <linux/types.h> | ||
29 | |||
28 | /* | 30 | /* |
29 | * Definitions of Primary Processor-Based VM-Execution Controls. | 31 | * Definitions of Primary Processor-Based VM-Execution Controls. |
30 | */ | 32 | */ |
@@ -120,6 +122,8 @@ enum vmcs_field { | |||
120 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, | 122 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, |
121 | GUEST_IA32_PAT = 0x00002804, | 123 | GUEST_IA32_PAT = 0x00002804, |
122 | GUEST_IA32_PAT_HIGH = 0x00002805, | 124 | GUEST_IA32_PAT_HIGH = 0x00002805, |
125 | GUEST_IA32_EFER = 0x00002806, | ||
126 | GUEST_IA32_EFER_HIGH = 0x00002807, | ||
123 | GUEST_PDPTR0 = 0x0000280a, | 127 | GUEST_PDPTR0 = 0x0000280a, |
124 | GUEST_PDPTR0_HIGH = 0x0000280b, | 128 | GUEST_PDPTR0_HIGH = 0x0000280b, |
125 | GUEST_PDPTR1 = 0x0000280c, | 129 | GUEST_PDPTR1 = 0x0000280c, |
@@ -130,6 +134,8 @@ enum vmcs_field { | |||
130 | GUEST_PDPTR3_HIGH = 0x00002811, | 134 | GUEST_PDPTR3_HIGH = 0x00002811, |
131 | HOST_IA32_PAT = 0x00002c00, | 135 | HOST_IA32_PAT = 0x00002c00, |
132 | HOST_IA32_PAT_HIGH = 0x00002c01, | 136 | HOST_IA32_PAT_HIGH = 0x00002c01, |
137 | HOST_IA32_EFER = 0x00002c02, | ||
138 | HOST_IA32_EFER_HIGH = 0x00002c03, | ||
133 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | 139 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, |
134 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | 140 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, |
135 | EXCEPTION_BITMAP = 0x00004004, | 141 | EXCEPTION_BITMAP = 0x00004004, |
@@ -394,6 +400,10 @@ enum vmcs_field { | |||
394 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" | 400 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" |
395 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" | 401 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" |
396 | 402 | ||
397 | 403 | struct vmx_msr_entry { | |
404 | u32 index; | ||
405 | u32 reserved; | ||
406 | u64 value; | ||
407 | } __aligned(16); | ||
398 | 408 | ||
399 | #endif | 409 | #endif |
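struct vmx_msr_entry mirrors the 16-byte slot format the processor expects in the VM-entry/VM-exit MSR-load and MSR-store areas, which is why the entries carry an explicit alignment and a reserved word. Filling one slot of such an area might look like the sketch below (the area pointer and slot index are illustrative; only the structure layout comes from this header):

	/* Illustrative: populate one autoload slot.  The reserved word must
	 * be zero per the MSR-area format, or the VM entry will fail. */
	static void set_msr_autoload_slot(struct vmx_msr_entry *area,
					  unsigned int slot, u32 index, u64 value)
	{
		area[slot].index = index;
		area[slot].reserved = 0;
		area[slot].value = value;
	}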
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index feaeb0d3aa4f..eb9b76c716c2 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -29,6 +29,8 @@ | |||
29 | #define KVM_SCALE 22 | 29 | #define KVM_SCALE 22 |
30 | 30 | ||
31 | static int kvmclock = 1; | 31 | static int kvmclock = 1; |
32 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | ||
33 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | ||
32 | 34 | ||
33 | static int parse_no_kvmclock(char *arg) | 35 | static int parse_no_kvmclock(char *arg) |
34 | { | 36 | { |
@@ -54,7 +56,8 @@ static unsigned long kvm_get_wallclock(void) | |||
54 | 56 | ||
55 | low = (int)__pa_symbol(&wall_clock); | 57 | low = (int)__pa_symbol(&wall_clock); |
56 | high = ((u64)__pa_symbol(&wall_clock) >> 32); | 58 | high = ((u64)__pa_symbol(&wall_clock) >> 32); |
57 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | 59 | |
60 | native_write_msr(msr_kvm_wall_clock, low, high); | ||
58 | 61 | ||
59 | vcpu_time = &get_cpu_var(hv_clock); | 62 | vcpu_time = &get_cpu_var(hv_clock); |
60 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); | 63 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); |
@@ -130,7 +133,8 @@ static int kvm_register_clock(char *txt) | |||
130 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | 133 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); |
131 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | 134 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", |
132 | cpu, high, low, txt); | 135 | cpu, high, low, txt); |
133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | 136 | |
137 | return native_write_msr_safe(msr_kvm_system_time, low, high); | ||
134 | } | 138 | } |
135 | 139 | ||
136 | #ifdef CONFIG_X86_LOCAL_APIC | 140 | #ifdef CONFIG_X86_LOCAL_APIC |
@@ -165,14 +169,14 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
165 | #ifdef CONFIG_KEXEC | 169 | #ifdef CONFIG_KEXEC |
166 | static void kvm_crash_shutdown(struct pt_regs *regs) | 170 | static void kvm_crash_shutdown(struct pt_regs *regs) |
167 | { | 171 | { |
168 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 172 | native_write_msr(msr_kvm_system_time, 0, 0); |
169 | native_machine_crash_shutdown(regs); | 173 | native_machine_crash_shutdown(regs); |
170 | } | 174 | } |
171 | #endif | 175 | #endif |
172 | 176 | ||
173 | static void kvm_shutdown(void) | 177 | static void kvm_shutdown(void) |
174 | { | 178 | { |
175 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 179 | native_write_msr(msr_kvm_system_time, 0, 0); |
176 | native_machine_shutdown(); | 180 | native_machine_shutdown(); |
177 | } | 181 | } |
178 | 182 | ||
@@ -181,27 +185,37 @@ void __init kvmclock_init(void) | |||
181 | if (!kvm_para_available()) | 185 | if (!kvm_para_available()) |
182 | return; | 186 | return; |
183 | 187 | ||
184 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | 188 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { |
185 | if (kvm_register_clock("boot clock")) | 189 | msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; |
186 | return; | 190 | msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; |
187 | pv_time_ops.sched_clock = kvm_clock_read; | 191 | } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) |
188 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | 192 | return; |
189 | x86_platform.get_wallclock = kvm_get_wallclock; | 193 | |
190 | x86_platform.set_wallclock = kvm_set_wallclock; | 194 | printk(KERN_INFO "kvm-clock: Using msrs %x and %x\n",
195 | msr_kvm_system_time, msr_kvm_wall_clock); | ||
196 | |||
197 | if (kvm_register_clock("boot clock")) | ||
198 | return; | ||
199 | pv_time_ops.sched_clock = kvm_clock_read; | ||
200 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | ||
201 | x86_platform.get_wallclock = kvm_get_wallclock; | ||
202 | x86_platform.set_wallclock = kvm_set_wallclock; | ||
191 | #ifdef CONFIG_X86_LOCAL_APIC | 203 | #ifdef CONFIG_X86_LOCAL_APIC |
192 | x86_cpuinit.setup_percpu_clockev = | 204 | x86_cpuinit.setup_percpu_clockev = |
193 | kvm_setup_secondary_clock; | 205 | kvm_setup_secondary_clock; |
194 | #endif | 206 | #endif |
195 | #ifdef CONFIG_SMP | 207 | #ifdef CONFIG_SMP |
196 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 208 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
197 | #endif | 209 | #endif |
198 | machine_ops.shutdown = kvm_shutdown; | 210 | machine_ops.shutdown = kvm_shutdown; |
199 | #ifdef CONFIG_KEXEC | 211 | #ifdef CONFIG_KEXEC |
200 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 212 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
201 | #endif | 213 | #endif |
202 | kvm_get_preset_lpj(); | 214 | kvm_get_preset_lpj(); |
203 | clocksource_register(&kvm_clock); | 215 | clocksource_register(&kvm_clock); |
204 | pv_info.paravirt_enabled = 1; | 216 | pv_info.paravirt_enabled = 1; |
205 | pv_info.name = "KVM"; | 217 | pv_info.name = "KVM"; |
206 | } | 218 | |
219 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
220 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | ||
207 | } | 221 | } |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 03801f2f761f..239427ca02af 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
@@ -31,8 +31,16 @@ struct pvclock_shadow_time { | |||
31 | u32 tsc_to_nsec_mul; | 31 | u32 tsc_to_nsec_mul; |
32 | int tsc_shift; | 32 | int tsc_shift; |
33 | u32 version; | 33 | u32 version; |
34 | u8 flags; | ||
34 | }; | 35 | }; |
35 | 36 | ||
37 | static u8 valid_flags __read_mostly = 0; | ||
38 | |||
39 | void pvclock_set_flags(u8 flags) | ||
40 | { | ||
41 | valid_flags = flags; | ||
42 | } | ||
43 | |||
36 | /* | 44 | /* |
37 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | 45 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, |
38 | * yielding a 64-bit result. | 46 | * yielding a 64-bit result. |
@@ -91,6 +99,7 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | |||
91 | dst->system_timestamp = src->system_time; | 99 | dst->system_timestamp = src->system_time; |
92 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | 100 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; |
93 | dst->tsc_shift = src->tsc_shift; | 101 | dst->tsc_shift = src->tsc_shift; |
102 | dst->flags = src->flags; | ||
94 | rmb(); /* test version after fetching data */ | 103 | rmb(); /* test version after fetching data */ |
95 | } while ((src->version & 1) || (dst->version != src->version)); | 104 | } while ((src->version & 1) || (dst->version != src->version)); |
96 | 105 | ||
@@ -109,11 +118,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) | |||
109 | return pv_tsc_khz; | 118 | return pv_tsc_khz; |
110 | } | 119 | } |
111 | 120 | ||
121 | static atomic64_t last_value = ATOMIC64_INIT(0); | ||
122 | |||
112 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | 123 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) |
113 | { | 124 | { |
114 | struct pvclock_shadow_time shadow; | 125 | struct pvclock_shadow_time shadow; |
115 | unsigned version; | 126 | unsigned version; |
116 | cycle_t ret, offset; | 127 | cycle_t ret, offset; |
128 | u64 last; | ||
117 | 129 | ||
118 | do { | 130 | do { |
119 | version = pvclock_get_time_values(&shadow, src); | 131 | version = pvclock_get_time_values(&shadow, src); |
@@ -123,6 +135,31 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | |||
123 | barrier(); | 135 | barrier(); |
124 | } while (version != src->version); | 136 | } while (version != src->version); |
125 | 137 | ||
138 | if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && | ||
139 | (shadow.flags & PVCLOCK_TSC_STABLE_BIT)) | ||
140 | return ret; | ||
141 | |||
142 | /* | ||
143 | * Assumption here is that last_value, a global accumulator, always goes | ||
144 | * forward. If we are less than that, we should not be much smaller. | ||
145 | * We assume there is an error margin we're inside, and then the correction | ||
146 | * does not sacrifice accuracy. | ||
147 | * | ||
148 | * For reads: global may have changed between test and return, | ||
149 | * but this means someone else updated the clock at a later time. | ||
150 | * We just need to make sure we are not seeing a backwards event. | ||
151 | * | ||
152 | * For updates: last_value = ret is not enough, since two vcpus could be | ||
153 | * updating at the same time, and one of them could be slightly behind, | ||
154 | * making the assumption that last_value always goes forward fail to hold. | ||
155 | */ | ||
156 | last = atomic64_read(&last_value); | ||
157 | do { | ||
158 | if (ret < last) | ||
159 | return last; | ||
160 | last = atomic64_cmpxchg(&last_value, last, ret); | ||
161 | } while (unlikely(last != ret)); | ||
162 | |||
126 | return ret; | 163 | return ret; |
127 | } | 164 | } |
128 | 165 | ||
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index cc2c60474fd0..c2f1b26141e2 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -46,6 +46,7 @@ | |||
46 | 46 | ||
47 | /* Global pointer to shared data; NULL means no measured launch. */ | 47 | /* Global pointer to shared data; NULL means no measured launch. */ |
48 | struct tboot *tboot __read_mostly; | 48 | struct tboot *tboot __read_mostly; |
49 | EXPORT_SYMBOL(tboot); | ||
49 | 50 | ||
50 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ | 51 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ |
51 | #define AP_WAIT_TIMEOUT 1 | 52 | #define AP_WAIT_TIMEOUT 1 |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4dade6ac0827..5ac0bb465ed6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/kvm_emulate.h> | 33 | #include <asm/kvm_emulate.h> |
34 | 34 | ||
35 | #include "x86.h" | 35 | #include "x86.h" |
36 | #include "tss.h" | ||
36 | 37 | ||
37 | /* | 38 | /* |
38 | * Opcode effective-address decode tables. | 39 | * Opcode effective-address decode tables. |
@@ -50,6 +51,8 @@ | |||
50 | #define DstReg (2<<1) /* Register operand. */ | 51 | #define DstReg (2<<1) /* Register operand. */ |
51 | #define DstMem (3<<1) /* Memory operand. */ | 52 | #define DstMem (3<<1) /* Memory operand. */ |
52 | #define DstAcc (4<<1) /* Destination Accumulator */ | 53 | #define DstAcc (4<<1) /* Destination Accumulator */ |
54 | #define DstDI (5<<1) /* Destination is in ES:(E)DI */ | ||
55 | #define DstMem64 (6<<1) /* 64bit memory operand */ | ||
53 | #define DstMask (7<<1) | 56 | #define DstMask (7<<1) |
54 | /* Source operand type. */ | 57 | /* Source operand type. */ |
55 | #define SrcNone (0<<4) /* No source operand. */ | 58 | #define SrcNone (0<<4) /* No source operand. */ |
@@ -63,6 +66,7 @@ | |||
63 | #define SrcOne (7<<4) /* Implied '1' */ | 66 | #define SrcOne (7<<4) /* Implied '1' */ |
64 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ | 67 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
65 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ | 68 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ |
69 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ | ||
66 | #define SrcMask (0xf<<4) | 70 | #define SrcMask (0xf<<4) |
67 | /* Generic ModRM decode. */ | 71 | /* Generic ModRM decode. */ |
68 | #define ModRM (1<<8) | 72 | #define ModRM (1<<8) |
@@ -85,6 +89,9 @@ | |||
85 | #define Src2ImmByte (2<<29) | 89 | #define Src2ImmByte (2<<29) |
86 | #define Src2One (3<<29) | 90 | #define Src2One (3<<29) |
87 | #define Src2Imm16 (4<<29) | 91 | #define Src2Imm16 (4<<29) |
92 | #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be | ||
93 | in memory and second argument is located | ||
94 | immediately after the first one in memory. */ | ||
88 | #define Src2Mask (7<<29) | 95 | #define Src2Mask (7<<29) |
89 | 96 | ||
90 | enum { | 97 | enum { |
@@ -147,8 +154,8 @@ static u32 opcode_table[256] = { | |||
147 | 0, 0, 0, 0, | 154 | 0, 0, 0, 0, |
148 | /* 0x68 - 0x6F */ | 155 | /* 0x68 - 0x6F */ |
149 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, | 156 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
150 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ | 157 | DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */ |
151 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ | 158 | SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */ |
152 | /* 0x70 - 0x77 */ | 159 | /* 0x70 - 0x77 */ |
153 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 160 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
154 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 161 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
@@ -173,12 +180,12 @@ static u32 opcode_table[256] = { | |||
173 | /* 0xA0 - 0xA7 */ | 180 | /* 0xA0 - 0xA7 */ |
174 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 181 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
175 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 182 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, |
176 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, |
177 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, |
178 | /* 0xA8 - 0xAF */ | 185 | /* 0xA8 - 0xAF */ |
179 | 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 186 | 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, |
180 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, |
181 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 188 | ByteOp | DstDI | String, DstDI | String, |
182 | /* 0xB0 - 0xB7 */ | 189 | /* 0xB0 - 0xB7 */ |
183 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 190 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
184 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 191 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
@@ -204,13 +211,13 @@ static u32 opcode_table[256] = { | |||
204 | 0, 0, 0, 0, 0, 0, 0, 0, | 211 | 0, 0, 0, 0, 0, 0, 0, 0, |
205 | /* 0xE0 - 0xE7 */ | 212 | /* 0xE0 - 0xE7 */ |
206 | 0, 0, 0, 0, | 213 | 0, 0, 0, 0, |
207 | ByteOp | SrcImmUByte, SrcImmUByte, | 214 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
208 | ByteOp | SrcImmUByte, SrcImmUByte, | 215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
209 | /* 0xE8 - 0xEF */ | 216 | /* 0xE8 - 0xEF */ |
210 | SrcImm | Stack, SrcImm | ImplicitOps, | 217 | SrcImm | Stack, SrcImm | ImplicitOps, |
211 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, | 218 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, |
212 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
213 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
214 | /* 0xF0 - 0xF7 */ | 221 | /* 0xF0 - 0xF7 */ |
215 | 0, 0, 0, 0, | 222 | 0, 0, 0, 0, |
216 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, | 223 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, |
@@ -343,7 +350,8 @@ static u32 group_table[] = { | |||
343 | [Group5*8] = | 350 | [Group5*8] = |
344 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 351 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
345 | SrcMem | ModRM | Stack, 0, | 352 | SrcMem | ModRM | Stack, 0, |
346 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | 353 | SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, |
354 | SrcMem | ModRM | Stack, 0, | ||
347 | [Group7*8] = | 355 | [Group7*8] = |
348 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, | 356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
349 | SrcNone | ModRM | DstMem | Mov, 0, | 357 | SrcNone | ModRM | DstMem | Mov, 0, |
@@ -353,14 +361,14 @@ static u32 group_table[] = { | |||
353 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, | 361 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, |
354 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, | 362 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, |
355 | [Group9*8] = | 363 | [Group9*8] = |
356 | 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, | 364 | 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0, |
357 | }; | 365 | }; |
358 | 366 | ||
359 | static u32 group2_table[] = { | 367 | static u32 group2_table[] = { |
360 | [Group7*8] = | 368 | [Group7*8] = |
361 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, | 369 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv, |
362 | SrcNone | ModRM | DstMem | Mov, 0, | 370 | SrcNone | ModRM | DstMem | Mov, 0, |
363 | SrcMem16 | ModRM | Mov, 0, | 371 | SrcMem16 | ModRM | Mov | Priv, 0, |
364 | [Group9*8] = | 372 | [Group9*8] = |
365 | 0, 0, 0, 0, 0, 0, 0, 0, | 373 | 0, 0, 0, 0, 0, 0, 0, 0, |
366 | }; | 374 | }; |
@@ -562,7 +570,7 @@ static u32 group2_table[] = { | |||
562 | #define insn_fetch(_type, _size, _eip) \ | 570 | #define insn_fetch(_type, _size, _eip) \ |
563 | ({ unsigned long _x; \ | 571 | ({ unsigned long _x; \ |
564 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ | 572 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ |
565 | if (rc != 0) \ | 573 | if (rc != X86EMUL_CONTINUE) \ |
566 | goto done; \ | 574 | goto done; \ |
567 | (_eip) += (_size); \ | 575 | (_eip) += (_size); \ |
568 | (_type)_x; \ | 576 | (_type)_x; \ |
@@ -638,40 +646,40 @@ static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) | |||
638 | 646 | ||
639 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 647 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
640 | struct x86_emulate_ops *ops, | 648 | struct x86_emulate_ops *ops, |
641 | unsigned long linear, u8 *dest) | 649 | unsigned long eip, u8 *dest) |
642 | { | 650 | { |
643 | struct fetch_cache *fc = &ctxt->decode.fetch; | 651 | struct fetch_cache *fc = &ctxt->decode.fetch; |
644 | int rc; | 652 | int rc; |
645 | int size; | 653 | int size, cur_size; |
646 | 654 | ||
647 | if (linear < fc->start || linear >= fc->end) { | 655 | if (eip == fc->end) { |
648 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); | 656 | cur_size = fc->end - fc->start; |
649 | rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); | 657 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); |
650 | if (rc) | 658 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, |
659 | size, ctxt->vcpu, NULL); | ||
660 | if (rc != X86EMUL_CONTINUE) | ||
651 | return rc; | 661 | return rc; |
652 | fc->start = linear; | 662 | fc->end += size; |
653 | fc->end = linear + size; | ||
654 | } | 663 | } |
655 | *dest = fc->data[linear - fc->start]; | 664 | *dest = fc->data[eip - fc->start]; |
656 | return 0; | 665 | return X86EMUL_CONTINUE; |
657 | } | 666 | } |
658 | 667 | ||
659 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | 668 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, |
660 | struct x86_emulate_ops *ops, | 669 | struct x86_emulate_ops *ops, |
661 | unsigned long eip, void *dest, unsigned size) | 670 | unsigned long eip, void *dest, unsigned size) |
662 | { | 671 | { |
663 | int rc = 0; | 672 | int rc; |
664 | 673 | ||
665 | /* x86 instructions are limited to 15 bytes. */ | 674 | /* x86 instructions are limited to 15 bytes. */ |
666 | if (eip + size - ctxt->decode.eip_orig > 15) | 675 | if (eip + size - ctxt->eip > 15) |
667 | return X86EMUL_UNHANDLEABLE; | 676 | return X86EMUL_UNHANDLEABLE; |
668 | eip += ctxt->cs_base; | ||
669 | while (size--) { | 677 | while (size--) { |
670 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 678 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); |
671 | if (rc) | 679 | if (rc != X86EMUL_CONTINUE) |
672 | return rc; | 680 | return rc; |
673 | } | 681 | } |
674 | return 0; | 682 | return X86EMUL_CONTINUE; |
675 | } | 683 | } |
676 | 684 | ||
677 | /* | 685 | /* |
@@ -702,7 +710,7 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
702 | *address = 0; | 710 | *address = 0; |
703 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, | 711 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, |
704 | ctxt->vcpu, NULL); | 712 | ctxt->vcpu, NULL); |
705 | if (rc) | 713 | if (rc != X86EMUL_CONTINUE) |
706 | return rc; | 714 | return rc; |
707 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, | 715 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, |
708 | ctxt->vcpu, NULL); | 716 | ctxt->vcpu, NULL); |
@@ -782,7 +790,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
782 | struct decode_cache *c = &ctxt->decode; | 790 | struct decode_cache *c = &ctxt->decode; |
783 | u8 sib; | 791 | u8 sib; |
784 | int index_reg = 0, base_reg = 0, scale; | 792 | int index_reg = 0, base_reg = 0, scale; |
785 | int rc = 0; | 793 | int rc = X86EMUL_CONTINUE; |
786 | 794 | ||
787 | if (c->rex_prefix) { | 795 | if (c->rex_prefix) { |
788 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ | 796 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ |
@@ -895,7 +903,7 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, | |||
895 | struct x86_emulate_ops *ops) | 903 | struct x86_emulate_ops *ops) |
896 | { | 904 | { |
897 | struct decode_cache *c = &ctxt->decode; | 905 | struct decode_cache *c = &ctxt->decode; |
898 | int rc = 0; | 906 | int rc = X86EMUL_CONTINUE; |
899 | 907 | ||
900 | switch (c->ad_bytes) { | 908 | switch (c->ad_bytes) { |
901 | case 2: | 909 | case 2: |
@@ -916,14 +924,18 @@ int | |||
916 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 924 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
917 | { | 925 | { |
918 | struct decode_cache *c = &ctxt->decode; | 926 | struct decode_cache *c = &ctxt->decode; |
919 | int rc = 0; | 927 | int rc = X86EMUL_CONTINUE; |
920 | int mode = ctxt->mode; | 928 | int mode = ctxt->mode; |
921 | int def_op_bytes, def_ad_bytes, group; | 929 | int def_op_bytes, def_ad_bytes, group; |
922 | 930 | ||
923 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
924 | 931 | ||
932 | /* we cannot decode insn before we complete previous rep insn */ | ||
933 | WARN_ON(ctxt->restart); | ||
934 | |||
935 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
925 | memset(c, 0, sizeof(struct decode_cache)); | 936 | memset(c, 0, sizeof(struct decode_cache)); |
926 | c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); | 937 | c->eip = ctxt->eip; |
938 | c->fetch.start = c->fetch.end = c->eip; | ||
927 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 939 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
928 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 940 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
929 | 941 | ||
@@ -1015,11 +1027,6 @@ done_prefixes: | |||
1015 | } | 1027 | } |
1016 | } | 1028 | } |
1017 | 1029 | ||
1018 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
1019 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); | ||
1020 | return -1; | ||
1021 | } | ||
1022 | |||
1023 | if (c->d & Group) { | 1030 | if (c->d & Group) { |
1024 | group = c->d & GroupMask; | 1031 | group = c->d & GroupMask; |
1025 | c->modrm = insn_fetch(u8, 1, c->eip); | 1032 | c->modrm = insn_fetch(u8, 1, c->eip); |
@@ -1046,7 +1053,7 @@ done_prefixes: | |||
1046 | rc = decode_modrm(ctxt, ops); | 1053 | rc = decode_modrm(ctxt, ops); |
1047 | else if (c->d & MemAbs) | 1054 | else if (c->d & MemAbs) |
1048 | rc = decode_abs(ctxt, ops); | 1055 | rc = decode_abs(ctxt, ops); |
1049 | if (rc) | 1056 | if (rc != X86EMUL_CONTINUE) |
1050 | goto done; | 1057 | goto done; |
1051 | 1058 | ||
1052 | if (!c->has_seg_override) | 1059 | if (!c->has_seg_override) |
@@ -1057,6 +1064,10 @@ done_prefixes: | |||
1057 | 1064 | ||
1058 | if (c->ad_bytes != 8) | 1065 | if (c->ad_bytes != 8) |
1059 | c->modrm_ea = (u32)c->modrm_ea; | 1066 | c->modrm_ea = (u32)c->modrm_ea; |
1067 | |||
1068 | if (c->rip_relative) | ||
1069 | c->modrm_ea += c->eip; | ||
1070 | |||
1060 | /* | 1071 | /* |
1061 | * Decode and fetch the source operand: register, memory | 1072 | * Decode and fetch the source operand: register, memory |
1062 | * or immediate. | 1073 | * or immediate. |
@@ -1091,6 +1102,8 @@ done_prefixes: | |||
1091 | break; | 1102 | break; |
1092 | } | 1103 | } |
1093 | c->src.type = OP_MEM; | 1104 | c->src.type = OP_MEM; |
1105 | c->src.ptr = (unsigned long *)c->modrm_ea; | ||
1106 | c->src.val = 0; | ||
1094 | break; | 1107 | break; |
1095 | case SrcImm: | 1108 | case SrcImm: |
1096 | case SrcImmU: | 1109 | case SrcImmU: |
@@ -1139,6 +1152,14 @@ done_prefixes: | |||
1139 | c->src.bytes = 1; | 1152 | c->src.bytes = 1; |
1140 | c->src.val = 1; | 1153 | c->src.val = 1; |
1141 | break; | 1154 | break; |
1155 | case SrcSI: | ||
1156 | c->src.type = OP_MEM; | ||
1157 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1158 | c->src.ptr = (unsigned long *) | ||
1159 | register_address(c, seg_override_base(ctxt, c), | ||
1160 | c->regs[VCPU_REGS_RSI]); | ||
1161 | c->src.val = 0; | ||
1162 | break; | ||
1142 | } | 1163 | } |
1143 | 1164 | ||
1144 | /* | 1165 | /* |
@@ -1168,6 +1189,12 @@ done_prefixes: | |||
1168 | c->src2.bytes = 1; | 1189 | c->src2.bytes = 1; |
1169 | c->src2.val = 1; | 1190 | c->src2.val = 1; |
1170 | break; | 1191 | break; |
1192 | case Src2Mem16: | ||
1193 | c->src2.type = OP_MEM; | ||
1194 | c->src2.bytes = 2; | ||
1195 | c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); | ||
1196 | c->src2.val = 0; | ||
1197 | break; | ||
1171 | } | 1198 | } |
1172 | 1199 | ||
1173 | /* Decode and fetch the destination operand: register or memory. */ | 1200 | /* Decode and fetch the destination operand: register or memory. */ |
@@ -1180,6 +1207,7 @@ done_prefixes: | |||
1180 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); | 1207 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); |
1181 | break; | 1208 | break; |
1182 | case DstMem: | 1209 | case DstMem: |
1210 | case DstMem64: | ||
1183 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1211 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
1184 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1212 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1185 | c->dst.type = OP_REG; | 1213 | c->dst.type = OP_REG; |
@@ -1188,12 +1216,24 @@ done_prefixes: | |||
1188 | break; | 1216 | break; |
1189 | } | 1217 | } |
1190 | c->dst.type = OP_MEM; | 1218 | c->dst.type = OP_MEM; |
1219 | c->dst.ptr = (unsigned long *)c->modrm_ea; | ||
1220 | if ((c->d & DstMask) == DstMem64) | ||
1221 | c->dst.bytes = 8; | ||
1222 | else | ||
1223 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1224 | c->dst.val = 0; | ||
1225 | if (c->d & BitOp) { | ||
1226 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | ||
1227 | |||
1228 | c->dst.ptr = (void *)c->dst.ptr + | ||
1229 | (c->src.val & mask) / 8; | ||
1230 | } | ||
1191 | break; | 1231 | break; |
1192 | case DstAcc: | 1232 | case DstAcc: |
1193 | c->dst.type = OP_REG; | 1233 | c->dst.type = OP_REG; |
1194 | c->dst.bytes = c->op_bytes; | 1234 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1195 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | 1235 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; |
1196 | switch (c->op_bytes) { | 1236 | switch (c->dst.bytes) { |
1197 | case 1: | 1237 | case 1: |
1198 | c->dst.val = *(u8 *)c->dst.ptr; | 1238 | c->dst.val = *(u8 *)c->dst.ptr; |
1199 | break; | 1239 | break; |
@@ -1203,18 +1243,248 @@ done_prefixes: | |||
1203 | case 4: | 1243 | case 4: |
1204 | c->dst.val = *(u32 *)c->dst.ptr; | 1244 | c->dst.val = *(u32 *)c->dst.ptr; |
1205 | break; | 1245 | break; |
1246 | case 8: | ||
1247 | c->dst.val = *(u64 *)c->dst.ptr; | ||
1248 | break; | ||
1206 | } | 1249 | } |
1207 | c->dst.orig_val = c->dst.val; | 1250 | c->dst.orig_val = c->dst.val; |
1208 | break; | 1251 | break; |
1252 | case DstDI: | ||
1253 | c->dst.type = OP_MEM; | ||
1254 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1255 | c->dst.ptr = (unsigned long *) | ||
1256 | register_address(c, es_base(ctxt), | ||
1257 | c->regs[VCPU_REGS_RDI]); | ||
1258 | c->dst.val = 0; | ||
1259 | break; | ||
1209 | } | 1260 | } |
1210 | 1261 | ||
1211 | if (c->rip_relative) | ||
1212 | c->modrm_ea += c->eip; | ||
1213 | |||
1214 | done: | 1262 | done: |
1215 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1263 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
1216 | } | 1264 | } |
1217 | 1265 | ||
1266 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | ||
1267 | struct x86_emulate_ops *ops, | ||
1268 | unsigned int size, unsigned short port, | ||
1269 | void *dest) | ||
1270 | { | ||
1271 | struct read_cache *rc = &ctxt->decode.io_read; | ||
1272 | |||
1273 | if (rc->pos == rc->end) { /* refill pio read ahead */ | ||
1274 | struct decode_cache *c = &ctxt->decode; | ||
1275 | unsigned int in_page, n; | ||
1276 | unsigned int count = c->rep_prefix ? | ||
1277 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; | ||
1278 | in_page = (ctxt->eflags & EFLG_DF) ? | ||
1279 | offset_in_page(c->regs[VCPU_REGS_RDI]) : | ||
1280 | PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); | ||
1281 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, | ||
1282 | count); | ||
1283 | if (n == 0) | ||
1284 | n = 1; | ||
1285 | rc->pos = rc->end = 0; | ||
1286 | if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) | ||
1287 | return 0; | ||
1288 | rc->end = n * size; | ||
1289 | } | ||
1290 | |||
1291 | memcpy(dest, rc->data + rc->pos, size); | ||
1292 | rc->pos += size; | ||
1293 | return 1; | ||
1294 | } | ||
1295 | |||
1296 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
1297 | { | ||
1298 | u32 limit = get_desc_limit(desc); | ||
1299 | |||
1300 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
1301 | } | ||
1302 | |||
1303 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | ||
1304 | struct x86_emulate_ops *ops, | ||
1305 | u16 selector, struct desc_ptr *dt) | ||
1306 | { | ||
1307 | if (selector & 1 << 2) { | ||
1308 | struct desc_struct desc; | ||
1309 | memset (dt, 0, sizeof *dt); | ||
1310 | if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) | ||
1311 | return; | ||
1312 | |||
1313 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | ||
1314 | dt->address = get_desc_base(&desc); | ||
1315 | } else | ||
1316 | ops->get_gdt(dt, ctxt->vcpu); | ||
1317 | } | ||
1318 | |||
1319 | /* allowed just for 8-byte segments */ | ||
1320 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1321 | struct x86_emulate_ops *ops, | ||
1322 | u16 selector, struct desc_struct *desc) | ||
1323 | { | ||
1324 | struct desc_ptr dt; | ||
1325 | u16 index = selector >> 3; | ||
1326 | int ret; | ||
1327 | u32 err; | ||
1328 | ulong addr; | ||
1329 | |||
1330 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
1331 | |||
1332 | if (dt.size < index * 8 + 7) { | ||
1333 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
1334 | return X86EMUL_PROPAGATE_FAULT; | ||
1335 | } | ||
1336 | addr = dt.address + index * 8; | ||
1337 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
1338 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
1339 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
1340 | |||
1341 | return ret; | ||
1342 | } | ||
1343 | |||
1344 | /* allowed just for 8-byte segments */ | ||
1345 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1346 | struct x86_emulate_ops *ops, | ||
1347 | u16 selector, struct desc_struct *desc) | ||
1348 | { | ||
1349 | struct desc_ptr dt; | ||
1350 | u16 index = selector >> 3; | ||
1351 | u32 err; | ||
1352 | ulong addr; | ||
1353 | int ret; | ||
1354 | |||
1355 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
1356 | |||
1357 | if (dt.size < index * 8 + 7) { | ||
1358 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
1359 | return X86EMUL_PROPAGATE_FAULT; | ||
1360 | } | ||
1361 | |||
1362 | addr = dt.address + index * 8; | ||
1363 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
1364 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
1365 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
1366 | |||
1367 | return ret; | ||
1368 | } | ||
1369 | |||
1370 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
1371 | struct x86_emulate_ops *ops, | ||
1372 | u16 selector, int seg) | ||
1373 | { | ||
1374 | struct desc_struct seg_desc; | ||
1375 | u8 dpl, rpl, cpl; | ||
1376 | unsigned err_vec = GP_VECTOR; | ||
1377 | u32 err_code = 0; | ||
1378 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
1379 | int ret; | ||
1380 | |||
1381 | memset(&seg_desc, 0, sizeof seg_desc); | ||
1382 | |||
1383 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) | ||
1384 | || ctxt->mode == X86EMUL_MODE_REAL) { | ||
1385 | /* set real mode segment descriptor */ | ||
1386 | set_desc_base(&seg_desc, selector << 4); | ||
1387 | set_desc_limit(&seg_desc, 0xffff); | ||
1388 | seg_desc.type = 3; | ||
1389 | seg_desc.p = 1; | ||
1390 | seg_desc.s = 1; | ||
1391 | goto load; | ||
1392 | } | ||
1393 | |||
1394 | /* NULL selector is not valid for TR, CS and SS */ | ||
1395 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | ||
1396 | && null_selector) | ||
1397 | goto exception; | ||
1398 | |||
1399 | /* TR should be in GDT only */ | ||
1400 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
1401 | goto exception; | ||
1402 | |||
1403 | if (null_selector) /* for NULL selector skip all following checks */ | ||
1404 | goto load; | ||
1405 | |||
1406 | ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
1407 | if (ret != X86EMUL_CONTINUE) | ||
1408 | return ret; | ||
1409 | |||
1410 | err_code = selector & 0xfffc; | ||
1411 | err_vec = GP_VECTOR; | ||
1412 | |||
1413 | /* can't load system descriptor into segment selector */ | ||
1414 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | ||
1415 | goto exception; | ||
1416 | |||
1417 | if (!seg_desc.p) { | ||
1418 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
1419 | goto exception; | ||
1420 | } | ||
1421 | |||
1422 | rpl = selector & 3; | ||
1423 | dpl = seg_desc.dpl; | ||
1424 | cpl = ops->cpl(ctxt->vcpu); | ||
1425 | |||
1426 | switch (seg) { | ||
1427 | case VCPU_SREG_SS: | ||
1428 | /* | ||
1429 | * segment is not a writable data segment or segment | ||
1430 | * selector's RPL != CPL or segment descriptor's DPL != CPL | ||
1431 | */ | ||
1432 | if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) | ||
1433 | goto exception; | ||
1434 | break; | ||
1435 | case VCPU_SREG_CS: | ||
1436 | if (!(seg_desc.type & 8)) | ||
1437 | goto exception; | ||
1438 | |||
1439 | if (seg_desc.type & 4) { | ||
1440 | /* conforming */ | ||
1441 | if (dpl > cpl) | ||
1442 | goto exception; | ||
1443 | } else { | ||
1444 | /* nonconforming */ | ||
1445 | if (rpl > cpl || dpl != cpl) | ||
1446 | goto exception; | ||
1447 | } | ||
1448 | /* CS(RPL) <- CPL */ | ||
1449 | selector = (selector & 0xfffc) | cpl; | ||
1450 | break; | ||
1451 | case VCPU_SREG_TR: | ||
1452 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) | ||
1453 | goto exception; | ||
1454 | break; | ||
1455 | case VCPU_SREG_LDTR: | ||
1456 | if (seg_desc.s || seg_desc.type != 2) | ||
1457 | goto exception; | ||
1458 | break; | ||
1459 | default: /* DS, ES, FS, or GS */ | ||
1460 | /* | ||
1461 | * segment is not a data or readable code segment or | ||
1462 | * ((segment is a data or nonconforming code segment) | ||
1463 | * and (both RPL and CPL > DPL)) | ||
1464 | */ | ||
1465 | if ((seg_desc.type & 0xa) == 0x8 || | ||
1466 | (((seg_desc.type & 0xc) != 0xc) && | ||
1467 | (rpl > dpl && cpl > dpl))) | ||
1468 | goto exception; | ||
1469 | break; | ||
1470 | } | ||
1471 | |||
1472 | if (seg_desc.s) { | ||
1473 | /* mark segment as accessed */ | ||
1474 | seg_desc.type |= 1; | ||
1475 | ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
1476 | if (ret != X86EMUL_CONTINUE) | ||
1477 | return ret; | ||
1478 | } | ||
1479 | load: | ||
1480 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | ||
1481 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | ||
1482 | return X86EMUL_CONTINUE; | ||
1483 | exception: | ||
1484 | kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); | ||
1485 | return X86EMUL_PROPAGATE_FAULT; | ||
1486 | } | ||
1487 | |||
1218 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | 1488 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) |
1219 | { | 1489 | { |
1220 | struct decode_cache *c = &ctxt->decode; | 1490 | struct decode_cache *c = &ctxt->decode; |
@@ -1251,7 +1521,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1251 | int rc; | 1521 | int rc; |
1252 | unsigned long val, change_mask; | 1522 | unsigned long val, change_mask; |
1253 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1523 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1254 | int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); | 1524 | int cpl = ops->cpl(ctxt->vcpu); |
1255 | 1525 | ||
1256 | rc = emulate_pop(ctxt, ops, &val, len); | 1526 | rc = emulate_pop(ctxt, ops, &val, len); |
1257 | if (rc != X86EMUL_CONTINUE) | 1527 | if (rc != X86EMUL_CONTINUE) |
@@ -1306,10 +1576,10 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1306 | int rc; | 1576 | int rc; |
1307 | 1577 | ||
1308 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | 1578 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); |
1309 | if (rc != 0) | 1579 | if (rc != X86EMUL_CONTINUE) |
1310 | return rc; | 1580 | return rc; |
1311 | 1581 | ||
1312 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); | 1582 | rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); |
1313 | return rc; | 1583 | return rc; |
1314 | } | 1584 | } |
1315 | 1585 | ||
@@ -1332,7 +1602,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1332 | struct x86_emulate_ops *ops) | 1602 | struct x86_emulate_ops *ops) |
1333 | { | 1603 | { |
1334 | struct decode_cache *c = &ctxt->decode; | 1604 | struct decode_cache *c = &ctxt->decode; |
1335 | int rc = 0; | 1605 | int rc = X86EMUL_CONTINUE; |
1336 | int reg = VCPU_REGS_RDI; | 1606 | int reg = VCPU_REGS_RDI; |
1337 | 1607 | ||
1338 | while (reg >= VCPU_REGS_RAX) { | 1608 | while (reg >= VCPU_REGS_RAX) { |
@@ -1343,7 +1613,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
1343 | } | 1613 | } |
1344 | 1614 | ||
1345 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | 1615 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); |
1346 | if (rc != 0) | 1616 | if (rc != X86EMUL_CONTINUE) |
1347 | break; | 1617 | break; |
1348 | --reg; | 1618 | --reg; |
1349 | } | 1619 | } |
@@ -1354,12 +1624,8 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | |||
1354 | struct x86_emulate_ops *ops) | 1624 | struct x86_emulate_ops *ops) |
1355 | { | 1625 | { |
1356 | struct decode_cache *c = &ctxt->decode; | 1626 | struct decode_cache *c = &ctxt->decode; |
1357 | int rc; | ||
1358 | 1627 | ||
1359 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); | 1628 | return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); |
1360 | if (rc != 0) | ||
1361 | return rc; | ||
1362 | return 0; | ||
1363 | } | 1629 | } |
1364 | 1630 | ||
1365 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | 1631 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) |
@@ -1395,7 +1661,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1395 | struct x86_emulate_ops *ops) | 1661 | struct x86_emulate_ops *ops) |
1396 | { | 1662 | { |
1397 | struct decode_cache *c = &ctxt->decode; | 1663 | struct decode_cache *c = &ctxt->decode; |
1398 | int rc = 0; | ||
1399 | 1664 | ||
1400 | switch (c->modrm_reg) { | 1665 | switch (c->modrm_reg) { |
1401 | case 0 ... 1: /* test */ | 1666 | case 0 ... 1: /* test */ |
@@ -1408,11 +1673,9 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
1408 | emulate_1op("neg", c->dst, ctxt->eflags); | 1673 | emulate_1op("neg", c->dst, ctxt->eflags); |
1409 | break; | 1674 | break; |
1410 | default: | 1675 | default: |
1411 | DPRINTF("Cannot emulate %02x\n", c->b); | 1676 | return 0; |
1412 | rc = X86EMUL_UNHANDLEABLE; | ||
1413 | break; | ||
1414 | } | 1677 | } |
1415 | return rc; | 1678 | return 1; |
1416 | } | 1679 | } |
1417 | 1680 | ||
1418 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | 1681 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, |
@@ -1442,20 +1705,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1442 | emulate_push(ctxt); | 1705 | emulate_push(ctxt); |
1443 | break; | 1706 | break; |
1444 | } | 1707 | } |
1445 | return 0; | 1708 | return X86EMUL_CONTINUE; |
1446 | } | 1709 | } |
1447 | 1710 | ||
1448 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | 1711 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, |
1449 | struct x86_emulate_ops *ops, | 1712 | struct x86_emulate_ops *ops) |
1450 | unsigned long memop) | ||
1451 | { | 1713 | { |
1452 | struct decode_cache *c = &ctxt->decode; | 1714 | struct decode_cache *c = &ctxt->decode; |
1453 | u64 old, new; | 1715 | u64 old = c->dst.orig_val; |
1454 | int rc; | ||
1455 | |||
1456 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); | ||
1457 | if (rc != X86EMUL_CONTINUE) | ||
1458 | return rc; | ||
1459 | 1716 | ||
1460 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1717 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || |
1461 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { | 1718 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { |
@@ -1463,17 +1720,13 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
1463 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); | 1720 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); |
1464 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); | 1721 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); |
1465 | ctxt->eflags &= ~EFLG_ZF; | 1722 | ctxt->eflags &= ~EFLG_ZF; |
1466 | |||
1467 | } else { | 1723 | } else { |
1468 | new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | | 1724 | c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | |
1469 | (u32) c->regs[VCPU_REGS_RBX]; | 1725 | (u32) c->regs[VCPU_REGS_RBX]; |
1470 | 1726 | ||
1471 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); | ||
1472 | if (rc != X86EMUL_CONTINUE) | ||
1473 | return rc; | ||
1474 | ctxt->eflags |= EFLG_ZF; | 1727 | ctxt->eflags |= EFLG_ZF; |
1475 | } | 1728 | } |
1476 | return 0; | 1729 | return X86EMUL_CONTINUE; |
1477 | } | 1730 | } |
1478 | 1731 | ||
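With this change emulate_grp9() leans on the common operand handling: c->dst.orig_val already holds the 64-bit memory operand read by the shared code, and c->dst.val is stored by the normal writeback step, so the explicit read_emulated()/cmpxchg_emulated() calls disappear. The cmpxchg8b semantics being emulated, as a stand-alone sketch (illustrative only; plain variables stand in for guest registers and memory):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Returns the resulting ZF: true when the exchange happened. */
    static bool cmpxchg8b(uint64_t *mem, uint32_t *eax, uint32_t *edx,
                          uint32_t ebx, uint32_t ecx)
    {
        uint64_t expected = ((uint64_t)*edx << 32) | *eax;

        if (*mem == expected) {
            *mem = ((uint64_t)ecx << 32) | ebx;   /* store ECX:EBX */
            return true;                          /* ZF = 1 */
        }
        *eax = (uint32_t)*mem;                    /* load old value into EDX:EAX */
        *edx = (uint32_t)(*mem >> 32);
        return false;                             /* ZF = 0 */
    }

    int main(void)
    {
        uint64_t m = 0x1122334455667788ull;
        uint32_t eax = 0x55667788, edx = 0x11223344;

        assert(cmpxchg8b(&m, &eax, &edx, 0xdead, 0xbeef));
        assert(m == 0x0000beef0000deadull);
        return 0;
    }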
1479 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | 1732 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, |
@@ -1484,14 +1737,14 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1484 | unsigned long cs; | 1737 | unsigned long cs; |
1485 | 1738 | ||
1486 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); | 1739 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); |
1487 | if (rc) | 1740 | if (rc != X86EMUL_CONTINUE) |
1488 | return rc; | 1741 | return rc; |
1489 | if (c->op_bytes == 4) | 1742 | if (c->op_bytes == 4) |
1490 | c->eip = (u32)c->eip; | 1743 | c->eip = (u32)c->eip; |
1491 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1744 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
1492 | if (rc) | 1745 | if (rc != X86EMUL_CONTINUE) |
1493 | return rc; | 1746 | return rc; |
1494 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); | 1747 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); |
1495 | return rc; | 1748 | return rc; |
1496 | } | 1749 | } |
1497 | 1750 | ||
@@ -1544,7 +1797,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1544 | default: | 1797 | default: |
1545 | break; | 1798 | break; |
1546 | } | 1799 | } |
1547 | return 0; | 1800 | return X86EMUL_CONTINUE; |
1548 | } | 1801 | } |
1549 | 1802 | ||
1550 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | 1803 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) |
@@ -1598,8 +1851,11 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1598 | u64 msr_data; | 1851 | u64 msr_data; |
1599 | 1852 | ||
1600 | /* syscall is not available in real mode */ | 1853 | /* syscall is not available in real mode */ |
1601 | if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) | 1854 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1602 | return X86EMUL_UNHANDLEABLE; | 1855 | ctxt->mode == X86EMUL_MODE_VM86) { |
1856 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
1857 | return X86EMUL_PROPAGATE_FAULT; | ||
1858 | } | ||
1603 | 1859 | ||
1604 | setup_syscalls_segments(ctxt, &cs, &ss); | 1860 | setup_syscalls_segments(ctxt, &cs, &ss); |
1605 | 1861 | ||
@@ -1649,14 +1905,16 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1649 | /* inject #GP if in real mode */ | 1905 | /* inject #GP if in real mode */ |
1650 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 1906 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1651 | kvm_inject_gp(ctxt->vcpu, 0); | 1907 | kvm_inject_gp(ctxt->vcpu, 0); |
1652 | return X86EMUL_UNHANDLEABLE; | 1908 | return X86EMUL_PROPAGATE_FAULT; |
1653 | } | 1909 | } |
1654 | 1910 | ||
1655 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1911 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
1656 | * Therefore, we inject an #UD. | 1912 | * Therefore, we inject an #UD. |
1657 | */ | 1913 | */ |
1658 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1914 | if (ctxt->mode == X86EMUL_MODE_PROT64) { |
1659 | return X86EMUL_UNHANDLEABLE; | 1915 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1916 | return X86EMUL_PROPAGATE_FAULT; | ||
1917 | } | ||
1660 | 1918 | ||
1661 | setup_syscalls_segments(ctxt, &cs, &ss); | 1919 | setup_syscalls_segments(ctxt, &cs, &ss); |
1662 | 1920 | ||
@@ -1711,7 +1969,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1711 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1969 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1712 | ctxt->mode == X86EMUL_MODE_VM86) { | 1970 | ctxt->mode == X86EMUL_MODE_VM86) { |
1713 | kvm_inject_gp(ctxt->vcpu, 0); | 1971 | kvm_inject_gp(ctxt->vcpu, 0); |
1714 | return X86EMUL_UNHANDLEABLE; | 1972 | return X86EMUL_PROPAGATE_FAULT; |
1715 | } | 1973 | } |
1716 | 1974 | ||
1717 | setup_syscalls_segments(ctxt, &cs, &ss); | 1975 | setup_syscalls_segments(ctxt, &cs, &ss); |
@@ -1756,7 +2014,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1756 | return X86EMUL_CONTINUE; | 2014 | return X86EMUL_CONTINUE; |
1757 | } | 2015 | } |
1758 | 2016 | ||
1759 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | 2017 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, |
2018 | struct x86_emulate_ops *ops) | ||
1760 | { | 2019 | { |
1761 | int iopl; | 2020 | int iopl; |
1762 | if (ctxt->mode == X86EMUL_MODE_REAL) | 2021 | if (ctxt->mode == X86EMUL_MODE_REAL) |
@@ -1764,7 +2023,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | |||
1764 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2023 | if (ctxt->mode == X86EMUL_MODE_VM86) |
1765 | return true; | 2024 | return true; |
1766 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2025 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
1767 | return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; | 2026 | return ops->cpl(ctxt->vcpu) > iopl; |
1768 | } | 2027 | } |
1769 | 2028 | ||
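emulator_bad_iopl() reduces to comparing the current privilege level with the EFLAGS IOPL field; only when CPL exceeds IOPL does emulator_io_permited() go on to consult the I/O permission bitmap. A rough stand-alone sketch of that comparison (the constants assume the architectural EFLAGS layout with IOPL in bits 12-13; none of this is part of the patch):

    #include <assert.h>
    #include <stdbool.h>

    #define EFLAGS_IOPL_SHIFT 12
    #define EFLAGS_IOPL_MASK  (3UL << EFLAGS_IOPL_SHIFT)

    static bool bad_iopl(unsigned long eflags, int cpl)
    {
        int iopl = (eflags & EFLAGS_IOPL_MASK) >> EFLAGS_IOPL_SHIFT;

        return cpl > iopl;   /* IN/OUT etc. must fall back to the TSS bitmap */
    }

    int main(void)
    {
        assert(!bad_iopl(0UL, 0));                          /* ring 0, IOPL 0 */
        assert(bad_iopl(0UL, 3));                           /* user code, IOPL 0 */
        assert(!bad_iopl(3UL << EFLAGS_IOPL_SHIFT, 3));     /* IOPL 3 */
        return 0;
    }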
1770 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2029 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
@@ -1801,22 +2060,419 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
1801 | struct x86_emulate_ops *ops, | 2060 | struct x86_emulate_ops *ops, |
1802 | u16 port, u16 len) | 2061 | u16 port, u16 len) |
1803 | { | 2062 | { |
1804 | if (emulator_bad_iopl(ctxt)) | 2063 | if (emulator_bad_iopl(ctxt, ops)) |
1805 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | 2064 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) |
1806 | return false; | 2065 | return false; |
1807 | return true; | 2066 | return true; |
1808 | } | 2067 | } |
1809 | 2068 | ||
2069 | static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, | ||
2070 | struct x86_emulate_ops *ops, | ||
2071 | int seg) | ||
2072 | { | ||
2073 | struct desc_struct desc; | ||
2074 | if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) | ||
2075 | return get_desc_base(&desc); | ||
2076 | else | ||
2077 | return ~0; | ||
2078 | } | ||
2079 | |||
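get_cached_descriptor_base() simply extracts the linear base from the cached descriptor via get_desc_base(), i.e. it reassembles the 16+8+8 bit base fields of an architectural segment descriptor into one 32-bit value. A sketch of that assembly under the same assumption (the struct below is a local stand-in, not the kernel's desc_struct):

    #include <assert.h>
    #include <stdint.h>

    struct seg_base_fields {
        uint16_t base0;   /* base bits 0-15  */
        uint8_t  base1;   /* base bits 16-23 */
        uint8_t  base2;   /* base bits 24-31 */
    };

    static uint32_t desc_base(const struct seg_base_fields *d)
    {
        return d->base0 | ((uint32_t)d->base1 << 16) | ((uint32_t)d->base2 << 24);
    }

    int main(void)
    {
        struct seg_base_fields d = { .base0 = 0x5678, .base1 = 0x34, .base2 = 0x12 };

        assert(desc_base(&d) == 0x12345678u);
        return 0;
    }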
2080 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | ||
2081 | struct x86_emulate_ops *ops, | ||
2082 | struct tss_segment_16 *tss) | ||
2083 | { | ||
2084 | struct decode_cache *c = &ctxt->decode; | ||
2085 | |||
2086 | tss->ip = c->eip; | ||
2087 | tss->flag = ctxt->eflags; | ||
2088 | tss->ax = c->regs[VCPU_REGS_RAX]; | ||
2089 | tss->cx = c->regs[VCPU_REGS_RCX]; | ||
2090 | tss->dx = c->regs[VCPU_REGS_RDX]; | ||
2091 | tss->bx = c->regs[VCPU_REGS_RBX]; | ||
2092 | tss->sp = c->regs[VCPU_REGS_RSP]; | ||
2093 | tss->bp = c->regs[VCPU_REGS_RBP]; | ||
2094 | tss->si = c->regs[VCPU_REGS_RSI]; | ||
2095 | tss->di = c->regs[VCPU_REGS_RDI]; | ||
2096 | |||
2097 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
2098 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
2099 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
2100 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
2101 | tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
2102 | } | ||
2103 | |||
2104 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | ||
2105 | struct x86_emulate_ops *ops, | ||
2106 | struct tss_segment_16 *tss) | ||
2107 | { | ||
2108 | struct decode_cache *c = &ctxt->decode; | ||
2109 | int ret; | ||
2110 | |||
2111 | c->eip = tss->ip; | ||
2112 | ctxt->eflags = tss->flag | 2; | ||
2113 | c->regs[VCPU_REGS_RAX] = tss->ax; | ||
2114 | c->regs[VCPU_REGS_RCX] = tss->cx; | ||
2115 | c->regs[VCPU_REGS_RDX] = tss->dx; | ||
2116 | c->regs[VCPU_REGS_RBX] = tss->bx; | ||
2117 | c->regs[VCPU_REGS_RSP] = tss->sp; | ||
2118 | c->regs[VCPU_REGS_RBP] = tss->bp; | ||
2119 | c->regs[VCPU_REGS_RSI] = tss->si; | ||
2120 | c->regs[VCPU_REGS_RDI] = tss->di; | ||
2121 | |||
2122 | /* | ||
2123 | * SDM says that segment selectors are loaded before segment | ||
2124 | * descriptors | ||
2125 | */ | ||
2126 | ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); | ||
2127 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
2128 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
2129 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2130 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
2131 | |||
2132 | /* | ||
2133 | * Now load segment descriptors. If a fault happens at this stage | ||
2134 | * it is handled in the context of the new task | ||
2135 | */ | ||
2136 | ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); | ||
2137 | if (ret != X86EMUL_CONTINUE) | ||
2138 | return ret; | ||
2139 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
2140 | if (ret != X86EMUL_CONTINUE) | ||
2141 | return ret; | ||
2142 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
2143 | if (ret != X86EMUL_CONTINUE) | ||
2144 | return ret; | ||
2145 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
2146 | if (ret != X86EMUL_CONTINUE) | ||
2147 | return ret; | ||
2148 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
2149 | if (ret != X86EMUL_CONTINUE) | ||
2150 | return ret; | ||
2151 | |||
2152 | return X86EMUL_CONTINUE; | ||
2153 | } | ||
2154 | |||
2155 | static int task_switch_16(struct x86_emulate_ctxt *ctxt, | ||
2156 | struct x86_emulate_ops *ops, | ||
2157 | u16 tss_selector, u16 old_tss_sel, | ||
2158 | ulong old_tss_base, struct desc_struct *new_desc) | ||
2159 | { | ||
2160 | struct tss_segment_16 tss_seg; | ||
2161 | int ret; | ||
2162 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
2163 | |||
2164 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2165 | &err); | ||
2166 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2167 | /* FIXME: need to provide precise fault address */ | ||
2168 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2169 | return ret; | ||
2170 | } | ||
2171 | |||
2172 | save_state_to_tss16(ctxt, ops, &tss_seg); | ||
2173 | |||
2174 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2175 | &err); | ||
2176 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2177 | /* FIXME: need to provide precise fault address */ | ||
2178 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2179 | return ret; | ||
2180 | } | ||
2181 | |||
2182 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2183 | &err); | ||
2184 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2185 | /* FIXME: need to provide precise fault address */ | ||
2186 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2187 | return ret; | ||
2188 | } | ||
2189 | |||
2190 | if (old_tss_sel != 0xffff) { | ||
2191 | tss_seg.prev_task_link = old_tss_sel; | ||
2192 | |||
2193 | ret = ops->write_std(new_tss_base, | ||
2194 | &tss_seg.prev_task_link, | ||
2195 | sizeof tss_seg.prev_task_link, | ||
2196 | ctxt->vcpu, &err); | ||
2197 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2198 | /* FIXME: need to provide precise fault address */ | ||
2199 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2200 | return ret; | ||
2201 | } | ||
2202 | } | ||
2203 | |||
2204 | return load_state_from_tss16(ctxt, ops, &tss_seg); | ||
2205 | } | ||
2206 | |||
2207 | static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | ||
2208 | struct x86_emulate_ops *ops, | ||
2209 | struct tss_segment_32 *tss) | ||
2210 | { | ||
2211 | struct decode_cache *c = &ctxt->decode; | ||
2212 | |||
2213 | tss->cr3 = ops->get_cr(3, ctxt->vcpu); | ||
2214 | tss->eip = c->eip; | ||
2215 | tss->eflags = ctxt->eflags; | ||
2216 | tss->eax = c->regs[VCPU_REGS_RAX]; | ||
2217 | tss->ecx = c->regs[VCPU_REGS_RCX]; | ||
2218 | tss->edx = c->regs[VCPU_REGS_RDX]; | ||
2219 | tss->ebx = c->regs[VCPU_REGS_RBX]; | ||
2220 | tss->esp = c->regs[VCPU_REGS_RSP]; | ||
2221 | tss->ebp = c->regs[VCPU_REGS_RBP]; | ||
2222 | tss->esi = c->regs[VCPU_REGS_RSI]; | ||
2223 | tss->edi = c->regs[VCPU_REGS_RDI]; | ||
2224 | |||
2225 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
2226 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
2227 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
2228 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
2229 | tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); | ||
2230 | tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); | ||
2231 | tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
2232 | } | ||
2233 | |||
2234 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | ||
2235 | struct x86_emulate_ops *ops, | ||
2236 | struct tss_segment_32 *tss) | ||
2237 | { | ||
2238 | struct decode_cache *c = &ctxt->decode; | ||
2239 | int ret; | ||
2240 | |||
2241 | ops->set_cr(3, tss->cr3, ctxt->vcpu); | ||
2242 | c->eip = tss->eip; | ||
2243 | ctxt->eflags = tss->eflags | 2; | ||
2244 | c->regs[VCPU_REGS_RAX] = tss->eax; | ||
2245 | c->regs[VCPU_REGS_RCX] = tss->ecx; | ||
2246 | c->regs[VCPU_REGS_RDX] = tss->edx; | ||
2247 | c->regs[VCPU_REGS_RBX] = tss->ebx; | ||
2248 | c->regs[VCPU_REGS_RSP] = tss->esp; | ||
2249 | c->regs[VCPU_REGS_RBP] = tss->ebp; | ||
2250 | c->regs[VCPU_REGS_RSI] = tss->esi; | ||
2251 | c->regs[VCPU_REGS_RDI] = tss->edi; | ||
2252 | |||
2253 | /* | ||
2254 | * SDM says that segment selectors are loaded before segment | ||
2255 | * descriptors | ||
2256 | */ | ||
2257 | ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); | ||
2258 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
2259 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
2260 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2261 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
2262 | ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); | ||
2263 | ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); | ||
2264 | |||
2265 | /* | ||
2266 | * Now load segment descriptors. If a fault happens at this stage | ||
2267 | * it is handled in the context of the new task | ||
2268 | */ | ||
2269 | ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); | ||
2270 | if (ret != X86EMUL_CONTINUE) | ||
2271 | return ret; | ||
2272 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
2273 | if (ret != X86EMUL_CONTINUE) | ||
2274 | return ret; | ||
2275 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
2276 | if (ret != X86EMUL_CONTINUE) | ||
2277 | return ret; | ||
2278 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
2279 | if (ret != X86EMUL_CONTINUE) | ||
2280 | return ret; | ||
2281 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
2282 | if (ret != X86EMUL_CONTINUE) | ||
2283 | return ret; | ||
2284 | ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); | ||
2285 | if (ret != X86EMUL_CONTINUE) | ||
2286 | return ret; | ||
2287 | ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); | ||
2288 | if (ret != X86EMUL_CONTINUE) | ||
2289 | return ret; | ||
2290 | |||
2291 | return X86EMUL_CONTINUE; | ||
2292 | } | ||
2293 | |||
2294 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, | ||
2295 | struct x86_emulate_ops *ops, | ||
2296 | u16 tss_selector, u16 old_tss_sel, | ||
2297 | ulong old_tss_base, struct desc_struct *new_desc) | ||
2298 | { | ||
2299 | struct tss_segment_32 tss_seg; | ||
2300 | int ret; | ||
2301 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
2302 | |||
2303 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2304 | &err); | ||
2305 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2306 | /* FIXME: need to provide precise fault address */ | ||
2307 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2308 | return ret; | ||
2309 | } | ||
2310 | |||
2311 | save_state_to_tss32(ctxt, ops, &tss_seg); | ||
2312 | |||
2313 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2314 | &err); | ||
2315 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2316 | /* FIXME: need to provide precise fault address */ | ||
2317 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
2318 | return ret; | ||
2319 | } | ||
2320 | |||
2321 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
2322 | &err); | ||
2323 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2324 | /* FIXME: need to provide precise fault address */ | ||
2325 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2326 | return ret; | ||
2327 | } | ||
2328 | |||
2329 | if (old_tss_sel != 0xffff) { | ||
2330 | tss_seg.prev_task_link = old_tss_sel; | ||
2331 | |||
2332 | ret = ops->write_std(new_tss_base, | ||
2333 | &tss_seg.prev_task_link, | ||
2334 | sizeof tss_seg.prev_task_link, | ||
2335 | ctxt->vcpu, &err); | ||
2336 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
2337 | /* FIXME: need to provide precise fault address */ | ||
2338 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
2339 | return ret; | ||
2340 | } | ||
2341 | } | ||
2342 | |||
2343 | return load_state_from_tss32(ctxt, ops, &tss_seg); | ||
2344 | } | ||
2345 | |||
2346 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | ||
2347 | struct x86_emulate_ops *ops, | ||
2348 | u16 tss_selector, int reason, | ||
2349 | bool has_error_code, u32 error_code) | ||
2350 | { | ||
2351 | struct desc_struct curr_tss_desc, next_tss_desc; | ||
2352 | int ret; | ||
2353 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | ||
2354 | ulong old_tss_base = | ||
2355 | get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); | ||
2356 | u32 desc_limit; | ||
2357 | |||
2358 | /* FIXME: old_tss_base == ~0 ? */ | ||
2359 | |||
2360 | ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); | ||
2361 | if (ret != X86EMUL_CONTINUE) | ||
2362 | return ret; | ||
2363 | ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); | ||
2364 | if (ret != X86EMUL_CONTINUE) | ||
2365 | return ret; | ||
2366 | |||
2367 | /* FIXME: check that next_tss_desc is tss */ | ||
2368 | |||
2369 | if (reason != TASK_SWITCH_IRET) { | ||
2370 | if ((tss_selector & 3) > next_tss_desc.dpl || | ||
2371 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | ||
2372 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2373 | return X86EMUL_PROPAGATE_FAULT; | ||
2374 | } | ||
2375 | } | ||
2376 | |||
2377 | desc_limit = desc_limit_scaled(&next_tss_desc); | ||
2378 | if (!next_tss_desc.p || | ||
2379 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | ||
2380 | desc_limit < 0x2b)) { | ||
2381 | kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, | ||
2382 | tss_selector & 0xfffc); | ||
2383 | return X86EMUL_PROPAGATE_FAULT; | ||
2384 | } | ||
2385 | |||
2386 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
2387 | curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ | ||
2388 | write_segment_descriptor(ctxt, ops, old_tss_sel, | ||
2389 | &curr_tss_desc); | ||
2390 | } | ||
2391 | |||
2392 | if (reason == TASK_SWITCH_IRET) | ||
2393 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; | ||
2394 | |||
2395 | /* set back link to prev task only if NT bit is set in eflags | ||
2396 | note that old_tss_sel is not used after this point */ | ||
2397 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
2398 | old_tss_sel = 0xffff; | ||
2399 | |||
2400 | if (next_tss_desc.type & 8) | ||
2401 | ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, | ||
2402 | old_tss_base, &next_tss_desc); | ||
2403 | else | ||
2404 | ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, | ||
2405 | old_tss_base, &next_tss_desc); | ||
2406 | if (ret != X86EMUL_CONTINUE) | ||
2407 | return ret; | ||
2408 | |||
2409 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) | ||
2410 | ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT; | ||
2411 | |||
2412 | if (reason != TASK_SWITCH_IRET) { | ||
2413 | next_tss_desc.type |= (1 << 1); /* set busy flag */ | ||
2414 | write_segment_descriptor(ctxt, ops, tss_selector, | ||
2415 | &next_tss_desc); | ||
2416 | } | ||
2417 | |||
2418 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | ||
2419 | ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); | ||
2420 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | ||
2421 | |||
2422 | if (has_error_code) { | ||
2423 | struct decode_cache *c = &ctxt->decode; | ||
2424 | |||
2425 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | ||
2426 | c->lock_prefix = 0; | ||
2427 | c->src.val = (unsigned long) error_code; | ||
2428 | emulate_push(ctxt); | ||
2429 | } | ||
2430 | |||
2431 | return ret; | ||
2432 | } | ||
2433 | |||
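The busy-flag handling in emulator_do_task_switch() toggles bit 1 of the TSS descriptor type, which separates an available TSS (type 1 for 16-bit, 9 for 32-bit) from a busy one (types 3 and 11). A tiny illustration of those bit operations (illustrative only):

    #include <assert.h>

    static unsigned set_tss_busy(unsigned type)   { return type | (1u << 1); }
    static unsigned clear_tss_busy(unsigned type) { return type & ~(1u << 1); }

    int main(void)
    {
        assert(set_tss_busy(9) == 11);     /* 32-bit available -> busy */
        assert(clear_tss_busy(11) == 9);   /* 32-bit busy -> available */
        assert(set_tss_busy(1) == 3);      /* 16-bit available -> busy */
        return 0;
    }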
2434 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
2435 | struct x86_emulate_ops *ops, | ||
2436 | u16 tss_selector, int reason, | ||
2437 | bool has_error_code, u32 error_code) | ||
2438 | { | ||
2439 | struct decode_cache *c = &ctxt->decode; | ||
2440 | int rc; | ||
2441 | |||
2442 | memset(c, 0, sizeof(struct decode_cache)); | ||
2443 | c->eip = ctxt->eip; | ||
2444 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
2445 | c->dst.type = OP_NONE; | ||
2446 | |||
2447 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | ||
2448 | has_error_code, error_code); | ||
2449 | |||
2450 | if (rc == X86EMUL_CONTINUE) { | ||
2451 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | ||
2452 | kvm_rip_write(ctxt->vcpu, c->eip); | ||
2453 | rc = writeback(ctxt, ops); | ||
2454 | } | ||
2455 | |||
2456 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | ||
2457 | } | ||
2458 | |||
2459 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, | ||
2460 | int reg, struct operand *op) | ||
2461 | { | ||
2462 | struct decode_cache *c = &ctxt->decode; | ||
2463 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | ||
2464 | |||
2465 | register_address_increment(c, &c->regs[reg], df * op->bytes); | ||
2466 | op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]); | ||
2467 | } | ||
2468 | |||
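string_addr_inc() captures the usual string-instruction stepping: RSI/RDI advance or retreat by the operand size depending on EFLAGS.DF. A stand-alone sketch of that rule (the EFLG_DF constant matches the architectural bit position; the helper is illustrative, not kernel code):

    #include <assert.h>

    #define EFLG_DF (1u << 10)   /* direction flag */

    static void string_step(unsigned long *reg, unsigned eflags, int op_bytes)
    {
        int df = (eflags & EFLG_DF) ? -1 : 1;

        *reg += df * op_bytes;
    }

    int main(void)
    {
        unsigned long rsi = 0x1000;

        string_step(&rsi, 0, 4);          /* DF clear: step forward */
        assert(rsi == 0x1004);
        string_step(&rsi, EFLG_DF, 4);    /* DF set: step backward */
        assert(rsi == 0x1000);
        return 0;
    }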
1810 | int | 2469 | int |
1811 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 2470 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1812 | { | 2471 | { |
1813 | unsigned long memop = 0; | ||
1814 | u64 msr_data; | 2472 | u64 msr_data; |
1815 | unsigned long saved_eip = 0; | ||
1816 | struct decode_cache *c = &ctxt->decode; | 2473 | struct decode_cache *c = &ctxt->decode; |
1817 | unsigned int port; | 2474 | int rc = X86EMUL_CONTINUE; |
1818 | int io_dir_in; | 2475 | int saved_dst_type = c->dst.type; |
1819 | int rc = 0; | ||
1820 | 2476 | ||
1821 | ctxt->interruptibility = 0; | 2477 | ctxt->interruptibility = 0; |
1822 | 2478 | ||
@@ -1826,26 +2482,30 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1826 | */ | 2482 | */ |
1827 | 2483 | ||
1828 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 2484 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
1829 | saved_eip = c->eip; | 2485 | |
2486 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
2487 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
2488 | goto done; | ||
2489 | } | ||
1830 | 2490 | ||
1831 | /* LOCK prefix is allowed only with some instructions */ | 2491 | /* LOCK prefix is allowed only with some instructions */ |
1832 | if (c->lock_prefix && !(c->d & Lock)) { | 2492 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
1833 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2493 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1834 | goto done; | 2494 | goto done; |
1835 | } | 2495 | } |
1836 | 2496 | ||
1837 | /* Privileged instruction can be executed only in CPL=0 */ | 2497 | /* Privileged instruction can be executed only in CPL=0 */ |
1838 | if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { | 2498 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
1839 | kvm_inject_gp(ctxt->vcpu, 0); | 2499 | kvm_inject_gp(ctxt->vcpu, 0); |
1840 | goto done; | 2500 | goto done; |
1841 | } | 2501 | } |
1842 | 2502 | ||
1843 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) | ||
1844 | memop = c->modrm_ea; | ||
1845 | |||
1846 | if (c->rep_prefix && (c->d & String)) { | 2503 | if (c->rep_prefix && (c->d & String)) { |
2504 | ctxt->restart = true; | ||
1847 | /* All REP prefixes have the same first termination condition */ | 2505 | /* All REP prefixes have the same first termination condition */ |
1848 | if (c->regs[VCPU_REGS_RCX] == 0) { | 2506 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
2507 | string_done: | ||
2508 | ctxt->restart = false; | ||
1849 | kvm_rip_write(ctxt->vcpu, c->eip); | 2509 | kvm_rip_write(ctxt->vcpu, c->eip); |
1850 | goto done; | 2510 | goto done; |
1851 | } | 2511 | } |
@@ -1857,25 +2517,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1857 | * - if REPNE/REPNZ and ZF = 1 then done | 2517 | * - if REPNE/REPNZ and ZF = 1 then done |
1858 | */ | 2518 | */ |
1859 | if ((c->b == 0xa6) || (c->b == 0xa7) || | 2519 | if ((c->b == 0xa6) || (c->b == 0xa7) || |
1860 | (c->b == 0xae) || (c->b == 0xaf)) { | 2520 | (c->b == 0xae) || (c->b == 0xaf)) { |
1861 | if ((c->rep_prefix == REPE_PREFIX) && | 2521 | if ((c->rep_prefix == REPE_PREFIX) && |
1862 | ((ctxt->eflags & EFLG_ZF) == 0)) { | 2522 | ((ctxt->eflags & EFLG_ZF) == 0)) |
1863 | kvm_rip_write(ctxt->vcpu, c->eip); | 2523 | goto string_done; |
1864 | goto done; | ||
1865 | } | ||
1866 | if ((c->rep_prefix == REPNE_PREFIX) && | 2524 | if ((c->rep_prefix == REPNE_PREFIX) && |
1867 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { | 2525 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) |
1868 | kvm_rip_write(ctxt->vcpu, c->eip); | 2526 | goto string_done; |
1869 | goto done; | ||
1870 | } | ||
1871 | } | 2527 | } |
1872 | c->regs[VCPU_REGS_RCX]--; | 2528 | c->eip = ctxt->eip; |
1873 | c->eip = kvm_rip_read(ctxt->vcpu); | ||
1874 | } | 2529 | } |
1875 | 2530 | ||
1876 | if (c->src.type == OP_MEM) { | 2531 | if (c->src.type == OP_MEM) { |
1877 | c->src.ptr = (unsigned long *)memop; | ||
1878 | c->src.val = 0; | ||
1879 | rc = ops->read_emulated((unsigned long)c->src.ptr, | 2532 | rc = ops->read_emulated((unsigned long)c->src.ptr, |
1880 | &c->src.val, | 2533 | &c->src.val, |
1881 | c->src.bytes, | 2534 | c->src.bytes, |
@@ -1885,29 +2538,25 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1885 | c->src.orig_val = c->src.val; | 2538 | c->src.orig_val = c->src.val; |
1886 | } | 2539 | } |
1887 | 2540 | ||
2541 | if (c->src2.type == OP_MEM) { | ||
2542 | rc = ops->read_emulated((unsigned long)c->src2.ptr, | ||
2543 | &c->src2.val, | ||
2544 | c->src2.bytes, | ||
2545 | ctxt->vcpu); | ||
2546 | if (rc != X86EMUL_CONTINUE) | ||
2547 | goto done; | ||
2548 | } | ||
2549 | |||
1888 | if ((c->d & DstMask) == ImplicitOps) | 2550 | if ((c->d & DstMask) == ImplicitOps) |
1889 | goto special_insn; | 2551 | goto special_insn; |
1890 | 2552 | ||
1891 | 2553 | ||
1892 | if (c->dst.type == OP_MEM) { | 2554 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
1893 | c->dst.ptr = (unsigned long *)memop; | 2555 | /* optimisation - avoid slow emulated read if Mov */ |
1894 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2556 | rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, |
1895 | c->dst.val = 0; | 2557 | c->dst.bytes, ctxt->vcpu); |
1896 | if (c->d & BitOp) { | 2558 | if (rc != X86EMUL_CONTINUE) |
1897 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | 2559 | goto done; |
1898 | |||
1899 | c->dst.ptr = (void *)c->dst.ptr + | ||
1900 | (c->src.val & mask) / 8; | ||
1901 | } | ||
1902 | if (!(c->d & Mov)) { | ||
1903 | /* optimisation - avoid slow emulated read */ | ||
1904 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
1905 | &c->dst.val, | ||
1906 | c->dst.bytes, | ||
1907 | ctxt->vcpu); | ||
1908 | if (rc != X86EMUL_CONTINUE) | ||
1909 | goto done; | ||
1910 | } | ||
1911 | } | 2560 | } |
1912 | c->dst.orig_val = c->dst.val; | 2561 | c->dst.orig_val = c->dst.val; |
1913 | 2562 | ||
@@ -1926,7 +2575,7 @@ special_insn: | |||
1926 | break; | 2575 | break; |
1927 | case 0x07: /* pop es */ | 2576 | case 0x07: /* pop es */ |
1928 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 2577 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
1929 | if (rc != 0) | 2578 | if (rc != X86EMUL_CONTINUE) |
1930 | goto done; | 2579 | goto done; |
1931 | break; | 2580 | break; |
1932 | case 0x08 ... 0x0d: | 2581 | case 0x08 ... 0x0d: |
@@ -1945,7 +2594,7 @@ special_insn: | |||
1945 | break; | 2594 | break; |
1946 | case 0x17: /* pop ss */ | 2595 | case 0x17: /* pop ss */ |
1947 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 2596 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
1948 | if (rc != 0) | 2597 | if (rc != X86EMUL_CONTINUE) |
1949 | goto done; | 2598 | goto done; |
1950 | break; | 2599 | break; |
1951 | case 0x18 ... 0x1d: | 2600 | case 0x18 ... 0x1d: |
@@ -1957,7 +2606,7 @@ special_insn: | |||
1957 | break; | 2606 | break; |
1958 | case 0x1f: /* pop ds */ | 2607 | case 0x1f: /* pop ds */ |
1959 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 2608 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
1960 | if (rc != 0) | 2609 | if (rc != X86EMUL_CONTINUE) |
1961 | goto done; | 2610 | goto done; |
1962 | break; | 2611 | break; |
1963 | case 0x20 ... 0x25: | 2612 | case 0x20 ... 0x25: |
@@ -1988,7 +2637,7 @@ special_insn: | |||
1988 | case 0x58 ... 0x5f: /* pop reg */ | 2637 | case 0x58 ... 0x5f: /* pop reg */ |
1989 | pop_instruction: | 2638 | pop_instruction: |
1990 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); | 2639 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); |
1991 | if (rc != 0) | 2640 | if (rc != X86EMUL_CONTINUE) |
1992 | goto done; | 2641 | goto done; |
1993 | break; | 2642 | break; |
1994 | case 0x60: /* pusha */ | 2643 | case 0x60: /* pusha */ |
@@ -1996,7 +2645,7 @@ special_insn: | |||
1996 | break; | 2645 | break; |
1997 | case 0x61: /* popa */ | 2646 | case 0x61: /* popa */ |
1998 | rc = emulate_popa(ctxt, ops); | 2647 | rc = emulate_popa(ctxt, ops); |
1999 | if (rc != 0) | 2648 | if (rc != X86EMUL_CONTINUE) |
2000 | goto done; | 2649 | goto done; |
2001 | break; | 2650 | break; |
2002 | case 0x63: /* movsxd */ | 2651 | case 0x63: /* movsxd */ |
@@ -2010,47 +2659,29 @@ special_insn: | |||
2010 | break; | 2659 | break; |
2011 | case 0x6c: /* insb */ | 2660 | case 0x6c: /* insb */ |
2012 | case 0x6d: /* insw/insd */ | 2661 | case 0x6d: /* insw/insd */ |
2662 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
2013 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2663 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2014 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2664 | c->dst.bytes)) { |
2015 | kvm_inject_gp(ctxt->vcpu, 0); | 2665 | kvm_inject_gp(ctxt->vcpu, 0); |
2016 | goto done; | 2666 | goto done; |
2017 | } | 2667 | } |
2018 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2668 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, |
2019 | 1, | 2669 | c->regs[VCPU_REGS_RDX], &c->dst.val)) |
2020 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2670 | goto done; /* IO is needed, skip writeback */ |
2021 | c->rep_prefix ? | 2671 | break; |
2022 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | ||
2023 | (ctxt->eflags & EFLG_DF), | ||
2024 | register_address(c, es_base(ctxt), | ||
2025 | c->regs[VCPU_REGS_RDI]), | ||
2026 | c->rep_prefix, | ||
2027 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
2028 | c->eip = saved_eip; | ||
2029 | return -1; | ||
2030 | } | ||
2031 | return 0; | ||
2032 | case 0x6e: /* outsb */ | 2672 | case 0x6e: /* outsb */ |
2033 | case 0x6f: /* outsw/outsd */ | 2673 | case 0x6f: /* outsw/outsd */ |
2674 | c->src.bytes = min(c->src.bytes, 4u); | ||
2034 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2675 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2035 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2676 | c->src.bytes)) { |
2036 | kvm_inject_gp(ctxt->vcpu, 0); | 2677 | kvm_inject_gp(ctxt->vcpu, 0); |
2037 | goto done; | 2678 | goto done; |
2038 | } | 2679 | } |
2039 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2680 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], |
2040 | 0, | 2681 | &c->src.val, 1, ctxt->vcpu); |
2041 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2682 | |
2042 | c->rep_prefix ? | 2683 | c->dst.type = OP_NONE; /* nothing to writeback */ |
2043 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | 2684 | break; |
2044 | (ctxt->eflags & EFLG_DF), | ||
2045 | register_address(c, | ||
2046 | seg_override_base(ctxt, c), | ||
2047 | c->regs[VCPU_REGS_RSI]), | ||
2048 | c->rep_prefix, | ||
2049 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
2050 | c->eip = saved_eip; | ||
2051 | return -1; | ||
2052 | } | ||
2053 | return 0; | ||
2054 | case 0x70 ... 0x7f: /* jcc (short) */ | 2685 | case 0x70 ... 0x7f: /* jcc (short) */ |
2055 | if (test_cc(c->b, ctxt->eflags)) | 2686 | if (test_cc(c->b, ctxt->eflags)) |
2056 | jmp_rel(c, c->src.val); | 2687 | jmp_rel(c, c->src.val); |
@@ -2107,12 +2738,11 @@ special_insn: | |||
2107 | case 0x8c: { /* mov r/m, sreg */ | 2738 | case 0x8c: { /* mov r/m, sreg */ |
2108 | struct kvm_segment segreg; | 2739 | struct kvm_segment segreg; |
2109 | 2740 | ||
2110 | if (c->modrm_reg <= 5) | 2741 | if (c->modrm_reg <= VCPU_SREG_GS) |
2111 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | 2742 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); |
2112 | else { | 2743 | else { |
2113 | printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", | 2744 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2114 | c->modrm); | 2745 | goto done; |
2115 | goto cannot_emulate; | ||
2116 | } | 2746 | } |
2117 | c->dst.val = segreg.selector; | 2747 | c->dst.val = segreg.selector; |
2118 | break; | 2748 | break; |
@@ -2132,16 +2762,16 @@ special_insn: | |||
2132 | } | 2762 | } |
2133 | 2763 | ||
2134 | if (c->modrm_reg == VCPU_SREG_SS) | 2764 | if (c->modrm_reg == VCPU_SREG_SS) |
2135 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | 2765 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); |
2136 | 2766 | ||
2137 | rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); | 2767 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); |
2138 | 2768 | ||
2139 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2769 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2140 | break; | 2770 | break; |
2141 | } | 2771 | } |
2142 | case 0x8f: /* pop (sole member of Grp1a) */ | 2772 | case 0x8f: /* pop (sole member of Grp1a) */ |
2143 | rc = emulate_grp1a(ctxt, ops); | 2773 | rc = emulate_grp1a(ctxt, ops); |
2144 | if (rc != 0) | 2774 | if (rc != X86EMUL_CONTINUE) |
2145 | goto done; | 2775 | goto done; |
2146 | break; | 2776 | break; |
2147 | case 0x90: /* nop / xchg r8,rax */ | 2777 | case 0x90: /* nop / xchg r8,rax */ |
@@ -2175,89 +2805,16 @@ special_insn: | |||
2175 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; | 2805 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; |
2176 | break; | 2806 | break; |
2177 | case 0xa4 ... 0xa5: /* movs */ | 2807 | case 0xa4 ... 0xa5: /* movs */ |
2178 | c->dst.type = OP_MEM; | 2808 | goto mov; |
2179 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2180 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2181 | es_base(ctxt), | ||
2182 | c->regs[VCPU_REGS_RDI]); | ||
2183 | rc = ops->read_emulated(register_address(c, | ||
2184 | seg_override_base(ctxt, c), | ||
2185 | c->regs[VCPU_REGS_RSI]), | ||
2186 | &c->dst.val, | ||
2187 | c->dst.bytes, ctxt->vcpu); | ||
2188 | if (rc != X86EMUL_CONTINUE) | ||
2189 | goto done; | ||
2190 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2191 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2192 | : c->dst.bytes); | ||
2193 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2194 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2195 | : c->dst.bytes); | ||
2196 | break; | ||
2197 | case 0xa6 ... 0xa7: /* cmps */ | 2809 | case 0xa6 ... 0xa7: /* cmps */ |
2198 | c->src.type = OP_NONE; /* Disable writeback. */ | ||
2199 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2200 | c->src.ptr = (unsigned long *)register_address(c, | ||
2201 | seg_override_base(ctxt, c), | ||
2202 | c->regs[VCPU_REGS_RSI]); | ||
2203 | rc = ops->read_emulated((unsigned long)c->src.ptr, | ||
2204 | &c->src.val, | ||
2205 | c->src.bytes, | ||
2206 | ctxt->vcpu); | ||
2207 | if (rc != X86EMUL_CONTINUE) | ||
2208 | goto done; | ||
2209 | |||
2210 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2810 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2211 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2212 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2213 | es_base(ctxt), | ||
2214 | c->regs[VCPU_REGS_RDI]); | ||
2215 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
2216 | &c->dst.val, | ||
2217 | c->dst.bytes, | ||
2218 | ctxt->vcpu); | ||
2219 | if (rc != X86EMUL_CONTINUE) | ||
2220 | goto done; | ||
2221 | |||
2222 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2811 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
2223 | 2812 | goto cmp; | |
2224 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
2225 | |||
2226 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2227 | (ctxt->eflags & EFLG_DF) ? -c->src.bytes | ||
2228 | : c->src.bytes); | ||
2229 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2230 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2231 | : c->dst.bytes); | ||
2232 | |||
2233 | break; | ||
2234 | case 0xaa ... 0xab: /* stos */ | 2813 | case 0xaa ... 0xab: /* stos */ |
2235 | c->dst.type = OP_MEM; | ||
2236 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2237 | c->dst.ptr = (unsigned long *)register_address(c, | ||
2238 | es_base(ctxt), | ||
2239 | c->regs[VCPU_REGS_RDI]); | ||
2240 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 2814 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
2241 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
2242 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2243 | : c->dst.bytes); | ||
2244 | break; | 2815 | break; |
2245 | case 0xac ... 0xad: /* lods */ | 2816 | case 0xac ... 0xad: /* lods */ |
2246 | c->dst.type = OP_REG; | 2817 | goto mov; |
2247 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
2248 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | ||
2249 | rc = ops->read_emulated(register_address(c, | ||
2250 | seg_override_base(ctxt, c), | ||
2251 | c->regs[VCPU_REGS_RSI]), | ||
2252 | &c->dst.val, | ||
2253 | c->dst.bytes, | ||
2254 | ctxt->vcpu); | ||
2255 | if (rc != X86EMUL_CONTINUE) | ||
2256 | goto done; | ||
2257 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
2258 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
2259 | : c->dst.bytes); | ||
2260 | break; | ||
2261 | case 0xae ... 0xaf: /* scas */ | 2818 | case 0xae ... 0xaf: /* scas */ |
2262 | DPRINTF("Urk! I don't handle SCAS.\n"); | 2819 | DPRINTF("Urk! I don't handle SCAS.\n"); |
2263 | goto cannot_emulate; | 2820 | goto cannot_emulate; |
@@ -2277,7 +2834,7 @@ special_insn: | |||
2277 | break; | 2834 | break; |
2278 | case 0xcb: /* ret far */ | 2835 | case 0xcb: /* ret far */ |
2279 | rc = emulate_ret_far(ctxt, ops); | 2836 | rc = emulate_ret_far(ctxt, ops); |
2280 | if (rc) | 2837 | if (rc != X86EMUL_CONTINUE) |
2281 | goto done; | 2838 | goto done; |
2282 | break; | 2839 | break; |
2283 | case 0xd0 ... 0xd1: /* Grp2 */ | 2840 | case 0xd0 ... 0xd1: /* Grp2 */ |
@@ -2290,14 +2847,10 @@ special_insn: | |||
2290 | break; | 2847 | break; |
2291 | case 0xe4: /* inb */ | 2848 | case 0xe4: /* inb */ |
2292 | case 0xe5: /* in */ | 2849 | case 0xe5: /* in */ |
2293 | port = c->src.val; | 2850 | goto do_io_in; |
2294 | io_dir_in = 1; | ||
2295 | goto do_io; | ||
2296 | case 0xe6: /* outb */ | 2851 | case 0xe6: /* outb */ |
2297 | case 0xe7: /* out */ | 2852 | case 0xe7: /* out */ |
2298 | port = c->src.val; | 2853 | goto do_io_out; |
2299 | io_dir_in = 0; | ||
2300 | goto do_io; | ||
2301 | case 0xe8: /* call (near) */ { | 2854 | case 0xe8: /* call (near) */ { |
2302 | long int rel = c->src.val; | 2855 | long int rel = c->src.val; |
2303 | c->src.val = (unsigned long) c->eip; | 2856 | c->src.val = (unsigned long) c->eip; |
@@ -2308,8 +2861,9 @@ special_insn: | |||
2308 | case 0xe9: /* jmp rel */ | 2861 | case 0xe9: /* jmp rel */ |
2309 | goto jmp; | 2862 | goto jmp; |
2310 | case 0xea: /* jmp far */ | 2863 | case 0xea: /* jmp far */ |
2311 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, | 2864 | jump_far: |
2312 | VCPU_SREG_CS)) | 2865 | if (load_segment_descriptor(ctxt, ops, c->src2.val, |
2866 | VCPU_SREG_CS)) | ||
2313 | goto done; | 2867 | goto done; |
2314 | 2868 | ||
2315 | c->eip = c->src.val; | 2869 | c->eip = c->src.val; |
@@ -2321,25 +2875,29 @@ special_insn: | |||
2321 | break; | 2875 | break; |
2322 | case 0xec: /* in al,dx */ | 2876 | case 0xec: /* in al,dx */ |
2323 | case 0xed: /* in (e/r)ax,dx */ | 2877 | case 0xed: /* in (e/r)ax,dx */ |
2324 | port = c->regs[VCPU_REGS_RDX]; | 2878 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2325 | io_dir_in = 1; | 2879 | do_io_in: |
2326 | goto do_io; | 2880 | c->dst.bytes = min(c->dst.bytes, 4u); |
2881 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | ||
2882 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2883 | goto done; | ||
2884 | } | ||
2885 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | ||
2886 | &c->dst.val)) | ||
2887 | goto done; /* IO is needed */ | ||
2888 | break; | ||
2327 | case 0xee: /* out al,dx */ | 2889 | case 0xee: /* out al,dx */ |
2328 | case 0xef: /* out (e/r)ax,dx */ | 2890 | case 0xef: /* out (e/r)ax,dx */ |
2329 | port = c->regs[VCPU_REGS_RDX]; | 2891 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2330 | io_dir_in = 0; | 2892 | do_io_out: |
2331 | do_io: | 2893 | c->dst.bytes = min(c->dst.bytes, 4u); |
2332 | if (!emulator_io_permited(ctxt, ops, port, | 2894 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
2333 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2334 | kvm_inject_gp(ctxt->vcpu, 0); | 2895 | kvm_inject_gp(ctxt->vcpu, 0); |
2335 | goto done; | 2896 | goto done; |
2336 | } | 2897 | } |
2337 | if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | 2898 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, |
2338 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2899 | ctxt->vcpu); |
2339 | port) != 0) { | 2900 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2340 | c->eip = saved_eip; | ||
2341 | goto cannot_emulate; | ||
2342 | } | ||
2343 | break; | 2901 | break; |
2344 | case 0xf4: /* hlt */ | 2902 | case 0xf4: /* hlt */ |
2345 | ctxt->vcpu->arch.halt_request = 1; | 2903 | ctxt->vcpu->arch.halt_request = 1; |
@@ -2350,16 +2908,15 @@ special_insn: | |||
2350 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2908 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2351 | break; | 2909 | break; |
2352 | case 0xf6 ... 0xf7: /* Grp3 */ | 2910 | case 0xf6 ... 0xf7: /* Grp3 */ |
2353 | rc = emulate_grp3(ctxt, ops); | 2911 | if (!emulate_grp3(ctxt, ops)) |
2354 | if (rc != 0) | 2912 | goto cannot_emulate; |
2355 | goto done; | ||
2356 | break; | 2913 | break; |
2357 | case 0xf8: /* clc */ | 2914 | case 0xf8: /* clc */ |
2358 | ctxt->eflags &= ~EFLG_CF; | 2915 | ctxt->eflags &= ~EFLG_CF; |
2359 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2916 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2360 | break; | 2917 | break; |
2361 | case 0xfa: /* cli */ | 2918 | case 0xfa: /* cli */ |
2362 | if (emulator_bad_iopl(ctxt)) | 2919 | if (emulator_bad_iopl(ctxt, ops)) |
2363 | kvm_inject_gp(ctxt->vcpu, 0); | 2920 | kvm_inject_gp(ctxt->vcpu, 0); |
2364 | else { | 2921 | else { |
2365 | ctxt->eflags &= ~X86_EFLAGS_IF; | 2922 | ctxt->eflags &= ~X86_EFLAGS_IF; |
@@ -2367,10 +2924,10 @@ special_insn: | |||
2367 | } | 2924 | } |
2368 | break; | 2925 | break; |
2369 | case 0xfb: /* sti */ | 2926 | case 0xfb: /* sti */ |
2370 | if (emulator_bad_iopl(ctxt)) | 2927 | if (emulator_bad_iopl(ctxt, ops)) |
2371 | kvm_inject_gp(ctxt->vcpu, 0); | 2928 | kvm_inject_gp(ctxt->vcpu, 0); |
2372 | else { | 2929 | else { |
2373 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | 2930 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); |
2374 | ctxt->eflags |= X86_EFLAGS_IF; | 2931 | ctxt->eflags |= X86_EFLAGS_IF; |
2375 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2932 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2376 | } | 2933 | } |
@@ -2383,28 +2940,55 @@ special_insn: | |||
2383 | ctxt->eflags |= EFLG_DF; | 2940 | ctxt->eflags |= EFLG_DF; |
2384 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2941 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2385 | break; | 2942 | break; |
2386 | case 0xfe ... 0xff: /* Grp4/Grp5 */ | 2943 | case 0xfe: /* Grp4 */ |
2944 | grp45: | ||
2387 | rc = emulate_grp45(ctxt, ops); | 2945 | rc = emulate_grp45(ctxt, ops); |
2388 | if (rc != 0) | 2946 | if (rc != X86EMUL_CONTINUE) |
2389 | goto done; | 2947 | goto done; |
2390 | break; | 2948 | break; |
2949 | case 0xff: /* Grp5 */ | ||
2950 | if (c->modrm_reg == 5) | ||
2951 | goto jump_far; | ||
2952 | goto grp45; | ||
2391 | } | 2953 | } |
2392 | 2954 | ||
2393 | writeback: | 2955 | writeback: |
2394 | rc = writeback(ctxt, ops); | 2956 | rc = writeback(ctxt, ops); |
2395 | if (rc != 0) | 2957 | if (rc != X86EMUL_CONTINUE) |
2396 | goto done; | 2958 | goto done; |
2397 | 2959 | ||
2960 | /* | ||
2961 | * restore dst type in case the decoding will be reused | ||
2962 | * (happens for string instruction ) | ||
2963 | */ | ||
2964 | c->dst.type = saved_dst_type; | ||
2965 | |||
2966 | if ((c->d & SrcMask) == SrcSI) | ||
2967 | string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, | ||
2968 | &c->src); | ||
2969 | |||
2970 | if ((c->d & DstMask) == DstDI) | ||
2971 | string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); | ||
2972 | |||
2973 | if (c->rep_prefix && (c->d & String)) { | ||
2974 | struct read_cache *rc = &ctxt->decode.io_read; | ||
2975 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | ||
2976 | /* | ||
2977 | * Re-enter the guest when the pio read-ahead buffer is empty or, | ||
2978 | * if it is not used, after every 1024 iterations. | ||
2979 | */ | ||
2980 | if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) || | ||
2981 | (rc->end != 0 && rc->end == rc->pos)) | ||
2982 | ctxt->restart = false; | ||
2983 | } | ||
2984 | |||
2398 | /* Commit shadow register state. */ | 2985 | /* Commit shadow register state. */ |
2399 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 2986 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); |
2400 | kvm_rip_write(ctxt->vcpu, c->eip); | 2987 | kvm_rip_write(ctxt->vcpu, c->eip); |
2988 | ops->set_rflags(ctxt->vcpu, ctxt->eflags); | ||
2401 | 2989 | ||
2402 | done: | 2990 | done: |
2403 | if (rc == X86EMUL_UNHANDLEABLE) { | 2991 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
2404 | c->eip = saved_eip; | ||
2405 | return -1; | ||
2406 | } | ||
2407 | return 0; | ||
2408 | 2992 | ||
2409 | twobyte_insn: | 2993 | twobyte_insn: |
2410 | switch (c->b) { | 2994 | switch (c->b) { |
@@ -2418,18 +3002,18 @@ twobyte_insn: | |||
2418 | goto cannot_emulate; | 3002 | goto cannot_emulate; |
2419 | 3003 | ||
2420 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3004 | rc = kvm_fix_hypercall(ctxt->vcpu); |
2421 | if (rc) | 3005 | if (rc != X86EMUL_CONTINUE) |
2422 | goto done; | 3006 | goto done; |
2423 | 3007 | ||
2424 | /* Let the processor re-execute the fixed hypercall */ | 3008 | /* Let the processor re-execute the fixed hypercall */ |
2425 | c->eip = kvm_rip_read(ctxt->vcpu); | 3009 | c->eip = ctxt->eip; |
2426 | /* Disable writeback. */ | 3010 | /* Disable writeback. */ |
2427 | c->dst.type = OP_NONE; | 3011 | c->dst.type = OP_NONE; |
2428 | break; | 3012 | break; |
2429 | case 2: /* lgdt */ | 3013 | case 2: /* lgdt */ |
2430 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3014 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
2431 | &size, &address, c->op_bytes); | 3015 | &size, &address, c->op_bytes); |
2432 | if (rc) | 3016 | if (rc != X86EMUL_CONTINUE) |
2433 | goto done; | 3017 | goto done; |
2434 | realmode_lgdt(ctxt->vcpu, size, address); | 3018 | realmode_lgdt(ctxt->vcpu, size, address); |
2435 | /* Disable writeback. */ | 3019 | /* Disable writeback. */ |
@@ -2440,7 +3024,7 @@ twobyte_insn: | |||
2440 | switch (c->modrm_rm) { | 3024 | switch (c->modrm_rm) { |
2441 | case 1: | 3025 | case 1: |
2442 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3026 | rc = kvm_fix_hypercall(ctxt->vcpu); |
2443 | if (rc) | 3027 | if (rc != X86EMUL_CONTINUE) |
2444 | goto done; | 3028 | goto done; |
2445 | break; | 3029 | break; |
2446 | default: | 3030 | default: |
@@ -2450,7 +3034,7 @@ twobyte_insn: | |||
2450 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3034 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
2451 | &size, &address, | 3035 | &size, &address, |
2452 | c->op_bytes); | 3036 | c->op_bytes); |
2453 | if (rc) | 3037 | if (rc != X86EMUL_CONTINUE) |
2454 | goto done; | 3038 | goto done; |
2455 | realmode_lidt(ctxt->vcpu, size, address); | 3039 | realmode_lidt(ctxt->vcpu, size, address); |
2456 | } | 3040 | } |
@@ -2459,15 +3043,18 @@ twobyte_insn: | |||
2459 | break; | 3043 | break; |
2460 | case 4: /* smsw */ | 3044 | case 4: /* smsw */ |
2461 | c->dst.bytes = 2; | 3045 | c->dst.bytes = 2; |
2462 | c->dst.val = realmode_get_cr(ctxt->vcpu, 0); | 3046 | c->dst.val = ops->get_cr(0, ctxt->vcpu); |
2463 | break; | 3047 | break; |
2464 | case 6: /* lmsw */ | 3048 | case 6: /* lmsw */ |
2465 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, | 3049 | ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) | |
2466 | &ctxt->eflags); | 3050 | (c->src.val & 0x0f), ctxt->vcpu); |
2467 | c->dst.type = OP_NONE; | 3051 | c->dst.type = OP_NONE; |
2468 | break; | 3052 | break; |
3053 | case 5: /* not defined */ | ||
3054 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
3055 | goto done; | ||
2469 | case 7: /* invlpg*/ | 3056 | case 7: /* invlpg*/ |
2470 | emulate_invlpg(ctxt->vcpu, memop); | 3057 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); |
2471 | /* Disable writeback. */ | 3058 | /* Disable writeback. */ |
2472 | c->dst.type = OP_NONE; | 3059 | c->dst.type = OP_NONE; |
2473 | break; | 3060 | break; |
@@ -2493,54 +3080,54 @@ twobyte_insn: | |||
2493 | c->dst.type = OP_NONE; | 3080 | c->dst.type = OP_NONE; |
2494 | break; | 3081 | break; |
2495 | case 0x20: /* mov cr, reg */ | 3082 | case 0x20: /* mov cr, reg */ |
2496 | if (c->modrm_mod != 3) | 3083 | switch (c->modrm_reg) { |
2497 | goto cannot_emulate; | 3084 | case 1: |
2498 | c->regs[c->modrm_rm] = | 3085 | case 5 ... 7: |
2499 | realmode_get_cr(ctxt->vcpu, c->modrm_reg); | 3086 | case 9 ... 15: |
3087 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
3088 | goto done; | ||
3089 | } | ||
3090 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); | ||
2500 | c->dst.type = OP_NONE; /* no writeback */ | 3091 | c->dst.type = OP_NONE; /* no writeback */ |
2501 | break; | 3092 | break; |
2502 | case 0x21: /* mov from dr to reg */ | 3093 | case 0x21: /* mov from dr to reg */ |
2503 | if (c->modrm_mod != 3) | 3094 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
2504 | goto cannot_emulate; | 3095 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
2505 | rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | 3096 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2506 | if (rc) | 3097 | goto done; |
2507 | goto cannot_emulate; | 3098 | } |
3099 | emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | ||
2508 | c->dst.type = OP_NONE; /* no writeback */ | 3100 | c->dst.type = OP_NONE; /* no writeback */ |
2509 | break; | 3101 | break; |
2510 | case 0x22: /* mov reg, cr */ | 3102 | case 0x22: /* mov reg, cr */ |
2511 | if (c->modrm_mod != 3) | 3103 | ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); |
2512 | goto cannot_emulate; | ||
2513 | realmode_set_cr(ctxt->vcpu, | ||
2514 | c->modrm_reg, c->modrm_val, &ctxt->eflags); | ||
2515 | c->dst.type = OP_NONE; | 3104 | c->dst.type = OP_NONE; |
2516 | break; | 3105 | break; |
2517 | case 0x23: /* mov from reg to dr */ | 3106 | case 0x23: /* mov from reg to dr */ |
2518 | if (c->modrm_mod != 3) | 3107 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
2519 | goto cannot_emulate; | 3108 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
2520 | rc = emulator_set_dr(ctxt, c->modrm_reg, | 3109 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
2521 | c->regs[c->modrm_rm]); | 3110 | goto done; |
2522 | if (rc) | 3111 | } |
2523 | goto cannot_emulate; | 3112 | emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); |
2524 | c->dst.type = OP_NONE; /* no writeback */ | 3113 | c->dst.type = OP_NONE; /* no writeback */ |
2525 | break; | 3114 | break; |
2526 | case 0x30: | 3115 | case 0x30: |
2527 | /* wrmsr */ | 3116 | /* wrmsr */ |
2528 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 3117 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
2529 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3118 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
2530 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); | 3119 | if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
2531 | if (rc) { | ||
2532 | kvm_inject_gp(ctxt->vcpu, 0); | 3120 | kvm_inject_gp(ctxt->vcpu, 0); |
2533 | c->eip = kvm_rip_read(ctxt->vcpu); | 3121 | goto done; |
2534 | } | 3122 | } |
2535 | rc = X86EMUL_CONTINUE; | 3123 | rc = X86EMUL_CONTINUE; |
2536 | c->dst.type = OP_NONE; | 3124 | c->dst.type = OP_NONE; |
2537 | break; | 3125 | break; |
2538 | case 0x32: | 3126 | case 0x32: |
2539 | /* rdmsr */ | 3127 | /* rdmsr */ |
2540 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); | 3128 | if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
2541 | if (rc) { | ||
2542 | kvm_inject_gp(ctxt->vcpu, 0); | 3129 | kvm_inject_gp(ctxt->vcpu, 0); |
2543 | c->eip = kvm_rip_read(ctxt->vcpu); | 3130 | goto done; |
2544 | } else { | 3131 | } else { |
2545 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3132 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
2546 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; | 3133 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; |
@@ -2577,7 +3164,7 @@ twobyte_insn: | |||
2577 | break; | 3164 | break; |
2578 | case 0xa1: /* pop fs */ | 3165 | case 0xa1: /* pop fs */ |
2579 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 3166 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
2580 | if (rc != 0) | 3167 | if (rc != X86EMUL_CONTINUE) |
2581 | goto done; | 3168 | goto done; |
2582 | break; | 3169 | break; |
2583 | case 0xa3: | 3170 | case 0xa3: |
@@ -2596,7 +3183,7 @@ twobyte_insn: | |||
2596 | break; | 3183 | break; |
2597 | case 0xa9: /* pop gs */ | 3184 | case 0xa9: /* pop gs */ |
2598 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 3185 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
2599 | if (rc != 0) | 3186 | if (rc != X86EMUL_CONTINUE) |
2600 | goto done; | 3187 | goto done; |
2601 | break; | 3188 | break; |
2602 | case 0xab: | 3189 | case 0xab: |
@@ -2668,16 +3255,14 @@ twobyte_insn: | |||
2668 | (u64) c->src.val; | 3255 | (u64) c->src.val; |
2669 | break; | 3256 | break; |
2670 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 3257 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
2671 | rc = emulate_grp9(ctxt, ops, memop); | 3258 | rc = emulate_grp9(ctxt, ops); |
2672 | if (rc != 0) | 3259 | if (rc != X86EMUL_CONTINUE) |
2673 | goto done; | 3260 | goto done; |
2674 | c->dst.type = OP_NONE; | ||
2675 | break; | 3261 | break; |
2676 | } | 3262 | } |
2677 | goto writeback; | 3263 | goto writeback; |
2678 | 3264 | ||
2679 | cannot_emulate: | 3265 | cannot_emulate: |
2680 | DPRINTF("Cannot emulate %02x\n", c->b); | 3266 | DPRINTF("Cannot emulate %02x\n", c->b); |
2681 | c->eip = saved_eip; | ||
2682 | return -1; | 3267 | return -1; |
2683 | } | 3268 | } |
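The lmsw/smsw hunks above route CR0 access through the emulator ops table instead of the old realmode_* helpers, and lmsw now replaces only the low four CR0 bits while leaving the rest untouched. A minimal user-space sketch of that masking, with cr0 and src standing in for the emulator's state (not kernel APIs), could look like:

#include <stdint.h>
#include <stdio.h>

/* lmsw updates only CR0 bits 0-3; all higher bits are preserved. */
static uint64_t lmsw_merge(uint64_t cr0, uint16_t src)
{
	return (cr0 & ~0x0fULL) | (src & 0x0f);
}

int main(void)
{
	uint64_t cr0 = 0x80050033;	/* a typical CR0 value with paging enabled */

	/* prints 0x8005003f: bits 0-3 taken from src, everything else kept */
	printf("%#llx\n", (unsigned long long)lmsw_merge(cr0, 0x000f));
	return 0;
}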
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index a790fa128a9f..93825ff3338f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -33,6 +33,29 @@ | |||
33 | #include <linux/kvm_host.h> | 33 | #include <linux/kvm_host.h> |
34 | #include "trace.h" | 34 | #include "trace.h" |
35 | 35 | ||
36 | static void pic_lock(struct kvm_pic *s) | ||
37 | __acquires(&s->lock) | ||
38 | { | ||
39 | raw_spin_lock(&s->lock); | ||
40 | } | ||
41 | |||
42 | static void pic_unlock(struct kvm_pic *s) | ||
43 | __releases(&s->lock) | ||
44 | { | ||
45 | bool wakeup = s->wakeup_needed; | ||
46 | struct kvm_vcpu *vcpu; | ||
47 | |||
48 | s->wakeup_needed = false; | ||
49 | |||
50 | raw_spin_unlock(&s->lock); | ||
51 | |||
52 | if (wakeup) { | ||
53 | vcpu = s->kvm->bsp_vcpu; | ||
54 | if (vcpu) | ||
55 | kvm_vcpu_kick(vcpu); | ||
56 | } | ||
57 | } | ||
58 | |||
36 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 59 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
37 | { | 60 | { |
38 | s->isr &= ~(1 << irq); | 61 | s->isr &= ~(1 << irq); |
@@ -45,19 +68,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
45 | * Other interrupt may be delivered to PIC while lock is dropped but | 68 | * Other interrupt may be delivered to PIC while lock is dropped but |
46 | * it should be safe since PIC state is already updated at this stage. | 69 | * it should be safe since PIC state is already updated at this stage. |
47 | */ | 70 | */ |
48 | raw_spin_unlock(&s->pics_state->lock); | 71 | pic_unlock(s->pics_state); |
49 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 72 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
50 | raw_spin_lock(&s->pics_state->lock); | 73 | pic_lock(s->pics_state); |
51 | } | 74 | } |
52 | 75 | ||
53 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 76 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
54 | { | 77 | { |
55 | struct kvm_pic *s = pic_irqchip(kvm); | 78 | struct kvm_pic *s = pic_irqchip(kvm); |
56 | 79 | ||
57 | raw_spin_lock(&s->lock); | 80 | pic_lock(s); |
58 | s->pics[0].isr_ack = 0xff; | 81 | s->pics[0].isr_ack = 0xff; |
59 | s->pics[1].isr_ack = 0xff; | 82 | s->pics[1].isr_ack = 0xff; |
60 | raw_spin_unlock(&s->lock); | 83 | pic_unlock(s); |
61 | } | 84 | } |
62 | 85 | ||
63 | /* | 86 | /* |
@@ -158,9 +181,9 @@ static void pic_update_irq(struct kvm_pic *s) | |||
158 | 181 | ||
159 | void kvm_pic_update_irq(struct kvm_pic *s) | 182 | void kvm_pic_update_irq(struct kvm_pic *s) |
160 | { | 183 | { |
161 | raw_spin_lock(&s->lock); | 184 | pic_lock(s); |
162 | pic_update_irq(s); | 185 | pic_update_irq(s); |
163 | raw_spin_unlock(&s->lock); | 186 | pic_unlock(s); |
164 | } | 187 | } |
165 | 188 | ||
166 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 189 | int kvm_pic_set_irq(void *opaque, int irq, int level) |
@@ -168,14 +191,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
168 | struct kvm_pic *s = opaque; | 191 | struct kvm_pic *s = opaque; |
169 | int ret = -1; | 192 | int ret = -1; |
170 | 193 | ||
171 | raw_spin_lock(&s->lock); | 194 | pic_lock(s); |
172 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 195 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
173 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 196 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
174 | pic_update_irq(s); | 197 | pic_update_irq(s); |
175 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 198 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
176 | s->pics[irq >> 3].imr, ret == 0); | 199 | s->pics[irq >> 3].imr, ret == 0); |
177 | } | 200 | } |
178 | raw_spin_unlock(&s->lock); | 201 | pic_unlock(s); |
179 | 202 | ||
180 | return ret; | 203 | return ret; |
181 | } | 204 | } |
@@ -205,7 +228,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
205 | int irq, irq2, intno; | 228 | int irq, irq2, intno; |
206 | struct kvm_pic *s = pic_irqchip(kvm); | 229 | struct kvm_pic *s = pic_irqchip(kvm); |
207 | 230 | ||
208 | raw_spin_lock(&s->lock); | 231 | pic_lock(s); |
209 | irq = pic_get_irq(&s->pics[0]); | 232 | irq = pic_get_irq(&s->pics[0]); |
210 | if (irq >= 0) { | 233 | if (irq >= 0) { |
211 | pic_intack(&s->pics[0], irq); | 234 | pic_intack(&s->pics[0], irq); |
@@ -230,7 +253,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
230 | intno = s->pics[0].irq_base + irq; | 253 | intno = s->pics[0].irq_base + irq; |
231 | } | 254 | } |
232 | pic_update_irq(s); | 255 | pic_update_irq(s); |
233 | raw_spin_unlock(&s->lock); | 256 | pic_unlock(s); |
234 | 257 | ||
235 | return intno; | 258 | return intno; |
236 | } | 259 | } |
@@ -444,7 +467,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
444 | printk(KERN_ERR "PIC: non byte write\n"); | 467 | printk(KERN_ERR "PIC: non byte write\n"); |
445 | return 0; | 468 | return 0; |
446 | } | 469 | } |
447 | raw_spin_lock(&s->lock); | 470 | pic_lock(s); |
448 | switch (addr) { | 471 | switch (addr) { |
449 | case 0x20: | 472 | case 0x20: |
450 | case 0x21: | 473 | case 0x21: |
@@ -457,7 +480,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 480 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
458 | break; | 481 | break; |
459 | } | 482 | } |
460 | raw_spin_unlock(&s->lock); | 483 | pic_unlock(s); |
461 | return 0; | 484 | return 0; |
462 | } | 485 | } |
463 | 486 | ||
@@ -474,7 +497,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
474 | printk(KERN_ERR "PIC: non byte read\n"); | 497 | printk(KERN_ERR "PIC: non byte read\n"); |
475 | return 0; | 498 | return 0; |
476 | } | 499 | } |
477 | raw_spin_lock(&s->lock); | 500 | pic_lock(s); |
478 | switch (addr) { | 501 | switch (addr) { |
479 | case 0x20: | 502 | case 0x20: |
480 | case 0x21: | 503 | case 0x21: |
@@ -488,7 +511,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
488 | break; | 511 | break; |
489 | } | 512 | } |
490 | *(unsigned char *)val = data; | 513 | *(unsigned char *)val = data; |
491 | raw_spin_unlock(&s->lock); | 514 | pic_unlock(s); |
492 | return 0; | 515 | return 0; |
493 | } | 516 | } |
494 | 517 | ||
@@ -505,7 +528,7 @@ static void pic_irq_request(void *opaque, int level) | |||
505 | s->output = level; | 528 | s->output = level; |
506 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | 529 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
507 | s->pics[0].isr_ack &= ~(1 << irq); | 530 | s->pics[0].isr_ack &= ~(1 << irq); |
508 | kvm_vcpu_kick(vcpu); | 531 | s->wakeup_needed = true; |
509 | } | 532 | } |
510 | } | 533 | } |
511 | 534 | ||
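The pic_lock()/pic_unlock() helpers introduced above defer the vcpu kick: pic_irq_request() only records wakeup_needed while the raw spinlock is held, and pic_unlock() performs the kick after dropping it. A simplified pthread analogue of that pattern (names are placeholders, not kernel APIs):

#include <pthread.h>
#include <stdbool.h>

struct pic_state {
	pthread_mutex_t lock;
	bool wakeup_needed;
};

static void wake_target(void)	/* stands in for kvm_vcpu_kick() */
{
}

static void pic_lock(struct pic_state *s)
{
	pthread_mutex_lock(&s->lock);
}

static void pic_unlock(struct pic_state *s)
{
	bool wakeup = s->wakeup_needed;

	s->wakeup_needed = false;
	pthread_mutex_unlock(&s->lock);

	if (wakeup)
		wake_target();	/* runs outside the critical section */
}

Deferring the wakeup keeps the lock hold time short and avoids kicking another thread while still inside the critical section.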
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 34b15915754d..cd1f362f413d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -63,6 +63,7 @@ struct kvm_kpic_state { | |||
63 | 63 | ||
64 | struct kvm_pic { | 64 | struct kvm_pic { |
65 | raw_spinlock_t lock; | 65 | raw_spinlock_t lock; |
66 | bool wakeup_needed; | ||
66 | unsigned pending_acks; | 67 | unsigned pending_acks; |
67 | struct kvm *kvm; | 68 | struct kvm *kvm; |
68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 69 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 55c7524dda54..64bc6ea78d90 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h | |||
@@ -10,9 +10,7 @@ struct kvm_timer { | |||
10 | }; | 10 | }; |
11 | 11 | ||
12 | struct kvm_timer_ops { | 12 | struct kvm_timer_ops { |
13 | bool (*is_periodic)(struct kvm_timer *); | 13 | bool (*is_periodic)(struct kvm_timer *); |
14 | }; | 14 | }; |
15 | 15 | ||
16 | |||
17 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); | 16 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); |
18 | |||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 19a8906bcaa2..81563e76e28f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644); | |||
148 | 148 | ||
149 | #include <trace/events/kvm.h> | 149 | #include <trace/events/kvm.h> |
150 | 150 | ||
151 | #undef TRACE_INCLUDE_FILE | ||
152 | #define CREATE_TRACE_POINTS | 151 | #define CREATE_TRACE_POINTS |
153 | #include "mmutrace.h" | 152 | #include "mmutrace.h" |
154 | 153 | ||
@@ -174,12 +173,7 @@ struct kvm_shadow_walk_iterator { | |||
174 | shadow_walk_okay(&(_walker)); \ | 173 | shadow_walk_okay(&(_walker)); \ |
175 | shadow_walk_next(&(_walker))) | 174 | shadow_walk_next(&(_walker))) |
176 | 175 | ||
177 | 176 | typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); | |
178 | struct kvm_unsync_walk { | ||
179 | int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); | ||
180 | }; | ||
181 | |||
182 | typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); | ||
183 | 177 | ||
184 | static struct kmem_cache *pte_chain_cache; | 178 | static struct kmem_cache *pte_chain_cache; |
185 | static struct kmem_cache *rmap_desc_cache; | 179 | static struct kmem_cache *rmap_desc_cache; |
@@ -223,7 +217,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
223 | } | 217 | } |
224 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 218 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
225 | 219 | ||
226 | static int is_write_protection(struct kvm_vcpu *vcpu) | 220 | static bool is_write_protection(struct kvm_vcpu *vcpu) |
227 | { | 221 | { |
228 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); | 222 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
229 | } | 223 | } |
@@ -327,7 +321,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | |||
327 | page = alloc_page(GFP_KERNEL); | 321 | page = alloc_page(GFP_KERNEL); |
328 | if (!page) | 322 | if (!page) |
329 | return -ENOMEM; | 323 | return -ENOMEM; |
330 | set_page_private(page, 0); | ||
331 | cache->objects[cache->nobjs++] = page_address(page); | 324 | cache->objects[cache->nobjs++] = page_address(page); |
332 | } | 325 | } |
333 | return 0; | 326 | return 0; |
@@ -438,9 +431,9 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
438 | int i; | 431 | int i; |
439 | 432 | ||
440 | gfn = unalias_gfn(kvm, gfn); | 433 | gfn = unalias_gfn(kvm, gfn); |
434 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
441 | for (i = PT_DIRECTORY_LEVEL; | 435 | for (i = PT_DIRECTORY_LEVEL; |
442 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 436 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
443 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
444 | write_count = slot_largepage_idx(gfn, slot, i); | 437 | write_count = slot_largepage_idx(gfn, slot, i); |
445 | *write_count -= 1; | 438 | *write_count -= 1; |
446 | WARN_ON(*write_count < 0); | 439 | WARN_ON(*write_count < 0); |
@@ -654,7 +647,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
654 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 647 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
655 | { | 648 | { |
656 | struct kvm_rmap_desc *desc; | 649 | struct kvm_rmap_desc *desc; |
657 | struct kvm_rmap_desc *prev_desc; | ||
658 | u64 *prev_spte; | 650 | u64 *prev_spte; |
659 | int i; | 651 | int i; |
660 | 652 | ||
@@ -666,7 +658,6 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | |||
666 | return NULL; | 658 | return NULL; |
667 | } | 659 | } |
668 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 660 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
669 | prev_desc = NULL; | ||
670 | prev_spte = NULL; | 661 | prev_spte = NULL; |
671 | while (desc) { | 662 | while (desc) { |
672 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { | 663 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { |
@@ -794,7 +785,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
794 | int retval = 0; | 785 | int retval = 0; |
795 | struct kvm_memslots *slots; | 786 | struct kvm_memslots *slots; |
796 | 787 | ||
797 | slots = rcu_dereference(kvm->memslots); | 788 | slots = kvm_memslots(kvm); |
798 | 789 | ||
799 | for (i = 0; i < slots->nmemslots; i++) { | 790 | for (i = 0; i < slots->nmemslots; i++) { |
800 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 791 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
@@ -925,7 +916,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
925 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 916 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
926 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 917 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
927 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 918 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
928 | INIT_LIST_HEAD(&sp->oos_link); | ||
929 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 919 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
930 | sp->multimapped = 0; | 920 | sp->multimapped = 0; |
931 | sp->parent_pte = parent_pte; | 921 | sp->parent_pte = parent_pte; |
@@ -1009,8 +999,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
1009 | } | 999 | } |
1010 | 1000 | ||
1011 | 1001 | ||
1012 | static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1002 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
1013 | mmu_parent_walk_fn fn) | ||
1014 | { | 1003 | { |
1015 | struct kvm_pte_chain *pte_chain; | 1004 | struct kvm_pte_chain *pte_chain; |
1016 | struct hlist_node *node; | 1005 | struct hlist_node *node; |
@@ -1019,8 +1008,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1019 | 1008 | ||
1020 | if (!sp->multimapped && sp->parent_pte) { | 1009 | if (!sp->multimapped && sp->parent_pte) { |
1021 | parent_sp = page_header(__pa(sp->parent_pte)); | 1010 | parent_sp = page_header(__pa(sp->parent_pte)); |
1022 | fn(vcpu, parent_sp); | 1011 | fn(parent_sp); |
1023 | mmu_parent_walk(vcpu, parent_sp, fn); | 1012 | mmu_parent_walk(parent_sp, fn); |
1024 | return; | 1013 | return; |
1025 | } | 1014 | } |
1026 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1015 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
@@ -1028,8 +1017,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
1028 | if (!pte_chain->parent_ptes[i]) | 1017 | if (!pte_chain->parent_ptes[i]) |
1029 | break; | 1018 | break; |
1030 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | 1019 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); |
1031 | fn(vcpu, parent_sp); | 1020 | fn(parent_sp); |
1032 | mmu_parent_walk(vcpu, parent_sp, fn); | 1021 | mmu_parent_walk(parent_sp, fn); |
1033 | } | 1022 | } |
1034 | } | 1023 | } |
1035 | 1024 | ||
@@ -1066,16 +1055,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | |||
1066 | } | 1055 | } |
1067 | } | 1056 | } |
1068 | 1057 | ||
1069 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1058 | static int unsync_walk_fn(struct kvm_mmu_page *sp) |
1070 | { | 1059 | { |
1071 | kvm_mmu_update_parents_unsync(sp); | 1060 | kvm_mmu_update_parents_unsync(sp); |
1072 | return 1; | 1061 | return 1; |
1073 | } | 1062 | } |
1074 | 1063 | ||
1075 | static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, | 1064 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) |
1076 | struct kvm_mmu_page *sp) | ||
1077 | { | 1065 | { |
1078 | mmu_parent_walk(vcpu, sp, unsync_walk_fn); | 1066 | mmu_parent_walk(sp, unsync_walk_fn); |
1079 | kvm_mmu_update_parents_unsync(sp); | 1067 | kvm_mmu_update_parents_unsync(sp); |
1080 | } | 1068 | } |
1081 | 1069 | ||
@@ -1201,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
1201 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1189 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
1202 | { | 1190 | { |
1203 | WARN_ON(!sp->unsync); | 1191 | WARN_ON(!sp->unsync); |
1192 | trace_kvm_mmu_sync_page(sp); | ||
1204 | sp->unsync = 0; | 1193 | sp->unsync = 0; |
1205 | --kvm->stat.mmu_unsync; | 1194 | --kvm->stat.mmu_unsync; |
1206 | } | 1195 | } |
@@ -1209,12 +1198,11 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | |||
1209 | 1198 | ||
1210 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1199 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1211 | { | 1200 | { |
1212 | if (sp->role.glevels != vcpu->arch.mmu.root_level) { | 1201 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
1213 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1202 | kvm_mmu_zap_page(vcpu->kvm, sp); |
1214 | return 1; | 1203 | return 1; |
1215 | } | 1204 | } |
1216 | 1205 | ||
1217 | trace_kvm_mmu_sync_page(sp); | ||
1218 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) | 1206 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) |
1219 | kvm_flush_remote_tlbs(vcpu->kvm); | 1207 | kvm_flush_remote_tlbs(vcpu->kvm); |
1220 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1208 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
@@ -1331,6 +1319,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1331 | role = vcpu->arch.mmu.base_role; | 1319 | role = vcpu->arch.mmu.base_role; |
1332 | role.level = level; | 1320 | role.level = level; |
1333 | role.direct = direct; | 1321 | role.direct = direct; |
1322 | if (role.direct) | ||
1323 | role.cr4_pae = 0; | ||
1334 | role.access = access; | 1324 | role.access = access; |
1335 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1325 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
1336 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1326 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
@@ -1351,7 +1341,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1351 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1341 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1352 | if (sp->unsync_children) { | 1342 | if (sp->unsync_children) { |
1353 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | 1343 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); |
1354 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1344 | kvm_mmu_mark_parents_unsync(sp); |
1355 | } | 1345 | } |
1356 | trace_kvm_mmu_get_page(sp, false); | 1346 | trace_kvm_mmu_get_page(sp, false); |
1357 | return sp; | 1347 | return sp; |
@@ -1573,13 +1563,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
1573 | r = 0; | 1563 | r = 0; |
1574 | index = kvm_page_table_hashfn(gfn); | 1564 | index = kvm_page_table_hashfn(gfn); |
1575 | bucket = &kvm->arch.mmu_page_hash[index]; | 1565 | bucket = &kvm->arch.mmu_page_hash[index]; |
1566 | restart: | ||
1576 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 1567 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) |
1577 | if (sp->gfn == gfn && !sp->role.direct) { | 1568 | if (sp->gfn == gfn && !sp->role.direct) { |
1578 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1569 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
1579 | sp->role.word); | 1570 | sp->role.word); |
1580 | r = 1; | 1571 | r = 1; |
1581 | if (kvm_mmu_zap_page(kvm, sp)) | 1572 | if (kvm_mmu_zap_page(kvm, sp)) |
1582 | n = bucket->first; | 1573 | goto restart; |
1583 | } | 1574 | } |
1584 | return r; | 1575 | return r; |
1585 | } | 1576 | } |
@@ -1593,13 +1584,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
1593 | 1584 | ||
1594 | index = kvm_page_table_hashfn(gfn); | 1585 | index = kvm_page_table_hashfn(gfn); |
1595 | bucket = &kvm->arch.mmu_page_hash[index]; | 1586 | bucket = &kvm->arch.mmu_page_hash[index]; |
1587 | restart: | ||
1596 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { | 1588 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { |
1597 | if (sp->gfn == gfn && !sp->role.direct | 1589 | if (sp->gfn == gfn && !sp->role.direct |
1598 | && !sp->role.invalid) { | 1590 | && !sp->role.invalid) { |
1599 | pgprintk("%s: zap %lx %x\n", | 1591 | pgprintk("%s: zap %lx %x\n", |
1600 | __func__, gfn, sp->role.word); | 1592 | __func__, gfn, sp->role.word); |
1601 | if (kvm_mmu_zap_page(kvm, sp)) | 1593 | if (kvm_mmu_zap_page(kvm, sp)) |
1602 | nn = bucket->first; | 1594 | goto restart; |
1603 | } | 1595 | } |
1604 | } | 1596 | } |
1605 | } | 1597 | } |
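Both hunks above replace the old trick of resetting the cursor to bucket->first with an explicit restart label: kvm_mmu_zap_page() may remove more entries than the one being visited, so the safe option is to start the bucket walk over. A small stand-alone sketch of the same pattern on a singly linked list (all names are illustrative):

#include <stdlib.h>

struct node {
	struct node *next;
	int doomed;
};

/* Unlinks and frees 'victim'; returns 1 when the caller's iteration state
 * can no longer be trusted (mirrors kvm_mmu_zap_page()'s return value). */
static int zap(struct node **head, struct node *victim)
{
	for (struct node **pp = head; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			*pp = victim->next;
			free(victim);
			return 1;
		}
	}
	return 0;
}

static void zap_matching(struct node **head)
{
restart:
	for (struct node *n = *head; n; n = n->next)
		if (n->doomed && zap(head, n))
			goto restart;	/* the list changed under us: walk it again */
}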
@@ -1626,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp) | |||
1626 | } | 1618 | } |
1627 | } | 1619 | } |
1628 | 1620 | ||
1629 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | ||
1630 | { | ||
1631 | struct page *page; | ||
1632 | |||
1633 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | ||
1634 | |||
1635 | if (gpa == UNMAPPED_GVA) | ||
1636 | return NULL; | ||
1637 | |||
1638 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
1639 | |||
1640 | return page; | ||
1641 | } | ||
1642 | |||
1643 | /* | 1621 | /* |
1644 | * The function is based on mtrr_type_lookup() in | 1622 | * The function is based on mtrr_type_lookup() in |
1645 | * arch/x86/kernel/cpu/mtrr/generic.c | 1623 | * arch/x86/kernel/cpu/mtrr/generic.c |
@@ -1752,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1752 | struct kvm_mmu_page *s; | 1730 | struct kvm_mmu_page *s; |
1753 | struct hlist_node *node, *n; | 1731 | struct hlist_node *node, *n; |
1754 | 1732 | ||
1755 | trace_kvm_mmu_unsync_page(sp); | ||
1756 | index = kvm_page_table_hashfn(sp->gfn); | 1733 | index = kvm_page_table_hashfn(sp->gfn); |
1757 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1734 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
1758 | /* don't unsync if pagetable is shadowed with multiple roles */ | 1735 | /* don't unsync if pagetable is shadowed with multiple roles */ |
@@ -1762,10 +1739,11 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1762 | if (s->role.word != sp->role.word) | 1739 | if (s->role.word != sp->role.word) |
1763 | return 1; | 1740 | return 1; |
1764 | } | 1741 | } |
1742 | trace_kvm_mmu_unsync_page(sp); | ||
1765 | ++vcpu->kvm->stat.mmu_unsync; | 1743 | ++vcpu->kvm->stat.mmu_unsync; |
1766 | sp->unsync = 1; | 1744 | sp->unsync = 1; |
1767 | 1745 | ||
1768 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1746 | kvm_mmu_mark_parents_unsync(sp); |
1769 | 1747 | ||
1770 | mmu_convert_notrap(sp); | 1748 | mmu_convert_notrap(sp); |
1771 | return 0; | 1749 | return 0; |
@@ -2081,21 +2059,23 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2081 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2059 | hpa_t root = vcpu->arch.mmu.root_hpa; |
2082 | 2060 | ||
2083 | ASSERT(!VALID_PAGE(root)); | 2061 | ASSERT(!VALID_PAGE(root)); |
2084 | if (tdp_enabled) | ||
2085 | direct = 1; | ||
2086 | if (mmu_check_root(vcpu, root_gfn)) | 2062 | if (mmu_check_root(vcpu, root_gfn)) |
2087 | return 1; | 2063 | return 1; |
2064 | if (tdp_enabled) { | ||
2065 | direct = 1; | ||
2066 | root_gfn = 0; | ||
2067 | } | ||
2068 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2088 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2069 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
2089 | PT64_ROOT_LEVEL, direct, | 2070 | PT64_ROOT_LEVEL, direct, |
2090 | ACC_ALL, NULL); | 2071 | ACC_ALL, NULL); |
2091 | root = __pa(sp->spt); | 2072 | root = __pa(sp->spt); |
2092 | ++sp->root_count; | 2073 | ++sp->root_count; |
2074 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2093 | vcpu->arch.mmu.root_hpa = root; | 2075 | vcpu->arch.mmu.root_hpa = root; |
2094 | return 0; | 2076 | return 0; |
2095 | } | 2077 | } |
2096 | direct = !is_paging(vcpu); | 2078 | direct = !is_paging(vcpu); |
2097 | if (tdp_enabled) | ||
2098 | direct = 1; | ||
2099 | for (i = 0; i < 4; ++i) { | 2079 | for (i = 0; i < 4; ++i) { |
2100 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2080 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
2101 | 2081 | ||
@@ -2111,11 +2091,18 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2111 | root_gfn = 0; | 2091 | root_gfn = 0; |
2112 | if (mmu_check_root(vcpu, root_gfn)) | 2092 | if (mmu_check_root(vcpu, root_gfn)) |
2113 | return 1; | 2093 | return 1; |
2094 | if (tdp_enabled) { | ||
2095 | direct = 1; | ||
2096 | root_gfn = i << 30; | ||
2097 | } | ||
2098 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2114 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2099 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
2115 | PT32_ROOT_LEVEL, direct, | 2100 | PT32_ROOT_LEVEL, direct, |
2116 | ACC_ALL, NULL); | 2101 | ACC_ALL, NULL); |
2117 | root = __pa(sp->spt); | 2102 | root = __pa(sp->spt); |
2118 | ++sp->root_count; | 2103 | ++sp->root_count; |
2104 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2105 | |||
2119 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 2106 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
2120 | } | 2107 | } |
2121 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 2108 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
@@ -2299,13 +2286,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2299 | /* no rsvd bits for 2 level 4K page table entries */ | 2286 | /* no rsvd bits for 2 level 4K page table entries */ |
2300 | context->rsvd_bits_mask[0][1] = 0; | 2287 | context->rsvd_bits_mask[0][1] = 0; |
2301 | context->rsvd_bits_mask[0][0] = 0; | 2288 | context->rsvd_bits_mask[0][0] = 0; |
2289 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | ||
2290 | |||
2291 | if (!is_pse(vcpu)) { | ||
2292 | context->rsvd_bits_mask[1][1] = 0; | ||
2293 | break; | ||
2294 | } | ||
2295 | |||
2302 | if (is_cpuid_PSE36()) | 2296 | if (is_cpuid_PSE36()) |
2303 | /* 36bits PSE 4MB page */ | 2297 | /* 36bits PSE 4MB page */ |
2304 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); | 2298 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); |
2305 | else | 2299 | else |
2306 | /* 32 bits PSE 4MB page */ | 2300 | /* 32 bits PSE 4MB page */ |
2307 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | 2301 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); |
2308 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | ||
2309 | break; | 2302 | break; |
2310 | case PT32E_ROOT_LEVEL: | 2303 | case PT32E_ROOT_LEVEL: |
2311 | context->rsvd_bits_mask[0][2] = | 2304 | context->rsvd_bits_mask[0][2] = |
@@ -2318,7 +2311,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2318 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2311 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2319 | rsvd_bits(maxphyaddr, 62) | | 2312 | rsvd_bits(maxphyaddr, 62) | |
2320 | rsvd_bits(13, 20); /* large page */ | 2313 | rsvd_bits(13, 20); /* large page */ |
2321 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2314 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2322 | break; | 2315 | break; |
2323 | case PT64_ROOT_LEVEL: | 2316 | case PT64_ROOT_LEVEL: |
2324 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 2317 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
@@ -2336,7 +2329,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
2336 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2329 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
2337 | rsvd_bits(maxphyaddr, 51) | | 2330 | rsvd_bits(maxphyaddr, 51) | |
2338 | rsvd_bits(13, 20); /* large page */ | 2331 | rsvd_bits(13, 20); /* large page */ |
2339 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2332 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
2340 | break; | 2333 | break; |
2341 | } | 2334 | } |
2342 | } | 2335 | } |
@@ -2438,7 +2431,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | |||
2438 | else | 2431 | else |
2439 | r = paging32_init_context(vcpu); | 2432 | r = paging32_init_context(vcpu); |
2440 | 2433 | ||
2441 | vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; | 2434 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
2435 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | ||
2442 | 2436 | ||
2443 | return r; | 2437 | return r; |
2444 | } | 2438 | } |
@@ -2478,7 +2472,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
2478 | goto out; | 2472 | goto out; |
2479 | spin_lock(&vcpu->kvm->mmu_lock); | 2473 | spin_lock(&vcpu->kvm->mmu_lock); |
2480 | kvm_mmu_free_some_pages(vcpu); | 2474 | kvm_mmu_free_some_pages(vcpu); |
2475 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2481 | r = mmu_alloc_roots(vcpu); | 2476 | r = mmu_alloc_roots(vcpu); |
2477 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2482 | mmu_sync_roots(vcpu); | 2478 | mmu_sync_roots(vcpu); |
2483 | spin_unlock(&vcpu->kvm->mmu_lock); | 2479 | spin_unlock(&vcpu->kvm->mmu_lock); |
2484 | if (r) | 2480 | if (r) |
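In the kvm_mmu_load() hunk above the lock is no longer held across mmu_alloc_roots(); the earlier mmu_alloc_roots() hunks show the allocation taking mmu_lock only around kvm_mmu_get_page() itself. A rough user-space sketch of that narrowed critical section (placeholder names, not the kernel's locking API):

#include <pthread.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;

static void free_some_pages(void)
{
}

static int alloc_roots(void)
{
	/* the allocation step now does its own, shorter locking */
	pthread_mutex_lock(&mmu_lock);
	/* ... kvm_mmu_get_page() equivalent would run here ... */
	pthread_mutex_unlock(&mmu_lock);
	return 0;
}

static void sync_roots(void)
{
}

static int load_mmu(void)
{
	int r;

	pthread_mutex_lock(&mmu_lock);
	free_some_pages();
	pthread_mutex_unlock(&mmu_lock);	/* dropped before alloc_roots() */

	r = alloc_roots();

	pthread_mutex_lock(&mmu_lock);
	sync_roots();
	pthread_mutex_unlock(&mmu_lock);

	return r;
}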
@@ -2527,7 +2523,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
2527 | } | 2523 | } |
2528 | 2524 | ||
2529 | ++vcpu->kvm->stat.mmu_pte_updated; | 2525 | ++vcpu->kvm->stat.mmu_pte_updated; |
2530 | if (sp->role.glevels == PT32_ROOT_LEVEL) | 2526 | if (!sp->role.cr4_pae) |
2531 | paging32_update_pte(vcpu, sp, spte, new); | 2527 | paging32_update_pte(vcpu, sp, spte, new); |
2532 | else | 2528 | else |
2533 | paging64_update_pte(vcpu, sp, spte, new); | 2529 | paging64_update_pte(vcpu, sp, spte, new); |
@@ -2562,36 +2558,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
2562 | } | 2558 | } |
2563 | 2559 | ||
2564 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2560 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
2565 | const u8 *new, int bytes) | 2561 | u64 gpte) |
2566 | { | 2562 | { |
2567 | gfn_t gfn; | 2563 | gfn_t gfn; |
2568 | int r; | ||
2569 | u64 gpte = 0; | ||
2570 | pfn_t pfn; | 2564 | pfn_t pfn; |
2571 | 2565 | ||
2572 | if (bytes != 4 && bytes != 8) | ||
2573 | return; | ||
2574 | |||
2575 | /* | ||
2576 | * Assume that the pte write on a page table of the same type | ||
2577 | * as the current vcpu paging mode. This is nearly always true | ||
2578 | * (might be false while changing modes). Note it is verified later | ||
2579 | * by update_pte(). | ||
2580 | */ | ||
2581 | if (is_pae(vcpu)) { | ||
2582 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
2583 | if ((bytes == 4) && (gpa % 4 == 0)) { | ||
2584 | r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); | ||
2585 | if (r) | ||
2586 | return; | ||
2587 | memcpy((void *)&gpte + (gpa % 8), new, 4); | ||
2588 | } else if ((bytes == 8) && (gpa % 8 == 0)) { | ||
2589 | memcpy((void *)&gpte, new, 8); | ||
2590 | } | ||
2591 | } else { | ||
2592 | if ((bytes == 4) && (gpa % 4 == 0)) | ||
2593 | memcpy((void *)&gpte, new, 4); | ||
2594 | } | ||
2595 | if (!is_present_gpte(gpte)) | 2566 | if (!is_present_gpte(gpte)) |
2596 | return; | 2567 | return; |
2597 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2568 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
@@ -2640,10 +2611,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2640 | int flooded = 0; | 2611 | int flooded = 0; |
2641 | int npte; | 2612 | int npte; |
2642 | int r; | 2613 | int r; |
2614 | int invlpg_counter; | ||
2643 | 2615 | ||
2644 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2616 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
2645 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 2617 | |
2618 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | ||
2619 | |||
2620 | /* | ||
2621 | * Assume that the pte write on a page table of the same type | ||
2622 | * as the current vcpu paging mode. This is nearly always true | ||
2623 | * (might be false while changing modes). Note it is verified later | ||
2624 | * by update_pte(). | ||
2625 | */ | ||
2626 | if ((is_pae(vcpu) && bytes == 4) || !new) { | ||
2627 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
2628 | if (is_pae(vcpu)) { | ||
2629 | gpa &= ~(gpa_t)7; | ||
2630 | bytes = 8; | ||
2631 | } | ||
2632 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | ||
2633 | if (r) | ||
2634 | gentry = 0; | ||
2635 | new = (const u8 *)&gentry; | ||
2636 | } | ||
2637 | |||
2638 | switch (bytes) { | ||
2639 | case 4: | ||
2640 | gentry = *(const u32 *)new; | ||
2641 | break; | ||
2642 | case 8: | ||
2643 | gentry = *(const u64 *)new; | ||
2644 | break; | ||
2645 | default: | ||
2646 | gentry = 0; | ||
2647 | break; | ||
2648 | } | ||
2649 | |||
2650 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | ||
2646 | spin_lock(&vcpu->kvm->mmu_lock); | 2651 | spin_lock(&vcpu->kvm->mmu_lock); |
2652 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | ||
2653 | gentry = 0; | ||
2647 | kvm_mmu_access_page(vcpu, gfn); | 2654 | kvm_mmu_access_page(vcpu, gfn); |
2648 | kvm_mmu_free_some_pages(vcpu); | 2655 | kvm_mmu_free_some_pages(vcpu); |
2649 | ++vcpu->kvm->stat.mmu_pte_write; | 2656 | ++vcpu->kvm->stat.mmu_pte_write; |
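The hunk above snapshots arch.invlpg_counter before reading the guest entry and discards the read if the counter has moved once mmu_lock is held, much like a sequence-count check. A compressed sketch of that handshake using C11 atomics and a pthread mutex (all helpers are simplified stand-ins, not kernel functions):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>

static atomic_int invlpg_counter;
static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;

static uint64_t read_guest_pte(void)	/* stands in for kvm_read_guest() */
{
	return 0x5005;
}

static void pte_write(void)
{
	int snap = atomic_load(&invlpg_counter);
	uint64_t gentry = read_guest_pte();	/* done before taking the lock */

	pthread_mutex_lock(&mmu_lock);
	if (atomic_load(&invlpg_counter) != snap)
		gentry = 0;	/* an invlpg ran in between: treat the entry as stale */
	/* ... update shadow ptes from gentry ... */
	(void)gentry;
	pthread_mutex_unlock(&mmu_lock);
}

static void invlpg(void)
{
	atomic_fetch_add(&invlpg_counter, 1);	/* invalidates concurrent pte_write() readers */
}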
@@ -2662,10 +2669,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2662 | } | 2669 | } |
2663 | index = kvm_page_table_hashfn(gfn); | 2670 | index = kvm_page_table_hashfn(gfn); |
2664 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2671 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
2672 | |||
2673 | restart: | ||
2665 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2674 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { |
2666 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) | 2675 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) |
2667 | continue; | 2676 | continue; |
2668 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; | 2677 | pte_size = sp->role.cr4_pae ? 8 : 4; |
2669 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2678 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
2670 | misaligned |= bytes < 4; | 2679 | misaligned |= bytes < 4; |
2671 | if (misaligned || flooded) { | 2680 | if (misaligned || flooded) { |
@@ -2682,14 +2691,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2682 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2691 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
2683 | gpa, bytes, sp->role.word); | 2692 | gpa, bytes, sp->role.word); |
2684 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) | 2693 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) |
2685 | n = bucket->first; | 2694 | goto restart; |
2686 | ++vcpu->kvm->stat.mmu_flooded; | 2695 | ++vcpu->kvm->stat.mmu_flooded; |
2687 | continue; | 2696 | continue; |
2688 | } | 2697 | } |
2689 | page_offset = offset; | 2698 | page_offset = offset; |
2690 | level = sp->role.level; | 2699 | level = sp->role.level; |
2691 | npte = 1; | 2700 | npte = 1; |
2692 | if (sp->role.glevels == PT32_ROOT_LEVEL) { | 2701 | if (!sp->role.cr4_pae) { |
2693 | page_offset <<= 1; /* 32->64 */ | 2702 | page_offset <<= 1; /* 32->64 */ |
2694 | /* | 2703 | /* |
2695 | * A 32-bit pde maps 4MB while the shadow pdes map | 2704 | * A 32-bit pde maps 4MB while the shadow pdes map |
@@ -2707,20 +2716,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2707 | continue; | 2716 | continue; |
2708 | } | 2717 | } |
2709 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2718 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
2710 | if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { | ||
2711 | gentry = 0; | ||
2712 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
2713 | gpa & ~(u64)(pte_size - 1), | ||
2714 | &gentry, pte_size); | ||
2715 | new = (const void *)&gentry; | ||
2716 | if (r < 0) | ||
2717 | new = NULL; | ||
2718 | } | ||
2719 | while (npte--) { | 2719 | while (npte--) { |
2720 | entry = *spte; | 2720 | entry = *spte; |
2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
2722 | if (new) | 2722 | if (gentry) |
2723 | mmu_pte_write_new_pte(vcpu, sp, spte, new); | 2723 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); |
2725 | ++spte; | 2725 | ++spte; |
2726 | } | 2726 | } |
@@ -2900,22 +2900,23 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
2900 | struct kvm_mmu_page *sp, *node; | 2900 | struct kvm_mmu_page *sp, *node; |
2901 | 2901 | ||
2902 | spin_lock(&kvm->mmu_lock); | 2902 | spin_lock(&kvm->mmu_lock); |
2903 | restart: | ||
2903 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 2904 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
2904 | if (kvm_mmu_zap_page(kvm, sp)) | 2905 | if (kvm_mmu_zap_page(kvm, sp)) |
2905 | node = container_of(kvm->arch.active_mmu_pages.next, | 2906 | goto restart; |
2906 | struct kvm_mmu_page, link); | 2907 | |
2907 | spin_unlock(&kvm->mmu_lock); | 2908 | spin_unlock(&kvm->mmu_lock); |
2908 | 2909 | ||
2909 | kvm_flush_remote_tlbs(kvm); | 2910 | kvm_flush_remote_tlbs(kvm); |
2910 | } | 2911 | } |
2911 | 2912 | ||
2912 | static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) | 2913 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) |
2913 | { | 2914 | { |
2914 | struct kvm_mmu_page *page; | 2915 | struct kvm_mmu_page *page; |
2915 | 2916 | ||
2916 | page = container_of(kvm->arch.active_mmu_pages.prev, | 2917 | page = container_of(kvm->arch.active_mmu_pages.prev, |
2917 | struct kvm_mmu_page, link); | 2918 | struct kvm_mmu_page, link); |
2918 | kvm_mmu_zap_page(kvm, page); | 2919 | return kvm_mmu_zap_page(kvm, page) + 1; |
2919 | } | 2920 | } |
2920 | 2921 | ||
2921 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | 2922 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) |
@@ -2927,7 +2928,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2927 | spin_lock(&kvm_lock); | 2928 | spin_lock(&kvm_lock); |
2928 | 2929 | ||
2929 | list_for_each_entry(kvm, &vm_list, vm_list) { | 2930 | list_for_each_entry(kvm, &vm_list, vm_list) { |
2930 | int npages, idx; | 2931 | int npages, idx, freed_pages; |
2931 | 2932 | ||
2932 | idx = srcu_read_lock(&kvm->srcu); | 2933 | idx = srcu_read_lock(&kvm->srcu); |
2933 | spin_lock(&kvm->mmu_lock); | 2934 | spin_lock(&kvm->mmu_lock); |
@@ -2935,8 +2936,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2935 | kvm->arch.n_free_mmu_pages; | 2936 | kvm->arch.n_free_mmu_pages; |
2936 | cache_count += npages; | 2937 | cache_count += npages; |
2937 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | 2938 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { |
2938 | kvm_mmu_remove_one_alloc_mmu_page(kvm); | 2939 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); |
2939 | cache_count--; | 2940 | cache_count -= freed_pages; |
2940 | kvm_freed = kvm; | 2941 | kvm_freed = kvm; |
2941 | } | 2942 | } |
2942 | nr_to_scan--; | 2943 | nr_to_scan--; |
@@ -3011,7 +3012,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
3011 | unsigned int nr_pages = 0; | 3012 | unsigned int nr_pages = 0; |
3012 | struct kvm_memslots *slots; | 3013 | struct kvm_memslots *slots; |
3013 | 3014 | ||
3014 | slots = rcu_dereference(kvm->memslots); | 3015 | slots = kvm_memslots(kvm); |
3016 | |||
3015 | for (i = 0; i < slots->nmemslots; i++) | 3017 | for (i = 0; i < slots->nmemslots; i++) |
3016 | nr_pages += slots->memslots[i].npages; | 3018 | nr_pages += slots->memslots[i].npages; |
3017 | 3019 | ||
@@ -3174,8 +3176,7 @@ static gva_t canonicalize(gva_t gva) | |||
3174 | } | 3176 | } |
3175 | 3177 | ||
3176 | 3178 | ||
3177 | typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, | 3179 | typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); |
3178 | u64 *sptep); | ||
3179 | 3180 | ||
3180 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | 3181 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, |
3181 | inspect_spte_fn fn) | 3182 | inspect_spte_fn fn) |
@@ -3191,7 +3192,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
3191 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 3192 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
3192 | __mmu_spte_walk(kvm, child, fn); | 3193 | __mmu_spte_walk(kvm, child, fn); |
3193 | } else | 3194 | } else |
3194 | fn(kvm, sp, &sp->spt[i]); | 3195 | fn(kvm, &sp->spt[i]); |
3195 | } | 3196 | } |
3196 | } | 3197 | } |
3197 | } | 3198 | } |
@@ -3282,11 +3283,13 @@ static void audit_mappings(struct kvm_vcpu *vcpu) | |||
3282 | 3283 | ||
3283 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3284 | static int count_rmaps(struct kvm_vcpu *vcpu) |
3284 | { | 3285 | { |
3286 | struct kvm *kvm = vcpu->kvm; | ||
3287 | struct kvm_memslots *slots; | ||
3285 | int nmaps = 0; | 3288 | int nmaps = 0; |
3286 | int i, j, k, idx; | 3289 | int i, j, k, idx; |
3287 | 3290 | ||
3288 | idx = srcu_read_lock(&kvm->srcu); | 3291 | idx = srcu_read_lock(&kvm->srcu); |
3289 | slots = rcu_dereference(kvm->memslots); | 3292 | slots = kvm_memslots(kvm); |
3290 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3293 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
3291 | struct kvm_memory_slot *m = &slots->memslots[i]; | 3294 | struct kvm_memory_slot *m = &slots->memslots[i]; |
3292 | struct kvm_rmap_desc *d; | 3295 | struct kvm_rmap_desc *d; |
@@ -3315,7 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
3315 | return nmaps; | 3318 | return nmaps; |
3316 | } | 3319 | } |
3317 | 3320 | ||
3318 | void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | 3321 | void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
3319 | { | 3322 | { |
3320 | unsigned long *rmapp; | 3323 | unsigned long *rmapp; |
3321 | struct kvm_mmu_page *rev_sp; | 3324 | struct kvm_mmu_page *rev_sp; |
@@ -3331,14 +3334,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | |||
3331 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", | 3334 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", |
3332 | audit_msg, gfn); | 3335 | audit_msg, gfn); |
3333 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", | 3336 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", |
3334 | audit_msg, sptep - rev_sp->spt, | 3337 | audit_msg, (long int)(sptep - rev_sp->spt), |
3335 | rev_sp->gfn); | 3338 | rev_sp->gfn); |
3336 | dump_stack(); | 3339 | dump_stack(); |
3337 | return; | 3340 | return; |
3338 | } | 3341 | } |
3339 | 3342 | ||
3340 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], | 3343 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], |
3341 | is_large_pte(*sptep)); | 3344 | rev_sp->role.level); |
3342 | if (!*rmapp) { | 3345 | if (!*rmapp) { |
3343 | if (!printk_ratelimit()) | 3346 | if (!printk_ratelimit()) |
3344 | return; | 3347 | return; |
@@ -3373,7 +3376,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | |||
3373 | continue; | 3376 | continue; |
3374 | if (!(ent & PT_WRITABLE_MASK)) | 3377 | if (!(ent & PT_WRITABLE_MASK)) |
3375 | continue; | 3378 | continue; |
3376 | inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); | 3379 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
3377 | } | 3380 | } |
3378 | } | 3381 | } |
3379 | return; | 3382 | return; |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 3e4a5c6ca2a9..42f07b1bfbc9 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -6,14 +6,12 @@ | |||
6 | 6 | ||
7 | #undef TRACE_SYSTEM | 7 | #undef TRACE_SYSTEM |
8 | #define TRACE_SYSTEM kvmmmu | 8 | #define TRACE_SYSTEM kvmmmu |
9 | #define TRACE_INCLUDE_PATH . | ||
10 | #define TRACE_INCLUDE_FILE mmutrace | ||
11 | 9 | ||
12 | #define KVM_MMU_PAGE_FIELDS \ | 10 | #define KVM_MMU_PAGE_FIELDS \ |
13 | __field(__u64, gfn) \ | 11 | __field(__u64, gfn) \ |
14 | __field(__u32, role) \ | 12 | __field(__u32, role) \ |
15 | __field(__u32, root_count) \ | 13 | __field(__u32, root_count) \ |
16 | __field(__u32, unsync) | 14 | __field(bool, unsync) |
17 | 15 | ||
18 | #define KVM_MMU_PAGE_ASSIGN(sp) \ | 16 | #define KVM_MMU_PAGE_ASSIGN(sp) \ |
19 | __entry->gfn = sp->gfn; \ | 17 | __entry->gfn = sp->gfn; \ |
@@ -30,14 +28,14 @@ | |||
30 | \ | 28 | \ |
31 | role.word = __entry->role; \ | 29 | role.word = __entry->role; \ |
32 | \ | 30 | \ |
33 | trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ | 31 | trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ |
34 | " %snxe root %u %s%c", \ | 32 | " %snxe root %u %s%c", \ |
35 | __entry->gfn, role.level, role.glevels, \ | 33 | __entry->gfn, role.level, \ |
34 | role.cr4_pae ? " pae" : "", \ | ||
36 | role.quadrant, \ | 35 | role.quadrant, \ |
37 | role.direct ? " direct" : "", \ | 36 | role.direct ? " direct" : "", \ |
38 | access_str[role.access], \ | 37 | access_str[role.access], \ |
39 | role.invalid ? " invalid" : "", \ | 38 | role.invalid ? " invalid" : "", \ |
40 | role.cr4_pge ? "" : "!", \ | ||
41 | role.nxe ? "" : "!", \ | 39 | role.nxe ? "" : "!", \ |
42 | __entry->root_count, \ | 40 | __entry->root_count, \ |
43 | __entry->unsync ? "unsync" : "sync", 0); \ | 41 | __entry->unsync ? "unsync" : "sync", 0); \ |
@@ -94,15 +92,15 @@ TRACE_EVENT( | |||
94 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) | 92 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) |
95 | ); | 93 | ); |
96 | 94 | ||
97 | /* We set a pte accessed bit */ | 95 | DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class, |
98 | TRACE_EVENT( | 96 | |
99 | kvm_mmu_set_accessed_bit, | ||
100 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 97 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
98 | |||
101 | TP_ARGS(table_gfn, index, size), | 99 | TP_ARGS(table_gfn, index, size), |
102 | 100 | ||
103 | TP_STRUCT__entry( | 101 | TP_STRUCT__entry( |
104 | __field(__u64, gpa) | 102 | __field(__u64, gpa) |
105 | ), | 103 | ), |
106 | 104 | ||
107 | TP_fast_assign( | 105 | TP_fast_assign( |
108 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 106 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) |
@@ -112,22 +110,20 @@ TRACE_EVENT( | |||
112 | TP_printk("gpa %llx", __entry->gpa) | 110 | TP_printk("gpa %llx", __entry->gpa) |
113 | ); | 111 | ); |
114 | 112 | ||
115 | /* We set a pte dirty bit */ | 113 | /* We set a pte accessed bit */ |
116 | TRACE_EVENT( | 114 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit, |
117 | kvm_mmu_set_dirty_bit, | 115 | |
118 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 116 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
119 | TP_ARGS(table_gfn, index, size), | ||
120 | 117 | ||
121 | TP_STRUCT__entry( | 118 | TP_ARGS(table_gfn, index, size) |
122 | __field(__u64, gpa) | 119 | ); |
123 | ), | ||
124 | 120 | ||
125 | TP_fast_assign( | 121 | /* We set a pte dirty bit */ |
126 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 122 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit, |
127 | + index * size; | ||
128 | ), | ||
129 | 123 | ||
130 | TP_printk("gpa %llx", __entry->gpa) | 124 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
125 | |||
126 | TP_ARGS(table_gfn, index, size) | ||
131 | ); | 127 | ); |
132 | 128 | ||
133 | TRACE_EVENT( | 129 | TRACE_EVENT( |
@@ -166,55 +162,45 @@ TRACE_EVENT( | |||
166 | __entry->created ? "new" : "existing") | 162 | __entry->created ? "new" : "existing") |
167 | ); | 163 | ); |
168 | 164 | ||
169 | TRACE_EVENT( | 165 | DECLARE_EVENT_CLASS(kvm_mmu_page_class, |
170 | kvm_mmu_sync_page, | 166 | |
171 | TP_PROTO(struct kvm_mmu_page *sp), | 167 | TP_PROTO(struct kvm_mmu_page *sp), |
172 | TP_ARGS(sp), | 168 | TP_ARGS(sp), |
173 | 169 | ||
174 | TP_STRUCT__entry( | 170 | TP_STRUCT__entry( |
175 | KVM_MMU_PAGE_FIELDS | 171 | KVM_MMU_PAGE_FIELDS |
176 | ), | 172 | ), |
177 | 173 | ||
178 | TP_fast_assign( | 174 | TP_fast_assign( |
179 | KVM_MMU_PAGE_ASSIGN(sp) | 175 | KVM_MMU_PAGE_ASSIGN(sp) |
180 | ), | 176 | ), |
181 | 177 | ||
182 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 178 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) |
183 | ); | 179 | ); |
184 | 180 | ||
185 | TRACE_EVENT( | 181 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page, |
186 | kvm_mmu_unsync_page, | ||
187 | TP_PROTO(struct kvm_mmu_page *sp), | 182 | TP_PROTO(struct kvm_mmu_page *sp), |
188 | TP_ARGS(sp), | ||
189 | |||
190 | TP_STRUCT__entry( | ||
191 | KVM_MMU_PAGE_FIELDS | ||
192 | ), | ||
193 | 183 | ||
194 | TP_fast_assign( | 184 | TP_ARGS(sp) |
195 | KVM_MMU_PAGE_ASSIGN(sp) | ||
196 | ), | ||
197 | |||
198 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | ||
199 | ); | 185 | ); |
200 | 186 | ||
201 | TRACE_EVENT( | 187 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, |
202 | kvm_mmu_zap_page, | ||
203 | TP_PROTO(struct kvm_mmu_page *sp), | 188 | TP_PROTO(struct kvm_mmu_page *sp), |
204 | TP_ARGS(sp), | ||
205 | 189 | ||
206 | TP_STRUCT__entry( | 190 | TP_ARGS(sp) |
207 | KVM_MMU_PAGE_FIELDS | 191 | ); |
208 | ), | ||
209 | 192 | ||
210 | TP_fast_assign( | 193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, |
211 | KVM_MMU_PAGE_ASSIGN(sp) | 194 | TP_PROTO(struct kvm_mmu_page *sp), |
212 | ), | ||
213 | 195 | ||
214 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 196 | TP_ARGS(sp) |
215 | ); | 197 | ); |
216 | |||
217 | #endif /* _TRACE_KVMMMU_H */ | 198 | #endif /* _TRACE_KVMMMU_H */ |
218 | 199 | ||
200 | #undef TRACE_INCLUDE_PATH | ||
201 | #define TRACE_INCLUDE_PATH . | ||
202 | #undef TRACE_INCLUDE_FILE | ||
203 | #define TRACE_INCLUDE_FILE mmutrace | ||
204 | |||
219 | /* This part must be outside protection */ | 205 | /* This part must be outside protection */ |
220 | #include <trace/define_trace.h> | 206 | #include <trace/define_trace.h> |
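The mmutrace.h rewrite above collapses several near-identical TRACE_EVENT() definitions into one DECLARE_EVENT_CLASS() plus short DEFINE_EVENT() stanzas. A loose user-space analogue of that deduplication with plain C macros (the real tracepoint macros generate far more than this, so treat it only as an illustration of the shape):

#include <stdio.h>

/* One macro holds the shared body, each "event" just instantiates it. */
#define DECLARE_CLASS(class, body) \
	static void class##_impl(unsigned long gpa) body

#define DEFINE_EVENT(class, name) \
	static void trace_##name(unsigned long gpa) { class##_impl(gpa); }

DECLARE_CLASS(set_bit_class, { printf("gpa %#lx\n", gpa); })
DEFINE_EVENT(set_bit_class, kvm_mmu_set_accessed_bit)
DEFINE_EVENT(set_bit_class, kvm_mmu_set_dirty_bit)

int main(void)
{
	trace_kvm_mmu_set_accessed_bit(0x1000);
	trace_kvm_mmu_set_dirty_bit(0x2000);
	return 0;
}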
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 81eab9a50e6a..89d66ca4d87c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -170,7 +170,7 @@ walk: | |||
170 | goto access_error; | 170 | goto access_error; |
171 | 171 | ||
172 | #if PTTYPE == 64 | 172 | #if PTTYPE == 64 |
173 | if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) | 173 | if (fetch_fault && (pte & PT64_NX_MASK)) |
174 | goto access_error; | 174 | goto access_error; |
175 | #endif | 175 | #endif |
176 | 176 | ||
@@ -190,10 +190,10 @@ walk: | |||
190 | 190 | ||
191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || | 191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || |
192 | ((walker->level == PT_DIRECTORY_LEVEL) && | 192 | ((walker->level == PT_DIRECTORY_LEVEL) && |
193 | (pte & PT_PAGE_SIZE_MASK) && | 193 | is_large_pte(pte) && |
194 | (PTTYPE == 64 || is_pse(vcpu))) || | 194 | (PTTYPE == 64 || is_pse(vcpu))) || |
195 | ((walker->level == PT_PDPE_LEVEL) && | 195 | ((walker->level == PT_PDPE_LEVEL) && |
196 | (pte & PT_PAGE_SIZE_MASK) && | 196 | is_large_pte(pte) && |
197 | is_long_mode(vcpu))) { | 197 | is_long_mode(vcpu))) { |
198 | int lvl = walker->level; | 198 | int lvl = walker->level; |
199 | 199 | ||
@@ -258,11 +258,17 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
258 | pt_element_t gpte; | 258 | pt_element_t gpte; |
259 | unsigned pte_access; | 259 | unsigned pte_access; |
260 | pfn_t pfn; | 260 | pfn_t pfn; |
261 | u64 new_spte; | ||
261 | 262 | ||
262 | gpte = *(const pt_element_t *)pte; | 263 | gpte = *(const pt_element_t *)pte; |
263 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
264 | if (!is_present_gpte(gpte)) | 265 | if (!is_present_gpte(gpte)) { |
265 | __set_spte(spte, shadow_notrap_nonpresent_pte); | 266 | if (page->unsync) |
267 | new_spte = shadow_trap_nonpresent_pte; | ||
268 | else | ||
269 | new_spte = shadow_notrap_nonpresent_pte; | ||
270 | __set_spte(spte, new_spte); | ||
271 | } | ||
266 | return; | 272 | return; |
267 | } | 273 | } |
268 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
@@ -457,6 +463,7 @@ out_unlock: | |||
457 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 463 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
458 | { | 464 | { |
459 | struct kvm_shadow_walk_iterator iterator; | 465 | struct kvm_shadow_walk_iterator iterator; |
466 | gpa_t pte_gpa = -1; | ||
460 | int level; | 467 | int level; |
461 | u64 *sptep; | 468 | u64 *sptep; |
462 | int need_flush = 0; | 469 | int need_flush = 0; |
@@ -467,9 +474,16 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
467 | level = iterator.level; | 474 | level = iterator.level; |
468 | sptep = iterator.sptep; | 475 | sptep = iterator.sptep; |
469 | 476 | ||
470 | if (level == PT_PAGE_TABLE_LEVEL || | 477 | if (is_last_spte(*sptep, level)) { |
471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 478 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); |
472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 479 | int offset, shift; |
480 | |||
481 | shift = PAGE_SHIFT - | ||
482 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | ||
483 | offset = sp->role.quadrant << shift; | ||
484 | |||
485 | pte_gpa = (sp->gfn << PAGE_SHIFT) + offset; | ||
486 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
473 | 487 | ||
474 | if (is_shadow_present_pte(*sptep)) { | 488 | if (is_shadow_present_pte(*sptep)) { |
475 | rmap_remove(vcpu->kvm, sptep); | 489 | rmap_remove(vcpu->kvm, sptep); |
@@ -487,7 +501,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
487 | 501 | ||
488 | if (need_flush) | 502 | if (need_flush) |
489 | kvm_flush_remote_tlbs(vcpu->kvm); | 503 | kvm_flush_remote_tlbs(vcpu->kvm); |
504 | |||
505 | atomic_inc(&vcpu->kvm->arch.invlpg_counter); | ||
506 | |||
490 | spin_unlock(&vcpu->kvm->mmu_lock); | 507 | spin_unlock(&vcpu->kvm->mmu_lock); |
508 | |||
509 | if (pte_gpa == -1) | ||
510 | return; | ||
511 | |||
512 | if (mmu_topup_memory_caches(vcpu)) | ||
513 | return; | ||
514 | kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); | ||
491 | } | 515 | } |
492 | 516 | ||
493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 517 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
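Editor's note on the invlpg hunk above: the shift/quadrant arithmetic reconstructs the guest physical address of the PTE that backs the shadow entry, so a single guest PTE write can be replayed through kvm_mmu_pte_write() after the lock is dropped. A minimal standalone sketch (not kernel code) for the 32-bit guest case, assuming PT_LEVEL_BITS == 10 and PT64_LEVEL_BITS == 9 as in paging_tmpl.h:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT      12
#define PT_LEVEL_BITS   10   /* 32-bit guest: 1024 entries per table */
#define PT64_LEVEL_BITS  9   /* shadow page: 512 entries per table */

int main(void)
{
	uint64_t gfn = 0x1234;   /* example guest frame holding the page table */
	int level = 1;           /* PT_PAGE_TABLE_LEVEL */
	int quadrant = 1;        /* second half of the 1024-entry guest table */
	int index = 5;           /* sptep - sp->spt */

	int shift = PAGE_SHIFT - (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
	uint64_t offset = (uint64_t)quadrant << shift;          /* 1 << 11 = 0x800 */
	uint64_t pte_gpa = (gfn << PAGE_SHIFT) + offset + index * 4;

	printf("pte_gpa = %#llx\n", (unsigned long long)pte_gpa); /* 0x1234814 */
	return 0;
}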
@@ -551,12 +575,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
551 | { | 575 | { |
552 | int i, offset, nr_present; | 576 | int i, offset, nr_present; |
553 | bool reset_host_protection; | 577 | bool reset_host_protection; |
578 | gpa_t first_pte_gpa; | ||
554 | 579 | ||
555 | offset = nr_present = 0; | 580 | offset = nr_present = 0; |
556 | 581 | ||
557 | if (PTTYPE == 32) | 582 | if (PTTYPE == 32) |
558 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 583 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
559 | 584 | ||
585 | first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
586 | |||
560 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | 587 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { |
561 | unsigned pte_access; | 588 | unsigned pte_access; |
562 | pt_element_t gpte; | 589 | pt_element_t gpte; |
@@ -566,8 +593,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
566 | if (!is_shadow_present_pte(sp->spt[i])) | 593 | if (!is_shadow_present_pte(sp->spt[i])) |
567 | continue; | 594 | continue; |
568 | 595 | ||
569 | pte_gpa = gfn_to_gpa(sp->gfn); | 596 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); |
570 | pte_gpa += (i+offset) * sizeof(pt_element_t); | ||
571 | 597 | ||
572 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | 598 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, |
573 | sizeof(pt_element_t))) | 599 | sizeof(pt_element_t))) |
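Editor's note on the sync_page hunk above: the change only hoists the invariant part of the guest PTE address out of the loop; first_pte_gpa + i * sizeof(pt_element_t) is identical to the old per-iteration computation. A tiny sketch (hypothetical values, not kernel code) checking that equivalence for the PTTYPE == 32 case:

#include <assert.h>
#include <stdint.h>

typedef uint64_t gpa_t;
typedef uint32_t pt_element_t;          /* PTTYPE == 32 */
#define gfn_to_gpa(gfn) ((gpa_t)(gfn) << 12)

int main(void)
{
	gpa_t gfn = 0x42;
	unsigned offset = 1u << 9;          /* sp->role.quadrant << PT64_LEVEL_BITS */
	gpa_t first_pte_gpa = gfn_to_gpa(gfn) + offset * sizeof(pt_element_t);

	for (unsigned i = 0; i < 512; i++)
		assert(first_pte_gpa + i * sizeof(pt_element_t) ==
		       gfn_to_gpa(gfn) + (i + offset) * sizeof(pt_element_t));
	return 0;
}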
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 737361fcd503..96dc232bfc56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -44,10 +44,11 @@ MODULE_LICENSE("GPL"); | |||
44 | #define SEG_TYPE_LDT 2 | 44 | #define SEG_TYPE_LDT 2 |
45 | #define SEG_TYPE_BUSY_TSS16 3 | 45 | #define SEG_TYPE_BUSY_TSS16 3 |
46 | 46 | ||
47 | #define SVM_FEATURE_NPT (1 << 0) | 47 | #define SVM_FEATURE_NPT (1 << 0) |
48 | #define SVM_FEATURE_LBRV (1 << 1) | 48 | #define SVM_FEATURE_LBRV (1 << 1) |
49 | #define SVM_FEATURE_SVML (1 << 2) | 49 | #define SVM_FEATURE_SVML (1 << 2) |
50 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | 50 | #define SVM_FEATURE_NRIP (1 << 3) |
51 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
51 | 52 | ||
52 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 53 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
53 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 54 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
@@ -70,6 +71,7 @@ struct kvm_vcpu; | |||
70 | struct nested_state { | 71 | struct nested_state { |
71 | struct vmcb *hsave; | 72 | struct vmcb *hsave; |
72 | u64 hsave_msr; | 73 | u64 hsave_msr; |
74 | u64 vm_cr_msr; | ||
73 | u64 vmcb; | 75 | u64 vmcb; |
74 | 76 | ||
75 | /* These are the merged vectors */ | 77 | /* These are the merged vectors */ |
@@ -77,6 +79,7 @@ struct nested_state { | |||
77 | 79 | ||
78 | /* gpa pointers to the real vectors */ | 80 | /* gpa pointers to the real vectors */ |
79 | u64 vmcb_msrpm; | 81 | u64 vmcb_msrpm; |
82 | u64 vmcb_iopm; | ||
80 | 83 | ||
81 | /* A VMEXIT is required but not yet emulated */ | 84 | /* A VMEXIT is required but not yet emulated */ |
82 | bool exit_required; | 85 | bool exit_required; |
@@ -91,6 +94,9 @@ struct nested_state { | |||
91 | 94 | ||
92 | }; | 95 | }; |
93 | 96 | ||
97 | #define MSRPM_OFFSETS 16 | ||
98 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; | ||
99 | |||
94 | struct vcpu_svm { | 100 | struct vcpu_svm { |
95 | struct kvm_vcpu vcpu; | 101 | struct kvm_vcpu vcpu; |
96 | struct vmcb *vmcb; | 102 | struct vmcb *vmcb; |
@@ -110,13 +116,39 @@ struct vcpu_svm { | |||
110 | struct nested_state nested; | 116 | struct nested_state nested; |
111 | 117 | ||
112 | bool nmi_singlestep; | 118 | bool nmi_singlestep; |
119 | |||
120 | unsigned int3_injected; | ||
121 | unsigned long int3_rip; | ||
122 | }; | ||
123 | |||
124 | #define MSR_INVALID 0xffffffffU | ||
125 | |||
126 | static struct svm_direct_access_msrs { | ||
127 | u32 index; /* Index of the MSR */ | ||
128 | bool always; /* True if intercept is always on */ | ||
129 | } direct_access_msrs[] = { | ||
130 | { .index = MSR_K6_STAR, .always = true }, | ||
131 | { .index = MSR_IA32_SYSENTER_CS, .always = true }, | ||
132 | #ifdef CONFIG_X86_64 | ||
133 | { .index = MSR_GS_BASE, .always = true }, | ||
134 | { .index = MSR_FS_BASE, .always = true }, | ||
135 | { .index = MSR_KERNEL_GS_BASE, .always = true }, | ||
136 | { .index = MSR_LSTAR, .always = true }, | ||
137 | { .index = MSR_CSTAR, .always = true }, | ||
138 | { .index = MSR_SYSCALL_MASK, .always = true }, | ||
139 | #endif | ||
140 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, | ||
141 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, | ||
142 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, | ||
143 | { .index = MSR_IA32_LASTINTTOIP, .always = false }, | ||
144 | { .index = MSR_INVALID, .always = false }, | ||
113 | }; | 145 | }; |
114 | 146 | ||
115 | /* enable NPT for AMD64 and X86 with PAE */ | 147 | /* enable NPT for AMD64 and X86 with PAE */ |
116 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 148 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
117 | static bool npt_enabled = true; | 149 | static bool npt_enabled = true; |
118 | #else | 150 | #else |
119 | static bool npt_enabled = false; | 151 | static bool npt_enabled; |
120 | #endif | 152 | #endif |
121 | static int npt = 1; | 153 | static int npt = 1; |
122 | 154 | ||
@@ -129,6 +161,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu); | |||
129 | static void svm_complete_interrupts(struct vcpu_svm *svm); | 161 | static void svm_complete_interrupts(struct vcpu_svm *svm); |
130 | 162 | ||
131 | static int nested_svm_exit_handled(struct vcpu_svm *svm); | 163 | static int nested_svm_exit_handled(struct vcpu_svm *svm); |
164 | static int nested_svm_intercept(struct vcpu_svm *svm); | ||
132 | static int nested_svm_vmexit(struct vcpu_svm *svm); | 165 | static int nested_svm_vmexit(struct vcpu_svm *svm); |
133 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 166 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
134 | bool has_error_code, u32 error_code); | 167 | bool has_error_code, u32 error_code); |
@@ -163,8 +196,8 @@ static unsigned long iopm_base; | |||
163 | struct kvm_ldttss_desc { | 196 | struct kvm_ldttss_desc { |
164 | u16 limit0; | 197 | u16 limit0; |
165 | u16 base0; | 198 | u16 base0; |
166 | unsigned base1 : 8, type : 5, dpl : 2, p : 1; | 199 | unsigned base1:8, type:5, dpl:2, p:1; |
167 | unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; | 200 | unsigned limit1:4, zero0:3, g:1, base2:8; |
168 | u32 base3; | 201 | u32 base3; |
169 | u32 zero1; | 202 | u32 zero1; |
170 | } __attribute__((packed)); | 203 | } __attribute__((packed)); |
@@ -194,6 +227,27 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; | |||
194 | #define MSRS_RANGE_SIZE 2048 | 227 | #define MSRS_RANGE_SIZE 2048 |
195 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) | 228 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) |
196 | 229 | ||
230 | static u32 svm_msrpm_offset(u32 msr) | ||
231 | { | ||
232 | u32 offset; | ||
233 | int i; | ||
234 | |||
235 | for (i = 0; i < NUM_MSR_MAPS; i++) { | ||
236 | if (msr < msrpm_ranges[i] || | ||
237 | msr >= msrpm_ranges[i] + MSRS_IN_RANGE) | ||
238 | continue; | ||
239 | |||
240 | offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ | ||
241 | offset += (i * MSRS_RANGE_SIZE); /* add range offset */ | ||
242 | |||
243 | /* Now we have the u8 offset - but need the u32 offset */ | ||
244 | return offset / 4; | ||
245 | } | ||
246 | |||
247 | /* MSR not in any range */ | ||
248 | return MSR_INVALID; | ||
249 | } | ||
250 | |||
197 | #define MAX_INST_SIZE 15 | 251 | #define MAX_INST_SIZE 15 |
198 | 252 | ||
199 | static inline u32 svm_has(u32 feat) | 253 | static inline u32 svm_has(u32 feat) |
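Editor's note on svm_msrpm_offset() above: each MSR uses two bits in the permission map, so four MSRs share one byte and sixteen share one u32. A standalone copy of the same arithmetic with a worked example (user-space sketch, not kernel code):

#include <stdio.h>
#include <stdint.h>

#define MSR_INVALID     0xffffffffU
#define NUM_MSR_MAPS    3
#define MSRS_RANGE_SIZE 2048
#define MSRS_IN_RANGE   (MSRS_RANGE_SIZE * 8 / 2)

static const uint32_t msrpm_ranges[] = { 0, 0xc0000000, 0xc0010000 };

static uint32_t svm_msrpm_offset(uint32_t msr)
{
	uint32_t offset;
	int i;

	for (i = 0; i < NUM_MSR_MAPS; i++) {
		if (msr < msrpm_ranges[i] || msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
			continue;
		offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
		offset += (i * MSRS_RANGE_SIZE);       /* add range offset */
		return offset / 4;                     /* u8 offset -> u32 offset */
	}
	return MSR_INVALID;                            /* MSR not in any range */
}

int main(void)
{
	/* MSR_LSTAR (0xc0000082): u8 offset = 0x82/4 + 2048 = 2080, u32 offset = 520 */
	printf("%u\n", svm_msrpm_offset(0xc0000082));  /* prints 520 */
	printf("%#x\n", svm_msrpm_offset(0x2000));     /* outside all ranges: 0xffffffff */
	return 0;
}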
@@ -213,7 +267,7 @@ static inline void stgi(void) | |||
213 | 267 | ||
214 | static inline void invlpga(unsigned long addr, u32 asid) | 268 | static inline void invlpga(unsigned long addr, u32 asid) |
215 | { | 269 | { |
216 | asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); | 270 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
217 | } | 271 | } |
218 | 272 | ||
219 | static inline void force_new_asid(struct kvm_vcpu *vcpu) | 273 | static inline void force_new_asid(struct kvm_vcpu *vcpu) |
@@ -235,23 +289,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
235 | vcpu->arch.efer = efer; | 289 | vcpu->arch.efer = efer; |
236 | } | 290 | } |
237 | 291 | ||
238 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
239 | bool has_error_code, u32 error_code) | ||
240 | { | ||
241 | struct vcpu_svm *svm = to_svm(vcpu); | ||
242 | |||
243 | /* If we are within a nested VM we'd better #VMEXIT and let the | ||
244 | guest handle the exception */ | ||
245 | if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
246 | return; | ||
247 | |||
248 | svm->vmcb->control.event_inj = nr | ||
249 | | SVM_EVTINJ_VALID | ||
250 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
251 | | SVM_EVTINJ_TYPE_EXEPT; | ||
252 | svm->vmcb->control.event_inj_err = error_code; | ||
253 | } | ||
254 | |||
255 | static int is_external_interrupt(u32 info) | 292 | static int is_external_interrupt(u32 info) |
256 | { | 293 | { |
257 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | 294 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; |
@@ -264,7 +301,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
264 | u32 ret = 0; | 301 | u32 ret = 0; |
265 | 302 | ||
266 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) | 303 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) |
267 | ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; | 304 | ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; |
268 | return ret & mask; | 305 | return ret & mask; |
269 | } | 306 | } |
270 | 307 | ||
@@ -283,6 +320,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
283 | { | 320 | { |
284 | struct vcpu_svm *svm = to_svm(vcpu); | 321 | struct vcpu_svm *svm = to_svm(vcpu); |
285 | 322 | ||
323 | if (svm->vmcb->control.next_rip != 0) | ||
324 | svm->next_rip = svm->vmcb->control.next_rip; | ||
325 | |||
286 | if (!svm->next_rip) { | 326 | if (!svm->next_rip) { |
287 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != | 327 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
288 | EMULATE_DONE) | 328 | EMULATE_DONE) |
@@ -297,6 +337,43 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
297 | svm_set_interrupt_shadow(vcpu, 0); | 337 | svm_set_interrupt_shadow(vcpu, 0); |
298 | } | 338 | } |
299 | 339 | ||
340 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
341 | bool has_error_code, u32 error_code, | ||
342 | bool reinject) | ||
343 | { | ||
344 | struct vcpu_svm *svm = to_svm(vcpu); | ||
345 | |||
346 | /* | ||
347 | * If we are within a nested VM we'd better #VMEXIT and let the guest | ||
348 | * handle the exception | ||
349 | */ | ||
350 | if (!reinject && | ||
351 | nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
352 | return; | ||
353 | |||
354 | if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { | ||
355 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); | ||
356 | |||
357 | /* | ||
358 | * For guest debugging where we have to reinject #BP if some | ||
359 | * INT3 is guest-owned: | ||
360 | * Emulate nRIP by moving RIP forward. Will fail if injection | ||
361 | * raises a fault that is not intercepted. Still better than | ||
362 | * failing in all cases. | ||
363 | */ | ||
364 | skip_emulated_instruction(&svm->vcpu); | ||
365 | rip = kvm_rip_read(&svm->vcpu); | ||
366 | svm->int3_rip = rip + svm->vmcb->save.cs.base; | ||
367 | svm->int3_injected = rip - old_rip; | ||
368 | } | ||
369 | |||
370 | svm->vmcb->control.event_inj = nr | ||
371 | | SVM_EVTINJ_VALID | ||
372 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
373 | | SVM_EVTINJ_TYPE_EXEPT; | ||
374 | svm->vmcb->control.event_inj_err = error_code; | ||
375 | } | ||
376 | |||
300 | static int has_svm(void) | 377 | static int has_svm(void) |
301 | { | 378 | { |
302 | const char *msg; | 379 | const char *msg; |
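Editor's note on the reworked svm_queue_exception() above: #BP is a trap, so RIP must already point past the INT3 when the event is injected. Without the NRIP feature the code emulates that by skipping the instruction first and recording its length and the resulting linear RIP, presumably so the injection can be detected and rolled back later if it never completed. A small numeric illustration with hypothetical values (one-byte 0xCC breakpoint, cs.base 0):

#include <assert.h>

int main(void)
{
	unsigned long old_rip = 0x1000;
	unsigned long rip = old_rip + 1;          /* skip_emulated_instruction() moves past INT3 */
	unsigned long int3_rip = rip + 0;         /* rip + svm->vmcb->save.cs.base */
	unsigned int int3_injected = (unsigned int)(rip - old_rip);

	assert(int3_injected == 1);               /* length of the injected breakpoint */
	assert(int3_rip == 0x1001);               /* where hardware nRIP would have pointed */
	return 0;
}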
@@ -319,7 +396,7 @@ static int svm_hardware_enable(void *garbage) | |||
319 | 396 | ||
320 | struct svm_cpu_data *sd; | 397 | struct svm_cpu_data *sd; |
321 | uint64_t efer; | 398 | uint64_t efer; |
322 | struct descriptor_table gdt_descr; | 399 | struct desc_ptr gdt_descr; |
323 | struct desc_struct *gdt; | 400 | struct desc_struct *gdt; |
324 | int me = raw_smp_processor_id(); | 401 | int me = raw_smp_processor_id(); |
325 | 402 | ||
@@ -344,8 +421,8 @@ static int svm_hardware_enable(void *garbage) | |||
344 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; | 421 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; |
345 | sd->next_asid = sd->max_asid + 1; | 422 | sd->next_asid = sd->max_asid + 1; |
346 | 423 | ||
347 | kvm_get_gdt(&gdt_descr); | 424 | native_store_gdt(&gdt_descr); |
348 | gdt = (struct desc_struct *)gdt_descr.base; | 425 | gdt = (struct desc_struct *)gdt_descr.address; |
349 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 426 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
350 | 427 | ||
351 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 428 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
@@ -391,42 +468,98 @@ err_1: | |||
391 | 468 | ||
392 | } | 469 | } |
393 | 470 | ||
471 | static bool valid_msr_intercept(u32 index) | ||
472 | { | ||
473 | int i; | ||
474 | |||
475 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) | ||
476 | if (direct_access_msrs[i].index == index) | ||
477 | return true; | ||
478 | |||
479 | return false; | ||
480 | } | ||
481 | |||
394 | static void set_msr_interception(u32 *msrpm, unsigned msr, | 482 | static void set_msr_interception(u32 *msrpm, unsigned msr, |
395 | int read, int write) | 483 | int read, int write) |
396 | { | 484 | { |
485 | u8 bit_read, bit_write; | ||
486 | unsigned long tmp; | ||
487 | u32 offset; | ||
488 | |||
489 | /* | ||
490 | * If this warning triggers extend the direct_access_msrs list at the | ||
491 | * beginning of the file | ||
492 | */ | ||
493 | WARN_ON(!valid_msr_intercept(msr)); | ||
494 | |||
495 | offset = svm_msrpm_offset(msr); | ||
496 | bit_read = 2 * (msr & 0x0f); | ||
497 | bit_write = 2 * (msr & 0x0f) + 1; | ||
498 | tmp = msrpm[offset]; | ||
499 | |||
500 | BUG_ON(offset == MSR_INVALID); | ||
501 | |||
502 | read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); | ||
503 | write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); | ||
504 | |||
505 | msrpm[offset] = tmp; | ||
506 | } | ||
507 | |||
508 | static void svm_vcpu_init_msrpm(u32 *msrpm) | ||
509 | { | ||
397 | int i; | 510 | int i; |
398 | 511 | ||
399 | for (i = 0; i < NUM_MSR_MAPS; i++) { | 512 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); |
400 | if (msr >= msrpm_ranges[i] && | 513 | |
401 | msr < msrpm_ranges[i] + MSRS_IN_RANGE) { | 514 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
402 | u32 msr_offset = (i * MSRS_IN_RANGE + msr - | 515 | if (!direct_access_msrs[i].always) |
403 | msrpm_ranges[i]) * 2; | 516 | continue; |
404 | 517 | ||
405 | u32 *base = msrpm + (msr_offset / 32); | 518 | set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); |
406 | u32 msr_shift = msr_offset % 32; | 519 | } |
407 | u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); | 520 | } |
408 | *base = (*base & ~(0x3 << msr_shift)) | | 521 | |
409 | (mask << msr_shift); | 522 | static void add_msr_offset(u32 offset) |
523 | { | ||
524 | int i; | ||
525 | |||
526 | for (i = 0; i < MSRPM_OFFSETS; ++i) { | ||
527 | |||
528 | /* Offset already in list? */ | ||
529 | if (msrpm_offsets[i] == offset) | ||
410 | return; | 530 | return; |
411 | } | 531 | |
532 | /* Slot used by another offset? */ | ||
533 | if (msrpm_offsets[i] != MSR_INVALID) | ||
534 | continue; | ||
535 | |||
536 | /* Add offset to list */ | ||
537 | msrpm_offsets[i] = offset; | ||
538 | |||
539 | return; | ||
412 | } | 540 | } |
541 | |||
542 | /* | ||
543 | * If this BUG triggers the msrpm_offsets table has an overflow. Just | ||
544 | * increase MSRPM_OFFSETS in this case. | ||
545 | */ | ||
413 | BUG(); | 546 | BUG(); |
414 | } | 547 | } |
415 | 548 | ||
416 | static void svm_vcpu_init_msrpm(u32 *msrpm) | 549 | static void init_msrpm_offsets(void) |
417 | { | 550 | { |
418 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); | 551 | int i; |
419 | 552 | ||
420 | #ifdef CONFIG_X86_64 | 553 | memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); |
421 | set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); | 554 | |
422 | set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); | 555 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
423 | set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); | 556 | u32 offset; |
424 | set_msr_interception(msrpm, MSR_LSTAR, 1, 1); | 557 | |
425 | set_msr_interception(msrpm, MSR_CSTAR, 1, 1); | 558 | offset = svm_msrpm_offset(direct_access_msrs[i].index); |
426 | set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); | 559 | BUG_ON(offset == MSR_INVALID); |
427 | #endif | 560 | |
428 | set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); | 561 | add_msr_offset(offset); |
429 | set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); | 562 | } |
430 | } | 563 | } |
431 | 564 | ||
432 | static void svm_enable_lbrv(struct vcpu_svm *svm) | 565 | static void svm_enable_lbrv(struct vcpu_svm *svm) |
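Editor's note on the new set_msr_interception() above: every u32 word of the permission map covers 16 consecutive MSRs with two bits each, bit 2n for read and bit 2n+1 for write, and a set bit means "intercept". A minimal user-space sketch of that bit layout (offset lookup stubbed out, not kernel code):

#include <stdio.h>
#include <stdint.h>

static void set_msr_interception(uint32_t *msrpm, uint32_t offset,
				 uint32_t msr, int read, int write)
{
	uint32_t bit_read  = 2 * (msr & 0x0f);
	uint32_t bit_write = 2 * (msr & 0x0f) + 1;
	uint32_t tmp = msrpm[offset];

	/* Allowing access clears the bit; denying it sets the bit. */
	tmp = read  ? (tmp & ~(1u << bit_read))  : (tmp | (1u << bit_read));
	tmp = write ? (tmp & ~(1u << bit_write)) : (tmp | (1u << bit_write));

	msrpm[offset] = tmp;
}

int main(void)
{
	uint32_t word = 0xffffffff;              /* all accesses intercepted */

	/* Allow read+write for an MSR with (msr & 0x0f) == 2, e.g. 0xc0000082. */
	set_msr_interception(&word, 0, 0xc0000082, 1, 1);
	printf("%#x\n", word);                   /* bits 4 and 5 cleared: 0xffffffcf */
	return 0;
}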
@@ -467,6 +600,8 @@ static __init int svm_hardware_setup(void) | |||
467 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); | 600 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); |
468 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; | 601 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; |
469 | 602 | ||
603 | init_msrpm_offsets(); | ||
604 | |||
470 | if (boot_cpu_has(X86_FEATURE_NX)) | 605 | if (boot_cpu_has(X86_FEATURE_NX)) |
471 | kvm_enable_efer_bits(EFER_NX); | 606 | kvm_enable_efer_bits(EFER_NX); |
472 | 607 | ||
@@ -523,7 +658,7 @@ static void init_seg(struct vmcb_seg *seg) | |||
523 | { | 658 | { |
524 | seg->selector = 0; | 659 | seg->selector = 0; |
525 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | | 660 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | |
526 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ | 661 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ |
527 | seg->limit = 0xffff; | 662 | seg->limit = 0xffff; |
528 | seg->base = 0; | 663 | seg->base = 0; |
529 | } | 664 | } |
@@ -543,16 +678,16 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
543 | 678 | ||
544 | svm->vcpu.fpu_active = 1; | 679 | svm->vcpu.fpu_active = 1; |
545 | 680 | ||
546 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 681 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
547 | INTERCEPT_CR3_MASK | | 682 | INTERCEPT_CR3_MASK | |
548 | INTERCEPT_CR4_MASK; | 683 | INTERCEPT_CR4_MASK; |
549 | 684 | ||
550 | control->intercept_cr_write = INTERCEPT_CR0_MASK | | 685 | control->intercept_cr_write = INTERCEPT_CR0_MASK | |
551 | INTERCEPT_CR3_MASK | | 686 | INTERCEPT_CR3_MASK | |
552 | INTERCEPT_CR4_MASK | | 687 | INTERCEPT_CR4_MASK | |
553 | INTERCEPT_CR8_MASK; | 688 | INTERCEPT_CR8_MASK; |
554 | 689 | ||
555 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 690 | control->intercept_dr_read = INTERCEPT_DR0_MASK | |
556 | INTERCEPT_DR1_MASK | | 691 | INTERCEPT_DR1_MASK | |
557 | INTERCEPT_DR2_MASK | | 692 | INTERCEPT_DR2_MASK | |
558 | INTERCEPT_DR3_MASK | | 693 | INTERCEPT_DR3_MASK | |
@@ -561,7 +696,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
561 | INTERCEPT_DR6_MASK | | 696 | INTERCEPT_DR6_MASK | |
562 | INTERCEPT_DR7_MASK; | 697 | INTERCEPT_DR7_MASK; |
563 | 698 | ||
564 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 699 | control->intercept_dr_write = INTERCEPT_DR0_MASK | |
565 | INTERCEPT_DR1_MASK | | 700 | INTERCEPT_DR1_MASK | |
566 | INTERCEPT_DR2_MASK | | 701 | INTERCEPT_DR2_MASK | |
567 | INTERCEPT_DR3_MASK | | 702 | INTERCEPT_DR3_MASK | |
@@ -575,7 +710,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
575 | (1 << MC_VECTOR); | 710 | (1 << MC_VECTOR); |
576 | 711 | ||
577 | 712 | ||
578 | control->intercept = (1ULL << INTERCEPT_INTR) | | 713 | control->intercept = (1ULL << INTERCEPT_INTR) | |
579 | (1ULL << INTERCEPT_NMI) | | 714 | (1ULL << INTERCEPT_NMI) | |
580 | (1ULL << INTERCEPT_SMI) | | 715 | (1ULL << INTERCEPT_SMI) | |
581 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | 716 | (1ULL << INTERCEPT_SELECTIVE_CR0) | |
@@ -636,7 +771,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
636 | save->rip = 0x0000fff0; | 771 | save->rip = 0x0000fff0; |
637 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 772 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
638 | 773 | ||
639 | /* This is the guest-visible cr0 value. | 774 | /* |
775 | * This is the guest-visible cr0 value. | ||
640 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 776 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
641 | */ | 777 | */ |
642 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 778 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
@@ -729,6 +865,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
729 | svm_vcpu_init_msrpm(svm->msrpm); | 865 | svm_vcpu_init_msrpm(svm->msrpm); |
730 | 866 | ||
731 | svm->nested.msrpm = page_address(nested_msrpm_pages); | 867 | svm->nested.msrpm = page_address(nested_msrpm_pages); |
868 | svm_vcpu_init_msrpm(svm->nested.msrpm); | ||
732 | 869 | ||
733 | svm->vmcb = page_address(page); | 870 | svm->vmcb = page_address(page); |
734 | clear_page(svm->vmcb); | 871 | clear_page(svm->vmcb); |
@@ -882,7 +1019,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
882 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | 1019 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; |
883 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; | 1020 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; |
884 | 1021 | ||
885 | /* AMD's VMCB does not have an explicit unusable field, so emulate it | 1022 | /* |
1023 | * AMD's VMCB does not have an explicit unusable field, so emulate it | ||
886 | * for cross vendor migration purposes by "not present" | 1024 | * for cross vendor migration purposes by "not present" |
887 | */ | 1025 | */ |
888 | var->unusable = !var->present || (var->type == 0); | 1026 | var->unusable = !var->present || (var->type == 0); |
@@ -918,7 +1056,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
918 | var->type |= 0x1; | 1056 | var->type |= 0x1; |
919 | break; | 1057 | break; |
920 | case VCPU_SREG_SS: | 1058 | case VCPU_SREG_SS: |
921 | /* On AMD CPUs sometimes the DB bit in the segment | 1059 | /* |
1060 | * On AMD CPUs sometimes the DB bit in the segment | ||
922 | * descriptor is left as 1, although the whole segment has | 1061 | * descriptor is left as 1, although the whole segment has |
923 | * been made unusable. Clear it here to pass an Intel VMX | 1062 | * been made unusable. Clear it here to pass an Intel VMX |
924 | * entry check when cross vendor migrating. | 1063 | * entry check when cross vendor migrating. |
@@ -936,36 +1075,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu) | |||
936 | return save->cpl; | 1075 | return save->cpl; |
937 | } | 1076 | } |
938 | 1077 | ||
939 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1078 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
940 | { | 1079 | { |
941 | struct vcpu_svm *svm = to_svm(vcpu); | 1080 | struct vcpu_svm *svm = to_svm(vcpu); |
942 | 1081 | ||
943 | dt->limit = svm->vmcb->save.idtr.limit; | 1082 | dt->size = svm->vmcb->save.idtr.limit; |
944 | dt->base = svm->vmcb->save.idtr.base; | 1083 | dt->address = svm->vmcb->save.idtr.base; |
945 | } | 1084 | } |
946 | 1085 | ||
947 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1086 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
948 | { | 1087 | { |
949 | struct vcpu_svm *svm = to_svm(vcpu); | 1088 | struct vcpu_svm *svm = to_svm(vcpu); |
950 | 1089 | ||
951 | svm->vmcb->save.idtr.limit = dt->limit; | 1090 | svm->vmcb->save.idtr.limit = dt->size; |
952 | svm->vmcb->save.idtr.base = dt->base ; | 1091 | svm->vmcb->save.idtr.base = dt->address ; |
953 | } | 1092 | } |
954 | 1093 | ||
955 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1094 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
956 | { | 1095 | { |
957 | struct vcpu_svm *svm = to_svm(vcpu); | 1096 | struct vcpu_svm *svm = to_svm(vcpu); |
958 | 1097 | ||
959 | dt->limit = svm->vmcb->save.gdtr.limit; | 1098 | dt->size = svm->vmcb->save.gdtr.limit; |
960 | dt->base = svm->vmcb->save.gdtr.base; | 1099 | dt->address = svm->vmcb->save.gdtr.base; |
961 | } | 1100 | } |
962 | 1101 | ||
963 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1102 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
964 | { | 1103 | { |
965 | struct vcpu_svm *svm = to_svm(vcpu); | 1104 | struct vcpu_svm *svm = to_svm(vcpu); |
966 | 1105 | ||
967 | svm->vmcb->save.gdtr.limit = dt->limit; | 1106 | svm->vmcb->save.gdtr.limit = dt->size; |
968 | svm->vmcb->save.gdtr.base = dt->base ; | 1107 | svm->vmcb->save.gdtr.base = dt->address ; |
969 | } | 1108 | } |
970 | 1109 | ||
971 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1110 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
@@ -978,6 +1117,7 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
978 | 1117 | ||
979 | static void update_cr0_intercept(struct vcpu_svm *svm) | 1118 | static void update_cr0_intercept(struct vcpu_svm *svm) |
980 | { | 1119 | { |
1120 | struct vmcb *vmcb = svm->vmcb; | ||
981 | ulong gcr0 = svm->vcpu.arch.cr0; | 1121 | ulong gcr0 = svm->vcpu.arch.cr0; |
982 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1122 | u64 *hcr0 = &svm->vmcb->save.cr0; |
983 | 1123 | ||
@@ -989,11 +1129,25 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
989 | 1129 | ||
990 | 1130 | ||
991 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1131 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { |
992 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | 1132 | vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; |
993 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | 1133 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; |
1134 | if (is_nested(svm)) { | ||
1135 | struct vmcb *hsave = svm->nested.hsave; | ||
1136 | |||
1137 | hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
1138 | hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
1139 | vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; | ||
1140 | vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; | ||
1141 | } | ||
994 | } else { | 1142 | } else { |
995 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | 1143 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; |
996 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | 1144 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; |
1145 | if (is_nested(svm)) { | ||
1146 | struct vmcb *hsave = svm->nested.hsave; | ||
1147 | |||
1148 | hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
1149 | hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
1150 | } | ||
997 | } | 1151 | } |
998 | } | 1152 | } |
999 | 1153 | ||
@@ -1001,6 +1155,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1001 | { | 1155 | { |
1002 | struct vcpu_svm *svm = to_svm(vcpu); | 1156 | struct vcpu_svm *svm = to_svm(vcpu); |
1003 | 1157 | ||
1158 | if (is_nested(svm)) { | ||
1159 | /* | ||
1160 | * We are here because we run in nested mode, the host kvm | ||
1161 | * intercepts cr0 writes but the l1 hypervisor does not. | ||
1162 | * But the L1 hypervisor may intercept selective cr0 writes. | ||
1163 | * This needs to be checked here. | ||
1164 | */ | ||
1165 | unsigned long old, new; | ||
1166 | |||
1167 | /* Remove bits that would trigger a real cr0 write intercept */ | ||
1168 | old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1169 | new = cr0 & SVM_CR0_SELECTIVE_MASK; | ||
1170 | |||
1171 | if (old == new) { | ||
1172 | /* cr0 write with ts and mp unchanged */ | ||
1173 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
1174 | if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) | ||
1175 | return; | ||
1176 | } | ||
1177 | } | ||
1178 | |||
1004 | #ifdef CONFIG_X86_64 | 1179 | #ifdef CONFIG_X86_64 |
1005 | if (vcpu->arch.efer & EFER_LME) { | 1180 | if (vcpu->arch.efer & EFER_LME) { |
1006 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1181 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
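Editor's note on the nested CR0 check above: the write is reflected to L1 as SVM_EXIT_CR0_SEL_WRITE only when the selectively intercepted bits stay unchanged. A minimal sketch of the mask comparison, assuming the selective mask covers exactly TS and MP as the "ts and mp unchanged" comment suggests:

#include <stdio.h>

#define X86_CR0_MP (1UL << 1)
#define X86_CR0_TS (1UL << 3)
/* Assumption: SVM_CR0_SELECTIVE_MASK == TS | MP */
#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)

int main(void)
{
	unsigned long guest_cr0 = 0x8000003b;   /* example current cr0 */
	unsigned long new_cr0   = 0x8005003b;   /* flips bits outside TS/MP only */

	unsigned long old_sel = guest_cr0 & SVM_CR0_SELECTIVE_MASK;
	unsigned long new_sel = new_cr0   & SVM_CR0_SELECTIVE_MASK;

	/* old_sel == new_sel: candidate for SVM_EXIT_CR0_SEL_WRITE into L1 */
	printf("%s\n", old_sel == new_sel ?
	       "SVM_EXIT_CR0_SEL_WRITE candidate" : "full CR0 write");
	return 0;
}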
@@ -1134,70 +1309,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1134 | svm->vmcb->control.asid = sd->next_asid++; | 1309 | svm->vmcb->control.asid = sd->next_asid++; |
1135 | } | 1310 | } |
1136 | 1311 | ||
1137 | static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) | 1312 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
1138 | { | 1313 | { |
1139 | struct vcpu_svm *svm = to_svm(vcpu); | 1314 | struct vcpu_svm *svm = to_svm(vcpu); |
1140 | 1315 | ||
1141 | switch (dr) { | 1316 | svm->vmcb->save.dr7 = value; |
1142 | case 0 ... 3: | ||
1143 | *dest = vcpu->arch.db[dr]; | ||
1144 | break; | ||
1145 | case 4: | ||
1146 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1147 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1148 | /* fall through */ | ||
1149 | case 6: | ||
1150 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
1151 | *dest = vcpu->arch.dr6; | ||
1152 | else | ||
1153 | *dest = svm->vmcb->save.dr6; | ||
1154 | break; | ||
1155 | case 5: | ||
1156 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1157 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1158 | /* fall through */ | ||
1159 | case 7: | ||
1160 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
1161 | *dest = vcpu->arch.dr7; | ||
1162 | else | ||
1163 | *dest = svm->vmcb->save.dr7; | ||
1164 | break; | ||
1165 | } | ||
1166 | |||
1167 | return EMULATE_DONE; | ||
1168 | } | ||
1169 | |||
1170 | static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) | ||
1171 | { | ||
1172 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1173 | |||
1174 | switch (dr) { | ||
1175 | case 0 ... 3: | ||
1176 | vcpu->arch.db[dr] = value; | ||
1177 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
1178 | vcpu->arch.eff_db[dr] = value; | ||
1179 | break; | ||
1180 | case 4: | ||
1181 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1182 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1183 | /* fall through */ | ||
1184 | case 6: | ||
1185 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; | ||
1186 | break; | ||
1187 | case 5: | ||
1188 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1189 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1190 | /* fall through */ | ||
1191 | case 7: | ||
1192 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; | ||
1193 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
1194 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | ||
1195 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); | ||
1196 | } | ||
1197 | break; | ||
1198 | } | ||
1199 | |||
1200 | return EMULATE_DONE; | ||
1201 | } | 1317 | } |
1202 | 1318 | ||
1203 | static int pf_interception(struct vcpu_svm *svm) | 1319 | static int pf_interception(struct vcpu_svm *svm) |
@@ -1234,7 +1350,7 @@ static int db_interception(struct vcpu_svm *svm) | |||
1234 | } | 1350 | } |
1235 | 1351 | ||
1236 | if (svm->vcpu.guest_debug & | 1352 | if (svm->vcpu.guest_debug & |
1237 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ | 1353 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { |
1238 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1354 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
1239 | kvm_run->debug.arch.pc = | 1355 | kvm_run->debug.arch.pc = |
1240 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1356 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
@@ -1268,7 +1384,22 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1268 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | 1384 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1269 | { | 1385 | { |
1270 | struct vcpu_svm *svm = to_svm(vcpu); | 1386 | struct vcpu_svm *svm = to_svm(vcpu); |
1271 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1387 | u32 excp; |
1388 | |||
1389 | if (is_nested(svm)) { | ||
1390 | u32 h_excp, n_excp; | ||
1391 | |||
1392 | h_excp = svm->nested.hsave->control.intercept_exceptions; | ||
1393 | n_excp = svm->nested.intercept_exceptions; | ||
1394 | h_excp &= ~(1 << NM_VECTOR); | ||
1395 | excp = h_excp | n_excp; | ||
1396 | } else { | ||
1397 | excp = svm->vmcb->control.intercept_exceptions; | ||
1398 | excp &= ~(1 << NM_VECTOR); | ||
1399 | } | ||
1400 | |||
1401 | svm->vmcb->control.intercept_exceptions = excp; | ||
1402 | |||
1272 | svm->vcpu.fpu_active = 1; | 1403 | svm->vcpu.fpu_active = 1; |
1273 | update_cr0_intercept(svm); | 1404 | update_cr0_intercept(svm); |
1274 | } | 1405 | } |
@@ -1309,29 +1440,23 @@ static int shutdown_interception(struct vcpu_svm *svm) | |||
1309 | 1440 | ||
1310 | static int io_interception(struct vcpu_svm *svm) | 1441 | static int io_interception(struct vcpu_svm *svm) |
1311 | { | 1442 | { |
1443 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
1312 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1444 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
1313 | int size, in, string; | 1445 | int size, in, string; |
1314 | unsigned port; | 1446 | unsigned port; |
1315 | 1447 | ||
1316 | ++svm->vcpu.stat.io_exits; | 1448 | ++svm->vcpu.stat.io_exits; |
1317 | |||
1318 | svm->next_rip = svm->vmcb->control.exit_info_2; | ||
1319 | |||
1320 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1449 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
1321 | |||
1322 | if (string) { | ||
1323 | if (emulate_instruction(&svm->vcpu, | ||
1324 | 0, 0, 0) == EMULATE_DO_MMIO) | ||
1325 | return 0; | ||
1326 | return 1; | ||
1327 | } | ||
1328 | |||
1329 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1450 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
1451 | if (string || in) | ||
1452 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | ||
1453 | |||
1330 | port = io_info >> 16; | 1454 | port = io_info >> 16; |
1331 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1455 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
1332 | 1456 | svm->next_rip = svm->vmcb->control.exit_info_2; | |
1333 | skip_emulated_instruction(&svm->vcpu); | 1457 | skip_emulated_instruction(&svm->vcpu); |
1334 | return kvm_emulate_pio(&svm->vcpu, in, size, port); | 1458 | |
1459 | return kvm_fast_pio_out(vcpu, size, port); | ||
1335 | } | 1460 | } |
1336 | 1461 | ||
1337 | static int nmi_interception(struct vcpu_svm *svm) | 1462 | static int nmi_interception(struct vcpu_svm *svm) |
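Editor's note on the io_interception() rewrite above: string and IN operations now go to the emulator, while plain OUT takes the new kvm_fast_pio_out() path. A sketch of how exit_info_1 is decoded, assuming the usual AMD APM field layout for IOIO intercepts (direction in bit 0, string in bit 2, size in bits 4-6, port in bits 16-31):

#include <stdio.h>
#include <stdint.h>

#define SVM_IOIO_TYPE_MASK  (1u << 0)   /* 1 = IN */
#define SVM_IOIO_STR_MASK   (1u << 2)
#define SVM_IOIO_SIZE_MASK  (7u << 4)
#define SVM_IOIO_SIZE_SHIFT 4

int main(void)
{
	uint32_t io_info = (0x3f8u << 16) | (1u << 4);  /* one-byte OUT to port 0x3f8 */

	int string    = (io_info & SVM_IOIO_STR_MASK) != 0;
	int in        = (io_info & SVM_IOIO_TYPE_MASK) != 0;
	unsigned port = io_info >> 16;
	int size      = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;

	/* string == 0 && in == 0: the kvm_fast_pio_out() fast path applies */
	printf("port=%#x size=%d in=%d string=%d\n", port, size, in, string);
	return 0;
}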
@@ -1384,6 +1509,8 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) | |||
1384 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 1509 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
1385 | bool has_error_code, u32 error_code) | 1510 | bool has_error_code, u32 error_code) |
1386 | { | 1511 | { |
1512 | int vmexit; | ||
1513 | |||
1387 | if (!is_nested(svm)) | 1514 | if (!is_nested(svm)) |
1388 | return 0; | 1515 | return 0; |
1389 | 1516 | ||
@@ -1392,21 +1519,28 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
1392 | svm->vmcb->control.exit_info_1 = error_code; | 1519 | svm->vmcb->control.exit_info_1 = error_code; |
1393 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; | 1520 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; |
1394 | 1521 | ||
1395 | return nested_svm_exit_handled(svm); | 1522 | vmexit = nested_svm_intercept(svm); |
1523 | if (vmexit == NESTED_EXIT_DONE) | ||
1524 | svm->nested.exit_required = true; | ||
1525 | |||
1526 | return vmexit; | ||
1396 | } | 1527 | } |
1397 | 1528 | ||
1398 | static inline int nested_svm_intr(struct vcpu_svm *svm) | 1529 | /* This function returns true if it is safe to enable the irq window */ |
1530 | static inline bool nested_svm_intr(struct vcpu_svm *svm) | ||
1399 | { | 1531 | { |
1400 | if (!is_nested(svm)) | 1532 | if (!is_nested(svm)) |
1401 | return 0; | 1533 | return true; |
1402 | 1534 | ||
1403 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | 1535 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) |
1404 | return 0; | 1536 | return true; |
1405 | 1537 | ||
1406 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) | 1538 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) |
1407 | return 0; | 1539 | return false; |
1408 | 1540 | ||
1409 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1541 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
1542 | svm->vmcb->control.exit_info_1 = 0; | ||
1543 | svm->vmcb->control.exit_info_2 = 0; | ||
1410 | 1544 | ||
1411 | if (svm->nested.intercept & 1ULL) { | 1545 | if (svm->nested.intercept & 1ULL) { |
1412 | /* | 1546 | /* |
@@ -1417,21 +1551,40 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
1417 | */ | 1551 | */ |
1418 | svm->nested.exit_required = true; | 1552 | svm->nested.exit_required = true; |
1419 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | 1553 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); |
1420 | return 1; | 1554 | return false; |
1421 | } | 1555 | } |
1422 | 1556 | ||
1423 | return 0; | 1557 | return true; |
1558 | } | ||
1559 | |||
1560 | /* This function returns true if it is safe to enable the nmi window */ | ||
1561 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) | ||
1562 | { | ||
1563 | if (!is_nested(svm)) | ||
1564 | return true; | ||
1565 | |||
1566 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) | ||
1567 | return true; | ||
1568 | |||
1569 | svm->vmcb->control.exit_code = SVM_EXIT_NMI; | ||
1570 | svm->nested.exit_required = true; | ||
1571 | |||
1572 | return false; | ||
1424 | } | 1573 | } |
1425 | 1574 | ||
1426 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | 1575 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) |
1427 | { | 1576 | { |
1428 | struct page *page; | 1577 | struct page *page; |
1429 | 1578 | ||
1579 | might_sleep(); | ||
1580 | |||
1430 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1581 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
1431 | if (is_error_page(page)) | 1582 | if (is_error_page(page)) |
1432 | goto error; | 1583 | goto error; |
1433 | 1584 | ||
1434 | return kmap_atomic(page, idx); | 1585 | *_page = page; |
1586 | |||
1587 | return kmap(page); | ||
1435 | 1588 | ||
1436 | error: | 1589 | error: |
1437 | kvm_release_page_clean(page); | 1590 | kvm_release_page_clean(page); |
@@ -1440,61 +1593,55 @@ error: | |||
1440 | return NULL; | 1593 | return NULL; |
1441 | } | 1594 | } |
1442 | 1595 | ||
1443 | static void nested_svm_unmap(void *addr, enum km_type idx) | 1596 | static void nested_svm_unmap(struct page *page) |
1444 | { | 1597 | { |
1445 | struct page *page; | 1598 | kunmap(page); |
1599 | kvm_release_page_dirty(page); | ||
1600 | } | ||
1446 | 1601 | ||
1447 | if (!addr) | 1602 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) |
1448 | return; | 1603 | { |
1604 | unsigned port; | ||
1605 | u8 val, bit; | ||
1606 | u64 gpa; | ||
1449 | 1607 | ||
1450 | page = kmap_atomic_to_page(addr); | 1608 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) |
1609 | return NESTED_EXIT_HOST; | ||
1451 | 1610 | ||
1452 | kunmap_atomic(addr, idx); | 1611 | port = svm->vmcb->control.exit_info_1 >> 16; |
1453 | kvm_release_page_dirty(page); | 1612 | gpa = svm->nested.vmcb_iopm + (port / 8); |
1613 | bit = port % 8; | ||
1614 | val = 0; | ||
1615 | |||
1616 | if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) | ||
1617 | val &= (1 << bit); | ||
1618 | |||
1619 | return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | ||
1454 | } | 1620 | } |
1455 | 1621 | ||
1456 | static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) | 1622 | static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) |
1457 | { | 1623 | { |
1458 | u32 param = svm->vmcb->control.exit_info_1 & 1; | 1624 | u32 offset, msr, value; |
1459 | u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 1625 | int write, mask; |
1460 | bool ret = false; | ||
1461 | u32 t0, t1; | ||
1462 | u8 *msrpm; | ||
1463 | 1626 | ||
1464 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) | 1627 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
1465 | return false; | 1628 | return NESTED_EXIT_HOST; |
1466 | 1629 | ||
1467 | msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1630 | msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
1631 | offset = svm_msrpm_offset(msr); | ||
1632 | write = svm->vmcb->control.exit_info_1 & 1; | ||
1633 | mask = 1 << ((2 * (msr & 0xf)) + write); | ||
1468 | 1634 | ||
1469 | if (!msrpm) | 1635 | if (offset == MSR_INVALID) |
1470 | goto out; | 1636 | return NESTED_EXIT_DONE; |
1471 | 1637 | ||
1472 | switch (msr) { | 1638 | /* Offset is in 32 bit units but need in 8 bit units */ |
1473 | case 0 ... 0x1fff: | 1639 | offset *= 4; |
1474 | t0 = (msr * 2) % 8; | ||
1475 | t1 = msr / 8; | ||
1476 | break; | ||
1477 | case 0xc0000000 ... 0xc0001fff: | ||
1478 | t0 = (8192 + msr - 0xc0000000) * 2; | ||
1479 | t1 = (t0 / 8); | ||
1480 | t0 %= 8; | ||
1481 | break; | ||
1482 | case 0xc0010000 ... 0xc0011fff: | ||
1483 | t0 = (16384 + msr - 0xc0010000) * 2; | ||
1484 | t1 = (t0 / 8); | ||
1485 | t0 %= 8; | ||
1486 | break; | ||
1487 | default: | ||
1488 | ret = true; | ||
1489 | goto out; | ||
1490 | } | ||
1491 | 1640 | ||
1492 | ret = msrpm[t1] & ((1 << param) << t0); | 1641 | if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) |
1493 | 1642 | return NESTED_EXIT_DONE; | |
1494 | out: | ||
1495 | nested_svm_unmap(msrpm, KM_USER0); | ||
1496 | 1643 | ||
1497 | return ret; | 1644 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
1498 | } | 1645 | } |
1499 | 1646 | ||
1500 | static int nested_svm_exit_special(struct vcpu_svm *svm) | 1647 | static int nested_svm_exit_special(struct vcpu_svm *svm) |
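Editor's note on the two nested intercept checks above: instead of mapping the whole guest bitmap, both now read a single byte or word straight from guest memory. The IOPM uses one bit per port (byte port/8, bit port%8); the MSRPM uses the same two-bits-per-MSR layout as set_msr_interception(). A small sketch of the bit positions involved (hypothetical values, not kernel code):

#include <stdio.h>
#include <stdint.h>

static int iopm_bit(unsigned port, unsigned *byte_off)
{
	*byte_off = port / 8;        /* byte inside the nested IOPM */
	return port % 8;             /* bit inside that byte */
}

static unsigned msrpm_mask(uint32_t msr, int write)
{
	return 1u << ((2 * (msr & 0xf)) + write);
}

int main(void)
{
	unsigned byte_off;
	int bit = iopm_bit(0x3f8, &byte_off);
	printf("IOPM: byte %u, bit %d\n", byte_off, bit);       /* byte 127, bit 0 */

	/* MSR_LSTAR (0xc0000082), write access: u32 offset 520, byte offset 2080,
	 * and bit 5 inside that word. */
	printf("MSRPM mask: %#x\n", msrpm_mask(0xc0000082, 1)); /* 0x20 */
	return 0;
}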
@@ -1504,17 +1651,21 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1504 | switch (exit_code) { | 1651 | switch (exit_code) { |
1505 | case SVM_EXIT_INTR: | 1652 | case SVM_EXIT_INTR: |
1506 | case SVM_EXIT_NMI: | 1653 | case SVM_EXIT_NMI: |
1654 | case SVM_EXIT_EXCP_BASE + MC_VECTOR: | ||
1507 | return NESTED_EXIT_HOST; | 1655 | return NESTED_EXIT_HOST; |
1508 | /* For now we are always handling NPFs when using them */ | ||
1509 | case SVM_EXIT_NPF: | 1656 | case SVM_EXIT_NPF: |
1657 | /* For now we are always handling NPFs when using them */ | ||
1510 | if (npt_enabled) | 1658 | if (npt_enabled) |
1511 | return NESTED_EXIT_HOST; | 1659 | return NESTED_EXIT_HOST; |
1512 | break; | 1660 | break; |
1513 | /* When we're shadowing, trap PFs */ | ||
1514 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: | 1661 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: |
1662 | /* When we're shadowing, trap PFs */ | ||
1515 | if (!npt_enabled) | 1663 | if (!npt_enabled) |
1516 | return NESTED_EXIT_HOST; | 1664 | return NESTED_EXIT_HOST; |
1517 | break; | 1665 | break; |
1666 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | ||
1667 | nm_interception(svm); | ||
1668 | break; | ||
1518 | default: | 1669 | default: |
1519 | break; | 1670 | break; |
1520 | } | 1671 | } |
@@ -1525,7 +1676,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
1525 | /* | 1676 | /* |
1526 | * If this function returns true, this #vmexit was already handled | 1677 | * If this function returns true, this #vmexit was already handled |
1527 | */ | 1678 | */ |
1528 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | 1679 | static int nested_svm_intercept(struct vcpu_svm *svm) |
1529 | { | 1680 | { |
1530 | u32 exit_code = svm->vmcb->control.exit_code; | 1681 | u32 exit_code = svm->vmcb->control.exit_code; |
1531 | int vmexit = NESTED_EXIT_HOST; | 1682 | int vmexit = NESTED_EXIT_HOST; |
@@ -1534,6 +1685,9 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1534 | case SVM_EXIT_MSR: | 1685 | case SVM_EXIT_MSR: |
1535 | vmexit = nested_svm_exit_handled_msr(svm); | 1686 | vmexit = nested_svm_exit_handled_msr(svm); |
1536 | break; | 1687 | break; |
1688 | case SVM_EXIT_IOIO: | ||
1689 | vmexit = nested_svm_intercept_ioio(svm); | ||
1690 | break; | ||
1537 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { | 1691 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { |
1538 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); | 1692 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); |
1539 | if (svm->nested.intercept_cr_read & cr_bits) | 1693 | if (svm->nested.intercept_cr_read & cr_bits) |
@@ -1564,6 +1718,10 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1564 | vmexit = NESTED_EXIT_DONE; | 1718 | vmexit = NESTED_EXIT_DONE; |
1565 | break; | 1719 | break; |
1566 | } | 1720 | } |
1721 | case SVM_EXIT_ERR: { | ||
1722 | vmexit = NESTED_EXIT_DONE; | ||
1723 | break; | ||
1724 | } | ||
1567 | default: { | 1725 | default: { |
1568 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | 1726 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); |
1569 | if (svm->nested.intercept & exit_bits) | 1727 | if (svm->nested.intercept & exit_bits) |
@@ -1571,9 +1729,17 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
1571 | } | 1729 | } |
1572 | } | 1730 | } |
1573 | 1731 | ||
1574 | if (vmexit == NESTED_EXIT_DONE) { | 1732 | return vmexit; |
1733 | } | ||
1734 | |||
1735 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | ||
1736 | { | ||
1737 | int vmexit; | ||
1738 | |||
1739 | vmexit = nested_svm_intercept(svm); | ||
1740 | |||
1741 | if (vmexit == NESTED_EXIT_DONE) | ||
1575 | nested_svm_vmexit(svm); | 1742 | nested_svm_vmexit(svm); |
1576 | } | ||
1577 | 1743 | ||
1578 | return vmexit; | 1744 | return vmexit; |
1579 | } | 1745 | } |
@@ -1615,6 +1781,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1615 | struct vmcb *nested_vmcb; | 1781 | struct vmcb *nested_vmcb; |
1616 | struct vmcb *hsave = svm->nested.hsave; | 1782 | struct vmcb *hsave = svm->nested.hsave; |
1617 | struct vmcb *vmcb = svm->vmcb; | 1783 | struct vmcb *vmcb = svm->vmcb; |
1784 | struct page *page; | ||
1618 | 1785 | ||
1619 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | 1786 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, |
1620 | vmcb->control.exit_info_1, | 1787 | vmcb->control.exit_info_1, |
@@ -1622,10 +1789,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1622 | vmcb->control.exit_int_info, | 1789 | vmcb->control.exit_int_info, |
1623 | vmcb->control.exit_int_info_err); | 1790 | vmcb->control.exit_int_info_err); |
1624 | 1791 | ||
1625 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1792 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); |
1626 | if (!nested_vmcb) | 1793 | if (!nested_vmcb) |
1627 | return 1; | 1794 | return 1; |
1628 | 1795 | ||
1796 | /* Exit nested SVM mode */ | ||
1797 | svm->nested.vmcb = 0; | ||
1798 | |||
1629 | /* Give the current vmcb to the guest */ | 1799 | /* Give the current vmcb to the guest */ |
1630 | disable_gif(svm); | 1800 | disable_gif(svm); |
1631 | 1801 | ||
@@ -1635,9 +1805,10 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1635 | nested_vmcb->save.ds = vmcb->save.ds; | 1805 | nested_vmcb->save.ds = vmcb->save.ds; |
1636 | nested_vmcb->save.gdtr = vmcb->save.gdtr; | 1806 | nested_vmcb->save.gdtr = vmcb->save.gdtr; |
1637 | nested_vmcb->save.idtr = vmcb->save.idtr; | 1807 | nested_vmcb->save.idtr = vmcb->save.idtr; |
1638 | if (npt_enabled) | 1808 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); |
1639 | nested_vmcb->save.cr3 = vmcb->save.cr3; | 1809 | nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; |
1640 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 1810 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
1811 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | ||
1641 | nested_vmcb->save.rflags = vmcb->save.rflags; | 1812 | nested_vmcb->save.rflags = vmcb->save.rflags; |
1642 | nested_vmcb->save.rip = vmcb->save.rip; | 1813 | nested_vmcb->save.rip = vmcb->save.rip; |
1643 | nested_vmcb->save.rsp = vmcb->save.rsp; | 1814 | nested_vmcb->save.rsp = vmcb->save.rsp; |
@@ -1709,10 +1880,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1709 | svm->vmcb->save.cpl = 0; | 1880 | svm->vmcb->save.cpl = 0; |
1710 | svm->vmcb->control.exit_int_info = 0; | 1881 | svm->vmcb->control.exit_int_info = 0; |
1711 | 1882 | ||
1712 | /* Exit nested SVM mode */ | 1883 | nested_svm_unmap(page); |
1713 | svm->nested.vmcb = 0; | ||
1714 | |||
1715 | nested_svm_unmap(nested_vmcb, KM_USER0); | ||
1716 | 1884 | ||
1717 | kvm_mmu_reset_context(&svm->vcpu); | 1885 | kvm_mmu_reset_context(&svm->vcpu); |
1718 | kvm_mmu_load(&svm->vcpu); | 1886 | kvm_mmu_load(&svm->vcpu); |
@@ -1722,19 +1890,33 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1722 | 1890 | ||
1723 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | 1891 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) |
1724 | { | 1892 | { |
1725 | u32 *nested_msrpm; | 1893 | /* |
1894 | * This function merges the msr permission bitmaps of kvm and the | ||
1894 | * nested vmcb. It is optimized in that it only merges the parts where | ||
1896 | * the kvm msr permission bitmap may contain zero bits | ||
1897 | */ | ||
1726 | int i; | 1898 | int i; |
1727 | 1899 | ||
1728 | nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1900 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
1729 | if (!nested_msrpm) | 1901 | return true; |
1730 | return false; | ||
1731 | 1902 | ||
1732 | for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) | 1903 | for (i = 0; i < MSRPM_OFFSETS; i++) { |
1733 | svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; | 1904 | u32 value, p; |
1905 | u64 offset; | ||
1734 | 1906 | ||
1735 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); | 1907 | if (msrpm_offsets[i] == 0xffffffff) |
1908 | break; | ||
1909 | |||
1910 | p = msrpm_offsets[i]; | ||
1911 | offset = svm->nested.vmcb_msrpm + (p * 4); | ||
1912 | |||
1913 | if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) | ||
1914 | return false; | ||
1915 | |||
1916 | svm->nested.msrpm[p] = svm->msrpm[p] | value; | ||
1917 | } | ||
1736 | 1918 | ||
1737 | nested_svm_unmap(nested_msrpm, KM_USER0); | 1919 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); |
1738 | 1920 | ||
1739 | return true; | 1921 | return true; |
1740 | } | 1922 | } |
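Editor's note on the new nested_svm_vmrun_msrpm() above: only the offsets recorded in msrpm_offsets[] can contain allow (zero) bits in KVM's own map, so only those words need to be read from the guest's bitmap and ORed in; everything else already intercepts. A minimal sketch of the merge rule under that assumption:

#include <stdio.h>
#include <stdint.h>

#define MSRPM_OFFSETS 16

int main(void)
{
	uint32_t msrpm_offsets[MSRPM_OFFSETS] = { 520, 0xffffffff /* sentinel */ };
	uint32_t host_word  = 0xffffffcf;   /* KVM allows LSTAR read/write */
	uint32_t guest_word = 0x00000020;   /* L1 wants to intercept LSTAR writes */

	for (int i = 0; i < MSRPM_OFFSETS && msrpm_offsets[i] != 0xffffffff; i++) {
		uint32_t merged = host_word | guest_word;  /* most restrictive wins */
		printf("offset %u: %#x\n", msrpm_offsets[i], merged); /* 0xffffffef */
	}
	return 0;
}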
@@ -1744,26 +1926,34 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1744 | struct vmcb *nested_vmcb; | 1926 | struct vmcb *nested_vmcb; |
1745 | struct vmcb *hsave = svm->nested.hsave; | 1927 | struct vmcb *hsave = svm->nested.hsave; |
1746 | struct vmcb *vmcb = svm->vmcb; | 1928 | struct vmcb *vmcb = svm->vmcb; |
1929 | struct page *page; | ||
1930 | u64 vmcb_gpa; | ||
1747 | 1931 | ||
1748 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 1932 | vmcb_gpa = svm->vmcb->save.rax; |
1933 | |||
1934 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | ||
1749 | if (!nested_vmcb) | 1935 | if (!nested_vmcb) |
1750 | return false; | 1936 | return false; |
1751 | 1937 | ||
1752 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1938 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa, |
1753 | svm->nested.vmcb = svm->vmcb->save.rax; | ||
1754 | |||
1755 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
1756 | nested_vmcb->save.rip, | 1939 | nested_vmcb->save.rip, |
1757 | nested_vmcb->control.int_ctl, | 1940 | nested_vmcb->control.int_ctl, |
1758 | nested_vmcb->control.event_inj, | 1941 | nested_vmcb->control.event_inj, |
1759 | nested_vmcb->control.nested_ctl); | 1942 | nested_vmcb->control.nested_ctl); |
1760 | 1943 | ||
1944 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, | ||
1945 | nested_vmcb->control.intercept_cr_write, | ||
1946 | nested_vmcb->control.intercept_exceptions, | ||
1947 | nested_vmcb->control.intercept); | ||
1948 | |||
1761 | /* Clear internal status */ | 1949 | /* Clear internal status */ |
1762 | kvm_clear_exception_queue(&svm->vcpu); | 1950 | kvm_clear_exception_queue(&svm->vcpu); |
1763 | kvm_clear_interrupt_queue(&svm->vcpu); | 1951 | kvm_clear_interrupt_queue(&svm->vcpu); |
1764 | 1952 | ||
1765 | /* Save the old vmcb, so we don't need to pick what we save, but | 1953 | /* |
1766 | can restore everything when a VMEXIT occurs */ | 1954 | * Save the old vmcb, so we don't need to pick what we save, but can |
1955 | * restore everything when a VMEXIT occurs | ||
1956 | */ | ||
1767 | hsave->save.es = vmcb->save.es; | 1957 | hsave->save.es = vmcb->save.es; |
1768 | hsave->save.cs = vmcb->save.cs; | 1958 | hsave->save.cs = vmcb->save.cs; |
1769 | hsave->save.ss = vmcb->save.ss; | 1959 | hsave->save.ss = vmcb->save.ss; |
@@ -1803,14 +1993,17 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1803 | if (npt_enabled) { | 1993 | if (npt_enabled) { |
1804 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; | 1994 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; |
1805 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; | 1995 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; |
1806 | } else { | 1996 | } else |
1807 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); | 1997 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); |
1808 | kvm_mmu_reset_context(&svm->vcpu); | 1998 | |
1809 | } | 1999 | /* Guest paging mode is active - reset mmu */ |
2000 | kvm_mmu_reset_context(&svm->vcpu); | ||
2001 | |||
1810 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; | 2002 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; |
1811 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); | 2003 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); |
1812 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); | 2004 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); |
1813 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); | 2005 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); |
2006 | |||
1814 | /* In case we don't even reach vcpu_run, the fields are not updated */ | 2007 | /* In case we don't even reach vcpu_run, the fields are not updated */ |
1815 | svm->vmcb->save.rax = nested_vmcb->save.rax; | 2008 | svm->vmcb->save.rax = nested_vmcb->save.rax; |
1816 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; | 2009 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; |
@@ -1819,22 +2012,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1819 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; | 2012 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; |
1820 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; | 2013 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; |
1821 | 2014 | ||
1822 | /* We don't want a nested guest to be more powerful than the guest, | 2015 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; |
1823 | so all intercepts are ORed */ | 2016 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; |
1824 | svm->vmcb->control.intercept_cr_read |= | ||
1825 | nested_vmcb->control.intercept_cr_read; | ||
1826 | svm->vmcb->control.intercept_cr_write |= | ||
1827 | nested_vmcb->control.intercept_cr_write; | ||
1828 | svm->vmcb->control.intercept_dr_read |= | ||
1829 | nested_vmcb->control.intercept_dr_read; | ||
1830 | svm->vmcb->control.intercept_dr_write |= | ||
1831 | nested_vmcb->control.intercept_dr_write; | ||
1832 | svm->vmcb->control.intercept_exceptions |= | ||
1833 | nested_vmcb->control.intercept_exceptions; | ||
1834 | |||
1835 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
1836 | |||
1837 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; | ||
1838 | 2017 | ||
1839 | /* cache intercepts */ | 2018 | /* cache intercepts */ |
1840 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; | 2019 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; |
@@ -1851,13 +2030,43 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1851 | else | 2030 | else |
1852 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 2031 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
1853 | 2032 | ||
2033 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { | ||
2034 | /* We only want the cr8 intercept bits of the guest */ | ||
2035 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; | ||
2036 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | ||
2037 | } | ||
2038 | |||
2039 | /* We don't want to see VMMCALLs from a nested guest */ | ||
2040 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL); | ||
2041 | |||
2042 | /* | ||
2043 | * We don't want a nested guest to be more powerful than the guest, so | ||
2044 | * all intercepts are ORed | ||
2045 | */ | ||
2046 | svm->vmcb->control.intercept_cr_read |= | ||
2047 | nested_vmcb->control.intercept_cr_read; | ||
2048 | svm->vmcb->control.intercept_cr_write |= | ||
2049 | nested_vmcb->control.intercept_cr_write; | ||
2050 | svm->vmcb->control.intercept_dr_read |= | ||
2051 | nested_vmcb->control.intercept_dr_read; | ||
2052 | svm->vmcb->control.intercept_dr_write |= | ||
2053 | nested_vmcb->control.intercept_dr_write; | ||
2054 | svm->vmcb->control.intercept_exceptions |= | ||
2055 | nested_vmcb->control.intercept_exceptions; | ||
2056 | |||
2057 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
2058 | |||
2059 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; | ||
1854 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 2060 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
1855 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 2061 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
1856 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 2062 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
1857 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 2063 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
1858 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 2064 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
1859 | 2065 | ||
1860 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2066 | nested_svm_unmap(page); |
2067 | |||
2068 | /* nested_vmcb is our indicator if nested SVM is activated */ | ||
2069 | svm->nested.vmcb = vmcb_gpa; | ||
1861 | 2070 | ||
1862 | enable_gif(svm); | 2071 | enable_gif(svm); |
1863 | 2072 | ||
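Taken together, the nested_svm_vmrun() hunks above reorder the intercept merge: the host's CR8 intercepts are dropped when L1 runs with V_INTR, the VMMCALL intercept is cleared so a nested guest can never VMMCALL straight into the host, and only then are L1's own intercepts ORed into the active VMCB. A condensed sketch of the resulting sequence (field names as in the hunks, not the verbatim source):

    if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
            /* keep only the guest's cr8 intercept bits */
            svm->vmcb->control.intercept_cr_read  &= ~INTERCEPT_CR8_MASK;
            svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
    }

    /* never let a nested guest VMMCALL into the host */
    svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL);

    /* the nested guest must not be more powerful than L1, so OR everything */
    svm->vmcb->control.intercept_cr_read    |= nested_vmcb->control.intercept_cr_read;
    svm->vmcb->control.intercept_cr_write   |= nested_vmcb->control.intercept_cr_write;
    svm->vmcb->control.intercept_dr_read    |= nested_vmcb->control.intercept_dr_read;
    svm->vmcb->control.intercept_dr_write   |= nested_vmcb->control.intercept_dr_write;
    svm->vmcb->control.intercept_exceptions |= nested_vmcb->control.intercept_exceptions;
    svm->vmcb->control.intercept            |= nested_vmcb->control.intercept;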
@@ -1883,6 +2092,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
1883 | static int vmload_interception(struct vcpu_svm *svm) | 2092 | static int vmload_interception(struct vcpu_svm *svm) |
1884 | { | 2093 | { |
1885 | struct vmcb *nested_vmcb; | 2094 | struct vmcb *nested_vmcb; |
2095 | struct page *page; | ||
1886 | 2096 | ||
1887 | if (nested_svm_check_permissions(svm)) | 2097 | if (nested_svm_check_permissions(svm)) |
1888 | return 1; | 2098 | return 1; |
@@ -1890,12 +2100,12 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
1890 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2100 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1891 | skip_emulated_instruction(&svm->vcpu); | 2101 | skip_emulated_instruction(&svm->vcpu); |
1892 | 2102 | ||
1893 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2103 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
1894 | if (!nested_vmcb) | 2104 | if (!nested_vmcb) |
1895 | return 1; | 2105 | return 1; |
1896 | 2106 | ||
1897 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); | 2107 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); |
1898 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2108 | nested_svm_unmap(page); |
1899 | 2109 | ||
1900 | return 1; | 2110 | return 1; |
1901 | } | 2111 | } |
@@ -1903,6 +2113,7 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
1903 | static int vmsave_interception(struct vcpu_svm *svm) | 2113 | static int vmsave_interception(struct vcpu_svm *svm) |
1904 | { | 2114 | { |
1905 | struct vmcb *nested_vmcb; | 2115 | struct vmcb *nested_vmcb; |
2116 | struct page *page; | ||
1906 | 2117 | ||
1907 | if (nested_svm_check_permissions(svm)) | 2118 | if (nested_svm_check_permissions(svm)) |
1908 | return 1; | 2119 | return 1; |
@@ -1910,12 +2121,12 @@ static int vmsave_interception(struct vcpu_svm *svm) | |||
1910 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2121 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
1911 | skip_emulated_instruction(&svm->vcpu); | 2122 | skip_emulated_instruction(&svm->vcpu); |
1912 | 2123 | ||
1913 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2124 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
1914 | if (!nested_vmcb) | 2125 | if (!nested_vmcb) |
1915 | return 1; | 2126 | return 1; |
1916 | 2127 | ||
1917 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); | 2128 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); |
1918 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2129 | nested_svm_unmap(page); |
1919 | 2130 | ||
1920 | return 1; | 2131 | return 1; |
1921 | } | 2132 | } |
@@ -2018,6 +2229,8 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2018 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; | 2229 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; |
2019 | uint32_t idt_v = | 2230 | uint32_t idt_v = |
2020 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; | 2231 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; |
2232 | bool has_error_code = false; | ||
2233 | u32 error_code = 0; | ||
2021 | 2234 | ||
2022 | tss_selector = (u16)svm->vmcb->control.exit_info_1; | 2235 | tss_selector = (u16)svm->vmcb->control.exit_info_1; |
2023 | 2236 | ||
@@ -2038,6 +2251,12 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2038 | svm->vcpu.arch.nmi_injected = false; | 2251 | svm->vcpu.arch.nmi_injected = false; |
2039 | break; | 2252 | break; |
2040 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2253 | case SVM_EXITINTINFO_TYPE_EXEPT: |
2254 | if (svm->vmcb->control.exit_info_2 & | ||
2255 | (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { | ||
2256 | has_error_code = true; | ||
2257 | error_code = | ||
2258 | (u32)svm->vmcb->control.exit_info_2; | ||
2259 | } | ||
2041 | kvm_clear_exception_queue(&svm->vcpu); | 2260 | kvm_clear_exception_queue(&svm->vcpu); |
2042 | break; | 2261 | break; |
2043 | case SVM_EXITINTINFO_TYPE_INTR: | 2262 | case SVM_EXITINTINFO_TYPE_INTR: |
@@ -2054,7 +2273,14 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
2054 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | 2273 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) |
2055 | skip_emulated_instruction(&svm->vcpu); | 2274 | skip_emulated_instruction(&svm->vcpu); |
2056 | 2275 | ||
2057 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2276 | if (kvm_task_switch(&svm->vcpu, tss_selector, reason, |
2277 | has_error_code, error_code) == EMULATE_FAIL) { | ||
2278 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
2279 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
2280 | svm->vcpu.run->internal.ndata = 0; | ||
2281 | return 0; | ||
2282 | } | ||
2283 | return 1; | ||
2058 | } | 2284 | } |
2059 | 2285 | ||
2060 | static int cpuid_interception(struct vcpu_svm *svm) | 2286 | static int cpuid_interception(struct vcpu_svm *svm) |
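The task-switch change above pulls the pushed error code out of exit_info_2 and forwards it to kvm_task_switch(); an emulation failure is now reported to userspace instead of being ignored. Condensed from the hunks (not the verbatim source):

    /* in the SVM_EXITINTINFO_TYPE_EXEPT case */
    if (svm->vmcb->control.exit_info_2 &
        (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
            has_error_code = true;
            error_code = (u32)svm->vmcb->control.exit_info_2;
    }
    /* ... remaining exit_int_info handling ... */

    if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
                        has_error_code, error_code) == EMULATE_FAIL) {
            svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
            svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
            svm->vcpu.run->internal.ndata = 0;
            return 0;               /* exit to userspace */
    }
    return 1;                       /* handled in the kernel */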
@@ -2145,9 +2371,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2145 | case MSR_IA32_SYSENTER_ESP: | 2371 | case MSR_IA32_SYSENTER_ESP: |
2146 | *data = svm->sysenter_esp; | 2372 | *data = svm->sysenter_esp; |
2147 | break; | 2373 | break; |
2148 | /* Nobody will change the following 5 values in the VMCB so | 2374 | /* |
2149 | we can safely return them on rdmsr. They will always be 0 | 2375 | * Nobody will change the following 5 values in the VMCB so we can |
2150 | until LBRV is implemented. */ | 2376 | * safely return them on rdmsr. They will always be 0 until LBRV is |
2377 | * implemented. | ||
2378 | */ | ||
2151 | case MSR_IA32_DEBUGCTLMSR: | 2379 | case MSR_IA32_DEBUGCTLMSR: |
2152 | *data = svm->vmcb->save.dbgctl; | 2380 | *data = svm->vmcb->save.dbgctl; |
2153 | break; | 2381 | break; |
@@ -2167,7 +2395,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2167 | *data = svm->nested.hsave_msr; | 2395 | *data = svm->nested.hsave_msr; |
2168 | break; | 2396 | break; |
2169 | case MSR_VM_CR: | 2397 | case MSR_VM_CR: |
2170 | *data = 0; | 2398 | *data = svm->nested.vm_cr_msr; |
2171 | break; | 2399 | break; |
2172 | case MSR_IA32_UCODE_REV: | 2400 | case MSR_IA32_UCODE_REV: |
2173 | *data = 0x01000065; | 2401 | *data = 0x01000065; |
@@ -2197,6 +2425,31 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
2197 | return 1; | 2425 | return 1; |
2198 | } | 2426 | } |
2199 | 2427 | ||
2428 | static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) | ||
2429 | { | ||
2430 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2431 | int svm_dis, chg_mask; | ||
2432 | |||
2433 | if (data & ~SVM_VM_CR_VALID_MASK) | ||
2434 | return 1; | ||
2435 | |||
2436 | chg_mask = SVM_VM_CR_VALID_MASK; | ||
2437 | |||
2438 | if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) | ||
2439 | chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); | ||
2440 | |||
2441 | svm->nested.vm_cr_msr &= ~chg_mask; | ||
2442 | svm->nested.vm_cr_msr |= (data & chg_mask); | ||
2443 | |||
2444 | svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; | ||
2445 | |||
2446 | /* check for svm_disable while efer.svme is set */ | ||
2447 | if (svm_dis && (vcpu->arch.efer & EFER_SVME)) | ||
2448 | return 1; | ||
2449 | |||
2450 | return 0; | ||
2451 | } | ||
2452 | |||
2200 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | 2453 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) |
2201 | { | 2454 | { |
2202 | struct vcpu_svm *svm = to_svm(vcpu); | 2455 | struct vcpu_svm *svm = to_svm(vcpu); |
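svm_set_vm_cr() above makes the SVM-disable lock sticky: once SVMDIS is latched, both the LOCK and SVMDIS bits drop out of the writable mask, and a write that would set SVMDIS while EFER.SVME is already enabled is rejected (the caller treats a non-zero return as a failed wrmsr). A small illustration of the masking, using a hypothetical guest write:

    /* SVMDIS already latched; the guest now tries to clear the whole MSR */
    u64 data = 0;
    int chg_mask = SVM_VM_CR_VALID_MASK;

    if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
            chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);

    svm->nested.vm_cr_msr &= ~chg_mask;
    svm->nested.vm_cr_msr |= (data & chg_mask);   /* LOCK and SVMDIS survive */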
@@ -2263,6 +2516,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2263 | svm->nested.hsave_msr = data; | 2516 | svm->nested.hsave_msr = data; |
2264 | break; | 2517 | break; |
2265 | case MSR_VM_CR: | 2518 | case MSR_VM_CR: |
2519 | return svm_set_vm_cr(vcpu, data); | ||
2266 | case MSR_VM_IGNNE: | 2520 | case MSR_VM_IGNNE: |
2267 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); | 2521 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); |
2268 | break; | 2522 | break; |
@@ -2326,16 +2580,16 @@ static int pause_interception(struct vcpu_svm *svm) | |||
2326 | } | 2580 | } |
2327 | 2581 | ||
2328 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | 2582 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { |
2329 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2583 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
2330 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2584 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
2331 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2585 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
2332 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 2586 | [SVM_EXIT_READ_CR8] = emulate_on_interception, |
2333 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 2587 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
2334 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, | 2588 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, |
2335 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 2589 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, |
2336 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 2590 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, |
2337 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 2591 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
2338 | [SVM_EXIT_READ_DR0] = emulate_on_interception, | 2592 | [SVM_EXIT_READ_DR0] = emulate_on_interception, |
2339 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 2593 | [SVM_EXIT_READ_DR1] = emulate_on_interception, |
2340 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 2594 | [SVM_EXIT_READ_DR2] = emulate_on_interception, |
2341 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 2595 | [SVM_EXIT_READ_DR3] = emulate_on_interception, |
@@ -2354,15 +2608,14 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2354 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 2608 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
2355 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 2609 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
2356 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 2610 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
2357 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 2611 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
2358 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | 2612 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, |
2359 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 2613 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
2360 | [SVM_EXIT_INTR] = intr_interception, | 2614 | [SVM_EXIT_INTR] = intr_interception, |
2361 | [SVM_EXIT_NMI] = nmi_interception, | 2615 | [SVM_EXIT_NMI] = nmi_interception, |
2362 | [SVM_EXIT_SMI] = nop_on_interception, | 2616 | [SVM_EXIT_SMI] = nop_on_interception, |
2363 | [SVM_EXIT_INIT] = nop_on_interception, | 2617 | [SVM_EXIT_INIT] = nop_on_interception, |
2364 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 2618 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
2365 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ | ||
2366 | [SVM_EXIT_CPUID] = cpuid_interception, | 2619 | [SVM_EXIT_CPUID] = cpuid_interception, |
2367 | [SVM_EXIT_IRET] = iret_interception, | 2620 | [SVM_EXIT_IRET] = iret_interception, |
2368 | [SVM_EXIT_INVD] = emulate_on_interception, | 2621 | [SVM_EXIT_INVD] = emulate_on_interception, |
@@ -2370,7 +2623,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2370 | [SVM_EXIT_HLT] = halt_interception, | 2623 | [SVM_EXIT_HLT] = halt_interception, |
2371 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2624 | [SVM_EXIT_INVLPG] = invlpg_interception, |
2372 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2625 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
2373 | [SVM_EXIT_IOIO] = io_interception, | 2626 | [SVM_EXIT_IOIO] = io_interception, |
2374 | [SVM_EXIT_MSR] = msr_interception, | 2627 | [SVM_EXIT_MSR] = msr_interception, |
2375 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, | 2628 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, |
2376 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, | 2629 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, |
@@ -2393,7 +2646,12 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2393 | struct kvm_run *kvm_run = vcpu->run; | 2646 | struct kvm_run *kvm_run = vcpu->run; |
2394 | u32 exit_code = svm->vmcb->control.exit_code; | 2647 | u32 exit_code = svm->vmcb->control.exit_code; |
2395 | 2648 | ||
2396 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2649 | trace_kvm_exit(exit_code, vcpu); |
2650 | |||
2651 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
2652 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
2653 | if (npt_enabled) | ||
2654 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
2397 | 2655 | ||
2398 | if (unlikely(svm->nested.exit_required)) { | 2656 | if (unlikely(svm->nested.exit_required)) { |
2399 | nested_svm_vmexit(svm); | 2657 | nested_svm_vmexit(svm); |
@@ -2422,11 +2680,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2422 | 2680 | ||
2423 | svm_complete_interrupts(svm); | 2681 | svm_complete_interrupts(svm); |
2424 | 2682 | ||
2425 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
2426 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
2427 | if (npt_enabled) | ||
2428 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
2429 | |||
2430 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2683 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
2431 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2684 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
2432 | kvm_run->fail_entry.hardware_entry_failure_reason | 2685 | kvm_run->fail_entry.hardware_entry_failure_reason |
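The two handle_exit() hunks above move the CR0/CR3 read-back from after svm_complete_interrupts() to the very top of the function, so vcpu->arch already reflects the guest state by the time the nested-virtualization paths run. In outline:

    trace_kvm_exit(exit_code, vcpu);

    /* sync CR0/CR3 back before anything inspects vcpu->arch */
    if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
            vcpu->arch.cr0 = svm->vmcb->save.cr0;
    if (npt_enabled)
            vcpu->arch.cr3 = svm->vmcb->save.cr3;

    if (unlikely(svm->nested.exit_required)) {
            nested_svm_vmexit(svm);
            /* ... emulated #VMEXIT handling continues as before ... */
    }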
@@ -2511,6 +2764,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
2511 | { | 2764 | { |
2512 | struct vcpu_svm *svm = to_svm(vcpu); | 2765 | struct vcpu_svm *svm = to_svm(vcpu); |
2513 | 2766 | ||
2767 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2768 | return; | ||
2769 | |||
2514 | if (irr == -1) | 2770 | if (irr == -1) |
2515 | return; | 2771 | return; |
2516 | 2772 | ||
@@ -2522,8 +2778,12 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2522 | { | 2778 | { |
2523 | struct vcpu_svm *svm = to_svm(vcpu); | 2779 | struct vcpu_svm *svm = to_svm(vcpu); |
2524 | struct vmcb *vmcb = svm->vmcb; | 2780 | struct vmcb *vmcb = svm->vmcb; |
2525 | return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2781 | int ret; |
2526 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | 2782 | ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && |
2783 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | ||
2784 | ret = ret && gif_set(svm) && nested_svm_nmi(svm); | ||
2785 | |||
2786 | return ret; | ||
2527 | } | 2787 | } |
2528 | 2788 | ||
2529 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) | 2789 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) |
@@ -2568,13 +2828,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
2568 | { | 2828 | { |
2569 | struct vcpu_svm *svm = to_svm(vcpu); | 2829 | struct vcpu_svm *svm = to_svm(vcpu); |
2570 | 2830 | ||
2571 | nested_svm_intr(svm); | 2831 | /* |
2572 | 2832 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes | |
2573 | /* In case GIF=0 we can't rely on the CPU to tell us when | 2833 | * 1, because that's a separate STGI/VMRUN intercept. The next time we |
2574 | * GIF becomes 1, because that's a separate STGI/VMRUN intercept. | 2834 | * get that intercept, this function will be called again though and |
2575 | * The next time we get that intercept, this function will be | 2835 | * we'll get the vintr intercept. |
2576 | * called again though and we'll get the vintr intercept. */ | 2836 | */ |
2577 | if (gif_set(svm)) { | 2837 | if (gif_set(svm) && nested_svm_intr(svm)) { |
2578 | svm_set_vintr(svm); | 2838 | svm_set_vintr(svm); |
2579 | svm_inject_irq(svm, 0x0); | 2839 | svm_inject_irq(svm, 0x0); |
2580 | } | 2840 | } |
@@ -2588,9 +2848,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2588 | == HF_NMI_MASK) | 2848 | == HF_NMI_MASK) |
2589 | return; /* IRET will cause a vm exit */ | 2849 | return; /* IRET will cause a vm exit */ |
2590 | 2850 | ||
2591 | /* Something prevents NMI from been injected. Single step over | 2851 | /* |
2592 | possible problem (IRET or exception injection or interrupt | 2852 | * Something prevents NMI from being injected. Single step over possible |
2593 | shadow) */ | 2853 | * problem (IRET or exception injection or interrupt shadow) |
2854 | */ | ||
2594 | svm->nmi_singlestep = true; | 2855 | svm->nmi_singlestep = true; |
2595 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2856 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
2596 | update_db_intercept(vcpu); | 2857 | update_db_intercept(vcpu); |
@@ -2614,6 +2875,9 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
2614 | { | 2875 | { |
2615 | struct vcpu_svm *svm = to_svm(vcpu); | 2876 | struct vcpu_svm *svm = to_svm(vcpu); |
2616 | 2877 | ||
2878 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2879 | return; | ||
2880 | |||
2617 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 2881 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { |
2618 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 2882 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
2619 | kvm_set_cr8(vcpu, cr8); | 2883 | kvm_set_cr8(vcpu, cr8); |
@@ -2625,6 +2889,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
2625 | struct vcpu_svm *svm = to_svm(vcpu); | 2889 | struct vcpu_svm *svm = to_svm(vcpu); |
2626 | u64 cr8; | 2890 | u64 cr8; |
2627 | 2891 | ||
2892 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
2893 | return; | ||
2894 | |||
2628 | cr8 = kvm_get_cr8(vcpu); | 2895 | cr8 = kvm_get_cr8(vcpu); |
2629 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | 2896 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; |
2630 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 2897 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
@@ -2635,6 +2902,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2635 | u8 vector; | 2902 | u8 vector; |
2636 | int type; | 2903 | int type; |
2637 | u32 exitintinfo = svm->vmcb->control.exit_int_info; | 2904 | u32 exitintinfo = svm->vmcb->control.exit_int_info; |
2905 | unsigned int3_injected = svm->int3_injected; | ||
2906 | |||
2907 | svm->int3_injected = 0; | ||
2638 | 2908 | ||
2639 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) | 2909 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) |
2640 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | 2910 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); |
@@ -2654,18 +2924,25 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
2654 | svm->vcpu.arch.nmi_injected = true; | 2924 | svm->vcpu.arch.nmi_injected = true; |
2655 | break; | 2925 | break; |
2656 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2926 | case SVM_EXITINTINFO_TYPE_EXEPT: |
2657 | /* In case of software exception do not reinject an exception | 2927 | /* |
2658 | vector, but re-execute and instruction instead */ | 2928 | * In case of software exceptions, do not reinject the vector, |
2659 | if (is_nested(svm)) | 2929 | * but re-execute the instruction instead. Rewind RIP first |
2660 | break; | 2930 | * if we emulated INT3 before. |
2661 | if (kvm_exception_is_soft(vector)) | 2931 | */ |
2932 | if (kvm_exception_is_soft(vector)) { | ||
2933 | if (vector == BP_VECTOR && int3_injected && | ||
2934 | kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) | ||
2935 | kvm_rip_write(&svm->vcpu, | ||
2936 | kvm_rip_read(&svm->vcpu) - | ||
2937 | int3_injected); | ||
2662 | break; | 2938 | break; |
2939 | } | ||
2663 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { | 2940 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { |
2664 | u32 err = svm->vmcb->control.exit_int_info_err; | 2941 | u32 err = svm->vmcb->control.exit_int_info_err; |
2665 | kvm_queue_exception_e(&svm->vcpu, vector, err); | 2942 | kvm_requeue_exception_e(&svm->vcpu, vector, err); |
2666 | 2943 | ||
2667 | } else | 2944 | } else |
2668 | kvm_queue_exception(&svm->vcpu, vector); | 2945 | kvm_requeue_exception(&svm->vcpu, vector); |
2669 | break; | 2946 | break; |
2670 | case SVM_EXITINTINFO_TYPE_INTR: | 2947 | case SVM_EXITINTINFO_TYPE_INTR: |
2671 | kvm_queue_interrupt(&svm->vcpu, vector, false); | 2948 | kvm_queue_interrupt(&svm->vcpu, vector, false); |
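In svm_complete_interrupts() above, software exceptions are no longer simply skipped for nested guests; if the recorded INT3 injection did not complete, RIP is rewound by the injected instruction's length so the breakpoint gets re-executed, and hard exceptions are requeued through the kvm_requeue_exception*() helpers. A compressed view of the exception arm (int3_injected is assumed to hold the length saved at injection time):

    if (kvm_exception_is_soft(vector)) {
            /* re-execute, don't reinject; undo a half-done INT3 first */
            if (vector == BP_VECTOR && int3_injected &&
                kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
                    kvm_rip_write(&svm->vcpu,
                                  kvm_rip_read(&svm->vcpu) - int3_injected);
    } else if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
            kvm_requeue_exception_e(&svm->vcpu, vector,
                                    svm->vmcb->control.exit_int_info_err);
    } else {
            kvm_requeue_exception(&svm->vcpu, vector);
    }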
@@ -2688,6 +2965,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
2688 | u16 gs_selector; | 2965 | u16 gs_selector; |
2689 | u16 ldt_selector; | 2966 | u16 ldt_selector; |
2690 | 2967 | ||
2968 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
2969 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
2970 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
2971 | |||
2691 | /* | 2972 | /* |
2692 | * A vmexit emulation is required before the vcpu can be executed | 2973 | * A vmexit emulation is required before the vcpu can be executed |
2693 | * again. | 2974 | * again. |
@@ -2695,10 +2976,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
2695 | if (unlikely(svm->nested.exit_required)) | 2976 | if (unlikely(svm->nested.exit_required)) |
2696 | return; | 2977 | return; |
2697 | 2978 | ||
2698 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
2699 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
2700 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
2701 | |||
2702 | pre_svm_run(svm); | 2979 | pre_svm_run(svm); |
2703 | 2980 | ||
2704 | sync_lapic_to_cr8(vcpu); | 2981 | sync_lapic_to_cr8(vcpu); |
@@ -2879,25 +3156,39 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) | |||
2879 | { | 3156 | { |
2880 | } | 3157 | } |
2881 | 3158 | ||
3159 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
3160 | { | ||
3161 | switch (func) { | ||
3162 | case 0x8000000A: | ||
3163 | entry->eax = 1; /* SVM revision 1 */ | ||
3164 | entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper | ||
3165 | ASID emulation to nested SVM */ | ||
3166 | entry->ecx = 0; /* Reserved */ | ||
3167 | entry->edx = 0; /* Do not support any additional features */ | ||
3168 | |||
3169 | break; | ||
3170 | } | ||
3171 | } | ||
3172 | |||
2882 | static const struct trace_print_flags svm_exit_reasons_str[] = { | 3173 | static const struct trace_print_flags svm_exit_reasons_str[] = { |
2883 | { SVM_EXIT_READ_CR0, "read_cr0" }, | 3174 | { SVM_EXIT_READ_CR0, "read_cr0" }, |
2884 | { SVM_EXIT_READ_CR3, "read_cr3" }, | 3175 | { SVM_EXIT_READ_CR3, "read_cr3" }, |
2885 | { SVM_EXIT_READ_CR4, "read_cr4" }, | 3176 | { SVM_EXIT_READ_CR4, "read_cr4" }, |
2886 | { SVM_EXIT_READ_CR8, "read_cr8" }, | 3177 | { SVM_EXIT_READ_CR8, "read_cr8" }, |
2887 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, | 3178 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, |
2888 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, | 3179 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, |
2889 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, | 3180 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, |
2890 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, | 3181 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, |
2891 | { SVM_EXIT_READ_DR0, "read_dr0" }, | 3182 | { SVM_EXIT_READ_DR0, "read_dr0" }, |
2892 | { SVM_EXIT_READ_DR1, "read_dr1" }, | 3183 | { SVM_EXIT_READ_DR1, "read_dr1" }, |
2893 | { SVM_EXIT_READ_DR2, "read_dr2" }, | 3184 | { SVM_EXIT_READ_DR2, "read_dr2" }, |
2894 | { SVM_EXIT_READ_DR3, "read_dr3" }, | 3185 | { SVM_EXIT_READ_DR3, "read_dr3" }, |
2895 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, | 3186 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, |
2896 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, | 3187 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, |
2897 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, | 3188 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, |
2898 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, | 3189 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, |
2899 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, | 3190 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, |
2900 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, | 3191 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, |
2901 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, | 3192 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, |
2902 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, | 3193 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, |
2903 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, | 3194 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, |
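svm_set_supported_cpuid() above is what makes nested SVM visible through KVM_GET_SUPPORTED_CPUID: leaf 0x8000000A reports SVM revision 1 in EAX, 8 ASIDs in EBX, and no optional features in EDX. A guest could probe the leaf roughly like this; the snippet is illustrative only and assumes GCC's cpuid.h:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* leaf 0x8000000A is only meaningful when CPUID.80000001H reports SVM */
            if (!__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx))
                    return 1;

            printf("SVM revision %u, %u ASIDs, feature bits 0x%x\n",
                   eax & 0xff, ebx, edx);
            return 0;
    }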
@@ -2946,8 +3237,10 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
2946 | { | 3237 | { |
2947 | struct vcpu_svm *svm = to_svm(vcpu); | 3238 | struct vcpu_svm *svm = to_svm(vcpu); |
2948 | 3239 | ||
2949 | update_cr0_intercept(svm); | ||
2950 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | 3240 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; |
3241 | if (is_nested(svm)) | ||
3242 | svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
3243 | update_cr0_intercept(svm); | ||
2951 | } | 3244 | } |
2952 | 3245 | ||
2953 | static struct kvm_x86_ops svm_x86_ops = { | 3246 | static struct kvm_x86_ops svm_x86_ops = { |
@@ -2986,8 +3279,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2986 | .set_idt = svm_set_idt, | 3279 | .set_idt = svm_set_idt, |
2987 | .get_gdt = svm_get_gdt, | 3280 | .get_gdt = svm_get_gdt, |
2988 | .set_gdt = svm_set_gdt, | 3281 | .set_gdt = svm_set_gdt, |
2989 | .get_dr = svm_get_dr, | 3282 | .set_dr7 = svm_set_dr7, |
2990 | .set_dr = svm_set_dr, | ||
2991 | .cache_reg = svm_cache_reg, | 3283 | .cache_reg = svm_cache_reg, |
2992 | .get_rflags = svm_get_rflags, | 3284 | .get_rflags = svm_get_rflags, |
2993 | .set_rflags = svm_set_rflags, | 3285 | .set_rflags = svm_set_rflags, |
@@ -3023,12 +3315,14 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3023 | .cpuid_update = svm_cpuid_update, | 3315 | .cpuid_update = svm_cpuid_update, |
3024 | 3316 | ||
3025 | .rdtscp_supported = svm_rdtscp_supported, | 3317 | .rdtscp_supported = svm_rdtscp_supported, |
3318 | |||
3319 | .set_supported_cpuid = svm_set_supported_cpuid, | ||
3026 | }; | 3320 | }; |
3027 | 3321 | ||
3028 | static int __init svm_init(void) | 3322 | static int __init svm_init(void) |
3029 | { | 3323 | { |
3030 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), | 3324 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), |
3031 | THIS_MODULE); | 3325 | __alignof__(struct vcpu_svm), THIS_MODULE); |
3032 | } | 3326 | } |
3033 | 3327 | ||
3034 | static void __exit svm_exit(void) | 3328 | static void __exit svm_exit(void) |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index eea40439066c..4ddadb1a5ffe 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -12,7 +12,8 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
12 | /* | 12 | /* |
13 | * There is a race window between reading and incrementing, but we do | 13 | * There is a race window between reading and incrementing, but we do |
14 | * not care about potentially losing timer events in the !reinject | 14 | * not care about potentially losing timer events in the !reinject |
15 | * case anyway. | 15 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked |
16 | * in vcpu_enter_guest. | ||
16 | */ | 17 | */ |
17 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 18 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
18 | atomic_inc(&ktimer->pending); | 19 | atomic_inc(&ktimer->pending); |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6ad30a29f044..a6544b8e7c0f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -5,8 +5,6 @@ | |||
5 | 5 | ||
6 | #undef TRACE_SYSTEM | 6 | #undef TRACE_SYSTEM |
7 | #define TRACE_SYSTEM kvm | 7 | #define TRACE_SYSTEM kvm |
8 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
9 | #define TRACE_INCLUDE_FILE trace | ||
10 | 8 | ||
11 | /* | 9 | /* |
12 | * Tracepoint for guest mode entry. | 10 | * Tracepoint for guest mode entry. |
@@ -184,8 +182,8 @@ TRACE_EVENT(kvm_apic, | |||
184 | * Tracepoint for kvm guest exit: | 182 | * Tracepoint for kvm guest exit: |
185 | */ | 183 | */ |
186 | TRACE_EVENT(kvm_exit, | 184 | TRACE_EVENT(kvm_exit, |
187 | TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), | 185 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), |
188 | TP_ARGS(exit_reason, guest_rip), | 186 | TP_ARGS(exit_reason, vcpu), |
189 | 187 | ||
190 | TP_STRUCT__entry( | 188 | TP_STRUCT__entry( |
191 | __field( unsigned int, exit_reason ) | 189 | __field( unsigned int, exit_reason ) |
@@ -194,7 +192,7 @@ TRACE_EVENT(kvm_exit, | |||
194 | 192 | ||
195 | TP_fast_assign( | 193 | TP_fast_assign( |
196 | __entry->exit_reason = exit_reason; | 194 | __entry->exit_reason = exit_reason; |
197 | __entry->guest_rip = guest_rip; | 195 | __entry->guest_rip = kvm_rip_read(vcpu); |
198 | ), | 196 | ), |
199 | 197 | ||
200 | TP_printk("reason %s rip 0x%lx", | 198 | TP_printk("reason %s rip 0x%lx", |
@@ -221,6 +219,38 @@ TRACE_EVENT(kvm_inj_virq, | |||
221 | TP_printk("irq %u", __entry->irq) | 219 | TP_printk("irq %u", __entry->irq) |
222 | ); | 220 | ); |
223 | 221 | ||
222 | #define EXS(x) { x##_VECTOR, "#" #x } | ||
223 | |||
224 | #define kvm_trace_sym_exc \ | ||
225 | EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ | ||
226 | EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ | ||
227 | EXS(MF), EXS(MC) | ||
228 | |||
229 | /* | ||
230 | * Tracepoint for kvm exception injection: | ||
231 | */ | ||
232 | TRACE_EVENT(kvm_inj_exception, | ||
233 | TP_PROTO(unsigned exception, bool has_error, unsigned error_code), | ||
234 | TP_ARGS(exception, has_error, error_code), | ||
235 | |||
236 | TP_STRUCT__entry( | ||
237 | __field( u8, exception ) | ||
238 | __field( u8, has_error ) | ||
239 | __field( u32, error_code ) | ||
240 | ), | ||
241 | |||
242 | TP_fast_assign( | ||
243 | __entry->exception = exception; | ||
244 | __entry->has_error = has_error; | ||
245 | __entry->error_code = error_code; | ||
246 | ), | ||
247 | |||
248 | TP_printk("%s (0x%x)", | ||
249 | __print_symbolic(__entry->exception, kvm_trace_sym_exc), | ||
250 | /* FIXME: don't print error_code if not present */ | ||
251 | __entry->has_error ? __entry->error_code : 0) | ||
252 | ); | ||
253 | |||
224 | /* | 254 | /* |
225 | * Tracepoint for page fault. | 255 | * Tracepoint for page fault. |
226 | */ | 256 | */ |
@@ -413,12 +443,34 @@ TRACE_EVENT(kvm_nested_vmrun, | |||
413 | ), | 443 | ), |
414 | 444 | ||
415 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | 445 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " |
416 | "event_inj: 0x%08x npt: %s\n", | 446 | "event_inj: 0x%08x npt: %s", |
417 | __entry->rip, __entry->vmcb, __entry->nested_rip, | 447 | __entry->rip, __entry->vmcb, __entry->nested_rip, |
418 | __entry->int_ctl, __entry->event_inj, | 448 | __entry->int_ctl, __entry->event_inj, |
419 | __entry->npt ? "on" : "off") | 449 | __entry->npt ? "on" : "off") |
420 | ); | 450 | ); |
421 | 451 | ||
452 | TRACE_EVENT(kvm_nested_intercepts, | ||
453 | TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept), | ||
454 | TP_ARGS(cr_read, cr_write, exceptions, intercept), | ||
455 | |||
456 | TP_STRUCT__entry( | ||
457 | __field( __u16, cr_read ) | ||
458 | __field( __u16, cr_write ) | ||
459 | __field( __u32, exceptions ) | ||
460 | __field( __u64, intercept ) | ||
461 | ), | ||
462 | |||
463 | TP_fast_assign( | ||
464 | __entry->cr_read = cr_read; | ||
465 | __entry->cr_write = cr_write; | ||
466 | __entry->exceptions = exceptions; | ||
467 | __entry->intercept = intercept; | ||
468 | ), | ||
469 | |||
470 | TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx", | ||
471 | __entry->cr_read, __entry->cr_write, __entry->exceptions, | ||
472 | __entry->intercept) | ||
473 | ); | ||
422 | /* | 474 | /* |
423 | * Tracepoint for #VMEXIT while nested | 475 | * Tracepoint for #VMEXIT while nested |
424 | */ | 476 | */ |
@@ -447,7 +499,7 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
447 | __entry->exit_int_info_err = exit_int_info_err; | 499 | __entry->exit_int_info_err = exit_int_info_err; |
448 | ), | 500 | ), |
449 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | 501 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " |
450 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 502 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
451 | __entry->rip, | 503 | __entry->rip, |
452 | ftrace_print_symbols_seq(p, __entry->exit_code, | 504 | ftrace_print_symbols_seq(p, __entry->exit_code, |
453 | kvm_x86_ops->exit_reasons_str), | 505 | kvm_x86_ops->exit_reasons_str), |
@@ -482,7 +534,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
482 | ), | 534 | ), |
483 | 535 | ||
484 | TP_printk("reason: %s ext_inf1: 0x%016llx " | 536 | TP_printk("reason: %s ext_inf1: 0x%016llx " |
485 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 537 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
486 | ftrace_print_symbols_seq(p, __entry->exit_code, | 538 | ftrace_print_symbols_seq(p, __entry->exit_code, |
487 | kvm_x86_ops->exit_reasons_str), | 539 | kvm_x86_ops->exit_reasons_str), |
488 | __entry->exit_info1, __entry->exit_info2, | 540 | __entry->exit_info1, __entry->exit_info2, |
@@ -504,7 +556,7 @@ TRACE_EVENT(kvm_nested_intr_vmexit, | |||
504 | __entry->rip = rip | 556 | __entry->rip = rip |
505 | ), | 557 | ), |
506 | 558 | ||
507 | TP_printk("rip: 0x%016llx\n", __entry->rip) | 559 | TP_printk("rip: 0x%016llx", __entry->rip) |
508 | ); | 560 | ); |
509 | 561 | ||
510 | /* | 562 | /* |
@@ -526,7 +578,7 @@ TRACE_EVENT(kvm_invlpga, | |||
526 | __entry->address = address; | 578 | __entry->address = address; |
527 | ), | 579 | ), |
528 | 580 | ||
529 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | 581 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", |
530 | __entry->rip, __entry->asid, __entry->address) | 582 | __entry->rip, __entry->asid, __entry->address) |
531 | ); | 583 | ); |
532 | 584 | ||
@@ -547,11 +599,102 @@ TRACE_EVENT(kvm_skinit, | |||
547 | __entry->slb = slb; | 599 | __entry->slb = slb; |
548 | ), | 600 | ), |
549 | 601 | ||
550 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | 602 | TP_printk("rip: 0x%016llx slb: 0x%08x", |
551 | __entry->rip, __entry->slb) | 603 | __entry->rip, __entry->slb) |
552 | ); | 604 | ); |
553 | 605 | ||
606 | #define __print_insn(insn, ilen) ({ \ | ||
607 | int i; \ | ||
608 | const char *ret = p->buffer + p->len; \ | ||
609 | \ | ||
610 | for (i = 0; i < ilen; ++i) \ | ||
611 | trace_seq_printf(p, " %02x", insn[i]); \ | ||
612 | trace_seq_printf(p, "%c", 0); \ | ||
613 | ret; \ | ||
614 | }) | ||
615 | |||
616 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) | ||
617 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) | ||
618 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) | ||
619 | #define KVM_EMUL_INSN_F_CS_L (1 << 3) | ||
620 | |||
621 | #define kvm_trace_symbol_emul_flags \ | ||
622 | { 0, "real" }, \ | ||
623 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
624 | | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ | ||
625 | { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ | ||
626 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
627 | | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ | ||
628 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
629 | | KVM_EMUL_INSN_F_CS_L, "prot64" } | ||
630 | |||
631 | #define kei_decode_mode(mode) ({ \ | ||
632 | u8 flags = 0xff; \ | ||
633 | switch (mode) { \ | ||
634 | case X86EMUL_MODE_REAL: \ | ||
635 | flags = 0; \ | ||
636 | break; \ | ||
637 | case X86EMUL_MODE_VM86: \ | ||
638 | flags = KVM_EMUL_INSN_F_EFL_VM; \ | ||
639 | break; \ | ||
640 | case X86EMUL_MODE_PROT16: \ | ||
641 | flags = KVM_EMUL_INSN_F_CR0_PE; \ | ||
642 | break; \ | ||
643 | case X86EMUL_MODE_PROT32: \ | ||
644 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
645 | | KVM_EMUL_INSN_F_CS_D; \ | ||
646 | break; \ | ||
647 | case X86EMUL_MODE_PROT64: \ | ||
648 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
649 | | KVM_EMUL_INSN_F_CS_L; \ | ||
650 | break; \ | ||
651 | } \ | ||
652 | flags; \ | ||
653 | }) | ||
654 | |||
655 | TRACE_EVENT(kvm_emulate_insn, | ||
656 | TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), | ||
657 | TP_ARGS(vcpu, failed), | ||
658 | |||
659 | TP_STRUCT__entry( | ||
660 | __field( __u64, rip ) | ||
661 | __field( __u32, csbase ) | ||
662 | __field( __u8, len ) | ||
663 | __array( __u8, insn, 15 ) | ||
664 | __field( __u8, flags ) | ||
665 | __field( __u8, failed ) | ||
666 | ), | ||
667 | |||
668 | TP_fast_assign( | ||
669 | __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
670 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); | ||
671 | __entry->len = vcpu->arch.emulate_ctxt.decode.eip | ||
672 | - vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
673 | memcpy(__entry->insn, | ||
674 | vcpu->arch.emulate_ctxt.decode.fetch.data, | ||
675 | 15); | ||
676 | __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); | ||
677 | __entry->failed = failed; | ||
678 | ), | ||
679 | |||
680 | TP_printk("%x:%llx:%s (%s)%s", | ||
681 | __entry->csbase, __entry->rip, | ||
682 | __print_insn(__entry->insn, __entry->len), | ||
683 | __print_symbolic(__entry->flags, | ||
684 | kvm_trace_symbol_emul_flags), | ||
685 | __entry->failed ? " failed" : "" | ||
686 | ) | ||
687 | ); | ||
688 | |||
689 | #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) | ||
690 | #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) | ||
691 | |||
554 | #endif /* _TRACE_KVM_H */ | 692 | #endif /* _TRACE_KVM_H */ |
555 | 693 | ||
694 | #undef TRACE_INCLUDE_PATH | ||
695 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
696 | #undef TRACE_INCLUDE_FILE | ||
697 | #define TRACE_INCLUDE_FILE trace | ||
698 | |||
556 | /* This part must be outside protection */ | 699 | /* This part must be outside protection */ |
557 | #include <trace/define_trace.h> | 700 | #include <trace/define_trace.h> |
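Two of the new events above are worth calling out: kvm_nested_intercepts, which nested_svm_vmrun() feeds with the raw L1 intercept masks (the call is visible at the top of the svm.c hunk earlier in this diff), and kvm_emulate_insn, which snapshots up to 15 opcode bytes from the decoder's fetch cache together with a CPU-mode flag and is reached through the two wrapper macros. Call sites, roughly (the emulator-side locations are assumed, not shown in this diff):

    /* nested_svm_vmrun(), with the nested VMCB mapped */
    trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read,
                                nested_vmcb->control.intercept_cr_write,
                                nested_vmcb->control.intercept_exceptions,
                                nested_vmcb->control.intercept);

    /* x86 emulator glue (assumed) */
    trace_kvm_emulate_insn_start(vcpu);     /* after the instruction is fetched */
    trace_kvm_emulate_insn_failed(vcpu);    /* when emulation has to give up */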
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index edca080407a5..859a01a07dbf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/tboot.h> | ||
30 | #include "kvm_cache_regs.h" | 31 | #include "kvm_cache_regs.h" |
31 | #include "x86.h" | 32 | #include "x86.h" |
32 | 33 | ||
@@ -98,6 +99,8 @@ module_param(ple_gap, int, S_IRUGO); | |||
98 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | 99 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
99 | module_param(ple_window, int, S_IRUGO); | 100 | module_param(ple_window, int, S_IRUGO); |
100 | 101 | ||
102 | #define NR_AUTOLOAD_MSRS 1 | ||
103 | |||
101 | struct vmcs { | 104 | struct vmcs { |
102 | u32 revision_id; | 105 | u32 revision_id; |
103 | u32 abort; | 106 | u32 abort; |
@@ -125,6 +128,11 @@ struct vcpu_vmx { | |||
125 | u64 msr_guest_kernel_gs_base; | 128 | u64 msr_guest_kernel_gs_base; |
126 | #endif | 129 | #endif |
127 | struct vmcs *vmcs; | 130 | struct vmcs *vmcs; |
131 | struct msr_autoload { | ||
132 | unsigned nr; | ||
133 | struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; | ||
134 | struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; | ||
135 | } msr_autoload; | ||
128 | struct { | 136 | struct { |
129 | int loaded; | 137 | int loaded; |
130 | u16 fs_sel, gs_sel, ldt_sel; | 138 | u16 fs_sel, gs_sel, ldt_sel; |
@@ -234,56 +242,56 @@ static const u32 vmx_msr_index[] = { | |||
234 | }; | 242 | }; |
235 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 243 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
236 | 244 | ||
237 | static inline int is_page_fault(u32 intr_info) | 245 | static inline bool is_page_fault(u32 intr_info) |
238 | { | 246 | { |
239 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 247 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
240 | INTR_INFO_VALID_MASK)) == | 248 | INTR_INFO_VALID_MASK)) == |
241 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); | 249 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); |
242 | } | 250 | } |
243 | 251 | ||
244 | static inline int is_no_device(u32 intr_info) | 252 | static inline bool is_no_device(u32 intr_info) |
245 | { | 253 | { |
246 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 254 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
247 | INTR_INFO_VALID_MASK)) == | 255 | INTR_INFO_VALID_MASK)) == |
248 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); | 256 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); |
249 | } | 257 | } |
250 | 258 | ||
251 | static inline int is_invalid_opcode(u32 intr_info) | 259 | static inline bool is_invalid_opcode(u32 intr_info) |
252 | { | 260 | { |
253 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 261 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
254 | INTR_INFO_VALID_MASK)) == | 262 | INTR_INFO_VALID_MASK)) == |
255 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); | 263 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); |
256 | } | 264 | } |
257 | 265 | ||
258 | static inline int is_external_interrupt(u32 intr_info) | 266 | static inline bool is_external_interrupt(u32 intr_info) |
259 | { | 267 | { |
260 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 268 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
261 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 269 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
262 | } | 270 | } |
263 | 271 | ||
264 | static inline int is_machine_check(u32 intr_info) | 272 | static inline bool is_machine_check(u32 intr_info) |
265 | { | 273 | { |
266 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 274 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
267 | INTR_INFO_VALID_MASK)) == | 275 | INTR_INFO_VALID_MASK)) == |
268 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); | 276 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); |
269 | } | 277 | } |
270 | 278 | ||
271 | static inline int cpu_has_vmx_msr_bitmap(void) | 279 | static inline bool cpu_has_vmx_msr_bitmap(void) |
272 | { | 280 | { |
273 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; | 281 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; |
274 | } | 282 | } |
275 | 283 | ||
276 | static inline int cpu_has_vmx_tpr_shadow(void) | 284 | static inline bool cpu_has_vmx_tpr_shadow(void) |
277 | { | 285 | { |
278 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; | 286 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
279 | } | 287 | } |
280 | 288 | ||
281 | static inline int vm_need_tpr_shadow(struct kvm *kvm) | 289 | static inline bool vm_need_tpr_shadow(struct kvm *kvm) |
282 | { | 290 | { |
283 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); | 291 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); |
284 | } | 292 | } |
285 | 293 | ||
286 | static inline int cpu_has_secondary_exec_ctrls(void) | 294 | static inline bool cpu_has_secondary_exec_ctrls(void) |
287 | { | 295 | { |
288 | return vmcs_config.cpu_based_exec_ctrl & | 296 | return vmcs_config.cpu_based_exec_ctrl & |
289 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 297 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
@@ -303,80 +311,80 @@ static inline bool cpu_has_vmx_flexpriority(void) | |||
303 | 311 | ||
304 | static inline bool cpu_has_vmx_ept_execute_only(void) | 312 | static inline bool cpu_has_vmx_ept_execute_only(void) |
305 | { | 313 | { |
306 | return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); | 314 | return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; |
307 | } | 315 | } |
308 | 316 | ||
309 | static inline bool cpu_has_vmx_eptp_uncacheable(void) | 317 | static inline bool cpu_has_vmx_eptp_uncacheable(void) |
310 | { | 318 | { |
311 | return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); | 319 | return vmx_capability.ept & VMX_EPTP_UC_BIT; |
312 | } | 320 | } |
313 | 321 | ||
314 | static inline bool cpu_has_vmx_eptp_writeback(void) | 322 | static inline bool cpu_has_vmx_eptp_writeback(void) |
315 | { | 323 | { |
316 | return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); | 324 | return vmx_capability.ept & VMX_EPTP_WB_BIT; |
317 | } | 325 | } |
318 | 326 | ||
319 | static inline bool cpu_has_vmx_ept_2m_page(void) | 327 | static inline bool cpu_has_vmx_ept_2m_page(void) |
320 | { | 328 | { |
321 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 329 | return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; |
322 | } | 330 | } |
323 | 331 | ||
324 | static inline bool cpu_has_vmx_ept_1g_page(void) | 332 | static inline bool cpu_has_vmx_ept_1g_page(void) |
325 | { | 333 | { |
326 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | 334 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; |
327 | } | 335 | } |
328 | 336 | ||
329 | static inline int cpu_has_vmx_invept_individual_addr(void) | 337 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
330 | { | 338 | { |
331 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 339 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
332 | } | 340 | } |
333 | 341 | ||
334 | static inline int cpu_has_vmx_invept_context(void) | 342 | static inline bool cpu_has_vmx_invept_context(void) |
335 | { | 343 | { |
336 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); | 344 | return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; |
337 | } | 345 | } |
338 | 346 | ||
339 | static inline int cpu_has_vmx_invept_global(void) | 347 | static inline bool cpu_has_vmx_invept_global(void) |
340 | { | 348 | { |
341 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); | 349 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; |
342 | } | 350 | } |
343 | 351 | ||
344 | static inline int cpu_has_vmx_ept(void) | 352 | static inline bool cpu_has_vmx_ept(void) |
345 | { | 353 | { |
346 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 354 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
347 | SECONDARY_EXEC_ENABLE_EPT; | 355 | SECONDARY_EXEC_ENABLE_EPT; |
348 | } | 356 | } |
349 | 357 | ||
350 | static inline int cpu_has_vmx_unrestricted_guest(void) | 358 | static inline bool cpu_has_vmx_unrestricted_guest(void) |
351 | { | 359 | { |
352 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 360 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
353 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 361 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
354 | } | 362 | } |
355 | 363 | ||
356 | static inline int cpu_has_vmx_ple(void) | 364 | static inline bool cpu_has_vmx_ple(void) |
357 | { | 365 | { |
358 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 366 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
359 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 367 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
360 | } | 368 | } |
361 | 369 | ||
362 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 370 | static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) |
363 | { | 371 | { |
364 | return flexpriority_enabled && irqchip_in_kernel(kvm); | 372 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
365 | } | 373 | } |
366 | 374 | ||
367 | static inline int cpu_has_vmx_vpid(void) | 375 | static inline bool cpu_has_vmx_vpid(void) |
368 | { | 376 | { |
369 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 377 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
370 | SECONDARY_EXEC_ENABLE_VPID; | 378 | SECONDARY_EXEC_ENABLE_VPID; |
371 | } | 379 | } |
372 | 380 | ||
373 | static inline int cpu_has_vmx_rdtscp(void) | 381 | static inline bool cpu_has_vmx_rdtscp(void) |
374 | { | 382 | { |
375 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 383 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
376 | SECONDARY_EXEC_RDTSCP; | 384 | SECONDARY_EXEC_RDTSCP; |
377 | } | 385 | } |
378 | 386 | ||
379 | static inline int cpu_has_virtual_nmis(void) | 387 | static inline bool cpu_has_virtual_nmis(void) |
380 | { | 388 | { |
381 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 389 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
382 | } | 390 | } |
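The vmx.c helpers above only answer yes/no questions, so their return type changes from int to bool; conversion to _Bool already normalizes any non-zero bit pattern to 1, which is why the explicit !!(...) wrappers can be dropped at the same time. For example:

    static inline bool cpu_has_vmx_ept_2m_page(void)
    {
            /* any set bit converts to true; no !! needed */
            return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
    }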
@@ -595,16 +603,56 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
595 | vmcs_write32(EXCEPTION_BITMAP, eb); | 603 | vmcs_write32(EXCEPTION_BITMAP, eb); |
596 | } | 604 | } |
597 | 605 | ||
606 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | ||
607 | { | ||
608 | unsigned i; | ||
609 | struct msr_autoload *m = &vmx->msr_autoload; | ||
610 | |||
611 | for (i = 0; i < m->nr; ++i) | ||
612 | if (m->guest[i].index == msr) | ||
613 | break; | ||
614 | |||
615 | if (i == m->nr) | ||
616 | return; | ||
617 | --m->nr; | ||
618 | m->guest[i] = m->guest[m->nr]; | ||
619 | m->host[i] = m->host[m->nr]; | ||
620 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
621 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
622 | } | ||
623 | |||
624 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | ||
625 | u64 guest_val, u64 host_val) | ||
626 | { | ||
627 | unsigned i; | ||
628 | struct msr_autoload *m = &vmx->msr_autoload; | ||
629 | |||
630 | for (i = 0; i < m->nr; ++i) | ||
631 | if (m->guest[i].index == msr) | ||
632 | break; | ||
633 | |||
634 | if (i == m->nr) { | ||
635 | ++m->nr; | ||
636 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
637 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
638 | } | ||
639 | |||
640 | m->guest[i].index = msr; | ||
641 | m->guest[i].value = guest_val; | ||
642 | m->host[i].index = msr; | ||
643 | m->host[i].value = host_val; | ||
644 | } | ||
645 | |||
598 | static void reload_tss(void) | 646 | static void reload_tss(void) |
599 | { | 647 | { |
600 | /* | 648 | /* |
601 | * VT restores TR but not its size. Useless. | 649 | * VT restores TR but not its size. Useless. |
602 | */ | 650 | */ |
603 | struct descriptor_table gdt; | 651 | struct desc_ptr gdt; |
604 | struct desc_struct *descs; | 652 | struct desc_struct *descs; |
605 | 653 | ||
606 | kvm_get_gdt(&gdt); | 654 | native_store_gdt(&gdt); |
607 | descs = (void *)gdt.base; | 655 | descs = (void *)gdt.address; |
608 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | 656 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ |
609 | load_TR_desc(); | 657 | load_TR_desc(); |
610 | } | 658 | } |
@@ -631,9 +679,57 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
631 | guest_efer |= host_efer & ignore_bits; | 679 | guest_efer |= host_efer & ignore_bits; |
632 | vmx->guest_msrs[efer_offset].data = guest_efer; | 680 | vmx->guest_msrs[efer_offset].data = guest_efer; |
633 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; | 681 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
682 | |||
683 | clear_atomic_switch_msr(vmx, MSR_EFER); | ||
684 | /* On ept, can't emulate nx, and must switch nx atomically */ | ||
685 | if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { | ||
686 | guest_efer = vmx->vcpu.arch.efer; | ||
687 | if (!(guest_efer & EFER_LMA)) | ||
688 | guest_efer &= ~EFER_LME; | ||
689 | add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); | ||
690 | return false; | ||
691 | } | ||
692 | |||
634 | return true; | 693 | return true; |
635 | } | 694 | } |
636 | 695 | ||
696 | static unsigned long segment_base(u16 selector) | ||
697 | { | ||
698 | struct desc_ptr gdt; | ||
699 | struct desc_struct *d; | ||
700 | unsigned long table_base; | ||
701 | unsigned long v; | ||
702 | |||
703 | if (!(selector & ~3)) | ||
704 | return 0; | ||
705 | |||
706 | native_store_gdt(&gdt); | ||
707 | table_base = gdt.address; | ||
708 | |||
709 | if (selector & 4) { /* from ldt */ | ||
710 | u16 ldt_selector = kvm_read_ldt(); | ||
711 | |||
712 | if (!(ldt_selector & ~3)) | ||
713 | return 0; | ||
714 | |||
715 | table_base = segment_base(ldt_selector); | ||
716 | } | ||
717 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
718 | v = get_desc_base(d); | ||
719 | #ifdef CONFIG_X86_64 | ||
720 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
721 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
722 | #endif | ||
723 | return v; | ||
724 | } | ||
725 | |||
726 | static inline unsigned long kvm_read_tr_base(void) | ||
727 | { | ||
728 | u16 tr; | ||
729 | asm("str %0" : "=g"(tr)); | ||
730 | return segment_base(tr); | ||
731 | } | ||
732 | |||
637 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 733 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
638 | { | 734 | { |
639 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 735 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
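The msr_autoload lists introduced above are backed by the VMX entry/exit MSR-load areas (the helpers bump VM_ENTRY_MSR_LOAD_COUNT and VM_EXIT_MSR_LOAD_COUNT), so the CPU itself swaps the listed MSRs on every transition. The EFER handling in this hunk is the first user: with EPT, when guest and host disagree on NX, EFER is switched atomically instead of through the lazy shared-MSR path. Condensed:

    clear_atomic_switch_msr(vmx, MSR_EFER);
    /* on EPT we cannot emulate NX, so EFER has to be switched atomically */
    if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
            u64 guest_efer = vmx->vcpu.arch.efer;

            if (!(guest_efer & EFER_LMA))
                    guest_efer &= ~EFER_LME;
            add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
            return false;   /* no lazy EFER switching needed */
    }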
@@ -758,7 +854,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
758 | } | 854 | } |
759 | 855 | ||
760 | if (vcpu->cpu != cpu) { | 856 | if (vcpu->cpu != cpu) { |
761 | struct descriptor_table dt; | 857 | struct desc_ptr dt; |
762 | unsigned long sysenter_esp; | 858 | unsigned long sysenter_esp; |
763 | 859 | ||
764 | vcpu->cpu = cpu; | 860 | vcpu->cpu = cpu; |
@@ -767,8 +863,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
767 | * processors. | 863 | * processors. |
768 | */ | 864 | */ |
769 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ | 865 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ |
770 | kvm_get_gdt(&dt); | 866 | native_store_gdt(&dt); |
771 | vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ | 867 | vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */ |
772 | 868 | ||
773 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 869 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
774 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 870 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
@@ -846,9 +942,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
846 | int ret = 0; | 942 | int ret = 0; |
847 | 943 | ||
848 | if (interruptibility & GUEST_INTR_STATE_STI) | 944 | if (interruptibility & GUEST_INTR_STATE_STI) |
849 | ret |= X86_SHADOW_INT_STI; | 945 | ret |= KVM_X86_SHADOW_INT_STI; |
850 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) | 946 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) |
851 | ret |= X86_SHADOW_INT_MOV_SS; | 947 | ret |= KVM_X86_SHADOW_INT_MOV_SS; |
852 | 948 | ||
853 | return ret & mask; | 949 | return ret & mask; |
854 | } | 950 | } |
@@ -860,9 +956,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
860 | 956 | ||
861 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); | 957 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); |
862 | 958 | ||
863 | if (mask & X86_SHADOW_INT_MOV_SS) | 959 | if (mask & KVM_X86_SHADOW_INT_MOV_SS) |
864 | interruptibility |= GUEST_INTR_STATE_MOV_SS; | 960 | interruptibility |= GUEST_INTR_STATE_MOV_SS; |
865 | if (mask & X86_SHADOW_INT_STI) | 961 | else if (mask & KVM_X86_SHADOW_INT_STI) |
866 | interruptibility |= GUEST_INTR_STATE_STI; | 962 | interruptibility |= GUEST_INTR_STATE_STI; |
867 | 963 | ||
868 | if ((interruptibility != interruptibility_old)) | 964 | if ((interruptibility != interruptibility_old)) |
@@ -882,7 +978,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
882 | } | 978 | } |
883 | 979 | ||
884 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 980 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
885 | bool has_error_code, u32 error_code) | 981 | bool has_error_code, u32 error_code, |
982 | bool reinject) | ||
886 | { | 983 | { |
887 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 984 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
888 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 985 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
@@ -1176,9 +1273,16 @@ static __init int vmx_disabled_by_bios(void) | |||
1176 | u64 msr; | 1273 | u64 msr; |
1177 | 1274 | ||
1178 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1275 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
1179 | return (msr & (FEATURE_CONTROL_LOCKED | | 1276 | if (msr & FEATURE_CONTROL_LOCKED) { |
1180 | FEATURE_CONTROL_VMXON_ENABLED)) | 1277 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) |
1181 | == FEATURE_CONTROL_LOCKED; | 1278 | && tboot_enabled()) |
1279 | return 1; | ||
1280 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | ||
1281 | && !tboot_enabled()) | ||
1282 | return 1; | ||
1283 | } | ||
1284 | |||
1285 | return 0; | ||
1182 | /* locked but not enabled */ | 1286 | /* locked but not enabled */ |
1183 | } | 1287 | } |
1184 | 1288 | ||
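A minimal userspace sketch of the new vmx_disabled_by_bios() decision above (hardware_enable() below sets the same bits before VMXON). The MSR value and tboot state are passed in rather than read from hardware, and the bit positions follow the documented IA32_FEATURE_CONTROL layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FC_LOCKED		(1ULL << 0)
#define FC_VMXON_INSIDE_SMX	(1ULL << 1)
#define FC_VMXON_OUTSIDE_SMX	(1ULL << 2)

static bool vmx_disabled_by_bios(uint64_t msr, bool tboot)
{
	if (msr & FC_LOCKED) {
		/* Under tboot (SMX), the in-SMX enable bit must be set. */
		if (tboot && !(msr & FC_VMXON_INSIDE_SMX))
			return true;
		/* Outside SMX, the regular enable bit must be set. */
		if (!tboot && !(msr & FC_VMXON_OUTSIDE_SMX))
			return true;
	}
	return false;	/* unlocked, or locked with the right enable bit */
}

int main(void)
{
	printf("%d\n", vmx_disabled_by_bios(FC_LOCKED | FC_VMXON_OUTSIDE_SMX, false));	/* 0 */
	printf("%d\n", vmx_disabled_by_bios(FC_LOCKED, true));				/* 1 */
	return 0;
}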
@@ -1186,21 +1290,23 @@ static int hardware_enable(void *garbage) | |||
1186 | { | 1290 | { |
1187 | int cpu = raw_smp_processor_id(); | 1291 | int cpu = raw_smp_processor_id(); |
1188 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1292 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
1189 | u64 old; | 1293 | u64 old, test_bits; |
1190 | 1294 | ||
1191 | if (read_cr4() & X86_CR4_VMXE) | 1295 | if (read_cr4() & X86_CR4_VMXE) |
1192 | return -EBUSY; | 1296 | return -EBUSY; |
1193 | 1297 | ||
1194 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1298 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
1195 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1299 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
1196 | if ((old & (FEATURE_CONTROL_LOCKED | | 1300 | |
1197 | FEATURE_CONTROL_VMXON_ENABLED)) | 1301 | test_bits = FEATURE_CONTROL_LOCKED; |
1198 | != (FEATURE_CONTROL_LOCKED | | 1302 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
1199 | FEATURE_CONTROL_VMXON_ENABLED)) | 1303 | if (tboot_enabled()) |
1304 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; | ||
1305 | |||
1306 | if ((old & test_bits) != test_bits) { | ||
1200 | /* enable and lock */ | 1307 | /* enable and lock */ |
1201 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | | 1308 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); |
1202 | FEATURE_CONTROL_LOCKED | | 1309 | } |
1203 | FEATURE_CONTROL_VMXON_ENABLED); | ||
1204 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1310 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
1205 | asm volatile (ASM_VMX_VMXON_RAX | 1311 | asm volatile (ASM_VMX_VMXON_RAX |
1206 | : : "a"(&phys_addr), "m"(phys_addr) | 1312 | : : "a"(&phys_addr), "m"(phys_addr) |
@@ -1521,7 +1627,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
1521 | struct kvm_memslots *slots; | 1627 | struct kvm_memslots *slots; |
1522 | gfn_t base_gfn; | 1628 | gfn_t base_gfn; |
1523 | 1629 | ||
1524 | slots = rcu_dereference(kvm->memslots); | 1630 | slots = kvm_memslots(kvm); |
1525 | base_gfn = kvm->memslots->memslots[0].base_gfn + | 1631 | base_gfn = kvm->memslots->memslots[0].base_gfn + |
1526 | kvm->memslots->memslots[0].npages - 3; | 1632 | kvm->memslots->memslots[0].npages - 3; |
1527 | return base_gfn << PAGE_SHIFT; | 1633 | return base_gfn << PAGE_SHIFT; |
@@ -1649,6 +1755,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
1649 | vmcs_write32(VM_ENTRY_CONTROLS, | 1755 | vmcs_write32(VM_ENTRY_CONTROLS, |
1650 | vmcs_read32(VM_ENTRY_CONTROLS) | 1756 | vmcs_read32(VM_ENTRY_CONTROLS) |
1651 | & ~VM_ENTRY_IA32E_MODE); | 1757 | & ~VM_ENTRY_IA32E_MODE); |
1758 | vmx_set_efer(vcpu, vcpu->arch.efer); | ||
1652 | } | 1759 | } |
1653 | 1760 | ||
1654 | #endif | 1761 | #endif |
@@ -1934,28 +2041,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | |||
1934 | *l = (ar >> 13) & 1; | 2041 | *l = (ar >> 13) & 1; |
1935 | } | 2042 | } |
1936 | 2043 | ||
1937 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2044 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1938 | { | 2045 | { |
1939 | dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); | 2046 | dt->size = vmcs_read32(GUEST_IDTR_LIMIT); |
1940 | dt->base = vmcs_readl(GUEST_IDTR_BASE); | 2047 | dt->address = vmcs_readl(GUEST_IDTR_BASE); |
1941 | } | 2048 | } |
1942 | 2049 | ||
1943 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2050 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1944 | { | 2051 | { |
1945 | vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); | 2052 | vmcs_write32(GUEST_IDTR_LIMIT, dt->size); |
1946 | vmcs_writel(GUEST_IDTR_BASE, dt->base); | 2053 | vmcs_writel(GUEST_IDTR_BASE, dt->address); |
1947 | } | 2054 | } |
1948 | 2055 | ||
1949 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2056 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1950 | { | 2057 | { |
1951 | dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); | 2058 | dt->size = vmcs_read32(GUEST_GDTR_LIMIT); |
1952 | dt->base = vmcs_readl(GUEST_GDTR_BASE); | 2059 | dt->address = vmcs_readl(GUEST_GDTR_BASE); |
1953 | } | 2060 | } |
1954 | 2061 | ||
1955 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2062 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
1956 | { | 2063 | { |
1957 | vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); | 2064 | vmcs_write32(GUEST_GDTR_LIMIT, dt->size); |
1958 | vmcs_writel(GUEST_GDTR_BASE, dt->base); | 2065 | vmcs_writel(GUEST_GDTR_BASE, dt->address); |
1959 | } | 2066 | } |
1960 | 2067 | ||
1961 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | 2068 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) |
@@ -2296,6 +2403,16 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
2296 | spin_unlock(&vmx_vpid_lock); | 2403 | spin_unlock(&vmx_vpid_lock); |
2297 | } | 2404 | } |
2298 | 2405 | ||
2406 | static void free_vpid(struct vcpu_vmx *vmx) | ||
2407 | { | ||
2408 | if (!enable_vpid) | ||
2409 | return; | ||
2410 | spin_lock(&vmx_vpid_lock); | ||
2411 | if (vmx->vpid != 0) | ||
2412 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
2413 | spin_unlock(&vmx_vpid_lock); | ||
2414 | } | ||
2415 | |||
2299 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | 2416 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) |
2300 | { | 2417 | { |
2301 | int f = sizeof(unsigned long); | 2418 | int f = sizeof(unsigned long); |
@@ -2334,7 +2451,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2334 | u32 junk; | 2451 | u32 junk; |
2335 | u64 host_pat, tsc_this, tsc_base; | 2452 | u64 host_pat, tsc_this, tsc_base; |
2336 | unsigned long a; | 2453 | unsigned long a; |
2337 | struct descriptor_table dt; | 2454 | struct desc_ptr dt; |
2338 | int i; | 2455 | int i; |
2339 | unsigned long kvm_vmx_return; | 2456 | unsigned long kvm_vmx_return; |
2340 | u32 exec_control; | 2457 | u32 exec_control; |
@@ -2415,14 +2532,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2415 | 2532 | ||
2416 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | 2533 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ |
2417 | 2534 | ||
2418 | kvm_get_idt(&dt); | 2535 | native_store_idt(&dt); |
2419 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ | 2536 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
2420 | 2537 | ||
2421 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); | 2538 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); |
2422 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ | 2539 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ |
2423 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | 2540 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); |
2424 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); | 2541 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); |
2542 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); | ||
2425 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); | 2543 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); |
2544 | vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); | ||
2426 | 2545 | ||
2427 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); | 2546 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); |
2428 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); | 2547 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); |
@@ -2947,22 +3066,20 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
2947 | int size, in, string; | 3066 | int size, in, string; |
2948 | unsigned port; | 3067 | unsigned port; |
2949 | 3068 | ||
2950 | ++vcpu->stat.io_exits; | ||
2951 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3069 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
2952 | string = (exit_qualification & 16) != 0; | 3070 | string = (exit_qualification & 16) != 0; |
3071 | in = (exit_qualification & 8) != 0; | ||
2953 | 3072 | ||
2954 | if (string) { | 3073 | ++vcpu->stat.io_exits; |
2955 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) | ||
2956 | return 0; | ||
2957 | return 1; | ||
2958 | } | ||
2959 | 3074 | ||
2960 | size = (exit_qualification & 7) + 1; | 3075 | if (string || in) |
2961 | in = (exit_qualification & 8) != 0; | 3076 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); |
2962 | port = exit_qualification >> 16; | ||
2963 | 3077 | ||
3078 | port = exit_qualification >> 16; | ||
3079 | size = (exit_qualification & 7) + 1; | ||
2964 | skip_emulated_instruction(vcpu); | 3080 | skip_emulated_instruction(vcpu); |
2965 | return kvm_emulate_pio(vcpu, in, size, port); | 3081 | |
3082 | return kvm_fast_pio_out(vcpu, size, port); | ||
2966 | } | 3083 | } |
2967 | 3084 | ||
2968 | static void | 3085 | static void |
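In the rewritten handle_io() above, only string and IN exits still go through emulate_instruction(); plain OUT is decoded directly and handed to the new kvm_fast_pio_out(). A small sketch of decoding the exit qualification the same way (same bit layout as the code above):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct io_exit {
	unsigned size;		/* access size in bytes: 1, 2 or 4 */
	bool in;		/* IN vs OUT */
	bool string;		/* INS/OUTS */
	unsigned short port;
};

static struct io_exit decode_io_exit(unsigned long q)
{
	struct io_exit e = {
		.size	= (q & 7) + 1,
		.in	= (q & 8) != 0,
		.string	= (q & 16) != 0,
		.port	= q >> 16,
	};
	return e;
}

int main(void)
{
	/* e.g. "out %al, $0x80": size 1, OUT, non-string, port 0x80 */
	struct io_exit e = decode_io_exit(0x00800000UL);

	printf("port %#x size %u %s%s\n", e.port, e.size,
	       e.in ? "in" : "out", e.string ? " (string)" : "");
	return 0;
}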
@@ -3053,19 +3170,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3053 | return 0; | 3170 | return 0; |
3054 | } | 3171 | } |
3055 | 3172 | ||
3056 | static int check_dr_alias(struct kvm_vcpu *vcpu) | ||
3057 | { | ||
3058 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
3059 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3060 | return -1; | ||
3061 | } | ||
3062 | return 0; | ||
3063 | } | ||
3064 | |||
3065 | static int handle_dr(struct kvm_vcpu *vcpu) | 3173 | static int handle_dr(struct kvm_vcpu *vcpu) |
3066 | { | 3174 | { |
3067 | unsigned long exit_qualification; | 3175 | unsigned long exit_qualification; |
3068 | unsigned long val; | ||
3069 | int dr, reg; | 3176 | int dr, reg; |
3070 | 3177 | ||
3071 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ | 3178 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ |
@@ -3100,67 +3207,20 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
3100 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; | 3207 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; |
3101 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); | 3208 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); |
3102 | if (exit_qualification & TYPE_MOV_FROM_DR) { | 3209 | if (exit_qualification & TYPE_MOV_FROM_DR) { |
3103 | switch (dr) { | 3210 | unsigned long val; |
3104 | case 0 ... 3: | 3211 | if (!kvm_get_dr(vcpu, dr, &val)) |
3105 | val = vcpu->arch.db[dr]; | 3212 | kvm_register_write(vcpu, reg, val); |
3106 | break; | 3213 | } else |
3107 | case 4: | 3214 | kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); |
3108 | if (check_dr_alias(vcpu) < 0) | ||
3109 | return 1; | ||
3110 | /* fall through */ | ||
3111 | case 6: | ||
3112 | val = vcpu->arch.dr6; | ||
3113 | break; | ||
3114 | case 5: | ||
3115 | if (check_dr_alias(vcpu) < 0) | ||
3116 | return 1; | ||
3117 | /* fall through */ | ||
3118 | default: /* 7 */ | ||
3119 | val = vcpu->arch.dr7; | ||
3120 | break; | ||
3121 | } | ||
3122 | kvm_register_write(vcpu, reg, val); | ||
3123 | } else { | ||
3124 | val = vcpu->arch.regs[reg]; | ||
3125 | switch (dr) { | ||
3126 | case 0 ... 3: | ||
3127 | vcpu->arch.db[dr] = val; | ||
3128 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
3129 | vcpu->arch.eff_db[dr] = val; | ||
3130 | break; | ||
3131 | case 4: | ||
3132 | if (check_dr_alias(vcpu) < 0) | ||
3133 | return 1; | ||
3134 | /* fall through */ | ||
3135 | case 6: | ||
3136 | if (val & 0xffffffff00000000ULL) { | ||
3137 | kvm_inject_gp(vcpu, 0); | ||
3138 | return 1; | ||
3139 | } | ||
3140 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
3141 | break; | ||
3142 | case 5: | ||
3143 | if (check_dr_alias(vcpu) < 0) | ||
3144 | return 1; | ||
3145 | /* fall through */ | ||
3146 | default: /* 7 */ | ||
3147 | if (val & 0xffffffff00000000ULL) { | ||
3148 | kvm_inject_gp(vcpu, 0); | ||
3149 | return 1; | ||
3150 | } | ||
3151 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
3152 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
3153 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | ||
3154 | vcpu->arch.switch_db_regs = | ||
3155 | (val & DR7_BP_EN_MASK); | ||
3156 | } | ||
3157 | break; | ||
3158 | } | ||
3159 | } | ||
3160 | skip_emulated_instruction(vcpu); | 3215 | skip_emulated_instruction(vcpu); |
3161 | return 1; | 3216 | return 1; |
3162 | } | 3217 | } |
3163 | 3218 | ||
3219 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | ||
3220 | { | ||
3221 | vmcs_writel(GUEST_DR7, val); | ||
3222 | } | ||
3223 | |||
3164 | static int handle_cpuid(struct kvm_vcpu *vcpu) | 3224 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
3165 | { | 3225 | { |
3166 | kvm_emulate_cpuid(vcpu); | 3226 | kvm_emulate_cpuid(vcpu); |
@@ -3292,6 +3352,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3292 | { | 3352 | { |
3293 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3353 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3294 | unsigned long exit_qualification; | 3354 | unsigned long exit_qualification; |
3355 | bool has_error_code = false; | ||
3356 | u32 error_code = 0; | ||
3295 | u16 tss_selector; | 3357 | u16 tss_selector; |
3296 | int reason, type, idt_v; | 3358 | int reason, type, idt_v; |
3297 | 3359 | ||
@@ -3314,6 +3376,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3314 | kvm_clear_interrupt_queue(vcpu); | 3376 | kvm_clear_interrupt_queue(vcpu); |
3315 | break; | 3377 | break; |
3316 | case INTR_TYPE_HARD_EXCEPTION: | 3378 | case INTR_TYPE_HARD_EXCEPTION: |
3379 | if (vmx->idt_vectoring_info & | ||
3380 | VECTORING_INFO_DELIVER_CODE_MASK) { | ||
3381 | has_error_code = true; | ||
3382 | error_code = | ||
3383 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
3384 | } | ||
3385 | /* fall through */ | ||
3317 | case INTR_TYPE_SOFT_EXCEPTION: | 3386 | case INTR_TYPE_SOFT_EXCEPTION: |
3318 | kvm_clear_exception_queue(vcpu); | 3387 | kvm_clear_exception_queue(vcpu); |
3319 | break; | 3388 | break; |
@@ -3328,8 +3397,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
3328 | type != INTR_TYPE_NMI_INTR)) | 3397 | type != INTR_TYPE_NMI_INTR)) |
3329 | skip_emulated_instruction(vcpu); | 3398 | skip_emulated_instruction(vcpu); |
3330 | 3399 | ||
3331 | if (!kvm_task_switch(vcpu, tss_selector, reason)) | 3400 | if (kvm_task_switch(vcpu, tss_selector, reason, |
3401 | has_error_code, error_code) == EMULATE_FAIL) { | ||
3402 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
3403 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3404 | vcpu->run->internal.ndata = 0; | ||
3332 | return 0; | 3405 | return 0; |
3406 | } | ||
3333 | 3407 | ||
3334 | /* clear all local breakpoint enable flags */ | 3408 | /* clear all local breakpoint enable flags */ |
3335 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); | 3409 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); |
@@ -3574,7 +3648,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3574 | u32 exit_reason = vmx->exit_reason; | 3648 | u32 exit_reason = vmx->exit_reason; |
3575 | u32 vectoring_info = vmx->idt_vectoring_info; | 3649 | u32 vectoring_info = vmx->idt_vectoring_info; |
3576 | 3650 | ||
3577 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3651 | trace_kvm_exit(exit_reason, vcpu); |
3578 | 3652 | ||
3579 | /* If guest state is invalid, start emulating */ | 3653 | /* If guest state is invalid, start emulating */ |
3580 | if (vmx->emulation_required && emulate_invalid_guest_state) | 3654 | if (vmx->emulation_required && emulate_invalid_guest_state) |
@@ -3923,10 +3997,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
3923 | { | 3997 | { |
3924 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3998 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3925 | 3999 | ||
3926 | spin_lock(&vmx_vpid_lock); | 4000 | free_vpid(vmx); |
3927 | if (vmx->vpid != 0) | ||
3928 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
3929 | spin_unlock(&vmx_vpid_lock); | ||
3930 | vmx_free_vmcs(vcpu); | 4001 | vmx_free_vmcs(vcpu); |
3931 | kfree(vmx->guest_msrs); | 4002 | kfree(vmx->guest_msrs); |
3932 | kvm_vcpu_uninit(vcpu); | 4003 | kvm_vcpu_uninit(vcpu); |
@@ -3988,6 +4059,7 @@ free_msrs: | |||
3988 | uninit_vcpu: | 4059 | uninit_vcpu: |
3989 | kvm_vcpu_uninit(&vmx->vcpu); | 4060 | kvm_vcpu_uninit(&vmx->vcpu); |
3990 | free_vcpu: | 4061 | free_vcpu: |
4062 | free_vpid(vmx); | ||
3991 | kmem_cache_free(kvm_vcpu_cache, vmx); | 4063 | kmem_cache_free(kvm_vcpu_cache, vmx); |
3992 | return ERR_PTR(err); | 4064 | return ERR_PTR(err); |
3993 | } | 4065 | } |
@@ -4118,6 +4190,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
4118 | } | 4190 | } |
4119 | } | 4191 | } |
4120 | 4192 | ||
4193 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
4194 | { | ||
4195 | } | ||
4196 | |||
4121 | static struct kvm_x86_ops vmx_x86_ops = { | 4197 | static struct kvm_x86_ops vmx_x86_ops = { |
4122 | .cpu_has_kvm_support = cpu_has_kvm_support, | 4198 | .cpu_has_kvm_support = cpu_has_kvm_support, |
4123 | .disabled_by_bios = vmx_disabled_by_bios, | 4199 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -4154,6 +4230,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4154 | .set_idt = vmx_set_idt, | 4230 | .set_idt = vmx_set_idt, |
4155 | .get_gdt = vmx_get_gdt, | 4231 | .get_gdt = vmx_get_gdt, |
4156 | .set_gdt = vmx_set_gdt, | 4232 | .set_gdt = vmx_set_gdt, |
4233 | .set_dr7 = vmx_set_dr7, | ||
4157 | .cache_reg = vmx_cache_reg, | 4234 | .cache_reg = vmx_cache_reg, |
4158 | .get_rflags = vmx_get_rflags, | 4235 | .get_rflags = vmx_get_rflags, |
4159 | .set_rflags = vmx_set_rflags, | 4236 | .set_rflags = vmx_set_rflags, |
@@ -4189,6 +4266,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4189 | .cpuid_update = vmx_cpuid_update, | 4266 | .cpuid_update = vmx_cpuid_update, |
4190 | 4267 | ||
4191 | .rdtscp_supported = vmx_rdtscp_supported, | 4268 | .rdtscp_supported = vmx_rdtscp_supported, |
4269 | |||
4270 | .set_supported_cpuid = vmx_set_supported_cpuid, | ||
4192 | }; | 4271 | }; |
4193 | 4272 | ||
4194 | static int __init vmx_init(void) | 4273 | static int __init vmx_init(void) |
@@ -4236,7 +4315,8 @@ static int __init vmx_init(void) | |||
4236 | 4315 | ||
4237 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | 4316 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
4238 | 4317 | ||
4239 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); | 4318 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), |
4319 | __alignof__(struct vcpu_vmx), THIS_MODULE); | ||
4240 | if (r) | 4320 | if (r) |
4241 | goto out3; | 4321 | goto out3; |
4242 | 4322 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dd9bc8fb81ab..05d571f6f196 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -42,7 +42,7 @@ | |||
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/perf_event.h> | 43 | #include <linux/perf_event.h> |
44 | #include <trace/events/kvm.h> | 44 | #include <trace/events/kvm.h> |
45 | #undef TRACE_INCLUDE_FILE | 45 | |
46 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
47 | #include "trace.h" | 47 | #include "trace.h" |
48 | 48 | ||
@@ -224,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore) | |||
224 | kvm_on_user_return(&smsr->urn); | 224 | kvm_on_user_return(&smsr->urn); |
225 | } | 225 | } |
226 | 226 | ||
227 | unsigned long segment_base(u16 selector) | ||
228 | { | ||
229 | struct descriptor_table gdt; | ||
230 | struct desc_struct *d; | ||
231 | unsigned long table_base; | ||
232 | unsigned long v; | ||
233 | |||
234 | if (selector == 0) | ||
235 | return 0; | ||
236 | |||
237 | kvm_get_gdt(&gdt); | ||
238 | table_base = gdt.base; | ||
239 | |||
240 | if (selector & 4) { /* from ldt */ | ||
241 | u16 ldt_selector = kvm_read_ldt(); | ||
242 | |||
243 | table_base = segment_base(ldt_selector); | ||
244 | } | ||
245 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
246 | v = get_desc_base(d); | ||
247 | #ifdef CONFIG_X86_64 | ||
248 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
249 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
250 | #endif | ||
251 | return v; | ||
252 | } | ||
253 | EXPORT_SYMBOL_GPL(segment_base); | ||
254 | |||
255 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | 227 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) |
256 | { | 228 | { |
257 | if (irqchip_in_kernel(vcpu->kvm)) | 229 | if (irqchip_in_kernel(vcpu->kvm)) |
@@ -293,7 +265,8 @@ static int exception_class(int vector) | |||
293 | } | 265 | } |
294 | 266 | ||
295 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | 267 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, |
296 | unsigned nr, bool has_error, u32 error_code) | 268 | unsigned nr, bool has_error, u32 error_code, |
269 | bool reinject) | ||
297 | { | 270 | { |
298 | u32 prev_nr; | 271 | u32 prev_nr; |
299 | int class1, class2; | 272 | int class1, class2; |
@@ -304,6 +277,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
304 | vcpu->arch.exception.has_error_code = has_error; | 277 | vcpu->arch.exception.has_error_code = has_error; |
305 | vcpu->arch.exception.nr = nr; | 278 | vcpu->arch.exception.nr = nr; |
306 | vcpu->arch.exception.error_code = error_code; | 279 | vcpu->arch.exception.error_code = error_code; |
280 | vcpu->arch.exception.reinject = reinject; | ||
307 | return; | 281 | return; |
308 | } | 282 | } |
309 | 283 | ||
@@ -332,10 +306,16 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
332 | 306 | ||
333 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) | 307 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) |
334 | { | 308 | { |
335 | kvm_multiple_exception(vcpu, nr, false, 0); | 309 | kvm_multiple_exception(vcpu, nr, false, 0, false); |
336 | } | 310 | } |
337 | EXPORT_SYMBOL_GPL(kvm_queue_exception); | 311 | EXPORT_SYMBOL_GPL(kvm_queue_exception); |
338 | 312 | ||
313 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) | ||
314 | { | ||
315 | kvm_multiple_exception(vcpu, nr, false, 0, true); | ||
316 | } | ||
317 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); | ||
318 | |||
339 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | 319 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, |
340 | u32 error_code) | 320 | u32 error_code) |
341 | { | 321 | { |
@@ -352,10 +332,16 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); | |||
352 | 332 | ||
353 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 333 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
354 | { | 334 | { |
355 | kvm_multiple_exception(vcpu, nr, true, error_code); | 335 | kvm_multiple_exception(vcpu, nr, true, error_code, false); |
356 | } | 336 | } |
357 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); | 337 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); |
358 | 338 | ||
339 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | ||
340 | { | ||
341 | kvm_multiple_exception(vcpu, nr, true, error_code, true); | ||
342 | } | ||
343 | EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); | ||
344 | |||
359 | /* | 345 | /* |
360 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue | 346 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue |
361 | * a #GP and return false. | 347 | * a #GP and return false. |
@@ -476,7 +462,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
476 | } | 462 | } |
477 | 463 | ||
478 | kvm_x86_ops->set_cr0(vcpu, cr0); | 464 | kvm_x86_ops->set_cr0(vcpu, cr0); |
479 | vcpu->arch.cr0 = cr0; | ||
480 | 465 | ||
481 | kvm_mmu_reset_context(vcpu); | 466 | kvm_mmu_reset_context(vcpu); |
482 | return; | 467 | return; |
@@ -485,7 +470,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); | |||
485 | 470 | ||
486 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 471 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
487 | { | 472 | { |
488 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); | 473 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); |
489 | } | 474 | } |
490 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 475 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
491 | 476 | ||
@@ -517,7 +502,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
517 | } | 502 | } |
518 | kvm_x86_ops->set_cr4(vcpu, cr4); | 503 | kvm_x86_ops->set_cr4(vcpu, cr4); |
519 | vcpu->arch.cr4 = cr4; | 504 | vcpu->arch.cr4 = cr4; |
520 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; | ||
521 | kvm_mmu_reset_context(vcpu); | 505 | kvm_mmu_reset_context(vcpu); |
522 | } | 506 | } |
523 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 507 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
@@ -592,6 +576,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
592 | } | 576 | } |
593 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 577 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
594 | 578 | ||
579 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | ||
580 | { | ||
581 | switch (dr) { | ||
582 | case 0 ... 3: | ||
583 | vcpu->arch.db[dr] = val; | ||
584 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
585 | vcpu->arch.eff_db[dr] = val; | ||
586 | break; | ||
587 | case 4: | ||
588 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
589 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
590 | return 1; | ||
591 | } | ||
592 | /* fall through */ | ||
593 | case 6: | ||
594 | if (val & 0xffffffff00000000ULL) { | ||
595 | kvm_inject_gp(vcpu, 0); | ||
596 | return 1; | ||
597 | } | ||
598 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
599 | break; | ||
600 | case 5: | ||
601 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
602 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
603 | return 1; | ||
604 | } | ||
605 | /* fall through */ | ||
606 | default: /* 7 */ | ||
607 | if (val & 0xffffffff00000000ULL) { | ||
608 | kvm_inject_gp(vcpu, 0); | ||
609 | return 1; | ||
610 | } | ||
611 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
612 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
613 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | ||
614 | vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); | ||
615 | } | ||
616 | break; | ||
617 | } | ||
618 | |||
619 | return 0; | ||
620 | } | ||
621 | EXPORT_SYMBOL_GPL(kvm_set_dr); | ||
622 | |||
623 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | ||
624 | { | ||
625 | switch (dr) { | ||
626 | case 0 ... 3: | ||
627 | *val = vcpu->arch.db[dr]; | ||
628 | break; | ||
629 | case 4: | ||
630 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
631 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
632 | return 1; | ||
633 | } | ||
634 | /* fall through */ | ||
635 | case 6: | ||
636 | *val = vcpu->arch.dr6; | ||
637 | break; | ||
638 | case 5: | ||
639 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
640 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
641 | return 1; | ||
642 | } | ||
643 | /* fall through */ | ||
644 | default: /* 7 */ | ||
645 | *val = vcpu->arch.dr7; | ||
646 | break; | ||
647 | } | ||
648 | |||
649 | return 0; | ||
650 | } | ||
651 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
652 | |||
595 | static inline u32 bit(int bitno) | 653 | static inline u32 bit(int bitno) |
596 | { | 654 | { |
597 | return 1 << (bitno & 31); | 655 | return 1 << (bitno & 31); |
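kvm_set_dr()/kvm_get_dr() above centralize the DR4/DR5 aliasing that handle_dr() used to open-code: with CR4.DE clear they alias DR6/DR7, with CR4.DE set the access raises #UD (and writes with the upper 32 bits set still #GP). A tiny sketch of just the aliasing rule, using an illustrative return value rather than the kernel's exception plumbing:

#include <stdbool.h>
#include <stdio.h>

enum { DR_UD = -1 };	/* stand-in for "queue #UD" */

static int effective_dr(int dr, bool cr4_de)
{
	switch (dr) {
	case 4: return cr4_de ? DR_UD : 6;
	case 5: return cr4_de ? DR_UD : 7;
	default: return dr;
	}
}

int main(void)
{
	printf("%d %d %d\n",
	       effective_dr(4, false),	/*  6: aliases DR6 */
	       effective_dr(5, true),	/* -1: #UD */
	       effective_dr(7, false));	/*  7: unchanged */
	return 0;
}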
@@ -606,9 +664,10 @@ static inline u32 bit(int bitno) | |||
606 | * kvm-specific. Those are put in the beginning of the list. | 664 | * kvm-specific. Those are put in the beginning of the list. |
607 | */ | 665 | */ |
608 | 666 | ||
609 | #define KVM_SAVE_MSRS_BEGIN 5 | 667 | #define KVM_SAVE_MSRS_BEGIN 7 |
610 | static u32 msrs_to_save[] = { | 668 | static u32 msrs_to_save[] = { |
611 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 669 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
670 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | ||
612 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 671 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
613 | HV_X64_MSR_APIC_ASSIST_PAGE, | 672 | HV_X64_MSR_APIC_ASSIST_PAGE, |
614 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 673 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
@@ -625,48 +684,42 @@ static u32 emulated_msrs[] = { | |||
625 | MSR_IA32_MISC_ENABLE, | 684 | MSR_IA32_MISC_ENABLE, |
626 | }; | 685 | }; |
627 | 686 | ||
628 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | 687 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) |
629 | { | 688 | { |
630 | if (efer & efer_reserved_bits) { | 689 | if (efer & efer_reserved_bits) |
631 | kvm_inject_gp(vcpu, 0); | 690 | return 1; |
632 | return; | ||
633 | } | ||
634 | 691 | ||
635 | if (is_paging(vcpu) | 692 | if (is_paging(vcpu) |
636 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { | 693 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) |
637 | kvm_inject_gp(vcpu, 0); | 694 | return 1; |
638 | return; | ||
639 | } | ||
640 | 695 | ||
641 | if (efer & EFER_FFXSR) { | 696 | if (efer & EFER_FFXSR) { |
642 | struct kvm_cpuid_entry2 *feat; | 697 | struct kvm_cpuid_entry2 *feat; |
643 | 698 | ||
644 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 699 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
645 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { | 700 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) |
646 | kvm_inject_gp(vcpu, 0); | 701 | return 1; |
647 | return; | ||
648 | } | ||
649 | } | 702 | } |
650 | 703 | ||
651 | if (efer & EFER_SVME) { | 704 | if (efer & EFER_SVME) { |
652 | struct kvm_cpuid_entry2 *feat; | 705 | struct kvm_cpuid_entry2 *feat; |
653 | 706 | ||
654 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 707 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
655 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { | 708 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) |
656 | kvm_inject_gp(vcpu, 0); | 709 | return 1; |
657 | return; | ||
658 | } | ||
659 | } | 710 | } |
660 | 711 | ||
661 | kvm_x86_ops->set_efer(vcpu, efer); | ||
662 | |||
663 | efer &= ~EFER_LMA; | 712 | efer &= ~EFER_LMA; |
664 | efer |= vcpu->arch.efer & EFER_LMA; | 713 | efer |= vcpu->arch.efer & EFER_LMA; |
665 | 714 | ||
715 | kvm_x86_ops->set_efer(vcpu, efer); | ||
716 | |||
666 | vcpu->arch.efer = efer; | 717 | vcpu->arch.efer = efer; |
667 | 718 | ||
668 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 719 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
669 | kvm_mmu_reset_context(vcpu); | 720 | kvm_mmu_reset_context(vcpu); |
721 | |||
722 | return 0; | ||
670 | } | 723 | } |
671 | 724 | ||
672 | void kvm_enable_efer_bits(u64 mask) | 725 | void kvm_enable_efer_bits(u64 mask) |
@@ -696,14 +749,22 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |||
696 | 749 | ||
697 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | 750 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) |
698 | { | 751 | { |
699 | static int version; | 752 | int version; |
753 | int r; | ||
700 | struct pvclock_wall_clock wc; | 754 | struct pvclock_wall_clock wc; |
701 | struct timespec boot; | 755 | struct timespec boot; |
702 | 756 | ||
703 | if (!wall_clock) | 757 | if (!wall_clock) |
704 | return; | 758 | return; |
705 | 759 | ||
706 | version++; | 760 | r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); |
761 | if (r) | ||
762 | return; | ||
763 | |||
764 | if (version & 1) | ||
765 | ++version; /* first time write, random junk */ | ||
766 | |||
767 | ++version; | ||
707 | 768 | ||
708 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | 769 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); |
709 | 770 | ||
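The wall-clock fix above reads back the guest's current version, forces it even if a previous update was interrupted, and only then bumps it around the write, so the counter is odd exactly while an update is in flight. A userspace sketch of the matching reader loop; here the structure is plain local memory, whereas a real guest reader works on shared memory and needs compiler/CPU barriers:

#include <stdint.h>
#include <stdio.h>

struct wall_clock {
	uint32_t version;	/* odd while the writer is mid-update */
	uint32_t sec;
	uint32_t nsec;
};

static void read_wall_clock(const volatile struct wall_clock *wc,
			    uint32_t *sec, uint32_t *nsec)
{
	uint32_t v;

	do {
		v = wc->version;
		if (v & 1)		/* writer mid-update, try again */
			continue;
		*sec  = wc->sec;
		*nsec = wc->nsec;
	} while (v != wc->version);	/* version moved: torn read, retry */
}

int main(void)
{
	struct wall_clock wc = { .version = 2, .sec = 100, .nsec = 500 };
	uint32_t s, ns;

	read_wall_clock(&wc, &s, &ns);
	printf("%u.%09u\n", s, ns);
	return 0;
}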
@@ -796,6 +857,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
796 | vcpu->hv_clock.system_time = ts.tv_nsec + | 857 | vcpu->hv_clock.system_time = ts.tv_nsec + |
797 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; | 858 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; |
798 | 859 | ||
860 | vcpu->hv_clock.flags = 0; | ||
861 | |||
799 | /* | 862 | /* |
800 | * The interface expects us to write an even number signaling that the | 863 | * The interface expects us to write an even number signaling that the |
801 | * update is finished. Since the guest won't see the intermediate | 864 | * update is finished. Since the guest won't see the intermediate |
@@ -1087,10 +1150,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1087 | { | 1150 | { |
1088 | switch (msr) { | 1151 | switch (msr) { |
1089 | case MSR_EFER: | 1152 | case MSR_EFER: |
1090 | set_efer(vcpu, data); | 1153 | return set_efer(vcpu, data); |
1091 | break; | ||
1092 | case MSR_K7_HWCR: | 1154 | case MSR_K7_HWCR: |
1093 | data &= ~(u64)0x40; /* ignore flush filter disable */ | 1155 | data &= ~(u64)0x40; /* ignore flush filter disable */ |
1156 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | ||
1094 | if (data != 0) { | 1157 | if (data != 0) { |
1095 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1158 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
1096 | data); | 1159 | data); |
@@ -1133,10 +1196,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1133 | case MSR_IA32_MISC_ENABLE: | 1196 | case MSR_IA32_MISC_ENABLE: |
1134 | vcpu->arch.ia32_misc_enable_msr = data; | 1197 | vcpu->arch.ia32_misc_enable_msr = data; |
1135 | break; | 1198 | break; |
1199 | case MSR_KVM_WALL_CLOCK_NEW: | ||
1136 | case MSR_KVM_WALL_CLOCK: | 1200 | case MSR_KVM_WALL_CLOCK: |
1137 | vcpu->kvm->arch.wall_clock = data; | 1201 | vcpu->kvm->arch.wall_clock = data; |
1138 | kvm_write_wall_clock(vcpu->kvm, data); | 1202 | kvm_write_wall_clock(vcpu->kvm, data); |
1139 | break; | 1203 | break; |
1204 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
1140 | case MSR_KVM_SYSTEM_TIME: { | 1205 | case MSR_KVM_SYSTEM_TIME: { |
1141 | if (vcpu->arch.time_page) { | 1206 | if (vcpu->arch.time_page) { |
1142 | kvm_release_page_dirty(vcpu->arch.time_page); | 1207 | kvm_release_page_dirty(vcpu->arch.time_page); |
@@ -1408,9 +1473,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1408 | data = vcpu->arch.efer; | 1473 | data = vcpu->arch.efer; |
1409 | break; | 1474 | break; |
1410 | case MSR_KVM_WALL_CLOCK: | 1475 | case MSR_KVM_WALL_CLOCK: |
1476 | case MSR_KVM_WALL_CLOCK_NEW: | ||
1411 | data = vcpu->kvm->arch.wall_clock; | 1477 | data = vcpu->kvm->arch.wall_clock; |
1412 | break; | 1478 | break; |
1413 | case MSR_KVM_SYSTEM_TIME: | 1479 | case MSR_KVM_SYSTEM_TIME: |
1480 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
1414 | data = vcpu->arch.time; | 1481 | data = vcpu->arch.time; |
1415 | break; | 1482 | break; |
1416 | case MSR_IA32_P5_MC_ADDR: | 1483 | case MSR_IA32_P5_MC_ADDR: |
@@ -1549,6 +1616,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1549 | case KVM_CAP_HYPERV_VAPIC: | 1616 | case KVM_CAP_HYPERV_VAPIC: |
1550 | case KVM_CAP_HYPERV_SPIN: | 1617 | case KVM_CAP_HYPERV_SPIN: |
1551 | case KVM_CAP_PCI_SEGMENT: | 1618 | case KVM_CAP_PCI_SEGMENT: |
1619 | case KVM_CAP_DEBUGREGS: | ||
1552 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1620 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1553 | r = 1; | 1621 | r = 1; |
1554 | break; | 1622 | break; |
@@ -1769,6 +1837,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1769 | { | 1837 | { |
1770 | int r; | 1838 | int r; |
1771 | 1839 | ||
1840 | vcpu_load(vcpu); | ||
1772 | r = -E2BIG; | 1841 | r = -E2BIG; |
1773 | if (cpuid->nent < vcpu->arch.cpuid_nent) | 1842 | if (cpuid->nent < vcpu->arch.cpuid_nent) |
1774 | goto out; | 1843 | goto out; |
@@ -1780,6 +1849,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1780 | 1849 | ||
1781 | out: | 1850 | out: |
1782 | cpuid->nent = vcpu->arch.cpuid_nent; | 1851 | cpuid->nent = vcpu->arch.cpuid_nent; |
1852 | vcpu_put(vcpu); | ||
1783 | return r; | 1853 | return r; |
1784 | } | 1854 | } |
1785 | 1855 | ||
@@ -1910,6 +1980,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1910 | } | 1980 | } |
1911 | break; | 1981 | break; |
1912 | } | 1982 | } |
1983 | case KVM_CPUID_SIGNATURE: { | ||
1984 | char signature[12] = "KVMKVMKVM\0\0"; | ||
1985 | u32 *sigptr = (u32 *)signature; | ||
1986 | entry->eax = 0; | ||
1987 | entry->ebx = sigptr[0]; | ||
1988 | entry->ecx = sigptr[1]; | ||
1989 | entry->edx = sigptr[2]; | ||
1990 | break; | ||
1991 | } | ||
1992 | case KVM_CPUID_FEATURES: | ||
1993 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
1994 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
1995 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
1996 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
1997 | entry->ebx = 0; | ||
1998 | entry->ecx = 0; | ||
1999 | entry->edx = 0; | ||
2000 | break; | ||
1913 | case 0x80000000: | 2001 | case 0x80000000: |
1914 | entry->eax = min(entry->eax, 0x8000001a); | 2002 | entry->eax = min(entry->eax, 0x8000001a); |
1915 | break; | 2003 | break; |
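The KVM_CPUID_SIGNATURE and KVM_CPUID_FEATURES entries above are what a guest probes at the conventional KVM leaves 0x40000000 and 0x40000001. A guest-side sketch, assuming an x86-64 build with GCC/Clang inline asm; on bare metal or under another hypervisor the signature leaf simply returns something else:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void cpuid(uint32_t leaf, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
	__asm__ volatile("cpuid"
			 : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
			 : "a"(leaf), "c"(0));
}

int main(void)
{
	uint32_t a, b, c, d;
	char sig[13] = { 0 };

	cpuid(0x40000000, &a, &b, &c, &d);
	memcpy(sig + 0, &b, 4);		/* signature is spread over ebx/ecx/edx */
	memcpy(sig + 4, &c, 4);
	memcpy(sig + 8, &d, 4);
	printf("hypervisor signature: \"%s\"\n", sig);	/* "KVMKVMKVM" under KVM */

	if (!strcmp(sig, "KVMKVMKVM")) {
		cpuid(0x40000001, &a, &b, &c, &d);
		printf("KVM feature bits: %#x\n", a);	/* CLOCKSOURCE, NOP_IO_DELAY, ... */
	}
	return 0;
}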
@@ -1918,6 +2006,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1918 | entry->ecx &= kvm_supported_word6_x86_features; | 2006 | entry->ecx &= kvm_supported_word6_x86_features; |
1919 | break; | 2007 | break; |
1920 | } | 2008 | } |
2009 | |||
2010 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
2011 | |||
1921 | put_cpu(); | 2012 | put_cpu(); |
1922 | } | 2013 | } |
1923 | 2014 | ||
@@ -1953,6 +2044,23 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
1953 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) | 2044 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) |
1954 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | 2045 | do_cpuid_ent(&cpuid_entries[nent], func, 0, |
1955 | &nent, cpuid->nent); | 2046 | &nent, cpuid->nent); |
2047 | |||
2048 | |||
2049 | |||
2050 | r = -E2BIG; | ||
2051 | if (nent >= cpuid->nent) | ||
2052 | goto out_free; | ||
2053 | |||
2054 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | ||
2055 | cpuid->nent); | ||
2056 | |||
2057 | r = -E2BIG; | ||
2058 | if (nent >= cpuid->nent) | ||
2059 | goto out_free; | ||
2060 | |||
2061 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, | ||
2062 | cpuid->nent); | ||
2063 | |||
1956 | r = -E2BIG; | 2064 | r = -E2BIG; |
1957 | if (nent >= cpuid->nent) | 2065 | if (nent >= cpuid->nent) |
1958 | goto out_free; | 2066 | goto out_free; |
@@ -2032,6 +2140,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2032 | int r; | 2140 | int r; |
2033 | unsigned bank_num = mcg_cap & 0xff, bank; | 2141 | unsigned bank_num = mcg_cap & 0xff, bank; |
2034 | 2142 | ||
2143 | vcpu_load(vcpu); | ||
2035 | r = -EINVAL; | 2144 | r = -EINVAL; |
2036 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) | 2145 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) |
2037 | goto out; | 2146 | goto out; |
@@ -2046,6 +2155,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2046 | for (bank = 0; bank < bank_num; bank++) | 2155 | for (bank = 0; bank < bank_num; bank++) |
2047 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; | 2156 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; |
2048 | out: | 2157 | out: |
2158 | vcpu_put(vcpu); | ||
2049 | return r; | 2159 | return r; |
2050 | } | 2160 | } |
2051 | 2161 | ||
@@ -2105,14 +2215,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2105 | { | 2215 | { |
2106 | vcpu_load(vcpu); | 2216 | vcpu_load(vcpu); |
2107 | 2217 | ||
2108 | events->exception.injected = vcpu->arch.exception.pending; | 2218 | events->exception.injected = |
2219 | vcpu->arch.exception.pending && | ||
2220 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | ||
2109 | events->exception.nr = vcpu->arch.exception.nr; | 2221 | events->exception.nr = vcpu->arch.exception.nr; |
2110 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | 2222 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; |
2111 | events->exception.error_code = vcpu->arch.exception.error_code; | 2223 | events->exception.error_code = vcpu->arch.exception.error_code; |
2112 | 2224 | ||
2113 | events->interrupt.injected = vcpu->arch.interrupt.pending; | 2225 | events->interrupt.injected = |
2226 | vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; | ||
2114 | events->interrupt.nr = vcpu->arch.interrupt.nr; | 2227 | events->interrupt.nr = vcpu->arch.interrupt.nr; |
2115 | events->interrupt.soft = vcpu->arch.interrupt.soft; | 2228 | events->interrupt.soft = 0; |
2229 | events->interrupt.shadow = | ||
2230 | kvm_x86_ops->get_interrupt_shadow(vcpu, | ||
2231 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); | ||
2116 | 2232 | ||
2117 | events->nmi.injected = vcpu->arch.nmi_injected; | 2233 | events->nmi.injected = vcpu->arch.nmi_injected; |
2118 | events->nmi.pending = vcpu->arch.nmi_pending; | 2234 | events->nmi.pending = vcpu->arch.nmi_pending; |
@@ -2121,7 +2237,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2121 | events->sipi_vector = vcpu->arch.sipi_vector; | 2237 | events->sipi_vector = vcpu->arch.sipi_vector; |
2122 | 2238 | ||
2123 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2239 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2124 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR); | 2240 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2241 | | KVM_VCPUEVENT_VALID_SHADOW); | ||
2125 | 2242 | ||
2126 | vcpu_put(vcpu); | 2243 | vcpu_put(vcpu); |
2127 | } | 2244 | } |
@@ -2130,7 +2247,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2130 | struct kvm_vcpu_events *events) | 2247 | struct kvm_vcpu_events *events) |
2131 | { | 2248 | { |
2132 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | 2249 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING |
2133 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) | 2250 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2251 | | KVM_VCPUEVENT_VALID_SHADOW)) | ||
2134 | return -EINVAL; | 2252 | return -EINVAL; |
2135 | 2253 | ||
2136 | vcpu_load(vcpu); | 2254 | vcpu_load(vcpu); |
@@ -2145,6 +2263,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2145 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2263 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
2146 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | 2264 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) |
2147 | kvm_pic_clear_isr_ack(vcpu->kvm); | 2265 | kvm_pic_clear_isr_ack(vcpu->kvm); |
2266 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | ||
2267 | kvm_x86_ops->set_interrupt_shadow(vcpu, | ||
2268 | events->interrupt.shadow); | ||
2148 | 2269 | ||
2149 | vcpu->arch.nmi_injected = events->nmi.injected; | 2270 | vcpu->arch.nmi_injected = events->nmi.injected; |
2150 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) | 2271 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) |
@@ -2159,6 +2280,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2159 | return 0; | 2280 | return 0; |
2160 | } | 2281 | } |
2161 | 2282 | ||
2283 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | ||
2284 | struct kvm_debugregs *dbgregs) | ||
2285 | { | ||
2286 | vcpu_load(vcpu); | ||
2287 | |||
2288 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | ||
2289 | dbgregs->dr6 = vcpu->arch.dr6; | ||
2290 | dbgregs->dr7 = vcpu->arch.dr7; | ||
2291 | dbgregs->flags = 0; | ||
2292 | |||
2293 | vcpu_put(vcpu); | ||
2294 | } | ||
2295 | |||
2296 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | ||
2297 | struct kvm_debugregs *dbgregs) | ||
2298 | { | ||
2299 | if (dbgregs->flags) | ||
2300 | return -EINVAL; | ||
2301 | |||
2302 | vcpu_load(vcpu); | ||
2303 | |||
2304 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | ||
2305 | vcpu->arch.dr6 = dbgregs->dr6; | ||
2306 | vcpu->arch.dr7 = dbgregs->dr7; | ||
2307 | |||
2308 | vcpu_put(vcpu); | ||
2309 | |||
2310 | return 0; | ||
2311 | } | ||
2312 | |||
2162 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2313 | long kvm_arch_vcpu_ioctl(struct file *filp, |
2163 | unsigned int ioctl, unsigned long arg) | 2314 | unsigned int ioctl, unsigned long arg) |
2164 | { | 2315 | { |
@@ -2313,7 +2464,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2313 | r = -EFAULT; | 2464 | r = -EFAULT; |
2314 | if (copy_from_user(&mce, argp, sizeof mce)) | 2465 | if (copy_from_user(&mce, argp, sizeof mce)) |
2315 | goto out; | 2466 | goto out; |
2467 | vcpu_load(vcpu); | ||
2316 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2468 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
2469 | vcpu_put(vcpu); | ||
2317 | break; | 2470 | break; |
2318 | } | 2471 | } |
2319 | case KVM_GET_VCPU_EVENTS: { | 2472 | case KVM_GET_VCPU_EVENTS: { |
@@ -2337,6 +2490,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2337 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | 2490 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); |
2338 | break; | 2491 | break; |
2339 | } | 2492 | } |
2493 | case KVM_GET_DEBUGREGS: { | ||
2494 | struct kvm_debugregs dbgregs; | ||
2495 | |||
2496 | kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); | ||
2497 | |||
2498 | r = -EFAULT; | ||
2499 | if (copy_to_user(argp, &dbgregs, | ||
2500 | sizeof(struct kvm_debugregs))) | ||
2501 | break; | ||
2502 | r = 0; | ||
2503 | break; | ||
2504 | } | ||
2505 | case KVM_SET_DEBUGREGS: { | ||
2506 | struct kvm_debugregs dbgregs; | ||
2507 | |||
2508 | r = -EFAULT; | ||
2509 | if (copy_from_user(&dbgregs, argp, | ||
2510 | sizeof(struct kvm_debugregs))) | ||
2511 | break; | ||
2512 | |||
2513 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); | ||
2514 | break; | ||
2515 | } | ||
2340 | default: | 2516 | default: |
2341 | r = -EINVAL; | 2517 | r = -EINVAL; |
2342 | } | 2518 | } |
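The KVM_GET_DEBUGREGS/KVM_SET_DEBUGREGS cases above give userspace a way to save and restore the guest debug registers. A usage sketch from the VMM side; opening /dev/kvm and creating the VM and vcpu are elided, so vm_fd/vcpu_fd below are placeholders:

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int save_restore_debugregs(int vm_fd, int vcpu_fd)
{
	struct kvm_debugregs dbg;

	/* Only attempt this on kernels that advertise the capability. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_DEBUGREGS) <= 0)
		return -1;

	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0)
		return -1;

	printf("dr6=%#llx dr7=%#llx\n",
	       (unsigned long long)dbg.dr6, (unsigned long long)dbg.dr7);

	dbg.flags = 0;	/* the set handler rejects any nonzero flags */
	return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
}

int main(void)
{
	/* With placeholder fds the ioctls simply fail; wire up real ones
	 * from KVM_CREATE_VM/KVM_CREATE_VCPU to exercise the path. */
	return save_restore_debugregs(-1, -1) < 0;
}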
@@ -2390,7 +2566,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | |||
2390 | struct kvm_mem_alias *alias; | 2566 | struct kvm_mem_alias *alias; |
2391 | struct kvm_mem_aliases *aliases; | 2567 | struct kvm_mem_aliases *aliases; |
2392 | 2568 | ||
2393 | aliases = rcu_dereference(kvm->arch.aliases); | 2569 | aliases = kvm_aliases(kvm); |
2394 | 2570 | ||
2395 | for (i = 0; i < aliases->naliases; ++i) { | 2571 | for (i = 0; i < aliases->naliases; ++i) { |
2396 | alias = &aliases->aliases[i]; | 2572 | alias = &aliases->aliases[i]; |
@@ -2409,7 +2585,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | |||
2409 | struct kvm_mem_alias *alias; | 2585 | struct kvm_mem_alias *alias; |
2410 | struct kvm_mem_aliases *aliases; | 2586 | struct kvm_mem_aliases *aliases; |
2411 | 2587 | ||
2412 | aliases = rcu_dereference(kvm->arch.aliases); | 2588 | aliases = kvm_aliases(kvm); |
2413 | 2589 | ||
2414 | for (i = 0; i < aliases->naliases; ++i) { | 2590 | for (i = 0; i < aliases->naliases; ++i) { |
2415 | alias = &aliases->aliases[i]; | 2591 | alias = &aliases->aliases[i]; |
@@ -2804,11 +2980,13 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2804 | r = -EFAULT; | 2980 | r = -EFAULT; |
2805 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | 2981 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) |
2806 | goto out; | 2982 | goto out; |
2983 | r = -ENXIO; | ||
2807 | if (irqchip_in_kernel(kvm)) { | 2984 | if (irqchip_in_kernel(kvm)) { |
2808 | __s32 status; | 2985 | __s32 status; |
2809 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2986 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
2810 | irq_event.irq, irq_event.level); | 2987 | irq_event.irq, irq_event.level); |
2811 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2988 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
2989 | r = -EFAULT; | ||
2812 | irq_event.status = status; | 2990 | irq_event.status = status; |
2813 | if (copy_to_user(argp, &irq_event, | 2991 | if (copy_to_user(argp, &irq_event, |
2814 | sizeof irq_event)) | 2992 | sizeof irq_event)) |
@@ -3024,6 +3202,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
3024 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3202 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
3025 | } | 3203 | } |
3026 | 3204 | ||
3205 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | ||
3206 | struct kvm_segment *var, int seg) | ||
3207 | { | ||
3208 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
3209 | } | ||
3210 | |||
3211 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
3212 | struct kvm_segment *var, int seg) | ||
3213 | { | ||
3214 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
3215 | } | ||
3216 | |||
3027 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3217 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
3028 | { | 3218 | { |
3029 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3219 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
@@ -3104,14 +3294,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | |||
3104 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | 3294 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); |
3105 | } | 3295 | } |
3106 | 3296 | ||
3107 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3297 | static int kvm_write_guest_virt_system(gva_t addr, void *val, |
3108 | struct kvm_vcpu *vcpu, u32 *error) | 3298 | unsigned int bytes, |
3299 | struct kvm_vcpu *vcpu, | ||
3300 | u32 *error) | ||
3109 | { | 3301 | { |
3110 | void *data = val; | 3302 | void *data = val; |
3111 | int r = X86EMUL_CONTINUE; | 3303 | int r = X86EMUL_CONTINUE; |
3112 | 3304 | ||
3113 | while (bytes) { | 3305 | while (bytes) { |
3114 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); | 3306 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, |
3307 | PFERR_WRITE_MASK, error); | ||
3115 | unsigned offset = addr & (PAGE_SIZE-1); | 3308 | unsigned offset = addr & (PAGE_SIZE-1); |
3116 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3309 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
3117 | int ret; | 3310 | int ret; |
@@ -3134,7 +3327,6 @@ out: | |||
3134 | return r; | 3327 | return r; |
3135 | } | 3328 | } |
3136 | 3329 | ||
3137 | |||
3138 | static int emulator_read_emulated(unsigned long addr, | 3330 | static int emulator_read_emulated(unsigned long addr, |
3139 | void *val, | 3331 | void *val, |
3140 | unsigned int bytes, | 3332 | unsigned int bytes, |
@@ -3237,9 +3429,9 @@ mmio: | |||
3237 | } | 3429 | } |
3238 | 3430 | ||
3239 | int emulator_write_emulated(unsigned long addr, | 3431 | int emulator_write_emulated(unsigned long addr, |
3240 | const void *val, | 3432 | const void *val, |
3241 | unsigned int bytes, | 3433 | unsigned int bytes, |
3242 | struct kvm_vcpu *vcpu) | 3434 | struct kvm_vcpu *vcpu) |
3243 | { | 3435 | { |
3244 | /* Crossing a page boundary? */ | 3436 | /* Crossing a page boundary? */ |
3245 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3437 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
@@ -3257,45 +3449,150 @@ int emulator_write_emulated(unsigned long addr, | |||
3257 | } | 3449 | } |
3258 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | 3450 | EXPORT_SYMBOL_GPL(emulator_write_emulated); |
3259 | 3451 | ||
3452 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | ||
3453 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) | ||
3454 | |||
3455 | #ifdef CONFIG_X86_64 | ||
3456 | # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) | ||
3457 | #else | ||
3458 | # define CMPXCHG64(ptr, old, new) \ | ||
3459 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) | ||
3460 | #endif | ||
3461 | |||
3260 | static int emulator_cmpxchg_emulated(unsigned long addr, | 3462 | static int emulator_cmpxchg_emulated(unsigned long addr, |
3261 | const void *old, | 3463 | const void *old, |
3262 | const void *new, | 3464 | const void *new, |
3263 | unsigned int bytes, | 3465 | unsigned int bytes, |
3264 | struct kvm_vcpu *vcpu) | 3466 | struct kvm_vcpu *vcpu) |
3265 | { | 3467 | { |
3266 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3468 | gpa_t gpa; |
3267 | #ifndef CONFIG_X86_64 | 3469 | struct page *page; |
3268 | /* guests cmpxchg8b have to be emulated atomically */ | 3470 | char *kaddr; |
3269 | if (bytes == 8) { | 3471 | bool exchanged; |
3270 | gpa_t gpa; | ||
3271 | struct page *page; | ||
3272 | char *kaddr; | ||
3273 | u64 val; | ||
3274 | 3472 | ||
3275 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); | 3473 | /* guests cmpxchg8b have to be emulated atomically */ |
3474 | if (bytes > 8 || (bytes & (bytes - 1))) | ||
3475 | goto emul_write; | ||
3276 | 3476 | ||
3277 | if (gpa == UNMAPPED_GVA || | 3477 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
3278 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
3279 | goto emul_write; | ||
3280 | 3478 | ||
3281 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) | 3479 | if (gpa == UNMAPPED_GVA || |
3282 | goto emul_write; | 3480 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
3481 | goto emul_write; | ||
3283 | 3482 | ||
3284 | val = *(u64 *)new; | 3483 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) |
3484 | goto emul_write; | ||
3285 | 3485 | ||
3286 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3486 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
3287 | 3487 | ||
3288 | kaddr = kmap_atomic(page, KM_USER0); | 3488 | kaddr = kmap_atomic(page, KM_USER0); |
3289 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 3489 | kaddr += offset_in_page(gpa); |
3290 | kunmap_atomic(kaddr, KM_USER0); | 3490 | switch (bytes) { |
3291 | kvm_release_page_dirty(page); | 3491 | case 1: |
3492 | exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); | ||
3493 | break; | ||
3494 | case 2: | ||
3495 | exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); | ||
3496 | break; | ||
3497 | case 4: | ||
3498 | exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); | ||
3499 | break; | ||
3500 | case 8: | ||
3501 | exchanged = CMPXCHG64(kaddr, old, new); | ||
3502 | break; | ||
3503 | default: | ||
3504 | BUG(); | ||
3292 | } | 3505 | } |
3506 | kunmap_atomic(kaddr, KM_USER0); | ||
3507 | kvm_release_page_dirty(page); | ||
3508 | |||
3509 | if (!exchanged) | ||
3510 | return X86EMUL_CMPXCHG_FAILED; | ||
3511 | |||
3512 | kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); | ||
3513 | |||
3514 | return X86EMUL_CONTINUE; | ||
3515 | |||
3293 | emul_write: | 3516 | emul_write: |
3294 | #endif | 3517 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3295 | 3518 | ||
3296 | return emulator_write_emulated(addr, new, bytes, vcpu); | 3519 | return emulator_write_emulated(addr, new, bytes, vcpu); |
3297 | } | 3520 | } |
3298 | 3521 | ||
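The CMPXCHG_TYPE/CMPXCHG64 helpers introduced above report success by comparing cmpxchg()'s return value (the previous memory contents) with the caller's expected value; emulator_cmpxchg_emulated() falls back to a plain emulated write only when the size is not a power of two up to 8 bytes, the GVA does not map, the target is the APIC page, or the access crosses a page boundary. A minimal user-space sketch of the success test, using GCC's __sync builtin as a stand-in for the kernel's cmpxchg() (the TRY_CMPXCHG name is illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/*
 * Same contract as CMPXCHG_TYPE above: succeed only if the value at ptr
 * still equals *old, replacing it with *new atomically.
 */
#define TRY_CMPXCHG(t, ptr, old, new) \
        (__sync_val_compare_and_swap((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))

int main(void)
{
        uint32_t mem = 0x1234, expect = 0x1234, newval = 0x5678;

        if (TRY_CMPXCHG(uint32_t, &mem, &expect, &newval))
                printf("exchanged, mem = %#x\n", mem);   /* 0x5678 */

        expect = 0xdead;                                 /* stale expectation */
        if (!TRY_CMPXCHG(uint32_t, &mem, &expect, &newval))
                printf("kept, mem = %#x\n", mem);        /* still 0x5678 */
        return 0;
}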
3522 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
3523 | { | ||
3524 | /* TODO: String I/O for in kernel device */ | ||
3525 | int r; | ||
3526 | |||
3527 | if (vcpu->arch.pio.in) | ||
3528 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
3529 | vcpu->arch.pio.size, pd); | ||
3530 | else | ||
3531 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3532 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
3533 | pd); | ||
3534 | return r; | ||
3535 | } | ||
3536 | |||
3537 | |||
3538 | static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | ||
3539 | unsigned int count, struct kvm_vcpu *vcpu) | ||
3540 | { | ||
3541 | if (vcpu->arch.pio.count) | ||
3542 | goto data_avail; | ||
3543 | |||
3544 | trace_kvm_pio(1, port, size, 1); | ||
3545 | |||
3546 | vcpu->arch.pio.port = port; | ||
3547 | vcpu->arch.pio.in = 1; | ||
3548 | vcpu->arch.pio.count = count; | ||
3549 | vcpu->arch.pio.size = size; | ||
3550 | |||
3551 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3552 | data_avail: | ||
3553 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
3554 | vcpu->arch.pio.count = 0; | ||
3555 | return 1; | ||
3556 | } | ||
3557 | |||
3558 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3559 | vcpu->run->io.direction = KVM_EXIT_IO_IN; | ||
3560 | vcpu->run->io.size = size; | ||
3561 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3562 | vcpu->run->io.count = count; | ||
3563 | vcpu->run->io.port = port; | ||
3564 | |||
3565 | return 0; | ||
3566 | } | ||
3567 | |||
3568 | static int emulator_pio_out_emulated(int size, unsigned short port, | ||
3569 | const void *val, unsigned int count, | ||
3570 | struct kvm_vcpu *vcpu) | ||
3571 | { | ||
3572 | trace_kvm_pio(0, port, size, 1); | ||
3573 | |||
3574 | vcpu->arch.pio.port = port; | ||
3575 | vcpu->arch.pio.in = 0; | ||
3576 | vcpu->arch.pio.count = count; | ||
3577 | vcpu->arch.pio.size = size; | ||
3578 | |||
3579 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
3580 | |||
3581 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3582 | vcpu->arch.pio.count = 0; | ||
3583 | return 1; | ||
3584 | } | ||
3585 | |||
3586 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3587 | vcpu->run->io.direction = KVM_EXIT_IO_OUT; | ||
3588 | vcpu->run->io.size = size; | ||
3589 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3590 | vcpu->run->io.count = count; | ||
3591 | vcpu->run->io.port = port; | ||
3592 | |||
3593 | return 0; | ||
3594 | } | ||
3595 | |||
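The new pio_in/pio_out callbacks either complete the access through kernel_pio() or fill in vcpu->run->io and return 0, which makes KVM_RUN exit to userspace with KVM_EXIT_IO and the data staged in the pio page at io.data_offset. A hedged sketch of the matching userspace side, assuming the usual mmap'ed kvm_run region; do_port_out()/do_port_in() are hypothetical VMM helpers, not part of this patch:

#include <linux/kvm.h>
#include <stdio.h>

static void do_port_out(unsigned short port, const unsigned char *d, int sz)
{
        printf("out %#x: %#x (%d bytes)\n", port, d[0], sz);    /* placeholder */
}

static void do_port_in(unsigned short port, unsigned char *d, int sz)
{
        d[0] = 0;                       /* placeholder: device would fill this */
}

static void handle_io_exit(struct kvm_run *run)
{
        unsigned char *data = (unsigned char *)run + run->io.data_offset;
        unsigned int i;

        /* one element per repetition, exactly as staged in the pio page */
        for (i = 0; i < run->io.count; i++, data += run->io.size) {
                if (run->io.direction == KVM_EXIT_IO_OUT)
                        do_port_out(run->io.port, data, run->io.size);
                else
                        do_port_in(run->io.port, data, run->io.size);
        }
}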
3299 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 3596 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
3300 | { | 3597 | { |
3301 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 3598 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
@@ -3316,14 +3613,14 @@ int emulate_clts(struct kvm_vcpu *vcpu) | |||
3316 | 3613 | ||
3317 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3614 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
3318 | { | 3615 | { |
3319 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); | 3616 | return kvm_get_dr(ctxt->vcpu, dr, dest); |
3320 | } | 3617 | } |
3321 | 3618 | ||
3322 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3619 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
3323 | { | 3620 | { |
3324 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3621 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
3325 | 3622 | ||
3326 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); | 3623 | return kvm_set_dr(ctxt->vcpu, dr, value & mask); |
3327 | } | 3624 | } |
3328 | 3625 | ||
3329 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3626 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
@@ -3344,12 +3641,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3344 | } | 3641 | } |
3345 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3642 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
3346 | 3643 | ||
3644 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
3645 | { | ||
3646 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
3647 | } | ||
3648 | |||
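mk_cr_64() lets the 32-bit value coming from the emulator replace only the low half of a control register while the upper half is preserved. Spelled out with concrete numbers (a sketch, not kernel code):

#include <assert.h>
#include <stdint.h>

static uint64_t mk_cr_64(uint64_t curr_cr, uint32_t new_val)
{
        return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}

int main(void)
{
        /* upper half preserved, lower half replaced */
        assert(mk_cr_64(0xffffffff00000003ULL, 0x80000001u) == 0xffffffff80000001ULL);
        return 0;
}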
3649 | static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | ||
3650 | { | ||
3651 | unsigned long value; | ||
3652 | |||
3653 | switch (cr) { | ||
3654 | case 0: | ||
3655 | value = kvm_read_cr0(vcpu); | ||
3656 | break; | ||
3657 | case 2: | ||
3658 | value = vcpu->arch.cr2; | ||
3659 | break; | ||
3660 | case 3: | ||
3661 | value = vcpu->arch.cr3; | ||
3662 | break; | ||
3663 | case 4: | ||
3664 | value = kvm_read_cr4(vcpu); | ||
3665 | break; | ||
3666 | case 8: | ||
3667 | value = kvm_get_cr8(vcpu); | ||
3668 | break; | ||
3669 | default: | ||
3670 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
3671 | return 0; | ||
3672 | } | ||
3673 | |||
3674 | return value; | ||
3675 | } | ||
3676 | |||
3677 | static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | ||
3678 | { | ||
3679 | switch (cr) { | ||
3680 | case 0: | ||
3681 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
3682 | break; | ||
3683 | case 2: | ||
3684 | vcpu->arch.cr2 = val; | ||
3685 | break; | ||
3686 | case 3: | ||
3687 | kvm_set_cr3(vcpu, val); | ||
3688 | break; | ||
3689 | case 4: | ||
3690 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
3691 | break; | ||
3692 | case 8: | ||
3693 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
3694 | break; | ||
3695 | default: | ||
3696 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
3697 | } | ||
3698 | } | ||
3699 | |||
3700 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | ||
3701 | { | ||
3702 | return kvm_x86_ops->get_cpl(vcpu); | ||
3703 | } | ||
3704 | |||
3705 | static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | ||
3706 | { | ||
3707 | kvm_x86_ops->get_gdt(vcpu, dt); | ||
3708 | } | ||
3709 | |||
3710 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | ||
3711 | struct kvm_vcpu *vcpu) | ||
3712 | { | ||
3713 | struct kvm_segment var; | ||
3714 | |||
3715 | kvm_get_segment(vcpu, &var, seg); | ||
3716 | |||
3717 | if (var.unusable) | ||
3718 | return false; | ||
3719 | |||
3720 | if (var.g) | ||
3721 | var.limit >>= 12; | ||
3722 | set_desc_limit(desc, var.limit); | ||
3723 | set_desc_base(desc, (unsigned long)var.base); | ||
3724 | desc->type = var.type; | ||
3725 | desc->s = var.s; | ||
3726 | desc->dpl = var.dpl; | ||
3727 | desc->p = var.present; | ||
3728 | desc->avl = var.avl; | ||
3729 | desc->l = var.l; | ||
3730 | desc->d = var.db; | ||
3731 | desc->g = var.g; | ||
3732 | |||
3733 | return true; | ||
3734 | } | ||
3735 | |||
3736 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | ||
3737 | struct kvm_vcpu *vcpu) | ||
3738 | { | ||
3739 | struct kvm_segment var; | ||
3740 | |||
3741 | /* needed to preserve selector */ | ||
3742 | kvm_get_segment(vcpu, &var, seg); | ||
3743 | |||
3744 | var.base = get_desc_base(desc); | ||
3745 | var.limit = get_desc_limit(desc); | ||
3746 | if (desc->g) | ||
3747 | var.limit = (var.limit << 12) | 0xfff; | ||
3748 | var.type = desc->type; | ||
3749 | var.present = desc->p; | ||
3750 | var.dpl = desc->dpl; | ||
3751 | var.db = desc->d; | ||
3752 | var.s = desc->s; | ||
3753 | var.l = desc->l; | ||
3754 | var.g = desc->g; | ||
3755 | var.avl = desc->avl; | ||
3756 | var.present = desc->p; | ||
3757 | var.unusable = !var.present; | ||
3758 | var.padding = 0; | ||
3759 | |||
3760 | kvm_set_segment(vcpu, &var, seg); | ||
3761 | return; | ||
3762 | } | ||
3763 | |||
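emulator_get_cached_descriptor()/emulator_set_cached_descriptor() translate between kvm_segment's byte-granular limit and the raw descriptor limit field: with the granularity bit set the descriptor counts 4K pages, so one direction shifts right by 12 and the other shifts left by 12 and ORs in 0xfff. A small round-trip check (values are only illustrative):

#include <assert.h>
#include <stdint.h>

/* descriptor field -> byte limit, as in emulator_set_cached_descriptor() */
static uint32_t desc_limit_to_bytes(uint32_t limit, int g)
{
        return g ? (limit << 12) | 0xfff : limit;
}

/* byte limit -> descriptor field, as in emulator_get_cached_descriptor() */
static uint32_t bytes_to_desc_limit(uint32_t bytes, int g)
{
        return g ? bytes >> 12 : bytes;
}

int main(void)
{
        uint32_t pages = 0xfffff;                        /* 20-bit limit, G=1 */
        uint32_t bytes = desc_limit_to_bytes(pages, 1);  /* 0xffffffff, 4 GiB - 1 */

        assert(bytes == 0xffffffff);
        assert(bytes_to_desc_limit(bytes, 1) == pages);
        return 0;
}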
3764 | static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) | ||
3765 | { | ||
3766 | struct kvm_segment kvm_seg; | ||
3767 | |||
3768 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
3769 | return kvm_seg.selector; | ||
3770 | } | ||
3771 | |||
3772 | static void emulator_set_segment_selector(u16 sel, int seg, | ||
3773 | struct kvm_vcpu *vcpu) | ||
3774 | { | ||
3775 | struct kvm_segment kvm_seg; | ||
3776 | |||
3777 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
3778 | kvm_seg.selector = sel; | ||
3779 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
3780 | } | ||
3781 | |||
3782 | static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
3783 | { | ||
3784 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
3785 | } | ||
3786 | |||
3347 | static struct x86_emulate_ops emulate_ops = { | 3787 | static struct x86_emulate_ops emulate_ops = { |
3348 | .read_std = kvm_read_guest_virt_system, | 3788 | .read_std = kvm_read_guest_virt_system, |
3789 | .write_std = kvm_write_guest_virt_system, | ||
3349 | .fetch = kvm_fetch_guest_virt, | 3790 | .fetch = kvm_fetch_guest_virt, |
3350 | .read_emulated = emulator_read_emulated, | 3791 | .read_emulated = emulator_read_emulated, |
3351 | .write_emulated = emulator_write_emulated, | 3792 | .write_emulated = emulator_write_emulated, |
3352 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3793 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
3794 | .pio_in_emulated = emulator_pio_in_emulated, | ||
3795 | .pio_out_emulated = emulator_pio_out_emulated, | ||
3796 | .get_cached_descriptor = emulator_get_cached_descriptor, | ||
3797 | .set_cached_descriptor = emulator_set_cached_descriptor, | ||
3798 | .get_segment_selector = emulator_get_segment_selector, | ||
3799 | .set_segment_selector = emulator_set_segment_selector, | ||
3800 | .get_gdt = emulator_get_gdt, | ||
3801 | .get_cr = emulator_get_cr, | ||
3802 | .set_cr = emulator_set_cr, | ||
3803 | .cpl = emulator_get_cpl, | ||
3804 | .set_rflags = emulator_set_rflags, | ||
3353 | }; | 3805 | }; |
3354 | 3806 | ||
3355 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 3807 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
@@ -3380,14 +3832,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3380 | cache_all_regs(vcpu); | 3832 | cache_all_regs(vcpu); |
3381 | 3833 | ||
3382 | vcpu->mmio_is_write = 0; | 3834 | vcpu->mmio_is_write = 0; |
3383 | vcpu->arch.pio.string = 0; | ||
3384 | 3835 | ||
3385 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 3836 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
3386 | int cs_db, cs_l; | 3837 | int cs_db, cs_l; |
3387 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3838 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
3388 | 3839 | ||
3389 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3840 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
3390 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 3841 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
3842 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | ||
3391 | vcpu->arch.emulate_ctxt.mode = | 3843 | vcpu->arch.emulate_ctxt.mode = |
3392 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | 3844 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : |
3393 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3845 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
@@ -3396,6 +3848,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3396 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3848 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
3397 | 3849 | ||
3398 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3850 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3851 | trace_kvm_emulate_insn_start(vcpu); | ||
3399 | 3852 | ||
3400 | /* Only allow emulation of specific instructions on #UD | 3853 | /* Only allow emulation of specific instructions on #UD |
3401 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 3854 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ |
@@ -3428,6 +3881,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3428 | ++vcpu->stat.insn_emulation; | 3881 | ++vcpu->stat.insn_emulation; |
3429 | if (r) { | 3882 | if (r) { |
3430 | ++vcpu->stat.insn_emulation_fail; | 3883 | ++vcpu->stat.insn_emulation_fail; |
3884 | trace_kvm_emulate_insn_failed(vcpu); | ||
3431 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3885 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
3432 | return EMULATE_DONE; | 3886 | return EMULATE_DONE; |
3433 | return EMULATE_FAIL; | 3887 | return EMULATE_FAIL; |
@@ -3439,16 +3893,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3439 | return EMULATE_DONE; | 3893 | return EMULATE_DONE; |
3440 | } | 3894 | } |
3441 | 3895 | ||
3896 | restart: | ||
3442 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3897 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3443 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | 3898 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; |
3444 | 3899 | ||
3445 | if (r == 0) | 3900 | if (r == 0) |
3446 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | 3901 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); |
3447 | 3902 | ||
3448 | if (vcpu->arch.pio.string) | 3903 | if (vcpu->arch.pio.count) { |
3904 | if (!vcpu->arch.pio.in) | ||
3905 | vcpu->arch.pio.count = 0; | ||
3449 | return EMULATE_DO_MMIO; | 3906 | return EMULATE_DO_MMIO; |
3907 | } | ||
3450 | 3908 | ||
3451 | if ((r || vcpu->mmio_is_write) && run) { | 3909 | if (r || vcpu->mmio_is_write) { |
3452 | run->exit_reason = KVM_EXIT_MMIO; | 3910 | run->exit_reason = KVM_EXIT_MMIO; |
3453 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 3911 | run->mmio.phys_addr = vcpu->mmio_phys_addr; |
3454 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 3912 | memcpy(run->mmio.data, vcpu->mmio_data, 8); |
@@ -3458,222 +3916,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3458 | 3916 | ||
3459 | if (r) { | 3917 | if (r) { |
3460 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3918 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
3461 | return EMULATE_DONE; | 3919 | goto done; |
3462 | if (!vcpu->mmio_needed) { | 3920 | if (!vcpu->mmio_needed) { |
3921 | ++vcpu->stat.insn_emulation_fail; | ||
3922 | trace_kvm_emulate_insn_failed(vcpu); | ||
3463 | kvm_report_emulation_failure(vcpu, "mmio"); | 3923 | kvm_report_emulation_failure(vcpu, "mmio"); |
3464 | return EMULATE_FAIL; | 3924 | return EMULATE_FAIL; |
3465 | } | 3925 | } |
3466 | return EMULATE_DO_MMIO; | 3926 | return EMULATE_DO_MMIO; |
3467 | } | 3927 | } |
3468 | 3928 | ||
3469 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
3470 | |||
3471 | if (vcpu->mmio_is_write) { | 3929 | if (vcpu->mmio_is_write) { |
3472 | vcpu->mmio_needed = 0; | 3930 | vcpu->mmio_needed = 0; |
3473 | return EMULATE_DO_MMIO; | 3931 | return EMULATE_DO_MMIO; |
3474 | } | 3932 | } |
3475 | 3933 | ||
3476 | return EMULATE_DONE; | 3934 | done: |
3477 | } | 3935 | if (vcpu->arch.exception.pending) |
3478 | EXPORT_SYMBOL_GPL(emulate_instruction); | 3936 | vcpu->arch.emulate_ctxt.restart = false; |
3479 | |||
3480 | static int pio_copy_data(struct kvm_vcpu *vcpu) | ||
3481 | { | ||
3482 | void *p = vcpu->arch.pio_data; | ||
3483 | gva_t q = vcpu->arch.pio.guest_gva; | ||
3484 | unsigned bytes; | ||
3485 | int ret; | ||
3486 | u32 error_code; | ||
3487 | |||
3488 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | ||
3489 | if (vcpu->arch.pio.in) | ||
3490 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); | ||
3491 | else | ||
3492 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); | ||
3493 | |||
3494 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3495 | kvm_inject_page_fault(vcpu, q, error_code); | ||
3496 | |||
3497 | return ret; | ||
3498 | } | ||
3499 | |||
3500 | int complete_pio(struct kvm_vcpu *vcpu) | ||
3501 | { | ||
3502 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
3503 | long delta; | ||
3504 | int r; | ||
3505 | unsigned long val; | ||
3506 | |||
3507 | if (!io->string) { | ||
3508 | if (io->in) { | ||
3509 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
3510 | memcpy(&val, vcpu->arch.pio_data, io->size); | ||
3511 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); | ||
3512 | } | ||
3513 | } else { | ||
3514 | if (io->in) { | ||
3515 | r = pio_copy_data(vcpu); | ||
3516 | if (r) | ||
3517 | goto out; | ||
3518 | } | ||
3519 | |||
3520 | delta = 1; | ||
3521 | if (io->rep) { | ||
3522 | delta *= io->cur_count; | ||
3523 | /* | ||
3524 | * The size of the register should really depend on | ||
3525 | * current address size. | ||
3526 | */ | ||
3527 | val = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
3528 | val -= delta; | ||
3529 | kvm_register_write(vcpu, VCPU_REGS_RCX, val); | ||
3530 | } | ||
3531 | if (io->down) | ||
3532 | delta = -delta; | ||
3533 | delta *= io->size; | ||
3534 | if (io->in) { | ||
3535 | val = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
3536 | val += delta; | ||
3537 | kvm_register_write(vcpu, VCPU_REGS_RDI, val); | ||
3538 | } else { | ||
3539 | val = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
3540 | val += delta; | ||
3541 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | ||
3542 | } | ||
3543 | } | ||
3544 | out: | ||
3545 | io->count -= io->cur_count; | ||
3546 | io->cur_count = 0; | ||
3547 | |||
3548 | return 0; | ||
3549 | } | ||
3550 | |||
3551 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
3552 | { | ||
3553 | /* TODO: String I/O for in kernel device */ | ||
3554 | int r; | ||
3555 | |||
3556 | if (vcpu->arch.pio.in) | ||
3557 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
3558 | vcpu->arch.pio.size, pd); | ||
3559 | else | ||
3560 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3561 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
3562 | pd); | ||
3563 | return r; | ||
3564 | } | ||
3565 | 3937 | ||
3566 | static int pio_string_write(struct kvm_vcpu *vcpu) | 3938 | if (vcpu->arch.emulate_ctxt.restart) |
3567 | { | 3939 | goto restart; |
3568 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
3569 | void *pd = vcpu->arch.pio_data; | ||
3570 | int i, r = 0; | ||
3571 | 3940 | ||
3572 | for (i = 0; i < io->cur_count; i++) { | 3941 | return EMULATE_DONE; |
3573 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
3574 | io->port, io->size, pd)) { | ||
3575 | r = -EOPNOTSUPP; | ||
3576 | break; | ||
3577 | } | ||
3578 | pd += io->size; | ||
3579 | } | ||
3580 | return r; | ||
3581 | } | ||
3582 | |||
3583 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | ||
3584 | { | ||
3585 | unsigned long val; | ||
3586 | |||
3587 | trace_kvm_pio(!in, port, size, 1); | ||
3588 | |||
3589 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3590 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
3591 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
3592 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3593 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; | ||
3594 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
3595 | vcpu->arch.pio.in = in; | ||
3596 | vcpu->arch.pio.string = 0; | ||
3597 | vcpu->arch.pio.down = 0; | ||
3598 | vcpu->arch.pio.rep = 0; | ||
3599 | |||
3600 | if (!vcpu->arch.pio.in) { | ||
3601 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
3602 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
3603 | } | ||
3604 | |||
3605 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
3606 | complete_pio(vcpu); | ||
3607 | return 1; | ||
3608 | } | ||
3609 | return 0; | ||
3610 | } | 3942 | } |
3611 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3943 | EXPORT_SYMBOL_GPL(emulate_instruction); |
3612 | 3944 | ||
3613 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | 3945 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
3614 | int size, unsigned long count, int down, | ||
3615 | gva_t address, int rep, unsigned port) | ||
3616 | { | 3946 | { |
3617 | unsigned now, in_page; | 3947 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3618 | int ret = 0; | 3948 | int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); |
3619 | 3949 | /* do not return to emulator after return from userspace */ | |
3620 | trace_kvm_pio(!in, port, size, count); | 3950 | vcpu->arch.pio.count = 0; |
3621 | |||
3622 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
3623 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
3624 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
3625 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
3626 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; | ||
3627 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
3628 | vcpu->arch.pio.in = in; | ||
3629 | vcpu->arch.pio.string = 1; | ||
3630 | vcpu->arch.pio.down = down; | ||
3631 | vcpu->arch.pio.rep = rep; | ||
3632 | |||
3633 | if (!count) { | ||
3634 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
3635 | return 1; | ||
3636 | } | ||
3637 | |||
3638 | if (!down) | ||
3639 | in_page = PAGE_SIZE - offset_in_page(address); | ||
3640 | else | ||
3641 | in_page = offset_in_page(address) + size; | ||
3642 | now = min(count, (unsigned long)in_page / size); | ||
3643 | if (!now) | ||
3644 | now = 1; | ||
3645 | if (down) { | ||
3646 | /* | ||
3647 | * String I/O in reverse. Yuck. Kill the guest, fix later. | ||
3648 | */ | ||
3649 | pr_unimpl(vcpu, "guest string pio down\n"); | ||
3650 | kvm_inject_gp(vcpu, 0); | ||
3651 | return 1; | ||
3652 | } | ||
3653 | vcpu->run->io.count = now; | ||
3654 | vcpu->arch.pio.cur_count = now; | ||
3655 | |||
3656 | if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) | ||
3657 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
3658 | |||
3659 | vcpu->arch.pio.guest_gva = address; | ||
3660 | |||
3661 | if (!vcpu->arch.pio.in) { | ||
3662 | /* string PIO write */ | ||
3663 | ret = pio_copy_data(vcpu); | ||
3664 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3665 | return 1; | ||
3666 | if (ret == 0 && !pio_string_write(vcpu)) { | ||
3667 | complete_pio(vcpu); | ||
3668 | if (vcpu->arch.pio.count == 0) | ||
3669 | ret = 1; | ||
3670 | } | ||
3671 | } | ||
3672 | /* no string PIO read support yet */ | ||
3673 | |||
3674 | return ret; | 3951 | return ret; |
3675 | } | 3952 | } |
3676 | EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); | 3953 | EXPORT_SYMBOL_GPL(kvm_fast_pio_out); |
3677 | 3954 | ||
3678 | static void bounce_off(void *info) | 3955 | static void bounce_off(void *info) |
3679 | { | 3956 | { |
@@ -3996,85 +4273,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
3996 | return emulator_write_emulated(rip, instruction, 3, vcpu); | 4273 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
3997 | } | 4274 | } |
3998 | 4275 | ||
3999 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
4000 | { | ||
4001 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
4002 | } | ||
4003 | |||
4004 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4276 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
4005 | { | 4277 | { |
4006 | struct descriptor_table dt = { limit, base }; | 4278 | struct desc_ptr dt = { limit, base }; |
4007 | 4279 | ||
4008 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4280 | kvm_x86_ops->set_gdt(vcpu, &dt); |
4009 | } | 4281 | } |
4010 | 4282 | ||
4011 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4283 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
4012 | { | 4284 | { |
4013 | struct descriptor_table dt = { limit, base }; | 4285 | struct desc_ptr dt = { limit, base }; |
4014 | 4286 | ||
4015 | kvm_x86_ops->set_idt(vcpu, &dt); | 4287 | kvm_x86_ops->set_idt(vcpu, &dt); |
4016 | } | 4288 | } |
4017 | 4289 | ||
4018 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
4019 | unsigned long *rflags) | ||
4020 | { | ||
4021 | kvm_lmsw(vcpu, msw); | ||
4022 | *rflags = kvm_get_rflags(vcpu); | ||
4023 | } | ||
4024 | |||
4025 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | ||
4026 | { | ||
4027 | unsigned long value; | ||
4028 | |||
4029 | switch (cr) { | ||
4030 | case 0: | ||
4031 | value = kvm_read_cr0(vcpu); | ||
4032 | break; | ||
4033 | case 2: | ||
4034 | value = vcpu->arch.cr2; | ||
4035 | break; | ||
4036 | case 3: | ||
4037 | value = vcpu->arch.cr3; | ||
4038 | break; | ||
4039 | case 4: | ||
4040 | value = kvm_read_cr4(vcpu); | ||
4041 | break; | ||
4042 | case 8: | ||
4043 | value = kvm_get_cr8(vcpu); | ||
4044 | break; | ||
4045 | default: | ||
4046 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
4047 | return 0; | ||
4048 | } | ||
4049 | |||
4050 | return value; | ||
4051 | } | ||
4052 | |||
4053 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | ||
4054 | unsigned long *rflags) | ||
4055 | { | ||
4056 | switch (cr) { | ||
4057 | case 0: | ||
4058 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
4059 | *rflags = kvm_get_rflags(vcpu); | ||
4060 | break; | ||
4061 | case 2: | ||
4062 | vcpu->arch.cr2 = val; | ||
4063 | break; | ||
4064 | case 3: | ||
4065 | kvm_set_cr3(vcpu, val); | ||
4066 | break; | ||
4067 | case 4: | ||
4068 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
4069 | break; | ||
4070 | case 8: | ||
4071 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
4072 | break; | ||
4073 | default: | ||
4074 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
4075 | } | ||
4076 | } | ||
4077 | |||
4078 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 4290 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
4079 | { | 4291 | { |
4080 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | 4292 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; |
@@ -4138,9 +4350,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | |||
4138 | { | 4350 | { |
4139 | struct kvm_cpuid_entry2 *best; | 4351 | struct kvm_cpuid_entry2 *best; |
4140 | 4352 | ||
4353 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
4354 | if (!best || best->eax < 0x80000008) | ||
4355 | goto not_found; | ||
4141 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | 4356 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
4142 | if (best) | 4357 | if (best) |
4143 | return best->eax & 0xff; | 4358 | return best->eax & 0xff; |
4359 | not_found: | ||
4144 | return 36; | 4360 | return 36; |
4145 | } | 4361 | } |
4146 | 4362 | ||
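The added guard keeps cpuid_maxphyaddr() from trusting leaf 0x80000008 on guests whose CPUID does not extend that far: leaf 0x80000000 advertises the highest supported extended leaf, and 36 bits remains the fallback. The same check expressed as a host-side sketch with GCC's <cpuid.h> (purely illustrative, not part of the patch):

#include <cpuid.h>
#include <stdio.h>

static int host_maxphyaddr(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) ||
            eax < 0x80000008)
                return 36;               /* same fallback as the patch */
        __get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
        return eax & 0xff;               /* physical address width in bits */
}

int main(void)
{
        printf("MAXPHYADDR: %d bits\n", host_maxphyaddr());
        return 0;
}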
@@ -4254,9 +4470,13 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
4254 | { | 4470 | { |
4255 | /* try to reinject previous events if any */ | 4471 | /* try to reinject previous events if any */ |
4256 | if (vcpu->arch.exception.pending) { | 4472 | if (vcpu->arch.exception.pending) { |
4473 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | ||
4474 | vcpu->arch.exception.has_error_code, | ||
4475 | vcpu->arch.exception.error_code); | ||
4257 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, | 4476 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, |
4258 | vcpu->arch.exception.has_error_code, | 4477 | vcpu->arch.exception.has_error_code, |
4259 | vcpu->arch.exception.error_code); | 4478 | vcpu->arch.exception.error_code, |
4479 | vcpu->arch.exception.reinject); | ||
4260 | return; | 4480 | return; |
4261 | } | 4481 | } |
4262 | 4482 | ||
@@ -4486,7 +4706,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4486 | } | 4706 | } |
4487 | 4707 | ||
4488 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 4708 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4489 | post_kvm_run_save(vcpu); | ||
4490 | 4709 | ||
4491 | vapic_exit(vcpu); | 4710 | vapic_exit(vcpu); |
4492 | 4711 | ||
@@ -4514,26 +4733,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4514 | if (!irqchip_in_kernel(vcpu->kvm)) | 4733 | if (!irqchip_in_kernel(vcpu->kvm)) |
4515 | kvm_set_cr8(vcpu, kvm_run->cr8); | 4734 | kvm_set_cr8(vcpu, kvm_run->cr8); |
4516 | 4735 | ||
4517 | if (vcpu->arch.pio.cur_count) { | 4736 | if (vcpu->arch.pio.count || vcpu->mmio_needed || |
4518 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4737 | vcpu->arch.emulate_ctxt.restart) { |
4519 | r = complete_pio(vcpu); | 4738 | if (vcpu->mmio_needed) { |
4520 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4739 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
4521 | if (r) | 4740 | vcpu->mmio_read_completed = 1; |
4522 | goto out; | 4741 | vcpu->mmio_needed = 0; |
4523 | } | 4742 | } |
4524 | if (vcpu->mmio_needed) { | ||
4525 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | ||
4526 | vcpu->mmio_read_completed = 1; | ||
4527 | vcpu->mmio_needed = 0; | ||
4528 | |||
4529 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4743 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
4530 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, | 4744 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); |
4531 | EMULTYPE_NO_DECODE); | ||
4532 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4745 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4533 | if (r == EMULATE_DO_MMIO) { | 4746 | if (r == EMULATE_DO_MMIO) { |
4534 | /* | ||
4535 | * Read-modify-write. Back to userspace. | ||
4536 | */ | ||
4537 | r = 0; | 4747 | r = 0; |
4538 | goto out; | 4748 | goto out; |
4539 | } | 4749 | } |
@@ -4545,6 +4755,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4545 | r = __vcpu_run(vcpu); | 4755 | r = __vcpu_run(vcpu); |
4546 | 4756 | ||
4547 | out: | 4757 | out: |
4758 | post_kvm_run_save(vcpu); | ||
4548 | if (vcpu->sigset_active) | 4759 | if (vcpu->sigset_active) |
4549 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 4760 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
4550 | 4761 | ||
@@ -4616,12 +4827,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4616 | return 0; | 4827 | return 0; |
4617 | } | 4828 | } |
4618 | 4829 | ||
4619 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
4620 | struct kvm_segment *var, int seg) | ||
4621 | { | ||
4622 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
4623 | } | ||
4624 | |||
4625 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 4830 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
4626 | { | 4831 | { |
4627 | struct kvm_segment cs; | 4832 | struct kvm_segment cs; |
@@ -4635,7 +4840,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); | |||
4635 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 4840 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
4636 | struct kvm_sregs *sregs) | 4841 | struct kvm_sregs *sregs) |
4637 | { | 4842 | { |
4638 | struct descriptor_table dt; | 4843 | struct desc_ptr dt; |
4639 | 4844 | ||
4640 | vcpu_load(vcpu); | 4845 | vcpu_load(vcpu); |
4641 | 4846 | ||
@@ -4650,11 +4855,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4650 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 4855 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
4651 | 4856 | ||
4652 | kvm_x86_ops->get_idt(vcpu, &dt); | 4857 | kvm_x86_ops->get_idt(vcpu, &dt); |
4653 | sregs->idt.limit = dt.limit; | 4858 | sregs->idt.limit = dt.size; |
4654 | sregs->idt.base = dt.base; | 4859 | sregs->idt.base = dt.address; |
4655 | kvm_x86_ops->get_gdt(vcpu, &dt); | 4860 | kvm_x86_ops->get_gdt(vcpu, &dt); |
4656 | sregs->gdt.limit = dt.limit; | 4861 | sregs->gdt.limit = dt.size; |
4657 | sregs->gdt.base = dt.base; | 4862 | sregs->gdt.base = dt.address; |
4658 | 4863 | ||
4659 | sregs->cr0 = kvm_read_cr0(vcpu); | 4864 | sregs->cr0 = kvm_read_cr0(vcpu); |
4660 | sregs->cr2 = vcpu->arch.cr2; | 4865 | sregs->cr2 = vcpu->arch.cr2; |
@@ -4693,563 +4898,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
4693 | return 0; | 4898 | return 0; |
4694 | } | 4899 | } |
4695 | 4900 | ||
4696 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | 4901 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
4697 | struct kvm_segment *var, int seg) | 4902 | bool has_error_code, u32 error_code) |
4698 | { | ||
4699 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
4700 | } | ||
4701 | |||
4702 | static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | ||
4703 | struct kvm_segment *kvm_desct) | ||
4704 | { | ||
4705 | kvm_desct->base = get_desc_base(seg_desc); | ||
4706 | kvm_desct->limit = get_desc_limit(seg_desc); | ||
4707 | if (seg_desc->g) { | ||
4708 | kvm_desct->limit <<= 12; | ||
4709 | kvm_desct->limit |= 0xfff; | ||
4710 | } | ||
4711 | kvm_desct->selector = selector; | ||
4712 | kvm_desct->type = seg_desc->type; | ||
4713 | kvm_desct->present = seg_desc->p; | ||
4714 | kvm_desct->dpl = seg_desc->dpl; | ||
4715 | kvm_desct->db = seg_desc->d; | ||
4716 | kvm_desct->s = seg_desc->s; | ||
4717 | kvm_desct->l = seg_desc->l; | ||
4718 | kvm_desct->g = seg_desc->g; | ||
4719 | kvm_desct->avl = seg_desc->avl; | ||
4720 | if (!selector) | ||
4721 | kvm_desct->unusable = 1; | ||
4722 | else | ||
4723 | kvm_desct->unusable = 0; | ||
4724 | kvm_desct->padding = 0; | ||
4725 | } | ||
4726 | |||
4727 | static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, | ||
4728 | u16 selector, | ||
4729 | struct descriptor_table *dtable) | ||
4730 | { | ||
4731 | if (selector & 1 << 2) { | ||
4732 | struct kvm_segment kvm_seg; | ||
4733 | |||
4734 | kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); | ||
4735 | |||
4736 | if (kvm_seg.unusable) | ||
4737 | dtable->limit = 0; | ||
4738 | else | ||
4739 | dtable->limit = kvm_seg.limit; | ||
4740 | dtable->base = kvm_seg.base; | ||
4741 | } | ||
4742 | else | ||
4743 | kvm_x86_ops->get_gdt(vcpu, dtable); | ||
4744 | } | ||
4745 | |||
4746 | /* allowed just for 8 bytes segments */ | ||
4747 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
4748 | struct desc_struct *seg_desc) | ||
4749 | { | ||
4750 | struct descriptor_table dtable; | ||
4751 | u16 index = selector >> 3; | ||
4752 | int ret; | ||
4753 | u32 err; | ||
4754 | gva_t addr; | ||
4755 | |||
4756 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
4757 | |||
4758 | if (dtable.limit < index * 8 + 7) { | ||
4759 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | ||
4760 | return X86EMUL_PROPAGATE_FAULT; | ||
4761 | } | ||
4762 | addr = dtable.base + index * 8; | ||
4763 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
4764 | vcpu, &err); | ||
4765 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
4766 | kvm_inject_page_fault(vcpu, addr, err); | ||
4767 | |||
4768 | return ret; | ||
4769 | } | ||
4770 | |||
4771 | /* allowed just for 8 bytes segments */ | ||
4772 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
4773 | struct desc_struct *seg_desc) | ||
4774 | { | ||
4775 | struct descriptor_table dtable; | ||
4776 | u16 index = selector >> 3; | ||
4777 | |||
4778 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
4779 | |||
4780 | if (dtable.limit < index * 8 + 7) | ||
4781 | return 1; | ||
4782 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); | ||
4783 | } | ||
4784 | |||
4785 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
4786 | struct desc_struct *seg_desc) | ||
4787 | { | ||
4788 | u32 base_addr = get_desc_base(seg_desc); | ||
4789 | |||
4790 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
4791 | } | ||
4792 | |||
4793 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, | ||
4794 | struct desc_struct *seg_desc) | ||
4795 | { | ||
4796 | u32 base_addr = get_desc_base(seg_desc); | ||
4797 | |||
4798 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); | ||
4799 | } | ||
4800 | |||
4801 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | ||
4802 | { | ||
4803 | struct kvm_segment kvm_seg; | ||
4804 | |||
4805 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4806 | return kvm_seg.selector; | ||
4807 | } | ||
4808 | |||
4809 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
4810 | { | ||
4811 | struct kvm_segment segvar = { | ||
4812 | .base = selector << 4, | ||
4813 | .limit = 0xffff, | ||
4814 | .selector = selector, | ||
4815 | .type = 3, | ||
4816 | .present = 1, | ||
4817 | .dpl = 3, | ||
4818 | .db = 0, | ||
4819 | .s = 1, | ||
4820 | .l = 0, | ||
4821 | .g = 0, | ||
4822 | .avl = 0, | ||
4823 | .unusable = 0, | ||
4824 | }; | ||
4825 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | ||
4826 | return X86EMUL_CONTINUE; | ||
4827 | } | ||
4828 | |||
4829 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | ||
4830 | { | 4903 | { |
4831 | return (seg != VCPU_SREG_LDTR) && | 4904 | int cs_db, cs_l, ret; |
4832 | (seg != VCPU_SREG_TR) && | 4905 | cache_all_regs(vcpu); |
4833 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); | ||
4834 | } | ||
4835 | |||
4836 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
4837 | { | ||
4838 | struct kvm_segment kvm_seg; | ||
4839 | struct desc_struct seg_desc; | ||
4840 | u8 dpl, rpl, cpl; | ||
4841 | unsigned err_vec = GP_VECTOR; | ||
4842 | u32 err_code = 0; | ||
4843 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
4844 | int ret; | ||
4845 | 4906 | ||
4846 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) | 4907 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
4847 | return kvm_load_realmode_segment(vcpu, selector, seg); | ||
4848 | 4908 | ||
4849 | /* NULL selector is not valid for TR, CS and SS */ | 4909 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
4850 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | 4910 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
4851 | && null_selector) | 4911 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); |
4852 | goto exception; | 4912 | vcpu->arch.emulate_ctxt.mode = |
4913 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
4914 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | ||
4915 | ? X86EMUL_MODE_VM86 : cs_l | ||
4916 | ? X86EMUL_MODE_PROT64 : cs_db | ||
4917 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | ||
4853 | 4918 | ||
4854 | /* TR should be in GDT only */ | 4919 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, |
4855 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | 4920 | tss_selector, reason, has_error_code, |
4856 | goto exception; | 4921 | error_code); |
4857 | 4922 | ||
4858 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4859 | if (ret) | 4923 | if (ret) |
4860 | return ret; | 4924 | return EMULATE_FAIL; |
4861 | |||
4862 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
4863 | |||
4864 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
4865 | kvm_seg.unusable = 1; | ||
4866 | goto load; | ||
4867 | } | ||
4868 | |||
4869 | err_code = selector & 0xfffc; | ||
4870 | err_vec = GP_VECTOR; | ||
4871 | |||
4872 | /* can't load system descriptor into segment selector */ |||
4873 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
4874 | goto exception; | ||
4875 | |||
4876 | if (!kvm_seg.present) { | ||
4877 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
4878 | goto exception; | ||
4879 | } | ||
4880 | |||
4881 | rpl = selector & 3; | ||
4882 | dpl = kvm_seg.dpl; | ||
4883 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
4884 | |||
4885 | switch (seg) { | ||
4886 | case VCPU_SREG_SS: | ||
4887 | /* | ||
4888 | * segment is not a writable data segment or segment | ||
4889 | * selector's RPL != CPL or segment selector's RPL != CPL | ||
4890 | */ | ||
4891 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
4892 | goto exception; | ||
4893 | break; | ||
4894 | case VCPU_SREG_CS: | ||
4895 | if (!(kvm_seg.type & 8)) | ||
4896 | goto exception; | ||
4897 | |||
4898 | if (kvm_seg.type & 4) { | ||
4899 | /* conforming */ | ||
4900 | if (dpl > cpl) | ||
4901 | goto exception; | ||
4902 | } else { | ||
4903 | /* nonconforming */ | ||
4904 | if (rpl > cpl || dpl != cpl) | ||
4905 | goto exception; | ||
4906 | } | ||
4907 | /* CS(RPL) <- CPL */ | ||
4908 | selector = (selector & 0xfffc) | cpl; | ||
4909 | break; | ||
4910 | case VCPU_SREG_TR: | ||
4911 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
4912 | goto exception; | ||
4913 | break; | ||
4914 | case VCPU_SREG_LDTR: | ||
4915 | if (kvm_seg.s || kvm_seg.type != 2) | ||
4916 | goto exception; | ||
4917 | break; | ||
4918 | default: /* DS, ES, FS, or GS */ | ||
4919 | /* | ||
4920 | * segment is not a data or readable code segment or | ||
4921 | * ((segment is a data or nonconforming code segment) | ||
4922 | * and (both RPL and CPL > DPL)) | ||
4923 | */ | ||
4924 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
4925 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
4926 | goto exception; | ||
4927 | break; | ||
4928 | } | ||
4929 | |||
4930 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
4931 | /* mark segment as accessed */ | ||
4932 | kvm_seg.type |= 1; | ||
4933 | seg_desc.type |= 1; | ||
4934 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4935 | } | ||
4936 | load: | ||
4937 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4938 | return X86EMUL_CONTINUE; | ||
4939 | exception: | ||
4940 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
4941 | return X86EMUL_PROPAGATE_FAULT; | ||
4942 | } | ||
4943 | |||
4944 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | ||
4945 | struct tss_segment_32 *tss) | ||
4946 | { | ||
4947 | tss->cr3 = vcpu->arch.cr3; | ||
4948 | tss->eip = kvm_rip_read(vcpu); | ||
4949 | tss->eflags = kvm_get_rflags(vcpu); | ||
4950 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
4951 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
4952 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
4953 | tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
4954 | tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
4955 | tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
4956 | tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
4957 | tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
4958 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
4959 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
4960 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
4961 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
4962 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); | ||
4963 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); | ||
4964 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
4965 | } | ||
4966 | |||
4967 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
4968 | { | ||
4969 | struct kvm_segment kvm_seg; | ||
4970 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4971 | kvm_seg.selector = sel; | ||
4972 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4973 | } | ||
4974 | |||
4975 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | ||
4976 | struct tss_segment_32 *tss) | ||
4977 | { | ||
4978 | kvm_set_cr3(vcpu, tss->cr3); | ||
4979 | |||
4980 | kvm_rip_write(vcpu, tss->eip); | ||
4981 | kvm_set_rflags(vcpu, tss->eflags | 2); | ||
4982 | |||
4983 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | ||
4984 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | ||
4985 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); | ||
4986 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); | ||
4987 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); | ||
4988 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); | ||
4989 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | ||
4990 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | ||
4991 | |||
4992 | /* | ||
4993 | * SDM says that segment selectors are loaded before segment | ||
4994 | * descriptors | ||
4995 | */ | ||
4996 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
4997 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
4998 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
4999 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5000 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5001 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
5002 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
5003 | |||
5004 | /* | ||
5005 | * Now load segment descriptors. If fault happens at this stage |||
5006 | * it is handled in a context of new task | ||
5007 | */ | ||
5008 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
5009 | return 1; | ||
5010 | |||
5011 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
5012 | return 1; | ||
5013 | 4925 | ||
5014 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | 4926 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
5015 | return 1; | 4927 | return EMULATE_DONE; |
5016 | |||
5017 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
5018 | return 1; | ||
5019 | |||
5020 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
5021 | return 1; | ||
5022 | |||
5023 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) | ||
5024 | return 1; | ||
5025 | |||
5026 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) | ||
5027 | return 1; | ||
5028 | return 0; | ||
5029 | } | ||
5030 | |||
5031 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, | ||
5032 | struct tss_segment_16 *tss) | ||
5033 | { | ||
5034 | tss->ip = kvm_rip_read(vcpu); | ||
5035 | tss->flag = kvm_get_rflags(vcpu); | ||
5036 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
5037 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
5038 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
5039 | tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
5040 | tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
5041 | tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
5042 | tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
5043 | tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
5044 | |||
5045 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
5046 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
5047 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
5048 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
5049 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
5050 | } | ||
5051 | |||
5052 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | ||
5053 | struct tss_segment_16 *tss) | ||
5054 | { | ||
5055 | kvm_rip_write(vcpu, tss->ip); | ||
5056 | kvm_set_rflags(vcpu, tss->flag | 2); | ||
5057 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | ||
5058 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | ||
5059 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | ||
5060 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); | ||
5061 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); | ||
5062 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); | ||
5063 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | ||
5064 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | ||
5065 | |||
5066 | /* | ||
5067 | * SDM says that segment selectors are loaded before segment | ||
5068 | * descriptors | ||
5069 | */ | ||
5070 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
5071 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
5072 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
5073 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5074 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5075 | |||
5076 | /* | ||
5077 | * Now load segment descriptors. If fault happens at this stage |||
5078 | * it is handled in a context of new task | ||
5079 | */ | ||
5080 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
5081 | return 1; | ||
5082 | |||
5083 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
5084 | return 1; | ||
5085 | |||
5086 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | ||
5087 | return 1; | ||
5088 | |||
5089 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
5090 | return 1; | ||
5091 | |||
5092 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
5093 | return 1; | ||
5094 | return 0; | ||
5095 | } | ||
5096 | |||
5097 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
5098 | u16 old_tss_sel, u32 old_tss_base, | ||
5099 | struct desc_struct *nseg_desc) | ||
5100 | { | ||
5101 | struct tss_segment_16 tss_segment_16; | ||
5102 | int ret = 0; | ||
5103 | |||
5104 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
5105 | sizeof tss_segment_16)) | ||
5106 | goto out; | ||
5107 | |||
5108 | save_state_to_tss16(vcpu, &tss_segment_16); | ||
5109 | |||
5110 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
5111 | sizeof tss_segment_16)) | ||
5112 | goto out; | ||
5113 | |||
5114 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
5115 | &tss_segment_16, sizeof tss_segment_16)) | ||
5116 | goto out; | ||
5117 | |||
5118 | if (old_tss_sel != 0xffff) { | ||
5119 | tss_segment_16.prev_task_link = old_tss_sel; | ||
5120 | |||
5121 | if (kvm_write_guest(vcpu->kvm, | ||
5122 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
5123 | &tss_segment_16.prev_task_link, | ||
5124 | sizeof tss_segment_16.prev_task_link)) | ||
5125 | goto out; | ||
5126 | } | ||
5127 | |||
5128 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | ||
5129 | goto out; | ||
5130 | |||
5131 | ret = 1; | ||
5132 | out: | ||
5133 | return ret; | ||
5134 | } | ||
5135 | |||
5136 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
5137 | u16 old_tss_sel, u32 old_tss_base, | ||
5138 | struct desc_struct *nseg_desc) | ||
5139 | { | ||
5140 | struct tss_segment_32 tss_segment_32; | ||
5141 | int ret = 0; | ||
5142 | |||
5143 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
5144 | sizeof tss_segment_32)) | ||
5145 | goto out; | ||
5146 | |||
5147 | save_state_to_tss32(vcpu, &tss_segment_32); | ||
5148 | |||
5149 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
5150 | sizeof tss_segment_32)) | ||
5151 | goto out; | ||
5152 | |||
5153 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
5154 | &tss_segment_32, sizeof tss_segment_32)) | ||
5155 | goto out; | ||
5156 | |||
5157 | if (old_tss_sel != 0xffff) { | ||
5158 | tss_segment_32.prev_task_link = old_tss_sel; | ||
5159 | |||
5160 | if (kvm_write_guest(vcpu->kvm, | ||
5161 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
5162 | &tss_segment_32.prev_task_link, | ||
5163 | sizeof tss_segment_32.prev_task_link)) | ||
5164 | goto out; | ||
5165 | } | ||
5166 | |||
5167 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | ||
5168 | goto out; | ||
5169 | |||
5170 | ret = 1; | ||
5171 | out: | ||
5172 | return ret; | ||
5173 | } | ||
5174 | |||
5175 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | ||
5176 | { | ||
5177 | struct kvm_segment tr_seg; | ||
5178 | struct desc_struct cseg_desc; | ||
5179 | struct desc_struct nseg_desc; | ||
5180 | int ret = 0; | ||
5181 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
5182 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
5183 | u32 desc_limit; | ||
5184 | |||
5185 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); | ||
5186 | |||
5187 | /* FIXME: Handle errors. Failure to read either TSS or their | ||
5188 | * descriptors should generate a pagefault. | ||
5189 | */ | ||
5190 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | ||
5191 | goto out; | ||
5192 | |||
5193 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) | ||
5194 | goto out; | ||
5195 | |||
5196 | if (reason != TASK_SWITCH_IRET) { | ||
5197 | int cpl; | ||
5198 | |||
5199 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
5200 | if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { | ||
5201 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
5202 | return 1; | ||
5203 | } | ||
5204 | } | ||
5205 | |||
5206 | desc_limit = get_desc_limit(&nseg_desc); | ||
5207 | if (!nseg_desc.p || | ||
5208 | ((desc_limit < 0x67 && (nseg_desc.type & 8)) || | ||
5209 | desc_limit < 0x2b)) { | ||
5210 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); | ||
5211 | return 1; | ||
5212 | } | ||
5213 | |||
5214 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
5215 | cseg_desc.type &= ~(1 << 1); //clear the B flag | ||
5216 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); | ||
5217 | } | ||
5218 | |||
5219 | if (reason == TASK_SWITCH_IRET) { | ||
5220 | u32 eflags = kvm_get_rflags(vcpu); | ||
5221 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | ||
5222 | } | ||
5223 | |||
5224 | /* set back link to prev task only if NT bit is set in eflags | ||
5225 | note that old_tss_sel is not used after this point */ |||
5226 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
5227 | old_tss_sel = 0xffff; | ||
5228 | |||
5229 | if (nseg_desc.type & 8) | ||
5230 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | ||
5231 | old_tss_base, &nseg_desc); | ||
5232 | else | ||
5233 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, | ||
5234 | old_tss_base, &nseg_desc); | ||
5235 | |||
5236 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | ||
5237 | u32 eflags = kvm_get_rflags(vcpu); | ||
5238 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); | ||
5239 | } | ||
5240 | |||
5241 | if (reason != TASK_SWITCH_IRET) { | ||
5242 | nseg_desc.type |= (1 << 1); | ||
5243 | save_guest_segment_descriptor(vcpu, tss_selector, | ||
5244 | &nseg_desc); | ||
5245 | } | ||
5246 | |||
5247 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); | ||
5248 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | ||
5249 | tr_seg.type = 11; | ||
5250 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | ||
5251 | out: | ||
5252 | return ret; | ||
5253 | } | 4928 | } |
5254 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 4929 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
5255 | 4930 | ||
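The reworked kvm_task_switch() defers the actual task-switch logic to emulator_task_switch(); the x86.c side only seeds the emulation context, including picking the emulation mode from CR0.PE, EFLAGS.VM and the CS descriptor's L and D/B bits. The nested ternary, unrolled as a standalone helper (a sketch, not the kernel's code):

enum emul_mode { MODE_REAL, MODE_VM86, MODE_PROT16, MODE_PROT32, MODE_PROT64 };

/* protmode: CR0.PE; vm86: EFLAGS.VM; cs_l/cs_db: L and D/B bits of CS */
static enum emul_mode pick_mode(int protmode, int vm86, int cs_l, int cs_db)
{
        if (!protmode)
                return MODE_REAL;
        if (vm86)
                return MODE_VM86;
        if (cs_l)
                return MODE_PROT64;
        return cs_db ? MODE_PROT32 : MODE_PROT16;
}

int main(void)
{
        return pick_mode(1, 0, 1, 0) == MODE_PROT64 ? 0 : 1;
}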
@@ -5258,15 +4933,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5258 | { | 4933 | { |
5259 | int mmu_reset_needed = 0; | 4934 | int mmu_reset_needed = 0; |
5260 | int pending_vec, max_bits; | 4935 | int pending_vec, max_bits; |
5261 | struct descriptor_table dt; | 4936 | struct desc_ptr dt; |
5262 | 4937 | ||
5263 | vcpu_load(vcpu); | 4938 | vcpu_load(vcpu); |
5264 | 4939 | ||
5265 | dt.limit = sregs->idt.limit; | 4940 | dt.size = sregs->idt.limit; |
5266 | dt.base = sregs->idt.base; | 4941 | dt.address = sregs->idt.base; |
5267 | kvm_x86_ops->set_idt(vcpu, &dt); | 4942 | kvm_x86_ops->set_idt(vcpu, &dt); |
5268 | dt.limit = sregs->gdt.limit; | 4943 | dt.size = sregs->gdt.limit; |
5269 | dt.base = sregs->gdt.base; | 4944 | dt.address = sregs->gdt.base; |
5270 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4945 | kvm_x86_ops->set_gdt(vcpu, &dt); |
5271 | 4946 | ||
5272 | vcpu->arch.cr2 = sregs->cr2; | 4947 | vcpu->arch.cr2 = sregs->cr2; |
@@ -5365,11 +5040,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5365 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 5040 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
5366 | } | 5041 | } |
5367 | 5042 | ||
5368 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | 5043 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
5369 | vcpu->arch.singlestep_cs = | 5044 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + |
5370 | get_segment_selector(vcpu, VCPU_SREG_CS); | 5045 | get_segment_base(vcpu, VCPU_SREG_CS); |
5371 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
5372 | } | ||
5373 | 5046 | ||
5374 | /* | 5047 | /* |
5375 | * Trigger an rflags update that will inject or remove the trace | 5048 | * Trigger an rflags update that will inject or remove the trace |
@@ -5860,13 +5533,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
5860 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5533 | return kvm_x86_ops->interrupt_allowed(vcpu); |
5861 | } | 5534 | } |
5862 | 5535 | ||
5536 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) | ||
5537 | { | ||
5538 | unsigned long current_rip = kvm_rip_read(vcpu) + | ||
5539 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
5540 | |||
5541 | return current_rip == linear_rip; | ||
5542 | } | ||
5543 | EXPORT_SYMBOL_GPL(kvm_is_linear_rip); | ||
5544 | |||
5863 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | 5545 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) |
5864 | { | 5546 | { |
5865 | unsigned long rflags; | 5547 | unsigned long rflags; |
5866 | 5548 | ||
5867 | rflags = kvm_x86_ops->get_rflags(vcpu); | 5549 | rflags = kvm_x86_ops->get_rflags(vcpu); |
5868 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 5550 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
5869 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | 5551 | rflags &= ~X86_EFLAGS_TF; |
5870 | return rflags; | 5552 | return rflags; |
5871 | } | 5553 | } |
5872 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | 5554 | EXPORT_SYMBOL_GPL(kvm_get_rflags); |
@@ -5874,10 +5556,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags); | |||
5874 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 5556 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
5875 | { | 5557 | { |
5876 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | 5558 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && |
5877 | vcpu->arch.singlestep_cs == | 5559 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) |
5878 | get_segment_selector(vcpu, VCPU_SREG_CS) && | 5560 | rflags |= X86_EFLAGS_TF; |
5879 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
5880 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
5881 | kvm_x86_ops->set_rflags(vcpu, rflags); | 5561 | kvm_x86_ops->set_rflags(vcpu, rflags); |
5882 | } | 5562 | } |
5883 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 5563 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
@@ -5893,3 +5573,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | |||
5893 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | 5573 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); |
5894 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | 5574 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); |
5895 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | 5575 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); |
5576 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | ||
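
The x86.c hunks above replace the old (CS selector, RIP) single-step bookkeeping with one linear value, and kvm_is_linear_rip() recomputes that value at check time. A condensed sketch of the pattern, using only names that appear in the hunks (the helper names record_singlestep/still_at_singlestep_rip are illustrative, not kernel functions):

/* Record the trap point as a single linear value: CS base + RIP. */
static void record_singlestep(struct kvm_vcpu *vcpu)
{
	vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
				    get_segment_base(vcpu, VCPU_SREG_CS);
}

/* kvm_set_rflags() only re-arms TF if the guest is still at that point;
 * this is exactly the comparison kvm_is_linear_rip() performs. */
static bool still_at_singlestep_rip(struct kvm_vcpu *vcpu)
{
	return kvm_rip_read(vcpu) + get_segment_base(vcpu, VCPU_SREG_CS) ==
	       vcpu->arch.singlestep_rip;
}
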
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b7a404722d2b..f4b54458285b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -65,6 +65,13 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
66 | } | 66 | } |
67 | 67 | ||
68 | static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) | ||
69 | { | ||
70 | return rcu_dereference_check(kvm->arch.aliases, | ||
71 | srcu_read_lock_held(&kvm->srcu) | ||
72 | || lockdep_is_held(&kvm->slots_lock)); | ||
73 | } | ||
74 | |||
68 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 75 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
69 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 76 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
70 | 77 | ||
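
The new kvm_aliases() inline wraps rcu_dereference_check(), so lockdep accepts the access only under kvm->srcu or kvm->slots_lock. A minimal sketch of a read-side caller under that locking discipline (walk_aliases is a hypothetical function, not a call site from this patch):

static void walk_aliases(struct kvm *kvm)
{
	int idx = srcu_read_lock(&kvm->srcu);
	struct kvm_mem_aliases *aliases = kvm_aliases(kvm);

	/* ... read-only walk of the alias table ... */

	srcu_read_unlock(&kvm->srcu, idx);
}
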
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 60df9c84ecae..23ea02253900 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
@@ -160,6 +160,7 @@ struct kvm_pit_config { | |||
160 | #define KVM_EXIT_DCR 15 | 160 | #define KVM_EXIT_DCR 15 |
161 | #define KVM_EXIT_NMI 16 | 161 | #define KVM_EXIT_NMI 16 |
162 | #define KVM_EXIT_INTERNAL_ERROR 17 | 162 | #define KVM_EXIT_INTERNAL_ERROR 17 |
163 | #define KVM_EXIT_OSI 18 | ||
163 | 164 | ||
164 | /* For KVM_EXIT_INTERNAL_ERROR */ | 165 | /* For KVM_EXIT_INTERNAL_ERROR */ |
165 | #define KVM_INTERNAL_ERROR_EMULATION 1 | 166 | #define KVM_INTERNAL_ERROR_EMULATION 1 |
@@ -259,6 +260,10 @@ struct kvm_run { | |||
259 | __u32 ndata; | 260 | __u32 ndata; |
260 | __u64 data[16]; | 261 | __u64 data[16]; |
261 | } internal; | 262 | } internal; |
263 | /* KVM_EXIT_OSI */ | ||
264 | struct { | ||
265 | __u64 gprs[32]; | ||
266 | } osi; | ||
262 | /* Fix the size of the union. */ | 267 | /* Fix the size of the union. */ |
263 | char padding[256]; | 268 | char padding[256]; |
264 | }; | 269 | }; |
@@ -400,6 +405,15 @@ struct kvm_ioeventfd { | |||
400 | __u8 pad[36]; | 405 | __u8 pad[36]; |
401 | }; | 406 | }; |
402 | 407 | ||
408 | /* for KVM_ENABLE_CAP */ | ||
409 | struct kvm_enable_cap { | ||
410 | /* in */ | ||
411 | __u32 cap; | ||
412 | __u32 flags; | ||
413 | __u64 args[4]; | ||
414 | __u8 pad[64]; | ||
415 | }; | ||
416 | |||
403 | #define KVMIO 0xAE | 417 | #define KVMIO 0xAE |
404 | 418 | ||
405 | /* | 419 | /* |
@@ -501,7 +515,15 @@ struct kvm_ioeventfd { | |||
501 | #define KVM_CAP_HYPERV_VAPIC 45 | 515 | #define KVM_CAP_HYPERV_VAPIC 45 |
502 | #define KVM_CAP_HYPERV_SPIN 46 | 516 | #define KVM_CAP_HYPERV_SPIN 46 |
503 | #define KVM_CAP_PCI_SEGMENT 47 | 517 | #define KVM_CAP_PCI_SEGMENT 47 |
518 | #define KVM_CAP_PPC_PAIRED_SINGLES 48 | ||
519 | #define KVM_CAP_INTR_SHADOW 49 | ||
520 | #ifdef __KVM_HAVE_DEBUGREGS | ||
521 | #define KVM_CAP_DEBUGREGS 50 | ||
522 | #endif | ||
504 | #define KVM_CAP_X86_ROBUST_SINGLESTEP 51 | 523 | #define KVM_CAP_X86_ROBUST_SINGLESTEP 51 |
524 | #define KVM_CAP_PPC_OSI 52 | ||
525 | #define KVM_CAP_PPC_UNSET_IRQ 53 | ||
526 | #define KVM_CAP_ENABLE_CAP 54 | ||
505 | 527 | ||
506 | #ifdef KVM_CAP_IRQ_ROUTING | 528 | #ifdef KVM_CAP_IRQ_ROUTING |
507 | 529 | ||
@@ -688,6 +710,10 @@ struct kvm_clock_data { | |||
688 | /* Available with KVM_CAP_VCPU_EVENTS */ | 710 | /* Available with KVM_CAP_VCPU_EVENTS */ |
689 | #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) | 711 | #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) |
690 | #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) | 712 | #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) |
713 | /* Available with KVM_CAP_DEBUGREGS */ | ||
714 | #define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) | ||
715 | #define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) | ||
716 | #define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) | ||
691 | 717 | ||
692 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | 718 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) |
693 | 719 | ||
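
The new KVM_ENABLE_CAP ioctl takes a struct kvm_enable_cap naming the capability to turn on. A hedged userspace sketch, assuming a per-vcpu capability such as KVM_CAP_PPC_OSI; which fd the ioctl is issued on depends on the capability, and vcpu_fd here is a placeholder:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_osi(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_PPC_OSI;	/* no flags or args needed here */

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
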
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 169d07758ee5..7cb116afa1cd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -105,6 +105,12 @@ struct kvm_vcpu { | |||
105 | struct kvm_vcpu_arch arch; | 105 | struct kvm_vcpu_arch arch; |
106 | }; | 106 | }; |
107 | 107 | ||
108 | /* | ||
109 | * Some of the bitops functions do not support too long bitmaps. | ||
110 | * This number must be determined not to exceed such limits. | ||
111 | */ | ||
112 | #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) | ||
113 | |||
108 | struct kvm_memory_slot { | 114 | struct kvm_memory_slot { |
109 | gfn_t base_gfn; | 115 | gfn_t base_gfn; |
110 | unsigned long npages; | 116 | unsigned long npages; |
@@ -237,17 +243,23 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); | |||
237 | void vcpu_load(struct kvm_vcpu *vcpu); | 243 | void vcpu_load(struct kvm_vcpu *vcpu); |
238 | void vcpu_put(struct kvm_vcpu *vcpu); | 244 | void vcpu_put(struct kvm_vcpu *vcpu); |
239 | 245 | ||
240 | int kvm_init(void *opaque, unsigned int vcpu_size, | 246 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, |
241 | struct module *module); | 247 | struct module *module); |
242 | void kvm_exit(void); | 248 | void kvm_exit(void); |
243 | 249 | ||
244 | void kvm_get_kvm(struct kvm *kvm); | 250 | void kvm_get_kvm(struct kvm *kvm); |
245 | void kvm_put_kvm(struct kvm *kvm); | 251 | void kvm_put_kvm(struct kvm *kvm); |
246 | 252 | ||
253 | static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) | ||
254 | { | ||
255 | return rcu_dereference_check(kvm->memslots, | ||
256 | srcu_read_lock_held(&kvm->srcu) | ||
257 | || lockdep_is_held(&kvm->slots_lock)); | ||
258 | } | ||
259 | |||
247 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) | 260 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) |
248 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) | 261 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) |
249 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } | 262 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } |
250 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); | ||
251 | 263 | ||
252 | extern struct page *bad_page; | 264 | extern struct page *bad_page; |
253 | extern pfn_t bad_pfn; | 265 | extern pfn_t bad_pfn; |
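
The kvm_memslots() accessor centralizes the lockdep-checked dereference that the later virt/kvm hunks switch to, and KVM_MEM_MAX_NR_PAGES caps a slot so bitops on the dirty bitmap stay within range. An illustrative SRCU reader (count_guest_pages is a made-up example, not part of the patch):

static unsigned long count_guest_pages(struct kvm *kvm)
{
	unsigned long total = 0;
	struct kvm_memslots *slots;
	int i, idx;

	idx = srcu_read_lock(&kvm->srcu);
	slots = kvm_memslots(kvm);
	for (i = 0; i < slots->nmemslots; i++)
		total += slots->memslots[i].npages; /* each <= KVM_MEM_MAX_NR_PAGES */
	srcu_read_unlock(&kvm->srcu, idx);

	return total;
}
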
diff --git a/include/linux/tboot.h b/include/linux/tboot.h index bf2a0c748878..1dba6ee55203 100644 --- a/include/linux/tboot.h +++ b/include/linux/tboot.h | |||
@@ -150,6 +150,7 @@ extern int tboot_force_iommu(void); | |||
150 | 150 | ||
151 | #else | 151 | #else |
152 | 152 | ||
153 | #define tboot_enabled() 0 | ||
153 | #define tboot_probe() do { } while (0) | 154 | #define tboot_probe() do { } while (0) |
154 | #define tboot_shutdown(shutdown_type) do { } while (0) | 155 | #define tboot_shutdown(shutdown_type) do { } while (0) |
155 | #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \ | 156 | #define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \ |
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index b17d49dfc3ef..6dd3a51ab1cb 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h | |||
@@ -5,7 +5,6 @@ | |||
5 | 5 | ||
6 | #undef TRACE_SYSTEM | 6 | #undef TRACE_SYSTEM |
7 | #define TRACE_SYSTEM kvm | 7 | #define TRACE_SYSTEM kvm |
8 | #define TRACE_INCLUDE_FILE kvm | ||
9 | 8 | ||
10 | #if defined(__KVM_HAVE_IOAPIC) | 9 | #if defined(__KVM_HAVE_IOAPIC) |
11 | TRACE_EVENT(kvm_set_irq, | 10 | TRACE_EVENT(kvm_set_irq, |
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 02ff2b19dbe2..4d10b1e047f4 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c | |||
@@ -316,12 +316,16 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, | |||
316 | kvm_assigned_dev_intr, 0, | 316 | kvm_assigned_dev_intr, 0, |
317 | "kvm_assigned_msix_device", | 317 | "kvm_assigned_msix_device", |
318 | (void *)dev); | 318 | (void *)dev); |
319 | /* FIXME: free requested_irq's on failure */ | ||
320 | if (r) | 319 | if (r) |
321 | return r; | 320 | goto err; |
322 | } | 321 | } |
323 | 322 | ||
324 | return 0; | 323 | return 0; |
324 | err: | ||
325 | for (i -= 1; i >= 0; i--) | ||
326 | free_irq(dev->host_msix_entries[i].vector, (void *)dev); | ||
327 | pci_disable_msix(dev->dev); | ||
328 | return r; | ||
325 | } | 329 | } |
326 | 330 | ||
327 | #endif | 331 | #endif |
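
The MSI-X error path added above follows the usual partial-unwind idiom: on a failure at entry i, undo entries 0..i-1 in reverse order, then undo the setup step that preceded the loop. The generic shape, with hypothetical setup_entry/teardown_entry/undo_global_setup helpers standing in for request_irq/free_irq/pci_disable_msix:

static int setup_all(int n)
{
	int i, r;

	for (i = 0; i < n; i++) {
		r = setup_entry(i);	/* e.g. request_irq() */
		if (r)
			goto err;
	}
	return 0;
err:
	for (i -= 1; i >= 0; i--)
		teardown_entry(i);	/* e.g. free_irq() */
	undo_global_setup();		/* e.g. pci_disable_msix() */
	return r;
}
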
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 36e258029649..53850177163f 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
@@ -120,8 +120,10 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) | |||
120 | return ret; | 120 | return ret; |
121 | 121 | ||
122 | out_free_dev: | 122 | out_free_dev: |
123 | kvm->coalesced_mmio_dev = NULL; | ||
123 | kfree(dev); | 124 | kfree(dev); |
124 | out_free_page: | 125 | out_free_page: |
126 | kvm->coalesced_mmio_ring = NULL; | ||
125 | __free_page(page); | 127 | __free_page(page); |
126 | out_err: | 128 | out_err: |
127 | return ret; | 129 | return ret; |
@@ -139,7 +141,7 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | |||
139 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; | 141 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; |
140 | 142 | ||
141 | if (dev == NULL) | 143 | if (dev == NULL) |
142 | return -EINVAL; | 144 | return -ENXIO; |
143 | 145 | ||
144 | mutex_lock(&kvm->slots_lock); | 146 | mutex_lock(&kvm->slots_lock); |
145 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { | 147 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { |
@@ -162,7 +164,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
162 | struct kvm_coalesced_mmio_zone *z; | 164 | struct kvm_coalesced_mmio_zone *z; |
163 | 165 | ||
164 | if (dev == NULL) | 166 | if (dev == NULL) |
165 | return -EINVAL; | 167 | return -ENXIO; |
166 | 168 | ||
167 | mutex_lock(&kvm->slots_lock); | 169 | mutex_lock(&kvm->slots_lock); |
168 | 170 | ||
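
With this change, registering or unregistering a coalesced-MMIO zone on a VM that has no coalesced-MMIO device reports -ENXIO ("no such device") instead of -EINVAL, so userspace can distinguish "not available" from "bad argument". A hedged userspace sketch; the ioctl name KVM_REGISTER_COALESCED_MMIO and the zone layout come from elsewhere in include/linux/kvm.h, not from the hunks shown, and the address range is a placeholder:

#include <errno.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int try_register_zone(int vm_fd)
{
	struct kvm_coalesced_mmio_zone zone = {
		.addr = 0xd0000000,	/* placeholder guest-physical range */
		.size = 0x1000,
	};

	if (ioctl(vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone) < 0) {
		if (errno == ENXIO)
			return 0;	/* coalesced MMIO not available for this VM */
		return -1;		/* genuine argument or permission error */
	}
	return 1;
}
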
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 11692b9e8830..d2f06be63354 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c | |||
@@ -127,7 +127,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) | |||
127 | int i, r = 0; | 127 | int i, r = 0; |
128 | struct kvm_memslots *slots; | 128 | struct kvm_memslots *slots; |
129 | 129 | ||
130 | slots = rcu_dereference(kvm->memslots); | 130 | slots = kvm_memslots(kvm); |
131 | 131 | ||
132 | for (i = 0; i < slots->nmemslots; i++) { | 132 | for (i = 0; i < slots->nmemslots; i++) { |
133 | r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); | 133 | r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); |
@@ -286,7 +286,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) | |||
286 | int i; | 286 | int i; |
287 | struct kvm_memslots *slots; | 287 | struct kvm_memslots *slots; |
288 | 288 | ||
289 | slots = rcu_dereference(kvm->memslots); | 289 | slots = kvm_memslots(kvm); |
290 | 290 | ||
291 | for (i = 0; i < slots->nmemslots; i++) { | 291 | for (i = 0; i < slots->nmemslots; i++) { |
292 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, | 292 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c82ae2492634..f032806a212f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -422,9 +422,6 @@ static struct kvm *kvm_create_vm(void) | |||
422 | spin_lock(&kvm_lock); | 422 | spin_lock(&kvm_lock); |
423 | list_add(&kvm->vm_list, &vm_list); | 423 | list_add(&kvm->vm_list, &vm_list); |
424 | spin_unlock(&kvm_lock); | 424 | spin_unlock(&kvm_lock); |
425 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
426 | kvm_coalesced_mmio_init(kvm); | ||
427 | #endif | ||
428 | out: | 425 | out: |
429 | return kvm; | 426 | return kvm; |
430 | 427 | ||
@@ -560,6 +557,10 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
560 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 557 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
561 | npages = mem->memory_size >> PAGE_SHIFT; | 558 | npages = mem->memory_size >> PAGE_SHIFT; |
562 | 559 | ||
560 | r = -EINVAL; | ||
561 | if (npages > KVM_MEM_MAX_NR_PAGES) | ||
562 | goto out; | ||
563 | |||
563 | if (!npages) | 564 | if (!npages) |
564 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; | 565 | mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; |
565 | 566 | ||
@@ -833,7 +834,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); | |||
833 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) | 834 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) |
834 | { | 835 | { |
835 | int i; | 836 | int i; |
836 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | 837 | struct kvm_memslots *slots = kvm_memslots(kvm); |
837 | 838 | ||
838 | for (i = 0; i < slots->nmemslots; ++i) { | 839 | for (i = 0; i < slots->nmemslots; ++i) { |
839 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 840 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
@@ -855,7 +856,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | |||
855 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 856 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
856 | { | 857 | { |
857 | int i; | 858 | int i; |
858 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | 859 | struct kvm_memslots *slots = kvm_memslots(kvm); |
859 | 860 | ||
860 | gfn = unalias_gfn_instantiation(kvm, gfn); | 861 | gfn = unalias_gfn_instantiation(kvm, gfn); |
861 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 862 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
@@ -899,7 +900,7 @@ out: | |||
899 | int memslot_id(struct kvm *kvm, gfn_t gfn) | 900 | int memslot_id(struct kvm *kvm, gfn_t gfn) |
900 | { | 901 | { |
901 | int i; | 902 | int i; |
902 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | 903 | struct kvm_memslots *slots = kvm_memslots(kvm); |
903 | struct kvm_memory_slot *memslot = NULL; | 904 | struct kvm_memory_slot *memslot = NULL; |
904 | 905 | ||
905 | gfn = unalias_gfn(kvm, gfn); | 906 | gfn = unalias_gfn(kvm, gfn); |
@@ -914,6 +915,11 @@ int memslot_id(struct kvm *kvm, gfn_t gfn) | |||
914 | return memslot - slots->memslots; | 915 | return memslot - slots->memslots; |
915 | } | 916 | } |
916 | 917 | ||
918 | static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | ||
919 | { | ||
920 | return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; | ||
921 | } | ||
922 | |||
917 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 923 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
918 | { | 924 | { |
919 | struct kvm_memory_slot *slot; | 925 | struct kvm_memory_slot *slot; |
@@ -922,7 +928,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
922 | slot = gfn_to_memslot_unaliased(kvm, gfn); | 928 | slot = gfn_to_memslot_unaliased(kvm, gfn); |
923 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | 929 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
924 | return bad_hva(); | 930 | return bad_hva(); |
925 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); | 931 | return gfn_to_hva_memslot(slot, gfn); |
926 | } | 932 | } |
927 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 933 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
928 | 934 | ||
@@ -972,11 +978,6 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | |||
972 | } | 978 | } |
973 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | 979 | EXPORT_SYMBOL_GPL(gfn_to_pfn); |
974 | 980 | ||
975 | static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | ||
976 | { | ||
977 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); | ||
978 | } | ||
979 | |||
980 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 981 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, |
981 | struct kvm_memory_slot *slot, gfn_t gfn) | 982 | struct kvm_memory_slot *slot, gfn_t gfn) |
982 | { | 983 | { |
@@ -1190,13 +1191,8 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) | |||
1190 | memslot = gfn_to_memslot_unaliased(kvm, gfn); | 1191 | memslot = gfn_to_memslot_unaliased(kvm, gfn); |
1191 | if (memslot && memslot->dirty_bitmap) { | 1192 | if (memslot && memslot->dirty_bitmap) { |
1192 | unsigned long rel_gfn = gfn - memslot->base_gfn; | 1193 | unsigned long rel_gfn = gfn - memslot->base_gfn; |
1193 | unsigned long *p = memslot->dirty_bitmap + | ||
1194 | rel_gfn / BITS_PER_LONG; | ||
1195 | int offset = rel_gfn % BITS_PER_LONG; | ||
1196 | 1194 | ||
1197 | /* avoid RMW */ | 1195 | generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); |
1198 | if (!generic_test_le_bit(offset, p)) | ||
1199 | generic___set_le_bit(offset, p); | ||
1200 | } | 1196 | } |
1201 | } | 1197 | } |
1202 | 1198 | ||
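
The mark_page_dirty() simplification works because generic___set_le_bit() splits the bit number into word index and offset internally, so the manual split (and the test-before-set) can go. A sketch of the equivalence, not kernel code; both calls below mark the same bit:

static void mark_bit_both_ways(unsigned long *bitmap, unsigned long rel_gfn)
{
	unsigned long *p = bitmap + rel_gfn / BITS_PER_LONG;
	int offset = rel_gfn % BITS_PER_LONG;

	/* Old shape: caller splits the bit number into word + offset. */
	generic___set_le_bit(offset, p);

	/* New shape: the bitop performs the same split itself. */
	generic___set_le_bit(rel_gfn, bitmap);
}
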
@@ -1609,7 +1605,6 @@ static long kvm_vm_ioctl(struct file *filp, | |||
1609 | r = -EFAULT; | 1605 | r = -EFAULT; |
1610 | if (copy_from_user(&zone, argp, sizeof zone)) | 1606 | if (copy_from_user(&zone, argp, sizeof zone)) |
1611 | goto out; | 1607 | goto out; |
1612 | r = -ENXIO; | ||
1613 | r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); | 1608 | r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); |
1614 | if (r) | 1609 | if (r) |
1615 | goto out; | 1610 | goto out; |
@@ -1621,7 +1616,6 @@ static long kvm_vm_ioctl(struct file *filp, | |||
1621 | r = -EFAULT; | 1616 | r = -EFAULT; |
1622 | if (copy_from_user(&zone, argp, sizeof zone)) | 1617 | if (copy_from_user(&zone, argp, sizeof zone)) |
1623 | goto out; | 1618 | goto out; |
1624 | r = -ENXIO; | ||
1625 | r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); | 1619 | r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); |
1626 | if (r) | 1620 | if (r) |
1627 | goto out; | 1621 | goto out; |
@@ -1755,12 +1749,19 @@ static struct file_operations kvm_vm_fops = { | |||
1755 | 1749 | ||
1756 | static int kvm_dev_ioctl_create_vm(void) | 1750 | static int kvm_dev_ioctl_create_vm(void) |
1757 | { | 1751 | { |
1758 | int fd; | 1752 | int fd, r; |
1759 | struct kvm *kvm; | 1753 | struct kvm *kvm; |
1760 | 1754 | ||
1761 | kvm = kvm_create_vm(); | 1755 | kvm = kvm_create_vm(); |
1762 | if (IS_ERR(kvm)) | 1756 | if (IS_ERR(kvm)) |
1763 | return PTR_ERR(kvm); | 1757 | return PTR_ERR(kvm); |
1758 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
1759 | r = kvm_coalesced_mmio_init(kvm); | ||
1760 | if (r < 0) { | ||
1761 | kvm_put_kvm(kvm); | ||
1762 | return r; | ||
1763 | } | ||
1764 | #endif | ||
1764 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); | 1765 | fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); |
1765 | if (fd < 0) | 1766 | if (fd < 0) |
1766 | kvm_put_kvm(kvm); | 1767 | kvm_put_kvm(kvm); |
@@ -1928,11 +1929,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | |||
1928 | cpu); | 1929 | cpu); |
1929 | hardware_disable(NULL); | 1930 | hardware_disable(NULL); |
1930 | break; | 1931 | break; |
1931 | case CPU_UP_CANCELED: | ||
1932 | printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", | ||
1933 | cpu); | ||
1934 | smp_call_function_single(cpu, hardware_disable, NULL, 1); | ||
1935 | break; | ||
1936 | case CPU_ONLINE: | 1932 | case CPU_ONLINE: |
1937 | printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", | 1933 | printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", |
1938 | cpu); | 1934 | cpu); |
@@ -1991,7 +1987,9 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
1991 | int len, const void *val) | 1987 | int len, const void *val) |
1992 | { | 1988 | { |
1993 | int i; | 1989 | int i; |
1994 | struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); | 1990 | struct kvm_io_bus *bus; |
1991 | |||
1992 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | ||
1995 | for (i = 0; i < bus->dev_count; i++) | 1993 | for (i = 0; i < bus->dev_count; i++) |
1996 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | 1994 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) |
1997 | return 0; | 1995 | return 0; |
@@ -2003,8 +2001,9 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
2003 | int len, void *val) | 2001 | int len, void *val) |
2004 | { | 2002 | { |
2005 | int i; | 2003 | int i; |
2006 | struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); | 2004 | struct kvm_io_bus *bus; |
2007 | 2005 | ||
2006 | bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | ||
2008 | for (i = 0; i < bus->dev_count; i++) | 2007 | for (i = 0; i < bus->dev_count; i++) |
2009 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | 2008 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) |
2010 | return 0; | 2009 | return 0; |
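
The switch from rcu_dereference() to srcu_dereference() in the io-bus paths documents, for lockdep, that kvm->buses[] is protected by kvm->srcu rather than plain RCU. An illustrative caller context, assuming the dispatch runs inside an SRCU read-side section as the vcpu-run path does; KVM_MMIO_BUS is the bus index defined elsewhere in kvm_host.h:

static int dispatch_mmio_write(struct kvm *kvm, gpa_t addr, int len,
			       const void *val)
{
	int idx, r;

	idx = srcu_read_lock(&kvm->srcu);
	r = kvm_io_bus_write(kvm, KVM_MMIO_BUS, addr, len, val);
	srcu_read_unlock(&kvm->srcu, idx);
	return r;
}
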
@@ -2179,7 +2178,7 @@ static void kvm_sched_out(struct preempt_notifier *pn, | |||
2179 | kvm_arch_vcpu_put(vcpu); | 2178 | kvm_arch_vcpu_put(vcpu); |
2180 | } | 2179 | } |
2181 | 2180 | ||
2182 | int kvm_init(void *opaque, unsigned int vcpu_size, | 2181 | int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, |
2183 | struct module *module) | 2182 | struct module *module) |
2184 | { | 2183 | { |
2185 | int r; | 2184 | int r; |
@@ -2229,8 +2228,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
2229 | goto out_free_4; | 2228 | goto out_free_4; |
2230 | 2229 | ||
2231 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ | 2230 | /* A kmem cache lets us meet the alignment requirements of fx_save. */ |
2232 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, | 2231 | if (!vcpu_align) |
2233 | __alignof__(struct kvm_vcpu), | 2232 | vcpu_align = __alignof__(struct kvm_vcpu); |
2233 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, | ||
2234 | 0, NULL); | 2234 | 0, NULL); |
2235 | if (!kvm_vcpu_cache) { | 2235 | if (!kvm_vcpu_cache) { |
2236 | r = -ENOMEM; | 2236 | r = -ENOMEM; |
@@ -2279,7 +2279,6 @@ EXPORT_SYMBOL_GPL(kvm_init); | |||
2279 | 2279 | ||
2280 | void kvm_exit(void) | 2280 | void kvm_exit(void) |
2281 | { | 2281 | { |
2282 | tracepoint_synchronize_unregister(); | ||
2283 | kvm_exit_debug(); | 2282 | kvm_exit_debug(); |
2284 | misc_deregister(&kvm_dev); | 2283 | misc_deregister(&kvm_dev); |
2285 | kmem_cache_destroy(kvm_vcpu_cache); | 2284 | kmem_cache_destroy(kvm_vcpu_cache); |
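
kvm_init() now takes a vcpu_align argument; passing 0 falls back to __alignof__(struct kvm_vcpu), while an arch can pass the alignment of its containing vcpu structure so the kmem cache honours it. A hedged sketch of an arch module init using the new signature; my_arch_ops and struct my_arch_vcpu are placeholders, not names from this patch:

static int __init my_arch_kvm_init(void)
{
	/* size and alignment of the arch's containing vcpu structure,
	 * or 0 for vcpu_align to keep the old default */
	return kvm_init(&my_arch_ops, sizeof(struct my_arch_vcpu),
			__alignof__(struct my_arch_vcpu), THIS_MODULE);
}
module_init(my_arch_kvm_init);
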