-rw-r--r--  Documentation/kvm/api.txt | 208
-rw-r--r--  Documentation/kvm/cpuid.txt | 42
-rw-r--r--  Documentation/kvm/mmu.txt | 304
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 8
-rw-r--r--  arch/ia64/kvm/vmm.c | 2
-rw-r--r--  arch/powerpc/include/asm/asm-compat.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm.h | 10
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 157
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_32.h | 42
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 28
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h (renamed from arch/powerpc/include/asm/kvm_book3s_64_asm.h) | 25
-rw-r--r--  arch/powerpc/include/asm/kvm_booke.h | 96
-rw-r--r--  arch/powerpc/include/asm/kvm_fpu.h | 85
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 38
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 97
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 2
-rw-r--r--  arch/powerpc/include/asm/paca.h | 10
-rw-r--r--  arch/powerpc/include/asm/processor.h | 3
-rw-r--r--  arch/powerpc/include/asm/reg.h | 10
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 102
-rw-r--r--  arch/powerpc/kernel/head_32.S | 14
-rw-r--r--  arch/powerpc/kernel/head_64.S | 4
-rw-r--r--  arch/powerpc/kernel/ppc_ksyms.c | 4
-rw-r--r--  arch/powerpc/kvm/44x.c | 2
-rw-r--r--  arch/powerpc/kvm/Kconfig | 24
-rw-r--r--  arch/powerpc/kvm/Makefile | 20
-rw-r--r--  arch/powerpc/kvm/book3s.c | 503
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu.c | 54
-rw-r--r--  arch/powerpc/kvm/book3s_32_mmu_host.c | 483
-rw-r--r--  arch/powerpc/kvm/book3s_32_sr.S | 143
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c | 36
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c | 102
-rw-r--r--  arch/powerpc/kvm/book3s_64_slb.S | 183
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c (renamed from arch/powerpc/kvm/book3s_64_emulate.c) | 245
-rw-r--r--  arch/powerpc/kvm/book3s_exports.c (renamed from arch/powerpc/kvm/book3s_64_exports.c) | 0
-rw-r--r--  arch/powerpc/kvm/book3s_interrupts.S (renamed from arch/powerpc/kvm/book3s_64_interrupts.S) | 204
-rw-r--r--  arch/powerpc/kvm/book3s_paired_singles.c | 1289
-rw-r--r--  arch/powerpc/kvm/book3s_rmhandlers.S (renamed from arch/powerpc/kvm/book3s_64_rmhandlers.S) | 135
-rw-r--r--  arch/powerpc/kvm/book3s_segment.S | 259
-rw-r--r--  arch/powerpc/kvm/booke.c | 21
-rw-r--r--  arch/powerpc/kvm/e500.c | 2
-rw-r--r--  arch/powerpc/kvm/emulate.c | 55
-rw-r--r--  arch/powerpc/kvm/fpu.S | 273
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 110
-rw-r--r--  arch/powerpc/mm/mmu_context_hash32.c | 29
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 6
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 2
-rw-r--r--  arch/x86/include/asm/kvm.h | 17
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h | 46
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 80
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 13
-rw-r--r--  arch/x86/include/asm/msr-index.h | 5
-rw-r--r--  arch/x86/include/asm/pvclock-abi.h | 4
-rw-r--r--  arch/x86/include/asm/pvclock.h | 1
-rw-r--r--  arch/x86/include/asm/svm.h | 9
-rw-r--r--  arch/x86/include/asm/vmx.h | 12
-rw-r--r--  arch/x86/kernel/kvmclock.c | 56
-rw-r--r--  arch/x86/kernel/pvclock.c | 37
-rw-r--r--  arch/x86/kernel/tboot.c | 1
-rw-r--r--  arch/x86/kvm/emulate.c | 1247
-rw-r--r--  arch/x86/kvm/i8259.c | 53
-rw-r--r--  arch/x86/kvm/irq.h | 1
-rw-r--r--  arch/x86/kvm/kvm_timer.h | 4
-rw-r--r--  arch/x86/kvm/mmu.c | 225
-rw-r--r--  arch/x86/kvm/mmutrace.h | 84
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 46
-rw-r--r--  arch/x86/kvm/svm.c | 944
-rw-r--r--  arch/x86/kvm/timer.c | 3
-rw-r--r--  arch/x86/kvm/trace.h | 165
-rw-r--r--  arch/x86/kvm/vmx.c | 378
-rw-r--r--  arch/x86/kvm/x86.c | 1599
-rw-r--r--  arch/x86/kvm/x86.h | 7
-rw-r--r--  include/linux/kvm.h | 26
-rw-r--r--  include/linux/kvm_host.h | 16
-rw-r--r--  include/linux/tboot.h | 1
-rw-r--r--  include/trace/events/kvm.h | 1
-rw-r--r--  virt/kvm/assigned-dev.c | 8
-rw-r--r--  virt/kvm/coalesced_mmio.c | 6
-rw-r--r--  virt/kvm/iommu.c | 4
-rw-r--r--  virt/kvm/kvm_main.c | 63
81 files changed, 7826 insertions, 2811 deletions
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index c6416a398163..a237518e51b9 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -656,6 +656,7 @@ struct kvm_clock_data {
6564.29 KVM_GET_VCPU_EVENTS 6564.29 KVM_GET_VCPU_EVENTS
657 657
658Capability: KVM_CAP_VCPU_EVENTS 658Capability: KVM_CAP_VCPU_EVENTS
659Extended by: KVM_CAP_INTR_SHADOW
659Architectures: x86 660Architectures: x86
660Type: vm ioctl 661Type: vm ioctl
661Parameters: struct kvm_vcpu_event (out) 662Parameters: struct kvm_vcpu_event (out)
@@ -676,7 +677,7 @@ struct kvm_vcpu_events {
676 __u8 injected; 677 __u8 injected;
677 __u8 nr; 678 __u8 nr;
678 __u8 soft; 679 __u8 soft;
679 __u8 pad; 680 __u8 shadow;
680 } interrupt; 681 } interrupt;
681 struct { 682 struct {
682 __u8 injected; 683 __u8 injected;
@@ -688,9 +689,13 @@ struct kvm_vcpu_events {
688 __u32 flags; 689 __u32 flags;
689}; 690};
690 691
692KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
693interrupt.shadow contains a valid state. Otherwise, this field is undefined.
694
6914.30 KVM_SET_VCPU_EVENTS 6954.30 KVM_SET_VCPU_EVENTS
692 696
693Capability: KVM_CAP_VCPU_EVENTS 697Capability: KVM_CAP_VCPU_EVENTS
698Extended by: KVM_CAP_INTR_SHADOW
694Architectures: x86 699Architectures: x86
695Type: vm ioctl 700Type: vm ioctl
696Parameters: struct kvm_vcpu_event (in) 701Parameters: struct kvm_vcpu_event (in)
@@ -709,6 +714,183 @@ current in-kernel state. The bits are:
709KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel 714KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
710KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector 715KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
711 716
717If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
718the flags field to signal that interrupt.shadow contains a valid state and
719shall be written into the VCPU.
720
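As an illustrative sketch of the round trip (assuming vcpu_fd is an open vcpu
file descriptor and that KVM_CAP_INTR_SHADOW was reported by
KVM_CHECK_EXTENSION; error handling trimmed):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int sync_intr_shadow(int vcpu_fd)
{
	struct kvm_vcpu_events events;

	if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
		return -1;
	/* tell the kernel that interrupt.shadow carries valid state */
	events.flags |= KVM_VCPUEVENT_VALID_SHADOW;
	return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}
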
7214.32 KVM_GET_DEBUGREGS
722
723Capability: KVM_CAP_DEBUGREGS
724Architectures: x86
725Type: vm ioctl
726Parameters: struct kvm_debugregs (out)
727Returns: 0 on success, -1 on error
728
729Reads debug registers from the vcpu.
730
731struct kvm_debugregs {
732 __u64 db[4];
733 __u64 dr6;
734 __u64 dr7;
735 __u64 flags;
736 __u64 reserved[9];
737};
738
7394.33 KVM_SET_DEBUGREGS
740
741Capability: KVM_CAP_DEBUGREGS
742Architectures: x86
743Type: vm ioctl
744Parameters: struct kvm_debugregs (in)
745Returns: 0 on success, -1 on error
746
747Writes debug registers into the vcpu.
748
749See KVM_GET_DEBUGREGS for the data structure. The flags field is not used
750yet and must be cleared on entry.
751
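A minimal sketch of using both calls (vcpu_fd is assumed to be an open vcpu
file descriptor and KVM_CAP_DEBUGREGS to be present; error handling trimmed):

#include <linux/kvm.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

static int dump_and_clear_breakpoints(int vcpu_fd)
{
	struct kvm_debugregs dregs;

	memset(&dregs, 0, sizeof(dregs));
	if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dregs) < 0)
		return -1;
	printf("dr6=%llx dr7=%llx\n",
	       (unsigned long long)dregs.dr6,
	       (unsigned long long)dregs.dr7);
	dregs.dr7 = 0;		/* disable hardware breakpoints */
	dregs.flags = 0;	/* must be cleared, see above */
	return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dregs);
}
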
7524.34 KVM_SET_USER_MEMORY_REGION
753
754Capability: KVM_CAP_USER_MEM
755Architectures: all
756Type: vm ioctl
757Parameters: struct kvm_userspace_memory_region (in)
758Returns: 0 on success, -1 on error
759
760struct kvm_userspace_memory_region {
761 __u32 slot;
762 __u32 flags;
763 __u64 guest_phys_addr;
764 __u64 memory_size; /* bytes */
765 __u64 userspace_addr; /* start of the userspace allocated memory */
766};
767
768/* for kvm_memory_region::flags */
769#define KVM_MEM_LOG_DIRTY_PAGES 1UL
770
771This ioctl allows the user to create or modify a guest physical memory
772slot. When changing an existing slot, it may be moved in the guest
773physical memory space, or its flags may be modified. It may not be
774resized. Slots may not overlap in guest physical address space.
775
776Memory for the region is taken starting at the address denoted by the
777field userspace_addr, which must point at user addressable memory for
778the entire memory slot size. Any object may back this memory, including
779anonymous memory, ordinary files, and hugetlbfs.
780
781It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
782be identical. This allows large pages in the guest to be backed by large
783pages in the host.
784
785The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
786instructs kvm to keep track of writes to memory within the slot. See
787the KVM_GET_DIRTY_LOG ioctl.
788
789When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of the memory
790region are automatically reflected into the guest. For example, an mmap()
791that affects the region will be made visible immediately. Another example
792is madvise(MADV_DONTNEED).
793
794It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl.
795The KVM_SET_MEMORY_REGION ioctl does not allow fine-grained control over memory
796allocation and is deprecated.
797
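As a hedged sketch of typical usage, backing one guest RAM slot with anonymous
memory (vm_fd is assumed to be an open VM file descriptor):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static int add_ram_slot(int vm_fd)
{
	size_t size = 128 << 20;	/* 128MB of guest RAM */
	struct kvm_userspace_memory_region mem;
	void *ram;

	ram = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (ram == MAP_FAILED)
		return -1;

	memset(&mem, 0, sizeof(mem));
	mem.slot            = 0;
	mem.flags           = 0;	/* or KVM_MEM_LOG_DIRTY_PAGES */
	mem.guest_phys_addr = 0;
	mem.memory_size     = size;
	mem.userspace_addr  = (unsigned long)ram;
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
}
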
7984.35 KVM_SET_TSS_ADDR
799
800Capability: KVM_CAP_SET_TSS_ADDR
801Architectures: x86
802Type: vm ioctl
803Parameters: unsigned long tss_address (in)
804Returns: 0 on success, -1 on error
805
806This ioctl defines the physical address of a three-page region in the guest
807physical address space. The region must be within the first 4GB of the
808guest physical address space and must not conflict with any memory slot
809or any mmio address. The guest may malfunction if it accesses this memory
810region.
811
812This ioctl is required on Intel-based hosts. This is needed on Intel hardware
813because of a quirk in the virtualization implementation (see the internals
814documentation when it pops into existence).
815
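For example, a VMM might reserve a region just below the 4GB boundary
(the address used here is only an illustrative choice, not a requirement):

	ioctl(vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000UL);
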
8164.36 KVM_ENABLE_CAP
817
818Capability: KVM_CAP_ENABLE_CAP
819Architectures: ppc
820Type: vcpu ioctl
821Parameters: struct kvm_enable_cap (in)
822Returns: 0 on success; -1 on error
823
824Not all extensions are enabled by default. Using this ioctl the application
825can enable an extension, making it available to the guest.
826
827On systems that do not support this ioctl, it always fails. On systems that
828do support it, it only works for extensions that are supported for enablement.
829
830To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
831be used.
832
833struct kvm_enable_cap {
834 /* in */
835 __u32 cap;
836
837The capability that is supposed to get enabled.
838
839 __u32 flags;
840
841A bitfield indicating future enhancements. Has to be 0 for now.
842
843 __u64 args[4];
844
845Arguments for enabling a feature. If a feature needs initial values to
846function properly, this is the place to put them.
847
848 __u8 pad[64];
849};
850
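A hedged sketch of enabling one extension on a ppc vcpu (KVM_CAP_PPC_OSI is
used only as an example capability; it should be checked with
KVM_CHECK_EXTENSION first, and vcpu_fd is assumed to be an open vcpu file
descriptor):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_osi(int vcpu_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));	/* args and pad default to zero */
	cap.cap = KVM_CAP_PPC_OSI;	/* example capability */
	cap.flags = 0;			/* must be 0 for now */
	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
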
8514.37 KVM_GET_MP_STATE
852
853Capability: KVM_CAP_MP_STATE
854Architectures: x86, ia64
855Type: vcpu ioctl
856Parameters: struct kvm_mp_state (out)
857Returns: 0 on success; -1 on error
858
859struct kvm_mp_state {
860 __u32 mp_state;
861};
862
863Returns the vcpu's current "multiprocessing state" (though also valid on
864uniprocessor guests).
865
866Possible values are:
867
868 - KVM_MP_STATE_RUNNABLE: the vcpu is currently running
869 - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP)
870 which has not yet received an INIT signal
871 - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is
872 now ready for a SIPI
873 - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and
874 is waiting for an interrupt
875 - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector
876 accessible via KVM_GET_VCPU_EVENTS)
877
878This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
879irqchip, the multiprocessing state must be maintained by userspace.
880
8814.38 KVM_SET_MP_STATE
882
883Capability: KVM_CAP_MP_STATE
884Architectures: x86, ia64
885Type: vcpu ioctl
886Parameters: struct kvm_mp_state (in)
887Returns: 0 on success; -1 on error
888
889Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
890arguments.
891
892This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
893irqchip, the multiprocessing state must be maintained by userspace.
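A minimal sketch combining both calls (vcpu_fd is assumed to be an open vcpu
file descriptor on a VM with an in-kernel irqchip):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int make_vcpu_runnable(int vcpu_fd)
{
	struct kvm_mp_state mp;

	if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0)
		return -1;
	if (mp.mp_state == KVM_MP_STATE_UNINITIALIZED) {
		/* force an AP into the runnable state */
		mp.mp_state = KVM_MP_STATE_RUNNABLE;
		return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
	}
	return 0;
}
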
712 894
7135. The kvm_run structure 8955. The kvm_run structure
714 896
@@ -820,6 +1002,13 @@ executed a memory-mapped I/O instruction which could not be satisfied
820by kvm. The 'data' member contains the written data if 'is_write' is 1002by kvm. The 'data' member contains the written data if 'is_write' is
821true, and should be filled by application code otherwise. 1003true, and should be filled by application code otherwise.
822 1004
1005NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding
1006operations are complete (and guest state is consistent) only after userspace
1007has re-entered the kernel with KVM_RUN. The kernel side will first finish
1008incomplete operations and then check for pending signals. Userspace
1009can re-enter the guest with an unmasked signal pending to complete
1010pending operations.
1011
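For illustration, a stripped-down vcpu loop that honors this rule might look
as follows (run is the mmap'ed struct kvm_run of the vcpu; handle_mmio() is a
hypothetical helper provided by the VMM):

#include <errno.h>
#include <linux/kvm.h>
#include <sys/ioctl.h>

void handle_mmio(__u64 addr, __u8 *data, __u32 len, __u8 is_write); /* hypothetical */

static void run_vcpu(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
			return;
		if (run->exit_reason == KVM_EXIT_MMIO)
			handle_mmio(run->mmio.phys_addr, run->mmio.data,
				    run->mmio.len, run->mmio.is_write);
		/* the MMIO access only completes on the next KVM_RUN entry */
	}
}
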
823 /* KVM_EXIT_HYPERCALL */ 1012 /* KVM_EXIT_HYPERCALL */
824 struct { 1013 struct {
825 __u64 nr; 1014 __u64 nr;
@@ -829,7 +1018,9 @@ true, and should be filled by application code otherwise.
829 __u32 pad; 1018 __u32 pad;
830 } hypercall; 1019 } hypercall;
831 1020
832Unused. 1021Unused. This was once used for 'hypercall to userspace'. To implement
1022such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
1023Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
833 1024
834 /* KVM_EXIT_TPR_ACCESS */ 1025 /* KVM_EXIT_TPR_ACCESS */
835 struct { 1026 struct {
@@ -870,6 +1061,19 @@ s390 specific.
870 1061
871powerpc specific. 1062powerpc specific.
872 1063
1064 /* KVM_EXIT_OSI */
1065 struct {
1066 __u64 gprs[32];
1067 } osi;
1068
1069MOL (Mac-on-Linux) uses a special hypercall interface it calls 'OSI'. To enable it, we catch
1070hypercalls and exit with this exit struct that contains all the guest gprs.
1071
1072If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall.
1073Userspace can now handle the hypercall and, when it is done, modify the gprs as
1074necessary. Upon guest entry all guest GPRs will then be replaced by the values
1075in this struct.
1076
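A hedged sketch of the userspace side (handle_osi_call() is a hypothetical
helper implementing the OSI ABI; it may modify the gprs in place):

#include <linux/kvm.h>

void handle_osi_call(__u64 *gprs);	/* hypothetical */

static void handle_exit(struct kvm_run *run)
{
	if (run->exit_reason == KVM_EXIT_OSI) {
		handle_osi_call(run->osi.gprs);
		/* the (possibly modified) gprs are loaded back on re-entry */
	}
}
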
873 /* Fix the size of the union. */ 1077 /* Fix the size of the union. */
874 char padding[256]; 1078 char padding[256];
875 }; 1079 };
diff --git a/Documentation/kvm/cpuid.txt b/Documentation/kvm/cpuid.txt
new file mode 100644
index 000000000000..14a12ea92b7f
--- /dev/null
+++ b/Documentation/kvm/cpuid.txt
@@ -0,0 +1,42 @@
1KVM CPUID bits
2Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
3=====================================================
4
5A guest running on a kvm host can check some of its features using
6cpuid. This is not always guaranteed to work, since userspace can
7mask out some, or even all, KVM-related cpuid features before launching
8a guest.
9
10KVM cpuid functions are:
11
12function: KVM_CPUID_SIGNATURE (0x40000000)
13returns : eax = 0,
14 ebx = 0x4b4d564b,
15 ecx = 0x564b4d56,
16 edx = 0x4d.
17Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
18This function queries the presence of KVM cpuid leafs.
19
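For illustration, a guest could probe the signature leaf like this (x86 GCC
inline assembly; a sketch, not taken from the kernel sources):

#include <stdint.h>
#include <string.h>

static int running_on_kvm(void)
{
	uint32_t eax = 0x40000000, ebx, ecx, edx;
	char sig[13];

	asm volatile("cpuid"
		     : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';
	return strcmp(sig, "KVMKVMKVM") == 0;
}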
20
21function: KVM_CPUID_FEATURES (0x40000001)
22returns : ebx, ecx, edx = 0
23          eax = an OR'ed group of (1 << flag), where each flag is:
24
25
26flag || value || meaning
27=============================================================================
28KVM_FEATURE_CLOCKSOURCE || 0 || kvmclock available at msrs
29 || || 0x11 and 0x12.
30------------------------------------------------------------------------------
31KVM_FEATURE_NOP_IO_DELAY || 1 || not necessary to perform delays
32 || || on PIO operations.
33------------------------------------------------------------------------------
34KVM_FEATURE_MMU_OP || 2 || deprecated.
35------------------------------------------------------------------------------
36KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs
37 || || 0x4b564d00 and 0x4b564d01
38------------------------------------------------------------------------------
39KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
40 || || per-cpu warps are expected in
41 || || kvmclock.
42------------------------------------------------------------------------------
diff --git a/Documentation/kvm/mmu.txt b/Documentation/kvm/mmu.txt
new file mode 100644
index 000000000000..aaed6ab9d7ab
--- /dev/null
+++ b/Documentation/kvm/mmu.txt
@@ -0,0 +1,304 @@
1The x86 kvm shadow mmu
2======================
3
4The mmu (in arch/x86/kvm, files mmu.[ch] and paging_tmpl.h) is responsible
5for presenting a standard x86 mmu to the guest, while translating guest
6physical addresses to host physical addresses.
7
8The mmu code attempts to satisfy the following requirements:
9
10- correctness: the guest should not be able to determine that it is running
11 on an emulated mmu except for timing (we attempt to comply
12 with the specification, not emulate the characteristics of
13 a particular implementation such as tlb size)
14- security: the guest must not be able to touch host memory not assigned
15 to it
16- performance: minimize the performance penalty imposed by the mmu
17- scaling: need to scale to large memory and large vcpu guests
18- hardware: support the full range of x86 virtualization hardware
19- integration: Linux memory management code must be in control of guest memory
20 so that swapping, page migration, page merging, transparent
21 hugepages, and similar features work without change
22- dirty tracking: report writes to guest memory to enable live migration
23 and framebuffer-based displays
24- footprint: keep the amount of pinned kernel memory low (most memory
25 should be shrinkable)
26- reliability: avoid multipage or GFP_ATOMIC allocations
27
28Acronyms
29========
30
31pfn host page frame number
32hpa host physical address
33hva host virtual address
34gfn guest frame number
35gpa guest physical address
36gva guest virtual address
37ngpa nested guest physical address
38ngva nested guest virtual address
39pte page table entry (used also to refer generically to paging structure
40 entries)
41gpte guest pte (referring to gfns)
42spte shadow pte (referring to pfns)
43tdp two dimensional paging (vendor neutral term for NPT and EPT)
44
45Virtual and real hardware supported
46===================================
47
48The mmu supports first-generation mmu hardware, which allows an atomic switch
49of the current paging mode and cr3 during guest entry, as well as
50two-dimensional paging (AMD's NPT and Intel's EPT). The emulated hardware
51it exposes is the traditional 2/3/4 level x86 mmu, with support for global
52pages, pae, pse, pse36, cr0.wp, and 1GB pages. Work is in progress to support
53exposing NPT capable hardware on NPT capable hosts.
54
55Translation
56===========
57
58The primary job of the mmu is to program the processor's mmu to translate
59addresses for the guest. Different translations are required at different
60times:
61
62- when guest paging is disabled, we translate guest physical addresses to
63 host physical addresses (gpa->hpa)
64- when guest paging is enabled, we translate guest virtual addresses, to
65 guest physical addresses, to host physical addresses (gva->gpa->hpa)
66- when the guest launches a guest of its own, we translate nested guest
67 virtual addresses, to nested guest physical addresses, to guest physical
68 addresses, to host physical addresses (ngva->ngpa->gpa->hpa)
69
70The primary challenge is to encode between 1 and 3 translations into hardware
71that supports only 1 (traditional) and 2 (tdp) translations. When the
72number of required translations matches the hardware, the mmu operates in
73direct mode; otherwise it operates in shadow mode (see below).
74
75Memory
76======
77
78Guest memory (gpa) is part of the user address space of the process that is
79using kvm. Userspace defines the translation between guest addresses and user
80addresses (gpa->hva); note that two gpas may alias to the same hva, but not
81vice versa.
82
83These hvas may be backed using any method available to the host: anonymous
84memory, file backed memory, and device memory. Memory might be paged by the
85host at any time.
86
87Events
88======
89
90The mmu is driven by events, some from the guest, some from the host.
91
92Guest generated events:
93- writes to control registers (especially cr3)
94- invlpg/invlpga instruction execution
95- access to missing or protected translations
96
97Host generated events:
98- changes in the gpa->hpa translation (either through gpa->hva changes or
99 through hva->hpa changes)
100- memory pressure (the shrinker)
101
102Shadow pages
103============
104
105The principal data structure is the shadow page, 'struct kvm_mmu_page'. A
106shadow page contains 512 sptes, which can be either leaf or nonleaf sptes. A
107shadow page may contain a mix of leaf and nonleaf sptes.
108
109A nonleaf spte allows the hardware mmu to reach the leaf pages and
110is not related to a translation directly. It points to other shadow pages.
111
112A leaf spte corresponds to either one or two translations encoded into
113one paging structure entry. These are always the lowest level of the
114translation stack, with optional higher level translations left to NPT/EPT.
115Leaf ptes point at guest pages.
116
117The following table shows translations encoded by leaf ptes, with higher-level
118translations in parentheses:
119
120 Non-nested guests:
121 nonpaging: gpa->hpa
122 paging: gva->gpa->hpa
123 paging, tdp: (gva->)gpa->hpa
124 Nested guests:
125 non-tdp: ngva->gpa->hpa (*)
126 tdp: (ngva->)ngpa->gpa->hpa
127
128(*) the guest hypervisor will encode the ngva->gpa translation into its page
129 tables if npt is not present
130
131Shadow pages contain the following information (see the sketch after this list):
132 role.level:
133 The level in the shadow paging hierarchy that this shadow page belongs to.
134 1=4k sptes, 2=2M sptes, 3=1G sptes, etc.
135 role.direct:
136 If set, leaf sptes reachable from this page are for a linear range.
137 Examples include real mode translation, large guest pages backed by small
138 host pages, and gpa->hpa translations when NPT or EPT is active.
139 The linear range starts at (gfn << PAGE_SHIFT) and its size is determined
140 by role.level (2MB for first level, 1GB for second level, 0.5TB for third
141 level, 256TB for fourth level)
142 If clear, this page corresponds to a guest page table denoted by the gfn
143 field.
144 role.quadrant:
145 When role.cr4_pae=0, the guest uses 32-bit gptes while the host uses 64-bit
146 sptes. That means a guest page table contains more ptes than the host,
147 so multiple shadow pages are needed to shadow one guest page.
148 For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
149 first or second 512-gpte block in the guest page table. For second-level
150 page tables, each 32-bit gpte is converted to two 64-bit sptes
151 (since each first-level guest page is shadowed by two first-level
152 shadow pages) so role.quadrant takes values in the range 0..3. Each
153 quadrant maps 1GB virtual address space.
154 role.access:
155 Inherited guest access permissions in the form uwx. Note execute
156 permission is positive, not negative.
157 role.invalid:
158 The page is invalid and should not be used. It is a root page that is
159 currently pinned (by a cpu hardware register pointing to it); once it is
160 unpinned it will be destroyed.
161 role.cr4_pae:
162 Contains the value of cr4.pae for which the page is valid (e.g. whether
163 32-bit or 64-bit gptes are in use).
164 role.cr4_nxe:
165 Contains the value of efer.nxe for which the page is valid.
166 role.cr0_wp:
167 Contains the value of cr0.wp for which the page is valid.
168 gfn:
169 Either the guest page table containing the translations shadowed by this
170 page, or the base page frame for linear translations. See role.direct.
171 spt:
172 A pageful of 64-bit sptes containing the translations for this page.
173 Accessed by both kvm and hardware.
174 The page pointed to by spt will have its page->private pointing back
175 at the shadow page structure.
176 sptes in spt point either at guest pages, or at lower-level shadow pages.
177 Specifically, if sp1 and sp2 are shadow pages, then sp1->spt[n] may point
178 at __pa(sp2->spt). sp2 will point back at sp1 through parent_pte.
179 The spt array forms a DAG structure with the shadow page as a node, and
180 guest pages as leaves.
181 gfns:
182 An array of 512 guest frame numbers, one for each present pte. Used to
183 perform a reverse map from a pte to a gfn.
184 slot_bitmap:
185 A bitmap containing one bit per memory slot. If the page contains a pte
186 mapping a page from memory slot n, then bit n of slot_bitmap will be set
187 (if a page is aliased among several slots, then it is not guaranteed that
188 all slots will be marked).
189 Used during dirty logging to avoid scanning a shadow page if none of its
190 pages need tracking.
191 root_count:
192 A counter keeping track of how many hardware registers (guest cr3 or
193 pdptrs) are now pointing at the page. While this counter is nonzero, the
194 page cannot be destroyed. See role.invalid.
195 multimapped:
196 Whether there exist multiple sptes pointing at this page.
197 parent_pte/parent_ptes:
198 If multimapped is zero, parent_pte points at the single spte that points at
199 this page's spt. Otherwise, parent_ptes points at a data structure
200 with a list of parent_ptes.
201 unsync:
202 If true, then the translations in this page may not match the guest's
203 translation. This is equivalent to the state of the tlb when a pte is
204 changed but before the tlb entry is flushed. Accordingly, unsync ptes
205 are synchronized when the guest executes invlpg or flushes its tlb by
206 other means. Valid for leaf pages.
207 unsync_children:
208 How many sptes in the page point at pages that are unsync (or have
209 unsynchronized children).
210 unsync_child_bitmap:
211 A bitmap indicating which sptes in spt point (directly or indirectly) at
212 pages that may be unsynchronized. Used to quickly locate all unsynchronized
213 pages reachable from a given page.
214
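To make the field list above more concrete, here is an illustrative C sketch
of the structure it describes (field names follow the text above; this is not
the exact kernel definition, see struct kvm_mmu_page in arch/x86/kvm/ for
that):

#include <stdbool.h>
#include <stdint.h>

union mmu_page_role_sketch {
	uint32_t word;
	struct {
		unsigned level:4;	/* 1=4k, 2=2M, 3=1G sptes, ... */
		unsigned cr4_pae:1;	/* 32-bit vs 64-bit gptes */
		unsigned quadrant:2;	/* which part of the guest page is shadowed */
		unsigned direct:1;	/* linear range instead of a guest page table */
		unsigned access:3;	/* inherited uwx permissions */
		unsigned invalid:1;	/* pinned root awaiting destruction */
		unsigned cr4_nxe:1;	/* efer.nxe the page is valid for */
		unsigned cr0_wp:1;	/* cr0.wp the page is valid for */
	};
};

struct mmu_page_sketch {
	union mmu_page_role_sketch role;
	uint64_t gfn;			/* shadowed guest page table or base frame */
	uint64_t *spt;			/* one page of 512 sptes */
	uint64_t *gfns;			/* gfn per present spte (reverse map) */
	unsigned long slot_bitmap;	/* one bit per memory slot */
	int root_count;			/* hardware references (cr3/pdptrs) */
	bool multimapped;		/* more than one parent spte */
	bool unsync;			/* may lag behind the guest's gptes */
	unsigned int unsync_children;	/* unsync sptes reachable from here */
	/* plus parent_pte/parent_ptes and unsync_child_bitmap, as described */
};
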
215Reverse map
216===========
217
218The mmu maintains a reverse mapping whereby all ptes mapping a page can be
219reached given its gfn. This is used, for example, when swapping out a page.
220
221Synchronized and unsynchronized pages
222=====================================
223
224The guest uses two events to synchronize its tlb and page tables: tlb flushes
225and page invalidations (invlpg).
226
227A tlb flush means that we need to synchronize all sptes reachable from the
228guest's cr3. This is expensive, so we keep all guest page tables write
229protected, and synchronize sptes to gptes when a gpte is written.
230
231A special case is when a guest page table is reachable from the current
232guest cr3. In this case, the guest is obliged to issue an invlpg instruction
233before using the translation. We take advantage of that by removing write
234protection from the guest page, and allowing the guest to modify it freely.
235We synchronize modified gptes when the guest invokes invlpg. This reduces
236the amount of emulation we have to do when the guest modifies multiple gptes,
237or when a guest page is no longer used as a page table and is used for
238random guest data.
239
240As a side effect we have to resynchronize all reachable unsynchronized shadow
241pages on a tlb flush.
242
243
244Reaction to events
245==================
246
247- guest page fault (or npt page fault, or ept violation)
248
249This is the most complicated event. The cause of a page fault can be:
250
251 - a true guest fault (the guest translation won't allow the access) (*)
252 - access to a missing translation
253 - access to a protected translation
254 - when logging dirty pages, memory is write protected
255 - synchronized shadow pages are write protected (*)
256 - access to untranslatable memory (mmio)
257
258 (*) not applicable in direct mode
259
260Handling a page fault is performed as follows:
261
262 - if needed, walk the guest page tables to determine the guest translation
263 (gva->gpa or ngpa->gpa)
264 - if permissions are insufficient, reflect the fault back to the guest
265 - determine the host page
266 - if this is an mmio request, there is no host page; call the emulator
267 to emulate the instruction instead
268 - walk the shadow page table to find the spte for the translation,
269 instantiating missing intermediate page tables as necessary
270 - try to unsynchronize the page
271 - if successful, we can let the guest continue and modify the gpte
272 - emulate the instruction
273 - if failed, unshadow the page and let the guest continue
274 - update any translations that were modified by the instruction
275
276invlpg handling:
277
278 - walk the shadow page hierarchy and drop affected translations
279 - try to reinstantiate the indicated translation in the hope that the
280 guest will use it in the near future
281
282Guest control register updates:
283
284- mov to cr3
285 - look up new shadow roots
286 - synchronize newly reachable shadow pages
287
288- mov to cr0/cr4/efer
289 - set up mmu context for new paging mode
290 - look up new shadow roots
291 - synchronize newly reachable shadow pages
292
293Host translation updates:
294
295 - mmu notifier called with updated hva
296 - look up affected sptes through reverse map
297 - drop (or update) translations
298
299Further reading
300===============
301
302- NPT presentation from KVM Forum 2008
303 http://www.linux-kvm.org/wiki/images/c/c8/KvmForum2008%24kdf2008_21.pdf
304
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 7f3c0a2e60cd..d5f4e9161201 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -979,11 +979,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
979 r = -EFAULT; 979 r = -EFAULT;
980 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 980 if (copy_from_user(&irq_event, argp, sizeof irq_event))
981 goto out; 981 goto out;
982 r = -ENXIO;
982 if (irqchip_in_kernel(kvm)) { 983 if (irqchip_in_kernel(kvm)) {
983 __s32 status; 984 __s32 status;
984 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 985 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
985 irq_event.irq, irq_event.level); 986 irq_event.irq, irq_event.level);
986 if (ioctl == KVM_IRQ_LINE_STATUS) { 987 if (ioctl == KVM_IRQ_LINE_STATUS) {
988 r = -EFAULT;
987 irq_event.status = status; 989 irq_event.status = status;
988 if (copy_to_user(argp, &irq_event, 990 if (copy_to_user(argp, &irq_event,
989 sizeof irq_event)) 991 sizeof irq_event))
@@ -1379,7 +1381,7 @@ static void kvm_release_vm_pages(struct kvm *kvm)
1379 int i, j; 1381 int i, j;
1380 unsigned long base_gfn; 1382 unsigned long base_gfn;
1381 1383
1382 slots = rcu_dereference(kvm->memslots); 1384 slots = kvm_memslots(kvm);
1383 for (i = 0; i < slots->nmemslots; i++) { 1385 for (i = 0; i < slots->nmemslots; i++) {
1384 memslot = &slots->memslots[i]; 1386 memslot = &slots->memslots[i];
1385 base_gfn = memslot->base_gfn; 1387 base_gfn = memslot->base_gfn;
@@ -1535,8 +1537,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
1535 goto out; 1537 goto out;
1536 1538
1537 if (copy_to_user(user_stack, stack, 1539 if (copy_to_user(user_stack, stack,
1538 sizeof(struct kvm_ia64_vcpu_stack))) 1540 sizeof(struct kvm_ia64_vcpu_stack))) {
1541 r = -EFAULT;
1539 goto out; 1542 goto out;
1543 }
1540 1544
1541 break; 1545 break;
1542 } 1546 }
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 7a62f75778c5..f0b9cac82414 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -51,7 +51,7 @@ static int __init kvm_vmm_init(void)
51 vmm_fpswa_interface = fpswa_interface; 51 vmm_fpswa_interface = fpswa_interface;
52 52
53 /*Register vmm data to kvm side*/ 53 /*Register vmm data to kvm side*/
54 return kvm_init(&vmm_info, 1024, THIS_MODULE); 54 return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
55} 55}
56 56
57static void __exit kvm_vmm_exit(void) 57static void __exit kvm_vmm_exit(void)
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index a9b91ed3d4b9..2048a6aeea91 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -21,6 +21,7 @@
21/* operations for longs and pointers */ 21/* operations for longs and pointers */
22#define PPC_LL stringify_in_c(ld) 22#define PPC_LL stringify_in_c(ld)
23#define PPC_STL stringify_in_c(std) 23#define PPC_STL stringify_in_c(std)
24#define PPC_STLU stringify_in_c(stdu)
24#define PPC_LCMPI stringify_in_c(cmpdi) 25#define PPC_LCMPI stringify_in_c(cmpdi)
25#define PPC_LONG stringify_in_c(.llong) 26#define PPC_LONG stringify_in_c(.llong)
26#define PPC_LONG_ALIGN stringify_in_c(.balign 8) 27#define PPC_LONG_ALIGN stringify_in_c(.balign 8)
@@ -44,6 +45,7 @@
44/* operations for longs and pointers */ 45/* operations for longs and pointers */
45#define PPC_LL stringify_in_c(lwz) 46#define PPC_LL stringify_in_c(lwz)
46#define PPC_STL stringify_in_c(stw) 47#define PPC_STL stringify_in_c(stw)
48#define PPC_STLU stringify_in_c(stwu)
47#define PPC_LCMPI stringify_in_c(cmpwi) 49#define PPC_LCMPI stringify_in_c(cmpwi)
48#define PPC_LONG stringify_in_c(.long) 50#define PPC_LONG stringify_in_c(.long)
49#define PPC_LONG_ALIGN stringify_in_c(.balign 4) 51#define PPC_LONG_ALIGN stringify_in_c(.balign 4)
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 81f3b0b5601e..6c5547d82bbe 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -77,4 +77,14 @@ struct kvm_debug_exit_arch {
77struct kvm_guest_debug_arch { 77struct kvm_guest_debug_arch {
78}; 78};
79 79
80#define KVM_REG_MASK 0x001f
81#define KVM_REG_EXT_MASK 0xffe0
82#define KVM_REG_GPR 0x0000
83#define KVM_REG_FPR 0x0020
84#define KVM_REG_QPR 0x0040
85#define KVM_REG_FQPR 0x0060
86
87#define KVM_INTERRUPT_SET -1U
88#define KVM_INTERRUPT_UNSET -2U
89
80#endif /* __LINUX_KVM_POWERPC_H */ 90#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index aadf2dd6f84e..c5ea4cda34b3 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -88,6 +88,8 @@
88 88
89#define BOOK3S_HFLAG_DCBZ32 0x1 89#define BOOK3S_HFLAG_DCBZ32 0x1
90#define BOOK3S_HFLAG_SLB 0x2 90#define BOOK3S_HFLAG_SLB 0x2
91#define BOOK3S_HFLAG_PAIRED_SINGLE 0x4
92#define BOOK3S_HFLAG_NATIVE_PS 0x8
91 93
92#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ 94#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
93#define RESUME_FLAG_HOST (1<<1) /* Resume host? */ 95#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index db7db0a96967..6f74d93725a0 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -22,46 +22,47 @@
22 22
23#include <linux/types.h> 23#include <linux/types.h>
24#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
25#include <asm/kvm_book3s_64_asm.h> 25#include <asm/kvm_book3s_asm.h>
26 26
27struct kvmppc_slb { 27struct kvmppc_slb {
28 u64 esid; 28 u64 esid;
29 u64 vsid; 29 u64 vsid;
30 u64 orige; 30 u64 orige;
31 u64 origv; 31 u64 origv;
32 bool valid; 32 bool valid : 1;
33 bool Ks; 33 bool Ks : 1;
34 bool Kp; 34 bool Kp : 1;
35 bool nx; 35 bool nx : 1;
36 bool large; /* PTEs are 16MB */ 36 bool large : 1; /* PTEs are 16MB */
37 bool tb; /* 1TB segment */ 37 bool tb : 1; /* 1TB segment */
38 bool class; 38 bool class : 1;
39}; 39};
40 40
41struct kvmppc_sr { 41struct kvmppc_sr {
42 u32 raw; 42 u32 raw;
43 u32 vsid; 43 u32 vsid;
44 bool Ks; 44 bool Ks : 1;
45 bool Kp; 45 bool Kp : 1;
46 bool nx; 46 bool nx : 1;
47 bool valid : 1;
47}; 48};
48 49
49struct kvmppc_bat { 50struct kvmppc_bat {
50 u64 raw; 51 u64 raw;
51 u32 bepi; 52 u32 bepi;
52 u32 bepi_mask; 53 u32 bepi_mask;
53 bool vs;
54 bool vp;
55 u32 brpn; 54 u32 brpn;
56 u8 wimg; 55 u8 wimg;
57 u8 pp; 56 u8 pp;
57 bool vs : 1;
58 bool vp : 1;
58}; 59};
59 60
60struct kvmppc_sid_map { 61struct kvmppc_sid_map {
61 u64 guest_vsid; 62 u64 guest_vsid;
62 u64 guest_esid; 63 u64 guest_esid;
63 u64 host_vsid; 64 u64 host_vsid;
64 bool valid; 65 bool valid : 1;
65}; 66};
66 67
67#define SID_MAP_BITS 9 68#define SID_MAP_BITS 9
@@ -70,7 +71,7 @@ struct kvmppc_sid_map {
70 71
71struct kvmppc_vcpu_book3s { 72struct kvmppc_vcpu_book3s {
72 struct kvm_vcpu vcpu; 73 struct kvm_vcpu vcpu;
73 struct kvmppc_book3s_shadow_vcpu shadow_vcpu; 74 struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
74 struct kvmppc_sid_map sid_map[SID_MAP_NUM]; 75 struct kvmppc_sid_map sid_map[SID_MAP_NUM];
75 struct kvmppc_slb slb[64]; 76 struct kvmppc_slb slb[64];
76 struct { 77 struct {
@@ -82,9 +83,10 @@ struct kvmppc_vcpu_book3s {
82 struct kvmppc_bat ibat[8]; 83 struct kvmppc_bat ibat[8];
83 struct kvmppc_bat dbat[8]; 84 struct kvmppc_bat dbat[8];
84 u64 hid[6]; 85 u64 hid[6];
86 u64 gqr[8];
85 int slb_nr; 87 int slb_nr;
88 u32 dsisr;
86 u64 sdr1; 89 u64 sdr1;
87 u64 dsisr;
88 u64 hior; 90 u64 hior;
89 u64 msr_mask; 91 u64 msr_mask;
90 u64 vsid_first; 92 u64 vsid_first;
@@ -98,15 +100,15 @@ struct kvmppc_vcpu_book3s {
98#define CONTEXT_GUEST 1 100#define CONTEXT_GUEST 1
99#define CONTEXT_GUEST_END 2 101#define CONTEXT_GUEST_END 2
100 102
101#define VSID_REAL 0xfffffffffff00000 103#define VSID_REAL 0x1fffffffffc00000ULL
102#define VSID_REAL_DR 0xffffffffffe00000 104#define VSID_BAT 0x1fffffffffb00000ULL
103#define VSID_REAL_IR 0xffffffffffd00000 105#define VSID_REAL_DR 0x2000000000000000ULL
104#define VSID_BAT 0xffffffffffc00000 106#define VSID_REAL_IR 0x4000000000000000ULL
105#define VSID_PR 0x8000000000000000 107#define VSID_PR 0x8000000000000000ULL
106 108
107extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 ea, u64 ea_mask); 109extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong ea, ulong ea_mask);
108extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); 110extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask);
109extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end); 111extern void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end);
110extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr); 112extern void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 new_msr);
111extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu); 113extern void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu);
112extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu); 114extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
@@ -114,11 +116,13 @@ extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
114extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); 116extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
115extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); 117extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
116extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); 118extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data);
117extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, bool data); 119extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
118extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr); 120extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
119extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); 121extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
120extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, 122extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
121 bool upper, u32 val); 123 bool upper, u32 val);
124extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
125extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
122 126
123extern u32 kvmppc_trampoline_lowmem; 127extern u32 kvmppc_trampoline_lowmem;
124extern u32 kvmppc_trampoline_enter; 128extern u32 kvmppc_trampoline_enter;
@@ -126,6 +130,8 @@ extern void kvmppc_rmcall(ulong srr0, ulong srr1);
126extern void kvmppc_load_up_fpu(void); 130extern void kvmppc_load_up_fpu(void);
127extern void kvmppc_load_up_altivec(void); 131extern void kvmppc_load_up_altivec(void);
128extern void kvmppc_load_up_vsx(void); 132extern void kvmppc_load_up_vsx(void);
133extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
134extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
129 135
130static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) 136static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
131{ 137{
@@ -140,7 +146,108 @@ static inline ulong dsisr(void)
140} 146}
141 147
142extern void kvm_return_point(void); 148extern void kvm_return_point(void);
149static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu);
150
151static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
152{
153 if ( num < 14 ) {
154 to_svcpu(vcpu)->gpr[num] = val;
155 to_book3s(vcpu)->shadow_vcpu->gpr[num] = val;
156 } else
157 vcpu->arch.gpr[num] = val;
158}
159
160static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
161{
162 if ( num < 14 )
163 return to_svcpu(vcpu)->gpr[num];
164 else
165 return vcpu->arch.gpr[num];
166}
167
168static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
169{
170 to_svcpu(vcpu)->cr = val;
171 to_book3s(vcpu)->shadow_vcpu->cr = val;
172}
173
174static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
175{
176 return to_svcpu(vcpu)->cr;
177}
178
179static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
180{
181 to_svcpu(vcpu)->xer = val;
182 to_book3s(vcpu)->shadow_vcpu->xer = val;
183}
184
185static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
186{
187 return to_svcpu(vcpu)->xer;
188}
189
190static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
191{
192 to_svcpu(vcpu)->ctr = val;
193}
194
195static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
196{
197 return to_svcpu(vcpu)->ctr;
198}
199
200static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
201{
202 to_svcpu(vcpu)->lr = val;
203}
204
205static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
206{
207 return to_svcpu(vcpu)->lr;
208}
209
210static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
211{
212 to_svcpu(vcpu)->pc = val;
213}
214
215static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
216{
217 return to_svcpu(vcpu)->pc;
218}
219
220static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
221{
222 ulong pc = kvmppc_get_pc(vcpu);
223 struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
224
225 /* Load the instruction manually if it failed to do so in the
226 * exit path */
227 if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
228 kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
229
230 return svcpu->last_inst;
231}
232
233static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
234{
235 return to_svcpu(vcpu)->fault_dar;
236}
237
238/* Magic register values loaded into r3 and r4 before the 'sc' assembly
239 * instruction for the OSI hypercalls */
240#define OSI_SC_MAGIC_R3 0x113724FA
241#define OSI_SC_MAGIC_R4 0x77810F9B
143 242
144#define INS_DCBZ 0x7c0007ec 243#define INS_DCBZ 0x7c0007ec
145 244
245/* Also add subarch specific defines */
246
247#ifdef CONFIG_PPC_BOOK3S_32
248#include <asm/kvm_book3s_32.h>
249#else
250#include <asm/kvm_book3s_64.h>
251#endif
252
146#endif /* __ASM_KVM_BOOK3S_H__ */ 253#endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h
new file mode 100644
index 000000000000..de604db135f5
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_32.h
@@ -0,0 +1,42 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright SUSE Linux Products GmbH 2010
16 *
17 * Authors: Alexander Graf <agraf@suse.de>
18 */
19
20#ifndef __ASM_KVM_BOOK3S_32_H__
21#define __ASM_KVM_BOOK3S_32_H__
22
23static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
24{
25 return to_book3s(vcpu)->shadow_vcpu;
26}
27
28#define PTE_SIZE 12
29#define VSID_ALL 0
30#define SR_INVALID 0x00000001 /* VSID 1 should always be unused */
31#define SR_KP 0x20000000
32#define PTE_V 0x80000000
33#define PTE_SEC 0x00000040
34#define PTE_M 0x00000010
35#define PTE_R 0x00000100
36#define PTE_C 0x00000080
37
38#define SID_SHIFT 28
39#define ESID_MASK 0xf0000000
40#define VSID_MASK 0x00fffffff0000000ULL
41
42#endif /* __ASM_KVM_BOOK3S_32_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
new file mode 100644
index 000000000000..4cadd612d575
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -0,0 +1,28 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright SUSE Linux Products GmbH 2010
16 *
17 * Authors: Alexander Graf <agraf@suse.de>
18 */
19
20#ifndef __ASM_KVM_BOOK3S_64_H__
21#define __ASM_KVM_BOOK3S_64_H__
22
23static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
24{
25 return &get_paca()->shadow_vcpu;
26}
27
28#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_64_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 183461b48407..36fdb3aff30b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -22,7 +22,7 @@
22 22
23#ifdef __ASSEMBLY__ 23#ifdef __ASSEMBLY__
24 24
25#ifdef CONFIG_KVM_BOOK3S_64_HANDLER 25#ifdef CONFIG_KVM_BOOK3S_HANDLER
26 26
27#include <asm/kvm_asm.h> 27#include <asm/kvm_asm.h>
28 28
@@ -55,7 +55,7 @@ kvmppc_resume_\intno:
55.macro DO_KVM intno 55.macro DO_KVM intno
56.endm 56.endm
57 57
58#endif /* CONFIG_KVM_BOOK3S_64_HANDLER */ 58#endif /* CONFIG_KVM_BOOK3S_HANDLER */
59 59
60#else /*__ASSEMBLY__ */ 60#else /*__ASSEMBLY__ */
61 61
@@ -63,12 +63,33 @@ struct kvmppc_book3s_shadow_vcpu {
63 ulong gpr[14]; 63 ulong gpr[14];
64 u32 cr; 64 u32 cr;
65 u32 xer; 65 u32 xer;
66
67 u32 fault_dsisr;
68 u32 last_inst;
69 ulong ctr;
70 ulong lr;
71 ulong pc;
72 ulong shadow_srr1;
73 ulong fault_dar;
74
66 ulong host_r1; 75 ulong host_r1;
67 ulong host_r2; 76 ulong host_r2;
68 ulong handler; 77 ulong handler;
69 ulong scratch0; 78 ulong scratch0;
70 ulong scratch1; 79 ulong scratch1;
71 ulong vmhandler; 80 ulong vmhandler;
81 u8 in_guest;
82
83#ifdef CONFIG_PPC_BOOK3S_32
84 u32 sr[16]; /* Guest SRs */
85#endif
86#ifdef CONFIG_PPC_BOOK3S_64
87 u8 slb_max; /* highest used guest slb entry */
88 struct {
89 u64 esid;
90 u64 vsid;
91 } slb[64]; /* guest SLB */
92#endif
72}; 93};
73 94
74#endif /*__ASSEMBLY__ */ 95#endif /*__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
new file mode 100644
index 000000000000..9c9ba3d59b1b
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -0,0 +1,96 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright SUSE Linux Products GmbH 2010
16 *
17 * Authors: Alexander Graf <agraf@suse.de>
18 */
19
20#ifndef __ASM_KVM_BOOKE_H__
21#define __ASM_KVM_BOOKE_H__
22
23#include <linux/types.h>
24#include <linux/kvm_host.h>
25
26static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
27{
28 vcpu->arch.gpr[num] = val;
29}
30
31static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
32{
33 return vcpu->arch.gpr[num];
34}
35
36static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
37{
38 vcpu->arch.cr = val;
39}
40
41static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
42{
43 return vcpu->arch.cr;
44}
45
46static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
47{
48 vcpu->arch.xer = val;
49}
50
51static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
52{
53 return vcpu->arch.xer;
54}
55
56static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
57{
58 return vcpu->arch.last_inst;
59}
60
61static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
62{
63 vcpu->arch.ctr = val;
64}
65
66static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
67{
68 return vcpu->arch.ctr;
69}
70
71static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
72{
73 vcpu->arch.lr = val;
74}
75
76static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
77{
78 return vcpu->arch.lr;
79}
80
81static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
82{
83 vcpu->arch.pc = val;
84}
85
86static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
87{
88 return vcpu->arch.pc;
89}
90
91static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
92{
93 return vcpu->arch.fault_dear;
94}
95
96#endif /* __ASM_KVM_BOOKE_H__ */
diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h
new file mode 100644
index 000000000000..94f05de9ad04
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_fpu.h
@@ -0,0 +1,85 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright Novell Inc. 2010
16 *
17 * Authors: Alexander Graf <agraf@suse.de>
18 */
19
20#ifndef __ASM_KVM_FPU_H__
21#define __ASM_KVM_FPU_H__
22
23#include <linux/types.h>
24
25extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1);
26extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1);
27extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1);
28
29extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2);
30extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2);
31extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2);
32extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2);
33
34extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
35 u32 *src3);
36extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
37 u32 *src3);
38extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
39 u32 *src3);
40extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
41 u32 *src3);
42extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
43 u32 *src3);
44
45#define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \
46 u64 *dst, u64 *src1);
47#define FPD_TWO_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \
48 u64 *dst, u64 *src1, u64 *src2);
49#define FPD_THREE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \
50 u64 *dst, u64 *src1, u64 *src2, u64 *src3);
51
52extern void fpd_fcmpu(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2);
53extern void fpd_fcmpo(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2);
54
55FPD_ONE_IN(fsqrts)
56FPD_ONE_IN(frsqrtes)
57FPD_ONE_IN(fres)
58FPD_ONE_IN(frsp)
59FPD_ONE_IN(fctiw)
60FPD_ONE_IN(fctiwz)
61FPD_ONE_IN(fsqrt)
62FPD_ONE_IN(fre)
63FPD_ONE_IN(frsqrte)
64FPD_ONE_IN(fneg)
65FPD_ONE_IN(fabs)
66FPD_TWO_IN(fadds)
67FPD_TWO_IN(fsubs)
68FPD_TWO_IN(fdivs)
69FPD_TWO_IN(fmuls)
70FPD_TWO_IN(fcpsgn)
71FPD_TWO_IN(fdiv)
72FPD_TWO_IN(fadd)
73FPD_TWO_IN(fmul)
74FPD_TWO_IN(fsub)
75FPD_THREE_IN(fmsubs)
76FPD_THREE_IN(fmadds)
77FPD_THREE_IN(fnmsubs)
78FPD_THREE_IN(fnmadds)
79FPD_THREE_IN(fsel)
80FPD_THREE_IN(fmsub)
81FPD_THREE_IN(fmadd)
82FPD_THREE_IN(fnmsub)
83FPD_THREE_IN(fnmadd)
84
85#endif
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 5e5bae7e152f..0c9ad869decd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -66,7 +66,7 @@ struct kvm_vcpu_stat {
66 u32 dec_exits; 66 u32 dec_exits;
67 u32 ext_intr_exits; 67 u32 ext_intr_exits;
68 u32 halt_wakeup; 68 u32 halt_wakeup;
69#ifdef CONFIG_PPC64 69#ifdef CONFIG_PPC_BOOK3S
70 u32 pf_storage; 70 u32 pf_storage;
71 u32 pf_instruc; 71 u32 pf_instruc;
72 u32 sp_storage; 72 u32 sp_storage;
@@ -124,12 +124,12 @@ struct kvm_arch {
124}; 124};
125 125
126struct kvmppc_pte { 126struct kvmppc_pte {
127 u64 eaddr; 127 ulong eaddr;
128 u64 vpage; 128 u64 vpage;
129 u64 raddr; 129 ulong raddr;
130 bool may_read; 130 bool may_read : 1;
131 bool may_write; 131 bool may_write : 1;
132 bool may_execute; 132 bool may_execute : 1;
133}; 133};
134 134
135struct kvmppc_mmu { 135struct kvmppc_mmu {
@@ -145,7 +145,7 @@ struct kvmppc_mmu {
145 int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); 145 int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data);
146 void (*reset_msr)(struct kvm_vcpu *vcpu); 146 void (*reset_msr)(struct kvm_vcpu *vcpu);
147 void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large); 147 void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
148 int (*esid_to_vsid)(struct kvm_vcpu *vcpu, u64 esid, u64 *vsid); 148 int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
149 u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data); 149 u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
150 bool (*is_dcbz32)(struct kvm_vcpu *vcpu); 150 bool (*is_dcbz32)(struct kvm_vcpu *vcpu);
151}; 151};
@@ -160,7 +160,7 @@ struct hpte_cache {
160struct kvm_vcpu_arch { 160struct kvm_vcpu_arch {
161 ulong host_stack; 161 ulong host_stack;
162 u32 host_pid; 162 u32 host_pid;
163#ifdef CONFIG_PPC64 163#ifdef CONFIG_PPC_BOOK3S
164 ulong host_msr; 164 ulong host_msr;
165 ulong host_r2; 165 ulong host_r2;
166 void *host_retip; 166 void *host_retip;
@@ -175,7 +175,7 @@ struct kvm_vcpu_arch {
175 ulong gpr[32]; 175 ulong gpr[32];
176 176
177 u64 fpr[32]; 177 u64 fpr[32];
178 u32 fpscr; 178 u64 fpscr;
179 179
180#ifdef CONFIG_ALTIVEC 180#ifdef CONFIG_ALTIVEC
181 vector128 vr[32]; 181 vector128 vr[32];
@@ -186,19 +186,23 @@ struct kvm_vcpu_arch {
186 u64 vsr[32]; 186 u64 vsr[32];
187#endif 187#endif
188 188
189#ifdef CONFIG_PPC_BOOK3S
190 /* For Gekko paired singles */
191 u32 qpr[32];
192#endif
193
194#ifdef CONFIG_BOOKE
189 ulong pc; 195 ulong pc;
190 ulong ctr; 196 ulong ctr;
191 ulong lr; 197 ulong lr;
192 198
193#ifdef CONFIG_BOOKE
194 ulong xer; 199 ulong xer;
195 u32 cr; 200 u32 cr;
196#endif 201#endif
197 202
198 ulong msr; 203 ulong msr;
199#ifdef CONFIG_PPC64 204#ifdef CONFIG_PPC_BOOK3S
200 ulong shadow_msr; 205 ulong shadow_msr;
201 ulong shadow_srr1;
202 ulong hflags; 206 ulong hflags;
203 ulong guest_owned_ext; 207 ulong guest_owned_ext;
204#endif 208#endif
@@ -253,20 +257,22 @@ struct kvm_vcpu_arch {
253 struct dentry *debugfs_exit_timing; 257 struct dentry *debugfs_exit_timing;
254#endif 258#endif
255 259
260#ifdef CONFIG_BOOKE
256 u32 last_inst; 261 u32 last_inst;
257#ifdef CONFIG_PPC64
258 ulong fault_dsisr;
259#endif
260 ulong fault_dear; 262 ulong fault_dear;
261 ulong fault_esr; 263 ulong fault_esr;
262 ulong queued_dear; 264 ulong queued_dear;
263 ulong queued_esr; 265 ulong queued_esr;
266#endif
264 gpa_t paddr_accessed; 267 gpa_t paddr_accessed;
265 268
266 u8 io_gpr; /* GPR used as IO source/target */ 269 u8 io_gpr; /* GPR used as IO source/target */
267 u8 mmio_is_bigendian; 270 u8 mmio_is_bigendian;
271 u8 mmio_sign_extend;
268 u8 dcr_needed; 272 u8 dcr_needed;
269 u8 dcr_is_write; 273 u8 dcr_is_write;
274 u8 osi_needed;
275 u8 osi_enabled;
270 276
271 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ 277 u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
272 278
@@ -275,7 +281,7 @@ struct kvm_vcpu_arch {
275 u64 dec_jiffies; 281 u64 dec_jiffies;
276 unsigned long pending_exceptions; 282 unsigned long pending_exceptions;
277 283
278#ifdef CONFIG_PPC64 284#ifdef CONFIG_PPC_BOOK3S
279 struct hpte_cache hpte_cache[HPTEG_CACHE_NUM]; 285 struct hpte_cache hpte_cache[HPTEG_CACHE_NUM];
280 int hpte_cache_offset; 286 int hpte_cache_offset;
281#endif 287#endif
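
The new mmio_sign_extend byte in kvm_vcpu_arch (together with the kvmppc_handle_loads() prototype added to kvm_ppc.h below) lets the MMIO path remember whether a pending load should be zero- or sign-extended into the 64-bit GPR once the value comes back from userspace. A small user-space sketch of just that widening step; widen() is an invented name, not a kernel function:

#include <stdint.h>
#include <stdio.h>

/* Illustration only: how a 1/2/4-byte MMIO result is widened into a
 * 64-bit register value depending on a sign-extend flag. */
static uint64_t widen(uint64_t raw, int bytes, int sign_extend)
{
	int shift = 64 - 8 * bytes;

	if (!sign_extend)
		return raw & (~0ULL >> shift);
	return (uint64_t)((int64_t)(raw << shift) >> shift);
}

int main(void)
{
	/* a 1-byte load of 0x80: zero-extending vs. sign-extending semantics */
	printf("%016llx\n", (unsigned long long)widen(0x80, 1, 0));
	printf("%016llx\n", (unsigned long long)widen(0x80, 1, 1));
	return 0;
}

Sign-extending loads such as lha are the obvious users, which is presumably what the new kvmppc_handle_loads() variant exists for.
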
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e2642829e435..18d139ec2d22 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -30,6 +30,8 @@
30#include <linux/kvm_host.h> 30#include <linux/kvm_host.h>
31#ifdef CONFIG_PPC_BOOK3S 31#ifdef CONFIG_PPC_BOOK3S
32#include <asm/kvm_book3s.h> 32#include <asm/kvm_book3s.h>
33#else
34#include <asm/kvm_booke.h>
33#endif 35#endif
34 36
35enum emulation_result { 37enum emulation_result {
@@ -37,6 +39,7 @@ enum emulation_result {
37 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ 39 EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
38 EMULATE_DO_DCR, /* kvm_run filled with DCR request */ 40 EMULATE_DO_DCR, /* kvm_run filled with DCR request */
39 EMULATE_FAIL, /* can't emulate this instruction */ 41 EMULATE_FAIL, /* can't emulate this instruction */
42 EMULATE_AGAIN, /* something went wrong. go again */
40}; 43};
41 44
42extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 45extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -48,8 +51,11 @@ extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
48extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 51extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
49 unsigned int rt, unsigned int bytes, 52 unsigned int rt, unsigned int bytes,
50 int is_bigendian); 53 int is_bigendian);
54extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
55 unsigned int rt, unsigned int bytes,
56 int is_bigendian);
51extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, 57extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
52 u32 val, unsigned int bytes, int is_bigendian); 58 u64 val, unsigned int bytes, int is_bigendian);
53 59
54extern int kvmppc_emulate_instruction(struct kvm_run *run, 60extern int kvmppc_emulate_instruction(struct kvm_run *run,
55 struct kvm_vcpu *vcpu); 61 struct kvm_vcpu *vcpu);
@@ -63,6 +69,7 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
63extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); 69extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
64extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); 70extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
65extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu); 71extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
72extern int kvmppc_mmu_init(struct kvm_vcpu *vcpu);
66extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); 73extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
67extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); 74extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
68extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, 75extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
@@ -88,6 +95,8 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
88extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); 95extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
89extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 96extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
90 struct kvm_interrupt *irq); 97 struct kvm_interrupt *irq);
98extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
99 struct kvm_interrupt *irq);
91 100
92extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 101extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
93 unsigned int op, int *advance); 102 unsigned int op, int *advance);
@@ -99,81 +108,37 @@ extern void kvmppc_booke_exit(void);
99 108
100extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); 109extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
101 110
102#ifdef CONFIG_PPC_BOOK3S 111/*
103 112 * Cuts out inst bits with ordering according to spec.
104/* We assume we're always acting on the current vcpu */ 113 * That means the leftmost bit is zero. All given bits are included.
105 114 */
106static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 115static inline u32 kvmppc_get_field(u64 inst, int msb, int lsb)
107{
108 if ( num < 14 ) {
109 get_paca()->shadow_vcpu.gpr[num] = val;
110 to_book3s(vcpu)->shadow_vcpu.gpr[num] = val;
111 } else
112 vcpu->arch.gpr[num] = val;
113}
114
115static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
116{
117 if ( num < 14 )
118 return get_paca()->shadow_vcpu.gpr[num];
119 else
120 return vcpu->arch.gpr[num];
121}
122
123static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
124{
125 get_paca()->shadow_vcpu.cr = val;
126 to_book3s(vcpu)->shadow_vcpu.cr = val;
127}
128
129static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
130{
131 return get_paca()->shadow_vcpu.cr;
132}
133
134static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
135{
136 get_paca()->shadow_vcpu.xer = val;
137 to_book3s(vcpu)->shadow_vcpu.xer = val;
138}
139
140static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
141{ 116{
142 return get_paca()->shadow_vcpu.xer; 117 u32 r;
143} 118 u32 mask;
144 119
145#else 120 BUG_ON(msb > lsb);
146 121
147static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) 122 mask = (1 << (lsb - msb + 1)) - 1;
148{ 123 r = (inst >> (63 - lsb)) & mask;
149 vcpu->arch.gpr[num] = val;
150}
151 124
152static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) 125 return r;
153{
154 return vcpu->arch.gpr[num];
155} 126}
156 127
157static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) 128/*
129 * Replaces inst bits with ordering according to spec.
130 */
131static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
158{ 132{
159 vcpu->arch.cr = val; 133 u32 r;
160} 134 u32 mask;
161 135
162static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) 136 BUG_ON(msb > lsb);
163{
164 return vcpu->arch.cr;
165}
166 137
167static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) 138 mask = ((1 << (lsb - msb + 1)) - 1) << (63 - lsb);
168{ 139 r = (inst & ~mask) | ((value << (63 - lsb)) & mask);
169 vcpu->arch.xer = val;
170}
171 140
172static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) 141 return r;
173{
174 return vcpu->arch.xer;
175} 142}
176 143
177#endif
178
179#endif /* __POWERPC_KVM_PPC_H__ */ 144#endif /* __POWERPC_KVM_PPC_H__ */
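
The kvmppc_get_field()/kvmppc_set_field() helpers above count bits the way the PowerPC ISA does: MSB-0 over a 64-bit doubleword, so a 32-bit instruction occupies bits 32..63. A user-space re-implementation for experimenting with that numbering (get_field()/set_field() here are local copies for illustration, not the kernel symbols):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* MSB-0 field helpers: bit 0 is the leftmost bit of the 64-bit doubleword.
 * Good enough for the narrow fields used in this demo. */
static uint32_t get_field(uint64_t inst, int msb, int lsb)
{
	uint32_t mask;

	assert(msb <= lsb);
	mask = (1u << (lsb - msb + 1)) - 1;
	return (inst >> (63 - lsb)) & mask;
}

static uint64_t set_field(uint64_t inst, int msb, int lsb, uint32_t value)
{
	uint64_t mask;

	assert(msb <= lsb);
	mask = ((1ULL << (lsb - msb + 1)) - 1) << (63 - lsb);
	return (inst & ~mask) | (((uint64_t)value << (63 - lsb)) & mask);
}

int main(void)
{
	uint64_t inst = 0x7c0802a6;	/* mflr r0 */

	/* primary opcode = bits 32..37 of the doubleword: 31 for mflr */
	printf("opcode = %u\n", get_field(inst, 32, 37));

	/* field 33 of a 64-bit register, as a conventional LSB-0 mask */
	printf("bit 33 = 0x%08llx\n",
	       (unsigned long long)set_field(0, 33, 33, 1));
	return 0;
}

Field 33 comes out as 0x40000000, the same flag the instruction-storage exit path tests in shadow_srr1 later in this patch.
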
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 26383e0778aa..81fb41289d6c 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -27,6 +27,8 @@ extern int __init_new_context(void);
27extern void __destroy_context(int context_id); 27extern void __destroy_context(int context_id);
28static inline void mmu_context_init(void) { } 28static inline void mmu_context_init(void) { }
29#else 29#else
30extern unsigned long __init_new_context(void);
31extern void __destroy_context(unsigned long context_id);
30extern void mmu_context_init(void); 32extern void mmu_context_init(void);
31#endif 33#endif
32 34
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 971dfa4815f0..8ce7963ad41d 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -23,7 +23,7 @@
23#include <asm/page.h> 23#include <asm/page.h>
24#include <asm/exception-64e.h> 24#include <asm/exception-64e.h>
25#ifdef CONFIG_KVM_BOOK3S_64_HANDLER 25#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
26#include <asm/kvm_book3s_64_asm.h> 26#include <asm/kvm_book3s_asm.h>
27#endif 27#endif
28 28
29register struct paca_struct *local_paca asm("r13"); 29register struct paca_struct *local_paca asm("r13");
@@ -137,15 +137,9 @@ struct paca_struct {
137 u64 startpurr; /* PURR/TB value snapshot */ 137 u64 startpurr; /* PURR/TB value snapshot */
138 u64 startspurr; /* SPURR value snapshot */ 138 u64 startspurr; /* SPURR value snapshot */
139 139
140#ifdef CONFIG_KVM_BOOK3S_64_HANDLER 140#ifdef CONFIG_KVM_BOOK3S_HANDLER
141 struct {
142 u64 esid;
143 u64 vsid;
144 } kvm_slb[64]; /* guest SLB */
145 /* We use this to store guest state in */ 141 /* We use this to store guest state in */
146 struct kvmppc_book3s_shadow_vcpu shadow_vcpu; 142 struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
147 u8 kvm_slb_max; /* highest used guest slb entry */
148 u8 kvm_in_guest; /* are we inside the guest? */
149#endif 143#endif
150}; 144};
151 145
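
The guest SLB shadow and the in-guest flag do not disappear: they move from raw paca fields into struct kvmppc_book3s_shadow_vcpu (see the SVCPU_SLB, SVCPU_SLB_MAX and SVCPU_IN_GUEST offsets added to asm-offsets.c further down), which is also what lets the 32-bit port keep the same state behind a pointer instead of a paca. A sketch of just the moved state, mirroring the deleted fields rather than the real header:

/* Mirrors the fields removed from paca_struct above; not the actual
 * definition in asm/kvm_book3s_asm.h. */
struct kvm_slb_shadow_entry {
	unsigned long long esid;
	unsigned long long vsid;
};

struct shadow_vcpu_slb_state {
	struct kvm_slb_shadow_entry slb[64];	/* guest SLB */
	unsigned char slb_max;			/* highest used guest slb entry */
	unsigned char in_guest;			/* are we inside the guest? */
};
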
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 221ba6240464..7492fe8ad6e4 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -229,6 +229,9 @@ struct thread_struct {
229 unsigned long spefscr; /* SPE & eFP status */ 229 unsigned long spefscr; /* SPE & eFP status */
230 int used_spe; /* set if process has used spe */ 230 int used_spe; /* set if process has used spe */
231#endif /* CONFIG_SPE */ 231#endif /* CONFIG_SPE */
232#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
233 void* kvm_shadow_vcpu; /* KVM internal data */
234#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
232}; 235};
233 236
234#define ARCH_MIN_TASKALIGN 16 237#define ARCH_MIN_TASKALIGN 16
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b68f025924a8..d62fdf4e504b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -293,10 +293,12 @@
293#define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ 293#define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */
294#define HID1_PS (1<<16) /* 750FX PLL selection */ 294#define HID1_PS (1<<16) /* 750FX PLL selection */
295#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ 295#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */
296#define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */
296#define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ 297#define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */
297#define SPRN_IABR2 0x3FA /* 83xx */ 298#define SPRN_IABR2 0x3FA /* 83xx */
298#define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ 299#define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */
299#define SPRN_HID4 0x3F4 /* 970 HID4 */ 300#define SPRN_HID4 0x3F4 /* 970 HID4 */
301#define SPRN_HID4_GEKKO 0x3F3 /* Gekko HID4 */
300#define SPRN_HID5 0x3F6 /* 970 HID5 */ 302#define SPRN_HID5 0x3F6 /* 970 HID5 */
301#define SPRN_HID6 0x3F9 /* BE HID 6 */ 303#define SPRN_HID6 0x3F9 /* BE HID 6 */
302#define HID6_LB (0x0F<<12) /* Concurrent Large Page Modes */ 304#define HID6_LB (0x0F<<12) /* Concurrent Large Page Modes */
@@ -465,6 +467,14 @@
465#define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ 467#define SPRN_VRSAVE 0x100 /* Vector Register Save Register */
466#define SPRN_XER 0x001 /* Fixed Point Exception Register */ 468#define SPRN_XER 0x001 /* Fixed Point Exception Register */
467 469
470#define SPRN_MMCR0_GEKKO 0x3B8 /* Gekko Monitor Mode Control Register 0 */
471#define SPRN_MMCR1_GEKKO 0x3BC /* Gekko Monitor Mode Control Register 1 */
472#define SPRN_PMC1_GEKKO 0x3B9 /* Gekko Performance Monitor Control 1 */
473#define SPRN_PMC2_GEKKO 0x3BA /* Gekko Performance Monitor Control 2 */
474#define SPRN_PMC3_GEKKO 0x3BD /* Gekko Performance Monitor Control 3 */
475#define SPRN_PMC4_GEKKO 0x3BE /* Gekko Performance Monitor Control 4 */
476#define SPRN_WPAR_GEKKO 0x399 /* Gekko Write Pipe Address Register */
477
468#define SPRN_SCOMC 0x114 /* SCOM Access Control */ 478#define SPRN_SCOMC 0x114 /* SCOM Access Control */
469#define SPRN_SCOMD 0x115 /* SCOM Access DATA */ 479#define SPRN_SCOMD 0x115 /* SCOM Access DATA */
470 480
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 28a686fb269c..496cc5b3984f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -50,6 +50,9 @@
50#endif 50#endif
51#ifdef CONFIG_KVM 51#ifdef CONFIG_KVM
52#include <linux/kvm_host.h> 52#include <linux/kvm_host.h>
53#ifndef CONFIG_BOOKE
54#include <asm/kvm_book3s.h>
55#endif
53#endif 56#endif
54 57
55#ifdef CONFIG_PPC32 58#ifdef CONFIG_PPC32
@@ -105,6 +108,9 @@ int main(void)
105 DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); 108 DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
106#endif /* CONFIG_SPE */ 109#endif /* CONFIG_SPE */
107#endif /* CONFIG_PPC64 */ 110#endif /* CONFIG_PPC64 */
111#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
112 DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
113#endif
108 114
109 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); 115 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
110 DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); 116 DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
@@ -191,33 +197,9 @@ int main(void)
191 DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); 197 DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
192 DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); 198 DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
193#ifdef CONFIG_KVM_BOOK3S_64_HANDLER 199#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
194 DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); 200 DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
195 DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); 201 DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb));
196 DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); 202 DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max));
197 DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr));
198 DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer));
199 DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0]));
200 DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1]));
201 DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2]));
202 DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3]));
203 DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4]));
204 DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5]));
205 DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6]));
206 DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7]));
207 DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8]));
208 DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9]));
209 DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10]));
210 DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11]));
211 DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12]));
212 DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13]));
213 DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1));
214 DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2));
215 DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct,
216 shadow_vcpu.vmhandler));
217 DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct,
218 shadow_vcpu.scratch0));
219 DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct,
220 shadow_vcpu.scratch1));
221#endif 203#endif
222#endif /* CONFIG_PPC64 */ 204#endif /* CONFIG_PPC64 */
223 205
@@ -228,8 +210,8 @@ int main(void)
228 /* Interrupt register frame */ 210 /* Interrupt register frame */
229 DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); 211 DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD);
230 DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); 212 DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
231#ifdef CONFIG_PPC64
232 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); 213 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
214#ifdef CONFIG_PPC64
233 /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ 215 /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
234 DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); 216 DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
235 DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); 217 DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
@@ -412,9 +394,6 @@ int main(void)
412 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); 394 DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
413 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); 395 DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
414 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 396 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
415 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
416 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
417 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
418 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); 397 DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
419 DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); 398 DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
420 DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); 399 DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
@@ -422,27 +401,68 @@ int main(void)
422 DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); 401 DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
423 DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); 402 DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
424 403
425 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); 404 /* book3s */
426 DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); 405#ifdef CONFIG_PPC_BOOK3S
427 DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
428
429 /* book3s_64 */
430#ifdef CONFIG_PPC64
431 DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
432 DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); 406 DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
433 DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
434 DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); 407 DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
435 DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); 408 DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
436 DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
437 DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); 409 DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
438 DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); 410 DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
439 DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); 411 DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
440 DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); 412 DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
441 DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); 413 DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
414 DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
415 offsetof(struct kvmppc_vcpu_book3s, vcpu));
416 DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr));
417 DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer));
418 DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr));
419 DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr));
420 DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc));
421 DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0]));
422 DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1]));
423 DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2]));
424 DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3]));
425 DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4]));
426 DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5]));
427 DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6]));
428 DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7]));
429 DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8]));
430 DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9]));
431 DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10]));
432 DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11]));
433 DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12]));
434 DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13]));
435 DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1));
436 DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2));
437 DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu,
438 vmhandler));
439 DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu,
440 scratch0));
441 DEFINE(SVCPU_SCRATCH1, offsetof(struct kvmppc_book3s_shadow_vcpu,
442 scratch1));
443 DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu,
444 in_guest));
445 DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu,
446 fault_dsisr));
447 DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu,
448 fault_dar));
449 DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu,
450 last_inst));
451 DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu,
452 shadow_srr1));
453#ifdef CONFIG_PPC_BOOK3S_32
454 DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr));
455#endif
442#else 456#else
443 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 457 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
444 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); 458 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
445#endif /* CONFIG_PPC64 */ 459 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
460 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
461 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
462 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
463 DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
464 DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
465#endif /* CONFIG_PPC_BOOK3S */
446#endif 466#endif
447#ifdef CONFIG_44x 467#ifdef CONFIG_44x
448 DEFINE(PGD_T_LOG2, PGD_T_LOG2); 468 DEFINE(PGD_T_LOG2, PGD_T_LOG2);
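
The SVCPU_* constants are consumed by the assembly side (book3s_interrupts.S, book3s_rmhandlers.S, book3s_segment.S), and DEFINE() is the usual asm-offsets trick: the file is only compiled to assembler output, and marker lines carrying offsetof() values are scraped into asm-offsets.h by the build system. A standalone imitation of the idea (the real macro lives in the kernel's kbuild machinery, so treat this one as illustrative):

/* toy-asm-offsets.c: build with "cc -S toy-asm-offsets.c -o -" and look for
 * the "->" marker lines; this file is not meant to be assembled or run. */
#include <stddef.h>

struct shadow_vcpu_example {
	unsigned int cr;
	unsigned int xer;
	unsigned long gpr[14];
};

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

int main(void)
{
	DEFINE(SVCPU_CR_EXAMPLE, offsetof(struct shadow_vcpu_example, cr));
	DEFINE(SVCPU_R3_EXAMPLE, offsetof(struct shadow_vcpu_example, gpr[3]));
	return 0;
}

Each marker line then becomes a #define, so low-level code can write something like lwz rX, SVCPU_CR(rY) without the assembler ever seeing the C struct layout.
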
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e025e89fe93e..98c4b29a56f4 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -33,6 +33,7 @@
33#include <asm/asm-offsets.h> 33#include <asm/asm-offsets.h>
34#include <asm/ptrace.h> 34#include <asm/ptrace.h>
35#include <asm/bug.h> 35#include <asm/bug.h>
36#include <asm/kvm_book3s_asm.h>
36 37
37/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ 38/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
38#define LOAD_BAT(n, reg, RA, RB) \ 39#define LOAD_BAT(n, reg, RA, RB) \
@@ -303,6 +304,7 @@ __secondary_hold_acknowledge:
303 */ 304 */
304#define EXCEPTION(n, label, hdlr, xfer) \ 305#define EXCEPTION(n, label, hdlr, xfer) \
305 . = n; \ 306 . = n; \
307 DO_KVM n; \
306label: \ 308label: \
307 EXCEPTION_PROLOG; \ 309 EXCEPTION_PROLOG; \
308 addi r3,r1,STACK_FRAME_OVERHEAD; \ 310 addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -358,6 +360,7 @@ i##n: \
358 * -- paulus. 360 * -- paulus.
359 */ 361 */
360 . = 0x200 362 . = 0x200
363 DO_KVM 0x200
361 mtspr SPRN_SPRG_SCRATCH0,r10 364 mtspr SPRN_SPRG_SCRATCH0,r10
362 mtspr SPRN_SPRG_SCRATCH1,r11 365 mtspr SPRN_SPRG_SCRATCH1,r11
363 mfcr r10 366 mfcr r10
@@ -381,6 +384,7 @@ i##n: \
381 384
382/* Data access exception. */ 385/* Data access exception. */
383 . = 0x300 386 . = 0x300
387 DO_KVM 0x300
384DataAccess: 388DataAccess:
385 EXCEPTION_PROLOG 389 EXCEPTION_PROLOG
386 mfspr r10,SPRN_DSISR 390 mfspr r10,SPRN_DSISR
@@ -397,6 +401,7 @@ DataAccess:
397 401
398/* Instruction access exception. */ 402/* Instruction access exception. */
399 . = 0x400 403 . = 0x400
404 DO_KVM 0x400
400InstructionAccess: 405InstructionAccess:
401 EXCEPTION_PROLOG 406 EXCEPTION_PROLOG
402 andis. r0,r9,0x4000 /* no pte found? */ 407 andis. r0,r9,0x4000 /* no pte found? */
@@ -413,6 +418,7 @@ InstructionAccess:
413 418
414/* Alignment exception */ 419/* Alignment exception */
415 . = 0x600 420 . = 0x600
421 DO_KVM 0x600
416Alignment: 422Alignment:
417 EXCEPTION_PROLOG 423 EXCEPTION_PROLOG
418 mfspr r4,SPRN_DAR 424 mfspr r4,SPRN_DAR
@@ -427,6 +433,7 @@ Alignment:
427 433
428/* Floating-point unavailable */ 434/* Floating-point unavailable */
429 . = 0x800 435 . = 0x800
436 DO_KVM 0x800
430FPUnavailable: 437FPUnavailable:
431BEGIN_FTR_SECTION 438BEGIN_FTR_SECTION
432/* 439/*
@@ -450,6 +457,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
450 457
451/* System call */ 458/* System call */
452 . = 0xc00 459 . = 0xc00
460 DO_KVM 0xc00
453SystemCall: 461SystemCall:
454 EXCEPTION_PROLOG 462 EXCEPTION_PROLOG
455 EXC_XFER_EE_LITE(0xc00, DoSyscall) 463 EXC_XFER_EE_LITE(0xc00, DoSyscall)
@@ -467,9 +475,11 @@ SystemCall:
467 * by executing an altivec instruction. 475 * by executing an altivec instruction.
468 */ 476 */
469 . = 0xf00 477 . = 0xf00
478 DO_KVM 0xf00
470 b PerformanceMonitor 479 b PerformanceMonitor
471 480
472 . = 0xf20 481 . = 0xf20
482 DO_KVM 0xf20
473 b AltiVecUnavailable 483 b AltiVecUnavailable
474 484
475/* 485/*
@@ -882,6 +892,10 @@ __secondary_start:
882 RFI 892 RFI
883#endif /* CONFIG_SMP */ 893#endif /* CONFIG_SMP */
884 894
895#ifdef CONFIG_KVM_BOOK3S_HANDLER
896#include "../kvm/book3s_rmhandlers.S"
897#endif
898
885/* 899/*
886 * Those generic dummy functions are kept for CPUs not 900 * Those generic dummy functions are kept for CPUs not
887 * included in CONFIG_6xx 901 * included in CONFIG_6xx
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index bed9a29ee383..844a44b64472 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -37,7 +37,7 @@
37#include <asm/firmware.h> 37#include <asm/firmware.h>
38#include <asm/page_64.h> 38#include <asm/page_64.h>
39#include <asm/irqflags.h> 39#include <asm/irqflags.h>
40#include <asm/kvm_book3s_64_asm.h> 40#include <asm/kvm_book3s_asm.h>
41 41
42/* The physical memory is layed out such that the secondary processor 42/* The physical memory is layed out such that the secondary processor
43 * spin code sits at 0x0000...0x00ff. On server, the vectors follow 43 * spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -169,7 +169,7 @@ exception_marker:
169/* KVM trampoline code needs to be close to the interrupt handlers */ 169/* KVM trampoline code needs to be close to the interrupt handlers */
170 170
171#ifdef CONFIG_KVM_BOOK3S_64_HANDLER 171#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
172#include "../kvm/book3s_64_rmhandlers.S" 172#include "../kvm/book3s_rmhandlers.S"
173#endif 173#endif
174 174
175_GLOBAL(generic_secondary_thread_init) 175_GLOBAL(generic_secondary_thread_init)
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index ab3e392ac63c..bc9f39d2598b 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -101,6 +101,10 @@ EXPORT_SYMBOL(pci_dram_offset);
101EXPORT_SYMBOL(start_thread); 101EXPORT_SYMBOL(start_thread);
102EXPORT_SYMBOL(kernel_thread); 102EXPORT_SYMBOL(kernel_thread);
103 103
104#ifndef CONFIG_BOOKE
105EXPORT_SYMBOL_GPL(cvt_df);
106EXPORT_SYMBOL_GPL(cvt_fd);
107#endif
104EXPORT_SYMBOL(giveup_fpu); 108EXPORT_SYMBOL(giveup_fpu);
105#ifdef CONFIG_ALTIVEC 109#ifdef CONFIG_ALTIVEC
106EXPORT_SYMBOL(giveup_altivec); 110EXPORT_SYMBOL(giveup_altivec);
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 689a57c2ac80..73c0a3f64ed1 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -147,7 +147,7 @@ static int __init kvmppc_44x_init(void)
147 if (r) 147 if (r)
148 return r; 148 return r;
149 149
150 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); 150 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
151} 151}
152 152
153static void __exit kvmppc_44x_exit(void) 153static void __exit kvmppc_44x_exit(void)
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 60624cc9f4d4..b7baff78f90c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,12 +22,34 @@ config KVM
22 select ANON_INODES 22 select ANON_INODES
23 select KVM_MMIO 23 select KVM_MMIO
24 24
25config KVM_BOOK3S_HANDLER
26 bool
27
28config KVM_BOOK3S_32_HANDLER
29 bool
30 select KVM_BOOK3S_HANDLER
31
25config KVM_BOOK3S_64_HANDLER 32config KVM_BOOK3S_64_HANDLER
26 bool 33 bool
34 select KVM_BOOK3S_HANDLER
35
36config KVM_BOOK3S_32
37 tristate "KVM support for PowerPC book3s_32 processors"
38 depends on EXPERIMENTAL && PPC_BOOK3S_32 && !SMP && !PTE_64BIT
39 select KVM
40 select KVM_BOOK3S_32_HANDLER
41 ---help---
42 Support running unmodified book3s_32 guest kernels
43 in virtual machines on book3s_32 host processors.
44
45 This module provides access to the hardware capabilities through
46 a character device node named /dev/kvm.
47
48 If unsure, say N.
27 49
28config KVM_BOOK3S_64 50config KVM_BOOK3S_64
29 tristate "KVM support for PowerPC book3s_64 processors" 51 tristate "KVM support for PowerPC book3s_64 processors"
30 depends on EXPERIMENTAL && PPC64 52 depends on EXPERIMENTAL && PPC_BOOK3S_64
31 select KVM 53 select KVM
32 select KVM_BOOK3S_64_HANDLER 54 select KVM_BOOK3S_64_HANDLER
33 ---help--- 55 ---help---
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 56484d652377..ff436066bf77 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -14,7 +14,7 @@ CFLAGS_emulate.o := -I.
14 14
15common-objs-y += powerpc.o emulate.o 15common-objs-y += powerpc.o emulate.o
16obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o 16obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
17obj-$(CONFIG_KVM_BOOK3S_64_HANDLER) += book3s_64_exports.o 17obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
18 18
19AFLAGS_booke_interrupts.o := -I$(obj) 19AFLAGS_booke_interrupts.o := -I$(obj)
20 20
@@ -40,17 +40,31 @@ kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs)
40 40
41kvm-book3s_64-objs := \ 41kvm-book3s_64-objs := \
42 $(common-objs-y) \ 42 $(common-objs-y) \
43 fpu.o \
44 book3s_paired_singles.o \
43 book3s.o \ 45 book3s.o \
44 book3s_64_emulate.o \ 46 book3s_emulate.o \
45 book3s_64_interrupts.o \ 47 book3s_interrupts.o \
46 book3s_64_mmu_host.o \ 48 book3s_64_mmu_host.o \
47 book3s_64_mmu.o \ 49 book3s_64_mmu.o \
48 book3s_32_mmu.o 50 book3s_32_mmu.o
49kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs) 51kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-objs)
50 52
53kvm-book3s_32-objs := \
54 $(common-objs-y) \
55 fpu.o \
56 book3s_paired_singles.o \
57 book3s.o \
58 book3s_emulate.o \
59 book3s_interrupts.o \
60 book3s_32_mmu_host.o \
61 book3s_32_mmu.o
62kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
63
51kvm-objs := $(kvm-objs-m) $(kvm-objs-y) 64kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
52 65
53obj-$(CONFIG_KVM_440) += kvm.o 66obj-$(CONFIG_KVM_440) += kvm.o
54obj-$(CONFIG_KVM_E500) += kvm.o 67obj-$(CONFIG_KVM_E500) += kvm.o
55obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o 68obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
69obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
56 70
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 604af29b71ed..b998abf1a63d 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -16,6 +16,7 @@
16 16
17#include <linux/kvm_host.h> 17#include <linux/kvm_host.h>
18#include <linux/err.h> 18#include <linux/err.h>
19#include <linux/slab.h>
19 20
20#include <asm/reg.h> 21#include <asm/reg.h>
21#include <asm/cputable.h> 22#include <asm/cputable.h>
@@ -29,6 +30,7 @@
29#include <linux/gfp.h> 30#include <linux/gfp.h>
30#include <linux/sched.h> 31#include <linux/sched.h>
31#include <linux/vmalloc.h> 32#include <linux/vmalloc.h>
33#include <linux/highmem.h>
32 34
33#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU 35#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
34 36
@@ -36,7 +38,15 @@
36/* #define EXIT_DEBUG_SIMPLE */ 38/* #define EXIT_DEBUG_SIMPLE */
37/* #define DEBUG_EXT */ 39/* #define DEBUG_EXT */
38 40
39static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); 41static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
42 ulong msr);
43
44/* Some compatibility defines */
45#ifdef CONFIG_PPC_BOOK3S_32
46#define MSR_USER32 MSR_USER
47#define MSR_USER64 MSR_USER
48#define HW_PAGE_SIZE PAGE_SIZE
49#endif
40 50
41struct kvm_stats_debugfs_item debugfs_entries[] = { 51struct kvm_stats_debugfs_item debugfs_entries[] = {
42 { "exits", VCPU_STAT(sum_exits) }, 52 { "exits", VCPU_STAT(sum_exits) },
@@ -69,18 +79,26 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
69 79
70void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 80void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
71{ 81{
72 memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); 82#ifdef CONFIG_PPC_BOOK3S_64
73 memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu, 83 memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
84 memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
74 sizeof(get_paca()->shadow_vcpu)); 85 sizeof(get_paca()->shadow_vcpu));
75 get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; 86 to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
87#endif
88
89#ifdef CONFIG_PPC_BOOK3S_32
90 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
91#endif
76} 92}
77 93
78void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 94void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
79{ 95{
80 memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); 96#ifdef CONFIG_PPC_BOOK3S_64
81 memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, 97 memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
98 memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
82 sizeof(get_paca()->shadow_vcpu)); 99 sizeof(get_paca()->shadow_vcpu));
83 to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; 100 to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
101#endif
84 102
85 kvmppc_giveup_ext(vcpu, MSR_FP); 103 kvmppc_giveup_ext(vcpu, MSR_FP);
86 kvmppc_giveup_ext(vcpu, MSR_VEC); 104 kvmppc_giveup_ext(vcpu, MSR_VEC);
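
The load/put paths now reach the shadow state through to_svcpu() rather than open-coded paca accesses, which is what makes the same C code work on both ports: on 64-bit hosts the shadow vcpu stays embedded in the paca (reachable from real mode via r13), while on 32-bit hosts it is a separately allocated object that is additionally published through current->thread.kvm_shadow_vcpu for the low-level entry code. A rough sketch of the accessor this implies; the real definitions live in asm/kvm_book3s*.h and may differ in detail:

/* Sketch only -- not the actual asm/kvm_book3s*.h implementation. */
#ifdef CONFIG_PPC_BOOK3S_64
static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
{
	return &get_paca()->shadow_vcpu;
}
#else /* CONFIG_PPC_BOOK3S_32 */
static inline struct kvmppc_book3s_shadow_vcpu *to_svcpu(struct kvm_vcpu *vcpu)
{
	return to_book3s(vcpu)->shadow_vcpu;
}
#endif
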
@@ -131,18 +149,22 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
131 } 149 }
132 } 150 }
133 151
134 if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) || 152 if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) !=
135 (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) { 153 (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
136 kvmppc_mmu_flush_segments(vcpu); 154 kvmppc_mmu_flush_segments(vcpu);
137 kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); 155 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
138 } 156 }
157
158 /* Preload FPU if it's enabled */
159 if (vcpu->arch.msr & MSR_FP)
160 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
139} 161}
140 162
141void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) 163void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
142{ 164{
143 vcpu->arch.srr0 = vcpu->arch.pc; 165 vcpu->arch.srr0 = kvmppc_get_pc(vcpu);
144 vcpu->arch.srr1 = vcpu->arch.msr | flags; 166 vcpu->arch.srr1 = vcpu->arch.msr | flags;
145 vcpu->arch.pc = to_book3s(vcpu)->hior + vec; 167 kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec);
146 vcpu->arch.mmu.reset_msr(vcpu); 168 vcpu->arch.mmu.reset_msr(vcpu);
147} 169}
148 170
@@ -218,6 +240,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
218 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); 240 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
219} 241}
220 242
243void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
244 struct kvm_interrupt *irq)
245{
246 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
247}
248
221int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) 249int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
222{ 250{
223 int deliver = 1; 251 int deliver = 1;
@@ -302,7 +330,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
302 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); 330 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
303#endif 331#endif
304 priority = __ffs(*pending); 332 priority = __ffs(*pending);
305 while (priority <= (sizeof(unsigned int) * 8)) { 333 while (priority < BOOK3S_IRQPRIO_MAX) {
306 if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && 334 if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
307 (priority != BOOK3S_IRQPRIO_DECREMENTER)) { 335 (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
308 /* DEC interrupts get cleared by mtdec */ 336 /* DEC interrupts get cleared by mtdec */
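
Bounding the walk by BOOK3S_IRQPRIO_MAX instead of the bit width of an unsigned int keeps the delivery loop inside the range of priorities that actually exist. The pattern itself, lowest set bit first with delivered bits cleared, in a standalone toy (deliver() and IRQPRIO_EXAMPLE_MAX are made up for the example):

#include <stdio.h>

enum { IRQPRIO_EXAMPLE_MAX = 16 };	/* stand-in for BOOK3S_IRQPRIO_MAX */

/* pretend delivery: nonzero means the interrupt was taken and can be cleared */
static int deliver(unsigned int priority)
{
	printf("delivering priority %u\n", priority);
	return 1;
}

int main(void)
{
	unsigned long pending = (1UL << 3) | (1UL << 9) | (1UL << 20);
	unsigned int priority;

	if (!pending)
		return 0;

	priority = __builtin_ctzl(pending);		/* like __ffs() */
	while (priority < IRQPRIO_EXAMPLE_MAX) {
		unsigned long higher;

		if (deliver(priority))
			pending &= ~(1UL << priority);	/* clear once delivered */

		/* like find_next_bit(): lowest set bit above 'priority' */
		higher = pending & ~((2UL << priority) - 1);
		if (!higher)
			break;
		priority = __builtin_ctzl(higher);
	}
	return 0;
}

In the toy, bit 20 sits above the defined range and never reaches deliver(); the kernel loop gets the same property from the BOOK3S_IRQPRIO_MAX bound.
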
@@ -318,13 +346,18 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
318 346
319void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) 347void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
320{ 348{
349 u32 host_pvr;
350
321 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; 351 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
322 vcpu->arch.pvr = pvr; 352 vcpu->arch.pvr = pvr;
353#ifdef CONFIG_PPC_BOOK3S_64
323 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { 354 if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
324 kvmppc_mmu_book3s_64_init(vcpu); 355 kvmppc_mmu_book3s_64_init(vcpu);
325 to_book3s(vcpu)->hior = 0xfff00000; 356 to_book3s(vcpu)->hior = 0xfff00000;
326 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; 357 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
327 } else { 358 } else
359#endif
360 {
328 kvmppc_mmu_book3s_32_init(vcpu); 361 kvmppc_mmu_book3s_32_init(vcpu);
329 to_book3s(vcpu)->hior = 0; 362 to_book3s(vcpu)->hior = 0;
330 to_book3s(vcpu)->msr_mask = 0xffffffffULL; 363 to_book3s(vcpu)->msr_mask = 0xffffffffULL;
@@ -337,6 +370,32 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
337 !strcmp(cur_cpu_spec->platform, "ppc970")) 370 !strcmp(cur_cpu_spec->platform, "ppc970"))
338 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 371 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
339 372
373 /* Cell performs badly if MSR_FEx are set. So let's hope nobody
374 really needs them in a VM on Cell and force disable them. */
375 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
376 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
377
378#ifdef CONFIG_PPC_BOOK3S_32
379 /* 32 bit Book3S always has 32 byte dcbz */
380 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
381#endif
382
383 /* On some CPUs we can execute paired single operations natively */
384 asm ( "mfpvr %0" : "=r"(host_pvr));
385 switch (host_pvr) {
386 case 0x00080200: /* lonestar 2.0 */
387 case 0x00088202: /* lonestar 2.2 */
388 case 0x70000100: /* gekko 1.0 */
389 case 0x00080100: /* gekko 2.0 */
390 case 0x00083203: /* gekko 2.3a */
391 case 0x00083213: /* gekko 2.3b */
392 case 0x00083204: /* gekko 2.4 */
393 case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
394 case 0x00087200: /* broadway */
395 vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
396 /* Enable HID2.PSE - in case we need it later */
397 mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
398 }
340} 399}
341 400
342/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To 401/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
@@ -350,34 +409,29 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
350 */ 409 */
351static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) 410static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
352{ 411{
353 bool touched = false; 412 struct page *hpage;
354 hva_t hpage; 413 u64 hpage_offset;
355 u32 *page; 414 u32 *page;
356 int i; 415 int i;
357 416
358 hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); 417 hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
359 if (kvm_is_error_hva(hpage)) 418 if (is_error_page(hpage))
360 return; 419 return;
361 420
362 hpage |= pte->raddr & ~PAGE_MASK; 421 hpage_offset = pte->raddr & ~PAGE_MASK;
363 hpage &= ~0xFFFULL; 422 hpage_offset &= ~0xFFFULL;
364 423 hpage_offset /= 4;
365 page = vmalloc(HW_PAGE_SIZE);
366
367 if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE))
368 goto out;
369 424
370 for (i=0; i < HW_PAGE_SIZE / 4; i++) 425 get_page(hpage);
371 if ((page[i] & 0xff0007ff) == INS_DCBZ) { 426 page = kmap_atomic(hpage, KM_USER0);
372 page[i] &= 0xfffffff7; // reserved instruction, so we trap
373 touched = true;
374 }
375 427
376 if (touched) 428 /* patch dcbz into reserved instruction, so we trap */
377 copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); 429 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
430 if ((page[i] & 0xff0007ff) == INS_DCBZ)
431 page[i] &= 0xfffffff7;
378 432
379out: 433 kunmap_atomic(page, KM_USER0);
380 vfree(page); 434 put_page(hpage);
381} 435}
382 436
383static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, 437static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
@@ -391,15 +445,7 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
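
The rewritten kvmppc_patch_dcbz() above maps the guest page with gfn_to_page()/kmap_atomic() and patches it in place, rather than bouncing a whole page through copy_from_user() and vmalloc(). The scan itself is small enough to show standalone; the INS_DCBZ value below is an assumption (the kernel takes it from asm/kvm_book3s.h), and the instruction words in main() are only examples:

#include <stdint.h>
#include <stdio.h>

/* Toy version of the dcbz-patching scan: walk 32-bit instruction words and
 * clear one opcode bit in every dcbz so it becomes an encoding that traps. */
#define INS_DCBZ	0x7c0007ec	/* assumed dcbz pattern */

static void patch_dcbz_words(uint32_t *page, unsigned int nwords)
{
	unsigned int i;

	for (i = 0; i < nwords; i++)
		if ((page[i] & 0xff0007ff) == INS_DCBZ)
			page[i] &= 0xfffffff7;	/* reserved encoding -> traps */
}

int main(void)
{
	unsigned int i;
	uint32_t code[4] = {
		0x7c0802a6,	/* mflr  r0   */
		0x7c001fec,	/* dcbz  0,r3 */
		0x60000000,	/* nop        */
		0x7c0007ec,	/* dcbz  0,r0 */
	};

	patch_dcbz_words(code, 4);
	for (i = 0; i < 4; i++)
		printf("%08x\n", code[i]);
	return 0;
}

Once patched, every guest dcbz traps back into KVM, which can then perform the 32-byte clear the guest expects instead of the host's larger cache-line clear.
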
391 } else { 445 } else {
392 pte->eaddr = eaddr; 446 pte->eaddr = eaddr;
393 pte->raddr = eaddr & 0xffffffff; 447 pte->raddr = eaddr & 0xffffffff;
394 pte->vpage = eaddr >> 12; 448 pte->vpage = VSID_REAL | eaddr >> 12;
395 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
396 case 0:
397 pte->vpage |= VSID_REAL;
398 case MSR_DR:
399 pte->vpage |= VSID_REAL_DR;
400 case MSR_IR:
401 pte->vpage |= VSID_REAL_IR;
402 }
403 pte->may_read = true; 449 pte->may_read = true;
404 pte->may_write = true; 450 pte->may_write = true;
405 pte->may_execute = true; 451 pte->may_execute = true;
@@ -434,55 +480,55 @@ err:
434 return kvmppc_bad_hva(); 480 return kvmppc_bad_hva();
435} 481}
436 482
437int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr) 483int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
484 bool data)
438{ 485{
439 struct kvmppc_pte pte; 486 struct kvmppc_pte pte;
440 hva_t hva = eaddr;
441 487
442 vcpu->stat.st++; 488 vcpu->stat.st++;
443 489
444 if (kvmppc_xlate(vcpu, eaddr, false, &pte)) 490 if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
445 goto err; 491 return -ENOENT;
446 492
447 hva = kvmppc_pte_to_hva(vcpu, &pte, false); 493 *eaddr = pte.raddr;
448 if (kvm_is_error_hva(hva))
449 goto err;
450 494
451 if (copy_to_user((void __user *)hva, ptr, size)) { 495 if (!pte.may_write)
452 printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); 496 return -EPERM;
453 goto err;
454 }
455 497
456 return 0; 498 if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size))
499 return EMULATE_DO_MMIO;
457 500
458err: 501 return EMULATE_DONE;
459 return -ENOENT;
460} 502}
461 503
462int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, 504int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
463 bool data) 505 bool data)
464{ 506{
465 struct kvmppc_pte pte; 507 struct kvmppc_pte pte;
466 hva_t hva = eaddr; 508 hva_t hva = *eaddr;
467 509
468 vcpu->stat.ld++; 510 vcpu->stat.ld++;
469 511
470 if (kvmppc_xlate(vcpu, eaddr, data, &pte)) 512 if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
471 goto err; 513 goto nopte;
514
515 *eaddr = pte.raddr;
472 516
473 hva = kvmppc_pte_to_hva(vcpu, &pte, true); 517 hva = kvmppc_pte_to_hva(vcpu, &pte, true);
474 if (kvm_is_error_hva(hva)) 518 if (kvm_is_error_hva(hva))
475 goto err; 519 goto mmio;
476 520
477 if (copy_from_user(ptr, (void __user *)hva, size)) { 521 if (copy_from_user(ptr, (void __user *)hva, size)) {
478 printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); 522 printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva);
479 goto err; 523 goto mmio;
480 } 524 }
481 525
482 return 0; 526 return EMULATE_DONE;
483 527
484err: 528nopte:
485 return -ENOENT; 529 return -ENOENT;
530mmio:
531 return EMULATE_DO_MMIO;
486} 532}
487 533
488static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) 534static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
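
kvmppc_ld()/kvmppc_st() now take the effective address by reference (handing the translated real address back through it) and return EMULATE_DONE, EMULATE_DO_MMIO or a negative errno, so callers can distinguish "no mapping" from "this needs an MMIO exit". A user-space toy showing the shape of such a caller; mock_ld() is invented for the example and is not the kernel function:

#include <stdio.h>

enum emulation_result {
	EMULATE_DONE,
	EMULATE_DO_MMIO,
};

/* Invented stand-in for kvmppc_ld(): pretend addresses below 0x1000 are
 * unmapped and addresses at or above 0x80000000 are MMIO. */
static int mock_ld(unsigned long *eaddr, unsigned int *val)
{
	if (*eaddr < 0x1000)
		return -2;			/* like -ENOENT */
	*eaddr &= 0x0fffffffUL;			/* "translated" real address */
	if (*eaddr >= 0x08000000UL)
		return EMULATE_DO_MMIO;
	*val = 0xdeadbeef;
	return EMULATE_DONE;
}

static void try_load(unsigned long eaddr)
{
	unsigned int val = 0;

	switch (mock_ld(&eaddr, &val)) {
	case EMULATE_DONE:
		printf("loaded %#x from real %#lx\n", val, eaddr);
		break;
	case EMULATE_DO_MMIO:
		printf("MMIO exit needed for real %#lx\n", eaddr);
		break;
	default:
		printf("no mapping: inject a storage interrupt\n");
		break;
	}
}

int main(void)
{
	try_load(0x10);
	try_load(0x2000);
	try_load(0x98000000UL);
	return 0;
}
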
@@ -499,12 +545,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
499 int page_found = 0; 545 int page_found = 0;
500 struct kvmppc_pte pte; 546 struct kvmppc_pte pte;
501 bool is_mmio = false; 547 bool is_mmio = false;
548 bool dr = (vcpu->arch.msr & MSR_DR) ? true : false;
549 bool ir = (vcpu->arch.msr & MSR_IR) ? true : false;
550 u64 vsid;
502 551
503 if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) { 552 relocated = data ? dr : ir;
504 relocated = (vcpu->arch.msr & MSR_DR);
505 } else {
506 relocated = (vcpu->arch.msr & MSR_IR);
507 }
508 553
509 /* Resolve real address if translation turned on */ 554 /* Resolve real address if translation turned on */
510 if (relocated) { 555 if (relocated) {
@@ -516,14 +561,25 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
516 pte.raddr = eaddr & 0xffffffff; 561 pte.raddr = eaddr & 0xffffffff;
517 pte.eaddr = eaddr; 562 pte.eaddr = eaddr;
518 pte.vpage = eaddr >> 12; 563 pte.vpage = eaddr >> 12;
519 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 564 }
520 case 0: 565
521 pte.vpage |= VSID_REAL; 566 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
522 case MSR_DR: 567 case 0:
523 pte.vpage |= VSID_REAL_DR; 568 pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
524 case MSR_IR: 569 break;
525 pte.vpage |= VSID_REAL_IR; 570 case MSR_DR:
526 } 571 case MSR_IR:
572 vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
573
574 if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR)
575 pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
576 else
577 pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
578 pte.vpage |= vsid;
579
580 if (vsid == -1)
581 page_found = -EINVAL;
582 break;
527 } 583 }
528 584
529 if (vcpu->arch.mmu.is_dcbz32(vcpu) && 585 if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -538,20 +594,20 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
538 594
539 if (page_found == -ENOENT) { 595 if (page_found == -ENOENT) {
540 /* Page not found in guest PTE entries */ 596 /* Page not found in guest PTE entries */
541 vcpu->arch.dear = vcpu->arch.fault_dear; 597 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
542 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; 598 to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
543 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); 599 vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
544 kvmppc_book3s_queue_irqprio(vcpu, vec); 600 kvmppc_book3s_queue_irqprio(vcpu, vec);
545 } else if (page_found == -EPERM) { 601 } else if (page_found == -EPERM) {
546 /* Storage protection */ 602 /* Storage protection */
547 vcpu->arch.dear = vcpu->arch.fault_dear; 603 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
548 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; 604 to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
549 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; 605 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
550 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); 606 vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
551 kvmppc_book3s_queue_irqprio(vcpu, vec); 607 kvmppc_book3s_queue_irqprio(vcpu, vec);
552 } else if (page_found == -EINVAL) { 608 } else if (page_found == -EINVAL) {
553 /* Page not found in guest SLB */ 609 /* Page not found in guest SLB */
554 vcpu->arch.dear = vcpu->arch.fault_dear; 610 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
555 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 611 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
556 } else if (!is_mmio && 612 } else if (!is_mmio &&
557 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { 613 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
@@ -583,11 +639,13 @@ static inline int get_fpr_index(int i)
583} 639}
584 640
585/* Give up external provider (FPU, Altivec, VSX) */ 641/* Give up external provider (FPU, Altivec, VSX) */
586static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) 642void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
587{ 643{
588 struct thread_struct *t = &current->thread; 644 struct thread_struct *t = &current->thread;
589 u64 *vcpu_fpr = vcpu->arch.fpr; 645 u64 *vcpu_fpr = vcpu->arch.fpr;
646#ifdef CONFIG_VSX
590 u64 *vcpu_vsx = vcpu->arch.vsr; 647 u64 *vcpu_vsx = vcpu->arch.vsr;
648#endif
591 u64 *thread_fpr = (u64*)t->fpr; 649 u64 *thread_fpr = (u64*)t->fpr;
592 int i; 650 int i;
593 651
@@ -629,21 +687,65 @@ static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
629 kvmppc_recalc_shadow_msr(vcpu); 687 kvmppc_recalc_shadow_msr(vcpu);
630} 688}
631 689
690static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
691{
692 ulong srr0 = kvmppc_get_pc(vcpu);
693 u32 last_inst = kvmppc_get_last_inst(vcpu);
694 int ret;
695
696 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
697 if (ret == -ENOENT) {
698 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1);
699 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0);
700 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0);
701 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
702 return EMULATE_AGAIN;
703 }
704
705 return EMULATE_DONE;
706}
707
708static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
709{
710
711 /* Need to do paired single emulation? */
712 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
713 return EMULATE_DONE;
714
715 /* Read out the instruction */
716 if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
717 /* Need to emulate */
718 return EMULATE_FAIL;
719
720 return EMULATE_AGAIN;
721}
722
632/* Handle external providers (FPU, Altivec, VSX) */ 723/* Handle external providers (FPU, Altivec, VSX) */
633static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 724static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
634 ulong msr) 725 ulong msr)
635{ 726{
636 struct thread_struct *t = &current->thread; 727 struct thread_struct *t = &current->thread;
637 u64 *vcpu_fpr = vcpu->arch.fpr; 728 u64 *vcpu_fpr = vcpu->arch.fpr;
729#ifdef CONFIG_VSX
638 u64 *vcpu_vsx = vcpu->arch.vsr; 730 u64 *vcpu_vsx = vcpu->arch.vsr;
731#endif
639 u64 *thread_fpr = (u64*)t->fpr; 732 u64 *thread_fpr = (u64*)t->fpr;
640 int i; 733 int i;
641 734
735 /* When we have paired singles, we emulate in software */
736 if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
737 return RESUME_GUEST;
738
642 if (!(vcpu->arch.msr & msr)) { 739 if (!(vcpu->arch.msr & msr)) {
643 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 740 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
644 return RESUME_GUEST; 741 return RESUME_GUEST;
645 } 742 }
646 743
744 /* We already own the ext */
745 if (vcpu->arch.guest_owned_ext & msr) {
746 return RESUME_GUEST;
747 }
748
647#ifdef DEBUG_EXT 749#ifdef DEBUG_EXT
648 printk(KERN_INFO "Loading up ext 0x%lx\n", msr); 750 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
649#endif 751#endif
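
kvmppc_read_inst() above stages the status bits of the coming instruction-storage interrupt in the guest MSR image using the kvmppc_set_field() helper from kvm_ppc.h. Decoded into ordinary LSB-0 masks, field 33 is 0x40000000 (the same "PTE not found" flag tested against shadow_srr1 further down), fields 34-36 are 0x38000000, and fields 42-47 are 0x003f0000. Adding these lines to the earlier user-space sketch's main() prints exactly those masks:

	printf("%08llx\n", (unsigned long long)set_field(0, 33, 33, 1));  /* 40000000 */
	printf("%08llx\n", (unsigned long long)set_field(0, 34, 36, 7));  /* 38000000 */
	printf("%08llx\n", (unsigned long long)set_field(0, 42, 47, 63)); /* 003f0000 */
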
@@ -696,21 +798,33 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
696 run->ready_for_interrupt_injection = 1; 798 run->ready_for_interrupt_injection = 1;
697#ifdef EXIT_DEBUG 799#ifdef EXIT_DEBUG
698 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", 800 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
699 exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, 801 exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
700 kvmppc_get_dec(vcpu), vcpu->arch.msr); 802 kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1);
701#elif defined (EXIT_DEBUG_SIMPLE) 803#elif defined (EXIT_DEBUG_SIMPLE)
702 if ((exit_nr != 0x900) && (exit_nr != 0x500)) 804 if ((exit_nr != 0x900) && (exit_nr != 0x500))
703 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", 805 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
704 exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear, 806 exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu),
705 vcpu->arch.msr); 807 vcpu->arch.msr);
706#endif 808#endif
707 kvm_resched(vcpu); 809 kvm_resched(vcpu);
708 switch (exit_nr) { 810 switch (exit_nr) {
709 case BOOK3S_INTERRUPT_INST_STORAGE: 811 case BOOK3S_INTERRUPT_INST_STORAGE:
710 vcpu->stat.pf_instruc++; 812 vcpu->stat.pf_instruc++;
813
814#ifdef CONFIG_PPC_BOOK3S_32
815 /* We set segments as unused segments when invalidating them. So
816 * treat the respective fault as segment fault. */
817 if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
818 == SR_INVALID) {
819 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
820 r = RESUME_GUEST;
821 break;
822 }
823#endif
824
711 /* only care about PTEG not found errors, but leave NX alone */ 825 /* only care about PTEG not found errors, but leave NX alone */
712 if (vcpu->arch.shadow_srr1 & 0x40000000) { 826 if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
713 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); 827 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
714 vcpu->stat.sp_instruc++; 828 vcpu->stat.sp_instruc++;
715 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 829 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
716 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { 830 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
@@ -719,37 +833,52 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
719 * so we can't use the NX bit inside the guest. Let's cross our fingers, 833 * so we can't use the NX bit inside the guest. Let's cross our fingers,
720 * that no guest that needs the dcbz hack does NX. 834 * that no guest that needs the dcbz hack does NX.
721 */ 835 */
722 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 836 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
837 r = RESUME_GUEST;
723 } else { 838 } else {
724 vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; 839 vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
725 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 840 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
726 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 841 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
727 r = RESUME_GUEST; 842 r = RESUME_GUEST;
728 } 843 }
729 break; 844 break;
730 case BOOK3S_INTERRUPT_DATA_STORAGE: 845 case BOOK3S_INTERRUPT_DATA_STORAGE:
846 {
847 ulong dar = kvmppc_get_fault_dar(vcpu);
731 vcpu->stat.pf_storage++; 848 vcpu->stat.pf_storage++;
849
850#ifdef CONFIG_PPC_BOOK3S_32
851 /* We set segments as unused segments when invalidating them. So
852 * treat the respective fault as segment fault. */
853 if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
854 kvmppc_mmu_map_segment(vcpu, dar);
855 r = RESUME_GUEST;
856 break;
857 }
858#endif
859
732 /* The only case we need to handle is missing shadow PTEs */ 860 /* The only case we need to handle is missing shadow PTEs */
733 if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) { 861 if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
734 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr); 862 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
735 } else { 863 } else {
736 vcpu->arch.dear = vcpu->arch.fault_dear; 864 vcpu->arch.dear = dar;
737 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; 865 to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr;
738 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 866 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
739 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL); 867 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL);
740 r = RESUME_GUEST; 868 r = RESUME_GUEST;
741 } 869 }
742 break; 870 break;
871 }
743 case BOOK3S_INTERRUPT_DATA_SEGMENT: 872 case BOOK3S_INTERRUPT_DATA_SEGMENT:
744 if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) { 873 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
745 vcpu->arch.dear = vcpu->arch.fault_dear; 874 vcpu->arch.dear = kvmppc_get_fault_dar(vcpu);
746 kvmppc_book3s_queue_irqprio(vcpu, 875 kvmppc_book3s_queue_irqprio(vcpu,
747 BOOK3S_INTERRUPT_DATA_SEGMENT); 876 BOOK3S_INTERRUPT_DATA_SEGMENT);
748 } 877 }
749 r = RESUME_GUEST; 878 r = RESUME_GUEST;
750 break; 879 break;
751 case BOOK3S_INTERRUPT_INST_SEGMENT: 880 case BOOK3S_INTERRUPT_INST_SEGMENT:
752 if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) { 881 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
753 kvmppc_book3s_queue_irqprio(vcpu, 882 kvmppc_book3s_queue_irqprio(vcpu,
754 BOOK3S_INTERRUPT_INST_SEGMENT); 883 BOOK3S_INTERRUPT_INST_SEGMENT);
755 } 884 }
@@ -764,18 +893,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
764 vcpu->stat.ext_intr_exits++; 893 vcpu->stat.ext_intr_exits++;
765 r = RESUME_GUEST; 894 r = RESUME_GUEST;
766 break; 895 break;
896 case BOOK3S_INTERRUPT_PERFMON:
897 r = RESUME_GUEST;
898 break;
767 case BOOK3S_INTERRUPT_PROGRAM: 899 case BOOK3S_INTERRUPT_PROGRAM:
768 { 900 {
769 enum emulation_result er; 901 enum emulation_result er;
770 ulong flags; 902 ulong flags;
771 903
772 flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; 904program_interrupt:
905 flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
773 906
774 if (vcpu->arch.msr & MSR_PR) { 907 if (vcpu->arch.msr & MSR_PR) {
775#ifdef EXIT_DEBUG 908#ifdef EXIT_DEBUG
776 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst); 909 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
777#endif 910#endif
778 if ((vcpu->arch.last_inst & 0xff0007ff) != 911 if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
779 (INS_DCBZ & 0xfffffff7)) { 912 (INS_DCBZ & 0xfffffff7)) {
780 kvmppc_core_queue_program(vcpu, flags); 913 kvmppc_core_queue_program(vcpu, flags);
781 r = RESUME_GUEST; 914 r = RESUME_GUEST;
@@ -789,33 +922,80 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
789 case EMULATE_DONE: 922 case EMULATE_DONE:
790 r = RESUME_GUEST_NV; 923 r = RESUME_GUEST_NV;
791 break; 924 break;
925 case EMULATE_AGAIN:
926 r = RESUME_GUEST;
927 break;
792 case EMULATE_FAIL: 928 case EMULATE_FAIL:
793 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 929 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
794 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 930 __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
795 kvmppc_core_queue_program(vcpu, flags); 931 kvmppc_core_queue_program(vcpu, flags);
796 r = RESUME_GUEST; 932 r = RESUME_GUEST;
797 break; 933 break;
934 case EMULATE_DO_MMIO:
935 run->exit_reason = KVM_EXIT_MMIO;
936 r = RESUME_HOST_NV;
937 break;
798 default: 938 default:
799 BUG(); 939 BUG();
800 } 940 }
801 break; 941 break;
802 } 942 }
803 case BOOK3S_INTERRUPT_SYSCALL: 943 case BOOK3S_INTERRUPT_SYSCALL:
804#ifdef EXIT_DEBUG 944 // XXX make user settable
805 printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); 945 if (vcpu->arch.osi_enabled &&
806#endif 946 (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
807 vcpu->stat.syscall_exits++; 947 (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
808 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 948 u64 *gprs = run->osi.gprs;
809 r = RESUME_GUEST; 949 int i;
950
951 run->exit_reason = KVM_EXIT_OSI;
952 for (i = 0; i < 32; i++)
953 gprs[i] = kvmppc_get_gpr(vcpu, i);
954 vcpu->arch.osi_needed = 1;
955 r = RESUME_HOST_NV;
956
957 } else {
958 vcpu->stat.syscall_exits++;
959 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
960 r = RESUME_GUEST;
961 }
810 break; 962 break;
811 case BOOK3S_INTERRUPT_FP_UNAVAIL: 963 case BOOK3S_INTERRUPT_FP_UNAVAIL:
812 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
813 break;
814 case BOOK3S_INTERRUPT_ALTIVEC: 964 case BOOK3S_INTERRUPT_ALTIVEC:
815 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
816 break;
817 case BOOK3S_INTERRUPT_VSX: 965 case BOOK3S_INTERRUPT_VSX:
818 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX); 966 {
967 int ext_msr = 0;
968
969 switch (exit_nr) {
970 case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break;
971 case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break;
972 case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break;
973 }
974
975 switch (kvmppc_check_ext(vcpu, exit_nr)) {
976 case EMULATE_DONE:
977 /* everything ok - let's enable the ext */
978 r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
979 break;
980 case EMULATE_FAIL:
981 /* we need to emulate this instruction */
982 goto program_interrupt;
983 break;
984 default:
985 /* nothing to worry about - go again */
986 break;
987 }
988 break;
989 }
990 case BOOK3S_INTERRUPT_ALIGNMENT:
991 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
992 to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu,
993 kvmppc_get_last_inst(vcpu));
994 vcpu->arch.dear = kvmppc_alignment_dar(vcpu,
995 kvmppc_get_last_inst(vcpu));
996 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
997 }
998 r = RESUME_GUEST;
819 break; 999 break;
820 case BOOK3S_INTERRUPT_MACHINE_CHECK: 1000 case BOOK3S_INTERRUPT_MACHINE_CHECK:
821 case BOOK3S_INTERRUPT_TRACE: 1001 case BOOK3S_INTERRUPT_TRACE:
@@ -825,7 +1005,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
825 default: 1005 default:
826 /* Ugh - bork here! What did we get? */ 1006 /* Ugh - bork here! What did we get? */
827 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 1007 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
828 exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1); 1008 exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
829 r = RESUME_HOST; 1009 r = RESUME_HOST;
830 BUG(); 1010 BUG();
831 break; 1011 break;
@@ -852,7 +1032,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
852 } 1032 }
853 1033
854#ifdef EXIT_DEBUG 1034#ifdef EXIT_DEBUG
855 printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r); 1035 printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r);
856#endif 1036#endif
857 1037
858 return r; 1038 return r;
@@ -867,10 +1047,12 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
867{ 1047{
868 int i; 1048 int i;
869 1049
870 regs->pc = vcpu->arch.pc; 1050 vcpu_load(vcpu);
1051
1052 regs->pc = kvmppc_get_pc(vcpu);
871 regs->cr = kvmppc_get_cr(vcpu); 1053 regs->cr = kvmppc_get_cr(vcpu);
872 regs->ctr = vcpu->arch.ctr; 1054 regs->ctr = kvmppc_get_ctr(vcpu);
873 regs->lr = vcpu->arch.lr; 1055 regs->lr = kvmppc_get_lr(vcpu);
874 regs->xer = kvmppc_get_xer(vcpu); 1056 regs->xer = kvmppc_get_xer(vcpu);
875 regs->msr = vcpu->arch.msr; 1057 regs->msr = vcpu->arch.msr;
876 regs->srr0 = vcpu->arch.srr0; 1058 regs->srr0 = vcpu->arch.srr0;
@@ -887,6 +1069,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
887 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1069 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
888 regs->gpr[i] = kvmppc_get_gpr(vcpu, i); 1070 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
889 1071
1072 vcpu_put(vcpu);
1073
890 return 0; 1074 return 0;
891} 1075}
892 1076
@@ -894,10 +1078,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
894{ 1078{
895 int i; 1079 int i;
896 1080
897 vcpu->arch.pc = regs->pc; 1081 vcpu_load(vcpu);
1082
1083 kvmppc_set_pc(vcpu, regs->pc);
898 kvmppc_set_cr(vcpu, regs->cr); 1084 kvmppc_set_cr(vcpu, regs->cr);
899 vcpu->arch.ctr = regs->ctr; 1085 kvmppc_set_ctr(vcpu, regs->ctr);
900 vcpu->arch.lr = regs->lr; 1086 kvmppc_set_lr(vcpu, regs->lr);
901 kvmppc_set_xer(vcpu, regs->xer); 1087 kvmppc_set_xer(vcpu, regs->xer);
902 kvmppc_set_msr(vcpu, regs->msr); 1088 kvmppc_set_msr(vcpu, regs->msr);
903 vcpu->arch.srr0 = regs->srr0; 1089 vcpu->arch.srr0 = regs->srr0;
@@ -913,6 +1099,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
913 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 1099 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
914 kvmppc_set_gpr(vcpu, i, regs->gpr[i]); 1100 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
915 1101
1102 vcpu_put(vcpu);
1103
916 return 0; 1104 return 0;
917} 1105}
918 1106
@@ -922,6 +1110,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
922 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1110 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
923 int i; 1111 int i;
924 1112
1113 vcpu_load(vcpu);
1114
925 sregs->pvr = vcpu->arch.pvr; 1115 sregs->pvr = vcpu->arch.pvr;
926 1116
927 sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; 1117 sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
@@ -940,6 +1130,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
940 sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; 1130 sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
941 } 1131 }
942 } 1132 }
1133
1134 vcpu_put(vcpu);
1135
943 return 0; 1136 return 0;
944} 1137}
945 1138
@@ -949,6 +1142,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
949 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1142 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
950 int i; 1143 int i;
951 1144
1145 vcpu_load(vcpu);
1146
952 kvmppc_set_pvr(vcpu, sregs->pvr); 1147 kvmppc_set_pvr(vcpu, sregs->pvr);
953 1148
954 vcpu3s->sdr1 = sregs->u.s.sdr1; 1149 vcpu3s->sdr1 = sregs->u.s.sdr1;
@@ -975,6 +1170,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
975 1170
976 /* Flush the MMU after messing with the segments */ 1171 /* Flush the MMU after messing with the segments */
977 kvmppc_mmu_pte_flush(vcpu, 0, 0); 1172 kvmppc_mmu_pte_flush(vcpu, 0, 0);
1173
1174 vcpu_put(vcpu);
1175
978 return 0; 1176 return 0;
979} 1177}
980 1178
@@ -1042,24 +1240,33 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1042{ 1240{
1043 struct kvmppc_vcpu_book3s *vcpu_book3s; 1241 struct kvmppc_vcpu_book3s *vcpu_book3s;
1044 struct kvm_vcpu *vcpu; 1242 struct kvm_vcpu *vcpu;
1045 int err; 1243 int err = -ENOMEM;
1046 1244
1047 vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, 1245 vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s));
1048 get_order(sizeof(struct kvmppc_vcpu_book3s))); 1246 if (!vcpu_book3s)
1049 if (!vcpu_book3s) {
1050 err = -ENOMEM;
1051 goto out; 1247 goto out;
1052 } 1248
1249 memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s));
1250
1251 vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
1252 kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
1253 if (!vcpu_book3s->shadow_vcpu)
1254 goto free_vcpu;
1053 1255
1054 vcpu = &vcpu_book3s->vcpu; 1256 vcpu = &vcpu_book3s->vcpu;
1055 err = kvm_vcpu_init(vcpu, kvm, id); 1257 err = kvm_vcpu_init(vcpu, kvm, id);
1056 if (err) 1258 if (err)
1057 goto free_vcpu; 1259 goto free_shadow_vcpu;
1058 1260
1059 vcpu->arch.host_retip = kvm_return_point; 1261 vcpu->arch.host_retip = kvm_return_point;
1060 vcpu->arch.host_msr = mfmsr(); 1262 vcpu->arch.host_msr = mfmsr();
1263#ifdef CONFIG_PPC_BOOK3S_64
1061 /* default to book3s_64 (970fx) */ 1264 /* default to book3s_64 (970fx) */
1062 vcpu->arch.pvr = 0x3C0301; 1265 vcpu->arch.pvr = 0x3C0301;
1266#else
1267 /* default to book3s_32 (750) */
1268 vcpu->arch.pvr = 0x84202;
1269#endif
1063 kvmppc_set_pvr(vcpu, vcpu->arch.pvr); 1270 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
1064 vcpu_book3s->slb_nr = 64; 1271 vcpu_book3s->slb_nr = 64;
1065 1272
@@ -1067,23 +1274,24 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
1067 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; 1274 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
1068 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; 1275 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
1069 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; 1276 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
1277#ifdef CONFIG_PPC_BOOK3S_64
1070 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; 1278 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
1279#else
1280 vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
1281#endif
1071 1282
1072 vcpu->arch.shadow_msr = MSR_USER64; 1283 vcpu->arch.shadow_msr = MSR_USER64;
1073 1284
1074 err = __init_new_context(); 1285 err = kvmppc_mmu_init(vcpu);
1075 if (err < 0) 1286 if (err < 0)
1076 goto free_vcpu; 1287 goto free_shadow_vcpu;
1077 vcpu_book3s->context_id = err;
1078
1079 vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1;
1080 vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS;
1081 vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
1082 1288
1083 return vcpu; 1289 return vcpu;
1084 1290
1291free_shadow_vcpu:
1292 kfree(vcpu_book3s->shadow_vcpu);
1085free_vcpu: 1293free_vcpu:
1086 free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); 1294 vfree(vcpu_book3s);
1087out: 1295out:
1088 return ERR_PTR(err); 1296 return ERR_PTR(err);
1089} 1297}
@@ -1092,9 +1300,9 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
1092{ 1300{
1093 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 1301 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
1094 1302
1095 __destroy_context(vcpu_book3s->context_id);
1096 kvm_vcpu_uninit(vcpu); 1303 kvm_vcpu_uninit(vcpu);
1097 free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); 1304 kfree(vcpu_book3s->shadow_vcpu);
1305 vfree(vcpu_book3s);
1098} 1306}
1099 1307
1100extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); 1308extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -1102,8 +1310,12 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1102{ 1310{
1103 int ret; 1311 int ret;
1104 struct thread_struct ext_bkp; 1312 struct thread_struct ext_bkp;
1313#ifdef CONFIG_ALTIVEC
1105 bool save_vec = current->thread.used_vr; 1314 bool save_vec = current->thread.used_vr;
1315#endif
1316#ifdef CONFIG_VSX
1106 bool save_vsx = current->thread.used_vsr; 1317 bool save_vsx = current->thread.used_vsr;
1318#endif
1107 ulong ext_msr; 1319 ulong ext_msr;
1108 1320
1109 /* No need to go into the guest when all we do is going out */ 1321 /* No need to go into the guest when all we do is going out */
@@ -1144,6 +1356,10 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1144 /* XXX we get called with irq disabled - change that! */ 1356 /* XXX we get called with irq disabled - change that! */
1145 local_irq_enable(); 1357 local_irq_enable();
1146 1358
1359 /* Preload FPU if it's enabled */
1360 if (vcpu->arch.msr & MSR_FP)
1361 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
1362
1147 ret = __kvmppc_vcpu_entry(kvm_run, vcpu); 1363 ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
1148 1364
1149 local_irq_disable(); 1365 local_irq_disable();
@@ -1179,7 +1395,8 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1179 1395
1180static int kvmppc_book3s_init(void) 1396static int kvmppc_book3s_init(void)
1181{ 1397{
1182 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE); 1398 return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
1399 THIS_MODULE);
1183} 1400}
1184 1401
1185static void kvmppc_book3s_exit(void) 1402static void kvmppc_book3s_exit(void)
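
The new BOOK3S_INTERRUPT_SYSCALL handling above only turns a guest sc into a KVM_EXIT_OSI exit when r3/r4 carry the OSI magic values; otherwise the syscall is reflected back into the guest as a normal 0xc00. A minimal user-space sketch of the consuming side, assuming a uapi header that already exports KVM_EXIT_OSI and run->osi.gprs (both introduced by this series); emulate_osi_call() and handle_kvm_exit() are hypothetical names, not part of the patch:

	#include <linux/kvm.h>

	/* placeholder: interpret the 32 saved guest GPRs as an OSI call */
	static void emulate_osi_call(__u64 *gprs)
	{
		(void)gprs;
	}

	static void handle_kvm_exit(struct kvm_run *run)
	{
		if (run->exit_reason == KVM_EXIT_OSI) {
			/* the kernel copied all 32 guest GPRs out before exiting */
			emulate_osi_call(run->osi.gprs);
			/* values left in run->osi.gprs are expected to be loaded
			 * back into the vcpu on the next KVM_RUN, which is what
			 * the osi_needed flag set above arranges for */
		}
	}
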
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index faf99f20d993..0b10503c8a4a 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -37,7 +37,7 @@
37#define dprintk(X...) do { } while(0) 37#define dprintk(X...) do { } while(0)
38#endif 38#endif
39 39
40#ifdef DEBUG_PTE 40#ifdef DEBUG_MMU_PTE
41#define dprintk_pte(X...) printk(KERN_INFO X) 41#define dprintk_pte(X...) printk(KERN_INFO X)
42#else 42#else
43#define dprintk_pte(X...) do { } while(0) 43#define dprintk_pte(X...) do { } while(0)
@@ -45,6 +45,9 @@
45 45
46#define PTEG_FLAG_ACCESSED 0x00000100 46#define PTEG_FLAG_ACCESSED 0x00000100
47#define PTEG_FLAG_DIRTY 0x00000080 47#define PTEG_FLAG_DIRTY 0x00000080
48#ifndef SID_SHIFT
49#define SID_SHIFT 28
50#endif
48 51
49static inline bool check_debug_ip(struct kvm_vcpu *vcpu) 52static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
50{ 53{
@@ -57,6 +60,8 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
57 60
58static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, 61static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
59 struct kvmppc_pte *pte, bool data); 62 struct kvmppc_pte *pte, bool data);
63static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
64 u64 *vsid);
60 65
61static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) 66static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr)
62{ 67{
@@ -66,13 +71,14 @@ static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t e
66static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, 71static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
67 bool data) 72 bool data)
68{ 73{
69 struct kvmppc_sr *sre = find_sr(to_book3s(vcpu), eaddr); 74 u64 vsid;
70 struct kvmppc_pte pte; 75 struct kvmppc_pte pte;
71 76
72 if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) 77 if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data))
73 return pte.vpage; 78 return pte.vpage;
74 79
75 return (((u64)eaddr >> 12) & 0xffff) | (((u64)sre->vsid) << 16); 80 kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
81 return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16);
76} 82}
77 83
78static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) 84static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
@@ -142,8 +148,13 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
142 bat->bepi_mask); 148 bat->bepi_mask);
143 } 149 }
144 if ((eaddr & bat->bepi_mask) == bat->bepi) { 150 if ((eaddr & bat->bepi_mask) == bat->bepi) {
151 u64 vsid;
152 kvmppc_mmu_book3s_32_esid_to_vsid(vcpu,
153 eaddr >> SID_SHIFT, &vsid);
154 vsid <<= 16;
155 pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid;
156
145 pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask); 157 pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask);
146 pte->vpage = (eaddr >> 12) | VSID_BAT;
147 pte->may_read = bat->pp; 158 pte->may_read = bat->pp;
148 pte->may_write = bat->pp > 1; 159 pte->may_write = bat->pp > 1;
149 pte->may_execute = true; 160 pte->may_execute = true;
@@ -172,7 +183,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
172 struct kvmppc_sr *sre; 183 struct kvmppc_sr *sre;
173 hva_t ptegp; 184 hva_t ptegp;
174 u32 pteg[16]; 185 u32 pteg[16];
175 u64 ptem = 0; 186 u32 ptem = 0;
176 int i; 187 int i;
177 int found = 0; 188 int found = 0;
178 189
@@ -302,6 +313,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
302 /* And then put in the new SR */ 313 /* And then put in the new SR */
303 sre->raw = value; 314 sre->raw = value;
304 sre->vsid = (value & 0x0fffffff); 315 sre->vsid = (value & 0x0fffffff);
316 sre->valid = (value & 0x80000000) ? false : true;
305 sre->Ks = (value & 0x40000000) ? true : false; 317 sre->Ks = (value & 0x40000000) ? true : false;
306 sre->Kp = (value & 0x20000000) ? true : false; 318 sre->Kp = (value & 0x20000000) ? true : false;
307 sre->nx = (value & 0x10000000) ? true : false; 319 sre->nx = (value & 0x10000000) ? true : false;
@@ -312,36 +324,48 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
312 324
313static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) 325static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
314{ 326{
315 kvmppc_mmu_pte_flush(vcpu, ea, ~0xFFFULL); 327 kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000);
316} 328}
317 329
318static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, 330static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
319 u64 *vsid) 331 u64 *vsid)
320{ 332{
333 ulong ea = esid << SID_SHIFT;
334 struct kvmppc_sr *sr;
335 u64 gvsid = esid;
336
337 if (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
338 sr = find_sr(to_book3s(vcpu), ea);
339 if (sr->valid)
340 gvsid = sr->vsid;
341 }
342
321 /* In case we only have one of MSR_IR or MSR_DR set, let's put 343 /* In case we only have one of MSR_IR or MSR_DR set, let's put
322 that in the real-mode context (and hope RM doesn't access 344 that in the real-mode context (and hope RM doesn't access
323 high memory) */ 345 high memory) */
324 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 346 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
325 case 0: 347 case 0:
326 *vsid = (VSID_REAL >> 16) | esid; 348 *vsid = VSID_REAL | esid;
327 break; 349 break;
328 case MSR_IR: 350 case MSR_IR:
329 *vsid = (VSID_REAL_IR >> 16) | esid; 351 *vsid = VSID_REAL_IR | gvsid;
330 break; 352 break;
331 case MSR_DR: 353 case MSR_DR:
332 *vsid = (VSID_REAL_DR >> 16) | esid; 354 *vsid = VSID_REAL_DR | gvsid;
333 break; 355 break;
334 case MSR_DR|MSR_IR: 356 case MSR_DR|MSR_IR:
335 { 357 if (!sr->valid)
336 ulong ea; 358 return -1;
337 ea = esid << SID_SHIFT; 359
338 *vsid = find_sr(to_book3s(vcpu), ea)->vsid; 360 *vsid = sr->vsid;
339 break; 361 break;
340 }
341 default: 362 default:
342 BUG(); 363 BUG();
343 } 364 }
344 365
366 if (vcpu->arch.msr & MSR_PR)
367 *vsid |= VSID_PR;
368
345 return 0; 369 return 0;
346} 370}
347 371
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
new file mode 100644
index 000000000000..0bb66005338f
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -0,0 +1,483 @@
1/*
2 * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
3 *
4 * Authors:
5 * Alexander Graf <agraf@suse.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License, version 2, as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21#include <linux/kvm_host.h>
22
23#include <asm/kvm_ppc.h>
24#include <asm/kvm_book3s.h>
25#include <asm/mmu-hash32.h>
26#include <asm/machdep.h>
27#include <asm/mmu_context.h>
28#include <asm/hw_irq.h>
29
30/* #define DEBUG_MMU */
31/* #define DEBUG_SR */
32
33#ifdef DEBUG_MMU
34#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
35#else
36#define dprintk_mmu(a, ...) do { } while(0)
37#endif
38
39#ifdef DEBUG_SR
40#define dprintk_sr(a, ...) printk(KERN_INFO a, __VA_ARGS__)
41#else
42#define dprintk_sr(a, ...) do { } while(0)
43#endif
44
45#if PAGE_SHIFT != 12
46#error Unknown page size
47#endif
48
49#ifdef CONFIG_SMP
50#error XXX need to grab mmu_hash_lock
51#endif
52
53#ifdef CONFIG_PTE_64BIT
54#error Only 32 bit pages are supported for now
55#endif
56
57static ulong htab;
58static u32 htabmask;
59
60static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
61{
62 volatile u32 *pteg;
63
64 dprintk_mmu("KVM: Flushing SPTE: 0x%llx (0x%llx) -> 0x%llx\n",
65 pte->pte.eaddr, pte->pte.vpage, pte->host_va);
66
67 pteg = (u32*)pte->slot;
68
69 pteg[0] = 0;
70 asm volatile ("sync");
71 asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory");
72 asm volatile ("sync");
73 asm volatile ("tlbsync");
74
75 pte->host_va = 0;
76
77 if (pte->pte.may_write)
78 kvm_release_pfn_dirty(pte->pfn);
79 else
80 kvm_release_pfn_clean(pte->pfn);
81}
82
83void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
84{
85 int i;
86
87 dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%x & 0x%x\n",
88 vcpu->arch.hpte_cache_offset, guest_ea, ea_mask);
89 BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
90
91 guest_ea &= ea_mask;
92 for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
93 struct hpte_cache *pte;
94
95 pte = &vcpu->arch.hpte_cache[i];
96 if (!pte->host_va)
97 continue;
98
99 if ((pte->pte.eaddr & ea_mask) == guest_ea) {
100 invalidate_pte(vcpu, pte);
101 }
102 }
103
104 /* Doing a complete flush -> start from scratch */
105 if (!ea_mask)
106 vcpu->arch.hpte_cache_offset = 0;
107}
108
109void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
110{
111 int i;
112
113 dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
114 vcpu->arch.hpte_cache_offset, guest_vp, vp_mask);
115 BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
116
117 guest_vp &= vp_mask;
118 for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
119 struct hpte_cache *pte;
120
121 pte = &vcpu->arch.hpte_cache[i];
122 if (!pte->host_va)
123 continue;
124
125 if ((pte->pte.vpage & vp_mask) == guest_vp) {
126 invalidate_pte(vcpu, pte);
127 }
128 }
129}
130
131void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
132{
133 int i;
134
135 dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n",
136 vcpu->arch.hpte_cache_offset, pa_start, pa_end);
137 BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
138
139 for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
140 struct hpte_cache *pte;
141
142 pte = &vcpu->arch.hpte_cache[i];
143 if (!pte->host_va)
144 continue;
145
146 if ((pte->pte.raddr >= pa_start) &&
147 (pte->pte.raddr < pa_end)) {
148 invalidate_pte(vcpu, pte);
149 }
150 }
151}
152
153struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data)
154{
155 int i;
156 u64 guest_vp;
157
158 guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false);
159 for (i=0; i<vcpu->arch.hpte_cache_offset; i++) {
160 struct hpte_cache *pte;
161
162 pte = &vcpu->arch.hpte_cache[i];
163 if (!pte->host_va)
164 continue;
165
166 if (pte->pte.vpage == guest_vp)
167 return &pte->pte;
168 }
169
170 return NULL;
171}
172
173static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
174{
175 if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM)
176 kvmppc_mmu_pte_flush(vcpu, 0, 0);
177
178 return vcpu->arch.hpte_cache_offset++;
179}
180
181/* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using
182 * a hash, so we don't waste cycles on looping */
183static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
184{
185 return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
186 ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
187 ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
188 ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
189 ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
190 ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
191 ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
192 ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
193}
194
195
196static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
197{
198 struct kvmppc_sid_map *map;
199 u16 sid_map_mask;
200
201 if (vcpu->arch.msr & MSR_PR)
202 gvsid |= VSID_PR;
203
204 sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
205 map = &to_book3s(vcpu)->sid_map[sid_map_mask];
206 if (map->guest_vsid == gvsid) {
207 dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n",
208 gvsid, map->host_vsid);
209 return map;
210 }
211
212 map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask];
213 if (map->guest_vsid == gvsid) {
214 dprintk_sr("SR: Searching 0x%llx -> 0x%llx\n",
215 gvsid, map->host_vsid);
216 return map;
217 }
218
219 dprintk_sr("SR: Searching 0x%llx -> not found\n", gvsid);
220 return NULL;
221}
222
223static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,
224 bool primary)
225{
226 u32 page, hash;
227 ulong pteg = htab;
228
229 page = (eaddr & ~ESID_MASK) >> 12;
230
231 hash = ((vsid ^ page) << 6);
232 if (!primary)
233 hash = ~hash;
234
235 hash &= htabmask;
236
237 pteg |= hash;
238
239 dprintk_mmu("htab: %lx | hash: %x | htabmask: %x | pteg: %lx\n",
240 htab, hash, htabmask, pteg);
241
242 return (u32*)pteg;
243}
244
245extern char etext[];
246
247int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
248{
249 pfn_t hpaddr;
250 u64 va;
251 u64 vsid;
252 struct kvmppc_sid_map *map;
253 volatile u32 *pteg;
254 u32 eaddr = orig_pte->eaddr;
255 u32 pteg0, pteg1;
256 register int rr = 0;
257 bool primary = false;
258 bool evict = false;
259 int hpte_id;
260 struct hpte_cache *pte;
261
262 /* Get host physical address for gpa */
263 hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
264 if (kvm_is_error_hva(hpaddr)) {
265 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
266 orig_pte->eaddr);
267 return -EINVAL;
268 }
269 hpaddr <<= PAGE_SHIFT;
270
271 /* and write the mapping ea -> hpa into the pt */
272 vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
273 map = find_sid_vsid(vcpu, vsid);
274 if (!map) {
275 kvmppc_mmu_map_segment(vcpu, eaddr);
276 map = find_sid_vsid(vcpu, vsid);
277 }
278 BUG_ON(!map);
279
280 vsid = map->host_vsid;
281 va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK);
282
283next_pteg:
284 if (rr == 16) {
285 primary = !primary;
286 evict = true;
287 rr = 0;
288 }
289
290 pteg = kvmppc_mmu_get_pteg(vcpu, vsid, eaddr, primary);
291
292 /* not evicting yet */
293 if (!evict && (pteg[rr] & PTE_V)) {
294 rr += 2;
295 goto next_pteg;
296 }
297
298 dprintk_mmu("KVM: old PTEG: %p (%d)\n", pteg, rr);
299 dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]);
300 dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]);
301 dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]);
302 dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]);
303 dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]);
304 dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]);
305 dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]);
306 dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]);
307
308 pteg0 = ((eaddr & 0x0fffffff) >> 22) | (vsid << 7) | PTE_V |
309 (primary ? 0 : PTE_SEC);
310 pteg1 = hpaddr | PTE_M | PTE_R | PTE_C;
311
312 if (orig_pte->may_write) {
313 pteg1 |= PP_RWRW;
314 mark_page_dirty(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
315 } else {
316 pteg1 |= PP_RWRX;
317 }
318
319 local_irq_disable();
320
321 if (pteg[rr]) {
322 pteg[rr] = 0;
323 asm volatile ("sync");
324 }
325 pteg[rr + 1] = pteg1;
326 pteg[rr] = pteg0;
327 asm volatile ("sync");
328
329 local_irq_enable();
330
331 dprintk_mmu("KVM: new PTEG: %p\n", pteg);
332 dprintk_mmu("KVM: %08x - %08x\n", pteg[0], pteg[1]);
333 dprintk_mmu("KVM: %08x - %08x\n", pteg[2], pteg[3]);
334 dprintk_mmu("KVM: %08x - %08x\n", pteg[4], pteg[5]);
335 dprintk_mmu("KVM: %08x - %08x\n", pteg[6], pteg[7]);
336 dprintk_mmu("KVM: %08x - %08x\n", pteg[8], pteg[9]);
337 dprintk_mmu("KVM: %08x - %08x\n", pteg[10], pteg[11]);
338 dprintk_mmu("KVM: %08x - %08x\n", pteg[12], pteg[13]);
339 dprintk_mmu("KVM: %08x - %08x\n", pteg[14], pteg[15]);
340
341
342 /* Now tell our Shadow PTE code about the new page */
343
344 hpte_id = kvmppc_mmu_hpte_cache_next(vcpu);
345 pte = &vcpu->arch.hpte_cache[hpte_id];
346
347 dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
348 orig_pte->may_write ? 'w' : '-',
349 orig_pte->may_execute ? 'x' : '-',
350 orig_pte->eaddr, (ulong)pteg, va,
351 orig_pte->vpage, hpaddr);
352
353 pte->slot = (ulong)&pteg[rr];
354 pte->host_va = va;
355 pte->pte = *orig_pte;
356 pte->pfn = hpaddr >> PAGE_SHIFT;
357
358 return 0;
359}
360
361static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
362{
363 struct kvmppc_sid_map *map;
364 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
365 u16 sid_map_mask;
366 static int backwards_map = 0;
367
368 if (vcpu->arch.msr & MSR_PR)
369 gvsid |= VSID_PR;
370
371 /* We might get collisions that trap in preceding order, so let's
372 map them differently */
373
374 sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
375 if (backwards_map)
376 sid_map_mask = SID_MAP_MASK - sid_map_mask;
377
378 map = &to_book3s(vcpu)->sid_map[sid_map_mask];
379
380 /* Make sure we're taking the other map next time */
381 backwards_map = !backwards_map;
382
383 /* Uh-oh ... out of mappings. Let's flush! */
384 if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) {
385 vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
386 memset(vcpu_book3s->sid_map, 0,
387 sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
388 kvmppc_mmu_pte_flush(vcpu, 0, 0);
389 kvmppc_mmu_flush_segments(vcpu);
390 }
391 map->host_vsid = vcpu_book3s->vsid_next;
392
393 /* Would have to be 111 to be completely aligned with the rest of
394 Linux, but that is just way too little space! */
395 vcpu_book3s->vsid_next+=1;
396
397 map->guest_vsid = gvsid;
398 map->valid = true;
399
400 return map;
401}
402
403int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
404{
405 u32 esid = eaddr >> SID_SHIFT;
406 u64 gvsid;
407 u32 sr;
408 struct kvmppc_sid_map *map;
409 struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
410
411 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
412 /* Invalidate an entry */
413 svcpu->sr[esid] = SR_INVALID;
414 return -ENOENT;
415 }
416
417 map = find_sid_vsid(vcpu, gvsid);
418 if (!map)
419 map = create_sid_map(vcpu, gvsid);
420
421 map->guest_esid = esid;
422 sr = map->host_vsid | SR_KP;
423 svcpu->sr[esid] = sr;
424
425 dprintk_sr("MMU: mtsr %d, 0x%x\n", esid, sr);
426
427 return 0;
428}
429
430void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
431{
432 int i;
433 struct kvmppc_book3s_shadow_vcpu *svcpu = to_svcpu(vcpu);
434
435 dprintk_sr("MMU: flushing all segments (%d)\n", ARRAY_SIZE(svcpu->sr));
436 for (i = 0; i < ARRAY_SIZE(svcpu->sr); i++)
437 svcpu->sr[i] = SR_INVALID;
438}
439
440void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
441{
442 kvmppc_mmu_pte_flush(vcpu, 0, 0);
443 preempt_disable();
444 __destroy_context(to_book3s(vcpu)->context_id);
445 preempt_enable();
446}
447
448/* From mm/mmu_context_hash32.c */
449#define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff)
450
451int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
452{
453 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
454 int err;
455 ulong sdr1;
456
457 err = __init_new_context();
458 if (err < 0)
459 return -1;
460 vcpu3s->context_id = err;
461
462 vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1;
463 vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id);
464
465#if 0 /* XXX still doesn't guarantee uniqueness */
466 /* We could collide with the Linux vsid space because the vsid
467 * wraps around at 24 bits. We're safe if we do our own space
468 * though, so let's always set the highest bit. */
469
470 vcpu3s->vsid_max |= 0x00800000;
471 vcpu3s->vsid_first |= 0x00800000;
472#endif
473 BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first);
474
475 vcpu3s->vsid_next = vcpu3s->vsid_first;
476
477 /* Remember where the HTAB is */
478 asm ( "mfsdr1 %0" : "=r"(sdr1) );
479 htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0;
480 htab = (ulong)__va(sdr1 & 0xffff0000);
481
482 return 0;
483}
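
kvmppc_mmu_get_pteg() above folds the shadow VSID and the page index into a 64-byte-aligned PTEG offset and masks it with the HTABMASK recovered from SDR1 in kvmppc_mmu_init(). A stand-alone sketch of the same arithmetic with made-up sample values; the 0xf0000000 ESID mask is an assumption here (the top four EA bits select the 256MB segment), everything else is copied from the code:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t sdr1 = 0x00400003;                /* made-up sample SDR1 */
		uint32_t htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0;
		uint32_t htab = sdr1 & 0xffff0000;         /* HTAB origin; the kernel takes __va() of it */

		uint32_t vsid  = 0x123456;                 /* sample shadow VSID */
		uint32_t eaddr = 0xc0001000;               /* sample effective address */
		uint32_t page  = (eaddr & ~0xf0000000) >> 12;
		uint32_t hash  = (vsid ^ page) << 6;       /* primary hash */

		hash &= htabmask;
		printf("primary PTEG (64-byte group of 8 PTEs) at 0x%08x\n",
		       (unsigned)(htab | hash));
		return 0;
	}

The secondary group is the same computation with the hash complemented before masking, which is what the !primary branch above does.
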
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
new file mode 100644
index 000000000000..3608471ad2d8
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -0,0 +1,143 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright SUSE Linux Products GmbH 2009
16 *
17 * Authors: Alexander Graf <agraf@suse.de>
18 */
19
20/******************************************************************************
21 * *
22 * Entry code *
23 * *
24 *****************************************************************************/
25
26.macro LOAD_GUEST_SEGMENTS
27
28 /* Required state:
29 *
30 * MSR = ~IR|DR
31 * R1 = host R1
32 * R2 = host R2
33 * R3 = shadow vcpu
34 * all other volatile GPRS = free
35 * SVCPU[CR] = guest CR
36 * SVCPU[XER] = guest XER
37 * SVCPU[CTR] = guest CTR
38 * SVCPU[LR] = guest LR
39 */
40
41#define XCHG_SR(n) lwz r9, (SVCPU_SR+(n*4))(r3); \
42 mtsr n, r9
43
44 XCHG_SR(0)
45 XCHG_SR(1)
46 XCHG_SR(2)
47 XCHG_SR(3)
48 XCHG_SR(4)
49 XCHG_SR(5)
50 XCHG_SR(6)
51 XCHG_SR(7)
52 XCHG_SR(8)
53 XCHG_SR(9)
54 XCHG_SR(10)
55 XCHG_SR(11)
56 XCHG_SR(12)
57 XCHG_SR(13)
58 XCHG_SR(14)
59 XCHG_SR(15)
60
61 /* Clear BATs. */
62
63#define KVM_KILL_BAT(n, reg) \
64 mtspr SPRN_IBAT##n##U,reg; \
65 mtspr SPRN_IBAT##n##L,reg; \
66 mtspr SPRN_DBAT##n##U,reg; \
67 mtspr SPRN_DBAT##n##L,reg; \
68
69 li r9, 0
70 KVM_KILL_BAT(0, r9)
71 KVM_KILL_BAT(1, r9)
72 KVM_KILL_BAT(2, r9)
73 KVM_KILL_BAT(3, r9)
74
75.endm
76
77/******************************************************************************
78 * *
79 * Exit code *
80 * *
81 *****************************************************************************/
82
83.macro LOAD_HOST_SEGMENTS
84
85 /* Register usage at this point:
86 *
87 * R1 = host R1
88 * R2 = host R2
89 * R12 = exit handler id
90 * R13 = shadow vcpu - SHADOW_VCPU_OFF
91 * SVCPU.* = guest *
92 * SVCPU[CR] = guest CR
93 * SVCPU[XER] = guest XER
94 * SVCPU[CTR] = guest CTR
95 * SVCPU[LR] = guest LR
96 *
97 */
98
99 /* Restore BATs */
100
 101 /* We only overwrite the upper part, so we only restore
102 the upper part. */
103#define KVM_LOAD_BAT(n, reg, RA, RB) \
104 lwz RA,(n*16)+0(reg); \
105 lwz RB,(n*16)+4(reg); \
106 mtspr SPRN_IBAT##n##U,RA; \
107 mtspr SPRN_IBAT##n##L,RB; \
108 lwz RA,(n*16)+8(reg); \
109 lwz RB,(n*16)+12(reg); \
110 mtspr SPRN_DBAT##n##U,RA; \
111 mtspr SPRN_DBAT##n##L,RB; \
112
113 lis r9, BATS@ha
114 addi r9, r9, BATS@l
115 tophys(r9, r9)
116 KVM_LOAD_BAT(0, r9, r10, r11)
117 KVM_LOAD_BAT(1, r9, r10, r11)
118 KVM_LOAD_BAT(2, r9, r10, r11)
119 KVM_LOAD_BAT(3, r9, r10, r11)
120
121 /* Restore Segment Registers */
122
123 /* 0xc - 0xf */
124
125 li r0, 4
126 mtctr r0
127 LOAD_REG_IMMEDIATE(r3, 0x20000000 | (0x111 * 0xc))
128 lis r4, 0xc000
1293: mtsrin r3, r4
130 addi r3, r3, 0x111 /* increment VSID */
131 addis r4, r4, 0x1000 /* address of next segment */
132 bdnz 3b
133
134 /* 0x0 - 0xb */
135
136 /* 'current->mm' needs to be in r4 */
137 tophys(r4, r2)
138 lwz r4, MM(r4)
139 tophys(r4, r4)
140 /* This only clobbers r0, r3, r4 and r5 */
141 bl switch_mmu_context
142
143.endm
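
The "0xc - 0xf" loop in LOAD_HOST_SEGMENTS above rebuilds the kernel segment registers by hand before letting switch_mmu_context() restore the user ones. Computed in C for clarity, the four values it writes are fully determined by the 0x20000000 bit and the 0x111 VSID stride visible in the assembly; nothing beyond that is implied about the host's real SR layout:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t sr = 0x20000000 | (0x111 * 0xc);  /* value for segment 0xc */
		int i;

		for (i = 0xc; i <= 0xf; i++) {
			printf("SR[0x%x] = 0x%08x\n", i, (unsigned)sr);
			sr += 0x111;               /* "increment VSID", as the asm comment says */
		}
		return 0;
	}
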
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 512dcff77554..4025ea26b3c1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -232,7 +232,7 @@ do_second:
232 } 232 }
233 233
234 dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx " 234 dprintk("KVM MMU: Translated 0x%lx [0x%llx] -> 0x%llx "
235 "-> 0x%llx\n", 235 "-> 0x%lx\n",
236 eaddr, avpn, gpte->vpage, gpte->raddr); 236 eaddr, avpn, gpte->vpage, gpte->raddr);
237 found = true; 237 found = true;
238 break; 238 break;
@@ -383,7 +383,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu)
383 383
384 if (vcpu->arch.msr & MSR_IR) { 384 if (vcpu->arch.msr & MSR_IR) {
385 kvmppc_mmu_flush_segments(vcpu); 385 kvmppc_mmu_flush_segments(vcpu);
386 kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); 386 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
387 } 387 }
388} 388}
389 389
@@ -439,37 +439,43 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
439 kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask); 439 kvmppc_mmu_pte_vflush(vcpu, va >> 12, mask);
440} 440}
441 441
442static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, 442static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
443 u64 *vsid) 443 u64 *vsid)
444{ 444{
445 ulong ea = esid << SID_SHIFT;
446 struct kvmppc_slb *slb;
447 u64 gvsid = esid;
448
449 if (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
450 slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea);
451 if (slb)
452 gvsid = slb->vsid;
453 }
454
445 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { 455 switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
446 case 0: 456 case 0:
447 *vsid = (VSID_REAL >> 16) | esid; 457 *vsid = VSID_REAL | esid;
448 break; 458 break;
449 case MSR_IR: 459 case MSR_IR:
450 *vsid = (VSID_REAL_IR >> 16) | esid; 460 *vsid = VSID_REAL_IR | gvsid;
451 break; 461 break;
452 case MSR_DR: 462 case MSR_DR:
453 *vsid = (VSID_REAL_DR >> 16) | esid; 463 *vsid = VSID_REAL_DR | gvsid;
454 break; 464 break;
455 case MSR_DR|MSR_IR: 465 case MSR_DR|MSR_IR:
456 { 466 if (!slb)
457 ulong ea;
458 struct kvmppc_slb *slb;
459 ea = esid << SID_SHIFT;
460 slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea);
461 if (slb)
462 *vsid = slb->vsid;
463 else
464 return -ENOENT; 467 return -ENOENT;
465 468
469 *vsid = gvsid;
466 break; 470 break;
467 }
468 default: 471 default:
469 BUG(); 472 BUG();
470 break; 473 break;
471 } 474 }
472 475
476 if (vcpu->arch.msr & MSR_PR)
477 *vsid |= VSID_PR;
478
473 return 0; 479 return 0;
474} 480}
475 481
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index f2899b297ffd..e4b5744977f6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -48,21 +48,25 @@
48 48
49static void invalidate_pte(struct hpte_cache *pte) 49static void invalidate_pte(struct hpte_cache *pte)
50{ 50{
51 dprintk_mmu("KVM: Flushing SPT %d: 0x%llx (0x%llx) -> 0x%llx\n", 51 dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n",
52 i, pte->pte.eaddr, pte->pte.vpage, pte->host_va); 52 pte->pte.eaddr, pte->pte.vpage, pte->host_va);
53 53
54 ppc_md.hpte_invalidate(pte->slot, pte->host_va, 54 ppc_md.hpte_invalidate(pte->slot, pte->host_va,
55 MMU_PAGE_4K, MMU_SEGSIZE_256M, 55 MMU_PAGE_4K, MMU_SEGSIZE_256M,
56 false); 56 false);
57 pte->host_va = 0; 57 pte->host_va = 0;
58 kvm_release_pfn_dirty(pte->pfn); 58
59 if (pte->pte.may_write)
60 kvm_release_pfn_dirty(pte->pfn);
61 else
62 kvm_release_pfn_clean(pte->pfn);
59} 63}
60 64
61void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 guest_ea, u64 ea_mask) 65void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
62{ 66{
63 int i; 67 int i;
64 68
65 dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%llx & 0x%llx\n", 69 dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n",
66 vcpu->arch.hpte_cache_offset, guest_ea, ea_mask); 70 vcpu->arch.hpte_cache_offset, guest_ea, ea_mask);
67 BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 71 BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
68 72
@@ -106,12 +110,12 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
106 } 110 }
107} 111}
108 112
109void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, u64 pa_start, u64 pa_end) 113void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
110{ 114{
111 int i; 115 int i;
112 116
113 dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n", 117 dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx & 0x%lx\n",
114 vcpu->arch.hpte_cache_offset, guest_pa, pa_mask); 118 vcpu->arch.hpte_cache_offset, pa_start, pa_end);
115 BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM); 119 BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
116 120
117 for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) { 121 for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
@@ -182,7 +186,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
182 sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); 186 sid_map_mask = kvmppc_sid_hash(vcpu, gvsid);
183 map = &to_book3s(vcpu)->sid_map[sid_map_mask]; 187 map = &to_book3s(vcpu)->sid_map[sid_map_mask];
184 if (map->guest_vsid == gvsid) { 188 if (map->guest_vsid == gvsid) {
185 dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n", 189 dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n",
186 gvsid, map->host_vsid); 190 gvsid, map->host_vsid);
187 return map; 191 return map;
188 } 192 }
@@ -194,7 +198,8 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
194 return map; 198 return map;
195 } 199 }
196 200
197 dprintk_slb("SLB: Searching 0x%llx -> not found\n", gvsid); 201 dprintk_slb("SLB: Searching %d/%d: 0x%llx -> not found\n",
202 sid_map_mask, SID_MAP_MASK - sid_map_mask, gvsid);
198 return NULL; 203 return NULL;
199} 204}
200 205
@@ -212,7 +217,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
212 /* Get host physical address for gpa */ 217 /* Get host physical address for gpa */
213 hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); 218 hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
214 if (kvm_is_error_hva(hpaddr)) { 219 if (kvm_is_error_hva(hpaddr)) {
215 printk(KERN_INFO "Couldn't get guest page for gfn %llx!\n", orig_pte->eaddr); 220 printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
216 return -EINVAL; 221 return -EINVAL;
217 } 222 }
218 hpaddr <<= PAGE_SHIFT; 223 hpaddr <<= PAGE_SHIFT;
@@ -227,10 +232,16 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
227 vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); 232 vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid);
228 map = find_sid_vsid(vcpu, vsid); 233 map = find_sid_vsid(vcpu, vsid);
229 if (!map) { 234 if (!map) {
230 kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr); 235 ret = kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr);
236 WARN_ON(ret < 0);
231 map = find_sid_vsid(vcpu, vsid); 237 map = find_sid_vsid(vcpu, vsid);
232 } 238 }
233 BUG_ON(!map); 239 if (!map) {
240 printk(KERN_ERR "KVM: Segment map for 0x%llx (0x%lx) failed\n",
241 vsid, orig_pte->eaddr);
242 WARN_ON(true);
243 return -EINVAL;
244 }
234 245
235 vsid = map->host_vsid; 246 vsid = map->host_vsid;
236 va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); 247 va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
@@ -257,26 +268,26 @@ map_again:
257 268
258 if (ret < 0) { 269 if (ret < 0) {
259 /* If we couldn't map a primary PTE, try a secondary */ 270 /* If we couldn't map a primary PTE, try a secondary */
260#ifdef USE_SECONDARY
261 hash = ~hash; 271 hash = ~hash;
272 vflags ^= HPTE_V_SECONDARY;
262 attempt++; 273 attempt++;
263 if (attempt % 2)
264 vflags = HPTE_V_SECONDARY;
265 else
266 vflags = 0;
267#else
268 attempt = 2;
269#endif
270 goto map_again; 274 goto map_again;
271 } else { 275 } else {
272 int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); 276 int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu);
273 struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id]; 277 struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id];
274 278
275 dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%lx (0x%llx) -> %lx\n", 279 dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n",
276 ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', 280 ((rflags & HPTE_R_PP) == 3) ? '-' : 'w',
277 (rflags & HPTE_R_N) ? '-' : 'x', 281 (rflags & HPTE_R_N) ? '-' : 'x',
278 orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); 282 orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr);
279 283
284 /* The ppc_md code may give us a secondary entry even though we
285 asked for a primary. Fix up. */
286 if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) {
287 hash = ~hash;
288 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
289 }
290
280 pte->slot = hpteg + (ret & 7); 291 pte->slot = hpteg + (ret & 7);
281 pte->host_va = va; 292 pte->host_va = va;
282 pte->pte = *orig_pte; 293 pte->pte = *orig_pte;
@@ -321,6 +332,9 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
321 map->guest_vsid = gvsid; 332 map->guest_vsid = gvsid;
322 map->valid = true; 333 map->valid = true;
323 334
335 dprintk_slb("SLB: New mapping at %d: 0x%llx -> 0x%llx\n",
336 sid_map_mask, gvsid, map->host_vsid);
337
324 return map; 338 return map;
325} 339}
326 340
@@ -331,14 +345,14 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
331 int found_inval = -1; 345 int found_inval = -1;
332 int r; 346 int r;
333 347
334 if (!get_paca()->kvm_slb_max) 348 if (!to_svcpu(vcpu)->slb_max)
335 get_paca()->kvm_slb_max = 1; 349 to_svcpu(vcpu)->slb_max = 1;
336 350
337 /* Are we overwriting? */ 351 /* Are we overwriting? */
338 for (i = 1; i < get_paca()->kvm_slb_max; i++) { 352 for (i = 1; i < to_svcpu(vcpu)->slb_max; i++) {
339 if (!(get_paca()->kvm_slb[i].esid & SLB_ESID_V)) 353 if (!(to_svcpu(vcpu)->slb[i].esid & SLB_ESID_V))
340 found_inval = i; 354 found_inval = i;
341 else if ((get_paca()->kvm_slb[i].esid & ESID_MASK) == esid) 355 else if ((to_svcpu(vcpu)->slb[i].esid & ESID_MASK) == esid)
342 return i; 356 return i;
343 } 357 }
344 358
@@ -352,11 +366,11 @@ static int kvmppc_mmu_next_segment(struct kvm_vcpu *vcpu, ulong esid)
352 max_slb_size = mmu_slb_size; 366 max_slb_size = mmu_slb_size;
353 367
354 /* Overflowing -> purge */ 368 /* Overflowing -> purge */
355 if ((get_paca()->kvm_slb_max) == max_slb_size) 369 if ((to_svcpu(vcpu)->slb_max) == max_slb_size)
356 kvmppc_mmu_flush_segments(vcpu); 370 kvmppc_mmu_flush_segments(vcpu);
357 371
358 r = get_paca()->kvm_slb_max; 372 r = to_svcpu(vcpu)->slb_max;
359 get_paca()->kvm_slb_max++; 373 to_svcpu(vcpu)->slb_max++;
360 374
361 return r; 375 return r;
362} 376}
@@ -374,7 +388,7 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
374 388
375 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) { 389 if (vcpu->arch.mmu.esid_to_vsid(vcpu, esid, &gvsid)) {
376 /* Invalidate an entry */ 390 /* Invalidate an entry */
377 get_paca()->kvm_slb[slb_index].esid = 0; 391 to_svcpu(vcpu)->slb[slb_index].esid = 0;
378 return -ENOENT; 392 return -ENOENT;
379 } 393 }
380 394
@@ -388,8 +402,8 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
388 slb_vsid &= ~SLB_VSID_KP; 402 slb_vsid &= ~SLB_VSID_KP;
389 slb_esid |= slb_index; 403 slb_esid |= slb_index;
390 404
391 get_paca()->kvm_slb[slb_index].esid = slb_esid; 405 to_svcpu(vcpu)->slb[slb_index].esid = slb_esid;
392 get_paca()->kvm_slb[slb_index].vsid = slb_vsid; 406 to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid;
393 407
394 dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid); 408 dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid);
395 409
@@ -398,11 +412,29 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
398 412
399void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) 413void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
400{ 414{
401 get_paca()->kvm_slb_max = 1; 415 to_svcpu(vcpu)->slb_max = 1;
402 get_paca()->kvm_slb[0].esid = 0; 416 to_svcpu(vcpu)->slb[0].esid = 0;
403} 417}
404 418
405void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 419void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
406{ 420{
407 kvmppc_mmu_pte_flush(vcpu, 0, 0); 421 kvmppc_mmu_pte_flush(vcpu, 0, 0);
422 __destroy_context(to_book3s(vcpu)->context_id);
423}
424
425int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
426{
427 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
428 int err;
429
430 err = __init_new_context();
431 if (err < 0)
432 return -1;
433 vcpu3s->context_id = err;
434
435 vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1;
436 vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS;
437 vcpu3s->vsid_next = vcpu3s->vsid_first;
438
439 return 0;
408} 440}
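
The new kvmppc_mmu_init() above gives each vcpu a private window of host VSIDs derived from its MMU context id, which the SID map code then hands out one at a time. A sketch of that window arithmetic; USER_ESID_BITS is assumed to be 16 here (its real value comes from the mmu headers, not this hunk), and the context id is a made-up sample:

	#include <stdint.h>
	#include <stdio.h>

	#define USER_ESID_BITS 16   /* assumption, see above */

	int main(void)
	{
		uint64_t context_id = 42;   /* whatever __init_new_context() returned */
		uint64_t vsid_first = context_id << USER_ESID_BITS;
		uint64_t vsid_max   = ((context_id + 1) << USER_ESID_BITS) - 1;

		printf("vsids 0x%llx .. 0x%llx (%llu entries)\n",
		       (unsigned long long)vsid_first,
		       (unsigned long long)vsid_max,
		       (unsigned long long)(vsid_max - vsid_first + 1));
		return 0;
	}
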
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 35b762722187..04e7d3bbfe8b 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -44,8 +44,7 @@ slb_exit_skip_ ## num:
44 * * 44 * *
45 *****************************************************************************/ 45 *****************************************************************************/
46 46
47.global kvmppc_handler_trampoline_enter 47.macro LOAD_GUEST_SEGMENTS
48kvmppc_handler_trampoline_enter:
49 48
50 /* Required state: 49 /* Required state:
51 * 50 *
@@ -53,20 +52,14 @@ kvmppc_handler_trampoline_enter:
53 * R13 = PACA 52 * R13 = PACA
54 * R1 = host R1 53 * R1 = host R1
55 * R2 = host R2 54 * R2 = host R2
56 * R9 = guest IP 55 * R3 = shadow vcpu
57 * R10 = guest MSR 56 * all other volatile GPRS = free
58 * all other GPRS = free 57 * SVCPU[CR] = guest CR
59 * PACA[KVM_CR] = guest CR 58 * SVCPU[XER] = guest XER
60 * PACA[KVM_XER] = guest XER 59 * SVCPU[CTR] = guest CTR
60 * SVCPU[LR] = guest LR
61 */ 61 */
62 62
63 mtsrr0 r9
64 mtsrr1 r10
65
66 /* Activate guest mode, so faults get handled by KVM */
67 li r11, KVM_GUEST_MODE_GUEST
68 stb r11, PACA_KVM_IN_GUEST(r13)
69
70 /* Remove LPAR shadow entries */ 63 /* Remove LPAR shadow entries */
71 64
72#if SLB_NUM_BOLTED == 3 65#if SLB_NUM_BOLTED == 3
@@ -101,14 +94,14 @@ kvmppc_handler_trampoline_enter:
101 94
102 /* Fill SLB with our shadow */ 95 /* Fill SLB with our shadow */
103 96
104 lbz r12, PACA_KVM_SLB_MAX(r13) 97 lbz r12, SVCPU_SLB_MAX(r3)
105 mulli r12, r12, 16 98 mulli r12, r12, 16
106 addi r12, r12, PACA_KVM_SLB 99 addi r12, r12, SVCPU_SLB
107 add r12, r12, r13 100 add r12, r12, r3
108 101
109 /* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */ 102 /* for (r11 = kvm_slb; r11 < kvm_slb + kvm_slb_size; r11+=slb_entry) */
110 li r11, PACA_KVM_SLB 103 li r11, SVCPU_SLB
111 add r11, r11, r13 104 add r11, r11, r3
112 105
113slb_loop_enter: 106slb_loop_enter:
114 107
@@ -127,34 +120,7 @@ slb_loop_enter_skip:
127 120
128slb_do_enter: 121slb_do_enter:
129 122
130 /* Enter guest */ 123.endm
131
132 ld r0, (PACA_KVM_R0)(r13)
133 ld r1, (PACA_KVM_R1)(r13)
134 ld r2, (PACA_KVM_R2)(r13)
135 ld r3, (PACA_KVM_R3)(r13)
136 ld r4, (PACA_KVM_R4)(r13)
137 ld r5, (PACA_KVM_R5)(r13)
138 ld r6, (PACA_KVM_R6)(r13)
139 ld r7, (PACA_KVM_R7)(r13)
140 ld r8, (PACA_KVM_R8)(r13)
141 ld r9, (PACA_KVM_R9)(r13)
142 ld r10, (PACA_KVM_R10)(r13)
143 ld r12, (PACA_KVM_R12)(r13)
144
145 lwz r11, (PACA_KVM_CR)(r13)
146 mtcr r11
147
148 ld r11, (PACA_KVM_XER)(r13)
149 mtxer r11
150
151 ld r11, (PACA_KVM_R11)(r13)
152 ld r13, (PACA_KVM_R13)(r13)
153
154 RFI
155kvmppc_handler_trampoline_enter_end:
156
157
158 124
159/****************************************************************************** 125/******************************************************************************
160 * * 126 * *
@@ -162,99 +128,22 @@ kvmppc_handler_trampoline_enter_end:
162 * * 128 * *
163 *****************************************************************************/ 129 *****************************************************************************/
164 130
165.global kvmppc_handler_trampoline_exit 131.macro LOAD_HOST_SEGMENTS
166kvmppc_handler_trampoline_exit:
167 132
168 /* Register usage at this point: 133 /* Register usage at this point:
169 * 134 *
170 * SPRG_SCRATCH0 = guest R13 135 * R1 = host R1
171 * R12 = exit handler id 136 * R2 = host R2
172 * R13 = PACA 137 * R12 = exit handler id
173 * PACA.KVM.SCRATCH0 = guest R12 138 * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64]
174 * PACA.KVM.SCRATCH1 = guest CR 139 * SVCPU.* = guest *
140 * SVCPU[CR] = guest CR
141 * SVCPU[XER] = guest XER
142 * SVCPU[CTR] = guest CTR
143 * SVCPU[LR] = guest LR
175 * 144 *
176 */ 145 */
177 146
178 /* Save registers */
179
180 std r0, PACA_KVM_R0(r13)
181 std r1, PACA_KVM_R1(r13)
182 std r2, PACA_KVM_R2(r13)
183 std r3, PACA_KVM_R3(r13)
184 std r4, PACA_KVM_R4(r13)
185 std r5, PACA_KVM_R5(r13)
186 std r6, PACA_KVM_R6(r13)
187 std r7, PACA_KVM_R7(r13)
188 std r8, PACA_KVM_R8(r13)
189 std r9, PACA_KVM_R9(r13)
190 std r10, PACA_KVM_R10(r13)
191 std r11, PACA_KVM_R11(r13)
192
193 /* Restore R1/R2 so we can handle faults */
194 ld r1, PACA_KVM_HOST_R1(r13)
195 ld r2, PACA_KVM_HOST_R2(r13)
196
197 /* Save guest PC and MSR in GPRs */
198 mfsrr0 r3
199 mfsrr1 r4
200
201 /* Get scratch'ed off registers */
202 mfspr r9, SPRN_SPRG_SCRATCH0
203 std r9, PACA_KVM_R13(r13)
204
205 ld r8, PACA_KVM_SCRATCH0(r13)
206 std r8, PACA_KVM_R12(r13)
207
208 lwz r7, PACA_KVM_SCRATCH1(r13)
209 stw r7, PACA_KVM_CR(r13)
210
211 /* Save more register state */
212
213 mfxer r6
214 stw r6, PACA_KVM_XER(r13)
215
216 mfdar r5
217 mfdsisr r6
218
219 /*
220 * In order for us to easily get the last instruction,
221 * we got the #vmexit at, we exploit the fact that the
222 * virtual layout is still the same here, so we can just
223 * ld from the guest's PC address
224 */
225
226 /* We only load the last instruction when it's safe */
227 cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE
228 beq ld_last_inst
229 cmpwi r12, BOOK3S_INTERRUPT_PROGRAM
230 beq ld_last_inst
231
232 b no_ld_last_inst
233
234ld_last_inst:
235 /* Save off the guest instruction we're at */
236
237 /* Set guest mode to 'jump over instruction' so if lwz faults
238 * we'll just continue at the next IP. */
239 li r9, KVM_GUEST_MODE_SKIP
240 stb r9, PACA_KVM_IN_GUEST(r13)
241
242 /* 1) enable paging for data */
243 mfmsr r9
244 ori r11, r9, MSR_DR /* Enable paging for data */
245 mtmsr r11
246 /* 2) fetch the instruction */
247 li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */
248 lwz r0, 0(r3)
249 /* 3) disable paging again */
250 mtmsr r9
251
252no_ld_last_inst:
253
254 /* Unset guest mode */
255 li r9, KVM_GUEST_MODE_NONE
256 stb r9, PACA_KVM_IN_GUEST(r13)
257
258 /* Restore bolted entries from the shadow and fix it along the way */ 147 /* Restore bolted entries from the shadow and fix it along the way */
259 148
260 /* We don't store anything in entry 0, so we don't need to take care of it */ 149 /* We don't store anything in entry 0, so we don't need to take care of it */
@@ -275,28 +164,4 @@ no_ld_last_inst:
275 164
276slb_do_exit: 165slb_do_exit:
277 166
278 /* Register usage at this point: 167.endm
279 *
280 * R0 = guest last inst
281 * R1 = host R1
282 * R2 = host R2
283 * R3 = guest PC
284 * R4 = guest MSR
285 * R5 = guest DAR
286 * R6 = guest DSISR
287 * R12 = exit handler id
288 * R13 = PACA
289 * PACA.KVM.* = guest *
290 *
291 */
292
293 /* RFI into the highmem handler */
294 mfmsr r7
295 ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */
296 mtsrr1 r7
297 ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */
298 mtsrr0 r8
299
300 RFI
301kvmppc_handler_trampoline_exit_end:
302
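The enter/exit paths above stop spilling guest registers into individual PACA_KVM_* slots and instead go through SVCPU_* offsets, i.e. one contiguous shadow-vcpu save area that both the 32-bit and 64-bit real-mode code can address from a single base register. A rough, illustration-only model of what that area holds (field names and sizes are simplified here; the real layout comes from the kernel's shadow vcpu struct via asm-offsets):

/* Illustrative sketch only -- approximates the shadow vcpu save area
 * behind the SVCPU_* offsets used above; not the kernel's real layout. */
#include <stdint.h>

struct shadow_vcpu_sketch {
	uint64_t gpr[14];	/* volatile guest r0-r13, saved on exit     */
	uint32_t cr;		/* guest CR  (SVCPU[CR])                    */
	uint64_t xer;		/* guest XER (SVCPU[XER])                   */
	uint64_t ctr, lr;	/* guest CTR/LR (SVCPU[CTR]/SVCPU[LR])      */
	uint64_t host_r1;	/* host stack pointer   (SVCPU_HOST_R1)     */
	uint64_t host_r2;	/* host r2/TOC          (SVCPU_HOST_R2)     */
	uint64_t vmhandler;	/* highmem exit handler (SVCPU_VMHANDLER)   */
	uint8_t  slb_max;	/* valid shadow SLB entries (SVCPU_SLB_MAX) */
	struct {
		uint64_t esid;
		uint64_t vsid;
	} slb[64];		/* shadow SLB to load, sized arbitrarily here */
};

Keeping everything behind one base register is what allows the segment-switch code above to become shared macros instead of PPC64-only trampoline bodies.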
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 2b0ee7e040c9..c85f906038ce 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -28,13 +28,16 @@
28#define OP_31_XOP_MFMSR 83 28#define OP_31_XOP_MFMSR 83
29#define OP_31_XOP_MTMSR 146 29#define OP_31_XOP_MTMSR 146
30#define OP_31_XOP_MTMSRD 178 30#define OP_31_XOP_MTMSRD 178
31#define OP_31_XOP_MTSR 210
31#define OP_31_XOP_MTSRIN 242 32#define OP_31_XOP_MTSRIN 242
32#define OP_31_XOP_TLBIEL 274 33#define OP_31_XOP_TLBIEL 274
33#define OP_31_XOP_TLBIE 306 34#define OP_31_XOP_TLBIE 306
34#define OP_31_XOP_SLBMTE 402 35#define OP_31_XOP_SLBMTE 402
35#define OP_31_XOP_SLBIE 434 36#define OP_31_XOP_SLBIE 434
36#define OP_31_XOP_SLBIA 498 37#define OP_31_XOP_SLBIA 498
38#define OP_31_XOP_MFSR 595
37#define OP_31_XOP_MFSRIN 659 39#define OP_31_XOP_MFSRIN 659
40#define OP_31_XOP_DCBA 758
38#define OP_31_XOP_SLBMFEV 851 41#define OP_31_XOP_SLBMFEV 851
39#define OP_31_XOP_EIOIO 854 42#define OP_31_XOP_EIOIO 854
40#define OP_31_XOP_SLBMFEE 915 43#define OP_31_XOP_SLBMFEE 915
@@ -42,6 +45,24 @@
42/* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ 45/* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
43#define OP_31_XOP_DCBZ 1010 46#define OP_31_XOP_DCBZ 1010
44 47
48#define OP_LFS 48
49#define OP_LFD 50
50#define OP_STFS 52
51#define OP_STFD 54
52
53#define SPRN_GQR0 912
54#define SPRN_GQR1 913
55#define SPRN_GQR2 914
56#define SPRN_GQR3 915
57#define SPRN_GQR4 916
58#define SPRN_GQR5 917
59#define SPRN_GQR6 918
60#define SPRN_GQR7 919
61
62/* Book3S_32 defines mfsrin(v) - but that messes up our abstract
63 * function pointers, so let's just disable the define. */
64#undef mfsrin
65
45int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, 66int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
46 unsigned int inst, int *advance) 67 unsigned int inst, int *advance)
47{ 68{
@@ -52,7 +73,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
52 switch (get_xop(inst)) { 73 switch (get_xop(inst)) {
53 case OP_19_XOP_RFID: 74 case OP_19_XOP_RFID:
54 case OP_19_XOP_RFI: 75 case OP_19_XOP_RFI:
55 vcpu->arch.pc = vcpu->arch.srr0; 76 kvmppc_set_pc(vcpu, vcpu->arch.srr0);
56 kvmppc_set_msr(vcpu, vcpu->arch.srr1); 77 kvmppc_set_msr(vcpu, vcpu->arch.srr1);
57 *advance = 0; 78 *advance = 0;
58 break; 79 break;
@@ -80,6 +101,18 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
80 case OP_31_XOP_MTMSR: 101 case OP_31_XOP_MTMSR:
81 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); 102 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
82 break; 103 break;
104 case OP_31_XOP_MFSR:
105 {
106 int srnum;
107
108 srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32);
109 if (vcpu->arch.mmu.mfsrin) {
110 u32 sr;
111 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
112 kvmppc_set_gpr(vcpu, get_rt(inst), sr);
113 }
114 break;
115 }
83 case OP_31_XOP_MFSRIN: 116 case OP_31_XOP_MFSRIN:
84 { 117 {
85 int srnum; 118 int srnum;
@@ -92,6 +125,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
92 } 125 }
93 break; 126 break;
94 } 127 }
128 case OP_31_XOP_MTSR:
129 vcpu->arch.mmu.mtsrin(vcpu,
130 (inst >> 16) & 0xf,
131 kvmppc_get_gpr(vcpu, get_rs(inst)));
132 break;
95 case OP_31_XOP_MTSRIN: 133 case OP_31_XOP_MTSRIN:
96 vcpu->arch.mmu.mtsrin(vcpu, 134 vcpu->arch.mmu.mtsrin(vcpu,
97 (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, 135 (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
@@ -150,12 +188,17 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
150 kvmppc_set_gpr(vcpu, get_rt(inst), t); 188 kvmppc_set_gpr(vcpu, get_rt(inst), t);
151 } 189 }
152 break; 190 break;
191 case OP_31_XOP_DCBA:
192 /* Gets treated as NOP */
193 break;
153 case OP_31_XOP_DCBZ: 194 case OP_31_XOP_DCBZ:
154 { 195 {
155 ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); 196 ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
156 ulong ra = 0; 197 ulong ra = 0;
157 ulong addr; 198 ulong addr, vaddr;
158 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; 199 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
200 u32 dsisr;
201 int r;
159 202
160 if (get_ra(inst)) 203 if (get_ra(inst))
161 ra = kvmppc_get_gpr(vcpu, get_ra(inst)); 204 ra = kvmppc_get_gpr(vcpu, get_ra(inst));
@@ -163,15 +206,25 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
163 addr = (ra + rb) & ~31ULL; 206 addr = (ra + rb) & ~31ULL;
164 if (!(vcpu->arch.msr & MSR_SF)) 207 if (!(vcpu->arch.msr & MSR_SF))
165 addr &= 0xffffffff; 208 addr &= 0xffffffff;
209 vaddr = addr;
210
211 r = kvmppc_st(vcpu, &addr, 32, zeros, true);
212 if ((r == -ENOENT) || (r == -EPERM)) {
213 *advance = 0;
214 vcpu->arch.dear = vaddr;
215 to_svcpu(vcpu)->fault_dar = vaddr;
216
217 dsisr = DSISR_ISSTORE;
218 if (r == -ENOENT)
219 dsisr |= DSISR_NOHPTE;
220 else if (r == -EPERM)
221 dsisr |= DSISR_PROTFAULT;
222
223 to_book3s(vcpu)->dsisr = dsisr;
224 to_svcpu(vcpu)->fault_dsisr = dsisr;
166 225
167 if (kvmppc_st(vcpu, addr, 32, zeros)) {
168 vcpu->arch.dear = addr;
169 vcpu->arch.fault_dear = addr;
170 to_book3s(vcpu)->dsisr = DSISR_PROTFAULT |
171 DSISR_ISSTORE;
172 kvmppc_book3s_queue_irqprio(vcpu, 226 kvmppc_book3s_queue_irqprio(vcpu,
173 BOOK3S_INTERRUPT_DATA_STORAGE); 227 BOOK3S_INTERRUPT_DATA_STORAGE);
174 kvmppc_mmu_pte_flush(vcpu, addr, ~0xFFFULL);
175 } 228 }
176 229
177 break; 230 break;
@@ -184,6 +237,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
184 emulated = EMULATE_FAIL; 237 emulated = EMULATE_FAIL;
185 } 238 }
186 239
240 if (emulated == EMULATE_FAIL)
241 emulated = kvmppc_emulate_paired_single(run, vcpu);
242
187 return emulated; 243 return emulated;
188} 244}
189 245
@@ -207,6 +263,34 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper,
207 } 263 }
208} 264}
209 265
266static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn)
267{
268 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
269 struct kvmppc_bat *bat;
270
271 switch (sprn) {
272 case SPRN_IBAT0U ... SPRN_IBAT3L:
273 bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2];
274 break;
275 case SPRN_IBAT4U ... SPRN_IBAT7L:
276 bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)];
277 break;
278 case SPRN_DBAT0U ... SPRN_DBAT3L:
279 bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2];
280 break;
281 case SPRN_DBAT4U ... SPRN_DBAT7L:
282 bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)];
283 break;
284 default:
285 BUG();
286 }
287
288 if (sprn % 2)
289 return bat->raw >> 32;
290 else
291 return bat->raw;
292}
293
210static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) 294static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
211{ 295{
212 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 296 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
@@ -217,13 +301,13 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
217 bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; 301 bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2];
218 break; 302 break;
219 case SPRN_IBAT4U ... SPRN_IBAT7L: 303 case SPRN_IBAT4U ... SPRN_IBAT7L:
220 bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT4U) / 2]; 304 bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)];
221 break; 305 break;
222 case SPRN_DBAT0U ... SPRN_DBAT3L: 306 case SPRN_DBAT0U ... SPRN_DBAT3L:
223 bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; 307 bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2];
224 break; 308 break;
225 case SPRN_DBAT4U ... SPRN_DBAT7L: 309 case SPRN_DBAT4U ... SPRN_DBAT7L:
226 bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT4U) / 2]; 310 bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)];
227 break; 311 break;
228 default: 312 default:
229 BUG(); 313 BUG();
@@ -258,6 +342,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
258 /* BAT writes happen so rarely that we're ok to flush 342 /* BAT writes happen so rarely that we're ok to flush
259 * everything here */ 343 * everything here */
260 kvmppc_mmu_pte_flush(vcpu, 0, 0); 344 kvmppc_mmu_pte_flush(vcpu, 0, 0);
345 kvmppc_mmu_flush_segments(vcpu);
261 break; 346 break;
262 case SPRN_HID0: 347 case SPRN_HID0:
263 to_book3s(vcpu)->hid[0] = spr_val; 348 to_book3s(vcpu)->hid[0] = spr_val;
@@ -268,7 +353,32 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
268 case SPRN_HID2: 353 case SPRN_HID2:
269 to_book3s(vcpu)->hid[2] = spr_val; 354 to_book3s(vcpu)->hid[2] = spr_val;
270 break; 355 break;
356 case SPRN_HID2_GEKKO:
357 to_book3s(vcpu)->hid[2] = spr_val;
358 /* HID2.PSE controls paired single on gekko */
359 switch (vcpu->arch.pvr) {
360 case 0x00080200: /* lonestar 2.0 */
361 case 0x00088202: /* lonestar 2.2 */
362 case 0x70000100: /* gekko 1.0 */
363 case 0x00080100: /* gekko 2.0 */
364 case 0x00083203: /* gekko 2.3a */
365 case 0x00083213: /* gekko 2.3b */
366 case 0x00083204: /* gekko 2.4 */
367 case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
368 case 0x00087200: /* broadway */
369 if (vcpu->arch.hflags & BOOK3S_HFLAG_NATIVE_PS) {
370 /* Native paired singles */
371 } else if (spr_val & (1 << 29)) { /* HID2.PSE */
372 vcpu->arch.hflags |= BOOK3S_HFLAG_PAIRED_SINGLE;
373 kvmppc_giveup_ext(vcpu, MSR_FP);
374 } else {
375 vcpu->arch.hflags &= ~BOOK3S_HFLAG_PAIRED_SINGLE;
376 }
377 break;
378 }
379 break;
271 case SPRN_HID4: 380 case SPRN_HID4:
381 case SPRN_HID4_GEKKO:
272 to_book3s(vcpu)->hid[4] = spr_val; 382 to_book3s(vcpu)->hid[4] = spr_val;
273 break; 383 break;
274 case SPRN_HID5: 384 case SPRN_HID5:
@@ -278,12 +388,30 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
278 (mfmsr() & MSR_HV)) 388 (mfmsr() & MSR_HV))
279 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 389 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
280 break; 390 break;
391 case SPRN_GQR0:
392 case SPRN_GQR1:
393 case SPRN_GQR2:
394 case SPRN_GQR3:
395 case SPRN_GQR4:
396 case SPRN_GQR5:
397 case SPRN_GQR6:
398 case SPRN_GQR7:
399 to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val;
400 break;
281 case SPRN_ICTC: 401 case SPRN_ICTC:
282 case SPRN_THRM1: 402 case SPRN_THRM1:
283 case SPRN_THRM2: 403 case SPRN_THRM2:
284 case SPRN_THRM3: 404 case SPRN_THRM3:
285 case SPRN_CTRLF: 405 case SPRN_CTRLF:
286 case SPRN_CTRLT: 406 case SPRN_CTRLT:
407 case SPRN_L2CR:
408 case SPRN_MMCR0_GEKKO:
409 case SPRN_MMCR1_GEKKO:
410 case SPRN_PMC1_GEKKO:
411 case SPRN_PMC2_GEKKO:
412 case SPRN_PMC3_GEKKO:
413 case SPRN_PMC4_GEKKO:
414 case SPRN_WPAR_GEKKO:
287 break; 415 break;
288 default: 416 default:
289 printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); 417 printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
@@ -301,6 +429,12 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
301 int emulated = EMULATE_DONE; 429 int emulated = EMULATE_DONE;
302 430
303 switch (sprn) { 431 switch (sprn) {
432 case SPRN_IBAT0U ... SPRN_IBAT3L:
433 case SPRN_IBAT4U ... SPRN_IBAT7L:
434 case SPRN_DBAT0U ... SPRN_DBAT3L:
435 case SPRN_DBAT4U ... SPRN_DBAT7L:
436 kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn));
437 break;
304 case SPRN_SDR1: 438 case SPRN_SDR1:
305 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); 439 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
306 break; 440 break;
@@ -320,19 +454,40 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
320 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); 454 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
321 break; 455 break;
322 case SPRN_HID2: 456 case SPRN_HID2:
457 case SPRN_HID2_GEKKO:
323 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); 458 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
324 break; 459 break;
325 case SPRN_HID4: 460 case SPRN_HID4:
461 case SPRN_HID4_GEKKO:
326 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); 462 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
327 break; 463 break;
328 case SPRN_HID5: 464 case SPRN_HID5:
329 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); 465 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
330 break; 466 break;
467 case SPRN_GQR0:
468 case SPRN_GQR1:
469 case SPRN_GQR2:
470 case SPRN_GQR3:
471 case SPRN_GQR4:
472 case SPRN_GQR5:
473 case SPRN_GQR6:
474 case SPRN_GQR7:
475 kvmppc_set_gpr(vcpu, rt,
476 to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]);
477 break;
331 case SPRN_THRM1: 478 case SPRN_THRM1:
332 case SPRN_THRM2: 479 case SPRN_THRM2:
333 case SPRN_THRM3: 480 case SPRN_THRM3:
334 case SPRN_CTRLF: 481 case SPRN_CTRLF:
335 case SPRN_CTRLT: 482 case SPRN_CTRLT:
483 case SPRN_L2CR:
484 case SPRN_MMCR0_GEKKO:
485 case SPRN_MMCR1_GEKKO:
486 case SPRN_PMC1_GEKKO:
487 case SPRN_PMC2_GEKKO:
488 case SPRN_PMC3_GEKKO:
489 case SPRN_PMC4_GEKKO:
490 case SPRN_WPAR_GEKKO:
336 kvmppc_set_gpr(vcpu, rt, 0); 491 kvmppc_set_gpr(vcpu, rt, 0);
337 break; 492 break;
338 default: 493 default:
@@ -346,3 +501,73 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
346 return emulated; 501 return emulated;
347} 502}
348 503
504u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst)
505{
506 u32 dsisr = 0;
507
508 /*
509 * This is what the spec says about DSISR bits (not mentioned = 0):
510 *
511 * 12:13 [DS] Set to bits 30:31
512 * 15:16 [X] Set to bits 29:30
513 * 17 [X] Set to bit 25
514 * [D/DS] Set to bit 5
515 * 18:21 [X] Set to bits 21:24
516 * [D/DS] Set to bits 1:4
517 * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS)
518 * 27:31 Set to bits 11:15 (RA)
519 */
520
521 switch (get_op(inst)) {
522 /* D-form */
523 case OP_LFS:
524 case OP_LFD:
525 case OP_STFD:
526 case OP_STFS:
527 dsisr |= (inst >> 12) & 0x4000; /* bit 17 */
528 dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */
529 break;
530 /* X-form */
531 case 31:
532 dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */
533 dsisr |= (inst << 8) & 0x04000; /* bit 17 */
534 dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */
535 break;
536 default:
537 printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
538 break;
539 }
540
541 dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */
542
543 return dsisr;
544}
545
546ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst)
547{
548 ulong dar = 0;
549 ulong ra;
550
551 switch (get_op(inst)) {
552 case OP_LFS:
553 case OP_LFD:
554 case OP_STFD:
555 case OP_STFS:
556 ra = get_ra(inst);
557 if (ra)
558 dar = kvmppc_get_gpr(vcpu, ra);
559 dar += (s32)((s16)inst);
560 break;
561 case 31:
562 ra = get_ra(inst);
563 if (ra)
564 dar = kvmppc_get_gpr(vcpu, ra);
565 dar += kvmppc_get_gpr(vcpu, get_rb(inst));
566 break;
567 default:
568 printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst);
569 break;
570 }
571
572 return dar;
573}
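One subtle fix above is the BAT indexing: each BAT is a pair of SPRs (upper, lower), and BATs 4-7 sit in a second SPR range that is not contiguous with BATs 0-3, so both kvmppc_read_bat and kvmppc_write_bat now bias the array index by 4 for the IBAT4U...IBAT7L and DBAT4U...DBAT7L cases instead of folding them onto BATs 0-3. A minimal sketch of that mapping, with the SPR bases left as parameters since only the arithmetic matters here:

/* Sketch of the SPR-number -> BAT-array-index mapping used above.
 * bat0_base is SPRN_IBAT0U or SPRN_DBAT0U, bat4_base is SPRN_IBAT4U or
 * SPRN_DBAT4U; every BAT occupies two consecutive SPRs (upper, lower). */
static int bat_index(int sprn, int bat0_base, int bat4_base)
{
	if (sprn >= bat4_base)
		return 4 + (sprn - bat4_base) / 2;	/* BATs 4..7 */
	return (sprn - bat0_base) / 2;			/* BATs 0..3 */
}

The parity test in kvmppc_read_bat (sprn % 2) then picks which half of bat->raw to hand back, matching how kvmppc_set_bat packs the upper and lower words into the single 64-bit raw value.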
diff --git a/arch/powerpc/kvm/book3s_64_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 1dd5a1ddfd0d..1dd5a1ddfd0d 100644
--- a/arch/powerpc/kvm/book3s_64_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index c1584d0cbce8..2f0bc928b08a 100644
--- a/arch/powerpc/kvm/book3s_64_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -24,36 +24,56 @@
24#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
25#include <asm/exception-64s.h> 25#include <asm/exception-64s.h>
26 26
27#define KVMPPC_HANDLE_EXIT .kvmppc_handle_exit 27#if defined(CONFIG_PPC_BOOK3S_64)
28#define ULONG_SIZE 8
29#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
30 28
31.macro DISABLE_INTERRUPTS 29#define ULONG_SIZE 8
32 mfmsr r0 30#define FUNC(name) GLUE(.,name)
33 rldicl r0,r0,48,1
34 rotldi r0,r0,16
35 mtmsrd r0,1
36.endm
37 31
32#define GET_SHADOW_VCPU(reg) \
33 addi reg, r13, PACA_KVM_SVCPU
34
35#define DISABLE_INTERRUPTS \
36 mfmsr r0; \
37 rldicl r0,r0,48,1; \
38 rotldi r0,r0,16; \
39 mtmsrd r0,1; \
40
41#elif defined(CONFIG_PPC_BOOK3S_32)
42
43#define ULONG_SIZE 4
44#define FUNC(name) name
45
46#define GET_SHADOW_VCPU(reg) \
47 lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2)
48
49#define DISABLE_INTERRUPTS \
50 mfmsr r0; \
51 rlwinm r0,r0,0,17,15; \
52 mtmsr r0; \
53
54#endif /* CONFIG_PPC_BOOK3S_XX */
55
56
57#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
38#define VCPU_LOAD_NVGPRS(vcpu) \ 58#define VCPU_LOAD_NVGPRS(vcpu) \
39 ld r14, VCPU_GPR(r14)(vcpu); \ 59 PPC_LL r14, VCPU_GPR(r14)(vcpu); \
40 ld r15, VCPU_GPR(r15)(vcpu); \ 60 PPC_LL r15, VCPU_GPR(r15)(vcpu); \
41 ld r16, VCPU_GPR(r16)(vcpu); \ 61 PPC_LL r16, VCPU_GPR(r16)(vcpu); \
42 ld r17, VCPU_GPR(r17)(vcpu); \ 62 PPC_LL r17, VCPU_GPR(r17)(vcpu); \
43 ld r18, VCPU_GPR(r18)(vcpu); \ 63 PPC_LL r18, VCPU_GPR(r18)(vcpu); \
44 ld r19, VCPU_GPR(r19)(vcpu); \ 64 PPC_LL r19, VCPU_GPR(r19)(vcpu); \
45 ld r20, VCPU_GPR(r20)(vcpu); \ 65 PPC_LL r20, VCPU_GPR(r20)(vcpu); \
46 ld r21, VCPU_GPR(r21)(vcpu); \ 66 PPC_LL r21, VCPU_GPR(r21)(vcpu); \
47 ld r22, VCPU_GPR(r22)(vcpu); \ 67 PPC_LL r22, VCPU_GPR(r22)(vcpu); \
48 ld r23, VCPU_GPR(r23)(vcpu); \ 68 PPC_LL r23, VCPU_GPR(r23)(vcpu); \
49 ld r24, VCPU_GPR(r24)(vcpu); \ 69 PPC_LL r24, VCPU_GPR(r24)(vcpu); \
50 ld r25, VCPU_GPR(r25)(vcpu); \ 70 PPC_LL r25, VCPU_GPR(r25)(vcpu); \
51 ld r26, VCPU_GPR(r26)(vcpu); \ 71 PPC_LL r26, VCPU_GPR(r26)(vcpu); \
52 ld r27, VCPU_GPR(r27)(vcpu); \ 72 PPC_LL r27, VCPU_GPR(r27)(vcpu); \
53 ld r28, VCPU_GPR(r28)(vcpu); \ 73 PPC_LL r28, VCPU_GPR(r28)(vcpu); \
54 ld r29, VCPU_GPR(r29)(vcpu); \ 74 PPC_LL r29, VCPU_GPR(r29)(vcpu); \
55 ld r30, VCPU_GPR(r30)(vcpu); \ 75 PPC_LL r30, VCPU_GPR(r30)(vcpu); \
56 ld r31, VCPU_GPR(r31)(vcpu); \ 76 PPC_LL r31, VCPU_GPR(r31)(vcpu); \
57 77
58/***************************************************************************** 78/*****************************************************************************
59 * * 79 * *
@@ -69,11 +89,11 @@ _GLOBAL(__kvmppc_vcpu_entry)
69 89
70kvm_start_entry: 90kvm_start_entry:
71 /* Write correct stack frame */ 91 /* Write correct stack frame */
72 mflr r0 92 mflr r0
73 std r0,16(r1) 93 PPC_STL r0,PPC_LR_STKOFF(r1)
74 94
75 /* Save host state to the stack */ 95 /* Save host state to the stack */
76 stdu r1, -SWITCH_FRAME_SIZE(r1) 96 PPC_STLU r1, -SWITCH_FRAME_SIZE(r1)
77 97
78 /* Save r3 (kvm_run) and r4 (vcpu) */ 98 /* Save r3 (kvm_run) and r4 (vcpu) */
79 SAVE_2GPRS(3, r1) 99 SAVE_2GPRS(3, r1)
@@ -82,33 +102,28 @@ kvm_start_entry:
82 SAVE_NVGPRS(r1) 102 SAVE_NVGPRS(r1)
83 103
84 /* Save LR */ 104 /* Save LR */
85 std r0, _LINK(r1) 105 PPC_STL r0, _LINK(r1)
86 106
87 /* Load non-volatile guest state from the vcpu */ 107 /* Load non-volatile guest state from the vcpu */
88 VCPU_LOAD_NVGPRS(r4) 108 VCPU_LOAD_NVGPRS(r4)
89 109
110 GET_SHADOW_VCPU(r5)
111
90 /* Save R1/R2 in the PACA */ 112 /* Save R1/R2 in the PACA */
91 std r1, PACA_KVM_HOST_R1(r13) 113 PPC_STL r1, SVCPU_HOST_R1(r5)
92 std r2, PACA_KVM_HOST_R2(r13) 114 PPC_STL r2, SVCPU_HOST_R2(r5)
93 115
94 /* XXX swap in/out on load? */ 116 /* XXX swap in/out on load? */
95 ld r3, VCPU_HIGHMEM_HANDLER(r4) 117 PPC_LL r3, VCPU_HIGHMEM_HANDLER(r4)
96 std r3, PACA_KVM_VMHANDLER(r13) 118 PPC_STL r3, SVCPU_VMHANDLER(r5)
97 119
98kvm_start_lightweight: 120kvm_start_lightweight:
99 121
100 ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */ 122 PPC_LL r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
101 ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
102
103 /* Load some guest state in the respective registers */
104 ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */
105 /* will be swapped in by rmcall */
106
107 ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */
108 mtlr r3 /* LR = r3 */
109 123
110 DISABLE_INTERRUPTS 124 DISABLE_INTERRUPTS
111 125
126#ifdef CONFIG_PPC_BOOK3S_64
112 /* Some guests may need to have dcbz set to 32 byte length. 127 /* Some guests may need to have dcbz set to 32 byte length.
113 * 128 *
114 * Usually we ensure that by patching the guest's instructions 129 * Usually we ensure that by patching the guest's instructions
@@ -118,7 +133,7 @@ kvm_start_lightweight:
118 * because that's a lot faster. 133 * because that's a lot faster.
119 */ 134 */
120 135
121 ld r3, VCPU_HFLAGS(r4) 136 PPC_LL r3, VCPU_HFLAGS(r4)
122 rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */ 137 rldicl. r3, r3, 0, 63 /* CR = ((r3 & 1) == 0) */
123 beq no_dcbz32_on 138 beq no_dcbz32_on
124 139
@@ -128,13 +143,15 @@ kvm_start_lightweight:
128 143
129no_dcbz32_on: 144no_dcbz32_on:
130 145
131 ld r6, VCPU_RMCALL(r4) 146#endif /* CONFIG_PPC_BOOK3S_64 */
147
148 PPC_LL r6, VCPU_RMCALL(r4)
132 mtctr r6 149 mtctr r6
133 150
134 ld r3, VCPU_TRAMPOLINE_ENTER(r4) 151 PPC_LL r3, VCPU_TRAMPOLINE_ENTER(r4)
135 LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 152 LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
136 153
137 /* Jump to SLB patching handlder and into our guest */ 154 /* Jump to segment patching handler and into our guest */
138 bctr 155 bctr
139 156
140/* 157/*
@@ -149,31 +166,20 @@ kvmppc_handler_highmem:
149 /* 166 /*
150 * Register usage at this point: 167 * Register usage at this point:
151 * 168 *
152 * R0 = guest last inst 169 * R1 = host R1
153 * R1 = host R1 170 * R2 = host R2
154 * R2 = host R2 171 * R12 = exit handler id
155 * R3 = guest PC 172 * R13 = PACA
156 * R4 = guest MSR 173 * SVCPU.* = guest *
157 * R5 = guest DAR
158 * R6 = guest DSISR
159 * R13 = PACA
160 * PACA.KVM.* = guest *
161 * 174 *
162 */ 175 */
163 176
164 /* R7 = vcpu */ 177 /* R7 = vcpu */
165 ld r7, GPR4(r1) 178 PPC_LL r7, GPR4(r1)
166 179
167 /* Now save the guest state */ 180#ifdef CONFIG_PPC_BOOK3S_64
168 181
169 stw r0, VCPU_LAST_INST(r7) 182 PPC_LL r5, VCPU_HFLAGS(r7)
170
171 std r3, VCPU_PC(r7)
172 std r4, VCPU_SHADOW_SRR1(r7)
173 std r5, VCPU_FAULT_DEAR(r7)
174 std r6, VCPU_FAULT_DSISR(r7)
175
176 ld r5, VCPU_HFLAGS(r7)
177 rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ 183 rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
178 beq no_dcbz32_off 184 beq no_dcbz32_off
179 185
@@ -184,35 +190,29 @@ kvmppc_handler_highmem:
184 190
185no_dcbz32_off: 191no_dcbz32_off:
186 192
187 std r14, VCPU_GPR(r14)(r7) 193#endif /* CONFIG_PPC_BOOK3S_64 */
188 std r15, VCPU_GPR(r15)(r7) 194
189 std r16, VCPU_GPR(r16)(r7) 195 PPC_STL r14, VCPU_GPR(r14)(r7)
190 std r17, VCPU_GPR(r17)(r7) 196 PPC_STL r15, VCPU_GPR(r15)(r7)
191 std r18, VCPU_GPR(r18)(r7) 197 PPC_STL r16, VCPU_GPR(r16)(r7)
192 std r19, VCPU_GPR(r19)(r7) 198 PPC_STL r17, VCPU_GPR(r17)(r7)
193 std r20, VCPU_GPR(r20)(r7) 199 PPC_STL r18, VCPU_GPR(r18)(r7)
194 std r21, VCPU_GPR(r21)(r7) 200 PPC_STL r19, VCPU_GPR(r19)(r7)
195 std r22, VCPU_GPR(r22)(r7) 201 PPC_STL r20, VCPU_GPR(r20)(r7)
196 std r23, VCPU_GPR(r23)(r7) 202 PPC_STL r21, VCPU_GPR(r21)(r7)
197 std r24, VCPU_GPR(r24)(r7) 203 PPC_STL r22, VCPU_GPR(r22)(r7)
198 std r25, VCPU_GPR(r25)(r7) 204 PPC_STL r23, VCPU_GPR(r23)(r7)
199 std r26, VCPU_GPR(r26)(r7) 205 PPC_STL r24, VCPU_GPR(r24)(r7)
200 std r27, VCPU_GPR(r27)(r7) 206 PPC_STL r25, VCPU_GPR(r25)(r7)
201 std r28, VCPU_GPR(r28)(r7) 207 PPC_STL r26, VCPU_GPR(r26)(r7)
202 std r29, VCPU_GPR(r29)(r7) 208 PPC_STL r27, VCPU_GPR(r27)(r7)
203 std r30, VCPU_GPR(r30)(r7) 209 PPC_STL r28, VCPU_GPR(r28)(r7)
204 std r31, VCPU_GPR(r31)(r7) 210 PPC_STL r29, VCPU_GPR(r29)(r7)
205 211 PPC_STL r30, VCPU_GPR(r30)(r7)
206 /* Save guest CTR */ 212 PPC_STL r31, VCPU_GPR(r31)(r7)
207 mfctr r5
208 std r5, VCPU_CTR(r7)
209
210 /* Save guest LR */
211 mflr r5
212 std r5, VCPU_LR(r7)
213 213
214 /* Restore host msr -> SRR1 */ 214 /* Restore host msr -> SRR1 */
215 ld r6, VCPU_HOST_MSR(r7) 215 PPC_LL r6, VCPU_HOST_MSR(r7)
216 216
217 /* 217 /*
218 * For some interrupts, we need to call the real Linux 218 * For some interrupts, we need to call the real Linux
@@ -228,9 +228,12 @@ no_dcbz32_off:
228 beq call_linux_handler 228 beq call_linux_handler
229 cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER 229 cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
230 beq call_linux_handler 230 beq call_linux_handler
231 cmpwi r12, BOOK3S_INTERRUPT_PERFMON
232 beq call_linux_handler
231 233
232 /* Back to EE=1 */ 234 /* Back to EE=1 */
233 mtmsr r6 235 mtmsr r6
236 sync
234 b kvm_return_point 237 b kvm_return_point
235 238
236call_linux_handler: 239call_linux_handler:
@@ -249,14 +252,14 @@ call_linux_handler:
249 */ 252 */
250 253
251 /* Restore host IP -> SRR0 */ 254 /* Restore host IP -> SRR0 */
252 ld r5, VCPU_HOST_RETIP(r7) 255 PPC_LL r5, VCPU_HOST_RETIP(r7)
253 256
254 /* XXX Better move to a safe function? 257 /* XXX Better move to a safe function?
255 * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */ 258 * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
256 259
257 mtlr r12 260 mtlr r12
258 261
259 ld r4, VCPU_TRAMPOLINE_LOWMEM(r7) 262 PPC_LL r4, VCPU_TRAMPOLINE_LOWMEM(r7)
260 mtsrr0 r4 263 mtsrr0 r4
261 LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 264 LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
262 mtsrr1 r3 265 mtsrr1 r3
@@ -274,7 +277,7 @@ kvm_return_point:
274 277
275 /* Restore r3 (kvm_run) and r4 (vcpu) */ 278 /* Restore r3 (kvm_run) and r4 (vcpu) */
276 REST_2GPRS(3, r1) 279 REST_2GPRS(3, r1)
277 bl KVMPPC_HANDLE_EXIT 280 bl FUNC(kvmppc_handle_exit)
278 281
279 /* If RESUME_GUEST, get back in the loop */ 282 /* If RESUME_GUEST, get back in the loop */
280 cmpwi r3, RESUME_GUEST 283 cmpwi r3, RESUME_GUEST
@@ -285,7 +288,7 @@ kvm_return_point:
285 288
286kvm_exit_loop: 289kvm_exit_loop:
287 290
288 ld r4, _LINK(r1) 291 PPC_LL r4, _LINK(r1)
289 mtlr r4 292 mtlr r4
290 293
291 /* Restore non-volatile host registers (r14 - r31) */ 294 /* Restore non-volatile host registers (r14 - r31) */
@@ -296,8 +299,8 @@ kvm_exit_loop:
296 299
297kvm_loop_heavyweight: 300kvm_loop_heavyweight:
298 301
299 ld r4, _LINK(r1) 302 PPC_LL r4, _LINK(r1)
300 std r4, (16 + SWITCH_FRAME_SIZE)(r1) 303 PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1)
301 304
302 /* Load vcpu and cpu_run */ 305 /* Load vcpu and cpu_run */
303 REST_2GPRS(3, r1) 306 REST_2GPRS(3, r1)
@@ -315,4 +318,3 @@ kvm_loop_lightweight:
315 318
316 /* Jump back into the beginning of this function */ 319 /* Jump back into the beginning of this function */
317 b kvm_start_lightweight 320 b kvm_start_lightweight
318
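Most of the churn in this file is mechanical: ld/std/stdu become PPC_LL/PPC_STL/PPC_STLU and the hard-coded 16(r1) link-register slot becomes PPC_LR_STKOFF, so the same source assembles for both Book3S_32 and Book3S_64. A simplified, illustration-only sketch of what those width macros boil down to (the real definitions live in asm/asm-compat.h and are additionally wrapped so they work from both C and assembler):

/* Simplified sketch of the 32/64-bit width abstraction the rewritten
 * assembly relies on; see asm/asm-compat.h for the real definitions. */
#ifdef __powerpc64__
#define PPC_LL		ld	/* load a register-sized value  */
#define PPC_STL		std	/* store a register-sized value */
#define PPC_STLU	stdu	/* store with update            */
#else
#define PPC_LL		lwz
#define PPC_STL		stw
#define PPC_STLU	stwu
#endif

GET_SHADOW_VCPU is the other per-arch hook: on 64-bit the shadow vcpu sits at a fixed offset inside the PACA (an addi from r13), while on 32-bit it is reached through the current thread struct, which is why the entry code now loads its address into a register instead of assuming r13.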
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
new file mode 100644
index 000000000000..a9f66abafcb3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -0,0 +1,1289 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright Novell Inc 2010
16 *
17 * Authors: Alexander Graf <agraf@suse.de>
18 */
19
20#include <asm/kvm.h>
21#include <asm/kvm_ppc.h>
22#include <asm/disassemble.h>
23#include <asm/kvm_book3s.h>
24#include <asm/kvm_fpu.h>
25#include <asm/reg.h>
26#include <asm/cacheflush.h>
27#include <linux/vmalloc.h>
28
29/* #define DEBUG */
30
31#ifdef DEBUG
32#define dprintk printk
33#else
34#define dprintk(...) do { } while(0);
35#endif
36
37#define OP_LFS 48
38#define OP_LFSU 49
39#define OP_LFD 50
40#define OP_LFDU 51
41#define OP_STFS 52
42#define OP_STFSU 53
43#define OP_STFD 54
44#define OP_STFDU 55
45#define OP_PSQ_L 56
46#define OP_PSQ_LU 57
47#define OP_PSQ_ST 60
48#define OP_PSQ_STU 61
49
50#define OP_31_LFSX 535
51#define OP_31_LFSUX 567
52#define OP_31_LFDX 599
53#define OP_31_LFDUX 631
54#define OP_31_STFSX 663
55#define OP_31_STFSUX 695
56#define OP_31_STFX 727
57#define OP_31_STFUX 759
58#define OP_31_LWIZX 887
59#define OP_31_STFIWX 983
60
61#define OP_59_FADDS 21
62#define OP_59_FSUBS 20
63#define OP_59_FSQRTS 22
64#define OP_59_FDIVS 18
65#define OP_59_FRES 24
66#define OP_59_FMULS 25
67#define OP_59_FRSQRTES 26
68#define OP_59_FMSUBS 28
69#define OP_59_FMADDS 29
70#define OP_59_FNMSUBS 30
71#define OP_59_FNMADDS 31
72
73#define OP_63_FCMPU 0
74#define OP_63_FCPSGN 8
75#define OP_63_FRSP 12
76#define OP_63_FCTIW 14
77#define OP_63_FCTIWZ 15
78#define OP_63_FDIV 18
79#define OP_63_FADD 21
80#define OP_63_FSQRT 22
81#define OP_63_FSEL 23
82#define OP_63_FRE 24
83#define OP_63_FMUL 25
84#define OP_63_FRSQRTE 26
85#define OP_63_FMSUB 28
86#define OP_63_FMADD 29
87#define OP_63_FNMSUB 30
88#define OP_63_FNMADD 31
89#define OP_63_FCMPO 32
90#define OP_63_MTFSB1 38 // XXX
91#define OP_63_FSUB 20
92#define OP_63_FNEG 40
93#define OP_63_MCRFS 64
94#define OP_63_MTFSB0 70
95#define OP_63_FMR 72
96#define OP_63_MTFSFI 134
97#define OP_63_FABS 264
98#define OP_63_MFFS 583
99#define OP_63_MTFSF 711
100
101#define OP_4X_PS_CMPU0 0
102#define OP_4X_PSQ_LX 6
103#define OP_4XW_PSQ_STX 7
104#define OP_4A_PS_SUM0 10
105#define OP_4A_PS_SUM1 11
106#define OP_4A_PS_MULS0 12
107#define OP_4A_PS_MULS1 13
108#define OP_4A_PS_MADDS0 14
109#define OP_4A_PS_MADDS1 15
110#define OP_4A_PS_DIV 18
111#define OP_4A_PS_SUB 20
112#define OP_4A_PS_ADD 21
113#define OP_4A_PS_SEL 23
114#define OP_4A_PS_RES 24
115#define OP_4A_PS_MUL 25
116#define OP_4A_PS_RSQRTE 26
117#define OP_4A_PS_MSUB 28
118#define OP_4A_PS_MADD 29
119#define OP_4A_PS_NMSUB 30
120#define OP_4A_PS_NMADD 31
121#define OP_4X_PS_CMPO0 32
122#define OP_4X_PSQ_LUX 38
123#define OP_4XW_PSQ_STUX 39
124#define OP_4X_PS_NEG 40
125#define OP_4X_PS_CMPU1 64
126#define OP_4X_PS_MR 72
127#define OP_4X_PS_CMPO1 96
128#define OP_4X_PS_NABS 136
129#define OP_4X_PS_ABS 264
130#define OP_4X_PS_MERGE00 528
131#define OP_4X_PS_MERGE01 560
132#define OP_4X_PS_MERGE10 592
133#define OP_4X_PS_MERGE11 624
134
135#define SCALAR_NONE 0
136#define SCALAR_HIGH (1 << 0)
137#define SCALAR_LOW (1 << 1)
138#define SCALAR_NO_PS0 (1 << 2)
139#define SCALAR_NO_PS1 (1 << 3)
140
141#define GQR_ST_TYPE_MASK 0x00000007
142#define GQR_ST_TYPE_SHIFT 0
143#define GQR_ST_SCALE_MASK 0x00003f00
144#define GQR_ST_SCALE_SHIFT 8
145#define GQR_LD_TYPE_MASK 0x00070000
146#define GQR_LD_TYPE_SHIFT 16
147#define GQR_LD_SCALE_MASK 0x3f000000
148#define GQR_LD_SCALE_SHIFT 24
149
150#define GQR_QUANTIZE_FLOAT 0
151#define GQR_QUANTIZE_U8 4
152#define GQR_QUANTIZE_U16 5
153#define GQR_QUANTIZE_S8 6
154#define GQR_QUANTIZE_S16 7
155
156#define FPU_LS_SINGLE 0
157#define FPU_LS_DOUBLE 1
158#define FPU_LS_SINGLE_LOW 2
159
160static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
161{
162 struct thread_struct t;
163
164 t.fpscr.val = vcpu->arch.fpscr;
165 cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t);
166}
167
168static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
169{
170 u64 dsisr;
171
172 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0);
173 vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0);
174 vcpu->arch.dear = eaddr;
175 /* Page Fault */
176 dsisr = kvmppc_set_field(0, 33, 33, 1);
177 if (is_store)
178 to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1);
179 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
180}
181
182static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
183 int rs, ulong addr, int ls_type)
184{
185 int emulated = EMULATE_FAIL;
186 struct thread_struct t;
187 int r;
188 char tmp[8];
189 int len = sizeof(u32);
190
191 if (ls_type == FPU_LS_DOUBLE)
192 len = sizeof(u64);
193
194 t.fpscr.val = vcpu->arch.fpscr;
195
196 /* read from memory */
197 r = kvmppc_ld(vcpu, &addr, len, tmp, true);
198 vcpu->arch.paddr_accessed = addr;
199
200 if (r < 0) {
201 kvmppc_inject_pf(vcpu, addr, false);
202 goto done_load;
203 } else if (r == EMULATE_DO_MMIO) {
204 emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1);
205 goto done_load;
206 }
207
208 emulated = EMULATE_DONE;
209
210 /* put in registers */
211 switch (ls_type) {
212 case FPU_LS_SINGLE:
213 cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t);
214 vcpu->arch.qpr[rs] = *((u32*)tmp);
215 break;
216 case FPU_LS_DOUBLE:
217 vcpu->arch.fpr[rs] = *((u64*)tmp);
218 break;
219 }
220
221 dprintk(KERN_INFO "KVM: FPR_LD [0x%llx] at 0x%lx (%d)\n", *(u64*)tmp,
222 addr, len);
223
224done_load:
225 return emulated;
226}
227
228static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
229 int rs, ulong addr, int ls_type)
230{
231 int emulated = EMULATE_FAIL;
232 struct thread_struct t;
233 int r;
234 char tmp[8];
235 u64 val;
236 int len;
237
238 t.fpscr.val = vcpu->arch.fpscr;
239
240 switch (ls_type) {
241 case FPU_LS_SINGLE:
242 cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t);
243 val = *((u32*)tmp);
244 len = sizeof(u32);
245 break;
246 case FPU_LS_SINGLE_LOW:
247 *((u32*)tmp) = vcpu->arch.fpr[rs];
248 val = vcpu->arch.fpr[rs] & 0xffffffff;
249 len = sizeof(u32);
250 break;
251 case FPU_LS_DOUBLE:
252 *((u64*)tmp) = vcpu->arch.fpr[rs];
253 val = vcpu->arch.fpr[rs];
254 len = sizeof(u64);
255 break;
256 default:
257 val = 0;
258 len = 0;
259 }
260
261 r = kvmppc_st(vcpu, &addr, len, tmp, true);
262 vcpu->arch.paddr_accessed = addr;
263 if (r < 0) {
264 kvmppc_inject_pf(vcpu, addr, true);
265 } else if (r == EMULATE_DO_MMIO) {
266 emulated = kvmppc_handle_store(run, vcpu, val, len, 1);
267 } else {
268 emulated = EMULATE_DONE;
269 }
270
271 dprintk(KERN_INFO "KVM: FPR_ST [0x%llx] at 0x%lx (%d)\n",
272 val, addr, len);
273
274 return emulated;
275}
276
277static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
278 int rs, ulong addr, bool w, int i)
279{
280 int emulated = EMULATE_FAIL;
281 struct thread_struct t;
282 int r;
283 float one = 1.0;
284 u32 tmp[2];
285
286 t.fpscr.val = vcpu->arch.fpscr;
287
288 /* read from memory */
289 if (w) {
290 r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true);
291 memcpy(&tmp[1], &one, sizeof(u32));
292 } else {
293 r = kvmppc_ld(vcpu, &addr, sizeof(u32) * 2, tmp, true);
294 }
295 vcpu->arch.paddr_accessed = addr;
296 if (r < 0) {
297 kvmppc_inject_pf(vcpu, addr, false);
298 goto done_load;
299 } else if ((r == EMULATE_DO_MMIO) && w) {
300 emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1);
301 vcpu->arch.qpr[rs] = tmp[1];
302 goto done_load;
303 } else if (r == EMULATE_DO_MMIO) {
304 emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1);
305 goto done_load;
306 }
307
308 emulated = EMULATE_DONE;
309
310 /* put in registers */
311 cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t);
312 vcpu->arch.qpr[rs] = tmp[1];
313
314 dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0],
315 tmp[1], addr, w ? 4 : 8);
316
317done_load:
318 return emulated;
319}
320
321static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
322 int rs, ulong addr, bool w, int i)
323{
324 int emulated = EMULATE_FAIL;
325 struct thread_struct t;
326 int r;
327 u32 tmp[2];
328 int len = w ? sizeof(u32) : sizeof(u64);
329
330 t.fpscr.val = vcpu->arch.fpscr;
331
332 cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t);
333 tmp[1] = vcpu->arch.qpr[rs];
334
335 r = kvmppc_st(vcpu, &addr, len, tmp, true);
336 vcpu->arch.paddr_accessed = addr;
337 if (r < 0) {
338 kvmppc_inject_pf(vcpu, addr, true);
339 } else if ((r == EMULATE_DO_MMIO) && w) {
340 emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1);
341 } else if (r == EMULATE_DO_MMIO) {
342 u64 val = ((u64)tmp[0] << 32) | tmp[1];
343 emulated = kvmppc_handle_store(run, vcpu, val, 8, 1);
344 } else {
345 emulated = EMULATE_DONE;
346 }
347
348 dprintk(KERN_INFO "KVM: PSQ_ST [0x%x, 0x%x] at 0x%lx (%d)\n",
349 tmp[0], tmp[1], addr, len);
350
351 return emulated;
352}
353
354/*
355 * Cuts out inst bits with ordering according to spec.
356 * That means the leftmost bit is zero. All given bits are included.
357 */
358static inline u32 inst_get_field(u32 inst, int msb, int lsb)
359{
360 return kvmppc_get_field(inst, msb + 32, lsb + 32);
361}
362
363/*
364 * Replaces inst bits with ordering according to spec.
365 */
366static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
367{
368 return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
369}
370
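Because the PowerPC ISA numbers instruction bits from the most significant end (bit 0 is the MSB), inst_get_field(inst, msb, lsb) is not a plain shift-by-lsb; the +32 above simply re-bases the 32-bit instruction into kvmppc_get_field()'s 64-bit bit numbering. A stand-alone reference equivalent, assuming fields narrower than 32 bits:

/* Reference-only equivalent of inst_get_field() for a 32-bit opcode:
 * IBM bits msb..lsb (bit 0 = MSB) map to a right shift of (31 - lsb).
 * inst_get_field(inst, 21, 30) is therefore the familiar
 * (inst >> 1) & 0x3ff extended-opcode field compared against the
 * OP_4X_* / OP_59_* / OP_63_* values in this file. */
static inline u32 ibm_field32(u32 inst, int msb, int lsb)
{
	return (inst >> (31 - lsb)) & ((1u << (lsb - msb + 1)) - 1);
}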
371bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
372{
373 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
374 return false;
375
376 switch (get_op(inst)) {
377 case OP_PSQ_L:
378 case OP_PSQ_LU:
379 case OP_PSQ_ST:
380 case OP_PSQ_STU:
381 case OP_LFS:
382 case OP_LFSU:
383 case OP_LFD:
384 case OP_LFDU:
385 case OP_STFS:
386 case OP_STFSU:
387 case OP_STFD:
388 case OP_STFDU:
389 return true;
390 case 4:
391 /* X form */
392 switch (inst_get_field(inst, 21, 30)) {
393 case OP_4X_PS_CMPU0:
394 case OP_4X_PSQ_LX:
395 case OP_4X_PS_CMPO0:
396 case OP_4X_PSQ_LUX:
397 case OP_4X_PS_NEG:
398 case OP_4X_PS_CMPU1:
399 case OP_4X_PS_MR:
400 case OP_4X_PS_CMPO1:
401 case OP_4X_PS_NABS:
402 case OP_4X_PS_ABS:
403 case OP_4X_PS_MERGE00:
404 case OP_4X_PS_MERGE01:
405 case OP_4X_PS_MERGE10:
406 case OP_4X_PS_MERGE11:
407 return true;
408 }
409 /* XW form */
410 switch (inst_get_field(inst, 25, 30)) {
411 case OP_4XW_PSQ_STX:
412 case OP_4XW_PSQ_STUX:
413 return true;
414 }
415 /* A form */
416 switch (inst_get_field(inst, 26, 30)) {
417 case OP_4A_PS_SUM1:
418 case OP_4A_PS_SUM0:
419 case OP_4A_PS_MULS0:
420 case OP_4A_PS_MULS1:
421 case OP_4A_PS_MADDS0:
422 case OP_4A_PS_MADDS1:
423 case OP_4A_PS_DIV:
424 case OP_4A_PS_SUB:
425 case OP_4A_PS_ADD:
426 case OP_4A_PS_SEL:
427 case OP_4A_PS_RES:
428 case OP_4A_PS_MUL:
429 case OP_4A_PS_RSQRTE:
430 case OP_4A_PS_MSUB:
431 case OP_4A_PS_MADD:
432 case OP_4A_PS_NMSUB:
433 case OP_4A_PS_NMADD:
434 return true;
435 }
436 break;
437 case 59:
438 switch (inst_get_field(inst, 21, 30)) {
439 case OP_59_FADDS:
440 case OP_59_FSUBS:
441 case OP_59_FDIVS:
442 case OP_59_FRES:
443 case OP_59_FRSQRTES:
444 return true;
445 }
446 switch (inst_get_field(inst, 26, 30)) {
447 case OP_59_FMULS:
448 case OP_59_FMSUBS:
449 case OP_59_FMADDS:
450 case OP_59_FNMSUBS:
451 case OP_59_FNMADDS:
452 return true;
453 }
454 break;
455 case 63:
456 switch (inst_get_field(inst, 21, 30)) {
457 case OP_63_MTFSB0:
458 case OP_63_MTFSB1:
459 case OP_63_MTFSF:
460 case OP_63_MTFSFI:
461 case OP_63_MCRFS:
462 case OP_63_MFFS:
463 case OP_63_FCMPU:
464 case OP_63_FCMPO:
465 case OP_63_FNEG:
466 case OP_63_FMR:
467 case OP_63_FABS:
468 case OP_63_FRSP:
469 case OP_63_FDIV:
470 case OP_63_FADD:
471 case OP_63_FSUB:
472 case OP_63_FCTIW:
473 case OP_63_FCTIWZ:
474 case OP_63_FRSQRTE:
475 case OP_63_FCPSGN:
476 return true;
477 }
478 switch (inst_get_field(inst, 26, 30)) {
479 case OP_63_FMUL:
480 case OP_63_FSEL:
481 case OP_63_FMSUB:
482 case OP_63_FMADD:
483 case OP_63_FNMSUB:
484 case OP_63_FNMADD:
485 return true;
486 }
487 break;
488 case 31:
489 switch (inst_get_field(inst, 21, 30)) {
490 case OP_31_LFSX:
491 case OP_31_LFSUX:
492 case OP_31_LFDX:
493 case OP_31_LFDUX:
494 case OP_31_STFSX:
495 case OP_31_STFSUX:
496 case OP_31_STFX:
497 case OP_31_STFUX:
498 case OP_31_STFIWX:
499 return true;
500 }
501 break;
502 }
503
504 return false;
505}
506
507static int get_d_signext(u32 inst)
508{
509 int d = inst & 0x8ff;
510
511 if (d & 0x800)
512 return -(d & 0x7ff);
513
514 return (d & 0x7ff);
515}
516
517static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc,
518 int reg_out, int reg_in1, int reg_in2,
519 int reg_in3, int scalar,
520 void (*func)(struct thread_struct *t,
521 u32 *dst, u32 *src1,
522 u32 *src2, u32 *src3))
523{
524 u32 *qpr = vcpu->arch.qpr;
525 u64 *fpr = vcpu->arch.fpr;
526 u32 ps0_out;
527 u32 ps0_in1, ps0_in2, ps0_in3;
528 u32 ps1_in1, ps1_in2, ps1_in3;
529 struct thread_struct t;
530 t.fpscr.val = vcpu->arch.fpscr;
531
532 /* RC */
533 WARN_ON(rc);
534
535 /* PS0 */
536 cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t);
537 cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t);
538 cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t);
539
540 if (scalar & SCALAR_LOW)
541 ps0_in2 = qpr[reg_in2];
542
543 func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3);
544
545 dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
546 ps0_in1, ps0_in2, ps0_in3, ps0_out);
547
548 if (!(scalar & SCALAR_NO_PS0))
549 cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t);
550
551 /* PS1 */
552 ps1_in1 = qpr[reg_in1];
553 ps1_in2 = qpr[reg_in2];
554 ps1_in3 = qpr[reg_in3];
555
556 if (scalar & SCALAR_HIGH)
557 ps1_in2 = ps0_in2;
558
559 if (!(scalar & SCALAR_NO_PS1))
560 func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3);
561
562 dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n",
563 ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]);
564
565 return EMULATE_DONE;
566}
567
568static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc,
569 int reg_out, int reg_in1, int reg_in2,
570 int scalar,
571 void (*func)(struct thread_struct *t,
572 u32 *dst, u32 *src1,
573 u32 *src2))
574{
575 u32 *qpr = vcpu->arch.qpr;
576 u64 *fpr = vcpu->arch.fpr;
577 u32 ps0_out;
578 u32 ps0_in1, ps0_in2;
579 u32 ps1_out;
580 u32 ps1_in1, ps1_in2;
581 struct thread_struct t;
582 t.fpscr.val = vcpu->arch.fpscr;
583
584 /* RC */
585 WARN_ON(rc);
586
587 /* PS0 */
588 cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t);
589
590 if (scalar & SCALAR_LOW)
591 ps0_in2 = qpr[reg_in2];
592 else
593 cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t);
594
595 func(&t, &ps0_out, &ps0_in1, &ps0_in2);
596
597 if (!(scalar & SCALAR_NO_PS0)) {
598 dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n",
599 ps0_in1, ps0_in2, ps0_out);
600
601 cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t);
602 }
603
604 /* PS1 */
605 ps1_in1 = qpr[reg_in1];
606 ps1_in2 = qpr[reg_in2];
607
608 if (scalar & SCALAR_HIGH)
609 ps1_in2 = ps0_in2;
610
611 func(&t, &ps1_out, &ps1_in1, &ps1_in2);
612
613 if (!(scalar & SCALAR_NO_PS1)) {
614 qpr[reg_out] = ps1_out;
615
616 dprintk(KERN_INFO "PS2 ps1 -> f(0x%x, 0x%x) = 0x%x\n",
617 ps1_in1, ps1_in2, qpr[reg_out]);
618 }
619
620 return EMULATE_DONE;
621}
622
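The SCALAR_* flags handed to these helpers encode two things: which slot of the second operand gets broadcast (SCALAR_HIGH/SCALAR_LOW) and which half of the result is left untouched (SCALAR_NO_PS0/SCALAR_NO_PS1). A small illustration-only model of how the opcode dispatch further down combines them:

/* Illustration only -- slot semantics implied by the flag combinations
 * used in the dispatch below (0/1 are the ps0/ps1 slots of d/a/b/c):
 *   ps_muls0 (SCALAR_HIGH):               d0 = a0*c0;  d1 = a1*c0;
 *   ps_muls1 (SCALAR_LOW):                d0 = a0*c1;  d1 = a1*c1;
 *   ps_sum0  (SCALAR_NO_PS1|SCALAR_LOW):  d0 = a0+b1;  d1 = c1;
 *   ps_sum1  (SCALAR_NO_PS0|SCALAR_HIGH): d1 = a0+b1;  d0 = c0;
 */
struct ps_model { float ps0, ps1; };

static struct ps_model ps_sum0_model(struct ps_model a, struct ps_model b,
				     struct ps_model c)
{
	struct ps_model d;
	d.ps0 = a.ps0 + b.ps1;	/* SCALAR_LOW: ps0 takes frB's low slot */
	d.ps1 = c.ps1;		/* SCALAR_NO_PS1: ps1 copied from frC   */
	return d;
}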
623static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
624 int reg_out, int reg_in,
625 void (*func)(struct thread_struct *t,
626 u32 *dst, u32 *src1))
627{
628 u32 *qpr = vcpu->arch.qpr;
629 u64 *fpr = vcpu->arch.fpr;
630 u32 ps0_out, ps0_in;
631 u32 ps1_in;
632 struct thread_struct t;
633 t.fpscr.val = vcpu->arch.fpscr;
634
635 /* RC */
636 WARN_ON(rc);
637
638 /* PS0 */
639 cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t);
640 func(&t, &ps0_out, &ps0_in);
641
642 dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n",
643 ps0_in, ps0_out);
644
645 cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t);
646
647 /* PS1 */
648 ps1_in = qpr[reg_in];
649 func(&t, &qpr[reg_out], &ps1_in);
650
651 dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n",
652 ps1_in, qpr[reg_out]);
653
654 return EMULATE_DONE;
655}
656
657int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
658{
659 u32 inst = kvmppc_get_last_inst(vcpu);
660 enum emulation_result emulated = EMULATE_DONE;
661
662 int ax_rd = inst_get_field(inst, 6, 10);
663 int ax_ra = inst_get_field(inst, 11, 15);
664 int ax_rb = inst_get_field(inst, 16, 20);
665 int ax_rc = inst_get_field(inst, 21, 25);
666 short full_d = inst_get_field(inst, 16, 31);
667
668 u64 *fpr_d = &vcpu->arch.fpr[ax_rd];
669 u64 *fpr_a = &vcpu->arch.fpr[ax_ra];
670 u64 *fpr_b = &vcpu->arch.fpr[ax_rb];
671 u64 *fpr_c = &vcpu->arch.fpr[ax_rc];
672
673 bool rcomp = (inst & 1) ? true : false;
674 u32 cr = kvmppc_get_cr(vcpu);
675 struct thread_struct t;
676#ifdef DEBUG
677 int i;
678#endif
679
680 t.fpscr.val = vcpu->arch.fpscr;
681
682 if (!kvmppc_inst_is_paired_single(vcpu, inst))
683 return EMULATE_FAIL;
684
685 if (!(vcpu->arch.msr & MSR_FP)) {
686 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL);
687 return EMULATE_AGAIN;
688 }
689
690 kvmppc_giveup_ext(vcpu, MSR_FP);
691 preempt_disable();
692 enable_kernel_fp();
693 /* Do we need to clear FE0 / FE1 here? Don't think so. */
694
695#ifdef DEBUG
696 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
697 u32 f;
698 cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t);
699 dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n",
700 i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]);
701 }
702#endif
703
704 switch (get_op(inst)) {
705 case OP_PSQ_L:
706 {
707 ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
708 bool w = inst_get_field(inst, 16, 16) ? true : false;
709 int i = inst_get_field(inst, 17, 19);
710
711 addr += get_d_signext(inst);
712 emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
713 break;
714 }
715 case OP_PSQ_LU:
716 {
717 ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
718 bool w = inst_get_field(inst, 16, 16) ? true : false;
719 int i = inst_get_field(inst, 17, 19);
720
721 addr += get_d_signext(inst);
722 emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
723
724 if (emulated == EMULATE_DONE)
725 kvmppc_set_gpr(vcpu, ax_ra, addr);
726 break;
727 }
728 case OP_PSQ_ST:
729 {
730 ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
731 bool w = inst_get_field(inst, 16, 16) ? true : false;
732 int i = inst_get_field(inst, 17, 19);
733
734 addr += get_d_signext(inst);
735 emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
736 break;
737 }
738 case OP_PSQ_STU:
739 {
740 ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
741 bool w = inst_get_field(inst, 16, 16) ? true : false;
742 int i = inst_get_field(inst, 17, 19);
743
744 addr += get_d_signext(inst);
745 emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
746
747 if (emulated == EMULATE_DONE)
748 kvmppc_set_gpr(vcpu, ax_ra, addr);
749 break;
750 }
751 case 4:
752 /* X form */
753 switch (inst_get_field(inst, 21, 30)) {
754 case OP_4X_PS_CMPU0:
755 /* XXX */
756 emulated = EMULATE_FAIL;
757 break;
758 case OP_4X_PSQ_LX:
759 {
760 ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
761 bool w = inst_get_field(inst, 21, 21) ? true : false;
762 int i = inst_get_field(inst, 22, 24);
763
764 addr += kvmppc_get_gpr(vcpu, ax_rb);
765 emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
766 break;
767 }
768 case OP_4X_PS_CMPO0:
769 /* XXX */
770 emulated = EMULATE_FAIL;
771 break;
772 case OP_4X_PSQ_LUX:
773 {
774 ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
775 bool w = inst_get_field(inst, 21, 21) ? true : false;
776 int i = inst_get_field(inst, 22, 24);
777
778 addr += kvmppc_get_gpr(vcpu, ax_rb);
779 emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
780
781 if (emulated == EMULATE_DONE)
782 kvmppc_set_gpr(vcpu, ax_ra, addr);
783 break;
784 }
785 case OP_4X_PS_NEG:
786 vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
787 vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL;
788 vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
789 vcpu->arch.qpr[ax_rd] ^= 0x80000000;
790 break;
791 case OP_4X_PS_CMPU1:
792 /* XXX */
793 emulated = EMULATE_FAIL;
794 break;
795 case OP_4X_PS_MR:
796 WARN_ON(rcomp);
797 vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
798 vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
799 break;
800 case OP_4X_PS_CMPO1:
801 /* XXX */
802 emulated = EMULATE_FAIL;
803 break;
804 case OP_4X_PS_NABS:
805 WARN_ON(rcomp);
806 vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
807 vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL;
808 vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
809 vcpu->arch.qpr[ax_rd] |= 0x80000000;
810 break;
811 case OP_4X_PS_ABS:
812 WARN_ON(rcomp);
813 vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb];
814 vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL;
815 vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
816 vcpu->arch.qpr[ax_rd] &= ~0x80000000;
817 break;
818 case OP_4X_PS_MERGE00:
819 WARN_ON(rcomp);
820 vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
821 /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
822 cvt_df((double*)&vcpu->arch.fpr[ax_rb],
823 (float*)&vcpu->arch.qpr[ax_rd], &t);
824 break;
825 case OP_4X_PS_MERGE01:
826 WARN_ON(rcomp);
827 vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra];
828 vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
829 break;
830 case OP_4X_PS_MERGE10:
831 WARN_ON(rcomp);
832 /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
833 cvt_fd((float*)&vcpu->arch.qpr[ax_ra],
834 (double*)&vcpu->arch.fpr[ax_rd], &t);
835 /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */
836 cvt_df((double*)&vcpu->arch.fpr[ax_rb],
837 (float*)&vcpu->arch.qpr[ax_rd], &t);
838 break;
839 case OP_4X_PS_MERGE11:
840 WARN_ON(rcomp);
841 /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */
842 cvt_fd((float*)&vcpu->arch.qpr[ax_ra],
843 (double*)&vcpu->arch.fpr[ax_rd], &t);
844 vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb];
845 break;
846 }
847 /* XW form */
848 switch (inst_get_field(inst, 25, 30)) {
849 case OP_4XW_PSQ_STX:
850 {
851 ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
852 bool w = inst_get_field(inst, 21, 21) ? true : false;
853 int i = inst_get_field(inst, 22, 24);
854
855 addr += kvmppc_get_gpr(vcpu, ax_rb);
856 emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
857 break;
858 }
859 case OP_4XW_PSQ_STUX:
860 {
861 ulong addr = kvmppc_get_gpr(vcpu, ax_ra);
862 bool w = inst_get_field(inst, 21, 21) ? true : false;
863 int i = inst_get_field(inst, 22, 24);
864
865 addr += kvmppc_get_gpr(vcpu, ax_rb);
866 emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
867
868 if (emulated == EMULATE_DONE)
869 kvmppc_set_gpr(vcpu, ax_ra, addr);
870 break;
871 }
872 }
873 /* A form */
874 switch (inst_get_field(inst, 26, 30)) {
875 case OP_4A_PS_SUM1:
876 emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
877 ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds);
878 vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc];
879 break;
880 case OP_4A_PS_SUM0:
881 emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
882 ax_ra, ax_rb, SCALAR_NO_PS1 | SCALAR_LOW, fps_fadds);
883 vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rc];
884 break;
885 case OP_4A_PS_MULS0:
886 emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
887 ax_ra, ax_rc, SCALAR_HIGH, fps_fmuls);
888 break;
889 case OP_4A_PS_MULS1:
890 emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
891 ax_ra, ax_rc, SCALAR_LOW, fps_fmuls);
892 break;
893 case OP_4A_PS_MADDS0:
894 emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
895 ax_ra, ax_rc, ax_rb, SCALAR_HIGH, fps_fmadds);
896 break;
897 case OP_4A_PS_MADDS1:
898 emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
899 ax_ra, ax_rc, ax_rb, SCALAR_LOW, fps_fmadds);
900 break;
901 case OP_4A_PS_DIV:
902 emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
903 ax_ra, ax_rb, SCALAR_NONE, fps_fdivs);
904 break;
905 case OP_4A_PS_SUB:
906 emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
907 ax_ra, ax_rb, SCALAR_NONE, fps_fsubs);
908 break;
909 case OP_4A_PS_ADD:
910 emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
911 ax_ra, ax_rb, SCALAR_NONE, fps_fadds);
912 break;
913 case OP_4A_PS_SEL:
914 emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
915 ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fsel);
916 break;
917 case OP_4A_PS_RES:
918 emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd,
919 ax_rb, fps_fres);
920 break;
921 case OP_4A_PS_MUL:
922 emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd,
923 ax_ra, ax_rc, SCALAR_NONE, fps_fmuls);
924 break;
925 case OP_4A_PS_RSQRTE:
926 emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd,
927 ax_rb, fps_frsqrte);
928 break;
929 case OP_4A_PS_MSUB:
930 emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
931 ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmsubs);
932 break;
933 case OP_4A_PS_MADD:
934 emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
935 ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmadds);
936 break;
937 case OP_4A_PS_NMSUB:
938 emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
939 ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmsubs);
940 break;
941 case OP_4A_PS_NMADD:
942 emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd,
943 ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmadds);
944 break;
945 }
946 break;
947
948 /* Real FPU operations */
949
950 case OP_LFS:
951 {
952 ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
953
954 emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
955 FPU_LS_SINGLE);
956 break;
957 }
958 case OP_LFSU:
959 {
960 ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
961
962 emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
963 FPU_LS_SINGLE);
964
965 if (emulated == EMULATE_DONE)
966 kvmppc_set_gpr(vcpu, ax_ra, addr);
967 break;
968 }
969 case OP_LFD:
970 {
971 ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
972
973 emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
974 FPU_LS_DOUBLE);
975 break;
976 }
977 case OP_LFDU:
978 {
979 ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
980
981 emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
982 FPU_LS_DOUBLE);
983
984 if (emulated == EMULATE_DONE)
985 kvmppc_set_gpr(vcpu, ax_ra, addr);
986 break;
987 }
988 case OP_STFS:
989 {
990 ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
991
992 emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
993 FPU_LS_SINGLE);
994 break;
995 }
996 case OP_STFSU:
997 {
998 ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
999
1000 emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
1001 FPU_LS_SINGLE);
1002
1003 if (emulated == EMULATE_DONE)
1004 kvmppc_set_gpr(vcpu, ax_ra, addr);
1005 break;
1006 }
1007 case OP_STFD:
1008 {
1009 ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
1010
1011 emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
1012 FPU_LS_DOUBLE);
1013 break;
1014 }
1015 case OP_STFDU:
1016 {
1017 ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
1018
1019 emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
1020 FPU_LS_DOUBLE);
1021
1022 if (emulated == EMULATE_DONE)
1023 kvmppc_set_gpr(vcpu, ax_ra, addr);
1024 break;
1025 }
1026 case 31:
1027 switch (inst_get_field(inst, 21, 30)) {
1028 case OP_31_LFSX:
1029 {
1030 ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
1031
1032 addr += kvmppc_get_gpr(vcpu, ax_rb);
1033 emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
1034 addr, FPU_LS_SINGLE);
1035 break;
1036 }
1037 case OP_31_LFSUX:
1038 {
1039 ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
1040 kvmppc_get_gpr(vcpu, ax_rb);
1041
1042 emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
1043 addr, FPU_LS_SINGLE);
1044
1045 if (emulated == EMULATE_DONE)
1046 kvmppc_set_gpr(vcpu, ax_ra, addr);
1047 break;
1048 }
1049 case OP_31_LFDX:
1050 {
1051 ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
1052 kvmppc_get_gpr(vcpu, ax_rb);
1053
1054 emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
1055 addr, FPU_LS_DOUBLE);
1056 break;
1057 }
1058 case OP_31_LFDUX:
1059 {
1060 ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
1061 kvmppc_get_gpr(vcpu, ax_rb);
1062
1063 emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
1064 addr, FPU_LS_DOUBLE);
1065
1066 if (emulated == EMULATE_DONE)
1067 kvmppc_set_gpr(vcpu, ax_ra, addr);
1068 break;
1069 }
1070 case OP_31_STFSX:
1071 {
1072 ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
1073 kvmppc_get_gpr(vcpu, ax_rb);
1074
1075 emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
1076 addr, FPU_LS_SINGLE);
1077 break;
1078 }
1079 case OP_31_STFSUX:
1080 {
1081 ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
1082 kvmppc_get_gpr(vcpu, ax_rb);
1083
1084 emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
1085 addr, FPU_LS_SINGLE);
1086
1087 if (emulated == EMULATE_DONE)
1088 kvmppc_set_gpr(vcpu, ax_ra, addr);
1089 break;
1090 }
1091 case OP_31_STFX:
1092 {
1093 ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
1094 kvmppc_get_gpr(vcpu, ax_rb);
1095
1096 emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
1097 addr, FPU_LS_DOUBLE);
1098 break;
1099 }
1100 case OP_31_STFUX:
1101 {
1102 ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
1103 kvmppc_get_gpr(vcpu, ax_rb);
1104
1105 emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
1106 addr, FPU_LS_DOUBLE);
1107
1108 if (emulated == EMULATE_DONE)
1109 kvmppc_set_gpr(vcpu, ax_ra, addr);
1110 break;
1111 }
1112 case OP_31_STFIWX:
1113 {
1114 ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
1115 kvmppc_get_gpr(vcpu, ax_rb);
1116
1117 emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
1118 addr,
1119 FPU_LS_SINGLE_LOW);
1120 break;
1121 }
1122 break;
1123 }
1124 break;
1125 case 59:
1126 switch (inst_get_field(inst, 21, 30)) {
1127 case OP_59_FADDS:
1128 fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
1129 kvmppc_sync_qpr(vcpu, ax_rd);
1130 break;
1131 case OP_59_FSUBS:
1132 fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
1133 kvmppc_sync_qpr(vcpu, ax_rd);
1134 break;
1135 case OP_59_FDIVS:
1136 fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
1137 kvmppc_sync_qpr(vcpu, ax_rd);
1138 break;
1139 case OP_59_FRES:
1140 fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
1141 kvmppc_sync_qpr(vcpu, ax_rd);
1142 break;
1143 case OP_59_FRSQRTES:
1144 fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
1145 kvmppc_sync_qpr(vcpu, ax_rd);
1146 break;
1147 }
1148 switch (inst_get_field(inst, 26, 30)) {
1149 case OP_59_FMULS:
1150 fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
1151 kvmppc_sync_qpr(vcpu, ax_rd);
1152 break;
1153 case OP_59_FMSUBS:
1154 fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
1155 kvmppc_sync_qpr(vcpu, ax_rd);
1156 break;
1157 case OP_59_FMADDS:
1158 fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
1159 kvmppc_sync_qpr(vcpu, ax_rd);
1160 break;
1161 case OP_59_FNMSUBS:
1162 fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
1163 kvmppc_sync_qpr(vcpu, ax_rd);
1164 break;
1165 case OP_59_FNMADDS:
1166 fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
1167 kvmppc_sync_qpr(vcpu, ax_rd);
1168 break;
1169 }
1170 break;
1171 case 63:
1172 switch (inst_get_field(inst, 21, 30)) {
1173 case OP_63_MTFSB0:
1174 case OP_63_MTFSB1:
1175 case OP_63_MCRFS:
1176 case OP_63_MTFSFI:
1177 /* XXX need to implement */
1178 break;
1179 case OP_63_MFFS:
1180 /* XXX missing CR */
1181 *fpr_d = vcpu->arch.fpscr;
1182 break;
1183 case OP_63_MTFSF:
1184 /* XXX missing fm bits */
1185 /* XXX missing CR */
1186 vcpu->arch.fpscr = *fpr_b;
1187 break;
1188 case OP_63_FCMPU:
1189 {
1190 u32 tmp_cr;
1191 u32 cr0_mask = 0xf0000000;
1192 u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
1193
1194 fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
1195 cr &= ~(cr0_mask >> cr_shift);
1196			cr |= (tmp_cr & cr0_mask) >> cr_shift;
1197 break;
1198 }
1199 case OP_63_FCMPO:
1200 {
1201 u32 tmp_cr;
1202 u32 cr0_mask = 0xf0000000;
1203 u32 cr_shift = inst_get_field(inst, 6, 8) * 4;
1204
1205 fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b);
1206 cr &= ~(cr0_mask >> cr_shift);
1207			cr |= (tmp_cr & cr0_mask) >> cr_shift;
1208 break;
1209 }
1210 case OP_63_FNEG:
1211 fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
1212 break;
1213 case OP_63_FMR:
1214 *fpr_d = *fpr_b;
1215 break;
1216 case OP_63_FABS:
1217 fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
1218 break;
1219 case OP_63_FCPSGN:
1220 fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
1221 break;
1222 case OP_63_FDIV:
1223 fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
1224 break;
1225 case OP_63_FADD:
1226 fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
1227 break;
1228 case OP_63_FSUB:
1229 fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b);
1230 break;
1231 case OP_63_FCTIW:
1232 fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
1233 break;
1234 case OP_63_FCTIWZ:
1235 fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
1236 break;
1237 case OP_63_FRSP:
1238 fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
1239 kvmppc_sync_qpr(vcpu, ax_rd);
1240 break;
1241 case OP_63_FRSQRTE:
1242 {
1243 double one = 1.0f;
1244
1245 /* fD = sqrt(fB) */
1246 fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b);
1247 /* fD = 1.0f / fD */
1248 fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d);
1249 break;
1250 }
1251 }
1252 switch (inst_get_field(inst, 26, 30)) {
1253 case OP_63_FMUL:
1254 fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c);
1255 break;
1256 case OP_63_FSEL:
1257 fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
1258 break;
1259 case OP_63_FMSUB:
1260 fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
1261 break;
1262 case OP_63_FMADD:
1263 fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
1264 break;
1265 case OP_63_FNMSUB:
1266 fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
1267 break;
1268 case OP_63_FNMADD:
1269 fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b);
1270 break;
1271 }
1272 break;
1273 }
1274
1275#ifdef DEBUG
1276 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) {
1277 u32 f;
1278 cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t);
1279 dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f);
1280 }
1281#endif
1282
1283 if (rcomp)
1284 kvmppc_set_cr(vcpu, cr);
1285
1286 preempt_enable();
1287
1288 return emulated;
1289}
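The OP_63_FCMPU/FCMPO cases above fold a 4-bit comparison result into the CR field selected by instruction bits 6-8. A standalone sketch of that mask-and-shift arithmetic, not taken from the patch (set_cr_field() is an invented name):

/* Standalone illustration of splicing a compare result into one CR field. */
#include <stdint.h>
#include <stdio.h>

static uint32_t set_cr_field(uint32_t cr, int field, uint32_t result4)
{
	uint32_t mask = 0xf0000000u >> (field * 4);	/* CR fields are 4 bits; field 0 is the MSB nibble */

	cr &= ~mask;				/* clear the target field */
	cr |= (result4 << 28) >> (field * 4);	/* move the result (held in field 0 position) into place */
	return cr;
}

int main(void)
{
	/* a "less than" result (0b1000) placed into CR field 6 (cr6) */
	printf("%08x\n", (unsigned)set_cr_field(0, 6, 0x8));	/* prints 00000080 */
	return 0;
}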
diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index c83c60ad96c5..506d5c316c96 100644
--- a/arch/powerpc/kvm/book3s_64_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -22,7 +22,10 @@
22#include <asm/reg.h> 22#include <asm/reg.h>
23#include <asm/page.h> 23#include <asm/page.h>
24#include <asm/asm-offsets.h> 24#include <asm/asm-offsets.h>
25
26#ifdef CONFIG_PPC_BOOK3S_64
25#include <asm/exception-64s.h> 27#include <asm/exception-64s.h>
28#endif
26 29
27/***************************************************************************** 30/*****************************************************************************
28 * * 31 * *
@@ -30,6 +33,39 @@
30 * * 33 * *
31 ****************************************************************************/ 34 ****************************************************************************/
32 35
36#if defined(CONFIG_PPC_BOOK3S_64)
37
38#define LOAD_SHADOW_VCPU(reg) \
39 mfspr reg, SPRN_SPRG_PACA
40
41#define SHADOW_VCPU_OFF PACA_KVM_SVCPU
42#define MSR_NOIRQ MSR_KERNEL & ~(MSR_IR | MSR_DR)
43#define FUNC(name) GLUE(.,name)
44
45#elif defined(CONFIG_PPC_BOOK3S_32)
46
47#define LOAD_SHADOW_VCPU(reg) \
48 mfspr reg, SPRN_SPRG_THREAD; \
49 lwz reg, THREAD_KVM_SVCPU(reg); \
50 /* PPC32 can have a NULL pointer - let's check for that */ \
51 mtspr SPRN_SPRG_SCRATCH1, r12; /* Save r12 */ \
52 mfcr r12; \
53 cmpwi reg, 0; \
54 bne 1f; \
55 mfspr reg, SPRN_SPRG_SCRATCH0; \
56 mtcr r12; \
57 mfspr r12, SPRN_SPRG_SCRATCH1; \
58 b kvmppc_resume_\intno; \
591:; \
60 mtcr r12; \
61 mfspr r12, SPRN_SPRG_SCRATCH1; \
62 tophys(reg, reg)
63
64#define SHADOW_VCPU_OFF 0
65#define MSR_NOIRQ MSR_KERNEL
66#define FUNC(name) name
67
68#endif
33 69
34.macro INTERRUPT_TRAMPOLINE intno 70.macro INTERRUPT_TRAMPOLINE intno
35 71
@@ -42,19 +78,19 @@ kvmppc_trampoline_\intno:
42 * First thing to do is to find out if we're coming 78 * First thing to do is to find out if we're coming
43 * from a KVM guest or a Linux process. 79 * from a KVM guest or a Linux process.
44 * 80 *
45 * To distinguish, we check a magic byte in the PACA 81 * To distinguish, we check a magic byte in the PACA/current
46 */ 82 */
47 mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */ 83 LOAD_SHADOW_VCPU(r13)
48 std r12, PACA_KVM_SCRATCH0(r13) 84 PPC_STL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
49 mfcr r12 85 mfcr r12
50 stw r12, PACA_KVM_SCRATCH1(r13) 86 stw r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
51 lbz r12, PACA_KVM_IN_GUEST(r13) 87 lbz r12, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13)
52 cmpwi r12, KVM_GUEST_MODE_NONE 88 cmpwi r12, KVM_GUEST_MODE_NONE
53 bne ..kvmppc_handler_hasmagic_\intno 89 bne ..kvmppc_handler_hasmagic_\intno
54 /* No KVM guest? Then jump back to the Linux handler! */ 90 /* No KVM guest? Then jump back to the Linux handler! */
55 lwz r12, PACA_KVM_SCRATCH1(r13) 91 lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
56 mtcr r12 92 mtcr r12
57 ld r12, PACA_KVM_SCRATCH0(r13) 93 PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
58 mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ 94 mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
59 b kvmppc_resume_\intno /* Get back original handler */ 95 b kvmppc_resume_\intno /* Get back original handler */
60 96
@@ -76,9 +112,7 @@ kvmppc_trampoline_\intno:
76INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSTEM_RESET 112INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSTEM_RESET
77INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK 113INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_MACHINE_CHECK
78INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE 114INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_STORAGE
79INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT
80INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE 115INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_STORAGE
81INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT
82INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL 116INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_EXTERNAL
83INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT 117INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALIGNMENT
84INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM 118INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PROGRAM
@@ -88,7 +122,14 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_SYSCALL
88INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE 122INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_TRACE
89INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON 123INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON
90INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC 124INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
125
126/* These trampolines are only available on 64-bit machines */
127
128#ifdef CONFIG_PPC_BOOK3S_64
129INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_DATA_SEGMENT
130INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_INST_SEGMENT
91INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX 131INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
132#endif
92 133
93/* 134/*
94 * Bring us back to the faulting code, but skip the 135 * Bring us back to the faulting code, but skip the
@@ -99,11 +140,11 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
99 * 140 *
100 * Input Registers: 141 * Input Registers:
101 * 142 *
102 * R12 = free 143 * R12 = free
103 * R13 = PACA 144 * R13 = Shadow VCPU (PACA)
104 * PACA.KVM.SCRATCH0 = guest R12 145 * SVCPU.SCRATCH0 = guest R12
105 * PACA.KVM.SCRATCH1 = guest CR 146 * SVCPU.SCRATCH1 = guest CR
106 * SPRG_SCRATCH0 = guest R13 147 * SPRG_SCRATCH0 = guest R13
107 * 148 *
108 */ 149 */
109kvmppc_handler_skip_ins: 150kvmppc_handler_skip_ins:
@@ -114,9 +155,9 @@ kvmppc_handler_skip_ins:
114 mtsrr0 r12 155 mtsrr0 r12
115 156
116 /* Clean up all state */ 157 /* Clean up all state */
117 lwz r12, PACA_KVM_SCRATCH1(r13) 158 lwz r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
118 mtcr r12 159 mtcr r12
119 ld r12, PACA_KVM_SCRATCH0(r13) 160 PPC_LL r12, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
120 mfspr r13, SPRN_SPRG_SCRATCH0 161 mfspr r13, SPRN_SPRG_SCRATCH0
121 162
122 /* And get back into the code */ 163 /* And get back into the code */
@@ -147,41 +188,48 @@ kvmppc_handler_lowmem_trampoline_end:
147 * 188 *
148 * R3 = function 189 * R3 = function
149 * R4 = MSR 190 * R4 = MSR
150 * R5 = CTR 191 * R5 = scratch register
151 * 192 *
152 */ 193 */
153_GLOBAL(kvmppc_rmcall) 194_GLOBAL(kvmppc_rmcall)
154 mtmsr r4 /* Disable relocation, so mtsrr 195 LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ)
196 mtmsr r5 /* Disable relocation and interrupts, so mtsrr
155 doesn't get interrupted */ 197 doesn't get interrupted */
156 mtctr r5 198 sync
157 mtsrr0 r3 199 mtsrr0 r3
158 mtsrr1 r4 200 mtsrr1 r4
159 RFI 201 RFI
160 202
203#if defined(CONFIG_PPC_BOOK3S_32)
204#define STACK_LR INT_FRAME_SIZE+4
205#elif defined(CONFIG_PPC_BOOK3S_64)
206#define STACK_LR _LINK
207#endif
208
161/* 209/*
162 * Activate current's external feature (FPU/Altivec/VSX) 210 * Activate current's external feature (FPU/Altivec/VSX)
163 */ 211 */
164#define define_load_up(what) \ 212#define define_load_up(what) \
165 \ 213 \
166_GLOBAL(kvmppc_load_up_ ## what); \ 214_GLOBAL(kvmppc_load_up_ ## what); \
167 subi r1, r1, INT_FRAME_SIZE; \ 215 PPC_STLU r1, -INT_FRAME_SIZE(r1); \
168 mflr r3; \ 216 mflr r3; \
169 std r3, _LINK(r1); \ 217 PPC_STL r3, STACK_LR(r1); \
170 mfmsr r4; \ 218 PPC_STL r20, _NIP(r1); \
171 std r31, GPR3(r1); \ 219 mfmsr r20; \
172 mr r31, r4; \ 220 LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \
173 li r5, MSR_DR; \ 221 andc r3,r20,r3; /* Disable DR,EE */ \
174 oris r5, r5, MSR_EE@h; \ 222 mtmsr r3; \
175 andc r4, r4, r5; \ 223 sync; \
176 mtmsr r4; \ 224 \
177 \ 225 bl FUNC(load_up_ ## what); \
178 bl .load_up_ ## what; \ 226 \
179 \ 227 mtmsr r20; /* Enable DR,EE */ \
180 mtmsr r31; \ 228 sync; \
181 ld r3, _LINK(r1); \ 229 PPC_LL r3, STACK_LR(r1); \
182 ld r31, GPR3(r1); \ 230 PPC_LL r20, _NIP(r1); \
183 addi r1, r1, INT_FRAME_SIZE; \ 231 mtlr r3; \
184 mtlr r3; \ 232 addi r1, r1, INT_FRAME_SIZE; \
185 blr 233 blr
186 234
187define_load_up(fpu) 235define_load_up(fpu)
@@ -194,11 +242,10 @@ define_load_up(vsx)
194 242
195.global kvmppc_trampoline_lowmem 243.global kvmppc_trampoline_lowmem
196kvmppc_trampoline_lowmem: 244kvmppc_trampoline_lowmem:
197 .long kvmppc_handler_lowmem_trampoline - _stext 245 .long kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START
198 246
199.global kvmppc_trampoline_enter 247.global kvmppc_trampoline_enter
200kvmppc_trampoline_enter: 248kvmppc_trampoline_enter:
201 .long kvmppc_handler_trampoline_enter - _stext 249 .long kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START
202
203#include "book3s_64_slb.S"
204 250
251#include "book3s_segment.S"
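The trampolines above decide between KVM and the stock Linux handler by testing the SVCPU_IN_GUEST byte against KVM_GUEST_MODE_NONE. Roughly the same decision, written as standalone C for orientation (the enum values and function names here are illustrative, not the kernel's definitions):

/* Standalone sketch of the trampoline's "is a guest running?" dispatch. */
enum guest_mode { KVM_GUEST_MODE_NONE, KVM_GUEST_MODE_GUEST, KVM_GUEST_MODE_SKIP };

static void handle_interrupt(unsigned char in_guest,
			     void (*kvm_handler)(void),
			     void (*linux_handler)(void))
{
	if (in_guest == KVM_GUEST_MODE_NONE)
		linux_handler();	/* no guest was running: jump back to the stock handler */
	else
		kvm_handler();		/* the interrupt hit guest context: let KVM's exit path handle it */
}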
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
new file mode 100644
index 000000000000..7c52ed0b7051
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -0,0 +1,259 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 *
15 * Copyright SUSE Linux Products GmbH 2010
16 *
17 * Authors: Alexander Graf <agraf@suse.de>
18 */
19
20/* Real mode helpers */
21
22#if defined(CONFIG_PPC_BOOK3S_64)
23
24#define GET_SHADOW_VCPU(reg) \
25 addi reg, r13, PACA_KVM_SVCPU
26
27#elif defined(CONFIG_PPC_BOOK3S_32)
28
29#define GET_SHADOW_VCPU(reg) \
30 tophys(reg, r2); \
31 lwz reg, (THREAD + THREAD_KVM_SVCPU)(reg); \
32 tophys(reg, reg)
33
34#endif
35
36/* Disable for nested KVM */
37#define USE_QUICK_LAST_INST
38
39
40/* Get helper functions for subarch specific functionality */
41
42#if defined(CONFIG_PPC_BOOK3S_64)
43#include "book3s_64_slb.S"
44#elif defined(CONFIG_PPC_BOOK3S_32)
45#include "book3s_32_sr.S"
46#endif
47
48/******************************************************************************
49 * *
50 * Entry code *
51 * *
52 *****************************************************************************/
53
54.global kvmppc_handler_trampoline_enter
55kvmppc_handler_trampoline_enter:
56
57 /* Required state:
58 *
59 * MSR = ~IR|DR
60 * R13 = PACA
61 * R1 = host R1
62 * R2 = host R2
63 * R10 = guest MSR
64 * all other volatile GPRS = free
65 * SVCPU[CR] = guest CR
66 * SVCPU[XER] = guest XER
67 * SVCPU[CTR] = guest CTR
68 * SVCPU[LR] = guest LR
69 */
70
71 /* r3 = shadow vcpu */
72 GET_SHADOW_VCPU(r3)
73
74 /* Move SRR0 and SRR1 into the respective regs */
75 PPC_LL r9, SVCPU_PC(r3)
76 mtsrr0 r9
77 mtsrr1 r10
78
79 /* Activate guest mode, so faults get handled by KVM */
80 li r11, KVM_GUEST_MODE_GUEST
81 stb r11, SVCPU_IN_GUEST(r3)
82
83 /* Switch to guest segment. This is subarch specific. */
84 LOAD_GUEST_SEGMENTS
85
86 /* Enter guest */
87
88 PPC_LL r4, (SVCPU_CTR)(r3)
89 PPC_LL r5, (SVCPU_LR)(r3)
90 lwz r6, (SVCPU_CR)(r3)
91 lwz r7, (SVCPU_XER)(r3)
92
93 mtctr r4
94 mtlr r5
95 mtcr r6
96 mtxer r7
97
98 PPC_LL r0, (SVCPU_R0)(r3)
99 PPC_LL r1, (SVCPU_R1)(r3)
100 PPC_LL r2, (SVCPU_R2)(r3)
101 PPC_LL r4, (SVCPU_R4)(r3)
102 PPC_LL r5, (SVCPU_R5)(r3)
103 PPC_LL r6, (SVCPU_R6)(r3)
104 PPC_LL r7, (SVCPU_R7)(r3)
105 PPC_LL r8, (SVCPU_R8)(r3)
106 PPC_LL r9, (SVCPU_R9)(r3)
107 PPC_LL r10, (SVCPU_R10)(r3)
108 PPC_LL r11, (SVCPU_R11)(r3)
109 PPC_LL r12, (SVCPU_R12)(r3)
110 PPC_LL r13, (SVCPU_R13)(r3)
111
112 PPC_LL r3, (SVCPU_R3)(r3)
113
114 RFI
115kvmppc_handler_trampoline_enter_end:
116
117
118
119/******************************************************************************
120 * *
121 * Exit code *
122 * *
123 *****************************************************************************/
124
125.global kvmppc_handler_trampoline_exit
126kvmppc_handler_trampoline_exit:
127
128 /* Register usage at this point:
129 *
130 * SPRG_SCRATCH0 = guest R13
131 * R12 = exit handler id
132 * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64]
133 * SVCPU.SCRATCH0 = guest R12
134 * SVCPU.SCRATCH1 = guest CR
135 *
136 */
137
138 /* Save registers */
139
140 PPC_STL r0, (SHADOW_VCPU_OFF + SVCPU_R0)(r13)
141 PPC_STL r1, (SHADOW_VCPU_OFF + SVCPU_R1)(r13)
142 PPC_STL r2, (SHADOW_VCPU_OFF + SVCPU_R2)(r13)
143 PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_R3)(r13)
144 PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_R4)(r13)
145 PPC_STL r5, (SHADOW_VCPU_OFF + SVCPU_R5)(r13)
146 PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_R6)(r13)
147 PPC_STL r7, (SHADOW_VCPU_OFF + SVCPU_R7)(r13)
148 PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R8)(r13)
149 PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R9)(r13)
150 PPC_STL r10, (SHADOW_VCPU_OFF + SVCPU_R10)(r13)
151 PPC_STL r11, (SHADOW_VCPU_OFF + SVCPU_R11)(r13)
152
153 /* Restore R1/R2 so we can handle faults */
154 PPC_LL r1, (SHADOW_VCPU_OFF + SVCPU_HOST_R1)(r13)
155 PPC_LL r2, (SHADOW_VCPU_OFF + SVCPU_HOST_R2)(r13)
156
157 /* Save guest PC and MSR */
158 mfsrr0 r3
159 mfsrr1 r4
160
161 PPC_STL r3, (SHADOW_VCPU_OFF + SVCPU_PC)(r13)
162 PPC_STL r4, (SHADOW_VCPU_OFF + SVCPU_SHADOW_SRR1)(r13)
163
164 /* Get scratch'ed off registers */
165 mfspr r9, SPRN_SPRG_SCRATCH0
166 PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_SCRATCH0)(r13)
167 lwz r7, (SHADOW_VCPU_OFF + SVCPU_SCRATCH1)(r13)
168
169 PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_R13)(r13)
170 PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_R12)(r13)
171 stw r7, (SHADOW_VCPU_OFF + SVCPU_CR)(r13)
172
173 /* Save more register state */
174
175 mfxer r5
176 mfdar r6
177 mfdsisr r7
178 mfctr r8
179 mflr r9
180
181 stw r5, (SHADOW_VCPU_OFF + SVCPU_XER)(r13)
182 PPC_STL r6, (SHADOW_VCPU_OFF + SVCPU_FAULT_DAR)(r13)
183 stw r7, (SHADOW_VCPU_OFF + SVCPU_FAULT_DSISR)(r13)
184 PPC_STL r8, (SHADOW_VCPU_OFF + SVCPU_CTR)(r13)
185 PPC_STL r9, (SHADOW_VCPU_OFF + SVCPU_LR)(r13)
186
187 /*
188 * In order to easily get the last instruction, i.e. the one
189 * we got the #vmexit at, we exploit the fact that the
190 * virtual layout is still the same here, so we can just
191 * load from the guest's PC address
192 */
193
194 /* We only load the last instruction when it's safe */
195 cmpwi r12, BOOK3S_INTERRUPT_DATA_STORAGE
196 beq ld_last_inst
197 cmpwi r12, BOOK3S_INTERRUPT_PROGRAM
198 beq ld_last_inst
199 cmpwi r12, BOOK3S_INTERRUPT_ALIGNMENT
200 beq- ld_last_inst
201
202 b no_ld_last_inst
203
204ld_last_inst:
205 /* Save off the guest instruction we're at */
206
207 /* In case lwz faults */
208 li r0, KVM_INST_FETCH_FAILED
209
210#ifdef USE_QUICK_LAST_INST
211
212 /* Set guest mode to 'jump over instruction' so if lwz faults
213 * we'll just continue at the next IP. */
214 li r9, KVM_GUEST_MODE_SKIP
215 stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13)
216
217 /* 1) enable paging for data */
218 mfmsr r9
219 ori r11, r9, MSR_DR /* Enable paging for data */
220 mtmsr r11
221 sync
222 /* 2) fetch the instruction */
223 lwz r0, 0(r3)
224 /* 3) disable paging again */
225 mtmsr r9
226 sync
227
228#endif
229 stw r0, (SHADOW_VCPU_OFF + SVCPU_LAST_INST)(r13)
230
231no_ld_last_inst:
232
233 /* Unset guest mode */
234 li r9, KVM_GUEST_MODE_NONE
235 stb r9, (SHADOW_VCPU_OFF + SVCPU_IN_GUEST)(r13)
236
237 /* Switch back to host MMU */
238 LOAD_HOST_SEGMENTS
239
240 /* Register usage at this point:
241 *
242 * R1 = host R1
243 * R2 = host R2
244 * R12 = exit handler id
245 * R13 = shadow vcpu - SHADOW_VCPU_OFF [=PACA on PPC64]
246 * SVCPU.* = guest *
247 *
248 */
249
250 /* RFI into the highmem handler */
251 mfmsr r7
252 ori r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME /* Enable paging */
253 mtsrr1 r7
254 /* Load highmem handler address */
255 PPC_LL r8, (SHADOW_VCPU_OFF + SVCPU_VMHANDLER)(r13)
256 mtsrr0 r8
257
258 RFI
259kvmppc_handler_trampoline_exit_end:
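The ld_last_inst path above pre-loads a sentinel, briefly enables MSR_DR, and reads the word at the guest PC, so a faulting lwz simply leaves the sentinel behind. A standalone C approximation of that idea (all names and the sentinel value are chosen for the sketch, not taken from the headers):

/* Standalone sketch: fetch the last guest instruction, or a sentinel on fault. */
#include <stdint.h>

#define INST_FETCH_FAILED	((uint32_t)-1)	/* sentinel value, illustrative */

static uint32_t read_guest_word(const uint32_t *guest_pc, int mapping_valid)
{
	/* Default to the sentinel, like "li r0, KVM_INST_FETCH_FAILED" before the lwz. */
	uint32_t inst = INST_FETCH_FAILED;

	if (mapping_valid)	/* the real code instead relies on the fault handler skipping the load */
		inst = *guest_pc;

	return inst;
}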
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 2a3a1953d4bd..a33ab8cc2ccc 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
 	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
 }
 
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
+                                  struct kvm_interrupt *irq)
+{
+	clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
+}
+
 /* Deliver the interrupt of the corresponding priority, if possible. */
 static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
                                         unsigned int priority)
@@ -479,6 +485,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
 
+	vcpu_load(vcpu);
+
 	regs->pc = vcpu->arch.pc;
 	regs->cr = kvmppc_get_cr(vcpu);
 	regs->ctr = vcpu->arch.ctr;
@@ -499,6 +507,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
 		regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
 
+	vcpu_put(vcpu);
+
 	return 0;
 }
 
@@ -506,6 +516,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
 
+	vcpu_load(vcpu);
+
 	vcpu->arch.pc = regs->pc;
 	kvmppc_set_cr(vcpu, regs->cr);
 	vcpu->arch.ctr = regs->ctr;
@@ -525,6 +537,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
 		kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
 
+	vcpu_put(vcpu);
+
 	return 0;
 }
 
@@ -553,7 +567,12 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                   struct kvm_translation *tr)
 {
-	return kvmppc_core_vcpu_translate(vcpu, tr);
+	int r;
+
+	vcpu_load(vcpu);
+	r = kvmppc_core_vcpu_translate(vcpu, tr);
+	vcpu_put(vcpu);
+	return r;
 }
 
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
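kvmppc_core_dequeue_external() above is the mirror image of the queue path: one bit per interrupt priority in pending_exceptions. A standalone sketch of that bookkeeping (the priority number and helper names are invented for the example):

/* Standalone sketch of queueing/dequeueing an interrupt priority bit. */
#include <stdio.h>

#define IRQPRIO_EXTERNAL 4	/* illustrative priority number */

static unsigned long pending;

static void queue_irqprio(int prio)   { pending |=  (1UL << prio); }
static void dequeue_irqprio(int prio) { pending &= ~(1UL << prio); }

int main(void)
{
	queue_irqprio(IRQPRIO_EXTERNAL);
	printf("pending=%lx\n", pending);	/* 10 */
	dequeue_irqprio(IRQPRIO_EXTERNAL);
	printf("pending=%lx\n", pending);	/* 0 */
	return 0;
}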
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 669a5c5fc7d7..bc2b4004eb26 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -161,7 +161,7 @@ static int __init kvmppc_e500_init(void)
 	flush_icache_range(kvmppc_booke_handlers,
 			kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
 
-	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
+	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
 }
 
 static void __init kvmppc_e500_exit(void)
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index cb72a65f4ecc..4568ec386c2a 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -38,10 +38,12 @@
 #define OP_31_XOP_LBZX      87
 #define OP_31_XOP_STWX      151
 #define OP_31_XOP_STBX      215
+#define OP_31_XOP_LBZUX     119
 #define OP_31_XOP_STBUX     247
 #define OP_31_XOP_LHZX      279
 #define OP_31_XOP_LHZUX     311
 #define OP_31_XOP_MFSPR     339
+#define OP_31_XOP_LHAX      343
 #define OP_31_XOP_STHX      407
 #define OP_31_XOP_STHUX     439
 #define OP_31_XOP_MTSPR     467
@@ -62,10 +64,12 @@
 #define OP_STBU 39
 #define OP_LHZ  40
 #define OP_LHZU 41
+#define OP_LHA  42
+#define OP_LHAU 43
 #define OP_STH  44
 #define OP_STHU 45
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S
 static int kvmppc_dec_enabled(struct kvm_vcpu *vcpu)
 {
 	return 1;
@@ -82,7 +86,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 	unsigned long dec_nsec;
 
 	pr_debug("mtDEC: %x\n", vcpu->arch.dec);
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S
 	/* mtdec lowers the interrupt line when positive. */
 	kvmppc_core_dequeue_dec(vcpu);
 
@@ -128,7 +132,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
  * from opcode tables in the future. */
 int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
-	u32 inst = vcpu->arch.last_inst;
+	u32 inst = kvmppc_get_last_inst(vcpu);
 	u32 ea;
 	int ra;
 	int rb;
@@ -143,13 +147,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
 
-	/* Try again next time */
-	if (inst == KVM_INST_FETCH_FAILED)
-		return EMULATE_DONE;
-
 	switch (get_op(inst)) {
 	case OP_TRAP:
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S
 	case OP_TRAP_64:
 		kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
 #else
@@ -171,6 +171,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
 		break;
 
+	case OP_31_XOP_LBZUX:
+		rt = get_rt(inst);
+		ra = get_ra(inst);
+		rb = get_rb(inst);
+
+		ea = kvmppc_get_gpr(vcpu, rb);
+		if (ra)
+			ea += kvmppc_get_gpr(vcpu, ra);
+
+		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+		kvmppc_set_gpr(vcpu, ra, ea);
+		break;
+
 	case OP_31_XOP_STWX:
 		rs = get_rs(inst);
 		emulated = kvmppc_handle_store(run, vcpu,
@@ -200,6 +213,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		kvmppc_set_gpr(vcpu, rs, ea);
 		break;
 
+	case OP_31_XOP_LHAX:
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+		break;
+
 	case OP_31_XOP_LHZX:
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
@@ -450,6 +468,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
 		break;
 
+	case OP_LHA:
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+		break;
+
+	case OP_LHAU:
+		ra = get_ra(inst);
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
+		break;
+
 	case OP_STH:
 		rs = get_rs(inst);
 		emulated = kvmppc_handle_store(run, vcpu,
@@ -472,7 +502,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	if (emulated == EMULATE_FAIL) {
 		emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
-		if (emulated == EMULATE_FAIL) {
+		if (emulated == EMULATE_AGAIN) {
+			advance = 0;
+		} else if (emulated == EMULATE_FAIL) {
 			advance = 0;
 			printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
 			       "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
@@ -480,10 +512,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		}
 	}
 
-	trace_kvm_ppc_instr(inst, vcpu->arch.pc, emulated);
+	trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated);
 
+	/* Advance past emulated instruction. */
 	if (advance)
-		vcpu->arch.pc += 4; /* Advance past emulated instruction. */
+		kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
 
 	return emulated;
 }
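The new OP_LHA/OP_LHAU/OP_31_XOP_LHAX cases rely on the usual PowerPC field extraction done by get_op()/get_rt()/get_ra(). A standalone illustration of how those fields come out of a 32-bit instruction word (the field positions follow the architecture; the example program itself is not from the patch):

/* Standalone sketch of PowerPC instruction field decoding. */
#include <stdint.h>
#include <stdio.h>

static int get_op(uint32_t inst) { return inst >> 26; }          /* bits 0-5  (primary opcode) */
static int get_rt(uint32_t inst) { return (inst >> 21) & 0x1f; } /* bits 6-10  */
static int get_ra(uint32_t inst) { return (inst >> 16) & 0x1f; } /* bits 11-15 */
static int get_rb(uint32_t inst) { return (inst >> 11) & 0x1f; } /* bits 16-20 */

int main(void)
{
	uint32_t lha = 0xa8830000;	/* lha r4, 0(r3): primary opcode 42 (OP_LHA) */

	printf("op=%d rt=%d ra=%d rb=%d\n",
	       get_op(lha), get_rt(lha), get_ra(lha), get_rb(lha));
	return 0;
}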
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S
new file mode 100644
index 000000000000..2b340a3eee90
--- /dev/null
+++ b/arch/powerpc/kvm/fpu.S
@@ -0,0 +1,273 @@
1/*
2 * FPU helper code to use FPU operations from inside the kernel
3 *
4 * Copyright (C) 2010 Alexander Graf (agraf@suse.de)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <asm/reg.h>
14#include <asm/page.h>
15#include <asm/mmu.h>
16#include <asm/pgtable.h>
17#include <asm/cputable.h>
18#include <asm/cache.h>
19#include <asm/thread_info.h>
20#include <asm/ppc_asm.h>
21#include <asm/asm-offsets.h>
22
23/* Instructions operating on single parameters */
24
25/*
26 * Single operation with one input operand
27 *
28 * R3 = (double*)&fpscr
29 * R4 = (short*)&result
30 * R5 = (short*)&param1
31 */
32#define FPS_ONE_IN(name) \
33_GLOBAL(fps_ ## name); \
34 lfd 0,0(r3); /* load up fpscr value */ \
35 MTFSF_L(0); \
36 lfs 0,0(r5); \
37 \
38 name 0,0; \
39 \
40 stfs 0,0(r4); \
41 mffs 0; \
42 stfd 0,0(r3); /* save new fpscr value */ \
43 blr
44
45/*
46 * Single operation with two input operands
47 *
48 * R3 = (double*)&fpscr
49 * R4 = (short*)&result
50 * R5 = (short*)&param1
51 * R6 = (short*)&param2
52 */
53#define FPS_TWO_IN(name) \
54_GLOBAL(fps_ ## name); \
55 lfd 0,0(r3); /* load up fpscr value */ \
56 MTFSF_L(0); \
57 lfs 0,0(r5); \
58 lfs 1,0(r6); \
59 \
60 name 0,0,1; \
61 \
62 stfs 0,0(r4); \
63 mffs 0; \
64 stfd 0,0(r3); /* save new fpscr value */ \
65 blr
66
67/*
68 * Single operation with three input operands
69 *
70 * R3 = (double*)&fpscr
71 * R4 = (short*)&result
72 * R5 = (short*)&param1
73 * R6 = (short*)&param2
74 * R7 = (short*)&param3
75 */
76#define FPS_THREE_IN(name) \
77_GLOBAL(fps_ ## name); \
78 lfd 0,0(r3); /* load up fpscr value */ \
79 MTFSF_L(0); \
80 lfs 0,0(r5); \
81 lfs 1,0(r6); \
82 lfs 2,0(r7); \
83 \
84 name 0,0,1,2; \
85 \
86 stfs 0,0(r4); \
87 mffs 0; \
88 stfd 0,0(r3); /* save new fpscr value */ \
89 blr
90
91FPS_ONE_IN(fres)
92FPS_ONE_IN(frsqrte)
93FPS_ONE_IN(fsqrts)
94FPS_TWO_IN(fadds)
95FPS_TWO_IN(fdivs)
96FPS_TWO_IN(fmuls)
97FPS_TWO_IN(fsubs)
98FPS_THREE_IN(fmadds)
99FPS_THREE_IN(fmsubs)
100FPS_THREE_IN(fnmadds)
101FPS_THREE_IN(fnmsubs)
102FPS_THREE_IN(fsel)
103
104
105/* Instructions operating on double parameters */
106
107/*
108 * Beginning of double instruction processing
109 *
110 * R3 = (double*)&fpscr
111 * R4 = (u32*)&cr
112 * R5 = (double*)&result
113 * R6 = (double*)&param1
114 * R7 = (double*)&param2 [load_two]
115 * R8 = (double*)&param3 [load_three]
116 * LR = instruction call function
117 */
118fpd_load_three:
119 lfd 2,0(r8) /* load param3 */
120fpd_load_two:
121 lfd 1,0(r7) /* load param2 */
122fpd_load_one:
123 lfd 0,0(r6) /* load param1 */
124fpd_load_none:
125 lfd 3,0(r3) /* load up fpscr value */
126 MTFSF_L(3)
127 lwz r6, 0(r4) /* load cr */
128 mtcr r6
129 blr
130
131/*
132 * End of double instruction processing
133 *
134 * R3 = (double*)&fpscr
135 * R4 = (u32*)&cr
136 * R5 = (double*)&result
137 * LR = caller of instruction call function
138 */
139fpd_return:
140 mfcr r6
141 stfd 0,0(r5) /* save result */
142 mffs 0
143 stfd 0,0(r3) /* save new fpscr value */
144 stw r6,0(r4) /* save new cr value */
145 blr
146
147/*
148 * Double operation with no input operand
149 *
150 * R3 = (double*)&fpscr
151 * R4 = (u32*)&cr
152 * R5 = (double*)&result
153 */
154#define FPD_NONE_IN(name) \
155_GLOBAL(fpd_ ## name); \
156 mflr r12; \
157 bl fpd_load_none; \
158 mtlr r12; \
159 \
160 name. 0; /* call instruction */ \
161 b fpd_return
162
163/*
164 * Double operation with one input operand
165 *
166 * R3 = (double*)&fpscr
167 * R4 = (u32*)&cr
168 * R5 = (double*)&result
169 * R6 = (double*)&param1
170 */
171#define FPD_ONE_IN(name) \
172_GLOBAL(fpd_ ## name); \
173 mflr r12; \
174 bl fpd_load_one; \
175 mtlr r12; \
176 \
177 name. 0,0; /* call instruction */ \
178 b fpd_return
179
180/*
181 * Double operation with two input operands
182 *
183 * R3 = (double*)&fpscr
184 * R4 = (u32*)&cr
185 * R5 = (double*)&result
186 * R6 = (double*)&param1
187 * R7 = (double*)&param2
188 * R8 = (double*)&param3
189 */
190#define FPD_TWO_IN(name) \
191_GLOBAL(fpd_ ## name); \
192 mflr r12; \
193 bl fpd_load_two; \
194 mtlr r12; \
195 \
196 name. 0,0,1; /* call instruction */ \
197 b fpd_return
198
199/*
200 * CR Double operation with two input operands
201 *
202 * R3 = (double*)&fpscr
203 * R4 = (u32*)&cr
204 * R5 = (double*)&param1
205 * R6 = (double*)&param2
206 * R7 = (double*)&param3
207 */
208#define FPD_TWO_IN_CR(name) \
209_GLOBAL(fpd_ ## name); \
210 lfd 1,0(r6); /* load param2 */ \
211 lfd 0,0(r5); /* load param1 */ \
212 lfd 3,0(r3); /* load up fpscr value */ \
213 MTFSF_L(3); \
214 lwz r6, 0(r4); /* load cr */ \
215 mtcr r6; \
216 \
217 name 0,0,1; /* call instruction */ \
218 mfcr r6; \
219 mffs 0; \
220 stfd 0,0(r3); /* save new fpscr value */ \
221 stw r6,0(r4); /* save new cr value */ \
222 blr
223
224/*
225 * Double operation with three input operands
226 *
227 * R3 = (double*)&fpscr
228 * R4 = (u32*)&cr
229 * R5 = (double*)&result
230 * R6 = (double*)&param1
231 * R7 = (double*)&param2
232 * R8 = (double*)&param3
233 */
234#define FPD_THREE_IN(name) \
235_GLOBAL(fpd_ ## name); \
236 mflr r12; \
237 bl fpd_load_three; \
238 mtlr r12; \
239 \
240 name. 0,0,1,2; /* call instruction */ \
241 b fpd_return
242
243FPD_ONE_IN(fsqrts)
244FPD_ONE_IN(frsqrtes)
245FPD_ONE_IN(fres)
246FPD_ONE_IN(frsp)
247FPD_ONE_IN(fctiw)
248FPD_ONE_IN(fctiwz)
249FPD_ONE_IN(fsqrt)
250FPD_ONE_IN(fre)
251FPD_ONE_IN(frsqrte)
252FPD_ONE_IN(fneg)
253FPD_ONE_IN(fabs)
254FPD_TWO_IN(fadds)
255FPD_TWO_IN(fsubs)
256FPD_TWO_IN(fdivs)
257FPD_TWO_IN(fmuls)
258FPD_TWO_IN_CR(fcmpu)
259FPD_TWO_IN(fcpsgn)
260FPD_TWO_IN(fdiv)
261FPD_TWO_IN(fadd)
262FPD_TWO_IN(fmul)
263FPD_TWO_IN_CR(fcmpo)
264FPD_TWO_IN(fsub)
265FPD_THREE_IN(fmsubs)
266FPD_THREE_IN(fmadds)
267FPD_THREE_IN(fnmsubs)
268FPD_THREE_IN(fnmadds)
269FPD_THREE_IN(fsel)
270FPD_THREE_IN(fmsub)
271FPD_THREE_IN(fmadd)
272FPD_THREE_IN(fnmsub)
273FPD_THREE_IN(fnmadd)
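Seen from C, the register conventions documented above turn into ordinary pointer arguments. The declarations below are an approximation reconstructed from those "R3 = ..., R4 = ..." comments, not copied from asm/kvm_fpu.h, so treat the exact parameter types as assumptions:

/* Sketch of the caller-side view of the fps_*/fpd_* helpers (kernel context). */
#include <linux/types.h>

/* single-precision, two inputs: R3=&fpscr, R4=&result, R5=&param1, R6=&param2 */
extern void fps_fadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);

/* double-precision, two inputs: R3=&fpscr, R4=&cr, R5=&result, R6/R7=params */
extern void fpd_fadd(u64 *fpscr, u32 *cr, u64 *dst, u64 *src1, u64 *src2);

static void example_fadd(u64 *fpscr, u32 *cr, u64 *fpr_d, u64 *fpr_a, u64 *fpr_b)
{
	/* mirrors the OP_63_FADD case in book3s_paired_singles.c */
	fpd_fadd(fpscr, cr, fpr_d, fpr_a, fpr_b);
}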
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 297fcd2ff7d0..9b8683f39e05 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -70,7 +70,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
70 case EMULATE_FAIL: 70 case EMULATE_FAIL:
71 /* XXX Deliver Program interrupt to guest. */ 71 /* XXX Deliver Program interrupt to guest. */
72 printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, 72 printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
73 vcpu->arch.last_inst); 73 kvmppc_get_last_inst(vcpu));
74 r = RESUME_HOST; 74 r = RESUME_HOST;
75 break; 75 break;
76 default: 76 default:
@@ -148,6 +148,10 @@ int kvm_dev_ioctl_check_extension(long ext)
148 148
149 switch (ext) { 149 switch (ext) {
150 case KVM_CAP_PPC_SEGSTATE: 150 case KVM_CAP_PPC_SEGSTATE:
151 case KVM_CAP_PPC_PAIRED_SINGLES:
152 case KVM_CAP_PPC_UNSET_IRQ:
153 case KVM_CAP_ENABLE_CAP:
154 case KVM_CAP_PPC_OSI:
151 r = 1; 155 r = 1;
152 break; 156 break;
153 case KVM_CAP_COALESCED_MMIO: 157 case KVM_CAP_COALESCED_MMIO:
@@ -193,12 +197,17 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
193{ 197{
194 struct kvm_vcpu *vcpu; 198 struct kvm_vcpu *vcpu;
195 vcpu = kvmppc_core_vcpu_create(kvm, id); 199 vcpu = kvmppc_core_vcpu_create(kvm, id);
196 kvmppc_create_vcpu_debugfs(vcpu, id); 200 if (!IS_ERR(vcpu))
201 kvmppc_create_vcpu_debugfs(vcpu, id);
197 return vcpu; 202 return vcpu;
198} 203}
199 204
200void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 205void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
201{ 206{
207 /* Make sure we're not using the vcpu anymore */
208 hrtimer_cancel(&vcpu->arch.dec_timer);
209 tasklet_kill(&vcpu->arch.tasklet);
210
202 kvmppc_remove_vcpu_debugfs(vcpu); 211 kvmppc_remove_vcpu_debugfs(vcpu);
203 kvmppc_core_vcpu_free(vcpu); 212 kvmppc_core_vcpu_free(vcpu);
204} 213}
@@ -278,7 +287,7 @@ static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
278static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 287static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
279 struct kvm_run *run) 288 struct kvm_run *run)
280{ 289{
281 ulong gpr; 290 u64 gpr;
282 291
283 if (run->mmio.len > sizeof(gpr)) { 292 if (run->mmio.len > sizeof(gpr)) {
284 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); 293 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -287,6 +296,7 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
287 296
288 if (vcpu->arch.mmio_is_bigendian) { 297 if (vcpu->arch.mmio_is_bigendian) {
289 switch (run->mmio.len) { 298 switch (run->mmio.len) {
299 case 8: gpr = *(u64 *)run->mmio.data; break;
290 case 4: gpr = *(u32 *)run->mmio.data; break; 300 case 4: gpr = *(u32 *)run->mmio.data; break;
291 case 2: gpr = *(u16 *)run->mmio.data; break; 301 case 2: gpr = *(u16 *)run->mmio.data; break;
292 case 1: gpr = *(u8 *)run->mmio.data; break; 302 case 1: gpr = *(u8 *)run->mmio.data; break;
@@ -300,7 +310,43 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
300 } 310 }
301 } 311 }
302 312
313 if (vcpu->arch.mmio_sign_extend) {
314 switch (run->mmio.len) {
315#ifdef CONFIG_PPC64
316 case 4:
317 gpr = (s64)(s32)gpr;
318 break;
319#endif
320 case 2:
321 gpr = (s64)(s16)gpr;
322 break;
323 case 1:
324 gpr = (s64)(s8)gpr;
325 break;
326 }
327 }
328
303 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); 329 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
330
331 switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) {
332 case KVM_REG_GPR:
333 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
334 break;
335 case KVM_REG_FPR:
336 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
337 break;
338#ifdef CONFIG_PPC_BOOK3S
339 case KVM_REG_QPR:
340 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
341 break;
342 case KVM_REG_FQPR:
343 vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
344 vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr;
345 break;
346#endif
347 default:
348 BUG();
349 }
304} 350}
305 351
306int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 352int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -319,12 +365,25 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
319 vcpu->arch.mmio_is_bigendian = is_bigendian; 365 vcpu->arch.mmio_is_bigendian = is_bigendian;
320 vcpu->mmio_needed = 1; 366 vcpu->mmio_needed = 1;
321 vcpu->mmio_is_write = 0; 367 vcpu->mmio_is_write = 0;
368 vcpu->arch.mmio_sign_extend = 0;
322 369
323 return EMULATE_DO_MMIO; 370 return EMULATE_DO_MMIO;
324} 371}
325 372
373/* Same as above, but sign extends */
374int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
375 unsigned int rt, unsigned int bytes, int is_bigendian)
376{
377 int r;
378
379 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
380 vcpu->arch.mmio_sign_extend = 1;
381
382 return r;
383}
384
326int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, 385int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
327 u32 val, unsigned int bytes, int is_bigendian) 386 u64 val, unsigned int bytes, int is_bigendian)
328{ 387{
329 void *data = run->mmio.data; 388 void *data = run->mmio.data;
330 389
@@ -342,6 +401,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
342 /* Store the value at the lowest bytes in 'data'. */ 401 /* Store the value at the lowest bytes in 'data'. */
343 if (is_bigendian) { 402 if (is_bigendian) {
344 switch (bytes) { 403 switch (bytes) {
404 case 8: *(u64 *)data = val; break;
345 case 4: *(u32 *)data = val; break; 405 case 4: *(u32 *)data = val; break;
346 case 2: *(u16 *)data = val; break; 406 case 2: *(u16 *)data = val; break;
347 case 1: *(u8 *)data = val; break; 407 case 1: *(u8 *)data = val; break;
@@ -376,6 +436,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
376 if (!vcpu->arch.dcr_is_write) 436 if (!vcpu->arch.dcr_is_write)
377 kvmppc_complete_dcr_load(vcpu, run); 437 kvmppc_complete_dcr_load(vcpu, run);
378 vcpu->arch.dcr_needed = 0; 438 vcpu->arch.dcr_needed = 0;
439 } else if (vcpu->arch.osi_needed) {
440 u64 *gprs = run->osi.gprs;
441 int i;
442
443 for (i = 0; i < 32; i++)
444 kvmppc_set_gpr(vcpu, i, gprs[i]);
445 vcpu->arch.osi_needed = 0;
379 } 446 }
380 447
381 kvmppc_core_deliver_interrupts(vcpu); 448 kvmppc_core_deliver_interrupts(vcpu);
@@ -396,7 +463,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
396 463
397int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) 464int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
398{ 465{
399 kvmppc_core_queue_external(vcpu, irq); 466 if (irq->irq == KVM_INTERRUPT_UNSET)
467 kvmppc_core_dequeue_external(vcpu, irq);
468 else
469 kvmppc_core_queue_external(vcpu, irq);
400 470
401 if (waitqueue_active(&vcpu->wq)) { 471 if (waitqueue_active(&vcpu->wq)) {
402 wake_up_interruptible(&vcpu->wq); 472 wake_up_interruptible(&vcpu->wq);
@@ -406,6 +476,27 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
406 return 0; 476 return 0;
407} 477}
408 478
479static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
480 struct kvm_enable_cap *cap)
481{
482 int r;
483
484 if (cap->flags)
485 return -EINVAL;
486
487 switch (cap->cap) {
488 case KVM_CAP_PPC_OSI:
489 r = 0;
490 vcpu->arch.osi_enabled = true;
491 break;
492 default:
493 r = -EINVAL;
494 break;
495 }
496
497 return r;
498}
499
409int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, 500int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
410 struct kvm_mp_state *mp_state) 501 struct kvm_mp_state *mp_state)
411{ 502{
@@ -434,6 +525,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
434 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); 525 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
435 break; 526 break;
436 } 527 }
528 case KVM_ENABLE_CAP:
529 {
530 struct kvm_enable_cap cap;
531 r = -EFAULT;
532 if (copy_from_user(&cap, argp, sizeof(cap)))
533 goto out;
534 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
535 break;
536 }
437 default: 537 default:
438 r = -EINVAL; 538 r = -EINVAL;
439 } 539 }
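The mmio_sign_extend handling added to kvmppc_complete_mmio_load() widens the loaded value before it reaches the target register. The same arithmetic as a standalone program:

/* Standalone sketch of the sign-extending MMIO load completion. */
#include <stdint.h>
#include <stdio.h>

static uint64_t complete_load(uint64_t gpr, int len, int sign_extend)
{
	if (sign_extend) {
		switch (len) {
		case 4: gpr = (int64_t)(int32_t)gpr; break;
		case 2: gpr = (int64_t)(int16_t)gpr; break;
		case 1: gpr = (int64_t)(int8_t)gpr;  break;
		}
	}
	return gpr;
}

int main(void)
{
	/* an lhax that read 0xfff0 becomes 0xfffffffffffffff0 in the GPR */
	printf("%016llx\n", (unsigned long long)complete_load(0xfff0, 2, 1));
	return 0;
}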
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c
index 0dfba2bf7f31..d0ee554e86e4 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/mmu_context_hash32.c
@@ -60,11 +60,7 @@
 static unsigned long next_mmu_context;
 static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
 
-
-/*
- * Set up the context for a new address space.
- */
-int init_new_context(struct task_struct *t, struct mm_struct *mm)
+unsigned long __init_new_context(void)
 {
 	unsigned long ctx = next_mmu_context;
 
@@ -74,19 +70,38 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
 		ctx = 0;
 	}
 	next_mmu_context = (ctx + 1) & LAST_CONTEXT;
-	mm->context.id = ctx;
+
+	return ctx;
+}
+EXPORT_SYMBOL_GPL(__init_new_context);
+
+/*
+ * Set up the context for a new address space.
+ */
+int init_new_context(struct task_struct *t, struct mm_struct *mm)
+{
+	mm->context.id = __init_new_context();
 
 	return 0;
 }
 
 /*
+ * Free a context ID. Make sure to call this with preempt disabled!
+ */
+void __destroy_context(unsigned long ctx)
+{
+	clear_bit(ctx, context_map);
+}
+EXPORT_SYMBOL_GPL(__destroy_context);
+
+/*
  * We're finished using the context for an address space.
  */
 void destroy_context(struct mm_struct *mm)
 {
 	preempt_disable();
 	if (mm->context.id != NO_CONTEXT) {
-		clear_bit(mm->context.id, context_map);
+		__destroy_context(mm->context.id);
 		mm->context.id = NO_CONTEXT;
 	}
 	preempt_enable();
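__init_new_context()/__destroy_context() expose a small bitmap allocator that KVM can now reuse for shadow-MMU contexts. A standalone sketch of the allocate/free pattern (simplified: it assumes a free ID always exists and replaces the kernel's atomic bit helpers with plain operations):

/* Standalone sketch of a bitmap-based context ID allocator. */
#include <stdio.h>

#define LAST_CONTEXT 32767
#define WORD_BITS (8 * (int)sizeof(unsigned long))

static unsigned long next_ctx;
static unsigned long ctx_map[LAST_CONTEXT / WORD_BITS + 1];

static unsigned long alloc_context(void)
{
	unsigned long ctx = next_ctx;

	while (ctx_map[ctx / WORD_BITS] & (1UL << (ctx % WORD_BITS)))
		ctx = (ctx + 1) & LAST_CONTEXT;		/* linear probe for a free ID */
	ctx_map[ctx / WORD_BITS] |= 1UL << (ctx % WORD_BITS);
	next_ctx = (ctx + 1) & LAST_CONTEXT;
	return ctx;
}

static void free_context(unsigned long ctx)
{
	ctx_map[ctx / WORD_BITS] &= ~(1UL << (ctx % WORD_BITS));
}

int main(void)
{
	unsigned long a = alloc_context(), b = alloc_context();

	printf("%lu %lu\n", a, b);	/* 0 1 */
	free_context(a);
	return 0;
}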
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 49292869a5cd..8093e6f47f49 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -341,11 +341,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	rc = kvm_vcpu_init(vcpu, kvm, id);
 	if (rc)
-		goto out_free_cpu;
+		goto out_free_sie_block;
 	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
 		 vcpu->arch.sie_block);
 
 	return vcpu;
+out_free_sie_block:
+	free_page((unsigned long)(vcpu->arch.sie_block));
 out_free_cpu:
 	kfree(vcpu);
 out_nomem:
@@ -750,7 +752,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 static int __init kvm_s390_init(void)
 {
 	int ret;
-	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 	if (ret)
 		return ret;
 
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 60f09ab3672c..cfa9d1777457 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -72,7 +72,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
 	struct kvm_memslots *memslots;
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
-	memslots = rcu_dereference(vcpu->kvm->memslots);
+	memslots = kvm_memslots(vcpu->kvm);
 
 	mem = &memslots->memslots[0];
 
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index f46b79f6c16c..ff90055c7f0b 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -21,6 +21,7 @@
 #define __KVM_HAVE_PIT_STATE2
 #define __KVM_HAVE_XEN_HVM
 #define __KVM_HAVE_VCPU_EVENTS
+#define __KVM_HAVE_DEBUGREGS
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -257,6 +258,11 @@ struct kvm_reinject_control {
 /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */
 #define KVM_VCPUEVENT_VALID_NMI_PENDING	0x00000001
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR	0x00000002
+#define KVM_VCPUEVENT_VALID_SHADOW	0x00000004
+
+/* Interrupt shadow states */
+#define KVM_X86_SHADOW_INT_MOV_SS	0x01
+#define KVM_X86_SHADOW_INT_STI		0x02
 
 /* for KVM_GET/SET_VCPU_EVENTS */
 struct kvm_vcpu_events {
@@ -271,7 +277,7 @@ struct kvm_vcpu_events {
 		__u8 injected;
 		__u8 nr;
 		__u8 soft;
-		__u8 pad;
+		__u8 shadow;
 	} interrupt;
 	struct {
 		__u8 injected;
@@ -284,4 +290,13 @@ struct kvm_vcpu_events {
 	__u32 reserved[10];
 };
 
+/* for KVM_GET/SET_DEBUGREGS */
+struct kvm_debugregs {
+	__u64 db[4];
+	__u64 dr6;
+	__u64 dr7;
+	__u64 flags;
+	__u64 reserved[9];
+};
+
 #endif /* _ASM_X86_KVM_H */
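Userspace consumes the new shadow field through the existing vcpu-events ioctls: read the current state, set the shadow byte, and mark it valid with KVM_VCPUEVENT_VALID_SHADOW. A minimal sketch, assuming an already-created vcpu fd:

/* Userspace sketch: program an STI interrupt shadow via KVM_SET_VCPU_EVENTS. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_sti_shadow(int vcpu_fd)
{
	struct kvm_vcpu_events ev;

	if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &ev) < 0)
		return -1;

	ev.interrupt.shadow = KVM_X86_SHADOW_INT_STI;	/* block interrupts for one instruction */
	ev.flags |= KVM_VCPUEVENT_VALID_SHADOW;		/* tell KVM the shadow field is valid */

	return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &ev);
}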
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7a6f54fa13ba..0b2729bf2070 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -11,6 +11,8 @@
11#ifndef _ASM_X86_KVM_X86_EMULATE_H 11#ifndef _ASM_X86_KVM_X86_EMULATE_H
12#define _ASM_X86_KVM_X86_EMULATE_H 12#define _ASM_X86_KVM_X86_EMULATE_H
13 13
14#include <asm/desc_defs.h>
15
14struct x86_emulate_ctxt; 16struct x86_emulate_ctxt;
15 17
16/* 18/*
@@ -63,6 +65,15 @@ struct x86_emulate_ops {
63 unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); 65 unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
64 66
65 /* 67 /*
68 * write_std: Write bytes of standard (non-emulated/special) memory.
69 * Used for descriptor writing.
70 * @addr: [IN ] Linear address to which to write.
71 * @val: [OUT] Value write to memory, zero-extended to 'u_long'.
72 * @bytes: [IN ] Number of bytes to write to memory.
73 */
74 int (*write_std)(unsigned long addr, void *val,
75 unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
76 /*
66 * fetch: Read bytes of standard (non-emulated/special) memory. 77 * fetch: Read bytes of standard (non-emulated/special) memory.
67 * Used for instruction fetch. 78 * Used for instruction fetch.
68 * @addr: [IN ] Linear address from which to read. 79 * @addr: [IN ] Linear address from which to read.
@@ -109,6 +120,23 @@ struct x86_emulate_ops {
109 unsigned int bytes, 120 unsigned int bytes,
110 struct kvm_vcpu *vcpu); 121 struct kvm_vcpu *vcpu);
111 122
123 int (*pio_in_emulated)(int size, unsigned short port, void *val,
124 unsigned int count, struct kvm_vcpu *vcpu);
125
126 int (*pio_out_emulated)(int size, unsigned short port, const void *val,
127 unsigned int count, struct kvm_vcpu *vcpu);
128
129 bool (*get_cached_descriptor)(struct desc_struct *desc,
130 int seg, struct kvm_vcpu *vcpu);
131 void (*set_cached_descriptor)(struct desc_struct *desc,
132 int seg, struct kvm_vcpu *vcpu);
133 u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu);
134 void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu);
135 void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu);
136 ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu);
137 void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu);
138 int (*cpl)(struct kvm_vcpu *vcpu);
139 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
112}; 140};
113 141
114/* Type, address-of, and value of an instruction's operand. */ 142/* Type, address-of, and value of an instruction's operand. */
@@ -124,6 +152,12 @@ struct fetch_cache {
124 unsigned long end; 152 unsigned long end;
125}; 153};
126 154
155struct read_cache {
156 u8 data[1024];
157 unsigned long pos;
158 unsigned long end;
159};
160
127struct decode_cache { 161struct decode_cache {
128 u8 twobyte; 162 u8 twobyte;
129 u8 b; 163 u8 b;
@@ -139,7 +173,7 @@ struct decode_cache {
139 u8 seg_override; 173 u8 seg_override;
140 unsigned int d; 174 unsigned int d;
141 unsigned long regs[NR_VCPU_REGS]; 175 unsigned long regs[NR_VCPU_REGS];
142 unsigned long eip, eip_orig; 176 unsigned long eip;
143 /* modrm */ 177 /* modrm */
144 u8 modrm; 178 u8 modrm;
145 u8 modrm_mod; 179 u8 modrm_mod;
@@ -151,16 +185,15 @@ struct decode_cache {
151 void *modrm_ptr; 185 void *modrm_ptr;
152 unsigned long modrm_val; 186 unsigned long modrm_val;
153 struct fetch_cache fetch; 187 struct fetch_cache fetch;
188 struct read_cache io_read;
154}; 189};
155 190
156#define X86_SHADOW_INT_MOV_SS 1
157#define X86_SHADOW_INT_STI 2
158
159struct x86_emulate_ctxt { 191struct x86_emulate_ctxt {
160 /* Register state before/after emulation. */ 192 /* Register state before/after emulation. */
161 struct kvm_vcpu *vcpu; 193 struct kvm_vcpu *vcpu;
162 194
163 unsigned long eflags; 195 unsigned long eflags;
196 unsigned long eip; /* eip before instruction emulation */
164 /* Emulated execution mode, represented by an X86EMUL_MODE value. */ 197 /* Emulated execution mode, represented by an X86EMUL_MODE value. */
165 int mode; 198 int mode;
166 u32 cs_base; 199 u32 cs_base;
@@ -168,6 +201,7 @@ struct x86_emulate_ctxt {
168 /* interruptibility state, as a result of execution of STI or MOV SS */ 201 /* interruptibility state, as a result of execution of STI or MOV SS */
169 int interruptibility; 202 int interruptibility;
170 203
204 bool restart; /* restart string instruction after writeback */
171 /* decode cache */ 205 /* decode cache */
172 struct decode_cache decode; 206 struct decode_cache decode;
173}; 207};
@@ -194,5 +228,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
194 struct x86_emulate_ops *ops); 228 struct x86_emulate_ops *ops);
195int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, 229int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
196 struct x86_emulate_ops *ops); 230 struct x86_emulate_ops *ops);
231int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
232 struct x86_emulate_ops *ops,
233 u16 tss_selector, int reason,
234 bool has_error_code, u32 error_code);
197 235
198#endif /* _ASM_X86_KVM_X86_EMULATE_H */ 236#endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 06d9e79ca37d..76f5483cffec 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -171,15 +171,15 @@ struct kvm_pte_chain {
171union kvm_mmu_page_role { 171union kvm_mmu_page_role {
172 unsigned word; 172 unsigned word;
173 struct { 173 struct {
174 unsigned glevels:4;
175 unsigned level:4; 174 unsigned level:4;
175 unsigned cr4_pae:1;
176 unsigned quadrant:2; 176 unsigned quadrant:2;
177 unsigned pad_for_nice_hex_output:6; 177 unsigned pad_for_nice_hex_output:6;
178 unsigned direct:1; 178 unsigned direct:1;
179 unsigned access:3; 179 unsigned access:3;
180 unsigned invalid:1; 180 unsigned invalid:1;
181 unsigned cr4_pge:1;
182 unsigned nxe:1; 181 unsigned nxe:1;
182 unsigned cr0_wp:1;
183 }; 183 };
184}; 184};
185 185
@@ -187,8 +187,6 @@ struct kvm_mmu_page {
187 struct list_head link; 187 struct list_head link;
188 struct hlist_node hash_link; 188 struct hlist_node hash_link;
189 189
190 struct list_head oos_link;
191
192 /* 190 /*
193 * The following two entries are used to key the shadow page in the 191 * The following two entries are used to key the shadow page in the
194 * hash table. 192 * hash table.
@@ -204,9 +202,9 @@ struct kvm_mmu_page {
204 * in this shadow page. 202 * in this shadow page.
205 */ 203 */
206 DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 204 DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
207 int multimapped; /* More than one parent_pte? */ 205 bool multimapped; /* More than one parent_pte? */
208 int root_count; /* Currently serving as active root */
209 bool unsync; 206 bool unsync;
207 int root_count; /* Currently serving as active root */
210 unsigned int unsync_children; 208 unsigned int unsync_children;
211 union { 209 union {
212 u64 *parent_pte; /* !multimapped */ 210 u64 *parent_pte; /* !multimapped */
@@ -224,14 +222,9 @@ struct kvm_pv_mmu_op_buffer {
224 222
225struct kvm_pio_request { 223struct kvm_pio_request {
226 unsigned long count; 224 unsigned long count;
227 int cur_count;
228 gva_t guest_gva;
229 int in; 225 int in;
230 int port; 226 int port;
231 int size; 227 int size;
232 int string;
233 int down;
234 int rep;
235}; 228};
236 229
237/* 230/*
@@ -320,6 +313,7 @@ struct kvm_vcpu_arch {
320 struct kvm_queued_exception { 313 struct kvm_queued_exception {
321 bool pending; 314 bool pending;
322 bool has_error_code; 315 bool has_error_code;
316 bool reinject;
323 u8 nr; 317 u8 nr;
324 u32 error_code; 318 u32 error_code;
325 } exception; 319 } exception;
@@ -362,8 +356,8 @@ struct kvm_vcpu_arch {
362 u64 *mce_banks; 356 u64 *mce_banks;
363 357
364 /* used for guest single stepping over the given code position */ 358 /* used for guest single stepping over the given code position */
365 u16 singlestep_cs;
366 unsigned long singlestep_rip; 359 unsigned long singlestep_rip;
360
367 /* fields used by HYPER-V emulation */ 361 /* fields used by HYPER-V emulation */
368 u64 hv_vapic; 362 u64 hv_vapic;
369}; 363};
@@ -389,6 +383,7 @@ struct kvm_arch {
389 unsigned int n_free_mmu_pages; 383 unsigned int n_free_mmu_pages;
390 unsigned int n_requested_mmu_pages; 384 unsigned int n_requested_mmu_pages;
391 unsigned int n_alloc_mmu_pages; 385 unsigned int n_alloc_mmu_pages;
386 atomic_t invlpg_counter;
392 struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; 387 struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
393 /* 388 /*
394 * Hash table of struct kvm_mmu_page. 389 * Hash table of struct kvm_mmu_page.
@@ -461,11 +456,6 @@ struct kvm_vcpu_stat {
461 u32 nmi_injections; 456 u32 nmi_injections;
462}; 457};
463 458
464struct descriptor_table {
465 u16 limit;
466 unsigned long base;
467} __attribute__((packed));
468
469struct kvm_x86_ops { 459struct kvm_x86_ops {
470 int (*cpu_has_kvm_support)(void); /* __init */ 460 int (*cpu_has_kvm_support)(void); /* __init */
471 int (*disabled_by_bios)(void); /* __init */ 461 int (*disabled_by_bios)(void); /* __init */
@@ -503,12 +493,11 @@ struct kvm_x86_ops {
503 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); 493 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
504 void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); 494 void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
505 void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); 495 void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
506 void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 496 void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
507 void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 497 void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
508 void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 498 void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
509 void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 499 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
510 int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); 500 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
511 int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value);
512 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 501 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
513 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 502 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
514 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 503 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
@@ -527,7 +516,8 @@ struct kvm_x86_ops {
527 void (*set_irq)(struct kvm_vcpu *vcpu); 516 void (*set_irq)(struct kvm_vcpu *vcpu);
528 void (*set_nmi)(struct kvm_vcpu *vcpu); 517 void (*set_nmi)(struct kvm_vcpu *vcpu);
529 void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, 518 void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
530 bool has_error_code, u32 error_code); 519 bool has_error_code, u32 error_code,
520 bool reinject);
531 int (*interrupt_allowed)(struct kvm_vcpu *vcpu); 521 int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
532 int (*nmi_allowed)(struct kvm_vcpu *vcpu); 522 int (*nmi_allowed)(struct kvm_vcpu *vcpu);
533 bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); 523 bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
@@ -541,6 +531,8 @@ struct kvm_x86_ops {
541 int (*get_lpage_level)(void); 531 int (*get_lpage_level)(void);
542 bool (*rdtscp_supported)(void); 532 bool (*rdtscp_supported)(void);
543 533
534 void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry);
535
544 const struct trace_print_flags *exit_reasons_str; 536 const struct trace_print_flags *exit_reasons_str;
545}; 537};
546 538
@@ -587,23 +579,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
587void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); 579void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
588void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 580void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
589void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); 581void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
590void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
591 unsigned long *rflags);
592 582
593unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr);
594void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value,
595 unsigned long *rflags);
596void kvm_enable_efer_bits(u64); 583void kvm_enable_efer_bits(u64);
597int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); 584int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
598int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 585int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
599 586
600struct x86_emulate_ctxt; 587struct x86_emulate_ctxt;
601 588
602int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, 589int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
603 int size, unsigned port);
604int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
605 int size, unsigned long count, int down,
606 gva_t address, int rep, unsigned port);
607void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); 590void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
608int kvm_emulate_halt(struct kvm_vcpu *vcpu); 591int kvm_emulate_halt(struct kvm_vcpu *vcpu);
609int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); 592int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
@@ -616,12 +599,15 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
616void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 599void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
617int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); 600int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
618 601
619int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); 602int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
603 bool has_error_code, u32 error_code);
620 604
621void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 605void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
622void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); 606void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
623void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); 607void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
624void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); 608void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
609int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
610int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
625unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); 611unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
626void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); 612void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
627void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); 613void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
@@ -634,6 +620,8 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
634 620
635void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); 621void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
636void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); 622void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
623void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
624void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
637void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, 625void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
638 u32 error_code); 626 u32 error_code);
639bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); 627bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
@@ -649,8 +637,6 @@ int emulator_write_emulated(unsigned long addr,
649 unsigned int bytes, 637 unsigned int bytes,
650 struct kvm_vcpu *vcpu); 638 struct kvm_vcpu *vcpu);
651 639
652unsigned long segment_base(u16 selector);
653
654void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); 640void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
655void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 641void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
656 const u8 *new, int bytes, 642 const u8 *new, int bytes,
@@ -675,7 +661,6 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
675void kvm_enable_tdp(void); 661void kvm_enable_tdp(void);
676void kvm_disable_tdp(void); 662void kvm_disable_tdp(void);
677 663
678int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
679int complete_pio(struct kvm_vcpu *vcpu); 664int complete_pio(struct kvm_vcpu *vcpu);
680bool kvm_check_iopl(struct kvm_vcpu *vcpu); 665bool kvm_check_iopl(struct kvm_vcpu *vcpu);
681 666
@@ -724,23 +709,6 @@ static inline void kvm_load_ldt(u16 sel)
724 asm("lldt %0" : : "rm"(sel)); 709 asm("lldt %0" : : "rm"(sel));
725} 710}
726 711
727static inline void kvm_get_idt(struct descriptor_table *table)
728{
729 asm("sidt %0" : "=m"(*table));
730}
731
732static inline void kvm_get_gdt(struct descriptor_table *table)
733{
734 asm("sgdt %0" : "=m"(*table));
735}
736
737static inline unsigned long kvm_read_tr_base(void)
738{
739 u16 tr;
740 asm("str %0" : "=g"(tr));
741 return segment_base(tr);
742}
743
744#ifdef CONFIG_X86_64 712#ifdef CONFIG_X86_64
745static inline unsigned long read_msr(unsigned long msr) 713static inline unsigned long read_msr(unsigned long msr)
746{ 714{
@@ -826,4 +794,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
826void kvm_define_shared_msr(unsigned index, u32 msr); 794void kvm_define_shared_msr(unsigned index, u32 msr);
827void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); 795void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
828 796
797bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
798
829#endif /* _ASM_X86_KVM_HOST_H */ 799#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index ffae1420e7d7..05eba5e9a8e8 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -16,10 +16,23 @@
16#define KVM_FEATURE_CLOCKSOURCE 0 16#define KVM_FEATURE_CLOCKSOURCE 0
17#define KVM_FEATURE_NOP_IO_DELAY 1 17#define KVM_FEATURE_NOP_IO_DELAY 1
18#define KVM_FEATURE_MMU_OP 2 18#define KVM_FEATURE_MMU_OP 2
 19/* This indicates that the new set of kvmclock MSRs
 20 * is available. The use of 0x11 and 0x12 is deprecated.
 21 */
22#define KVM_FEATURE_CLOCKSOURCE2 3
23
24/* The last 8 bits are used to indicate how to interpret the flags field
 25 * in the pvclock structure. If no bits are set, all flags are ignored.
26 */
27#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24
19 28
20#define MSR_KVM_WALL_CLOCK 0x11 29#define MSR_KVM_WALL_CLOCK 0x11
21#define MSR_KVM_SYSTEM_TIME 0x12 30#define MSR_KVM_SYSTEM_TIME 0x12
22 31
 32/* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */
33#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00
34#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
35
23#define KVM_MAX_MMU_OP_BATCH 32 36#define KVM_MAX_MMU_OP_BATCH 32
24 37
25/* Operations for KVM_HC_MMU_OP */ 38/* Operations for KVM_HC_MMU_OP */
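A minimal sketch (not part of the patch) of how a guest could choose between the new and legacy kvmclock MSRs from these definitions. pick_system_time_msr() is a hypothetical helper; kvm_para_has_feature() is the probe already provided by this header. The kvmclock.c hunk later in this diff applies the same preference order before writing the chosen MSR.

/* Sketch only: prefer the new kvmclock MSR pair, fall back to the old one. */
static u32 pick_system_time_msr(void)
{
	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2))
		return MSR_KVM_SYSTEM_TIME_NEW;	/* 0x4b564d01 */
	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))
		return MSR_KVM_SYSTEM_TIME;	/* legacy 0x12 */
	return 0;				/* kvmclock not offered */
}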
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index bc473acfa7f9..f9324851eba0 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -202,8 +202,9 @@
202#define MSR_IA32_EBL_CR_POWERON 0x0000002a 202#define MSR_IA32_EBL_CR_POWERON 0x0000002a
203#define MSR_IA32_FEATURE_CONTROL 0x0000003a 203#define MSR_IA32_FEATURE_CONTROL 0x0000003a
204 204
205#define FEATURE_CONTROL_LOCKED (1<<0) 205#define FEATURE_CONTROL_LOCKED (1<<0)
206#define FEATURE_CONTROL_VMXON_ENABLED (1<<2) 206#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
207#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
207 208
208#define MSR_IA32_APICBASE 0x0000001b 209#define MSR_IA32_APICBASE 0x0000001b
209#define MSR_IA32_APICBASE_BSP (1<<8) 210#define MSR_IA32_APICBASE_BSP (1<<8)
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 6d93508f2626..35f2d1948ada 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -29,7 +29,8 @@ struct pvclock_vcpu_time_info {
29 u64 system_time; 29 u64 system_time;
30 u32 tsc_to_system_mul; 30 u32 tsc_to_system_mul;
31 s8 tsc_shift; 31 s8 tsc_shift;
32 u8 pad[3]; 32 u8 flags;
33 u8 pad[2];
33} __attribute__((__packed__)); /* 32 bytes */ 34} __attribute__((__packed__)); /* 32 bytes */
34 35
35struct pvclock_wall_clock { 36struct pvclock_wall_clock {
@@ -38,5 +39,6 @@ struct pvclock_wall_clock {
38 u32 nsec; 39 u32 nsec;
39} __attribute__((__packed__)); 40} __attribute__((__packed__));
40 41
42#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
41#endif /* __ASSEMBLY__ */ 43#endif /* __ASSEMBLY__ */
42#endif /* _ASM_X86_PVCLOCK_ABI_H */ 44#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 53235fd5f8ce..cd02f324aa6b 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -6,6 +6,7 @@
6 6
7/* some helper functions for xen and kvm pv clock sources */ 7/* some helper functions for xen and kvm pv clock sources */
8cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); 8cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
9void pvclock_set_flags(u8 flags);
9unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); 10unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
10void pvclock_read_wallclock(struct pvclock_wall_clock *wall, 11void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
11 struct pvclock_vcpu_time_info *vcpu, 12 struct pvclock_vcpu_time_info *vcpu,
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 38638cd2fa4c..0e831059ac5a 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -81,7 +81,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
81 u32 event_inj_err; 81 u32 event_inj_err;
82 u64 nested_cr3; 82 u64 nested_cr3;
83 u64 lbr_ctl; 83 u64 lbr_ctl;
84 u8 reserved_5[832]; 84 u64 reserved_5;
85 u64 next_rip;
86 u8 reserved_6[816];
85}; 87};
86 88
87 89
@@ -115,6 +117,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
115#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) 117#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
116#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) 118#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
117 119
120#define SVM_VM_CR_VALID_MASK 0x001fULL
121#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
122#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
123
118struct __attribute__ ((__packed__)) vmcb_seg { 124struct __attribute__ ((__packed__)) vmcb_seg {
119 u16 selector; 125 u16 selector;
120 u16 attrib; 126 u16 attrib;
@@ -238,6 +244,7 @@ struct __attribute__ ((__packed__)) vmcb {
238 244
239#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 245#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
240#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 246#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
247#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44
241 248
242#define SVM_EXIT_READ_CR0 0x000 249#define SVM_EXIT_READ_CR0 0x000
243#define SVM_EXIT_READ_CR3 0x003 250#define SVM_EXIT_READ_CR3 0x003
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index fb9a080740ec..9e6779f7cf2d 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -25,6 +25,8 @@
25 * 25 *
26 */ 26 */
27 27
28#include <linux/types.h>
29
28/* 30/*
29 * Definitions of Primary Processor-Based VM-Execution Controls. 31 * Definitions of Primary Processor-Based VM-Execution Controls.
30 */ 32 */
@@ -120,6 +122,8 @@ enum vmcs_field {
120 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, 122 GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
121 GUEST_IA32_PAT = 0x00002804, 123 GUEST_IA32_PAT = 0x00002804,
122 GUEST_IA32_PAT_HIGH = 0x00002805, 124 GUEST_IA32_PAT_HIGH = 0x00002805,
125 GUEST_IA32_EFER = 0x00002806,
126 GUEST_IA32_EFER_HIGH = 0x00002807,
123 GUEST_PDPTR0 = 0x0000280a, 127 GUEST_PDPTR0 = 0x0000280a,
124 GUEST_PDPTR0_HIGH = 0x0000280b, 128 GUEST_PDPTR0_HIGH = 0x0000280b,
125 GUEST_PDPTR1 = 0x0000280c, 129 GUEST_PDPTR1 = 0x0000280c,
@@ -130,6 +134,8 @@ enum vmcs_field {
130 GUEST_PDPTR3_HIGH = 0x00002811, 134 GUEST_PDPTR3_HIGH = 0x00002811,
131 HOST_IA32_PAT = 0x00002c00, 135 HOST_IA32_PAT = 0x00002c00,
132 HOST_IA32_PAT_HIGH = 0x00002c01, 136 HOST_IA32_PAT_HIGH = 0x00002c01,
137 HOST_IA32_EFER = 0x00002c02,
138 HOST_IA32_EFER_HIGH = 0x00002c03,
133 PIN_BASED_VM_EXEC_CONTROL = 0x00004000, 139 PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
134 CPU_BASED_VM_EXEC_CONTROL = 0x00004002, 140 CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
135 EXCEPTION_BITMAP = 0x00004004, 141 EXCEPTION_BITMAP = 0x00004004,
@@ -394,6 +400,10 @@ enum vmcs_field {
394#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" 400#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
395#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" 401#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
396 402
397 403struct vmx_msr_entry {
404 u32 index;
405 u32 reserved;
406 u64 value;
407} __aligned(16);
398 408
399#endif 409#endif
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index feaeb0d3aa4f..eb9b76c716c2 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,6 +29,8 @@
29#define KVM_SCALE 22 29#define KVM_SCALE 22
30 30
31static int kvmclock = 1; 31static int kvmclock = 1;
32static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
33static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
32 34
33static int parse_no_kvmclock(char *arg) 35static int parse_no_kvmclock(char *arg)
34{ 36{
@@ -54,7 +56,8 @@ static unsigned long kvm_get_wallclock(void)
54 56
55 low = (int)__pa_symbol(&wall_clock); 57 low = (int)__pa_symbol(&wall_clock);
56 high = ((u64)__pa_symbol(&wall_clock) >> 32); 58 high = ((u64)__pa_symbol(&wall_clock) >> 32);
57 native_write_msr(MSR_KVM_WALL_CLOCK, low, high); 59
60 native_write_msr(msr_kvm_wall_clock, low, high);
58 61
59 vcpu_time = &get_cpu_var(hv_clock); 62 vcpu_time = &get_cpu_var(hv_clock);
60 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); 63 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
@@ -130,7 +133,8 @@ static int kvm_register_clock(char *txt)
130 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 133 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
131 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", 134 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
132 cpu, high, low, txt); 135 cpu, high, low, txt);
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); 136
137 return native_write_msr_safe(msr_kvm_system_time, low, high);
134} 138}
135 139
136#ifdef CONFIG_X86_LOCAL_APIC 140#ifdef CONFIG_X86_LOCAL_APIC
@@ -165,14 +169,14 @@ static void __init kvm_smp_prepare_boot_cpu(void)
165#ifdef CONFIG_KEXEC 169#ifdef CONFIG_KEXEC
166static void kvm_crash_shutdown(struct pt_regs *regs) 170static void kvm_crash_shutdown(struct pt_regs *regs)
167{ 171{
168 native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); 172 native_write_msr(msr_kvm_system_time, 0, 0);
169 native_machine_crash_shutdown(regs); 173 native_machine_crash_shutdown(regs);
170} 174}
171#endif 175#endif
172 176
173static void kvm_shutdown(void) 177static void kvm_shutdown(void)
174{ 178{
175 native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); 179 native_write_msr(msr_kvm_system_time, 0, 0);
176 native_machine_shutdown(); 180 native_machine_shutdown();
177} 181}
178 182
@@ -181,27 +185,37 @@ void __init kvmclock_init(void)
181 if (!kvm_para_available()) 185 if (!kvm_para_available())
182 return; 186 return;
183 187
184 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { 188 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
185 if (kvm_register_clock("boot clock")) 189 msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
186 return; 190 msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
187 pv_time_ops.sched_clock = kvm_clock_read; 191 } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)))
188 x86_platform.calibrate_tsc = kvm_get_tsc_khz; 192 return;
189 x86_platform.get_wallclock = kvm_get_wallclock; 193
 190 x86_platform.set_wallclock = kvm_set_wallclock; 194 printk(KERN_INFO "kvm-clock: Using msrs %x and %x\n",
195 msr_kvm_system_time, msr_kvm_wall_clock);
196
197 if (kvm_register_clock("boot clock"))
198 return;
199 pv_time_ops.sched_clock = kvm_clock_read;
200 x86_platform.calibrate_tsc = kvm_get_tsc_khz;
201 x86_platform.get_wallclock = kvm_get_wallclock;
202 x86_platform.set_wallclock = kvm_set_wallclock;
191#ifdef CONFIG_X86_LOCAL_APIC 203#ifdef CONFIG_X86_LOCAL_APIC
192 x86_cpuinit.setup_percpu_clockev = 204 x86_cpuinit.setup_percpu_clockev =
193 kvm_setup_secondary_clock; 205 kvm_setup_secondary_clock;
194#endif 206#endif
195#ifdef CONFIG_SMP 207#ifdef CONFIG_SMP
196 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; 208 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
197#endif 209#endif
198 machine_ops.shutdown = kvm_shutdown; 210 machine_ops.shutdown = kvm_shutdown;
199#ifdef CONFIG_KEXEC 211#ifdef CONFIG_KEXEC
200 machine_ops.crash_shutdown = kvm_crash_shutdown; 212 machine_ops.crash_shutdown = kvm_crash_shutdown;
201#endif 213#endif
202 kvm_get_preset_lpj(); 214 kvm_get_preset_lpj();
203 clocksource_register(&kvm_clock); 215 clocksource_register(&kvm_clock);
204 pv_info.paravirt_enabled = 1; 216 pv_info.paravirt_enabled = 1;
205 pv_info.name = "KVM"; 217 pv_info.name = "KVM";
206 } 218
219 if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
220 pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
207} 221}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 03801f2f761f..239427ca02af 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -31,8 +31,16 @@ struct pvclock_shadow_time {
31 u32 tsc_to_nsec_mul; 31 u32 tsc_to_nsec_mul;
32 int tsc_shift; 32 int tsc_shift;
33 u32 version; 33 u32 version;
34 u8 flags;
34}; 35};
35 36
37static u8 valid_flags __read_mostly = 0;
38
39void pvclock_set_flags(u8 flags)
40{
41 valid_flags = flags;
42}
43
36/* 44/*
37 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 45 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
38 * yielding a 64-bit result. 46 * yielding a 64-bit result.
@@ -91,6 +99,7 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
91 dst->system_timestamp = src->system_time; 99 dst->system_timestamp = src->system_time;
92 dst->tsc_to_nsec_mul = src->tsc_to_system_mul; 100 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
93 dst->tsc_shift = src->tsc_shift; 101 dst->tsc_shift = src->tsc_shift;
102 dst->flags = src->flags;
94 rmb(); /* test version after fetching data */ 103 rmb(); /* test version after fetching data */
95 } while ((src->version & 1) || (dst->version != src->version)); 104 } while ((src->version & 1) || (dst->version != src->version));
96 105
@@ -109,11 +118,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
109 return pv_tsc_khz; 118 return pv_tsc_khz;
110} 119}
111 120
121static atomic64_t last_value = ATOMIC64_INIT(0);
122
112cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) 123cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
113{ 124{
114 struct pvclock_shadow_time shadow; 125 struct pvclock_shadow_time shadow;
115 unsigned version; 126 unsigned version;
116 cycle_t ret, offset; 127 cycle_t ret, offset;
128 u64 last;
117 129
118 do { 130 do {
119 version = pvclock_get_time_values(&shadow, src); 131 version = pvclock_get_time_values(&shadow, src);
@@ -123,6 +135,31 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
123 barrier(); 135 barrier();
124 } while (version != src->version); 136 } while (version != src->version);
125 137
138 if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
139 (shadow.flags & PVCLOCK_TSC_STABLE_BIT))
140 return ret;
141
142 /*
143 * Assumption here is that last_value, a global accumulator, always goes
144 * forward. If we are less than that, we should not be much smaller.
 145 * We assume there is an error margin we're inside, so the correction
 146 * does not sacrifice accuracy.
147 *
148 * For reads: global may have changed between test and return,
 149 * but this means someone else updated the clock at a later time.
150 * We just need to make sure we are not seeing a backwards event.
151 *
152 * For updates: last_value = ret is not enough, since two vcpus could be
153 * updating at the same time, and one of them could be slightly behind,
 154 * making the assumption that last_value always goes forward fail to hold.
155 */
156 last = atomic64_read(&last_value);
157 do {
158 if (ret < last)
159 return last;
160 last = atomic64_cmpxchg(&last_value, last, ret);
161 } while (unlikely(last != ret));
162
126 return ret; 163 return ret;
127} 164}
128 165
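The comment block above carries the reasoning of this hunk: unless both the registered valid_flags and the per-vcpu flags carry PVCLOCK_TSC_STABLE_BIT, reads are clamped through the global last_value accumulator so the returned time never moves backwards across vcpus. Below is a standalone sketch of that clamp pattern in C11 atomics, offered purely as an illustration; read_raw_ns() is a hypothetical, possibly slightly skewed raw counter, not code from this patch.

#include <stdatomic.h>
#include <stdint.h>

/* hypothetical per-cpu raw counter; may lag marginally behind other cpus */
extern uint64_t read_raw_ns(void);

static _Atomic uint64_t last_ns;

uint64_t monotonic_ns(void)
{
	uint64_t ret = read_raw_ns();
	uint64_t last = atomic_load(&last_ns);

	for (;;) {
		if (ret < last)
			return last;	/* someone already returned a later time */
		/* try to publish ret; on failure 'last' is refreshed for the retry */
		if (atomic_compare_exchange_weak(&last_ns, &last, ret))
			return ret;
	}
}

When both host and guest set PVCLOCK_TSC_STABLE_BIT, the hunk above skips this clamp and returns the raw reading directly.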
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index cc2c60474fd0..c2f1b26141e2 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -46,6 +46,7 @@
46 46
47/* Global pointer to shared data; NULL means no measured launch. */ 47/* Global pointer to shared data; NULL means no measured launch. */
48struct tboot *tboot __read_mostly; 48struct tboot *tboot __read_mostly;
49EXPORT_SYMBOL(tboot);
49 50
50/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ 51/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
51#define AP_WAIT_TIMEOUT 1 52#define AP_WAIT_TIMEOUT 1
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4dade6ac0827..5ac0bb465ed6 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -33,6 +33,7 @@
33#include <asm/kvm_emulate.h> 33#include <asm/kvm_emulate.h>
34 34
35#include "x86.h" 35#include "x86.h"
36#include "tss.h"
36 37
37/* 38/*
38 * Opcode effective-address decode tables. 39 * Opcode effective-address decode tables.
@@ -50,6 +51,8 @@
50#define DstReg (2<<1) /* Register operand. */ 51#define DstReg (2<<1) /* Register operand. */
51#define DstMem (3<<1) /* Memory operand. */ 52#define DstMem (3<<1) /* Memory operand. */
52#define DstAcc (4<<1) /* Destination Accumulator */ 53#define DstAcc (4<<1) /* Destination Accumulator */
54#define DstDI (5<<1) /* Destination is in ES:(E)DI */
55#define DstMem64 (6<<1) /* 64bit memory operand */
53#define DstMask (7<<1) 56#define DstMask (7<<1)
54/* Source operand type. */ 57/* Source operand type. */
55#define SrcNone (0<<4) /* No source operand. */ 58#define SrcNone (0<<4) /* No source operand. */
@@ -63,6 +66,7 @@
63#define SrcOne (7<<4) /* Implied '1' */ 66#define SrcOne (7<<4) /* Implied '1' */
64#define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ 67#define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */
65#define SrcImmU (9<<4) /* Immediate operand, unsigned */ 68#define SrcImmU (9<<4) /* Immediate operand, unsigned */
 69#define SrcSI (0xa<<4) /* Source is in DS:RSI */
66#define SrcMask (0xf<<4) 70#define SrcMask (0xf<<4)
67/* Generic ModRM decode. */ 71/* Generic ModRM decode. */
68#define ModRM (1<<8) 72#define ModRM (1<<8)
@@ -85,6 +89,9 @@
85#define Src2ImmByte (2<<29) 89#define Src2ImmByte (2<<29)
86#define Src2One (3<<29) 90#define Src2One (3<<29)
87#define Src2Imm16 (4<<29) 91#define Src2Imm16 (4<<29)
 92#define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be
 93 in memory and the second argument is located
 94 immediately after the first one in memory. */
88#define Src2Mask (7<<29) 95#define Src2Mask (7<<29)
89 96
90enum { 97enum {
@@ -147,8 +154,8 @@ static u32 opcode_table[256] = {
147 0, 0, 0, 0, 154 0, 0, 0, 0,
148 /* 0x68 - 0x6F */ 155 /* 0x68 - 0x6F */
149 SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, 156 SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
150 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ 157 DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */
151 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ 158 SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */
152 /* 0x70 - 0x77 */ 159 /* 0x70 - 0x77 */
153 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 160 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
154 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 161 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
@@ -173,12 +180,12 @@ static u32 opcode_table[256] = {
173 /* 0xA0 - 0xA7 */ 180 /* 0xA0 - 0xA7 */
174 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, 181 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
175 ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, 182 ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
176 ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, 183 ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
177 ByteOp | ImplicitOps | String, ImplicitOps | String, 184 ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
178 /* 0xA8 - 0xAF */ 185 /* 0xA8 - 0xAF */
179 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, 186 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
180 ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, 187 ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
181 ByteOp | ImplicitOps | String, ImplicitOps | String, 188 ByteOp | DstDI | String, DstDI | String,
182 /* 0xB0 - 0xB7 */ 189 /* 0xB0 - 0xB7 */
183 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, 190 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
184 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, 191 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
@@ -204,13 +211,13 @@ static u32 opcode_table[256] = {
204 0, 0, 0, 0, 0, 0, 0, 0, 211 0, 0, 0, 0, 0, 0, 0, 0,
205 /* 0xE0 - 0xE7 */ 212 /* 0xE0 - 0xE7 */
206 0, 0, 0, 0, 213 0, 0, 0, 0,
207 ByteOp | SrcImmUByte, SrcImmUByte, 214 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
208 ByteOp | SrcImmUByte, SrcImmUByte, 215 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
209 /* 0xE8 - 0xEF */ 216 /* 0xE8 - 0xEF */
210 SrcImm | Stack, SrcImm | ImplicitOps, 217 SrcImm | Stack, SrcImm | ImplicitOps,
211 SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, 218 SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
212 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 219 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
213 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 220 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
214 /* 0xF0 - 0xF7 */ 221 /* 0xF0 - 0xF7 */
215 0, 0, 0, 0, 222 0, 0, 0, 0,
216 ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, 223 ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
@@ -343,7 +350,8 @@ static u32 group_table[] = {
343 [Group5*8] = 350 [Group5*8] =
344 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 351 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
345 SrcMem | ModRM | Stack, 0, 352 SrcMem | ModRM | Stack, 0,
346 SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, 353 SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps,
354 SrcMem | ModRM | Stack, 0,
347 [Group7*8] = 355 [Group7*8] =
348 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, 356 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
349 SrcNone | ModRM | DstMem | Mov, 0, 357 SrcNone | ModRM | DstMem | Mov, 0,
@@ -353,14 +361,14 @@ static u32 group_table[] = {
353 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, 361 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
354 DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, 362 DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
355 [Group9*8] = 363 [Group9*8] =
356 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, 364 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0,
357}; 365};
358 366
359static u32 group2_table[] = { 367static u32 group2_table[] = {
360 [Group7*8] = 368 [Group7*8] =
361 SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, 369 SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
362 SrcNone | ModRM | DstMem | Mov, 0, 370 SrcNone | ModRM | DstMem | Mov, 0,
363 SrcMem16 | ModRM | Mov, 0, 371 SrcMem16 | ModRM | Mov | Priv, 0,
364 [Group9*8] = 372 [Group9*8] =
365 0, 0, 0, 0, 0, 0, 0, 0, 373 0, 0, 0, 0, 0, 0, 0, 0,
366}; 374};
@@ -562,7 +570,7 @@ static u32 group2_table[] = {
562#define insn_fetch(_type, _size, _eip) \ 570#define insn_fetch(_type, _size, _eip) \
563({ unsigned long _x; \ 571({ unsigned long _x; \
564 rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ 572 rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \
565 if (rc != 0) \ 573 if (rc != X86EMUL_CONTINUE) \
566 goto done; \ 574 goto done; \
567 (_eip) += (_size); \ 575 (_eip) += (_size); \
568 (_type)_x; \ 576 (_type)_x; \
@@ -638,40 +646,40 @@ static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
638 646
639static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, 647static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
640 struct x86_emulate_ops *ops, 648 struct x86_emulate_ops *ops,
641 unsigned long linear, u8 *dest) 649 unsigned long eip, u8 *dest)
642{ 650{
643 struct fetch_cache *fc = &ctxt->decode.fetch; 651 struct fetch_cache *fc = &ctxt->decode.fetch;
644 int rc; 652 int rc;
645 int size; 653 int size, cur_size;
646 654
647 if (linear < fc->start || linear >= fc->end) { 655 if (eip == fc->end) {
648 size = min(15UL, PAGE_SIZE - offset_in_page(linear)); 656 cur_size = fc->end - fc->start;
649 rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); 657 size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
650 if (rc) 658 rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
659 size, ctxt->vcpu, NULL);
660 if (rc != X86EMUL_CONTINUE)
651 return rc; 661 return rc;
652 fc->start = linear; 662 fc->end += size;
653 fc->end = linear + size;
654 } 663 }
655 *dest = fc->data[linear - fc->start]; 664 *dest = fc->data[eip - fc->start];
656 return 0; 665 return X86EMUL_CONTINUE;
657} 666}
658 667
659static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, 668static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
660 struct x86_emulate_ops *ops, 669 struct x86_emulate_ops *ops,
661 unsigned long eip, void *dest, unsigned size) 670 unsigned long eip, void *dest, unsigned size)
662{ 671{
663 int rc = 0; 672 int rc;
664 673
665 /* x86 instructions are limited to 15 bytes. */ 674 /* x86 instructions are limited to 15 bytes. */
666 if (eip + size - ctxt->decode.eip_orig > 15) 675 if (eip + size - ctxt->eip > 15)
667 return X86EMUL_UNHANDLEABLE; 676 return X86EMUL_UNHANDLEABLE;
668 eip += ctxt->cs_base;
669 while (size--) { 677 while (size--) {
670 rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); 678 rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
671 if (rc) 679 if (rc != X86EMUL_CONTINUE)
672 return rc; 680 return rc;
673 } 681 }
674 return 0; 682 return X86EMUL_CONTINUE;
675} 683}
676 684
677/* 685/*
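The fetch-cache rework above makes do_fetch_insn_byte() grow fc->data incrementally from cs_base + eip only when eip reaches fc->end, so each guest byte is fetched at most once per decoded instruction and the 15-byte limit is checked against ctxt->eip. Here is a simplified sketch of the same idea, offered as an illustration rather than the patch's exact code; fetch_guest() is a hypothetical stand-in for ops->fetch() and the page-boundary clamp is omitted.

#include <stdint.h>

/* hypothetical guest-memory read, standing in for ops->fetch() */
extern int fetch_guest(unsigned long eip, uint8_t *buf, unsigned int len);

struct insn_fetch_cache {
	uint8_t data[15];		/* x86 instructions are at most 15 bytes */
	unsigned long start;		/* eip of data[0] */
	unsigned long end;		/* first eip not yet cached */
};

/* assumes eip advances one byte at a time from 'start', as in the decoder */
static int fetch_byte(struct insn_fetch_cache *fc, unsigned long eip, uint8_t *dest)
{
	if (eip == fc->end) {
		unsigned int cur = fc->end - fc->start;
		unsigned int room = sizeof(fc->data) - cur;

		if (!room || fetch_guest(fc->end, fc->data + cur, room))
			return -1;	/* past the 15-byte limit or fetch failed */
		fc->end += room;
	}
	*dest = fc->data[eip - fc->start];
	return 0;
}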
@@ -702,7 +710,7 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
702 *address = 0; 710 *address = 0;
703 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, 711 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
704 ctxt->vcpu, NULL); 712 ctxt->vcpu, NULL);
705 if (rc) 713 if (rc != X86EMUL_CONTINUE)
706 return rc; 714 return rc;
707 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, 715 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
708 ctxt->vcpu, NULL); 716 ctxt->vcpu, NULL);
@@ -782,7 +790,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
782 struct decode_cache *c = &ctxt->decode; 790 struct decode_cache *c = &ctxt->decode;
783 u8 sib; 791 u8 sib;
784 int index_reg = 0, base_reg = 0, scale; 792 int index_reg = 0, base_reg = 0, scale;
785 int rc = 0; 793 int rc = X86EMUL_CONTINUE;
786 794
787 if (c->rex_prefix) { 795 if (c->rex_prefix) {
788 c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ 796 c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */
@@ -895,7 +903,7 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt,
895 struct x86_emulate_ops *ops) 903 struct x86_emulate_ops *ops)
896{ 904{
897 struct decode_cache *c = &ctxt->decode; 905 struct decode_cache *c = &ctxt->decode;
898 int rc = 0; 906 int rc = X86EMUL_CONTINUE;
899 907
900 switch (c->ad_bytes) { 908 switch (c->ad_bytes) {
901 case 2: 909 case 2:
@@ -916,14 +924,18 @@ int
916x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) 924x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
917{ 925{
918 struct decode_cache *c = &ctxt->decode; 926 struct decode_cache *c = &ctxt->decode;
919 int rc = 0; 927 int rc = X86EMUL_CONTINUE;
920 int mode = ctxt->mode; 928 int mode = ctxt->mode;
921 int def_op_bytes, def_ad_bytes, group; 929 int def_op_bytes, def_ad_bytes, group;
922 930
923 /* Shadow copy of register state. Committed on successful emulation. */
924 931
 932 /* we cannot decode an insn before we complete the previous rep insn */
933 WARN_ON(ctxt->restart);
934
935 /* Shadow copy of register state. Committed on successful emulation. */
925 memset(c, 0, sizeof(struct decode_cache)); 936 memset(c, 0, sizeof(struct decode_cache));
926 c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); 937 c->eip = ctxt->eip;
938 c->fetch.start = c->fetch.end = c->eip;
927 ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); 939 ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
928 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 940 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
929 941
@@ -1015,11 +1027,6 @@ done_prefixes:
1015 } 1027 }
1016 } 1028 }
1017 1029
1018 if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
1019 kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
1020 return -1;
1021 }
1022
1023 if (c->d & Group) { 1030 if (c->d & Group) {
1024 group = c->d & GroupMask; 1031 group = c->d & GroupMask;
1025 c->modrm = insn_fetch(u8, 1, c->eip); 1032 c->modrm = insn_fetch(u8, 1, c->eip);
@@ -1046,7 +1053,7 @@ done_prefixes:
1046 rc = decode_modrm(ctxt, ops); 1053 rc = decode_modrm(ctxt, ops);
1047 else if (c->d & MemAbs) 1054 else if (c->d & MemAbs)
1048 rc = decode_abs(ctxt, ops); 1055 rc = decode_abs(ctxt, ops);
1049 if (rc) 1056 if (rc != X86EMUL_CONTINUE)
1050 goto done; 1057 goto done;
1051 1058
1052 if (!c->has_seg_override) 1059 if (!c->has_seg_override)
@@ -1057,6 +1064,10 @@ done_prefixes:
1057 1064
1058 if (c->ad_bytes != 8) 1065 if (c->ad_bytes != 8)
1059 c->modrm_ea = (u32)c->modrm_ea; 1066 c->modrm_ea = (u32)c->modrm_ea;
1067
1068 if (c->rip_relative)
1069 c->modrm_ea += c->eip;
1070
1060 /* 1071 /*
1061 * Decode and fetch the source operand: register, memory 1072 * Decode and fetch the source operand: register, memory
1062 * or immediate. 1073 * or immediate.
@@ -1091,6 +1102,8 @@ done_prefixes:
1091 break; 1102 break;
1092 } 1103 }
1093 c->src.type = OP_MEM; 1104 c->src.type = OP_MEM;
1105 c->src.ptr = (unsigned long *)c->modrm_ea;
1106 c->src.val = 0;
1094 break; 1107 break;
1095 case SrcImm: 1108 case SrcImm:
1096 case SrcImmU: 1109 case SrcImmU:
@@ -1139,6 +1152,14 @@ done_prefixes:
1139 c->src.bytes = 1; 1152 c->src.bytes = 1;
1140 c->src.val = 1; 1153 c->src.val = 1;
1141 break; 1154 break;
1155 case SrcSI:
1156 c->src.type = OP_MEM;
1157 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1158 c->src.ptr = (unsigned long *)
1159 register_address(c, seg_override_base(ctxt, c),
1160 c->regs[VCPU_REGS_RSI]);
1161 c->src.val = 0;
1162 break;
1142 } 1163 }
1143 1164
1144 /* 1165 /*
@@ -1168,6 +1189,12 @@ done_prefixes:
1168 c->src2.bytes = 1; 1189 c->src2.bytes = 1;
1169 c->src2.val = 1; 1190 c->src2.val = 1;
1170 break; 1191 break;
1192 case Src2Mem16:
1193 c->src2.type = OP_MEM;
1194 c->src2.bytes = 2;
1195 c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes);
1196 c->src2.val = 0;
1197 break;
1171 } 1198 }
1172 1199
1173 /* Decode and fetch the destination operand: register or memory. */ 1200 /* Decode and fetch the destination operand: register or memory. */
@@ -1180,6 +1207,7 @@ done_prefixes:
1180 c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); 1207 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
1181 break; 1208 break;
1182 case DstMem: 1209 case DstMem:
1210 case DstMem64:
1183 if ((c->d & ModRM) && c->modrm_mod == 3) { 1211 if ((c->d & ModRM) && c->modrm_mod == 3) {
1184 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1212 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1185 c->dst.type = OP_REG; 1213 c->dst.type = OP_REG;
@@ -1188,12 +1216,24 @@ done_prefixes:
1188 break; 1216 break;
1189 } 1217 }
1190 c->dst.type = OP_MEM; 1218 c->dst.type = OP_MEM;
1219 c->dst.ptr = (unsigned long *)c->modrm_ea;
1220 if ((c->d & DstMask) == DstMem64)
1221 c->dst.bytes = 8;
1222 else
1223 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1224 c->dst.val = 0;
1225 if (c->d & BitOp) {
1226 unsigned long mask = ~(c->dst.bytes * 8 - 1);
1227
1228 c->dst.ptr = (void *)c->dst.ptr +
1229 (c->src.val & mask) / 8;
1230 }
1191 break; 1231 break;
1192 case DstAcc: 1232 case DstAcc:
1193 c->dst.type = OP_REG; 1233 c->dst.type = OP_REG;
1194 c->dst.bytes = c->op_bytes; 1234 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1195 c->dst.ptr = &c->regs[VCPU_REGS_RAX]; 1235 c->dst.ptr = &c->regs[VCPU_REGS_RAX];
1196 switch (c->op_bytes) { 1236 switch (c->dst.bytes) {
1197 case 1: 1237 case 1:
1198 c->dst.val = *(u8 *)c->dst.ptr; 1238 c->dst.val = *(u8 *)c->dst.ptr;
1199 break; 1239 break;
@@ -1203,18 +1243,248 @@ done_prefixes:
1203 case 4: 1243 case 4:
1204 c->dst.val = *(u32 *)c->dst.ptr; 1244 c->dst.val = *(u32 *)c->dst.ptr;
1205 break; 1245 break;
1246 case 8:
1247 c->dst.val = *(u64 *)c->dst.ptr;
1248 break;
1206 } 1249 }
1207 c->dst.orig_val = c->dst.val; 1250 c->dst.orig_val = c->dst.val;
1208 break; 1251 break;
1252 case DstDI:
1253 c->dst.type = OP_MEM;
1254 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1255 c->dst.ptr = (unsigned long *)
1256 register_address(c, es_base(ctxt),
1257 c->regs[VCPU_REGS_RDI]);
1258 c->dst.val = 0;
1259 break;
1209 } 1260 }
1210 1261
1211 if (c->rip_relative)
1212 c->modrm_ea += c->eip;
1213
1214done: 1262done:
1215 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; 1263 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
1216} 1264}
1217 1265
1266static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1267 struct x86_emulate_ops *ops,
1268 unsigned int size, unsigned short port,
1269 void *dest)
1270{
1271 struct read_cache *rc = &ctxt->decode.io_read;
1272
1273 if (rc->pos == rc->end) { /* refill pio read ahead */
1274 struct decode_cache *c = &ctxt->decode;
1275 unsigned int in_page, n;
1276 unsigned int count = c->rep_prefix ?
1277 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
1278 in_page = (ctxt->eflags & EFLG_DF) ?
1279 offset_in_page(c->regs[VCPU_REGS_RDI]) :
1280 PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
1281 n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
1282 count);
1283 if (n == 0)
1284 n = 1;
1285 rc->pos = rc->end = 0;
1286 if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
1287 return 0;
1288 rc->end = n * size;
1289 }
1290
1291 memcpy(dest, rc->data + rc->pos, size);
1292 rc->pos += size;
1293 return 1;
1294}
1295
1296static u32 desc_limit_scaled(struct desc_struct *desc)
1297{
1298 u32 limit = get_desc_limit(desc);
1299
1300 return desc->g ? (limit << 12) | 0xfff : limit;
1301}
1302
1303static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1304 struct x86_emulate_ops *ops,
1305 u16 selector, struct desc_ptr *dt)
1306{
1307 if (selector & 1 << 2) {
1308 struct desc_struct desc;
1309 memset (dt, 0, sizeof *dt);
1310 if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
1311 return;
1312
1313 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1314 dt->address = get_desc_base(&desc);
1315 } else
1316 ops->get_gdt(dt, ctxt->vcpu);
1317}
1318
 1319/* allowed just for 8-byte segment descriptors */
1320static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1321 struct x86_emulate_ops *ops,
1322 u16 selector, struct desc_struct *desc)
1323{
1324 struct desc_ptr dt;
1325 u16 index = selector >> 3;
1326 int ret;
1327 u32 err;
1328 ulong addr;
1329
1330 get_descriptor_table_ptr(ctxt, ops, selector, &dt);
1331
1332 if (dt.size < index * 8 + 7) {
1333 kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
1334 return X86EMUL_PROPAGATE_FAULT;
1335 }
1336 addr = dt.address + index * 8;
1337 ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
1338 if (ret == X86EMUL_PROPAGATE_FAULT)
1339 kvm_inject_page_fault(ctxt->vcpu, addr, err);
1340
1341 return ret;
1342}
1343
 1344/* allowed just for 8-byte segment descriptors */
1345static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1346 struct x86_emulate_ops *ops,
1347 u16 selector, struct desc_struct *desc)
1348{
1349 struct desc_ptr dt;
1350 u16 index = selector >> 3;
1351 u32 err;
1352 ulong addr;
1353 int ret;
1354
1355 get_descriptor_table_ptr(ctxt, ops, selector, &dt);
1356
1357 if (dt.size < index * 8 + 7) {
1358 kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
1359 return X86EMUL_PROPAGATE_FAULT;
1360 }
1361
1362 addr = dt.address + index * 8;
1363 ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
1364 if (ret == X86EMUL_PROPAGATE_FAULT)
1365 kvm_inject_page_fault(ctxt->vcpu, addr, err);
1366
1367 return ret;
1368}
1369
1370static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1371 struct x86_emulate_ops *ops,
1372 u16 selector, int seg)
1373{
1374 struct desc_struct seg_desc;
1375 u8 dpl, rpl, cpl;
1376 unsigned err_vec = GP_VECTOR;
1377 u32 err_code = 0;
1378 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1379 int ret;
1380
1381 memset(&seg_desc, 0, sizeof seg_desc);
1382
1383 if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
1384 || ctxt->mode == X86EMUL_MODE_REAL) {
1385 /* set real mode segment descriptor */
1386 set_desc_base(&seg_desc, selector << 4);
1387 set_desc_limit(&seg_desc, 0xffff);
1388 seg_desc.type = 3;
1389 seg_desc.p = 1;
1390 seg_desc.s = 1;
1391 goto load;
1392 }
1393
1394 /* NULL selector is not valid for TR, CS and SS */
1395 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
1396 && null_selector)
1397 goto exception;
1398
1399 /* TR should be in GDT only */
1400 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1401 goto exception;
1402
1403 if (null_selector) /* for NULL selector skip all following checks */
1404 goto load;
1405
1406 ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
1407 if (ret != X86EMUL_CONTINUE)
1408 return ret;
1409
1410 err_code = selector & 0xfffc;
1411 err_vec = GP_VECTOR;
1412
 1413 /* can't load system descriptor into segment selector */
1414 if (seg <= VCPU_SREG_GS && !seg_desc.s)
1415 goto exception;
1416
1417 if (!seg_desc.p) {
1418 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1419 goto exception;
1420 }
1421
1422 rpl = selector & 3;
1423 dpl = seg_desc.dpl;
1424 cpl = ops->cpl(ctxt->vcpu);
1425
1426 switch (seg) {
1427 case VCPU_SREG_SS:
1428 /*
 1429 * segment is not a writable data segment or segment
 1430 * selector's RPL != CPL or segment descriptor's DPL != CPL
1431 */
1432 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1433 goto exception;
1434 break;
1435 case VCPU_SREG_CS:
1436 if (!(seg_desc.type & 8))
1437 goto exception;
1438
1439 if (seg_desc.type & 4) {
1440 /* conforming */
1441 if (dpl > cpl)
1442 goto exception;
1443 } else {
1444 /* nonconforming */
1445 if (rpl > cpl || dpl != cpl)
1446 goto exception;
1447 }
1448 /* CS(RPL) <- CPL */
1449 selector = (selector & 0xfffc) | cpl;
1450 break;
1451 case VCPU_SREG_TR:
1452 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1453 goto exception;
1454 break;
1455 case VCPU_SREG_LDTR:
1456 if (seg_desc.s || seg_desc.type != 2)
1457 goto exception;
1458 break;
1459 default: /* DS, ES, FS, or GS */
1460 /*
1461 * segment is not a data or readable code segment or
1462 * ((segment is a data or nonconforming code segment)
1463 * and (both RPL and CPL > DPL))
1464 */
1465 if ((seg_desc.type & 0xa) == 0x8 ||
1466 (((seg_desc.type & 0xc) != 0xc) &&
1467 (rpl > dpl && cpl > dpl)))
1468 goto exception;
1469 break;
1470 }
1471
1472 if (seg_desc.s) {
1473 /* mark segment as accessed */
1474 seg_desc.type |= 1;
1475 ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
1476 if (ret != X86EMUL_CONTINUE)
1477 return ret;
1478 }
1479load:
1480 ops->set_segment_selector(selector, seg, ctxt->vcpu);
1481 ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
1482 return X86EMUL_CONTINUE;
1483exception:
1484 kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code);
1485 return X86EMUL_PROPAGATE_FAULT;
1486}
1487
1218static inline void emulate_push(struct x86_emulate_ctxt *ctxt) 1488static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1219{ 1489{
1220 struct decode_cache *c = &ctxt->decode; 1490 struct decode_cache *c = &ctxt->decode;
@@ -1251,7 +1521,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1251 int rc; 1521 int rc;
1252 unsigned long val, change_mask; 1522 unsigned long val, change_mask;
1253 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 1523 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1254 int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); 1524 int cpl = ops->cpl(ctxt->vcpu);
1255 1525
1256 rc = emulate_pop(ctxt, ops, &val, len); 1526 rc = emulate_pop(ctxt, ops, &val, len);
1257 if (rc != X86EMUL_CONTINUE) 1527 if (rc != X86EMUL_CONTINUE)
@@ -1306,10 +1576,10 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1306 int rc; 1576 int rc;
1307 1577
1308 rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); 1578 rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
1309 if (rc != 0) 1579 if (rc != X86EMUL_CONTINUE)
1310 return rc; 1580 return rc;
1311 1581
1312 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); 1582 rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
1313 return rc; 1583 return rc;
1314} 1584}
1315 1585
@@ -1332,7 +1602,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1332 struct x86_emulate_ops *ops) 1602 struct x86_emulate_ops *ops)
1333{ 1603{
1334 struct decode_cache *c = &ctxt->decode; 1604 struct decode_cache *c = &ctxt->decode;
1335 int rc = 0; 1605 int rc = X86EMUL_CONTINUE;
1336 int reg = VCPU_REGS_RDI; 1606 int reg = VCPU_REGS_RDI;
1337 1607
1338 while (reg >= VCPU_REGS_RAX) { 1608 while (reg >= VCPU_REGS_RAX) {
@@ -1343,7 +1613,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1343 } 1613 }
1344 1614
1345 rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); 1615 rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
1346 if (rc != 0) 1616 if (rc != X86EMUL_CONTINUE)
1347 break; 1617 break;
1348 --reg; 1618 --reg;
1349 } 1619 }
@@ -1354,12 +1624,8 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1354 struct x86_emulate_ops *ops) 1624 struct x86_emulate_ops *ops)
1355{ 1625{
1356 struct decode_cache *c = &ctxt->decode; 1626 struct decode_cache *c = &ctxt->decode;
1357 int rc;
1358 1627
1359 rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); 1628 return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
1360 if (rc != 0)
1361 return rc;
1362 return 0;
1363} 1629}
1364 1630
1365static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) 1631static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
@@ -1395,7 +1661,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1395 struct x86_emulate_ops *ops) 1661 struct x86_emulate_ops *ops)
1396{ 1662{
1397 struct decode_cache *c = &ctxt->decode; 1663 struct decode_cache *c = &ctxt->decode;
1398 int rc = 0;
1399 1664
1400 switch (c->modrm_reg) { 1665 switch (c->modrm_reg) {
1401 case 0 ... 1: /* test */ 1666 case 0 ... 1: /* test */
@@ -1408,11 +1673,9 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1408 emulate_1op("neg", c->dst, ctxt->eflags); 1673 emulate_1op("neg", c->dst, ctxt->eflags);
1409 break; 1674 break;
1410 default: 1675 default:
1411 DPRINTF("Cannot emulate %02x\n", c->b); 1676 return 0;
1412 rc = X86EMUL_UNHANDLEABLE;
1413 break;
1414 } 1677 }
1415 return rc; 1678 return 1;
1416} 1679}
1417 1680
1418static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, 1681static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
@@ -1442,20 +1705,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1442 emulate_push(ctxt); 1705 emulate_push(ctxt);
1443 break; 1706 break;
1444 } 1707 }
1445 return 0; 1708 return X86EMUL_CONTINUE;
1446} 1709}
1447 1710
1448static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, 1711static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1449 struct x86_emulate_ops *ops, 1712 struct x86_emulate_ops *ops)
1450 unsigned long memop)
1451{ 1713{
1452 struct decode_cache *c = &ctxt->decode; 1714 struct decode_cache *c = &ctxt->decode;
1453 u64 old, new; 1715 u64 old = c->dst.orig_val;
1454 int rc;
1455
1456 rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
1457 if (rc != X86EMUL_CONTINUE)
1458 return rc;
1459 1716
1460 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || 1717 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
1461 ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { 1718 ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
@@ -1463,17 +1720,13 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1463 c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); 1720 c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
1464 c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); 1721 c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
1465 ctxt->eflags &= ~EFLG_ZF; 1722 ctxt->eflags &= ~EFLG_ZF;
1466
1467 } else { 1723 } else {
1468 new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | 1724 c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
1469 (u32) c->regs[VCPU_REGS_RBX]; 1725 (u32) c->regs[VCPU_REGS_RBX];
1470 1726
1471 rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
1472 if (rc != X86EMUL_CONTINUE)
1473 return rc;
1474 ctxt->eflags |= EFLG_ZF; 1727 ctxt->eflags |= EFLG_ZF;
1475 } 1728 }
1476 return 0; 1729 return X86EMUL_CONTINUE;
1477} 1730}
1478 1731
1479static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, 1732static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
@@ -1484,14 +1737,14 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1484 unsigned long cs; 1737 unsigned long cs;
1485 1738
1486 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); 1739 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1487 if (rc) 1740 if (rc != X86EMUL_CONTINUE)
1488 return rc; 1741 return rc;
1489 if (c->op_bytes == 4) 1742 if (c->op_bytes == 4)
1490 c->eip = (u32)c->eip; 1743 c->eip = (u32)c->eip;
1491 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); 1744 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1492 if (rc) 1745 if (rc != X86EMUL_CONTINUE)
1493 return rc; 1746 return rc;
1494 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); 1747 rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
1495 return rc; 1748 return rc;
1496} 1749}
1497 1750
@@ -1544,7 +1797,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
1544 default: 1797 default:
1545 break; 1798 break;
1546 } 1799 }
1547 return 0; 1800 return X86EMUL_CONTINUE;
1548} 1801}
1549 1802
1550static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) 1803static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
@@ -1598,8 +1851,11 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
1598 u64 msr_data; 1851 u64 msr_data;
1599 1852
1600 /* syscall is not available in real mode */ 1853 /* syscall is not available in real mode */
1601 if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) 1854 if (ctxt->mode == X86EMUL_MODE_REAL ||
1602 return X86EMUL_UNHANDLEABLE; 1855 ctxt->mode == X86EMUL_MODE_VM86) {
1856 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1857 return X86EMUL_PROPAGATE_FAULT;
1858 }
1603 1859
1604 setup_syscalls_segments(ctxt, &cs, &ss); 1860 setup_syscalls_segments(ctxt, &cs, &ss);
1605 1861
@@ -1649,14 +1905,16 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1649 /* inject #GP if in real mode */ 1905 /* inject #GP if in real mode */
1650 if (ctxt->mode == X86EMUL_MODE_REAL) { 1906 if (ctxt->mode == X86EMUL_MODE_REAL) {
1651 kvm_inject_gp(ctxt->vcpu, 0); 1907 kvm_inject_gp(ctxt->vcpu, 0);
1652 return X86EMUL_UNHANDLEABLE; 1908 return X86EMUL_PROPAGATE_FAULT;
1653 } 1909 }
1654 1910
1655 /* XXX sysenter/sysexit have not been tested in 64bit mode. 1911 /* XXX sysenter/sysexit have not been tested in 64bit mode.
1656 * Therefore, we inject an #UD. 1912 * Therefore, we inject an #UD.
1657 */ 1913 */
1658 if (ctxt->mode == X86EMUL_MODE_PROT64) 1914 if (ctxt->mode == X86EMUL_MODE_PROT64) {
1659 return X86EMUL_UNHANDLEABLE; 1915 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1916 return X86EMUL_PROPAGATE_FAULT;
1917 }
1660 1918
1661 setup_syscalls_segments(ctxt, &cs, &ss); 1919 setup_syscalls_segments(ctxt, &cs, &ss);
1662 1920
@@ -1711,7 +1969,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1711 if (ctxt->mode == X86EMUL_MODE_REAL || 1969 if (ctxt->mode == X86EMUL_MODE_REAL ||
1712 ctxt->mode == X86EMUL_MODE_VM86) { 1970 ctxt->mode == X86EMUL_MODE_VM86) {
1713 kvm_inject_gp(ctxt->vcpu, 0); 1971 kvm_inject_gp(ctxt->vcpu, 0);
1714 return X86EMUL_UNHANDLEABLE; 1972 return X86EMUL_PROPAGATE_FAULT;
1715 } 1973 }
1716 1974
1717 setup_syscalls_segments(ctxt, &cs, &ss); 1975 setup_syscalls_segments(ctxt, &cs, &ss);
@@ -1756,7 +2014,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1756 return X86EMUL_CONTINUE; 2014 return X86EMUL_CONTINUE;
1757} 2015}
1758 2016
1759static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) 2017static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
2018 struct x86_emulate_ops *ops)
1760{ 2019{
1761 int iopl; 2020 int iopl;
1762 if (ctxt->mode == X86EMUL_MODE_REAL) 2021 if (ctxt->mode == X86EMUL_MODE_REAL)
@@ -1764,7 +2023,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
1764 if (ctxt->mode == X86EMUL_MODE_VM86) 2023 if (ctxt->mode == X86EMUL_MODE_VM86)
1765 return true; 2024 return true;
1766 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; 2025 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1767 return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; 2026 return ops->cpl(ctxt->vcpu) > iopl;
1768} 2027}
1769 2028
1770static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, 2029static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
@@ -1801,22 +2060,419 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
1801 struct x86_emulate_ops *ops, 2060 struct x86_emulate_ops *ops,
1802 u16 port, u16 len) 2061 u16 port, u16 len)
1803{ 2062{
1804 if (emulator_bad_iopl(ctxt)) 2063 if (emulator_bad_iopl(ctxt, ops))
1805 if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) 2064 if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
1806 return false; 2065 return false;
1807 return true; 2066 return true;
1808} 2067}
1809 2068
2069static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt,
2070 struct x86_emulate_ops *ops,
2071 int seg)
2072{
2073 struct desc_struct desc;
2074 if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu))
2075 return get_desc_base(&desc);
2076 else
2077 return ~0;
2078}
2079
2080static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2081 struct x86_emulate_ops *ops,
2082 struct tss_segment_16 *tss)
2083{
2084 struct decode_cache *c = &ctxt->decode;
2085
2086 tss->ip = c->eip;
2087 tss->flag = ctxt->eflags;
2088 tss->ax = c->regs[VCPU_REGS_RAX];
2089 tss->cx = c->regs[VCPU_REGS_RCX];
2090 tss->dx = c->regs[VCPU_REGS_RDX];
2091 tss->bx = c->regs[VCPU_REGS_RBX];
2092 tss->sp = c->regs[VCPU_REGS_RSP];
2093 tss->bp = c->regs[VCPU_REGS_RBP];
2094 tss->si = c->regs[VCPU_REGS_RSI];
2095 tss->di = c->regs[VCPU_REGS_RDI];
2096
2097 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2098 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2099 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2100 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2101 tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
2102}
2103
2104static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2105 struct x86_emulate_ops *ops,
2106 struct tss_segment_16 *tss)
2107{
2108 struct decode_cache *c = &ctxt->decode;
2109 int ret;
2110
2111 c->eip = tss->ip;
2112 ctxt->eflags = tss->flag | 2;
2113 c->regs[VCPU_REGS_RAX] = tss->ax;
2114 c->regs[VCPU_REGS_RCX] = tss->cx;
2115 c->regs[VCPU_REGS_RDX] = tss->dx;
2116 c->regs[VCPU_REGS_RBX] = tss->bx;
2117 c->regs[VCPU_REGS_RSP] = tss->sp;
2118 c->regs[VCPU_REGS_RBP] = tss->bp;
2119 c->regs[VCPU_REGS_RSI] = tss->si;
2120 c->regs[VCPU_REGS_RDI] = tss->di;
2121
2122 /*
2123	 * The SDM says that segment selectors are loaded before segment
2124	 * descriptors.
2125 */
2126 ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
2127 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2128 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2129 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2130 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2131
2132 /*
2133	 * Now load segment descriptors. If a fault happens at this stage
2134	 * it is handled in the context of the new task.
2135 */
2136 ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
2137 if (ret != X86EMUL_CONTINUE)
2138 return ret;
2139 ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2140 if (ret != X86EMUL_CONTINUE)
2141 return ret;
2142 ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2143 if (ret != X86EMUL_CONTINUE)
2144 return ret;
2145 ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2146 if (ret != X86EMUL_CONTINUE)
2147 return ret;
2148 ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2149 if (ret != X86EMUL_CONTINUE)
2150 return ret;
2151
2152 return X86EMUL_CONTINUE;
2153}
2154
2155static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2156 struct x86_emulate_ops *ops,
2157 u16 tss_selector, u16 old_tss_sel,
2158 ulong old_tss_base, struct desc_struct *new_desc)
2159{
2160 struct tss_segment_16 tss_seg;
2161 int ret;
2162 u32 err, new_tss_base = get_desc_base(new_desc);
2163
2164 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2165 &err);
2166 if (ret == X86EMUL_PROPAGATE_FAULT) {
2167 /* FIXME: need to provide precise fault address */
2168 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2169 return ret;
2170 }
2171
2172 save_state_to_tss16(ctxt, ops, &tss_seg);
2173
2174 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2175 &err);
2176 if (ret == X86EMUL_PROPAGATE_FAULT) {
2177 /* FIXME: need to provide precise fault address */
2178 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2179 return ret;
2180 }
2181
2182 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2183 &err);
2184 if (ret == X86EMUL_PROPAGATE_FAULT) {
2185 /* FIXME: need to provide precise fault address */
2186 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2187 return ret;
2188 }
2189
2190 if (old_tss_sel != 0xffff) {
2191 tss_seg.prev_task_link = old_tss_sel;
2192
2193 ret = ops->write_std(new_tss_base,
2194 &tss_seg.prev_task_link,
2195 sizeof tss_seg.prev_task_link,
2196 ctxt->vcpu, &err);
2197 if (ret == X86EMUL_PROPAGATE_FAULT) {
2198 /* FIXME: need to provide precise fault address */
2199 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2200 return ret;
2201 }
2202 }
2203
2204 return load_state_from_tss16(ctxt, ops, &tss_seg);
2205}
2206
2207static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2208 struct x86_emulate_ops *ops,
2209 struct tss_segment_32 *tss)
2210{
2211 struct decode_cache *c = &ctxt->decode;
2212
2213 tss->cr3 = ops->get_cr(3, ctxt->vcpu);
2214 tss->eip = c->eip;
2215 tss->eflags = ctxt->eflags;
2216 tss->eax = c->regs[VCPU_REGS_RAX];
2217 tss->ecx = c->regs[VCPU_REGS_RCX];
2218 tss->edx = c->regs[VCPU_REGS_RDX];
2219 tss->ebx = c->regs[VCPU_REGS_RBX];
2220 tss->esp = c->regs[VCPU_REGS_RSP];
2221 tss->ebp = c->regs[VCPU_REGS_RBP];
2222 tss->esi = c->regs[VCPU_REGS_RSI];
2223 tss->edi = c->regs[VCPU_REGS_RDI];
2224
2225 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2226 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2227 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2228 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2229 tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
2230 tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
2231 tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
2232}
2233
2234static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2235 struct x86_emulate_ops *ops,
2236 struct tss_segment_32 *tss)
2237{
2238 struct decode_cache *c = &ctxt->decode;
2239 int ret;
2240
2241 ops->set_cr(3, tss->cr3, ctxt->vcpu);
2242 c->eip = tss->eip;
2243 ctxt->eflags = tss->eflags | 2;
2244 c->regs[VCPU_REGS_RAX] = tss->eax;
2245 c->regs[VCPU_REGS_RCX] = tss->ecx;
2246 c->regs[VCPU_REGS_RDX] = tss->edx;
2247 c->regs[VCPU_REGS_RBX] = tss->ebx;
2248 c->regs[VCPU_REGS_RSP] = tss->esp;
2249 c->regs[VCPU_REGS_RBP] = tss->ebp;
2250 c->regs[VCPU_REGS_RSI] = tss->esi;
2251 c->regs[VCPU_REGS_RDI] = tss->edi;
2252
2253 /*
2254	 * The SDM says that segment selectors are loaded before segment
2255	 * descriptors.
2256 */
2257 ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
2258 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2259 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2260 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2261 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2262 ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
2263 ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
2264
2265 /*
2266	 * Now load segment descriptors. If a fault happens at this stage
2267	 * it is handled in the context of the new task.
2268 */
2269 ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
2270 if (ret != X86EMUL_CONTINUE)
2271 return ret;
2272 ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2273 if (ret != X86EMUL_CONTINUE)
2274 return ret;
2275 ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2276 if (ret != X86EMUL_CONTINUE)
2277 return ret;
2278 ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2279 if (ret != X86EMUL_CONTINUE)
2280 return ret;
2281 ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2282 if (ret != X86EMUL_CONTINUE)
2283 return ret;
2284 ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
2285 if (ret != X86EMUL_CONTINUE)
2286 return ret;
2287 ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
2288 if (ret != X86EMUL_CONTINUE)
2289 return ret;
2290
2291 return X86EMUL_CONTINUE;
2292}
2293
2294static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2295 struct x86_emulate_ops *ops,
2296 u16 tss_selector, u16 old_tss_sel,
2297 ulong old_tss_base, struct desc_struct *new_desc)
2298{
2299 struct tss_segment_32 tss_seg;
2300 int ret;
2301 u32 err, new_tss_base = get_desc_base(new_desc);
2302
2303 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2304 &err);
2305 if (ret == X86EMUL_PROPAGATE_FAULT) {
2306 /* FIXME: need to provide precise fault address */
2307 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2308 return ret;
2309 }
2310
2311 save_state_to_tss32(ctxt, ops, &tss_seg);
2312
2313 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2314 &err);
2315 if (ret == X86EMUL_PROPAGATE_FAULT) {
2316 /* FIXME: need to provide precise fault address */
2317 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2318 return ret;
2319 }
2320
2321 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2322 &err);
2323 if (ret == X86EMUL_PROPAGATE_FAULT) {
2324 /* FIXME: need to provide precise fault address */
2325 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2326 return ret;
2327 }
2328
2329 if (old_tss_sel != 0xffff) {
2330 tss_seg.prev_task_link = old_tss_sel;
2331
2332 ret = ops->write_std(new_tss_base,
2333 &tss_seg.prev_task_link,
2334 sizeof tss_seg.prev_task_link,
2335 ctxt->vcpu, &err);
2336 if (ret == X86EMUL_PROPAGATE_FAULT) {
2337 /* FIXME: need to provide precise fault address */
2338 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2339 return ret;
2340 }
2341 }
2342
2343 return load_state_from_tss32(ctxt, ops, &tss_seg);
2344}
2345
2346static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2347 struct x86_emulate_ops *ops,
2348 u16 tss_selector, int reason,
2349 bool has_error_code, u32 error_code)
2350{
2351 struct desc_struct curr_tss_desc, next_tss_desc;
2352 int ret;
2353 u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
2354 ulong old_tss_base =
2355 get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR);
2356 u32 desc_limit;
2357
2358 /* FIXME: old_tss_base == ~0 ? */
2359
2360 ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
2361 if (ret != X86EMUL_CONTINUE)
2362 return ret;
2363 ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
2364 if (ret != X86EMUL_CONTINUE)
2365 return ret;
2366
2367 /* FIXME: check that next_tss_desc is tss */
2368
2369 if (reason != TASK_SWITCH_IRET) {
2370 if ((tss_selector & 3) > next_tss_desc.dpl ||
2371 ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
2372 kvm_inject_gp(ctxt->vcpu, 0);
2373 return X86EMUL_PROPAGATE_FAULT;
2374 }
2375 }
2376
2377 desc_limit = desc_limit_scaled(&next_tss_desc);
2378 if (!next_tss_desc.p ||
2379 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2380 desc_limit < 0x2b)) {
2381 kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR,
2382 tss_selector & 0xfffc);
2383 return X86EMUL_PROPAGATE_FAULT;
2384 }
2385
2386 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2387 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2388 write_segment_descriptor(ctxt, ops, old_tss_sel,
2389 &curr_tss_desc);
2390 }
2391
2392 if (reason == TASK_SWITCH_IRET)
2393 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2394
2395	 /* set back link to prev task only if NT bit is set in eflags;
2396	    note that old_tss_sel is not used after this point */
2397 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2398 old_tss_sel = 0xffff;
2399
2400 if (next_tss_desc.type & 8)
2401 ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
2402 old_tss_base, &next_tss_desc);
2403 else
2404 ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
2405 old_tss_base, &next_tss_desc);
2406 if (ret != X86EMUL_CONTINUE)
2407 return ret;
2408
2409 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2410 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2411
2412 if (reason != TASK_SWITCH_IRET) {
2413 next_tss_desc.type |= (1 << 1); /* set busy flag */
2414 write_segment_descriptor(ctxt, ops, tss_selector,
2415 &next_tss_desc);
2416 }
2417
2418 ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
2419 ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
2420 ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
2421
2422 if (has_error_code) {
2423 struct decode_cache *c = &ctxt->decode;
2424
2425 c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
2426 c->lock_prefix = 0;
2427 c->src.val = (unsigned long) error_code;
2428 emulate_push(ctxt);
2429 }
2430
2431 return ret;
2432}
2433
2434int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2435 struct x86_emulate_ops *ops,
2436 u16 tss_selector, int reason,
2437 bool has_error_code, u32 error_code)
2438{
2439 struct decode_cache *c = &ctxt->decode;
2440 int rc;
2441
2442 memset(c, 0, sizeof(struct decode_cache));
2443 c->eip = ctxt->eip;
2444 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
2445 c->dst.type = OP_NONE;
2446
2447 rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
2448 has_error_code, error_code);
2449
2450 if (rc == X86EMUL_CONTINUE) {
2451 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
2452 kvm_rip_write(ctxt->vcpu, c->eip);
2453 rc = writeback(ctxt, ops);
2454 }
2455
2456 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2457}
2458
2459static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
2460 int reg, struct operand *op)
2461{
2462 struct decode_cache *c = &ctxt->decode;
2463 int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
2464
2465 register_address_increment(c, &c->regs[reg], df * op->bytes);
2466 op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]);
2467}
2468
1810int 2469int
1811x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) 2470x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1812{ 2471{
1813 unsigned long memop = 0;
1814 u64 msr_data; 2472 u64 msr_data;
1815 unsigned long saved_eip = 0;
1816 struct decode_cache *c = &ctxt->decode; 2473 struct decode_cache *c = &ctxt->decode;
1817 unsigned int port; 2474 int rc = X86EMUL_CONTINUE;
1818 int io_dir_in; 2475 int saved_dst_type = c->dst.type;
1819 int rc = 0;
1820 2476
1821 ctxt->interruptibility = 0; 2477 ctxt->interruptibility = 0;
1822 2478
@@ -1826,26 +2482,30 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1826 */ 2482 */
1827 2483
1828 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 2484 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
1829 saved_eip = c->eip; 2485
2486 if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
2487 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2488 goto done;
2489 }
1830 2490
1831 /* LOCK prefix is allowed only with some instructions */ 2491 /* LOCK prefix is allowed only with some instructions */
1832 if (c->lock_prefix && !(c->d & Lock)) { 2492 if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
1833 kvm_queue_exception(ctxt->vcpu, UD_VECTOR); 2493 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1834 goto done; 2494 goto done;
1835 } 2495 }
1836 2496
1837 /* Privileged instruction can be executed only in CPL=0 */ 2497 /* Privileged instruction can be executed only in CPL=0 */
1838 if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { 2498 if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
1839 kvm_inject_gp(ctxt->vcpu, 0); 2499 kvm_inject_gp(ctxt->vcpu, 0);
1840 goto done; 2500 goto done;
1841 } 2501 }
1842 2502
1843 if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
1844 memop = c->modrm_ea;
1845
1846 if (c->rep_prefix && (c->d & String)) { 2503 if (c->rep_prefix && (c->d & String)) {
2504 ctxt->restart = true;
1847 /* All REP prefixes have the same first termination condition */ 2505 /* All REP prefixes have the same first termination condition */
1848 if (c->regs[VCPU_REGS_RCX] == 0) { 2506 if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
2507 string_done:
2508 ctxt->restart = false;
1849 kvm_rip_write(ctxt->vcpu, c->eip); 2509 kvm_rip_write(ctxt->vcpu, c->eip);
1850 goto done; 2510 goto done;
1851 } 2511 }
@@ -1857,25 +2517,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1857 * - if REPNE/REPNZ and ZF = 1 then done 2517 * - if REPNE/REPNZ and ZF = 1 then done
1858 */ 2518 */
1859 if ((c->b == 0xa6) || (c->b == 0xa7) || 2519 if ((c->b == 0xa6) || (c->b == 0xa7) ||
1860 (c->b == 0xae) || (c->b == 0xaf)) { 2520 (c->b == 0xae) || (c->b == 0xaf)) {
1861 if ((c->rep_prefix == REPE_PREFIX) && 2521 if ((c->rep_prefix == REPE_PREFIX) &&
1862 ((ctxt->eflags & EFLG_ZF) == 0)) { 2522 ((ctxt->eflags & EFLG_ZF) == 0))
1863 kvm_rip_write(ctxt->vcpu, c->eip); 2523 goto string_done;
1864 goto done;
1865 }
1866 if ((c->rep_prefix == REPNE_PREFIX) && 2524 if ((c->rep_prefix == REPNE_PREFIX) &&
1867 ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { 2525 ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))
1868 kvm_rip_write(ctxt->vcpu, c->eip); 2526 goto string_done;
1869 goto done;
1870 }
1871 } 2527 }
1872 c->regs[VCPU_REGS_RCX]--; 2528 c->eip = ctxt->eip;
1873 c->eip = kvm_rip_read(ctxt->vcpu);
1874 } 2529 }
1875 2530
1876 if (c->src.type == OP_MEM) { 2531 if (c->src.type == OP_MEM) {
1877 c->src.ptr = (unsigned long *)memop;
1878 c->src.val = 0;
1879 rc = ops->read_emulated((unsigned long)c->src.ptr, 2532 rc = ops->read_emulated((unsigned long)c->src.ptr,
1880 &c->src.val, 2533 &c->src.val,
1881 c->src.bytes, 2534 c->src.bytes,
@@ -1885,29 +2538,25 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1885 c->src.orig_val = c->src.val; 2538 c->src.orig_val = c->src.val;
1886 } 2539 }
1887 2540
2541 if (c->src2.type == OP_MEM) {
2542 rc = ops->read_emulated((unsigned long)c->src2.ptr,
2543 &c->src2.val,
2544 c->src2.bytes,
2545 ctxt->vcpu);
2546 if (rc != X86EMUL_CONTINUE)
2547 goto done;
2548 }
2549
1888 if ((c->d & DstMask) == ImplicitOps) 2550 if ((c->d & DstMask) == ImplicitOps)
1889 goto special_insn; 2551 goto special_insn;
1890 2552
1891 2553
1892 if (c->dst.type == OP_MEM) { 2554 if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
1893 c->dst.ptr = (unsigned long *)memop; 2555 /* optimisation - avoid slow emulated read if Mov */
1894 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 2556 rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val,
1895 c->dst.val = 0; 2557 c->dst.bytes, ctxt->vcpu);
1896 if (c->d & BitOp) { 2558 if (rc != X86EMUL_CONTINUE)
1897 unsigned long mask = ~(c->dst.bytes * 8 - 1); 2559 goto done;
1898
1899 c->dst.ptr = (void *)c->dst.ptr +
1900 (c->src.val & mask) / 8;
1901 }
1902 if (!(c->d & Mov)) {
1903 /* optimisation - avoid slow emulated read */
1904 rc = ops->read_emulated((unsigned long)c->dst.ptr,
1905 &c->dst.val,
1906 c->dst.bytes,
1907 ctxt->vcpu);
1908 if (rc != X86EMUL_CONTINUE)
1909 goto done;
1910 }
1911 } 2560 }
1912 c->dst.orig_val = c->dst.val; 2561 c->dst.orig_val = c->dst.val;
1913 2562
@@ -1926,7 +2575,7 @@ special_insn:
1926 break; 2575 break;
1927 case 0x07: /* pop es */ 2576 case 0x07: /* pop es */
1928 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); 2577 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
1929 if (rc != 0) 2578 if (rc != X86EMUL_CONTINUE)
1930 goto done; 2579 goto done;
1931 break; 2580 break;
1932 case 0x08 ... 0x0d: 2581 case 0x08 ... 0x0d:
@@ -1945,7 +2594,7 @@ special_insn:
1945 break; 2594 break;
1946 case 0x17: /* pop ss */ 2595 case 0x17: /* pop ss */
1947 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); 2596 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
1948 if (rc != 0) 2597 if (rc != X86EMUL_CONTINUE)
1949 goto done; 2598 goto done;
1950 break; 2599 break;
1951 case 0x18 ... 0x1d: 2600 case 0x18 ... 0x1d:
@@ -1957,7 +2606,7 @@ special_insn:
1957 break; 2606 break;
1958 case 0x1f: /* pop ds */ 2607 case 0x1f: /* pop ds */
1959 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); 2608 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
1960 if (rc != 0) 2609 if (rc != X86EMUL_CONTINUE)
1961 goto done; 2610 goto done;
1962 break; 2611 break;
1963 case 0x20 ... 0x25: 2612 case 0x20 ... 0x25:
@@ -1988,7 +2637,7 @@ special_insn:
1988 case 0x58 ... 0x5f: /* pop reg */ 2637 case 0x58 ... 0x5f: /* pop reg */
1989 pop_instruction: 2638 pop_instruction:
1990 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); 2639 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
1991 if (rc != 0) 2640 if (rc != X86EMUL_CONTINUE)
1992 goto done; 2641 goto done;
1993 break; 2642 break;
1994 case 0x60: /* pusha */ 2643 case 0x60: /* pusha */
@@ -1996,7 +2645,7 @@ special_insn:
1996 break; 2645 break;
1997 case 0x61: /* popa */ 2646 case 0x61: /* popa */
1998 rc = emulate_popa(ctxt, ops); 2647 rc = emulate_popa(ctxt, ops);
1999 if (rc != 0) 2648 if (rc != X86EMUL_CONTINUE)
2000 goto done; 2649 goto done;
2001 break; 2650 break;
2002 case 0x63: /* movsxd */ 2651 case 0x63: /* movsxd */
@@ -2010,47 +2659,29 @@ special_insn:
2010 break; 2659 break;
2011 case 0x6c: /* insb */ 2660 case 0x6c: /* insb */
2012 case 0x6d: /* insw/insd */ 2661 case 0x6d: /* insw/insd */
2662 c->dst.bytes = min(c->dst.bytes, 4u);
2013 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 2663 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2014 (c->d & ByteOp) ? 1 : c->op_bytes)) { 2664 c->dst.bytes)) {
2015 kvm_inject_gp(ctxt->vcpu, 0); 2665 kvm_inject_gp(ctxt->vcpu, 0);
2016 goto done; 2666 goto done;
2017 } 2667 }
2018 if (kvm_emulate_pio_string(ctxt->vcpu, 2668 if (!pio_in_emulated(ctxt, ops, c->dst.bytes,
2019 1, 2669 c->regs[VCPU_REGS_RDX], &c->dst.val))
2020 (c->d & ByteOp) ? 1 : c->op_bytes, 2670 goto done; /* IO is needed, skip writeback */
2021 c->rep_prefix ? 2671 break;
2022 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
2023 (ctxt->eflags & EFLG_DF),
2024 register_address(c, es_base(ctxt),
2025 c->regs[VCPU_REGS_RDI]),
2026 c->rep_prefix,
2027 c->regs[VCPU_REGS_RDX]) == 0) {
2028 c->eip = saved_eip;
2029 return -1;
2030 }
2031 return 0;
2032 case 0x6e: /* outsb */ 2672 case 0x6e: /* outsb */
2033 case 0x6f: /* outsw/outsd */ 2673 case 0x6f: /* outsw/outsd */
2674 c->src.bytes = min(c->src.bytes, 4u);
2034 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], 2675 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2035 (c->d & ByteOp) ? 1 : c->op_bytes)) { 2676 c->src.bytes)) {
2036 kvm_inject_gp(ctxt->vcpu, 0); 2677 kvm_inject_gp(ctxt->vcpu, 0);
2037 goto done; 2678 goto done;
2038 } 2679 }
2039 if (kvm_emulate_pio_string(ctxt->vcpu, 2680 ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX],
2040 0, 2681 &c->src.val, 1, ctxt->vcpu);
2041 (c->d & ByteOp) ? 1 : c->op_bytes, 2682
2042 c->rep_prefix ? 2683 c->dst.type = OP_NONE; /* nothing to writeback */
2043 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, 2684 break;
2044 (ctxt->eflags & EFLG_DF),
2045 register_address(c,
2046 seg_override_base(ctxt, c),
2047 c->regs[VCPU_REGS_RSI]),
2048 c->rep_prefix,
2049 c->regs[VCPU_REGS_RDX]) == 0) {
2050 c->eip = saved_eip;
2051 return -1;
2052 }
2053 return 0;
2054 case 0x70 ... 0x7f: /* jcc (short) */ 2685 case 0x70 ... 0x7f: /* jcc (short) */
2055 if (test_cc(c->b, ctxt->eflags)) 2686 if (test_cc(c->b, ctxt->eflags))
2056 jmp_rel(c, c->src.val); 2687 jmp_rel(c, c->src.val);
@@ -2107,12 +2738,11 @@ special_insn:
2107 case 0x8c: { /* mov r/m, sreg */ 2738 case 0x8c: { /* mov r/m, sreg */
2108 struct kvm_segment segreg; 2739 struct kvm_segment segreg;
2109 2740
2110 if (c->modrm_reg <= 5) 2741 if (c->modrm_reg <= VCPU_SREG_GS)
2111 kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); 2742 kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
2112 else { 2743 else {
2113 printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", 2744 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2114 c->modrm); 2745 goto done;
2115 goto cannot_emulate;
2116 } 2746 }
2117 c->dst.val = segreg.selector; 2747 c->dst.val = segreg.selector;
2118 break; 2748 break;
@@ -2132,16 +2762,16 @@ special_insn:
2132 } 2762 }
2133 2763
2134 if (c->modrm_reg == VCPU_SREG_SS) 2764 if (c->modrm_reg == VCPU_SREG_SS)
2135 toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); 2765 toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS);
2136 2766
2137 rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); 2767 rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);
2138 2768
2139 c->dst.type = OP_NONE; /* Disable writeback. */ 2769 c->dst.type = OP_NONE; /* Disable writeback. */
2140 break; 2770 break;
2141 } 2771 }
2142 case 0x8f: /* pop (sole member of Grp1a) */ 2772 case 0x8f: /* pop (sole member of Grp1a) */
2143 rc = emulate_grp1a(ctxt, ops); 2773 rc = emulate_grp1a(ctxt, ops);
2144 if (rc != 0) 2774 if (rc != X86EMUL_CONTINUE)
2145 goto done; 2775 goto done;
2146 break; 2776 break;
2147 case 0x90: /* nop / xchg r8,rax */ 2777 case 0x90: /* nop / xchg r8,rax */
@@ -2175,89 +2805,16 @@ special_insn:
2175 c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; 2805 c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
2176 break; 2806 break;
2177 case 0xa4 ... 0xa5: /* movs */ 2807 case 0xa4 ... 0xa5: /* movs */
2178 c->dst.type = OP_MEM; 2808 goto mov;
2179 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2180 c->dst.ptr = (unsigned long *)register_address(c,
2181 es_base(ctxt),
2182 c->regs[VCPU_REGS_RDI]);
2183 rc = ops->read_emulated(register_address(c,
2184 seg_override_base(ctxt, c),
2185 c->regs[VCPU_REGS_RSI]),
2186 &c->dst.val,
2187 c->dst.bytes, ctxt->vcpu);
2188 if (rc != X86EMUL_CONTINUE)
2189 goto done;
2190 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2191 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2192 : c->dst.bytes);
2193 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
2194 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2195 : c->dst.bytes);
2196 break;
2197 case 0xa6 ... 0xa7: /* cmps */ 2809 case 0xa6 ... 0xa7: /* cmps */
2198 c->src.type = OP_NONE; /* Disable writeback. */
2199 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2200 c->src.ptr = (unsigned long *)register_address(c,
2201 seg_override_base(ctxt, c),
2202 c->regs[VCPU_REGS_RSI]);
2203 rc = ops->read_emulated((unsigned long)c->src.ptr,
2204 &c->src.val,
2205 c->src.bytes,
2206 ctxt->vcpu);
2207 if (rc != X86EMUL_CONTINUE)
2208 goto done;
2209
2210 c->dst.type = OP_NONE; /* Disable writeback. */ 2810 c->dst.type = OP_NONE; /* Disable writeback. */
2211 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2212 c->dst.ptr = (unsigned long *)register_address(c,
2213 es_base(ctxt),
2214 c->regs[VCPU_REGS_RDI]);
2215 rc = ops->read_emulated((unsigned long)c->dst.ptr,
2216 &c->dst.val,
2217 c->dst.bytes,
2218 ctxt->vcpu);
2219 if (rc != X86EMUL_CONTINUE)
2220 goto done;
2221
2222 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); 2811 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
2223 2812 goto cmp;
2224 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2225
2226 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2227 (ctxt->eflags & EFLG_DF) ? -c->src.bytes
2228 : c->src.bytes);
2229 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
2230 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2231 : c->dst.bytes);
2232
2233 break;
2234 case 0xaa ... 0xab: /* stos */ 2813 case 0xaa ... 0xab: /* stos */
2235 c->dst.type = OP_MEM;
2236 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2237 c->dst.ptr = (unsigned long *)register_address(c,
2238 es_base(ctxt),
2239 c->regs[VCPU_REGS_RDI]);
2240 c->dst.val = c->regs[VCPU_REGS_RAX]; 2814 c->dst.val = c->regs[VCPU_REGS_RAX];
2241 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
2242 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2243 : c->dst.bytes);
2244 break; 2815 break;
2245 case 0xac ... 0xad: /* lods */ 2816 case 0xac ... 0xad: /* lods */
2246 c->dst.type = OP_REG; 2817 goto mov;
2247 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2248 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2249 rc = ops->read_emulated(register_address(c,
2250 seg_override_base(ctxt, c),
2251 c->regs[VCPU_REGS_RSI]),
2252 &c->dst.val,
2253 c->dst.bytes,
2254 ctxt->vcpu);
2255 if (rc != X86EMUL_CONTINUE)
2256 goto done;
2257 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2258 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2259 : c->dst.bytes);
2260 break;
2261 case 0xae ... 0xaf: /* scas */ 2818 case 0xae ... 0xaf: /* scas */
2262 DPRINTF("Urk! I don't handle SCAS.\n"); 2819 DPRINTF("Urk! I don't handle SCAS.\n");
2263 goto cannot_emulate; 2820 goto cannot_emulate;
@@ -2277,7 +2834,7 @@ special_insn:
2277 break; 2834 break;
2278 case 0xcb: /* ret far */ 2835 case 0xcb: /* ret far */
2279 rc = emulate_ret_far(ctxt, ops); 2836 rc = emulate_ret_far(ctxt, ops);
2280 if (rc) 2837 if (rc != X86EMUL_CONTINUE)
2281 goto done; 2838 goto done;
2282 break; 2839 break;
2283 case 0xd0 ... 0xd1: /* Grp2 */ 2840 case 0xd0 ... 0xd1: /* Grp2 */
@@ -2290,14 +2847,10 @@ special_insn:
2290 break; 2847 break;
2291 case 0xe4: /* inb */ 2848 case 0xe4: /* inb */
2292 case 0xe5: /* in */ 2849 case 0xe5: /* in */
2293 port = c->src.val; 2850 goto do_io_in;
2294 io_dir_in = 1;
2295 goto do_io;
2296 case 0xe6: /* outb */ 2851 case 0xe6: /* outb */
2297 case 0xe7: /* out */ 2852 case 0xe7: /* out */
2298 port = c->src.val; 2853 goto do_io_out;
2299 io_dir_in = 0;
2300 goto do_io;
2301 case 0xe8: /* call (near) */ { 2854 case 0xe8: /* call (near) */ {
2302 long int rel = c->src.val; 2855 long int rel = c->src.val;
2303 c->src.val = (unsigned long) c->eip; 2856 c->src.val = (unsigned long) c->eip;
@@ -2308,8 +2861,9 @@ special_insn:
2308 case 0xe9: /* jmp rel */ 2861 case 0xe9: /* jmp rel */
2309 goto jmp; 2862 goto jmp;
2310 case 0xea: /* jmp far */ 2863 case 0xea: /* jmp far */
2311 if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 2864 jump_far:
2312 VCPU_SREG_CS)) 2865 if (load_segment_descriptor(ctxt, ops, c->src2.val,
2866 VCPU_SREG_CS))
2313 goto done; 2867 goto done;
2314 2868
2315 c->eip = c->src.val; 2869 c->eip = c->src.val;
@@ -2321,25 +2875,29 @@ special_insn:
2321 break; 2875 break;
2322 case 0xec: /* in al,dx */ 2876 case 0xec: /* in al,dx */
2323 case 0xed: /* in (e/r)ax,dx */ 2877 case 0xed: /* in (e/r)ax,dx */
2324 port = c->regs[VCPU_REGS_RDX]; 2878 c->src.val = c->regs[VCPU_REGS_RDX];
2325 io_dir_in = 1; 2879 do_io_in:
2326 goto do_io; 2880 c->dst.bytes = min(c->dst.bytes, 4u);
2881 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2882 kvm_inject_gp(ctxt->vcpu, 0);
2883 goto done;
2884 }
2885 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
2886 &c->dst.val))
2887 goto done; /* IO is needed */
2888 break;
2327 case 0xee: /* out al,dx */ 2889 case 0xee: /* out al,dx */
2328 case 0xef: /* out (e/r)ax,dx */ 2890 case 0xef: /* out (e/r)ax,dx */
2329 port = c->regs[VCPU_REGS_RDX]; 2891 c->src.val = c->regs[VCPU_REGS_RDX];
2330 io_dir_in = 0; 2892 do_io_out:
2331 do_io: 2893 c->dst.bytes = min(c->dst.bytes, 4u);
2332 if (!emulator_io_permited(ctxt, ops, port, 2894 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2333 (c->d & ByteOp) ? 1 : c->op_bytes)) {
2334 kvm_inject_gp(ctxt->vcpu, 0); 2895 kvm_inject_gp(ctxt->vcpu, 0);
2335 goto done; 2896 goto done;
2336 } 2897 }
2337 if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, 2898 ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1,
2338 (c->d & ByteOp) ? 1 : c->op_bytes, 2899 ctxt->vcpu);
2339 port) != 0) { 2900 c->dst.type = OP_NONE; /* Disable writeback. */
2340 c->eip = saved_eip;
2341 goto cannot_emulate;
2342 }
2343 break; 2901 break;
2344 case 0xf4: /* hlt */ 2902 case 0xf4: /* hlt */
2345 ctxt->vcpu->arch.halt_request = 1; 2903 ctxt->vcpu->arch.halt_request = 1;
@@ -2350,16 +2908,15 @@ special_insn:
2350 c->dst.type = OP_NONE; /* Disable writeback. */ 2908 c->dst.type = OP_NONE; /* Disable writeback. */
2351 break; 2909 break;
2352 case 0xf6 ... 0xf7: /* Grp3 */ 2910 case 0xf6 ... 0xf7: /* Grp3 */
2353 rc = emulate_grp3(ctxt, ops); 2911 if (!emulate_grp3(ctxt, ops))
2354 if (rc != 0) 2912 goto cannot_emulate;
2355 goto done;
2356 break; 2913 break;
2357 case 0xf8: /* clc */ 2914 case 0xf8: /* clc */
2358 ctxt->eflags &= ~EFLG_CF; 2915 ctxt->eflags &= ~EFLG_CF;
2359 c->dst.type = OP_NONE; /* Disable writeback. */ 2916 c->dst.type = OP_NONE; /* Disable writeback. */
2360 break; 2917 break;
2361 case 0xfa: /* cli */ 2918 case 0xfa: /* cli */
2362 if (emulator_bad_iopl(ctxt)) 2919 if (emulator_bad_iopl(ctxt, ops))
2363 kvm_inject_gp(ctxt->vcpu, 0); 2920 kvm_inject_gp(ctxt->vcpu, 0);
2364 else { 2921 else {
2365 ctxt->eflags &= ~X86_EFLAGS_IF; 2922 ctxt->eflags &= ~X86_EFLAGS_IF;
@@ -2367,10 +2924,10 @@ special_insn:
2367 } 2924 }
2368 break; 2925 break;
2369 case 0xfb: /* sti */ 2926 case 0xfb: /* sti */
2370 if (emulator_bad_iopl(ctxt)) 2927 if (emulator_bad_iopl(ctxt, ops))
2371 kvm_inject_gp(ctxt->vcpu, 0); 2928 kvm_inject_gp(ctxt->vcpu, 0);
2372 else { 2929 else {
2373 toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); 2930 toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI);
2374 ctxt->eflags |= X86_EFLAGS_IF; 2931 ctxt->eflags |= X86_EFLAGS_IF;
2375 c->dst.type = OP_NONE; /* Disable writeback. */ 2932 c->dst.type = OP_NONE; /* Disable writeback. */
2376 } 2933 }
@@ -2383,28 +2940,55 @@ special_insn:
2383 ctxt->eflags |= EFLG_DF; 2940 ctxt->eflags |= EFLG_DF;
2384 c->dst.type = OP_NONE; /* Disable writeback. */ 2941 c->dst.type = OP_NONE; /* Disable writeback. */
2385 break; 2942 break;
2386 case 0xfe ... 0xff: /* Grp4/Grp5 */ 2943 case 0xfe: /* Grp4 */
2944 grp45:
2387 rc = emulate_grp45(ctxt, ops); 2945 rc = emulate_grp45(ctxt, ops);
2388 if (rc != 0) 2946 if (rc != X86EMUL_CONTINUE)
2389 goto done; 2947 goto done;
2390 break; 2948 break;
2949 case 0xff: /* Grp5 */
2950 if (c->modrm_reg == 5)
2951 goto jump_far;
2952 goto grp45;
2391 } 2953 }
2392 2954
2393writeback: 2955writeback:
2394 rc = writeback(ctxt, ops); 2956 rc = writeback(ctxt, ops);
2395 if (rc != 0) 2957 if (rc != X86EMUL_CONTINUE)
2396 goto done; 2958 goto done;
2397 2959
2960 /*
2961 * restore dst type in case the decoding will be reused
2962	 * (happens for string instructions)
2963 */
2964 c->dst.type = saved_dst_type;
2965
2966 if ((c->d & SrcMask) == SrcSI)
2967 string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI,
2968 &c->src);
2969
2970 if ((c->d & DstMask) == DstDI)
2971 string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst);
2972
2973 if (c->rep_prefix && (c->d & String)) {
2974 struct read_cache *rc = &ctxt->decode.io_read;
2975 register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
2976 /*
2977	 * Re-enter guest when the PIO read-ahead buffer is empty or,
2978	 * if it is not used, after every 1024 iterations.
2979 */
2980 if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) ||
2981 (rc->end != 0 && rc->end == rc->pos))
2982 ctxt->restart = false;
2983 }
2984
2398 /* Commit shadow register state. */ 2985 /* Commit shadow register state. */
2399 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); 2986 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
2400 kvm_rip_write(ctxt->vcpu, c->eip); 2987 kvm_rip_write(ctxt->vcpu, c->eip);
2988 ops->set_rflags(ctxt->vcpu, ctxt->eflags);
2401 2989
2402done: 2990done:
2403 if (rc == X86EMUL_UNHANDLEABLE) { 2991 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2404 c->eip = saved_eip;
2405 return -1;
2406 }
2407 return 0;
2408 2992
2409twobyte_insn: 2993twobyte_insn:
2410 switch (c->b) { 2994 switch (c->b) {
@@ -2418,18 +3002,18 @@ twobyte_insn:
2418 goto cannot_emulate; 3002 goto cannot_emulate;
2419 3003
2420 rc = kvm_fix_hypercall(ctxt->vcpu); 3004 rc = kvm_fix_hypercall(ctxt->vcpu);
2421 if (rc) 3005 if (rc != X86EMUL_CONTINUE)
2422 goto done; 3006 goto done;
2423 3007
2424 /* Let the processor re-execute the fixed hypercall */ 3008 /* Let the processor re-execute the fixed hypercall */
2425 c->eip = kvm_rip_read(ctxt->vcpu); 3009 c->eip = ctxt->eip;
2426 /* Disable writeback. */ 3010 /* Disable writeback. */
2427 c->dst.type = OP_NONE; 3011 c->dst.type = OP_NONE;
2428 break; 3012 break;
2429 case 2: /* lgdt */ 3013 case 2: /* lgdt */
2430 rc = read_descriptor(ctxt, ops, c->src.ptr, 3014 rc = read_descriptor(ctxt, ops, c->src.ptr,
2431 &size, &address, c->op_bytes); 3015 &size, &address, c->op_bytes);
2432 if (rc) 3016 if (rc != X86EMUL_CONTINUE)
2433 goto done; 3017 goto done;
2434 realmode_lgdt(ctxt->vcpu, size, address); 3018 realmode_lgdt(ctxt->vcpu, size, address);
2435 /* Disable writeback. */ 3019 /* Disable writeback. */
@@ -2440,7 +3024,7 @@ twobyte_insn:
2440 switch (c->modrm_rm) { 3024 switch (c->modrm_rm) {
2441 case 1: 3025 case 1:
2442 rc = kvm_fix_hypercall(ctxt->vcpu); 3026 rc = kvm_fix_hypercall(ctxt->vcpu);
2443 if (rc) 3027 if (rc != X86EMUL_CONTINUE)
2444 goto done; 3028 goto done;
2445 break; 3029 break;
2446 default: 3030 default:
@@ -2450,7 +3034,7 @@ twobyte_insn:
2450 rc = read_descriptor(ctxt, ops, c->src.ptr, 3034 rc = read_descriptor(ctxt, ops, c->src.ptr,
2451 &size, &address, 3035 &size, &address,
2452 c->op_bytes); 3036 c->op_bytes);
2453 if (rc) 3037 if (rc != X86EMUL_CONTINUE)
2454 goto done; 3038 goto done;
2455 realmode_lidt(ctxt->vcpu, size, address); 3039 realmode_lidt(ctxt->vcpu, size, address);
2456 } 3040 }
@@ -2459,15 +3043,18 @@ twobyte_insn:
2459 break; 3043 break;
2460 case 4: /* smsw */ 3044 case 4: /* smsw */
2461 c->dst.bytes = 2; 3045 c->dst.bytes = 2;
2462 c->dst.val = realmode_get_cr(ctxt->vcpu, 0); 3046 c->dst.val = ops->get_cr(0, ctxt->vcpu);
2463 break; 3047 break;
2464 case 6: /* lmsw */ 3048 case 6: /* lmsw */
2465 realmode_lmsw(ctxt->vcpu, (u16)c->src.val, 3049 ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
2466 &ctxt->eflags); 3050 (c->src.val & 0x0f), ctxt->vcpu);
2467 c->dst.type = OP_NONE; 3051 c->dst.type = OP_NONE;
2468 break; 3052 break;
3053 case 5: /* not defined */
3054 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3055 goto done;
2469 case 7: /* invlpg*/ 3056 case 7: /* invlpg*/
2470 emulate_invlpg(ctxt->vcpu, memop); 3057 emulate_invlpg(ctxt->vcpu, c->modrm_ea);
2471 /* Disable writeback. */ 3058 /* Disable writeback. */
2472 c->dst.type = OP_NONE; 3059 c->dst.type = OP_NONE;
2473 break; 3060 break;
@@ -2493,54 +3080,54 @@ twobyte_insn:
2493 c->dst.type = OP_NONE; 3080 c->dst.type = OP_NONE;
2494 break; 3081 break;
2495 case 0x20: /* mov cr, reg */ 3082 case 0x20: /* mov cr, reg */
2496 if (c->modrm_mod != 3) 3083 switch (c->modrm_reg) {
2497 goto cannot_emulate; 3084 case 1:
2498 c->regs[c->modrm_rm] = 3085 case 5 ... 7:
2499 realmode_get_cr(ctxt->vcpu, c->modrm_reg); 3086 case 9 ... 15:
3087 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3088 goto done;
3089 }
3090 c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
2500 c->dst.type = OP_NONE; /* no writeback */ 3091 c->dst.type = OP_NONE; /* no writeback */
2501 break; 3092 break;
2502 case 0x21: /* mov from dr to reg */ 3093 case 0x21: /* mov from dr to reg */
2503 if (c->modrm_mod != 3) 3094 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
2504 goto cannot_emulate; 3095 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
2505 rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); 3096 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2506 if (rc) 3097 goto done;
2507 goto cannot_emulate; 3098 }
3099 emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
2508 c->dst.type = OP_NONE; /* no writeback */ 3100 c->dst.type = OP_NONE; /* no writeback */
2509 break; 3101 break;
2510 case 0x22: /* mov reg, cr */ 3102 case 0x22: /* mov reg, cr */
2511 if (c->modrm_mod != 3) 3103 ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu);
2512 goto cannot_emulate;
2513 realmode_set_cr(ctxt->vcpu,
2514 c->modrm_reg, c->modrm_val, &ctxt->eflags);
2515 c->dst.type = OP_NONE; 3104 c->dst.type = OP_NONE;
2516 break; 3105 break;
2517 case 0x23: /* mov from reg to dr */ 3106 case 0x23: /* mov from reg to dr */
2518 if (c->modrm_mod != 3) 3107 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
2519 goto cannot_emulate; 3108 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
2520 rc = emulator_set_dr(ctxt, c->modrm_reg, 3109 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2521 c->regs[c->modrm_rm]); 3110 goto done;
2522 if (rc) 3111 }
2523 goto cannot_emulate; 3112 emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]);
2524 c->dst.type = OP_NONE; /* no writeback */ 3113 c->dst.type = OP_NONE; /* no writeback */
2525 break; 3114 break;
2526 case 0x30: 3115 case 0x30:
2527 /* wrmsr */ 3116 /* wrmsr */
2528 msr_data = (u32)c->regs[VCPU_REGS_RAX] 3117 msr_data = (u32)c->regs[VCPU_REGS_RAX]
2529 | ((u64)c->regs[VCPU_REGS_RDX] << 32); 3118 | ((u64)c->regs[VCPU_REGS_RDX] << 32);
2530 rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); 3119 if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
2531 if (rc) {
2532 kvm_inject_gp(ctxt->vcpu, 0); 3120 kvm_inject_gp(ctxt->vcpu, 0);
2533 c->eip = kvm_rip_read(ctxt->vcpu); 3121 goto done;
2534 } 3122 }
2535 rc = X86EMUL_CONTINUE; 3123 rc = X86EMUL_CONTINUE;
2536 c->dst.type = OP_NONE; 3124 c->dst.type = OP_NONE;
2537 break; 3125 break;
2538 case 0x32: 3126 case 0x32:
2539 /* rdmsr */ 3127 /* rdmsr */
2540 rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); 3128 if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
2541 if (rc) {
2542 kvm_inject_gp(ctxt->vcpu, 0); 3129 kvm_inject_gp(ctxt->vcpu, 0);
2543 c->eip = kvm_rip_read(ctxt->vcpu); 3130 goto done;
2544 } else { 3131 } else {
2545 c->regs[VCPU_REGS_RAX] = (u32)msr_data; 3132 c->regs[VCPU_REGS_RAX] = (u32)msr_data;
2546 c->regs[VCPU_REGS_RDX] = msr_data >> 32; 3133 c->regs[VCPU_REGS_RDX] = msr_data >> 32;
@@ -2577,7 +3164,7 @@ twobyte_insn:
2577 break; 3164 break;
2578 case 0xa1: /* pop fs */ 3165 case 0xa1: /* pop fs */
2579 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); 3166 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
2580 if (rc != 0) 3167 if (rc != X86EMUL_CONTINUE)
2581 goto done; 3168 goto done;
2582 break; 3169 break;
2583 case 0xa3: 3170 case 0xa3:
@@ -2596,7 +3183,7 @@ twobyte_insn:
2596 break; 3183 break;
2597 case 0xa9: /* pop gs */ 3184 case 0xa9: /* pop gs */
2598 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); 3185 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
2599 if (rc != 0) 3186 if (rc != X86EMUL_CONTINUE)
2600 goto done; 3187 goto done;
2601 break; 3188 break;
2602 case 0xab: 3189 case 0xab:
@@ -2668,16 +3255,14 @@ twobyte_insn:
2668 (u64) c->src.val; 3255 (u64) c->src.val;
2669 break; 3256 break;
2670 case 0xc7: /* Grp9 (cmpxchg8b) */ 3257 case 0xc7: /* Grp9 (cmpxchg8b) */
2671 rc = emulate_grp9(ctxt, ops, memop); 3258 rc = emulate_grp9(ctxt, ops);
2672 if (rc != 0) 3259 if (rc != X86EMUL_CONTINUE)
2673 goto done; 3260 goto done;
2674 c->dst.type = OP_NONE;
2675 break; 3261 break;
2676 } 3262 }
2677 goto writeback; 3263 goto writeback;
2678 3264
2679cannot_emulate: 3265cannot_emulate:
2680 DPRINTF("Cannot emulate %02x\n", c->b); 3266 DPRINTF("Cannot emulate %02x\n", c->b);
2681 c->eip = saved_eip;
2682 return -1; 3267 return -1;
2683} 3268}
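/*
 * Illustrative sketch (not part of the patch above): with the rework in
 * x86_emulate_insn(), a REP-prefixed string instruction now decrements RCX
 * after each completed iteration and only drops back to the guest when the
 * PIO read-ahead cache is drained or, if no cache is in use, every 1024
 * iterations.  A minimal, self-contained model of that exit predicate --
 * the struct and function names here are hypothetical, not kernel API --
 * could look like this:
 */
struct rep_iter_state {
	unsigned long rcx;		/* remaining iterations (RCX) */
	unsigned int pos, end;		/* read-ahead cursor; end == 0 => no cache */
};

static int rep_should_reenter_guest(struct rep_iter_state *s)
{
	s->rcx--;			/* one string iteration retired */
	if (s->end == 0)		/* no read-ahead: bound the work per entry */
		return (s->rcx & 0x3ff) == 0;
	return s->pos == s->end;	/* cache drained, refill needed outside the loop */
}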
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index a790fa128a9f..93825ff3338f 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -33,6 +33,29 @@
33#include <linux/kvm_host.h> 33#include <linux/kvm_host.h>
34#include "trace.h" 34#include "trace.h"
35 35
36static void pic_lock(struct kvm_pic *s)
37 __acquires(&s->lock)
38{
39 raw_spin_lock(&s->lock);
40}
41
42static void pic_unlock(struct kvm_pic *s)
43 __releases(&s->lock)
44{
45 bool wakeup = s->wakeup_needed;
46 struct kvm_vcpu *vcpu;
47
48 s->wakeup_needed = false;
49
50 raw_spin_unlock(&s->lock);
51
52 if (wakeup) {
53 vcpu = s->kvm->bsp_vcpu;
54 if (vcpu)
55 kvm_vcpu_kick(vcpu);
56 }
57}
58
36static void pic_clear_isr(struct kvm_kpic_state *s, int irq) 59static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
37{ 60{
38 s->isr &= ~(1 << irq); 61 s->isr &= ~(1 << irq);
@@ -45,19 +68,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
45 * Other interrupt may be delivered to PIC while lock is dropped but 68 * Other interrupt may be delivered to PIC while lock is dropped but
46 * it should be safe since PIC state is already updated at this stage. 69 * it should be safe since PIC state is already updated at this stage.
47 */ 70 */
48 raw_spin_unlock(&s->pics_state->lock); 71 pic_unlock(s->pics_state);
49 kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); 72 kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
50 raw_spin_lock(&s->pics_state->lock); 73 pic_lock(s->pics_state);
51} 74}
52 75
53void kvm_pic_clear_isr_ack(struct kvm *kvm) 76void kvm_pic_clear_isr_ack(struct kvm *kvm)
54{ 77{
55 struct kvm_pic *s = pic_irqchip(kvm); 78 struct kvm_pic *s = pic_irqchip(kvm);
56 79
57 raw_spin_lock(&s->lock); 80 pic_lock(s);
58 s->pics[0].isr_ack = 0xff; 81 s->pics[0].isr_ack = 0xff;
59 s->pics[1].isr_ack = 0xff; 82 s->pics[1].isr_ack = 0xff;
60 raw_spin_unlock(&s->lock); 83 pic_unlock(s);
61} 84}
62 85
63/* 86/*
@@ -158,9 +181,9 @@ static void pic_update_irq(struct kvm_pic *s)
158 181
159void kvm_pic_update_irq(struct kvm_pic *s) 182void kvm_pic_update_irq(struct kvm_pic *s)
160{ 183{
161 raw_spin_lock(&s->lock); 184 pic_lock(s);
162 pic_update_irq(s); 185 pic_update_irq(s);
163 raw_spin_unlock(&s->lock); 186 pic_unlock(s);
164} 187}
165 188
166int kvm_pic_set_irq(void *opaque, int irq, int level) 189int kvm_pic_set_irq(void *opaque, int irq, int level)
@@ -168,14 +191,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
168 struct kvm_pic *s = opaque; 191 struct kvm_pic *s = opaque;
169 int ret = -1; 192 int ret = -1;
170 193
171 raw_spin_lock(&s->lock); 194 pic_lock(s);
172 if (irq >= 0 && irq < PIC_NUM_PINS) { 195 if (irq >= 0 && irq < PIC_NUM_PINS) {
173 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 196 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
174 pic_update_irq(s); 197 pic_update_irq(s);
175 trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, 198 trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
176 s->pics[irq >> 3].imr, ret == 0); 199 s->pics[irq >> 3].imr, ret == 0);
177 } 200 }
178 raw_spin_unlock(&s->lock); 201 pic_unlock(s);
179 202
180 return ret; 203 return ret;
181} 204}
@@ -205,7 +228,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
205 int irq, irq2, intno; 228 int irq, irq2, intno;
206 struct kvm_pic *s = pic_irqchip(kvm); 229 struct kvm_pic *s = pic_irqchip(kvm);
207 230
208 raw_spin_lock(&s->lock); 231 pic_lock(s);
209 irq = pic_get_irq(&s->pics[0]); 232 irq = pic_get_irq(&s->pics[0]);
210 if (irq >= 0) { 233 if (irq >= 0) {
211 pic_intack(&s->pics[0], irq); 234 pic_intack(&s->pics[0], irq);
@@ -230,7 +253,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
230 intno = s->pics[0].irq_base + irq; 253 intno = s->pics[0].irq_base + irq;
231 } 254 }
232 pic_update_irq(s); 255 pic_update_irq(s);
233 raw_spin_unlock(&s->lock); 256 pic_unlock(s);
234 257
235 return intno; 258 return intno;
236} 259}
@@ -444,7 +467,7 @@ static int picdev_write(struct kvm_io_device *this,
444 printk(KERN_ERR "PIC: non byte write\n"); 467 printk(KERN_ERR "PIC: non byte write\n");
445 return 0; 468 return 0;
446 } 469 }
447 raw_spin_lock(&s->lock); 470 pic_lock(s);
448 switch (addr) { 471 switch (addr) {
449 case 0x20: 472 case 0x20:
450 case 0x21: 473 case 0x21:
@@ -457,7 +480,7 @@ static int picdev_write(struct kvm_io_device *this,
457 elcr_ioport_write(&s->pics[addr & 1], addr, data); 480 elcr_ioport_write(&s->pics[addr & 1], addr, data);
458 break; 481 break;
459 } 482 }
460 raw_spin_unlock(&s->lock); 483 pic_unlock(s);
461 return 0; 484 return 0;
462} 485}
463 486
@@ -474,7 +497,7 @@ static int picdev_read(struct kvm_io_device *this,
474 printk(KERN_ERR "PIC: non byte read\n"); 497 printk(KERN_ERR "PIC: non byte read\n");
475 return 0; 498 return 0;
476 } 499 }
477 raw_spin_lock(&s->lock); 500 pic_lock(s);
478 switch (addr) { 501 switch (addr) {
479 case 0x20: 502 case 0x20:
480 case 0x21: 503 case 0x21:
@@ -488,7 +511,7 @@ static int picdev_read(struct kvm_io_device *this,
488 break; 511 break;
489 } 512 }
490 *(unsigned char *)val = data; 513 *(unsigned char *)val = data;
491 raw_spin_unlock(&s->lock); 514 pic_unlock(s);
492 return 0; 515 return 0;
493} 516}
494 517
@@ -505,7 +528,7 @@ static void pic_irq_request(void *opaque, int level)
505 s->output = level; 528 s->output = level;
506 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { 529 if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
507 s->pics[0].isr_ack &= ~(1 << irq); 530 s->pics[0].isr_ack &= ~(1 << irq);
508 kvm_vcpu_kick(vcpu); 531 s->wakeup_needed = true;
509 } 532 }
510} 533}
511 534
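/*
 * Illustrative sketch (not from the patch): pic_lock()/pic_unlock() above
 * follow a common pattern -- while holding a raw spinlock, only record that
 * a wakeup is needed, and issue the actual vcpu kick after the lock has been
 * dropped, so the kick never runs under the spinlock.  A generic rendering
 * of the same idea, with hypothetical names and assuming
 * <linux/spinlock.h> and <linux/types.h>:
 */
struct kicked_dev {
	raw_spinlock_t lock;
	bool wakeup_needed;
	/* ... state protected by @lock ... */
};

static void kicked_dev_unlock(struct kicked_dev *d,
			      void (*kick)(void *arg), void *arg)
{
	bool wakeup = d->wakeup_needed;

	d->wakeup_needed = false;
	raw_spin_unlock(&d->lock);	/* drop the lock first ...         */
	if (wakeup)
		kick(arg);		/* ... then do the (costly) wakeup */
}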
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 34b15915754d..cd1f362f413d 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -63,6 +63,7 @@ struct kvm_kpic_state {
63 63
64struct kvm_pic { 64struct kvm_pic {
65 raw_spinlock_t lock; 65 raw_spinlock_t lock;
66 bool wakeup_needed;
66 unsigned pending_acks; 67 unsigned pending_acks;
67 struct kvm *kvm; 68 struct kvm *kvm;
68 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 69 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h
index 55c7524dda54..64bc6ea78d90 100644
--- a/arch/x86/kvm/kvm_timer.h
+++ b/arch/x86/kvm/kvm_timer.h
@@ -10,9 +10,7 @@ struct kvm_timer {
10}; 10};
11 11
12struct kvm_timer_ops { 12struct kvm_timer_ops {
13 bool (*is_periodic)(struct kvm_timer *); 13 bool (*is_periodic)(struct kvm_timer *);
14}; 14};
15 15
16
17enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); 16enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
18
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 19a8906bcaa2..81563e76e28f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644);
148 148
149#include <trace/events/kvm.h> 149#include <trace/events/kvm.h>
150 150
151#undef TRACE_INCLUDE_FILE
152#define CREATE_TRACE_POINTS 151#define CREATE_TRACE_POINTS
153#include "mmutrace.h" 152#include "mmutrace.h"
154 153
@@ -174,12 +173,7 @@ struct kvm_shadow_walk_iterator {
174 shadow_walk_okay(&(_walker)); \ 173 shadow_walk_okay(&(_walker)); \
175 shadow_walk_next(&(_walker))) 174 shadow_walk_next(&(_walker)))
176 175
177 176typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp);
178struct kvm_unsync_walk {
179 int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
180};
181
182typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
183 177
184static struct kmem_cache *pte_chain_cache; 178static struct kmem_cache *pte_chain_cache;
185static struct kmem_cache *rmap_desc_cache; 179static struct kmem_cache *rmap_desc_cache;
@@ -223,7 +217,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
223} 217}
224EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); 218EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
225 219
226static int is_write_protection(struct kvm_vcpu *vcpu) 220static bool is_write_protection(struct kvm_vcpu *vcpu)
227{ 221{
228 return kvm_read_cr0_bits(vcpu, X86_CR0_WP); 222 return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
229} 223}
@@ -327,7 +321,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
327 page = alloc_page(GFP_KERNEL); 321 page = alloc_page(GFP_KERNEL);
328 if (!page) 322 if (!page)
329 return -ENOMEM; 323 return -ENOMEM;
330 set_page_private(page, 0);
331 cache->objects[cache->nobjs++] = page_address(page); 324 cache->objects[cache->nobjs++] = page_address(page);
332 } 325 }
333 return 0; 326 return 0;
@@ -438,9 +431,9 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
438 int i; 431 int i;
439 432
440 gfn = unalias_gfn(kvm, gfn); 433 gfn = unalias_gfn(kvm, gfn);
434 slot = gfn_to_memslot_unaliased(kvm, gfn);
441 for (i = PT_DIRECTORY_LEVEL; 435 for (i = PT_DIRECTORY_LEVEL;
442 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { 436 i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
443 slot = gfn_to_memslot_unaliased(kvm, gfn);
444 write_count = slot_largepage_idx(gfn, slot, i); 437 write_count = slot_largepage_idx(gfn, slot, i);
445 *write_count -= 1; 438 *write_count -= 1;
446 WARN_ON(*write_count < 0); 439 WARN_ON(*write_count < 0);
@@ -654,7 +647,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
654static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) 647static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
655{ 648{
656 struct kvm_rmap_desc *desc; 649 struct kvm_rmap_desc *desc;
657 struct kvm_rmap_desc *prev_desc;
658 u64 *prev_spte; 650 u64 *prev_spte;
659 int i; 651 int i;
660 652
@@ -666,7 +658,6 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
666 return NULL; 658 return NULL;
667 } 659 }
668 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); 660 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
669 prev_desc = NULL;
670 prev_spte = NULL; 661 prev_spte = NULL;
671 while (desc) { 662 while (desc) {
672 for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { 663 for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) {
@@ -794,7 +785,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
794 int retval = 0; 785 int retval = 0;
795 struct kvm_memslots *slots; 786 struct kvm_memslots *slots;
796 787
797 slots = rcu_dereference(kvm->memslots); 788 slots = kvm_memslots(kvm);
798 789
799 for (i = 0; i < slots->nmemslots; i++) { 790 for (i = 0; i < slots->nmemslots; i++) {
800 struct kvm_memory_slot *memslot = &slots->memslots[i]; 791 struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -925,7 +916,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
925 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); 916 sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
926 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 917 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
927 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 918 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
928 INIT_LIST_HEAD(&sp->oos_link);
929 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 919 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
930 sp->multimapped = 0; 920 sp->multimapped = 0;
931 sp->parent_pte = parent_pte; 921 sp->parent_pte = parent_pte;
@@ -1009,8 +999,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
1009} 999}
1010 1000
1011 1001
1012static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 1002static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
1013 mmu_parent_walk_fn fn)
1014{ 1003{
1015 struct kvm_pte_chain *pte_chain; 1004 struct kvm_pte_chain *pte_chain;
1016 struct hlist_node *node; 1005 struct hlist_node *node;
@@ -1019,8 +1008,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1019 1008
1020 if (!sp->multimapped && sp->parent_pte) { 1009 if (!sp->multimapped && sp->parent_pte) {
1021 parent_sp = page_header(__pa(sp->parent_pte)); 1010 parent_sp = page_header(__pa(sp->parent_pte));
1022 fn(vcpu, parent_sp); 1011 fn(parent_sp);
1023 mmu_parent_walk(vcpu, parent_sp, fn); 1012 mmu_parent_walk(parent_sp, fn);
1024 return; 1013 return;
1025 } 1014 }
1026 hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) 1015 hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
@@ -1028,8 +1017,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1028 if (!pte_chain->parent_ptes[i]) 1017 if (!pte_chain->parent_ptes[i])
1029 break; 1018 break;
1030 parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); 1019 parent_sp = page_header(__pa(pte_chain->parent_ptes[i]));
1031 fn(vcpu, parent_sp); 1020 fn(parent_sp);
1032 mmu_parent_walk(vcpu, parent_sp, fn); 1021 mmu_parent_walk(parent_sp, fn);
1033 } 1022 }
1034} 1023}
1035 1024
@@ -1066,16 +1055,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
1066 } 1055 }
1067} 1056}
1068 1057
1069static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1058static int unsync_walk_fn(struct kvm_mmu_page *sp)
1070{ 1059{
1071 kvm_mmu_update_parents_unsync(sp); 1060 kvm_mmu_update_parents_unsync(sp);
1072 return 1; 1061 return 1;
1073} 1062}
1074 1063
1075static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, 1064static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
1076 struct kvm_mmu_page *sp)
1077{ 1065{
1078 mmu_parent_walk(vcpu, sp, unsync_walk_fn); 1066 mmu_parent_walk(sp, unsync_walk_fn);
1079 kvm_mmu_update_parents_unsync(sp); 1067 kvm_mmu_update_parents_unsync(sp);
1080} 1068}
1081 1069
@@ -1201,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1201static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1189static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1202{ 1190{
1203 WARN_ON(!sp->unsync); 1191 WARN_ON(!sp->unsync);
1192 trace_kvm_mmu_sync_page(sp);
1204 sp->unsync = 0; 1193 sp->unsync = 0;
1205 --kvm->stat.mmu_unsync; 1194 --kvm->stat.mmu_unsync;
1206} 1195}
@@ -1209,12 +1198,11 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
1209 1198
1210static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 1199static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1211{ 1200{
1212 if (sp->role.glevels != vcpu->arch.mmu.root_level) { 1201 if (sp->role.cr4_pae != !!is_pae(vcpu)) {
1213 kvm_mmu_zap_page(vcpu->kvm, sp); 1202 kvm_mmu_zap_page(vcpu->kvm, sp);
1214 return 1; 1203 return 1;
1215 } 1204 }
1216 1205
1217 trace_kvm_mmu_sync_page(sp);
1218 if (rmap_write_protect(vcpu->kvm, sp->gfn)) 1206 if (rmap_write_protect(vcpu->kvm, sp->gfn))
1219 kvm_flush_remote_tlbs(vcpu->kvm); 1207 kvm_flush_remote_tlbs(vcpu->kvm);
1220 kvm_unlink_unsync_page(vcpu->kvm, sp); 1208 kvm_unlink_unsync_page(vcpu->kvm, sp);
@@ -1331,6 +1319,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1331 role = vcpu->arch.mmu.base_role; 1319 role = vcpu->arch.mmu.base_role;
1332 role.level = level; 1320 role.level = level;
1333 role.direct = direct; 1321 role.direct = direct;
1322 if (role.direct)
1323 role.cr4_pae = 0;
1334 role.access = access; 1324 role.access = access;
1335 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { 1325 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
1336 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); 1326 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1351,7 +1341,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1351 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1341 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
1352 if (sp->unsync_children) { 1342 if (sp->unsync_children) {
1353 set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); 1343 set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
1354 kvm_mmu_mark_parents_unsync(vcpu, sp); 1344 kvm_mmu_mark_parents_unsync(sp);
1355 } 1345 }
1356 trace_kvm_mmu_get_page(sp, false); 1346 trace_kvm_mmu_get_page(sp, false);
1357 return sp; 1347 return sp;
@@ -1573,13 +1563,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
1573 r = 0; 1563 r = 0;
1574 index = kvm_page_table_hashfn(gfn); 1564 index = kvm_page_table_hashfn(gfn);
1575 bucket = &kvm->arch.mmu_page_hash[index]; 1565 bucket = &kvm->arch.mmu_page_hash[index];
1566restart:
1576 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) 1567 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
1577 if (sp->gfn == gfn && !sp->role.direct) { 1568 if (sp->gfn == gfn && !sp->role.direct) {
1578 pgprintk("%s: gfn %lx role %x\n", __func__, gfn, 1569 pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
1579 sp->role.word); 1570 sp->role.word);
1580 r = 1; 1571 r = 1;
1581 if (kvm_mmu_zap_page(kvm, sp)) 1572 if (kvm_mmu_zap_page(kvm, sp))
1582 n = bucket->first; 1573 goto restart;
1583 } 1574 }
1584 return r; 1575 return r;
1585} 1576}
@@ -1593,13 +1584,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1593 1584
1594 index = kvm_page_table_hashfn(gfn); 1585 index = kvm_page_table_hashfn(gfn);
1595 bucket = &kvm->arch.mmu_page_hash[index]; 1586 bucket = &kvm->arch.mmu_page_hash[index];
1587restart:
1596 hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { 1588 hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
1597 if (sp->gfn == gfn && !sp->role.direct 1589 if (sp->gfn == gfn && !sp->role.direct
1598 && !sp->role.invalid) { 1590 && !sp->role.invalid) {
1599 pgprintk("%s: zap %lx %x\n", 1591 pgprintk("%s: zap %lx %x\n",
1600 __func__, gfn, sp->role.word); 1592 __func__, gfn, sp->role.word);
1601 if (kvm_mmu_zap_page(kvm, sp)) 1593 if (kvm_mmu_zap_page(kvm, sp))
1602 nn = bucket->first; 1594 goto restart;
1603 } 1595 }
1604 } 1596 }
1605} 1597}
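Both hunks above replace the manual re-fetch of the bucket head ("n = bucket->first" / "nn = bucket->first") with a "goto restart": kvm_mmu_zap_page() can remove more entries than the one currently being visited, so any cached iterator may dangle and the only safe recovery is to rescan the bucket from the top. A minimal standalone sketch of the same restart-after-delete shape, on a toy singly linked list rather than the kernel's hlist API:

#include <stdio.h>
#include <stdlib.h>

struct node { int val; struct node *next; };

static void push(struct node **head, int val)
{
	struct node *n = malloc(sizeof(*n));
	n->val = val;
	n->next = *head;
	*head = n;
}

/* Delete every matching node; restart the scan after each removal because
 * the removal may have invalidated whatever "next" pointer was cached. */
static void zap_matching(struct node **head, int val)
{
restart:
	for (struct node **pp = head; *pp; pp = &(*pp)->next) {
		if ((*pp)->val == val) {
			struct node *dead = *pp;
			*pp = dead->next;
			free(dead);
			goto restart;
		}
	}
}

int main(void)
{
	struct node *head = NULL;
	for (int i = 0; i < 6; i++)
		push(&head, i % 3);	/* list becomes 2 1 0 2 1 0 */
	zap_matching(&head, 2);
	for (struct node *n = head; n; n = n->next)
		printf("%d ", n->val);	/* prints 1 0 1 0 */
	printf("\n");
	return 0;
}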
@@ -1626,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp)
1626 } 1618 }
1627} 1619}
1628 1620
1629struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
1630{
1631 struct page *page;
1632
1633 gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
1634
1635 if (gpa == UNMAPPED_GVA)
1636 return NULL;
1637
1638 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
1639
1640 return page;
1641}
1642
1643/* 1621/*
1644 * The function is based on mtrr_type_lookup() in 1622 * The function is based on mtrr_type_lookup() in
1645 * arch/x86/kernel/cpu/mtrr/generic.c 1623 * arch/x86/kernel/cpu/mtrr/generic.c
@@ -1752,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1752 struct kvm_mmu_page *s; 1730 struct kvm_mmu_page *s;
1753 struct hlist_node *node, *n; 1731 struct hlist_node *node, *n;
1754 1732
1755 trace_kvm_mmu_unsync_page(sp);
1756 index = kvm_page_table_hashfn(sp->gfn); 1733 index = kvm_page_table_hashfn(sp->gfn);
1757 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1734 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
1758 /* don't unsync if pagetable is shadowed with multiple roles */ 1735 /* don't unsync if pagetable is shadowed with multiple roles */
@@ -1762,10 +1739,11 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1762 if (s->role.word != sp->role.word) 1739 if (s->role.word != sp->role.word)
1763 return 1; 1740 return 1;
1764 } 1741 }
1742 trace_kvm_mmu_unsync_page(sp);
1765 ++vcpu->kvm->stat.mmu_unsync; 1743 ++vcpu->kvm->stat.mmu_unsync;
1766 sp->unsync = 1; 1744 sp->unsync = 1;
1767 1745
1768 kvm_mmu_mark_parents_unsync(vcpu, sp); 1746 kvm_mmu_mark_parents_unsync(sp);
1769 1747
1770 mmu_convert_notrap(sp); 1748 mmu_convert_notrap(sp);
1771 return 0; 1749 return 0;
@@ -2081,21 +2059,23 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
2081 hpa_t root = vcpu->arch.mmu.root_hpa; 2059 hpa_t root = vcpu->arch.mmu.root_hpa;
2082 2060
2083 ASSERT(!VALID_PAGE(root)); 2061 ASSERT(!VALID_PAGE(root));
2084 if (tdp_enabled)
2085 direct = 1;
2086 if (mmu_check_root(vcpu, root_gfn)) 2062 if (mmu_check_root(vcpu, root_gfn))
2087 return 1; 2063 return 1;
2064 if (tdp_enabled) {
2065 direct = 1;
2066 root_gfn = 0;
2067 }
2068 spin_lock(&vcpu->kvm->mmu_lock);
2088 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 2069 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
2089 PT64_ROOT_LEVEL, direct, 2070 PT64_ROOT_LEVEL, direct,
2090 ACC_ALL, NULL); 2071 ACC_ALL, NULL);
2091 root = __pa(sp->spt); 2072 root = __pa(sp->spt);
2092 ++sp->root_count; 2073 ++sp->root_count;
2074 spin_unlock(&vcpu->kvm->mmu_lock);
2093 vcpu->arch.mmu.root_hpa = root; 2075 vcpu->arch.mmu.root_hpa = root;
2094 return 0; 2076 return 0;
2095 } 2077 }
2096 direct = !is_paging(vcpu); 2078 direct = !is_paging(vcpu);
2097 if (tdp_enabled)
2098 direct = 1;
2099 for (i = 0; i < 4; ++i) { 2079 for (i = 0; i < 4; ++i) {
2100 hpa_t root = vcpu->arch.mmu.pae_root[i]; 2080 hpa_t root = vcpu->arch.mmu.pae_root[i];
2101 2081
@@ -2111,11 +2091,18 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
2111 root_gfn = 0; 2091 root_gfn = 0;
2112 if (mmu_check_root(vcpu, root_gfn)) 2092 if (mmu_check_root(vcpu, root_gfn))
2113 return 1; 2093 return 1;
2094 if (tdp_enabled) {
2095 direct = 1;
2096 root_gfn = i << 30;
2097 }
2098 spin_lock(&vcpu->kvm->mmu_lock);
2114 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 2099 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
2115 PT32_ROOT_LEVEL, direct, 2100 PT32_ROOT_LEVEL, direct,
2116 ACC_ALL, NULL); 2101 ACC_ALL, NULL);
2117 root = __pa(sp->spt); 2102 root = __pa(sp->spt);
2118 ++sp->root_count; 2103 ++sp->root_count;
2104 spin_unlock(&vcpu->kvm->mmu_lock);
2105
2119 vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; 2106 vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
2120 } 2107 }
2121 vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); 2108 vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
@@ -2299,13 +2286,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
2299 /* no rsvd bits for 2 level 4K page table entries */ 2286 /* no rsvd bits for 2 level 4K page table entries */
2300 context->rsvd_bits_mask[0][1] = 0; 2287 context->rsvd_bits_mask[0][1] = 0;
2301 context->rsvd_bits_mask[0][0] = 0; 2288 context->rsvd_bits_mask[0][0] = 0;
2289 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
2290
2291 if (!is_pse(vcpu)) {
2292 context->rsvd_bits_mask[1][1] = 0;
2293 break;
2294 }
2295
2302 if (is_cpuid_PSE36()) 2296 if (is_cpuid_PSE36())
2303 /* 36bits PSE 4MB page */ 2297 /* 36bits PSE 4MB page */
2304 context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); 2298 context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
2305 else 2299 else
2306 /* 32 bits PSE 4MB page */ 2300 /* 32 bits PSE 4MB page */
2307 context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); 2301 context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
2308 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
2309 break; 2302 break;
2310 case PT32E_ROOT_LEVEL: 2303 case PT32E_ROOT_LEVEL:
2311 context->rsvd_bits_mask[0][2] = 2304 context->rsvd_bits_mask[0][2] =
@@ -2318,7 +2311,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
2318 context->rsvd_bits_mask[1][1] = exb_bit_rsvd | 2311 context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
2319 rsvd_bits(maxphyaddr, 62) | 2312 rsvd_bits(maxphyaddr, 62) |
2320 rsvd_bits(13, 20); /* large page */ 2313 rsvd_bits(13, 20); /* large page */
2321 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; 2314 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
2322 break; 2315 break;
2323 case PT64_ROOT_LEVEL: 2316 case PT64_ROOT_LEVEL:
2324 context->rsvd_bits_mask[0][3] = exb_bit_rsvd | 2317 context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
@@ -2336,7 +2329,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
2336 context->rsvd_bits_mask[1][1] = exb_bit_rsvd | 2329 context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
2337 rsvd_bits(maxphyaddr, 51) | 2330 rsvd_bits(maxphyaddr, 51) |
2338 rsvd_bits(13, 20); /* large page */ 2331 rsvd_bits(13, 20); /* large page */
2339 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; 2332 context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
2340 break; 2333 break;
2341 } 2334 }
2342} 2335}
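The "context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]" lines above were self-assignments; the fix copies from rsvd_bits_mask[0][0] instead, and the 2-level (32-bit) case now clears the large-page mask and bails out early when PSE is disabled. For reference, rsvd_bits() (defined earlier in mmu.c; reproduced here approximately) just builds a mask of bits s..e inclusive, so the two 4MB-page masks used above come out as:

#include <stdio.h>
#include <stdint.h>

/* Same shape as mmu.c's rsvd_bits(): set bits s..e inclusive. */
static uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
	/* 32-bit PSE 4MB page: PDE bits 13..21 must be zero. */
	printf("rsvd_bits(13, 21) = %#llx\n",
	       (unsigned long long)rsvd_bits(13, 21));	/* 0x3fe000 */
	/* PSE-36: bits 13..16 carry PA[35:32], so only 17..21 stay reserved. */
	printf("rsvd_bits(17, 21) = %#llx\n",
	       (unsigned long long)rsvd_bits(17, 21));	/* 0x3e0000 */
	return 0;
}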
@@ -2438,7 +2431,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
2438 else 2431 else
2439 r = paging32_init_context(vcpu); 2432 r = paging32_init_context(vcpu);
2440 2433
2441 vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; 2434 vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
2435 vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
2442 2436
2443 return r; 2437 return r;
2444} 2438}
@@ -2478,7 +2472,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
2478 goto out; 2472 goto out;
2479 spin_lock(&vcpu->kvm->mmu_lock); 2473 spin_lock(&vcpu->kvm->mmu_lock);
2480 kvm_mmu_free_some_pages(vcpu); 2474 kvm_mmu_free_some_pages(vcpu);
2475 spin_unlock(&vcpu->kvm->mmu_lock);
2481 r = mmu_alloc_roots(vcpu); 2476 r = mmu_alloc_roots(vcpu);
2477 spin_lock(&vcpu->kvm->mmu_lock);
2482 mmu_sync_roots(vcpu); 2478 mmu_sync_roots(vcpu);
2483 spin_unlock(&vcpu->kvm->mmu_lock); 2479 spin_unlock(&vcpu->kvm->mmu_lock);
2484 if (r) 2480 if (r)
@@ -2527,7 +2523,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
2527 } 2523 }
2528 2524
2529 ++vcpu->kvm->stat.mmu_pte_updated; 2525 ++vcpu->kvm->stat.mmu_pte_updated;
2530 if (sp->role.glevels == PT32_ROOT_LEVEL) 2526 if (!sp->role.cr4_pae)
2531 paging32_update_pte(vcpu, sp, spte, new); 2527 paging32_update_pte(vcpu, sp, spte, new);
2532 else 2528 else
2533 paging64_update_pte(vcpu, sp, spte, new); 2529 paging64_update_pte(vcpu, sp, spte, new);
@@ -2562,36 +2558,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
2562} 2558}
2563 2559
2564static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 2560static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2565 const u8 *new, int bytes) 2561 u64 gpte)
2566{ 2562{
2567 gfn_t gfn; 2563 gfn_t gfn;
2568 int r;
2569 u64 gpte = 0;
2570 pfn_t pfn; 2564 pfn_t pfn;
2571 2565
2572 if (bytes != 4 && bytes != 8)
2573 return;
2574
2575 /*
2576 * Assume that the pte write on a page table of the same type
2577 * as the current vcpu paging mode. This is nearly always true
2578 * (might be false while changing modes). Note it is verified later
2579 * by update_pte().
2580 */
2581 if (is_pae(vcpu)) {
2582 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
2583 if ((bytes == 4) && (gpa % 4 == 0)) {
2584 r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8);
2585 if (r)
2586 return;
2587 memcpy((void *)&gpte + (gpa % 8), new, 4);
2588 } else if ((bytes == 8) && (gpa % 8 == 0)) {
2589 memcpy((void *)&gpte, new, 8);
2590 }
2591 } else {
2592 if ((bytes == 4) && (gpa % 4 == 0))
2593 memcpy((void *)&gpte, new, 4);
2594 }
2595 if (!is_present_gpte(gpte)) 2566 if (!is_present_gpte(gpte))
2596 return; 2567 return;
2597 gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; 2568 gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -2640,10 +2611,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2640 int flooded = 0; 2611 int flooded = 0;
2641 int npte; 2612 int npte;
2642 int r; 2613 int r;
2614 int invlpg_counter;
2643 2615
2644 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); 2616 pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
2645 mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); 2617
2618 invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
2619
2620 /*
 2621 	 * Assume that the pte write is on a page table of the same type
 2622 	 * as the current vcpu paging mode. This is nearly always true
2623 * (might be false while changing modes). Note it is verified later
2624 * by update_pte().
2625 */
2626 if ((is_pae(vcpu) && bytes == 4) || !new) {
2627 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
2628 if (is_pae(vcpu)) {
2629 gpa &= ~(gpa_t)7;
2630 bytes = 8;
2631 }
2632 r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
2633 if (r)
2634 gentry = 0;
2635 new = (const u8 *)&gentry;
2636 }
2637
2638 switch (bytes) {
2639 case 4:
2640 gentry = *(const u32 *)new;
2641 break;
2642 case 8:
2643 gentry = *(const u64 *)new;
2644 break;
2645 default:
2646 gentry = 0;
2647 break;
2648 }
2649
2650 mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
2646 spin_lock(&vcpu->kvm->mmu_lock); 2651 spin_lock(&vcpu->kvm->mmu_lock);
2652 if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
2653 gentry = 0;
2647 kvm_mmu_access_page(vcpu, gfn); 2654 kvm_mmu_access_page(vcpu, gfn);
2648 kvm_mmu_free_some_pages(vcpu); 2655 kvm_mmu_free_some_pages(vcpu);
2649 ++vcpu->kvm->stat.mmu_pte_write; 2656 ++vcpu->kvm->stat.mmu_pte_write;
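The hunk above folds the gpte reconstruction into kvm_mmu_pte_write() itself: a 4-byte write by a PAE guest is widened to a full 8-byte read of the gpte (gpa is aligned down to an 8-byte boundary first), and the guessed entry is guarded by invlpg_counter — the counter is sampled before the sleepable kvm_read_guest(), and if an invlpg bumped it by the time mmu_lock is taken, the stale gentry is dropped. A toy sketch of that sample-then-recheck pattern (names are illustrative stand-ins, not KVM APIs):

#include <stdio.h>
#include <stdatomic.h>

static atomic_int invlpg_counter;	/* bumped by the invlpg path */

static long read_gpte_guess(void)
{
	return 42;			/* stand-in for kvm_read_guest() */
}

int main(void)
{
	int before = atomic_load(&invlpg_counter);
	long gentry = read_gpte_guess();	/* may sleep; invlpg can race here */

	/* ...the real code takes mmu_lock at this point... */
	if (atomic_load(&invlpg_counter) != before)
		gentry = 0;		/* the guess went stale, discard it */

	printf("gentry = %ld\n", gentry);
	return 0;
}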
@@ -2662,10 +2669,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2662 } 2669 }
2663 index = kvm_page_table_hashfn(gfn); 2670 index = kvm_page_table_hashfn(gfn);
2664 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 2671 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
2672
2673restart:
2665 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { 2674 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
2666 if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) 2675 if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
2667 continue; 2676 continue;
2668 pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; 2677 pte_size = sp->role.cr4_pae ? 8 : 4;
2669 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); 2678 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
2670 misaligned |= bytes < 4; 2679 misaligned |= bytes < 4;
2671 if (misaligned || flooded) { 2680 if (misaligned || flooded) {
@@ -2682,14 +2691,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2682 pgprintk("misaligned: gpa %llx bytes %d role %x\n", 2691 pgprintk("misaligned: gpa %llx bytes %d role %x\n",
2683 gpa, bytes, sp->role.word); 2692 gpa, bytes, sp->role.word);
2684 if (kvm_mmu_zap_page(vcpu->kvm, sp)) 2693 if (kvm_mmu_zap_page(vcpu->kvm, sp))
2685 n = bucket->first; 2694 goto restart;
2686 ++vcpu->kvm->stat.mmu_flooded; 2695 ++vcpu->kvm->stat.mmu_flooded;
2687 continue; 2696 continue;
2688 } 2697 }
2689 page_offset = offset; 2698 page_offset = offset;
2690 level = sp->role.level; 2699 level = sp->role.level;
2691 npte = 1; 2700 npte = 1;
2692 if (sp->role.glevels == PT32_ROOT_LEVEL) { 2701 if (!sp->role.cr4_pae) {
2693 page_offset <<= 1; /* 32->64 */ 2702 page_offset <<= 1; /* 32->64 */
2694 /* 2703 /*
2695 * A 32-bit pde maps 4MB while the shadow pdes map 2704 * A 32-bit pde maps 4MB while the shadow pdes map
@@ -2707,20 +2716,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2707 continue; 2716 continue;
2708 } 2717 }
2709 spte = &sp->spt[page_offset / sizeof(*spte)]; 2718 spte = &sp->spt[page_offset / sizeof(*spte)];
2710 if ((gpa & (pte_size - 1)) || (bytes < pte_size)) {
2711 gentry = 0;
2712 r = kvm_read_guest_atomic(vcpu->kvm,
2713 gpa & ~(u64)(pte_size - 1),
2714 &gentry, pte_size);
2715 new = (const void *)&gentry;
2716 if (r < 0)
2717 new = NULL;
2718 }
2719 while (npte--) { 2719 while (npte--) {
2720 entry = *spte; 2720 entry = *spte;
2721 mmu_pte_write_zap_pte(vcpu, sp, spte); 2721 mmu_pte_write_zap_pte(vcpu, sp, spte);
2722 if (new) 2722 if (gentry)
2723 mmu_pte_write_new_pte(vcpu, sp, spte, new); 2723 mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
2724 mmu_pte_write_flush_tlb(vcpu, entry, *spte); 2724 mmu_pte_write_flush_tlb(vcpu, entry, *spte);
2725 ++spte; 2725 ++spte;
2726 } 2726 }
@@ -2900,22 +2900,23 @@ void kvm_mmu_zap_all(struct kvm *kvm)
2900 struct kvm_mmu_page *sp, *node; 2900 struct kvm_mmu_page *sp, *node;
2901 2901
2902 spin_lock(&kvm->mmu_lock); 2902 spin_lock(&kvm->mmu_lock);
2903restart:
2903 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) 2904 list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
2904 if (kvm_mmu_zap_page(kvm, sp)) 2905 if (kvm_mmu_zap_page(kvm, sp))
2905 node = container_of(kvm->arch.active_mmu_pages.next, 2906 goto restart;
2906 struct kvm_mmu_page, link); 2907
2907 spin_unlock(&kvm->mmu_lock); 2908 spin_unlock(&kvm->mmu_lock);
2908 2909
2909 kvm_flush_remote_tlbs(kvm); 2910 kvm_flush_remote_tlbs(kvm);
2910} 2911}
2911 2912
2912static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) 2913static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
2913{ 2914{
2914 struct kvm_mmu_page *page; 2915 struct kvm_mmu_page *page;
2915 2916
2916 page = container_of(kvm->arch.active_mmu_pages.prev, 2917 page = container_of(kvm->arch.active_mmu_pages.prev,
2917 struct kvm_mmu_page, link); 2918 struct kvm_mmu_page, link);
2918 kvm_mmu_zap_page(kvm, page); 2919 return kvm_mmu_zap_page(kvm, page) + 1;
2919} 2920}
2920 2921
2921static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) 2922static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
@@ -2927,7 +2928,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
2927 spin_lock(&kvm_lock); 2928 spin_lock(&kvm_lock);
2928 2929
2929 list_for_each_entry(kvm, &vm_list, vm_list) { 2930 list_for_each_entry(kvm, &vm_list, vm_list) {
2930 int npages, idx; 2931 int npages, idx, freed_pages;
2931 2932
2932 idx = srcu_read_lock(&kvm->srcu); 2933 idx = srcu_read_lock(&kvm->srcu);
2933 spin_lock(&kvm->mmu_lock); 2934 spin_lock(&kvm->mmu_lock);
@@ -2935,8 +2936,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
2935 kvm->arch.n_free_mmu_pages; 2936 kvm->arch.n_free_mmu_pages;
2936 cache_count += npages; 2937 cache_count += npages;
2937 if (!kvm_freed && nr_to_scan > 0 && npages > 0) { 2938 if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
2938 kvm_mmu_remove_one_alloc_mmu_page(kvm); 2939 freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm);
2939 cache_count--; 2940 cache_count -= freed_pages;
2940 kvm_freed = kvm; 2941 kvm_freed = kvm;
2941 } 2942 }
2942 nr_to_scan--; 2943 nr_to_scan--;
@@ -3011,7 +3012,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
3011 unsigned int nr_pages = 0; 3012 unsigned int nr_pages = 0;
3012 struct kvm_memslots *slots; 3013 struct kvm_memslots *slots;
3013 3014
3014 slots = rcu_dereference(kvm->memslots); 3015 slots = kvm_memslots(kvm);
3016
3015 for (i = 0; i < slots->nmemslots; i++) 3017 for (i = 0; i < slots->nmemslots; i++)
3016 nr_pages += slots->memslots[i].npages; 3018 nr_pages += slots->memslots[i].npages;
3017 3019
@@ -3174,8 +3176,7 @@ static gva_t canonicalize(gva_t gva)
3174} 3176}
3175 3177
3176 3178
3177typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, 3179typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep);
3178 u64 *sptep);
3179 3180
3180static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, 3181static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp,
3181 inspect_spte_fn fn) 3182 inspect_spte_fn fn)
@@ -3191,7 +3192,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp,
3191 child = page_header(ent & PT64_BASE_ADDR_MASK); 3192 child = page_header(ent & PT64_BASE_ADDR_MASK);
3192 __mmu_spte_walk(kvm, child, fn); 3193 __mmu_spte_walk(kvm, child, fn);
3193 } else 3194 } else
3194 fn(kvm, sp, &sp->spt[i]); 3195 fn(kvm, &sp->spt[i]);
3195 } 3196 }
3196 } 3197 }
3197} 3198}
@@ -3282,11 +3283,13 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
3282 3283
3283static int count_rmaps(struct kvm_vcpu *vcpu) 3284static int count_rmaps(struct kvm_vcpu *vcpu)
3284{ 3285{
3286 struct kvm *kvm = vcpu->kvm;
3287 struct kvm_memslots *slots;
3285 int nmaps = 0; 3288 int nmaps = 0;
3286 int i, j, k, idx; 3289 int i, j, k, idx;
3287 3290
3288 idx = srcu_read_lock(&kvm->srcu); 3291 idx = srcu_read_lock(&kvm->srcu);
3289 slots = rcu_dereference(kvm->memslots); 3292 slots = kvm_memslots(kvm);
3290 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 3293 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
3291 struct kvm_memory_slot *m = &slots->memslots[i]; 3294 struct kvm_memory_slot *m = &slots->memslots[i];
3292 struct kvm_rmap_desc *d; 3295 struct kvm_rmap_desc *d;
@@ -3315,7 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
3315 return nmaps; 3318 return nmaps;
3316} 3319}
3317 3320
3318void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) 3321void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
3319{ 3322{
3320 unsigned long *rmapp; 3323 unsigned long *rmapp;
3321 struct kvm_mmu_page *rev_sp; 3324 struct kvm_mmu_page *rev_sp;
@@ -3331,14 +3334,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep)
3331 printk(KERN_ERR "%s: no memslot for gfn %ld\n", 3334 printk(KERN_ERR "%s: no memslot for gfn %ld\n",
3332 audit_msg, gfn); 3335 audit_msg, gfn);
3333 printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", 3336 printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n",
3334 audit_msg, sptep - rev_sp->spt, 3337 audit_msg, (long int)(sptep - rev_sp->spt),
3335 rev_sp->gfn); 3338 rev_sp->gfn);
3336 dump_stack(); 3339 dump_stack();
3337 return; 3340 return;
3338 } 3341 }
3339 3342
3340 rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], 3343 rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt],
3341 is_large_pte(*sptep)); 3344 rev_sp->role.level);
3342 if (!*rmapp) { 3345 if (!*rmapp) {
3343 if (!printk_ratelimit()) 3346 if (!printk_ratelimit())
3344 return; 3347 return;
@@ -3373,7 +3376,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)
3373 continue; 3376 continue;
3374 if (!(ent & PT_WRITABLE_MASK)) 3377 if (!(ent & PT_WRITABLE_MASK))
3375 continue; 3378 continue;
3376 inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); 3379 inspect_spte_has_rmap(vcpu->kvm, &pt[i]);
3377 } 3380 }
3378 } 3381 }
3379 return; 3382 return;
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 3e4a5c6ca2a9..42f07b1bfbc9 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -6,14 +6,12 @@
6 6
7#undef TRACE_SYSTEM 7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM kvmmmu 8#define TRACE_SYSTEM kvmmmu
9#define TRACE_INCLUDE_PATH .
10#define TRACE_INCLUDE_FILE mmutrace
11 9
12#define KVM_MMU_PAGE_FIELDS \ 10#define KVM_MMU_PAGE_FIELDS \
13 __field(__u64, gfn) \ 11 __field(__u64, gfn) \
14 __field(__u32, role) \ 12 __field(__u32, role) \
15 __field(__u32, root_count) \ 13 __field(__u32, root_count) \
16 __field(__u32, unsync) 14 __field(bool, unsync)
17 15
18#define KVM_MMU_PAGE_ASSIGN(sp) \ 16#define KVM_MMU_PAGE_ASSIGN(sp) \
19 __entry->gfn = sp->gfn; \ 17 __entry->gfn = sp->gfn; \
@@ -30,14 +28,14 @@
30 \ 28 \
31 role.word = __entry->role; \ 29 role.word = __entry->role; \
32 \ 30 \
33 trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ 31 trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \
34 " %snxe root %u %s%c", \ 32 " %snxe root %u %s%c", \
35 __entry->gfn, role.level, role.glevels, \ 33 __entry->gfn, role.level, \
34 role.cr4_pae ? " pae" : "", \
36 role.quadrant, \ 35 role.quadrant, \
37 role.direct ? " direct" : "", \ 36 role.direct ? " direct" : "", \
38 access_str[role.access], \ 37 access_str[role.access], \
39 role.invalid ? " invalid" : "", \ 38 role.invalid ? " invalid" : "", \
40 role.cr4_pge ? "" : "!", \
41 role.nxe ? "" : "!", \ 39 role.nxe ? "" : "!", \
42 __entry->root_count, \ 40 __entry->root_count, \
43 __entry->unsync ? "unsync" : "sync", 0); \ 41 __entry->unsync ? "unsync" : "sync", 0); \
@@ -94,15 +92,15 @@ TRACE_EVENT(
94 TP_printk("pte %llx level %u", __entry->pte, __entry->level) 92 TP_printk("pte %llx level %u", __entry->pte, __entry->level)
95); 93);
96 94
97/* We set a pte accessed bit */ 95DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class,
98TRACE_EVENT( 96
99 kvm_mmu_set_accessed_bit,
100 TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), 97 TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
98
101 TP_ARGS(table_gfn, index, size), 99 TP_ARGS(table_gfn, index, size),
102 100
103 TP_STRUCT__entry( 101 TP_STRUCT__entry(
104 __field(__u64, gpa) 102 __field(__u64, gpa)
105 ), 103 ),
106 104
107 TP_fast_assign( 105 TP_fast_assign(
108 __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) 106 __entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
@@ -112,22 +110,20 @@ TRACE_EVENT(
112 TP_printk("gpa %llx", __entry->gpa) 110 TP_printk("gpa %llx", __entry->gpa)
113); 111);
114 112
115/* We set a pte dirty bit */ 113/* We set a pte accessed bit */
116TRACE_EVENT( 114DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit,
117 kvm_mmu_set_dirty_bit, 115
118 TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), 116 TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
119 TP_ARGS(table_gfn, index, size),
120 117
121 TP_STRUCT__entry( 118 TP_ARGS(table_gfn, index, size)
122 __field(__u64, gpa) 119);
123 ),
124 120
125 TP_fast_assign( 121/* We set a pte dirty bit */
126 __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) 122DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit,
127 + index * size;
128 ),
129 123
130 TP_printk("gpa %llx", __entry->gpa) 124 TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
125
126 TP_ARGS(table_gfn, index, size)
131); 127);
132 128
133TRACE_EVENT( 129TRACE_EVENT(
@@ -166,55 +162,45 @@ TRACE_EVENT(
166 __entry->created ? "new" : "existing") 162 __entry->created ? "new" : "existing")
167); 163);
168 164
169TRACE_EVENT( 165DECLARE_EVENT_CLASS(kvm_mmu_page_class,
170 kvm_mmu_sync_page, 166
171 TP_PROTO(struct kvm_mmu_page *sp), 167 TP_PROTO(struct kvm_mmu_page *sp),
172 TP_ARGS(sp), 168 TP_ARGS(sp),
173 169
174 TP_STRUCT__entry( 170 TP_STRUCT__entry(
175 KVM_MMU_PAGE_FIELDS 171 KVM_MMU_PAGE_FIELDS
176 ), 172 ),
177 173
178 TP_fast_assign( 174 TP_fast_assign(
179 KVM_MMU_PAGE_ASSIGN(sp) 175 KVM_MMU_PAGE_ASSIGN(sp)
180 ), 176 ),
181 177
182 TP_printk("%s", KVM_MMU_PAGE_PRINTK()) 178 TP_printk("%s", KVM_MMU_PAGE_PRINTK())
183); 179);
184 180
185TRACE_EVENT( 181DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page,
186 kvm_mmu_unsync_page,
187 TP_PROTO(struct kvm_mmu_page *sp), 182 TP_PROTO(struct kvm_mmu_page *sp),
188 TP_ARGS(sp),
189
190 TP_STRUCT__entry(
191 KVM_MMU_PAGE_FIELDS
192 ),
193 183
194 TP_fast_assign( 184 TP_ARGS(sp)
195 KVM_MMU_PAGE_ASSIGN(sp)
196 ),
197
198 TP_printk("%s", KVM_MMU_PAGE_PRINTK())
199); 185);
200 186
201TRACE_EVENT( 187DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page,
202 kvm_mmu_zap_page,
203 TP_PROTO(struct kvm_mmu_page *sp), 188 TP_PROTO(struct kvm_mmu_page *sp),
204 TP_ARGS(sp),
205 189
206 TP_STRUCT__entry( 190 TP_ARGS(sp)
207 KVM_MMU_PAGE_FIELDS 191);
208 ),
209 192
210 TP_fast_assign( 193DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page,
211 KVM_MMU_PAGE_ASSIGN(sp) 194 TP_PROTO(struct kvm_mmu_page *sp),
212 ),
213 195
214 TP_printk("%s", KVM_MMU_PAGE_PRINTK()) 196 TP_ARGS(sp)
215); 197);
216
217#endif /* _TRACE_KVMMMU_H */ 198#endif /* _TRACE_KVMMMU_H */
218 199
200#undef TRACE_INCLUDE_PATH
201#define TRACE_INCLUDE_PATH .
202#undef TRACE_INCLUDE_FILE
203#define TRACE_INCLUDE_FILE mmutrace
204
219/* This part must be outside protection */ 205/* This part must be outside protection */
220#include <trace/define_trace.h> 206#include <trace/define_trace.h>
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 81eab9a50e6a..89d66ca4d87c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -170,7 +170,7 @@ walk:
170 goto access_error; 170 goto access_error;
171 171
172#if PTTYPE == 64 172#if PTTYPE == 64
173 if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) 173 if (fetch_fault && (pte & PT64_NX_MASK))
174 goto access_error; 174 goto access_error;
175#endif 175#endif
176 176
@@ -190,10 +190,10 @@ walk:
190 190
191 if ((walker->level == PT_PAGE_TABLE_LEVEL) || 191 if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
192 ((walker->level == PT_DIRECTORY_LEVEL) && 192 ((walker->level == PT_DIRECTORY_LEVEL) &&
193 (pte & PT_PAGE_SIZE_MASK) && 193 is_large_pte(pte) &&
194 (PTTYPE == 64 || is_pse(vcpu))) || 194 (PTTYPE == 64 || is_pse(vcpu))) ||
195 ((walker->level == PT_PDPE_LEVEL) && 195 ((walker->level == PT_PDPE_LEVEL) &&
196 (pte & PT_PAGE_SIZE_MASK) && 196 is_large_pte(pte) &&
197 is_long_mode(vcpu))) { 197 is_long_mode(vcpu))) {
198 int lvl = walker->level; 198 int lvl = walker->level;
199 199
@@ -258,11 +258,17 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
258 pt_element_t gpte; 258 pt_element_t gpte;
259 unsigned pte_access; 259 unsigned pte_access;
260 pfn_t pfn; 260 pfn_t pfn;
261 u64 new_spte;
261 262
262 gpte = *(const pt_element_t *)pte; 263 gpte = *(const pt_element_t *)pte;
263 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { 264 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
264 if (!is_present_gpte(gpte)) 265 if (!is_present_gpte(gpte)) {
265 __set_spte(spte, shadow_notrap_nonpresent_pte); 266 if (page->unsync)
267 new_spte = shadow_trap_nonpresent_pte;
268 else
269 new_spte = shadow_notrap_nonpresent_pte;
270 __set_spte(spte, new_spte);
271 }
266 return; 272 return;
267 } 273 }
268 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); 274 pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
@@ -457,6 +463,7 @@ out_unlock:
457static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 463static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
458{ 464{
459 struct kvm_shadow_walk_iterator iterator; 465 struct kvm_shadow_walk_iterator iterator;
466 gpa_t pte_gpa = -1;
460 int level; 467 int level;
461 u64 *sptep; 468 u64 *sptep;
462 int need_flush = 0; 469 int need_flush = 0;
@@ -467,9 +474,16 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
467 level = iterator.level; 474 level = iterator.level;
468 sptep = iterator.sptep; 475 sptep = iterator.sptep;
469 476
470 if (level == PT_PAGE_TABLE_LEVEL || 477 if (is_last_spte(*sptep, level)) {
471 ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || 478 struct kvm_mmu_page *sp = page_header(__pa(sptep));
472 ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { 479 int offset, shift;
480
481 shift = PAGE_SHIFT -
482 (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
483 offset = sp->role.quadrant << shift;
484
485 pte_gpa = (sp->gfn << PAGE_SHIFT) + offset;
486 pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
473 487
474 if (is_shadow_present_pte(*sptep)) { 488 if (is_shadow_present_pte(*sptep)) {
475 rmap_remove(vcpu->kvm, sptep); 489 rmap_remove(vcpu->kvm, sptep);
@@ -487,7 +501,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
487 501
488 if (need_flush) 502 if (need_flush)
489 kvm_flush_remote_tlbs(vcpu->kvm); 503 kvm_flush_remote_tlbs(vcpu->kvm);
504
505 atomic_inc(&vcpu->kvm->arch.invlpg_counter);
506
490 spin_unlock(&vcpu->kvm->mmu_lock); 507 spin_unlock(&vcpu->kvm->mmu_lock);
508
509 if (pte_gpa == -1)
510 return;
511
512 if (mmu_topup_memory_caches(vcpu))
513 return;
514 kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
491} 515}
492 516
493static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, 517static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
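In FNAME(invlpg) above, once the walk reaches the last-level spte, the guest-physical address of the backing gpte is reconstructed from the shadow page: the quadrant picks which slice of the (larger) 32-bit guest table this shadow page covers, and the spte index is scaled by the guest pte size. The result is fed to the new kvm_mmu_pte_write(..., NULL, ...) call once the lock has been dropped. A worked example with made-up numbers, assuming a non-PAE guest (PT_LEVEL_BITS = 10, PT64_LEVEL_BITS = 9):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PT_LEVEL_BITS	10	/* 1024 x 4-byte entries per guest table */
#define PT64_LEVEL_BITS	9	/* 512 x 8-byte entries per shadow table */

int main(void)
{
	uint64_t gfn = 0x1000;	/* frame holding the guest page table */
	unsigned quadrant = 1;	/* which half of the guest table is shadowed */
	unsigned level = 1;	/* PT_PAGE_TABLE_LEVEL */
	unsigned index = 7;	/* sptep - sp->spt */
	unsigned pte_size = 4;	/* sizeof(pt_element_t) for PTTYPE == 32 */

	int shift = PAGE_SHIFT - (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
	uint64_t offset = (uint64_t)quadrant << shift;
	uint64_t pte_gpa = (gfn << PAGE_SHIFT) + offset + index * pte_size;

	printf("pte_gpa = %#llx\n", (unsigned long long)pte_gpa);	/* 0x100081c */
	return 0;
}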
@@ -551,12 +575,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
551{ 575{
552 int i, offset, nr_present; 576 int i, offset, nr_present;
553 bool reset_host_protection; 577 bool reset_host_protection;
578 gpa_t first_pte_gpa;
554 579
555 offset = nr_present = 0; 580 offset = nr_present = 0;
556 581
557 if (PTTYPE == 32) 582 if (PTTYPE == 32)
558 offset = sp->role.quadrant << PT64_LEVEL_BITS; 583 offset = sp->role.quadrant << PT64_LEVEL_BITS;
559 584
585 first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
586
560 for (i = 0; i < PT64_ENT_PER_PAGE; i++) { 587 for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
561 unsigned pte_access; 588 unsigned pte_access;
562 pt_element_t gpte; 589 pt_element_t gpte;
@@ -566,8 +593,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
566 if (!is_shadow_present_pte(sp->spt[i])) 593 if (!is_shadow_present_pte(sp->spt[i]))
567 continue; 594 continue;
568 595
569 pte_gpa = gfn_to_gpa(sp->gfn); 596 pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);
570 pte_gpa += (i+offset) * sizeof(pt_element_t);
571 597
572 if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, 598 if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
573 sizeof(pt_element_t))) 599 sizeof(pt_element_t)))
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 737361fcd503..96dc232bfc56 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -44,10 +44,11 @@ MODULE_LICENSE("GPL");
44#define SEG_TYPE_LDT 2 44#define SEG_TYPE_LDT 2
45#define SEG_TYPE_BUSY_TSS16 3 45#define SEG_TYPE_BUSY_TSS16 3
46 46
47#define SVM_FEATURE_NPT (1 << 0) 47#define SVM_FEATURE_NPT (1 << 0)
48#define SVM_FEATURE_LBRV (1 << 1) 48#define SVM_FEATURE_LBRV (1 << 1)
49#define SVM_FEATURE_SVML (1 << 2) 49#define SVM_FEATURE_SVML (1 << 2)
50#define SVM_FEATURE_PAUSE_FILTER (1 << 10) 50#define SVM_FEATURE_NRIP (1 << 3)
51#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
51 52
52#define NESTED_EXIT_HOST 0 /* Exit handled on host level */ 53#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
53#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ 54#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
@@ -70,6 +71,7 @@ struct kvm_vcpu;
70struct nested_state { 71struct nested_state {
71 struct vmcb *hsave; 72 struct vmcb *hsave;
72 u64 hsave_msr; 73 u64 hsave_msr;
74 u64 vm_cr_msr;
73 u64 vmcb; 75 u64 vmcb;
74 76
75 /* These are the merged vectors */ 77 /* These are the merged vectors */
@@ -77,6 +79,7 @@ struct nested_state {
77 79
78 /* gpa pointers to the real vectors */ 80 /* gpa pointers to the real vectors */
79 u64 vmcb_msrpm; 81 u64 vmcb_msrpm;
82 u64 vmcb_iopm;
80 83
81 /* A VMEXIT is required but not yet emulated */ 84 /* A VMEXIT is required but not yet emulated */
82 bool exit_required; 85 bool exit_required;
@@ -91,6 +94,9 @@ struct nested_state {
91 94
92}; 95};
93 96
97#define MSRPM_OFFSETS 16
98static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
99
94struct vcpu_svm { 100struct vcpu_svm {
95 struct kvm_vcpu vcpu; 101 struct kvm_vcpu vcpu;
96 struct vmcb *vmcb; 102 struct vmcb *vmcb;
@@ -110,13 +116,39 @@ struct vcpu_svm {
110 struct nested_state nested; 116 struct nested_state nested;
111 117
112 bool nmi_singlestep; 118 bool nmi_singlestep;
119
120 unsigned int3_injected;
121 unsigned long int3_rip;
122};
123
124#define MSR_INVALID 0xffffffffU
125
126static struct svm_direct_access_msrs {
127 u32 index; /* Index of the MSR */
128 bool always; /* True if intercept is always on */
129} direct_access_msrs[] = {
130 { .index = MSR_K6_STAR, .always = true },
131 { .index = MSR_IA32_SYSENTER_CS, .always = true },
132#ifdef CONFIG_X86_64
133 { .index = MSR_GS_BASE, .always = true },
134 { .index = MSR_FS_BASE, .always = true },
135 { .index = MSR_KERNEL_GS_BASE, .always = true },
136 { .index = MSR_LSTAR, .always = true },
137 { .index = MSR_CSTAR, .always = true },
138 { .index = MSR_SYSCALL_MASK, .always = true },
139#endif
140 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
141 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
142 { .index = MSR_IA32_LASTINTFROMIP, .always = false },
143 { .index = MSR_IA32_LASTINTTOIP, .always = false },
144 { .index = MSR_INVALID, .always = false },
113}; 145};
114 146
115/* enable NPT for AMD64 and X86 with PAE */ 147/* enable NPT for AMD64 and X86 with PAE */
116#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 148#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
117static bool npt_enabled = true; 149static bool npt_enabled = true;
118#else 150#else
119static bool npt_enabled = false; 151static bool npt_enabled;
120#endif 152#endif
121static int npt = 1; 153static int npt = 1;
122 154
@@ -129,6 +161,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu);
129static void svm_complete_interrupts(struct vcpu_svm *svm); 161static void svm_complete_interrupts(struct vcpu_svm *svm);
130 162
131static int nested_svm_exit_handled(struct vcpu_svm *svm); 163static int nested_svm_exit_handled(struct vcpu_svm *svm);
164static int nested_svm_intercept(struct vcpu_svm *svm);
132static int nested_svm_vmexit(struct vcpu_svm *svm); 165static int nested_svm_vmexit(struct vcpu_svm *svm);
133static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 166static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
134 bool has_error_code, u32 error_code); 167 bool has_error_code, u32 error_code);
@@ -163,8 +196,8 @@ static unsigned long iopm_base;
163struct kvm_ldttss_desc { 196struct kvm_ldttss_desc {
164 u16 limit0; 197 u16 limit0;
165 u16 base0; 198 u16 base0;
166 unsigned base1 : 8, type : 5, dpl : 2, p : 1; 199 unsigned base1:8, type:5, dpl:2, p:1;
167 unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; 200 unsigned limit1:4, zero0:3, g:1, base2:8;
168 u32 base3; 201 u32 base3;
169 u32 zero1; 202 u32 zero1;
170} __attribute__((packed)); 203} __attribute__((packed));
@@ -194,6 +227,27 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
194#define MSRS_RANGE_SIZE 2048 227#define MSRS_RANGE_SIZE 2048
195#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) 228#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
196 229
230static u32 svm_msrpm_offset(u32 msr)
231{
232 u32 offset;
233 int i;
234
235 for (i = 0; i < NUM_MSR_MAPS; i++) {
236 if (msr < msrpm_ranges[i] ||
237 msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
238 continue;
239
240 offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
241 offset += (i * MSRS_RANGE_SIZE); /* add range offset */
242
243 /* Now we have the u8 offset - but need the u32 offset */
244 return offset / 4;
245 }
246
247 /* MSR not in any range */
248 return MSR_INVALID;
249}
250
197#define MAX_INST_SIZE 15 251#define MAX_INST_SIZE 15
198 252
199static inline u32 svm_has(u32 feat) 253static inline u32 svm_has(u32 feat)
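svm_msrpm_offset() above folds the three MSR ranges into a single u32 index into the permission map: two intercept bits per MSR means one byte covers four MSRs, each range owns MSRS_RANGE_SIZE (2048) bytes, and the final division turns the byte offset into a u32 offset. A self-contained sketch of the same arithmetic with one worked value, MSR_K6_STAR (0xc0000081):

#include <stdio.h>
#include <stdint.h>

#define MSRS_RANGE_SIZE	2048
#define MSRS_IN_RANGE	(MSRS_RANGE_SIZE * 8 / 2)
#define MSR_INVALID	0xffffffffU

static const uint32_t msrpm_ranges[] = { 0, 0xc0000000, 0xc0010000 };
#define NUM_MSR_MAPS (sizeof(msrpm_ranges) / sizeof(msrpm_ranges[0]))

static uint32_t msrpm_offset(uint32_t msr)
{
	for (unsigned i = 0; i < NUM_MSR_MAPS; i++) {
		if (msr < msrpm_ranges[i] ||
		    msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
			continue;
		uint32_t offset = (msr - msrpm_ranges[i]) / 4; /* byte offset */
		offset += i * MSRS_RANGE_SIZE;                 /* range offset */
		return offset / 4;                             /* u32 offset */
	}
	return MSR_INVALID;
}

int main(void)
{
	/* Range 1, byte 0x20 into it, plus 2048 bytes => u32 index 520. */
	printf("u32 offset for 0xc0000081: %u\n", msrpm_offset(0xc0000081));
	return 0;
}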
@@ -213,7 +267,7 @@ static inline void stgi(void)
213 267
214static inline void invlpga(unsigned long addr, u32 asid) 268static inline void invlpga(unsigned long addr, u32 asid)
215{ 269{
216 asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); 270 asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
217} 271}
218 272
219static inline void force_new_asid(struct kvm_vcpu *vcpu) 273static inline void force_new_asid(struct kvm_vcpu *vcpu)
@@ -235,23 +289,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
235 vcpu->arch.efer = efer; 289 vcpu->arch.efer = efer;
236} 290}
237 291
238static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
239 bool has_error_code, u32 error_code)
240{
241 struct vcpu_svm *svm = to_svm(vcpu);
242
243 /* If we are within a nested VM we'd better #VMEXIT and let the
244 guest handle the exception */
245 if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
246 return;
247
248 svm->vmcb->control.event_inj = nr
249 | SVM_EVTINJ_VALID
250 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
251 | SVM_EVTINJ_TYPE_EXEPT;
252 svm->vmcb->control.event_inj_err = error_code;
253}
254
255static int is_external_interrupt(u32 info) 292static int is_external_interrupt(u32 info)
256{ 293{
257 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; 294 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
@@ -264,7 +301,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
264 u32 ret = 0; 301 u32 ret = 0;
265 302
266 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) 303 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
267 ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; 304 ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
268 return ret & mask; 305 return ret & mask;
269} 306}
270 307
@@ -283,6 +320,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
283{ 320{
284 struct vcpu_svm *svm = to_svm(vcpu); 321 struct vcpu_svm *svm = to_svm(vcpu);
285 322
323 if (svm->vmcb->control.next_rip != 0)
324 svm->next_rip = svm->vmcb->control.next_rip;
325
286 if (!svm->next_rip) { 326 if (!svm->next_rip) {
287 if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != 327 if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
288 EMULATE_DONE) 328 EMULATE_DONE)
@@ -297,6 +337,43 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
297 svm_set_interrupt_shadow(vcpu, 0); 337 svm_set_interrupt_shadow(vcpu, 0);
298} 338}
299 339
340static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
341 bool has_error_code, u32 error_code,
342 bool reinject)
343{
344 struct vcpu_svm *svm = to_svm(vcpu);
345
346 /*
 347 	 * If we are within a nested VM, we'd better #VMEXIT and let the guest
 348 	 * handle the exception.
349 */
350 if (!reinject &&
351 nested_svm_check_exception(svm, nr, has_error_code, error_code))
352 return;
353
354 if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) {
355 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
356
357 /*
 358 		 * For guest debugging, where we have to reinject #BP if some
 359 		 * INT3 is guest-owned:
 360 		 * emulate nRIP by moving RIP forward. This will fail if the
 361 		 * injection raises a fault that is not intercepted, but that is
 362 		 * still better than failing in all cases.
363 */
364 skip_emulated_instruction(&svm->vcpu);
365 rip = kvm_rip_read(&svm->vcpu);
366 svm->int3_rip = rip + svm->vmcb->save.cs.base;
367 svm->int3_injected = rip - old_rip;
368 }
369
370 svm->vmcb->control.event_inj = nr
371 | SVM_EVTINJ_VALID
372 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
373 | SVM_EVTINJ_TYPE_EXEPT;
374 svm->vmcb->control.event_inj_err = error_code;
375}
376
300static int has_svm(void) 377static int has_svm(void)
301{ 378{
302 const char *msg; 379 const char *msg;
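On CPUs without the NRIP feature, the #BP path in svm_queue_exception() above records how far skip_emulated_instruction() moved RIP (int3_injected) and the resulting linear RIP (int3_rip). The consumer is not part of this hunk; presumably the vmexit completion path compares the current linear RIP against int3_rip and, if the injected INT3 never completed, rewinds RIP by int3_injected. A purely hypothetical sketch of that rewind step (the struct, fields, and helper are assumptions for illustration, not the patch's code):

#include <stdio.h>
#include <stdbool.h>

/* All names below are illustrative stand-ins, not KVM definitions. */
struct vcpu_state {
	unsigned long rip;
	unsigned long cs_base;
	unsigned int  int3_injected;	/* bytes RIP was moved forward */
	unsigned long int3_rip;		/* linear RIP right after the skip */
};

static void maybe_rewind_int3(struct vcpu_state *v, bool bp_completed)
{
	if (!v->int3_injected || bp_completed)
		return;
	/* The injection did not land: undo the emulated instruction skip. */
	if (v->rip + v->cs_base == v->int3_rip)
		v->rip -= v->int3_injected;
	v->int3_injected = 0;
}

int main(void)
{
	struct vcpu_state v = { .rip = 0x1001, .cs_base = 0,
				.int3_injected = 1, .int3_rip = 0x1001 };
	maybe_rewind_int3(&v, false);
	printf("rip = %#lx\n", v.rip);	/* back to 0x1000 */
	return 0;
}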
@@ -319,7 +396,7 @@ static int svm_hardware_enable(void *garbage)
319 396
320 struct svm_cpu_data *sd; 397 struct svm_cpu_data *sd;
321 uint64_t efer; 398 uint64_t efer;
322 struct descriptor_table gdt_descr; 399 struct desc_ptr gdt_descr;
323 struct desc_struct *gdt; 400 struct desc_struct *gdt;
324 int me = raw_smp_processor_id(); 401 int me = raw_smp_processor_id();
325 402
@@ -344,8 +421,8 @@ static int svm_hardware_enable(void *garbage)
344 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; 421 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
345 sd->next_asid = sd->max_asid + 1; 422 sd->next_asid = sd->max_asid + 1;
346 423
347 kvm_get_gdt(&gdt_descr); 424 native_store_gdt(&gdt_descr);
348 gdt = (struct desc_struct *)gdt_descr.base; 425 gdt = (struct desc_struct *)gdt_descr.address;
349 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); 426 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
350 427
351 wrmsrl(MSR_EFER, efer | EFER_SVME); 428 wrmsrl(MSR_EFER, efer | EFER_SVME);
@@ -391,42 +468,98 @@ err_1:
391 468
392} 469}
393 470
471static bool valid_msr_intercept(u32 index)
472{
473 int i;
474
475 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
476 if (direct_access_msrs[i].index == index)
477 return true;
478
479 return false;
480}
481
394static void set_msr_interception(u32 *msrpm, unsigned msr, 482static void set_msr_interception(u32 *msrpm, unsigned msr,
395 int read, int write) 483 int read, int write)
396{ 484{
485 u8 bit_read, bit_write;
486 unsigned long tmp;
487 u32 offset;
488
489 /*
 490 	 * If this warning triggers, extend the direct_access_msrs list at the
 491 	 * beginning of the file.
492 */
493 WARN_ON(!valid_msr_intercept(msr));
494
495 offset = svm_msrpm_offset(msr);
496 bit_read = 2 * (msr & 0x0f);
497 bit_write = 2 * (msr & 0x0f) + 1;
498 tmp = msrpm[offset];
499
500 BUG_ON(offset == MSR_INVALID);
501
502 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
503 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
504
505 msrpm[offset] = tmp;
506}
507
508static void svm_vcpu_init_msrpm(u32 *msrpm)
509{
397 int i; 510 int i;
398 511
399 for (i = 0; i < NUM_MSR_MAPS; i++) { 512 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
400 if (msr >= msrpm_ranges[i] && 513
401 msr < msrpm_ranges[i] + MSRS_IN_RANGE) { 514 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
402 u32 msr_offset = (i * MSRS_IN_RANGE + msr - 515 if (!direct_access_msrs[i].always)
403 msrpm_ranges[i]) * 2; 516 continue;
404 517
405 u32 *base = msrpm + (msr_offset / 32); 518 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
406 u32 msr_shift = msr_offset % 32; 519 }
407 u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); 520}
408 *base = (*base & ~(0x3 << msr_shift)) | 521
409 (mask << msr_shift); 522static void add_msr_offset(u32 offset)
523{
524 int i;
525
526 for (i = 0; i < MSRPM_OFFSETS; ++i) {
527
528 /* Offset already in list? */
529 if (msrpm_offsets[i] == offset)
410 return; 530 return;
411 } 531
532 /* Slot used by another offset? */
533 if (msrpm_offsets[i] != MSR_INVALID)
534 continue;
535
536 /* Add offset to list */
537 msrpm_offsets[i] = offset;
538
539 return;
412 } 540 }
541
542 /*
 543 	 * If this BUG triggers, the msrpm_offsets table has an overflow. Just
544 * increase MSRPM_OFFSETS in this case.
545 */
413 BUG(); 546 BUG();
414} 547}
415 548
416static void svm_vcpu_init_msrpm(u32 *msrpm) 549static void init_msrpm_offsets(void)
417{ 550{
418 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); 551 int i;
419 552
420#ifdef CONFIG_X86_64 553 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
421 set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); 554
422 set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); 555 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
423 set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); 556 u32 offset;
424 set_msr_interception(msrpm, MSR_LSTAR, 1, 1); 557
425 set_msr_interception(msrpm, MSR_CSTAR, 1, 1); 558 offset = svm_msrpm_offset(direct_access_msrs[i].index);
426 set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); 559 BUG_ON(offset == MSR_INVALID);
427#endif 560
428 set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); 561 add_msr_offset(offset);
429 set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); 562 }
430} 563}
431 564
432static void svm_enable_lbrv(struct vcpu_svm *svm) 565static void svm_enable_lbrv(struct vcpu_svm *svm)
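set_msr_interception() above first locates the u32 with svm_msrpm_offset() and then flips two bits inside it: each u32 spans 16 consecutive MSRs, so bits 2*(msr & 0x0f) and 2*(msr & 0x0f)+1 are that MSR's read and write intercept bits, and clearing a bit allows direct (non-intercepted) access. A small illustration of the bit bookkeeping in plain C (not the kernel helpers):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t word = 0xffffffff;	/* all intercepts set, as after memset(0xff) */
	uint32_t msr = 0xc0000081;	/* MSR_K6_STAR, for example */

	unsigned bit_read  = 2 * (msr & 0x0f);		/* read-intercept bit */
	unsigned bit_write = 2 * (msr & 0x0f) + 1;	/* write-intercept bit */

	/* Allow both read and write, as set_msr_interception(msrpm, msr, 1, 1)
	 * does: clear the two intercept bits. */
	word &= ~(1u << bit_read);
	word &= ~(1u << bit_write);

	printf("bits %u/%u cleared, word = %#x\n", bit_read, bit_write, word);
	return 0;
}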
@@ -467,6 +600,8 @@ static __init int svm_hardware_setup(void)
467 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); 600 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
468 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; 601 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
469 602
603 init_msrpm_offsets();
604
470 if (boot_cpu_has(X86_FEATURE_NX)) 605 if (boot_cpu_has(X86_FEATURE_NX))
471 kvm_enable_efer_bits(EFER_NX); 606 kvm_enable_efer_bits(EFER_NX);
472 607
@@ -523,7 +658,7 @@ static void init_seg(struct vmcb_seg *seg)
523{ 658{
524 seg->selector = 0; 659 seg->selector = 0;
525 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | 660 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
526 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ 661 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
527 seg->limit = 0xffff; 662 seg->limit = 0xffff;
528 seg->base = 0; 663 seg->base = 0;
529} 664}
@@ -543,16 +678,16 @@ static void init_vmcb(struct vcpu_svm *svm)
543 678
544 svm->vcpu.fpu_active = 1; 679 svm->vcpu.fpu_active = 1;
545 680
546 control->intercept_cr_read = INTERCEPT_CR0_MASK | 681 control->intercept_cr_read = INTERCEPT_CR0_MASK |
547 INTERCEPT_CR3_MASK | 682 INTERCEPT_CR3_MASK |
548 INTERCEPT_CR4_MASK; 683 INTERCEPT_CR4_MASK;
549 684
550 control->intercept_cr_write = INTERCEPT_CR0_MASK | 685 control->intercept_cr_write = INTERCEPT_CR0_MASK |
551 INTERCEPT_CR3_MASK | 686 INTERCEPT_CR3_MASK |
552 INTERCEPT_CR4_MASK | 687 INTERCEPT_CR4_MASK |
553 INTERCEPT_CR8_MASK; 688 INTERCEPT_CR8_MASK;
554 689
555 control->intercept_dr_read = INTERCEPT_DR0_MASK | 690 control->intercept_dr_read = INTERCEPT_DR0_MASK |
556 INTERCEPT_DR1_MASK | 691 INTERCEPT_DR1_MASK |
557 INTERCEPT_DR2_MASK | 692 INTERCEPT_DR2_MASK |
558 INTERCEPT_DR3_MASK | 693 INTERCEPT_DR3_MASK |
@@ -561,7 +696,7 @@ static void init_vmcb(struct vcpu_svm *svm)
561 INTERCEPT_DR6_MASK | 696 INTERCEPT_DR6_MASK |
562 INTERCEPT_DR7_MASK; 697 INTERCEPT_DR7_MASK;
563 698
564 control->intercept_dr_write = INTERCEPT_DR0_MASK | 699 control->intercept_dr_write = INTERCEPT_DR0_MASK |
565 INTERCEPT_DR1_MASK | 700 INTERCEPT_DR1_MASK |
566 INTERCEPT_DR2_MASK | 701 INTERCEPT_DR2_MASK |
567 INTERCEPT_DR3_MASK | 702 INTERCEPT_DR3_MASK |
@@ -575,7 +710,7 @@ static void init_vmcb(struct vcpu_svm *svm)
575 (1 << MC_VECTOR); 710 (1 << MC_VECTOR);
576 711
577 712
578 control->intercept = (1ULL << INTERCEPT_INTR) | 713 control->intercept = (1ULL << INTERCEPT_INTR) |
579 (1ULL << INTERCEPT_NMI) | 714 (1ULL << INTERCEPT_NMI) |
580 (1ULL << INTERCEPT_SMI) | 715 (1ULL << INTERCEPT_SMI) |
581 (1ULL << INTERCEPT_SELECTIVE_CR0) | 716 (1ULL << INTERCEPT_SELECTIVE_CR0) |
@@ -636,7 +771,8 @@ static void init_vmcb(struct vcpu_svm *svm)
636 save->rip = 0x0000fff0; 771 save->rip = 0x0000fff0;
637 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; 772 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
638 773
639 /* This is the guest-visible cr0 value. 774 /*
775 * This is the guest-visible cr0 value.
640 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. 776 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
641 */ 777 */
642 svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 778 svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
@@ -729,6 +865,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
729 svm_vcpu_init_msrpm(svm->msrpm); 865 svm_vcpu_init_msrpm(svm->msrpm);
730 866
731 svm->nested.msrpm = page_address(nested_msrpm_pages); 867 svm->nested.msrpm = page_address(nested_msrpm_pages);
868 svm_vcpu_init_msrpm(svm->nested.msrpm);
732 869
733 svm->vmcb = page_address(page); 870 svm->vmcb = page_address(page);
734 clear_page(svm->vmcb); 871 clear_page(svm->vmcb);
@@ -882,7 +1019,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
882 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 1019 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
883 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 1020 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
884 1021
885 /* AMD's VMCB does not have an explicit unusable field, so emulate it 1022 /*
1023 * AMD's VMCB does not have an explicit unusable field, so emulate it
886 * for cross vendor migration purposes by "not present" 1024 * for cross vendor migration purposes by "not present"
887 */ 1025 */
888 var->unusable = !var->present || (var->type == 0); 1026 var->unusable = !var->present || (var->type == 0);
@@ -918,7 +1056,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
918 var->type |= 0x1; 1056 var->type |= 0x1;
919 break; 1057 break;
920 case VCPU_SREG_SS: 1058 case VCPU_SREG_SS:
921 /* On AMD CPUs sometimes the DB bit in the segment 1059 /*
1060 * On AMD CPUs sometimes the DB bit in the segment
922 * descriptor is left as 1, although the whole segment has 1061 * descriptor is left as 1, although the whole segment has
923 * been made unusable. Clear it here to pass an Intel VMX 1062 * been made unusable. Clear it here to pass an Intel VMX
924 * entry check when cross vendor migrating. 1063 * entry check when cross vendor migrating.
@@ -936,36 +1075,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu)
936 return save->cpl; 1075 return save->cpl;
937} 1076}
938 1077
939static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 1078static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
940{ 1079{
941 struct vcpu_svm *svm = to_svm(vcpu); 1080 struct vcpu_svm *svm = to_svm(vcpu);
942 1081
943 dt->limit = svm->vmcb->save.idtr.limit; 1082 dt->size = svm->vmcb->save.idtr.limit;
944 dt->base = svm->vmcb->save.idtr.base; 1083 dt->address = svm->vmcb->save.idtr.base;
945} 1084}
946 1085
947static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 1086static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
948{ 1087{
949 struct vcpu_svm *svm = to_svm(vcpu); 1088 struct vcpu_svm *svm = to_svm(vcpu);
950 1089
951 svm->vmcb->save.idtr.limit = dt->limit; 1090 svm->vmcb->save.idtr.limit = dt->size;
952 svm->vmcb->save.idtr.base = dt->base ; 1091 svm->vmcb->save.idtr.base = dt->address ;
953} 1092}
954 1093
955static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 1094static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
956{ 1095{
957 struct vcpu_svm *svm = to_svm(vcpu); 1096 struct vcpu_svm *svm = to_svm(vcpu);
958 1097
959 dt->limit = svm->vmcb->save.gdtr.limit; 1098 dt->size = svm->vmcb->save.gdtr.limit;
960 dt->base = svm->vmcb->save.gdtr.base; 1099 dt->address = svm->vmcb->save.gdtr.base;
961} 1100}
962 1101
963static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 1102static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
964{ 1103{
965 struct vcpu_svm *svm = to_svm(vcpu); 1104 struct vcpu_svm *svm = to_svm(vcpu);
966 1105
967 svm->vmcb->save.gdtr.limit = dt->limit; 1106 svm->vmcb->save.gdtr.limit = dt->size;
968 svm->vmcb->save.gdtr.base = dt->base ; 1107 svm->vmcb->save.gdtr.base = dt->address ;
969} 1108}
970 1109
971static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) 1110static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
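The descriptor-table hunks above are a pure type change: KVM's private struct descriptor_table (fields limit and base) is replaced by the architecture's existing struct desc_ptr, whose fields are called size and address. For reference, desc_ptr looks roughly like this (paraphrased from arch/x86/include/asm/desc_defs.h, shown only to explain the limit->size and base->address substitutions):

struct desc_ptr {
	unsigned short size;	/* table limit              */
	unsigned long address;	/* linear base of the table */
} __attribute__((packed));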
@@ -978,6 +1117,7 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
978 1117
979static void update_cr0_intercept(struct vcpu_svm *svm) 1118static void update_cr0_intercept(struct vcpu_svm *svm)
980{ 1119{
1120 struct vmcb *vmcb = svm->vmcb;
981 ulong gcr0 = svm->vcpu.arch.cr0; 1121 ulong gcr0 = svm->vcpu.arch.cr0;
982 u64 *hcr0 = &svm->vmcb->save.cr0; 1122 u64 *hcr0 = &svm->vmcb->save.cr0;
983 1123
@@ -989,11 +1129,25 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
989 1129
990 1130
991 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { 1131 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
992 svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; 1132 vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
993 svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; 1133 vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
1134 if (is_nested(svm)) {
1135 struct vmcb *hsave = svm->nested.hsave;
1136
1137 hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
1138 hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
1139 vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read;
1140 vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write;
1141 }
994 } else { 1142 } else {
995 svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; 1143 svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
996 svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; 1144 svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
1145 if (is_nested(svm)) {
1146 struct vmcb *hsave = svm->nested.hsave;
1147
1148 hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
1149 hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
1150 }
997 } 1151 }
998} 1152}
999 1153
@@ -1001,6 +1155,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1001{ 1155{
1002 struct vcpu_svm *svm = to_svm(vcpu); 1156 struct vcpu_svm *svm = to_svm(vcpu);
1003 1157
1158 if (is_nested(svm)) {
1159 /*
1160 * We are here because we run in nested mode, the host kvm
1161 * intercepts cr0 writes but the l1 hypervisor does not.
1162 * But the L1 hypervisor may intercept selective cr0 writes.
1163 * This needs to be checked here.
1164 */
1165 unsigned long old, new;
1166
1167 /* Remove bits that would trigger a real cr0 write intercept */
1168 old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
1169 new = cr0 & SVM_CR0_SELECTIVE_MASK;
1170
1171 if (old == new) {
1172 /* cr0 write with ts and mp unchanged */
1173 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
1174 if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
1175 return;
1176 }
1177 }
1178
1004#ifdef CONFIG_X86_64 1179#ifdef CONFIG_X86_64
1005 if (vcpu->arch.efer & EFER_LME) { 1180 if (vcpu->arch.efer & EFER_LME) {
1006 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 1181 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
@@ -1134,70 +1309,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1134 svm->vmcb->control.asid = sd->next_asid++; 1309 svm->vmcb->control.asid = sd->next_asid++;
1135} 1310}
1136 1311
1137static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) 1312static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1138{ 1313{
1139 struct vcpu_svm *svm = to_svm(vcpu); 1314 struct vcpu_svm *svm = to_svm(vcpu);
1140 1315
1141 switch (dr) { 1316 svm->vmcb->save.dr7 = value;
1142 case 0 ... 3:
1143 *dest = vcpu->arch.db[dr];
1144 break;
1145 case 4:
1146 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1147 return EMULATE_FAIL; /* will re-inject UD */
1148 /* fall through */
1149 case 6:
1150 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1151 *dest = vcpu->arch.dr6;
1152 else
1153 *dest = svm->vmcb->save.dr6;
1154 break;
1155 case 5:
1156 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1157 return EMULATE_FAIL; /* will re-inject UD */
1158 /* fall through */
1159 case 7:
1160 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1161 *dest = vcpu->arch.dr7;
1162 else
1163 *dest = svm->vmcb->save.dr7;
1164 break;
1165 }
1166
1167 return EMULATE_DONE;
1168}
1169
1170static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value)
1171{
1172 struct vcpu_svm *svm = to_svm(vcpu);
1173
1174 switch (dr) {
1175 case 0 ... 3:
1176 vcpu->arch.db[dr] = value;
1177 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1178 vcpu->arch.eff_db[dr] = value;
1179 break;
1180 case 4:
1181 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1182 return EMULATE_FAIL; /* will re-inject UD */
1183 /* fall through */
1184 case 6:
1185 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
1186 break;
1187 case 5:
1188 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1189 return EMULATE_FAIL; /* will re-inject UD */
1190 /* fall through */
1191 case 7:
1192 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
1193 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1194 svm->vmcb->save.dr7 = vcpu->arch.dr7;
1195 vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
1196 }
1197 break;
1198 }
1199
1200 return EMULATE_DONE;
1201} 1317}
1202 1318
1203static int pf_interception(struct vcpu_svm *svm) 1319static int pf_interception(struct vcpu_svm *svm)
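The removal of svm_get_dr()/svm_set_dr() pairs with the .get_dr/.set_dr to .set_dr7 change in svm_x86_ops near the end of this file: debug-register access emulation (including the DR4/DR5 aliasing and the guest_debug special cases visible in the deleted code) moves into common x86 code, and the vendor backend is only asked to push the final DR7 value into the VMCB via svm_set_dr7().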
@@ -1234,7 +1350,7 @@ static int db_interception(struct vcpu_svm *svm)
1234 } 1350 }
1235 1351
1236 if (svm->vcpu.guest_debug & 1352 if (svm->vcpu.guest_debug &
1237 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ 1353 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1238 kvm_run->exit_reason = KVM_EXIT_DEBUG; 1354 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1239 kvm_run->debug.arch.pc = 1355 kvm_run->debug.arch.pc =
1240 svm->vmcb->save.cs.base + svm->vmcb->save.rip; 1356 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
@@ -1268,7 +1384,22 @@ static int ud_interception(struct vcpu_svm *svm)
1268static void svm_fpu_activate(struct kvm_vcpu *vcpu) 1384static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1269{ 1385{
1270 struct vcpu_svm *svm = to_svm(vcpu); 1386 struct vcpu_svm *svm = to_svm(vcpu);
1271 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 1387 u32 excp;
1388
1389 if (is_nested(svm)) {
1390 u32 h_excp, n_excp;
1391
1392 h_excp = svm->nested.hsave->control.intercept_exceptions;
1393 n_excp = svm->nested.intercept_exceptions;
1394 h_excp &= ~(1 << NM_VECTOR);
1395 excp = h_excp | n_excp;
1396 } else {
1397 excp = svm->vmcb->control.intercept_exceptions;
1398 excp &= ~(1 << NM_VECTOR);
1399 }
1400
1401 svm->vmcb->control.intercept_exceptions = excp;
1402
1272 svm->vcpu.fpu_active = 1; 1403 svm->vcpu.fpu_active = 1;
1273 update_cr0_intercept(svm); 1404 update_cr0_intercept(svm);
1274} 1405}
@@ -1309,29 +1440,23 @@ static int shutdown_interception(struct vcpu_svm *svm)
1309 1440
1310static int io_interception(struct vcpu_svm *svm) 1441static int io_interception(struct vcpu_svm *svm)
1311{ 1442{
1443 struct kvm_vcpu *vcpu = &svm->vcpu;
1312 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ 1444 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1313 int size, in, string; 1445 int size, in, string;
1314 unsigned port; 1446 unsigned port;
1315 1447
1316 ++svm->vcpu.stat.io_exits; 1448 ++svm->vcpu.stat.io_exits;
1317
1318 svm->next_rip = svm->vmcb->control.exit_info_2;
1319
1320 string = (io_info & SVM_IOIO_STR_MASK) != 0; 1449 string = (io_info & SVM_IOIO_STR_MASK) != 0;
1321
1322 if (string) {
1323 if (emulate_instruction(&svm->vcpu,
1324 0, 0, 0) == EMULATE_DO_MMIO)
1325 return 0;
1326 return 1;
1327 }
1328
1329 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1450 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1451 if (string || in)
1452 return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO);
1453
1330 port = io_info >> 16; 1454 port = io_info >> 16;
1331 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; 1455 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1332 1456 svm->next_rip = svm->vmcb->control.exit_info_2;
1333 skip_emulated_instruction(&svm->vcpu); 1457 skip_emulated_instruction(&svm->vcpu);
1334 return kvm_emulate_pio(&svm->vcpu, in, size, port); 1458
1459 return kvm_fast_pio_out(vcpu, size, port);
1335} 1460}
1336 1461
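io_interception() is reduced to a dispatcher: string operations and all IN operations now go through the x86 instruction emulator, while the remaining case, a non-string OUT, takes the kvm_fast_pio_out() path added elsewhere in this series (it handles only OUT, which is why IN is folded into the emulator path). The decode above follows the IOIO exit-info layout of the SVM spec: bit 0 of exit_info_1 is the direction (1 = IN), bit 2 the string flag, bits 4-6 the operand size (the shifted value is 1, 2 or 4 bytes) and bits 16-31 the port number. A byte-sized out %al,(%dx) with DX = 0x3f8, for example, decodes to string = 0, in = 0, size = 1, port = 0x3f8 and ends up in kvm_fast_pio_out(vcpu, 1, 0x3f8).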
1337static int nmi_interception(struct vcpu_svm *svm) 1462static int nmi_interception(struct vcpu_svm *svm)
@@ -1384,6 +1509,8 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
1384static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 1509static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1385 bool has_error_code, u32 error_code) 1510 bool has_error_code, u32 error_code)
1386{ 1511{
1512 int vmexit;
1513
1387 if (!is_nested(svm)) 1514 if (!is_nested(svm))
1388 return 0; 1515 return 0;
1389 1516
@@ -1392,21 +1519,28 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1392 svm->vmcb->control.exit_info_1 = error_code; 1519 svm->vmcb->control.exit_info_1 = error_code;
1393 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; 1520 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1394 1521
1395 return nested_svm_exit_handled(svm); 1522 vmexit = nested_svm_intercept(svm);
1523 if (vmexit == NESTED_EXIT_DONE)
1524 svm->nested.exit_required = true;
1525
1526 return vmexit;
1396} 1527}
1397 1528
1398static inline int nested_svm_intr(struct vcpu_svm *svm) 1529/* This function returns true if it is safe to enable the irq window */

1530static inline bool nested_svm_intr(struct vcpu_svm *svm)
1399{ 1531{
1400 if (!is_nested(svm)) 1532 if (!is_nested(svm))
1401 return 0; 1533 return true;
1402 1534
1403 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) 1535 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1404 return 0; 1536 return true;
1405 1537
1406 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) 1538 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1407 return 0; 1539 return false;
1408 1540
1409 svm->vmcb->control.exit_code = SVM_EXIT_INTR; 1541 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
1542 svm->vmcb->control.exit_info_1 = 0;
1543 svm->vmcb->control.exit_info_2 = 0;
1410 1544
1411 if (svm->nested.intercept & 1ULL) { 1545 if (svm->nested.intercept & 1ULL) {
1412 /* 1546 /*
@@ -1417,21 +1551,40 @@ static inline int nested_svm_intr(struct vcpu_svm *svm)
1417 */ 1551 */
1418 svm->nested.exit_required = true; 1552 svm->nested.exit_required = true;
1419 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); 1553 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1420 return 1; 1554 return false;
1421 } 1555 }
1422 1556
1423 return 0; 1557 return true;
1558}
1559
 1560/* This function returns true if it is safe to enable the nmi window */
1561static inline bool nested_svm_nmi(struct vcpu_svm *svm)
1562{
1563 if (!is_nested(svm))
1564 return true;
1565
1566 if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
1567 return true;
1568
1569 svm->vmcb->control.exit_code = SVM_EXIT_NMI;
1570 svm->nested.exit_required = true;
1571
1572 return false;
1424} 1573}
1425 1574
1426static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) 1575static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
1427{ 1576{
1428 struct page *page; 1577 struct page *page;
1429 1578
1579 might_sleep();
1580
1430 page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); 1581 page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1431 if (is_error_page(page)) 1582 if (is_error_page(page))
1432 goto error; 1583 goto error;
1433 1584
1434 return kmap_atomic(page, idx); 1585 *_page = page;
1586
1587 return kmap(page);
1435 1588
1436error: 1589error:
1437 kvm_release_page_clean(page); 1590 kvm_release_page_clean(page);
@@ -1440,61 +1593,55 @@ error:
1440 return NULL; 1593 return NULL;
1441} 1594}
1442 1595
1443static void nested_svm_unmap(void *addr, enum km_type idx) 1596static void nested_svm_unmap(struct page *page)
1444{ 1597{
1445 struct page *page; 1598 kunmap(page);
1599 kvm_release_page_dirty(page);
1600}
1446 1601
1447 if (!addr) 1602static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
1448 return; 1603{
1604 unsigned port;
1605 u8 val, bit;
1606 u64 gpa;
1449 1607
1450 page = kmap_atomic_to_page(addr); 1608 if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
1609 return NESTED_EXIT_HOST;
1451 1610
1452 kunmap_atomic(addr, idx); 1611 port = svm->vmcb->control.exit_info_1 >> 16;
1453 kvm_release_page_dirty(page); 1612 gpa = svm->nested.vmcb_iopm + (port / 8);
1613 bit = port % 8;
1614 val = 0;
1615
1616 if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
1617 val &= (1 << bit);
1618
1619 return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1454} 1620}
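A condensed sketch of the lookup nested_svm_intercept_ioio() performs, assuming (as elsewhere in KVM) that kvm_read_guest() returns 0 on success; the IO permission map holds one bit per port, and the sketch checks only the first port of the access for brevity:

static bool nested_ioio_bit_set(struct vcpu_svm *svm, u16 port)
{
	u64 gpa = svm->nested.vmcb_iopm + port / 8;	/* byte holding this port's bit */
	u8  bit = port % 8;
	u8  val = 0;

	if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
		return true;	/* unreadable L1 bitmap: reflect the exit to be safe */

	return val & (1 << bit);
}

A set bit means L1 wants the #VMEXIT (NESTED_EXIT_DONE); a clear bit means KVM handles the port access itself (NESTED_EXIT_HOST).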
1455 1621
1456static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) 1622static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
1457{ 1623{
1458 u32 param = svm->vmcb->control.exit_info_1 & 1; 1624 u32 offset, msr, value;
1459 u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 1625 int write, mask;
1460 bool ret = false;
1461 u32 t0, t1;
1462 u8 *msrpm;
1463 1626
1464 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) 1627 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1465 return false; 1628 return NESTED_EXIT_HOST;
1466 1629
1467 msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); 1630 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1631 offset = svm_msrpm_offset(msr);
1632 write = svm->vmcb->control.exit_info_1 & 1;
1633 mask = 1 << ((2 * (msr & 0xf)) + write);
1468 1634
1469 if (!msrpm) 1635 if (offset == MSR_INVALID)
1470 goto out; 1636 return NESTED_EXIT_DONE;
1471 1637
1472 switch (msr) { 1638 /* Offset is in 32 bit units but need in 8 bit units */
1473 case 0 ... 0x1fff: 1639 offset *= 4;
1474 t0 = (msr * 2) % 8;
1475 t1 = msr / 8;
1476 break;
1477 case 0xc0000000 ... 0xc0001fff:
1478 t0 = (8192 + msr - 0xc0000000) * 2;
1479 t1 = (t0 / 8);
1480 t0 %= 8;
1481 break;
1482 case 0xc0010000 ... 0xc0011fff:
1483 t0 = (16384 + msr - 0xc0010000) * 2;
1484 t1 = (t0 / 8);
1485 t0 %= 8;
1486 break;
1487 default:
1488 ret = true;
1489 goto out;
1490 }
1491 1640
1492 ret = msrpm[t1] & ((1 << param) << t0); 1641 if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
1493 1642 return NESTED_EXIT_DONE;
1494out:
1495 nested_svm_unmap(msrpm, KM_USER0);
1496 1643
1497 return ret; 1644 return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1498} 1645}
1499 1646
1500static int nested_svm_exit_special(struct vcpu_svm *svm) 1647static int nested_svm_exit_special(struct vcpu_svm *svm)
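The rewritten MSR-intercept check no longer maps L1's permission bitmap: it computes which 32-bit word covers the MSR, reads just that word with kvm_read_guest(), and tests bit 2 * (msr & 0xf) + write inside it (each word covers 16 MSRs, two bits per MSR, the write bit directly above the read bit). svm_msrpm_offset() itself is added earlier in the patch; a sketch consistent both with its use here and with the three ranges of the removed open-coded version:

#define MSR_INVALID	0xffffffffU

static u32 svm_msrpm_offset(u32 msr)
{
	/*
	 * The MSR permission map covers three ranges of 0x2000 MSRs,
	 * 2048 bytes (two bits per MSR) each.
	 */
	static const u32 range_start[] = { 0x00000000, 0xc0000000, 0xc0010000 };
	u32 offset;
	int i;

	for (i = 0; i < 3; i++) {
		if (msr < range_start[i] || msr >= range_start[i] + 0x2000)
			continue;

		offset  = (msr - range_start[i]) / 4;	/* 4 MSRs per byte       */
		offset += i * 0x800;			/* 2048 bytes per range  */

		return offset / 4;			/* bytes -> 32-bit words */
	}

	return MSR_INVALID;	/* MSR not covered by the permission map */
}

For EFER (0xc0000080), for instance, this yields word offset 0x208, i.e. byte offset 0x820, the same location the removed t0/t1 arithmetic computed.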
@@ -1504,17 +1651,21 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
1504 switch (exit_code) { 1651 switch (exit_code) {
1505 case SVM_EXIT_INTR: 1652 case SVM_EXIT_INTR:
1506 case SVM_EXIT_NMI: 1653 case SVM_EXIT_NMI:
1654 case SVM_EXIT_EXCP_BASE + MC_VECTOR:
1507 return NESTED_EXIT_HOST; 1655 return NESTED_EXIT_HOST;
1508 /* For now we are always handling NPFs when using them */
1509 case SVM_EXIT_NPF: 1656 case SVM_EXIT_NPF:
1657 /* For now we are always handling NPFs when using them */
1510 if (npt_enabled) 1658 if (npt_enabled)
1511 return NESTED_EXIT_HOST; 1659 return NESTED_EXIT_HOST;
1512 break; 1660 break;
1513 /* When we're shadowing, trap PFs */
1514 case SVM_EXIT_EXCP_BASE + PF_VECTOR: 1661 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
1662 /* When we're shadowing, trap PFs */
1515 if (!npt_enabled) 1663 if (!npt_enabled)
1516 return NESTED_EXIT_HOST; 1664 return NESTED_EXIT_HOST;
1517 break; 1665 break;
1666 case SVM_EXIT_EXCP_BASE + NM_VECTOR:
1667 nm_interception(svm);
1668 break;
1518 default: 1669 default:
1519 break; 1670 break;
1520 } 1671 }
@@ -1525,7 +1676,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
1525/* 1676/*
1526 * If this function returns true, this #vmexit was already handled 1677 * If this function returns true, this #vmexit was already handled
1527 */ 1678 */
1528static int nested_svm_exit_handled(struct vcpu_svm *svm) 1679static int nested_svm_intercept(struct vcpu_svm *svm)
1529{ 1680{
1530 u32 exit_code = svm->vmcb->control.exit_code; 1681 u32 exit_code = svm->vmcb->control.exit_code;
1531 int vmexit = NESTED_EXIT_HOST; 1682 int vmexit = NESTED_EXIT_HOST;
@@ -1534,6 +1685,9 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
1534 case SVM_EXIT_MSR: 1685 case SVM_EXIT_MSR:
1535 vmexit = nested_svm_exit_handled_msr(svm); 1686 vmexit = nested_svm_exit_handled_msr(svm);
1536 break; 1687 break;
1688 case SVM_EXIT_IOIO:
1689 vmexit = nested_svm_intercept_ioio(svm);
1690 break;
1537 case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { 1691 case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
1538 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); 1692 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
1539 if (svm->nested.intercept_cr_read & cr_bits) 1693 if (svm->nested.intercept_cr_read & cr_bits)
@@ -1564,6 +1718,10 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
1564 vmexit = NESTED_EXIT_DONE; 1718 vmexit = NESTED_EXIT_DONE;
1565 break; 1719 break;
1566 } 1720 }
1721 case SVM_EXIT_ERR: {
1722 vmexit = NESTED_EXIT_DONE;
1723 break;
1724 }
1567 default: { 1725 default: {
1568 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); 1726 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
1569 if (svm->nested.intercept & exit_bits) 1727 if (svm->nested.intercept & exit_bits)
@@ -1571,9 +1729,17 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm)
1571 } 1729 }
1572 } 1730 }
1573 1731
1574 if (vmexit == NESTED_EXIT_DONE) { 1732 return vmexit;
1733}
1734
1735static int nested_svm_exit_handled(struct vcpu_svm *svm)
1736{
1737 int vmexit;
1738
1739 vmexit = nested_svm_intercept(svm);
1740
1741 if (vmexit == NESTED_EXIT_DONE)
1575 nested_svm_vmexit(svm); 1742 nested_svm_vmexit(svm);
1576 }
1577 1743
1578 return vmexit; 1744 return vmexit;
1579} 1745}
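Splitting the old nested_svm_exit_handled() into nested_svm_intercept(), which only computes the decision, plus a thin wrapper that actually performs the #VMEXIT is what allows nested_svm_check_exception() further up to query L1's intercept state and merely set nested.exit_required, deferring the emulated #VMEXIT to a point where it is safe to carry out rather than performing it immediately.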
@@ -1615,6 +1781,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
1615 struct vmcb *nested_vmcb; 1781 struct vmcb *nested_vmcb;
1616 struct vmcb *hsave = svm->nested.hsave; 1782 struct vmcb *hsave = svm->nested.hsave;
1617 struct vmcb *vmcb = svm->vmcb; 1783 struct vmcb *vmcb = svm->vmcb;
1784 struct page *page;
1618 1785
1619 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, 1786 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
1620 vmcb->control.exit_info_1, 1787 vmcb->control.exit_info_1,
@@ -1622,10 +1789,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
1622 vmcb->control.exit_int_info, 1789 vmcb->control.exit_int_info,
1623 vmcb->control.exit_int_info_err); 1790 vmcb->control.exit_int_info_err);
1624 1791
1625 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); 1792 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
1626 if (!nested_vmcb) 1793 if (!nested_vmcb)
1627 return 1; 1794 return 1;
1628 1795
1796 /* Exit nested SVM mode */
1797 svm->nested.vmcb = 0;
1798
1629 /* Give the current vmcb to the guest */ 1799 /* Give the current vmcb to the guest */
1630 disable_gif(svm); 1800 disable_gif(svm);
1631 1801
@@ -1635,9 +1805,10 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
1635 nested_vmcb->save.ds = vmcb->save.ds; 1805 nested_vmcb->save.ds = vmcb->save.ds;
1636 nested_vmcb->save.gdtr = vmcb->save.gdtr; 1806 nested_vmcb->save.gdtr = vmcb->save.gdtr;
1637 nested_vmcb->save.idtr = vmcb->save.idtr; 1807 nested_vmcb->save.idtr = vmcb->save.idtr;
1638 if (npt_enabled) 1808 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
1639 nested_vmcb->save.cr3 = vmcb->save.cr3; 1809 nested_vmcb->save.cr3 = svm->vcpu.arch.cr3;
1640 nested_vmcb->save.cr2 = vmcb->save.cr2; 1810 nested_vmcb->save.cr2 = vmcb->save.cr2;
1811 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
1641 nested_vmcb->save.rflags = vmcb->save.rflags; 1812 nested_vmcb->save.rflags = vmcb->save.rflags;
1642 nested_vmcb->save.rip = vmcb->save.rip; 1813 nested_vmcb->save.rip = vmcb->save.rip;
1643 nested_vmcb->save.rsp = vmcb->save.rsp; 1814 nested_vmcb->save.rsp = vmcb->save.rsp;
@@ -1709,10 +1880,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
1709 svm->vmcb->save.cpl = 0; 1880 svm->vmcb->save.cpl = 0;
1710 svm->vmcb->control.exit_int_info = 0; 1881 svm->vmcb->control.exit_int_info = 0;
1711 1882
1712 /* Exit nested SVM mode */ 1883 nested_svm_unmap(page);
1713 svm->nested.vmcb = 0;
1714
1715 nested_svm_unmap(nested_vmcb, KM_USER0);
1716 1884
1717 kvm_mmu_reset_context(&svm->vcpu); 1885 kvm_mmu_reset_context(&svm->vcpu);
1718 kvm_mmu_load(&svm->vcpu); 1886 kvm_mmu_load(&svm->vcpu);
@@ -1722,19 +1890,33 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
1722 1890
1723static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) 1891static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
1724{ 1892{
1725 u32 *nested_msrpm; 1893 /*
1894 * This function merges the msr permission bitmaps of kvm and the
 1895 * nested vmcb. It is optimized in that it only merges the parts where
1896 * the kvm msr permission bitmap may contain zero bits
1897 */
1726 int i; 1898 int i;
1727 1899
1728 nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); 1900 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1729 if (!nested_msrpm) 1901 return true;
1730 return false;
1731 1902
1732 for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) 1903 for (i = 0; i < MSRPM_OFFSETS; i++) {
1733 svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; 1904 u32 value, p;
1905 u64 offset;
1734 1906
1735 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); 1907 if (msrpm_offsets[i] == 0xffffffff)
1908 break;
1909
1910 p = msrpm_offsets[i];
1911 offset = svm->nested.vmcb_msrpm + (p * 4);
1912
1913 if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
1914 return false;
1915
1916 svm->nested.msrpm[p] = svm->msrpm[p] | value;
1917 }
1736 1918
1737 nested_svm_unmap(nested_msrpm, KM_USER0); 1919 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
1738 1920
1739 return true; 1921 return true;
1740} 1922}
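The merge loop only visits the permission-map words recorded in msrpm_offsets[] at init time. Because intercepts are ORed, an MSR that KVM intercepts itself stays intercepted no matter what L1's bitmap says; only words in which KVM's own bitmap can contain zero (pass-through) bits need L1's bits merged in. Reading those words four bytes at a time with kvm_read_guest() also removes the need to map the guest's bitmap page at all, which is why the kmap-based variant above could be deleted.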
@@ -1744,26 +1926,34 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
1744 struct vmcb *nested_vmcb; 1926 struct vmcb *nested_vmcb;
1745 struct vmcb *hsave = svm->nested.hsave; 1927 struct vmcb *hsave = svm->nested.hsave;
1746 struct vmcb *vmcb = svm->vmcb; 1928 struct vmcb *vmcb = svm->vmcb;
1929 struct page *page;
1930 u64 vmcb_gpa;
1747 1931
1748 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); 1932 vmcb_gpa = svm->vmcb->save.rax;
1933
1934 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
1749 if (!nested_vmcb) 1935 if (!nested_vmcb)
1750 return false; 1936 return false;
1751 1937
1752 /* nested_vmcb is our indicator if nested SVM is activated */ 1938 trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
1753 svm->nested.vmcb = svm->vmcb->save.rax;
1754
1755 trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb,
1756 nested_vmcb->save.rip, 1939 nested_vmcb->save.rip,
1757 nested_vmcb->control.int_ctl, 1940 nested_vmcb->control.int_ctl,
1758 nested_vmcb->control.event_inj, 1941 nested_vmcb->control.event_inj,
1759 nested_vmcb->control.nested_ctl); 1942 nested_vmcb->control.nested_ctl);
1760 1943
1944 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read,
1945 nested_vmcb->control.intercept_cr_write,
1946 nested_vmcb->control.intercept_exceptions,
1947 nested_vmcb->control.intercept);
1948
1761 /* Clear internal status */ 1949 /* Clear internal status */
1762 kvm_clear_exception_queue(&svm->vcpu); 1950 kvm_clear_exception_queue(&svm->vcpu);
1763 kvm_clear_interrupt_queue(&svm->vcpu); 1951 kvm_clear_interrupt_queue(&svm->vcpu);
1764 1952
1765 /* Save the old vmcb, so we don't need to pick what we save, but 1953 /*
1766 can restore everything when a VMEXIT occurs */ 1954 * Save the old vmcb, so we don't need to pick what we save, but can
1955 * restore everything when a VMEXIT occurs
1956 */
1767 hsave->save.es = vmcb->save.es; 1957 hsave->save.es = vmcb->save.es;
1768 hsave->save.cs = vmcb->save.cs; 1958 hsave->save.cs = vmcb->save.cs;
1769 hsave->save.ss = vmcb->save.ss; 1959 hsave->save.ss = vmcb->save.ss;
@@ -1803,14 +1993,17 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
1803 if (npt_enabled) { 1993 if (npt_enabled) {
1804 svm->vmcb->save.cr3 = nested_vmcb->save.cr3; 1994 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
1805 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; 1995 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
1806 } else { 1996 } else
1807 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); 1997 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
1808 kvm_mmu_reset_context(&svm->vcpu); 1998
1809 } 1999 /* Guest paging mode is active - reset mmu */
2000 kvm_mmu_reset_context(&svm->vcpu);
2001
1810 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; 2002 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
1811 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); 2003 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
1812 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); 2004 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
1813 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); 2005 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
2006
1814 /* In case we don't even reach vcpu_run, the fields are not updated */ 2007 /* In case we don't even reach vcpu_run, the fields are not updated */
1815 svm->vmcb->save.rax = nested_vmcb->save.rax; 2008 svm->vmcb->save.rax = nested_vmcb->save.rax;
1816 svm->vmcb->save.rsp = nested_vmcb->save.rsp; 2009 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
@@ -1819,22 +2012,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
1819 svm->vmcb->save.dr6 = nested_vmcb->save.dr6; 2012 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
1820 svm->vmcb->save.cpl = nested_vmcb->save.cpl; 2013 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
1821 2014
1822 /* We don't want a nested guest to be more powerful than the guest, 2015 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
1823 so all intercepts are ORed */ 2016 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
1824 svm->vmcb->control.intercept_cr_read |=
1825 nested_vmcb->control.intercept_cr_read;
1826 svm->vmcb->control.intercept_cr_write |=
1827 nested_vmcb->control.intercept_cr_write;
1828 svm->vmcb->control.intercept_dr_read |=
1829 nested_vmcb->control.intercept_dr_read;
1830 svm->vmcb->control.intercept_dr_write |=
1831 nested_vmcb->control.intercept_dr_write;
1832 svm->vmcb->control.intercept_exceptions |=
1833 nested_vmcb->control.intercept_exceptions;
1834
1835 svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
1836
1837 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
1838 2017
1839 /* cache intercepts */ 2018 /* cache intercepts */
1840 svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; 2019 svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read;
@@ -1851,13 +2030,43 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
1851 else 2030 else
1852 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; 2031 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
1853 2032
2033 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2034 /* We only want the cr8 intercept bits of the guest */
2035 svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK;
2036 svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
2037 }
2038
2039 /* We don't want to see VMMCALLs from a nested guest */
2040 svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL);
2041
2042 /*
2043 * We don't want a nested guest to be more powerful than the guest, so
2044 * all intercepts are ORed
2045 */
2046 svm->vmcb->control.intercept_cr_read |=
2047 nested_vmcb->control.intercept_cr_read;
2048 svm->vmcb->control.intercept_cr_write |=
2049 nested_vmcb->control.intercept_cr_write;
2050 svm->vmcb->control.intercept_dr_read |=
2051 nested_vmcb->control.intercept_dr_read;
2052 svm->vmcb->control.intercept_dr_write |=
2053 nested_vmcb->control.intercept_dr_write;
2054 svm->vmcb->control.intercept_exceptions |=
2055 nested_vmcb->control.intercept_exceptions;
2056
2057 svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
2058
2059 svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
1854 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; 2060 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
1855 svm->vmcb->control.int_state = nested_vmcb->control.int_state; 2061 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
1856 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; 2062 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
1857 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; 2063 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
1858 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; 2064 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
1859 2065
1860 nested_svm_unmap(nested_vmcb, KM_USER0); 2066 nested_svm_unmap(page);
2067
2068 /* nested_vmcb is our indicator if nested SVM is activated */
2069 svm->nested.vmcb = vmcb_gpa;
1861 2070
1862 enable_gif(svm); 2071 enable_gif(svm);
1863 2072
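A concrete example of the "ORed intercepts" rule re-established at the end of this hunk: if L1 asked only for CR4-write intercepts while KVM itself intercepts CR0/CR3/CR4/CR8 writes, the merged VMCB intercepts all four. When a CR4 write later exits, nested_svm_intercept() finds the CR4 bit in the cached svm->nested.intercept_cr_write and reflects a #VMEXIT to L1; a CR3-write exit is not in L1's cached bits and stays with KVM (NESTED_EXIT_HOST). The two carve-outs added just before the ORing follow the comments in the hunk: with V_INTR_MASKING active, KVM drops its own CR8 intercepts so only L1's CR8 wishes remain, and KVM's own VMMCALL intercept is cleared so an L2 VMMCALL is not mistaken for a hypercall to KVM.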
@@ -1883,6 +2092,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
1883static int vmload_interception(struct vcpu_svm *svm) 2092static int vmload_interception(struct vcpu_svm *svm)
1884{ 2093{
1885 struct vmcb *nested_vmcb; 2094 struct vmcb *nested_vmcb;
2095 struct page *page;
1886 2096
1887 if (nested_svm_check_permissions(svm)) 2097 if (nested_svm_check_permissions(svm))
1888 return 1; 2098 return 1;
@@ -1890,12 +2100,12 @@ static int vmload_interception(struct vcpu_svm *svm)
1890 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2100 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1891 skip_emulated_instruction(&svm->vcpu); 2101 skip_emulated_instruction(&svm->vcpu);
1892 2102
1893 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); 2103 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
1894 if (!nested_vmcb) 2104 if (!nested_vmcb)
1895 return 1; 2105 return 1;
1896 2106
1897 nested_svm_vmloadsave(nested_vmcb, svm->vmcb); 2107 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
1898 nested_svm_unmap(nested_vmcb, KM_USER0); 2108 nested_svm_unmap(page);
1899 2109
1900 return 1; 2110 return 1;
1901} 2111}
@@ -1903,6 +2113,7 @@ static int vmload_interception(struct vcpu_svm *svm)
1903static int vmsave_interception(struct vcpu_svm *svm) 2113static int vmsave_interception(struct vcpu_svm *svm)
1904{ 2114{
1905 struct vmcb *nested_vmcb; 2115 struct vmcb *nested_vmcb;
2116 struct page *page;
1906 2117
1907 if (nested_svm_check_permissions(svm)) 2118 if (nested_svm_check_permissions(svm))
1908 return 1; 2119 return 1;
@@ -1910,12 +2121,12 @@ static int vmsave_interception(struct vcpu_svm *svm)
1910 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2121 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1911 skip_emulated_instruction(&svm->vcpu); 2122 skip_emulated_instruction(&svm->vcpu);
1912 2123
1913 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); 2124 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
1914 if (!nested_vmcb) 2125 if (!nested_vmcb)
1915 return 1; 2126 return 1;
1916 2127
1917 nested_svm_vmloadsave(svm->vmcb, nested_vmcb); 2128 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
1918 nested_svm_unmap(nested_vmcb, KM_USER0); 2129 nested_svm_unmap(page);
1919 2130
1920 return 1; 2131 return 1;
1921} 2132}
@@ -2018,6 +2229,8 @@ static int task_switch_interception(struct vcpu_svm *svm)
2018 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; 2229 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2019 uint32_t idt_v = 2230 uint32_t idt_v =
2020 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; 2231 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2232 bool has_error_code = false;
2233 u32 error_code = 0;
2021 2234
2022 tss_selector = (u16)svm->vmcb->control.exit_info_1; 2235 tss_selector = (u16)svm->vmcb->control.exit_info_1;
2023 2236
@@ -2038,6 +2251,12 @@ static int task_switch_interception(struct vcpu_svm *svm)
2038 svm->vcpu.arch.nmi_injected = false; 2251 svm->vcpu.arch.nmi_injected = false;
2039 break; 2252 break;
2040 case SVM_EXITINTINFO_TYPE_EXEPT: 2253 case SVM_EXITINTINFO_TYPE_EXEPT:
2254 if (svm->vmcb->control.exit_info_2 &
2255 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2256 has_error_code = true;
2257 error_code =
2258 (u32)svm->vmcb->control.exit_info_2;
2259 }
2041 kvm_clear_exception_queue(&svm->vcpu); 2260 kvm_clear_exception_queue(&svm->vcpu);
2042 break; 2261 break;
2043 case SVM_EXITINTINFO_TYPE_INTR: 2262 case SVM_EXITINTINFO_TYPE_INTR:
@@ -2054,7 +2273,14 @@ static int task_switch_interception(struct vcpu_svm *svm)
2054 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) 2273 (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2055 skip_emulated_instruction(&svm->vcpu); 2274 skip_emulated_instruction(&svm->vcpu);
2056 2275
2057 return kvm_task_switch(&svm->vcpu, tss_selector, reason); 2276 if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
2277 has_error_code, error_code) == EMULATE_FAIL) {
2278 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2279 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
2280 svm->vcpu.run->internal.ndata = 0;
2281 return 0;
2282 }
2283 return 1;
2058} 2284}
2059 2285
2060static int cpuid_interception(struct vcpu_svm *svm) 2286static int cpuid_interception(struct vcpu_svm *svm)
@@ -2145,9 +2371,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2145 case MSR_IA32_SYSENTER_ESP: 2371 case MSR_IA32_SYSENTER_ESP:
2146 *data = svm->sysenter_esp; 2372 *data = svm->sysenter_esp;
2147 break; 2373 break;
2148 /* Nobody will change the following 5 values in the VMCB so 2374 /*
2149 we can safely return them on rdmsr. They will always be 0 2375 * Nobody will change the following 5 values in the VMCB so we can
2150 until LBRV is implemented. */ 2376 * safely return them on rdmsr. They will always be 0 until LBRV is
2377 * implemented.
2378 */
2151 case MSR_IA32_DEBUGCTLMSR: 2379 case MSR_IA32_DEBUGCTLMSR:
2152 *data = svm->vmcb->save.dbgctl; 2380 *data = svm->vmcb->save.dbgctl;
2153 break; 2381 break;
@@ -2167,7 +2395,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2167 *data = svm->nested.hsave_msr; 2395 *data = svm->nested.hsave_msr;
2168 break; 2396 break;
2169 case MSR_VM_CR: 2397 case MSR_VM_CR:
2170 *data = 0; 2398 *data = svm->nested.vm_cr_msr;
2171 break; 2399 break;
2172 case MSR_IA32_UCODE_REV: 2400 case MSR_IA32_UCODE_REV:
2173 *data = 0x01000065; 2401 *data = 0x01000065;
@@ -2197,6 +2425,31 @@ static int rdmsr_interception(struct vcpu_svm *svm)
2197 return 1; 2425 return 1;
2198} 2426}
2199 2427
2428static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
2429{
2430 struct vcpu_svm *svm = to_svm(vcpu);
2431 int svm_dis, chg_mask;
2432
2433 if (data & ~SVM_VM_CR_VALID_MASK)
2434 return 1;
2435
2436 chg_mask = SVM_VM_CR_VALID_MASK;
2437
2438 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
2439 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
2440
2441 svm->nested.vm_cr_msr &= ~chg_mask;
2442 svm->nested.vm_cr_msr |= (data & chg_mask);
2443
2444 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
2445
2446 /* check for svm_disable while efer.svme is set */
2447 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
2448 return 1;
2449
2450 return 0;
2451}
2452
2200static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) 2453static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2201{ 2454{
2202 struct vcpu_svm *svm = to_svm(vcpu); 2455 struct vcpu_svm *svm = to_svm(vcpu);
@@ -2263,6 +2516,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2263 svm->nested.hsave_msr = data; 2516 svm->nested.hsave_msr = data;
2264 break; 2517 break;
2265 case MSR_VM_CR: 2518 case MSR_VM_CR:
2519 return svm_set_vm_cr(vcpu, data);
2266 case MSR_VM_IGNNE: 2520 case MSR_VM_IGNNE:
2267 pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 2521 pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
2268 break; 2522 break;
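svm_set_vm_cr() gives the guest a minimally functional view of the VM_CR MSR (per the SVM spec, SVM Lock is bit 3 and SVMDIS bit 4, and only the low handful of bits are defined, presumably what SVM_VM_CR_VALID_MASK covers). Writes touching undefined bits are rejected outright. Once the guest has latched SVMDIS, chg_mask drops both the LOCK and SVMDIS bits, so later writes can no longer re-enable nested SVM for that guest; and a write that would leave SVMDIS set while the guest's EFER.SVME is already on is also refused, since SVM cannot be disabled underneath an active SVME. The read side of MSR_VM_CR now returns this emulated value instead of a hard-coded 0.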
@@ -2326,16 +2580,16 @@ static int pause_interception(struct vcpu_svm *svm)
2326} 2580}
2327 2581
2328static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { 2582static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2329 [SVM_EXIT_READ_CR0] = emulate_on_interception, 2583 [SVM_EXIT_READ_CR0] = emulate_on_interception,
2330 [SVM_EXIT_READ_CR3] = emulate_on_interception, 2584 [SVM_EXIT_READ_CR3] = emulate_on_interception,
2331 [SVM_EXIT_READ_CR4] = emulate_on_interception, 2585 [SVM_EXIT_READ_CR4] = emulate_on_interception,
2332 [SVM_EXIT_READ_CR8] = emulate_on_interception, 2586 [SVM_EXIT_READ_CR8] = emulate_on_interception,
2333 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, 2587 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
2334 [SVM_EXIT_WRITE_CR0] = emulate_on_interception, 2588 [SVM_EXIT_WRITE_CR0] = emulate_on_interception,
2335 [SVM_EXIT_WRITE_CR3] = emulate_on_interception, 2589 [SVM_EXIT_WRITE_CR3] = emulate_on_interception,
2336 [SVM_EXIT_WRITE_CR4] = emulate_on_interception, 2590 [SVM_EXIT_WRITE_CR4] = emulate_on_interception,
2337 [SVM_EXIT_WRITE_CR8] = cr8_write_interception, 2591 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
2338 [SVM_EXIT_READ_DR0] = emulate_on_interception, 2592 [SVM_EXIT_READ_DR0] = emulate_on_interception,
2339 [SVM_EXIT_READ_DR1] = emulate_on_interception, 2593 [SVM_EXIT_READ_DR1] = emulate_on_interception,
2340 [SVM_EXIT_READ_DR2] = emulate_on_interception, 2594 [SVM_EXIT_READ_DR2] = emulate_on_interception,
2341 [SVM_EXIT_READ_DR3] = emulate_on_interception, 2595 [SVM_EXIT_READ_DR3] = emulate_on_interception,
@@ -2354,15 +2608,14 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2354 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, 2608 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
2355 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, 2609 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
2356 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 2610 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
2357 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 2611 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
2358 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 2612 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
2359 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, 2613 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
2360 [SVM_EXIT_INTR] = intr_interception, 2614 [SVM_EXIT_INTR] = intr_interception,
2361 [SVM_EXIT_NMI] = nmi_interception, 2615 [SVM_EXIT_NMI] = nmi_interception,
2362 [SVM_EXIT_SMI] = nop_on_interception, 2616 [SVM_EXIT_SMI] = nop_on_interception,
2363 [SVM_EXIT_INIT] = nop_on_interception, 2617 [SVM_EXIT_INIT] = nop_on_interception,
2364 [SVM_EXIT_VINTR] = interrupt_window_interception, 2618 [SVM_EXIT_VINTR] = interrupt_window_interception,
2365 /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */
2366 [SVM_EXIT_CPUID] = cpuid_interception, 2619 [SVM_EXIT_CPUID] = cpuid_interception,
2367 [SVM_EXIT_IRET] = iret_interception, 2620 [SVM_EXIT_IRET] = iret_interception,
2368 [SVM_EXIT_INVD] = emulate_on_interception, 2621 [SVM_EXIT_INVD] = emulate_on_interception,
@@ -2370,7 +2623,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2370 [SVM_EXIT_HLT] = halt_interception, 2623 [SVM_EXIT_HLT] = halt_interception,
2371 [SVM_EXIT_INVLPG] = invlpg_interception, 2624 [SVM_EXIT_INVLPG] = invlpg_interception,
2372 [SVM_EXIT_INVLPGA] = invlpga_interception, 2625 [SVM_EXIT_INVLPGA] = invlpga_interception,
2373 [SVM_EXIT_IOIO] = io_interception, 2626 [SVM_EXIT_IOIO] = io_interception,
2374 [SVM_EXIT_MSR] = msr_interception, 2627 [SVM_EXIT_MSR] = msr_interception,
2375 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 2628 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
2376 [SVM_EXIT_SHUTDOWN] = shutdown_interception, 2629 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
@@ -2393,7 +2646,12 @@ static int handle_exit(struct kvm_vcpu *vcpu)
2393 struct kvm_run *kvm_run = vcpu->run; 2646 struct kvm_run *kvm_run = vcpu->run;
2394 u32 exit_code = svm->vmcb->control.exit_code; 2647 u32 exit_code = svm->vmcb->control.exit_code;
2395 2648
2396 trace_kvm_exit(exit_code, svm->vmcb->save.rip); 2649 trace_kvm_exit(exit_code, vcpu);
2650
2651 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
2652 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2653 if (npt_enabled)
2654 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2397 2655
2398 if (unlikely(svm->nested.exit_required)) { 2656 if (unlikely(svm->nested.exit_required)) {
2399 nested_svm_vmexit(svm); 2657 nested_svm_vmexit(svm);
@@ -2422,11 +2680,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
2422 2680
2423 svm_complete_interrupts(svm); 2681 svm_complete_interrupts(svm);
2424 2682
2425 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
2426 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2427 if (npt_enabled)
2428 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2429
2430 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 2683 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
2431 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2684 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2432 kvm_run->fail_entry.hardware_entry_failure_reason 2685 kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2511,6 +2764,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
2511{ 2764{
2512 struct vcpu_svm *svm = to_svm(vcpu); 2765 struct vcpu_svm *svm = to_svm(vcpu);
2513 2766
2767 if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
2768 return;
2769
2514 if (irr == -1) 2770 if (irr == -1)
2515 return; 2771 return;
2516 2772
@@ -2522,8 +2778,12 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
2522{ 2778{
2523 struct vcpu_svm *svm = to_svm(vcpu); 2779 struct vcpu_svm *svm = to_svm(vcpu);
2524 struct vmcb *vmcb = svm->vmcb; 2780 struct vmcb *vmcb = svm->vmcb;
2525 return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && 2781 int ret;
2526 !(svm->vcpu.arch.hflags & HF_NMI_MASK); 2782 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2783 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
2784 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
2785
2786 return ret;
2527} 2787}
2528 2788
2529static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) 2789static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -2568,13 +2828,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
2568{ 2828{
2569 struct vcpu_svm *svm = to_svm(vcpu); 2829 struct vcpu_svm *svm = to_svm(vcpu);
2570 2830
2571 nested_svm_intr(svm); 2831 /*
2572 2832 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
2573 /* In case GIF=0 we can't rely on the CPU to tell us when 2833 * 1, because that's a separate STGI/VMRUN intercept. The next time we
2574 * GIF becomes 1, because that's a separate STGI/VMRUN intercept. 2834 * get that intercept, this function will be called again though and
2575 * The next time we get that intercept, this function will be 2835 * we'll get the vintr intercept.
2576 * called again though and we'll get the vintr intercept. */ 2836 */
2577 if (gif_set(svm)) { 2837 if (gif_set(svm) && nested_svm_intr(svm)) {
2578 svm_set_vintr(svm); 2838 svm_set_vintr(svm);
2579 svm_inject_irq(svm, 0x0); 2839 svm_inject_irq(svm, 0x0);
2580 } 2840 }
@@ -2588,9 +2848,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
2588 == HF_NMI_MASK) 2848 == HF_NMI_MASK)
2589 return; /* IRET will cause a vm exit */ 2849 return; /* IRET will cause a vm exit */
2590 2850
2591 /* Something prevents NMI from been injected. Single step over 2851 /*
2592 possible problem (IRET or exception injection or interrupt 2852 * Something prevents NMI from been injected. Single step over possible
2593 shadow) */ 2853 * problem (IRET or exception injection or interrupt shadow)
2854 */
2594 svm->nmi_singlestep = true; 2855 svm->nmi_singlestep = true;
2595 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); 2856 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2596 update_db_intercept(vcpu); 2857 update_db_intercept(vcpu);
@@ -2614,6 +2875,9 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
2614{ 2875{
2615 struct vcpu_svm *svm = to_svm(vcpu); 2876 struct vcpu_svm *svm = to_svm(vcpu);
2616 2877
2878 if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
2879 return;
2880
2617 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { 2881 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
2618 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; 2882 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
2619 kvm_set_cr8(vcpu, cr8); 2883 kvm_set_cr8(vcpu, cr8);
@@ -2625,6 +2889,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
2625 struct vcpu_svm *svm = to_svm(vcpu); 2889 struct vcpu_svm *svm = to_svm(vcpu);
2626 u64 cr8; 2890 u64 cr8;
2627 2891
2892 if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
2893 return;
2894
2628 cr8 = kvm_get_cr8(vcpu); 2895 cr8 = kvm_get_cr8(vcpu);
2629 svm->vmcb->control.int_ctl &= ~V_TPR_MASK; 2896 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
2630 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; 2897 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
@@ -2635,6 +2902,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
2635 u8 vector; 2902 u8 vector;
2636 int type; 2903 int type;
2637 u32 exitintinfo = svm->vmcb->control.exit_int_info; 2904 u32 exitintinfo = svm->vmcb->control.exit_int_info;
2905 unsigned int3_injected = svm->int3_injected;
2906
2907 svm->int3_injected = 0;
2638 2908
2639 if (svm->vcpu.arch.hflags & HF_IRET_MASK) 2909 if (svm->vcpu.arch.hflags & HF_IRET_MASK)
2640 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); 2910 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
@@ -2654,18 +2924,25 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
2654 svm->vcpu.arch.nmi_injected = true; 2924 svm->vcpu.arch.nmi_injected = true;
2655 break; 2925 break;
2656 case SVM_EXITINTINFO_TYPE_EXEPT: 2926 case SVM_EXITINTINFO_TYPE_EXEPT:
2657 /* In case of software exception do not reinject an exception 2927 /*
2658 vector, but re-execute and instruction instead */ 2928 * In case of software exceptions, do not reinject the vector,
2659 if (is_nested(svm)) 2929 * but re-execute the instruction instead. Rewind RIP first
2660 break; 2930 * if we emulated INT3 before.
2661 if (kvm_exception_is_soft(vector)) 2931 */
2932 if (kvm_exception_is_soft(vector)) {
2933 if (vector == BP_VECTOR && int3_injected &&
2934 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
2935 kvm_rip_write(&svm->vcpu,
2936 kvm_rip_read(&svm->vcpu) -
2937 int3_injected);
2662 break; 2938 break;
2939 }
2663 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { 2940 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
2664 u32 err = svm->vmcb->control.exit_int_info_err; 2941 u32 err = svm->vmcb->control.exit_int_info_err;
2665 kvm_queue_exception_e(&svm->vcpu, vector, err); 2942 kvm_requeue_exception_e(&svm->vcpu, vector, err);
2666 2943
2667 } else 2944 } else
2668 kvm_queue_exception(&svm->vcpu, vector); 2945 kvm_requeue_exception(&svm->vcpu, vector);
2669 break; 2946 break;
2670 case SVM_EXITINTINFO_TYPE_INTR: 2947 case SVM_EXITINTINFO_TYPE_INTR:
2671 kvm_queue_interrupt(&svm->vcpu, vector, false); 2948 kvm_queue_interrupt(&svm->vcpu, vector, false);
@@ -2688,6 +2965,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2688 u16 gs_selector; 2965 u16 gs_selector;
2689 u16 ldt_selector; 2966 u16 ldt_selector;
2690 2967
2968 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
2969 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
2970 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
2971
2691 /* 2972 /*
2692 * A vmexit emulation is required before the vcpu can be executed 2973 * A vmexit emulation is required before the vcpu can be executed
2693 * again. 2974 * again.
@@ -2695,10 +2976,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2695 if (unlikely(svm->nested.exit_required)) 2976 if (unlikely(svm->nested.exit_required))
2696 return; 2977 return;
2697 2978
2698 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
2699 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
2700 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
2701
2702 pre_svm_run(svm); 2979 pre_svm_run(svm);
2703 2980
2704 sync_lapic_to_cr8(vcpu); 2981 sync_lapic_to_cr8(vcpu);
@@ -2879,25 +3156,39 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
2879{ 3156{
2880} 3157}
2881 3158
3159static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3160{
3161 switch (func) {
3162 case 0x8000000A:
3163 entry->eax = 1; /* SVM revision 1 */
3164 entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
3165 ASID emulation to nested SVM */
3166 entry->ecx = 0; /* Reserved */
3167 entry->edx = 0; /* Do not support any additional features */
3168
3169 break;
3170 }
3171}
3172
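With this hook, the common CPUID code can report SVM capabilities for the nested case. An L1 guest that sees the SVM feature bit could probe the leaf roughly like this (user-space illustration, not part of the patch):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx))
		return 1;	/* extended leaf not available */

	/* with the values above: revision 1, 8 ASIDs, no extra feature bits */
	printf("SVM revision %u, %u ASIDs, feature bits 0x%08x\n",
	       eax & 0xff, ebx, edx);
	return 0;
}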
2882static const struct trace_print_flags svm_exit_reasons_str[] = { 3173static const struct trace_print_flags svm_exit_reasons_str[] = {
2883 { SVM_EXIT_READ_CR0, "read_cr0" }, 3174 { SVM_EXIT_READ_CR0, "read_cr0" },
2884 { SVM_EXIT_READ_CR3, "read_cr3" }, 3175 { SVM_EXIT_READ_CR3, "read_cr3" },
2885 { SVM_EXIT_READ_CR4, "read_cr4" }, 3176 { SVM_EXIT_READ_CR4, "read_cr4" },
2886 { SVM_EXIT_READ_CR8, "read_cr8" }, 3177 { SVM_EXIT_READ_CR8, "read_cr8" },
2887 { SVM_EXIT_WRITE_CR0, "write_cr0" }, 3178 { SVM_EXIT_WRITE_CR0, "write_cr0" },
2888 { SVM_EXIT_WRITE_CR3, "write_cr3" }, 3179 { SVM_EXIT_WRITE_CR3, "write_cr3" },
2889 { SVM_EXIT_WRITE_CR4, "write_cr4" }, 3180 { SVM_EXIT_WRITE_CR4, "write_cr4" },
2890 { SVM_EXIT_WRITE_CR8, "write_cr8" }, 3181 { SVM_EXIT_WRITE_CR8, "write_cr8" },
2891 { SVM_EXIT_READ_DR0, "read_dr0" }, 3182 { SVM_EXIT_READ_DR0, "read_dr0" },
2892 { SVM_EXIT_READ_DR1, "read_dr1" }, 3183 { SVM_EXIT_READ_DR1, "read_dr1" },
2893 { SVM_EXIT_READ_DR2, "read_dr2" }, 3184 { SVM_EXIT_READ_DR2, "read_dr2" },
2894 { SVM_EXIT_READ_DR3, "read_dr3" }, 3185 { SVM_EXIT_READ_DR3, "read_dr3" },
2895 { SVM_EXIT_WRITE_DR0, "write_dr0" }, 3186 { SVM_EXIT_WRITE_DR0, "write_dr0" },
2896 { SVM_EXIT_WRITE_DR1, "write_dr1" }, 3187 { SVM_EXIT_WRITE_DR1, "write_dr1" },
2897 { SVM_EXIT_WRITE_DR2, "write_dr2" }, 3188 { SVM_EXIT_WRITE_DR2, "write_dr2" },
2898 { SVM_EXIT_WRITE_DR3, "write_dr3" }, 3189 { SVM_EXIT_WRITE_DR3, "write_dr3" },
2899 { SVM_EXIT_WRITE_DR5, "write_dr5" }, 3190 { SVM_EXIT_WRITE_DR5, "write_dr5" },
2900 { SVM_EXIT_WRITE_DR7, "write_dr7" }, 3191 { SVM_EXIT_WRITE_DR7, "write_dr7" },
2901 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, 3192 { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" },
2902 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, 3193 { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" },
2903 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, 3194 { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" },
@@ -2946,8 +3237,10 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
2946{ 3237{
2947 struct vcpu_svm *svm = to_svm(vcpu); 3238 struct vcpu_svm *svm = to_svm(vcpu);
2948 3239
2949 update_cr0_intercept(svm);
2950 svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; 3240 svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
3241 if (is_nested(svm))
3242 svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR;
3243 update_cr0_intercept(svm);
2951} 3244}
2952 3245
2953static struct kvm_x86_ops svm_x86_ops = { 3246static struct kvm_x86_ops svm_x86_ops = {
@@ -2986,8 +3279,7 @@ static struct kvm_x86_ops svm_x86_ops = {
2986 .set_idt = svm_set_idt, 3279 .set_idt = svm_set_idt,
2987 .get_gdt = svm_get_gdt, 3280 .get_gdt = svm_get_gdt,
2988 .set_gdt = svm_set_gdt, 3281 .set_gdt = svm_set_gdt,
2989 .get_dr = svm_get_dr, 3282 .set_dr7 = svm_set_dr7,
2990 .set_dr = svm_set_dr,
2991 .cache_reg = svm_cache_reg, 3283 .cache_reg = svm_cache_reg,
2992 .get_rflags = svm_get_rflags, 3284 .get_rflags = svm_get_rflags,
2993 .set_rflags = svm_set_rflags, 3285 .set_rflags = svm_set_rflags,
@@ -3023,12 +3315,14 @@ static struct kvm_x86_ops svm_x86_ops = {
3023 .cpuid_update = svm_cpuid_update, 3315 .cpuid_update = svm_cpuid_update,
3024 3316
3025 .rdtscp_supported = svm_rdtscp_supported, 3317 .rdtscp_supported = svm_rdtscp_supported,
3318
3319 .set_supported_cpuid = svm_set_supported_cpuid,
3026}; 3320};
3027 3321
3028static int __init svm_init(void) 3322static int __init svm_init(void)
3029{ 3323{
3030 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), 3324 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
3031 THIS_MODULE); 3325 __alignof__(struct vcpu_svm), THIS_MODULE);
3032} 3326}
3033 3327
3034static void __exit svm_exit(void) 3328static void __exit svm_exit(void)
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
index eea40439066c..4ddadb1a5ffe 100644
--- a/arch/x86/kvm/timer.c
+++ b/arch/x86/kvm/timer.c
@@ -12,7 +12,8 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
12 /* 12 /*
13 * There is a race window between reading and incrementing, but we do 13 * There is a race window between reading and incrementing, but we do
14 * not care about potentially loosing timer events in the !reinject 14 * not care about potentially loosing timer events in the !reinject
15 * case anyway. 15 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
16 * in vcpu_enter_guest.
16 */ 17 */
17 if (ktimer->reinject || !atomic_read(&ktimer->pending)) { 18 if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
18 atomic_inc(&ktimer->pending); 19 atomic_inc(&ktimer->pending);
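Editor's sketch: the comment added in this hunk describes the race between reading and bumping ktimer->pending, and why coalescing lost expiries is acceptable when reinjection is off. Below is a small userspace sketch of that "only queue another event if reinjecting or nothing is pending" rule, using C11 atomics in place of the kernel's atomic_t; the names are illustrative, not the kernel API.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct timer_state {
        bool reinject;          /* if true, every expiry is queued */
        atomic_int pending;     /* expiries not yet delivered to the guest */
    };

    /* Mirrors the __kvm_timer_fn logic: coalesce expiries when not reinjecting. */
    static bool timer_fired(struct timer_state *t)
    {
        if (t->reinject || atomic_load(&t->pending) == 0) {
            atomic_fetch_add(&t->pending, 1);
            return true;        /* caller would wake the vcpu */
        }
        return false;           /* coalesced; harmless per the comment above */
    }

    int main(void)
    {
        struct timer_state t = { .reinject = false, .pending = 0 };
        printf("%d %d %d\n", timer_fired(&t), timer_fired(&t), timer_fired(&t));
        /* prints "1 0 0": later expiries coalesce until the first is consumed */
        return 0;
    }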
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6ad30a29f044..a6544b8e7c0f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -5,8 +5,6 @@
5 5
6#undef TRACE_SYSTEM 6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM kvm 7#define TRACE_SYSTEM kvm
8#define TRACE_INCLUDE_PATH arch/x86/kvm
9#define TRACE_INCLUDE_FILE trace
10 8
11/* 9/*
12 * Tracepoint for guest mode entry. 10 * Tracepoint for guest mode entry.
@@ -184,8 +182,8 @@ TRACE_EVENT(kvm_apic,
184 * Tracepoint for kvm guest exit: 182 * Tracepoint for kvm guest exit:
185 */ 183 */
186TRACE_EVENT(kvm_exit, 184TRACE_EVENT(kvm_exit,
187 TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), 185 TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu),
188 TP_ARGS(exit_reason, guest_rip), 186 TP_ARGS(exit_reason, vcpu),
189 187
190 TP_STRUCT__entry( 188 TP_STRUCT__entry(
191 __field( unsigned int, exit_reason ) 189 __field( unsigned int, exit_reason )
@@ -194,7 +192,7 @@ TRACE_EVENT(kvm_exit,
194 192
195 TP_fast_assign( 193 TP_fast_assign(
196 __entry->exit_reason = exit_reason; 194 __entry->exit_reason = exit_reason;
197 __entry->guest_rip = guest_rip; 195 __entry->guest_rip = kvm_rip_read(vcpu);
198 ), 196 ),
199 197
200 TP_printk("reason %s rip 0x%lx", 198 TP_printk("reason %s rip 0x%lx",
@@ -221,6 +219,38 @@ TRACE_EVENT(kvm_inj_virq,
221 TP_printk("irq %u", __entry->irq) 219 TP_printk("irq %u", __entry->irq)
222); 220);
223 221
222#define EXS(x) { x##_VECTOR, "#" #x }
223
224#define kvm_trace_sym_exc \
225 EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \
226 EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \
227 EXS(MF), EXS(MC)
228
229/*
 230 * Tracepoint for kvm exception injection:
231 */
232TRACE_EVENT(kvm_inj_exception,
233 TP_PROTO(unsigned exception, bool has_error, unsigned error_code),
234 TP_ARGS(exception, has_error, error_code),
235
236 TP_STRUCT__entry(
237 __field( u8, exception )
238 __field( u8, has_error )
239 __field( u32, error_code )
240 ),
241
242 TP_fast_assign(
243 __entry->exception = exception;
244 __entry->has_error = has_error;
245 __entry->error_code = error_code;
246 ),
247
248 TP_printk("%s (0x%x)",
249 __print_symbolic(__entry->exception, kvm_trace_sym_exc),
250 /* FIXME: don't print error_code if not present */
251 __entry->has_error ? __entry->error_code : 0)
252);
253
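Editor's sketch: the EXS() table above lets __print_symbolic() turn a raw vector number into the familiar "#GP"/"#PF" style name at trace-decode time. A self-contained C illustration of the same symbolic mapping; the vector values are the standard x86 ones and the table is abbreviated.

    #include <stdio.h>

    struct sym { unsigned vec; const char *name; };

    /* Subset of the EXS() list above; 13 = #GP, 14 = #PF on x86. */
    static const struct sym exc_names[] = {
        { 0, "#DE" }, { 1, "#DB" }, { 3, "#BP" }, { 6, "#UD" },
        { 7, "#NM" }, { 13, "#GP" }, { 14, "#PF" },
    };

    static const char *exc_name(unsigned vec)
    {
        for (unsigned i = 0; i < sizeof(exc_names) / sizeof(exc_names[0]); ++i)
            if (exc_names[i].vec == vec)
                return exc_names[i].name;
        return "??";
    }

    int main(void)
    {
        /* What TP_printk("%s (0x%x)", ...) would render for a #PF with error code 2 */
        printf("%s (0x%x)\n", exc_name(14), 2);
        return 0;
    }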
224/* 254/*
225 * Tracepoint for page fault. 255 * Tracepoint for page fault.
226 */ 256 */
@@ -413,12 +443,34 @@ TRACE_EVENT(kvm_nested_vmrun,
413 ), 443 ),
414 444
415 TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " 445 TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x "
416 "event_inj: 0x%08x npt: %s\n", 446 "event_inj: 0x%08x npt: %s",
417 __entry->rip, __entry->vmcb, __entry->nested_rip, 447 __entry->rip, __entry->vmcb, __entry->nested_rip,
418 __entry->int_ctl, __entry->event_inj, 448 __entry->int_ctl, __entry->event_inj,
419 __entry->npt ? "on" : "off") 449 __entry->npt ? "on" : "off")
420); 450);
421 451
452TRACE_EVENT(kvm_nested_intercepts,
453 TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept),
454 TP_ARGS(cr_read, cr_write, exceptions, intercept),
455
456 TP_STRUCT__entry(
457 __field( __u16, cr_read )
458 __field( __u16, cr_write )
459 __field( __u32, exceptions )
460 __field( __u64, intercept )
461 ),
462
463 TP_fast_assign(
464 __entry->cr_read = cr_read;
465 __entry->cr_write = cr_write;
466 __entry->exceptions = exceptions;
467 __entry->intercept = intercept;
468 ),
469
470 TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx",
471 __entry->cr_read, __entry->cr_write, __entry->exceptions,
472 __entry->intercept)
473);
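Editor's sketch: kvm_nested_intercepts logs the raw cr_read/cr_write bitmaps from the nested VMCB, where each bit stands for one control register (bit n for CRn). A hedged, purely illustrative decoder for such a 16-bit mask:

    #include <stdio.h>
    #include <stdint.h>

    /* Print which CR accesses a 16-bit intercept mask covers: bit n <-> CRn. */
    static void print_cr_mask(const char *what, uint16_t mask)
    {
        printf("%s:", what);
        for (int cr = 0; cr < 16; ++cr)
            if (mask & (1u << cr))
                printf(" cr%d", cr);
        printf("\n");
    }

    int main(void)
    {
        /* Example: intercept reads of CR0/CR3/CR4 and writes of CR0/CR3/CR4/CR8 */
        print_cr_mask("cr_read ", 0x0019);
        print_cr_mask("cr_write", 0x0119);
        return 0;
    }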
422/* 474/*
423 * Tracepoint for #VMEXIT while nested 475 * Tracepoint for #VMEXIT while nested
424 */ 476 */
@@ -447,7 +499,7 @@ TRACE_EVENT(kvm_nested_vmexit,
447 __entry->exit_int_info_err = exit_int_info_err; 499 __entry->exit_int_info_err = exit_int_info_err;
448 ), 500 ),
449 TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " 501 TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
450 "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", 502 "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
451 __entry->rip, 503 __entry->rip,
452 ftrace_print_symbols_seq(p, __entry->exit_code, 504 ftrace_print_symbols_seq(p, __entry->exit_code,
453 kvm_x86_ops->exit_reasons_str), 505 kvm_x86_ops->exit_reasons_str),
@@ -482,7 +534,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
482 ), 534 ),
483 535
484 TP_printk("reason: %s ext_inf1: 0x%016llx " 536 TP_printk("reason: %s ext_inf1: 0x%016llx "
485 "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", 537 "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
486 ftrace_print_symbols_seq(p, __entry->exit_code, 538 ftrace_print_symbols_seq(p, __entry->exit_code,
487 kvm_x86_ops->exit_reasons_str), 539 kvm_x86_ops->exit_reasons_str),
488 __entry->exit_info1, __entry->exit_info2, 540 __entry->exit_info1, __entry->exit_info2,
@@ -504,7 +556,7 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
504 __entry->rip = rip 556 __entry->rip = rip
505 ), 557 ),
506 558
507 TP_printk("rip: 0x%016llx\n", __entry->rip) 559 TP_printk("rip: 0x%016llx", __entry->rip)
508); 560);
509 561
510/* 562/*
@@ -526,7 +578,7 @@ TRACE_EVENT(kvm_invlpga,
526 __entry->address = address; 578 __entry->address = address;
527 ), 579 ),
528 580
529 TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", 581 TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx",
530 __entry->rip, __entry->asid, __entry->address) 582 __entry->rip, __entry->asid, __entry->address)
531); 583);
532 584
@@ -547,11 +599,102 @@ TRACE_EVENT(kvm_skinit,
547 __entry->slb = slb; 599 __entry->slb = slb;
548 ), 600 ),
549 601
550 TP_printk("rip: 0x%016llx slb: 0x%08x\n", 602 TP_printk("rip: 0x%016llx slb: 0x%08x",
551 __entry->rip, __entry->slb) 603 __entry->rip, __entry->slb)
552); 604);
553 605
606#define __print_insn(insn, ilen) ({ \
607 int i; \
608 const char *ret = p->buffer + p->len; \
609 \
610 for (i = 0; i < ilen; ++i) \
611 trace_seq_printf(p, " %02x", insn[i]); \
612 trace_seq_printf(p, "%c", 0); \
613 ret; \
614 })
615
616#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
617#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
618#define KVM_EMUL_INSN_F_CS_D (1 << 2)
619#define KVM_EMUL_INSN_F_CS_L (1 << 3)
620
621#define kvm_trace_symbol_emul_flags \
622 { 0, "real" }, \
623 { KVM_EMUL_INSN_F_CR0_PE \
624 | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \
625 { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \
626 { KVM_EMUL_INSN_F_CR0_PE \
627 | KVM_EMUL_INSN_F_CS_D, "prot32" }, \
628 { KVM_EMUL_INSN_F_CR0_PE \
629 | KVM_EMUL_INSN_F_CS_L, "prot64" }
630
631#define kei_decode_mode(mode) ({ \
632 u8 flags = 0xff; \
633 switch (mode) { \
634 case X86EMUL_MODE_REAL: \
635 flags = 0; \
636 break; \
637 case X86EMUL_MODE_VM86: \
638 flags = KVM_EMUL_INSN_F_EFL_VM; \
639 break; \
640 case X86EMUL_MODE_PROT16: \
641 flags = KVM_EMUL_INSN_F_CR0_PE; \
642 break; \
643 case X86EMUL_MODE_PROT32: \
644 flags = KVM_EMUL_INSN_F_CR0_PE \
645 | KVM_EMUL_INSN_F_CS_D; \
646 break; \
647 case X86EMUL_MODE_PROT64: \
648 flags = KVM_EMUL_INSN_F_CR0_PE \
649 | KVM_EMUL_INSN_F_CS_L; \
650 break; \
651 } \
652 flags; \
653 })
654
655TRACE_EVENT(kvm_emulate_insn,
656 TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed),
657 TP_ARGS(vcpu, failed),
658
659 TP_STRUCT__entry(
660 __field( __u64, rip )
661 __field( __u32, csbase )
662 __field( __u8, len )
663 __array( __u8, insn, 15 )
664 __field( __u8, flags )
665 __field( __u8, failed )
666 ),
667
668 TP_fast_assign(
669 __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start;
670 __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS);
671 __entry->len = vcpu->arch.emulate_ctxt.decode.eip
672 - vcpu->arch.emulate_ctxt.decode.fetch.start;
673 memcpy(__entry->insn,
674 vcpu->arch.emulate_ctxt.decode.fetch.data,
675 15);
676 __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode);
677 __entry->failed = failed;
678 ),
679
680 TP_printk("%x:%llx:%s (%s)%s",
681 __entry->csbase, __entry->rip,
682 __print_insn(__entry->insn, __entry->len),
683 __print_symbolic(__entry->flags,
684 kvm_trace_symbol_emul_flags),
685 __entry->failed ? " failed" : ""
686 )
687 );
688
689#define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0)
690#define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1)
691
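Editor's sketch: kei_decode_mode() above folds the emulator mode into the CR0_PE/EFL_VM/CS_D/CS_L flag bits so __print_symbolic can label an instruction as real/vm16/prot16/prot32/prot64. The same encoding as a plain C function, with locally defined constants that mirror the macros above (a sketch, not the kernel header):

    #include <stdio.h>

    #define F_CR0_PE (1 << 0)
    #define F_EFL_VM (1 << 1)
    #define F_CS_D   (1 << 2)
    #define F_CS_L   (1 << 3)

    /* Same mapping as kvm_trace_symbol_emul_flags above. */
    static const char *mode_name(unsigned char flags)
    {
        switch (flags) {
        case 0:                   return "real";
        case F_CR0_PE | F_EFL_VM: return "vm16";
        case F_CR0_PE:            return "prot16";
        case F_CR0_PE | F_CS_D:   return "prot32";
        case F_CR0_PE | F_CS_L:   return "prot64";
        default:                  return "unknown";
        }
    }

    int main(void)
    {
        printf("%s\n", mode_name(F_CR0_PE | F_CS_D));   /* prints "prot32" */
        return 0;
    }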
554#endif /* _TRACE_KVM_H */ 692#endif /* _TRACE_KVM_H */
555 693
694#undef TRACE_INCLUDE_PATH
695#define TRACE_INCLUDE_PATH arch/x86/kvm
696#undef TRACE_INCLUDE_FILE
697#define TRACE_INCLUDE_FILE trace
698
556/* This part must be outside protection */ 699/* This part must be outside protection */
557#include <trace/define_trace.h> 700#include <trace/define_trace.h>
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index edca080407a5..859a01a07dbf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -27,6 +27,7 @@
27#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
28#include <linux/ftrace_event.h> 28#include <linux/ftrace_event.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/tboot.h>
30#include "kvm_cache_regs.h" 31#include "kvm_cache_regs.h"
31#include "x86.h" 32#include "x86.h"
32 33
@@ -98,6 +99,8 @@ module_param(ple_gap, int, S_IRUGO);
98static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; 99static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
99module_param(ple_window, int, S_IRUGO); 100module_param(ple_window, int, S_IRUGO);
100 101
102#define NR_AUTOLOAD_MSRS 1
103
101struct vmcs { 104struct vmcs {
102 u32 revision_id; 105 u32 revision_id;
103 u32 abort; 106 u32 abort;
@@ -125,6 +128,11 @@ struct vcpu_vmx {
125 u64 msr_guest_kernel_gs_base; 128 u64 msr_guest_kernel_gs_base;
126#endif 129#endif
127 struct vmcs *vmcs; 130 struct vmcs *vmcs;
131 struct msr_autoload {
132 unsigned nr;
133 struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
134 struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
135 } msr_autoload;
128 struct { 136 struct {
129 int loaded; 137 int loaded;
130 u16 fs_sel, gs_sel, ldt_sel; 138 u16 fs_sel, gs_sel, ldt_sel;
@@ -234,56 +242,56 @@ static const u32 vmx_msr_index[] = {
234}; 242};
235#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) 243#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
236 244
237static inline int is_page_fault(u32 intr_info) 245static inline bool is_page_fault(u32 intr_info)
238{ 246{
239 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 247 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
240 INTR_INFO_VALID_MASK)) == 248 INTR_INFO_VALID_MASK)) ==
241 (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); 249 (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
242} 250}
243 251
244static inline int is_no_device(u32 intr_info) 252static inline bool is_no_device(u32 intr_info)
245{ 253{
246 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 254 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
247 INTR_INFO_VALID_MASK)) == 255 INTR_INFO_VALID_MASK)) ==
248 (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); 256 (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
249} 257}
250 258
251static inline int is_invalid_opcode(u32 intr_info) 259static inline bool is_invalid_opcode(u32 intr_info)
252{ 260{
253 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 261 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
254 INTR_INFO_VALID_MASK)) == 262 INTR_INFO_VALID_MASK)) ==
255 (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); 263 (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
256} 264}
257 265
258static inline int is_external_interrupt(u32 intr_info) 266static inline bool is_external_interrupt(u32 intr_info)
259{ 267{
260 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) 268 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
261 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 269 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
262} 270}
263 271
264static inline int is_machine_check(u32 intr_info) 272static inline bool is_machine_check(u32 intr_info)
265{ 273{
266 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 274 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
267 INTR_INFO_VALID_MASK)) == 275 INTR_INFO_VALID_MASK)) ==
268 (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); 276 (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
269} 277}
270 278
271static inline int cpu_has_vmx_msr_bitmap(void) 279static inline bool cpu_has_vmx_msr_bitmap(void)
272{ 280{
273 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; 281 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
274} 282}
275 283
276static inline int cpu_has_vmx_tpr_shadow(void) 284static inline bool cpu_has_vmx_tpr_shadow(void)
277{ 285{
278 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; 286 return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
279} 287}
280 288
281static inline int vm_need_tpr_shadow(struct kvm *kvm) 289static inline bool vm_need_tpr_shadow(struct kvm *kvm)
282{ 290{
283 return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); 291 return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
284} 292}
285 293
286static inline int cpu_has_secondary_exec_ctrls(void) 294static inline bool cpu_has_secondary_exec_ctrls(void)
287{ 295{
288 return vmcs_config.cpu_based_exec_ctrl & 296 return vmcs_config.cpu_based_exec_ctrl &
289 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; 297 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -303,80 +311,80 @@ static inline bool cpu_has_vmx_flexpriority(void)
303 311
304static inline bool cpu_has_vmx_ept_execute_only(void) 312static inline bool cpu_has_vmx_ept_execute_only(void)
305{ 313{
306 return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); 314 return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
307} 315}
308 316
309static inline bool cpu_has_vmx_eptp_uncacheable(void) 317static inline bool cpu_has_vmx_eptp_uncacheable(void)
310{ 318{
311 return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); 319 return vmx_capability.ept & VMX_EPTP_UC_BIT;
312} 320}
313 321
314static inline bool cpu_has_vmx_eptp_writeback(void) 322static inline bool cpu_has_vmx_eptp_writeback(void)
315{ 323{
316 return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); 324 return vmx_capability.ept & VMX_EPTP_WB_BIT;
317} 325}
318 326
319static inline bool cpu_has_vmx_ept_2m_page(void) 327static inline bool cpu_has_vmx_ept_2m_page(void)
320{ 328{
321 return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); 329 return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
322} 330}
323 331
324static inline bool cpu_has_vmx_ept_1g_page(void) 332static inline bool cpu_has_vmx_ept_1g_page(void)
325{ 333{
326 return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); 334 return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
327} 335}
328 336
329static inline int cpu_has_vmx_invept_individual_addr(void) 337static inline bool cpu_has_vmx_invept_individual_addr(void)
330{ 338{
331 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); 339 return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
332} 340}
333 341
334static inline int cpu_has_vmx_invept_context(void) 342static inline bool cpu_has_vmx_invept_context(void)
335{ 343{
336 return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); 344 return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
337} 345}
338 346
339static inline int cpu_has_vmx_invept_global(void) 347static inline bool cpu_has_vmx_invept_global(void)
340{ 348{
341 return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); 349 return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
342} 350}
343 351
344static inline int cpu_has_vmx_ept(void) 352static inline bool cpu_has_vmx_ept(void)
345{ 353{
346 return vmcs_config.cpu_based_2nd_exec_ctrl & 354 return vmcs_config.cpu_based_2nd_exec_ctrl &
347 SECONDARY_EXEC_ENABLE_EPT; 355 SECONDARY_EXEC_ENABLE_EPT;
348} 356}
349 357
350static inline int cpu_has_vmx_unrestricted_guest(void) 358static inline bool cpu_has_vmx_unrestricted_guest(void)
351{ 359{
352 return vmcs_config.cpu_based_2nd_exec_ctrl & 360 return vmcs_config.cpu_based_2nd_exec_ctrl &
353 SECONDARY_EXEC_UNRESTRICTED_GUEST; 361 SECONDARY_EXEC_UNRESTRICTED_GUEST;
354} 362}
355 363
356static inline int cpu_has_vmx_ple(void) 364static inline bool cpu_has_vmx_ple(void)
357{ 365{
358 return vmcs_config.cpu_based_2nd_exec_ctrl & 366 return vmcs_config.cpu_based_2nd_exec_ctrl &
359 SECONDARY_EXEC_PAUSE_LOOP_EXITING; 367 SECONDARY_EXEC_PAUSE_LOOP_EXITING;
360} 368}
361 369
362static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 370static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
363{ 371{
364 return flexpriority_enabled && irqchip_in_kernel(kvm); 372 return flexpriority_enabled && irqchip_in_kernel(kvm);
365} 373}
366 374
367static inline int cpu_has_vmx_vpid(void) 375static inline bool cpu_has_vmx_vpid(void)
368{ 376{
369 return vmcs_config.cpu_based_2nd_exec_ctrl & 377 return vmcs_config.cpu_based_2nd_exec_ctrl &
370 SECONDARY_EXEC_ENABLE_VPID; 378 SECONDARY_EXEC_ENABLE_VPID;
371} 379}
372 380
373static inline int cpu_has_vmx_rdtscp(void) 381static inline bool cpu_has_vmx_rdtscp(void)
374{ 382{
375 return vmcs_config.cpu_based_2nd_exec_ctrl & 383 return vmcs_config.cpu_based_2nd_exec_ctrl &
376 SECONDARY_EXEC_RDTSCP; 384 SECONDARY_EXEC_RDTSCP;
377} 385}
378 386
379static inline int cpu_has_virtual_nmis(void) 387static inline bool cpu_has_virtual_nmis(void)
380{ 388{
381 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; 389 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
382} 390}
@@ -595,16 +603,56 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
595 vmcs_write32(EXCEPTION_BITMAP, eb); 603 vmcs_write32(EXCEPTION_BITMAP, eb);
596} 604}
597 605
606static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
607{
608 unsigned i;
609 struct msr_autoload *m = &vmx->msr_autoload;
610
611 for (i = 0; i < m->nr; ++i)
612 if (m->guest[i].index == msr)
613 break;
614
615 if (i == m->nr)
616 return;
617 --m->nr;
618 m->guest[i] = m->guest[m->nr];
619 m->host[i] = m->host[m->nr];
620 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
621 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
622}
623
624static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
625 u64 guest_val, u64 host_val)
626{
627 unsigned i;
628 struct msr_autoload *m = &vmx->msr_autoload;
629
630 for (i = 0; i < m->nr; ++i)
631 if (m->guest[i].index == msr)
632 break;
633
634 if (i == m->nr) {
635 ++m->nr;
636 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
637 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
638 }
639
640 m->guest[i].index = msr;
641 m->guest[i].value = guest_val;
642 m->host[i].index = msr;
643 m->host[i].value = host_val;
644}
645
598static void reload_tss(void) 646static void reload_tss(void)
599{ 647{
600 /* 648 /*
601 * VT restores TR but not its size. Useless. 649 * VT restores TR but not its size. Useless.
602 */ 650 */
603 struct descriptor_table gdt; 651 struct desc_ptr gdt;
604 struct desc_struct *descs; 652 struct desc_struct *descs;
605 653
606 kvm_get_gdt(&gdt); 654 native_store_gdt(&gdt);
607 descs = (void *)gdt.base; 655 descs = (void *)gdt.address;
608 descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ 656 descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
609 load_TR_desc(); 657 load_TR_desc();
610} 658}
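Editor's sketch: clear_atomic_switch_msr() above removes an entry by overwriting it with the last slot and shrinking the count, keeping guest[] and host[] parallel so the VMCS load counts stay in sync with a single write. A standalone sketch of that swap-with-last removal on a small {index,value} array; the structure names are local to the sketch.

    #include <stdio.h>
    #include <stdint.h>

    #define NR_SLOTS 4

    struct msr_slot { uint32_t index; uint64_t value; };

    struct autoload {
        unsigned nr;
        struct msr_slot guest[NR_SLOTS];
        struct msr_slot host[NR_SLOTS];
    };

    /* Remove 'msr' by copying the last pair over it -- order is not preserved,
     * which is fine because the hardware treats the list as a set. */
    static void clear_slot(struct autoload *m, uint32_t msr)
    {
        unsigned i;

        for (i = 0; i < m->nr; ++i)
            if (m->guest[i].index == msr)
                break;
        if (i == m->nr)
            return;                 /* not present */
        --m->nr;
        m->guest[i] = m->guest[m->nr];
        m->host[i]  = m->host[m->nr];
        /* the kernel version would now rewrite VM_{ENTRY,EXIT}_MSR_LOAD_COUNT */
    }

    int main(void)
    {
        struct autoload m = { .nr = 2,
            .guest = { { 0xc0000080, 1 }, { 0x10, 2 } },
            .host  = { { 0xc0000080, 3 }, { 0x10, 4 } } };

        clear_slot(&m, 0xc0000080);     /* drop the EFER entry (MSR 0xc0000080) */
        printf("nr=%u first=%#x\n", m.nr, (unsigned)m.guest[0].index); /* nr=1 first=0x10 */
        return 0;
    }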
@@ -631,9 +679,57 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
631 guest_efer |= host_efer & ignore_bits; 679 guest_efer |= host_efer & ignore_bits;
632 vmx->guest_msrs[efer_offset].data = guest_efer; 680 vmx->guest_msrs[efer_offset].data = guest_efer;
633 vmx->guest_msrs[efer_offset].mask = ~ignore_bits; 681 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
682
683 clear_atomic_switch_msr(vmx, MSR_EFER);
684 /* On ept, can't emulate nx, and must switch nx atomically */
685 if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
686 guest_efer = vmx->vcpu.arch.efer;
687 if (!(guest_efer & EFER_LMA))
688 guest_efer &= ~EFER_LME;
689 add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
690 return false;
691 }
692
634 return true; 693 return true;
635} 694}
636 695
696static unsigned long segment_base(u16 selector)
697{
698 struct desc_ptr gdt;
699 struct desc_struct *d;
700 unsigned long table_base;
701 unsigned long v;
702
703 if (!(selector & ~3))
704 return 0;
705
706 native_store_gdt(&gdt);
707 table_base = gdt.address;
708
709 if (selector & 4) { /* from ldt */
710 u16 ldt_selector = kvm_read_ldt();
711
712 if (!(ldt_selector & ~3))
713 return 0;
714
715 table_base = segment_base(ldt_selector);
716 }
717 d = (struct desc_struct *)(table_base + (selector & ~7));
718 v = get_desc_base(d);
719#ifdef CONFIG_X86_64
720 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
721 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
722#endif
723 return v;
724}
725
726static inline unsigned long kvm_read_tr_base(void)
727{
728 u16 tr;
729 asm("str %0" : "=g"(tr));
730 return segment_base(tr);
731}
732
637static void vmx_save_host_state(struct kvm_vcpu *vcpu) 733static void vmx_save_host_state(struct kvm_vcpu *vcpu)
638{ 734{
639 struct vcpu_vmx *vmx = to_vmx(vcpu); 735 struct vcpu_vmx *vmx = to_vmx(vcpu);
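Editor's sketch: segment_base() above reconstructs a base address from a GDT/LDT descriptor, where the 32-bit base is split across three fields and 64-bit system descriptors (LDT/TSS types 2, 9, 11) carry an extra base3 dword in the following slot. A simplified userspace sketch of that reassembly; the struct models the descriptor fields per the x86 SDM but does not reproduce the exact byte packing, and it is not the kernel's desc_struct.

    #include <stdio.h>
    #include <stdint.h>

    /* Fields of a legacy 8-byte segment descriptor (packing not reproduced). */
    struct seg_desc {
        uint16_t limit0;
        uint16_t base0;
        uint8_t  base1;
        unsigned type : 4, s : 1, dpl : 2, p : 1;
        unsigned limit1 : 4, avl : 1, l : 1, d : 1, g : 1;
        uint8_t  base2;
    };

    /* System descriptors (s == 0) of type LDT/TSS get a second 8-byte slot
     * in long mode whose low dword holds base bits 63..32. */
    struct sys_desc_high { uint32_t base3; uint32_t reserved; };

    static uint64_t desc_base(const struct seg_desc *d, const struct sys_desc_high *hi)
    {
        uint64_t base = d->base0 | ((uint64_t)d->base1 << 16) | ((uint64_t)d->base2 << 24);

        if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11) && hi)
            base |= (uint64_t)hi->base3 << 32;
        return base;
    }

    int main(void)
    {
        struct seg_desc d = { .base0 = 0x5678, .base1 = 0x34, .base2 = 0x12,
                              .s = 0, .type = 9 };
        struct sys_desc_high hi = { .base3 = 0xffffffff };

        printf("base=%#llx\n", (unsigned long long)desc_base(&d, &hi));
        /* prints base=0xffffffff12345678 */
        return 0;
    }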
@@ -758,7 +854,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
758 } 854 }
759 855
760 if (vcpu->cpu != cpu) { 856 if (vcpu->cpu != cpu) {
761 struct descriptor_table dt; 857 struct desc_ptr dt;
762 unsigned long sysenter_esp; 858 unsigned long sysenter_esp;
763 859
764 vcpu->cpu = cpu; 860 vcpu->cpu = cpu;
@@ -767,8 +863,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
767 * processors. 863 * processors.
768 */ 864 */
769 vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ 865 vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
770 kvm_get_gdt(&dt); 866 native_store_gdt(&dt);
771 vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ 867 vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */
772 868
773 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); 869 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
774 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ 870 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
@@ -846,9 +942,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
846 int ret = 0; 942 int ret = 0;
847 943
848 if (interruptibility & GUEST_INTR_STATE_STI) 944 if (interruptibility & GUEST_INTR_STATE_STI)
849 ret |= X86_SHADOW_INT_STI; 945 ret |= KVM_X86_SHADOW_INT_STI;
850 if (interruptibility & GUEST_INTR_STATE_MOV_SS) 946 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
851 ret |= X86_SHADOW_INT_MOV_SS; 947 ret |= KVM_X86_SHADOW_INT_MOV_SS;
852 948
853 return ret & mask; 949 return ret & mask;
854} 950}
@@ -860,9 +956,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
860 956
861 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); 957 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
862 958
863 if (mask & X86_SHADOW_INT_MOV_SS) 959 if (mask & KVM_X86_SHADOW_INT_MOV_SS)
864 interruptibility |= GUEST_INTR_STATE_MOV_SS; 960 interruptibility |= GUEST_INTR_STATE_MOV_SS;
865 if (mask & X86_SHADOW_INT_STI) 961 else if (mask & KVM_X86_SHADOW_INT_STI)
866 interruptibility |= GUEST_INTR_STATE_STI; 962 interruptibility |= GUEST_INTR_STATE_STI;
867 963
868 if ((interruptibility != interruptibility_old)) 964 if ((interruptibility != interruptibility_old))
@@ -882,7 +978,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
882} 978}
883 979
884static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 980static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
885 bool has_error_code, u32 error_code) 981 bool has_error_code, u32 error_code,
982 bool reinject)
886{ 983{
887 struct vcpu_vmx *vmx = to_vmx(vcpu); 984 struct vcpu_vmx *vmx = to_vmx(vcpu);
888 u32 intr_info = nr | INTR_INFO_VALID_MASK; 985 u32 intr_info = nr | INTR_INFO_VALID_MASK;
@@ -1176,9 +1273,16 @@ static __init int vmx_disabled_by_bios(void)
1176 u64 msr; 1273 u64 msr;
1177 1274
1178 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); 1275 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
1179 return (msr & (FEATURE_CONTROL_LOCKED | 1276 if (msr & FEATURE_CONTROL_LOCKED) {
1180 FEATURE_CONTROL_VMXON_ENABLED)) 1277 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
1181 == FEATURE_CONTROL_LOCKED; 1278 && tboot_enabled())
1279 return 1;
1280 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
1281 && !tboot_enabled())
1282 return 1;
1283 }
1284
1285 return 0;
1182 /* locked but not enabled */ 1286 /* locked but not enabled */
1183} 1287}
1184 1288
@@ -1186,21 +1290,23 @@ static int hardware_enable(void *garbage)
1186{ 1290{
1187 int cpu = raw_smp_processor_id(); 1291 int cpu = raw_smp_processor_id();
1188 u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 1292 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
1189 u64 old; 1293 u64 old, test_bits;
1190 1294
1191 if (read_cr4() & X86_CR4_VMXE) 1295 if (read_cr4() & X86_CR4_VMXE)
1192 return -EBUSY; 1296 return -EBUSY;
1193 1297
1194 INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); 1298 INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
1195 rdmsrl(MSR_IA32_FEATURE_CONTROL, old); 1299 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
1196 if ((old & (FEATURE_CONTROL_LOCKED | 1300
1197 FEATURE_CONTROL_VMXON_ENABLED)) 1301 test_bits = FEATURE_CONTROL_LOCKED;
1198 != (FEATURE_CONTROL_LOCKED | 1302 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
1199 FEATURE_CONTROL_VMXON_ENABLED)) 1303 if (tboot_enabled())
1304 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
1305
1306 if ((old & test_bits) != test_bits) {
1200 /* enable and lock */ 1307 /* enable and lock */
1201 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | 1308 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
1202 FEATURE_CONTROL_LOCKED | 1309 }
1203 FEATURE_CONTROL_VMXON_ENABLED);
1204 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ 1310 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
1205 asm volatile (ASM_VMX_VMXON_RAX 1311 asm volatile (ASM_VMX_VMXON_RAX
1206 : : "a"(&phys_addr), "m"(phys_addr) 1312 : : "a"(&phys_addr), "m"(phys_addr)
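Editor's sketch: the two hunks above treat IA32_FEATURE_CONTROL as three independent facts -- the lock bit, "VMXON allowed inside SMX", and "VMXON allowed outside SMX" -- and pick which enable bit is required based on whether tboot launched the kernel. A sketch of that test with the architectural bit positions written out (bit 0 lock, bit 1 in-SMX, bit 2 out-of-SMX); the combined check is my paraphrase of the vmx_disabled_by_bios()/hardware_enable() logic, not kernel code.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FC_LOCKED              (1ULL << 0)
    #define FC_VMXON_INSIDE_SMX    (1ULL << 1)
    #define FC_VMXON_OUTSIDE_SMX   (1ULL << 2)

    /* True when the BIOS left VMX usable for the current launch mode. */
    static bool vmx_usable(uint64_t feature_control, bool tboot)
    {
        uint64_t need = FC_LOCKED | FC_VMXON_OUTSIDE_SMX;

        if (tboot)
            need |= FC_VMXON_INSIDE_SMX;
        /* If the MSR is not locked yet, the kernel may still set and lock it. */
        if (!(feature_control & FC_LOCKED))
            return true;
        return (feature_control & need) == need;
    }

    int main(void)
    {
        printf("%d\n", vmx_usable(FC_LOCKED | FC_VMXON_OUTSIDE_SMX, false)); /* 1 */
        printf("%d\n", vmx_usable(FC_LOCKED, false));       /* 0: locked but not enabled */
        return 0;
    }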
@@ -1521,7 +1627,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
1521 struct kvm_memslots *slots; 1627 struct kvm_memslots *slots;
1522 gfn_t base_gfn; 1628 gfn_t base_gfn;
1523 1629
1524 slots = rcu_dereference(kvm->memslots); 1630 slots = kvm_memslots(kvm);
1525 base_gfn = kvm->memslots->memslots[0].base_gfn + 1631 base_gfn = kvm->memslots->memslots[0].base_gfn +
1526 kvm->memslots->memslots[0].npages - 3; 1632 kvm->memslots->memslots[0].npages - 3;
1527 return base_gfn << PAGE_SHIFT; 1633 return base_gfn << PAGE_SHIFT;
@@ -1649,6 +1755,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
1649 vmcs_write32(VM_ENTRY_CONTROLS, 1755 vmcs_write32(VM_ENTRY_CONTROLS,
1650 vmcs_read32(VM_ENTRY_CONTROLS) 1756 vmcs_read32(VM_ENTRY_CONTROLS)
1651 & ~VM_ENTRY_IA32E_MODE); 1757 & ~VM_ENTRY_IA32E_MODE);
1758 vmx_set_efer(vcpu, vcpu->arch.efer);
1652} 1759}
1653 1760
1654#endif 1761#endif
@@ -1934,28 +2041,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
1934 *l = (ar >> 13) & 1; 2041 *l = (ar >> 13) & 1;
1935} 2042}
1936 2043
1937static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 2044static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1938{ 2045{
1939 dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); 2046 dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
1940 dt->base = vmcs_readl(GUEST_IDTR_BASE); 2047 dt->address = vmcs_readl(GUEST_IDTR_BASE);
1941} 2048}
1942 2049
1943static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 2050static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1944{ 2051{
1945 vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); 2052 vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
1946 vmcs_writel(GUEST_IDTR_BASE, dt->base); 2053 vmcs_writel(GUEST_IDTR_BASE, dt->address);
1947} 2054}
1948 2055
1949static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 2056static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1950{ 2057{
1951 dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); 2058 dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
1952 dt->base = vmcs_readl(GUEST_GDTR_BASE); 2059 dt->address = vmcs_readl(GUEST_GDTR_BASE);
1953} 2060}
1954 2061
1955static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 2062static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1956{ 2063{
1957 vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); 2064 vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
1958 vmcs_writel(GUEST_GDTR_BASE, dt->base); 2065 vmcs_writel(GUEST_GDTR_BASE, dt->address);
1959} 2066}
1960 2067
1961static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) 2068static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
@@ -2296,6 +2403,16 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
2296 spin_unlock(&vmx_vpid_lock); 2403 spin_unlock(&vmx_vpid_lock);
2297} 2404}
2298 2405
2406static void free_vpid(struct vcpu_vmx *vmx)
2407{
2408 if (!enable_vpid)
2409 return;
2410 spin_lock(&vmx_vpid_lock);
2411 if (vmx->vpid != 0)
2412 __clear_bit(vmx->vpid, vmx_vpid_bitmap);
2413 spin_unlock(&vmx_vpid_lock);
2414}
2415
2299static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) 2416static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
2300{ 2417{
2301 int f = sizeof(unsigned long); 2418 int f = sizeof(unsigned long);
@@ -2334,7 +2451,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2334 u32 junk; 2451 u32 junk;
2335 u64 host_pat, tsc_this, tsc_base; 2452 u64 host_pat, tsc_this, tsc_base;
2336 unsigned long a; 2453 unsigned long a;
2337 struct descriptor_table dt; 2454 struct desc_ptr dt;
2338 int i; 2455 int i;
2339 unsigned long kvm_vmx_return; 2456 unsigned long kvm_vmx_return;
2340 u32 exec_control; 2457 u32 exec_control;
@@ -2415,14 +2532,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2415 2532
2416 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ 2533 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
2417 2534
2418 kvm_get_idt(&dt); 2535 native_store_idt(&dt);
2419 vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ 2536 vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
2420 2537
2421 asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); 2538 asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
2422 vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ 2539 vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
2423 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); 2540 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
2424 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); 2541 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
2542 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
2425 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); 2543 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
2544 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
2426 2545
2427 rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); 2546 rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
2428 vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); 2547 vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
@@ -2947,22 +3066,20 @@ static int handle_io(struct kvm_vcpu *vcpu)
2947 int size, in, string; 3066 int size, in, string;
2948 unsigned port; 3067 unsigned port;
2949 3068
2950 ++vcpu->stat.io_exits;
2951 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 3069 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
2952 string = (exit_qualification & 16) != 0; 3070 string = (exit_qualification & 16) != 0;
3071 in = (exit_qualification & 8) != 0;
2953 3072
2954 if (string) { 3073 ++vcpu->stat.io_exits;
2955 if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO)
2956 return 0;
2957 return 1;
2958 }
2959 3074
2960 size = (exit_qualification & 7) + 1; 3075 if (string || in)
2961 in = (exit_qualification & 8) != 0; 3076 return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO);
2962 port = exit_qualification >> 16;
2963 3077
3078 port = exit_qualification >> 16;
3079 size = (exit_qualification & 7) + 1;
2964 skip_emulated_instruction(vcpu); 3080 skip_emulated_instruction(vcpu);
2965 return kvm_emulate_pio(vcpu, in, size, port); 3081
3082 return kvm_fast_pio_out(vcpu, size, port);
2966} 3083}
2967 3084
2968static void 3085static void
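Editor's sketch: the rewritten handle_io() pulls everything it needs straight out of the VM-exit qualification -- bits 2:0 give size-1, bit 3 the direction, bit 4 string vs. non-string, bits 31:16 the port -- and only the plain OUT case goes to kvm_fast_pio_out(); string and IN accesses stay on the emulator path. A small decoder following the same bit tests:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct io_exit {
        unsigned size;      /* access width in bytes */
        bool in;            /* true for IN/INS, false for OUT/OUTS */
        bool string;        /* INS/OUTS */
        uint16_t port;
    };

    /* Decode an I/O-instruction exit qualification the way handle_io() does. */
    static struct io_exit decode_io_qual(uint64_t q)
    {
        struct io_exit e = {
            .size   = (unsigned)(q & 7) + 1,
            .in     = (q & 8) != 0,
            .string = (q & 16) != 0,
            .port   = (uint16_t)(q >> 16),
        };
        return e;
    }

    int main(void)
    {
        /* e.g. "out %al, $0x3f8": 1 byte, out, non-string, port 0x3f8 */
        struct io_exit e = decode_io_qual((0x3f8ULL << 16) | 0);

        printf("size=%u in=%d string=%d port=%#x\n", e.size, e.in, e.string, e.port);
        return 0;
    }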
@@ -3053,19 +3170,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
3053 return 0; 3170 return 0;
3054} 3171}
3055 3172
3056static int check_dr_alias(struct kvm_vcpu *vcpu)
3057{
3058 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
3059 kvm_queue_exception(vcpu, UD_VECTOR);
3060 return -1;
3061 }
3062 return 0;
3063}
3064
3065static int handle_dr(struct kvm_vcpu *vcpu) 3173static int handle_dr(struct kvm_vcpu *vcpu)
3066{ 3174{
3067 unsigned long exit_qualification; 3175 unsigned long exit_qualification;
3068 unsigned long val;
3069 int dr, reg; 3176 int dr, reg;
3070 3177
3071 /* Do not handle if the CPL > 0, will trigger GP on re-entry */ 3178 /* Do not handle if the CPL > 0, will trigger GP on re-entry */
@@ -3100,67 +3207,20 @@ static int handle_dr(struct kvm_vcpu *vcpu)
3100 dr = exit_qualification & DEBUG_REG_ACCESS_NUM; 3207 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
3101 reg = DEBUG_REG_ACCESS_REG(exit_qualification); 3208 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
3102 if (exit_qualification & TYPE_MOV_FROM_DR) { 3209 if (exit_qualification & TYPE_MOV_FROM_DR) {
3103 switch (dr) { 3210 unsigned long val;
3104 case 0 ... 3: 3211 if (!kvm_get_dr(vcpu, dr, &val))
3105 val = vcpu->arch.db[dr]; 3212 kvm_register_write(vcpu, reg, val);
3106 break; 3213 } else
3107 case 4: 3214 kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]);
3108 if (check_dr_alias(vcpu) < 0)
3109 return 1;
3110 /* fall through */
3111 case 6:
3112 val = vcpu->arch.dr6;
3113 break;
3114 case 5:
3115 if (check_dr_alias(vcpu) < 0)
3116 return 1;
3117 /* fall through */
3118 default: /* 7 */
3119 val = vcpu->arch.dr7;
3120 break;
3121 }
3122 kvm_register_write(vcpu, reg, val);
3123 } else {
3124 val = vcpu->arch.regs[reg];
3125 switch (dr) {
3126 case 0 ... 3:
3127 vcpu->arch.db[dr] = val;
3128 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
3129 vcpu->arch.eff_db[dr] = val;
3130 break;
3131 case 4:
3132 if (check_dr_alias(vcpu) < 0)
3133 return 1;
3134 /* fall through */
3135 case 6:
3136 if (val & 0xffffffff00000000ULL) {
3137 kvm_inject_gp(vcpu, 0);
3138 return 1;
3139 }
3140 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
3141 break;
3142 case 5:
3143 if (check_dr_alias(vcpu) < 0)
3144 return 1;
3145 /* fall through */
3146 default: /* 7 */
3147 if (val & 0xffffffff00000000ULL) {
3148 kvm_inject_gp(vcpu, 0);
3149 return 1;
3150 }
3151 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
3152 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
3153 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
3154 vcpu->arch.switch_db_regs =
3155 (val & DR7_BP_EN_MASK);
3156 }
3157 break;
3158 }
3159 }
3160 skip_emulated_instruction(vcpu); 3215 skip_emulated_instruction(vcpu);
3161 return 1; 3216 return 1;
3162} 3217}
3163 3218
3219static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
3220{
3221 vmcs_writel(GUEST_DR7, val);
3222}
3223
3164static int handle_cpuid(struct kvm_vcpu *vcpu) 3224static int handle_cpuid(struct kvm_vcpu *vcpu)
3165{ 3225{
3166 kvm_emulate_cpuid(vcpu); 3226 kvm_emulate_cpuid(vcpu);
@@ -3292,6 +3352,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
3292{ 3352{
3293 struct vcpu_vmx *vmx = to_vmx(vcpu); 3353 struct vcpu_vmx *vmx = to_vmx(vcpu);
3294 unsigned long exit_qualification; 3354 unsigned long exit_qualification;
3355 bool has_error_code = false;
3356 u32 error_code = 0;
3295 u16 tss_selector; 3357 u16 tss_selector;
3296 int reason, type, idt_v; 3358 int reason, type, idt_v;
3297 3359
@@ -3314,6 +3376,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
3314 kvm_clear_interrupt_queue(vcpu); 3376 kvm_clear_interrupt_queue(vcpu);
3315 break; 3377 break;
3316 case INTR_TYPE_HARD_EXCEPTION: 3378 case INTR_TYPE_HARD_EXCEPTION:
3379 if (vmx->idt_vectoring_info &
3380 VECTORING_INFO_DELIVER_CODE_MASK) {
3381 has_error_code = true;
3382 error_code =
3383 vmcs_read32(IDT_VECTORING_ERROR_CODE);
3384 }
3385 /* fall through */
3317 case INTR_TYPE_SOFT_EXCEPTION: 3386 case INTR_TYPE_SOFT_EXCEPTION:
3318 kvm_clear_exception_queue(vcpu); 3387 kvm_clear_exception_queue(vcpu);
3319 break; 3388 break;
@@ -3328,8 +3397,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
3328 type != INTR_TYPE_NMI_INTR)) 3397 type != INTR_TYPE_NMI_INTR))
3329 skip_emulated_instruction(vcpu); 3398 skip_emulated_instruction(vcpu);
3330 3399
3331 if (!kvm_task_switch(vcpu, tss_selector, reason)) 3400 if (kvm_task_switch(vcpu, tss_selector, reason,
3401 has_error_code, error_code) == EMULATE_FAIL) {
3402 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3403 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3404 vcpu->run->internal.ndata = 0;
3332 return 0; 3405 return 0;
3406 }
3333 3407
3334 /* clear all local breakpoint enable flags */ 3408 /* clear all local breakpoint enable flags */
3335 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); 3409 vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
@@ -3574,7 +3648,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
3574 u32 exit_reason = vmx->exit_reason; 3648 u32 exit_reason = vmx->exit_reason;
3575 u32 vectoring_info = vmx->idt_vectoring_info; 3649 u32 vectoring_info = vmx->idt_vectoring_info;
3576 3650
3577 trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); 3651 trace_kvm_exit(exit_reason, vcpu);
3578 3652
3579 /* If guest state is invalid, start emulating */ 3653 /* If guest state is invalid, start emulating */
3580 if (vmx->emulation_required && emulate_invalid_guest_state) 3654 if (vmx->emulation_required && emulate_invalid_guest_state)
@@ -3923,10 +3997,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
3923{ 3997{
3924 struct vcpu_vmx *vmx = to_vmx(vcpu); 3998 struct vcpu_vmx *vmx = to_vmx(vcpu);
3925 3999
3926 spin_lock(&vmx_vpid_lock); 4000 free_vpid(vmx);
3927 if (vmx->vpid != 0)
3928 __clear_bit(vmx->vpid, vmx_vpid_bitmap);
3929 spin_unlock(&vmx_vpid_lock);
3930 vmx_free_vmcs(vcpu); 4001 vmx_free_vmcs(vcpu);
3931 kfree(vmx->guest_msrs); 4002 kfree(vmx->guest_msrs);
3932 kvm_vcpu_uninit(vcpu); 4003 kvm_vcpu_uninit(vcpu);
@@ -3988,6 +4059,7 @@ free_msrs:
3988uninit_vcpu: 4059uninit_vcpu:
3989 kvm_vcpu_uninit(&vmx->vcpu); 4060 kvm_vcpu_uninit(&vmx->vcpu);
3990free_vcpu: 4061free_vcpu:
4062 free_vpid(vmx);
3991 kmem_cache_free(kvm_vcpu_cache, vmx); 4063 kmem_cache_free(kvm_vcpu_cache, vmx);
3992 return ERR_PTR(err); 4064 return ERR_PTR(err);
3993} 4065}
@@ -4118,6 +4190,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
4118 } 4190 }
4119} 4191}
4120 4192
4193static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
4194{
4195}
4196
4121static struct kvm_x86_ops vmx_x86_ops = { 4197static struct kvm_x86_ops vmx_x86_ops = {
4122 .cpu_has_kvm_support = cpu_has_kvm_support, 4198 .cpu_has_kvm_support = cpu_has_kvm_support,
4123 .disabled_by_bios = vmx_disabled_by_bios, 4199 .disabled_by_bios = vmx_disabled_by_bios,
@@ -4154,6 +4230,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
4154 .set_idt = vmx_set_idt, 4230 .set_idt = vmx_set_idt,
4155 .get_gdt = vmx_get_gdt, 4231 .get_gdt = vmx_get_gdt,
4156 .set_gdt = vmx_set_gdt, 4232 .set_gdt = vmx_set_gdt,
4233 .set_dr7 = vmx_set_dr7,
4157 .cache_reg = vmx_cache_reg, 4234 .cache_reg = vmx_cache_reg,
4158 .get_rflags = vmx_get_rflags, 4235 .get_rflags = vmx_get_rflags,
4159 .set_rflags = vmx_set_rflags, 4236 .set_rflags = vmx_set_rflags,
@@ -4189,6 +4266,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
4189 .cpuid_update = vmx_cpuid_update, 4266 .cpuid_update = vmx_cpuid_update,
4190 4267
4191 .rdtscp_supported = vmx_rdtscp_supported, 4268 .rdtscp_supported = vmx_rdtscp_supported,
4269
4270 .set_supported_cpuid = vmx_set_supported_cpuid,
4192}; 4271};
4193 4272
4194static int __init vmx_init(void) 4273static int __init vmx_init(void)
@@ -4236,7 +4315,8 @@ static int __init vmx_init(void)
4236 4315
4237 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ 4316 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
4238 4317
4239 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); 4318 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
4319 __alignof__(struct vcpu_vmx), THIS_MODULE);
4240 if (r) 4320 if (r)
4241 goto out3; 4321 goto out3;
4242 4322
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dd9bc8fb81ab..05d571f6f196 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,7 +42,7 @@
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/perf_event.h> 43#include <linux/perf_event.h>
44#include <trace/events/kvm.h> 44#include <trace/events/kvm.h>
45#undef TRACE_INCLUDE_FILE 45
46#define CREATE_TRACE_POINTS 46#define CREATE_TRACE_POINTS
47#include "trace.h" 47#include "trace.h"
48 48
@@ -224,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore)
224 kvm_on_user_return(&smsr->urn); 224 kvm_on_user_return(&smsr->urn);
225} 225}
226 226
227unsigned long segment_base(u16 selector)
228{
229 struct descriptor_table gdt;
230 struct desc_struct *d;
231 unsigned long table_base;
232 unsigned long v;
233
234 if (selector == 0)
235 return 0;
236
237 kvm_get_gdt(&gdt);
238 table_base = gdt.base;
239
240 if (selector & 4) { /* from ldt */
241 u16 ldt_selector = kvm_read_ldt();
242
243 table_base = segment_base(ldt_selector);
244 }
245 d = (struct desc_struct *)(table_base + (selector & ~7));
246 v = get_desc_base(d);
247#ifdef CONFIG_X86_64
248 if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
249 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
250#endif
251 return v;
252}
253EXPORT_SYMBOL_GPL(segment_base);
254
255u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) 227u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
256{ 228{
257 if (irqchip_in_kernel(vcpu->kvm)) 229 if (irqchip_in_kernel(vcpu->kvm))
@@ -293,7 +265,8 @@ static int exception_class(int vector)
293} 265}
294 266
295static void kvm_multiple_exception(struct kvm_vcpu *vcpu, 267static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
296 unsigned nr, bool has_error, u32 error_code) 268 unsigned nr, bool has_error, u32 error_code,
269 bool reinject)
297{ 270{
298 u32 prev_nr; 271 u32 prev_nr;
299 int class1, class2; 272 int class1, class2;
@@ -304,6 +277,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
304 vcpu->arch.exception.has_error_code = has_error; 277 vcpu->arch.exception.has_error_code = has_error;
305 vcpu->arch.exception.nr = nr; 278 vcpu->arch.exception.nr = nr;
306 vcpu->arch.exception.error_code = error_code; 279 vcpu->arch.exception.error_code = error_code;
280 vcpu->arch.exception.reinject = reinject;
307 return; 281 return;
308 } 282 }
309 283
@@ -332,10 +306,16 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
332 306
333void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 307void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
334{ 308{
335 kvm_multiple_exception(vcpu, nr, false, 0); 309 kvm_multiple_exception(vcpu, nr, false, 0, false);
336} 310}
337EXPORT_SYMBOL_GPL(kvm_queue_exception); 311EXPORT_SYMBOL_GPL(kvm_queue_exception);
338 312
313void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
314{
315 kvm_multiple_exception(vcpu, nr, false, 0, true);
316}
317EXPORT_SYMBOL_GPL(kvm_requeue_exception);
318
339void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, 319void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
340 u32 error_code) 320 u32 error_code)
341{ 321{
@@ -352,10 +332,16 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
352 332
353void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 333void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
354{ 334{
355 kvm_multiple_exception(vcpu, nr, true, error_code); 335 kvm_multiple_exception(vcpu, nr, true, error_code, false);
356} 336}
357EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 337EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
358 338
339void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
340{
341 kvm_multiple_exception(vcpu, nr, true, error_code, true);
342}
343EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
344
359/* 345/*
360 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue 346 * Checks if cpl <= required_cpl; if true, return true. Otherwise queue
361 * a #GP and return false. 347 * a #GP and return false.
@@ -476,7 +462,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
476 } 462 }
477 463
478 kvm_x86_ops->set_cr0(vcpu, cr0); 464 kvm_x86_ops->set_cr0(vcpu, cr0);
479 vcpu->arch.cr0 = cr0;
480 465
481 kvm_mmu_reset_context(vcpu); 466 kvm_mmu_reset_context(vcpu);
482 return; 467 return;
@@ -485,7 +470,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
485 470
486void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 471void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
487{ 472{
488 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); 473 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
489} 474}
490EXPORT_SYMBOL_GPL(kvm_lmsw); 475EXPORT_SYMBOL_GPL(kvm_lmsw);
491 476
@@ -517,7 +502,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
517 } 502 }
518 kvm_x86_ops->set_cr4(vcpu, cr4); 503 kvm_x86_ops->set_cr4(vcpu, cr4);
519 vcpu->arch.cr4 = cr4; 504 vcpu->arch.cr4 = cr4;
520 vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
521 kvm_mmu_reset_context(vcpu); 505 kvm_mmu_reset_context(vcpu);
522} 506}
523EXPORT_SYMBOL_GPL(kvm_set_cr4); 507EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -592,6 +576,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
592} 576}
593EXPORT_SYMBOL_GPL(kvm_get_cr8); 577EXPORT_SYMBOL_GPL(kvm_get_cr8);
594 578
579int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
580{
581 switch (dr) {
582 case 0 ... 3:
583 vcpu->arch.db[dr] = val;
584 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
585 vcpu->arch.eff_db[dr] = val;
586 break;
587 case 4:
588 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
589 kvm_queue_exception(vcpu, UD_VECTOR);
590 return 1;
591 }
592 /* fall through */
593 case 6:
594 if (val & 0xffffffff00000000ULL) {
595 kvm_inject_gp(vcpu, 0);
596 return 1;
597 }
598 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
599 break;
600 case 5:
601 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
602 kvm_queue_exception(vcpu, UD_VECTOR);
603 return 1;
604 }
605 /* fall through */
606 default: /* 7 */
607 if (val & 0xffffffff00000000ULL) {
608 kvm_inject_gp(vcpu, 0);
609 return 1;
610 }
611 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
612 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
613 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
614 vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
615 }
616 break;
617 }
618
619 return 0;
620}
621EXPORT_SYMBOL_GPL(kvm_set_dr);
622
623int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
624{
625 switch (dr) {
626 case 0 ... 3:
627 *val = vcpu->arch.db[dr];
628 break;
629 case 4:
630 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
631 kvm_queue_exception(vcpu, UD_VECTOR);
632 return 1;
633 }
634 /* fall through */
635 case 6:
636 *val = vcpu->arch.dr6;
637 break;
638 case 5:
639 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
640 kvm_queue_exception(vcpu, UD_VECTOR);
641 return 1;
642 }
643 /* fall through */
644 default: /* 7 */
645 *val = vcpu->arch.dr7;
646 break;
647 }
648
649 return 0;
650}
651EXPORT_SYMBOL_GPL(kvm_get_dr);
652
595static inline u32 bit(int bitno) 653static inline u32 bit(int bitno)
596{ 654{
597 return 1 << (bitno & 31); 655 return 1 << (bitno & 31);
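Editor's sketch: the new kvm_set_dr()/kvm_get_dr() helpers centralize the DR4->DR6 and DR5->DR7 aliasing -- with CR4.DE clear the aliases are honored, with CR4.DE set they raise #UD, and DR6/DR7 writes reject any bits above 31. A compact userspace sketch of just that routing decision; the return codes are local conventions, not the kernel's.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum dr_result { DR_OK, DR_UD, DR_GP };

    /* Canonicalize a debug-register access the way kvm_set_dr()/kvm_get_dr() do. */
    static enum dr_result route_dr(int dr, uint64_t val, bool cr4_de, int *effective)
    {
        if (dr == 4 || dr == 5) {
            if (cr4_de)
                return DR_UD;       /* aliases are illegal with CR4.DE set */
            dr += 2;                /* DR4 -> DR6, DR5 -> DR7 */
        }
        if ((dr == 6 || dr == 7) && (val & 0xffffffff00000000ULL))
            return DR_GP;           /* upper 32 bits must be zero */
        *effective = dr;
        return DR_OK;
    }

    int main(void)
    {
        int eff;

        printf("%d eff=%d\n", route_dr(4, 0, false, &eff), eff);  /* DR_OK, eff=6 */
        printf("%d\n", route_dr(5, 0, true, &eff));               /* DR_UD */
        printf("%d\n", route_dr(7, 1ULL << 40, false, &eff));     /* DR_GP */
        return 0;
    }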
@@ -606,9 +664,10 @@ static inline u32 bit(int bitno)
606 * kvm-specific. Those are put in the beginning of the list. 664 * kvm-specific. Those are put in the beginning of the list.
607 */ 665 */
608 666
609#define KVM_SAVE_MSRS_BEGIN 5 667#define KVM_SAVE_MSRS_BEGIN 7
610static u32 msrs_to_save[] = { 668static u32 msrs_to_save[] = {
611 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 669 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
670 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
612 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, 671 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
613 HV_X64_MSR_APIC_ASSIST_PAGE, 672 HV_X64_MSR_APIC_ASSIST_PAGE,
614 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 673 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -625,48 +684,42 @@ static u32 emulated_msrs[] = {
625 MSR_IA32_MISC_ENABLE, 684 MSR_IA32_MISC_ENABLE,
626}; 685};
627 686
628static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 687static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
629{ 688{
630 if (efer & efer_reserved_bits) { 689 if (efer & efer_reserved_bits)
631 kvm_inject_gp(vcpu, 0); 690 return 1;
632 return;
633 }
634 691
635 if (is_paging(vcpu) 692 if (is_paging(vcpu)
636 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { 693 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
637 kvm_inject_gp(vcpu, 0); 694 return 1;
638 return;
639 }
640 695
641 if (efer & EFER_FFXSR) { 696 if (efer & EFER_FFXSR) {
642 struct kvm_cpuid_entry2 *feat; 697 struct kvm_cpuid_entry2 *feat;
643 698
644 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 699 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
645 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { 700 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
646 kvm_inject_gp(vcpu, 0); 701 return 1;
647 return;
648 }
649 } 702 }
650 703
651 if (efer & EFER_SVME) { 704 if (efer & EFER_SVME) {
652 struct kvm_cpuid_entry2 *feat; 705 struct kvm_cpuid_entry2 *feat;
653 706
654 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 707 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
655 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { 708 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
656 kvm_inject_gp(vcpu, 0); 709 return 1;
657 return;
658 }
659 } 710 }
660 711
661 kvm_x86_ops->set_efer(vcpu, efer);
662
663 efer &= ~EFER_LMA; 712 efer &= ~EFER_LMA;
664 efer |= vcpu->arch.efer & EFER_LMA; 713 efer |= vcpu->arch.efer & EFER_LMA;
665 714
715 kvm_x86_ops->set_efer(vcpu, efer);
716
666 vcpu->arch.efer = efer; 717 vcpu->arch.efer = efer;
667 718
668 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 719 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
669 kvm_mmu_reset_context(vcpu); 720 kvm_mmu_reset_context(vcpu);
721
722 return 0;
670} 723}
671 724
672void kvm_enable_efer_bits(u64 mask) 725void kvm_enable_efer_bits(u64 mask)
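Editor's sketch: the reworked set_efer() stops injecting #GP itself and instead returns nonzero so the MSR-write path decides what to do, and it preserves the current EFER.LMA bit across the guest's write before handing the value to kvm_x86_ops->set_efer(). A tiny sketch of that masked merge; the bit position used (LMA = bit 10) is the architectural one, and the validation here is reduced to a placeholder reserved-bit mask.

    #include <stdint.h>
    #include <stdio.h>

    #define EFER_LMA (1ULL << 10)   /* long mode active, owned by the host side */

    /* Merge a guest-requested EFER with the live LMA bit; nonzero means "reject". */
    static int check_and_merge_efer(uint64_t current_efer, uint64_t requested,
                                    uint64_t reserved_bits, uint64_t *out)
    {
        if (requested & reserved_bits)
            return 1;                       /* caller injects #GP */
        requested &= ~EFER_LMA;
        requested |= current_efer & EFER_LMA;
        *out = requested;
        return 0;
    }

    int main(void)
    {
        uint64_t efer;
        int rc = check_and_merge_efer(EFER_LMA, 0x0, ~0x1d01ULL, &efer);

        printf("rc=%d efer=%#llx\n", rc, (unsigned long long)efer);  /* LMA kept */
        return 0;
    }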
@@ -696,14 +749,22 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
696 749
697static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) 750static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
698{ 751{
699 static int version; 752 int version;
753 int r;
700 struct pvclock_wall_clock wc; 754 struct pvclock_wall_clock wc;
701 struct timespec boot; 755 struct timespec boot;
702 756
703 if (!wall_clock) 757 if (!wall_clock)
704 return; 758 return;
705 759
706 version++; 760 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
761 if (r)
762 return;
763
764 if (version & 1)
765 ++version; /* first time write, random junk */
766
767 ++version;
707 768
708 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 769 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
709 770
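Editor's sketch: the wall-clock update above follows pvclock's seqlock-style convention -- the version field is made odd before the payload is rewritten and even once it is consistent, so a reader that observes an odd or changing version retries. A single-threaded userspace sketch of writer and reader following that protocol; the memory barriers a real shared-memory implementation needs are deliberately omitted here.

    #include <stdint.h>
    #include <stdio.h>

    struct wall_clock {
        volatile uint32_t version;  /* odd while an update is in flight */
        uint32_t sec;
        uint32_t nsec;
    };

    /* Writer side: odd version -> rewrite payload -> even version. */
    static void wc_write(struct wall_clock *wc, uint32_t sec, uint32_t nsec)
    {
        uint32_t v = wc->version;

        if (v & 1)
            ++v;                    /* recover from uninitialized/odd junk */
        wc->version = v + 1;        /* now odd: update in progress */
        wc->sec = sec;
        wc->nsec = nsec;
        wc->version = v + 2;        /* even again: consistent */
    }

    /* Reader side: retry until the version is even and unchanged across the copy. */
    static void wc_read(const struct wall_clock *wc, uint32_t *sec, uint32_t *nsec)
    {
        uint32_t v1, v2;

        do {
            v1 = wc->version;
            *sec = wc->sec;
            *nsec = wc->nsec;
            v2 = wc->version;
        } while ((v1 & 1) || v1 != v2);
    }

    int main(void)
    {
        struct wall_clock wc = { .version = 0 };
        uint32_t s, ns;

        wc_write(&wc, 1274000000, 500);
        wc_read(&wc, &s, &ns);
        printf("%u.%09u version=%u\n", s, ns, wc.version);
        return 0;
    }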
@@ -796,6 +857,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
796 vcpu->hv_clock.system_time = ts.tv_nsec + 857 vcpu->hv_clock.system_time = ts.tv_nsec +
797 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; 858 (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
798 859
860 vcpu->hv_clock.flags = 0;
861
799 /* 862 /*
800 * The interface expects us to write an even number signaling that the 863 * The interface expects us to write an even number signaling that the
801 * update is finished. Since the guest won't see the intermediate 864 * update is finished. Since the guest won't see the intermediate
@@ -1087,10 +1150,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1087{ 1150{
1088 switch (msr) { 1151 switch (msr) {
1089 case MSR_EFER: 1152 case MSR_EFER:
1090 set_efer(vcpu, data); 1153 return set_efer(vcpu, data);
1091 break;
1092 case MSR_K7_HWCR: 1154 case MSR_K7_HWCR:
1093 data &= ~(u64)0x40; /* ignore flush filter disable */ 1155 data &= ~(u64)0x40; /* ignore flush filter disable */
1156 data &= ~(u64)0x100; /* ignore ignne emulation enable */
1094 if (data != 0) { 1157 if (data != 0) {
1095 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 1158 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1096 data); 1159 data);
@@ -1133,10 +1196,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1133 case MSR_IA32_MISC_ENABLE: 1196 case MSR_IA32_MISC_ENABLE:
1134 vcpu->arch.ia32_misc_enable_msr = data; 1197 vcpu->arch.ia32_misc_enable_msr = data;
1135 break; 1198 break;
1199 case MSR_KVM_WALL_CLOCK_NEW:
1136 case MSR_KVM_WALL_CLOCK: 1200 case MSR_KVM_WALL_CLOCK:
1137 vcpu->kvm->arch.wall_clock = data; 1201 vcpu->kvm->arch.wall_clock = data;
1138 kvm_write_wall_clock(vcpu->kvm, data); 1202 kvm_write_wall_clock(vcpu->kvm, data);
1139 break; 1203 break;
1204 case MSR_KVM_SYSTEM_TIME_NEW:
1140 case MSR_KVM_SYSTEM_TIME: { 1205 case MSR_KVM_SYSTEM_TIME: {
1141 if (vcpu->arch.time_page) { 1206 if (vcpu->arch.time_page) {
1142 kvm_release_page_dirty(vcpu->arch.time_page); 1207 kvm_release_page_dirty(vcpu->arch.time_page);
@@ -1408,9 +1473,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1408 data = vcpu->arch.efer; 1473 data = vcpu->arch.efer;
1409 break; 1474 break;
1410 case MSR_KVM_WALL_CLOCK: 1475 case MSR_KVM_WALL_CLOCK:
1476 case MSR_KVM_WALL_CLOCK_NEW:
1411 data = vcpu->kvm->arch.wall_clock; 1477 data = vcpu->kvm->arch.wall_clock;
1412 break; 1478 break;
1413 case MSR_KVM_SYSTEM_TIME: 1479 case MSR_KVM_SYSTEM_TIME:
1480 case MSR_KVM_SYSTEM_TIME_NEW:
1414 data = vcpu->arch.time; 1481 data = vcpu->arch.time;
1415 break; 1482 break;
1416 case MSR_IA32_P5_MC_ADDR: 1483 case MSR_IA32_P5_MC_ADDR:
@@ -1549,6 +1616,7 @@ int kvm_dev_ioctl_check_extension(long ext)
1549 case KVM_CAP_HYPERV_VAPIC: 1616 case KVM_CAP_HYPERV_VAPIC:
1550 case KVM_CAP_HYPERV_SPIN: 1617 case KVM_CAP_HYPERV_SPIN:
1551 case KVM_CAP_PCI_SEGMENT: 1618 case KVM_CAP_PCI_SEGMENT:
1619 case KVM_CAP_DEBUGREGS:
1552 case KVM_CAP_X86_ROBUST_SINGLESTEP: 1620 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1553 r = 1; 1621 r = 1;
1554 break; 1622 break;
@@ -1769,6 +1837,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1769{ 1837{
1770 int r; 1838 int r;
1771 1839
1840 vcpu_load(vcpu);
1772 r = -E2BIG; 1841 r = -E2BIG;
1773 if (cpuid->nent < vcpu->arch.cpuid_nent) 1842 if (cpuid->nent < vcpu->arch.cpuid_nent)
1774 goto out; 1843 goto out;
@@ -1780,6 +1849,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
1780 1849
1781out: 1850out:
1782 cpuid->nent = vcpu->arch.cpuid_nent; 1851 cpuid->nent = vcpu->arch.cpuid_nent;
1852 vcpu_put(vcpu);
1783 return r; 1853 return r;
1784} 1854}
1785 1855
@@ -1910,6 +1980,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1910 } 1980 }
1911 break; 1981 break;
1912 } 1982 }
1983 case KVM_CPUID_SIGNATURE: {
1984 char signature[12] = "KVMKVMKVM\0\0";
1985 u32 *sigptr = (u32 *)signature;
1986 entry->eax = 0;
1987 entry->ebx = sigptr[0];
1988 entry->ecx = sigptr[1];
1989 entry->edx = sigptr[2];
1990 break;
1991 }
1992 case KVM_CPUID_FEATURES:
1993 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
1994 (1 << KVM_FEATURE_NOP_IO_DELAY) |
1995 (1 << KVM_FEATURE_CLOCKSOURCE2) |
1996 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
1997 entry->ebx = 0;
1998 entry->ecx = 0;
1999 entry->edx = 0;
2000 break;
1913 case 0x80000000: 2001 case 0x80000000:
1914 entry->eax = min(entry->eax, 0x8000001a); 2002 entry->eax = min(entry->eax, 0x8000001a);
1915 break; 2003 break;
@@ -1918,6 +2006,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1918 entry->ecx &= kvm_supported_word6_x86_features; 2006 entry->ecx &= kvm_supported_word6_x86_features;
1919 break; 2007 break;
1920 } 2008 }
2009
2010 kvm_x86_ops->set_supported_cpuid(function, entry);
2011
1921 put_cpu(); 2012 put_cpu();
1922} 2013}
1923 2014
@@ -1953,6 +2044,23 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
1953 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) 2044 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
1954 do_cpuid_ent(&cpuid_entries[nent], func, 0, 2045 do_cpuid_ent(&cpuid_entries[nent], func, 0,
1955 &nent, cpuid->nent); 2046 &nent, cpuid->nent);
2047
2048
2049
2050 r = -E2BIG;
2051 if (nent >= cpuid->nent)
2052 goto out_free;
2053
2054 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
2055 cpuid->nent);
2056
2057 r = -E2BIG;
2058 if (nent >= cpuid->nent)
2059 goto out_free;
2060
2061 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent,
2062 cpuid->nent);
2063
1956 r = -E2BIG; 2064 r = -E2BIG;
1957 if (nent >= cpuid->nent) 2065 if (nent >= cpuid->nent)
1958 goto out_free; 2066 goto out_free;
@@ -2032,6 +2140,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2032 int r; 2140 int r;
2033 unsigned bank_num = mcg_cap & 0xff, bank; 2141 unsigned bank_num = mcg_cap & 0xff, bank;
2034 2142
2143 vcpu_load(vcpu);
2035 r = -EINVAL; 2144 r = -EINVAL;
2036 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) 2145 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2037 goto out; 2146 goto out;
@@ -2046,6 +2155,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2046 for (bank = 0; bank < bank_num; bank++) 2155 for (bank = 0; bank < bank_num; bank++)
2047 vcpu->arch.mce_banks[bank*4] = ~(u64)0; 2156 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2048out: 2157out:
2158 vcpu_put(vcpu);
2049 return r; 2159 return r;
2050} 2160}
2051 2161
@@ -2105,14 +2215,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2105{ 2215{
2106 vcpu_load(vcpu); 2216 vcpu_load(vcpu);
2107 2217
2108 events->exception.injected = vcpu->arch.exception.pending; 2218 events->exception.injected =
2219 vcpu->arch.exception.pending &&
2220 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2109 events->exception.nr = vcpu->arch.exception.nr; 2221 events->exception.nr = vcpu->arch.exception.nr;
2110 events->exception.has_error_code = vcpu->arch.exception.has_error_code; 2222 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2111 events->exception.error_code = vcpu->arch.exception.error_code; 2223 events->exception.error_code = vcpu->arch.exception.error_code;
2112 2224
2113 events->interrupt.injected = vcpu->arch.interrupt.pending; 2225 events->interrupt.injected =
2226 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2114 events->interrupt.nr = vcpu->arch.interrupt.nr; 2227 events->interrupt.nr = vcpu->arch.interrupt.nr;
2115 events->interrupt.soft = vcpu->arch.interrupt.soft; 2228 events->interrupt.soft = 0;
2229 events->interrupt.shadow =
2230 kvm_x86_ops->get_interrupt_shadow(vcpu,
2231 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2116 2232
2117 events->nmi.injected = vcpu->arch.nmi_injected; 2233 events->nmi.injected = vcpu->arch.nmi_injected;
2118 events->nmi.pending = vcpu->arch.nmi_pending; 2234 events->nmi.pending = vcpu->arch.nmi_pending;
@@ -2121,7 +2237,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2121 events->sipi_vector = vcpu->arch.sipi_vector; 2237 events->sipi_vector = vcpu->arch.sipi_vector;
2122 2238
2123 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING 2239 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2124 | KVM_VCPUEVENT_VALID_SIPI_VECTOR); 2240 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2241 | KVM_VCPUEVENT_VALID_SHADOW);
2125 2242
2126 vcpu_put(vcpu); 2243 vcpu_put(vcpu);
2127} 2244}
@@ -2130,7 +2247,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2130 struct kvm_vcpu_events *events) 2247 struct kvm_vcpu_events *events)
2131{ 2248{
2132 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING 2249 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2133 | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) 2250 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2251 | KVM_VCPUEVENT_VALID_SHADOW))
2134 return -EINVAL; 2252 return -EINVAL;
2135 2253
2136 vcpu_load(vcpu); 2254 vcpu_load(vcpu);
@@ -2145,6 +2263,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2145 vcpu->arch.interrupt.soft = events->interrupt.soft; 2263 vcpu->arch.interrupt.soft = events->interrupt.soft;
2146 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) 2264 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
2147 kvm_pic_clear_isr_ack(vcpu->kvm); 2265 kvm_pic_clear_isr_ack(vcpu->kvm);
2266 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2267 kvm_x86_ops->set_interrupt_shadow(vcpu,
2268 events->interrupt.shadow);
2148 2269
2149 vcpu->arch.nmi_injected = events->nmi.injected; 2270 vcpu->arch.nmi_injected = events->nmi.injected;
2150 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) 2271 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
@@ -2159,6 +2280,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2159 return 0; 2280 return 0;
2160} 2281}
2161 2282
2283static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2284 struct kvm_debugregs *dbgregs)
2285{
2286 vcpu_load(vcpu);
2287
2288 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2289 dbgregs->dr6 = vcpu->arch.dr6;
2290 dbgregs->dr7 = vcpu->arch.dr7;
2291 dbgregs->flags = 0;
2292
2293 vcpu_put(vcpu);
2294}
2295
2296static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2297 struct kvm_debugregs *dbgregs)
2298{
2299 if (dbgregs->flags)
2300 return -EINVAL;
2301
2302 vcpu_load(vcpu);
2303
2304 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2305 vcpu->arch.dr6 = dbgregs->dr6;
2306 vcpu->arch.dr7 = dbgregs->dr7;
2307
2308 vcpu_put(vcpu);
2309
2310 return 0;
2311}
2312
2162long kvm_arch_vcpu_ioctl(struct file *filp, 2313long kvm_arch_vcpu_ioctl(struct file *filp,
2163 unsigned int ioctl, unsigned long arg) 2314 unsigned int ioctl, unsigned long arg)
2164{ 2315{
@@ -2313,7 +2464,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2313 r = -EFAULT; 2464 r = -EFAULT;
2314 if (copy_from_user(&mce, argp, sizeof mce)) 2465 if (copy_from_user(&mce, argp, sizeof mce))
2315 goto out; 2466 goto out;
2467 vcpu_load(vcpu);
2316 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); 2468 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
2469 vcpu_put(vcpu);
2317 break; 2470 break;
2318 } 2471 }
2319 case KVM_GET_VCPU_EVENTS: { 2472 case KVM_GET_VCPU_EVENTS: {
@@ -2337,6 +2490,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
2337 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); 2490 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
2338 break; 2491 break;
2339 } 2492 }
2493 case KVM_GET_DEBUGREGS: {
2494 struct kvm_debugregs dbgregs;
2495
2496 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
2497
2498 r = -EFAULT;
2499 if (copy_to_user(argp, &dbgregs,
2500 sizeof(struct kvm_debugregs)))
2501 break;
2502 r = 0;
2503 break;
2504 }
2505 case KVM_SET_DEBUGREGS: {
2506 struct kvm_debugregs dbgregs;
2507
2508 r = -EFAULT;
2509 if (copy_from_user(&dbgregs, argp,
2510 sizeof(struct kvm_debugregs)))
2511 break;
2512
2513 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
2514 break;
2515 }
2340 default: 2516 default:
2341 r = -EINVAL; 2517 r = -EINVAL;
2342 } 2518 }
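
KVM_GET_DEBUGREGS and KVM_SET_DEBUGREGS are plain vcpu ioctls carrying the struct kvm_debugregs (db[4], dr6, dr7, flags) used by the handlers above. A hedged user-space sketch, assuming vcpu_fd is an already created vcpu file descriptor and the host advertises KVM_CAP_DEBUGREGS:

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Round-trip the guest debug registers; flags must stay zero, since
	 * kvm_vcpu_ioctl_x86_set_debugregs() above rejects anything else. */
	static int roundtrip_debugregs(int vcpu_fd)
	{
		struct kvm_debugregs dbg;

		if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0) {
			perror("KVM_GET_DEBUGREGS");
			return -1;
		}
		printf("dr6=%#llx dr7=%#llx\n",
		       (unsigned long long)dbg.dr6,
		       (unsigned long long)dbg.dr7);

		dbg.flags = 0;
		return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
	}
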
@@ -2390,7 +2566,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2390 struct kvm_mem_alias *alias; 2566 struct kvm_mem_alias *alias;
2391 struct kvm_mem_aliases *aliases; 2567 struct kvm_mem_aliases *aliases;
2392 2568
2393 aliases = rcu_dereference(kvm->arch.aliases); 2569 aliases = kvm_aliases(kvm);
2394 2570
2395 for (i = 0; i < aliases->naliases; ++i) { 2571 for (i = 0; i < aliases->naliases; ++i) {
2396 alias = &aliases->aliases[i]; 2572 alias = &aliases->aliases[i];
@@ -2409,7 +2585,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
2409 struct kvm_mem_alias *alias; 2585 struct kvm_mem_alias *alias;
2410 struct kvm_mem_aliases *aliases; 2586 struct kvm_mem_aliases *aliases;
2411 2587
2412 aliases = rcu_dereference(kvm->arch.aliases); 2588 aliases = kvm_aliases(kvm);
2413 2589
2414 for (i = 0; i < aliases->naliases; ++i) { 2590 for (i = 0; i < aliases->naliases; ++i) {
2415 alias = &aliases->aliases[i]; 2591 alias = &aliases->aliases[i];
@@ -2804,11 +2980,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
2804 r = -EFAULT; 2980 r = -EFAULT;
2805 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 2981 if (copy_from_user(&irq_event, argp, sizeof irq_event))
2806 goto out; 2982 goto out;
2983 r = -ENXIO;
2807 if (irqchip_in_kernel(kvm)) { 2984 if (irqchip_in_kernel(kvm)) {
2808 __s32 status; 2985 __s32 status;
2809 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 2986 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
2810 irq_event.irq, irq_event.level); 2987 irq_event.irq, irq_event.level);
2811 if (ioctl == KVM_IRQ_LINE_STATUS) { 2988 if (ioctl == KVM_IRQ_LINE_STATUS) {
2989 r = -EFAULT;
2812 irq_event.status = status; 2990 irq_event.status = status;
2813 if (copy_to_user(argp, &irq_event, 2991 if (copy_to_user(argp, &irq_event,
2814 sizeof irq_event)) 2992 sizeof irq_event))
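
With the hunk above, KVM_IRQ_LINE(_STATUS) now reports -ENXIO when no in-kernel irqchip exists and -EFAULT when the status cannot be copied back. A user-space sketch of the status variant, assuming vm_fd is a VM descriptor on which KVM_CREATE_IRQCHIP has already succeeded and that the ioctl carries struct kvm_irq_level, whose irq and status fields share a union so the kernel overwrites the field in place:

	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int pulse_gsi(int vm_fd, unsigned int gsi)
	{
		struct kvm_irq_level event;

		memset(&event, 0, sizeof(event));
		event.irq = gsi;
		event.level = 1;
		if (ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &event) < 0)
			return -1;		/* -ENXIO without an irqchip */
		printf("gsi %u asserted, status=%d\n", gsi, event.status);

		event.irq = gsi;		/* union member shared with status */
		event.level = 0;
		return ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &event);
	}
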
@@ -3024,6 +3202,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3024 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); 3202 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
3025} 3203}
3026 3204
3205static void kvm_set_segment(struct kvm_vcpu *vcpu,
3206 struct kvm_segment *var, int seg)
3207{
3208 kvm_x86_ops->set_segment(vcpu, var, seg);
3209}
3210
3211void kvm_get_segment(struct kvm_vcpu *vcpu,
3212 struct kvm_segment *var, int seg)
3213{
3214 kvm_x86_ops->get_segment(vcpu, var, seg);
3215}
3216
3027gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) 3217gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3028{ 3218{
3029 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 3219 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
@@ -3104,14 +3294,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3104 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); 3294 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
3105} 3295}
3106 3296
3107static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3297static int kvm_write_guest_virt_system(gva_t addr, void *val,
3108 struct kvm_vcpu *vcpu, u32 *error) 3298 unsigned int bytes,
3299 struct kvm_vcpu *vcpu,
3300 u32 *error)
3109{ 3301{
3110 void *data = val; 3302 void *data = val;
3111 int r = X86EMUL_CONTINUE; 3303 int r = X86EMUL_CONTINUE;
3112 3304
3113 while (bytes) { 3305 while (bytes) {
3114 gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); 3306 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr,
3307 PFERR_WRITE_MASK, error);
3115 unsigned offset = addr & (PAGE_SIZE-1); 3308 unsigned offset = addr & (PAGE_SIZE-1);
3116 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3309 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
3117 int ret; 3310 int ret;
@@ -3134,7 +3327,6 @@ out:
3134 return r; 3327 return r;
3135} 3328}
3136 3329
3137
3138static int emulator_read_emulated(unsigned long addr, 3330static int emulator_read_emulated(unsigned long addr,
3139 void *val, 3331 void *val,
3140 unsigned int bytes, 3332 unsigned int bytes,
@@ -3237,9 +3429,9 @@ mmio:
3237} 3429}
3238 3430
3239int emulator_write_emulated(unsigned long addr, 3431int emulator_write_emulated(unsigned long addr,
3240 const void *val, 3432 const void *val,
3241 unsigned int bytes, 3433 unsigned int bytes,
3242 struct kvm_vcpu *vcpu) 3434 struct kvm_vcpu *vcpu)
3243{ 3435{
3244 /* Crossing a page boundary? */ 3436 /* Crossing a page boundary? */
3245 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { 3437 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
@@ -3257,45 +3449,150 @@ int emulator_write_emulated(unsigned long addr,
3257} 3449}
3258EXPORT_SYMBOL_GPL(emulator_write_emulated); 3450EXPORT_SYMBOL_GPL(emulator_write_emulated);
3259 3451
3452#define CMPXCHG_TYPE(t, ptr, old, new) \
3453 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
3454
3455#ifdef CONFIG_X86_64
3456# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
3457#else
3458# define CMPXCHG64(ptr, old, new) \
3459 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
3460#endif
3461
3260static int emulator_cmpxchg_emulated(unsigned long addr, 3462static int emulator_cmpxchg_emulated(unsigned long addr,
3261 const void *old, 3463 const void *old,
3262 const void *new, 3464 const void *new,
3263 unsigned int bytes, 3465 unsigned int bytes,
3264 struct kvm_vcpu *vcpu) 3466 struct kvm_vcpu *vcpu)
3265{ 3467{
3266 printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); 3468 gpa_t gpa;
3267#ifndef CONFIG_X86_64 3469 struct page *page;
3268 /* guests cmpxchg8b have to be emulated atomically */ 3470 char *kaddr;
3269 if (bytes == 8) { 3471 bool exchanged;
3270 gpa_t gpa;
3271 struct page *page;
3272 char *kaddr;
3273 u64 val;
3274 3472
3275 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); 3473 /* guests cmpxchg8b have to be emulated atomically */
3474 if (bytes > 8 || (bytes & (bytes - 1)))
3475 goto emul_write;
3276 3476
3277 if (gpa == UNMAPPED_GVA || 3477 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
3278 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3279 goto emul_write;
3280 3478
3281 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) 3479 if (gpa == UNMAPPED_GVA ||
3282 goto emul_write; 3480 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3481 goto emul_write;
3283 3482
3284 val = *(u64 *)new; 3483 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
3484 goto emul_write;
3285 3485
3286 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); 3486 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3287 3487
3288 kaddr = kmap_atomic(page, KM_USER0); 3488 kaddr = kmap_atomic(page, KM_USER0);
3289 set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); 3489 kaddr += offset_in_page(gpa);
3290 kunmap_atomic(kaddr, KM_USER0); 3490 switch (bytes) {
3291 kvm_release_page_dirty(page); 3491 case 1:
3492 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
3493 break;
3494 case 2:
3495 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
3496 break;
3497 case 4:
3498 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
3499 break;
3500 case 8:
3501 exchanged = CMPXCHG64(kaddr, old, new);
3502 break;
3503 default:
3504 BUG();
3292 } 3505 }
3506 kunmap_atomic(kaddr, KM_USER0);
3507 kvm_release_page_dirty(page);
3508
3509 if (!exchanged)
3510 return X86EMUL_CMPXCHG_FAILED;
3511
3512 kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1);
3513
3514 return X86EMUL_CONTINUE;
3515
3293emul_write: 3516emul_write:
3294#endif 3517 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
3295 3518
3296 return emulator_write_emulated(addr, new, bytes, vcpu); 3519 return emulator_write_emulated(addr, new, bytes, vcpu);
3297} 3520}
3298 3521
3522static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3523{
3524 /* TODO: String I/O for in kernel device */
3525 int r;
3526
3527 if (vcpu->arch.pio.in)
3528 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3529 vcpu->arch.pio.size, pd);
3530 else
3531 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3532 vcpu->arch.pio.port, vcpu->arch.pio.size,
3533 pd);
3534 return r;
3535}
3536
3537
3538static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
3539 unsigned int count, struct kvm_vcpu *vcpu)
3540{
3541 if (vcpu->arch.pio.count)
3542 goto data_avail;
3543
3544 trace_kvm_pio(1, port, size, 1);
3545
3546 vcpu->arch.pio.port = port;
3547 vcpu->arch.pio.in = 1;
3548 vcpu->arch.pio.count = count;
3549 vcpu->arch.pio.size = size;
3550
3551 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3552 data_avail:
3553 memcpy(val, vcpu->arch.pio_data, size * count);
3554 vcpu->arch.pio.count = 0;
3555 return 1;
3556 }
3557
3558 vcpu->run->exit_reason = KVM_EXIT_IO;
3559 vcpu->run->io.direction = KVM_EXIT_IO_IN;
3560 vcpu->run->io.size = size;
3561 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3562 vcpu->run->io.count = count;
3563 vcpu->run->io.port = port;
3564
3565 return 0;
3566}
3567
3568static int emulator_pio_out_emulated(int size, unsigned short port,
3569 const void *val, unsigned int count,
3570 struct kvm_vcpu *vcpu)
3571{
3572 trace_kvm_pio(0, port, size, 1);
3573
3574 vcpu->arch.pio.port = port;
3575 vcpu->arch.pio.in = 0;
3576 vcpu->arch.pio.count = count;
3577 vcpu->arch.pio.size = size;
3578
3579 memcpy(vcpu->arch.pio_data, val, size * count);
3580
3581 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3582 vcpu->arch.pio.count = 0;
3583 return 1;
3584 }
3585
3586 vcpu->run->exit_reason = KVM_EXIT_IO;
3587 vcpu->run->io.direction = KVM_EXIT_IO_OUT;
3588 vcpu->run->io.size = size;
3589 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3590 vcpu->run->io.count = count;
3591 vcpu->run->io.port = port;
3592
3593 return 0;
3594}
3595
3299static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) 3596static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
3300{ 3597{
3301 return kvm_x86_ops->get_segment_base(vcpu, seg); 3598 return kvm_x86_ops->get_segment_base(vcpu, seg);
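
CMPXCHG_TYPE() above succeeds only if the destination still holds the old value, and CMPXCHG64() falls back to cmpxchg64() for the 8-byte case on 32-bit hosts. A user-space analogue of the same width dispatch, using GCC's __atomic builtins instead of the kernel's cmpxchg() helpers; this is a sketch of the semantics only, not of the guest-page mapping around it:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define CMPXCHG_TYPE(t, ptr, old, new) ({				\
		t __old = *(const t *)(old);					\
		__atomic_compare_exchange_n((t *)(ptr), &__old,			\
					    *(const t *)(new), false,		\
					    __ATOMIC_SEQ_CST,			\
					    __ATOMIC_SEQ_CST);			\
	})

	/* Mirrors the switch in emulator_cmpxchg_emulated(): power-of-two
	 * sizes up to 8 bytes; anything else would fall back to a write. */
	static bool cmpxchg_bytes(void *ptr, const void *old,
				  const void *new, unsigned int bytes)
	{
		switch (bytes) {
		case 1: return CMPXCHG_TYPE(uint8_t,  ptr, old, new);
		case 2: return CMPXCHG_TYPE(uint16_t, ptr, old, new);
		case 4: return CMPXCHG_TYPE(uint32_t, ptr, old, new);
		case 8: return CMPXCHG_TYPE(uint64_t, ptr, old, new);
		default: return false;
		}
	}

	int main(void)
	{
		uint32_t mem = 0x1234, old = 0x1234, new = 0x5678;

		printf("exchanged: %d, mem now %#x\n",
		       cmpxchg_bytes(&mem, &old, &new, 4), (unsigned int)mem);
		return 0;
	}
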
@@ -3316,14 +3613,14 @@ int emulate_clts(struct kvm_vcpu *vcpu)
3316 3613
3317int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3614int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
3318{ 3615{
3319 return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); 3616 return kvm_get_dr(ctxt->vcpu, dr, dest);
3320} 3617}
3321 3618
3322int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3619int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
3323{ 3620{
3324 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 3621 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
3325 3622
3326 return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); 3623 return kvm_set_dr(ctxt->vcpu, dr, value & mask);
3327} 3624}
3328 3625
3329void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3626void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
@@ -3344,12 +3641,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3344} 3641}
3345EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3642EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
3346 3643
3644static u64 mk_cr_64(u64 curr_cr, u32 new_val)
3645{
3646 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
3647}
3648
3649static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
3650{
3651 unsigned long value;
3652
3653 switch (cr) {
3654 case 0:
3655 value = kvm_read_cr0(vcpu);
3656 break;
3657 case 2:
3658 value = vcpu->arch.cr2;
3659 break;
3660 case 3:
3661 value = vcpu->arch.cr3;
3662 break;
3663 case 4:
3664 value = kvm_read_cr4(vcpu);
3665 break;
3666 case 8:
3667 value = kvm_get_cr8(vcpu);
3668 break;
3669 default:
3670 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3671 return 0;
3672 }
3673
3674 return value;
3675}
3676
3677static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
3678{
3679 switch (cr) {
3680 case 0:
3681 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
3682 break;
3683 case 2:
3684 vcpu->arch.cr2 = val;
3685 break;
3686 case 3:
3687 kvm_set_cr3(vcpu, val);
3688 break;
3689 case 4:
3690 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
3691 break;
3692 case 8:
3693 kvm_set_cr8(vcpu, val & 0xfUL);
3694 break;
3695 default:
3696 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
3697 }
3698}
3699
3700static int emulator_get_cpl(struct kvm_vcpu *vcpu)
3701{
3702 return kvm_x86_ops->get_cpl(vcpu);
3703}
3704
3705static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
3706{
3707 kvm_x86_ops->get_gdt(vcpu, dt);
3708}
3709
3710static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
3711 struct kvm_vcpu *vcpu)
3712{
3713 struct kvm_segment var;
3714
3715 kvm_get_segment(vcpu, &var, seg);
3716
3717 if (var.unusable)
3718 return false;
3719
3720 if (var.g)
3721 var.limit >>= 12;
3722 set_desc_limit(desc, var.limit);
3723 set_desc_base(desc, (unsigned long)var.base);
3724 desc->type = var.type;
3725 desc->s = var.s;
3726 desc->dpl = var.dpl;
3727 desc->p = var.present;
3728 desc->avl = var.avl;
3729 desc->l = var.l;
3730 desc->d = var.db;
3731 desc->g = var.g;
3732
3733 return true;
3734}
3735
3736static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg,
3737 struct kvm_vcpu *vcpu)
3738{
3739 struct kvm_segment var;
3740
3741 /* needed to preserve selector */
3742 kvm_get_segment(vcpu, &var, seg);
3743
3744 var.base = get_desc_base(desc);
3745 var.limit = get_desc_limit(desc);
3746 if (desc->g)
3747 var.limit = (var.limit << 12) | 0xfff;
3748 var.type = desc->type;
3749 var.present = desc->p;
3750 var.dpl = desc->dpl;
3751 var.db = desc->d;
3752 var.s = desc->s;
3753 var.l = desc->l;
3754 var.g = desc->g;
3755 var.avl = desc->avl;
3756 var.present = desc->p;
3757 var.unusable = !var.present;
3758 var.padding = 0;
3759
3760 kvm_set_segment(vcpu, &var, seg);
3761 return;
3762}
3763
3764static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
3765{
3766 struct kvm_segment kvm_seg;
3767
3768 kvm_get_segment(vcpu, &kvm_seg, seg);
3769 return kvm_seg.selector;
3770}
3771
3772static void emulator_set_segment_selector(u16 sel, int seg,
3773 struct kvm_vcpu *vcpu)
3774{
3775 struct kvm_segment kvm_seg;
3776
3777 kvm_get_segment(vcpu, &kvm_seg, seg);
3778 kvm_seg.selector = sel;
3779 kvm_set_segment(vcpu, &kvm_seg, seg);
3780}
3781
3782static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
3783{
3784 kvm_x86_ops->set_rflags(vcpu, rflags);
3785}
3786
3347static struct x86_emulate_ops emulate_ops = { 3787static struct x86_emulate_ops emulate_ops = {
3348 .read_std = kvm_read_guest_virt_system, 3788 .read_std = kvm_read_guest_virt_system,
3789 .write_std = kvm_write_guest_virt_system,
3349 .fetch = kvm_fetch_guest_virt, 3790 .fetch = kvm_fetch_guest_virt,
3350 .read_emulated = emulator_read_emulated, 3791 .read_emulated = emulator_read_emulated,
3351 .write_emulated = emulator_write_emulated, 3792 .write_emulated = emulator_write_emulated,
3352 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3793 .cmpxchg_emulated = emulator_cmpxchg_emulated,
3794 .pio_in_emulated = emulator_pio_in_emulated,
3795 .pio_out_emulated = emulator_pio_out_emulated,
3796 .get_cached_descriptor = emulator_get_cached_descriptor,
3797 .set_cached_descriptor = emulator_set_cached_descriptor,
3798 .get_segment_selector = emulator_get_segment_selector,
3799 .set_segment_selector = emulator_set_segment_selector,
3800 .get_gdt = emulator_get_gdt,
3801 .get_cr = emulator_get_cr,
3802 .set_cr = emulator_set_cr,
3803 .cpl = emulator_get_cpl,
3804 .set_rflags = emulator_set_rflags,
3353}; 3805};
3354 3806
3355static void cache_all_regs(struct kvm_vcpu *vcpu) 3807static void cache_all_regs(struct kvm_vcpu *vcpu)
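
emulator_get_cached_descriptor() above shifts a byte-granular limit down by 12 when the G bit is set, and emulator_set_cached_descriptor() reverses it with (limit << 12) | 0xfff. A small self-contained check of that round trip (standard x86 segment-descriptor granularity scaling, spelled out only to make the shift direction explicit):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t desc_limit_to_bytes(uint32_t raw, int g)
	{
		return g ? (raw << 12) | 0xfff : raw;	/* G=1: 4 KiB units */
	}

	static uint32_t bytes_to_desc_limit(uint32_t bytes, int g)
	{
		return g ? bytes >> 12 : bytes;
	}

	int main(void)
	{
		/* flat 4 GiB segment: raw limit 0xfffff with G=1 */
		uint32_t bytes = desc_limit_to_bytes(0xfffff, 1);

		assert(bytes == 0xffffffffu);
		assert(bytes_to_desc_limit(bytes, 1) == 0xfffff);
		printf("byte limit %#x\n", (unsigned int)bytes);
		return 0;
	}
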
@@ -3380,14 +3832,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3380 cache_all_regs(vcpu); 3832 cache_all_regs(vcpu);
3381 3833
3382 vcpu->mmio_is_write = 0; 3834 vcpu->mmio_is_write = 0;
3383 vcpu->arch.pio.string = 0;
3384 3835
3385 if (!(emulation_type & EMULTYPE_NO_DECODE)) { 3836 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
3386 int cs_db, cs_l; 3837 int cs_db, cs_l;
3387 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 3838 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3388 3839
3389 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3840 vcpu->arch.emulate_ctxt.vcpu = vcpu;
3390 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 3841 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
3842 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
3391 vcpu->arch.emulate_ctxt.mode = 3843 vcpu->arch.emulate_ctxt.mode =
3392 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : 3844 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
3393 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3845 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
@@ -3396,6 +3848,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3396 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3848 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
3397 3849
3398 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3850 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3851 trace_kvm_emulate_insn_start(vcpu);
3399 3852
3400 /* Only allow emulation of specific instructions on #UD 3853 /* Only allow emulation of specific instructions on #UD
3401 * (namely VMMCALL, sysenter, sysexit, syscall)*/ 3854 * (namely VMMCALL, sysenter, sysexit, syscall)*/
@@ -3428,6 +3881,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3428 ++vcpu->stat.insn_emulation; 3881 ++vcpu->stat.insn_emulation;
3429 if (r) { 3882 if (r) {
3430 ++vcpu->stat.insn_emulation_fail; 3883 ++vcpu->stat.insn_emulation_fail;
3884 trace_kvm_emulate_insn_failed(vcpu);
3431 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3885 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3432 return EMULATE_DONE; 3886 return EMULATE_DONE;
3433 return EMULATE_FAIL; 3887 return EMULATE_FAIL;
@@ -3439,16 +3893,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3439 return EMULATE_DONE; 3893 return EMULATE_DONE;
3440 } 3894 }
3441 3895
3896restart:
3442 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 3897 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
3443 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; 3898 shadow_mask = vcpu->arch.emulate_ctxt.interruptibility;
3444 3899
3445 if (r == 0) 3900 if (r == 0)
3446 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); 3901 kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask);
3447 3902
3448 if (vcpu->arch.pio.string) 3903 if (vcpu->arch.pio.count) {
3904 if (!vcpu->arch.pio.in)
3905 vcpu->arch.pio.count = 0;
3449 return EMULATE_DO_MMIO; 3906 return EMULATE_DO_MMIO;
3907 }
3450 3908
3451 if ((r || vcpu->mmio_is_write) && run) { 3909 if (r || vcpu->mmio_is_write) {
3452 run->exit_reason = KVM_EXIT_MMIO; 3910 run->exit_reason = KVM_EXIT_MMIO;
3453 run->mmio.phys_addr = vcpu->mmio_phys_addr; 3911 run->mmio.phys_addr = vcpu->mmio_phys_addr;
3454 memcpy(run->mmio.data, vcpu->mmio_data, 8); 3912 memcpy(run->mmio.data, vcpu->mmio_data, 8);
@@ -3458,222 +3916,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3458 3916
3459 if (r) { 3917 if (r) {
3460 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 3918 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
3461 return EMULATE_DONE; 3919 goto done;
3462 if (!vcpu->mmio_needed) { 3920 if (!vcpu->mmio_needed) {
3921 ++vcpu->stat.insn_emulation_fail;
3922 trace_kvm_emulate_insn_failed(vcpu);
3463 kvm_report_emulation_failure(vcpu, "mmio"); 3923 kvm_report_emulation_failure(vcpu, "mmio");
3464 return EMULATE_FAIL; 3924 return EMULATE_FAIL;
3465 } 3925 }
3466 return EMULATE_DO_MMIO; 3926 return EMULATE_DO_MMIO;
3467 } 3927 }
3468 3928
3469 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
3470
3471 if (vcpu->mmio_is_write) { 3929 if (vcpu->mmio_is_write) {
3472 vcpu->mmio_needed = 0; 3930 vcpu->mmio_needed = 0;
3473 return EMULATE_DO_MMIO; 3931 return EMULATE_DO_MMIO;
3474 } 3932 }
3475 3933
3476 return EMULATE_DONE; 3934done:
3477} 3935 if (vcpu->arch.exception.pending)
3478EXPORT_SYMBOL_GPL(emulate_instruction); 3936 vcpu->arch.emulate_ctxt.restart = false;
3479
3480static int pio_copy_data(struct kvm_vcpu *vcpu)
3481{
3482 void *p = vcpu->arch.pio_data;
3483 gva_t q = vcpu->arch.pio.guest_gva;
3484 unsigned bytes;
3485 int ret;
3486 u32 error_code;
3487
3488 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
3489 if (vcpu->arch.pio.in)
3490 ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
3491 else
3492 ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
3493
3494 if (ret == X86EMUL_PROPAGATE_FAULT)
3495 kvm_inject_page_fault(vcpu, q, error_code);
3496
3497 return ret;
3498}
3499
3500int complete_pio(struct kvm_vcpu *vcpu)
3501{
3502 struct kvm_pio_request *io = &vcpu->arch.pio;
3503 long delta;
3504 int r;
3505 unsigned long val;
3506
3507 if (!io->string) {
3508 if (io->in) {
3509 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3510 memcpy(&val, vcpu->arch.pio_data, io->size);
3511 kvm_register_write(vcpu, VCPU_REGS_RAX, val);
3512 }
3513 } else {
3514 if (io->in) {
3515 r = pio_copy_data(vcpu);
3516 if (r)
3517 goto out;
3518 }
3519
3520 delta = 1;
3521 if (io->rep) {
3522 delta *= io->cur_count;
3523 /*
3524 * The size of the register should really depend on
3525 * current address size.
3526 */
3527 val = kvm_register_read(vcpu, VCPU_REGS_RCX);
3528 val -= delta;
3529 kvm_register_write(vcpu, VCPU_REGS_RCX, val);
3530 }
3531 if (io->down)
3532 delta = -delta;
3533 delta *= io->size;
3534 if (io->in) {
3535 val = kvm_register_read(vcpu, VCPU_REGS_RDI);
3536 val += delta;
3537 kvm_register_write(vcpu, VCPU_REGS_RDI, val);
3538 } else {
3539 val = kvm_register_read(vcpu, VCPU_REGS_RSI);
3540 val += delta;
3541 kvm_register_write(vcpu, VCPU_REGS_RSI, val);
3542 }
3543 }
3544out:
3545 io->count -= io->cur_count;
3546 io->cur_count = 0;
3547
3548 return 0;
3549}
3550
3551static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3552{
3553 /* TODO: String I/O for in kernel device */
3554 int r;
3555
3556 if (vcpu->arch.pio.in)
3557 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3558 vcpu->arch.pio.size, pd);
3559 else
3560 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3561 vcpu->arch.pio.port, vcpu->arch.pio.size,
3562 pd);
3563 return r;
3564}
3565 3937
3566static int pio_string_write(struct kvm_vcpu *vcpu) 3938 if (vcpu->arch.emulate_ctxt.restart)
3567{ 3939 goto restart;
3568 struct kvm_pio_request *io = &vcpu->arch.pio;
3569 void *pd = vcpu->arch.pio_data;
3570 int i, r = 0;
3571 3940
3572 for (i = 0; i < io->cur_count; i++) { 3941 return EMULATE_DONE;
3573 if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3574 io->port, io->size, pd)) {
3575 r = -EOPNOTSUPP;
3576 break;
3577 }
3578 pd += io->size;
3579 }
3580 return r;
3581}
3582
3583int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3584{
3585 unsigned long val;
3586
3587 trace_kvm_pio(!in, port, size, 1);
3588
3589 vcpu->run->exit_reason = KVM_EXIT_IO;
3590 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3591 vcpu->run->io.size = vcpu->arch.pio.size = size;
3592 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3593 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
3594 vcpu->run->io.port = vcpu->arch.pio.port = port;
3595 vcpu->arch.pio.in = in;
3596 vcpu->arch.pio.string = 0;
3597 vcpu->arch.pio.down = 0;
3598 vcpu->arch.pio.rep = 0;
3599
3600 if (!vcpu->arch.pio.in) {
3601 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3602 memcpy(vcpu->arch.pio_data, &val, 4);
3603 }
3604
3605 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3606 complete_pio(vcpu);
3607 return 1;
3608 }
3609 return 0;
3610} 3942}
3611EXPORT_SYMBOL_GPL(kvm_emulate_pio); 3943EXPORT_SYMBOL_GPL(emulate_instruction);
3612 3944
3613int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, 3945int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
3614 int size, unsigned long count, int down,
3615 gva_t address, int rep, unsigned port)
3616{ 3946{
3617 unsigned now, in_page; 3947 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3618 int ret = 0; 3948 int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
3619 3949 /* do not return to emulator after return from userspace */
3620 trace_kvm_pio(!in, port, size, count); 3950 vcpu->arch.pio.count = 0;
3621
3622 vcpu->run->exit_reason = KVM_EXIT_IO;
3623 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3624 vcpu->run->io.size = vcpu->arch.pio.size = size;
3625 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3626 vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
3627 vcpu->run->io.port = vcpu->arch.pio.port = port;
3628 vcpu->arch.pio.in = in;
3629 vcpu->arch.pio.string = 1;
3630 vcpu->arch.pio.down = down;
3631 vcpu->arch.pio.rep = rep;
3632
3633 if (!count) {
3634 kvm_x86_ops->skip_emulated_instruction(vcpu);
3635 return 1;
3636 }
3637
3638 if (!down)
3639 in_page = PAGE_SIZE - offset_in_page(address);
3640 else
3641 in_page = offset_in_page(address) + size;
3642 now = min(count, (unsigned long)in_page / size);
3643 if (!now)
3644 now = 1;
3645 if (down) {
3646 /*
3647 * String I/O in reverse. Yuck. Kill the guest, fix later.
3648 */
3649 pr_unimpl(vcpu, "guest string pio down\n");
3650 kvm_inject_gp(vcpu, 0);
3651 return 1;
3652 }
3653 vcpu->run->io.count = now;
3654 vcpu->arch.pio.cur_count = now;
3655
3656 if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
3657 kvm_x86_ops->skip_emulated_instruction(vcpu);
3658
3659 vcpu->arch.pio.guest_gva = address;
3660
3661 if (!vcpu->arch.pio.in) {
3662 /* string PIO write */
3663 ret = pio_copy_data(vcpu);
3664 if (ret == X86EMUL_PROPAGATE_FAULT)
3665 return 1;
3666 if (ret == 0 && !pio_string_write(vcpu)) {
3667 complete_pio(vcpu);
3668 if (vcpu->arch.pio.count == 0)
3669 ret = 1;
3670 }
3671 }
3672 /* no string PIO read support yet */
3673
3674 return ret; 3951 return ret;
3675} 3952}
3676EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); 3953EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
3677 3954
3678static void bounce_off(void *info) 3955static void bounce_off(void *info)
3679{ 3956{
@@ -3996,85 +4273,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3996 return emulator_write_emulated(rip, instruction, 3, vcpu); 4273 return emulator_write_emulated(rip, instruction, 3, vcpu);
3997} 4274}
3998 4275
3999static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4000{
4001 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4002}
4003
4004void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4276void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4005{ 4277{
4006 struct descriptor_table dt = { limit, base }; 4278 struct desc_ptr dt = { limit, base };
4007 4279
4008 kvm_x86_ops->set_gdt(vcpu, &dt); 4280 kvm_x86_ops->set_gdt(vcpu, &dt);
4009} 4281}
4010 4282
4011void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 4283void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4012{ 4284{
4013 struct descriptor_table dt = { limit, base }; 4285 struct desc_ptr dt = { limit, base };
4014 4286
4015 kvm_x86_ops->set_idt(vcpu, &dt); 4287 kvm_x86_ops->set_idt(vcpu, &dt);
4016} 4288}
4017 4289
4018void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
4019 unsigned long *rflags)
4020{
4021 kvm_lmsw(vcpu, msw);
4022 *rflags = kvm_get_rflags(vcpu);
4023}
4024
4025unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
4026{
4027 unsigned long value;
4028
4029 switch (cr) {
4030 case 0:
4031 value = kvm_read_cr0(vcpu);
4032 break;
4033 case 2:
4034 value = vcpu->arch.cr2;
4035 break;
4036 case 3:
4037 value = vcpu->arch.cr3;
4038 break;
4039 case 4:
4040 value = kvm_read_cr4(vcpu);
4041 break;
4042 case 8:
4043 value = kvm_get_cr8(vcpu);
4044 break;
4045 default:
4046 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4047 return 0;
4048 }
4049
4050 return value;
4051}
4052
4053void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
4054 unsigned long *rflags)
4055{
4056 switch (cr) {
4057 case 0:
4058 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4059 *rflags = kvm_get_rflags(vcpu);
4060 break;
4061 case 2:
4062 vcpu->arch.cr2 = val;
4063 break;
4064 case 3:
4065 kvm_set_cr3(vcpu, val);
4066 break;
4067 case 4:
4068 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4069 break;
4070 case 8:
4071 kvm_set_cr8(vcpu, val & 0xfUL);
4072 break;
4073 default:
4074 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4075 }
4076}
4077
4078static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) 4290static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
4079{ 4291{
4080 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; 4292 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
@@ -4138,9 +4350,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
4138{ 4350{
4139 struct kvm_cpuid_entry2 *best; 4351 struct kvm_cpuid_entry2 *best;
4140 4352
4353 best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
4354 if (!best || best->eax < 0x80000008)
4355 goto not_found;
4141 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); 4356 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
4142 if (best) 4357 if (best)
4143 return best->eax & 0xff; 4358 return best->eax & 0xff;
4359not_found:
4144 return 36; 4360 return 36;
4145} 4361}
4146 4362
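
The added guard only trusts leaf 0x80000008 after checking that leaf 0x80000000 actually reports it, falling back to the architectural 36-bit default otherwise. The same probe, written out as a user-space helper with GCC's <cpuid.h>:

	#include <cpuid.h>
	#include <stdio.h>

	static unsigned int maxphyaddr(void)
	{
		unsigned int eax, ebx, ecx, edx;

		__cpuid(0x80000000, eax, ebx, ecx, edx);
		if (eax < 0x80000008)
			return 36;		/* same fallback as above */

		__cpuid(0x80000008, eax, ebx, ecx, edx);
		return eax & 0xff;		/* bits 7:0 = physical address width */
	}

	int main(void)
	{
		printf("MAXPHYADDR: %u bits\n", maxphyaddr());
		return 0;
	}
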
@@ -4254,9 +4470,13 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
4254{ 4470{
4255 /* try to reinject previous events if any */ 4471 /* try to reinject previous events if any */
4256 if (vcpu->arch.exception.pending) { 4472 if (vcpu->arch.exception.pending) {
4473 trace_kvm_inj_exception(vcpu->arch.exception.nr,
4474 vcpu->arch.exception.has_error_code,
4475 vcpu->arch.exception.error_code);
4257 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 4476 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
4258 vcpu->arch.exception.has_error_code, 4477 vcpu->arch.exception.has_error_code,
4259 vcpu->arch.exception.error_code); 4478 vcpu->arch.exception.error_code,
4479 vcpu->arch.exception.reinject);
4260 return; 4480 return;
4261 } 4481 }
4262 4482
@@ -4486,7 +4706,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
4486 } 4706 }
4487 4707
4488 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 4708 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4489 post_kvm_run_save(vcpu);
4490 4709
4491 vapic_exit(vcpu); 4710 vapic_exit(vcpu);
4492 4711
@@ -4514,26 +4733,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4514 if (!irqchip_in_kernel(vcpu->kvm)) 4733 if (!irqchip_in_kernel(vcpu->kvm))
4515 kvm_set_cr8(vcpu, kvm_run->cr8); 4734 kvm_set_cr8(vcpu, kvm_run->cr8);
4516 4735
4517 if (vcpu->arch.pio.cur_count) { 4736 if (vcpu->arch.pio.count || vcpu->mmio_needed ||
4518 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4737 vcpu->arch.emulate_ctxt.restart) {
4519 r = complete_pio(vcpu); 4738 if (vcpu->mmio_needed) {
4520 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4739 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
4521 if (r) 4740 vcpu->mmio_read_completed = 1;
4522 goto out; 4741 vcpu->mmio_needed = 0;
4523 } 4742 }
4524 if (vcpu->mmio_needed) {
4525 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
4526 vcpu->mmio_read_completed = 1;
4527 vcpu->mmio_needed = 0;
4528
4529 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 4743 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4530 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, 4744 r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
4531 EMULTYPE_NO_DECODE);
4532 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 4745 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4533 if (r == EMULATE_DO_MMIO) { 4746 if (r == EMULATE_DO_MMIO) {
4534 /*
4535 * Read-modify-write. Back to userspace.
4536 */
4537 r = 0; 4747 r = 0;
4538 goto out; 4748 goto out;
4539 } 4749 }
@@ -4545,6 +4755,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4545 r = __vcpu_run(vcpu); 4755 r = __vcpu_run(vcpu);
4546 4756
4547out: 4757out:
4758 post_kvm_run_save(vcpu);
4548 if (vcpu->sigset_active) 4759 if (vcpu->sigset_active)
4549 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 4760 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
4550 4761
@@ -4616,12 +4827,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4616 return 0; 4827 return 0;
4617} 4828}
4618 4829
4619void kvm_get_segment(struct kvm_vcpu *vcpu,
4620 struct kvm_segment *var, int seg)
4621{
4622 kvm_x86_ops->get_segment(vcpu, var, seg);
4623}
4624
4625void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 4830void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
4626{ 4831{
4627 struct kvm_segment cs; 4832 struct kvm_segment cs;
@@ -4635,7 +4840,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
4635int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 4840int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4636 struct kvm_sregs *sregs) 4841 struct kvm_sregs *sregs)
4637{ 4842{
4638 struct descriptor_table dt; 4843 struct desc_ptr dt;
4639 4844
4640 vcpu_load(vcpu); 4845 vcpu_load(vcpu);
4641 4846
@@ -4650,11 +4855,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4650 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 4855 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
4651 4856
4652 kvm_x86_ops->get_idt(vcpu, &dt); 4857 kvm_x86_ops->get_idt(vcpu, &dt);
4653 sregs->idt.limit = dt.limit; 4858 sregs->idt.limit = dt.size;
4654 sregs->idt.base = dt.base; 4859 sregs->idt.base = dt.address;
4655 kvm_x86_ops->get_gdt(vcpu, &dt); 4860 kvm_x86_ops->get_gdt(vcpu, &dt);
4656 sregs->gdt.limit = dt.limit; 4861 sregs->gdt.limit = dt.size;
4657 sregs->gdt.base = dt.base; 4862 sregs->gdt.base = dt.address;
4658 4863
4659 sregs->cr0 = kvm_read_cr0(vcpu); 4864 sregs->cr0 = kvm_read_cr0(vcpu);
4660 sregs->cr2 = vcpu->arch.cr2; 4865 sregs->cr2 = vcpu->arch.cr2;
@@ -4693,563 +4898,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4693 return 0; 4898 return 0;
4694} 4899}
4695 4900
4696static void kvm_set_segment(struct kvm_vcpu *vcpu, 4901int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
4697 struct kvm_segment *var, int seg) 4902 bool has_error_code, u32 error_code)
4698{
4699 kvm_x86_ops->set_segment(vcpu, var, seg);
4700}
4701
4702static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
4703 struct kvm_segment *kvm_desct)
4704{
4705 kvm_desct->base = get_desc_base(seg_desc);
4706 kvm_desct->limit = get_desc_limit(seg_desc);
4707 if (seg_desc->g) {
4708 kvm_desct->limit <<= 12;
4709 kvm_desct->limit |= 0xfff;
4710 }
4711 kvm_desct->selector = selector;
4712 kvm_desct->type = seg_desc->type;
4713 kvm_desct->present = seg_desc->p;
4714 kvm_desct->dpl = seg_desc->dpl;
4715 kvm_desct->db = seg_desc->d;
4716 kvm_desct->s = seg_desc->s;
4717 kvm_desct->l = seg_desc->l;
4718 kvm_desct->g = seg_desc->g;
4719 kvm_desct->avl = seg_desc->avl;
4720 if (!selector)
4721 kvm_desct->unusable = 1;
4722 else
4723 kvm_desct->unusable = 0;
4724 kvm_desct->padding = 0;
4725}
4726
4727static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
4728 u16 selector,
4729 struct descriptor_table *dtable)
4730{
4731 if (selector & 1 << 2) {
4732 struct kvm_segment kvm_seg;
4733
4734 kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
4735
4736 if (kvm_seg.unusable)
4737 dtable->limit = 0;
4738 else
4739 dtable->limit = kvm_seg.limit;
4740 dtable->base = kvm_seg.base;
4741 }
4742 else
4743 kvm_x86_ops->get_gdt(vcpu, dtable);
4744}
4745
4746/* allowed just for 8 bytes segments */
4747static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4748 struct desc_struct *seg_desc)
4749{
4750 struct descriptor_table dtable;
4751 u16 index = selector >> 3;
4752 int ret;
4753 u32 err;
4754 gva_t addr;
4755
4756 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4757
4758 if (dtable.limit < index * 8 + 7) {
4759 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
4760 return X86EMUL_PROPAGATE_FAULT;
4761 }
4762 addr = dtable.base + index * 8;
4763 ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
4764 vcpu, &err);
4765 if (ret == X86EMUL_PROPAGATE_FAULT)
4766 kvm_inject_page_fault(vcpu, addr, err);
4767
4768 return ret;
4769}
4770
4771/* allowed just for 8 bytes segments */
4772static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4773 struct desc_struct *seg_desc)
4774{
4775 struct descriptor_table dtable;
4776 u16 index = selector >> 3;
4777
4778 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4779
4780 if (dtable.limit < index * 8 + 7)
4781 return 1;
4782 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
4783}
4784
4785static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
4786 struct desc_struct *seg_desc)
4787{
4788 u32 base_addr = get_desc_base(seg_desc);
4789
4790 return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
4791}
4792
4793static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
4794 struct desc_struct *seg_desc)
4795{
4796 u32 base_addr = get_desc_base(seg_desc);
4797
4798 return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
4799}
4800
4801static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
4802{
4803 struct kvm_segment kvm_seg;
4804
4805 kvm_get_segment(vcpu, &kvm_seg, seg);
4806 return kvm_seg.selector;
4807}
4808
4809static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
4810{
4811 struct kvm_segment segvar = {
4812 .base = selector << 4,
4813 .limit = 0xffff,
4814 .selector = selector,
4815 .type = 3,
4816 .present = 1,
4817 .dpl = 3,
4818 .db = 0,
4819 .s = 1,
4820 .l = 0,
4821 .g = 0,
4822 .avl = 0,
4823 .unusable = 0,
4824 };
4825 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
4826 return X86EMUL_CONTINUE;
4827}
4828
4829static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4830{ 4903{
4831 return (seg != VCPU_SREG_LDTR) && 4904 int cs_db, cs_l, ret;
4832 (seg != VCPU_SREG_TR) && 4905 cache_all_regs(vcpu);
4833 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
4834}
4835
4836int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
4837{
4838 struct kvm_segment kvm_seg;
4839 struct desc_struct seg_desc;
4840 u8 dpl, rpl, cpl;
4841 unsigned err_vec = GP_VECTOR;
4842 u32 err_code = 0;
4843 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
4844 int ret;
4845 4906
4846 if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) 4907 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4847 return kvm_load_realmode_segment(vcpu, selector, seg);
4848 4908
4849 /* NULL selector is not valid for TR, CS and SS */ 4909 vcpu->arch.emulate_ctxt.vcpu = vcpu;
4850 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) 4910 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
4851 && null_selector) 4911 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
4852 goto exception; 4912 vcpu->arch.emulate_ctxt.mode =
4913 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4914 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
4915 ? X86EMUL_MODE_VM86 : cs_l
4916 ? X86EMUL_MODE_PROT64 : cs_db
4917 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
4853 4918
4854 /* TR should be in GDT only */ 4919 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops,
4855 if (seg == VCPU_SREG_TR && (selector & (1 << 2))) 4920 tss_selector, reason, has_error_code,
4856 goto exception; 4921 error_code);
4857 4922
4858 ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
4859 if (ret) 4923 if (ret)
4860 return ret; 4924 return EMULATE_FAIL;
4861
4862 seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
4863
4864 if (null_selector) { /* for NULL selector skip all following checks */
4865 kvm_seg.unusable = 1;
4866 goto load;
4867 }
4868
4869 err_code = selector & 0xfffc;
4870 err_vec = GP_VECTOR;
4871
4872 /* can't load system descriptor into segment selecor */
4873 if (seg <= VCPU_SREG_GS && !kvm_seg.s)
4874 goto exception;
4875
4876 if (!kvm_seg.present) {
4877 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
4878 goto exception;
4879 }
4880
4881 rpl = selector & 3;
4882 dpl = kvm_seg.dpl;
4883 cpl = kvm_x86_ops->get_cpl(vcpu);
4884
4885 switch (seg) {
4886 case VCPU_SREG_SS:
4887 /*
4888 * segment is not a writable data segment or segment
4889 * selector's RPL != CPL or segment selector's RPL != CPL
4890 */
4891 if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
4892 goto exception;
4893 break;
4894 case VCPU_SREG_CS:
4895 if (!(kvm_seg.type & 8))
4896 goto exception;
4897
4898 if (kvm_seg.type & 4) {
4899 /* conforming */
4900 if (dpl > cpl)
4901 goto exception;
4902 } else {
4903 /* nonconforming */
4904 if (rpl > cpl || dpl != cpl)
4905 goto exception;
4906 }
4907 /* CS(RPL) <- CPL */
4908 selector = (selector & 0xfffc) | cpl;
4909 break;
4910 case VCPU_SREG_TR:
4911 if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
4912 goto exception;
4913 break;
4914 case VCPU_SREG_LDTR:
4915 if (kvm_seg.s || kvm_seg.type != 2)
4916 goto exception;
4917 break;
4918 default: /* DS, ES, FS, or GS */
4919 /*
4920 * segment is not a data or readable code segment or
4921 * ((segment is a data or nonconforming code segment)
4922 * and (both RPL and CPL > DPL))
4923 */
4924 if ((kvm_seg.type & 0xa) == 0x8 ||
4925 (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
4926 goto exception;
4927 break;
4928 }
4929
4930 if (!kvm_seg.unusable && kvm_seg.s) {
4931 /* mark segment as accessed */
4932 kvm_seg.type |= 1;
4933 seg_desc.type |= 1;
4934 save_guest_segment_descriptor(vcpu, selector, &seg_desc);
4935 }
4936load:
4937 kvm_set_segment(vcpu, &kvm_seg, seg);
4938 return X86EMUL_CONTINUE;
4939exception:
4940 kvm_queue_exception_e(vcpu, err_vec, err_code);
4941 return X86EMUL_PROPAGATE_FAULT;
4942}
4943
4944static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4945 struct tss_segment_32 *tss)
4946{
4947 tss->cr3 = vcpu->arch.cr3;
4948 tss->eip = kvm_rip_read(vcpu);
4949 tss->eflags = kvm_get_rflags(vcpu);
4950 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
4951 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
4952 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
4953 tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
4954 tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
4955 tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
4956 tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
4957 tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
4958 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
4959 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
4960 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
4961 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
4962 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
4963 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
4964 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4965}
4966
4967static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
4968{
4969 struct kvm_segment kvm_seg;
4970 kvm_get_segment(vcpu, &kvm_seg, seg);
4971 kvm_seg.selector = sel;
4972 kvm_set_segment(vcpu, &kvm_seg, seg);
4973}
4974
4975static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4976 struct tss_segment_32 *tss)
4977{
4978 kvm_set_cr3(vcpu, tss->cr3);
4979
4980 kvm_rip_write(vcpu, tss->eip);
4981 kvm_set_rflags(vcpu, tss->eflags | 2);
4982
4983 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
4984 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
4985 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
4986 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
4987 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
4988 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
4989 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
4990 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
4991
4992 /*
4993 * SDM says that segment selectors are loaded before segment
4994 * descriptors
4995 */
4996 kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
4997 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
4998 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
4999 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5000 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5001 kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
5002 kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
5003
5004 /*
5005 * Now load segment descriptors. If fault happenes at this stage
5006 * it is handled in a context of new task
5007 */
5008 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
5009 return 1;
5010
5011 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
5012 return 1;
5013 4925
5014 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) 4926 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
5015 return 1; 4927 return EMULATE_DONE;
5016
5017 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
5018 return 1;
5019
5020 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
5021 return 1;
5022
5023 if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
5024 return 1;
5025
5026 if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
5027 return 1;
5028 return 0;
5029}
5030
5031static void save_state_to_tss16(struct kvm_vcpu *vcpu,
5032 struct tss_segment_16 *tss)
5033{
5034 tss->ip = kvm_rip_read(vcpu);
5035 tss->flag = kvm_get_rflags(vcpu);
5036 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5037 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
5038 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
5039 tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5040 tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
5041 tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
5042 tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
5043 tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
5044
5045 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
5046 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
5047 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
5048 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
5049 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
5050}
5051
5052static int load_state_from_tss16(struct kvm_vcpu *vcpu,
5053 struct tss_segment_16 *tss)
5054{
5055 kvm_rip_write(vcpu, tss->ip);
5056 kvm_set_rflags(vcpu, tss->flag | 2);
5057 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
5058 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
5059 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
5060 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
5061 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
5062 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
5063 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
5064 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
5065
5066 /*
5067 * SDM says that segment selectors are loaded before segment
5068 * descriptors
5069 */
5070 kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
5071 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
5072 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
5073 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5074 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5075
5076 /*
5077 * Now load segment descriptors. If fault happenes at this stage
5078 * it is handled in a context of new task
5079 */
5080 if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
5081 return 1;
5082
5083 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
5084 return 1;
5085
5086 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
5087 return 1;
5088
5089 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
5090 return 1;
5091
5092 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
5093 return 1;
5094 return 0;
5095}
5096
5097static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
5098 u16 old_tss_sel, u32 old_tss_base,
5099 struct desc_struct *nseg_desc)
5100{
5101 struct tss_segment_16 tss_segment_16;
5102 int ret = 0;
5103
5104 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
5105 sizeof tss_segment_16))
5106 goto out;
5107
5108 save_state_to_tss16(vcpu, &tss_segment_16);
5109
5110 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
5111 sizeof tss_segment_16))
5112 goto out;
5113
5114 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
5115 &tss_segment_16, sizeof tss_segment_16))
5116 goto out;
5117
5118 if (old_tss_sel != 0xffff) {
5119 tss_segment_16.prev_task_link = old_tss_sel;
5120
5121 if (kvm_write_guest(vcpu->kvm,
5122 get_tss_base_addr_write(vcpu, nseg_desc),
5123 &tss_segment_16.prev_task_link,
5124 sizeof tss_segment_16.prev_task_link))
5125 goto out;
5126 }
5127
5128 if (load_state_from_tss16(vcpu, &tss_segment_16))
5129 goto out;
5130
5131 ret = 1;
5132out:
5133 return ret;
5134}
5135
5136static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
5137 u16 old_tss_sel, u32 old_tss_base,
5138 struct desc_struct *nseg_desc)
5139{
5140 struct tss_segment_32 tss_segment_32;
5141 int ret = 0;
5142
5143 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
5144 sizeof tss_segment_32))
5145 goto out;
5146
5147 save_state_to_tss32(vcpu, &tss_segment_32);
5148
5149 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
5150 sizeof tss_segment_32))
5151 goto out;
5152
5153 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
5154 &tss_segment_32, sizeof tss_segment_32))
5155 goto out;
5156
5157 if (old_tss_sel != 0xffff) {
5158 tss_segment_32.prev_task_link = old_tss_sel;
5159
5160 if (kvm_write_guest(vcpu->kvm,
5161 get_tss_base_addr_write(vcpu, nseg_desc),
5162 &tss_segment_32.prev_task_link,
5163 sizeof tss_segment_32.prev_task_link))
5164 goto out;
5165 }
5166
5167 if (load_state_from_tss32(vcpu, &tss_segment_32))
5168 goto out;
5169
5170 ret = 1;
5171out:
5172 return ret;
5173}
5174
5175int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5176{
5177 struct kvm_segment tr_seg;
5178 struct desc_struct cseg_desc;
5179 struct desc_struct nseg_desc;
5180 int ret = 0;
5181 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
5182 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
5183 u32 desc_limit;
5184
5185 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
5186
5187	/* FIXME: Handle errors. Failure to read either of the TSSs or their
5188	 * descriptors should generate a page fault.
5189 */
5190 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
5191 goto out;
5192
5193 if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
5194 goto out;
5195
5196 if (reason != TASK_SWITCH_IRET) {
5197 int cpl;
5198
5199 cpl = kvm_x86_ops->get_cpl(vcpu);
5200 if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
5201 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
5202 return 1;
5203 }
5204 }
5205
5206 desc_limit = get_desc_limit(&nseg_desc);
5207 if (!nseg_desc.p ||
5208 ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
5209 desc_limit < 0x2b)) {
5210 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
5211 return 1;
5212 }
5213
5214 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
5215		cseg_desc.type &= ~(1 << 1); /* clear the busy (B) flag */
5216 save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
5217 }
5218
5219 if (reason == TASK_SWITCH_IRET) {
5220 u32 eflags = kvm_get_rflags(vcpu);
5221 kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
5222 }
5223
5224	/* set the back link to the previous task only if the NT bit is set in eflags;
5225	   note that old_tss_sel is not used after this point */
5226 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
5227 old_tss_sel = 0xffff;
5228
5229 if (nseg_desc.type & 8)
5230 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel,
5231 old_tss_base, &nseg_desc);
5232 else
5233 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel,
5234 old_tss_base, &nseg_desc);
5235
5236 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
5237 u32 eflags = kvm_get_rflags(vcpu);
5238 kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT);
5239 }
5240
5241 if (reason != TASK_SWITCH_IRET) {
5242 nseg_desc.type |= (1 << 1);
5243 save_guest_segment_descriptor(vcpu, tss_selector,
5244 &nseg_desc);
5245 }
5246
5247 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
5248 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
5249 tr_seg.type = 11;
5250 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
5251out:
5252 return ret;
5253} 4928}
5254EXPORT_SYMBOL_GPL(kvm_task_switch); 4929EXPORT_SYMBOL_GPL(kvm_task_switch);
5255 4930
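For orientation: kvm_task_switch() above is what the vendor exit handlers call when the guest triggers a hardware task switch. A minimal sketch of such a caller, assuming the VMX exit-qualification layout (new TSS selector in bits 15:0, switch source in bits 31:30); the handler name and the decoding below are illustrative, not part of this patch:

static int handle_task_switch_sketch(struct kvm_vcpu *vcpu,
				     unsigned long exit_qualification)
{
	/* new TSS selector: bits 15:0 of the exit qualification */
	u16 tss_selector = exit_qualification & 0xffff;
	/* switch source: 0 CALL, 1 IRET, 2 JMP, 3 task gate in the IDT */
	int reason = (exit_qualification >> 30) & 3;

	/* reason maps onto the TASK_SWITCH_* values tested above */
	return kvm_task_switch(vcpu, tss_selector, reason);
}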
@@ -5258,15 +4933,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5258{ 4933{
5259 int mmu_reset_needed = 0; 4934 int mmu_reset_needed = 0;
5260 int pending_vec, max_bits; 4935 int pending_vec, max_bits;
5261 struct descriptor_table dt; 4936 struct desc_ptr dt;
5262 4937
5263 vcpu_load(vcpu); 4938 vcpu_load(vcpu);
5264 4939
5265 dt.limit = sregs->idt.limit; 4940 dt.size = sregs->idt.limit;
5266 dt.base = sregs->idt.base; 4941 dt.address = sregs->idt.base;
5267 kvm_x86_ops->set_idt(vcpu, &dt); 4942 kvm_x86_ops->set_idt(vcpu, &dt);
5268 dt.limit = sregs->gdt.limit; 4943 dt.size = sregs->gdt.limit;
5269 dt.base = sregs->gdt.base; 4944 dt.address = sregs->gdt.base;
5270 kvm_x86_ops->set_gdt(vcpu, &dt); 4945 kvm_x86_ops->set_gdt(vcpu, &dt);
5271 4946
5272 vcpu->arch.cr2 = sregs->cr2; 4947 vcpu->arch.cr2 = sregs->cr2;
@@ -5365,11 +5040,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5365 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); 5040 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
5366 } 5041 }
5367 5042
5368 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 5043 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5369 vcpu->arch.singlestep_cs = 5044 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
5370 get_segment_selector(vcpu, VCPU_SREG_CS); 5045 get_segment_base(vcpu, VCPU_SREG_CS);
5371 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);
5372 }
5373 5046
5374 /* 5047 /*
5375 * Trigger an rflags update that will inject or remove the trace 5048 * Trigger an rflags update that will inject or remove the trace
@@ -5860,13 +5533,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
5860 return kvm_x86_ops->interrupt_allowed(vcpu); 5533 return kvm_x86_ops->interrupt_allowed(vcpu);
5861} 5534}
5862 5535
5536bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
5537{
5538 unsigned long current_rip = kvm_rip_read(vcpu) +
5539 get_segment_base(vcpu, VCPU_SREG_CS);
5540
5541 return current_rip == linear_rip;
5542}
5543EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
5544
5863unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) 5545unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
5864{ 5546{
5865 unsigned long rflags; 5547 unsigned long rflags;
5866 5548
5867 rflags = kvm_x86_ops->get_rflags(vcpu); 5549 rflags = kvm_x86_ops->get_rflags(vcpu);
5868 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) 5550 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5869 rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); 5551 rflags &= ~X86_EFLAGS_TF;
5870 return rflags; 5552 return rflags;
5871} 5553}
5872EXPORT_SYMBOL_GPL(kvm_get_rflags); 5554EXPORT_SYMBOL_GPL(kvm_get_rflags);
@@ -5874,10 +5556,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags);
5874void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 5556void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
5875{ 5557{
5876 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && 5558 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
5877 vcpu->arch.singlestep_cs == 5559 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
5878 get_segment_selector(vcpu, VCPU_SREG_CS) && 5560 rflags |= X86_EFLAGS_TF;
5879 vcpu->arch.singlestep_rip == kvm_rip_read(vcpu))
5880 rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
5881 kvm_x86_ops->set_rflags(vcpu, rflags); 5561 kvm_x86_ops->set_rflags(vcpu, rflags);
5882} 5562}
5883EXPORT_SYMBOL_GPL(kvm_set_rflags); 5563EXPORT_SYMBOL_GPL(kvm_set_rflags);
@@ -5893,3 +5573,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
5893EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); 5573EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
5894EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); 5574EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
5895EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); 5575EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
5576EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b7a404722d2b..f4b54458285b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -65,6 +65,13 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
66} 66}
67 67
68static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm)
69{
70 return rcu_dereference_check(kvm->arch.aliases,
71 srcu_read_lock_held(&kvm->srcu)
72 || lockdep_is_held(&kvm->slots_lock));
73}
74
68void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 75void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
69void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 76void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
70 77
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 60df9c84ecae..23ea02253900 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -160,6 +160,7 @@ struct kvm_pit_config {
160#define KVM_EXIT_DCR 15 160#define KVM_EXIT_DCR 15
161#define KVM_EXIT_NMI 16 161#define KVM_EXIT_NMI 16
162#define KVM_EXIT_INTERNAL_ERROR 17 162#define KVM_EXIT_INTERNAL_ERROR 17
163#define KVM_EXIT_OSI 18
163 164
164/* For KVM_EXIT_INTERNAL_ERROR */ 165/* For KVM_EXIT_INTERNAL_ERROR */
165#define KVM_INTERNAL_ERROR_EMULATION 1 166#define KVM_INTERNAL_ERROR_EMULATION 1
@@ -259,6 +260,10 @@ struct kvm_run {
259 __u32 ndata; 260 __u32 ndata;
260 __u64 data[16]; 261 __u64 data[16];
261 } internal; 262 } internal;
263 /* KVM_EXIT_OSI */
264 struct {
265 __u64 gprs[32];
266 } osi;
262 /* Fix the size of the union. */ 267 /* Fix the size of the union. */
263 char padding[256]; 268 char padding[256];
264 }; 269 };
@@ -400,6 +405,15 @@ struct kvm_ioeventfd {
400 __u8 pad[36]; 405 __u8 pad[36];
401}; 406};
402 407
408/* for KVM_ENABLE_CAP */
409struct kvm_enable_cap {
410 /* in */
411 __u32 cap;
412 __u32 flags;
413 __u64 args[4];
414 __u8 pad[64];
415};
416
403#define KVMIO 0xAE 417#define KVMIO 0xAE
404 418
405/* 419/*
@@ -501,7 +515,15 @@ struct kvm_ioeventfd {
501#define KVM_CAP_HYPERV_VAPIC 45 515#define KVM_CAP_HYPERV_VAPIC 45
502#define KVM_CAP_HYPERV_SPIN 46 516#define KVM_CAP_HYPERV_SPIN 46
503#define KVM_CAP_PCI_SEGMENT 47 517#define KVM_CAP_PCI_SEGMENT 47
518#define KVM_CAP_PPC_PAIRED_SINGLES 48
519#define KVM_CAP_INTR_SHADOW 49
520#ifdef __KVM_HAVE_DEBUGREGS
521#define KVM_CAP_DEBUGREGS 50
522#endif
504#define KVM_CAP_X86_ROBUST_SINGLESTEP 51 523#define KVM_CAP_X86_ROBUST_SINGLESTEP 51
524#define KVM_CAP_PPC_OSI 52
525#define KVM_CAP_PPC_UNSET_IRQ 53
526#define KVM_CAP_ENABLE_CAP 54
505 527
506#ifdef KVM_CAP_IRQ_ROUTING 528#ifdef KVM_CAP_IRQ_ROUTING
507 529
@@ -688,6 +710,10 @@ struct kvm_clock_data {
688/* Available with KVM_CAP_VCPU_EVENTS */ 710/* Available with KVM_CAP_VCPU_EVENTS */
689#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) 711#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events)
690#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) 712#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events)
713/* Available with KVM_CAP_DEBUGREGS */
714#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs)
715#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs)
716#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap)
691 717
692#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) 718#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
693 719
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 169d07758ee5..7cb116afa1cd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -105,6 +105,12 @@ struct kvm_vcpu {
105 struct kvm_vcpu_arch arch; 105 struct kvm_vcpu_arch arch;
106}; 106};
107 107
108/*
109 * Some of the bitops functions do not support overly long bitmaps.
110 * This number must be chosen so that it does not exceed those limits.
111 */
112#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
113
108struct kvm_memory_slot { 114struct kvm_memory_slot {
109 gfn_t base_gfn; 115 gfn_t base_gfn;
110 unsigned long npages; 116 unsigned long npages;
@@ -237,17 +243,23 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
237void vcpu_load(struct kvm_vcpu *vcpu); 243void vcpu_load(struct kvm_vcpu *vcpu);
238void vcpu_put(struct kvm_vcpu *vcpu); 244void vcpu_put(struct kvm_vcpu *vcpu);
239 245
240int kvm_init(void *opaque, unsigned int vcpu_size, 246int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
241 struct module *module); 247 struct module *module);
242void kvm_exit(void); 248void kvm_exit(void);
243 249
244void kvm_get_kvm(struct kvm *kvm); 250void kvm_get_kvm(struct kvm *kvm);
245void kvm_put_kvm(struct kvm *kvm); 251void kvm_put_kvm(struct kvm *kvm);
246 252
253static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
254{
255 return rcu_dereference_check(kvm->memslots,
256 srcu_read_lock_held(&kvm->srcu)
257 || lockdep_is_held(&kvm->slots_lock));
258}
259
247#define HPA_MSB ((sizeof(hpa_t) * 8) - 1) 260#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
248#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) 261#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
249static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } 262static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
250struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
251 263
252extern struct page *bad_page; 264extern struct page *bad_page;
253extern pfn_t bad_pfn; 265extern pfn_t bad_pfn;
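The new kvm_memslots() helper centralizes the rcu_dereference_check() so every reader documents the same locking requirement (an SRCU read-side section or slots_lock). A sketch of the expected read-side pattern, with the loop body left illustrative:

	int i, idx;
	struct kvm_memslots *slots;

	idx = srcu_read_lock(&kvm->srcu);	/* pin the current memslot array */
	slots = kvm_memslots(kvm);
	for (i = 0; i < slots->nmemslots; i++) {
		struct kvm_memory_slot *slot = &slots->memslots[i];
		/* inspect slot->base_gfn, slot->npages, ... */
	}
	srcu_read_unlock(&kvm->srcu, idx);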
diff --git a/include/linux/tboot.h b/include/linux/tboot.h
index bf2a0c748878..1dba6ee55203 100644
--- a/include/linux/tboot.h
+++ b/include/linux/tboot.h
@@ -150,6 +150,7 @@ extern int tboot_force_iommu(void);
150 150
151#else 151#else
152 152
153#define tboot_enabled() 0
153#define tboot_probe() do { } while (0) 154#define tboot_probe() do { } while (0)
154#define tboot_shutdown(shutdown_type) do { } while (0) 155#define tboot_shutdown(shutdown_type) do { } while (0)
155#define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \ 156#define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index b17d49dfc3ef..6dd3a51ab1cb 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -5,7 +5,6 @@
5 5
6#undef TRACE_SYSTEM 6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM kvm 7#define TRACE_SYSTEM kvm
8#define TRACE_INCLUDE_FILE kvm
9 8
10#if defined(__KVM_HAVE_IOAPIC) 9#if defined(__KVM_HAVE_IOAPIC)
11TRACE_EVENT(kvm_set_irq, 10TRACE_EVENT(kvm_set_irq,
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 02ff2b19dbe2..4d10b1e047f4 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -316,12 +316,16 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
316 kvm_assigned_dev_intr, 0, 316 kvm_assigned_dev_intr, 0,
317 "kvm_assigned_msix_device", 317 "kvm_assigned_msix_device",
318 (void *)dev); 318 (void *)dev);
319 /* FIXME: free requested_irq's on failure */
320 if (r) 319 if (r)
321 return r; 320 goto err;
322 } 321 }
323 322
324 return 0; 323 return 0;
324err:
325 for (i -= 1; i >= 0; i--)
326 free_irq(dev->host_msix_entries[i].vector, (void *)dev);
327 pci_disable_msix(dev->dev);
328 return r;
325} 329}
326 330
327#endif 331#endif
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 36e258029649..53850177163f 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -120,8 +120,10 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
120 return ret; 120 return ret;
121 121
122out_free_dev: 122out_free_dev:
123 kvm->coalesced_mmio_dev = NULL;
123 kfree(dev); 124 kfree(dev);
124out_free_page: 125out_free_page:
126 kvm->coalesced_mmio_ring = NULL;
125 __free_page(page); 127 __free_page(page);
126out_err: 128out_err:
127 return ret; 129 return ret;
@@ -139,7 +141,7 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
139 struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; 141 struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
140 142
141 if (dev == NULL) 143 if (dev == NULL)
142 return -EINVAL; 144 return -ENXIO;
143 145
144 mutex_lock(&kvm->slots_lock); 146 mutex_lock(&kvm->slots_lock);
145 if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { 147 if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
@@ -162,7 +164,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
162 struct kvm_coalesced_mmio_zone *z; 164 struct kvm_coalesced_mmio_zone *z;
163 165
164 if (dev == NULL) 166 if (dev == NULL)
165 return -EINVAL; 167 return -ENXIO;
166 168
167 mutex_lock(&kvm->slots_lock); 169 mutex_lock(&kvm->slots_lock);
168 170
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 11692b9e8830..d2f06be63354 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -127,7 +127,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
127 int i, r = 0; 127 int i, r = 0;
128 struct kvm_memslots *slots; 128 struct kvm_memslots *slots;
129 129
130 slots = rcu_dereference(kvm->memslots); 130 slots = kvm_memslots(kvm);
131 131
132 for (i = 0; i < slots->nmemslots; i++) { 132 for (i = 0; i < slots->nmemslots; i++) {
133 r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); 133 r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
@@ -286,7 +286,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
286 int i; 286 int i;
287 struct kvm_memslots *slots; 287 struct kvm_memslots *slots;
288 288
289 slots = rcu_dereference(kvm->memslots); 289 slots = kvm_memslots(kvm);
290 290
291 for (i = 0; i < slots->nmemslots; i++) { 291 for (i = 0; i < slots->nmemslots; i++) {
292 kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, 292 kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c82ae2492634..f032806a212f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -422,9 +422,6 @@ static struct kvm *kvm_create_vm(void)
422 spin_lock(&kvm_lock); 422 spin_lock(&kvm_lock);
423 list_add(&kvm->vm_list, &vm_list); 423 list_add(&kvm->vm_list, &vm_list);
424 spin_unlock(&kvm_lock); 424 spin_unlock(&kvm_lock);
425#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
426 kvm_coalesced_mmio_init(kvm);
427#endif
428out: 425out:
429 return kvm; 426 return kvm;
430 427
@@ -560,6 +557,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
560 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 557 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
561 npages = mem->memory_size >> PAGE_SHIFT; 558 npages = mem->memory_size >> PAGE_SHIFT;
562 559
560 r = -EINVAL;
561 if (npages > KVM_MEM_MAX_NR_PAGES)
562 goto out;
563
563 if (!npages) 564 if (!npages)
564 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; 565 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
565 566
@@ -833,7 +834,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
833struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) 834struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
834{ 835{
835 int i; 836 int i;
836 struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 837 struct kvm_memslots *slots = kvm_memslots(kvm);
837 838
838 for (i = 0; i < slots->nmemslots; ++i) { 839 for (i = 0; i < slots->nmemslots; ++i) {
839 struct kvm_memory_slot *memslot = &slots->memslots[i]; 840 struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -855,7 +856,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
855int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) 856int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
856{ 857{
857 int i; 858 int i;
858 struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 859 struct kvm_memslots *slots = kvm_memslots(kvm);
859 860
860 gfn = unalias_gfn_instantiation(kvm, gfn); 861 gfn = unalias_gfn_instantiation(kvm, gfn);
861 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 862 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
@@ -899,7 +900,7 @@ out:
899int memslot_id(struct kvm *kvm, gfn_t gfn) 900int memslot_id(struct kvm *kvm, gfn_t gfn)
900{ 901{
901 int i; 902 int i;
902 struct kvm_memslots *slots = rcu_dereference(kvm->memslots); 903 struct kvm_memslots *slots = kvm_memslots(kvm);
903 struct kvm_memory_slot *memslot = NULL; 904 struct kvm_memory_slot *memslot = NULL;
904 905
905 gfn = unalias_gfn(kvm, gfn); 906 gfn = unalias_gfn(kvm, gfn);
@@ -914,6 +915,11 @@ int memslot_id(struct kvm *kvm, gfn_t gfn)
914 return memslot - slots->memslots; 915 return memslot - slots->memslots;
915} 916}
916 917
918static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
919{
920 return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
921}
922
917unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) 923unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
918{ 924{
919 struct kvm_memory_slot *slot; 925 struct kvm_memory_slot *slot;
@@ -922,7 +928,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
922 slot = gfn_to_memslot_unaliased(kvm, gfn); 928 slot = gfn_to_memslot_unaliased(kvm, gfn);
923 if (!slot || slot->flags & KVM_MEMSLOT_INVALID) 929 if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
924 return bad_hva(); 930 return bad_hva();
925 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); 931 return gfn_to_hva_memslot(slot, gfn);
926} 932}
927EXPORT_SYMBOL_GPL(gfn_to_hva); 933EXPORT_SYMBOL_GPL(gfn_to_hva);
928 934
@@ -972,11 +978,6 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
972} 978}
973EXPORT_SYMBOL_GPL(gfn_to_pfn); 979EXPORT_SYMBOL_GPL(gfn_to_pfn);
974 980
975static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
976{
977 return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
978}
979
980pfn_t gfn_to_pfn_memslot(struct kvm *kvm, 981pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
981 struct kvm_memory_slot *slot, gfn_t gfn) 982 struct kvm_memory_slot *slot, gfn_t gfn)
982{ 983{
@@ -1190,13 +1191,8 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
1190 memslot = gfn_to_memslot_unaliased(kvm, gfn); 1191 memslot = gfn_to_memslot_unaliased(kvm, gfn);
1191 if (memslot && memslot->dirty_bitmap) { 1192 if (memslot && memslot->dirty_bitmap) {
1192 unsigned long rel_gfn = gfn - memslot->base_gfn; 1193 unsigned long rel_gfn = gfn - memslot->base_gfn;
1193 unsigned long *p = memslot->dirty_bitmap +
1194 rel_gfn / BITS_PER_LONG;
1195 int offset = rel_gfn % BITS_PER_LONG;
1196 1194
1197 /* avoid RMW */ 1195 generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
1198 if (!generic_test_le_bit(offset, p))
1199 generic___set_le_bit(offset, p);
1200 } 1196 }
1201} 1197}
1202 1198
@@ -1609,7 +1605,6 @@ static long kvm_vm_ioctl(struct file *filp,
1609 r = -EFAULT; 1605 r = -EFAULT;
1610 if (copy_from_user(&zone, argp, sizeof zone)) 1606 if (copy_from_user(&zone, argp, sizeof zone))
1611 goto out; 1607 goto out;
1612 r = -ENXIO;
1613 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); 1608 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
1614 if (r) 1609 if (r)
1615 goto out; 1610 goto out;
@@ -1621,7 +1616,6 @@ static long kvm_vm_ioctl(struct file *filp,
1621 r = -EFAULT; 1616 r = -EFAULT;
1622 if (copy_from_user(&zone, argp, sizeof zone)) 1617 if (copy_from_user(&zone, argp, sizeof zone))
1623 goto out; 1618 goto out;
1624 r = -ENXIO;
1625 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); 1619 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
1626 if (r) 1620 if (r)
1627 goto out; 1621 goto out;
@@ -1755,12 +1749,19 @@ static struct file_operations kvm_vm_fops = {
1755 1749
1756static int kvm_dev_ioctl_create_vm(void) 1750static int kvm_dev_ioctl_create_vm(void)
1757{ 1751{
1758 int fd; 1752 int fd, r;
1759 struct kvm *kvm; 1753 struct kvm *kvm;
1760 1754
1761 kvm = kvm_create_vm(); 1755 kvm = kvm_create_vm();
1762 if (IS_ERR(kvm)) 1756 if (IS_ERR(kvm))
1763 return PTR_ERR(kvm); 1757 return PTR_ERR(kvm);
1758#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
1759 r = kvm_coalesced_mmio_init(kvm);
1760 if (r < 0) {
1761 kvm_put_kvm(kvm);
1762 return r;
1763 }
1764#endif
1764 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); 1765 fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
1765 if (fd < 0) 1766 if (fd < 0)
1766 kvm_put_kvm(kvm); 1767 kvm_put_kvm(kvm);
@@ -1928,11 +1929,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
1928 cpu); 1929 cpu);
1929 hardware_disable(NULL); 1930 hardware_disable(NULL);
1930 break; 1931 break;
1931 case CPU_UP_CANCELED:
1932 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
1933 cpu);
1934 smp_call_function_single(cpu, hardware_disable, NULL, 1);
1935 break;
1936 case CPU_ONLINE: 1932 case CPU_ONLINE:
1937 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 1933 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
1938 cpu); 1934 cpu);
@@ -1991,7 +1987,9 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
1991 int len, const void *val) 1987 int len, const void *val)
1992{ 1988{
1993 int i; 1989 int i;
1994 struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); 1990 struct kvm_io_bus *bus;
1991
1992 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
1995 for (i = 0; i < bus->dev_count; i++) 1993 for (i = 0; i < bus->dev_count; i++)
1996 if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) 1994 if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
1997 return 0; 1995 return 0;
@@ -2003,8 +2001,9 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2003 int len, void *val) 2001 int len, void *val)
2004{ 2002{
2005 int i; 2003 int i;
2006 struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); 2004 struct kvm_io_bus *bus;
2007 2005
2006 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
2008 for (i = 0; i < bus->dev_count; i++) 2007 for (i = 0; i < bus->dev_count; i++)
2009 if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) 2008 if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
2010 return 0; 2009 return 0;
@@ -2179,7 +2178,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
2179 kvm_arch_vcpu_put(vcpu); 2178 kvm_arch_vcpu_put(vcpu);
2180} 2179}
2181 2180
2182int kvm_init(void *opaque, unsigned int vcpu_size, 2181int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
2183 struct module *module) 2182 struct module *module)
2184{ 2183{
2185 int r; 2184 int r;
@@ -2229,8 +2228,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
2229 goto out_free_4; 2228 goto out_free_4;
2230 2229
2231 /* A kmem cache lets us meet the alignment requirements of fx_save. */ 2230 /* A kmem cache lets us meet the alignment requirements of fx_save. */
2232 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, 2231 if (!vcpu_align)
2233 __alignof__(struct kvm_vcpu), 2232 vcpu_align = __alignof__(struct kvm_vcpu);
2233 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
2234 0, NULL); 2234 0, NULL);
2235 if (!kvm_vcpu_cache) { 2235 if (!kvm_vcpu_cache) {
2236 r = -ENOMEM; 2236 r = -ENOMEM;
@@ -2279,7 +2279,6 @@ EXPORT_SYMBOL_GPL(kvm_init);
2279 2279
2280void kvm_exit(void) 2280void kvm_exit(void)
2281{ 2281{
2282 tracepoint_synchronize_unregister();
2283 kvm_exit_debug(); 2282 kvm_exit_debug();
2284 misc_deregister(&kvm_dev); 2283 misc_deregister(&kvm_dev);
2285 kmem_cache_destroy(kvm_vcpu_cache); 2284 kmem_cache_destroy(kvm_vcpu_cache);