author		Linus Torvalds <torvalds@linux-foundation.org>	2010-08-04 13:43:01 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-08-04 13:43:01 -0400
commit		5e83f6fbdb020b70c0e413312801424d13c58d68 (patch)
tree		ca270178fa891813dbc47751c331fed975d3766c /arch
parent		fe445c6e2cb62a566e1a89f8798de11459975710 (diff)
parent		3444d7da1839b851eefedd372978d8a982316c36 (diff)
Merge branch 'kvm-updates/2.6.36' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.36' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (198 commits)
KVM: VMX: Fix host GDT.LIMIT corruption
KVM: MMU: use __xchg_spte more smartly
KVM: MMU: clean up spte set and accessed/dirty tracking
KVM: MMU: don't atomically set spte if it's not present
KVM: MMU: fix page dirty tracking lost while sync page
KVM: MMU: fix broken page accessed tracking with ept enabled
KVM: MMU: add missing reserved bits check in speculative path
KVM: MMU: fix mmu notifier invalidate handler for huge spte
KVM: x86 emulator: fix xchg instruction emulation
KVM: x86: Call mask notifiers from pic
KVM: x86: never re-execute instruction with enabled tdp
KVM: Document KVM_GET_SUPPORTED_CPUID2 ioctl
KVM: x86: emulator: inc/dec can have lock prefix
KVM: MMU: Eliminate redundant temporaries in FNAME(fetch)
KVM: MMU: Validate all gptes during fetch, not just those used for new pages
KVM: MMU: Simplify spte fetch() function
KVM: MMU: Add gpte_valid() helper
KVM: MMU: Add validate_direct_spte() helper
KVM: MMU: Add drop_large_spte() helper
KVM: MMU: Use __set_spte to link shadow pages
...
Diffstat (limited to 'arch')
46 files changed, 2729 insertions, 1992 deletions
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index a362e67e0ca6..2f229e5de498 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -235,6 +235,7 @@ struct kvm_vm_data {
 #define KVM_REQ_PTC_G		32
 #define KVM_REQ_RESUME		33
 
+#define KVM_HPAGE_GFN_SHIFT(x)	0
 #define KVM_NR_PAGE_SIZES	1
 #define KVM_PAGES_PER_HPAGE(x)	1
 
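A quick illustration of what the new macro buys: generic KVM indexes per-memslot huge-page metadata by shifting gfns, so even architectures without large pages must provide the shift. This is a hedged sketch; gfn_to_index() below is modeled on the x86 side of this series and is not part of the ia64 diff:

/*
 * Illustrative only: with KVM_HPAGE_GFN_SHIFT(level) == 0 and
 * KVM_PAGES_PER_HPAGE(level) == 1, every "huge page" on ia64
 * degenerates to a single page, but the generic index math still
 * compiles and yields sane values.
 */
static unsigned long gfn_to_index(unsigned long gfn, unsigned long base_gfn,
				  int level)
{
	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
	       (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
}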
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 21b701374f72..5cb58655cd5f 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -725,8 +725,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 	sigset_t sigsaved;
 
-	vcpu_load(vcpu);
-
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
@@ -748,7 +746,6 @@ out:
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
-	vcpu_put(vcpu);
 	return r;
 }
 
@@ -883,8 +880,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
 	int i;
 
-	vcpu_load(vcpu);
-
 	for (i = 0; i < 16; i++) {
 		vpd->vgr[i] = regs->vpd.vgr[i];
 		vpd->vbgr[i] = regs->vpd.vbgr[i];
@@ -931,8 +926,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
 	set_bit(KVM_REQ_RESUME, &vcpu->requests);
 
-	vcpu_put(vcpu);
-
 	return 0;
 }
 
@@ -1802,35 +1795,24 @@ void kvm_arch_exit(void)
 	kvm_vmm_info = NULL;
 }
 
-static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
-		struct kvm_dirty_log *log)
+static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
+		struct kvm_memory_slot *memslot)
 {
-	struct kvm_memory_slot *memslot;
-	int r, i;
+	int i;
 	long base;
 	unsigned long n;
 	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
 			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
 
-	r = -EINVAL;
-	if (log->slot >= KVM_MEMORY_SLOTS)
-		goto out;
-
-	memslot = &kvm->memslots->memslots[log->slot];
-	r = -ENOENT;
-	if (!memslot->dirty_bitmap)
-		goto out;
-
 	n = kvm_dirty_bitmap_bytes(memslot);
 	base = memslot->base_gfn / BITS_PER_LONG;
 
+	spin_lock(&kvm->arch.dirty_log_lock);
 	for (i = 0; i < n/sizeof(long); ++i) {
 		memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
 		dirty_bitmap[base + i] = 0;
 	}
-	r = 0;
-out:
-	return r;
+	spin_unlock(&kvm->arch.dirty_log_lock);
 }
 
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
@@ -1842,12 +1824,17 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	int is_dirty = 0;
 
 	mutex_lock(&kvm->slots_lock);
-	spin_lock(&kvm->arch.dirty_log_lock);
 
-	r = kvm_ia64_sync_dirty_log(kvm, log);
-	if (r)
+	r = -EINVAL;
+	if (log->slot >= KVM_MEMORY_SLOTS)
+		goto out;
+
+	memslot = &kvm->memslots->memslots[log->slot];
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
 		goto out;
 
+	kvm_ia64_sync_dirty_log(kvm, memslot);
 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
 	if (r)
 		goto out;
@@ -1855,14 +1842,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
 		kvm_flush_remote_tlbs(kvm);
-		memslot = &kvm->memslots->memslots[log->slot];
 		n = kvm_dirty_bitmap_bytes(memslot);
 		memset(memslot->dirty_bitmap, 0, n);
 	}
 	r = 0;
 out:
 	mutex_unlock(&kvm->slots_lock);
-	spin_unlock(&kvm->arch.dirty_log_lock);
 	return r;
 }
 
@@ -1953,11 +1938,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 	return vcpu->arch.timer_fired;
 }
 
-gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
-{
-	return gfn;
-}
-
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
@@ -1967,9 +1947,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				struct kvm_mp_state *mp_state)
 {
-	vcpu_load(vcpu);
 	mp_state->mp_state = vcpu->arch.mp_state;
-	vcpu_put(vcpu);
 	return 0;
 }
 
@@ -2000,10 +1978,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
 	int r = 0;
 
-	vcpu_load(vcpu);
 	vcpu->arch.mp_state = mp_state->mp_state;
 	if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
 		r = vcpu_reset(vcpu);
-	vcpu_put(vcpu);
 	return r;
 }
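The dropped vcpu_load()/vcpu_put() pairs are not a behavior change in isolation: this series moves that bracketing into the common vcpu ioctl dispatcher, so each arch hook runs with the vcpu already loaded. A rough sketch of the resulting call shape, assuming the common-code side of the series (which is outside this arch-only diffstat):

/*
 * Sketch only: virt/kvm/kvm_main.c now brackets the arch hook itself,
 * which is why kvm_arch_vcpu_ioctl_get_mpstate() above no longer does.
 */
static long demo_get_mpstate(struct kvm_vcpu *vcpu,
			     struct kvm_mp_state *mp_state)
{
	long r;

	vcpu_load(vcpu);	/* taken once, in common code */
	r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, mp_state);
	vcpu_put(vcpu);		/* so arch code must not nest it */
	return r;
}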
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 6f74d93725a0..8274a2d43925 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -115,7 +115,15 @@ extern void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu);
 extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte);
 extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
 extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
-extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data);
+
+extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
+extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
+extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
+extern int kvmppc_mmu_hpte_sysinit(void);
+extern void kvmppc_mmu_hpte_sysexit(void);
+
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
 extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h
index 94f05de9ad04..c3d4f0518a67 100644
--- a/arch/powerpc/include/asm/kvm_fpu.h
+++ b/arch/powerpc/include/asm/kvm_fpu.h
@@ -22,24 +22,24 @@
 
 #include <linux/types.h>
 
-extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1);
-extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1);
-extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1);
+extern void fps_fres(u64 *fpscr, u32 *dst, u32 *src1);
+extern void fps_frsqrte(u64 *fpscr, u32 *dst, u32 *src1);
+extern void fps_fsqrts(u64 *fpscr, u32 *dst, u32 *src1);
 
-extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2);
-extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2);
-extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2);
-extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2);
+extern void fps_fadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);
+extern void fps_fdivs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);
+extern void fps_fmuls(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);
+extern void fps_fsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2);
 
-extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
+extern void fps_fmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
 		       u32 *src3);
-extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
+extern void fps_fmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
 		       u32 *src3);
-extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
+extern void fps_fnmadds(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
 			u32 *src3);
-extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
+extern void fps_fnmsubs(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
 			u32 *src3);
-extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2,
+extern void fps_fsel(u64 *fpscr, u32 *dst, u32 *src1, u32 *src2,
 		     u32 *src3);
 
 #define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \
@@ -82,4 +82,7 @@ FPD_THREE_IN(fmadd)
 FPD_THREE_IN(fnmsub)
 FPD_THREE_IN(fnmadd)
 
+extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr);
+extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr);
+
 #endif
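With the thread_struct parameter gone, callers pass the guest FPSCR directly. A minimal sketch of the new convention, assuming the vcpu->arch.fpr/qpr/fpscr layout used elsewhere in this series (the helper name is ours, for illustration):

/* Hypothetical helper: sync a 32-bit QPR image into the 64-bit FPR. */
static inline void demo_sync_qpr_to_fpr(struct kvm_vcpu *vcpu, int reg)
{
	/* u32 single-precision in, u64 double-precision out */
	kvm_cvt_fd(&vcpu->arch.qpr[reg], &vcpu->arch.fpr[reg],
		   &vcpu->arch.fpscr);
}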
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0c9ad869decd..b0b23c007d6e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -35,10 +35,17 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
 /* We don't currently support large pages. */
+#define KVM_HPAGE_GFN_SHIFT(x)	0
 #define KVM_NR_PAGE_SIZES	1
 #define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
 
-#define HPTEG_CACHE_NUM 1024
+#define HPTEG_CACHE_NUM			(1 << 15)
+#define HPTEG_HASH_BITS_PTE		13
+#define HPTEG_HASH_BITS_VPTE		13
+#define HPTEG_HASH_BITS_VPTE_LONG	5
+#define HPTEG_HASH_NUM_PTE		(1 << HPTEG_HASH_BITS_PTE)
+#define HPTEG_HASH_NUM_VPTE		(1 << HPTEG_HASH_BITS_VPTE)
+#define HPTEG_HASH_NUM_VPTE_LONG	(1 << HPTEG_HASH_BITS_VPTE_LONG)
 
 struct kvm;
 struct kvm_run;
@@ -151,6 +158,9 @@ struct kvmppc_mmu {
 };
 
 struct hpte_cache {
+	struct hlist_node list_pte;
+	struct hlist_node list_vpte;
+	struct hlist_node list_vpte_long;
 	u64 host_va;
 	u64 pfn;
 	ulong slot;
@@ -282,8 +292,10 @@ struct kvm_vcpu_arch {
 	unsigned long pending_exceptions;
 
 #ifdef CONFIG_PPC_BOOK3S
-	struct hpte_cache hpte_cache[HPTEG_CACHE_NUM];
-	int hpte_cache_offset;
+	struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
+	struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
+	struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG];
+	int hpte_cache_count;
 #endif
 };
 
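The sizing is worth spelling out: the flat 1024-entry array becomes up to HPTEG_CACHE_NUM = 1 << 15 = 32768 slab-allocated entries, reached through three hash tables of 2^13, 2^13 and 2^5 buckets. A hedged sketch of the bucket math; pte_bucket() is illustrative, the real lookups appear in book3s_mmu_hpte.c later in this diff:

#include <linux/hash.h>

/* Illustrative: hash_64() folds a key to HPTEG_HASH_BITS_PTE bits,
 * which indexes one of the (1 << 13) hlist_head buckets above. */
static inline struct hlist_head *pte_bucket(struct kvm_vcpu *vcpu, u64 eaddr)
{
	return &vcpu->arch.hpte_hash_pte[hash_64(eaddr >> 12,
						 HPTEG_HASH_BITS_PTE)];
}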
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 3b4dcc82a4c1..ab3e392ac63c 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -101,10 +101,6 @@ EXPORT_SYMBOL(pci_dram_offset);
 EXPORT_SYMBOL(start_thread);
 EXPORT_SYMBOL(kernel_thread);
 
-#ifdef CONFIG_PPC_FPU
-EXPORT_SYMBOL_GPL(cvt_df);
-EXPORT_SYMBOL_GPL(cvt_fd);
-#endif
 EXPORT_SYMBOL(giveup_fpu);
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL(giveup_altivec);
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 812312542e50..9b9b5cdea840 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -316,7 +316,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
 	gfn = gpaddr >> PAGE_SHIFT;
 	new_page = gfn_to_page(vcpu->kvm, gfn);
 	if (is_error_page(new_page)) {
-		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
+		printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n",
+		       (unsigned long long)gfn);
 		kvm_release_page_clean(new_page);
 		return;
 	}
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ff436066bf77..d45c818a384c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -45,6 +45,7 @@ kvm-book3s_64-objs := \
 	book3s.o \
 	book3s_emulate.o \
 	book3s_interrupts.o \
+	book3s_mmu_hpte.o \
 	book3s_64_mmu_host.o \
 	book3s_64_mmu.o \
 	book3s_32_mmu.o
@@ -57,6 +58,7 @@ kvm-book3s_32-objs := \
 	book3s.o \
 	book3s_emulate.o \
 	book3s_interrupts.o \
+	book3s_mmu_hpte.o \
 	book3s_32_mmu_host.o \
 	book3s_32_mmu.o
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b998abf1a63d..a3cef30d1d42 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1047,8 +1047,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
 
-	vcpu_load(vcpu);
-
 	regs->pc = kvmppc_get_pc(vcpu);
 	regs->cr = kvmppc_get_cr(vcpu);
 	regs->ctr = kvmppc_get_ctr(vcpu);
@@ -1069,8 +1067,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
 		regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
 
-	vcpu_put(vcpu);
-
 	return 0;
 }
 
@@ -1078,8 +1074,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
 
-	vcpu_load(vcpu);
-
 	kvmppc_set_pc(vcpu, regs->pc);
 	kvmppc_set_cr(vcpu, regs->cr);
 	kvmppc_set_ctr(vcpu, regs->ctr);
@@ -1099,8 +1093,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
 		kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
 
-	vcpu_put(vcpu);
-
 	return 0;
 }
 
@@ -1110,8 +1102,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	int i;
 
-	vcpu_load(vcpu);
-
 	sregs->pvr = vcpu->arch.pvr;
 
 	sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
@@ -1131,8 +1121,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 		}
 	}
 
-	vcpu_put(vcpu);
-
 	return 0;
 }
 
@@ -1142,8 +1130,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	int i;
 
-	vcpu_load(vcpu);
-
 	kvmppc_set_pvr(vcpu, sregs->pvr);
 
 	vcpu3s->sdr1 = sregs->u.s.sdr1;
@@ -1171,8 +1157,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	/* Flush the MMU after messing with the segments */
 	kvmppc_mmu_pte_flush(vcpu, 0, 0);
 
-	vcpu_put(vcpu);
-
 	return 0;
 }
 
@@ -1309,12 +1293,17 @@ extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret;
-	struct thread_struct ext_bkp;
+	double fpr[32][TS_FPRWIDTH];
+	unsigned int fpscr;
+	int fpexc_mode;
 #ifdef CONFIG_ALTIVEC
-	bool save_vec = current->thread.used_vr;
+	vector128 vr[32];
+	vector128 vscr;
+	unsigned long uninitialized_var(vrsave);
+	int used_vr;
 #endif
 #ifdef CONFIG_VSX
-	bool save_vsx = current->thread.used_vsr;
+	int used_vsr;
 #endif
 	ulong ext_msr;
 
@@ -1327,27 +1316,27 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	/* Save FPU state in stack */
 	if (current->thread.regs->msr & MSR_FP)
 		giveup_fpu(current);
-	memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
-	ext_bkp.fpscr = current->thread.fpscr;
-	ext_bkp.fpexc_mode = current->thread.fpexc_mode;
+	memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
+	fpscr = current->thread.fpscr.val;
+	fpexc_mode = current->thread.fpexc_mode;
 
 #ifdef CONFIG_ALTIVEC
 	/* Save Altivec state in stack */
-	if (save_vec) {
+	used_vr = current->thread.used_vr;
+	if (used_vr) {
 		if (current->thread.regs->msr & MSR_VEC)
 			giveup_altivec(current);
-		memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
-		ext_bkp.vscr = current->thread.vscr;
-		ext_bkp.vrsave = current->thread.vrsave;
+		memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
+		vscr = current->thread.vscr;
+		vrsave = current->thread.vrsave;
 	}
-	ext_bkp.used_vr = current->thread.used_vr;
 #endif
 
 #ifdef CONFIG_VSX
 	/* Save VSX state in stack */
-	if (save_vsx && (current->thread.regs->msr & MSR_VSX))
+	used_vsr = current->thread.used_vsr;
+	if (used_vsr && (current->thread.regs->msr & MSR_VSX))
 		__giveup_vsx(current);
-	ext_bkp.used_vsr = current->thread.used_vsr;
 #endif
 
 	/* Remember the MSR with disabled extensions */
@@ -1372,22 +1361,22 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	kvmppc_giveup_ext(vcpu, MSR_VSX);
 
 	/* Restore FPU state from stack */
-	memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
-	current->thread.fpscr = ext_bkp.fpscr;
-	current->thread.fpexc_mode = ext_bkp.fpexc_mode;
+	memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
+	current->thread.fpscr.val = fpscr;
+	current->thread.fpexc_mode = fpexc_mode;
 
 #ifdef CONFIG_ALTIVEC
 	/* Restore Altivec state from stack */
-	if (save_vec && current->thread.used_vr) {
-		memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
-		current->thread.vscr = ext_bkp.vscr;
-		current->thread.vrsave= ext_bkp.vrsave;
+	if (used_vr && current->thread.used_vr) {
+		memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
+		current->thread.vscr = vscr;
+		current->thread.vrsave = vrsave;
 	}
-	current->thread.used_vr = ext_bkp.used_vr;
+	current->thread.used_vr = used_vr;
 #endif
 
 #ifdef CONFIG_VSX
-	current->thread.used_vsr = ext_bkp.used_vsr;
+	current->thread.used_vsr = used_vsr;
 #endif
 
 	return ret;
@@ -1395,12 +1384,22 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 static int kvmppc_book3s_init(void)
 {
-	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
-			THIS_MODULE);
+	int r;
+
+	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
+		     THIS_MODULE);
+
+	if (r)
+		return r;
+
+	r = kvmppc_mmu_hpte_sysinit();
+
+	return r;
 }
 
 static void kvmppc_book3s_exit(void)
 {
+	kvmppc_mmu_hpte_sysexit();
 	kvm_exit();
 }
 
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index 0b10503c8a4a..3292d76101d2 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -354,10 +354,10 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid,
 		*vsid = VSID_REAL_DR | gvsid;
 		break;
 	case MSR_DR|MSR_IR:
-		if (!sr->valid)
-			return -1;
-
-		*vsid = sr->vsid;
+		if (sr->valid)
+			*vsid = sr->vsid;
+		else
+			*vsid = VSID_BAT | gvsid;
 		break;
 	default:
 		BUG();
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 0bb66005338f..0b51ef872c1e 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/hash.h>
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -57,139 +58,26 @@
 static ulong htab;
 static u32 htabmask;
 
-static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
 	volatile u32 *pteg;
 
-	dprintk_mmu("KVM: Flushing SPTE: 0x%llx (0x%llx) -> 0x%llx\n",
-		    pte->pte.eaddr, pte->pte.vpage, pte->host_va);
-
+	/* Remove from host HTAB */
 	pteg = (u32*)pte->slot;
-
 	pteg[0] = 0;
+
+	/* And make sure it's gone from the TLB too */
 	asm volatile ("sync");
 	asm volatile ("tlbie %0" : : "r" (pte->pte.eaddr) : "memory");
 	asm volatile ("sync");
 	asm volatile ("tlbsync");
-
-	pte->host_va = 0;
-
-	if (pte->pte.may_write)
-		kvm_release_pfn_dirty(pte->pfn);
-	else
-		kvm_release_pfn_clean(pte->pfn);
-}
-
-void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
-{
-	int i;
-
-	dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%x & 0x%x\n",
-		    vcpu->arch.hpte_cache_offset, guest_ea, ea_mask);
-	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
-
-	guest_ea &= ea_mask;
-	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
-		struct hpte_cache *pte;
-
-		pte = &vcpu->arch.hpte_cache[i];
-		if (!pte->host_va)
-			continue;
-
-		if ((pte->pte.eaddr & ea_mask) == guest_ea) {
-			invalidate_pte(vcpu, pte);
-		}
-	}
-
-	/* Doing a complete flush -> start from scratch */
-	if (!ea_mask)
-		vcpu->arch.hpte_cache_offset = 0;
-}
-
-void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
-{
-	int i;
-
-	dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
-		    vcpu->arch.hpte_cache_offset, guest_vp, vp_mask);
-	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
-
-	guest_vp &= vp_mask;
-	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
-		struct hpte_cache *pte;
-
-		pte = &vcpu->arch.hpte_cache[i];
-		if (!pte->host_va)
-			continue;
-
-		if ((pte->pte.vpage & vp_mask) == guest_vp) {
-			invalidate_pte(vcpu, pte);
-		}
-	}
-}
-
-void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
-{
-	int i;
-
-	dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%llx & 0x%llx\n",
-		    vcpu->arch.hpte_cache_offset, pa_start, pa_end);
-	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
-
-	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
-		struct hpte_cache *pte;
-
-		pte = &vcpu->arch.hpte_cache[i];
-		if (!pte->host_va)
-			continue;
-
-		if ((pte->pte.raddr >= pa_start) &&
-		    (pte->pte.raddr < pa_end)) {
-			invalidate_pte(vcpu, pte);
-		}
-	}
-}
-
-struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data)
-{
-	int i;
-	u64 guest_vp;
-
-	guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false);
-	for (i=0; i<vcpu->arch.hpte_cache_offset; i++) {
-		struct hpte_cache *pte;
-
-		pte = &vcpu->arch.hpte_cache[i];
-		if (!pte->host_va)
-			continue;
-
-		if (pte->pte.vpage == guest_vp)
-			return &pte->pte;
-	}
-
-	return NULL;
-}
-
-static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM)
-		kvmppc_mmu_pte_flush(vcpu, 0, 0);
-
-	return vcpu->arch.hpte_cache_offset++;
 }
 
 /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using
  * a hash, so we don't waste cycles on looping */
 static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
 {
-	return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
+	return hash_64(gvsid, SID_MAP_BITS);
 }
 
 
@@ -256,7 +144,6 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	register int rr = 0;
 	bool primary = false;
 	bool evict = false;
-	int hpte_id;
 	struct hpte_cache *pte;
 
 	/* Get host physical address for gpa */
@@ -341,8 +228,7 @@ next_pteg:
 
 	/* Now tell our Shadow PTE code about the new page */
 
-	hpte_id = kvmppc_mmu_hpte_cache_next(vcpu);
-	pte = &vcpu->arch.hpte_cache[hpte_id];
+	pte = kvmppc_mmu_hpte_cache_next(vcpu);
 
 	dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
 		    orig_pte->may_write ? 'w' : '-',
@@ -355,6 +241,8 @@ next_pteg:
 	pte->pte = *orig_pte;
 	pte->pfn = hpaddr >> PAGE_SHIFT;
 
+	kvmppc_mmu_hpte_cache_map(vcpu, pte);
+
 	return 0;
 }
 
@@ -439,7 +327,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
 
 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 {
-	kvmppc_mmu_pte_flush(vcpu, 0, 0);
+	kvmppc_mmu_hpte_destroy(vcpu);
 	preempt_disable();
 	__destroy_context(to_book3s(vcpu)->context_id);
 	preempt_enable();
@@ -479,5 +367,7 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
 	htabmask = ((sdr1 & 0x1FF) << 16) | 0xFFC0;
 	htab = (ulong)__va(sdr1 & 0xffff0000);
 
+	kvmppc_mmu_hpte_init(vcpu);
+
 	return 0;
 }
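Both host-MMU flavors make the same swap in kvmppc_sid_hash(): the hand-rolled XOR fold of eight SID_MAP_BITS-wide slices becomes a single hash_64() call, which multiplies by a 64-bit constant and keeps the top SID_MAP_BITS bits. A minimal stand-alone illustration; the demo function name is ours, not the kernel's:

#include <linux/hash.h>

static inline u16 demo_sid_hash(u64 gvsid)
{
	/* one multiply + shift, with better mixing of low-entropy gvsids */
	return (u16)hash_64(gvsid, SID_MAP_BITS);
}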
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index e4b5744977f6..384179a5002b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/hash.h>
 
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
@@ -46,135 +47,20 @@
 #define dprintk_slb(a, ...) do { } while(0)
 #endif
 
-static void invalidate_pte(struct hpte_cache *pte)
+void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
-	dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n",
-		    pte->pte.eaddr, pte->pte.vpage, pte->host_va);
-
 	ppc_md.hpte_invalidate(pte->slot, pte->host_va,
 			       MMU_PAGE_4K, MMU_SEGSIZE_256M,
 			       false);
-	pte->host_va = 0;
-
-	if (pte->pte.may_write)
-		kvm_release_pfn_dirty(pte->pfn);
-	else
-		kvm_release_pfn_clean(pte->pfn);
-}
-
-void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
-{
-	int i;
-
-	dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n",
-		    vcpu->arch.hpte_cache_offset, guest_ea, ea_mask);
-	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
-
-	guest_ea &= ea_mask;
-	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
-		struct hpte_cache *pte;
-
-		pte = &vcpu->arch.hpte_cache[i];
-		if (!pte->host_va)
-			continue;
-
-		if ((pte->pte.eaddr & ea_mask) == guest_ea) {
-			invalidate_pte(pte);
-		}
-	}
-
-	/* Doing a complete flush -> start from scratch */
-	if (!ea_mask)
-		vcpu->arch.hpte_cache_offset = 0;
-}
-
-void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
-{
-	int i;
-
-	dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
-		    vcpu->arch.hpte_cache_offset, guest_vp, vp_mask);
-	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
-
-	guest_vp &= vp_mask;
-	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
-		struct hpte_cache *pte;
-
-		pte = &vcpu->arch.hpte_cache[i];
-		if (!pte->host_va)
-			continue;
-
-		if ((pte->pte.vpage & vp_mask) == guest_vp) {
-			invalidate_pte(pte);
-		}
-	}
-}
-
-void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
-{
-	int i;
-
-	dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx & 0x%lx\n",
-		    vcpu->arch.hpte_cache_offset, pa_start, pa_end);
-	BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
-
-	for (i = 0; i < vcpu->arch.hpte_cache_offset; i++) {
-		struct hpte_cache *pte;
-
-		pte = &vcpu->arch.hpte_cache[i];
-		if (!pte->host_va)
-			continue;
-
-		if ((pte->pte.raddr >= pa_start) &&
-		    (pte->pte.raddr < pa_end)) {
-			invalidate_pte(pte);
-		}
-	}
-}
-
-struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data)
-{
-	int i;
-	u64 guest_vp;
-
-	guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false);
-	for (i=0; i<vcpu->arch.hpte_cache_offset; i++) {
-		struct hpte_cache *pte;
-
-		pte = &vcpu->arch.hpte_cache[i];
-		if (!pte->host_va)
-			continue;
-
-		if (pte->pte.vpage == guest_vp)
-			return &pte->pte;
-	}
-
-	return NULL;
-}
-
-static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM)
-		kvmppc_mmu_pte_flush(vcpu, 0, 0);
-
-	return vcpu->arch.hpte_cache_offset++;
 }
 
 /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using
  * a hash, so we don't waste cycles on looping */
 static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
 {
-	return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
-		     ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
+	return hash_64(gvsid, SID_MAP_BITS);
 }
 
-
 static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
 {
 	struct kvmppc_sid_map *map;
@@ -273,8 +159,7 @@ map_again:
 		attempt++;
 		goto map_again;
 	} else {
-		int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu);
-		struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id];
+		struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
 
 		dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n",
 			    ((rflags & HPTE_R_PP) == 3) ? '-' : 'w',
@@ -292,6 +177,8 @@ map_again:
 		pte->host_va = va;
 		pte->pte = *orig_pte;
 		pte->pfn = hpaddr >> PAGE_SHIFT;
+
+		kvmppc_mmu_hpte_cache_map(vcpu, pte);
 	}
 
 	return 0;
@@ -418,7 +305,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu)
 
 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 {
-	kvmppc_mmu_pte_flush(vcpu, 0, 0);
+	kvmppc_mmu_hpte_destroy(vcpu);
 	__destroy_context(to_book3s(vcpu)->context_id);
 }
 
@@ -436,5 +323,7 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
 	vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS;
 	vcpu3s->vsid_next = vcpu3s->vsid_first;
 
+	kvmppc_mmu_hpte_init(vcpu);
+
 	return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
new file mode 100644
index 000000000000..4868d4a7ebc5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *     Alexander Graf <agraf@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/hash.h>
+#include <linux/slab.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+
+#define PTE_SIZE	12
+
+/* #define DEBUG_MMU */
+
+#ifdef DEBUG_MMU
+#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__)
+#else
+#define dprintk_mmu(a, ...) do { } while(0)
+#endif
+
+static struct kmem_cache *hpte_cache;
+
+static inline u64 kvmppc_mmu_hash_pte(u64 eaddr)
+{
+	return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE);
+}
+
+static inline u64 kvmppc_mmu_hash_vpte(u64 vpage)
+{
+	return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE);
+}
+
+static inline u64 kvmppc_mmu_hash_vpte_long(u64 vpage)
+{
+	return hash_64((vpage & 0xffffff000ULL) >> 12,
+		       HPTEG_HASH_BITS_VPTE_LONG);
+}
+
+void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+	u64 index;
+
+	/* Add to ePTE list */
+	index = kvmppc_mmu_hash_pte(pte->pte.eaddr);
+	hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]);
+
+	/* Add to vPTE list */
+	index = kvmppc_mmu_hash_vpte(pte->pte.vpage);
+	hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]);
+
+	/* Add to vPTE_long list */
+	index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage);
+	hlist_add_head(&pte->list_vpte_long,
+		       &vcpu->arch.hpte_hash_vpte_long[index]);
+}
+
+static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
+{
+	dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n",
+		    pte->pte.eaddr, pte->pte.vpage, pte->host_va);
+
+	/* Different for 32 and 64 bit */
+	kvmppc_mmu_invalidate_pte(vcpu, pte);
+
+	if (pte->pte.may_write)
+		kvm_release_pfn_dirty(pte->pfn);
+	else
+		kvm_release_pfn_clean(pte->pfn);
+
+	hlist_del(&pte->list_pte);
+	hlist_del(&pte->list_vpte);
+	hlist_del(&pte->list_vpte_long);
+
+	vcpu->arch.hpte_cache_count--;
+	kmem_cache_free(hpte_cache, pte);
+}
+
+static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
+{
+	struct hpte_cache *pte;
+	struct hlist_node *node, *tmp;
+	int i;
+
+	for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
+		struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
+
+		hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
+			invalidate_pte(vcpu, pte);
+	}
+}
+
+static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
+{
+	struct hlist_head *list;
+	struct hlist_node *node, *tmp;
+	struct hpte_cache *pte;
+
+	/* Find the list of entries in the map */
+	list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)];
+
+	/* Check the list for matching entries and invalidate */
+	hlist_for_each_entry_safe(pte, node, tmp, list, list_pte)
+		if ((pte->pte.eaddr & ~0xfffUL) == guest_ea)
+			invalidate_pte(vcpu, pte);
+}
+
+void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask)
+{
+	u64 i;
+
+	dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n",
+		    vcpu->arch.hpte_cache_count, guest_ea, ea_mask);
+
+	guest_ea &= ea_mask;
+
+	switch (ea_mask) {
+	case ~0xfffUL:
+		kvmppc_mmu_pte_flush_page(vcpu, guest_ea);
+		break;
+	case 0x0ffff000:
+		/* 32-bit flush w/o segment, go through all possible segments */
+		for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL)
+			kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL);
+		break;
+	case 0:
+		/* Doing a complete flush -> start from scratch */
+		kvmppc_mmu_pte_flush_all(vcpu);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+
+/* Flush with mask 0xfffffffff */
+static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+	struct hlist_head *list;
+	struct hlist_node *node, *tmp;
+	struct hpte_cache *pte;
+	u64 vp_mask = 0xfffffffffULL;
+
+	list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)];
+
+	/* Check the list for matching entries and invalidate */
+	hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte)
+		if ((pte->pte.vpage & vp_mask) == guest_vp)
+			invalidate_pte(vcpu, pte);
+}
+
+/* Flush with mask 0xffffff000 */
+static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
+{
+	struct hlist_head *list;
+	struct hlist_node *node, *tmp;
+	struct hpte_cache *pte;
+	u64 vp_mask = 0xffffff000ULL;
+
+	list = &vcpu->arch.hpte_hash_vpte_long[
+		kvmppc_mmu_hash_vpte_long(guest_vp)];
+
+	/* Check the list for matching entries and invalidate */
+	hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
+		if ((pte->pte.vpage & vp_mask) == guest_vp)
+			invalidate_pte(vcpu, pte);
+}
+
+void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
+{
+	dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
+		    vcpu->arch.hpte_cache_count, guest_vp, vp_mask);
+	guest_vp &= vp_mask;
+
+	switch(vp_mask) {
+	case 0xfffffffffULL:
+		kvmppc_mmu_pte_vflush_short(vcpu, guest_vp);
+		break;
+	case 0xffffff000ULL:
+		kvmppc_mmu_pte_vflush_long(vcpu, guest_vp);
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+}
+
+void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
+{
+	struct hlist_node *node, *tmp;
+	struct hpte_cache *pte;
+	int i;
+
+	dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n",
+		    vcpu->arch.hpte_cache_count, pa_start, pa_end);
+
+	for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
+		struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i];
+
+		hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long)
+			if ((pte->pte.raddr >= pa_start) &&
+			    (pte->pte.raddr < pa_end))
+				invalidate_pte(vcpu, pte);
+	}
+}
+
+struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
+{
+	struct hpte_cache *pte;
+
+	pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
+	vcpu->arch.hpte_cache_count++;
+
+	if (vcpu->arch.hpte_cache_count == HPTEG_CACHE_NUM)
+		kvmppc_mmu_pte_flush_all(vcpu);
+
+	return pte;
+}
+
+void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
+{
+	kvmppc_mmu_pte_flush(vcpu, 0, 0);
+}
+
+static void kvmppc_mmu_hpte_init_hash(struct hlist_head *hash_list, int len)
+{
+	int i;
+
+	for (i = 0; i < len; i++)
+		INIT_HLIST_HEAD(&hash_list[i]);
+}
+
+int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu)
+{
+	/* init hpte lookup hashes */
+	kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte,
+				  ARRAY_SIZE(vcpu->arch.hpte_hash_pte));
+	kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte,
+				  ARRAY_SIZE(vcpu->arch.hpte_hash_vpte));
+	kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long,
+				  ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long));
+
+	return 0;
+}
+
+int kvmppc_mmu_hpte_sysinit(void)
+{
+	/* init hpte slab cache */
+	hpte_cache = kmem_cache_create("kvm-spt", sizeof(struct hpte_cache),
+				       sizeof(struct hpte_cache), 0, NULL);
+
+	return 0;
+}
+
+void kvmppc_mmu_hpte_sysexit(void)
+{
+	kmem_cache_destroy(hpte_cache);
+}
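Putting the new API together, the expected lifecycle as seen from the host-MMU callers earlier in this diff: allocate with kvmppc_mmu_hpte_cache_next(), fill in the entry, publish it with kvmppc_mmu_hpte_cache_map(), and let one of the flush paths tear it down. A hedged usage sketch; the demo function and its arguments are ours:

static void demo_map_one(struct kvm_vcpu *vcpu, struct kvmppc_pte *gpte,
			 u64 host_va, u64 pfn)
{
	struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);

	pte->host_va = host_va;
	pte->pte = *gpte;
	pte->pfn = pfn;

	/* now reachable from all three hash tables */
	kvmppc_mmu_hpte_cache_map(vcpu, pte);

	/* and removable again, e.g. by effective address: */
	kvmppc_mmu_pte_flush(vcpu, gpte->eaddr & ~0xfffUL, ~0xfffUL);
}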
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index a9f66abafcb3..474f2e24050a 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -159,10 +159,7 @@
 
 static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt)
 {
-	struct thread_struct t;
-
-	t.fpscr.val = vcpu->arch.fpscr;
-	cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t);
+	kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr);
 }
 
 static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
@@ -183,7 +180,6 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				   int rs, ulong addr, int ls_type)
 {
 	int emulated = EMULATE_FAIL;
-	struct thread_struct t;
 	int r;
 	char tmp[8];
 	int len = sizeof(u32);
@@ -191,8 +187,6 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	if (ls_type == FPU_LS_DOUBLE)
 		len = sizeof(u64);
 
-	t.fpscr.val = vcpu->arch.fpscr;
-
 	/* read from memory */
 	r = kvmppc_ld(vcpu, &addr, len, tmp, true);
 	vcpu->arch.paddr_accessed = addr;
@@ -210,7 +204,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* put in registers */
 	switch (ls_type) {
 	case FPU_LS_SINGLE:
-		cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t);
+		kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr);
 		vcpu->arch.qpr[rs] = *((u32*)tmp);
 		break;
 	case FPU_LS_DOUBLE:
@@ -229,17 +223,14 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				    int rs, ulong addr, int ls_type)
 {
 	int emulated = EMULATE_FAIL;
-	struct thread_struct t;
 	int r;
 	char tmp[8];
 	u64 val;
 	int len;
 
-	t.fpscr.val = vcpu->arch.fpscr;
-
 	switch (ls_type) {
 	case FPU_LS_SINGLE:
-		cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t);
+		kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr);
 		val = *((u32*)tmp);
 		len = sizeof(u32);
 		break;
@@ -278,13 +269,10 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				   int rs, ulong addr, bool w, int i)
 {
 	int emulated = EMULATE_FAIL;
-	struct thread_struct t;
 	int r;
 	float one = 1.0;
 	u32 tmp[2];
 
-	t.fpscr.val = vcpu->arch.fpscr;
-
 	/* read from memory */
 	if (w) {
 		r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true);
@@ -308,7 +296,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	emulated = EMULATE_DONE;
 
 	/* put in registers */
-	cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t);
+	kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr);
 	vcpu->arch.qpr[rs] = tmp[1];
 
 	dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0],
@@ -322,14 +310,11 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				    int rs, ulong addr, bool w, int i)
 {
 	int emulated = EMULATE_FAIL;
325 | struct thread_struct t; | ||
326 | int r; | 313 | int r; |
327 | u32 tmp[2]; | 314 | u32 tmp[2]; |
328 | int len = w ? sizeof(u32) : sizeof(u64); | 315 | int len = w ? sizeof(u32) : sizeof(u64); |
329 | 316 | ||
330 | t.fpscr.val = vcpu->arch.fpscr; | 317 | kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr); |
331 | |||
332 | cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t); | ||
333 | tmp[1] = vcpu->arch.qpr[rs]; | 318 | tmp[1] = vcpu->arch.qpr[rs]; |
334 | 319 | ||
335 | r = kvmppc_st(vcpu, &addr, len, tmp, true); | 320 | r = kvmppc_st(vcpu, &addr, len, tmp, true); |
@@ -517,7 +502,7 @@ static int get_d_signext(u32 inst) | |||
517 | static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | 502 | static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, |
518 | int reg_out, int reg_in1, int reg_in2, | 503 | int reg_out, int reg_in1, int reg_in2, |
519 | int reg_in3, int scalar, | 504 | int reg_in3, int scalar, |
520 | void (*func)(struct thread_struct *t, | 505 | void (*func)(u64 *fpscr, |
521 | u32 *dst, u32 *src1, | 506 | u32 *dst, u32 *src1, |
522 | u32 *src2, u32 *src3)) | 507 | u32 *src2, u32 *src3)) |
523 | { | 508 | { |
@@ -526,27 +511,25 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | |||
526 | u32 ps0_out; | 511 | u32 ps0_out; |
527 | u32 ps0_in1, ps0_in2, ps0_in3; | 512 | u32 ps0_in1, ps0_in2, ps0_in3; |
528 | u32 ps1_in1, ps1_in2, ps1_in3; | 513 | u32 ps1_in1, ps1_in2, ps1_in3; |
529 | struct thread_struct t; | ||
530 | t.fpscr.val = vcpu->arch.fpscr; | ||
531 | 514 | ||
532 | /* RC */ | 515 | /* RC */ |
533 | WARN_ON(rc); | 516 | WARN_ON(rc); |
534 | 517 | ||
535 | /* PS0 */ | 518 | /* PS0 */ |
536 | cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); | 519 | kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); |
537 | cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); | 520 | kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); |
538 | cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t); | 521 | kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr); |
539 | 522 | ||
540 | if (scalar & SCALAR_LOW) | 523 | if (scalar & SCALAR_LOW) |
541 | ps0_in2 = qpr[reg_in2]; | 524 | ps0_in2 = qpr[reg_in2]; |
542 | 525 | ||
543 | func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); | 526 | func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); |
544 | 527 | ||
545 | dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", | 528 | dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", |
546 | ps0_in1, ps0_in2, ps0_in3, ps0_out); | 529 | ps0_in1, ps0_in2, ps0_in3, ps0_out); |
547 | 530 | ||
548 | if (!(scalar & SCALAR_NO_PS0)) | 531 | if (!(scalar & SCALAR_NO_PS0)) |
549 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | 532 | kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); |
550 | 533 | ||
551 | /* PS1 */ | 534 | /* PS1 */ |
552 | ps1_in1 = qpr[reg_in1]; | 535 | ps1_in1 = qpr[reg_in1]; |
@@ -557,7 +540,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | |||
557 | ps1_in2 = ps0_in2; | 540 | ps1_in2 = ps0_in2; |
558 | 541 | ||
559 | if (!(scalar & SCALAR_NO_PS1)) | 542 | if (!(scalar & SCALAR_NO_PS1)) |
560 | func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); | 543 | func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); |
561 | 544 | ||
562 | dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", | 545 | dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", |
563 | ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); | 546 | ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); |
@@ -568,7 +551,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, | |||
568 | static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | 551 | static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, |
569 | int reg_out, int reg_in1, int reg_in2, | 552 | int reg_out, int reg_in1, int reg_in2, |
570 | int scalar, | 553 | int scalar, |
571 | void (*func)(struct thread_struct *t, | 554 | void (*func)(u64 *fpscr, |
572 | u32 *dst, u32 *src1, | 555 | u32 *dst, u32 *src1, |
573 | u32 *src2)) | 556 | u32 *src2)) |
574 | { | 557 | { |
@@ -578,27 +561,25 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | |||
578 | u32 ps0_in1, ps0_in2; | 561 | u32 ps0_in1, ps0_in2; |
579 | u32 ps1_out; | 562 | u32 ps1_out; |
580 | u32 ps1_in1, ps1_in2; | 563 | u32 ps1_in1, ps1_in2; |
581 | struct thread_struct t; | ||
582 | t.fpscr.val = vcpu->arch.fpscr; | ||
583 | 564 | ||
584 | /* RC */ | 565 | /* RC */ |
585 | WARN_ON(rc); | 566 | WARN_ON(rc); |
586 | 567 | ||
587 | /* PS0 */ | 568 | /* PS0 */ |
588 | cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); | 569 | kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); |
589 | 570 | ||
590 | if (scalar & SCALAR_LOW) | 571 | if (scalar & SCALAR_LOW) |
591 | ps0_in2 = qpr[reg_in2]; | 572 | ps0_in2 = qpr[reg_in2]; |
592 | else | 573 | else |
593 | cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); | 574 | kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); |
594 | 575 | ||
595 | func(&t, &ps0_out, &ps0_in1, &ps0_in2); | 576 | func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); |
596 | 577 | ||
597 | if (!(scalar & SCALAR_NO_PS0)) { | 578 | if (!(scalar & SCALAR_NO_PS0)) { |
598 | dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", | 579 | dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", |
599 | ps0_in1, ps0_in2, ps0_out); | 580 | ps0_in1, ps0_in2, ps0_out); |
600 | 581 | ||
601 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | 582 | kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); |
602 | } | 583 | } |
603 | 584 | ||
604 | /* PS1 */ | 585 | /* PS1 */ |
@@ -608,7 +589,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | |||
608 | if (scalar & SCALAR_HIGH) | 589 | if (scalar & SCALAR_HIGH) |
609 | ps1_in2 = ps0_in2; | 590 | ps1_in2 = ps0_in2; |
610 | 591 | ||
611 | func(&t, &ps1_out, &ps1_in1, &ps1_in2); | 592 | func(&vcpu->arch.fpscr, &ps1_out, &ps1_in1, &ps1_in2); |
612 | 593 | ||
613 | if (!(scalar & SCALAR_NO_PS1)) { | 594 | if (!(scalar & SCALAR_NO_PS1)) { |
614 | qpr[reg_out] = ps1_out; | 595 | qpr[reg_out] = ps1_out; |
@@ -622,31 +603,29 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, | |||
622 | 603 | ||
623 | static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, | 604 | static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, |
624 | int reg_out, int reg_in, | 605 | int reg_out, int reg_in, |
625 | void (*func)(struct thread_struct *t, | 606 | void (*func)(u64 *t, |
626 | u32 *dst, u32 *src1)) | 607 | u32 *dst, u32 *src1)) |
627 | { | 608 | { |
628 | u32 *qpr = vcpu->arch.qpr; | 609 | u32 *qpr = vcpu->arch.qpr; |
629 | u64 *fpr = vcpu->arch.fpr; | 610 | u64 *fpr = vcpu->arch.fpr; |
630 | u32 ps0_out, ps0_in; | 611 | u32 ps0_out, ps0_in; |
631 | u32 ps1_in; | 612 | u32 ps1_in; |
632 | struct thread_struct t; | ||
633 | t.fpscr.val = vcpu->arch.fpscr; | ||
634 | 613 | ||
635 | /* RC */ | 614 | /* RC */ |
636 | WARN_ON(rc); | 615 | WARN_ON(rc); |
637 | 616 | ||
638 | /* PS0 */ | 617 | /* PS0 */ |
639 | cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t); | 618 | kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr); |
640 | func(&t, &ps0_out, &ps0_in); | 619 | func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); |
641 | 620 | ||
642 | dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", | 621 | dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", |
643 | ps0_in, ps0_out); | 622 | ps0_in, ps0_out); |
644 | 623 | ||
645 | cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); | 624 | kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); |
646 | 625 | ||
647 | /* PS1 */ | 626 | /* PS1 */ |
648 | ps1_in = qpr[reg_in]; | 627 | ps1_in = qpr[reg_in]; |
649 | func(&t, &qpr[reg_out], &ps1_in); | 628 | func(&vcpu->arch.fpscr, &qpr[reg_out], &ps1_in); |
650 | 629 | ||
651 | dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", | 630 | dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", |
652 | ps1_in, qpr[reg_out]); | 631 | ps1_in, qpr[reg_out]); |
@@ -672,13 +651,10 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
672 | 651 | ||
673 | bool rcomp = (inst & 1) ? true : false; | 652 | bool rcomp = (inst & 1) ? true : false; |
674 | u32 cr = kvmppc_get_cr(vcpu); | 653 | u32 cr = kvmppc_get_cr(vcpu); |
675 | struct thread_struct t; | ||
676 | #ifdef DEBUG | 654 | #ifdef DEBUG |
677 | int i; | 655 | int i; |
678 | #endif | 656 | #endif |
679 | 657 | ||
680 | t.fpscr.val = vcpu->arch.fpscr; | ||
681 | |||
682 | if (!kvmppc_inst_is_paired_single(vcpu, inst)) | 658 | if (!kvmppc_inst_is_paired_single(vcpu, inst)) |
683 | return EMULATE_FAIL; | 659 | return EMULATE_FAIL; |
684 | 660 | ||
@@ -695,7 +671,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
695 | #ifdef DEBUG | 671 | #ifdef DEBUG |
696 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { | 672 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { |
697 | u32 f; | 673 | u32 f; |
698 | cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); | 674 | kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); |
699 | dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", | 675 | dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", |
700 | i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); | 676 | i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); |
701 | } | 677 | } |
@@ -819,8 +795,9 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
819 | WARN_ON(rcomp); | 795 | WARN_ON(rcomp); |
820 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; | 796 | vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; |
821 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ | 797 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ |
822 | cvt_df((double*)&vcpu->arch.fpr[ax_rb], | 798 | kvm_cvt_df(&vcpu->arch.fpr[ax_rb], |
823 | (float*)&vcpu->arch.qpr[ax_rd], &t); | 799 | &vcpu->arch.qpr[ax_rd], |
800 | &vcpu->arch.fpscr); | ||
824 | break; | 801 | break; |
825 | case OP_4X_PS_MERGE01: | 802 | case OP_4X_PS_MERGE01: |
826 | WARN_ON(rcomp); | 803 | WARN_ON(rcomp); |
@@ -830,17 +807,20 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
830 | case OP_4X_PS_MERGE10: | 807 | case OP_4X_PS_MERGE10: |
831 | WARN_ON(rcomp); | 808 | WARN_ON(rcomp); |
832 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ | 809 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ |
833 | cvt_fd((float*)&vcpu->arch.qpr[ax_ra], | 810 | kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], |
834 | (double*)&vcpu->arch.fpr[ax_rd], &t); | 811 | &vcpu->arch.fpr[ax_rd], |
812 | &vcpu->arch.fpscr); | ||
835 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ | 813 | /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ |
836 | cvt_df((double*)&vcpu->arch.fpr[ax_rb], | 814 | kvm_cvt_df(&vcpu->arch.fpr[ax_rb], |
837 | (float*)&vcpu->arch.qpr[ax_rd], &t); | 815 | &vcpu->arch.qpr[ax_rd], |
816 | &vcpu->arch.fpscr); | ||
838 | break; | 817 | break; |
839 | case OP_4X_PS_MERGE11: | 818 | case OP_4X_PS_MERGE11: |
840 | WARN_ON(rcomp); | 819 | WARN_ON(rcomp); |
841 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ | 820 | /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ |
842 | cvt_fd((float*)&vcpu->arch.qpr[ax_ra], | 821 | kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], |
843 | (double*)&vcpu->arch.fpr[ax_rd], &t); | 822 | &vcpu->arch.fpr[ax_rd], |
823 | &vcpu->arch.fpscr); | ||
844 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; | 824 | vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; |
845 | break; | 825 | break; |
846 | } | 826 | } |
@@ -1275,7 +1255,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
1275 | #ifdef DEBUG | 1255 | #ifdef DEBUG |
1276 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { | 1256 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { |
1277 | u32 f; | 1257 | u32 f; |
1278 | cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); | 1258 | kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); |
1279 | dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); | 1259 | dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); |
1280 | } | 1260 | } |
1281 | #endif | 1261 | #endif |
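The mechanical change running through every hunk of book3s_paired_singles.c above is the same one: the emulator used to fabricate a struct thread_struct on the stack purely so the generic cvt_fd()/cvt_df() helpers could reach an FPSCR value, copying vcpu->arch.fpscr in by hand each time. The KVM-private replacements take the guest FPSCR pointer directly and update it in place. Side by side (the old prototypes are inferred from the casts deleted above):

        /* before: FPSCR smuggled inside a throwaway thread_struct */
        void cvt_fd(float *from, double *to, struct thread_struct *t);
        void cvt_df(double *from, float *to, struct thread_struct *t);

        /* after: the guest FPSCR is passed -- and written back -- directly */
        void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr);
        void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr);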
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a33ab8cc2ccc..8d4e35f5372c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -144,7 +144,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
144 | unsigned int priority) | 144 | unsigned int priority) |
145 | { | 145 | { |
146 | int allowed = 0; | 146 | int allowed = 0; |
147 | ulong msr_mask; | 147 | ulong uninitialized_var(msr_mask); |
148 | bool update_esr = false, update_dear = false; | 148 | bool update_esr = false, update_dear = false; |
149 | 149 | ||
150 | switch (priority) { | 150 | switch (priority) { |
@@ -485,8 +485,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
485 | { | 485 | { |
486 | int i; | 486 | int i; |
487 | 487 | ||
488 | vcpu_load(vcpu); | ||
489 | |||
490 | regs->pc = vcpu->arch.pc; | 488 | regs->pc = vcpu->arch.pc; |
491 | regs->cr = kvmppc_get_cr(vcpu); | 489 | regs->cr = kvmppc_get_cr(vcpu); |
492 | regs->ctr = vcpu->arch.ctr; | 490 | regs->ctr = vcpu->arch.ctr; |
@@ -507,8 +505,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
507 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 505 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
508 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); | 506 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
509 | 507 | ||
510 | vcpu_put(vcpu); | ||
511 | |||
512 | return 0; | 508 | return 0; |
513 | } | 509 | } |
514 | 510 | ||
@@ -516,8 +512,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
516 | { | 512 | { |
517 | int i; | 513 | int i; |
518 | 514 | ||
519 | vcpu_load(vcpu); | ||
520 | |||
521 | vcpu->arch.pc = regs->pc; | 515 | vcpu->arch.pc = regs->pc; |
522 | kvmppc_set_cr(vcpu, regs->cr); | 516 | kvmppc_set_cr(vcpu, regs->cr); |
523 | vcpu->arch.ctr = regs->ctr; | 517 | vcpu->arch.ctr = regs->ctr; |
@@ -537,8 +531,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
537 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 531 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
538 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); | 532 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
539 | 533 | ||
540 | vcpu_put(vcpu); | ||
541 | |||
542 | return 0; | 534 | return 0; |
543 | } | 535 | } |
544 | 536 | ||
@@ -569,9 +561,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
569 | { | 561 | { |
570 | int r; | 562 | int r; |
571 | 563 | ||
572 | vcpu_load(vcpu); | ||
573 | r = kvmppc_core_vcpu_translate(vcpu, tr); | 564 | r = kvmppc_core_vcpu_translate(vcpu, tr); |
574 | vcpu_put(vcpu); | ||
575 | return r; | 565 | return r; |
576 | } | 566 | } |
577 | 567 | ||
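The vcpu_load()/vcpu_put() pairs deleted from the booke.c handlers here (and from powerpc.c and the s390 code below) are not lost locking: this series hoists the load/put into the common vcpu ioctl dispatcher, so every arch handler now runs with the vcpu already loaded exactly once. Roughly what the generic side looks like after the move (a sketch; virt/kvm/kvm_main.c is outside this arch-only diffstat):

        static long kvm_vcpu_ioctl(struct file *filp,
                                   unsigned int ioctl, unsigned long arg)
        {
                struct kvm_vcpu *vcpu = filp->private_data;
                long r;

                vcpu_load(vcpu);        /* formerly duplicated in each handler */
                r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); /* plus generic cases */
                vcpu_put(vcpu);
                return r;
        }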
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index 2b340a3eee90..cb34bbe16113 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S | |||
@@ -271,3 +271,21 @@ FPD_THREE_IN(fmsub) | |||
271 | FPD_THREE_IN(fmadd) | 271 | FPD_THREE_IN(fmadd) |
272 | FPD_THREE_IN(fnmsub) | 272 | FPD_THREE_IN(fnmsub) |
273 | FPD_THREE_IN(fnmadd) | 273 | FPD_THREE_IN(fnmadd) |
274 | |||
275 | _GLOBAL(kvm_cvt_fd) | ||
276 | lfd 0,0(r5) /* load up fpscr value */ | ||
277 | MTFSF_L(0) | ||
278 | lfs 0,0(r3) | ||
279 | stfd 0,0(r4) | ||
280 | mffs 0 | ||
281 | stfd 0,0(r5) /* save new fpscr value */ | ||
282 | blr | ||
283 | |||
284 | _GLOBAL(kvm_cvt_df) | ||
285 | lfd 0,0(r5) /* load up fpscr value */ | ||
286 | MTFSF_L(0) | ||
287 | lfd 0,0(r3) | ||
288 | stfs 0,0(r4) | ||
289 | mffs 0 | ||
290 | stfd 0,0(r5) /* save new fpscr value */ | ||
291 | blr | ||
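Both new stubs work the same way: load the guest FPSCR into f0 and install it with MTFSF_L so the guest's rounding mode and exception enables are live, let the FPU's load/store unit perform the format change itself (lfs widens single to double on load, stfs rounds double to single on store), then write the possibly-updated FPSCR back through r5. An illustrative C model of kvm_cvt_df (illustration only -- the narrowing must happen on the FPU so the guest's rounding mode applies, which is why the real code is assembly):

        /* C model of kvm_cvt_df; see the asm above for the real thing. */
        void kvm_cvt_df_model(u64 *from, u32 *to, u64 *fpscr)
        {
                double d;
                float f;

                memcpy(&d, from, sizeof(d));    /* lfd  0,0(r3) */
                f = (float)d;                   /* stfs rounds per installed FPSCR */
                memcpy(to, &f, sizeof(f));      /* stfs 0,0(r4) */
                /* mffs/stfd then stores the updated status back to *fpscr */
        }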
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9b8683f39e05..72a4ad86ee91 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -36,11 +36,6 @@ | |||
36 | #define CREATE_TRACE_POINTS | 36 | #define CREATE_TRACE_POINTS |
37 | #include "trace.h" | 37 | #include "trace.h" |
38 | 38 | ||
39 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
40 | { | ||
41 | return gfn; | ||
42 | } | ||
43 | |||
44 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | 39 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) |
45 | { | 40 | { |
46 | return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); | 41 | return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); |
@@ -287,7 +282,7 @@ static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, | |||
287 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | 282 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, |
288 | struct kvm_run *run) | 283 | struct kvm_run *run) |
289 | { | 284 | { |
290 | u64 gpr; | 285 | u64 uninitialized_var(gpr); |
291 | 286 | ||
292 | if (run->mmio.len > sizeof(gpr)) { | 287 | if (run->mmio.len > sizeof(gpr)) { |
293 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); | 288 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); |
@@ -423,8 +418,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
423 | int r; | 418 | int r; |
424 | sigset_t sigsaved; | 419 | sigset_t sigsaved; |
425 | 420 | ||
426 | vcpu_load(vcpu); | ||
427 | |||
428 | if (vcpu->sigset_active) | 421 | if (vcpu->sigset_active) |
429 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 422 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
430 | 423 | ||
@@ -456,8 +449,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
456 | if (vcpu->sigset_active) | 449 | if (vcpu->sigset_active) |
457 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 450 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
458 | 451 | ||
459 | vcpu_put(vcpu); | ||
460 | |||
461 | return r; | 452 | return r; |
462 | } | 453 | } |
463 | 454 | ||
@@ -523,8 +514,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
523 | if (copy_from_user(&irq, argp, sizeof(irq))) | 514 | if (copy_from_user(&irq, argp, sizeof(irq))) |
524 | goto out; | 515 | goto out; |
525 | r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); | 516 | r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); |
526 | break; | 517 | goto out; |
527 | } | 518 | } |
519 | |||
528 | case KVM_ENABLE_CAP: | 520 | case KVM_ENABLE_CAP: |
529 | { | 521 | { |
530 | struct kvm_enable_cap cap; | 522 | struct kvm_enable_cap cap; |
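Two small fixes ride along in powerpc.c. First, uninitialized_var() on msr_mask and gpr silences gcc false positives -- every path that reaches the use does assign them -- and on gcc of this era it compiles to a self-assignment, generating no code:

        #define uninitialized_var(x) x = x

Second, the KVM_INTERRUPT case now jumps straight to the common exit label instead of breaking out of the switch, keeping a single return path as the case list below it grows.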
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 27605b62b980..cef7dbf69dfc 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -26,7 +26,7 @@ | |||
26 | 26 | ||
27 | struct sca_entry { | 27 | struct sca_entry { |
28 | atomic_t scn; | 28 | atomic_t scn; |
29 | __u64 reserved; | 29 | __u32 reserved; |
30 | __u64 sda; | 30 | __u64 sda; |
31 | __u64 reserved2[2]; | 31 | __u64 reserved2[2]; |
32 | } __attribute__((packed)); | 32 | } __attribute__((packed)); |
@@ -41,7 +41,8 @@ struct sca_block { | |||
41 | } __attribute__((packed)); | 41 | } __attribute__((packed)); |
42 | 42 | ||
43 | #define KVM_NR_PAGE_SIZES 2 | 43 | #define KVM_NR_PAGE_SIZES 2 |
44 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + ((x) - 1) * 8) | 44 | #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 8) |
45 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) | ||
45 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) | 46 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) |
46 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) | 47 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) |
47 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) | 48 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) |
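KVM_HPAGE_GFN_SHIFT() factors the gfn-relative part out of KVM_HPAGE_SHIFT() so common code can align guest frame numbers with a plain shift instead of converting through byte addresses. For s390's single huge size the numbers work out as:

        /* level x = 2 (1 MiB segments backed by 4 KiB base pages):
         *   KVM_HPAGE_GFN_SHIFT(2) = (2 - 1) * 8    =  8
         *   KVM_HPAGE_SHIFT(2)     = PAGE_SHIFT + 8 = 20
         *   KVM_HPAGE_SIZE(2)      = 1UL << 20      = 1 MiB
         *   KVM_PAGES_PER_HPAGE(2) = 1 MiB / 4 KiB  = 256
         */

The sca_entry change just above it shrinks the pad after the 32-bit scn counter from __u64 to __u32, presumably so that sda lands at byte offset 8 of the packed entry, matching the hardware-defined SCA layout.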
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 3ddc30895e31..f7b6df45d8be 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -135,7 +135,7 @@ static int handle_stop(struct kvm_vcpu *vcpu) | |||
135 | spin_lock_bh(&vcpu->arch.local_int.lock); | 135 | spin_lock_bh(&vcpu->arch.local_int.lock); |
136 | if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { | 136 | if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { |
137 | vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; | 137 | vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; |
138 | rc = __kvm_s390_vcpu_store_status(vcpu, | 138 | rc = kvm_s390_vcpu_store_status(vcpu, |
139 | KVM_S390_STORE_STATUS_NOADDR); | 139 | KVM_S390_STORE_STATUS_NOADDR); |
140 | if (rc >= 0) | 140 | if (rc >= 0) |
141 | rc = -EOPNOTSUPP; | 141 | rc = -EOPNOTSUPP; |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ae3705816878..4fe68650535c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -207,6 +207,7 @@ out_nokvm: | |||
207 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 207 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
208 | { | 208 | { |
209 | VCPU_EVENT(vcpu, 3, "%s", "free cpu"); | 209 | VCPU_EVENT(vcpu, 3, "%s", "free cpu"); |
210 | clear_bit(63 - vcpu->vcpu_id, (unsigned long *) &vcpu->kvm->arch.sca->mcn); | ||
210 | if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == | 211 | if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == |
211 | (__u64) vcpu->arch.sie_block) | 212 | (__u64) vcpu->arch.sie_block) |
212 | vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; | 213 | vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; |
@@ -296,7 +297,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
296 | { | 297 | { |
297 | atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); | 298 | atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); |
298 | set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests); | 299 | set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests); |
299 | vcpu->arch.sie_block->ecb = 2; | 300 | vcpu->arch.sie_block->ecb = 6; |
300 | vcpu->arch.sie_block->eca = 0xC1002001U; | 301 | vcpu->arch.sie_block->eca = 0xC1002001U; |
301 | vcpu->arch.sie_block->fac = (int) (long) facilities; | 302 | vcpu->arch.sie_block->fac = (int) (long) facilities; |
302 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | 303 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
@@ -329,6 +330,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
329 | kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; | 330 | kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; |
330 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); | 331 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); |
331 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; | 332 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; |
333 | set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); | ||
332 | 334 | ||
333 | spin_lock_init(&vcpu->arch.local_int.lock); | 335 | spin_lock_init(&vcpu->arch.local_int.lock); |
334 | INIT_LIST_HEAD(&vcpu->arch.local_int.list); | 336 | INIT_LIST_HEAD(&vcpu->arch.local_int.list); |
@@ -363,63 +365,49 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
363 | 365 | ||
364 | static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) | 366 | static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) |
365 | { | 367 | { |
366 | vcpu_load(vcpu); | ||
367 | kvm_s390_vcpu_initial_reset(vcpu); | 368 | kvm_s390_vcpu_initial_reset(vcpu); |
368 | vcpu_put(vcpu); | ||
369 | return 0; | 369 | return 0; |
370 | } | 370 | } |
371 | 371 | ||
372 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 372 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
373 | { | 373 | { |
374 | vcpu_load(vcpu); | ||
375 | memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs)); | 374 | memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs)); |
376 | vcpu_put(vcpu); | ||
377 | return 0; | 375 | return 0; |
378 | } | 376 | } |
379 | 377 | ||
380 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 378 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
381 | { | 379 | { |
382 | vcpu_load(vcpu); | ||
383 | memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); | 380 | memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); |
384 | vcpu_put(vcpu); | ||
385 | return 0; | 381 | return 0; |
386 | } | 382 | } |
387 | 383 | ||
388 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 384 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
389 | struct kvm_sregs *sregs) | 385 | struct kvm_sregs *sregs) |
390 | { | 386 | { |
391 | vcpu_load(vcpu); | ||
392 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); | 387 | memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); |
393 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); | 388 | memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); |
394 | vcpu_put(vcpu); | ||
395 | return 0; | 389 | return 0; |
396 | } | 390 | } |
397 | 391 | ||
398 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 392 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
399 | struct kvm_sregs *sregs) | 393 | struct kvm_sregs *sregs) |
400 | { | 394 | { |
401 | vcpu_load(vcpu); | ||
402 | memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); | 395 | memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); |
403 | memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); | 396 | memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); |
404 | vcpu_put(vcpu); | ||
405 | return 0; | 397 | return 0; |
406 | } | 398 | } |
407 | 399 | ||
408 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 400 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
409 | { | 401 | { |
410 | vcpu_load(vcpu); | ||
411 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); | 402 | memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); |
412 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; | 403 | vcpu->arch.guest_fpregs.fpc = fpu->fpc; |
413 | vcpu_put(vcpu); | ||
414 | return 0; | 404 | return 0; |
415 | } | 405 | } |
416 | 406 | ||
417 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 407 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
418 | { | 408 | { |
419 | vcpu_load(vcpu); | ||
420 | memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); | 409 | memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); |
421 | fpu->fpc = vcpu->arch.guest_fpregs.fpc; | 410 | fpu->fpc = vcpu->arch.guest_fpregs.fpc; |
422 | vcpu_put(vcpu); | ||
423 | return 0; | 411 | return 0; |
424 | } | 412 | } |
425 | 413 | ||
@@ -427,14 +415,12 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) | |||
427 | { | 415 | { |
428 | int rc = 0; | 416 | int rc = 0; |
429 | 417 | ||
430 | vcpu_load(vcpu); | ||
431 | if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) | 418 | if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) |
432 | rc = -EBUSY; | 419 | rc = -EBUSY; |
433 | else { | 420 | else { |
434 | vcpu->run->psw_mask = psw.mask; | 421 | vcpu->run->psw_mask = psw.mask; |
435 | vcpu->run->psw_addr = psw.addr; | 422 | vcpu->run->psw_addr = psw.addr; |
436 | } | 423 | } |
437 | vcpu_put(vcpu); | ||
438 | return rc; | 424 | return rc; |
439 | } | 425 | } |
440 | 426 | ||
@@ -498,8 +484,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
498 | int rc; | 484 | int rc; |
499 | sigset_t sigsaved; | 485 | sigset_t sigsaved; |
500 | 486 | ||
501 | vcpu_load(vcpu); | ||
502 | |||
503 | rerun_vcpu: | 487 | rerun_vcpu: |
504 | if (vcpu->requests) | 488 | if (vcpu->requests) |
505 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 489 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
@@ -568,8 +552,6 @@ rerun_vcpu: | |||
568 | if (vcpu->sigset_active) | 552 | if (vcpu->sigset_active) |
569 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 553 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
570 | 554 | ||
571 | vcpu_put(vcpu); | ||
572 | |||
573 | vcpu->stat.exit_userspace++; | 555 | vcpu->stat.exit_userspace++; |
574 | return rc; | 556 | return rc; |
575 | } | 557 | } |
@@ -589,7 +571,7 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from, | |||
589 | * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit | 571 | * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit |
590 | * KVM_S390_STORE_STATUS_PREFIXED: -> prefix | 572 | * KVM_S390_STORE_STATUS_PREFIXED: -> prefix |
591 | */ | 573 | */ |
592 | int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | 574 | int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) |
593 | { | 575 | { |
594 | const unsigned char archmode = 1; | 576 | const unsigned char archmode = 1; |
595 | int prefix; | 577 | int prefix; |
@@ -651,45 +633,42 @@ int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | |||
651 | return 0; | 633 | return 0; |
652 | } | 634 | } |
653 | 635 | ||
654 | static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | ||
655 | { | ||
656 | int rc; | ||
657 | |||
658 | vcpu_load(vcpu); | ||
659 | rc = __kvm_s390_vcpu_store_status(vcpu, addr); | ||
660 | vcpu_put(vcpu); | ||
661 | return rc; | ||
662 | } | ||
663 | |||
664 | long kvm_arch_vcpu_ioctl(struct file *filp, | 636 | long kvm_arch_vcpu_ioctl(struct file *filp, |
665 | unsigned int ioctl, unsigned long arg) | 637 | unsigned int ioctl, unsigned long arg) |
666 | { | 638 | { |
667 | struct kvm_vcpu *vcpu = filp->private_data; | 639 | struct kvm_vcpu *vcpu = filp->private_data; |
668 | void __user *argp = (void __user *)arg; | 640 | void __user *argp = (void __user *)arg; |
641 | long r; | ||
669 | 642 | ||
670 | switch (ioctl) { | 643 | switch (ioctl) { |
671 | case KVM_S390_INTERRUPT: { | 644 | case KVM_S390_INTERRUPT: { |
672 | struct kvm_s390_interrupt s390int; | 645 | struct kvm_s390_interrupt s390int; |
673 | 646 | ||
647 | r = -EFAULT; | ||
674 | if (copy_from_user(&s390int, argp, sizeof(s390int))) | 648 | if (copy_from_user(&s390int, argp, sizeof(s390int))) |
675 | return -EFAULT; | 649 | break; |
676 | return kvm_s390_inject_vcpu(vcpu, &s390int); | 650 | r = kvm_s390_inject_vcpu(vcpu, &s390int); |
651 | break; | ||
677 | } | 652 | } |
678 | case KVM_S390_STORE_STATUS: | 653 | case KVM_S390_STORE_STATUS: |
679 | return kvm_s390_vcpu_store_status(vcpu, arg); | 654 | r = kvm_s390_vcpu_store_status(vcpu, arg); |
655 | break; | ||
680 | case KVM_S390_SET_INITIAL_PSW: { | 656 | case KVM_S390_SET_INITIAL_PSW: { |
681 | psw_t psw; | 657 | psw_t psw; |
682 | 658 | ||
659 | r = -EFAULT; | ||
683 | if (copy_from_user(&psw, argp, sizeof(psw))) | 660 | if (copy_from_user(&psw, argp, sizeof(psw))) |
684 | return -EFAULT; | 661 | break; |
685 | return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); | 662 | r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); |
663 | break; | ||
686 | } | 664 | } |
687 | case KVM_S390_INITIAL_RESET: | 665 | case KVM_S390_INITIAL_RESET: |
688 | return kvm_arch_vcpu_ioctl_initial_reset(vcpu); | 666 | r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); |
667 | break; | ||
689 | default: | 668 | default: |
690 | ; | 669 | r = -EINVAL; |
691 | } | 670 | } |
692 | return -EINVAL; | 671 | return r; |
693 | } | 672 | } |
694 | 673 | ||
695 | /* Section: memory related */ | 674 | /* Section: memory related */ |
@@ -744,11 +723,6 @@ void kvm_arch_flush_shadow(struct kvm *kvm) | |||
744 | { | 723 | { |
745 | } | 724 | } |
746 | 725 | ||
747 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
748 | { | ||
749 | return gfn; | ||
750 | } | ||
751 | |||
752 | static int __init kvm_s390_init(void) | 726 | static int __init kvm_s390_init(void) |
753 | { | 727 | { |
754 | int ret; | 728 | int ret; |
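Several independent cleanups land in kvm-s390.c. The set_bit()/clear_bit() pair maintains the SCA's mcn bitmap so the machine knows which of the 64 entries hold a valid vcpu (bit 63 - id, since the bitmap is numbered from the MSB). The ecb change from 2 to 6 turns on an additional execution-control bit alongside the existing one. The __kvm_s390_vcpu_store_status() wrapper disappears as part of the same vcpu_load() consolidation noted above. Finally, the vcpu ioctl handler is rewritten from scattered early returns into the kernel's usual single-exit shape; with hypothetical names standing in for the concrete cases, the pattern is:

        long r;

        switch (ioctl) {
        case SOME_VCPU_IOCTL: {                 /* placeholder name */
                struct some_arg a;              /* placeholder type */

                r = -EFAULT;                    /* preset before the user copy */
                if (copy_from_user(&a, argp, sizeof(a)))
                        break;
                r = do_some_vcpu_op(vcpu, &a);  /* placeholder handler */
                break;
        }
        default:
                r = -EINVAL;
        }
        return r;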
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index cfa9d1777457..a7b7586626db 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -92,7 +92,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); | |||
92 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); | 92 | int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); |
93 | 93 | ||
94 | /* implemented in kvm-s390.c */ | 94 | /* implemented in kvm-s390.c */ |
95 | int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, | 95 | int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, |
96 | unsigned long addr); | 96 | unsigned long addr); |
97 | /* implemented in diag.c */ | 97 | /* implemented in diag.c */ |
98 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); | 98 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c991b3a7b904..815c5b2b9f57 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -482,6 +482,8 @@ static inline void fpu_copy(struct fpu *dst, struct fpu *src) | |||
482 | memcpy(dst->state, src->state, xstate_size); | 482 | memcpy(dst->state, src->state, xstate_size); |
483 | } | 483 | } |
484 | 484 | ||
485 | extern void fpu_finit(struct fpu *fpu); | ||
486 | |||
485 | #endif /* __ASSEMBLY__ */ | 487 | #endif /* __ASSEMBLY__ */ |
486 | 488 | ||
487 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | 489 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ff90055c7f0b..4d8dcbdfc120 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -22,6 +22,8 @@ | |||
22 | #define __KVM_HAVE_XEN_HVM | 22 | #define __KVM_HAVE_XEN_HVM |
23 | #define __KVM_HAVE_VCPU_EVENTS | 23 | #define __KVM_HAVE_VCPU_EVENTS |
24 | #define __KVM_HAVE_DEBUGREGS | 24 | #define __KVM_HAVE_DEBUGREGS |
25 | #define __KVM_HAVE_XSAVE | ||
26 | #define __KVM_HAVE_XCRS | ||
25 | 27 | ||
26 | /* Architectural interrupt line count. */ | 28 | /* Architectural interrupt line count. */ |
27 | #define KVM_NR_INTERRUPTS 256 | 29 | #define KVM_NR_INTERRUPTS 256 |
@@ -299,4 +301,24 @@ struct kvm_debugregs { | |||
299 | __u64 reserved[9]; | 301 | __u64 reserved[9]; |
300 | }; | 302 | }; |
301 | 303 | ||
304 | /* for KVM_CAP_XSAVE */ | ||
305 | struct kvm_xsave { | ||
306 | __u32 region[1024]; | ||
307 | }; | ||
308 | |||
309 | #define KVM_MAX_XCRS 16 | ||
310 | |||
311 | struct kvm_xcr { | ||
312 | __u32 xcr; | ||
313 | __u32 reserved; | ||
314 | __u64 value; | ||
315 | }; | ||
316 | |||
317 | struct kvm_xcrs { | ||
318 | __u32 nr_xcrs; | ||
319 | __u32 flags; | ||
320 | struct kvm_xcr xcrs[KVM_MAX_XCRS]; | ||
321 | __u64 padding[16]; | ||
322 | }; | ||
323 | |||
302 | #endif /* _ASM_X86_KVM_H */ | 324 | #endif /* _ASM_X86_KVM_H */ |
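These structures back the new XSAVE state interface: kvm_xsave is a raw 4 KiB (1024 x 4-byte) image of the processor's xsave area, and kvm_xcrs carries up to 16 extended control registers. A userspace sketch, assuming a <linux/kvm.h> that carries the matching KVM_GET_XSAVE/KVM_SET_XCRS ioctls from this series:

        #include <linux/kvm.h>
        #include <sys/ioctl.h>

        /* Save a vCPU's extended FPU/SSE/AVX state, e.g. for migration. */
        static int save_xstate(int vcpu_fd, struct kvm_xsave *image)
        {
                return ioctl(vcpu_fd, KVM_GET_XSAVE, image);
        }

        /* Restore XCR0 (extended control register index 0) on the target. */
        static int set_xcr0(int vcpu_fd, __u64 xcr0)
        {
                struct kvm_xcrs xcrs = { .nr_xcrs = 1 };

                xcrs.xcrs[0].xcr = 0;
                xcrs.xcrs[0].value = xcr0;
                return ioctl(vcpu_fd, KVM_SET_XCRS, &xcrs);
        }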
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0b2729bf2070..51cfd730ac5d 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -51,8 +51,10 @@ struct x86_emulate_ctxt; | |||
51 | #define X86EMUL_UNHANDLEABLE 1 | 51 | #define X86EMUL_UNHANDLEABLE 1 |
52 | /* Terminate emulation but return success to the caller. */ | 52 | /* Terminate emulation but return success to the caller. */ |
53 | #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ | 53 | #define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */ |
54 | #define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */ | 54 | #define X86EMUL_RETRY_INSTR 3 /* retry the instruction for some reason */ |
55 | #define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */ | 55 | #define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */ |
56 | #define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */ | ||
57 | |||
56 | struct x86_emulate_ops { | 58 | struct x86_emulate_ops { |
57 | /* | 59 | /* |
58 | * read_std: Read bytes of standard (non-emulated/special) memory. | 60 | * read_std: Read bytes of standard (non-emulated/special) memory. |
@@ -92,6 +94,7 @@ struct x86_emulate_ops { | |||
92 | int (*read_emulated)(unsigned long addr, | 94 | int (*read_emulated)(unsigned long addr, |
93 | void *val, | 95 | void *val, |
94 | unsigned int bytes, | 96 | unsigned int bytes, |
97 | unsigned int *error, | ||
95 | struct kvm_vcpu *vcpu); | 98 | struct kvm_vcpu *vcpu); |
96 | 99 | ||
97 | /* | 100 | /* |
@@ -104,6 +107,7 @@ struct x86_emulate_ops { | |||
104 | int (*write_emulated)(unsigned long addr, | 107 | int (*write_emulated)(unsigned long addr, |
105 | const void *val, | 108 | const void *val, |
106 | unsigned int bytes, | 109 | unsigned int bytes, |
110 | unsigned int *error, | ||
107 | struct kvm_vcpu *vcpu); | 111 | struct kvm_vcpu *vcpu); |
108 | 112 | ||
109 | /* | 113 | /* |
@@ -118,6 +122,7 @@ struct x86_emulate_ops { | |||
118 | const void *old, | 122 | const void *old, |
119 | const void *new, | 123 | const void *new, |
120 | unsigned int bytes, | 124 | unsigned int bytes, |
125 | unsigned int *error, | ||
121 | struct kvm_vcpu *vcpu); | 126 | struct kvm_vcpu *vcpu); |
122 | 127 | ||
123 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | 128 | int (*pio_in_emulated)(int size, unsigned short port, void *val, |
@@ -132,18 +137,26 @@ struct x86_emulate_ops { | |||
132 | int seg, struct kvm_vcpu *vcpu); | 137 | int seg, struct kvm_vcpu *vcpu); |
133 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | 138 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); |
134 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | 139 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); |
140 | unsigned long (*get_cached_segment_base)(int seg, struct kvm_vcpu *vcpu); | ||
135 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | 141 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); |
136 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); | 142 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); |
137 | void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); | 143 | int (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); |
138 | int (*cpl)(struct kvm_vcpu *vcpu); | 144 | int (*cpl)(struct kvm_vcpu *vcpu); |
139 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 145 | int (*get_dr)(int dr, unsigned long *dest, struct kvm_vcpu *vcpu); |
146 | int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu); | ||
147 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | ||
148 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | ||
140 | }; | 149 | }; |
141 | 150 | ||
142 | /* Type, address-of, and value of an instruction's operand. */ | 151 | /* Type, address-of, and value of an instruction's operand. */ |
143 | struct operand { | 152 | struct operand { |
144 | enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; | 153 | enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; |
145 | unsigned int bytes; | 154 | unsigned int bytes; |
146 | unsigned long val, orig_val, *ptr; | 155 | unsigned long orig_val, *ptr; |
156 | union { | ||
157 | unsigned long val; | ||
158 | char valptr[sizeof(unsigned long) + 2]; | ||
159 | }; | ||
147 | }; | 160 | }; |
148 | 161 | ||
149 | struct fetch_cache { | 162 | struct fetch_cache { |
@@ -186,6 +199,7 @@ struct decode_cache { | |||
186 | unsigned long modrm_val; | 199 | unsigned long modrm_val; |
187 | struct fetch_cache fetch; | 200 | struct fetch_cache fetch; |
188 | struct read_cache io_read; | 201 | struct read_cache io_read; |
202 | struct read_cache mem_read; | ||
189 | }; | 203 | }; |
190 | 204 | ||
191 | struct x86_emulate_ctxt { | 205 | struct x86_emulate_ctxt { |
@@ -202,6 +216,12 @@ struct x86_emulate_ctxt { | |||
202 | int interruptibility; | 216 | int interruptibility; |
203 | 217 | ||
204 | bool restart; /* restart string instruction after writeback */ | 218 | bool restart; /* restart string instruction after writeback */ |
219 | |||
220 | int exception; /* exception that happens during emulation or -1 */ | ||
221 | u32 error_code; /* error code for exception */ | ||
222 | bool error_code_valid; | ||
223 | unsigned long cr2; /* faulted address in case of #PF */ | ||
224 | |||
205 | /* decode cache */ | 225 | /* decode cache */ |
206 | struct decode_cache decode; | 226 | struct decode_cache decode; |
207 | }; | 227 | }; |
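The renumbering at the top of this header is a genuine bug fix: X86EMUL_RETRY_INSTR and X86EMUL_CMPXCHG_FAILED were both defined as 2, making them indistinguishable from X86EMUL_PROPAGATE_FAULT. They become 3 and 4, and the new X86EMUL_IO_NEEDED (5) lets the emulator say 'exit to userspace and resume' separately, supported by the exception/error_code/cr2 fields added to x86_emulate_ctxt. (struct operand's new valptr union member, sizeof(unsigned long) + 2 bytes, gives wide operands such as far pointers a byte-addressable home in the same slot.) Callers can now branch meaningfully on the result; a sketch:

        /* Sketch: distinguishing the no-longer-aliased return codes. */
        static int classify_emul_rc(int rc)
        {
                if (rc == X86EMUL_PROPAGATE_FAULT)
                        return 1;  /* inject ctxt->exception / error_code */
                if (rc == X86EMUL_IO_NEEDED)
                        return 2;  /* finish MMIO/PIO in userspace, then restart */
                if (rc == X86EMUL_RETRY_INSTR || rc == X86EMUL_CMPXCHG_FAILED)
                        return 3;  /* re-enter the emulator */
                return 0;          /* X86EMUL_CONTINUE */
        }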
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 76f5483cffec..502e53f999cf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | #include <linux/mmu_notifier.h> | 16 | #include <linux/mmu_notifier.h> |
17 | #include <linux/tracepoint.h> | 17 | #include <linux/tracepoint.h> |
18 | #include <linux/cpumask.h> | ||
18 | 19 | ||
19 | #include <linux/kvm.h> | 20 | #include <linux/kvm.h> |
20 | #include <linux/kvm_para.h> | 21 | #include <linux/kvm_para.h> |
@@ -39,11 +40,14 @@ | |||
39 | 0xFFFFFF0000000000ULL) | 40 | 0xFFFFFF0000000000ULL) |
40 | 41 | ||
41 | #define INVALID_PAGE (~(hpa_t)0) | 42 | #define INVALID_PAGE (~(hpa_t)0) |
43 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) | ||
44 | |||
42 | #define UNMAPPED_GVA (~(gpa_t)0) | 45 | #define UNMAPPED_GVA (~(gpa_t)0) |
43 | 46 | ||
44 | /* KVM Hugepage definitions for x86 */ | 47 | /* KVM Hugepage definitions for x86 */ |
45 | #define KVM_NR_PAGE_SIZES 3 | 48 | #define KVM_NR_PAGE_SIZES 3 |
46 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9)) | 49 | #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) |
50 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) | ||
47 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) | 51 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) |
48 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) | 52 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) |
49 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) | 53 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) |
@@ -69,8 +73,6 @@ | |||
69 | 73 | ||
70 | #define IOPL_SHIFT 12 | 74 | #define IOPL_SHIFT 12 |
71 | 75 | ||
72 | #define KVM_ALIAS_SLOTS 4 | ||
73 | |||
74 | #define KVM_PERMILLE_MMU_PAGES 20 | 76 | #define KVM_PERMILLE_MMU_PAGES 20 |
75 | #define KVM_MIN_ALLOC_MMU_PAGES 64 | 77 | #define KVM_MIN_ALLOC_MMU_PAGES 64 |
76 | #define KVM_MMU_HASH_SHIFT 10 | 78 | #define KVM_MMU_HASH_SHIFT 10 |
@@ -241,7 +243,7 @@ struct kvm_mmu { | |||
241 | void (*prefetch_page)(struct kvm_vcpu *vcpu, | 243 | void (*prefetch_page)(struct kvm_vcpu *vcpu, |
242 | struct kvm_mmu_page *page); | 244 | struct kvm_mmu_page *page); |
243 | int (*sync_page)(struct kvm_vcpu *vcpu, | 245 | int (*sync_page)(struct kvm_vcpu *vcpu, |
244 | struct kvm_mmu_page *sp); | 246 | struct kvm_mmu_page *sp, bool clear_unsync); |
245 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 247 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
246 | hpa_t root_hpa; | 248 | hpa_t root_hpa; |
247 | int root_level; | 249 | int root_level; |
@@ -301,8 +303,8 @@ struct kvm_vcpu_arch { | |||
301 | unsigned long mmu_seq; | 303 | unsigned long mmu_seq; |
302 | } update_pte; | 304 | } update_pte; |
303 | 305 | ||
304 | struct i387_fxsave_struct host_fx_image; | 306 | struct fpu guest_fpu; |
305 | struct i387_fxsave_struct guest_fx_image; | 307 | u64 xcr0; |
306 | 308 | ||
307 | gva_t mmio_fault_cr2; | 309 | gva_t mmio_fault_cr2; |
308 | struct kvm_pio_request pio; | 310 | struct kvm_pio_request pio; |
@@ -360,26 +362,11 @@ struct kvm_vcpu_arch { | |||
360 | 362 | ||
361 | /* fields used by HYPER-V emulation */ | 363 | /* fields used by HYPER-V emulation */ |
362 | u64 hv_vapic; | 364 | u64 hv_vapic; |
363 | }; | ||
364 | |||
365 | struct kvm_mem_alias { | ||
366 | gfn_t base_gfn; | ||
367 | unsigned long npages; | ||
368 | gfn_t target_gfn; | ||
369 | #define KVM_ALIAS_INVALID 1UL | ||
370 | unsigned long flags; | ||
371 | }; | ||
372 | 365 | ||
373 | #define KVM_ARCH_HAS_UNALIAS_INSTANTIATION | 366 | cpumask_var_t wbinvd_dirty_mask; |
374 | |||
375 | struct kvm_mem_aliases { | ||
376 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; | ||
377 | int naliases; | ||
378 | }; | 367 | }; |
379 | 368 | ||
380 | struct kvm_arch { | 369 | struct kvm_arch { |
381 | struct kvm_mem_aliases *aliases; | ||
382 | |||
383 | unsigned int n_free_mmu_pages; | 370 | unsigned int n_free_mmu_pages; |
384 | unsigned int n_requested_mmu_pages; | 371 | unsigned int n_requested_mmu_pages; |
385 | unsigned int n_alloc_mmu_pages; | 372 | unsigned int n_alloc_mmu_pages; |
@@ -533,6 +520,8 @@ struct kvm_x86_ops { | |||
533 | 520 | ||
534 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); | 521 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); |
535 | 522 | ||
523 | bool (*has_wbinvd_exit)(void); | ||
524 | |||
536 | const struct trace_print_flags *exit_reasons_str; | 525 | const struct trace_print_flags *exit_reasons_str; |
537 | }; | 526 | }; |
538 | 527 | ||
@@ -576,7 +565,6 @@ enum emulation_result { | |||
576 | #define EMULTYPE_SKIP (1 << 2) | 565 | #define EMULTYPE_SKIP (1 << 2) |
577 | int emulate_instruction(struct kvm_vcpu *vcpu, | 566 | int emulate_instruction(struct kvm_vcpu *vcpu, |
578 | unsigned long cr2, u16 error_code, int emulation_type); | 567 | unsigned long cr2, u16 error_code, int emulation_type); |
579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | ||
580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 568 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
581 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 569 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
582 | 570 | ||
@@ -591,10 +579,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | |||
591 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 579 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
592 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | 580 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); |
593 | int emulate_clts(struct kvm_vcpu *vcpu); | 581 | int emulate_clts(struct kvm_vcpu *vcpu); |
594 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, | 582 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); |
595 | unsigned long *dest); | ||
596 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | ||
597 | unsigned long value); | ||
598 | 583 | ||
599 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 584 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
600 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 585 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
@@ -602,15 +587,16 @@ int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | |||
602 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 587 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
603 | bool has_error_code, u32 error_code); | 588 | bool has_error_code, u32 error_code); |
604 | 589 | ||
605 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 590 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
606 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 591 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
607 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 592 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
608 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); | 593 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
609 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); | 594 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); |
610 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); | 595 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); |
611 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); | 596 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
612 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | 597 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); |
613 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | 598 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
599 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr); | ||
614 | 600 | ||
615 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 601 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); |
616 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 602 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
@@ -630,12 +616,7 @@ int kvm_pic_set_irq(void *opaque, int irq, int level); | |||
630 | 616 | ||
631 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); | 617 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
632 | 618 | ||
633 | void fx_init(struct kvm_vcpu *vcpu); | 619 | int fx_init(struct kvm_vcpu *vcpu); |
634 | |||
635 | int emulator_write_emulated(unsigned long addr, | ||
636 | const void *val, | ||
637 | unsigned int bytes, | ||
638 | struct kvm_vcpu *vcpu); | ||
639 | 620 | ||
640 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 621 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
641 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 622 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
@@ -664,8 +645,6 @@ void kvm_disable_tdp(void); | |||
664 | int complete_pio(struct kvm_vcpu *vcpu); | 645 | int complete_pio(struct kvm_vcpu *vcpu); |
665 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 646 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
666 | 647 | ||
667 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); | ||
668 | |||
669 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) | 648 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) |
670 | { | 649 | { |
671 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); | 650 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); |
@@ -719,21 +698,6 @@ static inline unsigned long read_msr(unsigned long msr) | |||
719 | } | 698 | } |
720 | #endif | 699 | #endif |
721 | 700 | ||
722 | static inline void kvm_fx_save(struct i387_fxsave_struct *image) | ||
723 | { | ||
724 | asm("fxsave (%0)":: "r" (image)); | ||
725 | } | ||
726 | |||
727 | static inline void kvm_fx_restore(struct i387_fxsave_struct *image) | ||
728 | { | ||
729 | asm("fxrstor (%0)":: "r" (image)); | ||
730 | } | ||
731 | |||
732 | static inline void kvm_fx_finit(void) | ||
733 | { | ||
734 | asm("finit"); | ||
735 | } | ||
736 | |||
737 | static inline u32 get_rdx_init_val(void) | 701 | static inline u32 get_rdx_init_val(void) |
738 | { | 702 | { |
739 | return 0x600; /* P6 family */ | 703 | return 0x600; /* P6 family */ |
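Three threads converge in this header: the memory-alias machinery (kvm_mem_alias, KVM_ALIAS_SLOTS, gfn_to_memslot_unaliased) is removed outright, guest FPU state moves from a bare i387_fxsave_struct plus open-coded fxsave/fxrstor/finit inlines to the kernel's struct fpu (with xcr0 alongside for XSAVE-aware guests), and the huge-page macros gain the same KVM_HPAGE_GFN_SHIFT factoring seen on s390 -- except that x86 adds 9 bits per level:

        /* x = 1: GFN_SHIFT  0, HPAGE_SHIFT 12 -> 4 KiB
         * x = 2: GFN_SHIFT  9, HPAGE_SHIFT 21 -> 2 MiB  (512 base pages)
         * x = 3: GFN_SHIFT 18, HPAGE_SHIFT 30 -> 1 GiB  (262144 base pages)
         *
         * Aligning a gfn to its huge frame becomes a mask, with no division:
         *   aligned = gfn & ~((1ULL << KVM_HPAGE_GFN_SHIFT(level)) - 1);
         */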
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 8c7ae4318629..509a42187dc2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -20,6 +20,7 @@ | |||
20 | #define _EFER_LMA 10 /* Long mode active (read-only) */ | 20 | #define _EFER_LMA 10 /* Long mode active (read-only) */ |
21 | #define _EFER_NX 11 /* No execute enable */ | 21 | #define _EFER_NX 11 /* No execute enable */ |
22 | #define _EFER_SVME 12 /* Enable virtualization */ | 22 | #define _EFER_SVME 12 /* Enable virtualization */ |
23 | #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ | ||
23 | #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ | 24 | #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ |
24 | 25 | ||
25 | #define EFER_SCE (1<<_EFER_SCE) | 26 | #define EFER_SCE (1<<_EFER_SCE) |
@@ -27,6 +28,7 @@ | |||
27 | #define EFER_LMA (1<<_EFER_LMA) | 28 | #define EFER_LMA (1<<_EFER_LMA) |
28 | #define EFER_NX (1<<_EFER_NX) | 29 | #define EFER_NX (1<<_EFER_NX) |
29 | #define EFER_SVME (1<<_EFER_SVME) | 30 | #define EFER_SVME (1<<_EFER_SVME) |
31 | #define EFER_LMSLE (1<<_EFER_LMSLE) | ||
30 | #define EFER_FFXSR (1<<_EFER_FFXSR) | 32 | #define EFER_FFXSR (1<<_EFER_FFXSR) |
31 | 33 | ||
32 | /* Intel MSRs. Some also available on other CPUs */ | 34 | /* Intel MSRs. Some also available on other CPUs */ |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9e6779f7cf2d..9f0cbd987d50 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -257,6 +257,7 @@ enum vmcs_field { | |||
257 | #define EXIT_REASON_IO_INSTRUCTION 30 | 257 | #define EXIT_REASON_IO_INSTRUCTION 30 |
258 | #define EXIT_REASON_MSR_READ 31 | 258 | #define EXIT_REASON_MSR_READ 31 |
259 | #define EXIT_REASON_MSR_WRITE 32 | 259 | #define EXIT_REASON_MSR_WRITE 32 |
260 | #define EXIT_REASON_INVALID_STATE 33 | ||
260 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 261 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
261 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 | 262 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 |
262 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | 263 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 |
@@ -266,6 +267,7 @@ enum vmcs_field { | |||
266 | #define EXIT_REASON_EPT_VIOLATION 48 | 267 | #define EXIT_REASON_EPT_VIOLATION 48 |
267 | #define EXIT_REASON_EPT_MISCONFIG 49 | 268 | #define EXIT_REASON_EPT_MISCONFIG 49 |
268 | #define EXIT_REASON_WBINVD 54 | 269 | #define EXIT_REASON_WBINVD 54 |
270 | #define EXIT_REASON_XSETBV 55 | ||
269 | 271 | ||
270 | /* | 272 | /* |
271 | * Interruption-information format | 273 | * Interruption-information format |
@@ -375,6 +377,9 @@ enum vmcs_field { | |||
375 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 377 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
376 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 378 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
377 | 379 | ||
380 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ | ||
381 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ | ||
382 | |||
378 | #define VMX_EPT_DEFAULT_GAW 3 | 383 | #define VMX_EPT_DEFAULT_GAW 3 |
379 | #define VMX_EPT_MAX_GAW 0x4 | 384 | #define VMX_EPT_MAX_GAW 0x4 |
380 | #define VMX_EPT_MT_EPTE_SHIFT 3 | 385 | #define VMX_EPT_MT_EPTE_SHIFT 3 |
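The (41 - 32) annotations record that these INVVPID capability bits sit in the upper dword of the IA32_VMX_EPT_VPID_CAP MSR; once that high dword is read out separately they can be tested with a plain mask. A hedged sketch of the kind of predicate vmx.c builds on them (vmx_capability.vpid is assumed to hold the MSR's high 32 bits):

	static inline bool cpu_has_vmx_invvpid_single(void)
	{
		return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
	}

	static inline bool cpu_has_vmx_invvpid_global(void)
	{
		return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
	}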
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 2c4390cae228..32c36668fa7b 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
@@ -13,6 +13,12 @@ | |||
13 | 13 | ||
14 | #define FXSAVE_SIZE 512 | 14 | #define FXSAVE_SIZE 512 |
15 | 15 | ||
16 | #define XSAVE_HDR_SIZE 64 | ||
17 | #define XSAVE_HDR_OFFSET FXSAVE_SIZE | ||
18 | |||
19 | #define XSAVE_YMM_SIZE 256 | ||
20 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | ||
21 | |||
16 | /* | 22 | /* |
17 | * These are the features that the OS can handle currently. | 23 | * These are the features that the OS can handle currently. |
18 | */ | 24 | */ |
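These constants pin down the architectural XSAVE area layout: the 512-byte legacy FXSAVE image first, the 64-byte XSAVE header directly after it, then 256 bytes of YMM state. The offsets compose, which is easy to check:

	BUILD_BUG_ON(XSAVE_HDR_OFFSET != 512);		/* right after FXSAVE */
	BUILD_BUG_ON(XSAVE_YMM_OFFSET != 512 + 64);	/* == 576 */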
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 86cef6b32253..c4444bce8469 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -107,7 +107,7 @@ void __cpuinit fpu_init(void) | |||
107 | } | 107 | } |
108 | #endif /* CONFIG_X86_64 */ | 108 | #endif /* CONFIG_X86_64 */ |
109 | 109 | ||
110 | static void fpu_finit(struct fpu *fpu) | 110 | void fpu_finit(struct fpu *fpu) |
111 | { | 111 | { |
112 | #ifdef CONFIG_X86_32 | 112 | #ifdef CONFIG_X86_32 |
113 | if (!HAVE_HWFP) { | 113 | if (!HAVE_HWFP) { |
@@ -132,6 +132,7 @@ static void fpu_finit(struct fpu *fpu) | |||
132 | fp->fos = 0xffff0000u; | 132 | fp->fos = 0xffff0000u; |
133 | } | 133 | } |
134 | } | 134 | } |
135 | EXPORT_SYMBOL_GPL(fpu_finit); | ||
135 | 136 | ||
136 | /* | 137 | /* |
137 | * The _current_ task is using the FPU for the first time | 138 | * The _current_ task is using the FPU for the first time |
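Dropping the static qualifier and exporting the symbol lets modules reset an FPU image to its power-on state (FCW 0x37f and so on); presumably this serves KVM's guest-FPU setup elsewhere in the series. A sketch of a modular caller, with the struct fpu assumed already allocated:

	#include <asm/i387.h>

	static void vcpu_fpu_reset(struct fpu *fpu)
	{
		fpu_finit(fpu);		/* now callable from modules */
	}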
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e7e35219b32f..ebcfcceccc72 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -28,6 +28,7 @@ unsigned long idle_nomwait; | |||
28 | EXPORT_SYMBOL(idle_nomwait); | 28 | EXPORT_SYMBOL(idle_nomwait); |
29 | 29 | ||
30 | struct kmem_cache *task_xstate_cachep; | 30 | struct kmem_cache *task_xstate_cachep; |
31 | EXPORT_SYMBOL_GPL(task_xstate_cachep); | ||
31 | 32 | ||
32 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 33 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
33 | { | 34 | { |
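Exporting the xstate slab cache pairs with the fpu_finit() export above: the inline fpu_alloc() helper in asm/i387.h allocates from this cache, so making the symbol visible lets modular code such as KVM use it. The allocation boils down to roughly:

	fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
	if (!fpu->state)
		return -ENOMEM;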
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5ac0bb465ed6..b38bd8b92aa6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -9,6 +9,7 @@ | |||
9 | * privileged instructions: | 9 | * privileged instructions: |
10 | * | 10 | * |
11 | * Copyright (C) 2006 Qumranet | 11 | * Copyright (C) 2006 Qumranet |
12 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||

12 | * | 13 | * |
13 | * Avi Kivity <avi@qumranet.com> | 14 | * Avi Kivity <avi@qumranet.com> |
14 | * Yaniv Kamay <yaniv@qumranet.com> | 15 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -67,6 +68,9 @@ | |||
67 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ | 68 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
68 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ | 69 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ |
69 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ | 70 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ |
71 | #define SrcImmFAddr (0xb<<4) /* Source is immediate far address */ | ||
72 | #define SrcMemFAddr (0xc<<4) /* Source is far address in memory */ | ||
73 | #define SrcAcc (0xd<<4) /* Source Accumulator */ | ||
70 | #define SrcMask (0xf<<4) | 74 | #define SrcMask (0xf<<4) |
71 | /* Generic ModRM decode. */ | 75 | /* Generic ModRM decode. */ |
72 | #define ModRM (1<<8) | 76 | #define ModRM (1<<8) |
@@ -88,10 +92,6 @@ | |||
88 | #define Src2CL (1<<29) | 92 | #define Src2CL (1<<29) |
89 | #define Src2ImmByte (2<<29) | 93 | #define Src2ImmByte (2<<29) |
90 | #define Src2One (3<<29) | 94 | #define Src2One (3<<29) |
91 | #define Src2Imm16 (4<<29) | ||
92 | #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be | ||
93 | in memory and second argument is located | ||
94 | immediately after the first one in memory. */ | ||
95 | #define Src2Mask (7<<29) | 95 | #define Src2Mask (7<<29) |
96 | 96 | ||
97 | enum { | 97 | enum { |
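The source-operand kinds live in bits 4-7 of the decode flags, so each new kind (SrcImmFAddr, SrcMemFAddr, SrcAcc) is simply the next free 4-bit code under SrcMask. The decoder later dispatches on them in the usual way (a sketch of the pattern used in this file):

	switch (c->d & SrcMask) {
	case SrcAcc:		/* 0xd<<4: operand is the accumulator */
		break;
	case SrcImmFAddr:	/* 0xb<<4: far pointer in the insn stream */
		break;
	case SrcMemFAddr:	/* 0xc<<4: far pointer in memory (Ep) */
		break;
	}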
@@ -124,15 +124,15 @@ static u32 opcode_table[256] = { | |||
124 | /* 0x20 - 0x27 */ | 124 | /* 0x20 - 0x27 */ |
125 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, | 125 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
126 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 126 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
127 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, | 127 | ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
128 | /* 0x28 - 0x2F */ | 128 | /* 0x28 - 0x2F */ |
129 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, | 129 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
130 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 130 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
131 | 0, 0, 0, 0, | 131 | ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
132 | /* 0x30 - 0x37 */ | 132 | /* 0x30 - 0x37 */ |
133 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, | 133 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
134 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 134 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
135 | 0, 0, 0, 0, | 135 | ByteOp | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
136 | /* 0x38 - 0x3F */ | 136 | /* 0x38 - 0x3F */ |
137 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 137 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
138 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 138 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
@@ -170,20 +170,20 @@ static u32 opcode_table[256] = { | |||
170 | /* 0x88 - 0x8F */ | 170 | /* 0x88 - 0x8F */ |
171 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, | 171 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, |
172 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 172 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
173 | DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, | 173 | DstMem | SrcNone | ModRM | Mov, ModRM | DstReg, |
174 | DstReg | SrcMem | ModRM | Mov, Group | Group1A, | 174 | ImplicitOps | SrcMem16 | ModRM, Group | Group1A, |
175 | /* 0x90 - 0x97 */ | 175 | /* 0x90 - 0x97 */ |
176 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 176 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
177 | /* 0x98 - 0x9F */ | 177 | /* 0x98 - 0x9F */ |
178 | 0, 0, SrcImm | Src2Imm16 | No64, 0, | 178 | 0, 0, SrcImmFAddr | No64, 0, |
179 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 179 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, |
180 | /* 0xA0 - 0xA7 */ | 180 | /* 0xA0 - 0xA7 */ |
181 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 181 | ByteOp | DstAcc | SrcMem | Mov | MemAbs, DstAcc | SrcMem | Mov | MemAbs, |
182 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 182 | ByteOp | DstMem | SrcAcc | Mov | MemAbs, DstMem | SrcAcc | Mov | MemAbs, |
183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, | 183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, |
184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, | 184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, |
185 | /* 0xA8 - 0xAF */ | 185 | /* 0xA8 - 0xAF */ |
186 | 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, | 186 | DstAcc | SrcImmByte | ByteOp, DstAcc | SrcImm, ByteOp | DstDI | Mov | String, DstDI | Mov | String, |
187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, | 187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, |
188 | ByteOp | DstDI | String, DstDI | String, | 188 | ByteOp | DstDI | String, DstDI | String, |
189 | /* 0xB0 - 0xB7 */ | 189 | /* 0xB0 - 0xB7 */ |
@@ -215,7 +215,7 @@ static u32 opcode_table[256] = { | |||
215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, | 215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
216 | /* 0xE8 - 0xEF */ | 216 | /* 0xE8 - 0xEF */ |
217 | SrcImm | Stack, SrcImm | ImplicitOps, | 217 | SrcImm | Stack, SrcImm | ImplicitOps, |
218 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, | 218 | SrcImmFAddr | No64, SrcImmByte | ImplicitOps, |
219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, | 219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, | 220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
221 | /* 0xF0 - 0xF7 */ | 221 | /* 0xF0 - 0xF7 */ |
@@ -337,20 +337,20 @@ static u32 group_table[] = { | |||
337 | [Group1A*8] = | 337 | [Group1A*8] = |
338 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, | 338 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, |
339 | [Group3_Byte*8] = | 339 | [Group3_Byte*8] = |
340 | ByteOp | SrcImm | DstMem | ModRM, 0, | 340 | ByteOp | SrcImm | DstMem | ModRM, ByteOp | SrcImm | DstMem | ModRM, |
341 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 341 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, |
342 | 0, 0, 0, 0, | 342 | 0, 0, 0, 0, |
343 | [Group3*8] = | 343 | [Group3*8] = |
344 | DstMem | SrcImm | ModRM, 0, | 344 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, |
345 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 345 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
346 | 0, 0, 0, 0, | 346 | 0, 0, 0, 0, |
347 | [Group4*8] = | 347 | [Group4*8] = |
348 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 348 | ByteOp | DstMem | SrcNone | ModRM | Lock, ByteOp | DstMem | SrcNone | ModRM | Lock, |
349 | 0, 0, 0, 0, 0, 0, | 349 | 0, 0, 0, 0, 0, 0, |
350 | [Group5*8] = | 350 | [Group5*8] = |
351 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 351 | DstMem | SrcNone | ModRM | Lock, DstMem | SrcNone | ModRM | Lock, |
352 | SrcMem | ModRM | Stack, 0, | 352 | SrcMem | ModRM | Stack, 0, |
353 | SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, | 353 | SrcMem | ModRM | Stack, SrcMemFAddr | ModRM | ImplicitOps, |
354 | SrcMem | ModRM | Stack, 0, | 354 | SrcMem | ModRM | Stack, 0, |
355 | [Group7*8] = | 355 | [Group7*8] = |
356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, | 356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
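Each [GroupN*8] block lists exactly eight flag words because, for Group opcodes, the ModRM reg field (bits 5:3) picks the sub-entry; the Lock additions to Group4/Group5 therefore admit the LOCK prefix specifically for the inc/dec encodings. The lookup in the decoder is essentially (sketch):

	group = c->d & GroupMask;
	group = (group << 3) + ((c->modrm >> 3) & 7);
	c->d = group_table[group];	/* per-/r decode flags */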
@@ -576,6 +576,13 @@ static u32 group2_table[] = { | |||
576 | (_type)_x; \ | 576 | (_type)_x; \ |
577 | }) | 577 | }) |
578 | 578 | ||
579 | #define insn_fetch_arr(_arr, _size, _eip) \ | ||
580 | ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ | ||
581 | if (rc != X86EMUL_CONTINUE) \ | ||
582 | goto done; \ | ||
583 | (_eip) += (_size); \ | ||
584 | }) | ||
585 | |||
579 | static inline unsigned long ad_mask(struct decode_cache *c) | 586 | static inline unsigned long ad_mask(struct decode_cache *c) |
580 | { | 587 | { |
581 | return (1UL << (c->ad_bytes << 3)) - 1; | 588 | return (1UL << (c->ad_bytes << 3)) - 1; |
@@ -617,31 +624,66 @@ static void set_seg_override(struct decode_cache *c, int seg) | |||
617 | c->seg_override = seg; | 624 | c->seg_override = seg; |
618 | } | 625 | } |
619 | 626 | ||
620 | static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) | 627 | static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, |
628 | struct x86_emulate_ops *ops, int seg) | ||
621 | { | 629 | { |
622 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) | 630 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) |
623 | return 0; | 631 | return 0; |
624 | 632 | ||
625 | return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg); | 633 | return ops->get_cached_segment_base(seg, ctxt->vcpu); |
626 | } | 634 | } |
627 | 635 | ||
628 | static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, | 636 | static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, |
637 | struct x86_emulate_ops *ops, | ||
629 | struct decode_cache *c) | 638 | struct decode_cache *c) |
630 | { | 639 | { |
631 | if (!c->has_seg_override) | 640 | if (!c->has_seg_override) |
632 | return 0; | 641 | return 0; |
633 | 642 | ||
634 | return seg_base(ctxt, c->seg_override); | 643 | return seg_base(ctxt, ops, c->seg_override); |
644 | } | ||
645 | |||
646 | static unsigned long es_base(struct x86_emulate_ctxt *ctxt, | ||
647 | struct x86_emulate_ops *ops) | ||
648 | { | ||
649 | return seg_base(ctxt, ops, VCPU_SREG_ES); | ||
650 | } | ||
651 | |||
652 | static unsigned long ss_base(struct x86_emulate_ctxt *ctxt, | ||
653 | struct x86_emulate_ops *ops) | ||
654 | { | ||
655 | return seg_base(ctxt, ops, VCPU_SREG_SS); | ||
656 | } | ||
657 | |||
658 | static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, | ||
659 | u32 error, bool valid) | ||
660 | { | ||
661 | ctxt->exception = vec; | ||
662 | ctxt->error_code = error; | ||
663 | ctxt->error_code_valid = valid; | ||
664 | ctxt->restart = false; | ||
665 | } | ||
666 | |||
667 | static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err) | ||
668 | { | ||
669 | emulate_exception(ctxt, GP_VECTOR, err, true); | ||
635 | } | 670 | } |
636 | 671 | ||
637 | static unsigned long es_base(struct x86_emulate_ctxt *ctxt) | 672 | static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr, |
673 | int err) | ||
638 | { | 674 | { |
639 | return seg_base(ctxt, VCPU_SREG_ES); | 675 | ctxt->cr2 = addr; |
676 | emulate_exception(ctxt, PF_VECTOR, err, true); | ||
640 | } | 677 | } |
641 | 678 | ||
642 | static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) | 679 | static void emulate_ud(struct x86_emulate_ctxt *ctxt) |
643 | { | 680 | { |
644 | return seg_base(ctxt, VCPU_SREG_SS); | 681 | emulate_exception(ctxt, UD_VECTOR, 0, false); |
682 | } | ||
683 | |||
684 | static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err) | ||
685 | { | ||
686 | emulate_exception(ctxt, TS_VECTOR, err, true); | ||
645 | } | 687 | } |
646 | 688 | ||
647 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 689 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
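Rather than calling kvm_inject_*() from deep inside the emulator, these helpers just record the pending vector, error code and error-code validity in the context and clear the restart flag so a faulting rep instruction is not resumed. The caller is then expected to perform the actual injection, along the lines of (a hedged sketch of the x86.c side):

	if (ctxt->exception == PF_VECTOR)
		kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code);
	else if (ctxt->error_code_valid)
		kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
	else
		kvm_queue_exception(vcpu, ctxt->exception);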
@@ -932,12 +974,9 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
932 | /* we cannot decode insn before we complete previous rep insn */ | 974 | /* we cannot decode insn before we complete previous rep insn */ |
933 | WARN_ON(ctxt->restart); | 975 | WARN_ON(ctxt->restart); |
934 | 976 | ||
935 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
936 | memset(c, 0, sizeof(struct decode_cache)); | ||
937 | c->eip = ctxt->eip; | 977 | c->eip = ctxt->eip; |
938 | c->fetch.start = c->fetch.end = c->eip; | 978 | c->fetch.start = c->fetch.end = c->eip; |
939 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 979 | ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); |
940 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
941 | 980 | ||
942 | switch (mode) { | 981 | switch (mode) { |
943 | case X86EMUL_MODE_REAL: | 982 | case X86EMUL_MODE_REAL: |
@@ -1060,7 +1099,7 @@ done_prefixes: | |||
1060 | set_seg_override(c, VCPU_SREG_DS); | 1099 | set_seg_override(c, VCPU_SREG_DS); |
1061 | 1100 | ||
1062 | if (!(!c->twobyte && c->b == 0x8d)) | 1101 | if (!(!c->twobyte && c->b == 0x8d)) |
1063 | c->modrm_ea += seg_override_base(ctxt, c); | 1102 | c->modrm_ea += seg_override_base(ctxt, ops, c); |
1064 | 1103 | ||
1065 | if (c->ad_bytes != 8) | 1104 | if (c->ad_bytes != 8) |
1066 | c->modrm_ea = (u32)c->modrm_ea; | 1105 | c->modrm_ea = (u32)c->modrm_ea; |
@@ -1148,6 +1187,25 @@ done_prefixes: | |||
1148 | else | 1187 | else |
1149 | c->src.val = insn_fetch(u8, 1, c->eip); | 1188 | c->src.val = insn_fetch(u8, 1, c->eip); |
1150 | break; | 1189 | break; |
1190 | case SrcAcc: | ||
1191 | c->src.type = OP_REG; | ||
1192 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1193 | c->src.ptr = &c->regs[VCPU_REGS_RAX]; | ||
1194 | switch (c->src.bytes) { | ||
1195 | case 1: | ||
1196 | c->src.val = *(u8 *)c->src.ptr; | ||
1197 | break; | ||
1198 | case 2: | ||
1199 | c->src.val = *(u16 *)c->src.ptr; | ||
1200 | break; | ||
1201 | case 4: | ||
1202 | c->src.val = *(u32 *)c->src.ptr; | ||
1203 | break; | ||
1204 | case 8: | ||
1205 | c->src.val = *(u64 *)c->src.ptr; | ||
1206 | break; | ||
1207 | } | ||
1208 | break; | ||
1151 | case SrcOne: | 1209 | case SrcOne: |
1152 | c->src.bytes = 1; | 1210 | c->src.bytes = 1; |
1153 | c->src.val = 1; | 1211 | c->src.val = 1; |
@@ -1156,10 +1214,21 @@ done_prefixes: | |||
1156 | c->src.type = OP_MEM; | 1214 | c->src.type = OP_MEM; |
1157 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1215 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1158 | c->src.ptr = (unsigned long *) | 1216 | c->src.ptr = (unsigned long *) |
1159 | register_address(c, seg_override_base(ctxt, c), | 1217 | register_address(c, seg_override_base(ctxt, ops, c), |
1160 | c->regs[VCPU_REGS_RSI]); | 1218 | c->regs[VCPU_REGS_RSI]); |
1161 | c->src.val = 0; | 1219 | c->src.val = 0; |
1162 | break; | 1220 | break; |
1221 | case SrcImmFAddr: | ||
1222 | c->src.type = OP_IMM; | ||
1223 | c->src.ptr = (unsigned long *)c->eip; | ||
1224 | c->src.bytes = c->op_bytes + 2; | ||
1225 | insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip); | ||
1226 | break; | ||
1227 | case SrcMemFAddr: | ||
1228 | c->src.type = OP_MEM; | ||
1229 | c->src.ptr = (unsigned long *)c->modrm_ea; | ||
1230 | c->src.bytes = c->op_bytes + 2; | ||
1231 | break; | ||
1163 | } | 1232 | } |
1164 | 1233 | ||
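A far address is a 16-bit segment selector plus an offset of the current operand size, hence src.bytes = op_bytes + 2 in both new cases; SrcImmFAddr pulls the whole thing out of the instruction stream through the new insn_fetch_arr() helper. Worked through for ljmp 0x10:0x401000 with 32-bit operands:

	/* bytes on the wire: ea 00 10 40 00 10 00  (opcode + ptr16:32) */
	c->src.bytes = 4 + 2;				/* op_bytes + selector */
	insn_fetch_arr(c->src.valptr, 6, c->eip);
	/* valptr now holds offset 00 10 40 00, then selector 10 00 */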
1165 | /* | 1234 | /* |
@@ -1179,22 +1248,10 @@ done_prefixes: | |||
1179 | c->src2.bytes = 1; | 1248 | c->src2.bytes = 1; |
1180 | c->src2.val = insn_fetch(u8, 1, c->eip); | 1249 | c->src2.val = insn_fetch(u8, 1, c->eip); |
1181 | break; | 1250 | break; |
1182 | case Src2Imm16: | ||
1183 | c->src2.type = OP_IMM; | ||
1184 | c->src2.ptr = (unsigned long *)c->eip; | ||
1185 | c->src2.bytes = 2; | ||
1186 | c->src2.val = insn_fetch(u16, 2, c->eip); | ||
1187 | break; | ||
1188 | case Src2One: | 1251 | case Src2One: |
1189 | c->src2.bytes = 1; | 1252 | c->src2.bytes = 1; |
1190 | c->src2.val = 1; | 1253 | c->src2.val = 1; |
1191 | break; | 1254 | break; |
1192 | case Src2Mem16: | ||
1193 | c->src2.type = OP_MEM; | ||
1194 | c->src2.bytes = 2; | ||
1195 | c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); | ||
1196 | c->src2.val = 0; | ||
1197 | break; | ||
1198 | } | 1255 | } |
1199 | 1256 | ||
1200 | /* Decode and fetch the destination operand: register or memory. */ | 1257 | /* Decode and fetch the destination operand: register or memory. */ |
@@ -1253,7 +1310,7 @@ done_prefixes: | |||
1253 | c->dst.type = OP_MEM; | 1310 | c->dst.type = OP_MEM; |
1254 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1311 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1255 | c->dst.ptr = (unsigned long *) | 1312 | c->dst.ptr = (unsigned long *) |
1256 | register_address(c, es_base(ctxt), | 1313 | register_address(c, es_base(ctxt, ops), |
1257 | c->regs[VCPU_REGS_RDI]); | 1314 | c->regs[VCPU_REGS_RDI]); |
1258 | c->dst.val = 0; | 1315 | c->dst.val = 0; |
1259 | break; | 1316 | break; |
@@ -1263,6 +1320,37 @@ done: | |||
1263 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1320 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
1264 | } | 1321 | } |
1265 | 1322 | ||
1323 | static int read_emulated(struct x86_emulate_ctxt *ctxt, | ||
1324 | struct x86_emulate_ops *ops, | ||
1325 | unsigned long addr, void *dest, unsigned size) | ||
1326 | { | ||
1327 | int rc; | ||
1328 | struct read_cache *mc = &ctxt->decode.mem_read; | ||
1329 | u32 err; | ||
1330 | |||
1331 | while (size) { | ||
1332 | int n = min(size, 8u); | ||
1333 | size -= n; | ||
1334 | if (mc->pos < mc->end) | ||
1335 | goto read_cached; | ||
1336 | |||
1337 | rc = ops->read_emulated(addr, mc->data + mc->end, n, &err, | ||
1338 | ctxt->vcpu); | ||
1339 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
1340 | emulate_pf(ctxt, addr, err); | ||
1341 | if (rc != X86EMUL_CONTINUE) | ||
1342 | return rc; | ||
1343 | mc->end += n; | ||
1344 | |||
1345 | read_cached: | ||
1346 | memcpy(dest, mc->data + mc->pos, n); | ||
1347 | mc->pos += n; | ||
1348 | dest += n; | ||
1349 | addr += n; | ||
1350 | } | ||
1351 | return X86EMUL_CONTINUE; | ||
1352 | } | ||
1353 | |||
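read_emulated() splits a request into 8-byte chunks and appends each one to the decode.mem_read cache; when emulation restarts (x86_emulate_insn() resets mem_read.pos to 0 later in this diff), the re-run is satisfied from the cache via the read_cached path instead of touching guest memory again. Traced for a 10-byte read:

	/* first pass:  n = 8 -> ops->read_emulated(), mc->end = 8
	 *              n = 2 -> ops->read_emulated(), mc->end = 10
	 * re-entry:    mem_read.pos rewound to 0, so both chunks take
	 *              the read_cached: branch and no guest access occurs
	 */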
1266 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | 1354 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
1267 | struct x86_emulate_ops *ops, | 1355 | struct x86_emulate_ops *ops, |
1268 | unsigned int size, unsigned short port, | 1356 | unsigned int size, unsigned short port, |
@@ -1330,13 +1418,13 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1330 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 1418 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
1331 | 1419 | ||
1332 | if (dt.size < index * 8 + 7) { | 1420 | if (dt.size < index * 8 + 7) { |
1333 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | 1421 | emulate_gp(ctxt, selector & 0xfffc); |
1334 | return X86EMUL_PROPAGATE_FAULT; | 1422 | return X86EMUL_PROPAGATE_FAULT; |
1335 | } | 1423 | } |
1336 | addr = dt.address + index * 8; | 1424 | addr = dt.address + index * 8; |
1337 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 1425 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); |
1338 | if (ret == X86EMUL_PROPAGATE_FAULT) | 1426 | if (ret == X86EMUL_PROPAGATE_FAULT) |
1339 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | 1427 | emulate_pf(ctxt, addr, err); |
1340 | 1428 | ||
1341 | return ret; | 1429 | return ret; |
1342 | } | 1430 | } |
@@ -1355,14 +1443,14 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1355 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | 1443 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); |
1356 | 1444 | ||
1357 | if (dt.size < index * 8 + 7) { | 1445 | if (dt.size < index * 8 + 7) { |
1358 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | 1446 | emulate_gp(ctxt, selector & 0xfffc); |
1359 | return X86EMUL_PROPAGATE_FAULT; | 1447 | return X86EMUL_PROPAGATE_FAULT; |
1360 | } | 1448 | } |
1361 | 1449 | ||
1362 | addr = dt.address + index * 8; | 1450 | addr = dt.address + index * 8; |
1363 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | 1451 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); |
1364 | if (ret == X86EMUL_PROPAGATE_FAULT) | 1452 | if (ret == X86EMUL_PROPAGATE_FAULT) |
1365 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | 1453 | emulate_pf(ctxt, addr, err); |
1366 | 1454 | ||
1367 | return ret; | 1455 | return ret; |
1368 | } | 1456 | } |
@@ -1481,11 +1569,70 @@ load: | |||
1481 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | 1569 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); |
1482 | return X86EMUL_CONTINUE; | 1570 | return X86EMUL_CONTINUE; |
1483 | exception: | 1571 | exception: |
1484 | kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); | 1572 | emulate_exception(ctxt, err_vec, err_code, true); |
1485 | return X86EMUL_PROPAGATE_FAULT; | 1573 | return X86EMUL_PROPAGATE_FAULT; |
1486 | } | 1574 | } |
1487 | 1575 | ||
1488 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | 1576 | static inline int writeback(struct x86_emulate_ctxt *ctxt, |
1577 | struct x86_emulate_ops *ops) | ||
1578 | { | ||
1579 | int rc; | ||
1580 | struct decode_cache *c = &ctxt->decode; | ||
1581 | u32 err; | ||
1582 | |||
1583 | switch (c->dst.type) { | ||
1584 | case OP_REG: | ||
1585 | /* The 4-byte case *is* correct: | ||
1586 | * in 64-bit mode we zero-extend. | ||
1587 | */ | ||
1588 | switch (c->dst.bytes) { | ||
1589 | case 1: | ||
1590 | *(u8 *)c->dst.ptr = (u8)c->dst.val; | ||
1591 | break; | ||
1592 | case 2: | ||
1593 | *(u16 *)c->dst.ptr = (u16)c->dst.val; | ||
1594 | break; | ||
1595 | case 4: | ||
1596 | *c->dst.ptr = (u32)c->dst.val; | ||
1597 | break; /* 64b: zero-ext */ | ||
1598 | case 8: | ||
1599 | *c->dst.ptr = c->dst.val; | ||
1600 | break; | ||
1601 | } | ||
1602 | break; | ||
1603 | case OP_MEM: | ||
1604 | if (c->lock_prefix) | ||
1605 | rc = ops->cmpxchg_emulated( | ||
1606 | (unsigned long)c->dst.ptr, | ||
1607 | &c->dst.orig_val, | ||
1608 | &c->dst.val, | ||
1609 | c->dst.bytes, | ||
1610 | &err, | ||
1611 | ctxt->vcpu); | ||
1612 | else | ||
1613 | rc = ops->write_emulated( | ||
1614 | (unsigned long)c->dst.ptr, | ||
1615 | &c->dst.val, | ||
1616 | c->dst.bytes, | ||
1617 | &err, | ||
1618 | ctxt->vcpu); | ||
1619 | if (rc == X86EMUL_PROPAGATE_FAULT) | ||
1620 | emulate_pf(ctxt, | ||
1621 | (unsigned long)c->dst.ptr, err); | ||
1622 | if (rc != X86EMUL_CONTINUE) | ||
1623 | return rc; | ||
1624 | break; | ||
1625 | case OP_NONE: | ||
1626 | /* no writeback */ | ||
1627 | break; | ||
1628 | default: | ||
1629 | break; | ||
1630 | } | ||
1631 | return X86EMUL_CONTINUE; | ||
1632 | } | ||
1633 | |||
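The asymmetry in the OP_REG cases is deliberate: 1- and 2-byte destinations store through a narrow pointer and preserve the upper register bytes, while a 4-byte destination stores through the full unsigned long so the upper half of a 64-bit register is cleared, matching hardware behaviour. For instance:

	/* with RAX == 0xffffffffffffffff beforehand: */
	*c->dst.ptr = (u32)0xdeadbeef;	/* RAX becomes 0x00000000deadbeef */
	/* whereas, again starting from all-ones: */
	*(u16 *)c->dst.ptr = 0xbeef;	/* RAX becomes 0xffffffffffffbeef */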
1634 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt, | ||
1635 | struct x86_emulate_ops *ops) | ||
1489 | { | 1636 | { |
1490 | struct decode_cache *c = &ctxt->decode; | 1637 | struct decode_cache *c = &ctxt->decode; |
1491 | 1638 | ||
@@ -1493,7 +1640,7 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | |||
1493 | c->dst.bytes = c->op_bytes; | 1640 | c->dst.bytes = c->op_bytes; |
1494 | c->dst.val = c->src.val; | 1641 | c->dst.val = c->src.val; |
1495 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1642 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); |
1496 | c->dst.ptr = (void *) register_address(c, ss_base(ctxt), | 1643 | c->dst.ptr = (void *) register_address(c, ss_base(ctxt, ops), |
1497 | c->regs[VCPU_REGS_RSP]); | 1644 | c->regs[VCPU_REGS_RSP]); |
1498 | } | 1645 | } |
1499 | 1646 | ||
@@ -1504,9 +1651,9 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1504 | struct decode_cache *c = &ctxt->decode; | 1651 | struct decode_cache *c = &ctxt->decode; |
1505 | int rc; | 1652 | int rc; |
1506 | 1653 | ||
1507 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), | 1654 | rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops), |
1508 | c->regs[VCPU_REGS_RSP]), | 1655 | c->regs[VCPU_REGS_RSP]), |
1509 | dest, len, ctxt->vcpu); | 1656 | dest, len); |
1510 | if (rc != X86EMUL_CONTINUE) | 1657 | if (rc != X86EMUL_CONTINUE) |
1511 | return rc; | 1658 | return rc; |
1512 | 1659 | ||
@@ -1541,7 +1688,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1541 | break; | 1688 | break; |
1542 | case X86EMUL_MODE_VM86: | 1689 | case X86EMUL_MODE_VM86: |
1543 | if (iopl < 3) { | 1690 | if (iopl < 3) { |
1544 | kvm_inject_gp(ctxt->vcpu, 0); | 1691 | emulate_gp(ctxt, 0); |
1545 | return X86EMUL_PROPAGATE_FAULT; | 1692 | return X86EMUL_PROPAGATE_FAULT; |
1546 | } | 1693 | } |
1547 | change_mask |= EFLG_IF; | 1694 | change_mask |= EFLG_IF; |
@@ -1557,15 +1704,14 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
1557 | return rc; | 1704 | return rc; |
1558 | } | 1705 | } |
1559 | 1706 | ||
1560 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | 1707 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, |
1708 | struct x86_emulate_ops *ops, int seg) | ||
1561 | { | 1709 | { |
1562 | struct decode_cache *c = &ctxt->decode; | 1710 | struct decode_cache *c = &ctxt->decode; |
1563 | struct kvm_segment segment; | ||
1564 | 1711 | ||
1565 | kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); | 1712 | c->src.val = ops->get_segment_selector(seg, ctxt->vcpu); |
1566 | 1713 | ||
1567 | c->src.val = segment.selector; | 1714 | emulate_push(ctxt, ops); |
1568 | emulate_push(ctxt); | ||
1569 | } | 1715 | } |
1570 | 1716 | ||
1571 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | 1717 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, |
@@ -1583,19 +1729,31 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1583 | return rc; | 1729 | return rc; |
1584 | } | 1730 | } |
1585 | 1731 | ||
1586 | static void emulate_pusha(struct x86_emulate_ctxt *ctxt) | 1732 | static int emulate_pusha(struct x86_emulate_ctxt *ctxt, |
1733 | struct x86_emulate_ops *ops) | ||
1587 | { | 1734 | { |
1588 | struct decode_cache *c = &ctxt->decode; | 1735 | struct decode_cache *c = &ctxt->decode; |
1589 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | 1736 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; |
1737 | int rc = X86EMUL_CONTINUE; | ||
1590 | int reg = VCPU_REGS_RAX; | 1738 | int reg = VCPU_REGS_RAX; |
1591 | 1739 | ||
1592 | while (reg <= VCPU_REGS_RDI) { | 1740 | while (reg <= VCPU_REGS_RDI) { |
1593 | (reg == VCPU_REGS_RSP) ? | 1741 | (reg == VCPU_REGS_RSP) ? |
1594 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | 1742 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); |
1595 | 1743 | ||
1596 | emulate_push(ctxt); | 1744 | emulate_push(ctxt, ops); |
1745 | |||
1746 | rc = writeback(ctxt, ops); | ||
1747 | if (rc != X86EMUL_CONTINUE) | ||
1748 | return rc; | ||
1749 | |||
1597 | ++reg; | 1750 | ++reg; |
1598 | } | 1751 | } |
1752 | |||
1753 | /* Disable writeback. */ | ||
1754 | c->dst.type = OP_NONE; | ||
1755 | |||
1756 | return rc; | ||
1599 | } | 1757 | } |
1600 | 1758 | ||
1601 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, | 1759 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, |
@@ -1695,14 +1853,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
1695 | old_eip = c->eip; | 1853 | old_eip = c->eip; |
1696 | c->eip = c->src.val; | 1854 | c->eip = c->src.val; |
1697 | c->src.val = old_eip; | 1855 | c->src.val = old_eip; |
1698 | emulate_push(ctxt); | 1856 | emulate_push(ctxt, ops); |
1699 | break; | 1857 | break; |
1700 | } | 1858 | } |
1701 | case 4: /* jmp abs */ | 1859 | case 4: /* jmp abs */ |
1702 | c->eip = c->src.val; | 1860 | c->eip = c->src.val; |
1703 | break; | 1861 | break; |
1704 | case 6: /* push */ | 1862 | case 6: /* push */ |
1705 | emulate_push(ctxt); | 1863 | emulate_push(ctxt, ops); |
1706 | break; | 1864 | break; |
1707 | } | 1865 | } |
1708 | return X86EMUL_CONTINUE; | 1866 | return X86EMUL_CONTINUE; |
@@ -1748,145 +1906,82 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1748 | return rc; | 1906 | return rc; |
1749 | } | 1907 | } |
1750 | 1908 | ||
1751 | static inline int writeback(struct x86_emulate_ctxt *ctxt, | ||
1752 | struct x86_emulate_ops *ops) | ||
1753 | { | ||
1754 | int rc; | ||
1755 | struct decode_cache *c = &ctxt->decode; | ||
1756 | |||
1757 | switch (c->dst.type) { | ||
1758 | case OP_REG: | ||
1759 | /* The 4-byte case *is* correct: | ||
1760 | * in 64-bit mode we zero-extend. | ||
1761 | */ | ||
1762 | switch (c->dst.bytes) { | ||
1763 | case 1: | ||
1764 | *(u8 *)c->dst.ptr = (u8)c->dst.val; | ||
1765 | break; | ||
1766 | case 2: | ||
1767 | *(u16 *)c->dst.ptr = (u16)c->dst.val; | ||
1768 | break; | ||
1769 | case 4: | ||
1770 | *c->dst.ptr = (u32)c->dst.val; | ||
1771 | break; /* 64b: zero-ext */ | ||
1772 | case 8: | ||
1773 | *c->dst.ptr = c->dst.val; | ||
1774 | break; | ||
1775 | } | ||
1776 | break; | ||
1777 | case OP_MEM: | ||
1778 | if (c->lock_prefix) | ||
1779 | rc = ops->cmpxchg_emulated( | ||
1780 | (unsigned long)c->dst.ptr, | ||
1781 | &c->dst.orig_val, | ||
1782 | &c->dst.val, | ||
1783 | c->dst.bytes, | ||
1784 | ctxt->vcpu); | ||
1785 | else | ||
1786 | rc = ops->write_emulated( | ||
1787 | (unsigned long)c->dst.ptr, | ||
1788 | &c->dst.val, | ||
1789 | c->dst.bytes, | ||
1790 | ctxt->vcpu); | ||
1791 | if (rc != X86EMUL_CONTINUE) | ||
1792 | return rc; | ||
1793 | break; | ||
1794 | case OP_NONE: | ||
1795 | /* no writeback */ | ||
1796 | break; | ||
1797 | default: | ||
1798 | break; | ||
1799 | } | ||
1800 | return X86EMUL_CONTINUE; | ||
1801 | } | ||
1802 | |||
1803 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | ||
1804 | { | ||
1805 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); | ||
1806 | /* | ||
1807 | * an sti; sti; sequence only disables interrupts for the first | ||
1808 | * instruction. So, if the last instruction, be it emulated or | ||
1809 | * not, left the system with the INT_STI flag enabled, it | ||
1810 | * means that the last instruction is an sti. We should not | ||
1811 | * leave the flag on in this case. The same goes for mov ss | ||
1812 | */ | ||
1813 | if (!(int_shadow & mask)) | ||
1814 | ctxt->interruptibility = mask; | ||
1815 | } | ||
1816 | |||
1817 | static inline void | 1909 | static inline void |
1818 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | 1910 | setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, |
1819 | struct kvm_segment *cs, struct kvm_segment *ss) | 1911 | struct x86_emulate_ops *ops, struct desc_struct *cs, |
1912 | struct desc_struct *ss) | ||
1820 | { | 1913 | { |
1821 | memset(cs, 0, sizeof(struct kvm_segment)); | 1914 | memset(cs, 0, sizeof(struct desc_struct)); |
1822 | kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS); | 1915 | ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu); |
1823 | memset(ss, 0, sizeof(struct kvm_segment)); | 1916 | memset(ss, 0, sizeof(struct desc_struct)); |
1824 | 1917 | ||
1825 | cs->l = 0; /* will be adjusted later */ | 1918 | cs->l = 0; /* will be adjusted later */ |
1826 | cs->base = 0; /* flat segment */ | 1919 | set_desc_base(cs, 0); /* flat segment */ |
1827 | cs->g = 1; /* 4kb granularity */ | 1920 | cs->g = 1; /* 4kb granularity */ |
1828 | cs->limit = 0xffffffff; /* 4GB limit */ | 1921 | set_desc_limit(cs, 0xfffff); /* 4GB limit */ |
1829 | cs->type = 0x0b; /* Read, Execute, Accessed */ | 1922 | cs->type = 0x0b; /* Read, Execute, Accessed */ |
1830 | cs->s = 1; | 1923 | cs->s = 1; |
1831 | cs->dpl = 0; /* will be adjusted later */ | 1924 | cs->dpl = 0; /* will be adjusted later */ |
1832 | cs->present = 1; | 1925 | cs->p = 1; |
1833 | cs->db = 1; | 1926 | cs->d = 1; |
1834 | 1927 | ||
1835 | ss->unusable = 0; | 1928 | set_desc_base(ss, 0); /* flat segment */ |
1836 | ss->base = 0; /* flat segment */ | 1929 | set_desc_limit(ss, 0xfffff); /* 4GB limit */ |
1837 | ss->limit = 0xffffffff; /* 4GB limit */ | ||
1838 | ss->g = 1; /* 4kb granularity */ | 1930 | ss->g = 1; /* 4kb granularity */ |
1839 | ss->s = 1; | 1931 | ss->s = 1; |
1840 | ss->type = 0x03; /* Read/Write, Accessed */ | 1932 | ss->type = 0x03; /* Read/Write, Accessed */ |
1841 | ss->db = 1; /* 32bit stack segment */ | 1933 | ss->d = 1; /* 32bit stack segment */ |
1842 | ss->dpl = 0; | 1934 | ss->dpl = 0; |
1843 | ss->present = 1; | 1935 | ss->p = 1; |
1844 | } | 1936 | } |
1845 | 1937 | ||
1846 | static int | 1938 | static int |
1847 | emulate_syscall(struct x86_emulate_ctxt *ctxt) | 1939 | emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1848 | { | 1940 | { |
1849 | struct decode_cache *c = &ctxt->decode; | 1941 | struct decode_cache *c = &ctxt->decode; |
1850 | struct kvm_segment cs, ss; | 1942 | struct desc_struct cs, ss; |
1851 | u64 msr_data; | 1943 | u64 msr_data; |
1944 | u16 cs_sel, ss_sel; | ||
1852 | 1945 | ||
1853 | /* syscall is not available in real mode */ | 1946 | /* syscall is not available in real mode */ |
1854 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1947 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1855 | ctxt->mode == X86EMUL_MODE_VM86) { | 1948 | ctxt->mode == X86EMUL_MODE_VM86) { |
1856 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 1949 | emulate_ud(ctxt); |
1857 | return X86EMUL_PROPAGATE_FAULT; | 1950 | return X86EMUL_PROPAGATE_FAULT; |
1858 | } | 1951 | } |
1859 | 1952 | ||
1860 | setup_syscalls_segments(ctxt, &cs, &ss); | 1953 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1861 | 1954 | ||
1862 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1955 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); |
1863 | msr_data >>= 32; | 1956 | msr_data >>= 32; |
1864 | cs.selector = (u16)(msr_data & 0xfffc); | 1957 | cs_sel = (u16)(msr_data & 0xfffc); |
1865 | ss.selector = (u16)(msr_data + 8); | 1958 | ss_sel = (u16)(msr_data + 8); |
1866 | 1959 | ||
1867 | if (is_long_mode(ctxt->vcpu)) { | 1960 | if (is_long_mode(ctxt->vcpu)) { |
1868 | cs.db = 0; | 1961 | cs.d = 0; |
1869 | cs.l = 1; | 1962 | cs.l = 1; |
1870 | } | 1963 | } |
1871 | kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); | 1964 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); |
1872 | kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); | 1965 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1966 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | ||
1967 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1873 | 1968 | ||
1874 | c->regs[VCPU_REGS_RCX] = c->eip; | 1969 | c->regs[VCPU_REGS_RCX] = c->eip; |
1875 | if (is_long_mode(ctxt->vcpu)) { | 1970 | if (is_long_mode(ctxt->vcpu)) { |
1876 | #ifdef CONFIG_X86_64 | 1971 | #ifdef CONFIG_X86_64 |
1877 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; | 1972 | c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; |
1878 | 1973 | ||
1879 | kvm_x86_ops->get_msr(ctxt->vcpu, | 1974 | ops->get_msr(ctxt->vcpu, |
1880 | ctxt->mode == X86EMUL_MODE_PROT64 ? | 1975 | ctxt->mode == X86EMUL_MODE_PROT64 ? |
1881 | MSR_LSTAR : MSR_CSTAR, &msr_data); | 1976 | MSR_LSTAR : MSR_CSTAR, &msr_data); |
1882 | c->eip = msr_data; | 1977 | c->eip = msr_data; |
1883 | 1978 | ||
1884 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); | 1979 | ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data); |
1885 | ctxt->eflags &= ~(msr_data | EFLG_RF); | 1980 | ctxt->eflags &= ~(msr_data | EFLG_RF); |
1886 | #endif | 1981 | #endif |
1887 | } else { | 1982 | } else { |
1888 | /* legacy mode */ | 1983 | /* legacy mode */ |
1889 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); | 1984 | ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data); |
1890 | c->eip = (u32)msr_data; | 1985 | c->eip = (u32)msr_data; |
1891 | 1986 | ||
1892 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 1987 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
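SYSCALL derives both selectors from a single MSR: bits 47:32 of MSR_STAR supply the kernel CS, and SS is architecturally that value plus 8, which is exactly the cs_sel/ss_sel arithmetic above. With the common Linux layout of __KERNEL_CS = 0x10 in STAR[47:32] (an illustrative value):

	msr_data >>= 32;			/* STAR[63:32] */
	cs_sel = (u16)(msr_data & 0xfffc);	/* 0x0010, RPL/TI forced clear */
	ss_sel = (u16)(msr_data + 8);		/* 0x0018 */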
@@ -1896,15 +1991,16 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1896 | } | 1991 | } |
1897 | 1992 | ||
1898 | static int | 1993 | static int |
1899 | emulate_sysenter(struct x86_emulate_ctxt *ctxt) | 1994 | emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1900 | { | 1995 | { |
1901 | struct decode_cache *c = &ctxt->decode; | 1996 | struct decode_cache *c = &ctxt->decode; |
1902 | struct kvm_segment cs, ss; | 1997 | struct desc_struct cs, ss; |
1903 | u64 msr_data; | 1998 | u64 msr_data; |
1999 | u16 cs_sel, ss_sel; | ||
1904 | 2000 | ||
1905 | /* inject #GP if in real mode */ | 2001 | /* inject #GP if in real mode */ |
1906 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 2002 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1907 | kvm_inject_gp(ctxt->vcpu, 0); | 2003 | emulate_gp(ctxt, 0); |
1908 | return X86EMUL_PROPAGATE_FAULT; | 2004 | return X86EMUL_PROPAGATE_FAULT; |
1909 | } | 2005 | } |
1910 | 2006 | ||
@@ -1912,67 +2008,70 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1912 | * Therefore, we inject an #UD. | 2008 | * Therefore, we inject an #UD. |
1913 | */ | 2009 | */ |
1914 | if (ctxt->mode == X86EMUL_MODE_PROT64) { | 2010 | if (ctxt->mode == X86EMUL_MODE_PROT64) { |
1915 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2011 | emulate_ud(ctxt); |
1916 | return X86EMUL_PROPAGATE_FAULT; | 2012 | return X86EMUL_PROPAGATE_FAULT; |
1917 | } | 2013 | } |
1918 | 2014 | ||
1919 | setup_syscalls_segments(ctxt, &cs, &ss); | 2015 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1920 | 2016 | ||
1921 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 2017 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); |
1922 | switch (ctxt->mode) { | 2018 | switch (ctxt->mode) { |
1923 | case X86EMUL_MODE_PROT32: | 2019 | case X86EMUL_MODE_PROT32: |
1924 | if ((msr_data & 0xfffc) == 0x0) { | 2020 | if ((msr_data & 0xfffc) == 0x0) { |
1925 | kvm_inject_gp(ctxt->vcpu, 0); | 2021 | emulate_gp(ctxt, 0); |
1926 | return X86EMUL_PROPAGATE_FAULT; | 2022 | return X86EMUL_PROPAGATE_FAULT; |
1927 | } | 2023 | } |
1928 | break; | 2024 | break; |
1929 | case X86EMUL_MODE_PROT64: | 2025 | case X86EMUL_MODE_PROT64: |
1930 | if (msr_data == 0x0) { | 2026 | if (msr_data == 0x0) { |
1931 | kvm_inject_gp(ctxt->vcpu, 0); | 2027 | emulate_gp(ctxt, 0); |
1932 | return X86EMUL_PROPAGATE_FAULT; | 2028 | return X86EMUL_PROPAGATE_FAULT; |
1933 | } | 2029 | } |
1934 | break; | 2030 | break; |
1935 | } | 2031 | } |
1936 | 2032 | ||
1937 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 2033 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
1938 | cs.selector = (u16)msr_data; | 2034 | cs_sel = (u16)msr_data; |
1939 | cs.selector &= ~SELECTOR_RPL_MASK; | 2035 | cs_sel &= ~SELECTOR_RPL_MASK; |
1940 | ss.selector = cs.selector + 8; | 2036 | ss_sel = cs_sel + 8; |
1941 | ss.selector &= ~SELECTOR_RPL_MASK; | 2037 | ss_sel &= ~SELECTOR_RPL_MASK; |
1942 | if (ctxt->mode == X86EMUL_MODE_PROT64 | 2038 | if (ctxt->mode == X86EMUL_MODE_PROT64 |
1943 | || is_long_mode(ctxt->vcpu)) { | 2039 | || is_long_mode(ctxt->vcpu)) { |
1944 | cs.db = 0; | 2040 | cs.d = 0; |
1945 | cs.l = 1; | 2041 | cs.l = 1; |
1946 | } | 2042 | } |
1947 | 2043 | ||
1948 | kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); | 2044 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); |
1949 | kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); | 2045 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
2046 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2047 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
1950 | 2048 | ||
1951 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); | 2049 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); |
1952 | c->eip = msr_data; | 2050 | c->eip = msr_data; |
1953 | 2051 | ||
1954 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); | 2052 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); |
1955 | c->regs[VCPU_REGS_RSP] = msr_data; | 2053 | c->regs[VCPU_REGS_RSP] = msr_data; |
1956 | 2054 | ||
1957 | return X86EMUL_CONTINUE; | 2055 | return X86EMUL_CONTINUE; |
1958 | } | 2056 | } |
1959 | 2057 | ||
1960 | static int | 2058 | static int |
1961 | emulate_sysexit(struct x86_emulate_ctxt *ctxt) | 2059 | emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1962 | { | 2060 | { |
1963 | struct decode_cache *c = &ctxt->decode; | 2061 | struct decode_cache *c = &ctxt->decode; |
1964 | struct kvm_segment cs, ss; | 2062 | struct desc_struct cs, ss; |
1965 | u64 msr_data; | 2063 | u64 msr_data; |
1966 | int usermode; | 2064 | int usermode; |
2065 | u16 cs_sel, ss_sel; | ||
1967 | 2066 | ||
1968 | /* inject #GP if in real mode or Virtual 8086 mode */ | 2067 | /* inject #GP if in real mode or Virtual 8086 mode */ |
1969 | if (ctxt->mode == X86EMUL_MODE_REAL || | 2068 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1970 | ctxt->mode == X86EMUL_MODE_VM86) { | 2069 | ctxt->mode == X86EMUL_MODE_VM86) { |
1971 | kvm_inject_gp(ctxt->vcpu, 0); | 2070 | emulate_gp(ctxt, 0); |
1972 | return X86EMUL_PROPAGATE_FAULT; | 2071 | return X86EMUL_PROPAGATE_FAULT; |
1973 | } | 2072 | } |
1974 | 2073 | ||
1975 | setup_syscalls_segments(ctxt, &cs, &ss); | 2074 | setup_syscalls_segments(ctxt, ops, &cs, &ss); |
1976 | 2075 | ||
1977 | if ((c->rex_prefix & 0x8) != 0x0) | 2076 | if ((c->rex_prefix & 0x8) != 0x0) |
1978 | usermode = X86EMUL_MODE_PROT64; | 2077 | usermode = X86EMUL_MODE_PROT64; |
@@ -1981,35 +2080,37 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1981 | 2080 | ||
1982 | cs.dpl = 3; | 2081 | cs.dpl = 3; |
1983 | ss.dpl = 3; | 2082 | ss.dpl = 3; |
1984 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); | 2083 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data); |
1985 | switch (usermode) { | 2084 | switch (usermode) { |
1986 | case X86EMUL_MODE_PROT32: | 2085 | case X86EMUL_MODE_PROT32: |
1987 | cs.selector = (u16)(msr_data + 16); | 2086 | cs_sel = (u16)(msr_data + 16); |
1988 | if ((msr_data & 0xfffc) == 0x0) { | 2087 | if ((msr_data & 0xfffc) == 0x0) { |
1989 | kvm_inject_gp(ctxt->vcpu, 0); | 2088 | emulate_gp(ctxt, 0); |
1990 | return X86EMUL_PROPAGATE_FAULT; | 2089 | return X86EMUL_PROPAGATE_FAULT; |
1991 | } | 2090 | } |
1992 | ss.selector = (u16)(msr_data + 24); | 2091 | ss_sel = (u16)(msr_data + 24); |
1993 | break; | 2092 | break; |
1994 | case X86EMUL_MODE_PROT64: | 2093 | case X86EMUL_MODE_PROT64: |
1995 | cs.selector = (u16)(msr_data + 32); | 2094 | cs_sel = (u16)(msr_data + 32); |
1996 | if (msr_data == 0x0) { | 2095 | if (msr_data == 0x0) { |
1997 | kvm_inject_gp(ctxt->vcpu, 0); | 2096 | emulate_gp(ctxt, 0); |
1998 | return X86EMUL_PROPAGATE_FAULT; | 2097 | return X86EMUL_PROPAGATE_FAULT; |
1999 | } | 2098 | } |
2000 | ss.selector = cs.selector + 8; | 2099 | ss_sel = cs_sel + 8; |
2001 | cs.db = 0; | 2100 | cs.d = 0; |
2002 | cs.l = 1; | 2101 | cs.l = 1; |
2003 | break; | 2102 | break; |
2004 | } | 2103 | } |
2005 | cs.selector |= SELECTOR_RPL_MASK; | 2104 | cs_sel |= SELECTOR_RPL_MASK; |
2006 | ss.selector |= SELECTOR_RPL_MASK; | 2105 | ss_sel |= SELECTOR_RPL_MASK; |
2007 | 2106 | ||
2008 | kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS); | 2107 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); |
2009 | kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS); | 2108 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
2109 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | ||
2110 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | ||
2010 | 2111 | ||
2011 | c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; | 2112 | c->eip = c->regs[VCPU_REGS_RDX]; |
2012 | c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; | 2113 | c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX]; |
2013 | 2114 | ||
2014 | return X86EMUL_CONTINUE; | 2115 | return X86EMUL_CONTINUE; |
2015 | } | 2116 | } |
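SYSEXIT builds its user-mode selectors from MSR_IA32_SYSENTER_CS: +16 (32-bit return) or +32 (64-bit return) for CS, with SS always CS + 8, and RPL forced to 3 via SELECTOR_RPL_MASK. With an illustrative SYSENTER_CS of 0x10, a 32-bit sysexit yields:

	cs_sel = (u16)(0x10 + 16) | SELECTOR_RPL_MASK;	/* 0x23: user code */
	ss_sel = (u16)(0x10 + 24) | SELECTOR_RPL_MASK;	/* 0x2b: user stack */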
@@ -2030,25 +2131,25 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
2030 | struct x86_emulate_ops *ops, | 2131 | struct x86_emulate_ops *ops, |
2031 | u16 port, u16 len) | 2132 | u16 port, u16 len) |
2032 | { | 2133 | { |
2033 | struct kvm_segment tr_seg; | 2134 | struct desc_struct tr_seg; |
2034 | int r; | 2135 | int r; |
2035 | u16 io_bitmap_ptr; | 2136 | u16 io_bitmap_ptr; |
2036 | u8 perm, bit_idx = port & 0x7; | 2137 | u8 perm, bit_idx = port & 0x7; |
2037 | unsigned mask = (1 << len) - 1; | 2138 | unsigned mask = (1 << len) - 1; |
2038 | 2139 | ||
2039 | kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR); | 2140 | ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu); |
2040 | if (tr_seg.unusable) | 2141 | if (!tr_seg.p) |
2041 | return false; | 2142 | return false; |
2042 | if (tr_seg.limit < 103) | 2143 | if (desc_limit_scaled(&tr_seg) < 103) |
2043 | return false; | 2144 | return false; |
2044 | r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, | 2145 | r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2, |
2045 | NULL); | 2146 | ctxt->vcpu, NULL); |
2046 | if (r != X86EMUL_CONTINUE) | 2147 | if (r != X86EMUL_CONTINUE) |
2047 | return false; | 2148 | return false; |
2048 | if (io_bitmap_ptr + port/8 > tr_seg.limit) | 2149 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) |
2049 | return false; | 2150 | return false; |
2050 | r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1, | 2151 | r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8, |
2051 | ctxt->vcpu, NULL); | 2152 | &perm, 1, ctxt->vcpu, NULL); |
2052 | if (r != X86EMUL_CONTINUE) | 2153 | if (r != X86EMUL_CONTINUE) |
2053 | return false; | 2154 | return false; |
2054 | if ((perm >> bit_idx) & mask) | 2155 | if ((perm >> bit_idx) & mask) |
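The TSS I/O permission bitmap begins at the 16-bit offset stored at TR base + 102 and holds one bit per port; an access of width len is allowed only when all len bits starting at the port's own bit are clear, which is what the final mask test checks. Worked through for port 0x3f8, len 1:

	bit_idx = 0x3f8 & 0x7;		/* 0 */
	mask    = (1 << 1) - 1;		/* 0x1 */
	/* perm is read from tr_base + io_bitmap_ptr + 0x3f8/8 (+ 0x7f); */
	/* the access is denied if (perm >> 0) & 0x1 is set */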
@@ -2066,17 +2167,6 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
2066 | return true; | 2167 | return true; |
2067 | } | 2168 | } |
2068 | 2169 | ||
2069 | static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, | ||
2070 | struct x86_emulate_ops *ops, | ||
2071 | int seg) | ||
2072 | { | ||
2073 | struct desc_struct desc; | ||
2074 | if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) | ||
2075 | return get_desc_base(&desc); | ||
2076 | else | ||
2077 | return ~0; | ||
2078 | } | ||
2079 | |||
2080 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | 2170 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, |
2081 | struct x86_emulate_ops *ops, | 2171 | struct x86_emulate_ops *ops, |
2082 | struct tss_segment_16 *tss) | 2172 | struct tss_segment_16 *tss) |
@@ -2165,7 +2255,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2165 | &err); | 2255 | &err); |
2166 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2256 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2167 | /* FIXME: need to provide precise fault address */ | 2257 | /* FIXME: need to provide precise fault address */ |
2168 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2258 | emulate_pf(ctxt, old_tss_base, err); |
2169 | return ret; | 2259 | return ret; |
2170 | } | 2260 | } |
2171 | 2261 | ||
@@ -2175,7 +2265,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2175 | &err); | 2265 | &err); |
2176 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2266 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2177 | /* FIXME: need to provide precise fault address */ | 2267 | /* FIXME: need to provide precise fault address */ |
2178 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2268 | emulate_pf(ctxt, old_tss_base, err); |
2179 | return ret; | 2269 | return ret; |
2180 | } | 2270 | } |
2181 | 2271 | ||
@@ -2183,7 +2273,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2183 | &err); | 2273 | &err); |
2184 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2274 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2185 | /* FIXME: need to provide precise fault address */ | 2275 | /* FIXME: need to provide precise fault address */ |
2186 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2276 | emulate_pf(ctxt, new_tss_base, err); |
2187 | return ret; | 2277 | return ret; |
2188 | } | 2278 | } |
2189 | 2279 | ||
@@ -2196,7 +2286,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, | |||
2196 | ctxt->vcpu, &err); | 2286 | ctxt->vcpu, &err); |
2197 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2287 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2198 | /* FIXME: need to provide precise fault address */ | 2288 | /* FIXME: need to provide precise fault address */ |
2199 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2289 | emulate_pf(ctxt, new_tss_base, err); |
2200 | return ret; | 2290 | return ret; |
2201 | } | 2291 | } |
2202 | } | 2292 | } |
@@ -2238,7 +2328,10 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | |||
2238 | struct decode_cache *c = &ctxt->decode; | 2328 | struct decode_cache *c = &ctxt->decode; |
2239 | int ret; | 2329 | int ret; |
2240 | 2330 | ||
2241 | ops->set_cr(3, tss->cr3, ctxt->vcpu); | 2331 | if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) { |
2332 | emulate_gp(ctxt, 0); | ||
2333 | return X86EMUL_PROPAGATE_FAULT; | ||
2334 | } | ||
2242 | c->eip = tss->eip; | 2335 | c->eip = tss->eip; |
2243 | ctxt->eflags = tss->eflags | 2; | 2336 | ctxt->eflags = tss->eflags | 2; |
2244 | c->regs[VCPU_REGS_RAX] = tss->eax; | 2337 | c->regs[VCPU_REGS_RAX] = tss->eax; |
@@ -2304,7 +2397,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2304 | &err); | 2397 | &err); |
2305 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2398 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2306 | /* FIXME: need to provide precise fault address */ | 2399 | /* FIXME: need to provide precise fault address */ |
2307 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2400 | emulate_pf(ctxt, old_tss_base, err); |
2308 | return ret; | 2401 | return ret; |
2309 | } | 2402 | } |
2310 | 2403 | ||
@@ -2314,7 +2407,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2314 | &err); | 2407 | &err); |
2315 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2408 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2316 | /* FIXME: need to provide precise fault address */ | 2409 | /* FIXME: need to provide precise fault address */ |
2317 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | 2410 | emulate_pf(ctxt, old_tss_base, err); |
2318 | return ret; | 2411 | return ret; |
2319 | } | 2412 | } |
2320 | 2413 | ||
@@ -2322,7 +2415,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2322 | &err); | 2415 | &err); |
2323 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2416 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2324 | /* FIXME: need to provide precise fault address */ | 2417 | /* FIXME: need to provide precise fault address */ |
2325 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2418 | emulate_pf(ctxt, new_tss_base, err); |
2326 | return ret; | 2419 | return ret; |
2327 | } | 2420 | } |
2328 | 2421 | ||
@@ -2335,7 +2428,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, | |||
2335 | ctxt->vcpu, &err); | 2428 | ctxt->vcpu, &err); |
2336 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 2429 | if (ret == X86EMUL_PROPAGATE_FAULT) { |
2337 | /* FIXME: need to provide precise fault address */ | 2430 | /* FIXME: need to provide precise fault address */ |
2338 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | 2431 | emulate_pf(ctxt, new_tss_base, err); |
2339 | return ret; | 2432 | return ret; |
2340 | } | 2433 | } |
2341 | } | 2434 | } |
@@ -2352,7 +2445,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2352 | int ret; | 2445 | int ret; |
2353 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | 2446 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); |
2354 | ulong old_tss_base = | 2447 | ulong old_tss_base = |
2355 | get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); | 2448 | ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu); |
2356 | u32 desc_limit; | 2449 | u32 desc_limit; |
2357 | 2450 | ||
2358 | /* FIXME: old_tss_base == ~0 ? */ | 2451 | /* FIXME: old_tss_base == ~0 ? */ |
@@ -2369,7 +2462,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2369 | if (reason != TASK_SWITCH_IRET) { | 2462 | if (reason != TASK_SWITCH_IRET) { |
2370 | if ((tss_selector & 3) > next_tss_desc.dpl || | 2463 | if ((tss_selector & 3) > next_tss_desc.dpl || |
2371 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | 2464 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { |
2372 | kvm_inject_gp(ctxt->vcpu, 0); | 2465 | emulate_gp(ctxt, 0); |
2373 | return X86EMUL_PROPAGATE_FAULT; | 2466 | return X86EMUL_PROPAGATE_FAULT; |
2374 | } | 2467 | } |
2375 | } | 2468 | } |
@@ -2378,8 +2471,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2378 | if (!next_tss_desc.p || | 2471 | if (!next_tss_desc.p || |
2379 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | 2472 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || |
2380 | desc_limit < 0x2b)) { | 2473 | desc_limit < 0x2b)) { |
2381 | kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, | 2474 | emulate_ts(ctxt, tss_selector & 0xfffc); |
2382 | tss_selector & 0xfffc); | ||
2383 | return X86EMUL_PROPAGATE_FAULT; | 2475 | return X86EMUL_PROPAGATE_FAULT; |
2384 | } | 2476 | } |
2385 | 2477 | ||
@@ -2425,7 +2517,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2425 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | 2517 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; |
2426 | c->lock_prefix = 0; | 2518 | c->lock_prefix = 0; |
2427 | c->src.val = (unsigned long) error_code; | 2519 | c->src.val = (unsigned long) error_code; |
2428 | emulate_push(ctxt); | 2520 | emulate_push(ctxt, ops); |
2429 | } | 2521 | } |
2430 | 2522 | ||
2431 | return ret; | 2523 | return ret; |
@@ -2439,18 +2531,16 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2439 | struct decode_cache *c = &ctxt->decode; | 2531 | struct decode_cache *c = &ctxt->decode; |
2440 | int rc; | 2532 | int rc; |
2441 | 2533 | ||
2442 | memset(c, 0, sizeof(struct decode_cache)); | ||
2443 | c->eip = ctxt->eip; | 2534 | c->eip = ctxt->eip; |
2444 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
2445 | c->dst.type = OP_NONE; | 2535 | c->dst.type = OP_NONE; |
2446 | 2536 | ||
2447 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | 2537 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, |
2448 | has_error_code, error_code); | 2538 | has_error_code, error_code); |
2449 | 2539 | ||
2450 | if (rc == X86EMUL_CONTINUE) { | 2540 | if (rc == X86EMUL_CONTINUE) { |
2451 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | ||
2452 | kvm_rip_write(ctxt->vcpu, c->eip); | ||
2453 | rc = writeback(ctxt, ops); | 2541 | rc = writeback(ctxt, ops); |
2542 | if (rc == X86EMUL_CONTINUE) | ||
2543 | ctxt->eip = c->eip; | ||
2454 | } | 2544 | } |
2455 | 2545 | ||
2456 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 2546 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
@@ -2474,29 +2564,22 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2474 | int rc = X86EMUL_CONTINUE; | 2564 | int rc = X86EMUL_CONTINUE; |
2475 | int saved_dst_type = c->dst.type; | 2565 | int saved_dst_type = c->dst.type; |
2476 | 2566 | ||
2477 | ctxt->interruptibility = 0; | 2567 | ctxt->decode.mem_read.pos = 0; |
2478 | |||
2479 | /* Shadow copy of register state. Committed on successful emulation. | ||
2480 | * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't | ||
2481 | * modify them. | ||
2482 | */ | ||
2483 | |||
2484 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
2485 | 2568 | ||
2486 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | 2569 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { |
2487 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2570 | emulate_ud(ctxt); |
2488 | goto done; | 2571 | goto done; |
2489 | } | 2572 | } |
2490 | 2573 | ||
2491 | /* LOCK prefix is allowed only with some instructions */ | 2574 | /* LOCK prefix is allowed only with some instructions */ |
2492 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { | 2575 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
2493 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2576 | emulate_ud(ctxt); |
2494 | goto done; | 2577 | goto done; |
2495 | } | 2578 | } |
2496 | 2579 | ||
2497 | /* Privileged instruction can be executed only in CPL=0 */ | 2580 | /* Privileged instruction can be executed only in CPL=0 */ |
2498 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { | 2581 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
2499 | kvm_inject_gp(ctxt->vcpu, 0); | 2582 | emulate_gp(ctxt, 0); |
2500 | goto done; | 2583 | goto done; |
2501 | } | 2584 | } |
2502 | 2585 | ||
@@ -2506,7 +2589,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2506 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { | 2589 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
2507 | string_done: | 2590 | string_done: |
2508 | ctxt->restart = false; | 2591 | ctxt->restart = false; |
2509 | kvm_rip_write(ctxt->vcpu, c->eip); | 2592 | ctxt->eip = c->eip; |
2510 | goto done; | 2593 | goto done; |
2511 | } | 2594 | } |
2512 | /* The second termination condition only applies for REPE | 2595 | /* The second termination condition only applies for REPE |
@@ -2529,20 +2612,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2529 | } | 2612 | } |
2530 | 2613 | ||
2531 | if (c->src.type == OP_MEM) { | 2614 | if (c->src.type == OP_MEM) { |
2532 | rc = ops->read_emulated((unsigned long)c->src.ptr, | 2615 | rc = read_emulated(ctxt, ops, (unsigned long)c->src.ptr, |
2533 | &c->src.val, | 2616 | c->src.valptr, c->src.bytes); |
2534 | c->src.bytes, | ||
2535 | ctxt->vcpu); | ||
2536 | if (rc != X86EMUL_CONTINUE) | 2617 | if (rc != X86EMUL_CONTINUE) |
2537 | goto done; | 2618 | goto done; |
2538 | c->src.orig_val = c->src.val; | 2619 | c->src.orig_val = c->src.val; |
2539 | } | 2620 | } |
2540 | 2621 | ||
2541 | if (c->src2.type == OP_MEM) { | 2622 | if (c->src2.type == OP_MEM) { |
2542 | rc = ops->read_emulated((unsigned long)c->src2.ptr, | 2623 | rc = read_emulated(ctxt, ops, (unsigned long)c->src2.ptr, |
2543 | &c->src2.val, | 2624 | &c->src2.val, c->src2.bytes); |
2544 | c->src2.bytes, | ||
2545 | ctxt->vcpu); | ||
2546 | if (rc != X86EMUL_CONTINUE) | 2625 | if (rc != X86EMUL_CONTINUE) |
2547 | goto done; | 2626 | goto done; |
2548 | } | 2627 | } |
@@ -2553,8 +2632,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
2553 | 2632 | ||
2554 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { | 2633 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
2555 | /* optimisation - avoid slow emulated read if Mov */ | 2634 | /* optimisation - avoid slow emulated read if Mov */ |
2556 | rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, | 2635 | rc = read_emulated(ctxt, ops, (unsigned long)c->dst.ptr, |
2557 | c->dst.bytes, ctxt->vcpu); | 2636 | &c->dst.val, c->dst.bytes); |
2558 | if (rc != X86EMUL_CONTINUE) | 2637 | if (rc != X86EMUL_CONTINUE) |
2559 | goto done; | 2638 | goto done; |
2560 | } | 2639 | } |
@@ -2571,7 +2650,7 @@ special_insn: | |||
2571 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | 2650 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); |
2572 | break; | 2651 | break; |
2573 | case 0x06: /* push es */ | 2652 | case 0x06: /* push es */ |
2574 | emulate_push_sreg(ctxt, VCPU_SREG_ES); | 2653 | emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); |
2575 | break; | 2654 | break; |
2576 | case 0x07: /* pop es */ | 2655 | case 0x07: /* pop es */ |
2577 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 2656 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
@@ -2583,14 +2662,14 @@ special_insn: | |||
2583 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | 2662 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); |
2584 | break; | 2663 | break; |
2585 | case 0x0e: /* push cs */ | 2664 | case 0x0e: /* push cs */ |
2586 | emulate_push_sreg(ctxt, VCPU_SREG_CS); | 2665 | emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); |
2587 | break; | 2666 | break; |
2588 | case 0x10 ... 0x15: | 2667 | case 0x10 ... 0x15: |
2589 | adc: /* adc */ | 2668 | adc: /* adc */ |
2590 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | 2669 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); |
2591 | break; | 2670 | break; |
2592 | case 0x16: /* push ss */ | 2671 | case 0x16: /* push ss */ |
2593 | emulate_push_sreg(ctxt, VCPU_SREG_SS); | 2672 | emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); |
2594 | break; | 2673 | break; |
2595 | case 0x17: /* pop ss */ | 2674 | case 0x17: /* pop ss */ |
2596 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 2675 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
@@ -2602,7 +2681,7 @@ special_insn: | |||
2602 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | 2681 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); |
2603 | break; | 2682 | break; |
2604 | case 0x1e: /* push ds */ | 2683 | case 0x1e: /* push ds */ |
2605 | emulate_push_sreg(ctxt, VCPU_SREG_DS); | 2684 | emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); |
2606 | break; | 2685 | break; |
2607 | case 0x1f: /* pop ds */ | 2686 | case 0x1f: /* pop ds */ |
2608 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 2687 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
@@ -2632,7 +2711,7 @@ special_insn: | |||
2632 | emulate_1op("dec", c->dst, ctxt->eflags); | 2711 | emulate_1op("dec", c->dst, ctxt->eflags); |
2633 | break; | 2712 | break; |
2634 | case 0x50 ... 0x57: /* push reg */ | 2713 | case 0x50 ... 0x57: /* push reg */ |
2635 | emulate_push(ctxt); | 2714 | emulate_push(ctxt, ops); |
2636 | break; | 2715 | break; |
2637 | case 0x58 ... 0x5f: /* pop reg */ | 2716 | case 0x58 ... 0x5f: /* pop reg */ |
2638 | pop_instruction: | 2717 | pop_instruction: |
@@ -2641,7 +2720,9 @@ special_insn: | |||
2641 | goto done; | 2720 | goto done; |
2642 | break; | 2721 | break; |
2643 | case 0x60: /* pusha */ | 2722 | case 0x60: /* pusha */ |
2644 | emulate_pusha(ctxt); | 2723 | rc = emulate_pusha(ctxt, ops); |
2724 | if (rc != X86EMUL_CONTINUE) | ||
2725 | goto done; | ||
2645 | break; | 2726 | break; |
2646 | case 0x61: /* popa */ | 2727 | case 0x61: /* popa */ |
2647 | rc = emulate_popa(ctxt, ops); | 2728 | rc = emulate_popa(ctxt, ops); |
@@ -2655,14 +2736,14 @@ special_insn: | |||
2655 | break; | 2736 | break; |
2656 | case 0x68: /* push imm */ | 2737 | case 0x68: /* push imm */ |
2657 | case 0x6a: /* push imm8 */ | 2738 | case 0x6a: /* push imm8 */ |
2658 | emulate_push(ctxt); | 2739 | emulate_push(ctxt, ops); |
2659 | break; | 2740 | break; |
2660 | case 0x6c: /* insb */ | 2741 | case 0x6c: /* insb */ |
2661 | case 0x6d: /* insw/insd */ | 2742 | case 0x6d: /* insw/insd */ |
2662 | c->dst.bytes = min(c->dst.bytes, 4u); | 2743 | c->dst.bytes = min(c->dst.bytes, 4u); |
2663 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2744 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2664 | c->dst.bytes)) { | 2745 | c->dst.bytes)) { |
2665 | kvm_inject_gp(ctxt->vcpu, 0); | 2746 | emulate_gp(ctxt, 0); |
2666 | goto done; | 2747 | goto done; |
2667 | } | 2748 | } |
2668 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, | 2749 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, |
@@ -2674,7 +2755,7 @@ special_insn: | |||
2674 | c->src.bytes = min(c->src.bytes, 4u); | 2755 | c->src.bytes = min(c->src.bytes, 4u); |
2675 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2756 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2676 | c->src.bytes)) { | 2757 | c->src.bytes)) { |
2677 | kvm_inject_gp(ctxt->vcpu, 0); | 2758 | emulate_gp(ctxt, 0); |
2678 | goto done; | 2759 | goto done; |
2679 | } | 2760 | } |
2680 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], | 2761 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], |
@@ -2707,6 +2788,7 @@ special_insn: | |||
2707 | } | 2788 | } |
2708 | break; | 2789 | break; |
2709 | case 0x84 ... 0x85: | 2790 | case 0x84 ... 0x85: |
2791 | test: | ||
2710 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); | 2792 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); |
2711 | break; | 2793 | break; |
2712 | case 0x86 ... 0x87: /* xchg */ | 2794 | case 0x86 ... 0x87: /* xchg */ |
@@ -2735,18 +2817,13 @@ special_insn: | |||
2735 | break; | 2817 | break; |
2736 | case 0x88 ... 0x8b: /* mov */ | 2818 | case 0x88 ... 0x8b: /* mov */ |
2737 | goto mov; | 2819 | goto mov; |
2738 | case 0x8c: { /* mov r/m, sreg */ | 2820 | case 0x8c: /* mov r/m, sreg */ |
2739 | struct kvm_segment segreg; | 2821 | if (c->modrm_reg > VCPU_SREG_GS) { |
2740 | 2822 | emulate_ud(ctxt); | |
2741 | if (c->modrm_reg <= VCPU_SREG_GS) | ||
2742 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | ||
2743 | else { | ||
2744 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
2745 | goto done; | 2823 | goto done; |
2746 | } | 2824 | } |
2747 | c->dst.val = segreg.selector; | 2825 | c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu); |
2748 | break; | 2826 | break; |
2749 | } | ||
2750 | case 0x8d: /* lea r16/r32, m */ | 2827 | case 0x8d: /* lea r16/r32, m */ |
2751 | c->dst.val = c->modrm_ea; | 2828 | c->dst.val = c->modrm_ea; |
2752 | break; | 2829 | break; |
@@ -2757,12 +2834,12 @@ special_insn: | |||
2757 | 2834 | ||
2758 | if (c->modrm_reg == VCPU_SREG_CS || | 2835 | if (c->modrm_reg == VCPU_SREG_CS || |
2759 | c->modrm_reg > VCPU_SREG_GS) { | 2836 | c->modrm_reg > VCPU_SREG_GS) { |
2760 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2837 | emulate_ud(ctxt); |
2761 | goto done; | 2838 | goto done; |
2762 | } | 2839 | } |
2763 | 2840 | ||
2764 | if (c->modrm_reg == VCPU_SREG_SS) | 2841 | if (c->modrm_reg == VCPU_SREG_SS) |
2765 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); | 2842 | ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; |
2766 | 2843 | ||
2767 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); | 2844 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); |
2768 | 2845 | ||
@@ -2775,19 +2852,19 @@ special_insn: | |||
2775 | goto done; | 2852 | goto done; |
2776 | break; | 2853 | break; |
2777 | case 0x90: /* nop / xchg r8,rax */ | 2854 | case 0x90: /* nop / xchg r8,rax */ |
2778 | if (!(c->rex_prefix & 1)) { /* nop */ | 2855 | if (c->dst.ptr == (unsigned long *)&c->regs[VCPU_REGS_RAX]) { |
2779 | c->dst.type = OP_NONE; | 2856 | c->dst.type = OP_NONE; /* nop */ |
2780 | break; | 2857 | break; |
2781 | } | 2858 | } |
2782 | case 0x91 ... 0x97: /* xchg reg,rax */ | 2859 | case 0x91 ... 0x97: /* xchg reg,rax */ |
2783 | c->src.type = c->dst.type = OP_REG; | 2860 | c->src.type = OP_REG; |
2784 | c->src.bytes = c->dst.bytes = c->op_bytes; | 2861 | c->src.bytes = c->op_bytes; |
2785 | c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; | 2862 | c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; |
2786 | c->src.val = *(c->src.ptr); | 2863 | c->src.val = *(c->src.ptr); |
2787 | goto xchg; | 2864 | goto xchg; |
2788 | case 0x9c: /* pushf */ | 2865 | case 0x9c: /* pushf */ |
2789 | c->src.val = (unsigned long) ctxt->eflags; | 2866 | c->src.val = (unsigned long) ctxt->eflags; |
2790 | emulate_push(ctxt); | 2867 | emulate_push(ctxt, ops); |
2791 | break; | 2868 | break; |
2792 | case 0x9d: /* popf */ | 2869 | case 0x9d: /* popf */ |
2793 | c->dst.type = OP_REG; | 2870 | c->dst.type = OP_REG; |
@@ -2797,19 +2874,15 @@ special_insn: | |||
2797 | if (rc != X86EMUL_CONTINUE) | 2874 | if (rc != X86EMUL_CONTINUE) |
2798 | goto done; | 2875 | goto done; |
2799 | break; | 2876 | break; |
2800 | case 0xa0 ... 0xa1: /* mov */ | 2877 | case 0xa0 ... 0xa3: /* mov */ |
2801 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | ||
2802 | c->dst.val = c->src.val; | ||
2803 | break; | ||
2804 | case 0xa2 ... 0xa3: /* mov */ | ||
2805 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; | ||
2806 | break; | ||
2807 | case 0xa4 ... 0xa5: /* movs */ | 2878 | case 0xa4 ... 0xa5: /* movs */ |
2808 | goto mov; | 2879 | goto mov; |
2809 | case 0xa6 ... 0xa7: /* cmps */ | 2880 | case 0xa6 ... 0xa7: /* cmps */ |
2810 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2881 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2811 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2882 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
2812 | goto cmp; | 2883 | goto cmp; |
2884 | case 0xa8 ... 0xa9: /* test ax, imm */ | ||
2885 | goto test; | ||
2813 | case 0xaa ... 0xab: /* stos */ | 2886 | case 0xaa ... 0xab: /* stos */ |
2814 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 2887 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
2815 | break; | 2888 | break; |
@@ -2855,19 +2928,23 @@ special_insn: | |||
2855 | long int rel = c->src.val; | 2928 | long int rel = c->src.val; |
2856 | c->src.val = (unsigned long) c->eip; | 2929 | c->src.val = (unsigned long) c->eip; |
2857 | jmp_rel(c, rel); | 2930 | jmp_rel(c, rel); |
2858 | emulate_push(ctxt); | 2931 | emulate_push(ctxt, ops); |
2859 | break; | 2932 | break; |
2860 | } | 2933 | } |
2861 | case 0xe9: /* jmp rel */ | 2934 | case 0xe9: /* jmp rel */ |
2862 | goto jmp; | 2935 | goto jmp; |
2863 | case 0xea: /* jmp far */ | 2936 | case 0xea: { /* jmp far */ |
2937 | unsigned short sel; | ||
2864 | jump_far: | 2938 | jump_far: |
2865 | if (load_segment_descriptor(ctxt, ops, c->src2.val, | 2939 | memcpy(&sel, c->src.valptr + c->op_bytes, 2); |
2866 | VCPU_SREG_CS)) | 2940 | |
2941 | if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS)) | ||
2867 | goto done; | 2942 | goto done; |
2868 | 2943 | ||
2869 | c->eip = c->src.val; | 2944 | c->eip = 0; |
2945 | memcpy(&c->eip, c->src.valptr, c->op_bytes); | ||
2870 | break; | 2946 | break; |
2947 | } | ||
2871 | case 0xeb: | 2948 | case 0xeb: |
2872 | jmp: /* jmp rel short */ | 2949 | jmp: /* jmp rel short */ |
2873 | jmp_rel(c, c->src.val); | 2950 | jmp_rel(c, c->src.val); |
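[Note on the far-jmp hunk above: the rework stops decoding the selector into c->src2.val and instead pulls both the offset and the selector out of the raw operand bytes in c->src.valptr. A minimal user-space sketch of that unpacking, assuming a little-endian host; the function and parameter names here are ours, not the kernel's:

#include <stdint.h>
#include <string.h>

/* ptr16:16/ptr16:32 operand layout: op_bytes of offset, then a 2-byte selector */
static void parse_jmp_far(const uint8_t *valptr, int op_bytes,
                          uint16_t *sel, uint64_t *eip)
{
	memcpy(sel, valptr + op_bytes, 2);   /* selector follows the offset */
	*eip = 0;                            /* zero-extend: only op_bytes are valid */
	memcpy(eip, valptr, op_bytes);       /* little-endian host assumed */
}
]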
@@ -2879,20 +2956,20 @@ special_insn: | |||
2879 | do_io_in: | 2956 | do_io_in: |
2880 | c->dst.bytes = min(c->dst.bytes, 4u); | 2957 | c->dst.bytes = min(c->dst.bytes, 4u); |
2881 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | 2958 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
2882 | kvm_inject_gp(ctxt->vcpu, 0); | 2959 | emulate_gp(ctxt, 0); |
2883 | goto done; | 2960 | goto done; |
2884 | } | 2961 | } |
2885 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | 2962 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, |
2886 | &c->dst.val)) | 2963 | &c->dst.val)) |
2887 | goto done; /* IO is needed */ | 2964 | goto done; /* IO is needed */ |
2888 | break; | 2965 | break; |
2889 | case 0xee: /* out al,dx */ | 2966 | case 0xee: /* out dx,al */ |
2890 | case 0xef: /* out (e/r)ax,dx */ | 2967 | case 0xef: /* out dx,(e/r)ax */ |
2891 | c->src.val = c->regs[VCPU_REGS_RDX]; | 2968 | c->src.val = c->regs[VCPU_REGS_RDX]; |
2892 | do_io_out: | 2969 | do_io_out: |
2893 | c->dst.bytes = min(c->dst.bytes, 4u); | 2970 | c->dst.bytes = min(c->dst.bytes, 4u); |
2894 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | 2971 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
2895 | kvm_inject_gp(ctxt->vcpu, 0); | 2972 | emulate_gp(ctxt, 0); |
2896 | goto done; | 2973 | goto done; |
2897 | } | 2974 | } |
2898 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, | 2975 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, |
@@ -2916,18 +2993,20 @@ special_insn: | |||
2916 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2993 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2917 | break; | 2994 | break; |
2918 | case 0xfa: /* cli */ | 2995 | case 0xfa: /* cli */ |
2919 | if (emulator_bad_iopl(ctxt, ops)) | 2996 | if (emulator_bad_iopl(ctxt, ops)) { |
2920 | kvm_inject_gp(ctxt->vcpu, 0); | 2997 | emulate_gp(ctxt, 0); |
2921 | else { | 2998 | goto done; |
2999 | } else { | ||
2922 | ctxt->eflags &= ~X86_EFLAGS_IF; | 3000 | ctxt->eflags &= ~X86_EFLAGS_IF; |
2923 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3001 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2924 | } | 3002 | } |
2925 | break; | 3003 | break; |
2926 | case 0xfb: /* sti */ | 3004 | case 0xfb: /* sti */ |
2927 | if (emulator_bad_iopl(ctxt, ops)) | 3005 | if (emulator_bad_iopl(ctxt, ops)) { |
2928 | kvm_inject_gp(ctxt->vcpu, 0); | 3006 | emulate_gp(ctxt, 0); |
2929 | else { | 3007 | goto done; |
2930 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); | 3008 | } else { |
3009 | ctxt->interruptibility = KVM_X86_SHADOW_INT_STI; | ||
2931 | ctxt->eflags |= X86_EFLAGS_IF; | 3010 | ctxt->eflags |= X86_EFLAGS_IF; |
2932 | c->dst.type = OP_NONE; /* Disable writeback. */ | 3011 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2933 | } | 3012 | } |
@@ -2964,11 +3043,12 @@ writeback: | |||
2964 | c->dst.type = saved_dst_type; | 3043 | c->dst.type = saved_dst_type; |
2965 | 3044 | ||
2966 | if ((c->d & SrcMask) == SrcSI) | 3045 | if ((c->d & SrcMask) == SrcSI) |
2967 | string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, | 3046 | string_addr_inc(ctxt, seg_override_base(ctxt, ops, c), |
2968 | &c->src); | 3047 | VCPU_REGS_RSI, &c->src); |
2969 | 3048 | ||
2970 | if ((c->d & DstMask) == DstDI) | 3049 | if ((c->d & DstMask) == DstDI) |
2971 | string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); | 3050 | string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI, |
3051 | &c->dst); | ||
2972 | 3052 | ||
2973 | if (c->rep_prefix && (c->d & String)) { | 3053 | if (c->rep_prefix && (c->d & String)) { |
2974 | struct read_cache *rc = &ctxt->decode.io_read; | 3054 | struct read_cache *rc = &ctxt->decode.io_read; |
@@ -2981,11 +3061,12 @@ writeback: | |||
2981 | (rc->end != 0 && rc->end == rc->pos)) | 3061 | (rc->end != 0 && rc->end == rc->pos)) |
2982 | ctxt->restart = false; | 3062 | ctxt->restart = false; |
2983 | } | 3063 | } |
2984 | 3064 | /* | |
2985 | /* Commit shadow register state. */ | 3065 | * reset read cache here in case the string instruction is restarted |
2986 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 3066 | * without decoding |
2987 | kvm_rip_write(ctxt->vcpu, c->eip); | 3067 | */ |
2988 | ops->set_rflags(ctxt->vcpu, ctxt->eflags); | 3068 | ctxt->decode.mem_read.end = 0; |
3069 | ctxt->eip = c->eip; | ||
2989 | 3070 | ||
2990 | done: | 3071 | done: |
2991 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 3072 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
@@ -3051,7 +3132,7 @@ twobyte_insn: | |||
3051 | c->dst.type = OP_NONE; | 3132 | c->dst.type = OP_NONE; |
3052 | break; | 3133 | break; |
3053 | case 5: /* not defined */ | 3134 | case 5: /* not defined */ |
3054 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3135 | emulate_ud(ctxt); |
3055 | goto done; | 3136 | goto done; |
3056 | case 7: /* invlpg */ | 3137 | case 7: /* invlpg */ |
3057 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); | 3138 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); |
@@ -3063,7 +3144,7 @@ twobyte_insn: | |||
3063 | } | 3144 | } |
3064 | break; | 3145 | break; |
3065 | case 0x05: /* syscall */ | 3146 | case 0x05: /* syscall */ |
3066 | rc = emulate_syscall(ctxt); | 3147 | rc = emulate_syscall(ctxt, ops); |
3067 | if (rc != X86EMUL_CONTINUE) | 3148 | if (rc != X86EMUL_CONTINUE) |
3068 | goto done; | 3149 | goto done; |
3069 | else | 3150 | else |
@@ -3073,8 +3154,11 @@ twobyte_insn: | |||
3073 | emulate_clts(ctxt->vcpu); | 3154 | emulate_clts(ctxt->vcpu); |
3074 | c->dst.type = OP_NONE; | 3155 | c->dst.type = OP_NONE; |
3075 | break; | 3156 | break; |
3076 | case 0x08: /* invd */ | ||
3077 | case 0x09: /* wbinvd */ | 3157 | case 0x09: /* wbinvd */ |
3158 | kvm_emulate_wbinvd(ctxt->vcpu); | ||
3159 | c->dst.type = OP_NONE; | ||
3160 | break; | ||
3161 | case 0x08: /* invd */ | ||
3078 | case 0x0d: /* GrpP (prefetch) */ | 3162 | case 0x0d: /* GrpP (prefetch) */ |
3079 | case 0x18: /* Grp16 (prefetch/nop) */ | 3163 | case 0x18: /* Grp16 (prefetch/nop) */ |
3080 | c->dst.type = OP_NONE; | 3164 | c->dst.type = OP_NONE; |
@@ -3084,7 +3168,7 @@ twobyte_insn: | |||
3084 | case 1: | 3168 | case 1: |
3085 | case 5 ... 7: | 3169 | case 5 ... 7: |
3086 | case 9 ... 15: | 3170 | case 9 ... 15: |
3087 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3171 | emulate_ud(ctxt); |
3088 | goto done; | 3172 | goto done; |
3089 | } | 3173 | } |
3090 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); | 3174 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); |
@@ -3093,31 +3177,42 @@ twobyte_insn: | |||
3093 | case 0x21: /* mov from dr to reg */ | 3177 | case 0x21: /* mov from dr to reg */ |
3094 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3178 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
3095 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3179 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
3096 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3180 | emulate_ud(ctxt); |
3097 | goto done; | 3181 | goto done; |
3098 | } | 3182 | } |
3099 | emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | 3183 | ops->get_dr(c->modrm_reg, &c->regs[c->modrm_rm], ctxt->vcpu); |
3100 | c->dst.type = OP_NONE; /* no writeback */ | 3184 | c->dst.type = OP_NONE; /* no writeback */ |
3101 | break; | 3185 | break; |
3102 | case 0x22: /* mov reg, cr */ | 3186 | case 0x22: /* mov reg, cr */ |
3103 | ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); | 3187 | if (ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu)) { |
3188 | emulate_gp(ctxt, 0); | ||
3189 | goto done; | ||
3190 | } | ||
3104 | c->dst.type = OP_NONE; | 3191 | c->dst.type = OP_NONE; |
3105 | break; | 3192 | break; |
3106 | case 0x23: /* mov from reg to dr */ | 3193 | case 0x23: /* mov from reg to dr */ |
3107 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && | 3194 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
3108 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { | 3195 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
3109 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 3196 | emulate_ud(ctxt); |
3197 | goto done; | ||
3198 | } | ||
3199 | |||
3200 | if (ops->set_dr(c->modrm_reg, c->regs[c->modrm_rm] & | ||
3201 | ((ctxt->mode == X86EMUL_MODE_PROT64) ? | ||
3202 | ~0ULL : ~0U), ctxt->vcpu) < 0) { | ||
3203 | /* #UD condition is already handled by the code above */ | ||
3204 | emulate_gp(ctxt, 0); | ||
3110 | goto done; | 3205 | goto done; |
3111 | } | 3206 | } |
3112 | emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); | 3207 | |
3113 | c->dst.type = OP_NONE; /* no writeback */ | 3208 | c->dst.type = OP_NONE; /* no writeback */ |
3114 | break; | 3209 | break; |
3115 | case 0x30: | 3210 | case 0x30: |
3116 | /* wrmsr */ | 3211 | /* wrmsr */ |
3117 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 3212 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
3118 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3213 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
3119 | if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { | 3214 | if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
3120 | kvm_inject_gp(ctxt->vcpu, 0); | 3215 | emulate_gp(ctxt, 0); |
3121 | goto done; | 3216 | goto done; |
3122 | } | 3217 | } |
3123 | rc = X86EMUL_CONTINUE; | 3218 | rc = X86EMUL_CONTINUE; |
@@ -3125,8 +3220,8 @@ twobyte_insn: | |||
3125 | break; | 3220 | break; |
3126 | case 0x32: | 3221 | case 0x32: |
3127 | /* rdmsr */ | 3222 | /* rdmsr */ |
3128 | if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { | 3223 | if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
3129 | kvm_inject_gp(ctxt->vcpu, 0); | 3224 | emulate_gp(ctxt, 0); |
3130 | goto done; | 3225 | goto done; |
3131 | } else { | 3226 | } else { |
3132 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3227 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
@@ -3136,14 +3231,14 @@ twobyte_insn: | |||
3136 | c->dst.type = OP_NONE; | 3231 | c->dst.type = OP_NONE; |
3137 | break; | 3232 | break; |
3138 | case 0x34: /* sysenter */ | 3233 | case 0x34: /* sysenter */ |
3139 | rc = emulate_sysenter(ctxt); | 3234 | rc = emulate_sysenter(ctxt, ops); |
3140 | if (rc != X86EMUL_CONTINUE) | 3235 | if (rc != X86EMUL_CONTINUE) |
3141 | goto done; | 3236 | goto done; |
3142 | else | 3237 | else |
3143 | goto writeback; | 3238 | goto writeback; |
3144 | break; | 3239 | break; |
3145 | case 0x35: /* sysexit */ | 3240 | case 0x35: /* sysexit */ |
3146 | rc = emulate_sysexit(ctxt); | 3241 | rc = emulate_sysexit(ctxt, ops); |
3147 | if (rc != X86EMUL_CONTINUE) | 3242 | if (rc != X86EMUL_CONTINUE) |
3148 | goto done; | 3243 | goto done; |
3149 | else | 3244 | else |
@@ -3160,7 +3255,7 @@ twobyte_insn: | |||
3160 | c->dst.type = OP_NONE; | 3255 | c->dst.type = OP_NONE; |
3161 | break; | 3256 | break; |
3162 | case 0xa0: /* push fs */ | 3257 | case 0xa0: /* push fs */ |
3163 | emulate_push_sreg(ctxt, VCPU_SREG_FS); | 3258 | emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); |
3164 | break; | 3259 | break; |
3165 | case 0xa1: /* pop fs */ | 3260 | case 0xa1: /* pop fs */ |
3166 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 3261 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
@@ -3179,7 +3274,7 @@ twobyte_insn: | |||
3179 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 3274 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); |
3180 | break; | 3275 | break; |
3181 | case 0xa8: /* push gs */ | 3276 | case 0xa8: /* push gs */ |
3182 | emulate_push_sreg(ctxt, VCPU_SREG_GS); | 3277 | emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); |
3183 | break; | 3278 | break; |
3184 | case 0xa9: /* pop gs */ | 3279 | case 0xa9: /* pop gs */ |
3185 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 3280 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
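[Note on the emulate.c changes above: throughout the file the patch replaces direct kvm_inject_gp()/kvm_queue_exception*() calls with emulate_gp(), emulate_ud(), emulate_ts() and emulate_pf(), which record the fault on the emulation context instead of injecting it into the vcpu immediately. A hedged sketch of what such helpers plausibly look like; the struct and field names are assumptions, not the kernel's definitions:

#include <stdbool.h>
#include <stdint.h>

/* assumed context fields: a pending exception the caller injects later */
struct emul_ctxt_sketch {
	int exception;          /* vector, or -1 if none pending */
	bool has_error_code;
	uint32_t error_code;
	uint64_t cr2;           /* fault address, for page faults only */
};

static void emulate_exception(struct emul_ctxt_sketch *ctxt, int vec,
                              bool has_err, uint32_t err)
{
	ctxt->exception = vec;
	ctxt->has_error_code = has_err;
	ctxt->error_code = err;
}

static void emulate_gp(struct emul_ctxt_sketch *ctxt, uint32_t err)
{
	emulate_exception(ctxt, 13, true, err);   /* #GP */
}

static void emulate_pf(struct emul_ctxt_sketch *ctxt, uint64_t addr,
                       uint32_t err)
{
	ctxt->cr2 = addr;                         /* precise fault address */
	emulate_exception(ctxt, 14, true, err);   /* #PF */
}

Deferring the exception this way keeps the emulator free of vcpu side effects until the caller decides to commit the emulation result.]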
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 0150affad25d..0fd6378981f4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Copyright (c) 2006 Intel Corporation | 5 | * Copyright (c) 2006 Intel Corporation |
6 | * Copyright (c) 2007 Keir Fraser, XenSource Inc | 6 | * Copyright (c) 2007 Keir Fraser, XenSource Inc |
7 | * Copyright (c) 2008 Intel Corporation | 7 | * Copyright (c) 2008 Intel Corporation |
8 | * Copyright 2009 Red Hat, Inc. and/or its affiliates. | ||
8 | * | 9 | * |
9 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 10 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
10 | * of this software and associated documentation files (the "Software"), to deal | 11 | * of this software and associated documentation files (the "Software"), to deal |
@@ -33,6 +34,7 @@ | |||
33 | 34 | ||
34 | #include <linux/kvm_host.h> | 35 | #include <linux/kvm_host.h> |
35 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | #include <linux/workqueue.h> | ||
36 | 38 | ||
37 | #include "irq.h" | 39 | #include "irq.h" |
38 | #include "i8254.h" | 40 | #include "i8254.h" |
@@ -243,11 +245,22 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
243 | { | 245 | { |
244 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, | 246 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, |
245 | irq_ack_notifier); | 247 | irq_ack_notifier); |
246 | raw_spin_lock(&ps->inject_lock); | 248 | int value; |
247 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) | 249 | |
250 | spin_lock(&ps->inject_lock); | ||
251 | value = atomic_dec_return(&ps->pit_timer.pending); | ||
252 | if (value < 0) | ||
253 | /* Spurious acks can be generated if, for example, the | ||
254 | * PIC is being reset. Handle it gracefully here. | ||
255 | */ | ||
248 | atomic_inc(&ps->pit_timer.pending); | 256 | atomic_inc(&ps->pit_timer.pending); |
257 | else if (value > 0) | ||
258 | /* In this case, we had multiple outstanding PIT interrupts | ||
259 | * that we needed to inject. Reinject them now. | ||
260 | */ | ||
261 | queue_work(ps->pit->wq, &ps->pit->expired); | ||
249 | ps->irq_ack = 1; | 262 | ps->irq_ack = 1; |
250 | raw_spin_unlock(&ps->inject_lock); | 263 | spin_unlock(&ps->inject_lock); |
251 | } | 264 | } |
252 | 265 | ||
253 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | 266 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) |
@@ -263,10 +276,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | |||
263 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 276 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
264 | } | 277 | } |
265 | 278 | ||
266 | static void destroy_pit_timer(struct kvm_timer *pt) | 279 | static void destroy_pit_timer(struct kvm_pit *pit) |
267 | { | 280 | { |
268 | pr_debug("execute del timer!\n"); | 281 | hrtimer_cancel(&pit->pit_state.pit_timer.timer); |
269 | hrtimer_cancel(&pt->timer); | 282 | cancel_work_sync(&pit->expired); |
270 | } | 283 | } |
271 | 284 | ||
272 | static bool kpit_is_periodic(struct kvm_timer *ktimer) | 285 | static bool kpit_is_periodic(struct kvm_timer *ktimer) |
@@ -280,6 +293,60 @@ static struct kvm_timer_ops kpit_ops = { | |||
280 | .is_periodic = kpit_is_periodic, | 293 | .is_periodic = kpit_is_periodic, |
281 | }; | 294 | }; |
282 | 295 | ||
296 | static void pit_do_work(struct work_struct *work) | ||
297 | { | ||
298 | struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); | ||
299 | struct kvm *kvm = pit->kvm; | ||
300 | struct kvm_vcpu *vcpu; | ||
301 | int i; | ||
302 | struct kvm_kpit_state *ps = &pit->pit_state; | ||
303 | int inject = 0; | ||
304 | |||
305 | /* Try to inject pending interrupts when | ||
306 | * the last one has been acked. | ||
307 | */ | ||
308 | spin_lock(&ps->inject_lock); | ||
309 | if (ps->irq_ack) { | ||
310 | ps->irq_ack = 0; | ||
311 | inject = 1; | ||
312 | } | ||
313 | spin_unlock(&ps->inject_lock); | ||
314 | if (inject) { | ||
315 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | ||
316 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | ||
317 | |||
318 | /* | ||
319 | * Provides NMI watchdog support via Virtual Wire mode. | ||
320 | * The route is: PIT -> PIC -> LVT0 in NMI mode. | ||
321 | * | ||
322 | * Note: Our Virtual Wire implementation is simplified, only | ||
323 | * propagating PIT interrupts to all VCPUs when they have set | ||
324 | * LVT0 to NMI delivery. Other PIC interrupts are just sent to | ||
325 | * VCPU0, and only if its LVT0 is in EXTINT mode. | ||
326 | */ | ||
327 | if (kvm->arch.vapics_in_nmi_mode > 0) | ||
328 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
329 | kvm_apic_nmi_wd_deliver(vcpu); | ||
330 | } | ||
331 | } | ||
332 | |||
333 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | ||
334 | { | ||
335 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
336 | struct kvm_pit *pt = ktimer->kvm->arch.vpit; | ||
337 | |||
338 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | ||
339 | atomic_inc(&ktimer->pending); | ||
340 | queue_work(pt->wq, &pt->expired); | ||
341 | } | ||
342 | |||
343 | if (ktimer->t_ops->is_periodic(ktimer)) { | ||
344 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | ||
345 | return HRTIMER_RESTART; | ||
346 | } else | ||
347 | return HRTIMER_NORESTART; | ||
348 | } | ||
349 | |||
283 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | 350 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) |
284 | { | 351 | { |
285 | struct kvm_timer *pt = &ps->pit_timer; | 352 | struct kvm_timer *pt = &ps->pit_timer; |
@@ -291,13 +358,13 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | |||
291 | 358 | ||
292 | /* TODO: the new value only takes effect after the counter is retriggered */ | 359 | /* TODO: the new value only takes effect after the counter is retriggered */ |
293 | hrtimer_cancel(&pt->timer); | 360 | hrtimer_cancel(&pt->timer); |
361 | cancel_work_sync(&ps->pit->expired); | ||
294 | pt->period = interval; | 362 | pt->period = interval; |
295 | ps->is_periodic = is_period; | 363 | ps->is_periodic = is_period; |
296 | 364 | ||
297 | pt->timer.function = kvm_timer_fn; | 365 | pt->timer.function = pit_timer_fn; |
298 | pt->t_ops = &kpit_ops; | 366 | pt->t_ops = &kpit_ops; |
299 | pt->kvm = ps->pit->kvm; | 367 | pt->kvm = ps->pit->kvm; |
300 | pt->vcpu = pt->kvm->bsp_vcpu; | ||
301 | 368 | ||
302 | atomic_set(&pt->pending, 0); | 369 | atomic_set(&pt->pending, 0); |
303 | ps->irq_ack = 1; | 370 | ps->irq_ack = 1; |
@@ -346,7 +413,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
346 | } | 413 | } |
347 | break; | 414 | break; |
348 | default: | 415 | default: |
349 | destroy_pit_timer(&ps->pit_timer); | 416 | destroy_pit_timer(kvm->arch.vpit); |
350 | } | 417 | } |
351 | } | 418 | } |
352 | 419 | ||
@@ -625,7 +692,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
625 | 692 | ||
626 | mutex_init(&pit->pit_state.lock); | 693 | mutex_init(&pit->pit_state.lock); |
627 | mutex_lock(&pit->pit_state.lock); | 694 | mutex_lock(&pit->pit_state.lock); |
628 | raw_spin_lock_init(&pit->pit_state.inject_lock); | 695 | spin_lock_init(&pit->pit_state.inject_lock); |
696 | |||
697 | pit->wq = create_singlethread_workqueue("kvm-pit-wq"); | ||
698 | if (!pit->wq) { | ||
699 | mutex_unlock(&pit->pit_state.lock); | ||
700 | kfree(pit); | ||
701 | return NULL; | ||
702 | } | ||
703 | INIT_WORK(&pit->expired, pit_do_work); | ||
629 | 704 | ||
630 | kvm->arch.vpit = pit; | 705 | kvm->arch.vpit = pit; |
631 | pit->kvm = kvm; | 706 | pit->kvm = kvm; |
@@ -677,6 +752,9 @@ void kvm_free_pit(struct kvm *kvm) | |||
677 | struct hrtimer *timer; | 752 | struct hrtimer *timer; |
678 | 753 | ||
679 | if (kvm->arch.vpit) { | 754 | if (kvm->arch.vpit) { |
755 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &kvm->arch.vpit->dev); | ||
756 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
757 | &kvm->arch.vpit->speaker_dev); | ||
680 | kvm_unregister_irq_mask_notifier(kvm, 0, | 758 | kvm_unregister_irq_mask_notifier(kvm, 0, |
681 | &kvm->arch.vpit->mask_notifier); | 759 | &kvm->arch.vpit->mask_notifier); |
682 | kvm_unregister_irq_ack_notifier(kvm, | 760 | kvm_unregister_irq_ack_notifier(kvm, |
@@ -684,54 +762,10 @@ void kvm_free_pit(struct kvm *kvm) | |||
684 | mutex_lock(&kvm->arch.vpit->pit_state.lock); | 762 | mutex_lock(&kvm->arch.vpit->pit_state.lock); |
685 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; | 763 | timer = &kvm->arch.vpit->pit_state.pit_timer.timer; |
686 | hrtimer_cancel(timer); | 764 | hrtimer_cancel(timer); |
765 | cancel_work_sync(&kvm->arch.vpit->expired); | ||
687 | kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); | 766 | kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); |
688 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 767 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
768 | destroy_workqueue(kvm->arch.vpit->wq); | ||
689 | kfree(kvm->arch.vpit); | 769 | kfree(kvm->arch.vpit); |
690 | } | 770 | } |
691 | } | 771 | } |
692 | |||
693 | static void __inject_pit_timer_intr(struct kvm *kvm) | ||
694 | { | ||
695 | struct kvm_vcpu *vcpu; | ||
696 | int i; | ||
697 | |||
698 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | ||
699 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | ||
700 | |||
701 | /* | ||
702 | * Provides NMI watchdog support via Virtual Wire mode. | ||
703 | * The route is: PIT -> PIC -> LVT0 in NMI mode. | ||
704 | * | ||
705 | * Note: Our Virtual Wire implementation is simplified, only | ||
706 | * propagating PIT interrupts to all VCPUs when they have set | ||
707 | * LVT0 to NMI delivery. Other PIC interrupts are just sent to | ||
708 | * VCPU0, and only if its LVT0 is in EXTINT mode. | ||
709 | */ | ||
710 | if (kvm->arch.vapics_in_nmi_mode > 0) | ||
711 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
712 | kvm_apic_nmi_wd_deliver(vcpu); | ||
713 | } | ||
714 | |||
715 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | ||
716 | { | ||
717 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | ||
718 | struct kvm *kvm = vcpu->kvm; | ||
719 | struct kvm_kpit_state *ps; | ||
720 | |||
721 | if (pit) { | ||
722 | int inject = 0; | ||
723 | ps = &pit->pit_state; | ||
724 | |||
725 | /* Try to inject pending interrupts when | ||
726 | * last one has been acked. | ||
727 | */ | ||
728 | raw_spin_lock(&ps->inject_lock); | ||
729 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { | ||
730 | ps->irq_ack = 0; | ||
731 | inject = 1; | ||
732 | } | ||
733 | raw_spin_unlock(&ps->inject_lock); | ||
734 | if (inject) | ||
735 | __inject_pit_timer_intr(kvm); | ||
736 | } | ||
737 | } | ||
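[Note on the i8254.c changes above: interrupt injection moves out of the hrtimer callback. pit_timer_fn() only bumps the pending count and queues pit->expired; pit_do_work() performs the injection once the previous tick was acked, and the ack handler requeues the work when ticks are still outstanding. A compressed user-space sketch of that deferral pattern; the synchronous calls below stand in for queue_work(), and all names are ours:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int pending;          /* ticks not yet delivered */
static bool irq_ack = true;         /* previous interrupt was acked */

static void pit_do_work(void)       /* kernel: runs on pit->wq */
{
	if (irq_ack) {
		irq_ack = false;
		puts("inject PIT interrupt");
	}
}

static void pit_timer_fn(bool reinject)   /* kernel: hrtimer callback */
{
	if (reinject || atomic_load(&pending) == 0) {
		atomic_fetch_add(&pending, 1);
		pit_do_work();              /* kernel: queue_work() instead */
	}
}

static void pit_ack_irq(void)       /* kernel: kvm_pit_ack_irq() */
{
	irq_ack = true;
	if (atomic_fetch_sub(&pending, 1) - 1 > 0)
		pit_do_work();              /* reinject outstanding ticks */
}

int main(void)
{
	pit_timer_fn(true);   /* injected immediately */
	pit_timer_fn(true);   /* held: previous tick not acked yet */
	pit_ack_irq();        /* ack arrives, second tick is reinjected */
	return 0;
}

Moving the injection to a worker is also what lets inject_lock relax from a raw spinlock to an ordinary one, as the i8254.h hunk below shows.]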
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 900d6b0ba7c2..46d08ca0b48f 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -27,7 +27,7 @@ struct kvm_kpit_state { | |||
27 | u32 speaker_data_on; | 27 | u32 speaker_data_on; |
28 | struct mutex lock; | 28 | struct mutex lock; |
29 | struct kvm_pit *pit; | 29 | struct kvm_pit *pit; |
30 | raw_spinlock_t inject_lock; | 30 | spinlock_t inject_lock; |
31 | unsigned long irq_ack; | 31 | unsigned long irq_ack; |
32 | struct kvm_irq_ack_notifier irq_ack_notifier; | 32 | struct kvm_irq_ack_notifier irq_ack_notifier; |
33 | }; | 33 | }; |
@@ -40,6 +40,8 @@ struct kvm_pit { | |||
40 | struct kvm_kpit_state pit_state; | 40 | struct kvm_kpit_state pit_state; |
41 | int irq_source_id; | 41 | int irq_source_id; |
42 | struct kvm_irq_mask_notifier mask_notifier; | 42 | struct kvm_irq_mask_notifier mask_notifier; |
43 | struct workqueue_struct *wq; | ||
44 | struct work_struct expired; | ||
43 | }; | 45 | }; |
44 | 46 | ||
45 | #define KVM_PIT_BASE_ADDRESS 0x40 | 47 | #define KVM_PIT_BASE_ADDRESS 0x40 |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 93825ff3338f..8d10c063d7f2 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright (c) 2003-2004 Fabrice Bellard | 4 | * Copyright (c) 2003-2004 Fabrice Bellard |
5 | * Copyright (c) 2007 Intel Corporation | 5 | * Copyright (c) 2007 Intel Corporation |
6 | * Copyright 2009 Red Hat, Inc. and/or its affiliates. | ||
6 | * | 7 | * |
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | * of this software and associated documentation files (the "Software"), to deal | 9 | * of this software and associated documentation files (the "Software"), to deal |
@@ -33,6 +34,8 @@ | |||
33 | #include <linux/kvm_host.h> | 34 | #include <linux/kvm_host.h> |
34 | #include "trace.h" | 35 | #include "trace.h" |
35 | 36 | ||
37 | static void pic_irq_request(struct kvm *kvm, int level); | ||
38 | |||
36 | static void pic_lock(struct kvm_pic *s) | 39 | static void pic_lock(struct kvm_pic *s) |
37 | __acquires(&s->lock) | 40 | __acquires(&s->lock) |
38 | { | 41 | { |
@@ -43,16 +46,25 @@ static void pic_unlock(struct kvm_pic *s) | |||
43 | __releases(&s->lock) | 46 | __releases(&s->lock) |
44 | { | 47 | { |
45 | bool wakeup = s->wakeup_needed; | 48 | bool wakeup = s->wakeup_needed; |
46 | struct kvm_vcpu *vcpu; | 49 | struct kvm_vcpu *vcpu, *found = NULL; |
50 | int i; | ||
47 | 51 | ||
48 | s->wakeup_needed = false; | 52 | s->wakeup_needed = false; |
49 | 53 | ||
50 | raw_spin_unlock(&s->lock); | 54 | raw_spin_unlock(&s->lock); |
51 | 55 | ||
52 | if (wakeup) { | 56 | if (wakeup) { |
53 | vcpu = s->kvm->bsp_vcpu; | 57 | kvm_for_each_vcpu(i, vcpu, s->kvm) { |
54 | if (vcpu) | 58 | if (kvm_apic_accept_pic_intr(vcpu)) { |
55 | kvm_vcpu_kick(vcpu); | 59 | found = vcpu; |
60 | break; | ||
61 | } | ||
62 | } | ||
63 | |||
64 | if (!found) | ||
65 | found = s->kvm->bsp_vcpu; | ||
66 | |||
67 | kvm_vcpu_kick(found); | ||
56 | } | 68 | } |
57 | } | 69 | } |
58 | 70 | ||
@@ -173,10 +185,7 @@ static void pic_update_irq(struct kvm_pic *s) | |||
173 | pic_set_irq1(&s->pics[0], 2, 0); | 185 | pic_set_irq1(&s->pics[0], 2, 0); |
174 | } | 186 | } |
175 | irq = pic_get_irq(&s->pics[0]); | 187 | irq = pic_get_irq(&s->pics[0]); |
176 | if (irq >= 0) | 188 | pic_irq_request(s->kvm, irq >= 0); |
177 | s->irq_request(s->irq_request_opaque, 1); | ||
178 | else | ||
179 | s->irq_request(s->irq_request_opaque, 0); | ||
180 | } | 189 | } |
181 | 190 | ||
182 | void kvm_pic_update_irq(struct kvm_pic *s) | 191 | void kvm_pic_update_irq(struct kvm_pic *s) |
@@ -261,8 +270,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
261 | void kvm_pic_reset(struct kvm_kpic_state *s) | 270 | void kvm_pic_reset(struct kvm_kpic_state *s) |
262 | { | 271 | { |
263 | int irq; | 272 | int irq; |
264 | struct kvm *kvm = s->pics_state->irq_request_opaque; | 273 | struct kvm_vcpu *vcpu0 = s->pics_state->kvm->bsp_vcpu; |
265 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; | ||
266 | u8 irr = s->irr, isr = s->imr; | 274 | u8 irr = s->irr, isr = s->imr; |
267 | 275 | ||
268 | s->last_irr = 0; | 276 | s->last_irr = 0; |
@@ -301,8 +309,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
301 | /* | 309 | /* |
302 | * deassert a pending interrupt | 310 | * deassert a pending interrupt |
303 | */ | 311 | */ |
304 | s->pics_state->irq_request(s->pics_state-> | 312 | pic_irq_request(s->pics_state->kvm, 0); |
305 | irq_request_opaque, 0); | ||
306 | s->init_state = 1; | 313 | s->init_state = 1; |
307 | s->init4 = val & 1; | 314 | s->init4 = val & 1; |
308 | if (val & 0x02) | 315 | if (val & 0x02) |
@@ -356,10 +363,20 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
356 | } | 363 | } |
357 | } else | 364 | } else |
358 | switch (s->init_state) { | 365 | switch (s->init_state) { |
359 | case 0: /* normal mode */ | 366 | case 0: { /* normal mode */ |
367 | u8 imr_diff = s->imr ^ val, | ||
368 | off = (s == &s->pics_state->pics[0]) ? 0 : 8; | ||
360 | s->imr = val; | 369 | s->imr = val; |
370 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) | ||
371 | if (imr_diff & (1 << irq)) | ||
372 | kvm_fire_mask_notifiers( | ||
373 | s->pics_state->kvm, | ||
374 | SELECT_PIC(irq + off), | ||
375 | irq + off, | ||
376 | !!(s->imr & (1 << irq))); | ||
361 | pic_update_irq(s->pics_state); | 377 | pic_update_irq(s->pics_state); |
362 | break; | 378 | break; |
379 | } | ||
363 | case 1: | 380 | case 1: |
364 | s->irq_base = val & 0xf8; | 381 | s->irq_base = val & 0xf8; |
365 | s->init_state = 2; | 382 | s->init_state = 2; |
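[Note on the OCW1 hunk above: the write handler now XORs the old and new IMR and fires a mask notifier for every pin whose mask bit actually changed, where off is 0 for the master PIC and 8 for the slave. The diff walk in isolation, with a hypothetical stand-in for kvm_fire_mask_notifiers():

#include <stdint.h>
#include <stdio.h>

static void fire_mask_notifier(int irq, int masked)   /* stand-in */
{
	printf("irq %d is now %s\n", irq, masked ? "masked" : "unmasked");
}

static void imr_write(uint8_t *imr, uint8_t val, int off)
{
	uint8_t imr_diff = *imr ^ val;   /* bits that changed state */

	*imr = val;
	for (int irq = 0; irq < 8; irq++)
		if (imr_diff & (1u << irq))
			fire_mask_notifier(irq + off, !!(*imr & (1u << irq)));
}
]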
@@ -518,9 +535,8 @@ static int picdev_read(struct kvm_io_device *this, | |||
518 | /* | 535 | /* |
519 | * callback when PIC0 irq status changed | 536 | * callback when PIC0 irq status changed |
520 | */ | 537 | */ |
521 | static void pic_irq_request(void *opaque, int level) | 538 | static void pic_irq_request(struct kvm *kvm, int level) |
522 | { | 539 | { |
523 | struct kvm *kvm = opaque; | ||
524 | struct kvm_vcpu *vcpu = kvm->bsp_vcpu; | 540 | struct kvm_vcpu *vcpu = kvm->bsp_vcpu; |
525 | struct kvm_pic *s = pic_irqchip(kvm); | 541 | struct kvm_pic *s = pic_irqchip(kvm); |
526 | int irq = pic_get_irq(&s->pics[0]); | 542 | int irq = pic_get_irq(&s->pics[0]); |
@@ -549,8 +565,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
549 | s->kvm = kvm; | 565 | s->kvm = kvm; |
550 | s->pics[0].elcr_mask = 0xf8; | 566 | s->pics[0].elcr_mask = 0xf8; |
551 | s->pics[1].elcr_mask = 0xde; | 567 | s->pics[1].elcr_mask = 0xde; |
552 | s->irq_request = pic_irq_request; | ||
553 | s->irq_request_opaque = kvm; | ||
554 | s->pics[0].pics_state = s; | 568 | s->pics[0].pics_state = s; |
555 | s->pics[1].pics_state = s; | 569 | s->pics[1].pics_state = s; |
556 | 570 | ||
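[Note on the pic_unlock() hunk above: instead of unconditionally kicking the BSP, the wakeup path now scans for the first vcpu whose local APIC will accept a PIC interrupt and falls back to the BSP only if none does. The selection logic reduced to a self-contained sketch; the struct and field names are ours:

#include <stdbool.h>
#include <stddef.h>

struct vcpu_sketch { bool accepts_pic_intr; };

static struct vcpu_sketch *pick_wakeup_target(struct vcpu_sketch *vcpus,
                                              size_t n,
                                              struct vcpu_sketch *bsp)
{
	for (size_t i = 0; i < n; i++)
		if (vcpus[i].accepts_pic_intr)
			return &vcpus[i];
	return bsp;   /* nobody accepts PIC interrupts: kick the BSP */
}
]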
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 96dfbb6ad2a9..2095a049835e 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * irq.c: API for in kernel interrupt controller | 2 | * irq.c: API for in kernel interrupt controller |
3 | * Copyright (c) 2007, Intel Corporation. | 3 | * Copyright (c) 2007, Intel Corporation. |
4 | * Copyright 2009 Red Hat, Inc. and/or its affiliates. | ||
4 | * | 5 | * |
5 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
6 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -89,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); | |||
89 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) | 90 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) |
90 | { | 91 | { |
91 | kvm_inject_apic_timer_irqs(vcpu); | 92 | kvm_inject_apic_timer_irqs(vcpu); |
92 | kvm_inject_pit_timer_irqs(vcpu); | ||
93 | /* TODO: PIT, RTC etc. */ | 93 | /* TODO: PIT, RTC etc. */ |
94 | } | 94 | } |
95 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); | 95 | EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index cd1f362f413d..ffed06871c5c 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -38,8 +38,6 @@ | |||
38 | struct kvm; | 38 | struct kvm; |
39 | struct kvm_vcpu; | 39 | struct kvm_vcpu; |
40 | 40 | ||
41 | typedef void irq_request_func(void *opaque, int level); | ||
42 | |||
43 | struct kvm_kpic_state { | 41 | struct kvm_kpic_state { |
44 | u8 last_irr; /* edge detection */ | 42 | u8 last_irr; /* edge detection */ |
45 | u8 irr; /* interrupt request register */ | 43 | u8 irr; /* interrupt request register */ |
@@ -67,8 +65,6 @@ struct kvm_pic { | |||
67 | unsigned pending_acks; | 65 | unsigned pending_acks; |
68 | struct kvm *kvm; | 66 | struct kvm *kvm; |
69 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 67 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
70 | irq_request_func *irq_request; | ||
71 | void *irq_request_opaque; | ||
72 | int output; /* intr from master PIC */ | 68 | int output; /* intr from master PIC */ |
73 | struct kvm_io_device dev; | 69 | struct kvm_io_device dev; |
74 | void (*ack_notifier)(void *opaque, int irq); | 70 | void (*ack_notifier)(void *opaque, int irq); |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index cff851cf5322..6491ac8e755b 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -36,6 +36,8 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) | |||
36 | 36 | ||
37 | static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | 37 | static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) |
38 | { | 38 | { |
39 | might_sleep(); /* on svm */ | ||
40 | |||
39 | if (!test_bit(VCPU_EXREG_PDPTR, | 41 | if (!test_bit(VCPU_EXREG_PDPTR, |
40 | (unsigned long *)&vcpu->arch.regs_avail)) | 42 | (unsigned long *)&vcpu->arch.regs_avail)) |
41 | kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); | 43 | kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); |
@@ -69,4 +71,10 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) | |||
69 | return kvm_read_cr4_bits(vcpu, ~0UL); | 71 | return kvm_read_cr4_bits(vcpu, ~0UL); |
70 | } | 72 | } |
71 | 73 | ||
74 | static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | ||
75 | { | ||
76 | return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u) | ||
77 | | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); | ||
78 | } | ||
79 | |||
72 | #endif | 80 | #endif |
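[Note on the kvm_read_edx_eax() helper added above: it mirrors the x86 convention, used by RDMSR/WRMSR and RDTSC, of splitting a 64-bit value across EDX (high half) and EAX (low half); the same construction appears in the wrmsr case of emulate.c earlier in this patch. A quick standalone check of the combination:

#include <assert.h>
#include <stdint.h>

static uint64_t read_edx_eax(uint32_t eax, uint32_t edx)
{
	return (uint64_t)eax | ((uint64_t)edx << 32);
}

int main(void)
{
	/* EDX=0x1, EAX=0xdeadbeef encode the 64-bit value 0x1deadbeef */
	assert(read_edx_eax(0xdeadbeefu, 0x1u) == 0x1deadbeefULL);
	return 0;
}
]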
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1eb7a4ae0c9c..77d8c0f4817d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Copyright (C) 2006 Qumranet, Inc. | 5 | * Copyright (C) 2006 Qumranet, Inc. |
6 | * Copyright (C) 2007 Novell | 6 | * Copyright (C) 2007 Novell |
7 | * Copyright (C) 2007 Intel | 7 | * Copyright (C) 2007 Intel |
8 | * Copyright 2009 Red Hat, Inc. and/or its affiliates. | ||
8 | * | 9 | * |
9 | * Authors: | 10 | * Authors: |
10 | * Dor Laor <dor.laor@qumranet.com> | 11 | * Dor Laor <dor.laor@qumranet.com> |
@@ -328,7 +329,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
328 | "dest_mode 0x%x, short_hand 0x%x\n", | 329 | "dest_mode 0x%x, short_hand 0x%x\n", |
329 | target, source, dest, dest_mode, short_hand); | 330 | target, source, dest, dest_mode, short_hand); |
330 | 331 | ||
331 | ASSERT(!target); | 332 | ASSERT(target); |
332 | switch (short_hand) { | 333 | switch (short_hand) { |
333 | case APIC_DEST_NOSHORT: | 334 | case APIC_DEST_NOSHORT: |
334 | if (dest_mode == 0) | 335 | if (dest_mode == 0) |
@@ -533,7 +534,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write) | |||
533 | struct kvm_vcpu *vcpu = apic->vcpu; | 534 | struct kvm_vcpu *vcpu = apic->vcpu; |
534 | struct kvm_run *run = vcpu->run; | 535 | struct kvm_run *run = vcpu->run; |
535 | 536 | ||
536 | set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); | 537 | kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu); |
537 | run->tpr_access.rip = kvm_rip_read(vcpu); | 538 | run->tpr_access.rip = kvm_rip_read(vcpu); |
538 | run->tpr_access.is_write = write; | 539 | run->tpr_access.is_write = write; |
539 | } | 540 | } |
@@ -1106,13 +1107,11 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) | |||
1106 | u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); | 1107 | u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); |
1107 | int r = 0; | 1108 | int r = 0; |
1108 | 1109 | ||
1109 | if (kvm_vcpu_is_bsp(vcpu)) { | 1110 | if (!apic_hw_enabled(vcpu->arch.apic)) |
1110 | if (!apic_hw_enabled(vcpu->arch.apic)) | 1111 | r = 1; |
1111 | r = 1; | 1112 | if ((lvt0 & APIC_LVT_MASKED) == 0 && |
1112 | if ((lvt0 & APIC_LVT_MASKED) == 0 && | 1113 | GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) |
1113 | GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) | 1114 | r = 1; |
1114 | r = 1; | ||
1115 | } | ||
1116 | return r; | 1115 | return r; |
1117 | } | 1116 | } |
1118 | 1117 | ||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b1ed0a1a5913..0dcc95e09876 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -7,6 +7,7 @@ | |||
7 | * MMU support | 7 | * MMU support |
8 | * | 8 | * |
9 | * Copyright (C) 2006 Qumranet, Inc. | 9 | * Copyright (C) 2006 Qumranet, Inc. |
10 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
10 | * | 11 | * |
11 | * Authors: | 12 | * Authors: |
12 | * Yaniv Kamay <yaniv@qumranet.com> | 13 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -32,6 +33,7 @@ | |||
32 | #include <linux/compiler.h> | 33 | #include <linux/compiler.h> |
33 | #include <linux/srcu.h> | 34 | #include <linux/srcu.h> |
34 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/uaccess.h> | ||
35 | 37 | ||
36 | #include <asm/page.h> | 38 | #include <asm/page.h> |
37 | #include <asm/cmpxchg.h> | 39 | #include <asm/cmpxchg.h> |
@@ -90,8 +92,6 @@ module_param(oos_shadow, bool, 0644); | |||
90 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 | 92 | #define PT_FIRST_AVAIL_BITS_SHIFT 9 |
91 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 | 93 | #define PT64_SECOND_AVAIL_BITS_SHIFT 52 |
92 | 94 | ||
93 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) | ||
94 | |||
95 | #define PT64_LEVEL_BITS 9 | 95 | #define PT64_LEVEL_BITS 9 |
96 | 96 | ||
97 | #define PT64_LEVEL_SHIFT(level) \ | 97 | #define PT64_LEVEL_SHIFT(level) \ |
@@ -173,7 +173,7 @@ struct kvm_shadow_walk_iterator { | |||
173 | shadow_walk_okay(&(_walker)); \ | 173 | shadow_walk_okay(&(_walker)); \ |
174 | shadow_walk_next(&(_walker))) | 174 | shadow_walk_next(&(_walker))) |
175 | 175 | ||
176 | typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); | 176 | typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte); |
177 | 177 | ||
178 | static struct kmem_cache *pte_chain_cache; | 178 | static struct kmem_cache *pte_chain_cache; |
179 | static struct kmem_cache *rmap_desc_cache; | 179 | static struct kmem_cache *rmap_desc_cache; |
@@ -288,6 +288,35 @@ static void __set_spte(u64 *sptep, u64 spte) | |||
288 | #endif | 288 | #endif |
289 | } | 289 | } |
290 | 290 | ||
291 | static u64 __xchg_spte(u64 *sptep, u64 new_spte) | ||
292 | { | ||
293 | #ifdef CONFIG_X86_64 | ||
294 | return xchg(sptep, new_spte); | ||
295 | #else | ||
296 | u64 old_spte; | ||
297 | |||
298 | do { | ||
299 | old_spte = *sptep; | ||
300 | } while (cmpxchg64(sptep, old_spte, new_spte) != old_spte); | ||
301 | |||
302 | return old_spte; | ||
303 | #endif | ||
304 | } | ||
305 | |||
306 | static void update_spte(u64 *sptep, u64 new_spte) | ||
307 | { | ||
308 | u64 old_spte; | ||
309 | |||
310 | if (!shadow_accessed_mask || (new_spte & shadow_accessed_mask) || | ||
311 | !is_rmap_spte(*sptep)) | ||
312 | __set_spte(sptep, new_spte); | ||
313 | else { | ||
314 | old_spte = __xchg_spte(sptep, new_spte); | ||
315 | if (old_spte & shadow_accessed_mask) | ||
316 | mark_page_accessed(pfn_to_page(spte_to_pfn(old_spte))); | ||
317 | } | ||
318 | } | ||
319 | |||
291 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | 320 | static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, |
292 | struct kmem_cache *base_cache, int min) | 321 | struct kmem_cache *base_cache, int min) |
293 | { | 322 | { |
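[Note on __xchg_spte() above: on x86-64 it uses the native 64-bit xchg, while the 32-bit fallback loops on cmpxchg64 until the compare-and-swap observes an unchanged spte. The shape of that loop in portable C11 atomics; a sketch, not the kernel's implementation:

#include <stdatomic.h>
#include <stdint.h>

static uint64_t xchg_spte(_Atomic uint64_t *sptep, uint64_t new_spte)
{
	uint64_t old_spte = atomic_load(sptep);

	/* retry until no other writer changed *sptep between load and swap;
	 * on failure, old_spte is refreshed with the current value */
	while (!atomic_compare_exchange_weak(sptep, &old_spte, new_spte))
		;
	return old_spte;
}

The atomic exchange is what lets update_spte() above safely harvest the accessed bit from the old spte without racing against hardware A/D-bit updates.]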
@@ -304,10 +333,11 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, | |||
304 | return 0; | 333 | return 0; |
305 | } | 334 | } |
306 | 335 | ||
307 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) | 336 | static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, |
337 | struct kmem_cache *cache) | ||
308 | { | 338 | { |
309 | while (mc->nobjs) | 339 | while (mc->nobjs) |
310 | kfree(mc->objects[--mc->nobjs]); | 340 | kmem_cache_free(cache, mc->objects[--mc->nobjs]); |
311 | } | 341 | } |
312 | 342 | ||
313 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | 343 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, |
@@ -355,10 +385,11 @@ out: | |||
355 | 385 | ||
356 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) | 386 | static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) |
357 | { | 387 | { |
358 | mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache); | 388 | mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache); |
359 | mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache); | 389 | mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache); |
360 | mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); | 390 | mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); |
361 | mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache); | 391 | mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache, |
392 | mmu_page_header_cache); | ||
362 | } | 393 | } |
363 | 394 | ||
364 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, | 395 | static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, |
@@ -379,7 +410,7 @@ static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) | |||
379 | 410 | ||
380 | static void mmu_free_pte_chain(struct kvm_pte_chain *pc) | 411 | static void mmu_free_pte_chain(struct kvm_pte_chain *pc) |
381 | { | 412 | { |
382 | kfree(pc); | 413 | kmem_cache_free(pte_chain_cache, pc); |
383 | } | 414 | } |
384 | 415 | ||
385 | static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) | 416 | static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) |
@@ -390,7 +421,23 @@ static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) | |||
390 | 421 | ||
391 | static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) | 422 | static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) |
392 | { | 423 | { |
393 | kfree(rd); | 424 | kmem_cache_free(rmap_desc_cache, rd); |
425 | } | ||
426 | |||
427 | static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) | ||
428 | { | ||
429 | if (!sp->role.direct) | ||
430 | return sp->gfns[index]; | ||
431 | |||
432 | return sp->gfn + (index << ((sp->role.level - 1) * PT64_LEVEL_BITS)); | ||
433 | } | ||
434 | |||
435 | static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) | ||
436 | { | ||
437 | if (sp->role.direct) | ||
438 | BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); | ||
439 | else | ||
440 | sp->gfns[index] = gfn; | ||
394 | } | 441 | } |
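For direct pages sp->gfns[] is no longer allocated (see kvm_mmu_alloc_page below), so the gfn is recomputed from the page's base gfn and the spte index. A worked example under the usual PT64_LEVEL_BITS = 9:

    /* level-1 direct sp: each spte maps one frame, gfn = sp->gfn + index.
     * level-2 direct sp: each spte covers 512 frames, so for index = 3:
     *      gfn = sp->gfn + (3 << ((2 - 1) * 9)) = sp->gfn + 1536
     */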
395 | 442 | ||
396 | /* | 443 | /* |
@@ -403,8 +450,8 @@ static int *slot_largepage_idx(gfn_t gfn, | |||
403 | { | 450 | { |
404 | unsigned long idx; | 451 | unsigned long idx; |
405 | 452 | ||
406 | idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - | 453 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - |
407 | (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); | 454 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
408 | return &slot->lpage_info[level - 2][idx].write_count; | 455 | return &slot->lpage_info[level - 2][idx].write_count; |
409 | } | 456 | } |
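Dividing by KVM_PAGES_PER_HPAGE(level) and shifting by KVM_HPAGE_GFN_SHIFT(level) are equivalent because the huge-page size is a power of two; note that both terms are reduced before subtracting. Illustrative numbers:

    /* 2MB level, shift = 9, slot base_gfn = 0x800, gfn = 0xa05:
     *      idx = (0xa05 >> 9) - (0x800 >> 9) = 5 - 4 = 1
     * i.e. the second 2MB region of the slot, whatever the slot's
     * alignment happens to be. */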
410 | 457 | ||
@@ -414,9 +461,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) | |||
414 | int *write_count; | 461 | int *write_count; |
415 | int i; | 462 | int i; |
416 | 463 | ||
417 | gfn = unalias_gfn(kvm, gfn); | 464 | slot = gfn_to_memslot(kvm, gfn); |
418 | |||
419 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
420 | for (i = PT_DIRECTORY_LEVEL; | 465 | for (i = PT_DIRECTORY_LEVEL; |
421 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 466 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
422 | write_count = slot_largepage_idx(gfn, slot, i); | 467 | write_count = slot_largepage_idx(gfn, slot, i); |
@@ -430,8 +475,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
430 | int *write_count; | 475 | int *write_count; |
431 | int i; | 476 | int i; |
432 | 477 | ||
433 | gfn = unalias_gfn(kvm, gfn); | 478 | slot = gfn_to_memslot(kvm, gfn); |
434 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
435 | for (i = PT_DIRECTORY_LEVEL; | 479 | for (i = PT_DIRECTORY_LEVEL; |
436 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 480 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
437 | write_count = slot_largepage_idx(gfn, slot, i); | 481 | write_count = slot_largepage_idx(gfn, slot, i); |
@@ -447,8 +491,7 @@ static int has_wrprotected_page(struct kvm *kvm, | |||
447 | struct kvm_memory_slot *slot; | 491 | struct kvm_memory_slot *slot; |
448 | int *largepage_idx; | 492 | int *largepage_idx; |
449 | 493 | ||
450 | gfn = unalias_gfn(kvm, gfn); | 494 | slot = gfn_to_memslot(kvm, gfn); |
451 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
452 | if (slot) { | 495 | if (slot) { |
453 | largepage_idx = slot_largepage_idx(gfn, slot, level); | 496 | largepage_idx = slot_largepage_idx(gfn, slot, level); |
454 | return *largepage_idx; | 497 | return *largepage_idx; |
@@ -501,7 +544,6 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
501 | 544 | ||
502 | /* | 545 | /* |
503 | * Take gfn and return the reverse mapping to it. | 546 | * Take gfn and return the reverse mapping to it. |
504 | * Note: gfn must be unaliased before this function get called | ||
505 | */ | 547 | */ |
506 | 548 | ||
507 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | 549 | static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) |
@@ -513,8 +555,8 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) | |||
513 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | 555 | if (likely(level == PT_PAGE_TABLE_LEVEL)) |
514 | return &slot->rmap[gfn - slot->base_gfn]; | 556 | return &slot->rmap[gfn - slot->base_gfn]; |
515 | 557 | ||
516 | idx = (gfn / KVM_PAGES_PER_HPAGE(level)) - | 558 | idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - |
517 | (slot->base_gfn / KVM_PAGES_PER_HPAGE(level)); | 559 | (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
518 | 560 | ||
519 | return &slot->lpage_info[level - 2][idx].rmap_pde; | 561 | return &slot->lpage_info[level - 2][idx].rmap_pde; |
520 | } | 562 | } |
@@ -541,9 +583,8 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
541 | 583 | ||
542 | if (!is_rmap_spte(*spte)) | 584 | if (!is_rmap_spte(*spte)) |
543 | return count; | 585 | return count; |
544 | gfn = unalias_gfn(vcpu->kvm, gfn); | ||
545 | sp = page_header(__pa(spte)); | 586 | sp = page_header(__pa(spte)); |
546 | sp->gfns[spte - sp->spt] = gfn; | 587 | kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); |
547 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 588 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
548 | if (!*rmapp) { | 589 | if (!*rmapp) { |
549 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); | 590 | rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); |
@@ -600,19 +641,13 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
600 | struct kvm_rmap_desc *desc; | 641 | struct kvm_rmap_desc *desc; |
601 | struct kvm_rmap_desc *prev_desc; | 642 | struct kvm_rmap_desc *prev_desc; |
602 | struct kvm_mmu_page *sp; | 643 | struct kvm_mmu_page *sp; |
603 | pfn_t pfn; | 644 | gfn_t gfn; |
604 | unsigned long *rmapp; | 645 | unsigned long *rmapp; |
605 | int i; | 646 | int i; |
606 | 647 | ||
607 | if (!is_rmap_spte(*spte)) | ||
608 | return; | ||
609 | sp = page_header(__pa(spte)); | 648 | sp = page_header(__pa(spte)); |
610 | pfn = spte_to_pfn(*spte); | 649 | gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); |
611 | if (*spte & shadow_accessed_mask) | 650 | rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); |
612 | kvm_set_pfn_accessed(pfn); | ||
613 | if (is_writable_pte(*spte)) | ||
614 | kvm_set_pfn_dirty(pfn); | ||
615 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | ||
616 | if (!*rmapp) { | 651 | if (!*rmapp) { |
617 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 652 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
618 | BUG(); | 653 | BUG(); |
@@ -644,6 +679,32 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
644 | } | 679 | } |
645 | } | 680 | } |
646 | 681 | ||
682 | static void set_spte_track_bits(u64 *sptep, u64 new_spte) | ||
683 | { | ||
684 | pfn_t pfn; | ||
685 | u64 old_spte = *sptep; | ||
686 | |||
687 | if (!shadow_accessed_mask || !is_shadow_present_pte(old_spte) || | ||
688 | old_spte & shadow_accessed_mask) { | ||
689 | __set_spte(sptep, new_spte); | ||
690 | } else | ||
691 | old_spte = __xchg_spte(sptep, new_spte); | ||
692 | |||
693 | if (!is_rmap_spte(old_spte)) | ||
694 | return; | ||
695 | pfn = spte_to_pfn(old_spte); | ||
696 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | ||
697 | kvm_set_pfn_accessed(pfn); | ||
698 | if (is_writable_pte(old_spte)) | ||
699 | kvm_set_pfn_dirty(pfn); | ||
700 | } | ||
701 | |||
702 | static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) | ||
703 | { | ||
704 | set_spte_track_bits(sptep, new_spte); | ||
705 | rmap_remove(kvm, sptep); | ||
706 | } | ||
707 | |||
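set_spte_track_bits() makes the same plain-store-vs-atomic-xchg decision as update_spte(), then propagates the old accessed/dirty state to the backing page, and drop_spte() performs that clear before rmap_remove() so the reverse map never forgets a still-live writable spte. A minimal model of the decision, assuming an illustrative bit layout:

    #include <stdbool.h>
    #include <stdint.h>

    #define SPTE_PRESENT  (1ull << 0)   /* assumed layout, for illustration */
    #define SPTE_ACCESSED (1ull << 5)

    /* An atomic xchg is only needed when hardware may still be setting
     * the accessed bit in the live spte concurrently with our update. */
    static bool need_atomic_clear(uint64_t old_spte, bool hw_accessed_bit)
    {
            if (!hw_accessed_bit)               /* A bit tracked in software */
                    return false;
            if (!(old_spte & SPTE_PRESENT))     /* hw cannot walk this spte */
                    return false;
            return !(old_spte & SPTE_ACCESSED); /* only an unset A bit is at risk */
    }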
647 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 708 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
648 | { | 709 | { |
649 | struct kvm_rmap_desc *desc; | 710 | struct kvm_rmap_desc *desc; |
@@ -676,7 +737,6 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
676 | u64 *spte; | 737 | u64 *spte; |
677 | int i, write_protected = 0; | 738 | int i, write_protected = 0; |
678 | 739 | ||
679 | gfn = unalias_gfn(kvm, gfn); | ||
680 | rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); | 740 | rmapp = gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL); |
681 | 741 | ||
682 | spte = rmap_next(kvm, rmapp, NULL); | 742 | spte = rmap_next(kvm, rmapp, NULL); |
@@ -685,7 +745,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
685 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 745 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
686 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 746 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
687 | if (is_writable_pte(*spte)) { | 747 | if (is_writable_pte(*spte)) { |
688 | __set_spte(spte, *spte & ~PT_WRITABLE_MASK); | 748 | update_spte(spte, *spte & ~PT_WRITABLE_MASK); |
689 | write_protected = 1; | 749 | write_protected = 1; |
690 | } | 750 | } |
691 | spte = rmap_next(kvm, rmapp, spte); | 751 | spte = rmap_next(kvm, rmapp, spte); |
@@ -709,9 +769,9 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
709 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 769 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); |
710 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 770 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
711 | if (is_writable_pte(*spte)) { | 771 | if (is_writable_pte(*spte)) { |
712 | rmap_remove(kvm, spte); | 772 | drop_spte(kvm, spte, |
773 | shadow_trap_nonpresent_pte); | ||
713 | --kvm->stat.lpages; | 774 | --kvm->stat.lpages; |
714 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
715 | spte = NULL; | 775 | spte = NULL; |
716 | write_protected = 1; | 776 | write_protected = 1; |
717 | } | 777 | } |
@@ -731,8 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
731 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | 791 | while ((spte = rmap_next(kvm, rmapp, NULL))) { |
732 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 792 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
733 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | 793 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); |
734 | rmap_remove(kvm, spte); | 794 | drop_spte(kvm, spte, shadow_trap_nonpresent_pte); |
735 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
736 | need_tlb_flush = 1; | 795 | need_tlb_flush = 1; |
737 | } | 796 | } |
738 | return need_tlb_flush; | 797 | return need_tlb_flush; |
@@ -754,8 +813,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
754 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | 813 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); |
755 | need_flush = 1; | 814 | need_flush = 1; |
756 | if (pte_write(*ptep)) { | 815 | if (pte_write(*ptep)) { |
757 | rmap_remove(kvm, spte); | 816 | drop_spte(kvm, spte, shadow_trap_nonpresent_pte); |
758 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
759 | spte = rmap_next(kvm, rmapp, NULL); | 817 | spte = rmap_next(kvm, rmapp, NULL); |
760 | } else { | 818 | } else { |
761 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | 819 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); |
@@ -763,9 +821,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
763 | 821 | ||
764 | new_spte &= ~PT_WRITABLE_MASK; | 822 | new_spte &= ~PT_WRITABLE_MASK; |
765 | new_spte &= ~SPTE_HOST_WRITEABLE; | 823 | new_spte &= ~SPTE_HOST_WRITEABLE; |
766 | if (is_writable_pte(*spte)) | 824 | new_spte &= ~shadow_accessed_mask; |
767 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | 825 | set_spte_track_bits(spte, new_spte); |
768 | __set_spte(spte, new_spte); | ||
769 | spte = rmap_next(kvm, rmapp, spte); | 826 | spte = rmap_next(kvm, rmapp, spte); |
770 | } | 827 | } |
771 | } | 828 | } |
@@ -799,8 +856,12 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
799 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); | 856 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); |
800 | 857 | ||
801 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 858 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
802 | int idx = gfn_offset; | 859 | unsigned long idx; |
803 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 860 | int sh; |
861 | |||
862 | sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j); | ||
863 | idx = ((memslot->base_gfn+gfn_offset) >> sh) - | ||
864 | (memslot->base_gfn >> sh); | ||
804 | ret |= handler(kvm, | 865 | ret |= handler(kvm, |
805 | &memslot->lpage_info[j][idx].rmap_pde, | 866 | &memslot->lpage_info[j][idx].rmap_pde, |
806 | data); | 867 | data); |
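The old idx = gfn_offset / KVM_PAGES_PER_HPAGE(...) silently assumed the slot's base_gfn was huge-page aligned; the replacement reduces absolute gfns, matching slot_largepage_idx() above. Illustrative numbers:

    /* base_gfn = 1023 (not 2MB aligned), gfn_offset = 1, shift = 9:
     *      old: idx = 1 / 512                          = 0  (wrong bucket)
     *      new: idx = ((1023 + 1) >> 9) - (1023 >> 9)  = 2 - 1 = 1
     */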
@@ -863,7 +924,6 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
863 | 924 | ||
864 | sp = page_header(__pa(spte)); | 925 | sp = page_header(__pa(spte)); |
865 | 926 | ||
866 | gfn = unalias_gfn(vcpu->kvm, gfn); | ||
867 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 927 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
868 | 928 | ||
869 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); | 929 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); |
@@ -894,10 +954,12 @@ static int is_empty_shadow_page(u64 *spt) | |||
894 | static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 954 | static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
895 | { | 955 | { |
896 | ASSERT(is_empty_shadow_page(sp->spt)); | 956 | ASSERT(is_empty_shadow_page(sp->spt)); |
957 | hlist_del(&sp->hash_link); | ||
897 | list_del(&sp->link); | 958 | list_del(&sp->link); |
898 | __free_page(virt_to_page(sp->spt)); | 959 | __free_page(virt_to_page(sp->spt)); |
899 | __free_page(virt_to_page(sp->gfns)); | 960 | if (!sp->role.direct) |
900 | kfree(sp); | 961 | __free_page(virt_to_page(sp->gfns)); |
962 | kmem_cache_free(mmu_page_header_cache, sp); | ||
901 | ++kvm->arch.n_free_mmu_pages; | 963 | ++kvm->arch.n_free_mmu_pages; |
902 | } | 964 | } |
903 | 965 | ||
@@ -907,13 +969,15 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn) | |||
907 | } | 969 | } |
908 | 970 | ||
909 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | 971 | static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, |
910 | u64 *parent_pte) | 972 | u64 *parent_pte, int direct) |
911 | { | 973 | { |
912 | struct kvm_mmu_page *sp; | 974 | struct kvm_mmu_page *sp; |
913 | 975 | ||
914 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); | 976 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache, sizeof *sp); |
915 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 977 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
916 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 978 | if (!direct) |
979 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, | ||
980 | PAGE_SIZE); | ||
917 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 981 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
918 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 982 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
919 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 983 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
@@ -998,7 +1062,6 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
998 | BUG(); | 1062 | BUG(); |
999 | } | 1063 | } |
1000 | 1064 | ||
1001 | |||
1002 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) | 1065 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
1003 | { | 1066 | { |
1004 | struct kvm_pte_chain *pte_chain; | 1067 | struct kvm_pte_chain *pte_chain; |
@@ -1008,63 +1071,37 @@ static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) | |||
1008 | 1071 | ||
1009 | if (!sp->multimapped && sp->parent_pte) { | 1072 | if (!sp->multimapped && sp->parent_pte) { |
1010 | parent_sp = page_header(__pa(sp->parent_pte)); | 1073 | parent_sp = page_header(__pa(sp->parent_pte)); |
1011 | fn(parent_sp); | 1074 | fn(parent_sp, sp->parent_pte); |
1012 | mmu_parent_walk(parent_sp, fn); | ||
1013 | return; | 1075 | return; |
1014 | } | 1076 | } |
1077 | |||
1015 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1078 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
1016 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | 1079 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { |
1017 | if (!pte_chain->parent_ptes[i]) | 1080 | u64 *spte = pte_chain->parent_ptes[i]; |
1081 | |||
1082 | if (!spte) | ||
1018 | break; | 1083 | break; |
1019 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | 1084 | parent_sp = page_header(__pa(spte)); |
1020 | fn(parent_sp); | 1085 | fn(parent_sp, spte); |
1021 | mmu_parent_walk(parent_sp, fn); | ||
1022 | } | 1086 | } |
1023 | } | 1087 | } |
1024 | 1088 | ||
1025 | static void kvm_mmu_update_unsync_bitmap(u64 *spte) | 1089 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte); |
1090 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) | ||
1026 | { | 1091 | { |
1027 | unsigned int index; | 1092 | mmu_parent_walk(sp, mark_unsync); |
1028 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | ||
1029 | |||
1030 | index = spte - sp->spt; | ||
1031 | if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) | ||
1032 | sp->unsync_children++; | ||
1033 | WARN_ON(!sp->unsync_children); | ||
1034 | } | 1093 | } |
1035 | 1094 | ||
1036 | static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | 1095 | static void mark_unsync(struct kvm_mmu_page *sp, u64 *spte) |
1037 | { | 1096 | { |
1038 | struct kvm_pte_chain *pte_chain; | 1097 | unsigned int index; |
1039 | struct hlist_node *node; | ||
1040 | int i; | ||
1041 | 1098 | ||
1042 | if (!sp->parent_pte) | 1099 | index = spte - sp->spt; |
1100 | if (__test_and_set_bit(index, sp->unsync_child_bitmap)) | ||
1043 | return; | 1101 | return; |
1044 | 1102 | if (sp->unsync_children++) | |
1045 | if (!sp->multimapped) { | ||
1046 | kvm_mmu_update_unsync_bitmap(sp->parent_pte); | ||
1047 | return; | 1103 | return; |
1048 | } | 1104 | kvm_mmu_mark_parents_unsync(sp); |
1049 | |||
1050 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | ||
1051 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
1052 | if (!pte_chain->parent_ptes[i]) | ||
1053 | break; | ||
1054 | kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]); | ||
1055 | } | ||
1056 | } | ||
1057 | |||
1058 | static int unsync_walk_fn(struct kvm_mmu_page *sp) | ||
1059 | { | ||
1060 | kvm_mmu_update_parents_unsync(sp); | ||
1061 | return 1; | ||
1062 | } | ||
1063 | |||
1064 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) | ||
1065 | { | ||
1066 | mmu_parent_walk(sp, unsync_walk_fn); | ||
1067 | kvm_mmu_update_parents_unsync(sp); | ||
1068 | } | 1105 | } |
1069 | 1106 | ||
1070 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | 1107 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, |
@@ -1077,7 +1114,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | |||
1077 | } | 1114 | } |
1078 | 1115 | ||
1079 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | 1116 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, |
1080 | struct kvm_mmu_page *sp) | 1117 | struct kvm_mmu_page *sp, bool clear_unsync) |
1081 | { | 1118 | { |
1082 | return 1; | 1119 | return 1; |
1083 | } | 1120 | } |
@@ -1123,35 +1160,40 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
1123 | int i, ret, nr_unsync_leaf = 0; | 1160 | int i, ret, nr_unsync_leaf = 0; |
1124 | 1161 | ||
1125 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | 1162 | for_each_unsync_children(sp->unsync_child_bitmap, i) { |
1163 | struct kvm_mmu_page *child; | ||
1126 | u64 ent = sp->spt[i]; | 1164 | u64 ent = sp->spt[i]; |
1127 | 1165 | ||
1128 | if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { | 1166 | if (!is_shadow_present_pte(ent) || is_large_pte(ent)) |
1129 | struct kvm_mmu_page *child; | 1167 | goto clear_child_bitmap; |
1130 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 1168 | |
1131 | 1169 | child = page_header(ent & PT64_BASE_ADDR_MASK); | |
1132 | if (child->unsync_children) { | 1170 | |
1133 | if (mmu_pages_add(pvec, child, i)) | 1171 | if (child->unsync_children) { |
1134 | return -ENOSPC; | 1172 | if (mmu_pages_add(pvec, child, i)) |
1135 | 1173 | return -ENOSPC; | |
1136 | ret = __mmu_unsync_walk(child, pvec); | 1174 | |
1137 | if (!ret) | 1175 | ret = __mmu_unsync_walk(child, pvec); |
1138 | __clear_bit(i, sp->unsync_child_bitmap); | 1176 | if (!ret) |
1139 | else if (ret > 0) | 1177 | goto clear_child_bitmap; |
1140 | nr_unsync_leaf += ret; | 1178 | else if (ret > 0) |
1141 | else | 1179 | nr_unsync_leaf += ret; |
1142 | return ret; | 1180 | else |
1143 | } | 1181 | return ret; |
1182 | } else if (child->unsync) { | ||
1183 | nr_unsync_leaf++; | ||
1184 | if (mmu_pages_add(pvec, child, i)) | ||
1185 | return -ENOSPC; | ||
1186 | } else | ||
1187 | goto clear_child_bitmap; | ||
1144 | 1188 | ||
1145 | if (child->unsync) { | 1189 | continue; |
1146 | nr_unsync_leaf++; | 1190 | |
1147 | if (mmu_pages_add(pvec, child, i)) | 1191 | clear_child_bitmap: |
1148 | return -ENOSPC; | 1192 | __clear_bit(i, sp->unsync_child_bitmap); |
1149 | } | 1193 | sp->unsync_children--; |
1150 | } | 1194 | WARN_ON((int)sp->unsync_children < 0); |
1151 | } | 1195 | } |
1152 | 1196 | ||
1153 | if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) | ||
1154 | sp->unsync_children = 0; | ||
1155 | 1197 | ||
1156 | return nr_unsync_leaf; | 1198 | return nr_unsync_leaf; |
1157 | } | 1199 | } |
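The restructured walk keeps the counter in step with the bitmap as it goes: every path through the clear_child_bitmap label clears one bit and decrements sp->unsync_children, which is why the trailing find_first_bit(..., 512) == 512 recount could be dropped.

    /* Invariant after each iteration (sketch):
     *      sp->unsync_children == bitmap_weight(sp->unsync_child_bitmap, 512)
     * mark_unsync() is the only incrementer; the label above is the only
     * decrementer. */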
@@ -1166,26 +1208,6 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
1166 | return __mmu_unsync_walk(sp, pvec); | 1208 | return __mmu_unsync_walk(sp, pvec); |
1167 | } | 1209 | } |
1168 | 1210 | ||
1169 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | ||
1170 | { | ||
1171 | unsigned index; | ||
1172 | struct hlist_head *bucket; | ||
1173 | struct kvm_mmu_page *sp; | ||
1174 | struct hlist_node *node; | ||
1175 | |||
1176 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); | ||
1177 | index = kvm_page_table_hashfn(gfn); | ||
1178 | bucket = &kvm->arch.mmu_page_hash[index]; | ||
1179 | hlist_for_each_entry(sp, node, bucket, hash_link) | ||
1180 | if (sp->gfn == gfn && !sp->role.direct | ||
1181 | && !sp->role.invalid) { | ||
1182 | pgprintk("%s: found role %x\n", | ||
1183 | __func__, sp->role.word); | ||
1184 | return sp; | ||
1185 | } | ||
1186 | return NULL; | ||
1187 | } | ||
1188 | |||
1189 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1211 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
1190 | { | 1212 | { |
1191 | WARN_ON(!sp->unsync); | 1213 | WARN_ON(!sp->unsync); |
@@ -1194,20 +1216,36 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1194 | --kvm->stat.mmu_unsync; | 1216 | --kvm->stat.mmu_unsync; |
1195 | } | 1217 | } |
1196 | 1218 | ||
1197 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | 1219 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
1220 | struct list_head *invalid_list); | ||
1221 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | ||
1222 | struct list_head *invalid_list); | ||
1223 | |||
1224 | #define for_each_gfn_sp(kvm, sp, gfn, pos) \ | ||
1225 | hlist_for_each_entry(sp, pos, \ | ||
1226 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | ||
1227 | if ((sp)->gfn != (gfn)) {} else | ||
1228 | |||
1229 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ | ||
1230 | hlist_for_each_entry(sp, pos, \ | ||
1231 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | ||
1232 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ | ||
1233 | (sp)->role.invalid) {} else | ||
1198 | 1234 | ||
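The trailing `if (...) {} else` lets each macro filter hash-bucket entries while the caller still supplies an ordinary statement or block: a non-matching entry executes the empty branch, a matching one falls into the caller's body, and no dangling-else ambiguity is introduced. The same idiom in a self-contained toy, with every name made up for illustration:

    #include <stdio.h>

    #define for_each_even(v, arr, n, i) \
            for ((i) = 0; (i) < (n) && ((v) = (arr)[(i)], 1); ++(i)) \
                    if ((v) % 2 != 0) {} else

    int main(void)
    {
            int v, i, a[] = { 1, 2, 3, 4 };

            for_each_even(v, a, 4, i)
                    printf("%d\n", v);      /* prints 2 and 4 */
            return 0;
    }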
1199 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1235 | /* @sp->gfn should be write-protected at the call site */ |
1236 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
1237 | struct list_head *invalid_list, bool clear_unsync) | ||
1200 | { | 1238 | { |
1201 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { | 1239 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
1202 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1240 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1203 | return 1; | 1241 | return 1; |
1204 | } | 1242 | } |
1205 | 1243 | ||
1206 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) | 1244 | if (clear_unsync) |
1207 | kvm_flush_remote_tlbs(vcpu->kvm); | 1245 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
1208 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1246 | |
1209 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { | 1247 | if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) { |
1210 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1248 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); |
1211 | return 1; | 1249 | return 1; |
1212 | } | 1250 | } |
1213 | 1251 | ||
@@ -1215,6 +1253,52 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1215 | return 0; | 1253 | return 0; |
1216 | } | 1254 | } |
1217 | 1255 | ||
1256 | static int kvm_sync_page_transient(struct kvm_vcpu *vcpu, | ||
1257 | struct kvm_mmu_page *sp) | ||
1258 | { | ||
1259 | LIST_HEAD(invalid_list); | ||
1260 | int ret; | ||
1261 | |||
1262 | ret = __kvm_sync_page(vcpu, sp, &invalid_list, false); | ||
1263 | if (ret) | ||
1264 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
1265 | |||
1266 | return ret; | ||
1267 | } | ||
1268 | |||
1269 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
1270 | struct list_head *invalid_list) | ||
1271 | { | ||
1272 | return __kvm_sync_page(vcpu, sp, invalid_list, true); | ||
1273 | } | ||
1274 | |||
1275 | /* @gfn should be write-protected at the call site */ | ||
1276 | static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1277 | { | ||
1278 | struct kvm_mmu_page *s; | ||
1279 | struct hlist_node *node; | ||
1280 | LIST_HEAD(invalid_list); | ||
1281 | bool flush = false; | ||
1282 | |||
1283 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | ||
1284 | if (!s->unsync) | ||
1285 | continue; | ||
1286 | |||
1287 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | ||
1288 | if ((s->role.cr4_pae != !!is_pae(vcpu)) || | ||
1289 | (vcpu->arch.mmu.sync_page(vcpu, s, true))) { | ||
1290 | kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list); | ||
1291 | continue; | ||
1292 | } | ||
1293 | kvm_unlink_unsync_page(vcpu->kvm, s); | ||
1294 | flush = true; | ||
1295 | } | ||
1296 | |||
1297 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
1298 | if (flush) | ||
1299 | kvm_mmu_flush_tlb(vcpu); | ||
1300 | } | ||
1301 | |||
1218 | struct mmu_page_path { | 1302 | struct mmu_page_path { |
1219 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; | 1303 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; |
1220 | unsigned int idx[PT64_ROOT_LEVEL-1]; | 1304 | unsigned int idx[PT64_ROOT_LEVEL-1]; |
@@ -1281,6 +1365,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
1281 | struct kvm_mmu_page *sp; | 1365 | struct kvm_mmu_page *sp; |
1282 | struct mmu_page_path parents; | 1366 | struct mmu_page_path parents; |
1283 | struct kvm_mmu_pages pages; | 1367 | struct kvm_mmu_pages pages; |
1368 | LIST_HEAD(invalid_list); | ||
1284 | 1369 | ||
1285 | kvm_mmu_pages_init(parent, &parents, &pages); | 1370 | kvm_mmu_pages_init(parent, &parents, &pages); |
1286 | while (mmu_unsync_walk(parent, &pages)) { | 1371 | while (mmu_unsync_walk(parent, &pages)) { |
@@ -1293,9 +1378,10 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, | |||
1293 | kvm_flush_remote_tlbs(vcpu->kvm); | 1378 | kvm_flush_remote_tlbs(vcpu->kvm); |
1294 | 1379 | ||
1295 | for_each_sp(pages, sp, parents, i) { | 1380 | for_each_sp(pages, sp, parents, i) { |
1296 | kvm_sync_page(vcpu, sp); | 1381 | kvm_sync_page(vcpu, sp, &invalid_list); |
1297 | mmu_pages_clear_parents(&parents); | 1382 | mmu_pages_clear_parents(&parents); |
1298 | } | 1383 | } |
1384 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
1299 | cond_resched_lock(&vcpu->kvm->mmu_lock); | 1385 | cond_resched_lock(&vcpu->kvm->mmu_lock); |
1300 | kvm_mmu_pages_init(parent, &parents, &pages); | 1386 | kvm_mmu_pages_init(parent, &parents, &pages); |
1301 | } | 1387 | } |
@@ -1310,11 +1396,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1310 | u64 *parent_pte) | 1396 | u64 *parent_pte) |
1311 | { | 1397 | { |
1312 | union kvm_mmu_page_role role; | 1398 | union kvm_mmu_page_role role; |
1313 | unsigned index; | ||
1314 | unsigned quadrant; | 1399 | unsigned quadrant; |
1315 | struct hlist_head *bucket; | ||
1316 | struct kvm_mmu_page *sp; | 1400 | struct kvm_mmu_page *sp; |
1317 | struct hlist_node *node, *tmp; | 1401 | struct hlist_node *node; |
1402 | bool need_sync = false; | ||
1318 | 1403 | ||
1319 | role = vcpu->arch.mmu.base_role; | 1404 | role = vcpu->arch.mmu.base_role; |
1320 | role.level = level; | 1405 | role.level = level; |
@@ -1322,40 +1407,45 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1322 | if (role.direct) | 1407 | if (role.direct) |
1323 | role.cr4_pae = 0; | 1408 | role.cr4_pae = 0; |
1324 | role.access = access; | 1409 | role.access = access; |
1325 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1410 | if (!tdp_enabled && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
1326 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1411 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
1327 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; | 1412 | quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; |
1328 | role.quadrant = quadrant; | 1413 | role.quadrant = quadrant; |
1329 | } | 1414 | } |
1330 | index = kvm_page_table_hashfn(gfn); | 1415 | for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { |
1331 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1416 | if (!need_sync && sp->unsync) |
1332 | hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link) | 1417 | need_sync = true; |
1333 | if (sp->gfn == gfn) { | ||
1334 | if (sp->unsync) | ||
1335 | if (kvm_sync_page(vcpu, sp)) | ||
1336 | continue; | ||
1337 | 1418 | ||
1338 | if (sp->role.word != role.word) | 1419 | if (sp->role.word != role.word) |
1339 | continue; | 1420 | continue; |
1340 | 1421 | ||
1341 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1422 | if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) |
1342 | if (sp->unsync_children) { | 1423 | break; |
1343 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | 1424 | |
1344 | kvm_mmu_mark_parents_unsync(sp); | 1425 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
1345 | } | 1426 | if (sp->unsync_children) { |
1346 | trace_kvm_mmu_get_page(sp, false); | 1427 | kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); |
1347 | return sp; | 1428 | kvm_mmu_mark_parents_unsync(sp); |
1348 | } | 1429 | } else if (sp->unsync) |
1430 | kvm_mmu_mark_parents_unsync(sp); | ||
1431 | |||
1432 | trace_kvm_mmu_get_page(sp, false); | ||
1433 | return sp; | ||
1434 | } | ||
1349 | ++vcpu->kvm->stat.mmu_cache_miss; | 1435 | ++vcpu->kvm->stat.mmu_cache_miss; |
1350 | sp = kvm_mmu_alloc_page(vcpu, parent_pte); | 1436 | sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); |
1351 | if (!sp) | 1437 | if (!sp) |
1352 | return sp; | 1438 | return sp; |
1353 | sp->gfn = gfn; | 1439 | sp->gfn = gfn; |
1354 | sp->role = role; | 1440 | sp->role = role; |
1355 | hlist_add_head(&sp->hash_link, bucket); | 1441 | hlist_add_head(&sp->hash_link, |
1442 | &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]); | ||
1356 | if (!direct) { | 1443 | if (!direct) { |
1357 | if (rmap_write_protect(vcpu->kvm, gfn)) | 1444 | if (rmap_write_protect(vcpu->kvm, gfn)) |
1358 | kvm_flush_remote_tlbs(vcpu->kvm); | 1445 | kvm_flush_remote_tlbs(vcpu->kvm); |
1446 | if (level > PT_PAGE_TABLE_LEVEL && need_sync) | ||
1447 | kvm_sync_pages(vcpu, gfn); | ||
1448 | |||
1359 | account_shadowed(vcpu->kvm, gfn); | 1449 | account_shadowed(vcpu->kvm, gfn); |
1360 | } | 1450 | } |
1361 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) | 1451 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) |
@@ -1402,6 +1492,47 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) | |||
1402 | --iterator->level; | 1492 | --iterator->level; |
1403 | } | 1493 | } |
1404 | 1494 | ||
1495 | static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) | ||
1496 | { | ||
1497 | u64 spte; | ||
1498 | |||
1499 | spte = __pa(sp->spt) | ||
1500 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | ||
1501 | | PT_WRITABLE_MASK | PT_USER_MASK; | ||
1502 | __set_spte(sptep, spte); | ||
1503 | } | ||
1504 | |||
1505 | static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) | ||
1506 | { | ||
1507 | if (is_large_pte(*sptep)) { | ||
1508 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); | ||
1509 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1510 | } | ||
1511 | } | ||
1512 | |||
1513 | static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, | ||
1514 | unsigned direct_access) | ||
1515 | { | ||
1516 | if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) { | ||
1517 | struct kvm_mmu_page *child; | ||
1518 | |||
1519 | /* | ||
1520 | * For a direct sp, if the guest pte's dirty bit | ||
1521 | * changed from clean to dirty, it would corrupt the | ||
1522 | * sp's access by allowing writes through a read-only | ||
1523 | * sp, so update the spte at this point to get a new | ||
1524 | * sp with the correct access. | ||
1525 | */ | ||
1526 | child = page_header(*sptep & PT64_BASE_ADDR_MASK); | ||
1527 | if (child->role.access == direct_access) | ||
1528 | return; | ||
1529 | |||
1530 | mmu_page_remove_parent_pte(child, sptep); | ||
1531 | __set_spte(sptep, shadow_trap_nonpresent_pte); | ||
1532 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1533 | } | ||
1534 | } | ||
1535 | |||
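In other words, a direct child's access rights are frozen into its role when kvm_mmu_get_page() creates it, so a guest dirty-bit flip has to unlink the child rather than edit it. Hypothetical values, for illustration only:

    /* gpte clean:  child created with role.access = read/exec only
     * gpte dirty:  direct_access now includes write
     *      -> child->role.access != direct_access
     *      -> unlink child; the next fault instantiates a fresh sp
     *         with the correct (writable) access
     */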
1405 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1536 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
1406 | struct kvm_mmu_page *sp) | 1537 | struct kvm_mmu_page *sp) |
1407 | { | 1538 | { |
@@ -1422,7 +1553,8 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
1422 | } else { | 1553 | } else { |
1423 | if (is_large_pte(ent)) | 1554 | if (is_large_pte(ent)) |
1424 | --kvm->stat.lpages; | 1555 | --kvm->stat.lpages; |
1425 | rmap_remove(kvm, &pt[i]); | 1556 | drop_spte(kvm, &pt[i], |
1557 | shadow_trap_nonpresent_pte); | ||
1426 | } | 1558 | } |
1427 | } | 1559 | } |
1428 | pt[i] = shadow_trap_nonpresent_pte; | 1560 | pt[i] = shadow_trap_nonpresent_pte; |
@@ -1464,7 +1596,8 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1464 | } | 1596 | } |
1465 | 1597 | ||
1466 | static int mmu_zap_unsync_children(struct kvm *kvm, | 1598 | static int mmu_zap_unsync_children(struct kvm *kvm, |
1467 | struct kvm_mmu_page *parent) | 1599 | struct kvm_mmu_page *parent, |
1600 | struct list_head *invalid_list) | ||
1468 | { | 1601 | { |
1469 | int i, zapped = 0; | 1602 | int i, zapped = 0; |
1470 | struct mmu_page_path parents; | 1603 | struct mmu_page_path parents; |
@@ -1478,7 +1611,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
1478 | struct kvm_mmu_page *sp; | 1611 | struct kvm_mmu_page *sp; |
1479 | 1612 | ||
1480 | for_each_sp(pages, sp, parents, i) { | 1613 | for_each_sp(pages, sp, parents, i) { |
1481 | kvm_mmu_zap_page(kvm, sp); | 1614 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); |
1482 | mmu_pages_clear_parents(&parents); | 1615 | mmu_pages_clear_parents(&parents); |
1483 | zapped++; | 1616 | zapped++; |
1484 | } | 1617 | } |
@@ -1488,32 +1621,52 @@ static int mmu_zap_unsync_children(struct kvm *kvm, | |||
1488 | return zapped; | 1621 | return zapped; |
1489 | } | 1622 | } |
1490 | 1623 | ||
1491 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1624 | static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, |
1625 | struct list_head *invalid_list) | ||
1492 | { | 1626 | { |
1493 | int ret; | 1627 | int ret; |
1494 | 1628 | ||
1495 | trace_kvm_mmu_zap_page(sp); | 1629 | trace_kvm_mmu_prepare_zap_page(sp); |
1496 | ++kvm->stat.mmu_shadow_zapped; | 1630 | ++kvm->stat.mmu_shadow_zapped; |
1497 | ret = mmu_zap_unsync_children(kvm, sp); | 1631 | ret = mmu_zap_unsync_children(kvm, sp, invalid_list); |
1498 | kvm_mmu_page_unlink_children(kvm, sp); | 1632 | kvm_mmu_page_unlink_children(kvm, sp); |
1499 | kvm_mmu_unlink_parents(kvm, sp); | 1633 | kvm_mmu_unlink_parents(kvm, sp); |
1500 | kvm_flush_remote_tlbs(kvm); | ||
1501 | if (!sp->role.invalid && !sp->role.direct) | 1634 | if (!sp->role.invalid && !sp->role.direct) |
1502 | unaccount_shadowed(kvm, sp->gfn); | 1635 | unaccount_shadowed(kvm, sp->gfn); |
1503 | if (sp->unsync) | 1636 | if (sp->unsync) |
1504 | kvm_unlink_unsync_page(kvm, sp); | 1637 | kvm_unlink_unsync_page(kvm, sp); |
1505 | if (!sp->root_count) { | 1638 | if (!sp->root_count) { |
1506 | hlist_del(&sp->hash_link); | 1639 | /* Count self */ |
1507 | kvm_mmu_free_page(kvm, sp); | 1640 | ret++; |
1641 | list_move(&sp->link, invalid_list); | ||
1508 | } else { | 1642 | } else { |
1509 | sp->role.invalid = 1; | ||
1510 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 1643 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
1511 | kvm_reload_remote_mmus(kvm); | 1644 | kvm_reload_remote_mmus(kvm); |
1512 | } | 1645 | } |
1646 | |||
1647 | sp->role.invalid = 1; | ||
1513 | kvm_mmu_reset_last_pte_updated(kvm); | 1648 | kvm_mmu_reset_last_pte_updated(kvm); |
1514 | return ret; | 1649 | return ret; |
1515 | } | 1650 | } |
1516 | 1651 | ||
1652 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | ||
1653 | struct list_head *invalid_list) | ||
1654 | { | ||
1655 | struct kvm_mmu_page *sp; | ||
1656 | |||
1657 | if (list_empty(invalid_list)) | ||
1658 | return; | ||
1659 | |||
1660 | kvm_flush_remote_tlbs(kvm); | ||
1661 | |||
1662 | do { | ||
1663 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | ||
1664 | WARN_ON(!sp->role.invalid || sp->root_count); | ||
1665 | kvm_mmu_free_page(kvm, sp); | ||
1666 | } while (!list_empty(invalid_list)); | ||
1667 | |||
1668 | } | ||
1669 | |||
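kvm_mmu_prepare_zap_page()/kvm_mmu_commit_zap_page() split zapping into two phases so a batch of pages costs a single remote TLB flush: callers collect victims on a local invalid_list under mmu_lock, then commit once. The shape of the usage pattern the later hunks adopt (for_each_victim is an illustrative placeholder, not real API):

    LIST_HEAD(invalid_list);

    spin_lock(&kvm->mmu_lock);
    for_each_victim(sp)                          /* illustrative iteration */
            kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
    kvm_mmu_commit_zap_page(kvm, &invalid_list); /* one flush, then free */
    spin_unlock(&kvm->mmu_lock);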
1517 | /* | 1670 | /* |
1518 | * Changing the number of mmu pages allocated to the vm | 1671 | * Changing the number of mmu pages allocated to the vm |
1519 | * Note: if kvm_nr_mmu_pages is too small, you will get a deadlock | 1672 |
@@ -1521,6 +1674,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1521 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | 1674 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) |
1522 | { | 1675 | { |
1523 | int used_pages; | 1676 | int used_pages; |
1677 | LIST_HEAD(invalid_list); | ||
1524 | 1678 | ||
1525 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; | 1679 | used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; |
1526 | used_pages = max(0, used_pages); | 1680 | used_pages = max(0, used_pages); |
@@ -1538,9 +1692,10 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | |||
1538 | 1692 | ||
1539 | page = container_of(kvm->arch.active_mmu_pages.prev, | 1693 | page = container_of(kvm->arch.active_mmu_pages.prev, |
1540 | struct kvm_mmu_page, link); | 1694 | struct kvm_mmu_page, link); |
1541 | used_pages -= kvm_mmu_zap_page(kvm, page); | 1695 | used_pages -= kvm_mmu_prepare_zap_page(kvm, page, |
1542 | used_pages--; | 1696 | &invalid_list); |
1543 | } | 1697 | } |
1698 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
1544 | kvm_nr_mmu_pages = used_pages; | 1699 | kvm_nr_mmu_pages = used_pages; |
1545 | kvm->arch.n_free_mmu_pages = 0; | 1700 | kvm->arch.n_free_mmu_pages = 0; |
1546 | } | 1701 | } |
@@ -1553,47 +1708,36 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) | |||
1553 | 1708 | ||
1554 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | 1709 | static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) |
1555 | { | 1710 | { |
1556 | unsigned index; | ||
1557 | struct hlist_head *bucket; | ||
1558 | struct kvm_mmu_page *sp; | 1711 | struct kvm_mmu_page *sp; |
1559 | struct hlist_node *node, *n; | 1712 | struct hlist_node *node; |
1713 | LIST_HEAD(invalid_list); | ||
1560 | int r; | 1714 | int r; |
1561 | 1715 | ||
1562 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); | 1716 | pgprintk("%s: looking for gfn %lx\n", __func__, gfn); |
1563 | r = 0; | 1717 | r = 0; |
1564 | index = kvm_page_table_hashfn(gfn); | 1718 | |
1565 | bucket = &kvm->arch.mmu_page_hash[index]; | 1719 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1566 | restart: | 1720 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
1567 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 1721 | sp->role.word); |
1568 | if (sp->gfn == gfn && !sp->role.direct) { | 1722 | r = 1; |
1569 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1723 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
1570 | sp->role.word); | 1724 | } |
1571 | r = 1; | 1725 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
1572 | if (kvm_mmu_zap_page(kvm, sp)) | ||
1573 | goto restart; | ||
1574 | } | ||
1575 | return r; | 1726 | return r; |
1576 | } | 1727 | } |
1577 | 1728 | ||
1578 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | 1729 | static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) |
1579 | { | 1730 | { |
1580 | unsigned index; | ||
1581 | struct hlist_head *bucket; | ||
1582 | struct kvm_mmu_page *sp; | 1731 | struct kvm_mmu_page *sp; |
1583 | struct hlist_node *node, *nn; | 1732 | struct hlist_node *node; |
1733 | LIST_HEAD(invalid_list); | ||
1584 | 1734 | ||
1585 | index = kvm_page_table_hashfn(gfn); | 1735 | for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { |
1586 | bucket = &kvm->arch.mmu_page_hash[index]; | 1736 | pgprintk("%s: zap %lx %x\n", |
1587 | restart: | 1737 | __func__, gfn, sp->role.word); |
1588 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { | 1738 | kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); |
1589 | if (sp->gfn == gfn && !sp->role.direct | ||
1590 | && !sp->role.invalid) { | ||
1591 | pgprintk("%s: zap %lx %x\n", | ||
1592 | __func__, gfn, sp->role.word); | ||
1593 | if (kvm_mmu_zap_page(kvm, sp)) | ||
1594 | goto restart; | ||
1595 | } | ||
1596 | } | 1739 | } |
1740 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
1597 | } | 1741 | } |
1598 | 1742 | ||
1599 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 1743 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
@@ -1723,47 +1867,51 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
1723 | } | 1867 | } |
1724 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); | 1868 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); |
1725 | 1869 | ||
1726 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1870 | static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1727 | { | 1871 | { |
1728 | unsigned index; | ||
1729 | struct hlist_head *bucket; | ||
1730 | struct kvm_mmu_page *s; | ||
1731 | struct hlist_node *node, *n; | ||
1732 | |||
1733 | index = kvm_page_table_hashfn(sp->gfn); | ||
1734 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | ||
1735 | /* don't unsync if pagetable is shadowed with multiple roles */ | ||
1736 | hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { | ||
1737 | if (s->gfn != sp->gfn || s->role.direct) | ||
1738 | continue; | ||
1739 | if (s->role.word != sp->role.word) | ||
1740 | return 1; | ||
1741 | } | ||
1742 | trace_kvm_mmu_unsync_page(sp); | 1872 | trace_kvm_mmu_unsync_page(sp); |
1743 | ++vcpu->kvm->stat.mmu_unsync; | 1873 | ++vcpu->kvm->stat.mmu_unsync; |
1744 | sp->unsync = 1; | 1874 | sp->unsync = 1; |
1745 | 1875 | ||
1746 | kvm_mmu_mark_parents_unsync(sp); | 1876 | kvm_mmu_mark_parents_unsync(sp); |
1747 | |||
1748 | mmu_convert_notrap(sp); | 1877 | mmu_convert_notrap(sp); |
1749 | return 0; | 1878 | } |
1879 | |||
1880 | static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1881 | { | ||
1882 | struct kvm_mmu_page *s; | ||
1883 | struct hlist_node *node; | ||
1884 | |||
1885 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { | ||
1886 | if (s->unsync) | ||
1887 | continue; | ||
1888 | WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); | ||
1889 | __kvm_unsync_page(vcpu, s); | ||
1890 | } | ||
1750 | } | 1891 | } |
1751 | 1892 | ||
1752 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | 1893 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, |
1753 | bool can_unsync) | 1894 | bool can_unsync) |
1754 | { | 1895 | { |
1755 | struct kvm_mmu_page *shadow; | 1896 | struct kvm_mmu_page *s; |
1897 | struct hlist_node *node; | ||
1898 | bool need_unsync = false; | ||
1756 | 1899 | ||
1757 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); | 1900 | for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { |
1758 | if (shadow) { | 1901 | if (!can_unsync) |
1759 | if (shadow->role.level != PT_PAGE_TABLE_LEVEL) | ||
1760 | return 1; | 1902 | return 1; |
1761 | if (shadow->unsync) | 1903 | |
1762 | return 0; | 1904 | if (s->role.level != PT_PAGE_TABLE_LEVEL) |
1763 | if (can_unsync && oos_shadow) | 1905 | return 1; |
1764 | return kvm_unsync_page(vcpu, shadow); | 1906 | |
1765 | return 1; | 1907 | if (!need_unsync && !s->unsync) { |
1908 | if (!oos_shadow) | ||
1909 | return 1; | ||
1910 | need_unsync = true; | ||
1911 | } | ||
1766 | } | 1912 | } |
1913 | if (need_unsync) | ||
1914 | kvm_unsync_pages(vcpu, gfn); | ||
1767 | return 0; | 1915 | return 0; |
1768 | } | 1916 | } |
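The rewritten check scans every valid indirect sp for the gfn instead of stopping at the first hash hit, and only marks pages unsync after the whole scan proves it safe. The resulting decision, summarized:

    /* For each valid indirect sp shadowing gfn:
     *      !can_unsync                  -> return 1  (write-protect)
     *      sp->role.level != level 1    -> return 1  (write-protect)
     *      some sp in sync, !oos_shadow -> return 1  (write-protect)
     * otherwise                         -> unsync them all, return 0
     */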
1769 | 1917 | ||
@@ -1804,13 +1952,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1804 | spte |= (u64)pfn << PAGE_SHIFT; | 1952 | spte |= (u64)pfn << PAGE_SHIFT; |
1805 | 1953 | ||
1806 | if ((pte_access & ACC_WRITE_MASK) | 1954 | if ((pte_access & ACC_WRITE_MASK) |
1807 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { | 1955 | || (!tdp_enabled && write_fault && !is_write_protection(vcpu) |
1956 | && !user_fault)) { | ||
1808 | 1957 | ||
1809 | if (level > PT_PAGE_TABLE_LEVEL && | 1958 | if (level > PT_PAGE_TABLE_LEVEL && |
1810 | has_wrprotected_page(vcpu->kvm, gfn, level)) { | 1959 | has_wrprotected_page(vcpu->kvm, gfn, level)) { |
1811 | ret = 1; | 1960 | ret = 1; |
1812 | spte = shadow_trap_nonpresent_pte; | 1961 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); |
1813 | goto set_pte; | 1962 | goto done; |
1814 | } | 1963 | } |
1815 | 1964 | ||
1816 | spte |= PT_WRITABLE_MASK; | 1965 | spte |= PT_WRITABLE_MASK; |
@@ -1841,7 +1990,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1841 | mark_page_dirty(vcpu->kvm, gfn); | 1990 | mark_page_dirty(vcpu->kvm, gfn); |
1842 | 1991 | ||
1843 | set_pte: | 1992 | set_pte: |
1844 | __set_spte(sptep, spte); | 1993 | if (is_writable_pte(*sptep) && !is_writable_pte(spte)) |
1994 | kvm_set_pfn_dirty(pfn); | ||
1995 | update_spte(sptep, spte); | ||
1996 | done: | ||
1845 | return ret; | 1997 | return ret; |
1846 | } | 1998 | } |
1847 | 1999 | ||
@@ -1853,7 +2005,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1853 | bool reset_host_protection) | 2005 | bool reset_host_protection) |
1854 | { | 2006 | { |
1855 | int was_rmapped = 0; | 2007 | int was_rmapped = 0; |
1856 | int was_writable = is_writable_pte(*sptep); | ||
1857 | int rmap_count; | 2008 | int rmap_count; |
1858 | 2009 | ||
1859 | pgprintk("%s: spte %llx access %x write_fault %d" | 2010 | pgprintk("%s: spte %llx access %x write_fault %d" |
@@ -1878,8 +2029,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1878 | } else if (pfn != spte_to_pfn(*sptep)) { | 2029 | } else if (pfn != spte_to_pfn(*sptep)) { |
1879 | pgprintk("hfn old %lx new %lx\n", | 2030 | pgprintk("hfn old %lx new %lx\n", |
1880 | spte_to_pfn(*sptep), pfn); | 2031 | spte_to_pfn(*sptep), pfn); |
1881 | rmap_remove(vcpu->kvm, sptep); | 2032 | drop_spte(vcpu->kvm, sptep, shadow_trap_nonpresent_pte); |
1882 | __set_spte(sptep, shadow_trap_nonpresent_pte); | ||
1883 | kvm_flush_remote_tlbs(vcpu->kvm); | 2033 | kvm_flush_remote_tlbs(vcpu->kvm); |
1884 | } else | 2034 | } else |
1885 | was_rmapped = 1; | 2035 | was_rmapped = 1; |
@@ -1890,7 +2040,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1890 | reset_host_protection)) { | 2040 | reset_host_protection)) { |
1891 | if (write_fault) | 2041 | if (write_fault) |
1892 | *ptwrite = 1; | 2042 | *ptwrite = 1; |
1893 | kvm_x86_ops->tlb_flush(vcpu); | 2043 | kvm_mmu_flush_tlb(vcpu); |
1894 | } | 2044 | } |
1895 | 2045 | ||
1896 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); | 2046 | pgprintk("%s: setting spte %llx\n", __func__, *sptep); |
@@ -1904,15 +2054,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1904 | page_header_update_slot(vcpu->kvm, sptep, gfn); | 2054 | page_header_update_slot(vcpu->kvm, sptep, gfn); |
1905 | if (!was_rmapped) { | 2055 | if (!was_rmapped) { |
1906 | rmap_count = rmap_add(vcpu, sptep, gfn); | 2056 | rmap_count = rmap_add(vcpu, sptep, gfn); |
1907 | kvm_release_pfn_clean(pfn); | ||
1908 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 2057 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
1909 | rmap_recycle(vcpu, sptep, gfn); | 2058 | rmap_recycle(vcpu, sptep, gfn); |
1910 | } else { | ||
1911 | if (was_writable) | ||
1912 | kvm_release_pfn_dirty(pfn); | ||
1913 | else | ||
1914 | kvm_release_pfn_clean(pfn); | ||
1915 | } | 2059 | } |
2060 | kvm_release_pfn_clean(pfn); | ||
1916 | if (speculative) { | 2061 | if (speculative) { |
1917 | vcpu->arch.last_pte_updated = sptep; | 2062 | vcpu->arch.last_pte_updated = sptep; |
1918 | vcpu->arch.last_pte_gfn = gfn; | 2063 | vcpu->arch.last_pte_gfn = gfn; |
@@ -1941,7 +2086,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1941 | } | 2086 | } |
1942 | 2087 | ||
1943 | if (*iterator.sptep == shadow_trap_nonpresent_pte) { | 2088 | if (*iterator.sptep == shadow_trap_nonpresent_pte) { |
1944 | pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2089 | u64 base_addr = iterator.addr; |
2090 | |||
2091 | base_addr &= PT64_LVL_ADDR_MASK(iterator.level); | ||
2092 | pseudo_gfn = base_addr >> PAGE_SHIFT; | ||
1945 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, | 2093 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, |
1946 | iterator.level - 1, | 2094 | iterator.level - 1, |
1947 | 1, ACC_ALL, iterator.sptep); | 2095 | 1, ACC_ALL, iterator.sptep); |
@@ -1960,6 +2108,29 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1960 | return pt_write; | 2108 | return pt_write; |
1961 | } | 2109 | } |
1962 | 2110 | ||
2111 | static void kvm_send_hwpoison_signal(struct kvm *kvm, gfn_t gfn) | ||
2112 | { | ||
2113 | char buf[1]; | ||
2114 | void __user *hva; | ||
2115 | int r; | ||
2116 | |||
2117 | /* Touch the page, so a SIGBUS is sent if it is hwpoisoned */ | ||
2118 | hva = (void __user *)gfn_to_hva(kvm, gfn); | ||
2119 | r = copy_from_user(buf, hva, 1); | ||
2120 | } | ||
2121 | |||
2122 | static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn) | ||
2123 | { | ||
2124 | kvm_release_pfn_clean(pfn); | ||
2125 | if (is_hwpoison_pfn(pfn)) { | ||
2126 | kvm_send_hwpoison_signal(kvm, gfn); | ||
2127 | return 0; | ||
2128 | } else if (is_fault_pfn(pfn)) | ||
2129 | return -EFAULT; | ||
2130 | |||
2131 | return 1; | ||
2132 | } | ||
2133 | |||
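kvm_handle_bad_page() converts a poisoned host frame into roughly what a bare-metal access would see: touching the page from the faulting task's context lets the kernel's hwpoison machinery raise SIGBUS against the VMM process, which can in turn inject a machine check into the guest, while ordinary fault pfns still surface as -EFAULT. The shape of the path, as a sketch:

    /* copy_from_user(buf, hva, 1)   faults on the poisoned page
     * -> kernel hwpoison handler    delivers SIGBUS to current (the VMM)
     * -> VMM's SIGBUS handler       may inject an MCE into the guest
     */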
1963 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 2134 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) |
1964 | { | 2135 | { |
1965 | int r; | 2136 | int r; |
@@ -1983,10 +2154,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1983 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2154 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1984 | 2155 | ||
1985 | /* mmio */ | 2156 | /* mmio */ |
1986 | if (is_error_pfn(pfn)) { | 2157 | if (is_error_pfn(pfn)) |
1987 | kvm_release_pfn_clean(pfn); | 2158 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
1988 | return 1; | ||
1989 | } | ||
1990 | 2159 | ||
1991 | spin_lock(&vcpu->kvm->mmu_lock); | 2160 | spin_lock(&vcpu->kvm->mmu_lock); |
1992 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2161 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
@@ -2009,6 +2178,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
2009 | { | 2178 | { |
2010 | int i; | 2179 | int i; |
2011 | struct kvm_mmu_page *sp; | 2180 | struct kvm_mmu_page *sp; |
2181 | LIST_HEAD(invalid_list); | ||
2012 | 2182 | ||
2013 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 2183 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2014 | return; | 2184 | return; |
@@ -2018,8 +2188,10 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
2018 | 2188 | ||
2019 | sp = page_header(root); | 2189 | sp = page_header(root); |
2020 | --sp->root_count; | 2190 | --sp->root_count; |
2021 | if (!sp->root_count && sp->role.invalid) | 2191 | if (!sp->root_count && sp->role.invalid) { |
2022 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2192 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); |
2193 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
2194 | } | ||
2023 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 2195 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
2024 | spin_unlock(&vcpu->kvm->mmu_lock); | 2196 | spin_unlock(&vcpu->kvm->mmu_lock); |
2025 | return; | 2197 | return; |
@@ -2032,10 +2204,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
2032 | sp = page_header(root); | 2204 | sp = page_header(root); |
2033 | --sp->root_count; | 2205 | --sp->root_count; |
2034 | if (!sp->root_count && sp->role.invalid) | 2206 | if (!sp->root_count && sp->role.invalid) |
2035 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2207 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
2208 | &invalid_list); | ||
2036 | } | 2209 | } |
2037 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 2210 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
2038 | } | 2211 | } |
2212 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
2039 | spin_unlock(&vcpu->kvm->mmu_lock); | 2213 | spin_unlock(&vcpu->kvm->mmu_lock); |
2040 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 2214 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
2041 | } | 2215 | } |
@@ -2045,7 +2219,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) | |||
2045 | int ret = 0; | 2219 | int ret = 0; |
2046 | 2220 | ||
2047 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { | 2221 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { |
2048 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | 2222 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2049 | ret = 1; | 2223 | ret = 1; |
2050 | } | 2224 | } |
2051 | 2225 | ||
@@ -2073,6 +2247,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2073 | root_gfn = 0; | 2247 | root_gfn = 0; |
2074 | } | 2248 | } |
2075 | spin_lock(&vcpu->kvm->mmu_lock); | 2249 | spin_lock(&vcpu->kvm->mmu_lock); |
2250 | kvm_mmu_free_some_pages(vcpu); | ||
2076 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2251 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
2077 | PT64_ROOT_LEVEL, direct, | 2252 | PT64_ROOT_LEVEL, direct, |
2078 | ACC_ALL, NULL); | 2253 | ACC_ALL, NULL); |
@@ -2103,6 +2278,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
2103 | root_gfn = i << 30; | 2278 | root_gfn = i << 30; |
2104 | } | 2279 | } |
2105 | spin_lock(&vcpu->kvm->mmu_lock); | 2280 | spin_lock(&vcpu->kvm->mmu_lock); |
2281 | kvm_mmu_free_some_pages(vcpu); | ||
2106 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2282 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
2107 | PT32_ROOT_LEVEL, direct, | 2283 | PT32_ROOT_LEVEL, direct, |
2108 | ACC_ALL, NULL); | 2284 | ACC_ALL, NULL); |
@@ -2198,10 +2374,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
2198 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2374 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
2199 | smp_rmb(); | 2375 | smp_rmb(); |
2200 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2376 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
2201 | if (is_error_pfn(pfn)) { | 2377 | if (is_error_pfn(pfn)) |
2202 | kvm_release_pfn_clean(pfn); | 2378 | return kvm_handle_bad_page(vcpu->kvm, gfn, pfn); |
2203 | return 1; | ||
2204 | } | ||
2205 | spin_lock(&vcpu->kvm->mmu_lock); | 2379 | spin_lock(&vcpu->kvm->mmu_lock); |
2206 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 2380 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
2207 | goto out_unlock; | 2381 | goto out_unlock; |
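Both the nonpaging and tdp fault paths now funnel error pfns into kvm_handle_bad_page() instead of open-coding release-and-return. Judging by its call sites, it keeps the old behaviour for ordinary mmio pfns and additionally handles hardware-poisoned host pages; a sketch of that shape (the hwpoison branch is inferred from the rest of the series, not from this hunk):

        static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
        {
                kvm_release_pfn_clean(pfn);             /* old behaviour, kept */
                if (is_hwpoison_pfn(pfn)) {             /* corrupted host page */
                        kvm_send_hwpoison_signal(kvm, gfn);
                        return 0;                       /* exit to userspace */
                }
                return 1;                               /* mmio: let emulation run */
        }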
@@ -2243,7 +2417,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
2243 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 2417 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
2244 | { | 2418 | { |
2245 | ++vcpu->stat.tlb_flush; | 2419 | ++vcpu->stat.tlb_flush; |
2246 | kvm_x86_ops->tlb_flush(vcpu); | 2420 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); |
2247 | } | 2421 | } |
2248 | 2422 | ||
2249 | static void paging_new_cr3(struct kvm_vcpu *vcpu) | 2423 | static void paging_new_cr3(struct kvm_vcpu *vcpu) |
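kvm_mmu_flush_tlb() no longer flushes synchronously; like the set_bit() conversions elsewhere in this merge, it raises a request bit that the vcpu entry path consumes before the next guest entry. Roughly (the consumer lives in x86.c and is not part of this diff, so read it as a sketch):

        static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
        {
                set_bit(req, &vcpu->requests);
        }

        /* on the vcpu_enter_guest() path, before re-entering the guest: */
        if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
                kvm_x86_ops->tlb_flush(vcpu);

Deferring the flush lets multiple requests raised while the vcpu is outside guest mode collapse into a single hardware flush.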
@@ -2457,10 +2631,9 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu) | |||
2457 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) | 2631 | static void destroy_kvm_mmu(struct kvm_vcpu *vcpu) |
2458 | { | 2632 | { |
2459 | ASSERT(vcpu); | 2633 | ASSERT(vcpu); |
2460 | if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) { | 2634 | if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
2635 | /* mmu.free() should set root_hpa = INVALID_PAGE */ | ||
2461 | vcpu->arch.mmu.free(vcpu); | 2636 | vcpu->arch.mmu.free(vcpu); |
2462 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | ||
2463 | } | ||
2464 | } | 2637 | } |
2465 | 2638 | ||
2466 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) | 2639 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) |
@@ -2477,9 +2650,6 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
2477 | r = mmu_topup_memory_caches(vcpu); | 2650 | r = mmu_topup_memory_caches(vcpu); |
2478 | if (r) | 2651 | if (r) |
2479 | goto out; | 2652 | goto out; |
2480 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2481 | kvm_mmu_free_some_pages(vcpu); | ||
2482 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2483 | r = mmu_alloc_roots(vcpu); | 2653 | r = mmu_alloc_roots(vcpu); |
2484 | spin_lock(&vcpu->kvm->mmu_lock); | 2654 | spin_lock(&vcpu->kvm->mmu_lock); |
2485 | mmu_sync_roots(vcpu); | 2655 | mmu_sync_roots(vcpu); |
@@ -2508,7 +2678,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
2508 | pte = *spte; | 2678 | pte = *spte; |
2509 | if (is_shadow_present_pte(pte)) { | 2679 | if (is_shadow_present_pte(pte)) { |
2510 | if (is_last_spte(pte, sp->role.level)) | 2680 | if (is_last_spte(pte, sp->role.level)) |
2511 | rmap_remove(vcpu->kvm, spte); | 2681 | drop_spte(vcpu->kvm, spte, shadow_trap_nonpresent_pte); |
2512 | else { | 2682 | else { |
2513 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 2683 | child = page_header(pte & PT64_BASE_ADDR_MASK); |
2514 | mmu_page_remove_parent_pte(child, spte); | 2684 | mmu_page_remove_parent_pte(child, spte); |
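mmu_pte_write_zap_pte() now removes the spte through drop_spte() rather than calling rmap_remove() by hand. The helper keeps the rmap update and the spte write in one place so callers cannot get the order wrong; its likely shape (a sketch):

        static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte)
        {
                rmap_remove(kvm, sptep);        /* detach from the reverse map */
                __set_spte(sptep, new_spte);    /* then install the nonpresent pte */
        }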
@@ -2529,6 +2699,9 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
2529 | return; | 2699 | return; |
2530 | } | 2700 | } |
2531 | 2701 | ||
2702 | if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) | ||
2703 | return; | ||
2704 | |||
2532 | ++vcpu->kvm->stat.mmu_pte_updated; | 2705 | ++vcpu->kvm->stat.mmu_pte_updated; |
2533 | if (!sp->role.cr4_pae) | 2706 | if (!sp->role.cr4_pae) |
2534 | paging32_update_pte(vcpu, sp, spte, new); | 2707 | paging32_update_pte(vcpu, sp, spte, new); |
@@ -2549,11 +2722,15 @@ static bool need_remote_flush(u64 old, u64 new) | |||
2549 | return (old & ~new & PT64_PERM_MASK) != 0; | 2722 | return (old & ~new & PT64_PERM_MASK) != 0; |
2550 | } | 2723 | } |
2551 | 2724 | ||
2552 | static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, u64 old, u64 new) | 2725 | static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, |
2726 | bool remote_flush, bool local_flush) | ||
2553 | { | 2727 | { |
2554 | if (need_remote_flush(old, new)) | 2728 | if (zap_page) |
2729 | return; | ||
2730 | |||
2731 | if (remote_flush) | ||
2555 | kvm_flush_remote_tlbs(vcpu->kvm); | 2732 | kvm_flush_remote_tlbs(vcpu->kvm); |
2556 | else | 2733 | else if (local_flush) |
2557 | kvm_mmu_flush_tlb(vcpu); | 2734 | kvm_mmu_flush_tlb(vcpu); |
2558 | } | 2735 | } |
2559 | 2736 | ||
@@ -2603,10 +2780,10 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2603 | bool guest_initiated) | 2780 | bool guest_initiated) |
2604 | { | 2781 | { |
2605 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2782 | gfn_t gfn = gpa >> PAGE_SHIFT; |
2783 | union kvm_mmu_page_role mask = { .word = 0 }; | ||
2606 | struct kvm_mmu_page *sp; | 2784 | struct kvm_mmu_page *sp; |
2607 | struct hlist_node *node, *n; | 2785 | struct hlist_node *node; |
2608 | struct hlist_head *bucket; | 2786 | LIST_HEAD(invalid_list); |
2609 | unsigned index; | ||
2610 | u64 entry, gentry; | 2787 | u64 entry, gentry; |
2611 | u64 *spte; | 2788 | u64 *spte; |
2612 | unsigned offset = offset_in_page(gpa); | 2789 | unsigned offset = offset_in_page(gpa); |
@@ -2619,6 +2796,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2619 | int npte; | 2796 | int npte; |
2620 | int r; | 2797 | int r; |
2621 | int invlpg_counter; | 2798 | int invlpg_counter; |
2799 | bool remote_flush, local_flush, zap_page; | ||
2800 | |||
2801 | zap_page = remote_flush = local_flush = false; | ||
2622 | 2802 | ||
2623 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2803 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
2624 | 2804 | ||
@@ -2674,13 +2854,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2674 | vcpu->arch.last_pte_updated = NULL; | 2854 | vcpu->arch.last_pte_updated = NULL; |
2675 | } | 2855 | } |
2676 | } | 2856 | } |
2677 | index = kvm_page_table_hashfn(gfn); | ||
2678 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | ||
2679 | 2857 | ||
2680 | restart: | 2858 | mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; |
2681 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2859 | for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { |
2682 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) | ||
2683 | continue; | ||
2684 | pte_size = sp->role.cr4_pae ? 8 : 4; | 2860 | pte_size = sp->role.cr4_pae ? 8 : 4; |
2685 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2861 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
2686 | misaligned |= bytes < 4; | 2862 | misaligned |= bytes < 4; |
@@ -2697,8 +2873,8 @@ restart: | |||
2697 | */ | 2873 | */ |
2698 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2874 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
2699 | gpa, bytes, sp->role.word); | 2875 | gpa, bytes, sp->role.word); |
2700 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) | 2876 | zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
2701 | goto restart; | 2877 | &invalid_list); |
2702 | ++vcpu->kvm->stat.mmu_flooded; | 2878 | ++vcpu->kvm->stat.mmu_flooded; |
2703 | continue; | 2879 | continue; |
2704 | } | 2880 | } |
@@ -2722,16 +2898,22 @@ restart: | |||
2722 | if (quadrant != sp->role.quadrant) | 2898 | if (quadrant != sp->role.quadrant) |
2723 | continue; | 2899 | continue; |
2724 | } | 2900 | } |
2901 | local_flush = true; | ||
2725 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2902 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
2726 | while (npte--) { | 2903 | while (npte--) { |
2727 | entry = *spte; | 2904 | entry = *spte; |
2728 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2905 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
2729 | if (gentry) | 2906 | if (gentry && |
2907 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | ||
2908 | & mask.word)) | ||
2730 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 2909 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
2731 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 2910 | if (!remote_flush && need_remote_flush(entry, *spte)) |
2911 | remote_flush = true; | ||
2732 | ++spte; | 2912 | ++spte; |
2733 | } | 2913 | } |
2734 | } | 2914 | } |
2915 | mmu_pte_write_flush_tlb(vcpu, zap_page, remote_flush, local_flush); | ||
2916 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
2735 | kvm_mmu_audit(vcpu, "post pte write"); | 2917 | kvm_mmu_audit(vcpu, "post pte write"); |
2736 | spin_unlock(&vcpu->kvm->mmu_lock); | 2918 | spin_unlock(&vcpu->kvm->mmu_lock); |
2737 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { | 2919 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { |
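The role mask introduced in kvm_mmu_pte_write() restricts speculative pte updates to shadow pages whose role agrees with the current mmu context on the bits that change how a gpte is interpreted. An annotated reading of the test (same code as in the hunk above, comments added):

        union kvm_mmu_page_role mask = { .word = 0 };

        mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
        /*
         * The XOR has a 1 in every role bit where the cached page disagrees
         * with the current context; AND-ing with mask.word ignores everything
         * except cr0_wp/cr4_pae/nxe.  A nonzero result means this page would
         * decode the written gpte under different rules, so the spte is only
         * zapped here and rebuilt by a later fault with the right semantics.
         */
        if (!((sp->role.word ^ vcpu->arch.mmu.base_role.word) & mask.word))
                mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);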
@@ -2759,15 +2941,21 @@ EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | |||
2759 | 2941 | ||
2760 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 2942 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
2761 | { | 2943 | { |
2762 | while (vcpu->kvm->arch.n_free_mmu_pages < KVM_REFILL_PAGES && | 2944 | int free_pages; |
2945 | LIST_HEAD(invalid_list); | ||
2946 | |||
2947 | free_pages = vcpu->kvm->arch.n_free_mmu_pages; | ||
2948 | while (free_pages < KVM_REFILL_PAGES && | ||
2763 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | 2949 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { |
2764 | struct kvm_mmu_page *sp; | 2950 | struct kvm_mmu_page *sp; |
2765 | 2951 | ||
2766 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, | 2952 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, |
2767 | struct kvm_mmu_page, link); | 2953 | struct kvm_mmu_page, link); |
2768 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2954 | free_pages += kvm_mmu_prepare_zap_page(vcpu->kvm, sp, |
2955 | &invalid_list); | ||
2769 | ++vcpu->kvm->stat.mmu_recycled; | 2956 | ++vcpu->kvm->stat.mmu_recycled; |
2770 | } | 2957 | } |
2958 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | ||
2771 | } | 2959 | } |
2772 | 2960 | ||
2773 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | 2961 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) |
@@ -2795,11 +2983,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
2795 | return 1; | 2983 | return 1; |
2796 | case EMULATE_DO_MMIO: | 2984 | case EMULATE_DO_MMIO: |
2797 | ++vcpu->stat.mmio_exits; | 2985 | ++vcpu->stat.mmio_exits; |
2798 | return 0; | 2986 | /* fall through */ |
2799 | case EMULATE_FAIL: | 2987 | case EMULATE_FAIL: |
2800 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
2801 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
2802 | vcpu->run->internal.ndata = 0; | ||
2803 | return 0; | 2988 | return 0; |
2804 | default: | 2989 | default: |
2805 | BUG(); | 2990 | BUG(); |
@@ -2896,7 +3081,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
2896 | pt = sp->spt; | 3081 | pt = sp->spt; |
2897 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 3082 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) |
2898 | /* avoid RMW */ | 3083 | /* avoid RMW */ |
2899 | if (pt[i] & PT_WRITABLE_MASK) | 3084 | if (is_writable_pte(pt[i])) |
2900 | pt[i] &= ~PT_WRITABLE_MASK; | 3085 | pt[i] &= ~PT_WRITABLE_MASK; |
2901 | } | 3086 | } |
2902 | kvm_flush_remote_tlbs(kvm); | 3087 | kvm_flush_remote_tlbs(kvm); |
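Several open-coded `& PT_WRITABLE_MASK` tests in this file are replaced with the is_writable_pte() predicate. The helper is presumably a one-liner; the gain is a greppable name for the concept:

        static bool is_writable_pte(unsigned long pte)
        {
                return pte & PT_WRITABLE_MASK;
        }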
@@ -2905,25 +3090,26 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
2905 | void kvm_mmu_zap_all(struct kvm *kvm) | 3090 | void kvm_mmu_zap_all(struct kvm *kvm) |
2906 | { | 3091 | { |
2907 | struct kvm_mmu_page *sp, *node; | 3092 | struct kvm_mmu_page *sp, *node; |
3093 | LIST_HEAD(invalid_list); | ||
2908 | 3094 | ||
2909 | spin_lock(&kvm->mmu_lock); | 3095 | spin_lock(&kvm->mmu_lock); |
2910 | restart: | 3096 | restart: |
2911 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 3097 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
2912 | if (kvm_mmu_zap_page(kvm, sp)) | 3098 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) |
2913 | goto restart; | 3099 | goto restart; |
2914 | 3100 | ||
3101 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
2915 | spin_unlock(&kvm->mmu_lock); | 3102 | spin_unlock(&kvm->mmu_lock); |
2916 | |||
2917 | kvm_flush_remote_tlbs(kvm); | ||
2918 | } | 3103 | } |
2919 | 3104 | ||
2920 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) | 3105 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, |
3106 | struct list_head *invalid_list) | ||
2921 | { | 3107 | { |
2922 | struct kvm_mmu_page *page; | 3108 | struct kvm_mmu_page *page; |
2923 | 3109 | ||
2924 | page = container_of(kvm->arch.active_mmu_pages.prev, | 3110 | page = container_of(kvm->arch.active_mmu_pages.prev, |
2925 | struct kvm_mmu_page, link); | 3111 | struct kvm_mmu_page, link); |
2926 | return kvm_mmu_zap_page(kvm, page) + 1; | 3112 | return kvm_mmu_prepare_zap_page(kvm, page, invalid_list); |
2927 | } | 3113 | } |
2928 | 3114 | ||
2929 | static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 3115 | static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) |
@@ -2936,6 +3122,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
2936 | 3122 | ||
2937 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3123 | list_for_each_entry(kvm, &vm_list, vm_list) { |
2938 | int npages, idx, freed_pages; | 3124 | int npages, idx, freed_pages; |
3125 | LIST_HEAD(invalid_list); | ||
2939 | 3126 | ||
2940 | idx = srcu_read_lock(&kvm->srcu); | 3127 | idx = srcu_read_lock(&kvm->srcu); |
2941 | spin_lock(&kvm->mmu_lock); | 3128 | spin_lock(&kvm->mmu_lock); |
@@ -2943,12 +3130,14 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
2943 | kvm->arch.n_free_mmu_pages; | 3130 | kvm->arch.n_free_mmu_pages; |
2944 | cache_count += npages; | 3131 | cache_count += npages; |
2945 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | 3132 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { |
2946 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); | 3133 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm, |
3134 | &invalid_list); | ||
2947 | cache_count -= freed_pages; | 3135 | cache_count -= freed_pages; |
2948 | kvm_freed = kvm; | 3136 | kvm_freed = kvm; |
2949 | } | 3137 | } |
2950 | nr_to_scan--; | 3138 | nr_to_scan--; |
2951 | 3139 | ||
3140 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | ||
2952 | spin_unlock(&kvm->mmu_lock); | 3141 | spin_unlock(&kvm->mmu_lock); |
2953 | srcu_read_unlock(&kvm->srcu, idx); | 3142 | srcu_read_unlock(&kvm->srcu, idx); |
2954 | } | 3143 | } |
@@ -3074,7 +3263,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, | |||
3074 | 3263 | ||
3075 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) | 3264 | static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) |
3076 | { | 3265 | { |
3077 | kvm_set_cr3(vcpu, vcpu->arch.cr3); | 3266 | (void)kvm_set_cr3(vcpu, vcpu->arch.cr3); |
3078 | return 1; | 3267 | return 1; |
3079 | } | 3268 | } |
3080 | 3269 | ||
@@ -3331,9 +3520,9 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
3331 | struct kvm_mmu_page *rev_sp; | 3520 | struct kvm_mmu_page *rev_sp; |
3332 | gfn_t gfn; | 3521 | gfn_t gfn; |
3333 | 3522 | ||
3334 | if (*sptep & PT_WRITABLE_MASK) { | 3523 | if (is_writable_pte(*sptep)) { |
3335 | rev_sp = page_header(__pa(sptep)); | 3524 | rev_sp = page_header(__pa(sptep)); |
3336 | gfn = rev_sp->gfns[sptep - rev_sp->spt]; | 3525 | gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt); |
3337 | 3526 | ||
3338 | if (!gfn_to_memslot(kvm, gfn)) { | 3527 | if (!gfn_to_memslot(kvm, gfn)) { |
3339 | if (!printk_ratelimit()) | 3528 | if (!printk_ratelimit()) |
@@ -3347,8 +3536,7 @@ void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) | |||
3347 | return; | 3536 | return; |
3348 | } | 3537 | } |
3349 | 3538 | ||
3350 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], | 3539 | rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level); |
3351 | rev_sp->role.level); | ||
3352 | if (!*rmapp) { | 3540 | if (!*rmapp) { |
3353 | if (!printk_ratelimit()) | 3541 | if (!printk_ratelimit()) |
3354 | return; | 3542 | return; |
@@ -3381,7 +3569,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | |||
3381 | 3569 | ||
3382 | if (!(ent & PT_PRESENT_MASK)) | 3570 | if (!(ent & PT_PRESENT_MASK)) |
3383 | continue; | 3571 | continue; |
3384 | if (!(ent & PT_WRITABLE_MASK)) | 3572 | if (!is_writable_pte(ent)) |
3385 | continue; | 3573 | continue; |
3386 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); | 3574 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
3387 | } | 3575 | } |
@@ -3409,13 +3597,12 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) | |||
3409 | if (sp->unsync) | 3597 | if (sp->unsync) |
3410 | continue; | 3598 | continue; |
3411 | 3599 | ||
3412 | gfn = unalias_gfn(vcpu->kvm, sp->gfn); | 3600 | slot = gfn_to_memslot(vcpu->kvm, sp->gfn); |
3413 | slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); | ||
3414 | rmapp = &slot->rmap[gfn - slot->base_gfn]; | 3601 | rmapp = &slot->rmap[gfn - slot->base_gfn]; |
3415 | 3602 | ||
3416 | spte = rmap_next(vcpu->kvm, rmapp, NULL); | 3603 | spte = rmap_next(vcpu->kvm, rmapp, NULL); |
3417 | while (spte) { | 3604 | while (spte) { |
3418 | if (*spte & PT_WRITABLE_MASK) | 3605 | if (is_writable_pte(*spte)) |
3419 | printk(KERN_ERR "%s: (%s) shadow page has " | 3606 | printk(KERN_ERR "%s: (%s) shadow page has " |
3420 | "writable mappings: gfn %lx role %x\n", | 3607 | "writable mappings: gfn %lx role %x\n", |
3421 | __func__, audit_msg, sp->gfn, | 3608 | __func__, audit_msg, sp->gfn, |
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 42f07b1bfbc9..3aab0f0930ef 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
@@ -190,7 +190,7 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, | |||
190 | TP_ARGS(sp) | 190 | TP_ARGS(sp) |
191 | ); | 191 | ); |
192 | 192 | ||
193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, | 193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, |
194 | TP_PROTO(struct kvm_mmu_page *sp), | 194 | TP_PROTO(struct kvm_mmu_page *sp), |
195 | 195 | ||
196 | TP_ARGS(sp) | 196 | TP_ARGS(sp) |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 2331bdc2b549..51ef9097960d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -7,6 +7,7 @@ | |||
7 | * MMU support | 7 | * MMU support |
8 | * | 8 | * |
9 | * Copyright (C) 2006 Qumranet, Inc. | 9 | * Copyright (C) 2006 Qumranet, Inc. |
10 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
10 | * | 11 | * |
11 | * Authors: | 12 | * Authors: |
12 | * Yaniv Kamay <yaniv@qumranet.com> | 13 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -118,21 +119,25 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
118 | { | 119 | { |
119 | pt_element_t pte; | 120 | pt_element_t pte; |
120 | gfn_t table_gfn; | 121 | gfn_t table_gfn; |
121 | unsigned index, pt_access, pte_access; | 122 | unsigned index, pt_access, uninitialized_var(pte_access); |
122 | gpa_t pte_gpa; | 123 | gpa_t pte_gpa; |
123 | int rsvd_fault = 0; | 124 | bool eperm, present, rsvd_fault; |
124 | 125 | ||
125 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, | 126 | trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, |
126 | fetch_fault); | 127 | fetch_fault); |
127 | walk: | 128 | walk: |
129 | present = true; | ||
130 | eperm = rsvd_fault = false; | ||
128 | walker->level = vcpu->arch.mmu.root_level; | 131 | walker->level = vcpu->arch.mmu.root_level; |
129 | pte = vcpu->arch.cr3; | 132 | pte = vcpu->arch.cr3; |
130 | #if PTTYPE == 64 | 133 | #if PTTYPE == 64 |
131 | if (!is_long_mode(vcpu)) { | 134 | if (!is_long_mode(vcpu)) { |
132 | pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); | 135 | pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); |
133 | trace_kvm_mmu_paging_element(pte, walker->level); | 136 | trace_kvm_mmu_paging_element(pte, walker->level); |
134 | if (!is_present_gpte(pte)) | 137 | if (!is_present_gpte(pte)) { |
135 | goto not_present; | 138 | present = false; |
139 | goto error; | ||
140 | } | ||
136 | --walker->level; | 141 | --walker->level; |
137 | } | 142 | } |
138 | #endif | 143 | #endif |
@@ -150,37 +155,42 @@ walk: | |||
150 | walker->table_gfn[walker->level - 1] = table_gfn; | 155 | walker->table_gfn[walker->level - 1] = table_gfn; |
151 | walker->pte_gpa[walker->level - 1] = pte_gpa; | 156 | walker->pte_gpa[walker->level - 1] = pte_gpa; |
152 | 157 | ||
153 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) | 158 | if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) { |
154 | goto not_present; | 159 | present = false; |
160 | break; | ||
161 | } | ||
155 | 162 | ||
156 | trace_kvm_mmu_paging_element(pte, walker->level); | 163 | trace_kvm_mmu_paging_element(pte, walker->level); |
157 | 164 | ||
158 | if (!is_present_gpte(pte)) | 165 | if (!is_present_gpte(pte)) { |
159 | goto not_present; | 166 | present = false; |
167 | break; | ||
168 | } | ||
160 | 169 | ||
161 | rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); | 170 | if (is_rsvd_bits_set(vcpu, pte, walker->level)) { |
162 | if (rsvd_fault) | 171 | rsvd_fault = true; |
163 | goto access_error; | 172 | break; |
173 | } | ||
164 | 174 | ||
165 | if (write_fault && !is_writable_pte(pte)) | 175 | if (write_fault && !is_writable_pte(pte)) |
166 | if (user_fault || is_write_protection(vcpu)) | 176 | if (user_fault || is_write_protection(vcpu)) |
167 | goto access_error; | 177 | eperm = true; |
168 | 178 | ||
169 | if (user_fault && !(pte & PT_USER_MASK)) | 179 | if (user_fault && !(pte & PT_USER_MASK)) |
170 | goto access_error; | 180 | eperm = true; |
171 | 181 | ||
172 | #if PTTYPE == 64 | 182 | #if PTTYPE == 64 |
173 | if (fetch_fault && (pte & PT64_NX_MASK)) | 183 | if (fetch_fault && (pte & PT64_NX_MASK)) |
174 | goto access_error; | 184 | eperm = true; |
175 | #endif | 185 | #endif |
176 | 186 | ||
177 | if (!(pte & PT_ACCESSED_MASK)) { | 187 | if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) { |
178 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, | 188 | trace_kvm_mmu_set_accessed_bit(table_gfn, index, |
179 | sizeof(pte)); | 189 | sizeof(pte)); |
180 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
181 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, | 190 | if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, |
182 | index, pte, pte|PT_ACCESSED_MASK)) | 191 | index, pte, pte|PT_ACCESSED_MASK)) |
183 | goto walk; | 192 | goto walk; |
193 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
184 | pte |= PT_ACCESSED_MASK; | 194 | pte |= PT_ACCESSED_MASK; |
185 | } | 195 | } |
186 | 196 | ||
@@ -213,15 +223,18 @@ walk: | |||
213 | --walker->level; | 223 | --walker->level; |
214 | } | 224 | } |
215 | 225 | ||
226 | if (!present || eperm || rsvd_fault) | ||
227 | goto error; | ||
228 | |||
216 | if (write_fault && !is_dirty_gpte(pte)) { | 229 | if (write_fault && !is_dirty_gpte(pte)) { |
217 | bool ret; | 230 | bool ret; |
218 | 231 | ||
219 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); | 232 | trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); |
220 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
221 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, | 233 | ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte, |
222 | pte|PT_DIRTY_MASK); | 234 | pte|PT_DIRTY_MASK); |
223 | if (ret) | 235 | if (ret) |
224 | goto walk; | 236 | goto walk; |
237 | mark_page_dirty(vcpu->kvm, table_gfn); | ||
225 | pte |= PT_DIRTY_MASK; | 238 | pte |= PT_DIRTY_MASK; |
226 | walker->ptes[walker->level - 1] = pte; | 239 | walker->ptes[walker->level - 1] = pte; |
227 | } | 240 | } |
@@ -229,22 +242,18 @@ walk: | |||
229 | walker->pt_access = pt_access; | 242 | walker->pt_access = pt_access; |
230 | walker->pte_access = pte_access; | 243 | walker->pte_access = pte_access; |
231 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", | 244 | pgprintk("%s: pte %llx pte_access %x pt_access %x\n", |
232 | __func__, (u64)pte, pt_access, pte_access); | 245 | __func__, (u64)pte, pte_access, pt_access); |
233 | return 1; | 246 | return 1; |
234 | 247 | ||
235 | not_present: | 248 | error: |
236 | walker->error_code = 0; | 249 | walker->error_code = 0; |
237 | goto err; | 250 | if (present) |
238 | 251 | walker->error_code |= PFERR_PRESENT_MASK; | |
239 | access_error: | ||
240 | walker->error_code = PFERR_PRESENT_MASK; | ||
241 | |||
242 | err: | ||
243 | if (write_fault) | 252 | if (write_fault) |
244 | walker->error_code |= PFERR_WRITE_MASK; | 253 | walker->error_code |= PFERR_WRITE_MASK; |
245 | if (user_fault) | 254 | if (user_fault) |
246 | walker->error_code |= PFERR_USER_MASK; | 255 | walker->error_code |= PFERR_USER_MASK; |
247 | if (fetch_fault) | 256 | if (fetch_fault && is_nx(vcpu)) |
248 | walker->error_code |= PFERR_FETCH_MASK; | 257 | walker->error_code |= PFERR_FETCH_MASK; |
249 | if (rsvd_fault) | 258 | if (rsvd_fault) |
250 | walker->error_code |= PFERR_RSVD_MASK; | 259 | walker->error_code |= PFERR_RSVD_MASK; |
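With the old not_present/access_error labels folded into the present/eperm/rsvd_fault flags, the fault error code is now assembled in exactly one place. Worked example: a user-mode write through a present but read-only pte reaches the error label with present = true and eperm = true, so the walker reports

        walker->error_code = PFERR_PRESENT_MASK     /* 0x1: translation existed  */
                           | PFERR_WRITE_MASK       /* 0x2: access was a write   */
                           | PFERR_USER_MASK;       /* 0x4: it came from CPL 3   */
        /* i.e. error_code == 0x7, matching what the hardware would push. */

Note also the new is_nx(vcpu) guard on PFERR_FETCH_MASK: the fetch bit of the error code is only architecturally reported when EFER.NX is enabled.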
@@ -252,7 +261,7 @@ err: | |||
252 | return 0; | 261 | return 0; |
253 | } | 262 | } |
254 | 263 | ||
255 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | 264 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
256 | u64 *spte, const void *pte) | 265 | u64 *spte, const void *pte) |
257 | { | 266 | { |
258 | pt_element_t gpte; | 267 | pt_element_t gpte; |
@@ -263,7 +272,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
263 | gpte = *(const pt_element_t *)pte; | 272 | gpte = *(const pt_element_t *)pte; |
264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 273 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
265 | if (!is_present_gpte(gpte)) { | 274 | if (!is_present_gpte(gpte)) { |
266 | if (page->unsync) | 275 | if (sp->unsync) |
267 | new_spte = shadow_trap_nonpresent_pte; | 276 | new_spte = shadow_trap_nonpresent_pte; |
268 | else | 277 | else |
269 | new_spte = shadow_notrap_nonpresent_pte; | 278 | new_spte = shadow_notrap_nonpresent_pte; |
@@ -272,7 +281,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
272 | return; | 281 | return; |
273 | } | 282 | } |
274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 283 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
275 | pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); | 284 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
276 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 285 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) |
277 | return; | 286 | return; |
278 | pfn = vcpu->arch.update_pte.pfn; | 287 | pfn = vcpu->arch.update_pte.pfn; |
@@ -285,11 +294,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
285 | * we call mmu_set_spte() with reset_host_protection = true because that | 294 | * we call mmu_set_spte() with reset_host_protection = true because that |
286 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 295 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). |
287 | */ | 296 | */ |
288 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 297 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
289 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, | 298 | is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL, |
290 | gpte_to_gfn(gpte), pfn, true, true); | 299 | gpte_to_gfn(gpte), pfn, true, true); |
291 | } | 300 | } |
292 | 301 | ||
302 | static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, | ||
303 | struct guest_walker *gw, int level) | ||
304 | { | ||
305 | int r; | ||
306 | pt_element_t curr_pte; | ||
307 | |||
308 | r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1], | ||
309 | &curr_pte, sizeof(curr_pte)); | ||
310 | return r || curr_pte != gw->ptes[level - 1]; | ||
311 | } | ||
312 | |||
293 | /* | 313 | /* |
294 | * Fetch a shadow pte for a specific level in the paging hierarchy. | 314 | * Fetch a shadow pte for a specific level in the paging hierarchy. |
295 | */ | 315 | */ |
@@ -299,75 +319,86 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
299 | int *ptwrite, pfn_t pfn) | 319 | int *ptwrite, pfn_t pfn) |
300 | { | 320 | { |
301 | unsigned access = gw->pt_access; | 321 | unsigned access = gw->pt_access; |
302 | struct kvm_mmu_page *shadow_page; | 322 | struct kvm_mmu_page *sp = NULL; |
303 | u64 spte, *sptep = NULL; | 323 | bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]); |
304 | int direct; | 324 | int top_level; |
305 | gfn_t table_gfn; | 325 | unsigned direct_access; |
306 | int r; | 326 | struct kvm_shadow_walk_iterator it; |
307 | int level; | ||
308 | pt_element_t curr_pte; | ||
309 | struct kvm_shadow_walk_iterator iterator; | ||
310 | 327 | ||
311 | if (!is_present_gpte(gw->ptes[gw->level - 1])) | 328 | if (!is_present_gpte(gw->ptes[gw->level - 1])) |
312 | return NULL; | 329 | return NULL; |
313 | 330 | ||
314 | for_each_shadow_entry(vcpu, addr, iterator) { | 331 | direct_access = gw->pt_access & gw->pte_access; |
315 | level = iterator.level; | 332 | if (!dirty) |
316 | sptep = iterator.sptep; | 333 | direct_access &= ~ACC_WRITE_MASK; |
317 | if (iterator.level == hlevel) { | ||
318 | mmu_set_spte(vcpu, sptep, access, | ||
319 | gw->pte_access & access, | ||
320 | user_fault, write_fault, | ||
321 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | ||
322 | ptwrite, level, | ||
323 | gw->gfn, pfn, false, true); | ||
324 | break; | ||
325 | } | ||
326 | 334 | ||
327 | if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) | 335 | top_level = vcpu->arch.mmu.root_level; |
328 | continue; | 336 | if (top_level == PT32E_ROOT_LEVEL) |
337 | top_level = PT32_ROOT_LEVEL; | ||
338 | /* | ||
339 | * Verify that the top-level gpte is still there. Since the page | ||
340 | * is a root page, it is either write protected (and cannot be | ||
341 | * changed from now on) or it is invalid (in which case, we don't | ||
342 | * really care if it changes underneath us after this point). | ||
343 | */ | ||
344 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) | ||
345 | goto out_gpte_changed; | ||
329 | 346 | ||
330 | if (is_large_pte(*sptep)) { | 347 | for (shadow_walk_init(&it, vcpu, addr); |
331 | rmap_remove(vcpu->kvm, sptep); | 348 | shadow_walk_okay(&it) && it.level > gw->level; |
332 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 349 | shadow_walk_next(&it)) { |
333 | kvm_flush_remote_tlbs(vcpu->kvm); | 350 | gfn_t table_gfn; |
334 | } | ||
335 | 351 | ||
336 | if (level <= gw->level) { | 352 | drop_large_spte(vcpu, it.sptep); |
337 | int delta = level - gw->level + 1; | 353 | |
338 | direct = 1; | 354 | sp = NULL; |
339 | if (!is_dirty_gpte(gw->ptes[level - delta])) | 355 | if (!is_shadow_present_pte(*it.sptep)) { |
340 | access &= ~ACC_WRITE_MASK; | 356 | table_gfn = gw->table_gfn[it.level - 2]; |
341 | table_gfn = gpte_to_gfn(gw->ptes[level - delta]); | 357 | sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, |
342 | /* advance table_gfn when emulating 1gb pages with 4k */ | 358 | false, access, it.sptep); |
343 | if (delta == 0) | ||
344 | table_gfn += PT_INDEX(addr, level); | ||
345 | access &= gw->pte_access; | ||
346 | } else { | ||
347 | direct = 0; | ||
348 | table_gfn = gw->table_gfn[level - 2]; | ||
349 | } | ||
350 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | ||
351 | direct, access, sptep); | ||
352 | if (!direct) { | ||
353 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
354 | gw->pte_gpa[level - 2], | ||
355 | &curr_pte, sizeof(curr_pte)); | ||
356 | if (r || curr_pte != gw->ptes[level - 2]) { | ||
357 | kvm_mmu_put_page(shadow_page, sptep); | ||
358 | kvm_release_pfn_clean(pfn); | ||
359 | sptep = NULL; | ||
360 | break; | ||
361 | } | ||
362 | } | 359 | } |
363 | 360 | ||
364 | spte = __pa(shadow_page->spt) | 361 | /* |
365 | | PT_PRESENT_MASK | PT_ACCESSED_MASK | 362 | * Verify that the gpte in the page we've just write |
366 | | PT_WRITABLE_MASK | PT_USER_MASK; | 363 | * protected is still there. |
367 | *sptep = spte; | 364 | */ |
365 | if (FNAME(gpte_changed)(vcpu, gw, it.level - 1)) | ||
366 | goto out_gpte_changed; | ||
367 | |||
368 | if (sp) | ||
369 | link_shadow_page(it.sptep, sp); | ||
368 | } | 370 | } |
369 | 371 | ||
370 | return sptep; | 372 | for (; |
373 | shadow_walk_okay(&it) && it.level > hlevel; | ||
374 | shadow_walk_next(&it)) { | ||
375 | gfn_t direct_gfn; | ||
376 | |||
377 | validate_direct_spte(vcpu, it.sptep, direct_access); | ||
378 | |||
379 | drop_large_spte(vcpu, it.sptep); | ||
380 | |||
381 | if (is_shadow_present_pte(*it.sptep)) | ||
382 | continue; | ||
383 | |||
384 | direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); | ||
385 | |||
386 | sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, | ||
387 | true, direct_access, it.sptep); | ||
388 | link_shadow_page(it.sptep, sp); | ||
389 | } | ||
390 | |||
391 | mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, | ||
392 | user_fault, write_fault, dirty, ptwrite, it.level, | ||
393 | gw->gfn, pfn, false, true); | ||
394 | |||
395 | return it.sptep; | ||
396 | |||
397 | out_gpte_changed: | ||
398 | if (sp) | ||
399 | kvm_mmu_put_page(sp, it.sptep); | ||
400 | kvm_release_pfn_clean(pfn); | ||
401 | return NULL; | ||
371 | } | 402 | } |
372 | 403 | ||
373 | /* | 404 | /* |
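The rewritten FNAME(fetch)() above replaces the single mixed loop with two explicit phases and re-validates guest ptes via the new gpte_changed() helper each time a guest table is write-protected. An outline of the control flow, for orientation:

        /*
         * 1. Re-check the top-level gpte: a root is either write-protected
         *    (cannot change anymore) or already invalid (we no longer care).
         * 2. Walk down to gw->level creating *indirect* shadow pages; after
         *    each kvm_mmu_get_page() -- which write-protects the guest page
         *    the gpte came from -- re-check that gpte and bail on a race.
         * 3. Below gw->level install *direct* pages (large-page splitting),
         *    with existing entries vetted by validate_direct_spte().
         * 4. Install the leaf via mmu_set_spte().
         *
         * out_gpte_changed: drop the half-linked page and the pfn; the
         * fault is retried and the walk restarts from a consistent state.
         */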
@@ -431,11 +462,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
431 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 462 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
432 | 463 | ||
433 | /* mmio */ | 464 | /* mmio */ |
434 | if (is_error_pfn(pfn)) { | 465 | if (is_error_pfn(pfn)) |
435 | pgprintk("gfn %lx is mmio\n", walker.gfn); | 466 | return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn); |
436 | kvm_release_pfn_clean(pfn); | ||
437 | return 1; | ||
438 | } | ||
439 | 467 | ||
440 | spin_lock(&vcpu->kvm->mmu_lock); | 468 | spin_lock(&vcpu->kvm->mmu_lock); |
441 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 469 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
@@ -443,6 +471,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
443 | kvm_mmu_free_some_pages(vcpu); | 471 | kvm_mmu_free_some_pages(vcpu); |
444 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 472 | sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
445 | level, &write_pt, pfn); | 473 | level, &write_pt, pfn); |
474 | (void)sptep; | ||
446 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, | 475 | pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, |
447 | sptep, *sptep, write_pt); | 476 | sptep, *sptep, write_pt); |
448 | 477 | ||
@@ -464,6 +493,7 @@ out_unlock: | |||
464 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 493 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
465 | { | 494 | { |
466 | struct kvm_shadow_walk_iterator iterator; | 495 | struct kvm_shadow_walk_iterator iterator; |
496 | struct kvm_mmu_page *sp; | ||
467 | gpa_t pte_gpa = -1; | 497 | gpa_t pte_gpa = -1; |
468 | int level; | 498 | int level; |
469 | u64 *sptep; | 499 | u64 *sptep; |
@@ -475,10 +505,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
475 | level = iterator.level; | 505 | level = iterator.level; |
476 | sptep = iterator.sptep; | 506 | sptep = iterator.sptep; |
477 | 507 | ||
508 | sp = page_header(__pa(sptep)); | ||
478 | if (is_last_spte(*sptep, level)) { | 509 | if (is_last_spte(*sptep, level)) { |
479 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
480 | int offset, shift; | 510 | int offset, shift; |
481 | 511 | ||
512 | if (!sp->unsync) | ||
513 | break; | ||
514 | |||
482 | shift = PAGE_SHIFT - | 515 | shift = PAGE_SHIFT - |
483 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | 516 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; |
484 | offset = sp->role.quadrant << shift; | 517 | offset = sp->role.quadrant << shift; |
@@ -487,16 +520,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
487 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | 520 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); |
488 | 521 | ||
489 | if (is_shadow_present_pte(*sptep)) { | 522 | if (is_shadow_present_pte(*sptep)) { |
490 | rmap_remove(vcpu->kvm, sptep); | ||
491 | if (is_large_pte(*sptep)) | 523 | if (is_large_pte(*sptep)) |
492 | --vcpu->kvm->stat.lpages; | 524 | --vcpu->kvm->stat.lpages; |
525 | drop_spte(vcpu->kvm, sptep, | ||
526 | shadow_trap_nonpresent_pte); | ||
493 | need_flush = 1; | 527 | need_flush = 1; |
494 | } | 528 | } else |
495 | __set_spte(sptep, shadow_trap_nonpresent_pte); | 529 | __set_spte(sptep, shadow_trap_nonpresent_pte); |
496 | break; | 530 | break; |
497 | } | 531 | } |
498 | 532 | ||
499 | if (!is_shadow_present_pte(*sptep)) | 533 | if (!is_shadow_present_pte(*sptep) || !sp->unsync_children) |
500 | break; | 534 | break; |
501 | } | 535 | } |
502 | 536 | ||
@@ -570,9 +604,9 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
570 | * Using the cached information from sp->gfns is safe because: | 604 | * Using the cached information from sp->gfns is safe because: |
571 | * - The spte has a reference to the struct page, so the pfn for a given gfn | 605 | * - The spte has a reference to the struct page, so the pfn for a given gfn |
572 | * can't change unless all sptes pointing to it are nuked first. | 606 | * can't change unless all sptes pointing to it are nuked first. |
573 | * - Alias changes zap the entire shadow cache. | ||
574 | */ | 607 | */ |
575 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 608 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
609 | bool clear_unsync) | ||
576 | { | 610 | { |
577 | int i, offset, nr_present; | 611 | int i, offset, nr_present; |
578 | bool reset_host_protection; | 612 | bool reset_host_protection; |
@@ -580,6 +614,9 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
580 | 614 | ||
581 | offset = nr_present = 0; | 615 | offset = nr_present = 0; |
582 | 616 | ||
617 | /* direct kvm_mmu_page cannot be unsync. */ | ||
618 | BUG_ON(sp->role.direct); | ||
619 | |||
583 | if (PTTYPE == 32) | 620 | if (PTTYPE == 32) |
584 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 621 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
585 | 622 | ||
@@ -589,7 +626,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
589 | unsigned pte_access; | 626 | unsigned pte_access; |
590 | pt_element_t gpte; | 627 | pt_element_t gpte; |
591 | gpa_t pte_gpa; | 628 | gpa_t pte_gpa; |
592 | gfn_t gfn = sp->gfns[i]; | 629 | gfn_t gfn; |
593 | 630 | ||
594 | if (!is_shadow_present_pte(sp->spt[i])) | 631 | if (!is_shadow_present_pte(sp->spt[i])) |
595 | continue; | 632 | continue; |
@@ -600,16 +637,17 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
600 | sizeof(pt_element_t))) | 637 | sizeof(pt_element_t))) |
601 | return -EINVAL; | 638 | return -EINVAL; |
602 | 639 | ||
603 | if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) || | 640 | gfn = gpte_to_gfn(gpte); |
604 | !(gpte & PT_ACCESSED_MASK)) { | 641 | if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) |
642 | || gfn != sp->gfns[i] || !is_present_gpte(gpte) | ||
643 | || !(gpte & PT_ACCESSED_MASK)) { | ||
605 | u64 nonpresent; | 644 | u64 nonpresent; |
606 | 645 | ||
607 | rmap_remove(vcpu->kvm, &sp->spt[i]); | 646 | if (is_present_gpte(gpte) || !clear_unsync) |
608 | if (is_present_gpte(gpte)) | ||
609 | nonpresent = shadow_trap_nonpresent_pte; | 647 | nonpresent = shadow_trap_nonpresent_pte; |
610 | else | 648 | else |
611 | nonpresent = shadow_notrap_nonpresent_pte; | 649 | nonpresent = shadow_notrap_nonpresent_pte; |
612 | __set_spte(&sp->spt[i], nonpresent); | 650 | drop_spte(vcpu->kvm, &sp->spt[i], nonpresent); |
613 | continue; | 651 | continue; |
614 | } | 652 | } |
615 | 653 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce438e0fdd26..56c9b6bd7655 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * AMD SVM support | 4 | * AMD SVM support |
5 | * | 5 | * |
6 | * Copyright (C) 2006 Qumranet, Inc. | 6 | * Copyright (C) 2006 Qumranet, Inc. |
7 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
7 | * | 8 | * |
8 | * Authors: | 9 | * Authors: |
9 | * Yaniv Kamay <yaniv@qumranet.com> | 10 | * Yaniv Kamay <yaniv@qumranet.com> |
@@ -285,11 +286,11 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) | |||
285 | 286 | ||
286 | static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 287 | static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
287 | { | 288 | { |
289 | vcpu->arch.efer = efer; | ||
288 | if (!npt_enabled && !(efer & EFER_LMA)) | 290 | if (!npt_enabled && !(efer & EFER_LMA)) |
289 | efer &= ~EFER_LME; | 291 | efer &= ~EFER_LME; |
290 | 292 | ||
291 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; | 293 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; |
292 | vcpu->arch.efer = efer; | ||
293 | } | 294 | } |
294 | 295 | ||
295 | static int is_external_interrupt(u32 info) | 296 | static int is_external_interrupt(u32 info) |
@@ -640,7 +641,7 @@ static __init int svm_hardware_setup(void) | |||
640 | 641 | ||
641 | if (nested) { | 642 | if (nested) { |
642 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); | 643 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); |
643 | kvm_enable_efer_bits(EFER_SVME); | 644 | kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); |
644 | } | 645 | } |
645 | 646 | ||
646 | for_each_possible_cpu(cpu) { | 647 | for_each_possible_cpu(cpu) { |
@@ -806,7 +807,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
806 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 807 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
807 | */ | 808 | */ |
808 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 809 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
809 | kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); | 810 | (void)kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); |
810 | 811 | ||
811 | save->cr4 = X86_CR4_PAE; | 812 | save->cr4 = X86_CR4_PAE; |
812 | /* rdx = ?? */ | 813 | /* rdx = ?? */ |
@@ -903,13 +904,18 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
903 | svm->asid_generation = 0; | 904 | svm->asid_generation = 0; |
904 | init_vmcb(svm); | 905 | init_vmcb(svm); |
905 | 906 | ||
906 | fx_init(&svm->vcpu); | 907 | err = fx_init(&svm->vcpu); |
908 | if (err) | ||
909 | goto free_page4; | ||
910 | |||
907 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 911 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
908 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 912 | if (kvm_vcpu_is_bsp(&svm->vcpu)) |
909 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 913 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
910 | 914 | ||
911 | return &svm->vcpu; | 915 | return &svm->vcpu; |
912 | 916 | ||
917 | free_page4: | ||
918 | __free_page(hsave_page); | ||
913 | free_page3: | 919 | free_page3: |
914 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); | 920 | __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); |
915 | free_page2: | 921 | free_page2: |
@@ -1488,7 +1494,7 @@ static void svm_handle_mce(struct vcpu_svm *svm) | |||
1488 | */ | 1494 | */ |
1489 | pr_err("KVM: Guest triggered AMD Erratum 383\n"); | 1495 | pr_err("KVM: Guest triggered AMD Erratum 383\n"); |
1490 | 1496 | ||
1491 | set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests); | 1497 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu); |
1492 | 1498 | ||
1493 | return; | 1499 | return; |
1494 | } | 1500 | } |
@@ -1535,7 +1541,7 @@ static int io_interception(struct vcpu_svm *svm) | |||
1535 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1541 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
1536 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1542 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
1537 | if (string || in) | 1543 | if (string || in) |
1538 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | 1544 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; |
1539 | 1545 | ||
1540 | port = io_info >> 16; | 1546 | port = io_info >> 16; |
1541 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1547 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
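The simplified returns in io_interception() and in the two emulate-on-interception handlers below lean on the exit-handler convention: 1 resumes the guest, 0 exits to userspace. Since the emulator is now expected to have filled vcpu->run for both the mmio and the failure case, comparing against EMULATE_DONE covers everything:

        /* exit-handler return convention (context for the change above):
         *   EMULATE_DONE    -> 1: emulation finished, re-enter the guest
         *   EMULATE_DO_MMIO -> 0: vcpu->run describes the mmio to userspace
         *   EMULATE_FAIL    -> 0: vcpu->run carries KVM_EXIT_INTERNAL_ERROR
         */
        return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;

The matching hunk in mmu.c (the EMULATE_DO_MMIO fall-through in kvm_mmu_page_fault) is the other half of this cleanup.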
@@ -1957,7 +1963,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
1957 | svm->vmcb->save.cr3 = hsave->save.cr3; | 1963 | svm->vmcb->save.cr3 = hsave->save.cr3; |
1958 | svm->vcpu.arch.cr3 = hsave->save.cr3; | 1964 | svm->vcpu.arch.cr3 = hsave->save.cr3; |
1959 | } else { | 1965 | } else { |
1960 | kvm_set_cr3(&svm->vcpu, hsave->save.cr3); | 1966 | (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); |
1961 | } | 1967 | } |
1962 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); | 1968 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); |
1963 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); | 1969 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); |
@@ -2080,7 +2086,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
2080 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; | 2086 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; |
2081 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; | 2087 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; |
2082 | } else | 2088 | } else |
2083 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); | 2089 | (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); |
2084 | 2090 | ||
2085 | /* Guest paging mode is active - reset mmu */ | 2091 | /* Guest paging mode is active - reset mmu */ |
2086 | kvm_mmu_reset_context(&svm->vcpu); | 2092 | kvm_mmu_reset_context(&svm->vcpu); |
@@ -2386,16 +2392,12 @@ static int iret_interception(struct vcpu_svm *svm) | |||
2386 | 2392 | ||
2387 | static int invlpg_interception(struct vcpu_svm *svm) | 2393 | static int invlpg_interception(struct vcpu_svm *svm) |
2388 | { | 2394 | { |
2389 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) | 2395 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; |
2390 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | ||
2391 | return 1; | ||
2392 | } | 2396 | } |
2393 | 2397 | ||
2394 | static int emulate_on_interception(struct vcpu_svm *svm) | 2398 | static int emulate_on_interception(struct vcpu_svm *svm) |
2395 | { | 2399 | { |
2396 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) | 2400 | return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE; |
2397 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | ||
2398 | return 1; | ||
2399 | } | 2401 | } |
2400 | 2402 | ||
2401 | static int cr8_write_interception(struct vcpu_svm *svm) | 2403 | static int cr8_write_interception(struct vcpu_svm *svm) |
@@ -2726,6 +2728,99 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2726 | [SVM_EXIT_NPF] = pf_interception, | 2728 | [SVM_EXIT_NPF] = pf_interception, |
2727 | }; | 2729 | }; |
2728 | 2730 | ||
2731 | void dump_vmcb(struct kvm_vcpu *vcpu) | ||
2732 | { | ||
2733 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2734 | struct vmcb_control_area *control = &svm->vmcb->control; | ||
2735 | struct vmcb_save_area *save = &svm->vmcb->save; | ||
2736 | |||
2737 | pr_err("VMCB Control Area:\n"); | ||
2738 | pr_err("cr_read: %04x\n", control->intercept_cr_read); | ||
2739 | pr_err("cr_write: %04x\n", control->intercept_cr_write); | ||
2740 | pr_err("dr_read: %04x\n", control->intercept_dr_read); | ||
2741 | pr_err("dr_write: %04x\n", control->intercept_dr_write); | ||
2742 | pr_err("exceptions: %08x\n", control->intercept_exceptions); | ||
2743 | pr_err("intercepts: %016llx\n", control->intercept); | ||
2744 | pr_err("pause filter count: %d\n", control->pause_filter_count); | ||
2745 | pr_err("iopm_base_pa: %016llx\n", control->iopm_base_pa); | ||
2746 | pr_err("msrpm_base_pa: %016llx\n", control->msrpm_base_pa); | ||
2747 | pr_err("tsc_offset: %016llx\n", control->tsc_offset); | ||
2748 | pr_err("asid: %d\n", control->asid); | ||
2749 | pr_err("tlb_ctl: %d\n", control->tlb_ctl); | ||
2750 | pr_err("int_ctl: %08x\n", control->int_ctl); | ||
2751 | pr_err("int_vector: %08x\n", control->int_vector); | ||
2752 | pr_err("int_state: %08x\n", control->int_state); | ||
2753 | pr_err("exit_code: %08x\n", control->exit_code); | ||
2754 | pr_err("exit_info1: %016llx\n", control->exit_info_1); | ||
2755 | pr_err("exit_info2: %016llx\n", control->exit_info_2); | ||
2756 | pr_err("exit_int_info: %08x\n", control->exit_int_info); | ||
2757 | pr_err("exit_int_info_err: %08x\n", control->exit_int_info_err); | ||
2758 | pr_err("nested_ctl: %lld\n", control->nested_ctl); | ||
2759 | pr_err("nested_cr3: %016llx\n", control->nested_cr3); | ||
2760 | pr_err("event_inj: %08x\n", control->event_inj); | ||
2761 | pr_err("event_inj_err: %08x\n", control->event_inj_err); | ||
2762 | pr_err("lbr_ctl: %lld\n", control->lbr_ctl); | ||
2763 | pr_err("next_rip: %016llx\n", control->next_rip); | ||
2764 | pr_err("VMCB State Save Area:\n"); | ||
2765 | pr_err("es: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2766 | save->es.selector, save->es.attrib, | ||
2767 | save->es.limit, save->es.base); | ||
2768 | pr_err("cs: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2769 | save->cs.selector, save->cs.attrib, | ||
2770 | save->cs.limit, save->cs.base); | ||
2771 | pr_err("ss: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2772 | save->ss.selector, save->ss.attrib, | ||
2773 | save->ss.limit, save->ss.base); | ||
2774 | pr_err("ds: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2775 | save->ds.selector, save->ds.attrib, | ||
2776 | save->ds.limit, save->ds.base); | ||
2777 | pr_err("fs: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2778 | save->fs.selector, save->fs.attrib, | ||
2779 | save->fs.limit, save->fs.base); | ||
2780 | pr_err("gs: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2781 | save->gs.selector, save->gs.attrib, | ||
2782 | save->gs.limit, save->gs.base); | ||
2783 | pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2784 | save->gdtr.selector, save->gdtr.attrib, | ||
2785 | save->gdtr.limit, save->gdtr.base); | ||
2786 | pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2787 | save->ldtr.selector, save->ldtr.attrib, | ||
2788 | save->ldtr.limit, save->ldtr.base); | ||
2789 | pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2790 | save->idtr.selector, save->idtr.attrib, | ||
2791 | save->idtr.limit, save->idtr.base); | ||
2792 | pr_err("tr: s: %04x a: %04x l: %08x b: %016llx\n", | ||
2793 | save->tr.selector, save->tr.attrib, | ||
2794 | save->tr.limit, save->tr.base); | ||
2795 | pr_err("cpl: %d efer: %016llx\n", | ||
2796 | save->cpl, save->efer); | ||
2797 | pr_err("cr0: %016llx cr2: %016llx\n", | ||
2798 | save->cr0, save->cr2); | ||
2799 | pr_err("cr3: %016llx cr4: %016llx\n", | ||
2800 | save->cr3, save->cr4); | ||
2801 | pr_err("dr6: %016llx dr7: %016llx\n", | ||
2802 | save->dr6, save->dr7); | ||
2803 | pr_err("rip: %016llx rflags: %016llx\n", | ||
2804 | save->rip, save->rflags); | ||
2805 | pr_err("rsp: %016llx rax: %016llx\n", | ||
2806 | save->rsp, save->rax); | ||
2807 | pr_err("star: %016llx lstar: %016llx\n", | ||
2808 | save->star, save->lstar); | ||
2809 | pr_err("cstar: %016llx sfmask: %016llx\n", | ||
2810 | save->cstar, save->sfmask); | ||
2811 | pr_err("kernel_gs_base: %016llx sysenter_cs: %016llx\n", | ||
2812 | save->kernel_gs_base, save->sysenter_cs); | ||
2813 | pr_err("sysenter_esp: %016llx sysenter_eip: %016llx\n", | ||
2814 | save->sysenter_esp, save->sysenter_eip); | ||
2815 | pr_err("gpat: %016llx dbgctl: %016llx\n", | ||
2816 | save->g_pat, save->dbgctl); | ||
2817 | pr_err("br_from: %016llx br_to: %016llx\n", | ||
2818 | save->br_from, save->br_to); | ||
2819 | pr_err("excp_from: %016llx excp_to: %016llx\n", | ||
2820 | save->last_excp_from, save->last_excp_to); | ||
2821 | |||
2822 | } | ||
2823 | |||
2729 | static int handle_exit(struct kvm_vcpu *vcpu) | 2824 | static int handle_exit(struct kvm_vcpu *vcpu) |
2730 | { | 2825 | { |
2731 | struct vcpu_svm *svm = to_svm(vcpu); | 2826 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -2770,6 +2865,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2770 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2865 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
2771 | kvm_run->fail_entry.hardware_entry_failure_reason | 2866 | kvm_run->fail_entry.hardware_entry_failure_reason |
2772 | = svm->vmcb->control.exit_code; | 2867 | = svm->vmcb->control.exit_code; |
2868 | pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); | ||
2869 | dump_vmcb(vcpu); | ||
2773 | return 0; | 2870 | return 0; |
2774 | } | 2871 | } |
2775 | 2872 | ||
@@ -2826,9 +2923,6 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
2826 | { | 2923 | { |
2827 | struct vmcb_control_area *control; | 2924 | struct vmcb_control_area *control; |
2828 | 2925 | ||
2829 | trace_kvm_inj_virq(irq); | ||
2830 | |||
2831 | ++svm->vcpu.stat.irq_injections; | ||
2832 | control = &svm->vmcb->control; | 2926 | control = &svm->vmcb->control; |
2833 | control->int_vector = irq; | 2927 | control->int_vector = irq; |
2834 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 2928 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
@@ -2842,6 +2936,9 @@ static void svm_set_irq(struct kvm_vcpu *vcpu) | |||
2842 | 2936 | ||
2843 | BUG_ON(!(gif_set(svm))); | 2937 | BUG_ON(!(gif_set(svm))); |
2844 | 2938 | ||
2939 | trace_kvm_inj_virq(vcpu->arch.interrupt.nr); | ||
2940 | ++vcpu->stat.irq_injections; | ||
2941 | |||
2845 | svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | | 2942 | svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | |
2846 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; | 2943 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; |
2847 | } | 2944 | } |
@@ -3327,6 +3424,11 @@ static bool svm_rdtscp_supported(void) | |||
3327 | return false; | 3424 | return false; |
3328 | } | 3425 | } |
3329 | 3426 | ||
3427 | static bool svm_has_wbinvd_exit(void) | ||
3428 | { | ||
3429 | return true; | ||
3430 | } | ||
3431 | |||
3330 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | 3432 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) |
3331 | { | 3433 | { |
3332 | struct vcpu_svm *svm = to_svm(vcpu); | 3434 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -3411,6 +3513,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
3411 | .rdtscp_supported = svm_rdtscp_supported, | 3513 | .rdtscp_supported = svm_rdtscp_supported, |
3412 | 3514 | ||
3413 | .set_supported_cpuid = svm_set_supported_cpuid, | 3515 | .set_supported_cpuid = svm_set_supported_cpuid, |
3516 | |||
3517 | .has_wbinvd_exit = svm_has_wbinvd_exit, | ||
3414 | }; | 3518 | }; |
3415 | 3519 | ||
3416 | static int __init svm_init(void) | 3520 | static int __init svm_init(void) |
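svm_has_wbinvd_exit() is wired into svm_x86_ops so common x86 code can ask the vendor module whether WBINVD traps; VMX presumably answers through a callback of its own. The mechanism is just another function pointer on the shared ops table (a sketch; field placement and the flag name are assumptions):

        /* in the shared kvm_x86_ops vtable (sketch): */
        struct kvm_x86_ops {
                /* ...existing callbacks... */
                bool (*has_wbinvd_exit)(void);
        };

        /* common-code usage (sketch; hypothetical flag name): */
        if (kvm_x86_ops->has_wbinvd_exit())
                need_wbinvd_emulation = true;   /* run wbinvd on cpus the vcpu used */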
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index 4ddadb1a5ffe..e16a0dbe74d8 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -1,3 +1,17 @@ | |||
1 | /* | ||
2 | * Kernel-based Virtual Machine driver for Linux | ||
3 | * | ||
4 | * This module enables machines with Intel VT-x extensions to run virtual | ||
5 | * machines without emulation or binary translation. | ||
6 | * | ||
7 | * timer support | ||
8 | * | ||
9 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
10 | * | ||
11 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
12 | * the COPYING file in the top-level directory. | ||
13 | */ | ||
14 | |||
1 | #include <linux/kvm_host.h> | 15 | #include <linux/kvm_host.h> |
2 | #include <linux/kvm.h> | 16 | #include <linux/kvm.h> |
3 | #include <linux/hrtimer.h> | 17 | #include <linux/hrtimer.h> |
@@ -18,7 +32,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
18 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 32 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
19 | atomic_inc(&ktimer->pending); | 33 | atomic_inc(&ktimer->pending); |
20 | /* FIXME: this code should not know anything about vcpus */ | 34 | /* FIXME: this code should not know anything about vcpus */ |
21 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); | 35 | kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); |
22 | } | 36 | } |
23 | 37 | ||
24 | if (waitqueue_active(q)) | 38 | if (waitqueue_active(q)) |
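The timer.c change replaces an open-coded set_bit() on vcpu->requests with the kvm_make_request() wrapper; in kernels of this vintage the wrapper is, as far as include/linux/kvm_host.h goes, nothing more than:

    static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
    {
        set_bit(req, &vcpu->requests);
    }

so the conversion is a pure readability cleanup, and the same substitution recurs across the x86.c hunks further down.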
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ee03679efe78..27a0222c2946 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * machines without emulation or binary translation. | 5 | * machines without emulation or binary translation. |
6 | * | 6 | * |
7 | * Copyright (C) 2006 Qumranet, Inc. | 7 | * Copyright (C) 2006 Qumranet, Inc. |
8 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
8 | * | 9 | * |
9 | * Authors: | 10 | * Authors: |
10 | * Avi Kivity <avi@qumranet.com> | 11 | * Avi Kivity <avi@qumranet.com> |
@@ -36,6 +37,8 @@ | |||
36 | #include <asm/vmx.h> | 37 | #include <asm/vmx.h> |
37 | #include <asm/virtext.h> | 38 | #include <asm/virtext.h> |
38 | #include <asm/mce.h> | 39 | #include <asm/mce.h> |
40 | #include <asm/i387.h> | ||
41 | #include <asm/xcr.h> | ||
39 | 42 | ||
40 | #include "trace.h" | 43 | #include "trace.h" |
41 | 44 | ||
@@ -63,6 +66,9 @@ module_param_named(unrestricted_guest, | |||
63 | static int __read_mostly emulate_invalid_guest_state = 0; | 66 | static int __read_mostly emulate_invalid_guest_state = 0; |
64 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 67 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
65 | 68 | ||
69 | static int __read_mostly vmm_exclusive = 1; | ||
70 | module_param(vmm_exclusive, bool, S_IRUGO); | ||
71 | |||
66 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | 72 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ |
67 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | 73 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) |
68 | #define KVM_GUEST_CR0_MASK \ | 74 | #define KVM_GUEST_CR0_MASK \ |
@@ -173,10 +179,13 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
173 | 179 | ||
174 | static int init_rmode(struct kvm *kvm); | 180 | static int init_rmode(struct kvm *kvm); |
175 | static u64 construct_eptp(unsigned long root_hpa); | 181 | static u64 construct_eptp(unsigned long root_hpa); |
182 | static void kvm_cpu_vmxon(u64 addr); | ||
183 | static void kvm_cpu_vmxoff(void); | ||
176 | 184 | ||
177 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
178 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
179 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); | 187 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); |
188 | static DEFINE_PER_CPU(struct desc_ptr, host_gdt); | ||
180 | 189 | ||
181 | static unsigned long *vmx_io_bitmap_a; | 190 | static unsigned long *vmx_io_bitmap_a; |
182 | static unsigned long *vmx_io_bitmap_b; | 191 | static unsigned long *vmx_io_bitmap_b; |
@@ -334,6 +343,11 @@ static inline bool cpu_has_vmx_ept_1g_page(void) | |||
334 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; | 343 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; |
335 | } | 344 | } |
336 | 345 | ||
346 | static inline bool cpu_has_vmx_ept_4levels(void) | ||
347 | { | ||
348 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; | ||
349 | } | ||
350 | |||
337 | static inline bool cpu_has_vmx_invept_individual_addr(void) | 351 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
338 | { | 352 | { |
339 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; | 353 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
@@ -349,6 +363,16 @@ static inline bool cpu_has_vmx_invept_global(void) | |||
349 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; | 363 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; |
350 | } | 364 | } |
351 | 365 | ||
366 | static inline bool cpu_has_vmx_invvpid_single(void) | ||
367 | { | ||
368 | return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT; | ||
369 | } | ||
370 | |||
371 | static inline bool cpu_has_vmx_invvpid_global(void) | ||
372 | { | ||
373 | return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; | ||
374 | } | ||
375 | |||
352 | static inline bool cpu_has_vmx_ept(void) | 376 | static inline bool cpu_has_vmx_ept(void) |
353 | { | 377 | { |
354 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 378 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
@@ -389,6 +413,12 @@ static inline bool cpu_has_virtual_nmis(void) | |||
389 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 413 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
390 | } | 414 | } |
391 | 415 | ||
416 | static inline bool cpu_has_vmx_wbinvd_exit(void) | ||
417 | { | ||
418 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
419 | SECONDARY_EXEC_WBINVD_EXITING; | ||
420 | } | ||
421 | |||
392 | static inline bool report_flexpriority(void) | 422 | static inline bool report_flexpriority(void) |
393 | { | 423 | { |
394 | return flexpriority_enabled; | 424 | return flexpriority_enabled; |
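The cpu_has_vmx_invvpid_single()/cpu_has_vmx_invvpid_global() probes added above read the VPID half of the IA32_VMX_EPT_VPID_CAP MSR so the flush helpers later in this file can pick a supported INVVPID type. For reference, the architectural operand layout and extent encodings (per the Intel SDM; compilable sketch only, since the instruction itself requires VMX root mode):

    #include <stdint.h>

    /* INVVPID takes a 128-bit in-memory descriptor plus an extent
     * selector in a register (encodings per the Intel SDM). */
    struct invvpid_desc {
        uint64_t vpid     : 16;
        uint64_t reserved : 48;
        uint64_t linear_addr;
    };

    enum invvpid_extent {
        INVVPID_INDIVIDUAL_ADDR          = 0,
        INVVPID_SINGLE_CONTEXT           = 1,
        INVVPID_ALL_CONTEXT              = 2,
        INVVPID_SINGLE_CONTEXT_NO_GLOBAL = 3,
    };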
@@ -453,6 +483,19 @@ static void vmcs_clear(struct vmcs *vmcs) | |||
453 | vmcs, phys_addr); | 483 | vmcs, phys_addr); |
454 | } | 484 | } |
455 | 485 | ||
486 | static void vmcs_load(struct vmcs *vmcs) | ||
487 | { | ||
488 | u64 phys_addr = __pa(vmcs); | ||
489 | u8 error; | ||
490 | |||
491 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" | ||
492 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | ||
493 | : "cc", "memory"); | ||
494 | if (error) | ||
495 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | ||
496 | vmcs, phys_addr); | ||
497 | } | ||
498 | |||
456 | static void __vcpu_clear(void *arg) | 499 | static void __vcpu_clear(void *arg) |
457 | { | 500 | { |
458 | struct vcpu_vmx *vmx = arg; | 501 | struct vcpu_vmx *vmx = arg; |
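The new vmcs_load() reuses the error-capture idiom already used by vmcs_clear(): a VMX instruction signals VMfailInvalid/VMfailValid through CF/ZF, and a trailing setna stores 1 exactly when CF=1 or ZF=1 ("not above"). A userspace demonstration of just that flag-to-byte idiom, with a self-comparing cmp standing in for the privileged VMPTRLD:

    #include <stdio.h>

    int main(void)
    {
        unsigned char error;

        /* cmp of a register with itself forces ZF=1, so setna
         * captures the "failed" condition just as vmcs_load() does. */
        asm volatile ("cmpl %1, %1; setna %0"
                      : "=q"(error)
                      : "r"(0)
                      : "cc");
        printf("setna captured failure flag: %u\n", error);
        return 0;
    }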
@@ -475,12 +518,27 @@ static void vcpu_clear(struct vcpu_vmx *vmx) | |||
475 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); | 518 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); |
476 | } | 519 | } |
477 | 520 | ||
478 | static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) | 521 | static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) |
479 | { | 522 | { |
480 | if (vmx->vpid == 0) | 523 | if (vmx->vpid == 0) |
481 | return; | 524 | return; |
482 | 525 | ||
483 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | 526 | if (cpu_has_vmx_invvpid_single()) |
527 | __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); | ||
528 | } | ||
529 | |||
530 | static inline void vpid_sync_vcpu_global(void) | ||
531 | { | ||
532 | if (cpu_has_vmx_invvpid_global()) | ||
533 | __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); | ||
534 | } | ||
535 | |||
536 | static inline void vpid_sync_context(struct vcpu_vmx *vmx) | ||
537 | { | ||
538 | if (cpu_has_vmx_invvpid_single()) | ||
539 | vpid_sync_vcpu_single(vmx); | ||
540 | else | ||
541 | vpid_sync_vcpu_global(); | ||
484 | } | 542 | } |
485 | 543 | ||
486 | static inline void ept_sync_global(void) | 544 | static inline void ept_sync_global(void) |
@@ -812,6 +870,9 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
812 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 870 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
813 | } | 871 | } |
814 | #endif | 872 | #endif |
873 | if (current_thread_info()->status & TS_USEDFPU) | ||
874 | clts(); | ||
875 | load_gdt(&__get_cpu_var(host_gdt)); | ||
815 | } | 876 | } |
816 | 877 | ||
817 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 878 | static void vmx_load_host_state(struct vcpu_vmx *vmx) |
@@ -828,35 +889,30 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) | |||
828 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 889 | static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
829 | { | 890 | { |
830 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 891 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
831 | u64 phys_addr = __pa(vmx->vmcs); | ||
832 | u64 tsc_this, delta, new_offset; | 892 | u64 tsc_this, delta, new_offset; |
893 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | ||
833 | 894 | ||
834 | if (vcpu->cpu != cpu) { | 895 | if (!vmm_exclusive) |
896 | kvm_cpu_vmxon(phys_addr); | ||
897 | else if (vcpu->cpu != cpu) | ||
835 | vcpu_clear(vmx); | 898 | vcpu_clear(vmx); |
836 | kvm_migrate_timers(vcpu); | ||
837 | set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); | ||
838 | local_irq_disable(); | ||
839 | list_add(&vmx->local_vcpus_link, | ||
840 | &per_cpu(vcpus_on_cpu, cpu)); | ||
841 | local_irq_enable(); | ||
842 | } | ||
843 | 899 | ||
844 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { | 900 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { |
845 | u8 error; | ||
846 | |||
847 | per_cpu(current_vmcs, cpu) = vmx->vmcs; | 901 | per_cpu(current_vmcs, cpu) = vmx->vmcs; |
848 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" | 902 | vmcs_load(vmx->vmcs); |
849 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | ||
850 | : "cc"); | ||
851 | if (error) | ||
852 | printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", | ||
853 | vmx->vmcs, phys_addr); | ||
854 | } | 903 | } |
855 | 904 | ||
856 | if (vcpu->cpu != cpu) { | 905 | if (vcpu->cpu != cpu) { |
857 | struct desc_ptr dt; | 906 | struct desc_ptr dt; |
858 | unsigned long sysenter_esp; | 907 | unsigned long sysenter_esp; |
859 | 908 | ||
909 | kvm_migrate_timers(vcpu); | ||
910 | kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); | ||
911 | local_irq_disable(); | ||
912 | list_add(&vmx->local_vcpus_link, | ||
913 | &per_cpu(vcpus_on_cpu, cpu)); | ||
914 | local_irq_enable(); | ||
915 | |||
860 | vcpu->cpu = cpu; | 916 | vcpu->cpu = cpu; |
861 | /* | 917 | /* |
862 | * Linux uses per-cpu TSS and GDT, so set these when switching | 918 | * Linux uses per-cpu TSS and GDT, so set these when switching |
@@ -884,6 +940,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
884 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 940 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
885 | { | 941 | { |
886 | __vmx_load_host_state(to_vmx(vcpu)); | 942 | __vmx_load_host_state(to_vmx(vcpu)); |
943 | if (!vmm_exclusive) { | ||
944 | __vcpu_clear(to_vmx(vcpu)); | ||
945 | kvm_cpu_vmxoff(); | ||
946 | } | ||
887 | } | 947 | } |
888 | 948 | ||
889 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 949 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
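With the new vmm_exclusive parameter cleared (e.g. modprobe kvm-intel vmm_exclusive=0, the parameter name coming from the module_param() above), VMXON/VMXOFF bracket each vcpu load/put instead of spanning hardware_enable() to hardware_disable(), so other VMX users can run while no vcpu is resident. A toy model of the two lifetimes, with printf standing in for the privileged instructions:

    #include <stdbool.h>
    #include <stdio.h>

    static bool vmm_exclusive = true;

    static void vmxon(void)  { printf("VMXON\n");  }
    static void vmxoff(void) { printf("VMXOFF\n"); }

    static void hardware_enable(void)  { if (vmm_exclusive) vmxon();  }
    static void hardware_disable(void) { if (vmm_exclusive) vmxoff(); }
    static void vcpu_load(void)        { if (!vmm_exclusive) vmxon();  }
    static void vcpu_put(void)         { if (!vmm_exclusive) vmxoff(); }

    int main(void)
    {
        /* exclusive (default): VMXON held for the whole enabled period */
        hardware_enable();
        vcpu_load();  vcpu_put();
        hardware_disable();

        /* non-exclusive: VMXON only while a vcpu is loaded */
        vmm_exclusive = false;
        hardware_enable();
        vcpu_load();  vcpu_put();
        hardware_disable();
        return 0;
    }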
@@ -1286,6 +1346,13 @@ static __init int vmx_disabled_by_bios(void) | |||
1286 | /* locked but not enabled */ | 1346 | /* locked but not enabled */ |
1287 | } | 1347 | } |
1288 | 1348 | ||
1349 | static void kvm_cpu_vmxon(u64 addr) | ||
1350 | { | ||
1351 | asm volatile (ASM_VMX_VMXON_RAX | ||
1352 | : : "a"(&addr), "m"(addr) | ||
1353 | : "memory", "cc"); | ||
1354 | } | ||
1355 | |||
1289 | static int hardware_enable(void *garbage) | 1356 | static int hardware_enable(void *garbage) |
1290 | { | 1357 | { |
1291 | int cpu = raw_smp_processor_id(); | 1358 | int cpu = raw_smp_processor_id(); |
@@ -1308,11 +1375,13 @@ static int hardware_enable(void *garbage) | |||
1308 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); | 1375 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); |
1309 | } | 1376 | } |
1310 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1377 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
1311 | asm volatile (ASM_VMX_VMXON_RAX | ||
1312 | : : "a"(&phys_addr), "m"(phys_addr) | ||
1313 | : "memory", "cc"); | ||
1314 | 1378 | ||
1315 | ept_sync_global(); | 1379 | if (vmm_exclusive) { |
1380 | kvm_cpu_vmxon(phys_addr); | ||
1381 | ept_sync_global(); | ||
1382 | } | ||
1383 | |||
1384 | store_gdt(&__get_cpu_var(host_gdt)); | ||
1316 | 1385 | ||
1317 | return 0; | 1386 | return 0; |
1318 | } | 1387 | } |
@@ -1334,13 +1403,15 @@ static void vmclear_local_vcpus(void) | |||
1334 | static void kvm_cpu_vmxoff(void) | 1403 | static void kvm_cpu_vmxoff(void) |
1335 | { | 1404 | { |
1336 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); | 1405 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); |
1337 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
1338 | } | 1406 | } |
1339 | 1407 | ||
1340 | static void hardware_disable(void *garbage) | 1408 | static void hardware_disable(void *garbage) |
1341 | { | 1409 | { |
1342 | vmclear_local_vcpus(); | 1410 | if (vmm_exclusive) { |
1343 | kvm_cpu_vmxoff(); | 1411 | vmclear_local_vcpus(); |
1412 | kvm_cpu_vmxoff(); | ||
1413 | } | ||
1414 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
1344 | } | 1415 | } |
1345 | 1416 | ||
1346 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | 1417 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, |
@@ -1539,7 +1610,8 @@ static __init int hardware_setup(void) | |||
1539 | if (!cpu_has_vmx_vpid()) | 1610 | if (!cpu_has_vmx_vpid()) |
1540 | enable_vpid = 0; | 1611 | enable_vpid = 0; |
1541 | 1612 | ||
1542 | if (!cpu_has_vmx_ept()) { | 1613 | if (!cpu_has_vmx_ept() || |
1614 | !cpu_has_vmx_ept_4levels()) { | ||
1543 | enable_ept = 0; | 1615 | enable_ept = 0; |
1544 | enable_unrestricted_guest = 0; | 1616 | enable_unrestricted_guest = 0; |
1545 | } | 1617 | } |
@@ -1628,7 +1700,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
1628 | gfn_t base_gfn; | 1700 | gfn_t base_gfn; |
1629 | 1701 | ||
1630 | slots = kvm_memslots(kvm); | 1702 | slots = kvm_memslots(kvm); |
1631 | base_gfn = kvm->memslots->memslots[0].base_gfn + | 1703 | base_gfn = slots->memslots[0].base_gfn + |
1632 | kvm->memslots->memslots[0].npages - 3; | 1704 | kvm->memslots->memslots[0].npages - 3; |
1633 | return base_gfn << PAGE_SHIFT; | 1705 | return base_gfn << PAGE_SHIFT; |
1634 | } | 1706 | } |
@@ -1759,9 +1831,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
1759 | 1831 | ||
1760 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | 1832 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) |
1761 | { | 1833 | { |
1762 | vpid_sync_vcpu_all(to_vmx(vcpu)); | 1834 | vpid_sync_context(to_vmx(vcpu)); |
1763 | if (enable_ept) | 1835 | if (enable_ept) { |
1836 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
1837 | return; | ||
1764 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 1838 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
1839 | } | ||
1765 | } | 1840 | } |
1766 | 1841 | ||
1767 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1842 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
@@ -2507,7 +2582,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2507 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 2582 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); |
2508 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 2583 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
2509 | 2584 | ||
2510 | vmcs_writel(HOST_CR0, read_cr0()); /* 22.2.3 */ | 2585 | vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ |
2511 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ | 2586 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ |
2512 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ | 2587 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ |
2513 | 2588 | ||
@@ -2599,21 +2674,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2599 | 2674 | ||
2600 | static int init_rmode(struct kvm *kvm) | 2675 | static int init_rmode(struct kvm *kvm) |
2601 | { | 2676 | { |
2677 | int idx, ret = 0; | ||
2678 | |||
2679 | idx = srcu_read_lock(&kvm->srcu); | ||
2602 | if (!init_rmode_tss(kvm)) | 2680 | if (!init_rmode_tss(kvm)) |
2603 | return 0; | 2681 | goto exit; |
2604 | if (!init_rmode_identity_map(kvm)) | 2682 | if (!init_rmode_identity_map(kvm)) |
2605 | return 0; | 2683 | goto exit; |
2606 | return 1; | 2684 | |
2685 | ret = 1; | ||
2686 | exit: | ||
2687 | srcu_read_unlock(&kvm->srcu, idx); | ||
2688 | return ret; | ||
2607 | } | 2689 | } |
2608 | 2690 | ||
2609 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 2691 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
2610 | { | 2692 | { |
2611 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2693 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2612 | u64 msr; | 2694 | u64 msr; |
2613 | int ret, idx; | 2695 | int ret; |
2614 | 2696 | ||
2615 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2697 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
2616 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
2617 | if (!init_rmode(vmx->vcpu.kvm)) { | 2698 | if (!init_rmode(vmx->vcpu.kvm)) { |
2618 | ret = -ENOMEM; | 2699 | ret = -ENOMEM; |
2619 | goto out; | 2700 | goto out; |
@@ -2630,7 +2711,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2630 | msr |= MSR_IA32_APICBASE_BSP; | 2711 | msr |= MSR_IA32_APICBASE_BSP; |
2631 | kvm_set_apic_base(&vmx->vcpu, msr); | 2712 | kvm_set_apic_base(&vmx->vcpu, msr); |
2632 | 2713 | ||
2633 | fx_init(&vmx->vcpu); | 2714 | ret = fx_init(&vmx->vcpu); |
2715 | if (ret != 0) | ||
2716 | goto out; | ||
2634 | 2717 | ||
2635 | seg_setup(VCPU_SREG_CS); | 2718 | seg_setup(VCPU_SREG_CS); |
2636 | /* | 2719 | /* |
@@ -2713,7 +2796,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2713 | vmx_fpu_activate(&vmx->vcpu); | 2796 | vmx_fpu_activate(&vmx->vcpu); |
2714 | update_exception_bitmap(&vmx->vcpu); | 2797 | update_exception_bitmap(&vmx->vcpu); |
2715 | 2798 | ||
2716 | vpid_sync_vcpu_all(vmx); | 2799 | vpid_sync_context(vmx); |
2717 | 2800 | ||
2718 | ret = 0; | 2801 | ret = 0; |
2719 | 2802 | ||
@@ -2721,7 +2804,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2721 | vmx->emulation_required = 0; | 2804 | vmx->emulation_required = 0; |
2722 | 2805 | ||
2723 | out: | 2806 | out: |
2724 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
2725 | return ret; | 2807 | return ret; |
2726 | } | 2808 | } |
2727 | 2809 | ||
@@ -2826,9 +2908,7 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | |||
2826 | { | 2908 | { |
2827 | if (!cpu_has_virtual_nmis()) | 2909 | if (!cpu_has_virtual_nmis()) |
2828 | return to_vmx(vcpu)->soft_vnmi_blocked; | 2910 | return to_vmx(vcpu)->soft_vnmi_blocked; |
2829 | else | 2911 | return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; |
2830 | return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
2831 | GUEST_INTR_STATE_NMI); | ||
2832 | } | 2912 | } |
2833 | 2913 | ||
2834 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | 2914 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) |
@@ -3070,7 +3150,7 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
3070 | ++vcpu->stat.io_exits; | 3150 | ++vcpu->stat.io_exits; |
3071 | 3151 | ||
3072 | if (string || in) | 3152 | if (string || in) |
3073 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | 3153 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; |
3074 | 3154 | ||
3075 | port = exit_qualification >> 16; | 3155 | port = exit_qualification >> 16; |
3076 | size = (exit_qualification & 7) + 1; | 3156 | size = (exit_qualification & 7) + 1; |
@@ -3090,11 +3170,20 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
3090 | hypercall[2] = 0xc1; | 3170 | hypercall[2] = 0xc1; |
3091 | } | 3171 | } |
3092 | 3172 | ||
3173 | static void complete_insn_gp(struct kvm_vcpu *vcpu, int err) | ||
3174 | { | ||
3175 | if (err) | ||
3176 | kvm_inject_gp(vcpu, 0); | ||
3177 | else | ||
3178 | skip_emulated_instruction(vcpu); | ||
3179 | } | ||
3180 | |||
3093 | static int handle_cr(struct kvm_vcpu *vcpu) | 3181 | static int handle_cr(struct kvm_vcpu *vcpu) |
3094 | { | 3182 | { |
3095 | unsigned long exit_qualification, val; | 3183 | unsigned long exit_qualification, val; |
3096 | int cr; | 3184 | int cr; |
3097 | int reg; | 3185 | int reg; |
3186 | int err; | ||
3098 | 3187 | ||
3099 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3188 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3100 | cr = exit_qualification & 15; | 3189 | cr = exit_qualification & 15; |
@@ -3105,16 +3194,16 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
3105 | trace_kvm_cr_write(cr, val); | 3194 | trace_kvm_cr_write(cr, val); |
3106 | switch (cr) { | 3195 | switch (cr) { |
3107 | case 0: | 3196 | case 0: |
3108 | kvm_set_cr0(vcpu, val); | 3197 | err = kvm_set_cr0(vcpu, val); |
3109 | skip_emulated_instruction(vcpu); | 3198 | complete_insn_gp(vcpu, err); |
3110 | return 1; | 3199 | return 1; |
3111 | case 3: | 3200 | case 3: |
3112 | kvm_set_cr3(vcpu, val); | 3201 | err = kvm_set_cr3(vcpu, val); |
3113 | skip_emulated_instruction(vcpu); | 3202 | complete_insn_gp(vcpu, err); |
3114 | return 1; | 3203 | return 1; |
3115 | case 4: | 3204 | case 4: |
3116 | kvm_set_cr4(vcpu, val); | 3205 | err = kvm_set_cr4(vcpu, val); |
3117 | skip_emulated_instruction(vcpu); | 3206 | complete_insn_gp(vcpu, err); |
3118 | return 1; | 3207 | return 1; |
3119 | case 8: { | 3208 | case 8: { |
3120 | u8 cr8_prev = kvm_get_cr8(vcpu); | 3209 | u8 cr8_prev = kvm_get_cr8(vcpu); |
@@ -3321,30 +3410,25 @@ static int handle_invlpg(struct kvm_vcpu *vcpu) | |||
3321 | static int handle_wbinvd(struct kvm_vcpu *vcpu) | 3410 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
3322 | { | 3411 | { |
3323 | skip_emulated_instruction(vcpu); | 3412 | skip_emulated_instruction(vcpu); |
3324 | /* TODO: Add support for VT-d/pass-through device */ | 3413 | kvm_emulate_wbinvd(vcpu); |
3325 | return 1; | 3414 | return 1; |
3326 | } | 3415 | } |
3327 | 3416 | ||
3328 | static int handle_apic_access(struct kvm_vcpu *vcpu) | 3417 | static int handle_xsetbv(struct kvm_vcpu *vcpu) |
3329 | { | 3418 | { |
3330 | unsigned long exit_qualification; | 3419 | u64 new_bv = kvm_read_edx_eax(vcpu); |
3331 | enum emulation_result er; | 3420 | u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX); |
3332 | unsigned long offset; | ||
3333 | 3421 | ||
3334 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3422 | if (kvm_set_xcr(vcpu, index, new_bv) == 0) |
3335 | offset = exit_qualification & 0xffful; | 3423 | skip_emulated_instruction(vcpu); |
3336 | |||
3337 | er = emulate_instruction(vcpu, 0, 0, 0); | ||
3338 | |||
3339 | if (er != EMULATE_DONE) { | ||
3340 | printk(KERN_ERR | ||
3341 | "Fail to handle apic access vmexit! Offset is 0x%lx\n", | ||
3342 | offset); | ||
3343 | return -ENOEXEC; | ||
3344 | } | ||
3345 | return 1; | 3424 | return 1; |
3346 | } | 3425 | } |
3347 | 3426 | ||
3427 | static int handle_apic_access(struct kvm_vcpu *vcpu) | ||
3428 | { | ||
3429 | return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; | ||
3430 | } | ||
3431 | |||
3348 | static int handle_task_switch(struct kvm_vcpu *vcpu) | 3432 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
3349 | { | 3433 | { |
3350 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3434 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
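handle_xsetbv() above reassembles the guest's 64-bit XCR value from EDX:EAX and takes the XCR index from ECX, matching the architectural operand form. The read side of the same convention can be tried from userspace (this sketch assumes a CPU with XSAVE and CR4.OSXSAVE set; it raises #UD otherwise):

    #include <stdint.h>
    #include <stdio.h>

    /* XGETBV/XSETBV split the 64-bit XCR across EDX:EAX and take the
     * register index (0 = XCR0, the XFEATURE_ENABLED_MASK) in ECX. */
    static uint64_t xgetbv(uint32_t index)
    {
        uint32_t eax, edx;
        asm volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
        return ((uint64_t)edx << 32) | eax;
    }

    int main(void)
    {
        printf("XCR0 = %#llx\n", (unsigned long long)xgetbv(0));
        return 0;
    }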
@@ -3554,13 +3638,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
3554 | goto out; | 3638 | goto out; |
3555 | } | 3639 | } |
3556 | 3640 | ||
3557 | if (err != EMULATE_DONE) { | 3641 | if (err != EMULATE_DONE) |
3558 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 3642 | return 0; |
3559 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3560 | vcpu->run->internal.ndata = 0; | ||
3561 | ret = 0; | ||
3562 | goto out; | ||
3563 | } | ||
3564 | 3643 | ||
3565 | if (signal_pending(current)) | 3644 | if (signal_pending(current)) |
3566 | goto out; | 3645 | goto out; |
@@ -3623,6 +3702,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3623 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 3702 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
3624 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 3703 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
3625 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 3704 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
3705 | [EXIT_REASON_XSETBV] = handle_xsetbv, | ||
3626 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 3706 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
3627 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3707 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
3628 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3708 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
@@ -3656,6 +3736,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
3656 | if (enable_ept && is_paging(vcpu)) | 3736 | if (enable_ept && is_paging(vcpu)) |
3657 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3737 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
3658 | 3738 | ||
3739 | if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { | ||
3740 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | ||
3741 | vcpu->run->fail_entry.hardware_entry_failure_reason | ||
3742 | = exit_reason; | ||
3743 | return 0; | ||
3744 | } | ||
3745 | |||
3659 | if (unlikely(vmx->fail)) { | 3746 | if (unlikely(vmx->fail)) { |
3660 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3747 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
3661 | vcpu->run->fail_entry.hardware_entry_failure_reason | 3748 | vcpu->run->fail_entry.hardware_entry_failure_reason |
@@ -3861,11 +3948,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3861 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 3948 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
3862 | vmx_set_interrupt_shadow(vcpu, 0); | 3949 | vmx_set_interrupt_shadow(vcpu, 0); |
3863 | 3950 | ||
3864 | /* | ||
3865 | * Loading guest fpu may have cleared host cr0.ts | ||
3866 | */ | ||
3867 | vmcs_writel(HOST_CR0, read_cr0()); | ||
3868 | |||
3869 | asm( | 3951 | asm( |
3870 | /* Store host registers */ | 3952 | /* Store host registers */ |
3871 | "push %%"R"dx; push %%"R"bp;" | 3953 | "push %%"R"dx; push %%"R"bp;" |
@@ -4001,6 +4083,19 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
4001 | kmem_cache_free(kvm_vcpu_cache, vmx); | 4083 | kmem_cache_free(kvm_vcpu_cache, vmx); |
4002 | } | 4084 | } |
4003 | 4085 | ||
4086 | static inline void vmcs_init(struct vmcs *vmcs) | ||
4087 | { | ||
4088 | u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id())); | ||
4089 | |||
4090 | if (!vmm_exclusive) | ||
4091 | kvm_cpu_vmxon(phys_addr); | ||
4092 | |||
4093 | vmcs_clear(vmcs); | ||
4094 | |||
4095 | if (!vmm_exclusive) | ||
4096 | kvm_cpu_vmxoff(); | ||
4097 | } | ||
4098 | |||
4004 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | 4099 | static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
4005 | { | 4100 | { |
4006 | int err; | 4101 | int err; |
@@ -4026,7 +4121,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4026 | if (!vmx->vmcs) | 4121 | if (!vmx->vmcs) |
4027 | goto free_msrs; | 4122 | goto free_msrs; |
4028 | 4123 | ||
4029 | vmcs_clear(vmx->vmcs); | 4124 | vmcs_init(vmx->vmcs); |
4030 | 4125 | ||
4031 | cpu = get_cpu(); | 4126 | cpu = get_cpu(); |
4032 | vmx_vcpu_load(&vmx->vcpu, cpu); | 4127 | vmx_vcpu_load(&vmx->vcpu, cpu); |
@@ -4265,6 +4360,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4265 | .rdtscp_supported = vmx_rdtscp_supported, | 4360 | .rdtscp_supported = vmx_rdtscp_supported, |
4266 | 4361 | ||
4267 | .set_supported_cpuid = vmx_set_supported_cpuid, | 4362 | .set_supported_cpuid = vmx_set_supported_cpuid, |
4363 | |||
4364 | .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, | ||
4268 | }; | 4365 | }; |
4269 | 4366 | ||
4270 | static int __init vmx_init(void) | 4367 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fa89c39c64f..97aab036dabf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * Copyright (C) 2006 Qumranet, Inc. | 6 | * Copyright (C) 2006 Qumranet, Inc. |
7 | * Copyright (C) 2008 Qumranet, Inc. | 7 | * Copyright (C) 2008 Qumranet, Inc. |
8 | * Copyright IBM Corporation, 2008 | 8 | * Copyright IBM Corporation, 2008 |
9 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. | ||
9 | * | 10 | * |
10 | * Authors: | 11 | * Authors: |
11 | * Avi Kivity <avi@qumranet.com> | 12 | * Avi Kivity <avi@qumranet.com> |
@@ -41,17 +42,19 @@ | |||
41 | #include <linux/srcu.h> | 42 | #include <linux/srcu.h> |
42 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
43 | #include <linux/perf_event.h> | 44 | #include <linux/perf_event.h> |
45 | #include <linux/uaccess.h> | ||
44 | #include <trace/events/kvm.h> | 46 | #include <trace/events/kvm.h> |
45 | 47 | ||
46 | #define CREATE_TRACE_POINTS | 48 | #define CREATE_TRACE_POINTS |
47 | #include "trace.h" | 49 | #include "trace.h" |
48 | 50 | ||
49 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
50 | #include <asm/uaccess.h> | ||
51 | #include <asm/msr.h> | 52 | #include <asm/msr.h> |
52 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
53 | #include <asm/mtrr.h> | 54 | #include <asm/mtrr.h> |
54 | #include <asm/mce.h> | 55 | #include <asm/mce.h> |
56 | #include <asm/i387.h> | ||
57 | #include <asm/xcr.h> | ||
55 | 58 | ||
56 | #define MAX_IO_MSRS 256 | 59 | #define MAX_IO_MSRS 256 |
57 | #define CR0_RESERVED_BITS \ | 60 | #define CR0_RESERVED_BITS \ |
@@ -62,6 +65,7 @@ | |||
62 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | 65 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ |
63 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | 66 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ |
64 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 67 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
68 | | X86_CR4_OSXSAVE \ | ||
65 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | 69 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) |
66 | 70 | ||
67 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | 71 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) |
@@ -147,6 +151,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
147 | { NULL } | 151 | { NULL } |
148 | }; | 152 | }; |
149 | 153 | ||
154 | u64 __read_mostly host_xcr0; | ||
155 | |||
156 | static inline u32 bit(int bitno) | ||
157 | { | ||
158 | return 1 << (bitno & 31); | ||
159 | } | ||
160 | |||
150 | static void kvm_on_user_return(struct user_return_notifier *urn) | 161 | static void kvm_on_user_return(struct user_return_notifier *urn) |
151 | { | 162 | { |
152 | unsigned slot; | 163 | unsigned slot; |
@@ -285,7 +296,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
285 | prev_nr = vcpu->arch.exception.nr; | 296 | prev_nr = vcpu->arch.exception.nr; |
286 | if (prev_nr == DF_VECTOR) { | 297 | if (prev_nr == DF_VECTOR) { |
287 | /* triple fault -> shutdown */ | 298 | /* triple fault -> shutdown */ |
288 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | 299 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
289 | return; | 300 | return; |
290 | } | 301 | } |
291 | class1 = exception_class(prev_nr); | 302 | class1 = exception_class(prev_nr); |
@@ -414,121 +425,163 @@ out: | |||
414 | return changed; | 425 | return changed; |
415 | } | 426 | } |
416 | 427 | ||
417 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 428 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
418 | { | 429 | { |
430 | unsigned long old_cr0 = kvm_read_cr0(vcpu); | ||
431 | unsigned long update_bits = X86_CR0_PG | X86_CR0_WP | | ||
432 | X86_CR0_CD | X86_CR0_NW; | ||
433 | |||
419 | cr0 |= X86_CR0_ET; | 434 | cr0 |= X86_CR0_ET; |
420 | 435 | ||
421 | #ifdef CONFIG_X86_64 | 436 | #ifdef CONFIG_X86_64 |
422 | if (cr0 & 0xffffffff00000000UL) { | 437 | if (cr0 & 0xffffffff00000000UL) |
423 | kvm_inject_gp(vcpu, 0); | 438 | return 1; |
424 | return; | ||
425 | } | ||
426 | #endif | 439 | #endif |
427 | 440 | ||
428 | cr0 &= ~CR0_RESERVED_BITS; | 441 | cr0 &= ~CR0_RESERVED_BITS; |
429 | 442 | ||
430 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { | 443 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) |
431 | kvm_inject_gp(vcpu, 0); | 444 | return 1; |
432 | return; | ||
433 | } | ||
434 | 445 | ||
435 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { | 446 | if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) |
436 | kvm_inject_gp(vcpu, 0); | 447 | return 1; |
437 | return; | ||
438 | } | ||
439 | 448 | ||
440 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 449 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
441 | #ifdef CONFIG_X86_64 | 450 | #ifdef CONFIG_X86_64 |
442 | if ((vcpu->arch.efer & EFER_LME)) { | 451 | if ((vcpu->arch.efer & EFER_LME)) { |
443 | int cs_db, cs_l; | 452 | int cs_db, cs_l; |
444 | 453 | ||
445 | if (!is_pae(vcpu)) { | 454 | if (!is_pae(vcpu)) |
446 | kvm_inject_gp(vcpu, 0); | 455 | return 1; |
447 | return; | ||
448 | } | ||
449 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 456 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
450 | if (cs_l) { | 457 | if (cs_l) |
451 | kvm_inject_gp(vcpu, 0); | 458 | return 1; |
452 | return; | ||
453 | |||
454 | } | ||
455 | } else | 459 | } else |
456 | #endif | 460 | #endif |
457 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 461 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) |
458 | kvm_inject_gp(vcpu, 0); | 462 | return 1; |
459 | return; | ||
460 | } | ||
461 | |||
462 | } | 463 | } |
463 | 464 | ||
464 | kvm_x86_ops->set_cr0(vcpu, cr0); | 465 | kvm_x86_ops->set_cr0(vcpu, cr0); |
465 | 466 | ||
466 | kvm_mmu_reset_context(vcpu); | 467 | if ((cr0 ^ old_cr0) & update_bits) |
467 | return; | 468 | kvm_mmu_reset_context(vcpu); |
469 | return 0; | ||
468 | } | 470 | } |
469 | EXPORT_SYMBOL_GPL(kvm_set_cr0); | 471 | EXPORT_SYMBOL_GPL(kvm_set_cr0); |
470 | 472 | ||
471 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 473 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
472 | { | 474 | { |
473 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); | 475 | (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); |
474 | } | 476 | } |
475 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 477 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
476 | 478 | ||
477 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 479 | int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) |
478 | { | 480 | { |
479 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | 481 | u64 xcr0; |
480 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | ||
481 | 482 | ||
482 | if (cr4 & CR4_RESERVED_BITS) { | 483 | /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ |
484 | if (index != XCR_XFEATURE_ENABLED_MASK) | ||
485 | return 1; | ||
486 | xcr0 = xcr; | ||
487 | if (kvm_x86_ops->get_cpl(vcpu) != 0) | ||
488 | return 1; | ||
489 | if (!(xcr0 & XSTATE_FP)) | ||
490 | return 1; | ||
491 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) | ||
492 | return 1; | ||
493 | if (xcr0 & ~host_xcr0) | ||
494 | return 1; | ||
495 | vcpu->arch.xcr0 = xcr0; | ||
496 | vcpu->guest_xcr0_loaded = 0; | ||
497 | return 0; | ||
498 | } | ||
499 | |||
500 | int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | ||
501 | { | ||
502 | if (__kvm_set_xcr(vcpu, index, xcr)) { | ||
483 | kvm_inject_gp(vcpu, 0); | 503 | kvm_inject_gp(vcpu, 0); |
504 | return 1; | ||
505 | } | ||
506 | return 0; | ||
507 | } | ||
508 | EXPORT_SYMBOL_GPL(kvm_set_xcr); | ||
509 | |||
510 | static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | ||
511 | { | ||
512 | struct kvm_cpuid_entry2 *best; | ||
513 | |||
514 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
515 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
516 | } | ||
517 | |||
518 | static void update_cpuid(struct kvm_vcpu *vcpu) | ||
519 | { | ||
520 | struct kvm_cpuid_entry2 *best; | ||
521 | |||
522 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
523 | if (!best) | ||
484 | return; | 524 | return; |
525 | |||
526 | /* Update OSXSAVE bit */ | ||
527 | if (cpu_has_xsave && best->function == 0x1) { | ||
528 | best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); | ||
529 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) | ||
530 | best->ecx |= bit(X86_FEATURE_OSXSAVE); | ||
485 | } | 531 | } |
532 | } | ||
533 | |||
534 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | ||
535 | { | ||
536 | unsigned long old_cr4 = kvm_read_cr4(vcpu); | ||
537 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | ||
538 | |||
539 | if (cr4 & CR4_RESERVED_BITS) | ||
540 | return 1; | ||
541 | |||
542 | if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) | ||
543 | return 1; | ||
486 | 544 | ||
487 | if (is_long_mode(vcpu)) { | 545 | if (is_long_mode(vcpu)) { |
488 | if (!(cr4 & X86_CR4_PAE)) { | 546 | if (!(cr4 & X86_CR4_PAE)) |
489 | kvm_inject_gp(vcpu, 0); | 547 | return 1; |
490 | return; | ||
491 | } | ||
492 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) | 548 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) |
493 | && ((cr4 ^ old_cr4) & pdptr_bits) | 549 | && ((cr4 ^ old_cr4) & pdptr_bits) |
494 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) { | 550 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) |
495 | kvm_inject_gp(vcpu, 0); | 551 | return 1; |
496 | return; | 552 | |
497 | } | 553 | if (cr4 & X86_CR4_VMXE) |
554 | return 1; | ||
498 | 555 | ||
499 | if (cr4 & X86_CR4_VMXE) { | ||
500 | kvm_inject_gp(vcpu, 0); | ||
501 | return; | ||
502 | } | ||
503 | kvm_x86_ops->set_cr4(vcpu, cr4); | 556 | kvm_x86_ops->set_cr4(vcpu, cr4); |
504 | vcpu->arch.cr4 = cr4; | 557 | |
505 | kvm_mmu_reset_context(vcpu); | 558 | if ((cr4 ^ old_cr4) & pdptr_bits) |
559 | kvm_mmu_reset_context(vcpu); | ||
560 | |||
561 | if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) | ||
562 | update_cpuid(vcpu); | ||
563 | |||
564 | return 0; | ||
506 | } | 565 | } |
507 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 566 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
508 | 567 | ||
509 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 568 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
510 | { | 569 | { |
511 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { | 570 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { |
512 | kvm_mmu_sync_roots(vcpu); | 571 | kvm_mmu_sync_roots(vcpu); |
513 | kvm_mmu_flush_tlb(vcpu); | 572 | kvm_mmu_flush_tlb(vcpu); |
514 | return; | 573 | return 0; |
515 | } | 574 | } |
516 | 575 | ||
517 | if (is_long_mode(vcpu)) { | 576 | if (is_long_mode(vcpu)) { |
518 | if (cr3 & CR3_L_MODE_RESERVED_BITS) { | 577 | if (cr3 & CR3_L_MODE_RESERVED_BITS) |
519 | kvm_inject_gp(vcpu, 0); | 578 | return 1; |
520 | return; | ||
521 | } | ||
522 | } else { | 579 | } else { |
523 | if (is_pae(vcpu)) { | 580 | if (is_pae(vcpu)) { |
524 | if (cr3 & CR3_PAE_RESERVED_BITS) { | 581 | if (cr3 & CR3_PAE_RESERVED_BITS) |
525 | kvm_inject_gp(vcpu, 0); | 582 | return 1; |
526 | return; | 583 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) |
527 | } | 584 | return 1; |
528 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { | ||
529 | kvm_inject_gp(vcpu, 0); | ||
530 | return; | ||
531 | } | ||
532 | } | 585 | } |
533 | /* | 586 | /* |
534 | * We don't check reserved bits in nonpae mode, because | 587 | * We don't check reserved bits in nonpae mode, because |
@@ -546,24 +599,28 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
546 | * to debug) behavior on the guest side. | 599 | * to debug) behavior on the guest side. |
547 | */ | 600 | */ |
548 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | 601 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) |
549 | kvm_inject_gp(vcpu, 0); | 602 | return 1; |
550 | else { | 603 | vcpu->arch.cr3 = cr3; |
551 | vcpu->arch.cr3 = cr3; | 604 | vcpu->arch.mmu.new_cr3(vcpu); |
552 | vcpu->arch.mmu.new_cr3(vcpu); | 605 | return 0; |
553 | } | ||
554 | } | 606 | } |
555 | EXPORT_SYMBOL_GPL(kvm_set_cr3); | 607 | EXPORT_SYMBOL_GPL(kvm_set_cr3); |
556 | 608 | ||
557 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 609 | int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
558 | { | 610 | { |
559 | if (cr8 & CR8_RESERVED_BITS) { | 611 | if (cr8 & CR8_RESERVED_BITS) |
560 | kvm_inject_gp(vcpu, 0); | 612 | return 1; |
561 | return; | ||
562 | } | ||
563 | if (irqchip_in_kernel(vcpu->kvm)) | 613 | if (irqchip_in_kernel(vcpu->kvm)) |
564 | kvm_lapic_set_tpr(vcpu, cr8); | 614 | kvm_lapic_set_tpr(vcpu, cr8); |
565 | else | 615 | else |
566 | vcpu->arch.cr8 = cr8; | 616 | vcpu->arch.cr8 = cr8; |
617 | return 0; | ||
618 | } | ||
619 | |||
620 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | ||
621 | { | ||
622 | if (__kvm_set_cr8(vcpu, cr8)) | ||
623 | kvm_inject_gp(vcpu, 0); | ||
567 | } | 624 | } |
568 | EXPORT_SYMBOL_GPL(kvm_set_cr8); | 625 | EXPORT_SYMBOL_GPL(kvm_set_cr8); |
569 | 626 | ||
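Restating the __kvm_set_xcr() checks from the large hunk above as a standalone predicate (XSTATE bit positions per the XSAVE architecture; host_xcr0 is whatever the host itself enabled):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define XSTATE_FP   (1ULL << 0)   /* x87 state */
    #define XSTATE_SSE  (1ULL << 1)   /* XMM state */
    #define XSTATE_YMM  (1ULL << 2)   /* AVX state */

    static bool xcr0_valid(uint64_t xcr0, uint64_t host_xcr0)
    {
        if (!(xcr0 & XSTATE_FP))
            return false;              /* x87 bit is mandatory */
        if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
            return false;              /* YMM depends on XMM */
        if (xcr0 & ~host_xcr0)
            return false;              /* guest cannot exceed host */
        return true;
    }

    int main(void)
    {
        uint64_t host = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;
        printf("%d\n", xcr0_valid(XSTATE_FP | XSTATE_SSE, host)); /* 1 */
        printf("%d\n", xcr0_valid(XSTATE_YMM, host));             /* 0 */
        return 0;
    }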
@@ -576,7 +633,7 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
576 | } | 633 | } |
577 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 634 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
578 | 635 | ||
579 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | 636 | static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) |
580 | { | 637 | { |
581 | switch (dr) { | 638 | switch (dr) { |
582 | case 0 ... 3: | 639 | case 0 ... 3: |
@@ -585,29 +642,21 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
585 | vcpu->arch.eff_db[dr] = val; | 642 | vcpu->arch.eff_db[dr] = val; |
586 | break; | 643 | break; |
587 | case 4: | 644 | case 4: |
588 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 645 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
589 | kvm_queue_exception(vcpu, UD_VECTOR); | 646 | return 1; /* #UD */ |
590 | return 1; | ||
591 | } | ||
592 | /* fall through */ | 647 | /* fall through */ |
593 | case 6: | 648 | case 6: |
594 | if (val & 0xffffffff00000000ULL) { | 649 | if (val & 0xffffffff00000000ULL) |
595 | kvm_inject_gp(vcpu, 0); | 650 | return -1; /* #GP */ |
596 | return 1; | ||
597 | } | ||
598 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 651 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
599 | break; | 652 | break; |
600 | case 5: | 653 | case 5: |
601 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 654 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
602 | kvm_queue_exception(vcpu, UD_VECTOR); | 655 | return 1; /* #UD */ |
603 | return 1; | ||
604 | } | ||
605 | /* fall through */ | 656 | /* fall through */ |
606 | default: /* 7 */ | 657 | default: /* 7 */ |
607 | if (val & 0xffffffff00000000ULL) { | 658 | if (val & 0xffffffff00000000ULL) |
608 | kvm_inject_gp(vcpu, 0); | 659 | return -1; /* #GP */ |
609 | return 1; | ||
610 | } | ||
611 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | 660 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; |
612 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 661 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
613 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | 662 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); |
@@ -618,28 +667,37 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
618 | 667 | ||
619 | return 0; | 668 | return 0; |
620 | } | 669 | } |
670 | |||
671 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | ||
672 | { | ||
673 | int res; | ||
674 | |||
675 | res = __kvm_set_dr(vcpu, dr, val); | ||
676 | if (res > 0) | ||
677 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
678 | else if (res < 0) | ||
679 | kvm_inject_gp(vcpu, 0); | ||
680 | |||
681 | return res; | ||
682 | } | ||
621 | EXPORT_SYMBOL_GPL(kvm_set_dr); | 683 | EXPORT_SYMBOL_GPL(kvm_set_dr); |
622 | 684 | ||
623 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | 685 | static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) |
624 | { | 686 | { |
625 | switch (dr) { | 687 | switch (dr) { |
626 | case 0 ... 3: | 688 | case 0 ... 3: |
627 | *val = vcpu->arch.db[dr]; | 689 | *val = vcpu->arch.db[dr]; |
628 | break; | 690 | break; |
629 | case 4: | 691 | case 4: |
630 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 692 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
631 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
632 | return 1; | 693 | return 1; |
633 | } | ||
634 | /* fall through */ | 694 | /* fall through */ |
635 | case 6: | 695 | case 6: |
636 | *val = vcpu->arch.dr6; | 696 | *val = vcpu->arch.dr6; |
637 | break; | 697 | break; |
638 | case 5: | 698 | case 5: |
639 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | 699 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
640 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
641 | return 1; | 700 | return 1; |
642 | } | ||
643 | /* fall through */ | 701 | /* fall through */ |
644 | default: /* 7 */ | 702 | default: /* 7 */ |
645 | *val = vcpu->arch.dr7; | 703 | *val = vcpu->arch.dr7; |
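Splitting kvm_set_dr() into a worker and a wrapper moves fault injection to the caller: __kvm_set_dr() now returns a positive value for #UD and a negative one for #GP, and only the exported wrapper injects. The mapping, as a tiny self-contained sketch:

    #include <stdio.h>

    /* 0 = accepted, >0 = raise #UD, <0 = raise #GP: the convention
     * kvm_set_dr() decodes when calling __kvm_set_dr(). */
    enum dr_fault { DR_OK, DR_FAULT_UD, DR_FAULT_GP };

    static enum dr_fault map_dr_result(int res)
    {
        if (res > 0)
            return DR_FAULT_UD;
        if (res < 0)
            return DR_FAULT_GP;
        return DR_OK;
    }

    int main(void)
    {
        printf("%d %d %d\n", map_dr_result(0), map_dr_result(1),
               map_dr_result(-1));   /* 0 1 2 */
        return 0;
    }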
@@ -648,12 +706,16 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
648 | 706 | ||
649 | return 0; | 707 | return 0; |
650 | } | 708 | } |
651 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
652 | 709 | ||
653 | static inline u32 bit(int bitno) | 710 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) |
654 | { | 711 | { |
655 | return 1 << (bitno & 31); | 712 | if (_kvm_get_dr(vcpu, dr, val)) { |
713 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
714 | return 1; | ||
715 | } | ||
716 | return 0; | ||
656 | } | 717 | } |
718 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
657 | 719 | ||
658 | /* | 720 | /* |
659 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS | 721 | * List of msr numbers which we expose to userspace through KVM_GET_MSRS |
@@ -682,10 +744,14 @@ static unsigned num_msrs_to_save; | |||
682 | 744 | ||
683 | static u32 emulated_msrs[] = { | 745 | static u32 emulated_msrs[] = { |
684 | MSR_IA32_MISC_ENABLE, | 746 | MSR_IA32_MISC_ENABLE, |
747 | MSR_IA32_MCG_STATUS, | ||
748 | MSR_IA32_MCG_CTL, | ||
685 | }; | 749 | }; |
686 | 750 | ||
687 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | 751 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) |
688 | { | 752 | { |
753 | u64 old_efer = vcpu->arch.efer; | ||
754 | |||
689 | if (efer & efer_reserved_bits) | 755 | if (efer & efer_reserved_bits) |
690 | return 1; | 756 | return 1; |
691 | 757 | ||
@@ -714,11 +780,13 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
714 | 780 | ||
715 | kvm_x86_ops->set_efer(vcpu, efer); | 781 | kvm_x86_ops->set_efer(vcpu, efer); |
716 | 782 | ||
717 | vcpu->arch.efer = efer; | ||
718 | |||
719 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 783 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
720 | kvm_mmu_reset_context(vcpu); | 784 | kvm_mmu_reset_context(vcpu); |
721 | 785 | ||
786 | /* Update reserved bits */ | ||
787 | if ((efer ^ old_efer) & EFER_NX) | ||
788 | kvm_mmu_reset_context(vcpu); | ||
789 | |||
722 | return 0; | 790 | return 0; |
723 | } | 791 | } |
724 | 792 | ||
@@ -882,7 +950,7 @@ static int kvm_request_guest_time_update(struct kvm_vcpu *v) | |||
882 | 950 | ||
883 | if (!vcpu->time_page) | 951 | if (!vcpu->time_page) |
884 | return 0; | 952 | return 0; |
885 | set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests); | 953 | kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v); |
886 | return 1; | 954 | return 1; |
887 | } | 955 | } |
888 | 956 | ||
@@ -1524,16 +1592,12 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, | |||
1524 | { | 1592 | { |
1525 | int i, idx; | 1593 | int i, idx; |
1526 | 1594 | ||
1527 | vcpu_load(vcpu); | ||
1528 | |||
1529 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 1595 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
1530 | for (i = 0; i < msrs->nmsrs; ++i) | 1596 | for (i = 0; i < msrs->nmsrs; ++i) |
1531 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) | 1597 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) |
1532 | break; | 1598 | break; |
1533 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 1599 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
1534 | 1600 | ||
1535 | vcpu_put(vcpu); | ||
1536 | |||
1537 | return i; | 1601 | return i; |
1538 | } | 1602 | } |
1539 | 1603 | ||
@@ -1618,6 +1682,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1618 | case KVM_CAP_PCI_SEGMENT: | 1682 | case KVM_CAP_PCI_SEGMENT: |
1619 | case KVM_CAP_DEBUGREGS: | 1683 | case KVM_CAP_DEBUGREGS: |
1620 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1684 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1685 | case KVM_CAP_XSAVE: | ||
1621 | r = 1; | 1686 | r = 1; |
1622 | break; | 1687 | break; |
1623 | case KVM_CAP_COALESCED_MMIO: | 1688 | case KVM_CAP_COALESCED_MMIO: |
@@ -1641,6 +1706,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1641 | case KVM_CAP_MCE: | 1706 | case KVM_CAP_MCE: |
1642 | r = KVM_MAX_MCE_BANKS; | 1707 | r = KVM_MAX_MCE_BANKS; |
1643 | break; | 1708 | break; |
1709 | case KVM_CAP_XCRS: | ||
1710 | r = cpu_has_xsave; | ||
1711 | break; | ||
1644 | default: | 1712 | default: |
1645 | r = 0; | 1713 | r = 0; |
1646 | break; | 1714 | break; |
@@ -1717,8 +1785,28 @@ out: | |||
1717 | return r; | 1785 | return r; |
1718 | } | 1786 | } |
1719 | 1787 | ||
1788 | static void wbinvd_ipi(void *garbage) | ||
1789 | { | ||
1790 | wbinvd(); | ||
1791 | } | ||
1792 | |||
1793 | static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) | ||
1794 | { | ||
1795 | return vcpu->kvm->arch.iommu_domain && | ||
1796 | !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); | ||
1797 | } | ||
1798 | |||
1720 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1799 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1721 | { | 1800 | { |
1801 | /* Handle the case that WBINVD may be executed by the guest */ | ||
1802 | if (need_emulate_wbinvd(vcpu)) { | ||
1803 | if (kvm_x86_ops->has_wbinvd_exit()) | ||
1804 | cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); | ||
1805 | else if (vcpu->cpu != -1 && vcpu->cpu != cpu) | ||
1806 | smp_call_function_single(vcpu->cpu, | ||
1807 | wbinvd_ipi, NULL, 1); | ||
1808 | } | ||
1809 | |||
1722 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 1810 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
1723 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { | 1811 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { |
1724 | unsigned long khz = cpufreq_quick_get(cpu); | 1812 | unsigned long khz = cpufreq_quick_get(cpu); |
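kvm_arch_vcpu_load() now honors guest WBINVD only when it matters, i.e. an assigned device behind a non-cache-coherent IOMMU: with WBINVD exits available the cpu is merely recorded in a dirty mask for a lazy flush, otherwise the vcpu's previous cpu is flushed immediately via IPI. The three-way decision restated as a compilable sketch (names are illustrative, not kernel symbols):

    #include <stdbool.h>
    #include <stdio.h>

    enum wbinvd_action { WBINVD_IGNORE, WBINVD_MARK_DIRTY, WBINVD_IPI_NOW };

    static enum wbinvd_action classify(bool noncoherent_passthrough,
                                       bool has_wbinvd_exit)
    {
        if (!noncoherent_passthrough)
            return WBINVD_IGNORE;       /* caches not device-visible */
        if (has_wbinvd_exit)
            return WBINVD_MARK_DIRTY;   /* flush lazily on next exit */
        return WBINVD_IPI_NOW;          /* flush the old cpu right away */
    }

    int main(void)
    {
        printf("%d\n", classify(true, false));  /* 2: IPI path */
        printf("%d\n", classify(true, true));   /* 1: dirty-mask path */
        return 0;
    }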
@@ -1731,8 +1819,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1731 | 1819 | ||
1732 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 1820 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
1733 | { | 1821 | { |
1734 | kvm_put_guest_fpu(vcpu); | ||
1735 | kvm_x86_ops->vcpu_put(vcpu); | 1822 | kvm_x86_ops->vcpu_put(vcpu); |
1823 | kvm_put_guest_fpu(vcpu); | ||
1736 | } | 1824 | } |
1737 | 1825 | ||
1738 | static int is_efer_nx(void) | 1826 | static int is_efer_nx(void) |
@@ -1781,7 +1869,6 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1781 | if (copy_from_user(cpuid_entries, entries, | 1869 | if (copy_from_user(cpuid_entries, entries, |
1782 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | 1870 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) |
1783 | goto out_free; | 1871 | goto out_free; |
1784 | vcpu_load(vcpu); | ||
1785 | for (i = 0; i < cpuid->nent; i++) { | 1872 | for (i = 0; i < cpuid->nent; i++) { |
1786 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | 1873 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; |
1787 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | 1874 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; |
@@ -1799,7 +1886,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1799 | r = 0; | 1886 | r = 0; |
1800 | kvm_apic_set_version(vcpu); | 1887 | kvm_apic_set_version(vcpu); |
1801 | kvm_x86_ops->cpuid_update(vcpu); | 1888 | kvm_x86_ops->cpuid_update(vcpu); |
1802 | vcpu_put(vcpu); | 1889 | update_cpuid(vcpu); |
1803 | 1890 | ||
1804 | out_free: | 1891 | out_free: |
1805 | vfree(cpuid_entries); | 1892 | vfree(cpuid_entries); |
@@ -1820,11 +1907,10 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
1820 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | 1907 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, |
1821 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | 1908 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) |
1822 | goto out; | 1909 | goto out; |
1823 | vcpu_load(vcpu); | ||
1824 | vcpu->arch.cpuid_nent = cpuid->nent; | 1910 | vcpu->arch.cpuid_nent = cpuid->nent; |
1825 | kvm_apic_set_version(vcpu); | 1911 | kvm_apic_set_version(vcpu); |
1826 | kvm_x86_ops->cpuid_update(vcpu); | 1912 | kvm_x86_ops->cpuid_update(vcpu); |
1827 | vcpu_put(vcpu); | 1913 | update_cpuid(vcpu); |
1828 | return 0; | 1914 | return 0; |
1829 | 1915 | ||
1830 | out: | 1916 | out: |
@@ -1837,7 +1923,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1837 | { | 1923 | { |
1838 | int r; | 1924 | int r; |
1839 | 1925 | ||
1840 | vcpu_load(vcpu); | ||
1841 | r = -E2BIG; | 1926 | r = -E2BIG; |
1842 | if (cpuid->nent < vcpu->arch.cpuid_nent) | 1927 | if (cpuid->nent < vcpu->arch.cpuid_nent) |
1843 | goto out; | 1928 | goto out; |
@@ -1849,7 +1934,6 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
1849 | 1934 | ||
1850 | out: | 1935 | out: |
1851 | cpuid->nent = vcpu->arch.cpuid_nent; | 1936 | cpuid->nent = vcpu->arch.cpuid_nent; |
1852 | vcpu_put(vcpu); | ||
1853 | return r; | 1937 | return r; |
1854 | } | 1938 | } |
1855 | 1939 | ||
@@ -1901,13 +1985,13 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1901 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | 1985 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); |
1902 | /* cpuid 1.ecx */ | 1986 | /* cpuid 1.ecx */ |
1903 | const u32 kvm_supported_word4_x86_features = | 1987 | const u32 kvm_supported_word4_x86_features = |
1904 | F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | | 1988 | F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | |
1905 | 0 /* DS-CPL, VMX, SMX, EST */ | | 1989 | 0 /* DS-CPL, VMX, SMX, EST */ | |
1906 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | 1990 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | |
1907 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | 1991 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | |
1908 | 0 /* Reserved, DCA */ | F(XMM4_1) | | 1992 | 0 /* Reserved, DCA */ | F(XMM4_1) | |
1909 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | 1993 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | |
1910 | 0 /* Reserved, XSAVE, OSXSAVE */; | 1994 | 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); |
1911 | /* cpuid 0x80000001.ecx */ | 1995 | /* cpuid 0x80000001.ecx */ |
1912 | const u32 kvm_supported_word6_x86_features = | 1996 | const u32 kvm_supported_word6_x86_features = |
1913 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | | 1997 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | |
@@ -1922,7 +2006,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1922 | 2006 | ||
1923 | switch (function) { | 2007 | switch (function) { |
1924 | case 0: | 2008 | case 0: |
1925 | entry->eax = min(entry->eax, (u32)0xb); | 2009 | entry->eax = min(entry->eax, (u32)0xd); |
1926 | break; | 2010 | break; |
1927 | case 1: | 2011 | case 1: |
1928 | entry->edx &= kvm_supported_word0_x86_features; | 2012 | entry->edx &= kvm_supported_word0_x86_features; |
@@ -1980,6 +2064,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1980 | } | 2064 | } |
1981 | break; | 2065 | break; |
1982 | } | 2066 | } |
2067 | case 0xd: { | ||
2068 | int i; | ||
2069 | |||
2070 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2071 | for (i = 1; *nent < maxnent; ++i) { | ||
2072 | if (entry[i - 1].eax == 0 && i != 2) | ||
2073 | break; | ||
2074 | do_cpuid_1_ent(&entry[i], function, i); | ||
2075 | entry[i].flags |= | ||
2076 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | ||
2077 | ++*nent; | ||
2078 | } | ||
2079 | break; | ||
2080 | } | ||
1983 | case KVM_CPUID_SIGNATURE: { | 2081 | case KVM_CPUID_SIGNATURE: { |
1984 | char signature[12] = "KVMKVMKVM\0\0"; | 2082 | char signature[12] = "KVMKVMKVM\0\0"; |
1985 | u32 *sigptr = (u32 *)signature; | 2083 | u32 *sigptr = (u32 *)signature; |
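The new 0xd case walks the XSAVE state-component subleaves of CPUID until an empty one is found, special-casing index 2 because subleaf 1 was still reserved (all zero) on hardware of this era while subleaf 2 (YMM) follows it. A userspace walk in the same spirit, assuming GCC's <cpuid.h> (the kernel loop inspects the previous entry's eax instead, to the same effect for this layout):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int i, eax, ebx, ecx, edx;

        for (i = 0; i < 64; i++) {
            __cpuid_count(0xd, i, eax, ebx, ecx, edx);
            if (eax == 0 && i > 2)      /* past the last component */
                break;
            printf("subleaf %u: eax=%#x ebx=%#x\n", i, eax, ebx);
        }
        return 0;
    }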
@@ -2081,9 +2179,7 @@ out: | |||
2081 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2179 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
2082 | struct kvm_lapic_state *s) | 2180 | struct kvm_lapic_state *s) |
2083 | { | 2181 | { |
2084 | vcpu_load(vcpu); | ||
2085 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); | 2182 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); |
2086 | vcpu_put(vcpu); | ||
2087 | 2183 | ||
2088 | return 0; | 2184 | return 0; |
2089 | } | 2185 | } |
@@ -2091,11 +2187,9 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | |||
2091 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | 2187 | static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, |
2092 | struct kvm_lapic_state *s) | 2188 | struct kvm_lapic_state *s) |
2093 | { | 2189 | { |
2094 | vcpu_load(vcpu); | ||
2095 | memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); | 2190 | memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); |
2096 | kvm_apic_post_state_restore(vcpu); | 2191 | kvm_apic_post_state_restore(vcpu); |
2097 | update_cr8_intercept(vcpu); | 2192 | update_cr8_intercept(vcpu); |
2098 | vcpu_put(vcpu); | ||
2099 | 2193 | ||
2100 | return 0; | 2194 | return 0; |
2101 | } | 2195 | } |
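The vcpu_load()/vcpu_put() pairs deleted here, and in the many hunks that follow, are not simply dropped: the series moves them into common code, so every arch ioctl handler already runs with the vcpu loaded. The generic dispatcher presumably ends up bracketing the call once, along these lines (a sketch; the exact shape is an assumption):

/* Sketch: common code now takes and releases the vcpu exactly once. */
static long kvm_vcpu_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        long r;

        vcpu_load(vcpu);        /* previously done inside each handler */
        r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
        vcpu_put(vcpu);
        return r;
}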
@@ -2107,20 +2201,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
2107 | return -EINVAL; | 2201 | return -EINVAL; |
2108 | if (irqchip_in_kernel(vcpu->kvm)) | 2202 | if (irqchip_in_kernel(vcpu->kvm)) |
2109 | return -ENXIO; | 2203 | return -ENXIO; |
2110 | vcpu_load(vcpu); | ||
2111 | 2204 | ||
2112 | kvm_queue_interrupt(vcpu, irq->irq, false); | 2205 | kvm_queue_interrupt(vcpu, irq->irq, false); |
2113 | 2206 | ||
2114 | vcpu_put(vcpu); | ||
2115 | |||
2116 | return 0; | 2207 | return 0; |
2117 | } | 2208 | } |
2118 | 2209 | ||
2119 | static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) | 2210 | static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) |
2120 | { | 2211 | { |
2121 | vcpu_load(vcpu); | ||
2122 | kvm_inject_nmi(vcpu); | 2212 | kvm_inject_nmi(vcpu); |
2123 | vcpu_put(vcpu); | ||
2124 | 2213 | ||
2125 | return 0; | 2214 | return 0; |
2126 | } | 2215 | } |
@@ -2140,7 +2229,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2140 | int r; | 2229 | int r; |
2141 | unsigned bank_num = mcg_cap & 0xff, bank; | 2230 | unsigned bank_num = mcg_cap & 0xff, bank; |
2142 | 2231 | ||
2143 | vcpu_load(vcpu); | ||
2144 | r = -EINVAL; | 2232 | r = -EINVAL; |
2145 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) | 2233 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) |
2146 | goto out; | 2234 | goto out; |
@@ -2155,7 +2243,6 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
2155 | for (bank = 0; bank < bank_num; bank++) | 2243 | for (bank = 0; bank < bank_num; bank++) |
2156 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; | 2244 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; |
2157 | out: | 2245 | out: |
2158 | vcpu_put(vcpu); | ||
2159 | return r; | 2246 | return r; |
2160 | } | 2247 | } |
2161 | 2248 | ||
@@ -2188,7 +2275,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
2188 | printk(KERN_DEBUG "kvm: set_mce: " | 2275 | printk(KERN_DEBUG "kvm: set_mce: " |
2189 | "injects mce exception while " | 2276 | "injects mce exception while " |
2190 | "previous one is in progress!\n"); | 2277 | "previous one is in progress!\n"); |
2191 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | 2278 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2192 | return 0; | 2279 | return 0; |
2193 | } | 2280 | } |
2194 | if (banks[1] & MCI_STATUS_VAL) | 2281 | if (banks[1] & MCI_STATUS_VAL) |
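The open-coded set_bit(KVM_REQ_..., &vcpu->requests) becomes kvm_make_request(), paired with the kvm_check_request() consumer used later in vcpu_enter_guest(). Presumably the helpers are thin wrappers of this shape:

/* Presumed shape of the new request helpers (kvm_host.h). */
static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
{
        set_bit(req, &vcpu->requests);
}

static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
{
        return test_and_clear_bit(req, &vcpu->requests);
}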
@@ -2213,8 +2300,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
2213 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | 2300 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, |
2214 | struct kvm_vcpu_events *events) | 2301 | struct kvm_vcpu_events *events) |
2215 | { | 2302 | { |
2216 | vcpu_load(vcpu); | ||
2217 | |||
2218 | events->exception.injected = | 2303 | events->exception.injected = |
2219 | vcpu->arch.exception.pending && | 2304 | vcpu->arch.exception.pending && |
2220 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | 2305 | !kvm_exception_is_soft(vcpu->arch.exception.nr); |
@@ -2239,8 +2324,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2239 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2324 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2240 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR | 2325 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2241 | | KVM_VCPUEVENT_VALID_SHADOW); | 2326 | | KVM_VCPUEVENT_VALID_SHADOW); |
2242 | |||
2243 | vcpu_put(vcpu); | ||
2244 | } | 2327 | } |
2245 | 2328 | ||
2246 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | 2329 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, |
@@ -2251,8 +2334,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2251 | | KVM_VCPUEVENT_VALID_SHADOW)) | 2334 | | KVM_VCPUEVENT_VALID_SHADOW)) |
2252 | return -EINVAL; | 2335 | return -EINVAL; |
2253 | 2336 | ||
2254 | vcpu_load(vcpu); | ||
2255 | |||
2256 | vcpu->arch.exception.pending = events->exception.injected; | 2337 | vcpu->arch.exception.pending = events->exception.injected; |
2257 | vcpu->arch.exception.nr = events->exception.nr; | 2338 | vcpu->arch.exception.nr = events->exception.nr; |
2258 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | 2339 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; |
@@ -2275,22 +2356,16 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2275 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) | 2356 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) |
2276 | vcpu->arch.sipi_vector = events->sipi_vector; | 2357 | vcpu->arch.sipi_vector = events->sipi_vector; |
2277 | 2358 | ||
2278 | vcpu_put(vcpu); | ||
2279 | |||
2280 | return 0; | 2359 | return 0; |
2281 | } | 2360 | } |
2282 | 2361 | ||
2283 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | 2362 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, |
2284 | struct kvm_debugregs *dbgregs) | 2363 | struct kvm_debugregs *dbgregs) |
2285 | { | 2364 | { |
2286 | vcpu_load(vcpu); | ||
2287 | |||
2288 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | 2365 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); |
2289 | dbgregs->dr6 = vcpu->arch.dr6; | 2366 | dbgregs->dr6 = vcpu->arch.dr6; |
2290 | dbgregs->dr7 = vcpu->arch.dr7; | 2367 | dbgregs->dr7 = vcpu->arch.dr7; |
2291 | dbgregs->flags = 0; | 2368 | dbgregs->flags = 0; |
2292 | |||
2293 | vcpu_put(vcpu); | ||
2294 | } | 2369 | } |
2295 | 2370 | ||
2296 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | 2371 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, |
@@ -2299,40 +2374,113 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
2299 | if (dbgregs->flags) | 2374 | if (dbgregs->flags) |
2300 | return -EINVAL; | 2375 | return -EINVAL; |
2301 | 2376 | ||
2302 | vcpu_load(vcpu); | ||
2303 | |||
2304 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | 2377 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); |
2305 | vcpu->arch.dr6 = dbgregs->dr6; | 2378 | vcpu->arch.dr6 = dbgregs->dr6; |
2306 | vcpu->arch.dr7 = dbgregs->dr7; | 2379 | vcpu->arch.dr7 = dbgregs->dr7; |
2307 | 2380 | ||
2308 | vcpu_put(vcpu); | 2381 | return 0; |
2382 | } | ||
2383 | |||
2384 | static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, | ||
2385 | struct kvm_xsave *guest_xsave) | ||
2386 | { | ||
2387 | if (cpu_has_xsave) | ||
2388 | memcpy(guest_xsave->region, | ||
2389 | &vcpu->arch.guest_fpu.state->xsave, | ||
2390 | sizeof(struct xsave_struct)); | ||
2391 | else { | ||
2392 | memcpy(guest_xsave->region, | ||
2393 | &vcpu->arch.guest_fpu.state->fxsave, | ||
2394 | sizeof(struct i387_fxsave_struct)); | ||
2395 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = | ||
2396 | XSTATE_FPSSE; | ||
2397 | } | ||
2398 | } | ||
2399 | |||
2400 | static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, | ||
2401 | struct kvm_xsave *guest_xsave) | ||
2402 | { | ||
2403 | u64 xstate_bv = | ||
2404 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)]; | ||
2309 | 2405 | ||
2406 | if (cpu_has_xsave) | ||
2407 | memcpy(&vcpu->arch.guest_fpu.state->xsave, | ||
2408 | guest_xsave->region, sizeof(struct xsave_struct)); | ||
2409 | else { | ||
2410 | if (xstate_bv & ~XSTATE_FPSSE) | ||
2411 | return -EINVAL; | ||
2412 | memcpy(&vcpu->arch.guest_fpu.state->fxsave, | ||
2413 | guest_xsave->region, sizeof(struct i387_fxsave_struct)); | ||
2414 | } | ||
2310 | return 0; | 2415 | return 0; |
2311 | } | 2416 | } |
2312 | 2417 | ||
2418 | static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu, | ||
2419 | struct kvm_xcrs *guest_xcrs) | ||
2420 | { | ||
2421 | if (!cpu_has_xsave) { | ||
2422 | guest_xcrs->nr_xcrs = 0; | ||
2423 | return; | ||
2424 | } | ||
2425 | |||
2426 | guest_xcrs->nr_xcrs = 1; | ||
2427 | guest_xcrs->flags = 0; | ||
2428 | guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK; | ||
2429 | guest_xcrs->xcrs[0].value = vcpu->arch.xcr0; | ||
2430 | } | ||
2431 | |||
2432 | static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, | ||
2433 | struct kvm_xcrs *guest_xcrs) | ||
2434 | { | ||
2435 | int i, r = 0; | ||
2436 | |||
2437 | if (!cpu_has_xsave) | ||
2438 | return -EINVAL; | ||
2439 | |||
2440 | if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags) | ||
2441 | return -EINVAL; | ||
2442 | |||
2443 | for (i = 0; i < guest_xcrs->nr_xcrs; i++) | ||
2444 | /* Only support XCR0 currently */ | ||
2445 | if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) { | ||
2446 | r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, | ||
2447 | guest_xcrs->xcrs[0].value); | ||
2448 | break; | ||
2449 | } | ||
2450 | if (r) | ||
2451 | r = -EINVAL; | ||
2452 | return r; | ||
2453 | } | ||
2454 | |||
2313 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2455 | long kvm_arch_vcpu_ioctl(struct file *filp, |
2314 | unsigned int ioctl, unsigned long arg) | 2456 | unsigned int ioctl, unsigned long arg) |
2315 | { | 2457 | { |
2316 | struct kvm_vcpu *vcpu = filp->private_data; | 2458 | struct kvm_vcpu *vcpu = filp->private_data; |
2317 | void __user *argp = (void __user *)arg; | 2459 | void __user *argp = (void __user *)arg; |
2318 | int r; | 2460 | int r; |
2319 | struct kvm_lapic_state *lapic = NULL; | 2461 | union { |
2462 | struct kvm_lapic_state *lapic; | ||
2463 | struct kvm_xsave *xsave; | ||
2464 | struct kvm_xcrs *xcrs; | ||
2465 | void *buffer; | ||
2466 | } u; | ||
2320 | 2467 | ||
2468 | u.buffer = NULL; | ||
2321 | switch (ioctl) { | 2469 | switch (ioctl) { |
2322 | case KVM_GET_LAPIC: { | 2470 | case KVM_GET_LAPIC: { |
2323 | r = -EINVAL; | 2471 | r = -EINVAL; |
2324 | if (!vcpu->arch.apic) | 2472 | if (!vcpu->arch.apic) |
2325 | goto out; | 2473 | goto out; |
2326 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2474 | u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
2327 | 2475 | ||
2328 | r = -ENOMEM; | 2476 | r = -ENOMEM; |
2329 | if (!lapic) | 2477 | if (!u.lapic) |
2330 | goto out; | 2478 | goto out; |
2331 | r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); | 2479 | r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic); |
2332 | if (r) | 2480 | if (r) |
2333 | goto out; | 2481 | goto out; |
2334 | r = -EFAULT; | 2482 | r = -EFAULT; |
2335 | if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) | 2483 | if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state))) |
2336 | goto out; | 2484 | goto out; |
2337 | r = 0; | 2485 | r = 0; |
2338 | break; | 2486 | break; |
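The four helpers added in this hunk back the new KVM_GET/SET_XSAVE and KVM_GET/SET_XCRS vcpu ioctls wired up below. A hypothetical userspace round-trip, with vcpu_fd obtained from KVM_CREATE_VCPU and error handling reduced:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Save and restore the extended FPU state of one vcpu. */
int xstate_roundtrip(int vcpu_fd)
{
        struct kvm_xsave xsave;
        struct kvm_xcrs  xcrs;

        if (ioctl(vcpu_fd, KVM_GET_XSAVE, &xsave) < 0 ||
            ioctl(vcpu_fd, KVM_GET_XCRS,  &xcrs)  < 0)
                return -1;
        /* ... checkpoint or migrate the two structs ... */
        if (ioctl(vcpu_fd, KVM_SET_XCRS,  &xcrs)  < 0)
                return -1;
        return ioctl(vcpu_fd, KVM_SET_XSAVE, &xsave);
}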
@@ -2341,14 +2489,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2341 | r = -EINVAL; | 2489 | r = -EINVAL; |
2342 | if (!vcpu->arch.apic) | 2490 | if (!vcpu->arch.apic) |
2343 | goto out; | 2491 | goto out; |
2344 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 2492 | u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
2345 | r = -ENOMEM; | 2493 | r = -ENOMEM; |
2346 | if (!lapic) | 2494 | if (!u.lapic) |
2347 | goto out; | 2495 | goto out; |
2348 | r = -EFAULT; | 2496 | r = -EFAULT; |
2349 | if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) | 2497 | if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state))) |
2350 | goto out; | 2498 | goto out; |
2351 | r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); | 2499 | r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); |
2352 | if (r) | 2500 | if (r) |
2353 | goto out; | 2501 | goto out; |
2354 | r = 0; | 2502 | r = 0; |
@@ -2464,9 +2612,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2464 | r = -EFAULT; | 2612 | r = -EFAULT; |
2465 | if (copy_from_user(&mce, argp, sizeof mce)) | 2613 | if (copy_from_user(&mce, argp, sizeof mce)) |
2466 | goto out; | 2614 | goto out; |
2467 | vcpu_load(vcpu); | ||
2468 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2615 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
2469 | vcpu_put(vcpu); | ||
2470 | break; | 2616 | break; |
2471 | } | 2617 | } |
2472 | case KVM_GET_VCPU_EVENTS: { | 2618 | case KVM_GET_VCPU_EVENTS: { |
@@ -2513,11 +2659,67 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2513 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); | 2659 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); |
2514 | break; | 2660 | break; |
2515 | } | 2661 | } |
2662 | case KVM_GET_XSAVE: { | ||
2663 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | ||
2664 | r = -ENOMEM; | ||
2665 | if (!u.xsave) | ||
2666 | break; | ||
2667 | |||
2668 | kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave); | ||
2669 | |||
2670 | r = -EFAULT; | ||
2671 | if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave))) | ||
2672 | break; | ||
2673 | r = 0; | ||
2674 | break; | ||
2675 | } | ||
2676 | case KVM_SET_XSAVE: { | ||
2677 | u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); | ||
2678 | r = -ENOMEM; | ||
2679 | if (!u.xsave) | ||
2680 | break; | ||
2681 | |||
2682 | r = -EFAULT; | ||
2683 | if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave))) | ||
2684 | break; | ||
2685 | |||
2686 | r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); | ||
2687 | break; | ||
2688 | } | ||
2689 | case KVM_GET_XCRS: { | ||
2690 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | ||
2691 | r = -ENOMEM; | ||
2692 | if (!u.xcrs) | ||
2693 | break; | ||
2694 | |||
2695 | kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs); | ||
2696 | |||
2697 | r = -EFAULT; | ||
2698 | if (copy_to_user(argp, u.xcrs, | ||
2699 | sizeof(struct kvm_xcrs))) | ||
2700 | break; | ||
2701 | r = 0; | ||
2702 | break; | ||
2703 | } | ||
2704 | case KVM_SET_XCRS: { | ||
2705 | u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL); | ||
2706 | r = -ENOMEM; | ||
2707 | if (!u.xcrs) | ||
2708 | break; | ||
2709 | |||
2710 | r = -EFAULT; | ||
2711 | if (copy_from_user(u.xcrs, argp, | ||
2712 | sizeof(struct kvm_xcrs))) | ||
2713 | break; | ||
2714 | |||
2715 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); | ||
2716 | break; | ||
2717 | } | ||
2516 | default: | 2718 | default: |
2517 | r = -EINVAL; | 2719 | r = -EINVAL; |
2518 | } | 2720 | } |
2519 | out: | 2721 | out: |
2520 | kfree(lapic); | 2722 | kfree(u.buffer); |
2521 | return r; | 2723 | return r; |
2522 | } | 2724 | } |
2523 | 2725 | ||
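Folding the buffers into one union is what lets the single kfree(u.buffer) at the out: label cover every allocation path. The same idiom, reduced to a self-contained userspace toy with calloc/free standing in for kzalloc/kfree:

#include <stdlib.h>

int demo(int want_xsave)
{
        union {
                long *lapic;            /* stand-ins for the kvm structs */
                char *xsave;
                void *buffer;
        } u;

        u.buffer = NULL;
        if (want_xsave)
                u.xsave = calloc(4096, 1);
        else
                u.lapic = calloc(1, sizeof(long));
        if (!u.buffer)
                return -1;
        /* ... use whichever member was filled in ... */
        free(u.buffer);                 /* one cleanup for all cases */
        return 0;
}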
@@ -2560,115 +2762,6 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | |||
2560 | return kvm->arch.n_alloc_mmu_pages; | 2762 | return kvm->arch.n_alloc_mmu_pages; |
2561 | } | 2763 | } |
2562 | 2764 | ||
2563 | gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | ||
2564 | { | ||
2565 | int i; | ||
2566 | struct kvm_mem_alias *alias; | ||
2567 | struct kvm_mem_aliases *aliases; | ||
2568 | |||
2569 | aliases = kvm_aliases(kvm); | ||
2570 | |||
2571 | for (i = 0; i < aliases->naliases; ++i) { | ||
2572 | alias = &aliases->aliases[i]; | ||
2573 | if (alias->flags & KVM_ALIAS_INVALID) | ||
2574 | continue; | ||
2575 | if (gfn >= alias->base_gfn | ||
2576 | && gfn < alias->base_gfn + alias->npages) | ||
2577 | return alias->target_gfn + gfn - alias->base_gfn; | ||
2578 | } | ||
2579 | return gfn; | ||
2580 | } | ||
2581 | |||
2582 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | ||
2583 | { | ||
2584 | int i; | ||
2585 | struct kvm_mem_alias *alias; | ||
2586 | struct kvm_mem_aliases *aliases; | ||
2587 | |||
2588 | aliases = kvm_aliases(kvm); | ||
2589 | |||
2590 | for (i = 0; i < aliases->naliases; ++i) { | ||
2591 | alias = &aliases->aliases[i]; | ||
2592 | if (gfn >= alias->base_gfn | ||
2593 | && gfn < alias->base_gfn + alias->npages) | ||
2594 | return alias->target_gfn + gfn - alias->base_gfn; | ||
2595 | } | ||
2596 | return gfn; | ||
2597 | } | ||
2598 | |||
2599 | /* | ||
2600 | * Set a new alias region. Aliases map a portion of physical memory into | ||
2601 | * another portion. This is useful for memory windows, for example the PC | ||
2602 | * VGA region. | ||
2603 | */ | ||
2604 | static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | ||
2605 | struct kvm_memory_alias *alias) | ||
2606 | { | ||
2607 | int r, n; | ||
2608 | struct kvm_mem_alias *p; | ||
2609 | struct kvm_mem_aliases *aliases, *old_aliases; | ||
2610 | |||
2611 | r = -EINVAL; | ||
2612 | /* General sanity checks */ | ||
2613 | if (alias->memory_size & (PAGE_SIZE - 1)) | ||
2614 | goto out; | ||
2615 | if (alias->guest_phys_addr & (PAGE_SIZE - 1)) | ||
2616 | goto out; | ||
2617 | if (alias->slot >= KVM_ALIAS_SLOTS) | ||
2618 | goto out; | ||
2619 | if (alias->guest_phys_addr + alias->memory_size | ||
2620 | < alias->guest_phys_addr) | ||
2621 | goto out; | ||
2622 | if (alias->target_phys_addr + alias->memory_size | ||
2623 | < alias->target_phys_addr) | ||
2624 | goto out; | ||
2625 | |||
2626 | r = -ENOMEM; | ||
2627 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
2628 | if (!aliases) | ||
2629 | goto out; | ||
2630 | |||
2631 | mutex_lock(&kvm->slots_lock); | ||
2632 | |||
2633 | /* invalidate any gfn reference in case of deletion/shrinking */ | ||
2634 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2635 | aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; | ||
2636 | old_aliases = kvm->arch.aliases; | ||
2637 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
2638 | synchronize_srcu_expedited(&kvm->srcu); | ||
2639 | kvm_mmu_zap_all(kvm); | ||
2640 | kfree(old_aliases); | ||
2641 | |||
2642 | r = -ENOMEM; | ||
2643 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
2644 | if (!aliases) | ||
2645 | goto out_unlock; | ||
2646 | |||
2647 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2648 | |||
2649 | p = &aliases->aliases[alias->slot]; | ||
2650 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | ||
2651 | p->npages = alias->memory_size >> PAGE_SHIFT; | ||
2652 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | ||
2653 | p->flags &= ~(KVM_ALIAS_INVALID); | ||
2654 | |||
2655 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | ||
2656 | if (aliases->aliases[n - 1].npages) | ||
2657 | break; | ||
2658 | aliases->naliases = n; | ||
2659 | |||
2660 | old_aliases = kvm->arch.aliases; | ||
2661 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
2662 | synchronize_srcu_expedited(&kvm->srcu); | ||
2663 | kfree(old_aliases); | ||
2664 | r = 0; | ||
2665 | |||
2666 | out_unlock: | ||
2667 | mutex_unlock(&kvm->slots_lock); | ||
2668 | out: | ||
2669 | return r; | ||
2670 | } | ||
2671 | |||
2672 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | 2765 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) |
2673 | { | 2766 | { |
2674 | int r; | 2767 | int r; |
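Memory aliases disappear wholesale here, and the KVM_SET_MEMORY_ALIAS and legacy KVM_SET_MEMORY_REGION ioctl cases are dropped further down; guest physical layout is expressed purely through memslots instead. The userspace replacement looks like this, with illustrative addresses and host_mem assumed to be an already-mapped buffer:

#include <sys/ioctl.h>
#include <linux/kvm.h>

int map_slot(int vm_fd, void *host_mem)
{
        struct kvm_userspace_memory_region region = {
                .slot            = 0,
                .flags           = 0,
                .guest_phys_addr = 0x100000,            /* 1 MiB */
                .memory_size     = 0x200000,            /* 2 MiB */
                .userspace_addr  = (__u64)(unsigned long)host_mem,
        };

        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}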
@@ -2797,7 +2890,6 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
2797 | struct kvm_memory_slot *memslot; | 2890 | struct kvm_memory_slot *memslot; |
2798 | unsigned long n; | 2891 | unsigned long n; |
2799 | unsigned long is_dirty = 0; | 2892 | unsigned long is_dirty = 0; |
2800 | unsigned long *dirty_bitmap = NULL; | ||
2801 | 2893 | ||
2802 | mutex_lock(&kvm->slots_lock); | 2894 | mutex_lock(&kvm->slots_lock); |
2803 | 2895 | ||
@@ -2812,27 +2904,30 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
2812 | 2904 | ||
2813 | n = kvm_dirty_bitmap_bytes(memslot); | 2905 | n = kvm_dirty_bitmap_bytes(memslot); |
2814 | 2906 | ||
2815 | r = -ENOMEM; | ||
2816 | dirty_bitmap = vmalloc(n); | ||
2817 | if (!dirty_bitmap) | ||
2818 | goto out; | ||
2819 | memset(dirty_bitmap, 0, n); | ||
2820 | |||
2821 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | 2907 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) |
2822 | is_dirty = memslot->dirty_bitmap[i]; | 2908 | is_dirty = memslot->dirty_bitmap[i]; |
2823 | 2909 | ||
2824 | /* If nothing is dirty, don't bother messing with page tables. */ | 2910 | /* If nothing is dirty, don't bother messing with page tables. */ |
2825 | if (is_dirty) { | 2911 | if (is_dirty) { |
2826 | struct kvm_memslots *slots, *old_slots; | 2912 | struct kvm_memslots *slots, *old_slots; |
2913 | unsigned long *dirty_bitmap; | ||
2827 | 2914 | ||
2828 | spin_lock(&kvm->mmu_lock); | 2915 | spin_lock(&kvm->mmu_lock); |
2829 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 2916 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
2830 | spin_unlock(&kvm->mmu_lock); | 2917 | spin_unlock(&kvm->mmu_lock); |
2831 | 2918 | ||
2832 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 2919 | r = -ENOMEM; |
2833 | if (!slots) | 2920 | dirty_bitmap = vmalloc(n); |
2834 | goto out_free; | 2921 | if (!dirty_bitmap) |
2922 | goto out; | ||
2923 | memset(dirty_bitmap, 0, n); | ||
2835 | 2924 | ||
2925 | r = -ENOMEM; | ||
2926 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | ||
2927 | if (!slots) { | ||
2928 | vfree(dirty_bitmap); | ||
2929 | goto out; | ||
2930 | } | ||
2836 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 2931 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
2837 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 2932 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; |
2838 | 2933 | ||
@@ -2841,13 +2936,20 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
2841 | synchronize_srcu_expedited(&kvm->srcu); | 2936 | synchronize_srcu_expedited(&kvm->srcu); |
2842 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | 2937 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; |
2843 | kfree(old_slots); | 2938 | kfree(old_slots); |
2939 | |||
2940 | r = -EFAULT; | ||
2941 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { | ||
2942 | vfree(dirty_bitmap); | ||
2943 | goto out; | ||
2944 | } | ||
2945 | vfree(dirty_bitmap); | ||
2946 | } else { | ||
2947 | r = -EFAULT; | ||
2948 | if (clear_user(log->dirty_bitmap, n)) | ||
2949 | goto out; | ||
2844 | } | 2950 | } |
2845 | 2951 | ||
2846 | r = 0; | 2952 | r = 0; |
2847 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | ||
2848 | r = -EFAULT; | ||
2849 | out_free: | ||
2850 | vfree(dirty_bitmap); | ||
2851 | out: | 2953 | out: |
2852 | mutex_unlock(&kvm->slots_lock); | 2954 | mutex_unlock(&kvm->slots_lock); |
2853 | return r; | 2955 | return r; |
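After this rework the vmalloc'ed scratch bitmap exists only on the dirty path; a clean slot is answered by clear_user() on the caller's buffer directly. A hypothetical caller, where the buffer must hold at least kvm_dirty_bitmap_bytes() for the slot:

#include <sys/ioctl.h>
#include <linux/kvm.h>

int fetch_dirty_log(int vm_fd, __u32 slot, void *bitmap)
{
        struct kvm_dirty_log log = { .slot = slot };

        log.dirty_bitmap = bitmap;      /* zero-filled if nothing was dirty */
        return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}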
@@ -2867,7 +2969,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2867 | union { | 2969 | union { |
2868 | struct kvm_pit_state ps; | 2970 | struct kvm_pit_state ps; |
2869 | struct kvm_pit_state2 ps2; | 2971 | struct kvm_pit_state2 ps2; |
2870 | struct kvm_memory_alias alias; | ||
2871 | struct kvm_pit_config pit_config; | 2972 | struct kvm_pit_config pit_config; |
2872 | } u; | 2973 | } u; |
2873 | 2974 | ||
@@ -2888,22 +2989,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2888 | goto out; | 2989 | goto out; |
2889 | break; | 2990 | break; |
2890 | } | 2991 | } |
2891 | case KVM_SET_MEMORY_REGION: { | ||
2892 | struct kvm_memory_region kvm_mem; | ||
2893 | struct kvm_userspace_memory_region kvm_userspace_mem; | ||
2894 | |||
2895 | r = -EFAULT; | ||
2896 | if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) | ||
2897 | goto out; | ||
2898 | kvm_userspace_mem.slot = kvm_mem.slot; | ||
2899 | kvm_userspace_mem.flags = kvm_mem.flags; | ||
2900 | kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr; | ||
2901 | kvm_userspace_mem.memory_size = kvm_mem.memory_size; | ||
2902 | r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0); | ||
2903 | if (r) | ||
2904 | goto out; | ||
2905 | break; | ||
2906 | } | ||
2907 | case KVM_SET_NR_MMU_PAGES: | 2992 | case KVM_SET_NR_MMU_PAGES: |
2908 | r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); | 2993 | r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg); |
2909 | if (r) | 2994 | if (r) |
@@ -2912,14 +2997,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2912 | case KVM_GET_NR_MMU_PAGES: | 2997 | case KVM_GET_NR_MMU_PAGES: |
2913 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); | 2998 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); |
2914 | break; | 2999 | break; |
2915 | case KVM_SET_MEMORY_ALIAS: | ||
2916 | r = -EFAULT; | ||
2917 | if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias))) | ||
2918 | goto out; | ||
2919 | r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias); | ||
2920 | if (r) | ||
2921 | goto out; | ||
2922 | break; | ||
2923 | case KVM_CREATE_IRQCHIP: { | 3000 | case KVM_CREATE_IRQCHIP: { |
2924 | struct kvm_pic *vpic; | 3001 | struct kvm_pic *vpic; |
2925 | 3002 | ||
@@ -3259,7 +3336,7 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | |||
3259 | } | 3336 | } |
3260 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); | 3337 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); |
3261 | if (ret < 0) { | 3338 | if (ret < 0) { |
3262 | r = X86EMUL_UNHANDLEABLE; | 3339 | r = X86EMUL_IO_NEEDED; |
3263 | goto out; | 3340 | goto out; |
3264 | } | 3341 | } |
3265 | 3342 | ||
@@ -3315,7 +3392,7 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val, | |||
3315 | } | 3392 | } |
3316 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); | 3393 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); |
3317 | if (ret < 0) { | 3394 | if (ret < 0) { |
3318 | r = X86EMUL_UNHANDLEABLE; | 3395 | r = X86EMUL_IO_NEEDED; |
3319 | goto out; | 3396 | goto out; |
3320 | } | 3397 | } |
3321 | 3398 | ||
@@ -3330,10 +3407,10 @@ out: | |||
3330 | static int emulator_read_emulated(unsigned long addr, | 3407 | static int emulator_read_emulated(unsigned long addr, |
3331 | void *val, | 3408 | void *val, |
3332 | unsigned int bytes, | 3409 | unsigned int bytes, |
3410 | unsigned int *error_code, | ||
3333 | struct kvm_vcpu *vcpu) | 3411 | struct kvm_vcpu *vcpu) |
3334 | { | 3412 | { |
3335 | gpa_t gpa; | 3413 | gpa_t gpa; |
3336 | u32 error_code; | ||
3337 | 3414 | ||
3338 | if (vcpu->mmio_read_completed) { | 3415 | if (vcpu->mmio_read_completed) { |
3339 | memcpy(val, vcpu->mmio_data, bytes); | 3416 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -3343,12 +3420,10 @@ static int emulator_read_emulated(unsigned long addr, | |||
3343 | return X86EMUL_CONTINUE; | 3420 | return X86EMUL_CONTINUE; |
3344 | } | 3421 | } |
3345 | 3422 | ||
3346 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); | 3423 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code); |
3347 | 3424 | ||
3348 | if (gpa == UNMAPPED_GVA) { | 3425 | if (gpa == UNMAPPED_GVA) |
3349 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
3350 | return X86EMUL_PROPAGATE_FAULT; | 3426 | return X86EMUL_PROPAGATE_FAULT; |
3351 | } | ||
3352 | 3427 | ||
3353 | /* For APIC access vmexit */ | 3428 | /* For APIC access vmexit */ |
3354 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3429 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -3370,11 +3445,12 @@ mmio: | |||
3370 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); | 3445 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); |
3371 | 3446 | ||
3372 | vcpu->mmio_needed = 1; | 3447 | vcpu->mmio_needed = 1; |
3373 | vcpu->mmio_phys_addr = gpa; | 3448 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3374 | vcpu->mmio_size = bytes; | 3449 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3375 | vcpu->mmio_is_write = 0; | 3450 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; |
3451 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; | ||
3376 | 3452 | ||
3377 | return X86EMUL_UNHANDLEABLE; | 3453 | return X86EMUL_IO_NEEDED; |
3378 | } | 3454 | } |
3379 | 3455 | ||
3380 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 3456 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
@@ -3392,17 +3468,15 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3392 | static int emulator_write_emulated_onepage(unsigned long addr, | 3468 | static int emulator_write_emulated_onepage(unsigned long addr, |
3393 | const void *val, | 3469 | const void *val, |
3394 | unsigned int bytes, | 3470 | unsigned int bytes, |
3471 | unsigned int *error_code, | ||
3395 | struct kvm_vcpu *vcpu) | 3472 | struct kvm_vcpu *vcpu) |
3396 | { | 3473 | { |
3397 | gpa_t gpa; | 3474 | gpa_t gpa; |
3398 | u32 error_code; | ||
3399 | 3475 | ||
3400 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); | 3476 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code); |
3401 | 3477 | ||
3402 | if (gpa == UNMAPPED_GVA) { | 3478 | if (gpa == UNMAPPED_GVA) |
3403 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
3404 | return X86EMUL_PROPAGATE_FAULT; | 3479 | return X86EMUL_PROPAGATE_FAULT; |
3405 | } | ||
3406 | 3480 | ||
3407 | /* For APIC access vmexit */ | 3481 | /* For APIC access vmexit */ |
3408 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3482 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -3420,10 +3494,11 @@ mmio: | |||
3420 | return X86EMUL_CONTINUE; | 3494 | return X86EMUL_CONTINUE; |
3421 | 3495 | ||
3422 | vcpu->mmio_needed = 1; | 3496 | vcpu->mmio_needed = 1; |
3423 | vcpu->mmio_phys_addr = gpa; | 3497 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3424 | vcpu->mmio_size = bytes; | 3498 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3425 | vcpu->mmio_is_write = 1; | 3499 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; |
3426 | memcpy(vcpu->mmio_data, val, bytes); | 3500 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; |
3501 | memcpy(vcpu->run->mmio.data, val, bytes); | ||
3427 | 3502 | ||
3428 | return X86EMUL_CONTINUE; | 3503 | return X86EMUL_CONTINUE; |
3429 | } | 3504 | } |
@@ -3431,6 +3506,7 @@ mmio: | |||
3431 | int emulator_write_emulated(unsigned long addr, | 3506 | int emulator_write_emulated(unsigned long addr, |
3432 | const void *val, | 3507 | const void *val, |
3433 | unsigned int bytes, | 3508 | unsigned int bytes, |
3509 | unsigned int *error_code, | ||
3434 | struct kvm_vcpu *vcpu) | 3510 | struct kvm_vcpu *vcpu) |
3435 | { | 3511 | { |
3436 | /* Crossing a page boundary? */ | 3512 | /* Crossing a page boundary? */ |
@@ -3438,16 +3514,17 @@ int emulator_write_emulated(unsigned long addr, | |||
3438 | int rc, now; | 3514 | int rc, now; |
3439 | 3515 | ||
3440 | now = -addr & ~PAGE_MASK; | 3516 | now = -addr & ~PAGE_MASK; |
3441 | rc = emulator_write_emulated_onepage(addr, val, now, vcpu); | 3517 | rc = emulator_write_emulated_onepage(addr, val, now, error_code, |
3518 | vcpu); | ||
3442 | if (rc != X86EMUL_CONTINUE) | 3519 | if (rc != X86EMUL_CONTINUE) |
3443 | return rc; | 3520 | return rc; |
3444 | addr += now; | 3521 | addr += now; |
3445 | val += now; | 3522 | val += now; |
3446 | bytes -= now; | 3523 | bytes -= now; |
3447 | } | 3524 | } |
3448 | return emulator_write_emulated_onepage(addr, val, bytes, vcpu); | 3525 | return emulator_write_emulated_onepage(addr, val, bytes, error_code, |
3526 | vcpu); | ||
3449 | } | 3527 | } |
3450 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | ||
3451 | 3528 | ||
3452 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | 3529 | #define CMPXCHG_TYPE(t, ptr, old, new) \ |
3453 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) | 3530 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) |
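The page-crossing split above relies on now = -addr & ~PAGE_MASK, which computes the number of bytes from addr to the next page boundary (zero for a page-aligned addr). A worked check with 4 KiB pages:

#include <assert.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long addr = 0x1ffe;            /* 2 bytes before 0x2000 */
        unsigned long now  = -addr & ~PAGE_MASK;

        assert(now == 2);                       /* bytes left in this page */
        assert((-0x3000UL & ~PAGE_MASK) == 0);  /* aligned start: no split */
        return 0;
}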
@@ -3463,6 +3540,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3463 | const void *old, | 3540 | const void *old, |
3464 | const void *new, | 3541 | const void *new, |
3465 | unsigned int bytes, | 3542 | unsigned int bytes, |
3543 | unsigned int *error_code, | ||
3466 | struct kvm_vcpu *vcpu) | 3544 | struct kvm_vcpu *vcpu) |
3467 | { | 3545 | { |
3468 | gpa_t gpa; | 3546 | gpa_t gpa; |
@@ -3484,6 +3562,10 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3484 | goto emul_write; | 3562 | goto emul_write; |
3485 | 3563 | ||
3486 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3564 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
3565 | if (is_error_page(page)) { | ||
3566 | kvm_release_page_clean(page); | ||
3567 | goto emul_write; | ||
3568 | } | ||
3487 | 3569 | ||
3488 | kaddr = kmap_atomic(page, KM_USER0); | 3570 | kaddr = kmap_atomic(page, KM_USER0); |
3489 | kaddr += offset_in_page(gpa); | 3571 | kaddr += offset_in_page(gpa); |
@@ -3516,7 +3598,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3516 | emul_write: | 3598 | emul_write: |
3517 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3599 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3518 | 3600 | ||
3519 | return emulator_write_emulated(addr, new, bytes, vcpu); | 3601 | return emulator_write_emulated(addr, new, bytes, error_code, vcpu); |
3520 | } | 3602 | } |
3521 | 3603 | ||
3522 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | 3604 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) |
@@ -3604,42 +3686,38 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
3604 | return X86EMUL_CONTINUE; | 3686 | return X86EMUL_CONTINUE; |
3605 | } | 3687 | } |
3606 | 3688 | ||
3607 | int emulate_clts(struct kvm_vcpu *vcpu) | 3689 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) |
3608 | { | 3690 | { |
3609 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | 3691 | if (!need_emulate_wbinvd(vcpu)) |
3610 | kvm_x86_ops->fpu_activate(vcpu); | 3692 | return X86EMUL_CONTINUE; |
3693 | |||
3694 | if (kvm_x86_ops->has_wbinvd_exit()) { | ||
3695 | smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, | ||
3696 | wbinvd_ipi, NULL, 1); | ||
3697 | cpumask_clear(vcpu->arch.wbinvd_dirty_mask); | ||
3698 | } | ||
3699 | wbinvd(); | ||
3611 | return X86EMUL_CONTINUE; | 3700 | return X86EMUL_CONTINUE; |
3612 | } | 3701 | } |
3702 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); | ||
3613 | 3703 | ||
3614 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3704 | int emulate_clts(struct kvm_vcpu *vcpu) |
3615 | { | 3705 | { |
3616 | return kvm_get_dr(ctxt->vcpu, dr, dest); | 3706 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
3707 | kvm_x86_ops->fpu_activate(vcpu); | ||
3708 | return X86EMUL_CONTINUE; | ||
3617 | } | 3709 | } |
3618 | 3710 | ||
3619 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3711 | int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) |
3620 | { | 3712 | { |
3621 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3713 | return _kvm_get_dr(vcpu, dr, dest); |
3622 | |||
3623 | return kvm_set_dr(ctxt->vcpu, dr, value & mask); | ||
3624 | } | 3714 | } |
3625 | 3715 | ||
3626 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3716 | int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) |
3627 | { | 3717 | { |
3628 | u8 opcodes[4]; | ||
3629 | unsigned long rip = kvm_rip_read(vcpu); | ||
3630 | unsigned long rip_linear; | ||
3631 | |||
3632 | if (!printk_ratelimit()) | ||
3633 | return; | ||
3634 | 3718 | ||
3635 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); | 3719 | return __kvm_set_dr(vcpu, dr, value); |
3636 | |||
3637 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); | ||
3638 | |||
3639 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", | ||
3640 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | ||
3641 | } | 3720 | } |
3642 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | ||
3643 | 3721 | ||
3644 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 3722 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
3645 | { | 3723 | { |
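kvm_emulate_wbinvd() broadcasts the cache flush to every physical CPU recorded in wbinvd_dirty_mask when the hardware supports a WBINVD exit, then flushes locally; the IPI callback is presumably just:

static void wbinvd_ipi(void *garbage)
{
        wbinvd();       /* write back and invalidate this CPU's caches */
}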
@@ -3674,27 +3752,32 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | |||
3674 | return value; | 3752 | return value; |
3675 | } | 3753 | } |
3676 | 3754 | ||
3677 | static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | 3755 | static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) |
3678 | { | 3756 | { |
3757 | int res = 0; | ||
3758 | |||
3679 | switch (cr) { | 3759 | switch (cr) { |
3680 | case 0: | 3760 | case 0: |
3681 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | 3761 | res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); |
3682 | break; | 3762 | break; |
3683 | case 2: | 3763 | case 2: |
3684 | vcpu->arch.cr2 = val; | 3764 | vcpu->arch.cr2 = val; |
3685 | break; | 3765 | break; |
3686 | case 3: | 3766 | case 3: |
3687 | kvm_set_cr3(vcpu, val); | 3767 | res = kvm_set_cr3(vcpu, val); |
3688 | break; | 3768 | break; |
3689 | case 4: | 3769 | case 4: |
3690 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | 3770 | res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
3691 | break; | 3771 | break; |
3692 | case 8: | 3772 | case 8: |
3693 | kvm_set_cr8(vcpu, val & 0xfUL); | 3773 | res = __kvm_set_cr8(vcpu, val & 0xfUL); |
3694 | break; | 3774 | break; |
3695 | default: | 3775 | default: |
3696 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 3776 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); |
3777 | res = -1; | ||
3697 | } | 3778 | } |
3779 | |||
3780 | return res; | ||
3698 | } | 3781 | } |
3699 | 3782 | ||
3700 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | 3783 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) |
@@ -3707,6 +3790,12 @@ static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | |||
3707 | kvm_x86_ops->get_gdt(vcpu, dt); | 3790 | kvm_x86_ops->get_gdt(vcpu, dt); |
3708 | } | 3791 | } |
3709 | 3792 | ||
3793 | static unsigned long emulator_get_cached_segment_base(int seg, | ||
3794 | struct kvm_vcpu *vcpu) | ||
3795 | { | ||
3796 | return get_segment_base(vcpu, seg); | ||
3797 | } | ||
3798 | |||
3710 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | 3799 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, |
3711 | struct kvm_vcpu *vcpu) | 3800 | struct kvm_vcpu *vcpu) |
3712 | { | 3801 | { |
@@ -3779,11 +3868,6 @@ static void emulator_set_segment_selector(u16 sel, int seg, | |||
3779 | kvm_set_segment(vcpu, &kvm_seg, seg); | 3868 | kvm_set_segment(vcpu, &kvm_seg, seg); |
3780 | } | 3869 | } |
3781 | 3870 | ||
3782 | static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
3783 | { | ||
3784 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
3785 | } | ||
3786 | |||
3787 | static struct x86_emulate_ops emulate_ops = { | 3871 | static struct x86_emulate_ops emulate_ops = { |
3788 | .read_std = kvm_read_guest_virt_system, | 3872 | .read_std = kvm_read_guest_virt_system, |
3789 | .write_std = kvm_write_guest_virt_system, | 3873 | .write_std = kvm_write_guest_virt_system, |
@@ -3797,11 +3881,15 @@ static struct x86_emulate_ops emulate_ops = { | |||
3797 | .set_cached_descriptor = emulator_set_cached_descriptor, | 3881 | .set_cached_descriptor = emulator_set_cached_descriptor, |
3798 | .get_segment_selector = emulator_get_segment_selector, | 3882 | .get_segment_selector = emulator_get_segment_selector, |
3799 | .set_segment_selector = emulator_set_segment_selector, | 3883 | .set_segment_selector = emulator_set_segment_selector, |
3884 | .get_cached_segment_base = emulator_get_cached_segment_base, | ||
3800 | .get_gdt = emulator_get_gdt, | 3885 | .get_gdt = emulator_get_gdt, |
3801 | .get_cr = emulator_get_cr, | 3886 | .get_cr = emulator_get_cr, |
3802 | .set_cr = emulator_set_cr, | 3887 | .set_cr = emulator_set_cr, |
3803 | .cpl = emulator_get_cpl, | 3888 | .cpl = emulator_get_cpl, |
3804 | .set_rflags = emulator_set_rflags, | 3889 | .get_dr = emulator_get_dr, |
3890 | .set_dr = emulator_set_dr, | ||
3891 | .set_msr = kvm_set_msr, | ||
3892 | .get_msr = kvm_get_msr, | ||
3805 | }; | 3893 | }; |
3806 | 3894 | ||
3807 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 3895 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
@@ -3812,14 +3900,75 @@ static void cache_all_regs(struct kvm_vcpu *vcpu) | |||
3812 | vcpu->arch.regs_dirty = ~0; | 3900 | vcpu->arch.regs_dirty = ~0; |
3813 | } | 3901 | } |
3814 | 3902 | ||
3903 | static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | ||
3904 | { | ||
3905 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); | ||
3906 | /* | ||
3907 | * an sti; sti sequence only disables interrupts for the first | ||
3908 | * instruction. So, if the last instruction, be it emulated or | ||
3909 | * not, left the system with the INT_STI flag enabled, it | ||
3910 | * means that the last instruction was an sti. We should not | ||
3911 | * leave the flag on in this case. The same goes for mov ss. | ||
3912 | */ | ||
3913 | if (!(int_shadow & mask)) | ||
3914 | kvm_x86_ops->set_interrupt_shadow(vcpu, mask); | ||
3915 | } | ||
3916 | |||
3917 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) | ||
3918 | { | ||
3919 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | ||
3920 | if (ctxt->exception == PF_VECTOR) | ||
3921 | kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code); | ||
3922 | else if (ctxt->error_code_valid) | ||
3923 | kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code); | ||
3924 | else | ||
3925 | kvm_queue_exception(vcpu, ctxt->exception); | ||
3926 | } | ||
3927 | |||
3928 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) | ||
3929 | { | ||
3930 | ++vcpu->stat.insn_emulation_fail; | ||
3931 | trace_kvm_emulate_insn_failed(vcpu); | ||
3932 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
3933 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
3934 | vcpu->run->internal.ndata = 0; | ||
3935 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3936 | return EMULATE_FAIL; | ||
3937 | } | ||
3938 | |||
3939 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | ||
3940 | { | ||
3941 | gpa_t gpa; | ||
3942 | |||
3943 | if (tdp_enabled) | ||
3944 | return false; | ||
3945 | |||
3946 | /* | ||
3947 | * If emulation was due to access to a shadowed page table | ||
3948 | * and it failed, try to unshadow the page and re-enter the | ||
3949 | * guest to let the CPU execute the instruction. | ||
3950 | */ | ||
3951 | if (kvm_mmu_unprotect_page_virt(vcpu, gva)) | ||
3952 | return true; | ||
3953 | |||
3954 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); | ||
3955 | |||
3956 | if (gpa == UNMAPPED_GVA) | ||
3957 | return true; /* let cpu generate fault */ | ||
3958 | |||
3959 | if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT))) | ||
3960 | return true; | ||
3961 | |||
3962 | return false; | ||
3963 | } | ||
3964 | |||
3815 | int emulate_instruction(struct kvm_vcpu *vcpu, | 3965 | int emulate_instruction(struct kvm_vcpu *vcpu, |
3816 | unsigned long cr2, | 3966 | unsigned long cr2, |
3817 | u16 error_code, | 3967 | u16 error_code, |
3818 | int emulation_type) | 3968 | int emulation_type) |
3819 | { | 3969 | { |
3820 | int r, shadow_mask; | 3970 | int r; |
3821 | struct decode_cache *c; | 3971 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
3822 | struct kvm_run *run = vcpu->run; | ||
3823 | 3972 | ||
3824 | kvm_clear_exception_queue(vcpu); | 3973 | kvm_clear_exception_queue(vcpu); |
3825 | vcpu->arch.mmio_fault_cr2 = cr2; | 3974 | vcpu->arch.mmio_fault_cr2 = cr2; |
@@ -3831,8 +3980,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3831 | */ | 3980 | */ |
3832 | cache_all_regs(vcpu); | 3981 | cache_all_regs(vcpu); |
3833 | 3982 | ||
3834 | vcpu->mmio_is_write = 0; | ||
3835 | |||
3836 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 3983 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
3837 | int cs_db, cs_l; | 3984 | int cs_db, cs_l; |
3838 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3985 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
@@ -3846,13 +3993,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3846 | ? X86EMUL_MODE_VM86 : cs_l | 3993 | ? X86EMUL_MODE_VM86 : cs_l |
3847 | ? X86EMUL_MODE_PROT64 : cs_db | 3994 | ? X86EMUL_MODE_PROT64 : cs_db |
3848 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3995 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
3996 | memset(c, 0, sizeof(struct decode_cache)); | ||
3997 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
3998 | vcpu->arch.emulate_ctxt.interruptibility = 0; | ||
3999 | vcpu->arch.emulate_ctxt.exception = -1; | ||
3849 | 4000 | ||
3850 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 4001 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3851 | trace_kvm_emulate_insn_start(vcpu); | 4002 | trace_kvm_emulate_insn_start(vcpu); |
3852 | 4003 | ||
3853 | /* Only allow emulation of specific instructions on #UD | 4004 | /* Only allow emulation of specific instructions on #UD |
3854 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 4005 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ |
3855 | c = &vcpu->arch.emulate_ctxt.decode; | ||
3856 | if (emulation_type & EMULTYPE_TRAP_UD) { | 4006 | if (emulation_type & EMULTYPE_TRAP_UD) { |
3857 | if (!c->twobyte) | 4007 | if (!c->twobyte) |
3858 | return EMULATE_FAIL; | 4008 | return EMULATE_FAIL; |
@@ -3880,11 +4030,11 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3880 | 4030 | ||
3881 | ++vcpu->stat.insn_emulation; | 4031 | ++vcpu->stat.insn_emulation; |
3882 | if (r) { | 4032 | if (r) { |
3883 | ++vcpu->stat.insn_emulation_fail; | 4033 | if (reexecute_instruction(vcpu, cr2)) |
3884 | trace_kvm_emulate_insn_failed(vcpu); | ||
3885 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | ||
3886 | return EMULATE_DONE; | 4034 | return EMULATE_DONE; |
3887 | return EMULATE_FAIL; | 4035 | if (emulation_type & EMULTYPE_SKIP) |
4036 | return EMULATE_FAIL; | ||
4037 | return handle_emulation_failure(vcpu); | ||
3888 | } | 4038 | } |
3889 | } | 4039 | } |
3890 | 4040 | ||
@@ -3893,48 +4043,42 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3893 | return EMULATE_DONE; | 4043 | return EMULATE_DONE; |
3894 | } | 4044 | } |
3895 | 4045 | ||
4046 | /* this is needed for the vmware backdoor interface to work since it | ||
4047 | changes register values during the IO operation */ | ||
4048 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
4049 | |||
3896 | restart: | 4050 | restart: |
3897 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 4051 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
3898 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | ||
3899 | 4052 | ||
3900 | if (r == 0) | 4053 | if (r) { /* emulation failed */ |
3901 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | 4054 | if (reexecute_instruction(vcpu, cr2)) |
4055 | return EMULATE_DONE; | ||
3902 | 4056 | ||
3903 | if (vcpu->arch.pio.count) { | 4057 | return handle_emulation_failure(vcpu); |
3904 | if (!vcpu->arch.pio.in) | ||
3905 | vcpu->arch.pio.count = 0; | ||
3906 | return EMULATE_DO_MMIO; | ||
3907 | } | 4058 | } |
3908 | 4059 | ||
3909 | if (r || vcpu->mmio_is_write) { | 4060 | toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); |
3910 | run->exit_reason = KVM_EXIT_MMIO; | 4061 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
3911 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 4062 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); |
3912 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 4063 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); |
3913 | run->mmio.len = vcpu->mmio_size; | 4064 | |
3914 | run->mmio.is_write = vcpu->mmio_is_write; | 4065 | if (vcpu->arch.emulate_ctxt.exception >= 0) { |
4066 | inject_emulated_exception(vcpu); | ||
4067 | return EMULATE_DONE; | ||
3915 | } | 4068 | } |
3916 | 4069 | ||
3917 | if (r) { | 4070 | if (vcpu->arch.pio.count) { |
3918 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 4071 | if (!vcpu->arch.pio.in) |
3919 | goto done; | 4072 | vcpu->arch.pio.count = 0; |
3920 | if (!vcpu->mmio_needed) { | ||
3921 | ++vcpu->stat.insn_emulation_fail; | ||
3922 | trace_kvm_emulate_insn_failed(vcpu); | ||
3923 | kvm_report_emulation_failure(vcpu, "mmio"); | ||
3924 | return EMULATE_FAIL; | ||
3925 | } | ||
3926 | return EMULATE_DO_MMIO; | 4073 | return EMULATE_DO_MMIO; |
3927 | } | 4074 | } |
3928 | 4075 | ||
3929 | if (vcpu->mmio_is_write) { | 4076 | if (vcpu->mmio_needed) { |
3930 | vcpu->mmio_needed = 0; | 4077 | if (vcpu->mmio_is_write) |
4078 | vcpu->mmio_needed = 0; | ||
3931 | return EMULATE_DO_MMIO; | 4079 | return EMULATE_DO_MMIO; |
3932 | } | 4080 | } |
3933 | 4081 | ||
3934 | done: | ||
3935 | if (vcpu->arch.exception.pending) | ||
3936 | vcpu->arch.emulate_ctxt.restart = false; | ||
3937 | |||
3938 | if (vcpu->arch.emulate_ctxt.restart) | 4082 | if (vcpu->arch.emulate_ctxt.restart) |
3939 | goto restart; | 4083 | goto restart; |
3940 | 4084 | ||
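With failures funneled through handle_emulation_failure(), callers of emulate_instruction() see a three-way result. A sketch of how a vmx/svm exit handler typically consumes it (returning 1 keeps the vcpu loop running, 0 exits to userspace):

static int handle_some_exit(struct kvm_vcpu *vcpu)
{
        switch (emulate_instruction(vcpu, 0, 0, 0)) {
        case EMULATE_DONE:
                return 1;       /* resume the guest */
        case EMULATE_DO_MMIO:
                return 0;       /* kvm_run already describes the MMIO/PIO */
        case EMULATE_FAIL:
        default:
                return 0;       /* exit_reason is KVM_EXIT_INTERNAL_ERROR */
        }
}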
@@ -4108,6 +4252,9 @@ int kvm_arch_init(void *opaque) | |||
4108 | 4252 | ||
4109 | perf_register_guest_info_callbacks(&kvm_guest_cbs); | 4253 | perf_register_guest_info_callbacks(&kvm_guest_cbs); |
4110 | 4254 | ||
4255 | if (cpu_has_xsave) | ||
4256 | host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
4257 | |||
4111 | return 0; | 4258 | return 0; |
4112 | 4259 | ||
4113 | out: | 4260 | out: |
@@ -4270,7 +4417,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
4270 | 4417 | ||
4271 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 4418 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
4272 | 4419 | ||
4273 | return emulator_write_emulated(rip, instruction, 3, vcpu); | 4420 | return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); |
4274 | } | 4421 | } |
4275 | 4422 | ||
4276 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4423 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
@@ -4506,59 +4653,78 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
4506 | } | 4653 | } |
4507 | } | 4654 | } |
4508 | 4655 | ||
4656 | static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) | ||
4657 | { | ||
4658 | if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && | ||
4659 | !vcpu->guest_xcr0_loaded) { | ||
4660 | /* kvm_set_xcr() also depends on this */ | ||
4661 | xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); | ||
4662 | vcpu->guest_xcr0_loaded = 1; | ||
4663 | } | ||
4664 | } | ||
4665 | |||
4666 | static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | ||
4667 | { | ||
4668 | if (vcpu->guest_xcr0_loaded) { | ||
4669 | if (vcpu->arch.xcr0 != host_xcr0) | ||
4670 | xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0); | ||
4671 | vcpu->guest_xcr0_loaded = 0; | ||
4672 | } | ||
4673 | } | ||
4674 | |||
4509 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 4675 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
4510 | { | 4676 | { |
4511 | int r; | 4677 | int r; |
4512 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 4678 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
4513 | vcpu->run->request_interrupt_window; | 4679 | vcpu->run->request_interrupt_window; |
4514 | 4680 | ||
4515 | if (vcpu->requests) | ||
4516 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | ||
4517 | kvm_mmu_unload(vcpu); | ||
4518 | |||
4519 | r = kvm_mmu_reload(vcpu); | ||
4520 | if (unlikely(r)) | ||
4521 | goto out; | ||
4522 | |||
4523 | if (vcpu->requests) { | 4681 | if (vcpu->requests) { |
4524 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) | 4682 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
4683 | kvm_mmu_unload(vcpu); | ||
4684 | if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) | ||
4525 | __kvm_migrate_timers(vcpu); | 4685 | __kvm_migrate_timers(vcpu); |
4526 | if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests)) | 4686 | if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) |
4527 | kvm_write_guest_time(vcpu); | 4687 | kvm_write_guest_time(vcpu); |
4528 | if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) | 4688 | if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) |
4529 | kvm_mmu_sync_roots(vcpu); | 4689 | kvm_mmu_sync_roots(vcpu); |
4530 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | 4690 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) |
4531 | kvm_x86_ops->tlb_flush(vcpu); | 4691 | kvm_x86_ops->tlb_flush(vcpu); |
4532 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 4692 | if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { |
4533 | &vcpu->requests)) { | ||
4534 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; | 4693 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; |
4535 | r = 0; | 4694 | r = 0; |
4536 | goto out; | 4695 | goto out; |
4537 | } | 4696 | } |
4538 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { | 4697 | if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { |
4539 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; | 4698 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
4540 | r = 0; | 4699 | r = 0; |
4541 | goto out; | 4700 | goto out; |
4542 | } | 4701 | } |
4543 | if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { | 4702 | if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) { |
4544 | vcpu->fpu_active = 0; | 4703 | vcpu->fpu_active = 0; |
4545 | kvm_x86_ops->fpu_deactivate(vcpu); | 4704 | kvm_x86_ops->fpu_deactivate(vcpu); |
4546 | } | 4705 | } |
4547 | } | 4706 | } |
4548 | 4707 | ||
4708 | r = kvm_mmu_reload(vcpu); | ||
4709 | if (unlikely(r)) | ||
4710 | goto out; | ||
4711 | |||
4549 | preempt_disable(); | 4712 | preempt_disable(); |
4550 | 4713 | ||
4551 | kvm_x86_ops->prepare_guest_switch(vcpu); | 4714 | kvm_x86_ops->prepare_guest_switch(vcpu); |
4552 | if (vcpu->fpu_active) | 4715 | if (vcpu->fpu_active) |
4553 | kvm_load_guest_fpu(vcpu); | 4716 | kvm_load_guest_fpu(vcpu); |
4717 | kvm_load_guest_xcr0(vcpu); | ||
4554 | 4718 | ||
4555 | local_irq_disable(); | 4719 | atomic_set(&vcpu->guest_mode, 1); |
4720 | smp_wmb(); | ||
4556 | 4721 | ||
4557 | clear_bit(KVM_REQ_KICK, &vcpu->requests); | 4722 | local_irq_disable(); |
4558 | smp_mb__after_clear_bit(); | ||
4559 | 4723 | ||
4560 | if (vcpu->requests || need_resched() || signal_pending(current)) { | 4724 | if (!atomic_read(&vcpu->guest_mode) || vcpu->requests |
4561 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 4725 | || need_resched() || signal_pending(current)) { |
4726 | atomic_set(&vcpu->guest_mode, 0); | ||
4727 | smp_wmb(); | ||
4562 | local_irq_enable(); | 4728 | local_irq_enable(); |
4563 | preempt_enable(); | 4729 | preempt_enable(); |
4564 | r = 1; | 4730 | r = 1; |
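The KVM_REQ_KICK bit gives way to an explicit guest_mode atomic with write barriers: the entry path publishes guest_mode = 1 before re-checking requests, and the kicker side presumably clears it with an xchg to decide whether an IPI is still needed, roughly:

/* Sketch of the kicker side (assumption; lives in common/arch code). */
void kvm_vcpu_kick_sketch(struct kvm_vcpu *vcpu)
{
        int cpu = vcpu->cpu;

        /* Pairs with the smp_wmb() after atomic_set(&guest_mode, 1):
         * only a vcpu still marked in guest mode needs the IPI. */
        if (atomic_xchg(&vcpu->guest_mode, 0))
                smp_send_reschedule(cpu);
}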
@@ -4603,7 +4769,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
4603 | if (hw_breakpoint_active()) | 4769 | if (hw_breakpoint_active()) |
4604 | hw_breakpoint_restore(); | 4770 | hw_breakpoint_restore(); |
4605 | 4771 | ||
4606 | set_bit(KVM_REQ_KICK, &vcpu->requests); | 4772 | atomic_set(&vcpu->guest_mode, 0); |
4773 | smp_wmb(); | ||
4607 | local_irq_enable(); | 4774 | local_irq_enable(); |
4608 | 4775 | ||
4609 | ++vcpu->stat.exits; | 4776 | ++vcpu->stat.exits; |
@@ -4665,7 +4832,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4665 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 4832 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4666 | kvm_vcpu_block(vcpu); | 4833 | kvm_vcpu_block(vcpu); |
4667 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 4834 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
4668 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 4835 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) |
4669 | { | 4836 | { |
4670 | switch(vcpu->arch.mp_state) { | 4837 | switch(vcpu->arch.mp_state) { |
4671 | case KVM_MP_STATE_HALTED: | 4838 | case KVM_MP_STATE_HALTED: |
@@ -4717,8 +4884,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4717 | int r; | 4884 | int r; |
4718 | sigset_t sigsaved; | 4885 | sigset_t sigsaved; |
4719 | 4886 | ||
4720 | vcpu_load(vcpu); | ||
4721 | |||
4722 | if (vcpu->sigset_active) | 4887 | if (vcpu->sigset_active) |
4723 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 4888 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
4724 | 4889 | ||
@@ -4743,7 +4908,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4743 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4908 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
4744 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); | 4909 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); |
4745 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4910 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4746 | if (r == EMULATE_DO_MMIO) { | 4911 | if (r != EMULATE_DONE) { |
4747 | r = 0; | 4912 | r = 0; |
4748 | goto out; | 4913 | goto out; |
4749 | } | 4914 | } |
@@ -4759,14 +4924,11 @@ out: | |||
4759 | if (vcpu->sigset_active) | 4924 | if (vcpu->sigset_active) |
4760 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 4925 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
4761 | 4926 | ||
4762 | vcpu_put(vcpu); | ||
4763 | return r; | 4927 | return r; |
4764 | } | 4928 | } |
4765 | 4929 | ||
4766 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 4930 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
4767 | { | 4931 | { |
4768 | vcpu_load(vcpu); | ||
4769 | |||
4770 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4932 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4771 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); | 4933 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
4772 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4934 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
@@ -4789,15 +4951,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4789 | regs->rip = kvm_rip_read(vcpu); | 4951 | regs->rip = kvm_rip_read(vcpu); |
4790 | regs->rflags = kvm_get_rflags(vcpu); | 4952 | regs->rflags = kvm_get_rflags(vcpu); |
4791 | 4953 | ||
4792 | vcpu_put(vcpu); | ||
4793 | |||
4794 | return 0; | 4954 | return 0; |
4795 | } | 4955 | } |
4796 | 4956 | ||
4797 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 4957 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
4798 | { | 4958 | { |
4799 | vcpu_load(vcpu); | ||
4800 | |||
4801 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); | 4959 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); |
4802 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); | 4960 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); |
4803 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); | 4961 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); |
@@ -4822,8 +4980,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4822 | 4980 | ||
4823 | vcpu->arch.exception.pending = false; | 4981 | vcpu->arch.exception.pending = false; |
4824 | 4982 | ||
4825 | vcpu_put(vcpu); | ||
4826 | |||
4827 | return 0; | 4983 | return 0; |
4828 | } | 4984 | } |
4829 | 4985 | ||
@@ -4842,8 +4998,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4842 | { | 4998 | { |
4843 | struct desc_ptr dt; | 4999 | struct desc_ptr dt; |
4844 | 5000 | ||
4845 | vcpu_load(vcpu); | ||
4846 | |||
4847 | kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 5001 | kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
4848 | kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | 5002 | kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); |
4849 | kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); | 5003 | kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); |
@@ -4875,32 +5029,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4875 | set_bit(vcpu->arch.interrupt.nr, | 5029 | set_bit(vcpu->arch.interrupt.nr, |
4876 | (unsigned long *)sregs->interrupt_bitmap); | 5030 | (unsigned long *)sregs->interrupt_bitmap); |
4877 | 5031 | ||
4878 | vcpu_put(vcpu); | ||
4879 | |||
4880 | return 0; | 5032 | return 0; |
4881 | } | 5033 | } |
4882 | 5034 | ||
4883 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 5035 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
4884 | struct kvm_mp_state *mp_state) | 5036 | struct kvm_mp_state *mp_state) |
4885 | { | 5037 | { |
4886 | vcpu_load(vcpu); | ||
4887 | mp_state->mp_state = vcpu->arch.mp_state; | 5038 | mp_state->mp_state = vcpu->arch.mp_state; |
4888 | vcpu_put(vcpu); | ||
4889 | return 0; | 5039 | return 0; |
4890 | } | 5040 | } |
4891 | 5041 | ||
4892 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | 5042 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, |
4893 | struct kvm_mp_state *mp_state) | 5043 | struct kvm_mp_state *mp_state) |
4894 | { | 5044 | { |
4895 | vcpu_load(vcpu); | ||
4896 | vcpu->arch.mp_state = mp_state->mp_state; | 5045 | vcpu->arch.mp_state = mp_state->mp_state; |
4897 | vcpu_put(vcpu); | ||
4898 | return 0; | 5046 | return 0; |
4899 | } | 5047 | } |
4900 | 5048 | ||
4901 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | 5049 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
4902 | bool has_error_code, u32 error_code) | 5050 | bool has_error_code, u32 error_code) |
4903 | { | 5051 | { |
5052 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | ||
4904 | int cs_db, cs_l, ret; | 5053 | int cs_db, cs_l, ret; |
4905 | cache_all_regs(vcpu); | 5054 | cache_all_regs(vcpu); |
4906 | 5055 | ||
@@ -4915,6 +5064,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
4915 | ? X86EMUL_MODE_VM86 : cs_l | 5064 | ? X86EMUL_MODE_VM86 : cs_l |
4916 | ? X86EMUL_MODE_PROT64 : cs_db | 5065 | ? X86EMUL_MODE_PROT64 : cs_db |
4917 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 5066 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
5067 | memset(c, 0, sizeof(struct decode_cache)); | ||
5068 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
4918 | 5069 | ||
4919 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, | 5070 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, |
4920 | tss_selector, reason, has_error_code, | 5071 | tss_selector, reason, has_error_code, |
@@ -4923,6 +5074,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
4923 | if (ret) | 5074 | if (ret) |
4924 | return EMULATE_FAIL; | 5075 | return EMULATE_FAIL; |
4925 | 5076 | ||
5077 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
5078 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | ||
4926 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 5079 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
4927 | return EMULATE_DONE; | 5080 | return EMULATE_DONE; |
4928 | } | 5081 | } |
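kvm_task_switch() now seeds the emulator's decode cache from the vCPU register file before calling emulator_task_switch(), and commits the registers and RIP back only on success. A userspace sketch of that round-trip (struct layout and register count are illustrative):

```c
#include <string.h>

#define NR_REGS 16

struct decode_cache_model {
	unsigned long regs[NR_REGS];
};

static void task_switch_reg_roundtrip(unsigned long vcpu_regs[NR_REGS],
				      struct decode_cache_model *c)
{
	memset(c, 0, sizeof(*c));			/* stale emulator state is fatal here */
	memcpy(c->regs, vcpu_regs, sizeof(c->regs));	/* seed the emulator */
	/* ... emulator_task_switch() mutates c->regs ... */
	memcpy(vcpu_regs, c->regs, sizeof(c->regs));	/* commit on success */
}
```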
@@ -4935,8 +5088,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4935 | int pending_vec, max_bits; | 5088 | int pending_vec, max_bits; |
4936 | struct desc_ptr dt; | 5089 | struct desc_ptr dt; |
4937 | 5090 | ||
4938 | vcpu_load(vcpu); | ||
4939 | |||
4940 | dt.size = sregs->idt.limit; | 5091 | dt.size = sregs->idt.limit; |
4941 | dt.address = sregs->idt.base; | 5092 | dt.address = sregs->idt.base; |
4942 | kvm_x86_ops->set_idt(vcpu, &dt); | 5093 | kvm_x86_ops->set_idt(vcpu, &dt); |
@@ -4996,8 +5147,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4996 | !is_protmode(vcpu)) | 5147 | !is_protmode(vcpu)) |
4997 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5148 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
4998 | 5149 | ||
4999 | vcpu_put(vcpu); | ||
5000 | |||
5001 | return 0; | 5150 | return 0; |
5002 | } | 5151 | } |
5003 | 5152 | ||
@@ -5007,12 +5156,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5007 | unsigned long rflags; | 5156 | unsigned long rflags; |
5008 | int i, r; | 5157 | int i, r; |
5009 | 5158 | ||
5010 | vcpu_load(vcpu); | ||
5011 | |||
5012 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { | 5159 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { |
5013 | r = -EBUSY; | 5160 | r = -EBUSY; |
5014 | if (vcpu->arch.exception.pending) | 5161 | if (vcpu->arch.exception.pending) |
5015 | goto unlock_out; | 5162 | goto out; |
5016 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | 5163 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) |
5017 | kvm_queue_exception(vcpu, DB_VECTOR); | 5164 | kvm_queue_exception(vcpu, DB_VECTOR); |
5018 | else | 5165 | else |
@@ -5054,34 +5201,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
5054 | 5201 | ||
5055 | r = 0; | 5202 | r = 0; |
5056 | 5203 | ||
5057 | unlock_out: | 5204 | out: |
5058 | vcpu_put(vcpu); | ||
5059 | 5205 | ||
5060 | return r; | 5206 | return r; |
5061 | } | 5207 | } |
5062 | 5208 | ||
5063 | /* | 5209 | /* |
5064 | * fxsave fpu state. Taken from x86_64/processor.h. To be killed when | ||
5065 | * we have asm/x86/processor.h | ||
5066 | */ | ||
5067 | struct fxsave { | ||
5068 | u16 cwd; | ||
5069 | u16 swd; | ||
5070 | u16 twd; | ||
5071 | u16 fop; | ||
5072 | u64 rip; | ||
5073 | u64 rdp; | ||
5074 | u32 mxcsr; | ||
5075 | u32 mxcsr_mask; | ||
5076 | u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ | ||
5077 | #ifdef CONFIG_X86_64 | ||
5078 | u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ | ||
5079 | #else | ||
5080 | u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ | ||
5081 | #endif | ||
5082 | }; | ||
5083 | |||
5084 | /* | ||
5085 | * Translate a guest virtual address to a guest physical address. | 5210 | * Translate a guest virtual address to a guest physical address. |
5086 | */ | 5211 | */ |
5087 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | 5212 | int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, |
@@ -5091,7 +5216,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
5091 | gpa_t gpa; | 5216 | gpa_t gpa; |
5092 | int idx; | 5217 | int idx; |
5093 | 5218 | ||
5094 | vcpu_load(vcpu); | ||
5095 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 5219 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
5096 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); | 5220 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); |
5097 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | 5221 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
@@ -5099,16 +5223,14 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
5099 | tr->valid = gpa != UNMAPPED_GVA; | 5223 | tr->valid = gpa != UNMAPPED_GVA; |
5100 | tr->writeable = 1; | 5224 | tr->writeable = 1; |
5101 | tr->usermode = 0; | 5225 | tr->usermode = 0; |
5102 | vcpu_put(vcpu); | ||
5103 | 5226 | ||
5104 | return 0; | 5227 | return 0; |
5105 | } | 5228 | } |
5106 | 5229 | ||
5107 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 5230 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
5108 | { | 5231 | { |
5109 | struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; | 5232 | struct i387_fxsave_struct *fxsave = |
5110 | 5233 | &vcpu->arch.guest_fpu.state->fxsave; | |
5111 | vcpu_load(vcpu); | ||
5112 | 5234 | ||
5113 | memcpy(fpu->fpr, fxsave->st_space, 128); | 5235 | memcpy(fpu->fpr, fxsave->st_space, 128); |
5114 | fpu->fcw = fxsave->cwd; | 5236 | fpu->fcw = fxsave->cwd; |
@@ -5119,16 +5241,13 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
5119 | fpu->last_dp = fxsave->rdp; | 5241 | fpu->last_dp = fxsave->rdp; |
5120 | memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); | 5242 | memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); |
5121 | 5243 | ||
5122 | vcpu_put(vcpu); | ||
5123 | |||
5124 | return 0; | 5244 | return 0; |
5125 | } | 5245 | } |
5126 | 5246 | ||
5127 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 5247 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
5128 | { | 5248 | { |
5129 | struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image; | 5249 | struct i387_fxsave_struct *fxsave = |
5130 | 5250 | &vcpu->arch.guest_fpu.state->fxsave; | |
5131 | vcpu_load(vcpu); | ||
5132 | 5251 | ||
5133 | memcpy(fxsave->st_space, fpu->fpr, 128); | 5252 | memcpy(fxsave->st_space, fpu->fpr, 128); |
5134 | fxsave->cwd = fpu->fcw; | 5253 | fxsave->cwd = fpu->fcw; |
@@ -5139,61 +5258,63 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
5139 | fxsave->rdp = fpu->last_dp; | 5258 | fxsave->rdp = fpu->last_dp; |
5140 | memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); | 5259 | memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); |
5141 | 5260 | ||
5142 | vcpu_put(vcpu); | ||
5143 | |||
5144 | return 0; | 5261 | return 0; |
5145 | } | 5262 | } |
5146 | 5263 | ||
5147 | void fx_init(struct kvm_vcpu *vcpu) | 5264 | int fx_init(struct kvm_vcpu *vcpu) |
5148 | { | 5265 | { |
5149 | unsigned after_mxcsr_mask; | 5266 | int err; |
5267 | |||
5268 | err = fpu_alloc(&vcpu->arch.guest_fpu); | ||
5269 | if (err) | ||
5270 | return err; | ||
5271 | |||
5272 | fpu_finit(&vcpu->arch.guest_fpu); | ||
5150 | 5273 | ||
5151 | /* | 5274 | /* |
5152 | * Touch the fpu the first time in non atomic context as if | 5275 | * Ensure guest xcr0 is valid for loading |
5153 | * this is the first fpu instruction the exception handler | ||
5154 | * will fire before the instruction returns and it'll have to | ||
5155 | * allocate ram with GFP_KERNEL. | ||
5156 | */ | 5276 | */ |
5157 | if (!used_math()) | 5277 | vcpu->arch.xcr0 = XSTATE_FP; |
5158 | kvm_fx_save(&vcpu->arch.host_fx_image); | ||
5159 | |||
5160 | /* Initialize guest FPU by resetting ours and saving into guest's */ | ||
5161 | preempt_disable(); | ||
5162 | kvm_fx_save(&vcpu->arch.host_fx_image); | ||
5163 | kvm_fx_finit(); | ||
5164 | kvm_fx_save(&vcpu->arch.guest_fx_image); | ||
5165 | kvm_fx_restore(&vcpu->arch.host_fx_image); | ||
5166 | preempt_enable(); | ||
5167 | 5278 | ||
5168 | vcpu->arch.cr0 |= X86_CR0_ET; | 5279 | vcpu->arch.cr0 |= X86_CR0_ET; |
5169 | after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); | 5280 | |
5170 | vcpu->arch.guest_fx_image.mxcsr = 0x1f80; | 5281 | return 0; |
5171 | memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask, | ||
5172 | 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); | ||
5173 | } | 5282 | } |
5174 | EXPORT_SYMBOL_GPL(fx_init); | 5283 | EXPORT_SYMBOL_GPL(fx_init); |
5175 | 5284 | ||
5285 | static void fx_free(struct kvm_vcpu *vcpu) | ||
5286 | { | ||
5287 | fpu_free(&vcpu->arch.guest_fpu); | ||
5288 | } | ||
5289 | |||
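fx_init() no longer carries a hand-rolled fxsave image: guest FPU state is now allocated with fpu_alloc(), initialized with fpu_finit(), and released by the new fx_free() helper, so fx_init() can fail with -ENOMEM and its callers must check the return value. A userspace analogue that mirrors only the control flow, not the real kernel FPU API:

```c
#include <stdlib.h>

struct guest_fpu_model {
	void *state;	/* stands in for vcpu->arch.guest_fpu.state */
};

static int fx_init_model(struct guest_fpu_model *fpu, size_t size)
{
	fpu->state = calloc(1, size);	/* fpu_alloc() + fpu_finit() analogue */
	if (!fpu->state)
		return -1;		/* the kernel returns -ENOMEM here */
	return 0;
}

static void fx_free_model(struct guest_fpu_model *fpu)
{
	free(fpu->state);		/* fpu_free() analogue */
	fpu->state = NULL;
}
```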
5176 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 5290 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
5177 | { | 5291 | { |
5178 | if (vcpu->guest_fpu_loaded) | 5292 | if (vcpu->guest_fpu_loaded) |
5179 | return; | 5293 | return; |
5180 | 5294 | ||
5295 | /* | ||
5296 | * Restore all possible states in the guest, | ||
5297 | * and assume host would use all available bits. | ||
5298 | * Guest xcr0 would be loaded later. | ||
5299 | */ | ||
5300 | kvm_put_guest_xcr0(vcpu); | ||
5181 | vcpu->guest_fpu_loaded = 1; | 5301 | vcpu->guest_fpu_loaded = 1; |
5182 | kvm_fx_save(&vcpu->arch.host_fx_image); | 5302 | unlazy_fpu(current); |
5183 | kvm_fx_restore(&vcpu->arch.guest_fx_image); | 5303 | fpu_restore_checking(&vcpu->arch.guest_fpu); |
5184 | trace_kvm_fpu(1); | 5304 | trace_kvm_fpu(1); |
5185 | } | 5305 | } |
5186 | 5306 | ||
5187 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | 5307 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) |
5188 | { | 5308 | { |
5309 | kvm_put_guest_xcr0(vcpu); | ||
5310 | |||
5189 | if (!vcpu->guest_fpu_loaded) | 5311 | if (!vcpu->guest_fpu_loaded) |
5190 | return; | 5312 | return; |
5191 | 5313 | ||
5192 | vcpu->guest_fpu_loaded = 0; | 5314 | vcpu->guest_fpu_loaded = 0; |
5193 | kvm_fx_save(&vcpu->arch.guest_fx_image); | 5315 | fpu_save_init(&vcpu->arch.guest_fpu); |
5194 | kvm_fx_restore(&vcpu->arch.host_fx_image); | ||
5195 | ++vcpu->stat.fpu_reload; | 5316 | ++vcpu->stat.fpu_reload; |
5196 | set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); | 5317 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); |
5197 | trace_kvm_fpu(0); | 5318 | trace_kvm_fpu(0); |
5198 | } | 5319 | } |
5199 | 5320 | ||
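The load/put pair now drives the generic task-FPU API instead of the private kvm_fx_* wrappers: loading saves the host task's live FPU context (unlazy_fpu) and restores the guest's, while putting saves the guest's state and queues KVM_REQ_DEACTIVATE_FPU through the new kvm_make_request() helper; the xcr0 handling added around both keeps XSAVE state consistent. A sketch of the guarded toggle, with the kernel calls left as comments:

```c
struct fpu_toggle_model {
	int guest_fpu_loaded;
};

static void load_guest_fpu_model(struct fpu_toggle_model *v)
{
	if (v->guest_fpu_loaded)
		return;				/* idempotent: already loaded */
	v->guest_fpu_loaded = 1;
	/* unlazy_fpu(current); fpu_restore_checking(&guest_fpu); */
}

static void put_guest_fpu_model(struct fpu_toggle_model *v)
{
	if (!v->guest_fpu_loaded)
		return;
	v->guest_fpu_loaded = 0;
	/* fpu_save_init(&guest_fpu);
	 * kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); */
}
```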
@@ -5204,6 +5325,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
5204 | vcpu->arch.time_page = NULL; | 5325 | vcpu->arch.time_page = NULL; |
5205 | } | 5326 | } |
5206 | 5327 | ||
5328 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); | ||
5329 | fx_free(vcpu); | ||
5207 | kvm_x86_ops->vcpu_free(vcpu); | 5330 | kvm_x86_ops->vcpu_free(vcpu); |
5208 | } | 5331 | } |
5209 | 5332 | ||
@@ -5217,9 +5340,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
5217 | { | 5340 | { |
5218 | int r; | 5341 | int r; |
5219 | 5342 | ||
5220 | /* We do fxsave: this must be aligned. */ | ||
5221 | BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); | ||
5222 | |||
5223 | vcpu->arch.mtrr_state.have_fixed = 1; | 5343 | vcpu->arch.mtrr_state.have_fixed = 1; |
5224 | vcpu_load(vcpu); | 5344 | vcpu_load(vcpu); |
5225 | r = kvm_arch_vcpu_reset(vcpu); | 5345 | r = kvm_arch_vcpu_reset(vcpu); |
@@ -5241,6 +5361,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
5241 | kvm_mmu_unload(vcpu); | 5361 | kvm_mmu_unload(vcpu); |
5242 | vcpu_put(vcpu); | 5362 | vcpu_put(vcpu); |
5243 | 5363 | ||
5364 | fx_free(vcpu); | ||
5244 | kvm_x86_ops->vcpu_free(vcpu); | 5365 | kvm_x86_ops->vcpu_free(vcpu); |
5245 | } | 5366 | } |
5246 | 5367 | ||
@@ -5334,7 +5455,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5334 | } | 5455 | } |
5335 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 5456 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
5336 | 5457 | ||
5458 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | ||
5459 | goto fail_free_mce_banks; | ||
5460 | |||
5337 | return 0; | 5461 | return 0; |
5462 | fail_free_mce_banks: | ||
5463 | kfree(vcpu->arch.mce_banks); | ||
5338 | fail_free_lapic: | 5464 | fail_free_lapic: |
5339 | kvm_free_lapic(vcpu); | 5465 | kvm_free_lapic(vcpu); |
5340 | fail_mmu_destroy: | 5466 | fail_mmu_destroy: |
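The wbinvd_dirty_mask allocation slots a new step into kvm_arch_vcpu_init()'s goto-unwind chain, with a matching fail_free_mce_banks label so a late failure releases everything allocated before it. The idiom in miniature:

```c
#include <stdlib.h>

static int init_model(void **banks, void **mask)
{
	*banks = malloc(64);
	if (!*banks)
		goto fail;
	*mask = calloc(1, 64);		/* zalloc_cpumask_var() analogue */
	if (!*mask)
		goto fail_free_banks;
	return 0;

fail_free_banks:
	free(*banks);			/* unwind strictly in reverse order */
fail:
	return -1;			/* -ENOMEM in the kernel */
}
```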
@@ -5364,12 +5490,6 @@ struct kvm *kvm_arch_create_vm(void) | |||
5364 | if (!kvm) | 5490 | if (!kvm) |
5365 | return ERR_PTR(-ENOMEM); | 5491 | return ERR_PTR(-ENOMEM); |
5366 | 5492 | ||
5367 | kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
5368 | if (!kvm->arch.aliases) { | ||
5369 | kfree(kvm); | ||
5370 | return ERR_PTR(-ENOMEM); | ||
5371 | } | ||
5372 | |||
5373 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 5493 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
5374 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 5494 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
5375 | 5495 | ||
@@ -5412,12 +5532,12 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
5412 | void kvm_arch_sync_events(struct kvm *kvm) | 5532 | void kvm_arch_sync_events(struct kvm *kvm) |
5413 | { | 5533 | { |
5414 | kvm_free_all_assigned_devices(kvm); | 5534 | kvm_free_all_assigned_devices(kvm); |
5535 | kvm_free_pit(kvm); | ||
5415 | } | 5536 | } |
5416 | 5537 | ||
5417 | void kvm_arch_destroy_vm(struct kvm *kvm) | 5538 | void kvm_arch_destroy_vm(struct kvm *kvm) |
5418 | { | 5539 | { |
5419 | kvm_iommu_unmap_guest(kvm); | 5540 | kvm_iommu_unmap_guest(kvm); |
5420 | kvm_free_pit(kvm); | ||
5421 | kfree(kvm->arch.vpic); | 5541 | kfree(kvm->arch.vpic); |
5422 | kfree(kvm->arch.vioapic); | 5542 | kfree(kvm->arch.vioapic); |
5423 | kvm_free_vcpus(kvm); | 5543 | kvm_free_vcpus(kvm); |
@@ -5427,7 +5547,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
5427 | if (kvm->arch.ept_identity_pagetable) | 5547 | if (kvm->arch.ept_identity_pagetable) |
5428 | put_page(kvm->arch.ept_identity_pagetable); | 5548 | put_page(kvm->arch.ept_identity_pagetable); |
5429 | cleanup_srcu_struct(&kvm->srcu); | 5549 | cleanup_srcu_struct(&kvm->srcu); |
5430 | kfree(kvm->arch.aliases); | ||
5431 | kfree(kvm); | 5550 | kfree(kvm); |
5432 | } | 5551 | } |
5433 | 5552 | ||
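Moving kvm_free_pit() from kvm_arch_destroy_vm() into kvm_arch_sync_events() tears the PIT down earlier, alongside assigned-device cleanup, so its timer and injection work is quiesced before the rest of the VM state is freed.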
@@ -5438,6 +5557,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
5438 | int user_alloc) | 5557 | int user_alloc) |
5439 | { | 5558 | { |
5440 | int npages = memslot->npages; | 5559 | int npages = memslot->npages; |
5560 | int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; | ||
5561 | |||
5562 | /* Prevent internal slot pages from being moved by fork()/COW. */ | ||
5563 | if (memslot->id >= KVM_MEMORY_SLOTS) | ||
5564 | map_flags = MAP_SHARED | MAP_ANONYMOUS; | ||
5441 | 5565 | ||
5442 | /* To keep backward compatibility with older userspace, | 5566 | /* To keep backward compatibility with older userspace, |
5443 | * x86 needs to handle !user_alloc case. | 5567 | * x86 needs to handle !user_alloc case. |
@@ -5450,7 +5574,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
5450 | userspace_addr = do_mmap(NULL, 0, | 5574 | userspace_addr = do_mmap(NULL, 0, |
5451 | npages * PAGE_SIZE, | 5575 | npages * PAGE_SIZE, |
5452 | PROT_READ | PROT_WRITE, | 5576 | PROT_READ | PROT_WRITE, |
5453 | MAP_PRIVATE | MAP_ANONYMOUS, | 5577 | map_flags, |
5454 | 0); | 5578 | 0); |
5455 | up_write(¤t->mm->mmap_sem); | 5579 | up_write(¤t->mm->mmap_sem); |
5456 | 5580 | ||
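Internal memory slots (slot id at or above KVM_MEMORY_SLOTS, such as the TSS and identity-map pages) are now mmapped MAP_SHARED so that a fork() in userspace cannot relocate their pages through copy-on-write; ordinary user slots keep MAP_PRIVATE. The flag selection, restated as a standalone helper:

```c
#define _DEFAULT_SOURCE		/* for MAP_ANONYMOUS on glibc */
#include <sys/mman.h>

static int slot_map_flags(int slot_id, int nr_user_slots)
{
	/* Internal slots must stay put across fork(): share, don't COW. */
	return slot_id >= nr_user_slots ? (MAP_SHARED  | MAP_ANONYMOUS)
					: (MAP_PRIVATE | MAP_ANONYMOUS);
}
```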
@@ -5523,7 +5647,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | |||
5523 | 5647 | ||
5524 | me = get_cpu(); | 5648 | me = get_cpu(); |
5525 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) | 5649 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) |
5526 | if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) | 5650 | if (atomic_xchg(&vcpu->guest_mode, 0)) |
5527 | smp_send_reschedule(cpu); | 5651 | smp_send_reschedule(cpu); |
5528 | put_cpu(); | 5652 | put_cpu(); |
5529 | } | 5653 | } |
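kvm_vcpu_kick() is the other half of the guest_mode protocol: atomic_xchg() both tests and claims the flag, so exactly one kicker sends the reschedule IPI, and the vCPU's entry-side re-check (above) then observes guest_mode == 0 and aborts. Sketch of the kicker side in the same C11-atomics model:

```c
#include <stdatomic.h>
#include <stdbool.h>

/* Returns true when this caller won the race and should send the IPI. */
static bool kick_should_send_ipi(atomic_int *guest_mode)
{
	/* xchg returns the old value: nonzero means the vCPU was in (or
	 * entering) guest mode and this caller claimed the kick. */
	return atomic_exchange(guest_mode, 0) != 0;
}
```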
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index f4b54458285b..b7a404722d2b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -65,13 +65,6 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
66 | } | 66 | } |
67 | 67 | ||
68 | static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) | ||
69 | { | ||
70 | return rcu_dereference_check(kvm->arch.aliases, | ||
71 | srcu_read_lock_held(&kvm->srcu) | ||
72 | || lockdep_is_held(&kvm->slots_lock)); | ||
73 | } | ||
74 | |||
75 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 68 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
76 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 69 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
77 | 70 | ||
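With memory aliasing removed from x86 KVM in this series, the kvm_aliases() accessor and its RCU/lockdep checks are deleted from x86.h, matching the removal of the kvm->arch.aliases allocation and teardown in x86.c above; memory is now described only through ordinary memory slots.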