Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/kernel/kvmclock.c  |   2
-rw-r--r--  arch/x86/kvm/Makefile       |   3
-rw-r--r--  arch/x86/kvm/i8254.c        |  24
-rw-r--r--  arch/x86/kvm/i8259.c        |   9
-rw-r--r--  arch/x86/kvm/irq.h          |   2
-rw-r--r--  arch/x86/kvm/lapic.c        |  14
-rw-r--r--  arch/x86/kvm/lapic.h        |   1
-rw-r--r--  arch/x86/kvm/mmu.c          |  62
-rw-r--r--  arch/x86/kvm/mmu.h          |   3
-rw-r--r--  arch/x86/kvm/paging_tmpl.h  |  28
-rw-r--r--  arch/x86/kvm/svm.c          | 131
-rw-r--r--  arch/x86/kvm/vmx.c          | 230
-rw-r--r--  arch/x86/kvm/vmx.h          |  12
-rw-r--r--  arch/x86/kvm/x86.c          | 299
-rw-r--r--  arch/x86/kvm/x86_emulate.c  | 257
15 files changed, 691 insertions(+), 386 deletions(-)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 87edf1ceb1df..d02def06ca91 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -113,7 +113,7 @@ static void kvm_setup_secondary_clock(void)
 #endif
 
 #ifdef CONFIG_SMP
-void __init kvm_smp_prepare_boot_cpu(void)
+static void __init kvm_smp_prepare_boot_cpu(void)
 {
 	WARN_ON(kvm_register_clock("primary cpu clock"));
 	native_smp_prepare_boot_cpu();
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c97d35c218db..d0e940bb6f40 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -2,7 +2,8 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
+				coalesced_mmio.o)
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 3829aa7b663f..c0f7872a9124 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -91,7 +91,7 @@ static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
 	c->gate = val;
 }
 
-int pit_get_gate(struct kvm *kvm, int channel)
+static int pit_get_gate(struct kvm *kvm, int channel)
 {
 	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
 
@@ -193,19 +193,16 @@ static void pit_latch_status(struct kvm *kvm, int channel)
 	}
 }
 
-int __pit_timer_fn(struct kvm_kpit_state *ps)
+static int __pit_timer_fn(struct kvm_kpit_state *ps)
 {
 	struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
 	struct kvm_kpit_timer *pt = &ps->pit_timer;
 
-	atomic_inc(&pt->pending);
-	smp_mb__after_atomic_inc();
-	if (vcpu0) {
-		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
-		if (waitqueue_active(&vcpu0->wq)) {
-			vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-			wake_up_interruptible(&vcpu0->wq);
-		}
-	}
+	if (!atomic_inc_and_test(&pt->pending))
+		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
+	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		wake_up_interruptible(&vcpu0->wq);
+	}
 
 	pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
@@ -308,6 +305,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
 		create_pit_timer(&ps->pit_timer, val, 0);
 		break;
 	case 2:
+	case 3:
 		create_pit_timer(&ps->pit_timer, val, 1);
 		break;
 	default:
@@ -459,7 +457,8 @@ static void pit_ioport_read(struct kvm_io_device *this,
 	mutex_unlock(&pit_state->lock);
 }
 
-static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
+			int len, int is_write)
 {
 	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
 		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
@@ -500,7 +499,8 @@ static void speaker_ioport_read(struct kvm_io_device *this,
 	mutex_unlock(&pit_state->lock);
 }
 
-static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr,
+			    int len, int is_write)
 {
 	return (addr == KVM_SPEAKER_BASE_ADDRESS);
 }
@@ -575,7 +575,7 @@ void kvm_free_pit(struct kvm *kvm)
 	}
 }
 
-void __inject_pit_timer_intr(struct kvm *kvm)
+static void __inject_pit_timer_intr(struct kvm *kvm)
 {
 	mutex_lock(&kvm->lock);
 	kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index ab29cf2def47..c31164e8aa46 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -130,8 +130,10 @@ void kvm_pic_set_irq(void *opaque, int irq, int level)
 {
 	struct kvm_pic *s = opaque;
 
-	pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
-	pic_update_irq(s);
+	if (irq >= 0 && irq < PIC_NUM_PINS) {
+		pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+		pic_update_irq(s);
+	}
 }
 
 /*
@@ -346,7 +348,8 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
 	return s->elcr;
 }
 
-static int picdev_in_range(struct kvm_io_device *this, gpa_t addr)
+static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
+			   int len, int is_write)
 {
 	switch (addr) {
 	case 0x20:
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2a15be2275c0..7ca47cbb48bb 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -30,6 +30,8 @@
 #include "ioapic.h"
 #include "lapic.h"
 
+#define PIC_NUM_PINS 16
+
 struct kvm;
 struct kvm_vcpu;
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ebc03f5ae162..73f43de69f67 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -356,8 +356,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_SMI:
 		printk(KERN_DEBUG "Ignoring guest SMI\n");
 		break;
+
 	case APIC_DM_NMI:
-		printk(KERN_DEBUG "Ignoring guest NMI\n");
+		kvm_inject_nmi(vcpu);
 		break;
 
 	case APIC_DM_INIT:
@@ -572,6 +573,8 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 {
 	u32 val = 0;
 
+	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
+
 	if (offset >= LAPIC_MMIO_LENGTH)
 		return 0;
 
@@ -695,6 +698,8 @@ static void apic_mmio_write(struct kvm_io_device *this,
 
 	offset &= 0xff0;
 
+	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
+
 	switch (offset) {
 	case APIC_ID:	/* Local APIC ID */
 		apic_set_reg(apic, APIC_ID, val);
@@ -780,7 +785,8 @@ static void apic_mmio_write(struct kvm_io_device *this,
 
 }
 
-static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
+static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr,
+			   int len, int size)
 {
 	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
 	int ret = 0;
@@ -939,8 +945,8 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 	int result = 0;
 	wait_queue_head_t *q = &apic->vcpu->wq;
 
-	atomic_inc(&apic->timer.pending);
-	set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
+	if(!atomic_inc_and_test(&apic->timer.pending))
+		set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
 	if (waitqueue_active(q)) {
 		apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		wake_up_interruptible(q);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 676c396c9cee..81858881287e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,6 +31,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7e7c3969f7a2..b0e4ddca6c18 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -66,7 +66,8 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
 #endif
 
 #if defined(MMU_DEBUG) || defined(AUDIT)
-static int dbg = 1;
+static int dbg = 0;
+module_param(dbg, bool, 0644);
 #endif
 
 #ifndef MMU_DEBUG
@@ -776,6 +777,15 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
 	BUG();
 }
 
+static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
+				    struct kvm_mmu_page *sp)
+{
+	int i;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
+		sp->spt[i] = shadow_trap_nonpresent_pte;
+}
+
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 {
 	unsigned index;
@@ -841,7 +851,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	hlist_add_head(&sp->hash_link, bucket);
 	if (!metaphysical)
 		rmap_write_protect(vcpu->kvm, gfn);
-	vcpu->arch.mmu.prefetch_page(vcpu, sp);
+	if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
+		vcpu->arch.mmu.prefetch_page(vcpu, sp);
+	else
+		nonpaging_prefetch_page(vcpu, sp);
 	return sp;
 }
 
@@ -917,14 +930,17 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	}
 	kvm_mmu_page_unlink_children(kvm, sp);
 	if (!sp->root_count) {
-		if (!sp->role.metaphysical)
+		if (!sp->role.metaphysical && !sp->role.invalid)
 			unaccount_shadowed(kvm, sp->gfn);
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
 	} else {
+		int invalid = sp->role.invalid;
 		list_move(&sp->link, &kvm->arch.active_mmu_pages);
 		sp->role.invalid = 1;
 		kvm_reload_remote_mmus(kvm);
+		if (!sp->role.metaphysical && !invalid)
+			unaccount_shadowed(kvm, sp->gfn);
 	}
 	kvm_mmu_reset_last_pte_updated(kvm);
 }
@@ -1103,7 +1119,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	mark_page_dirty(vcpu->kvm, gfn);
 
 	pgprintk("%s: setting spte %llx\n", __func__, spte);
-	pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n",
+	pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n",
 		 (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB",
 		 (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte);
 	set_shadow_pte(shadow_pte, spte);
@@ -1122,8 +1138,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		else
 			kvm_release_pfn_clean(pfn);
 	}
-	if (!ptwrite || !*ptwrite)
+	if (speculative) {
 		vcpu->arch.last_pte_updated = shadow_pte;
+		vcpu->arch.last_pte_gfn = gfn;
+	}
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -1171,9 +1189,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			return -ENOMEM;
 		}
 
-		table[index] = __pa(new_table->spt)
-			| PT_PRESENT_MASK | PT_WRITABLE_MASK
-			| shadow_user_mask | shadow_x_mask;
+		set_shadow_pte(&table[index],
+			       __pa(new_table->spt)
+			       | PT_PRESENT_MASK | PT_WRITABLE_MASK
+			       | shadow_user_mask | shadow_x_mask);
 		}
 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
 	}
@@ -1211,15 +1230,6 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 }
 
 
-static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
-				    struct kvm_mmu_page *sp)
-{
-	int i;
-
-	for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
-		sp->spt[i] = shadow_trap_nonpresent_pte;
-}
-
 static void mmu_free_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
@@ -1671,6 +1681,18 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	vcpu->arch.update_pte.pfn = pfn;
 }
 
+static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u64 *spte = vcpu->arch.last_pte_updated;
+
+	if (spte
+	    && vcpu->arch.last_pte_gfn == gfn
+	    && shadow_accessed_mask
+	    && !(*spte & shadow_accessed_mask)
+	    && is_shadow_present_pte(*spte))
+		set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+}
+
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes)
 {
@@ -1694,6 +1716,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 	mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
 	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_access_page(vcpu, gfn);
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, "pre pte write");
@@ -1948,7 +1971,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 	kvm_flush_remote_tlbs(kvm);
 }
 
-void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
+static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
 {
 	struct kvm_mmu_page *page;
 
@@ -1968,6 +1991,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		int npages;
 
+		if (!down_read_trylock(&kvm->slots_lock))
+			continue;
 		spin_lock(&kvm->mmu_lock);
 		npages = kvm->arch.n_alloc_mmu_pages -
 			 kvm->arch.n_free_mmu_pages;
@@ -1980,6 +2005,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
 		nr_to_scan--;
 
 		spin_unlock(&kvm->mmu_lock);
+		up_read(&kvm->slots_lock);
 	}
 	if (kvm_freed)
 		list_move_tail(&kvm_freed->vm_list, &vm_list);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 1730757bbc7a..258e5d56298e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -15,7 +15,8 @@
 #define PT_USER_MASK (1ULL << 2)
 #define PT_PWT_MASK (1ULL << 3)
 #define PT_PCD_MASK (1ULL << 4)
-#define PT_ACCESSED_MASK (1ULL << 5)
+#define PT_ACCESSED_SHIFT 5
+#define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
 #define PT_DIRTY_MASK (1ULL << 6)
 #define PT_PAGE_SIZE_MASK (1ULL << 7)
 #define PT_PAT_MASK (1ULL << 7)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 934c7b619396..4d918220baeb 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -460,8 +460,9 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
 static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 				 struct kvm_mmu_page *sp)
 {
-	int i, offset = 0, r = 0;
-	pt_element_t pt;
+	int i, j, offset, r;
+	pt_element_t pt[256 / sizeof(pt_element_t)];
+	gpa_t pte_gpa;
 
 	if (sp->role.metaphysical
 	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
@@ -469,19 +470,20 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 		return;
 	}
 
-	if (PTTYPE == 32)
+	pte_gpa = gfn_to_gpa(sp->gfn);
+	if (PTTYPE == 32) {
 		offset = sp->role.quadrant << PT64_LEVEL_BITS;
+		pte_gpa += offset * sizeof(pt_element_t);
+	}
 
-	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-		gpa_t pte_gpa = gfn_to_gpa(sp->gfn);
-		pte_gpa += (i+offset) * sizeof(pt_element_t);
-
-		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &pt,
-					  sizeof(pt_element_t));
-		if (r || is_present_pte(pt))
-			sp->spt[i] = shadow_trap_nonpresent_pte;
-		else
-			sp->spt[i] = shadow_notrap_nonpresent_pte;
+	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
+		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
+		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
+		for (j = 0; j < ARRAY_SIZE(pt); ++j)
+			if (r || is_present_pte(pt[j]))
+				sp->spt[i+j] = shadow_trap_nonpresent_pte;
+			else
+				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
 	}
 }
 
487 489
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6b0d5fa5bab3..b756e876dce3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -27,6 +27,8 @@
 
 #include <asm/desc.h>
 
+#define __ex(x) __kvm_handle_fault_on_reboot(x)
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -129,17 +131,17 @@ static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq)
 
 static inline void clgi(void)
 {
-	asm volatile (SVM_CLGI);
+	asm volatile (__ex(SVM_CLGI));
 }
 
 static inline void stgi(void)
 {
-	asm volatile (SVM_STGI);
+	asm volatile (__ex(SVM_STGI));
 }
 
 static inline void invlpga(unsigned long addr, u32 asid)
 {
-	asm volatile (SVM_INVLPGA :: "a"(addr), "c"(asid));
+	asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid));
 }
 
 static inline unsigned long kvm_read_cr2(void)
@@ -270,19 +272,11 @@ static int has_svm(void)
 
 static void svm_hardware_disable(void *garbage)
 {
-	struct svm_cpu_data *svm_data
-		= per_cpu(svm_data, raw_smp_processor_id());
-
-	if (svm_data) {
-		uint64_t efer;
+	uint64_t efer;
 
-		wrmsrl(MSR_VM_HSAVE_PA, 0);
-		rdmsrl(MSR_EFER, efer);
-		wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
-		per_cpu(svm_data, raw_smp_processor_id()) = NULL;
-		__free_page(svm_data->save_area);
-		kfree(svm_data);
-	}
+	wrmsrl(MSR_VM_HSAVE_PA, 0);
+	rdmsrl(MSR_EFER, efer);
+	wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
 }
 
 static void svm_hardware_enable(void *garbage)
@@ -321,6 +315,19 @@ static void svm_hardware_enable(void *garbage)
 		       page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
 }
 
+static void svm_cpu_uninit(int cpu)
+{
+	struct svm_cpu_data *svm_data
+		= per_cpu(svm_data, raw_smp_processor_id());
+
+	if (!svm_data)
+		return;
+
+	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+	__free_page(svm_data->save_area);
+	kfree(svm_data);
+}
+
 static int svm_cpu_init(int cpu)
 {
 	struct svm_cpu_data *svm_data;
@@ -458,6 +465,11 @@ err:
 
 static __exit void svm_hardware_unsetup(void)
 {
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		svm_cpu_uninit(cpu);
+
 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
 	iopm_base = 0;
 }
@@ -707,10 +719,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
 	rdtscll(vcpu->arch.host_tsc);
 }
 
-static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
-{
-}
-
 static void svm_cache_regs(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -949,7 +957,9 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
 
 static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
 {
-	return to_svm(vcpu)->db_regs[dr];
+	unsigned long val = to_svm(vcpu)->db_regs[dr];
+	KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
+	return val;
 }
 
 static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
@@ -1004,6 +1014,16 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 
 	fault_address  = svm->vmcb->control.exit_info_2;
 	error_code = svm->vmcb->control.exit_info_1;
+
+	if (!npt_enabled)
+		KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
+			    (u32)fault_address, (u32)(fault_address >> 32),
+			    handler);
+	else
+		KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
+			    (u32)fault_address, (u32)(fault_address >> 32),
+			    handler);
+
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
@@ -1081,6 +1101,19 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
 }
 
+static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	KVMTRACE_0D(NMI, &svm->vcpu, handler);
+	return 1;
+}
+
+static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	++svm->vcpu.stat.irq_exits;
+	KVMTRACE_0D(INTR, &svm->vcpu, handler);
+	return 1;
+}
+
 static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	return 1;
@@ -1219,6 +1252,9 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	if (svm_get_msr(&svm->vcpu, ecx, &data))
 		kvm_inject_gp(&svm->vcpu, 0);
 	else {
+		KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data,
+			    (u32)(data >> 32), handler);
+
 		svm->vmcb->save.rax = data & 0xffffffff;
 		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
 		svm->next_rip = svm->vmcb->save.rip + 2;
@@ -1284,16 +1320,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 	case MSR_K7_EVNTSEL1:
 	case MSR_K7_EVNTSEL2:
 	case MSR_K7_EVNTSEL3:
+	case MSR_K7_PERFCTR0:
+	case MSR_K7_PERFCTR1:
+	case MSR_K7_PERFCTR2:
+	case MSR_K7_PERFCTR3:
 		/*
-		 * only support writing 0 to the performance counters for now
-		 * to make Windows happy. Should be replaced by a real
-		 * performance counter emulation later.
+		 * Just discard all writes to the performance counters; this
+		 * should keep both older linux and windows 64-bit guests
+		 * happy
 		 */
-		if (data != 0)
-			goto unhandled;
+		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
+
 		break;
 	default:
-	unhandled:
 		return kvm_set_msr_common(vcpu, ecx, data);
 	}
 	return 0;
@@ -1304,6 +1343,10 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data = (svm->vmcb->save.rax & -1u)
 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+
+	KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32),
+		    handler);
+
 	svm->next_rip = svm->vmcb->save.rip + 2;
 	if (svm_set_msr(&svm->vcpu, ecx, data))
 		kvm_inject_gp(&svm->vcpu, 0);
@@ -1323,6 +1366,8 @@ static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int interrupt_window_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
+	KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
+
 	svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 	/*
@@ -1364,8 +1409,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR]	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR]	= nm_interception,
 	[SVM_EXIT_EXCP_BASE + MC_VECTOR]	= mc_interception,
-	[SVM_EXIT_INTR]				= nop_on_interception,
-	[SVM_EXIT_NMI]				= nop_on_interception,
+	[SVM_EXIT_INTR]				= intr_interception,
+	[SVM_EXIT_NMI]				= nmi_interception,
 	[SVM_EXIT_SMI]				= nop_on_interception,
 	[SVM_EXIT_INIT]				= nop_on_interception,
 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
@@ -1397,6 +1442,9 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 exit_code = svm->vmcb->control.exit_code;
 
+	KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
+		    (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
+
 	if (npt_enabled) {
 		int mmu_reload = 0;
 		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1470,6 +1518,8 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 {
 	struct vmcb_control_area *control;
 
+	KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler);
+
 	control = &svm->vmcb->control;
 	control->int_vector = irq;
 	control->int_ctl &= ~V_INTR_PRIO_MASK;
@@ -1660,9 +1710,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	sync_lapic_to_cr8(vcpu);
 
 	save_host_msrs(vcpu);
-	fs_selector = read_fs();
-	gs_selector = read_gs();
-	ldt_selector = read_ldt();
+	fs_selector = kvm_read_fs();
+	gs_selector = kvm_read_gs();
+	ldt_selector = kvm_read_ldt();
 	svm->host_cr2 = kvm_read_cr2();
 	svm->host_dr6 = read_dr6();
 	svm->host_dr7 = read_dr7();
@@ -1716,17 +1766,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		/* Enter guest mode */
 		"push %%rax \n\t"
 		"mov %c[vmcb](%[svm]), %%rax \n\t"
-		SVM_VMLOAD "\n\t"
-		SVM_VMRUN "\n\t"
-		SVM_VMSAVE "\n\t"
+		__ex(SVM_VMLOAD) "\n\t"
+		__ex(SVM_VMRUN) "\n\t"
+		__ex(SVM_VMSAVE) "\n\t"
 		"pop %%rax \n\t"
 #else
 		/* Enter guest mode */
 		"push %%eax \n\t"
 		"mov %c[vmcb](%[svm]), %%eax \n\t"
-		SVM_VMLOAD "\n\t"
-		SVM_VMRUN "\n\t"
-		SVM_VMSAVE "\n\t"
+		__ex(SVM_VMLOAD) "\n\t"
+		__ex(SVM_VMRUN) "\n\t"
+		__ex(SVM_VMSAVE) "\n\t"
 		"pop %%eax \n\t"
 #endif
 
@@ -1795,9 +1845,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	write_dr7(svm->host_dr7);
 	kvm_write_cr2(svm->host_cr2);
 
-	load_fs(fs_selector);
-	load_gs(gs_selector);
-	load_ldt(ldt_selector);
+	kvm_load_fs(fs_selector);
+	kvm_load_gs(gs_selector);
+	kvm_load_ldt(ldt_selector);
 	load_host_msrs(vcpu);
 
 	reload_tss(vcpu);
@@ -1889,7 +1939,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.prepare_guest_switch = svm_prepare_guest_switch,
 	.vcpu_load = svm_vcpu_load,
 	.vcpu_put = svm_vcpu_put,
-	.vcpu_decache = svm_vcpu_decache,
 
 	.set_guest_debug = svm_guest_debug,
 	.get_msr = svm_get_msr,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 10ce6ee4c491..0cac63701719 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -30,6 +30,8 @@
 #include <asm/io.h>
 #include <asm/desc.h>
 
+#define __ex(x) __kvm_handle_fault_on_reboot(x)
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -53,6 +55,7 @@ struct vmcs {
 
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
+	struct list_head      local_vcpus_link;
 	int                   launched;
 	u8                    fail;
 	u32                   idt_vectoring_info;
@@ -88,9 +91,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 }
 
 static int init_rmode(struct kvm *kvm);
+static u64 construct_eptp(unsigned long root_hpa);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
+static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
 
 static struct page *vmx_io_bitmap_a;
 static struct page *vmx_io_bitmap_b;
@@ -260,6 +265,11 @@ static inline int cpu_has_vmx_vpid(void)
 		SECONDARY_EXEC_ENABLE_VPID);
 }
 
+static inline int cpu_has_virtual_nmis(void)
+{
+	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -278,7 +288,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
 		u64 gva;
 	} operand = { vpid, 0, gva };
 
-	asm volatile (ASM_VMX_INVVPID
+	asm volatile (__ex(ASM_VMX_INVVPID)
 		  /* CF==1 or ZF==1 --> rc = -1 */
 		  "; ja 1f ; ud2 ; 1:"
 		  : : "a"(&operand), "c"(ext) : "cc", "memory");
@@ -290,7 +300,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
 		u64 eptp, gpa;
 	} operand = {eptp, gpa};
 
-	asm volatile (ASM_VMX_INVEPT
+	asm volatile (__ex(ASM_VMX_INVEPT)
 			/* CF==1 or ZF==1 --> rc = -1 */
 			"; ja 1f ; ud2 ; 1:\n"
 			: : "a" (&operand), "c" (ext) : "cc", "memory");
@@ -311,7 +321,7 @@ static void vmcs_clear(struct vmcs *vmcs)
 	u64 phys_addr = __pa(vmcs);
 	u8 error;
 
-	asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0"
+	asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
 		      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
 		      : "cc", "memory");
 	if (error)
@@ -329,6 +339,9 @@ static void __vcpu_clear(void *arg)
 	if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
 		per_cpu(current_vmcs, cpu) = NULL;
 	rdtscll(vmx->vcpu.arch.host_tsc);
+	list_del(&vmx->local_vcpus_link);
+	vmx->vcpu.cpu = -1;
+	vmx->launched = 0;
 }
 
 static void vcpu_clear(struct vcpu_vmx *vmx)
@@ -336,7 +349,6 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
 	if (vmx->vcpu.cpu == -1)
 		return;
 	smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
-	vmx->launched = 0;
 }
 
 static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
@@ -378,7 +390,7 @@ static unsigned long vmcs_readl(unsigned long field)
 {
 	unsigned long value;
 
-	asm volatile (ASM_VMX_VMREAD_RDX_RAX
+	asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
 		      : "=a"(value) : "d"(field) : "cc");
 	return value;
 }
@@ -413,7 +425,7 @@ static void vmcs_writel(unsigned long field, unsigned long value)
 {
 	u8 error;
 
-	asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
+	asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
 		       : "=q"(error) : "a"(value), "d"(field) : "cc");
 	if (unlikely(error))
 		vmwrite_error(field, value);
@@ -431,10 +443,8 @@ static void vmcs_write32(unsigned long field, u32 value)
 
 static void vmcs_write64(unsigned long field, u64 value)
 {
-#ifdef CONFIG_X86_64
-	vmcs_writel(field, value);
-#else
 	vmcs_writel(field, value);
+#ifndef CONFIG_X86_64
 	asm volatile ("");
 	vmcs_writel(field+1, value >> 32);
 #endif
@@ -474,7 +484,7 @@ static void reload_tss(void)
 	struct descriptor_table gdt;
 	struct desc_struct *descs;
 
-	get_gdt(&gdt);
+	kvm_get_gdt(&gdt);
 	descs = (void *)gdt.base;
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
@@ -530,9 +540,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 	 * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
 	 * allow segment selectors with cpl > 0 or ti == 1.
 	 */
-	vmx->host_state.ldt_sel = read_ldt();
+	vmx->host_state.ldt_sel = kvm_read_ldt();
 	vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
-	vmx->host_state.fs_sel = read_fs();
+	vmx->host_state.fs_sel = kvm_read_fs();
 	if (!(vmx->host_state.fs_sel & 7)) {
 		vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
 		vmx->host_state.fs_reload_needed = 0;
@@ -540,7 +550,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 		vmcs_write16(HOST_FS_SELECTOR, 0);
 		vmx->host_state.fs_reload_needed = 1;
 	}
-	vmx->host_state.gs_sel = read_gs();
+	vmx->host_state.gs_sel = kvm_read_gs();
 	if (!(vmx->host_state.gs_sel & 7))
 		vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
 	else {
@@ -576,15 +586,15 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 	++vmx->vcpu.stat.host_state_reload;
 	vmx->host_state.loaded = 0;
 	if (vmx->host_state.fs_reload_needed)
-		load_fs(vmx->host_state.fs_sel);
+		kvm_load_fs(vmx->host_state.fs_sel);
 	if (vmx->host_state.gs_ldt_reload_needed) {
-		load_ldt(vmx->host_state.ldt_sel);
+		kvm_load_ldt(vmx->host_state.ldt_sel);
 		/*
 		 * If we have to reload gs, we must take care to
 		 * preserve our gs base.
 		 */
 		local_irq_save(flags);
-		load_gs(vmx->host_state.gs_sel);
+		kvm_load_gs(vmx->host_state.gs_sel);
 #ifdef CONFIG_X86_64
 		wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
 #endif
@@ -617,13 +627,17 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		vcpu_clear(vmx);
 		kvm_migrate_timers(vcpu);
 		vpid_sync_vcpu_all(vmx);
+		local_irq_disable();
+		list_add(&vmx->local_vcpus_link,
+			 &per_cpu(vcpus_on_cpu, cpu));
+		local_irq_enable();
 	}
 
 	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
 		u8 error;
 
 		per_cpu(current_vmcs, cpu) = vmx->vmcs;
-		asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
+		asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
 			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
 			      : "cc");
 		if (error)
@@ -640,8 +654,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * Linux uses per-cpu TSS and GDT, so set these when switching
 		 * processors.
 		 */
-		vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */
-		get_gdt(&dt);
+		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
+		kvm_get_gdt(&dt);
 		vmcs_writel(HOST_GDTR_BASE, dt.base);   /* 22.2.4 */
 
 		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
@@ -684,11 +698,6 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 	update_exception_bitmap(vcpu);
 }
 
-static void vmx_vcpu_decache(struct kvm_vcpu *vcpu)
-{
-	vcpu_clear(to_vmx(vcpu));
-}
-
 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
 	return vmcs_readl(GUEST_RFLAGS);
@@ -913,6 +922,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 	case MSR_IA32_TIME_STAMP_COUNTER:
 		guest_write_tsc(data);
 		break;
+	case MSR_P6_PERFCTR0:
+	case MSR_P6_PERFCTR1:
+	case MSR_P6_EVNTSEL0:
+	case MSR_P6_EVNTSEL1:
+		/*
+		 * Just discard all writes to the performance counters; this
+		 * should keep both older linux and windows 64-bit guests
+		 * happy
+		 */
+		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
+
+		break;
 	default:
 		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
@@ -1022,6 +1043,7 @@ static void hardware_enable(void *garbage)
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 	u64 old;
 
+	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
 	if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED |
 		    MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED))
@@ -1032,13 +1054,25 @@ static void hardware_enable(void *garbage)
 		       MSR_IA32_FEATURE_CONTROL_LOCKED |
 		       MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED);
 	write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
-	asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr)
+	asm volatile (ASM_VMX_VMXON_RAX
+		      : : "a"(&phys_addr), "m"(phys_addr)
 		      : "memory", "cc");
 }
 
+static void vmclear_local_vcpus(void)
+{
+	int cpu = raw_smp_processor_id();
+	struct vcpu_vmx *vmx, *n;
+
+	list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu),
+				 local_vcpus_link)
+		__vcpu_clear(vmx);
+}
+
 static void hardware_disable(void *garbage)
 {
-	asm volatile (ASM_VMX_VMXOFF : : : "cc");
+	vmclear_local_vcpus();
+	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
@@ -1072,7 +1106,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	u32 _vmentry_control = 0;
 
 	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-	opt = 0;
+	opt = PIN_BASED_VIRTUAL_NMIS;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
 				&_pin_based_exec_control) < 0)
 		return -EIO;
@@ -1389,6 +1423,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	vpid_sync_vcpu_all(to_vmx(vcpu));
+	if (vm_need_ept())
+		ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
 }
 
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
@@ -1420,7 +1456,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 	if (!(cr0 & X86_CR0_PG)) {
 		/* From paging/starting to nonpaging */
 		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-			     vmcs_config.cpu_based_exec_ctrl |
+			     vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
 			     (CPU_BASED_CR3_LOAD_EXITING |
 			      CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
@@ -1430,7 +1466,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 	} else if (!is_paging(vcpu)) {
 		/* From nonpaging to paging */
 		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-			     vmcs_config.cpu_based_exec_ctrl &
+			     vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
 			     ~(CPU_BASED_CR3_LOAD_EXITING |
 			       CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
@@ -1821,7 +1857,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
+static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
 {
 	void *va;
 
@@ -1907,8 +1943,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
 	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
-	vmcs_write16(HOST_FS_SELECTOR, read_fs());    /* 22.2.4 */
-	vmcs_write16(HOST_GS_SELECTOR, read_gs());    /* 22.2.4 */
+	vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs());    /* 22.2.4 */
+	vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs());    /* 22.2.4 */
 	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
 	rdmsrl(MSR_FS_BASE, a);
@@ -1922,7 +1958,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-	get_idt(&dt);
+	kvm_get_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.base);   /* 22.2.4 */
 
 	asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
@@ -2114,6 +2150,13 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
 }
 
+static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
+{
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+	vcpu->arch.nmi_pending = 0;
+}
+
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
 	int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2554,8 +2597,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
 	offset = exit_qualification & 0xffful;
 
-	KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler);
-
 	er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
 
 	if (er != EMULATE_DONE) {
@@ -2639,6 +2680,19 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	u32 cpu_based_vm_exec_control;
+
+	/* clear pending NMI */
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+	++vcpu->stat.nmi_window_exits;
+
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -2649,6 +2703,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
 	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
+	[EXIT_REASON_NMI_WINDOW]	      = handle_nmi_window,
 	[EXIT_REASON_IO_INSTRUCTION]          = handle_io,
 	[EXIT_REASON_CR_ACCESS]               = handle_cr,
 	[EXIT_REASON_DR_ACCESS]               = handle_dr,
@@ -2736,17 +2791,52 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	if (!cpu_has_virtual_nmis())
+		return;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	return !(guest_intr & (GUEST_INTR_STATE_NMI |
+			       GUEST_INTR_STATE_MOV_SS |
+			       GUEST_INTR_STATE_STI));
+}
+
+static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
+				GUEST_INTR_STATE_STI)) &&
+		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
+}
+
+static void enable_intr_window(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.nmi_pending)
+		enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu))
+		enable_irq_window(vcpu);
+}
+
 static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 idtv_info_field, intr_info_field;
-	int has_ext_irq, interrupt_window_open;
+	u32 idtv_info_field, intr_info_field, exit_intr_info_field;
 	int vector;
 
 	update_tpr_threshold(vcpu);
 
-	has_ext_irq = kvm_cpu_has_interrupt(vcpu);
 	intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+	exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO);
 	idtv_info_field = vmx->idt_vectoring_info;
 	if (intr_info_field & INTR_INFO_VALID_MASK) {
 		if (idtv_info_field & INTR_INFO_VALID_MASK) {
@@ -2754,8 +2844,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 			if (printk_ratelimit())
 				printk(KERN_ERR "Fault when IDT_Vectoring\n");
 		}
-		if (has_ext_irq)
-			enable_irq_window(vcpu);
+		enable_intr_window(vcpu);
 		return;
 	}
 	if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
@@ -2765,30 +2854,56 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
2765 u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK; 2854 u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK;
2766 2855
2767 vmx_inject_irq(vcpu, vect); 2856 vmx_inject_irq(vcpu, vect);
2768 if (unlikely(has_ext_irq)) 2857 enable_intr_window(vcpu);
2769 enable_irq_window(vcpu);
2770 return; 2858 return;
2771 } 2859 }
2772 2860
2773 KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); 2861 KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
2774 2862
2775 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); 2863 /*
2864 * SDM 3: 25.7.1.2
2865 * Clear bit "block by NMI" before VM entry if a NMI delivery
2866 * faulted.
2867 */
2868 if ((idtv_info_field & VECTORING_INFO_TYPE_MASK)
2869 == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis())
2870 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
2871 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
2872 ~GUEST_INTR_STATE_NMI);
2873
2874 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field
2875 & ~INTR_INFO_RESVD_BITS_MASK);
2776 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 2876 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2777 vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); 2877 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
2778 2878
2779 if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) 2879 if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
2780 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 2880 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
2781 vmcs_read32(IDT_VECTORING_ERROR_CODE)); 2881 vmcs_read32(IDT_VECTORING_ERROR_CODE));
2782 if (unlikely(has_ext_irq)) 2882 enable_intr_window(vcpu);
2783 enable_irq_window(vcpu);
2784 return; 2883 return;
2785 } 2884 }
2786 if (!has_ext_irq) 2885 if (cpu_has_virtual_nmis()) {
2886 /*
2887 * SDM 3: 25.7.1.2
2888 * Re-set bit "block by NMI" before VM entry if vmexit caused by
2889 * a guest IRET fault.
2890 */
2891 if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) &&
2892 (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8)
2893 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
2894 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) |
2895 GUEST_INTR_STATE_NMI);
2896 else if (vcpu->arch.nmi_pending) {
2897 if (vmx_nmi_enabled(vcpu))
2898 vmx_inject_nmi(vcpu);
2899 enable_intr_window(vcpu);
2900 return;
2901 }
2902
2903 }
2904 if (!kvm_cpu_has_interrupt(vcpu))
2787 return; 2905 return;
2788 interrupt_window_open = 2906 if (vmx_irq_enabled(vcpu)) {
2789 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2790 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
2791 if (interrupt_window_open) {
2792 vector = kvm_cpu_get_interrupt(vcpu); 2907 vector = kvm_cpu_get_interrupt(vcpu);
2793 vmx_inject_irq(vcpu, vector); 2908 vmx_inject_irq(vcpu, vector);
2794 kvm_timer_intr_post(vcpu, vector); 2909 kvm_timer_intr_post(vcpu, vector);
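
Taken as a whole, the rewritten vmx_intr_assist() now enforces a fixed injection priority: re-deliver an event whose original delivery was cut short by the exit, then a pending NMI, then an external interrupt, arming a window exit whenever injection has to wait. A condensed sketch of that ordering, with the two re-delivery helpers named hypothetically for illustration:

static void intr_assist_sketch(struct kvm_vcpu *vcpu)
{
	if (idt_vectoring_pending(vcpu)) {	/* hypothetical predicate */
		redeliver_event(vcpu);		/* hypothetical helper */
		enable_intr_window(vcpu);
		return;
	}
	if (cpu_has_virtual_nmis() && vcpu->arch.nmi_pending) {
		if (vmx_nmi_enabled(vcpu))
			vmx_inject_nmi(vcpu);
		enable_intr_window(vcpu);	/* more events may be queued */
		return;
	}
	if (kvm_cpu_has_interrupt(vcpu)) {
		if (vmx_irq_enabled(vcpu))
			vmx_inject_irq(vcpu, kvm_cpu_get_interrupt(vcpu));
		else
			enable_irq_window(vcpu);
	}
}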
@@ -2838,7 +2953,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2838 "push %%edx; push %%ebp;" 2953 "push %%edx; push %%ebp;"
2839 "push %%ecx \n\t" 2954 "push %%ecx \n\t"
2840#endif 2955#endif
2841 ASM_VMX_VMWRITE_RSP_RDX "\n\t" 2956 __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
2842 /* Check if vmlaunch or vmresume is needed */ 2957 /* Check if vmlaunch or vmresume is needed */
2843 "cmpl $0, %c[launched](%0) \n\t" 2958 "cmpl $0, %c[launched](%0) \n\t"
2844 /* Load guest registers. Don't clobber flags. */ 2959 /* Load guest registers. Don't clobber flags. */
@@ -2873,9 +2988,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2873#endif 2988#endif
2874 /* Enter guest mode */ 2989 /* Enter guest mode */
2875 "jne .Llaunched \n\t" 2990 "jne .Llaunched \n\t"
2876 ASM_VMX_VMLAUNCH "\n\t" 2991 __ex(ASM_VMX_VMLAUNCH) "\n\t"
2877 "jmp .Lkvm_vmx_return \n\t" 2992 "jmp .Lkvm_vmx_return \n\t"
2878 ".Llaunched: " ASM_VMX_VMRESUME "\n\t" 2993 ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
2879 ".Lkvm_vmx_return: " 2994 ".Lkvm_vmx_return: "
2880 /* Save guest registers, load host registers, keep flags */ 2995 /* Save guest registers, load host registers, keep flags */
2881#ifdef CONFIG_X86_64 2996#ifdef CONFIG_X86_64
@@ -2949,7 +3064,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2949 fixup_rmode_irq(vmx); 3064 fixup_rmode_irq(vmx);
2950 3065
2951 vcpu->arch.interrupt_window_open = 3066 vcpu->arch.interrupt_window_open =
2952 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; 3067 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
3068 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
2953 3069
2954 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 3070 asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
2955 vmx->launched = 1; 3071 vmx->launched = 1;
@@ -2957,7 +3073,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2957 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 3073 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
2958 3074
2959 /* We need to handle NMIs before interrupts are enabled */ 3075 /* We need to handle NMIs before interrupts are enabled */
2960 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ 3076 if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
3077 (intr_info & INTR_INFO_VALID_MASK)) {
2961 KVMTRACE_0D(NMI, vcpu, handler); 3078 KVMTRACE_0D(NMI, vcpu, handler);
2962 asm("int $2"); 3079 asm("int $2");
2963 } 3080 }
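
The added INTR_INFO_VALID_MASK test closes a real hole: VM_EXIT_INTR_INFO only describes an event when its valid bit (bit 31) is set, so matching on the type field alone could reflect a spurious "int $2" — and thus a spurious NMI — into the host. Restated against the constants defined in vmx.h below (0x200 is INTR_TYPE_NMI_INTR):

/* Only reflect the NMI into the host when the exit info is valid. */
u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
    (intr_info & INTR_INFO_VALID_MASK))
	asm("int $2");		/* invoke the host NMI handler */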
@@ -2968,7 +3085,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
2968 struct vcpu_vmx *vmx = to_vmx(vcpu); 3085 struct vcpu_vmx *vmx = to_vmx(vcpu);
2969 3086
2970 if (vmx->vmcs) { 3087 if (vmx->vmcs) {
2971 on_each_cpu(__vcpu_clear, vmx, 1); 3088 vcpu_clear(vmx);
2972 free_vmcs(vmx->vmcs); 3089 free_vmcs(vmx->vmcs);
2973 vmx->vmcs = NULL; 3090 vmx->vmcs = NULL;
2974 } 3091 }
@@ -3095,7 +3212,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
3095 .prepare_guest_switch = vmx_save_host_state, 3212 .prepare_guest_switch = vmx_save_host_state,
3096 .vcpu_load = vmx_vcpu_load, 3213 .vcpu_load = vmx_vcpu_load,
3097 .vcpu_put = vmx_vcpu_put, 3214 .vcpu_put = vmx_vcpu_put,
3098 .vcpu_decache = vmx_vcpu_decache,
3099 3215
3100 .set_guest_debug = set_guest_debug, 3216 .set_guest_debug = set_guest_debug,
3101 .guest_debug_pre = kvm_guest_debug_pre, 3217 .guest_debug_pre = kvm_guest_debug_pre,
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 79d94c610dfe..425a13436b3f 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -40,6 +40,7 @@
40#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 40#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
41#define CPU_BASED_CR8_STORE_EXITING 0x00100000 41#define CPU_BASED_CR8_STORE_EXITING 0x00100000
42#define CPU_BASED_TPR_SHADOW 0x00200000 42#define CPU_BASED_TPR_SHADOW 0x00200000
43#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
43#define CPU_BASED_MOV_DR_EXITING 0x00800000 44#define CPU_BASED_MOV_DR_EXITING 0x00800000
44#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 45#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
45#define CPU_BASED_USE_IO_BITMAPS 0x02000000 46#define CPU_BASED_USE_IO_BITMAPS 0x02000000
@@ -216,7 +217,7 @@ enum vmcs_field {
216#define EXIT_REASON_TRIPLE_FAULT 2 217#define EXIT_REASON_TRIPLE_FAULT 2
217 218
218#define EXIT_REASON_PENDING_INTERRUPT 7 219#define EXIT_REASON_PENDING_INTERRUPT 7
219 220#define EXIT_REASON_NMI_WINDOW 8
220#define EXIT_REASON_TASK_SWITCH 9 221#define EXIT_REASON_TASK_SWITCH 9
221#define EXIT_REASON_CPUID 10 222#define EXIT_REASON_CPUID 10
222#define EXIT_REASON_HLT 12 223#define EXIT_REASON_HLT 12
@@ -251,7 +252,9 @@ enum vmcs_field {
251#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ 252#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */
252#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ 253#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */
253#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ 254#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */
255#define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */
254#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ 256#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
257#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000
255 258
256#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK 259#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK
257#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK 260#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK
@@ -259,9 +262,16 @@ enum vmcs_field {
259#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK 262#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
260 263
261#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ 264#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
265#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
262#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ 266#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
263#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ 267#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
264 268
269/* GUEST_INTERRUPTIBILITY_INFO flags. */
270#define GUEST_INTR_STATE_STI 0x00000001
271#define GUEST_INTR_STATE_MOV_SS 0x00000002
272#define GUEST_INTR_STATE_SMI 0x00000004
273#define GUEST_INTR_STATE_NMI 0x00000008
274
265/* 275/*
266 * Exit Qualifications for MOV for Control Register Access 276 * Exit Qualifications for MOV for Control Register Access
267 */ 277 */
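
The GUEST_INTR_STATE_* flags name the low bits of GUEST_INTERRUPTIBILITY_INFO that the vmx.c changes above test; the old code used the bare constant 3 where it meant STI | MOV_SS blocking. For instance, the condition from vmx_nmi_enabled() reads naturally against these names (sketch only):

/* An NMI may be injected only when no blocking state is active. */
static inline bool intr_state_allows_nmi(u32 interruptibility)
{
	return !(interruptibility & (GUEST_INTR_STATE_NMI |
				     GUEST_INTR_STATE_MOV_SS |
				     GUEST_INTR_STATE_STI));
}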
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0faa2546b1cd..9f1cdb011cff 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -72,6 +72,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
72 { "mmio_exits", VCPU_STAT(mmio_exits) }, 72 { "mmio_exits", VCPU_STAT(mmio_exits) },
73 { "signal_exits", VCPU_STAT(signal_exits) }, 73 { "signal_exits", VCPU_STAT(signal_exits) },
74 { "irq_window", VCPU_STAT(irq_window_exits) }, 74 { "irq_window", VCPU_STAT(irq_window_exits) },
75 { "nmi_window", VCPU_STAT(nmi_window_exits) },
75 { "halt_exits", VCPU_STAT(halt_exits) }, 76 { "halt_exits", VCPU_STAT(halt_exits) },
76 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 77 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 { "hypercalls", VCPU_STAT(hypercalls) }, 78 { "hypercalls", VCPU_STAT(hypercalls) },
@@ -173,6 +174,12 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
173 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 174 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
174} 175}
175 176
177void kvm_inject_nmi(struct kvm_vcpu *vcpu)
178{
179 vcpu->arch.nmi_pending = 1;
180}
181EXPORT_SYMBOL_GPL(kvm_inject_nmi);
182
176void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 183void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
177{ 184{
178 WARN_ON(vcpu->arch.exception.pending); 185 WARN_ON(vcpu->arch.exception.pending);
@@ -604,6 +611,38 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
604 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); 611 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
605} 612}
606 613
614static bool msr_mtrr_valid(unsigned msr)
615{
616 switch (msr) {
617 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
618 case MSR_MTRRfix64K_00000:
619 case MSR_MTRRfix16K_80000:
620 case MSR_MTRRfix16K_A0000:
621 case MSR_MTRRfix4K_C0000:
622 case MSR_MTRRfix4K_C8000:
623 case MSR_MTRRfix4K_D0000:
624 case MSR_MTRRfix4K_D8000:
625 case MSR_MTRRfix4K_E0000:
626 case MSR_MTRRfix4K_E8000:
627 case MSR_MTRRfix4K_F0000:
628 case MSR_MTRRfix4K_F8000:
629 case MSR_MTRRdefType:
630 case MSR_IA32_CR_PAT:
631 return true;
632 case 0x2f8:
633 return true;
634 }
635 return false;
636}
637
638static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
639{
640 if (!msr_mtrr_valid(msr))
641 return 1;
642
643 vcpu->arch.mtrr[msr - 0x200] = data;
644 return 0;
645}
607 646
608int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 647int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
609{ 648{
@@ -625,8 +664,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
625 break; 664 break;
626 case MSR_IA32_UCODE_REV: 665 case MSR_IA32_UCODE_REV:
627 case MSR_IA32_UCODE_WRITE: 666 case MSR_IA32_UCODE_WRITE:
628 case 0x200 ... 0x2ff: /* MTRRs */
629 break; 667 break;
668 case 0x200 ... 0x2ff:
669 return set_msr_mtrr(vcpu, msr, data);
630 case MSR_IA32_APICBASE: 670 case MSR_IA32_APICBASE:
631 kvm_set_apic_base(vcpu, data); 671 kvm_set_apic_base(vcpu, data);
632 break; 672 break;
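
set_msr_mtrr() stores every accepted register in one flat array indexed by msr - 0x200, which works because all the MTRR MSRs — variable base/mask pairs, the fixed ranges, MTRRdefType, PAT — live in the 0x200-0x2ff window. Illustrative index math, assuming vcpu->arch.mtrr[] spans that whole window:

u32 msr = 0x203;		/* IA32_MTRR_PHYSMASK1 */
int slot = msr - 0x200;		/* -> mtrr[3]: variable pair n uses slots 2n, 2n+1;
				 * MTRRdefType (0x2ff) lands at slot 0xff */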
@@ -684,6 +724,15 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
684 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); 724 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
685} 725}
686 726
727static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
728{
729 if (!msr_mtrr_valid(msr))
730 return 1;
731
732 *pdata = vcpu->arch.mtrr[msr - 0x200];
733 return 0;
734}
735
687int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 736int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
688{ 737{
689 u64 data; 738 u64 data;
@@ -705,11 +754,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
705 case MSR_IA32_MC0_MISC+16: 754 case MSR_IA32_MC0_MISC+16:
706 case MSR_IA32_UCODE_REV: 755 case MSR_IA32_UCODE_REV:
707 case MSR_IA32_EBL_CR_POWERON: 756 case MSR_IA32_EBL_CR_POWERON:
708 /* MTRR registers */
709 case 0xfe:
710 case 0x200 ... 0x2ff:
711 data = 0; 757 data = 0;
712 break; 758 break;
759 case MSR_MTRRcap:
760 data = 0x500 | KVM_NR_VAR_MTRR;
761 break;
762 case 0x200 ... 0x2ff:
763 return get_msr_mtrr(vcpu, msr, pdata);
713 case 0xcd: /* fsb frequency */ 764 case 0xcd: /* fsb frequency */
714 data = 3; 765 data = 3;
715 break; 766 break;
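
The synthesized MTRRcap value is worth decoding: bit 10 advertises write-combining support, bit 8 the fixed-range MTRRs, and the low byte the variable-range count, so with KVM_NR_VAR_MTRR == 8 (its value in this era of the code) the guest reads:

u64 mtrrcap = 0x500 | 8;	/* 0x400 (WC) | 0x100 (FIX) | 8 ranges = 0x508 */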
@@ -817,41 +868,6 @@ out:
817 return r; 868 return r;
818} 869}
819 870
820/*
821 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
822 * cached on it.
823 */
824void decache_vcpus_on_cpu(int cpu)
825{
826 struct kvm *vm;
827 struct kvm_vcpu *vcpu;
828 int i;
829
830 spin_lock(&kvm_lock);
831 list_for_each_entry(vm, &vm_list, vm_list)
832 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
833 vcpu = vm->vcpus[i];
834 if (!vcpu)
835 continue;
836 /*
837 * If the vcpu is locked, then it is running on some
838 * other cpu and therefore it is not cached on the
839 * cpu in question.
840 *
841 * If it's not locked, check the last cpu it executed
842 * on.
843 */
844 if (mutex_trylock(&vcpu->mutex)) {
845 if (vcpu->cpu == cpu) {
846 kvm_x86_ops->vcpu_decache(vcpu);
847 vcpu->cpu = -1;
848 }
849 mutex_unlock(&vcpu->mutex);
850 }
851 }
852 spin_unlock(&kvm_lock);
853}
854
855int kvm_dev_ioctl_check_extension(long ext) 871int kvm_dev_ioctl_check_extension(long ext)
856{ 872{
857 int r; 873 int r;
@@ -869,6 +885,9 @@ int kvm_dev_ioctl_check_extension(long ext)
869 case KVM_CAP_MP_STATE: 885 case KVM_CAP_MP_STATE:
870 r = 1; 886 r = 1;
871 break; 887 break;
888 case KVM_CAP_COALESCED_MMIO:
889 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
890 break;
872 case KVM_CAP_VAPIC: 891 case KVM_CAP_VAPIC:
873 r = !kvm_x86_ops->cpu_has_accelerated_tpr(); 892 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
874 break; 893 break;
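
Returning KVM_COALESCED_MMIO_PAGE_OFFSET rather than a plain 1 lets one KVM_CHECK_EXTENSION call tell userspace both that the coalesced-MMIO ring exists and where to map it. A hedged userspace sketch (error handling omitted):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int kvm_fd = open("/dev/kvm", O_RDWR);
int page = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
/* page > 0: the ring sits page * PAGE_SIZE into the vcpu mmap area;
 * page == 0: the capability is absent on this kernel. */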
@@ -1781,13 +1800,14 @@ static void kvm_init_msr_list(void)
1781 * Only the apic needs an MMIO device hook, so shortcut now.. 1800 * Only the apic needs an MMIO device hook, so shortcut now..
1782 */ 1801 */
1783static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, 1802static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
1784 gpa_t addr) 1803 gpa_t addr, int len,
1804 int is_write)
1785{ 1805{
1786 struct kvm_io_device *dev; 1806 struct kvm_io_device *dev;
1787 1807
1788 if (vcpu->arch.apic) { 1808 if (vcpu->arch.apic) {
1789 dev = &vcpu->arch.apic->dev; 1809 dev = &vcpu->arch.apic->dev;
1790 if (dev->in_range(dev, addr)) 1810 if (dev->in_range(dev, addr, len, is_write))
1791 return dev; 1811 return dev;
1792 } 1812 }
1793 return NULL; 1813 return NULL;
@@ -1795,13 +1815,15 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
1795 1815
1796 1816
1797static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, 1817static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1798 gpa_t addr) 1818 gpa_t addr, int len,
1819 int is_write)
1799{ 1820{
1800 struct kvm_io_device *dev; 1821 struct kvm_io_device *dev;
1801 1822
1802 dev = vcpu_find_pervcpu_dev(vcpu, addr); 1823 dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
1803 if (dev == NULL) 1824 if (dev == NULL)
1804 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); 1825 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
1826 is_write);
1805 return dev; 1827 return dev;
1806} 1828}
1807 1829
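
Passing len and is_write down to in_range() is what allows a device to claim only the accesses it can actually service — e.g. the new coalesced-MMIO device buffering writes while letting reads take the slow path. A sketch of a callback under the new signature; the window bounds DEV_BASE/DEV_LEN are hypothetical:

static int mmio_dev_in_range(struct kvm_io_device *this, gpa_t addr,
			     int len, int is_write)
{
	/* Claim only writes that fall entirely inside our window. */
	return is_write &&
	       addr >= DEV_BASE && addr + len <= DEV_BASE + DEV_LEN;
}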
@@ -1869,7 +1891,7 @@ mmio:
1869 * Is this MMIO handled locally? 1891 * Is this MMIO handled locally?
1870 */ 1892 */
1871 mutex_lock(&vcpu->kvm->lock); 1893 mutex_lock(&vcpu->kvm->lock);
1872 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); 1894 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
1873 if (mmio_dev) { 1895 if (mmio_dev) {
1874 kvm_iodevice_read(mmio_dev, gpa, bytes, val); 1896 kvm_iodevice_read(mmio_dev, gpa, bytes, val);
1875 mutex_unlock(&vcpu->kvm->lock); 1897 mutex_unlock(&vcpu->kvm->lock);
@@ -1924,7 +1946,7 @@ mmio:
1924 * Is this MMIO handled locally? 1946 * Is this MMIO handled locally?
1925 */ 1947 */
1926 mutex_lock(&vcpu->kvm->lock); 1948 mutex_lock(&vcpu->kvm->lock);
1927 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); 1949 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
1928 if (mmio_dev) { 1950 if (mmio_dev) {
1929 kvm_iodevice_write(mmio_dev, gpa, bytes, val); 1951 kvm_iodevice_write(mmio_dev, gpa, bytes, val);
1930 mutex_unlock(&vcpu->kvm->lock); 1952 mutex_unlock(&vcpu->kvm->lock);
@@ -2020,6 +2042,7 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
2020 2042
2021int emulate_clts(struct kvm_vcpu *vcpu) 2043int emulate_clts(struct kvm_vcpu *vcpu)
2022{ 2044{
2045 KVMTRACE_0D(CLTS, vcpu, handler);
2023 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); 2046 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
2024 return X86EMUL_CONTINUE; 2047 return X86EMUL_CONTINUE;
2025} 2048}
@@ -2053,21 +2076,19 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
2053 2076
2054void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 2077void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
2055{ 2078{
2056 static int reported;
2057 u8 opcodes[4]; 2079 u8 opcodes[4];
2058 unsigned long rip = vcpu->arch.rip; 2080 unsigned long rip = vcpu->arch.rip;
2059 unsigned long rip_linear; 2081 unsigned long rip_linear;
2060 2082
2061 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 2083 if (!printk_ratelimit())
2062
2063 if (reported)
2064 return; 2084 return;
2065 2085
2086 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
2087
2066 emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); 2088 emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
2067 2089
2068 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 2090 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
2069 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 2091 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
2070 reported = 1;
2071} 2092}
2072EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 2093EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
2073 2094
@@ -2105,27 +2126,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
2105 ? X86EMUL_MODE_PROT64 : cs_db 2126 ? X86EMUL_MODE_PROT64 : cs_db
2106 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 2127 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
2107 2128
2108 if (vcpu->arch.emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
2109 vcpu->arch.emulate_ctxt.cs_base = 0;
2110 vcpu->arch.emulate_ctxt.ds_base = 0;
2111 vcpu->arch.emulate_ctxt.es_base = 0;
2112 vcpu->arch.emulate_ctxt.ss_base = 0;
2113 } else {
2114 vcpu->arch.emulate_ctxt.cs_base =
2115 get_segment_base(vcpu, VCPU_SREG_CS);
2116 vcpu->arch.emulate_ctxt.ds_base =
2117 get_segment_base(vcpu, VCPU_SREG_DS);
2118 vcpu->arch.emulate_ctxt.es_base =
2119 get_segment_base(vcpu, VCPU_SREG_ES);
2120 vcpu->arch.emulate_ctxt.ss_base =
2121 get_segment_base(vcpu, VCPU_SREG_SS);
2122 }
2123
2124 vcpu->arch.emulate_ctxt.gs_base =
2125 get_segment_base(vcpu, VCPU_SREG_GS);
2126 vcpu->arch.emulate_ctxt.fs_base =
2127 get_segment_base(vcpu, VCPU_SREG_FS);
2128
2129 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); 2129 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
2130 2130
2131 /* Reject the instructions other than VMCALL/VMMCALL when 2131 /* Reject the instructions other than VMCALL/VMMCALL when
@@ -2300,9 +2300,10 @@ static void pio_string_write(struct kvm_io_device *pio_dev,
2300} 2300}
2301 2301
2302static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, 2302static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
2303 gpa_t addr) 2303 gpa_t addr, int len,
2304 int is_write)
2304{ 2305{
2305 return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr); 2306 return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
2306} 2307}
2307 2308
2308int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 2309int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
@@ -2331,11 +2332,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2331 2332
2332 kvm_x86_ops->cache_regs(vcpu); 2333 kvm_x86_ops->cache_regs(vcpu);
2333 memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); 2334 memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
2334 kvm_x86_ops->decache_regs(vcpu);
2335 2335
2336 kvm_x86_ops->skip_emulated_instruction(vcpu); 2336 kvm_x86_ops->skip_emulated_instruction(vcpu);
2337 2337
2338 pio_dev = vcpu_find_pio_dev(vcpu, port); 2338 pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
2339 if (pio_dev) { 2339 if (pio_dev) {
2340 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); 2340 kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
2341 complete_pio(vcpu); 2341 complete_pio(vcpu);
@@ -2417,7 +2417,9 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
2417 } 2417 }
2418 } 2418 }
2419 2419
2420 pio_dev = vcpu_find_pio_dev(vcpu, port); 2420 pio_dev = vcpu_find_pio_dev(vcpu, port,
2421 vcpu->arch.pio.cur_count,
2422 !vcpu->arch.pio.in);
2421 if (!vcpu->arch.pio.in) { 2423 if (!vcpu->arch.pio.in) {
2422 /* string PIO write */ 2424 /* string PIO write */
2423 ret = pio_copy_data(vcpu); 2425 ret = pio_copy_data(vcpu);
@@ -2600,27 +2602,41 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
2600 2602
2601unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) 2603unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
2602{ 2604{
2605 unsigned long value;
2606
2603 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 2607 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2604 switch (cr) { 2608 switch (cr) {
2605 case 0: 2609 case 0:
2606 return vcpu->arch.cr0; 2610 value = vcpu->arch.cr0;
2611 break;
2607 case 2: 2612 case 2:
2608 return vcpu->arch.cr2; 2613 value = vcpu->arch.cr2;
2614 break;
2609 case 3: 2615 case 3:
2610 return vcpu->arch.cr3; 2616 value = vcpu->arch.cr3;
2617 break;
2611 case 4: 2618 case 4:
2612 return vcpu->arch.cr4; 2619 value = vcpu->arch.cr4;
2620 break;
2613 case 8: 2621 case 8:
2614 return kvm_get_cr8(vcpu); 2622 value = kvm_get_cr8(vcpu);
2623 break;
2615 default: 2624 default:
2616 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); 2625 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
2617 return 0; 2626 return 0;
2618 } 2627 }
2628 KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
2629 (u32)((u64)value >> 32), handler);
2630
2631 return value;
2619} 2632}
2620 2633
2621void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, 2634void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
2622 unsigned long *rflags) 2635 unsigned long *rflags)
2623{ 2636{
2637 KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
2638 (u32)((u64)val >> 32), handler);
2639
2624 switch (cr) { 2640 switch (cr) {
2625 case 0: 2641 case 0:
2626 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 2642 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
@@ -2771,8 +2787,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
2771 if (!apic || !apic->vapic_addr) 2787 if (!apic || !apic->vapic_addr)
2772 return; 2788 return;
2773 2789
2790 down_read(&vcpu->kvm->slots_lock);
2774 kvm_release_page_dirty(apic->vapic_page); 2791 kvm_release_page_dirty(apic->vapic_page);
2775 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 2792 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
2793 up_read(&vcpu->kvm->slots_lock);
2776} 2794}
2777 2795
2778static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2796static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -2928,9 +2946,7 @@ out:
2928 2946
2929 post_kvm_run_save(vcpu, kvm_run); 2947 post_kvm_run_save(vcpu, kvm_run);
2930 2948
2931 down_read(&vcpu->kvm->slots_lock);
2932 vapic_exit(vcpu); 2949 vapic_exit(vcpu);
2933 up_read(&vcpu->kvm->slots_lock);
2934 2950
2935 return r; 2951 return r;
2936} 2952}
@@ -2942,15 +2958,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2942 2958
2943 vcpu_load(vcpu); 2959 vcpu_load(vcpu);
2944 2960
2961 if (vcpu->sigset_active)
2962 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2963
2945 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { 2964 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
2946 kvm_vcpu_block(vcpu); 2965 kvm_vcpu_block(vcpu);
2947 vcpu_put(vcpu); 2966 r = -EAGAIN;
2948 return -EAGAIN; 2967 goto out;
2949 } 2968 }
2950 2969
2951 if (vcpu->sigset_active)
2952 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2953
2954 /* re-sync apic's tpr */ 2970 /* re-sync apic's tpr */
2955 if (!irqchip_in_kernel(vcpu->kvm)) 2971 if (!irqchip_in_kernel(vcpu->kvm))
2956 kvm_set_cr8(vcpu, kvm_run->cr8); 2972 kvm_set_cr8(vcpu, kvm_run->cr8);
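
Hoisting the sigprocmask() save above the mp_state check matters because the early return is now folded into the function's common exit path, which restores the saved mask. Abbreviated epilogue, sketched from the surrounding code of this era:

out:
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
	vcpu_put(vcpu);
	return r;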
@@ -3070,8 +3086,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3070 return 0; 3086 return 0;
3071} 3087}
3072 3088
3073static void get_segment(struct kvm_vcpu *vcpu, 3089void kvm_get_segment(struct kvm_vcpu *vcpu,
3074 struct kvm_segment *var, int seg) 3090 struct kvm_segment *var, int seg)
3075{ 3091{
3076 kvm_x86_ops->get_segment(vcpu, var, seg); 3092 kvm_x86_ops->get_segment(vcpu, var, seg);
3077} 3093}
@@ -3080,7 +3096,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
3080{ 3096{
3081 struct kvm_segment cs; 3097 struct kvm_segment cs;
3082 3098
3083 get_segment(vcpu, &cs, VCPU_SREG_CS); 3099 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
3084 *db = cs.db; 3100 *db = cs.db;
3085 *l = cs.l; 3101 *l = cs.l;
3086} 3102}
@@ -3094,15 +3110,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3094 3110
3095 vcpu_load(vcpu); 3111 vcpu_load(vcpu);
3096 3112
3097 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 3113 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
3098 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 3114 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
3099 get_segment(vcpu, &sregs->es, VCPU_SREG_ES); 3115 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
3100 get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 3116 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
3101 get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 3117 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
3102 get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 3118 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
3103 3119
3104 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 3120 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
3105 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 3121 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
3106 3122
3107 kvm_x86_ops->get_idt(vcpu, &dt); 3123 kvm_x86_ops->get_idt(vcpu, &dt);
3108 sregs->idt.limit = dt.limit; 3124 sregs->idt.limit = dt.limit;
@@ -3154,7 +3170,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3154 return 0; 3170 return 0;
3155} 3171}
3156 3172
3157static void set_segment(struct kvm_vcpu *vcpu, 3173static void kvm_set_segment(struct kvm_vcpu *vcpu,
3158 struct kvm_segment *var, int seg) 3174 struct kvm_segment *var, int seg)
3159{ 3175{
3160 kvm_x86_ops->set_segment(vcpu, var, seg); 3176 kvm_x86_ops->set_segment(vcpu, var, seg);
@@ -3191,7 +3207,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
3191 if (selector & 1 << 2) { 3207 if (selector & 1 << 2) {
3192 struct kvm_segment kvm_seg; 3208 struct kvm_segment kvm_seg;
3193 3209
3194 get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); 3210 kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
3195 3211
3196 if (kvm_seg.unusable) 3212 if (kvm_seg.unusable)
3197 dtable->limit = 0; 3213 dtable->limit = 0;
@@ -3297,7 +3313,7 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
3297{ 3313{
3298 struct kvm_segment kvm_seg; 3314 struct kvm_segment kvm_seg;
3299 3315
3300 get_segment(vcpu, &kvm_seg, seg); 3316 kvm_get_segment(vcpu, &kvm_seg, seg);
3301 return kvm_seg.selector; 3317 return kvm_seg.selector;
3302} 3318}
3303 3319
@@ -3313,8 +3329,8 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
3313 return 0; 3329 return 0;
3314} 3330}
3315 3331
3316static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 3332int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3317 int type_bits, int seg) 3333 int type_bits, int seg)
3318{ 3334{
3319 struct kvm_segment kvm_seg; 3335 struct kvm_segment kvm_seg;
3320 3336
@@ -3327,7 +3343,7 @@ static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3327 if (!kvm_seg.s) 3343 if (!kvm_seg.s)
3328 kvm_seg.unusable = 1; 3344 kvm_seg.unusable = 1;
3329 3345
3330 set_segment(vcpu, &kvm_seg, seg); 3346 kvm_set_segment(vcpu, &kvm_seg, seg);
3331 return 0; 3347 return 0;
3332} 3348}
3333 3349
@@ -3373,25 +3389,25 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
3373 vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; 3389 vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi;
3374 vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; 3390 vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi;
3375 3391
3376 if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) 3392 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
3377 return 1; 3393 return 1;
3378 3394
3379 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 3395 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3380 return 1; 3396 return 1;
3381 3397
3382 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 3398 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3383 return 1; 3399 return 1;
3384 3400
3385 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 3401 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3386 return 1; 3402 return 1;
3387 3403
3388 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 3404 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3389 return 1; 3405 return 1;
3390 3406
3391 if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) 3407 if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
3392 return 1; 3408 return 1;
3393 3409
3394 if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) 3410 if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
3395 return 1; 3411 return 1;
3396 return 0; 3412 return 0;
3397} 3413}
@@ -3432,24 +3448,24 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
3432 vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; 3448 vcpu->arch.regs[VCPU_REGS_RSI] = tss->si;
3433 vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; 3449 vcpu->arch.regs[VCPU_REGS_RDI] = tss->di;
3434 3450
3435 if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) 3451 if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
3436 return 1; 3452 return 1;
3437 3453
3438 if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 3454 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
3439 return 1; 3455 return 1;
3440 3456
3441 if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 3457 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
3442 return 1; 3458 return 1;
3443 3459
3444 if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 3460 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
3445 return 1; 3461 return 1;
3446 3462
3447 if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 3463 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
3448 return 1; 3464 return 1;
3449 return 0; 3465 return 0;
3450} 3466}
3451 3467
3452int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, 3468static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
3453 struct desc_struct *cseg_desc, 3469 struct desc_struct *cseg_desc,
3454 struct desc_struct *nseg_desc) 3470 struct desc_struct *nseg_desc)
3455{ 3471{
@@ -3472,7 +3488,7 @@ out:
3472 return ret; 3488 return ret;
3473} 3489}
3474 3490
3475int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, 3491static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
3476 struct desc_struct *cseg_desc, 3492 struct desc_struct *cseg_desc,
3477 struct desc_struct *nseg_desc) 3493 struct desc_struct *nseg_desc)
3478{ 3494{
@@ -3502,7 +3518,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3502 struct desc_struct nseg_desc; 3518 struct desc_struct nseg_desc;
3503 int ret = 0; 3519 int ret = 0;
3504 3520
3505 get_segment(vcpu, &tr_seg, VCPU_SREG_TR); 3521 kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
3506 3522
3507 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) 3523 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
3508 goto out; 3524 goto out;
@@ -3561,7 +3577,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3561 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); 3577 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
3562 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); 3578 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
3563 tr_seg.type = 11; 3579 tr_seg.type = 11;
3564 set_segment(vcpu, &tr_seg, VCPU_SREG_TR); 3580 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
3565out: 3581out:
3566 kvm_x86_ops->decache_regs(vcpu); 3582 kvm_x86_ops->decache_regs(vcpu);
3567 return ret; 3583 return ret;
@@ -3628,15 +3644,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3628 } 3644 }
3629 } 3645 }
3630 3646
3631 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 3647 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
3632 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 3648 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
3633 set_segment(vcpu, &sregs->es, VCPU_SREG_ES); 3649 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
3634 set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); 3650 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
3635 set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); 3651 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
3636 set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); 3652 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
3637 3653
3638 set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 3654 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
3639 set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 3655 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
3640 3656
3641 vcpu_put(vcpu); 3657 vcpu_put(vcpu);
3642 3658
@@ -3751,14 +3767,14 @@ void fx_init(struct kvm_vcpu *vcpu)
3751 * allocate ram with GFP_KERNEL. 3767 * allocate ram with GFP_KERNEL.
3752 */ 3768 */
3753 if (!used_math()) 3769 if (!used_math())
3754 fx_save(&vcpu->arch.host_fx_image); 3770 kvm_fx_save(&vcpu->arch.host_fx_image);
3755 3771
3756 /* Initialize guest FPU by resetting ours and saving into guest's */ 3772 /* Initialize guest FPU by resetting ours and saving into guest's */
3757 preempt_disable(); 3773 preempt_disable();
3758 fx_save(&vcpu->arch.host_fx_image); 3774 kvm_fx_save(&vcpu->arch.host_fx_image);
3759 fx_finit(); 3775 kvm_fx_finit();
3760 fx_save(&vcpu->arch.guest_fx_image); 3776 kvm_fx_save(&vcpu->arch.guest_fx_image);
3761 fx_restore(&vcpu->arch.host_fx_image); 3777 kvm_fx_restore(&vcpu->arch.host_fx_image);
3762 preempt_enable(); 3778 preempt_enable();
3763 3779
3764 vcpu->arch.cr0 |= X86_CR0_ET; 3780 vcpu->arch.cr0 |= X86_CR0_ET;
@@ -3775,8 +3791,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
3775 return; 3791 return;
3776 3792
3777 vcpu->guest_fpu_loaded = 1; 3793 vcpu->guest_fpu_loaded = 1;
3778 fx_save(&vcpu->arch.host_fx_image); 3794 kvm_fx_save(&vcpu->arch.host_fx_image);
3779 fx_restore(&vcpu->arch.guest_fx_image); 3795 kvm_fx_restore(&vcpu->arch.guest_fx_image);
3780} 3796}
3781EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); 3797EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
3782 3798
@@ -3786,8 +3802,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
3786 return; 3802 return;
3787 3803
3788 vcpu->guest_fpu_loaded = 0; 3804 vcpu->guest_fpu_loaded = 0;
3789 fx_save(&vcpu->arch.guest_fx_image); 3805 kvm_fx_save(&vcpu->arch.guest_fx_image);
3790 fx_restore(&vcpu->arch.host_fx_image); 3806 kvm_fx_restore(&vcpu->arch.host_fx_image);
3791 ++vcpu->stat.fpu_reload; 3807 ++vcpu->stat.fpu_reload;
3792} 3808}
3793EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); 3809EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
@@ -4016,6 +4032,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
4016 return 0; 4032 return 0;
4017} 4033}
4018 4034
4035void kvm_arch_flush_shadow(struct kvm *kvm)
4036{
4037 kvm_mmu_zap_all(kvm);
4038}
4039
4019int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 4040int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4020{ 4041{
4021 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE 4042 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 932f216d890c..f2f90468f8b1 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -121,7 +121,7 @@ static u16 opcode_table[256] = {
121 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , 121 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
122 0, 0, 0, 0, 122 0, 0, 0, 0,
123 /* 0x68 - 0x6F */ 123 /* 0x68 - 0x6F */
124 0, 0, ImplicitOps | Mov | Stack, 0, 124 SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
125 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ 125 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
126 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ 126 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
127 /* 0x70 - 0x77 */ 127 /* 0x70 - 0x77 */
@@ -138,9 +138,11 @@ static u16 opcode_table[256] = {
138 /* 0x88 - 0x8F */ 138 /* 0x88 - 0x8F */
139 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, 139 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
140 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 140 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
141 0, ModRM | DstReg, 0, Group | Group1A, 141 DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
142 /* 0x90 - 0x9F */ 142 DstReg | SrcMem | ModRM | Mov, Group | Group1A,
143 0, 0, 0, 0, 0, 0, 0, 0, 143 /* 0x90 - 0x97 */
144 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
145 /* 0x98 - 0x9F */
144 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, 146 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
145 /* 0xA0 - 0xA7 */ 147 /* 0xA0 - 0xA7 */
146 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, 148 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
@@ -152,7 +154,8 @@ static u16 opcode_table[256] = {
152 ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, 154 ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
153 ByteOp | ImplicitOps | String, ImplicitOps | String, 155 ByteOp | ImplicitOps | String, ImplicitOps | String,
154 /* 0xB0 - 0xBF */ 156 /* 0xB0 - 0xBF */
155 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 157 0, 0, 0, 0, 0, 0, 0, 0,
158 DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0,
156 /* 0xC0 - 0xC7 */ 159 /* 0xC0 - 0xC7 */
157 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 160 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
158 0, ImplicitOps | Stack, 0, 0, 161 0, ImplicitOps | Stack, 0, 0,
@@ -168,7 +171,8 @@ static u16 opcode_table[256] = {
168 /* 0xE0 - 0xE7 */ 171 /* 0xE0 - 0xE7 */
169 0, 0, 0, 0, 0, 0, 0, 0, 172 0, 0, 0, 0, 0, 0, 0, 0,
170 /* 0xE8 - 0xEF */ 173 /* 0xE8 - 0xEF */
171 ImplicitOps | Stack, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 174 ImplicitOps | Stack, SrcImm | ImplicitOps,
175 ImplicitOps, SrcImmByte | ImplicitOps,
172 0, 0, 0, 0, 176 0, 0, 0, 0,
173 /* 0xF0 - 0xF7 */ 177 /* 0xF0 - 0xF7 */
174 0, 0, 0, 0, 178 0, 0, 0, 0,
@@ -215,7 +219,7 @@ static u16 twobyte_table[256] = {
215 /* 0xA0 - 0xA7 */ 219 /* 0xA0 - 0xA7 */
216 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, 220 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
217 /* 0xA8 - 0xAF */ 221 /* 0xA8 - 0xAF */
218 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, 222 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0,
219 /* 0xB0 - 0xB7 */ 223 /* 0xB0 - 0xB7 */
220 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, 224 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
221 DstMem | SrcReg | ModRM | BitOp, 225 DstMem | SrcReg | ModRM | BitOp,
@@ -518,6 +522,39 @@ static inline void jmp_rel(struct decode_cache *c, int rel)
518 register_address_increment(c, &c->eip, rel); 522 register_address_increment(c, &c->eip, rel);
519} 523}
520 524
525static void set_seg_override(struct decode_cache *c, int seg)
526{
527 c->has_seg_override = true;
528 c->seg_override = seg;
529}
530
531static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
532{
533 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
534 return 0;
535
536 return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
537}
538
539static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
540 struct decode_cache *c)
541{
542 if (!c->has_seg_override)
543 return 0;
544
545 return seg_base(ctxt, c->seg_override);
546}
547
548static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
549{
550 return seg_base(ctxt, VCPU_SREG_ES);
551}
552
553static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
554{
555 return seg_base(ctxt, VCPU_SREG_SS);
556}
557
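
These helpers replace the old cached cs_base/ds_base/... fields with on-demand lookups, and seg_base() bakes in the architectural rule that the ES/CS/SS/DS bases are forced to zero in 64-bit mode, so only FS and GS overrides still move the effective address there. Illustrative use, assuming a 0x64 (FS) prefix was decoded:

/* After set_seg_override(c, VCPU_SREG_FS): */
unsigned long linear = seg_override_base(ctxt, c) + c->modrm_ea;
/* 64-bit mode: FS.base + ea; a DS override would contribute 0. */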
521static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, 558static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
522 struct x86_emulate_ops *ops, 559 struct x86_emulate_ops *ops,
523 unsigned long linear, u8 *dest) 560 unsigned long linear, u8 *dest)
@@ -660,7 +697,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
660{ 697{
661 struct decode_cache *c = &ctxt->decode; 698 struct decode_cache *c = &ctxt->decode;
662 u8 sib; 699 u8 sib;
663 int index_reg = 0, base_reg = 0, scale, rip_relative = 0; 700 int index_reg = 0, base_reg = 0, scale;
664 int rc = 0; 701 int rc = 0;
665 702
666 if (c->rex_prefix) { 703 if (c->rex_prefix) {
@@ -731,47 +768,28 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
731 } 768 }
732 if (c->modrm_rm == 2 || c->modrm_rm == 3 || 769 if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
733 (c->modrm_rm == 6 && c->modrm_mod != 0)) 770 (c->modrm_rm == 6 && c->modrm_mod != 0))
734 if (!c->override_base) 771 if (!c->has_seg_override)
735 c->override_base = &ctxt->ss_base; 772 set_seg_override(c, VCPU_SREG_SS);
736 c->modrm_ea = (u16)c->modrm_ea; 773 c->modrm_ea = (u16)c->modrm_ea;
737 } else { 774 } else {
738 /* 32/64-bit ModR/M decode. */ 775 /* 32/64-bit ModR/M decode. */
739 switch (c->modrm_rm) { 776 if ((c->modrm_rm & 7) == 4) {
740 case 4:
741 case 12:
742 sib = insn_fetch(u8, 1, c->eip); 777 sib = insn_fetch(u8, 1, c->eip);
743 index_reg |= (sib >> 3) & 7; 778 index_reg |= (sib >> 3) & 7;
744 base_reg |= sib & 7; 779 base_reg |= sib & 7;
745 scale = sib >> 6; 780 scale = sib >> 6;
746 781
747 switch (base_reg) { 782 if ((base_reg & 7) == 5 && c->modrm_mod == 0)
748 case 5: 783 c->modrm_ea += insn_fetch(s32, 4, c->eip);
749 if (c->modrm_mod != 0) 784 else
750 c->modrm_ea += c->regs[base_reg];
751 else
752 c->modrm_ea +=
753 insn_fetch(s32, 4, c->eip);
754 break;
755 default:
756 c->modrm_ea += c->regs[base_reg]; 785 c->modrm_ea += c->regs[base_reg];
757 } 786 if (index_reg != 4)
758 switch (index_reg) {
759 case 4:
760 break;
761 default:
762 c->modrm_ea += c->regs[index_reg] << scale; 787 c->modrm_ea += c->regs[index_reg] << scale;
763 } 788 } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
764 break; 789 if (ctxt->mode == X86EMUL_MODE_PROT64)
765 case 5: 790 c->rip_relative = 1;
766 if (c->modrm_mod != 0) 791 } else
767 c->modrm_ea += c->regs[c->modrm_rm];
768 else if (ctxt->mode == X86EMUL_MODE_PROT64)
769 rip_relative = 1;
770 break;
771 default:
772 c->modrm_ea += c->regs[c->modrm_rm]; 792 c->modrm_ea += c->regs[c->modrm_rm];
773 break;
774 }
775 switch (c->modrm_mod) { 793 switch (c->modrm_mod) {
776 case 0: 794 case 0:
777 if (c->modrm_rm == 5) 795 if (c->modrm_rm == 5)
@@ -785,22 +803,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
785 break; 803 break;
786 } 804 }
787 } 805 }
788 if (rip_relative) {
789 c->modrm_ea += c->eip;
790 switch (c->d & SrcMask) {
791 case SrcImmByte:
792 c->modrm_ea += 1;
793 break;
794 case SrcImm:
795 if (c->d & ByteOp)
796 c->modrm_ea += 1;
797 else
798 if (c->op_bytes == 8)
799 c->modrm_ea += 4;
800 else
801 c->modrm_ea += c->op_bytes;
802 }
803 }
804done: 806done:
805 return rc; 807 return rc;
806} 808}
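
The rewritten SIB handling collapses the old twelve-way switch into the architectural rules: (modrm_rm & 7) == 4 means a SIB byte follows, index 4 means "no index", and base 5 with mod == 0 takes a disp32 instead of a register base. A worked example, ignoring REX extensions:

u8 sib = 0x8c;			/* 10 001 100b, with modrm_mod != 0 */
int scale = sib >> 6;		/* 2: index is scaled by 4 */
int index = (sib >> 3) & 7;	/* 1: RCX */
int base  = sib & 7;		/* 4: RSP (base 5 + mod 0 would be disp32) */
/* modrm_ea += regs[RSP] + (regs[RCX] << 2), plus any displacement */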
@@ -838,6 +840,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
838 840
839 memset(c, 0, sizeof(struct decode_cache)); 841 memset(c, 0, sizeof(struct decode_cache));
840 c->eip = ctxt->vcpu->arch.rip; 842 c->eip = ctxt->vcpu->arch.rip;
843 ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
841 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 844 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
842 845
843 switch (mode) { 846 switch (mode) {
@@ -876,23 +879,15 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
876 /* switch between 2/4 bytes */ 879 /* switch between 2/4 bytes */
877 c->ad_bytes = def_ad_bytes ^ 6; 880 c->ad_bytes = def_ad_bytes ^ 6;
878 break; 881 break;
882 case 0x26: /* ES override */
879 case 0x2e: /* CS override */ 883 case 0x2e: /* CS override */
880 c->override_base = &ctxt->cs_base; 884 case 0x36: /* SS override */
881 break;
882 case 0x3e: /* DS override */ 885 case 0x3e: /* DS override */
883 c->override_base = &ctxt->ds_base; 886 set_seg_override(c, (c->b >> 3) & 3);
884 break;
885 case 0x26: /* ES override */
886 c->override_base = &ctxt->es_base;
887 break; 887 break;
888 case 0x64: /* FS override */ 888 case 0x64: /* FS override */
889 c->override_base = &ctxt->fs_base;
890 break;
891 case 0x65: /* GS override */ 889 case 0x65: /* GS override */
892 c->override_base = &ctxt->gs_base; 890 set_seg_override(c, c->b & 7);
893 break;
894 case 0x36: /* SS override */
895 c->override_base = &ctxt->ss_base;
896 break; 891 break;
897 case 0x40 ... 0x4f: /* REX */ 892 case 0x40 ... 0x4f: /* REX */
898 if (mode != X86EMUL_MODE_PROT64) 893 if (mode != X86EMUL_MODE_PROT64)
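
Folding six prefix cases into two works because the prefix bytes encode the segment number directly, in exactly KVM's VCPU_SREG_* order (ES=0, CS=1, SS=2, DS=3, FS=4, GS=5). Worked out:

/* (0x26 >> 3) & 3 == 0 -> ES	(0x36 >> 3) & 3 == 2 -> SS
 * (0x2e >> 3) & 3 == 1 -> CS	(0x3e >> 3) & 3 == 3 -> DS
 *  0x64 & 7      == 4 -> FS	 0x65 & 7      == 5 -> GS */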
@@ -964,15 +959,11 @@ done_prefixes:
964 if (rc) 959 if (rc)
965 goto done; 960 goto done;
966 961
967 if (!c->override_base) 962 if (!c->has_seg_override)
968 c->override_base = &ctxt->ds_base; 963 set_seg_override(c, VCPU_SREG_DS);
969 if (mode == X86EMUL_MODE_PROT64 &&
970 c->override_base != &ctxt->fs_base &&
971 c->override_base != &ctxt->gs_base)
972 c->override_base = NULL;
973 964
974 if (c->override_base) 965 if (!(!c->twobyte && c->b == 0x8d))
975 c->modrm_ea += *c->override_base; 966 c->modrm_ea += seg_override_base(ctxt, c);
976 967
977 if (c->ad_bytes != 8) 968 if (c->ad_bytes != 8)
978 c->modrm_ea = (u32)c->modrm_ea; 969 c->modrm_ea = (u32)c->modrm_ea;
@@ -1049,6 +1040,7 @@ done_prefixes:
1049 break; 1040 break;
1050 case DstMem: 1041 case DstMem:
1051 if ((c->d & ModRM) && c->modrm_mod == 3) { 1042 if ((c->d & ModRM) && c->modrm_mod == 3) {
1043 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1052 c->dst.type = OP_REG; 1044 c->dst.type = OP_REG;
1053 c->dst.val = c->dst.orig_val = c->modrm_val; 1045 c->dst.val = c->dst.orig_val = c->modrm_val;
1054 c->dst.ptr = c->modrm_ptr; 1046 c->dst.ptr = c->modrm_ptr;
@@ -1058,6 +1050,9 @@ done_prefixes:
1058 break; 1050 break;
1059 } 1051 }
1060 1052
1053 if (c->rip_relative)
1054 c->modrm_ea += c->eip;
1055
1061done: 1056done:
1062 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; 1057 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
1063} 1058}
@@ -1070,7 +1065,7 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1070 c->dst.bytes = c->op_bytes; 1065 c->dst.bytes = c->op_bytes;
1071 c->dst.val = c->src.val; 1066 c->dst.val = c->src.val;
1072 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); 1067 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1073 c->dst.ptr = (void *) register_address(c, ctxt->ss_base, 1068 c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
1074 c->regs[VCPU_REGS_RSP]); 1069 c->regs[VCPU_REGS_RSP]);
1075} 1070}
1076 1071
@@ -1080,7 +1075,7 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1080 struct decode_cache *c = &ctxt->decode; 1075 struct decode_cache *c = &ctxt->decode;
1081 int rc; 1076 int rc;
1082 1077
1083 rc = ops->read_std(register_address(c, ctxt->ss_base, 1078 rc = ops->read_std(register_address(c, ss_base(ctxt),
1084 c->regs[VCPU_REGS_RSP]), 1079 c->regs[VCPU_REGS_RSP]),
1085 &c->dst.val, c->dst.bytes, ctxt->vcpu); 1080 &c->dst.val, c->dst.bytes, ctxt->vcpu);
1086 if (rc != 0) 1081 if (rc != 0)
@@ -1402,11 +1397,11 @@ special_insn:
1402 register_address_increment(c, &c->regs[VCPU_REGS_RSP], 1397 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1403 -c->op_bytes); 1398 -c->op_bytes);
1404 c->dst.ptr = (void *) register_address( 1399 c->dst.ptr = (void *) register_address(
1405 c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]); 1400 c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
1406 break; 1401 break;
1407 case 0x58 ... 0x5f: /* pop reg */ 1402 case 0x58 ... 0x5f: /* pop reg */
1408 pop_instruction: 1403 pop_instruction:
1409 if ((rc = ops->read_std(register_address(c, ctxt->ss_base, 1404 if ((rc = ops->read_std(register_address(c, ss_base(ctxt),
1410 c->regs[VCPU_REGS_RSP]), c->dst.ptr, 1405 c->regs[VCPU_REGS_RSP]), c->dst.ptr,
1411 c->op_bytes, ctxt->vcpu)) != 0) 1406 c->op_bytes, ctxt->vcpu)) != 0)
1412 goto done; 1407 goto done;
@@ -1420,9 +1415,8 @@ special_insn:
1420 goto cannot_emulate; 1415 goto cannot_emulate;
1421 c->dst.val = (s32) c->src.val; 1416 c->dst.val = (s32) c->src.val;
1422 break; 1417 break;
1418 case 0x68: /* push imm */
1423 case 0x6a: /* push imm8 */ 1419 case 0x6a: /* push imm8 */
1424 c->src.val = 0L;
1425 c->src.val = insn_fetch(s8, 1, c->eip);
1426 emulate_push(ctxt); 1420 emulate_push(ctxt);
1427 break; 1421 break;
1428 case 0x6c: /* insb */ 1422 case 0x6c: /* insb */
@@ -1433,7 +1427,7 @@ special_insn:
1433 c->rep_prefix ? 1427 c->rep_prefix ?
1434 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, 1428 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
1435 (ctxt->eflags & EFLG_DF), 1429 (ctxt->eflags & EFLG_DF),
1436 register_address(c, ctxt->es_base, 1430 register_address(c, es_base(ctxt),
1437 c->regs[VCPU_REGS_RDI]), 1431 c->regs[VCPU_REGS_RDI]),
1438 c->rep_prefix, 1432 c->rep_prefix,
1439 c->regs[VCPU_REGS_RDX]) == 0) { 1433 c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1449,9 +1443,8 @@ special_insn:
1449 c->rep_prefix ? 1443 c->rep_prefix ?
1450 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, 1444 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
1451 (ctxt->eflags & EFLG_DF), 1445 (ctxt->eflags & EFLG_DF),
1452 register_address(c, c->override_base ? 1446 register_address(c,
1453 *c->override_base : 1447 seg_override_base(ctxt, c),
1454 ctxt->ds_base,
1455 c->regs[VCPU_REGS_RSI]), 1448 c->regs[VCPU_REGS_RSI]),
1456 c->rep_prefix, 1449 c->rep_prefix,
1457 c->regs[VCPU_REGS_RDX]) == 0) { 1450 c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1490,6 +1483,7 @@ special_insn:
1490 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); 1483 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
1491 break; 1484 break;
1492 case 0x86 ... 0x87: /* xchg */ 1485 case 0x86 ... 0x87: /* xchg */
1486 xchg:
1493 /* Write back the register source. */ 1487 /* Write back the register source. */
1494 switch (c->dst.bytes) { 1488 switch (c->dst.bytes) {
1495 case 1: 1489 case 1:
@@ -1514,14 +1508,60 @@ special_insn:
1514 break; 1508 break;
1515 case 0x88 ... 0x8b: /* mov */ 1509 case 0x88 ... 0x8b: /* mov */
1516 goto mov; 1510 goto mov;
1511 case 0x8c: { /* mov r/m, sreg */
1512 struct kvm_segment segreg;
1513
1514 if (c->modrm_reg <= 5)
1515 kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
1516 else {
1517 printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n",
1518 c->modrm);
1519 goto cannot_emulate;
1520 }
1521 c->dst.val = segreg.selector;
1522 break;
1523 }
1517 case 0x8d: /* lea r16/r32, m */ 1524 case 0x8d: /* lea r16/r32, m */
1518 c->dst.val = c->modrm_ea; 1525 c->dst.val = c->modrm_ea;
1519 break; 1526 break;
1527 case 0x8e: { /* mov seg, r/m16 */
1528 uint16_t sel;
1529 int type_bits;
1530 int err;
1531
1532 sel = c->src.val;
1533 if (c->modrm_reg <= 5) {
1534 type_bits = (c->modrm_reg == 1) ? 9 : 1;
1535 err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
1536 type_bits, c->modrm_reg);
1537 } else {
1538 printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
1539 c->modrm);
1540 goto cannot_emulate;
1541 }
1542
1543 if (err < 0)
1544 goto cannot_emulate;
1545
1546 c->dst.type = OP_NONE; /* Disable writeback. */
1547 break;
1548 }
1520 case 0x8f: /* pop (sole member of Grp1a) */ 1549 case 0x8f: /* pop (sole member of Grp1a) */
1521 rc = emulate_grp1a(ctxt, ops); 1550 rc = emulate_grp1a(ctxt, ops);
1522 if (rc != 0) 1551 if (rc != 0)
1523 goto done; 1552 goto done;
1524 break; 1553 break;
1554 case 0x90: /* nop / xchg r8,rax */
1555 if (!(c->rex_prefix & 1)) { /* nop */
1556 c->dst.type = OP_NONE;
1557 break;
1558 }
1559 case 0x91 ... 0x97: /* xchg reg,rax */
1560 c->src.type = c->dst.type = OP_REG;
1561 c->src.bytes = c->dst.bytes = c->op_bytes;
1562 c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
1563 c->src.val = *(c->src.ptr);
1564 goto xchg;
1525 case 0x9c: /* pushf */ 1565 case 0x9c: /* pushf */
1526 c->src.val = (unsigned long) ctxt->eflags; 1566 c->src.val = (unsigned long) ctxt->eflags;
1527 emulate_push(ctxt); 1567 emulate_push(ctxt);
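
Two details in the new cases above are easy to miss. For mov seg, r/m16 (0x8e), kvm_load_segment_descriptor()'s type_bits argument is 9 for CS (a code segment) and 1 for data segments, mirroring the task-switch loads earlier in this patch. And opcode 0x90 is a nop only when REX.B is clear; with it set, the same byte addresses r8:

/* 90    -> nop            (rex_prefix bit 0 clear: writeback disabled)
 * 41 90 -> xchg %r8,%rax  (REX.B extends the register field to r8)   */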
@@ -1540,11 +1580,10 @@ special_insn:
1540 c->dst.type = OP_MEM; 1580 c->dst.type = OP_MEM;
1541 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1581 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1542 c->dst.ptr = (unsigned long *)register_address(c, 1582 c->dst.ptr = (unsigned long *)register_address(c,
1543 ctxt->es_base, 1583 es_base(ctxt),
1544 c->regs[VCPU_REGS_RDI]); 1584 c->regs[VCPU_REGS_RDI]);
1545 if ((rc = ops->read_emulated(register_address(c, 1585 if ((rc = ops->read_emulated(register_address(c,
1546 c->override_base ? *c->override_base : 1586 seg_override_base(ctxt, c),
1547 ctxt->ds_base,
1548 c->regs[VCPU_REGS_RSI]), 1587 c->regs[VCPU_REGS_RSI]),
1549 &c->dst.val, 1588 &c->dst.val,
1550 c->dst.bytes, ctxt->vcpu)) != 0) 1589 c->dst.bytes, ctxt->vcpu)) != 0)
@@ -1560,8 +1599,7 @@ special_insn:
1560 c->src.type = OP_NONE; /* Disable writeback. */ 1599 c->src.type = OP_NONE; /* Disable writeback. */
1561 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1600 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1562 c->src.ptr = (unsigned long *)register_address(c, 1601 c->src.ptr = (unsigned long *)register_address(c,
1563 c->override_base ? *c->override_base : 1602 seg_override_base(ctxt, c),
1564 ctxt->ds_base,
1565 c->regs[VCPU_REGS_RSI]); 1603 c->regs[VCPU_REGS_RSI]);
1566 if ((rc = ops->read_emulated((unsigned long)c->src.ptr, 1604 if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
1567 &c->src.val, 1605 &c->src.val,
@@ -1572,7 +1610,7 @@ special_insn:
1572 c->dst.type = OP_NONE; /* Disable writeback. */ 1610 c->dst.type = OP_NONE; /* Disable writeback. */
1573 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1611 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1574 c->dst.ptr = (unsigned long *)register_address(c, 1612 c->dst.ptr = (unsigned long *)register_address(c,
1575 ctxt->es_base, 1613 es_base(ctxt),
1576 c->regs[VCPU_REGS_RDI]); 1614 c->regs[VCPU_REGS_RDI]);
1577 if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, 1615 if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
1578 &c->dst.val, 1616 &c->dst.val,
@@ -1596,7 +1634,7 @@ special_insn:
1596 c->dst.type = OP_MEM; 1634 c->dst.type = OP_MEM;
1597 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1635 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1598 c->dst.ptr = (unsigned long *)register_address(c, 1636 c->dst.ptr = (unsigned long *)register_address(c,
1599 ctxt->es_base, 1637 es_base(ctxt),
1600 c->regs[VCPU_REGS_RDI]); 1638 c->regs[VCPU_REGS_RDI]);
1601 c->dst.val = c->regs[VCPU_REGS_RAX]; 1639 c->dst.val = c->regs[VCPU_REGS_RAX];
1602 register_address_increment(c, &c->regs[VCPU_REGS_RDI], 1640 register_address_increment(c, &c->regs[VCPU_REGS_RDI],
@@ -1608,8 +1646,7 @@ special_insn:
1608 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 1646 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1609 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 1647 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
1610 if ((rc = ops->read_emulated(register_address(c, 1648 if ((rc = ops->read_emulated(register_address(c,
1611 c->override_base ? *c->override_base : 1649 seg_override_base(ctxt, c),
1612 ctxt->ds_base,
1613 c->regs[VCPU_REGS_RSI]), 1650 c->regs[VCPU_REGS_RSI]),
1614 &c->dst.val, 1651 &c->dst.val,
1615 c->dst.bytes, 1652 c->dst.bytes,
@@ -1622,6 +1659,8 @@ special_insn:
1622 case 0xae ... 0xaf: /* scas */ 1659 case 0xae ... 0xaf: /* scas */
1623 DPRINTF("Urk! I don't handle SCAS.\n"); 1660 DPRINTF("Urk! I don't handle SCAS.\n");
1624 goto cannot_emulate; 1661 goto cannot_emulate;
1662 case 0xb8: /* mov r, imm */
1663 goto mov;
1625 case 0xc0 ... 0xc1: 1664 case 0xc0 ... 0xc1:
1626 emulate_grp2(ctxt); 1665 emulate_grp2(ctxt);
1627 break; 1666 break;
@@ -1660,13 +1699,39 @@ special_insn:
1660 break; 1699 break;
1661 } 1700 }
1662 case 0xe9: /* jmp rel */ 1701 case 0xe9: /* jmp rel */
1663 case 0xeb: /* jmp rel short */ 1702 goto jmp;
1703 case 0xea: /* jmp far */ {
1704 uint32_t eip;
1705 uint16_t sel;
1706
1707 switch (c->op_bytes) {
1708 case 2:
1709 eip = insn_fetch(u16, 2, c->eip);
1710 break;
1711 case 4:
1712 eip = insn_fetch(u32, 4, c->eip);
1713 break;
1714 default:
1715 DPRINTF("jmp far: Invalid op_bytes\n");
1716 goto cannot_emulate;
1717 }
1718 sel = insn_fetch(u16, 2, c->eip);
1719 if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) {
1720 DPRINTF("jmp far: Failed to load CS descriptor\n");
1721 goto cannot_emulate;
1722 }
1723
1724 c->eip = eip;
1725 break;
1726 }
1727 case 0xeb:
1728 jmp: /* jmp rel short */
1664 jmp_rel(c, c->src.val); 1729 jmp_rel(c, c->src.val);
1665 c->dst.type = OP_NONE; /* Disable writeback. */ 1730 c->dst.type = OP_NONE; /* Disable writeback. */
1666 break; 1731 break;
1667 case 0xf4: /* hlt */ 1732 case 0xf4: /* hlt */
1668 ctxt->vcpu->arch.halt_request = 1; 1733 ctxt->vcpu->arch.halt_request = 1;
1669 goto done; 1734 break;
1670 case 0xf5: /* cmc */ 1735 case 0xf5: /* cmc */
1671 /* complement carry flag from eflags reg */ 1736 /* complement carry flag from eflags reg */
1672 ctxt->eflags ^= EFLG_CF; 1737 ctxt->eflags ^= EFLG_CF;
@@ -1882,6 +1947,8 @@ twobyte_insn:
1882 c->src.val &= (c->dst.bytes << 3) - 1; 1947 c->src.val &= (c->dst.bytes << 3) - 1;
1883 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); 1948 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
1884 break; 1949 break;
1950 case 0xae: /* clflush */
1951 break;
1885 case 0xb0 ... 0xb1: /* cmpxchg */ 1952 case 0xb0 ... 0xb1: /* cmpxchg */
1886 /* 1953 /*
1887 * Save real source value, then compare EAX against 1954 * Save real source value, then compare EAX against