Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--	arch/x86/kvm/x86.c	474
1 file changed, 351 insertions(+), 123 deletions(-)
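Broadly, as far as the hunks below show, this diff combines three related lines of work: x86 support for asynchronous page faults (the MSR_KVM_ASYNC_PF_EN plumbing, a per-vCPU gfn hash, and the kvm_arch_async_page_*() callbacks at the end of the file), a conversion of the emulator's u32 error-code out-parameters to a richer struct x86_exception, and a kvm_read_cr3() accessor paired with VCPU_EXREG_CR3 availability tracking. Smaller fixes ride along: CPUID feature words masked against host capabilities via cpuid_mask(), a double-buffered dirty bitmap in the dirty-log ioctl, slots_lock coverage in the irqchip error paths, and PIO tracepoints that report the real repeat count.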
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 46a368cb651e..bcc0efce85bf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -43,6 +43,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/hash.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -155,6 +156,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
+static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
+{
+	int i;
+	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
+		vcpu->arch.apf.gfns[i] = ~0;
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
 	unsigned slot;
@@ -326,23 +334,28 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 }
 EXPORT_SYMBOL_GPL(kvm_requeue_exception);
 
-void kvm_inject_page_fault(struct kvm_vcpu *vcpu)
+void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
 {
-	unsigned error_code = vcpu->arch.fault.error_code;
+	if (err)
+		kvm_inject_gp(vcpu, 0);
+	else
+		kvm_x86_ops->skip_emulated_instruction(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
 
+void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+{
 	++vcpu->stat.pf_guest;
-	vcpu->arch.cr2 = vcpu->arch.fault.address;
-	kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
+	vcpu->arch.cr2 = fault->address;
+	kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
 }
 
-void kvm_propagate_fault(struct kvm_vcpu *vcpu)
+void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
-	if (mmu_is_nested(vcpu) && !vcpu->arch.fault.nested)
-		vcpu->arch.nested_mmu.inject_page_fault(vcpu);
+	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
+		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
 	else
-		vcpu->arch.mmu.inject_page_fault(vcpu);
-
-	vcpu->arch.fault.nested = false;
+		vcpu->arch.mmu.inject_page_fault(vcpu, fault);
 }
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
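The new kvm_complete_insn_gp() helper folds the common "inject #GP(0) on failure, otherwise skip the emulated instruction" pattern into one call. A minimal sketch of how a vendor exit handler might use it; handle_cr3_write() and its surrounding plumbing are hypothetical, for illustration only:

/* Hypothetical exit handler for a guest MOV-to-CR3 intercept. */
static int handle_cr3_write(struct kvm_vcpu *vcpu, unsigned long val)
{
	/* kvm_set_cr3() returns non-zero on failure; kvm_complete_insn_gp()
	 * then injects #GP(0), or skips the instruction on success. */
	kvm_complete_insn_gp(vcpu, kvm_set_cr3(vcpu, val));
	return 1;
}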
@@ -460,8 +473,8 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 		      (unsigned long *)&vcpu->arch.regs_avail))
 		return true;
 
-	gfn = (vcpu->arch.cr3 & ~31u) >> PAGE_SHIFT;
-	offset = (vcpu->arch.cr3 & ~31u) & (PAGE_SIZE - 1);
+	gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
+	offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
 	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
 				       PFERR_USER_MASK | PFERR_WRITE_MASK);
 	if (r < 0)
@@ -506,12 +519,15 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	} else
 #endif
 		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
-						 vcpu->arch.cr3))
+						 kvm_read_cr3(vcpu)))
 			return 1;
 	}
 
 	kvm_x86_ops->set_cr0(vcpu, cr0);
 
+	if ((cr0 ^ old_cr0) & X86_CR0_PG)
+		kvm_clear_async_pf_completion_queue(vcpu);
+
 	if ((cr0 ^ old_cr0) & update_bits)
 		kvm_mmu_reset_context(vcpu);
 	return 0;
@@ -595,7 +611,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 			return 1;
 	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
 		   && ((cr4 ^ old_cr4) & pdptr_bits)
-		   && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3))
+		   && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
+				   kvm_read_cr3(vcpu)))
 		return 1;
 
 	if (cr4 & X86_CR4_VMXE)
@@ -615,7 +632,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
-	if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
+	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
 		kvm_mmu_sync_roots(vcpu);
 		kvm_mmu_flush_tlb(vcpu);
 		return 0;
@@ -650,12 +667,13 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
 		return 1;
 	vcpu->arch.cr3 = cr3;
+	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
 	vcpu->arch.mmu.new_cr3(vcpu);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
 
-int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 	if (cr8 & CR8_RESERVED_BITS)
 		return 1;
@@ -665,12 +683,6 @@ int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 	vcpu->arch.cr8 = cr8;
 	return 0;
 }
-
-void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
-{
-	if (__kvm_set_cr8(vcpu, cr8))
-		kvm_inject_gp(vcpu, 0);
-}
 EXPORT_SYMBOL_GPL(kvm_set_cr8);
 
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
@@ -775,12 +787,12 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	7
+#define KVM_SAVE_MSRS_BEGIN	8
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
-	HV_X64_MSR_APIC_ASSIST_PAGE,
+	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_STAR,
 #ifdef CONFIG_X86_64
@@ -830,7 +842,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
 	kvm_x86_ops->set_efer(vcpu, efer);
 
 	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
-	kvm_mmu_reset_context(vcpu);
 
 	/* Update reserved bits */
 	if ((efer ^ old_efer) & EFER_NX)
@@ -1418,6 +1429,30 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	return 0;
 }
 
+static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
+{
+	gpa_t gpa = data & ~0x3f;
+
+	/* Bits 2:5 are reserved, should be zero */
+	if (data & 0x3c)
+		return 1;
+
+	vcpu->arch.apf.msr_val = data;
+
+	if (!(data & KVM_ASYNC_PF_ENABLED)) {
+		kvm_clear_async_pf_completion_queue(vcpu);
+		kvm_async_pf_hash_reset(vcpu);
+		return 0;
+	}
+
+	if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
+		return 1;
+
+	vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
+	kvm_async_pf_wakeup_all(vcpu);
+	return 0;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	switch (msr) {
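For context, the MSR layout kvm_pv_enable_async_pf() validates is the one the guest writes: bits 63:6 carry the physical address of a 64-byte-aligned u32 reason word, bit 0 is KVM_ASYNC_PF_ENABLED, bit 1 is KVM_ASYNC_PF_SEND_ALWAYS, and bits 2:5 must be zero. A minimal guest-side sketch of enabling it follows; the per-cpu apf_reason variable is an assumption borrowed from the guest half of this series, not something this file defines:

/* Guest-side sketch: register a per-cpu async-PF reason word with the
 * hypervisor. apf_reason is hypothetical, used here for illustration;
 * this would typically run from per-cpu setup with preemption off. */
static DEFINE_PER_CPU(u32, apf_reason) __aligned(64);

static void kvm_guest_apf_enable(void)
{
	u64 pa = __pa(this_cpu_ptr(&apf_reason));

	/* Bit 0 enables the mechanism; bits 2:5 must stay zero or the
	 * hypervisor fails the write (see kvm_pv_enable_async_pf()). */
	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
}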
@@ -1499,6 +1534,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		}
 		break;
 	}
+	case MSR_KVM_ASYNC_PF_EN:
+		if (kvm_pv_enable_async_pf(vcpu, data))
+			return 1;
+		break;
 	case MSR_IA32_MCG_CTL:
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
@@ -1775,6 +1814,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_KVM_SYSTEM_TIME_NEW:
 		data = vcpu->arch.time;
 		break;
+	case MSR_KVM_ASYNC_PF_EN:
+		data = vcpu->arch.apf.msr_val;
+		break;
 	case MSR_IA32_P5_MC_ADDR:
 	case MSR_IA32_P5_MC_TYPE:
 	case MSR_IA32_MCG_CAP:
@@ -1904,6 +1946,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_SYNC_MMU:
+	case KVM_CAP_USER_NMI:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
 	case KVM_CAP_ASSIGN_DEV_IRQ:
@@ -1922,6 +1965,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 	case KVM_CAP_XSAVE:
+	case KVM_CAP_ASYNC_PF:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -2185,6 +2229,11 @@ out:
 	return r;
 }
 
+static void cpuid_mask(u32 *word, int wordnum)
+{
+	*word &= boot_cpu_data.x86_capability[wordnum];
+}
+
 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			   u32 index)
 {
@@ -2259,7 +2308,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		break;
 	case 1:
 		entry->edx &= kvm_supported_word0_x86_features;
+		cpuid_mask(&entry->edx, 0);
 		entry->ecx &= kvm_supported_word4_x86_features;
+		cpuid_mask(&entry->ecx, 4);
 		/* we support x2apic emulation even if host does not support
 		 * it since we emulate x2apic in software */
 		entry->ecx |= F(X2APIC);
@@ -2350,7 +2401,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		break;
 	case 0x80000001:
 		entry->edx &= kvm_supported_word1_x86_features;
+		cpuid_mask(&entry->edx, 1);
 		entry->ecx &= kvm_supported_word6_x86_features;
+		cpuid_mask(&entry->ecx, 6);
 		break;
 	}
 
@@ -3169,20 +3222,18 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		struct kvm_memslots *slots, *old_slots;
 		unsigned long *dirty_bitmap;
 
-		r = -ENOMEM;
-		dirty_bitmap = vmalloc(n);
-		if (!dirty_bitmap)
-			goto out;
+		dirty_bitmap = memslot->dirty_bitmap_head;
+		if (memslot->dirty_bitmap == dirty_bitmap)
+			dirty_bitmap += n / sizeof(long);
 		memset(dirty_bitmap, 0, n);
 
 		r = -ENOMEM;
 		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
-		if (!slots) {
-			vfree(dirty_bitmap);
+		if (!slots)
 			goto out;
-		}
 		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
 		slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
+		slots->generation++;
 
 		old_slots = kvm->memslots;
 		rcu_assign_pointer(kvm->memslots, slots);
@@ -3195,11 +3246,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 		spin_unlock(&kvm->mmu_lock);
 
 		r = -EFAULT;
-		if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) {
-			vfree(dirty_bitmap);
+		if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
 			goto out;
-		}
-		vfree(dirty_bitmap);
 	} else {
 		r = -EFAULT;
 		if (clear_user(log->dirty_bitmap, n))
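The get-dirty-log path above stops vmalloc'ing a scratch bitmap on every call and instead flips between the two halves of a preallocated double buffer: dirty_bitmap_head points at two bitmaps laid out back to back, and whichever half is not currently installed in the live memslot becomes the new, zeroed bitmap. A standalone sketch of the flip, with a hypothetical simplified slot type to illustrate the invariant:

#include <string.h>

/* Hypothetical, simplified memslot for illustration only. */
struct slot {
	unsigned long *dirty_bitmap;      /* half currently in use   */
	unsigned long *dirty_bitmap_head; /* start of both halves    */
};

/* Return the idle half, zeroed, ready to be swapped in. */
static unsigned long *next_dirty_bitmap(struct slot *s, size_t n_bytes)
{
	unsigned long *bitmap = s->dirty_bitmap_head;

	if (s->dirty_bitmap == bitmap)            /* first half live?  */
		bitmap += n_bytes / sizeof(long); /* ...use the second */
	memset(bitmap, 0, n_bytes);
	return bitmap;
}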
@@ -3266,8 +3314,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (vpic) {
 			r = kvm_ioapic_init(kvm);
 			if (r) {
+				mutex_lock(&kvm->slots_lock);
 				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
 							  &vpic->dev);
+				mutex_unlock(&kvm->slots_lock);
 				kfree(vpic);
 				goto create_irqchip_unlock;
 			}
@@ -3278,10 +3328,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		smp_wmb();
 		r = kvm_setup_default_irq_routing(kvm);
 		if (r) {
+			mutex_lock(&kvm->slots_lock);
 			mutex_lock(&kvm->irq_lock);
 			kvm_ioapic_destroy(kvm);
 			kvm_destroy_pic(kvm);
 			mutex_unlock(&kvm->irq_lock);
+			mutex_unlock(&kvm->slots_lock);
 		}
 	create_irqchip_unlock:
 		mutex_unlock(&kvm->lock);
@@ -3557,63 +3609,63 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
 static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
 {
 	gpa_t t_gpa;
-	u32 error;
+	struct x86_exception exception;
 
 	BUG_ON(!mmu_is_nested(vcpu));
 
 	/* NPT walks are always user-walks */
 	access |= PFERR_USER_MASK;
-	t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &error);
-	if (t_gpa == UNMAPPED_GVA)
-		vcpu->arch.fault.nested = true;
+	t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
 
 	return t_gpa;
 }
 
-gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
+			      struct x86_exception *exception)
 {
 	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
-	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
+	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
 
- gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+ gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
+				struct x86_exception *exception)
 {
 	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
 	access |= PFERR_FETCH_MASK;
-	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
+	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
 
-gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
+			       struct x86_exception *exception)
 {
 	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
 	access |= PFERR_WRITE_MASK;
-	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error);
+	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
 
 /* uses this to access any guest's mapped memory without checking CPL */
-gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
+				struct x86_exception *exception)
 {
-	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, error);
+	return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
 }
 
 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
 				      struct kvm_vcpu *vcpu, u32 access,
-				      u32 *error)
+				      struct x86_exception *exception)
 {
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
 	while (bytes) {
 		gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
-							    error);
+							    exception);
 		unsigned offset = addr & (PAGE_SIZE-1);
 		unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
 		int ret;
 
-		if (gpa == UNMAPPED_GVA) {
-			r = X86EMUL_PROPAGATE_FAULT;
-			goto out;
-		}
+		if (gpa == UNMAPPED_GVA)
+			return X86EMUL_PROPAGATE_FAULT;
 		ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
 		if (ret < 0) {
 			r = X86EMUL_IO_NEEDED;
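This hunk replaces the bare u32 error-code out-parameter with a full struct x86_exception, so callers get the vector, error code, faulting address, and nested-page-fault flag in one place. Judging from the fields the fault-filling code later in this diff assigns, the struct looks roughly like the sketch below; the real definition lives in a header elsewhere, and only the fields actually touched here are shown:

/* Approximate shape of struct x86_exception, inferred from this diff. */
struct x86_exception {
	u8 vector;              /* e.g. PF_VECTOR */
	bool error_code_valid;  /* does the vector push an error code? */
	u16 error_code;
	bool nested_page_fault; /* fault taken during a nested walk? */
	u64 address;            /* CR2-style faulting address */
};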
@@ -3630,31 +3682,35 @@ out:
 
 /* used for instruction fetching */
 static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
-				struct kvm_vcpu *vcpu, u32 *error)
+				struct kvm_vcpu *vcpu,
+				struct x86_exception *exception)
 {
 	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
-					  access | PFERR_FETCH_MASK, error);
+					  access | PFERR_FETCH_MASK,
+					  exception);
 }
 
 static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
-			       struct kvm_vcpu *vcpu, u32 *error)
+			       struct kvm_vcpu *vcpu,
+			       struct x86_exception *exception)
 {
 	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
-					  error);
+					  exception);
 }
 
 static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
-				      struct kvm_vcpu *vcpu, u32 *error)
+				      struct kvm_vcpu *vcpu,
+				      struct x86_exception *exception)
 {
-	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
+	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
 }
 
 static int kvm_write_guest_virt_system(gva_t addr, void *val,
 				       unsigned int bytes,
 				       struct kvm_vcpu *vcpu,
-				       u32 *error)
+				       struct x86_exception *exception)
 {
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
@@ -3662,15 +3718,13 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val,
 	while (bytes) {
 		gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
 							    PFERR_WRITE_MASK,
-							    error);
+							    exception);
 		unsigned offset = addr & (PAGE_SIZE-1);
 		unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
 		int ret;
 
-		if (gpa == UNMAPPED_GVA) {
-			r = X86EMUL_PROPAGATE_FAULT;
-			goto out;
-		}
+		if (gpa == UNMAPPED_GVA)
+			return X86EMUL_PROPAGATE_FAULT;
 		ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
 		if (ret < 0) {
 			r = X86EMUL_IO_NEEDED;
@@ -3688,7 +3742,7 @@ out:
 static int emulator_read_emulated(unsigned long addr,
 				  void *val,
 				  unsigned int bytes,
-				  unsigned int *error_code,
+				  struct x86_exception *exception,
 				  struct kvm_vcpu *vcpu)
 {
 	gpa_t gpa;
@@ -3701,7 +3755,7 @@ static int emulator_read_emulated(unsigned long addr,
 		return X86EMUL_CONTINUE;
 	}
 
-	gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code);
+	gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception);
 
 	if (gpa == UNMAPPED_GVA)
 		return X86EMUL_PROPAGATE_FAULT;
@@ -3710,8 +3764,8 @@ static int emulator_read_emulated(unsigned long addr,
 	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 		goto mmio;
 
-	if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
+	if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception)
 	    == X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
 mmio:
@@ -3735,7 +3789,7 @@ mmio:
 }
 
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
-				const void *val, int bytes)
+			const void *val, int bytes)
 {
 	int ret;
 
@@ -3749,12 +3803,12 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 static int emulator_write_emulated_onepage(unsigned long addr,
 					   const void *val,
 					   unsigned int bytes,
-					   unsigned int *error_code,
+					   struct x86_exception *exception,
 					   struct kvm_vcpu *vcpu)
 {
 	gpa_t gpa;
 
-	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code);
+	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
 
 	if (gpa == UNMAPPED_GVA)
 		return X86EMUL_PROPAGATE_FAULT;
@@ -3787,7 +3841,7 @@ mmio:
 int emulator_write_emulated(unsigned long addr,
 			    const void *val,
 			    unsigned int bytes,
-			    unsigned int *error_code,
+			    struct x86_exception *exception,
 			    struct kvm_vcpu *vcpu)
 {
 	/* Crossing a page boundary? */
@@ -3795,7 +3849,7 @@ int emulator_write_emulated(unsigned long addr,
 		int rc, now;
 
 		now = -addr & ~PAGE_MASK;
-		rc = emulator_write_emulated_onepage(addr, val, now, error_code,
+		rc = emulator_write_emulated_onepage(addr, val, now, exception,
 						     vcpu);
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
@@ -3803,7 +3857,7 @@ int emulator_write_emulated(unsigned long addr,
 		val += now;
 		bytes -= now;
 	}
-	return emulator_write_emulated_onepage(addr, val, bytes, error_code,
+	return emulator_write_emulated_onepage(addr, val, bytes, exception,
 					       vcpu);
 }
 
@@ -3821,7 +3875,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 				     const void *old,
 				     const void *new,
 				     unsigned int bytes,
-				     unsigned int *error_code,
+				     struct x86_exception *exception,
 				     struct kvm_vcpu *vcpu)
 {
 	gpa_t gpa;
@@ -3879,7 +3933,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 emul_write:
 	printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
 
-	return emulator_write_emulated(addr, new, bytes, error_code, vcpu);
+	return emulator_write_emulated(addr, new, bytes, exception, vcpu);
 }
 
 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
@@ -3904,7 +3958,7 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
 	if (vcpu->arch.pio.count)
 		goto data_avail;
 
-	trace_kvm_pio(0, port, size, 1);
+	trace_kvm_pio(0, port, size, count);
 
 	vcpu->arch.pio.port = port;
 	vcpu->arch.pio.in = 1;
@@ -3932,7 +3986,7 @@ static int emulator_pio_out_emulated(int size, unsigned short port,
 					 const void *val, unsigned int count,
 					 struct kvm_vcpu *vcpu)
 {
-	trace_kvm_pio(1, port, size, 1);
+	trace_kvm_pio(1, port, size, count);
 
 	vcpu->arch.pio.port = port;
 	vcpu->arch.pio.in = 0;
@@ -3973,13 +4027,15 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
 		return X86EMUL_CONTINUE;
 
 	if (kvm_x86_ops->has_wbinvd_exit()) {
-		preempt_disable();
+		int cpu = get_cpu();
+
+		cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
 		smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
 				wbinvd_ipi, NULL, 1);
-		preempt_enable();
+		put_cpu();
 		cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
-	}
-	wbinvd();
+	} else
+		wbinvd();
 	return X86EMUL_CONTINUE;
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
@@ -4019,7 +4075,7 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
 		value = vcpu->arch.cr2;
 		break;
 	case 3:
-		value = vcpu->arch.cr3;
+		value = kvm_read_cr3(vcpu);
 		break;
 	case 4:
 		value = kvm_read_cr4(vcpu);
@@ -4053,7 +4109,7 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
 		res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
 		break;
 	case 8:
-		res = __kvm_set_cr8(vcpu, val & 0xfUL);
+		res = kvm_set_cr8(vcpu, val);
 		break;
 	default:
 		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
@@ -4206,12 +4262,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
 static void inject_emulated_exception(struct kvm_vcpu *vcpu)
 {
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
-	if (ctxt->exception == PF_VECTOR)
-		kvm_propagate_fault(vcpu);
-	else if (ctxt->error_code_valid)
-		kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code);
+	if (ctxt->exception.vector == PF_VECTOR)
+		kvm_propagate_fault(vcpu, &ctxt->exception);
+	else if (ctxt->exception.error_code_valid)
+		kvm_queue_exception_e(vcpu, ctxt->exception.vector,
+				      ctxt->exception.error_code);
 	else
-		kvm_queue_exception(vcpu, ctxt->exception);
+		kvm_queue_exception(vcpu, ctxt->exception.vector);
 }
 
 static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
@@ -4267,13 +4324,19 @@ EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
 static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 {
+	int r = EMULATE_DONE;
+
 	++vcpu->stat.insn_emulation_fail;
 	trace_kvm_emulate_insn_failed(vcpu);
-	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-	vcpu->run->internal.ndata = 0;
+	if (!is_guest_mode(vcpu)) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		r = EMULATE_FAIL;
+	}
 	kvm_queue_exception(vcpu, UD_VECTOR);
-	return EMULATE_FAIL;
+
+	return r;
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
@@ -4302,10 +4365,11 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	return false;
 }
 
-int emulate_instruction(struct kvm_vcpu *vcpu,
-			unsigned long cr2,
-			u16 error_code,
-			int emulation_type)
+int x86_emulate_instruction(struct kvm_vcpu *vcpu,
+			    unsigned long cr2,
+			    int emulation_type,
+			    void *insn,
+			    int insn_len)
 {
 	int r;
 	struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
@@ -4323,10 +4387,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
 		init_emulate_ctxt(vcpu);
 		vcpu->arch.emulate_ctxt.interruptibility = 0;
-		vcpu->arch.emulate_ctxt.exception = -1;
+		vcpu->arch.emulate_ctxt.have_exception = false;
 		vcpu->arch.emulate_ctxt.perm_ok = false;
 
-		r = x86_decode_insn(&vcpu->arch.emulate_ctxt);
+		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len);
 		if (r == X86EMUL_PROPAGATE_FAULT)
 			goto done;
 
@@ -4389,7 +4453,7 @@ restart:
 	}
 
 done:
-	if (vcpu->arch.emulate_ctxt.exception >= 0) {
+	if (vcpu->arch.emulate_ctxt.have_exception) {
 		inject_emulated_exception(vcpu);
 		r = EMULATE_DONE;
 	} else if (vcpu->arch.pio.count) {
@@ -4413,7 +4477,7 @@ done:
 
 	return r;
 }
-EXPORT_SYMBOL_GPL(emulate_instruction);
+EXPORT_SYMBOL_GPL(x86_emulate_instruction);
 
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 {
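Note that callers such as kvm_arch_vcpu_ioctl_run() further down still invoke emulate_instruction() with only an emulation type; presumably the rename is paired with a static inline wrapper in a header along these lines (a sketch inferred from the call sites, not part of this file):

/* Hedged sketch of the compatibility wrapper implied by the call sites
 * below; the real one would live in a KVM header, not in x86.c. */
static inline int emulate_instruction(struct kvm_vcpu *vcpu,
				      int emulation_type)
{
	/* No faulting address and no pre-fetched instruction bytes:
	 * x86_emulate_instruction() fetches from the guest itself. */
	return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
}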
@@ -4653,7 +4717,6 @@ int kvm_arch_init(void *opaque)
 
 	kvm_x86_ops = ops;
 	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
-	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
 			PT_DIRTY_MASK, PT64_NX_MASK, 0);
 
@@ -5116,6 +5179,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			vcpu->fpu_active = 0;
 			kvm_x86_ops->fpu_deactivate(vcpu);
 		}
+		if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
+			/* Page is swapped out. Do synthetic halt */
+			vcpu->arch.apf.halted = true;
+			r = 1;
+			goto out;
+		}
 	}
 
 	r = kvm_mmu_reload(vcpu);
@@ -5244,7 +5313,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
 	r = 1;
 	while (r > 0) {
-		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
+		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+		    !vcpu->arch.apf.halted)
 			r = vcpu_enter_guest(vcpu);
 		else {
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -5257,6 +5327,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 					vcpu->arch.mp_state =
 						KVM_MP_STATE_RUNNABLE;
 			case KVM_MP_STATE_RUNNABLE:
+				vcpu->arch.apf.halted = false;
 				break;
 			case KVM_MP_STATE_SIPI_RECEIVED:
 			default:
@@ -5278,6 +5349,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
 			++vcpu->stat.request_irq_exits;
 		}
+
+		kvm_check_async_pf_completion(vcpu);
+
 		if (signal_pending(current)) {
 			r = -EINTR;
 			vcpu->run->exit_reason = KVM_EXIT_INTR;
@@ -5302,6 +5376,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	int r;
 	sigset_t sigsaved;
 
+	if (!tsk_used_math(current) && init_fpu(current))
+		return -ENOMEM;
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
@@ -5313,8 +5390,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 
 	/* re-sync apic's tpr */
-	if (!irqchip_in_kernel(vcpu->kvm))
-		kvm_set_cr8(vcpu, kvm_run->cr8);
+	if (!irqchip_in_kernel(vcpu->kvm)) {
+		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
+			r = -EINVAL;
+			goto out;
+		}
+	}
 
 	if (vcpu->arch.pio.count || vcpu->mmio_needed) {
 		if (vcpu->mmio_needed) {
@@ -5323,7 +5404,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			vcpu->mmio_needed = 0;
 		}
 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-		r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE);
+		r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 		if (r != EMULATE_DONE) {
 			r = 0;
@@ -5436,7 +5517,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 	sregs->cr0 = kvm_read_cr0(vcpu);
 	sregs->cr2 = vcpu->arch.cr2;
-	sregs->cr3 = vcpu->arch.cr3;
+	sregs->cr3 = kvm_read_cr3(vcpu);
 	sregs->cr4 = kvm_read_cr4(vcpu);
 	sregs->cr8 = kvm_get_cr8(vcpu);
 	sregs->efer = vcpu->arch.efer;
@@ -5504,8 +5585,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	kvm_x86_ops->set_gdt(vcpu, &dt);
 
 	vcpu->arch.cr2 = sregs->cr2;
-	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
+	mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
 	vcpu->arch.cr3 = sregs->cr3;
+	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
 
 	kvm_set_cr8(vcpu, sregs->cr8);
 
@@ -5522,7 +5604,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (sregs->cr4 & X86_CR4_OSXSAVE)
 		update_cpuid(vcpu);
 	if (!is_long_mode(vcpu) && is_pae(vcpu)) {
-		load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
+		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
 		mmu_reset_needed = 1;
 	}
 
@@ -5773,6 +5855,8 @@ free_vcpu:
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.apf.msr_val = 0;
+
 	vcpu_load(vcpu);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
@@ -5792,6 +5876,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.dr7 = DR7_FIXED_1;
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	vcpu->arch.apf.msr_val = 0;
+
+	kvm_clear_async_pf_completion_queue(vcpu);
+	kvm_async_pf_hash_reset(vcpu);
+	vcpu->arch.apf.halted = false;
 
 	return kvm_x86_ops->vcpu_reset(vcpu);
 }
@@ -5881,6 +5970,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
 		goto fail_free_mce_banks;
 
+	kvm_async_pf_hash_reset(vcpu);
+
 	return 0;
 fail_free_mce_banks:
 	kfree(vcpu->arch.mce_banks);
@@ -5906,13 +5997,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	free_page((unsigned long)vcpu->arch.pio_data);
 }
 
-struct kvm *kvm_arch_create_vm(void)
+int kvm_arch_init_vm(struct kvm *kvm)
 {
-	struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
-
-	if (!kvm)
-		return ERR_PTR(-ENOMEM);
-
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
@@ -5921,7 +6007,7 @@ struct kvm *kvm_arch_create_vm(void)
 
 	spin_lock_init(&kvm->arch.tsc_write_lock);
 
-	return kvm;
+	return 0;
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
@@ -5939,8 +6025,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
 	/*
 	 * Unpin any mmu pages first.
 	 */
-	kvm_for_each_vcpu(i, vcpu, kvm)
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		kvm_clear_async_pf_completion_queue(vcpu);
 		kvm_unload_vcpu_mmu(vcpu);
+	}
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		kvm_arch_vcpu_free(vcpu);
 
@@ -5964,13 +6052,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
 	kvm_free_vcpus(kvm);
-	kvm_free_physmem(kvm);
 	if (kvm->arch.apic_access_page)
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
-	cleanup_srcu_struct(&kvm->srcu);
-	kfree(kvm);
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -6051,7 +6136,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
+	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+		!vcpu->arch.apf.halted)
+		|| !list_empty_careful(&vcpu->async_pf.done)
 		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
 		|| vcpu->arch.nmi_pending ||
 		(kvm_arch_interrupt_allowed(vcpu) &&
@@ -6110,6 +6197,147 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 }
 EXPORT_SYMBOL_GPL(kvm_set_rflags);
 
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
+{
+	int r;
+
+	if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
+	      is_error_page(work->page))
+		return;
+
+	r = kvm_mmu_reload(vcpu);
+	if (unlikely(r))
+		return;
+
+	if (!vcpu->arch.mmu.direct_map &&
+	      work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
+		return;
+
+	vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
+}
+
+static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
+{
+	return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
+}
+
+static inline u32 kvm_async_pf_next_probe(u32 key)
+{
+	return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
+}
+
+static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u32 key = kvm_async_pf_hash_fn(gfn);
+
+	while (vcpu->arch.apf.gfns[key] != ~0)
+		key = kvm_async_pf_next_probe(key);
+
+	vcpu->arch.apf.gfns[key] = gfn;
+}
+
+static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	int i;
+	u32 key = kvm_async_pf_hash_fn(gfn);
+
+	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
+		     (vcpu->arch.apf.gfns[key] != gfn &&
+		      vcpu->arch.apf.gfns[key] != ~0); i++)
+		key = kvm_async_pf_next_probe(key);
+
+	return key;
+}
+
+bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
+}
+
+static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u32 i, j, k;
+
+	i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
+	while (true) {
+		vcpu->arch.apf.gfns[i] = ~0;
+		do {
+			j = kvm_async_pf_next_probe(j);
+			if (vcpu->arch.apf.gfns[j] == ~0)
+				return;
+			k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
+			/*
+			 * k lies cyclically in ]i,j]
+			 * |    i.k.j |
+			 * |....j i.k.| or  |.k..j i...|
+			 */
+		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
+		vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
+		i = j;
+	}
+}
+
+static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
+{
+
+	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
+				      sizeof(val));
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work)
+{
+	struct x86_exception fault;
+
+	trace_kvm_async_pf_not_present(work->arch.token, work->gva);
+	kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
+
+	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
+	    (vcpu->arch.apf.send_user_only &&
+	     kvm_x86_ops->get_cpl(vcpu) == 0))
+		kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+	else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
+		fault.vector = PF_VECTOR;
+		fault.error_code_valid = true;
+		fault.error_code = 0;
+		fault.nested_page_fault = false;
+		fault.address = work->arch.token;
+		kvm_inject_page_fault(vcpu, &fault);
+	}
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work)
+{
+	struct x86_exception fault;
+
+	trace_kvm_async_pf_ready(work->arch.token, work->gva);
+	if (is_error_page(work->page))
+		work->arch.token = ~0; /* broadcast wakeup */
+	else
+		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+
+	if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
+	    !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
+		fault.vector = PF_VECTOR;
+		fault.error_code_valid = true;
+		fault.error_code = 0;
+		fault.nested_page_fault = false;
+		fault.address = work->arch.token;
+		kvm_inject_page_fault(vcpu, &fault);
+	}
+	vcpu->arch.apf.halted = false;
+}
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
+		return true;
+	else
+		return !kvm_event_needs_reinjection(vcpu) &&
+			kvm_x86_ops->interrupt_allowed(vcpu);
+}
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
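A closing note on kvm_del_async_pf_gfn() above: it implements classic open-addressing deletion. After emptying slot i it walks forward from j, and any later entry whose home slot k lies cyclically in ]i, j] may stay put, while one whose home slot does not must be pulled back into the hole so probe chains never cross an empty slot. A standalone sketch of that interval check, usable as a quick host-side test; the names here are hypothetical and only the modular arithmetic mirrors the code above:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Does slot k lie cyclically in the half-open interval ]i, j]?
 * This is the exact condition kvm_del_async_pf_gfn() uses to decide
 * that the entry in slot j does NOT need to move into the hole at i. */
static bool in_cyclic_range(uint32_t i, uint32_t k, uint32_t j)
{
	return (i <= j) ? (i < k && k <= j) : (i < k || k <= j);
}

int main(void)
{
	/* No wrap-around: ]2, 5] contains 3..5 but not 2 or 6. */
	assert(in_cyclic_range(2, 5, 5));
	assert(!in_cyclic_range(2, 2, 5));
	assert(!in_cyclic_range(2, 6, 5));

	/* Wrap-around in a 64-slot table: ]60, 3] holds 61..63 and 0..3. */
	assert(in_cyclic_range(60, 62, 3));
	assert(in_cyclic_range(60, 1, 3));
	assert(!in_cyclic_range(60, 10, 3));
	return 0;
}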