author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/kvm/x86.c
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 1860 |
1 file changed, 1381 insertions(+), 479 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3a09c625d526..77c9d8673dc4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6,7 +6,7 @@ | |||
6 | * Copyright (C) 2006 Qumranet, Inc. | 6 | * Copyright (C) 2006 Qumranet, Inc. |
7 | * Copyright (C) 2008 Qumranet, Inc. | 7 | * Copyright (C) 2008 Qumranet, Inc. |
8 | * Copyright IBM Corporation, 2008 | 8 | * Copyright IBM Corporation, 2008 |
9 | * Copyright 2010 Red Hat, Inc. and/or its affilates. | 9 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. |
10 | * | 10 | * |
11 | * Authors: | 11 | * Authors: |
12 | * Avi Kivity <avi@qumranet.com> | 12 | * Avi Kivity <avi@qumranet.com> |
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
44 | #include <linux/perf_event.h> | 44 | #include <linux/perf_event.h> |
45 | #include <linux/uaccess.h> | 45 | #include <linux/uaccess.h> |
46 | #include <linux/hash.h> | ||
46 | #include <trace/events/kvm.h> | 47 | #include <trace/events/kvm.h> |
47 | 48 | ||
48 | #define CREATE_TRACE_POINTS | 49 | #define CREATE_TRACE_POINTS |
@@ -55,32 +56,25 @@ | |||
55 | #include <asm/mce.h> | 56 | #include <asm/mce.h> |
56 | #include <asm/i387.h> | 57 | #include <asm/i387.h> |
57 | #include <asm/xcr.h> | 58 | #include <asm/xcr.h> |
59 | #include <asm/pvclock.h> | ||
60 | #include <asm/div64.h> | ||
58 | 61 | ||
59 | #define MAX_IO_MSRS 256 | 62 | #define MAX_IO_MSRS 256 |
60 | #define CR0_RESERVED_BITS \ | ||
61 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | ||
62 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | ||
63 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | ||
64 | #define CR4_RESERVED_BITS \ | ||
65 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | ||
66 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | ||
67 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
68 | | X86_CR4_OSXSAVE \ | ||
69 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) | ||
70 | |||
71 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | ||
72 | |||
73 | #define KVM_MAX_MCE_BANKS 32 | 63 | #define KVM_MAX_MCE_BANKS 32 |
74 | #define KVM_MCE_CAP_SUPPORTED MCG_CTL_P | 64 | #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) |
65 | |||
66 | #define emul_to_vcpu(ctxt) \ | ||
67 | container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) | ||
75 | 68 | ||
76 | /* EFER defaults: | 69 | /* EFER defaults: |
77 | * - enable syscall per default because its emulated by KVM | 70 | * - enable syscall per default because its emulated by KVM |
78 | * - enable LME and LMA per default on 64 bit KVM | 71 | * - enable LME and LMA per default on 64 bit KVM |
79 | */ | 72 | */ |
80 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
81 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL; | 74 | static |
75 | u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); | ||
82 | #else | 76 | #else |
83 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; | 77 | static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); |
84 | #endif | 78 | #endif |
85 | 79 | ||
86 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | 80 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM |
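For reference, the new symbolic efer_reserved_bits expressions are exactly the old magic numbers. A quick standalone check, using the architectural EFER bit positions (SCE = bit 0, LME = bit 8, LMA = bit 10) written out locally rather than taken from the kernel's msr-index.h:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Architectural EFER bits, spelled out here instead of using kernel headers. */
#define EFER_SCE (1ULL << 0)   /* syscall enable   */
#define EFER_LME (1ULL << 8)   /* long mode enable */
#define EFER_LMA (1ULL << 10)  /* long mode active */

int main(void)
{
    /* 64-bit build: SCE, LME and LMA are the only guest-writable bits. */
    assert(~(uint64_t)(EFER_SCE | EFER_LME | EFER_LMA) == 0xfffffffffffffafeULL);
    /* 32-bit build: only SCE is guest-writable. */
    assert(~(uint64_t)EFER_SCE == 0xfffffffffffffffeULL);
    puts("symbolic masks match the old constants");
    return 0;
}
```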
@@ -96,6 +90,11 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
96 | int ignore_msrs = 0; | 90 | int ignore_msrs = 0; |
97 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | 91 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); |
98 | 92 | ||
93 | bool kvm_has_tsc_control; | ||
94 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | ||
95 | u32 kvm_max_guest_tsc_khz; | ||
96 | EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); | ||
97 | |||
99 | #define KVM_NR_SHARED_MSRS 16 | 98 | #define KVM_NR_SHARED_MSRS 16 |
100 | 99 | ||
101 | struct kvm_shared_msrs_global { | 100 | struct kvm_shared_msrs_global { |
@@ -153,9 +152,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
153 | 152 | ||
154 | u64 __read_mostly host_xcr0; | 153 | u64 __read_mostly host_xcr0; |
155 | 154 | ||
156 | static inline u32 bit(int bitno) | 155 | int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); |
156 | |||
157 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) | ||
157 | { | 158 | { |
158 | return 1 << (bitno & 31); | 159 | int i; |
160 | for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++) | ||
161 | vcpu->arch.apf.gfns[i] = ~0; | ||
159 | } | 162 | } |
160 | 163 | ||
161 | static void kvm_on_user_return(struct user_return_notifier *urn) | 164 | static void kvm_on_user_return(struct user_return_notifier *urn) |
@@ -282,6 +285,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
282 | u32 prev_nr; | 285 | u32 prev_nr; |
283 | int class1, class2; | 286 | int class1, class2; |
284 | 287 | ||
288 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
289 | |||
285 | if (!vcpu->arch.exception.pending) { | 290 | if (!vcpu->arch.exception.pending) { |
286 | queue: | 291 | queue: |
287 | vcpu->arch.exception.pending = true; | 292 | vcpu->arch.exception.pending = true; |
@@ -327,16 +332,33 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) | |||
327 | } | 332 | } |
328 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); | 333 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); |
329 | 334 | ||
330 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | 335 | void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) |
331 | u32 error_code) | 336 | { |
337 | if (err) | ||
338 | kvm_inject_gp(vcpu, 0); | ||
339 | else | ||
340 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
341 | } | ||
342 | EXPORT_SYMBOL_GPL(kvm_complete_insn_gp); | ||
343 | |||
344 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | ||
332 | { | 345 | { |
333 | ++vcpu->stat.pf_guest; | 346 | ++vcpu->stat.pf_guest; |
334 | vcpu->arch.cr2 = addr; | 347 | vcpu->arch.cr2 = fault->address; |
335 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); | 348 | kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); |
349 | } | ||
350 | |||
351 | void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | ||
352 | { | ||
353 | if (mmu_is_nested(vcpu) && !fault->nested_page_fault) | ||
354 | vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); | ||
355 | else | ||
356 | vcpu->arch.mmu.inject_page_fault(vcpu, fault); | ||
336 | } | 357 | } |
337 | 358 | ||
338 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 359 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
339 | { | 360 | { |
361 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
340 | vcpu->arch.nmi_pending = 1; | 362 | vcpu->arch.nmi_pending = 1; |
341 | } | 363 | } |
342 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); | 364 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); |
@@ -367,18 +389,49 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) | |||
367 | EXPORT_SYMBOL_GPL(kvm_require_cpl); | 389 | EXPORT_SYMBOL_GPL(kvm_require_cpl); |
368 | 390 | ||
369 | /* | 391 | /* |
392 | * This function will be used to read from the physical memory of the currently | ||
393 | * running guest. The difference to kvm_read_guest_page is that this function | ||
394 | * can read from guest physical or from the guest's guest physical memory. | ||
395 | */ | ||
396 | int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | ||
397 | gfn_t ngfn, void *data, int offset, int len, | ||
398 | u32 access) | ||
399 | { | ||
400 | gfn_t real_gfn; | ||
401 | gpa_t ngpa; | ||
402 | |||
403 | ngpa = gfn_to_gpa(ngfn); | ||
404 | real_gfn = mmu->translate_gpa(vcpu, ngpa, access); | ||
405 | if (real_gfn == UNMAPPED_GVA) | ||
406 | return -EFAULT; | ||
407 | |||
408 | real_gfn = gpa_to_gfn(real_gfn); | ||
409 | |||
410 | return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len); | ||
411 | } | ||
412 | EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); | ||
413 | |||
414 | int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
415 | void *data, int offset, int len, u32 access) | ||
416 | { | ||
417 | return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn, | ||
418 | data, offset, len, access); | ||
419 | } | ||
420 | |||
421 | /* | ||
370 | * Load the pae pdptrs. Return true is they are all valid. | 422 | * Load the pae pdptrs. Return true is they are all valid. |
371 | */ | 423 | */ |
372 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | 424 | int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) |
373 | { | 425 | { |
374 | gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; | 426 | gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; |
375 | unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; | 427 | unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; |
376 | int i; | 428 | int i; |
377 | int ret; | 429 | int ret; |
378 | u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; | 430 | u64 pdpte[ARRAY_SIZE(mmu->pdptrs)]; |
379 | 431 | ||
380 | ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, | 432 | ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte, |
381 | offset * sizeof(u64), sizeof(pdpte)); | 433 | offset * sizeof(u64), sizeof(pdpte), |
434 | PFERR_USER_MASK|PFERR_WRITE_MASK); | ||
382 | if (ret < 0) { | 435 | if (ret < 0) { |
383 | ret = 0; | 436 | ret = 0; |
384 | goto out; | 437 | goto out; |
@@ -392,7 +445,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
392 | } | 445 | } |
393 | ret = 1; | 446 | ret = 1; |
394 | 447 | ||
395 | memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); | 448 | memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); |
396 | __set_bit(VCPU_EXREG_PDPTR, | 449 | __set_bit(VCPU_EXREG_PDPTR, |
397 | (unsigned long *)&vcpu->arch.regs_avail); | 450 | (unsigned long *)&vcpu->arch.regs_avail); |
398 | __set_bit(VCPU_EXREG_PDPTR, | 451 | __set_bit(VCPU_EXREG_PDPTR, |
@@ -405,8 +458,10 @@ EXPORT_SYMBOL_GPL(load_pdptrs); | |||
405 | 458 | ||
406 | static bool pdptrs_changed(struct kvm_vcpu *vcpu) | 459 | static bool pdptrs_changed(struct kvm_vcpu *vcpu) |
407 | { | 460 | { |
408 | u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; | 461 | u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)]; |
409 | bool changed = true; | 462 | bool changed = true; |
463 | int offset; | ||
464 | gfn_t gfn; | ||
410 | int r; | 465 | int r; |
411 | 466 | ||
412 | if (is_long_mode(vcpu) || !is_pae(vcpu)) | 467 | if (is_long_mode(vcpu) || !is_pae(vcpu)) |
@@ -416,10 +471,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) | |||
416 | (unsigned long *)&vcpu->arch.regs_avail)) | 471 | (unsigned long *)&vcpu->arch.regs_avail)) |
417 | return true; | 472 | return true; |
418 | 473 | ||
419 | r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); | 474 | gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT; |
475 | offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1); | ||
476 | r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), | ||
477 | PFERR_USER_MASK | PFERR_WRITE_MASK); | ||
420 | if (r < 0) | 478 | if (r < 0) |
421 | goto out; | 479 | goto out; |
422 | changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; | 480 | changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; |
423 | out: | 481 | out: |
424 | 482 | ||
425 | return changed; | 483 | return changed; |
@@ -458,12 +516,18 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
458 | return 1; | 516 | return 1; |
459 | } else | 517 | } else |
460 | #endif | 518 | #endif |
461 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) | 519 | if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, |
520 | kvm_read_cr3(vcpu))) | ||
462 | return 1; | 521 | return 1; |
463 | } | 522 | } |
464 | 523 | ||
465 | kvm_x86_ops->set_cr0(vcpu, cr0); | 524 | kvm_x86_ops->set_cr0(vcpu, cr0); |
466 | 525 | ||
526 | if ((cr0 ^ old_cr0) & X86_CR0_PG) { | ||
527 | kvm_clear_async_pf_completion_queue(vcpu); | ||
528 | kvm_async_pf_hash_reset(vcpu); | ||
529 | } | ||
530 | |||
467 | if ((cr0 ^ old_cr0) & update_bits) | 531 | if ((cr0 ^ old_cr0) & update_bits) |
468 | kvm_mmu_reset_context(vcpu); | 532 | kvm_mmu_reset_context(vcpu); |
469 | return 0; | 533 | return 0; |
@@ -547,7 +611,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
547 | return 1; | 611 | return 1; |
548 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) | 612 | } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) |
549 | && ((cr4 ^ old_cr4) & pdptr_bits) | 613 | && ((cr4 ^ old_cr4) & pdptr_bits) |
550 | && !load_pdptrs(vcpu, vcpu->arch.cr3)) | 614 | && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, |
615 | kvm_read_cr3(vcpu))) | ||
551 | return 1; | 616 | return 1; |
552 | 617 | ||
553 | if (cr4 & X86_CR4_VMXE) | 618 | if (cr4 & X86_CR4_VMXE) |
@@ -567,7 +632,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); | |||
567 | 632 | ||
568 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 633 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
569 | { | 634 | { |
570 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { | 635 | if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { |
571 | kvm_mmu_sync_roots(vcpu); | 636 | kvm_mmu_sync_roots(vcpu); |
572 | kvm_mmu_flush_tlb(vcpu); | 637 | kvm_mmu_flush_tlb(vcpu); |
573 | return 0; | 638 | return 0; |
@@ -580,7 +645,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
580 | if (is_pae(vcpu)) { | 645 | if (is_pae(vcpu)) { |
581 | if (cr3 & CR3_PAE_RESERVED_BITS) | 646 | if (cr3 & CR3_PAE_RESERVED_BITS) |
582 | return 1; | 647 | return 1; |
583 | if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) | 648 | if (is_paging(vcpu) && |
649 | !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) | ||
584 | return 1; | 650 | return 1; |
585 | } | 651 | } |
586 | /* | 652 | /* |
@@ -601,12 +667,13 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
601 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) | 667 | if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) |
602 | return 1; | 668 | return 1; |
603 | vcpu->arch.cr3 = cr3; | 669 | vcpu->arch.cr3 = cr3; |
670 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||
604 | vcpu->arch.mmu.new_cr3(vcpu); | 671 | vcpu->arch.mmu.new_cr3(vcpu); |
605 | return 0; | 672 | return 0; |
606 | } | 673 | } |
607 | EXPORT_SYMBOL_GPL(kvm_set_cr3); | 674 | EXPORT_SYMBOL_GPL(kvm_set_cr3); |
608 | 675 | ||
609 | int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | 676 | int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) |
610 | { | 677 | { |
611 | if (cr8 & CR8_RESERVED_BITS) | 678 | if (cr8 & CR8_RESERVED_BITS) |
612 | return 1; | 679 | return 1; |
@@ -616,12 +683,6 @@ int __kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | |||
616 | vcpu->arch.cr8 = cr8; | 683 | vcpu->arch.cr8 = cr8; |
617 | return 0; | 684 | return 0; |
618 | } | 685 | } |
619 | |||
620 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) | ||
621 | { | ||
622 | if (__kvm_set_cr8(vcpu, cr8)) | ||
623 | kvm_inject_gp(vcpu, 0); | ||
624 | } | ||
625 | EXPORT_SYMBOL_GPL(kvm_set_cr8); | 686 | EXPORT_SYMBOL_GPL(kvm_set_cr8); |
626 | 687 | ||
627 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | 688 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) |
@@ -726,18 +787,18 @@ EXPORT_SYMBOL_GPL(kvm_get_dr); | |||
726 | * kvm-specific. Those are put in the beginning of the list. | 787 | * kvm-specific. Those are put in the beginning of the list. |
727 | */ | 788 | */ |
728 | 789 | ||
729 | #define KVM_SAVE_MSRS_BEGIN 7 | 790 | #define KVM_SAVE_MSRS_BEGIN 8 |
730 | static u32 msrs_to_save[] = { | 791 | static u32 msrs_to_save[] = { |
731 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 792 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
732 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 793 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
733 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 794 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
734 | HV_X64_MSR_APIC_ASSIST_PAGE, | 795 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, |
735 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 796 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
736 | MSR_STAR, | 797 | MSR_STAR, |
737 | #ifdef CONFIG_X86_64 | 798 | #ifdef CONFIG_X86_64 |
738 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 799 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
739 | #endif | 800 | #endif |
740 | MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA | 801 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA |
741 | }; | 802 | }; |
742 | 803 | ||
743 | static unsigned num_msrs_to_save; | 804 | static unsigned num_msrs_to_save; |
@@ -781,7 +842,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
781 | kvm_x86_ops->set_efer(vcpu, efer); | 842 | kvm_x86_ops->set_efer(vcpu, efer); |
782 | 843 | ||
783 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 844 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
784 | kvm_mmu_reset_context(vcpu); | ||
785 | 845 | ||
786 | /* Update reserved bits */ | 846 | /* Update reserved bits */ |
787 | if ((efer ^ old_efer) & EFER_NX) | 847 | if ((efer ^ old_efer) & EFER_NX) |
@@ -838,7 +898,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
838 | 898 | ||
839 | /* | 899 | /* |
840 | * The guest calculates current wall clock time by adding | 900 | * The guest calculates current wall clock time by adding |
841 | * system time (updated by kvm_write_guest_time below) to the | 901 | * system time (updated by kvm_guest_time_update below) to the |
842 | * wall clock specified here. guest system time equals host | 902 | * wall clock specified here. guest system time equals host |
843 | * system time for us, thus we must fill in host boot time here. | 903 | * system time for us, thus we must fill in host boot time here. |
844 | */ | 904 | */ |
@@ -866,65 +926,235 @@ static uint32_t div_frac(uint32_t dividend, uint32_t divisor) | |||
866 | return quotient; | 926 | return quotient; |
867 | } | 927 | } |
868 | 928 | ||
869 | static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock) | 929 | static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, |
930 | s8 *pshift, u32 *pmultiplier) | ||
870 | { | 931 | { |
871 | uint64_t nsecs = 1000000000LL; | 932 | uint64_t scaled64; |
872 | int32_t shift = 0; | 933 | int32_t shift = 0; |
873 | uint64_t tps64; | 934 | uint64_t tps64; |
874 | uint32_t tps32; | 935 | uint32_t tps32; |
875 | 936 | ||
876 | tps64 = tsc_khz * 1000LL; | 937 | tps64 = base_khz * 1000LL; |
877 | while (tps64 > nsecs*2) { | 938 | scaled64 = scaled_khz * 1000LL; |
939 | while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) { | ||
878 | tps64 >>= 1; | 940 | tps64 >>= 1; |
879 | shift--; | 941 | shift--; |
880 | } | 942 | } |
881 | 943 | ||
882 | tps32 = (uint32_t)tps64; | 944 | tps32 = (uint32_t)tps64; |
883 | while (tps32 <= (uint32_t)nsecs) { | 945 | while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) { |
884 | tps32 <<= 1; | 946 | if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000) |
947 | scaled64 >>= 1; | ||
948 | else | ||
949 | tps32 <<= 1; | ||
885 | shift++; | 950 | shift++; |
886 | } | 951 | } |
887 | 952 | ||
888 | hv_clock->tsc_shift = shift; | 953 | *pshift = shift; |
889 | hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32); | 954 | *pmultiplier = div_frac(scaled64, tps32); |
890 | 955 | ||
891 | pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n", | 956 | pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n", |
892 | __func__, tsc_khz, hv_clock->tsc_shift, | 957 | __func__, base_khz, scaled_khz, shift, *pmultiplier); |
893 | hv_clock->tsc_to_system_mul); | 958 | } |
959 | |||
960 | static inline u64 get_kernel_ns(void) | ||
961 | { | ||
962 | struct timespec ts; | ||
963 | |||
964 | WARN_ON(preemptible()); | ||
965 | ktime_get_ts(&ts); | ||
966 | monotonic_to_bootbased(&ts); | ||
967 | return timespec_to_ns(&ts); | ||
894 | } | 968 | } |
895 | 969 | ||
896 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); | 970 | static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); |
971 | unsigned long max_tsc_khz; | ||
897 | 972 | ||
898 | static void kvm_write_guest_time(struct kvm_vcpu *v) | 973 | static inline int kvm_tsc_changes_freq(void) |
974 | { | ||
975 | int cpu = get_cpu(); | ||
976 | int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && | ||
977 | cpufreq_quick_get(cpu) != 0; | ||
978 | put_cpu(); | ||
979 | return ret; | ||
980 | } | ||
981 | |||
982 | static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) | ||
983 | { | ||
984 | if (vcpu->arch.virtual_tsc_khz) | ||
985 | return vcpu->arch.virtual_tsc_khz; | ||
986 | else | ||
987 | return __this_cpu_read(cpu_tsc_khz); | ||
988 | } | ||
989 | |||
990 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) | ||
991 | { | ||
992 | u64 ret; | ||
993 | |||
994 | WARN_ON(preemptible()); | ||
995 | if (kvm_tsc_changes_freq()) | ||
996 | printk_once(KERN_WARNING | ||
997 | "kvm: unreliable cycle conversion on adjustable rate TSC\n"); | ||
998 | ret = nsec * vcpu_tsc_khz(vcpu); | ||
999 | do_div(ret, USEC_PER_SEC); | ||
1000 | return ret; | ||
1001 | } | ||
1002 | |||
1003 | static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | ||
1004 | { | ||
1005 | /* Compute a scale to convert nanoseconds in TSC cycles */ | ||
1006 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | ||
1007 | &vcpu->arch.tsc_catchup_shift, | ||
1008 | &vcpu->arch.tsc_catchup_mult); | ||
1009 | } | ||
1010 | |||
1011 | static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) | ||
1012 | { | ||
1013 | u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec, | ||
1014 | vcpu->arch.tsc_catchup_mult, | ||
1015 | vcpu->arch.tsc_catchup_shift); | ||
1016 | tsc += vcpu->arch.last_tsc_write; | ||
1017 | return tsc; | ||
1018 | } | ||
1019 | |||
1020 | void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | ||
1021 | { | ||
1022 | struct kvm *kvm = vcpu->kvm; | ||
1023 | u64 offset, ns, elapsed; | ||
1024 | unsigned long flags; | ||
1025 | s64 sdiff; | ||
1026 | |||
1027 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | ||
1028 | offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); | ||
1029 | ns = get_kernel_ns(); | ||
1030 | elapsed = ns - kvm->arch.last_tsc_nsec; | ||
1031 | sdiff = data - kvm->arch.last_tsc_write; | ||
1032 | if (sdiff < 0) | ||
1033 | sdiff = -sdiff; | ||
1034 | |||
1035 | /* | ||
1036 | * Special case: close write to TSC within 5 seconds of | ||
1037 | * another CPU is interpreted as an attempt to synchronize | ||
1038 | * The 5 seconds is to accommodate host load / swapping as | ||
1039 | * well as any reset of TSC during the boot process. | ||
1040 | * | ||
1041 | * In that case, for a reliable TSC, we can match TSC offsets, | ||
1042 | * or make a best guest using elapsed value. | ||
1043 | */ | ||
1044 | if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) && | ||
1045 | elapsed < 5ULL * NSEC_PER_SEC) { | ||
1046 | if (!check_tsc_unstable()) { | ||
1047 | offset = kvm->arch.last_tsc_offset; | ||
1048 | pr_debug("kvm: matched tsc offset for %llu\n", data); | ||
1049 | } else { | ||
1050 | u64 delta = nsec_to_cycles(vcpu, elapsed); | ||
1051 | offset += delta; | ||
1052 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); | ||
1053 | } | ||
1054 | ns = kvm->arch.last_tsc_nsec; | ||
1055 | } | ||
1056 | kvm->arch.last_tsc_nsec = ns; | ||
1057 | kvm->arch.last_tsc_write = data; | ||
1058 | kvm->arch.last_tsc_offset = offset; | ||
1059 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | ||
1060 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | ||
1061 | |||
1062 | /* Reset of TSC must disable overshoot protection below */ | ||
1063 | vcpu->arch.hv_clock.tsc_timestamp = 0; | ||
1064 | vcpu->arch.last_tsc_write = data; | ||
1065 | vcpu->arch.last_tsc_nsec = ns; | ||
1066 | } | ||
1067 | EXPORT_SYMBOL_GPL(kvm_write_tsc); | ||
1068 | |||
1069 | static int kvm_guest_time_update(struct kvm_vcpu *v) | ||
899 | { | 1070 | { |
900 | struct timespec ts; | ||
901 | unsigned long flags; | 1071 | unsigned long flags; |
902 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1072 | struct kvm_vcpu_arch *vcpu = &v->arch; |
903 | void *shared_kaddr; | 1073 | void *shared_kaddr; |
904 | unsigned long this_tsc_khz; | 1074 | unsigned long this_tsc_khz; |
1075 | s64 kernel_ns, max_kernel_ns; | ||
1076 | u64 tsc_timestamp; | ||
905 | 1077 | ||
906 | if ((!vcpu->time_page)) | 1078 | /* Keep irq disabled to prevent changes to the clock */ |
907 | return; | 1079 | local_irq_save(flags); |
1080 | kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); | ||
1081 | kernel_ns = get_kernel_ns(); | ||
1082 | this_tsc_khz = vcpu_tsc_khz(v); | ||
1083 | if (unlikely(this_tsc_khz == 0)) { | ||
1084 | local_irq_restore(flags); | ||
1085 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); | ||
1086 | return 1; | ||
1087 | } | ||
908 | 1088 | ||
909 | this_tsc_khz = get_cpu_var(cpu_tsc_khz); | 1089 | /* |
910 | if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) { | 1090 | * We may have to catch up the TSC to match elapsed wall clock |
911 | kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock); | 1091 | * time for two reasons, even if kvmclock is used. |
912 | vcpu->hv_clock_tsc_khz = this_tsc_khz; | 1092 | * 1) CPU could have been running below the maximum TSC rate |
1093 | * 2) Broken TSC compensation resets the base at each VCPU | ||
1094 | * entry to avoid unknown leaps of TSC even when running | ||
1095 | * again on the same CPU. This may cause apparent elapsed | ||
1096 | * time to disappear, and the guest to stand still or run | ||
1097 | * very slowly. | ||
1098 | */ | ||
1099 | if (vcpu->tsc_catchup) { | ||
1100 | u64 tsc = compute_guest_tsc(v, kernel_ns); | ||
1101 | if (tsc > tsc_timestamp) { | ||
1102 | kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp); | ||
1103 | tsc_timestamp = tsc; | ||
1104 | } | ||
913 | } | 1105 | } |
914 | put_cpu_var(cpu_tsc_khz); | ||
915 | 1106 | ||
916 | /* Keep irq disabled to prevent changes to the clock */ | ||
917 | local_irq_save(flags); | ||
918 | kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); | ||
919 | ktime_get_ts(&ts); | ||
920 | monotonic_to_bootbased(&ts); | ||
921 | local_irq_restore(flags); | 1107 | local_irq_restore(flags); |
922 | 1108 | ||
923 | /* With all the info we got, fill in the values */ | 1109 | if (!vcpu->time_page) |
1110 | return 0; | ||
924 | 1111 | ||
925 | vcpu->hv_clock.system_time = ts.tv_nsec + | 1112 | /* |
926 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; | 1113 | * Time as measured by the TSC may go backwards when resetting the base |
1114 | * tsc_timestamp. The reason for this is that the TSC resolution is | ||
1115 | * higher than the resolution of the other clock scales. Thus, many | ||
1116 | * possible measurments of the TSC correspond to one measurement of any | ||
1117 | * other clock, and so a spread of values is possible. This is not a | ||
1118 | * problem for the computation of the nanosecond clock; with TSC rates | ||
1119 | * around 1GHZ, there can only be a few cycles which correspond to one | ||
1120 | * nanosecond value, and any path through this code will inevitably | ||
1121 | * take longer than that. However, with the kernel_ns value itself, | ||
1122 | * the precision may be much lower, down to HZ granularity. If the | ||
1123 | * first sampling of TSC against kernel_ns ends in the low part of the | ||
1124 | * range, and the second in the high end of the range, we can get: | ||
1125 | * | ||
1126 | * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new | ||
1127 | * | ||
1128 | * As the sampling errors potentially range in the thousands of cycles, | ||
1129 | * it is possible such a time value has already been observed by the | ||
1130 | * guest. To protect against this, we must compute the system time as | ||
1131 | * observed by the guest and ensure the new system time is greater. | ||
1132 | */ | ||
1133 | max_kernel_ns = 0; | ||
1134 | if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) { | ||
1135 | max_kernel_ns = vcpu->last_guest_tsc - | ||
1136 | vcpu->hv_clock.tsc_timestamp; | ||
1137 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, | ||
1138 | vcpu->hv_clock.tsc_to_system_mul, | ||
1139 | vcpu->hv_clock.tsc_shift); | ||
1140 | max_kernel_ns += vcpu->last_kernel_ns; | ||
1141 | } | ||
927 | 1142 | ||
1143 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { | ||
1144 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, | ||
1145 | &vcpu->hv_clock.tsc_shift, | ||
1146 | &vcpu->hv_clock.tsc_to_system_mul); | ||
1147 | vcpu->hw_tsc_khz = this_tsc_khz; | ||
1148 | } | ||
1149 | |||
1150 | if (max_kernel_ns > kernel_ns) | ||
1151 | kernel_ns = max_kernel_ns; | ||
1152 | |||
1153 | /* With all the info we got, fill in the values */ | ||
1154 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; | ||
1155 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; | ||
1156 | vcpu->last_kernel_ns = kernel_ns; | ||
1157 | vcpu->last_guest_tsc = tsc_timestamp; | ||
928 | vcpu->hv_clock.flags = 0; | 1158 | vcpu->hv_clock.flags = 0; |
929 | 1159 | ||
930 | /* | 1160 | /* |
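The new kvm_get_time_scale() boils a rate conversion down to a (shift, 32.32 multiplier) pair that is later applied pvclock-style: shift the tick delta, then keep the high 32 bits of the multiply. Below is a minimal user-space sketch of that application only; the helper mirrors the spirit of pvclock_scale_delta(), not the kernel code. The illustrative values assume a 2,000,000 kHz TSC scaled to nanoseconds (NSEC_PER_SEC/1000 = 1,000,000), for which the loops above work out to shift = 0 and mul = 0x80000000, i.e. half a nanosecond per cycle:

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Apply a (shift, mul) pair the way pvclock-style consumers do:
 * shift first, then a 32.32 fixed-point multiply (128-bit intermediate
 * to avoid overflow; gcc/clang extension).
 */
static uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
    if (shift < 0)
        delta >>= -shift;
    else
        delta <<= shift;
    return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
}

int main(void)
{
    uint64_t ticks = 2000000000ULL;          /* one second of a 2 GHz TSC */
    uint64_t ns = scale_delta(ticks, 0x80000000u, 0);

    printf("%llu ticks -> %llu ns\n",
           (unsigned long long)ticks, (unsigned long long)ns);
    /* prints: 2000000000 ticks -> 1000000000 ns */
    return 0;
}
```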
@@ -942,16 +1172,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
942 | kunmap_atomic(shared_kaddr, KM_USER0); | 1172 | kunmap_atomic(shared_kaddr, KM_USER0); |
943 | 1173 | ||
944 | mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); | 1174 | mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); |
945 | } | 1175 | return 0; |
946 | |||
947 | static int kvm_request_guest_time_update(struct kvm_vcpu *v) | ||
948 | { | ||
949 | struct kvm_vcpu_arch *vcpu = &v->arch; | ||
950 | |||
951 | if (!vcpu->time_page) | ||
952 | return 0; | ||
953 | kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v); | ||
954 | return 1; | ||
955 | } | 1176 | } |
956 | 1177 | ||
957 | static bool msr_mtrr_valid(unsigned msr) | 1178 | static bool msr_mtrr_valid(unsigned msr) |
@@ -1214,6 +1435,38 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1214 | return 0; | 1435 | return 0; |
1215 | } | 1436 | } |
1216 | 1437 | ||
1438 | static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | ||
1439 | { | ||
1440 | gpa_t gpa = data & ~0x3f; | ||
1441 | |||
1442 | /* Bits 2:5 are resrved, Should be zero */ | ||
1443 | if (data & 0x3c) | ||
1444 | return 1; | ||
1445 | |||
1446 | vcpu->arch.apf.msr_val = data; | ||
1447 | |||
1448 | if (!(data & KVM_ASYNC_PF_ENABLED)) { | ||
1449 | kvm_clear_async_pf_completion_queue(vcpu); | ||
1450 | kvm_async_pf_hash_reset(vcpu); | ||
1451 | return 0; | ||
1452 | } | ||
1453 | |||
1454 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) | ||
1455 | return 1; | ||
1456 | |||
1457 | vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); | ||
1458 | kvm_async_pf_wakeup_all(vcpu); | ||
1459 | return 0; | ||
1460 | } | ||
1461 | |||
1462 | static void kvmclock_reset(struct kvm_vcpu *vcpu) | ||
1463 | { | ||
1464 | if (vcpu->arch.time_page) { | ||
1465 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
1466 | vcpu->arch.time_page = NULL; | ||
1467 | } | ||
1468 | } | ||
1469 | |||
1217 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1470 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1218 | { | 1471 | { |
1219 | switch (msr) { | 1472 | switch (msr) { |
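The value written to MSR_KVM_ASYNC_PF_EN packs a 64-byte-aligned guest-physical address together with control bits in the low 6 bits: bit 0 enables async page faults, bit 1 asks for delivery even in kernel mode, and bits 2-5 must be zero or the write is rejected. A hedged sketch of building and validating such a value; the constants are spelled out locally rather than taken from asm/kvm_para.h, and the address is purely hypothetical:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ASYNC_PF_ENABLED     (1ULL << 0)
#define ASYNC_PF_SEND_ALWAYS (1ULL << 1)
#define ASYNC_PF_RESERVED    0x3cULL   /* bits 2-5: must be zero */

/* Mirror of the host-side sanity check in kvm_pv_enable_async_pf() above. */
static bool async_pf_msr_valid(uint64_t data)
{
    return (data & ASYNC_PF_RESERVED) == 0;
}

int main(void)
{
    uint64_t apf_area_gpa = 0x12340;   /* hypothetical, 64-byte aligned */
    uint64_t msr_val = apf_area_gpa | ASYNC_PF_ENABLED | ASYNC_PF_SEND_ALWAYS;

    printf("MSR value 0x%llx would be %s\n",
           (unsigned long long)msr_val,
           async_pf_msr_valid(msr_val) ? "accepted" : "rejected");
    return 0;
}
```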
@@ -1271,12 +1524,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1271 | break; | 1524 | break; |
1272 | case MSR_KVM_SYSTEM_TIME_NEW: | 1525 | case MSR_KVM_SYSTEM_TIME_NEW: |
1273 | case MSR_KVM_SYSTEM_TIME: { | 1526 | case MSR_KVM_SYSTEM_TIME: { |
1274 | if (vcpu->arch.time_page) { | 1527 | kvmclock_reset(vcpu); |
1275 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
1276 | vcpu->arch.time_page = NULL; | ||
1277 | } | ||
1278 | 1528 | ||
1279 | vcpu->arch.time = data; | 1529 | vcpu->arch.time = data; |
1530 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
1280 | 1531 | ||
1281 | /* we verify if the enable bit is set... */ | 1532 | /* we verify if the enable bit is set... */ |
1282 | if (!(data & 1)) | 1533 | if (!(data & 1)) |
@@ -1292,10 +1543,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1292 | kvm_release_page_clean(vcpu->arch.time_page); | 1543 | kvm_release_page_clean(vcpu->arch.time_page); |
1293 | vcpu->arch.time_page = NULL; | 1544 | vcpu->arch.time_page = NULL; |
1294 | } | 1545 | } |
1295 | |||
1296 | kvm_request_guest_time_update(vcpu); | ||
1297 | break; | 1546 | break; |
1298 | } | 1547 | } |
1548 | case MSR_KVM_ASYNC_PF_EN: | ||
1549 | if (kvm_pv_enable_async_pf(vcpu, data)) | ||
1550 | return 1; | ||
1551 | break; | ||
1299 | case MSR_IA32_MCG_CTL: | 1552 | case MSR_IA32_MCG_CTL: |
1300 | case MSR_IA32_MCG_STATUS: | 1553 | case MSR_IA32_MCG_STATUS: |
1301 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1554 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
@@ -1330,6 +1583,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1330 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " | 1583 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " |
1331 | "0x%x data 0x%llx\n", msr, data); | 1584 | "0x%x data 0x%llx\n", msr, data); |
1332 | break; | 1585 | break; |
1586 | case MSR_K7_CLK_CTL: | ||
1587 | /* | ||
1588 | * Ignore all writes to this no longer documented MSR. | ||
1589 | * Writes are only relevant for old K7 processors, | ||
1590 | * all pre-dating SVM, but a recommended workaround from | ||
1591 | * AMD for these chips. It is possible to speicify the | ||
1592 | * affected processor models on the command line, hence | ||
1593 | * the need to ignore the workaround. | ||
1594 | */ | ||
1595 | break; | ||
1333 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | 1596 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: |
1334 | if (kvm_hv_msr_partition_wide(msr)) { | 1597 | if (kvm_hv_msr_partition_wide(msr)) { |
1335 | int r; | 1598 | int r; |
@@ -1340,6 +1603,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1340 | } else | 1603 | } else |
1341 | return set_msr_hyperv(vcpu, msr, data); | 1604 | return set_msr_hyperv(vcpu, msr, data); |
1342 | break; | 1605 | break; |
1606 | case MSR_IA32_BBL_CR_CTL3: | ||
1607 | /* Drop writes to this legacy MSR -- see rdmsr | ||
1608 | * counterpart for further detail. | ||
1609 | */ | ||
1610 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); | ||
1611 | break; | ||
1343 | default: | 1612 | default: |
1344 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1613 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1345 | return xen_hvm_config(vcpu, data); | 1614 | return xen_hvm_config(vcpu, data); |
@@ -1522,6 +1791,20 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1522 | case 0xcd: /* fsb frequency */ | 1791 | case 0xcd: /* fsb frequency */ |
1523 | data = 3; | 1792 | data = 3; |
1524 | break; | 1793 | break; |
1794 | /* | ||
1795 | * MSR_EBC_FREQUENCY_ID | ||
1796 | * Conservative value valid for even the basic CPU models. | ||
1797 | * Models 0,1: 000 in bits 23:21 indicating a bus speed of | ||
1798 | * 100MHz, model 2 000 in bits 18:16 indicating 100MHz, | ||
1799 | * and 266MHz for model 3, or 4. Set Core Clock | ||
1800 | * Frequency to System Bus Frequency Ratio to 1 (bits | ||
1801 | * 31:24) even though these are only valid for CPU | ||
1802 | * models > 2, however guests may end up dividing or | ||
1803 | * multiplying by zero otherwise. | ||
1804 | */ | ||
1805 | case MSR_EBC_FREQUENCY_ID: | ||
1806 | data = 1 << 24; | ||
1807 | break; | ||
1525 | case MSR_IA32_APICBASE: | 1808 | case MSR_IA32_APICBASE: |
1526 | data = kvm_get_apic_base(vcpu); | 1809 | data = kvm_get_apic_base(vcpu); |
1527 | break; | 1810 | break; |
@@ -1548,6 +1831,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1548 | case MSR_KVM_SYSTEM_TIME_NEW: | 1831 | case MSR_KVM_SYSTEM_TIME_NEW: |
1549 | data = vcpu->arch.time; | 1832 | data = vcpu->arch.time; |
1550 | break; | 1833 | break; |
1834 | case MSR_KVM_ASYNC_PF_EN: | ||
1835 | data = vcpu->arch.apf.msr_val; | ||
1836 | break; | ||
1551 | case MSR_IA32_P5_MC_ADDR: | 1837 | case MSR_IA32_P5_MC_ADDR: |
1552 | case MSR_IA32_P5_MC_TYPE: | 1838 | case MSR_IA32_P5_MC_TYPE: |
1553 | case MSR_IA32_MCG_CAP: | 1839 | case MSR_IA32_MCG_CAP: |
@@ -1555,6 +1841,18 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1555 | case MSR_IA32_MCG_STATUS: | 1841 | case MSR_IA32_MCG_STATUS: |
1556 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1842 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
1557 | return get_msr_mce(vcpu, msr, pdata); | 1843 | return get_msr_mce(vcpu, msr, pdata); |
1844 | case MSR_K7_CLK_CTL: | ||
1845 | /* | ||
1846 | * Provide expected ramp-up count for K7. All other | ||
1847 | * are set to zero, indicating minimum divisors for | ||
1848 | * every field. | ||
1849 | * | ||
1850 | * This prevents guest kernels on AMD host with CPU | ||
1851 | * type 6, model 8 and higher from exploding due to | ||
1852 | * the rdmsr failing. | ||
1853 | */ | ||
1854 | data = 0x20000000; | ||
1855 | break; | ||
1558 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | 1856 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: |
1559 | if (kvm_hv_msr_partition_wide(msr)) { | 1857 | if (kvm_hv_msr_partition_wide(msr)) { |
1560 | int r; | 1858 | int r; |
@@ -1565,6 +1863,19 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1565 | } else | 1863 | } else |
1566 | return get_msr_hyperv(vcpu, msr, pdata); | 1864 | return get_msr_hyperv(vcpu, msr, pdata); |
1567 | break; | 1865 | break; |
1866 | case MSR_IA32_BBL_CR_CTL3: | ||
1867 | /* This legacy MSR exists but isn't fully documented in current | ||
1868 | * silicon. It is however accessed by winxp in very narrow | ||
1869 | * scenarios where it sets bit #19, itself documented as | ||
1870 | * a "reserved" bit. Best effort attempt to source coherent | ||
1871 | * read data here should the balance of the register be | ||
1872 | * interpreted by the guest: | ||
1873 | * | ||
1874 | * L2 cache control register 3: 64GB range, 256KB size, | ||
1875 | * enabled, latency 0x1, configured | ||
1876 | */ | ||
1877 | data = 0xbe702111; | ||
1878 | break; | ||
1568 | default: | 1879 | default: |
1569 | if (!ignore_msrs) { | 1880 | if (!ignore_msrs) { |
1570 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1881 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
@@ -1665,6 +1976,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1665 | case KVM_CAP_NOP_IO_DELAY: | 1976 | case KVM_CAP_NOP_IO_DELAY: |
1666 | case KVM_CAP_MP_STATE: | 1977 | case KVM_CAP_MP_STATE: |
1667 | case KVM_CAP_SYNC_MMU: | 1978 | case KVM_CAP_SYNC_MMU: |
1979 | case KVM_CAP_USER_NMI: | ||
1668 | case KVM_CAP_REINJECT_CONTROL: | 1980 | case KVM_CAP_REINJECT_CONTROL: |
1669 | case KVM_CAP_IRQ_INJECT_STATUS: | 1981 | case KVM_CAP_IRQ_INJECT_STATUS: |
1670 | case KVM_CAP_ASSIGN_DEV_IRQ: | 1982 | case KVM_CAP_ASSIGN_DEV_IRQ: |
@@ -1683,6 +1995,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1683 | case KVM_CAP_DEBUGREGS: | 1995 | case KVM_CAP_DEBUGREGS: |
1684 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1996 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
1685 | case KVM_CAP_XSAVE: | 1997 | case KVM_CAP_XSAVE: |
1998 | case KVM_CAP_ASYNC_PF: | ||
1999 | case KVM_CAP_GET_TSC_KHZ: | ||
1686 | r = 1; | 2000 | r = 1; |
1687 | break; | 2001 | break; |
1688 | case KVM_CAP_COALESCED_MMIO: | 2002 | case KVM_CAP_COALESCED_MMIO: |
@@ -1709,6 +2023,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1709 | case KVM_CAP_XCRS: | 2023 | case KVM_CAP_XCRS: |
1710 | r = cpu_has_xsave; | 2024 | r = cpu_has_xsave; |
1711 | break; | 2025 | break; |
2026 | case KVM_CAP_TSC_CONTROL: | ||
2027 | r = kvm_has_tsc_control; | ||
2028 | break; | ||
1712 | default: | 2029 | default: |
1713 | r = 0; | 2030 | r = 0; |
1714 | break; | 2031 | break; |
@@ -1808,19 +2125,33 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1808 | } | 2125 | } |
1809 | 2126 | ||
1810 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 2127 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
1811 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { | 2128 | if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { |
1812 | unsigned long khz = cpufreq_quick_get(cpu); | 2129 | /* Make sure TSC doesn't go backwards */ |
1813 | if (!khz) | 2130 | s64 tsc_delta; |
1814 | khz = tsc_khz; | 2131 | u64 tsc; |
1815 | per_cpu(cpu_tsc_khz, cpu) = khz; | 2132 | |
2133 | kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); | ||
2134 | tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : | ||
2135 | tsc - vcpu->arch.last_guest_tsc; | ||
2136 | |||
2137 | if (tsc_delta < 0) | ||
2138 | mark_tsc_unstable("KVM discovered backwards TSC"); | ||
2139 | if (check_tsc_unstable()) { | ||
2140 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); | ||
2141 | vcpu->arch.tsc_catchup = 1; | ||
2142 | } | ||
2143 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
2144 | if (vcpu->cpu != cpu) | ||
2145 | kvm_migrate_timers(vcpu); | ||
2146 | vcpu->cpu = cpu; | ||
1816 | } | 2147 | } |
1817 | kvm_request_guest_time_update(vcpu); | ||
1818 | } | 2148 | } |
1819 | 2149 | ||
1820 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 2150 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
1821 | { | 2151 | { |
1822 | kvm_x86_ops->vcpu_put(vcpu); | 2152 | kvm_x86_ops->vcpu_put(vcpu); |
1823 | kvm_put_guest_fpu(vcpu); | 2153 | kvm_put_guest_fpu(vcpu); |
2154 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); | ||
1824 | } | 2155 | } |
1825 | 2156 | ||
1826 | static int is_efer_nx(void) | 2157 | static int is_efer_nx(void) |
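The reworked vcpu_load path keeps the guest TSC monotonic across host-CPU migration: the value captured at vcpu_put is compared with a fresh read on the new CPU, and on an unstable TSC the offset is adjusted by the negated delta so the guest resumes exactly where it left off (the lost real time is then made up in catchup mode). A tiny arithmetic illustration with invented numbers:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Guest TSC recorded at vcpu_put, before the VCPU migrated. */
    uint64_t last_guest_tsc = 5000000;
    /* Guest TSC as it would read on the new CPU with the old offset. */
    uint64_t tsc_now = 4200000;        /* apparently went backwards */

    int64_t tsc_delta = (int64_t)(tsc_now - last_guest_tsc);   /* -800000 */

    /*
     * adjust_tsc_offset(vcpu, -tsc_delta) shifts the guest view forward
     * by 800000 cycles, so the guest never observes a backwards TSC.
     */
    printf("offset adjustment: %+lld cycles\n", (long long)-tsc_delta);
    return 0;
}
```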
@@ -1937,6 +2268,11 @@ out: | |||
1937 | return r; | 2268 | return r; |
1938 | } | 2269 | } |
1939 | 2270 | ||
2271 | static void cpuid_mask(u32 *word, int wordnum) | ||
2272 | { | ||
2273 | *word &= boot_cpu_data.x86_capability[wordnum]; | ||
2274 | } | ||
2275 | |||
1940 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | 2276 | static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1941 | u32 index) | 2277 | u32 index) |
1942 | { | 2278 | { |
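cpuid_mask() simply ANDs a to-be-advertised CPUID register with the matching word of boot_cpu_data.x86_capability, so the guest is never offered a feature the host CPU itself lacks. A toy standalone version of the idea; the word numbering used in the hunks below (e.g. word 4 for CPUID.01H:ECX, word 6 for CPUID.80000001H:ECX) follows the kernel's cpufeature.h layout of this era and is stated here as an assumption:

```c
#include <stdint.h>
#include <stdio.h>

/* Toy cpuid_mask(): filter the advertised word through the host's word. */
static void cpuid_mask(uint32_t *word, uint32_t host_capability_word)
{
    *word &= host_capability_word;
}

int main(void)
{
    uint32_t guest_ecx = 0x00000201;   /* wants SSE3 (bit 0) + SSSE3 (bit 9) */
    uint32_t host_ecx  = 0x00000001;   /* host only reports SSE3 */

    cpuid_mask(&guest_ecx, host_ecx);
    printf("CPUID.01H:ECX offered to the guest: 0x%08x\n", guest_ecx);
    /* prints 0x00000001 -- SSSE3 is silently dropped */
    return 0;
}
```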
@@ -1991,13 +2327,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1991 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | 2327 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | |
1992 | 0 /* Reserved, DCA */ | F(XMM4_1) | | 2328 | 0 /* Reserved, DCA */ | F(XMM4_1) | |
1993 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | | 2329 | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | |
1994 | 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); | 2330 | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | |
2331 | F(F16C); | ||
1995 | /* cpuid 0x80000001.ecx */ | 2332 | /* cpuid 0x80000001.ecx */ |
1996 | const u32 kvm_supported_word6_x86_features = | 2333 | const u32 kvm_supported_word6_x86_features = |
1997 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | | 2334 | F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | |
1998 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | 2335 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | |
1999 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | | 2336 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | |
2000 | 0 /* SKINIT */ | 0 /* WDT */; | 2337 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); |
2338 | |||
2339 | /* cpuid 0xC0000001.edx */ | ||
2340 | const u32 kvm_supported_word5_x86_features = | ||
2341 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
2342 | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | | ||
2343 | F(PMM) | F(PMM_EN); | ||
2001 | 2344 | ||
2002 | /* all calls to cpuid_count() should be made on the same cpu */ | 2345 | /* all calls to cpuid_count() should be made on the same cpu */ |
2003 | get_cpu(); | 2346 | get_cpu(); |
@@ -2010,7 +2353,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2010 | break; | 2353 | break; |
2011 | case 1: | 2354 | case 1: |
2012 | entry->edx &= kvm_supported_word0_x86_features; | 2355 | entry->edx &= kvm_supported_word0_x86_features; |
2356 | cpuid_mask(&entry->edx, 0); | ||
2013 | entry->ecx &= kvm_supported_word4_x86_features; | 2357 | entry->ecx &= kvm_supported_word4_x86_features; |
2358 | cpuid_mask(&entry->ecx, 4); | ||
2014 | /* we support x2apic emulation even if host does not support | 2359 | /* we support x2apic emulation even if host does not support |
2015 | * it since we emulate x2apic in software */ | 2360 | * it since we emulate x2apic in software */ |
2016 | entry->ecx |= F(X2APIC); | 2361 | entry->ecx |= F(X2APIC); |
@@ -2068,9 +2413,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2068 | int i; | 2413 | int i; |
2069 | 2414 | ||
2070 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 2415 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
2071 | for (i = 1; *nent < maxnent; ++i) { | 2416 | for (i = 1; *nent < maxnent && i < 64; ++i) { |
2072 | if (entry[i - 1].eax == 0 && i != 2) | 2417 | if (entry[i].eax == 0) |
2073 | break; | 2418 | continue; |
2074 | do_cpuid_1_ent(&entry[i], function, i); | 2419 | do_cpuid_1_ent(&entry[i], function, i); |
2075 | entry[i].flags |= | 2420 | entry[i].flags |= |
2076 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 2421 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
@@ -2091,6 +2436,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2091 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | 2436 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | |
2092 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | 2437 | (1 << KVM_FEATURE_NOP_IO_DELAY) | |
2093 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | 2438 | (1 << KVM_FEATURE_CLOCKSOURCE2) | |
2439 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
2094 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | 2440 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); |
2095 | entry->ebx = 0; | 2441 | entry->ebx = 0; |
2096 | entry->ecx = 0; | 2442 | entry->ecx = 0; |
@@ -2101,7 +2447,23 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
2101 | break; | 2447 | break; |
2102 | case 0x80000001: | 2448 | case 0x80000001: |
2103 | entry->edx &= kvm_supported_word1_x86_features; | 2449 | entry->edx &= kvm_supported_word1_x86_features; |
2450 | cpuid_mask(&entry->edx, 1); | ||
2104 | entry->ecx &= kvm_supported_word6_x86_features; | 2451 | entry->ecx &= kvm_supported_word6_x86_features; |
2452 | cpuid_mask(&entry->ecx, 6); | ||
2453 | break; | ||
2454 | /*Add support for Centaur's CPUID instruction*/ | ||
2455 | case 0xC0000000: | ||
2456 | /*Just support up to 0xC0000004 now*/ | ||
2457 | entry->eax = min(entry->eax, 0xC0000004); | ||
2458 | break; | ||
2459 | case 0xC0000001: | ||
2460 | entry->edx &= kvm_supported_word5_x86_features; | ||
2461 | cpuid_mask(&entry->edx, 5); | ||
2462 | break; | ||
2463 | case 0xC0000002: | ||
2464 | case 0xC0000003: | ||
2465 | case 0xC0000004: | ||
2466 | /*Now nothing to do, reserved for the future*/ | ||
2105 | break; | 2467 | break; |
2106 | } | 2468 | } |
2107 | 2469 | ||
@@ -2149,6 +2511,26 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
2149 | if (nent >= cpuid->nent) | 2511 | if (nent >= cpuid->nent) |
2150 | goto out_free; | 2512 | goto out_free; |
2151 | 2513 | ||
2514 | /* Add support for Centaur's CPUID instruction. */ | ||
2515 | if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) { | ||
2516 | do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0, | ||
2517 | &nent, cpuid->nent); | ||
2518 | |||
2519 | r = -E2BIG; | ||
2520 | if (nent >= cpuid->nent) | ||
2521 | goto out_free; | ||
2522 | |||
2523 | limit = cpuid_entries[nent - 1].eax; | ||
2524 | for (func = 0xC0000001; | ||
2525 | func <= limit && nent < cpuid->nent; ++func) | ||
2526 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | ||
2527 | &nent, cpuid->nent); | ||
2528 | |||
2529 | r = -E2BIG; | ||
2530 | if (nent >= cpuid->nent) | ||
2531 | goto out_free; | ||
2532 | } | ||
2533 | |||
2152 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | 2534 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, |
2153 | cpuid->nent); | 2535 | cpuid->nent); |
2154 | 2536 | ||
@@ -2203,6 +2585,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
2203 | return -ENXIO; | 2585 | return -ENXIO; |
2204 | 2586 | ||
2205 | kvm_queue_interrupt(vcpu, irq->irq, false); | 2587 | kvm_queue_interrupt(vcpu, irq->irq, false); |
2588 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
2206 | 2589 | ||
2207 | return 0; | 2590 | return 0; |
2208 | } | 2591 | } |
@@ -2272,9 +2655,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
2272 | if (mce->status & MCI_STATUS_UC) { | 2655 | if (mce->status & MCI_STATUS_UC) { |
2273 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || | 2656 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || |
2274 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { | 2657 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { |
2275 | printk(KERN_DEBUG "kvm: set_mce: " | ||
2276 | "injects mce exception while " | ||
2277 | "previous one is in progress!\n"); | ||
2278 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 2658 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2279 | return 0; | 2659 | return 0; |
2280 | } | 2660 | } |
@@ -2305,6 +2685,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2305 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | 2685 | !kvm_exception_is_soft(vcpu->arch.exception.nr); |
2306 | events->exception.nr = vcpu->arch.exception.nr; | 2686 | events->exception.nr = vcpu->arch.exception.nr; |
2307 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | 2687 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; |
2688 | events->exception.pad = 0; | ||
2308 | events->exception.error_code = vcpu->arch.exception.error_code; | 2689 | events->exception.error_code = vcpu->arch.exception.error_code; |
2309 | 2690 | ||
2310 | events->interrupt.injected = | 2691 | events->interrupt.injected = |
@@ -2318,12 +2699,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2318 | events->nmi.injected = vcpu->arch.nmi_injected; | 2699 | events->nmi.injected = vcpu->arch.nmi_injected; |
2319 | events->nmi.pending = vcpu->arch.nmi_pending; | 2700 | events->nmi.pending = vcpu->arch.nmi_pending; |
2320 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | 2701 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); |
2702 | events->nmi.pad = 0; | ||
2321 | 2703 | ||
2322 | events->sipi_vector = vcpu->arch.sipi_vector; | 2704 | events->sipi_vector = vcpu->arch.sipi_vector; |
2323 | 2705 | ||
2324 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2706 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2325 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR | 2707 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
2326 | | KVM_VCPUEVENT_VALID_SHADOW); | 2708 | | KVM_VCPUEVENT_VALID_SHADOW); |
2709 | memset(&events->reserved, 0, sizeof(events->reserved)); | ||
2327 | } | 2710 | } |
2328 | 2711 | ||
2329 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | 2712 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, |
@@ -2342,8 +2725,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2342 | vcpu->arch.interrupt.pending = events->interrupt.injected; | 2725 | vcpu->arch.interrupt.pending = events->interrupt.injected; |
2343 | vcpu->arch.interrupt.nr = events->interrupt.nr; | 2726 | vcpu->arch.interrupt.nr = events->interrupt.nr; |
2344 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2727 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
2345 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | ||
2346 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
2347 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | 2728 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) |
2348 | kvm_x86_ops->set_interrupt_shadow(vcpu, | 2729 | kvm_x86_ops->set_interrupt_shadow(vcpu, |
2349 | events->interrupt.shadow); | 2730 | events->interrupt.shadow); |
@@ -2356,6 +2737,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2356 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) | 2737 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) |
2357 | vcpu->arch.sipi_vector = events->sipi_vector; | 2738 | vcpu->arch.sipi_vector = events->sipi_vector; |
2358 | 2739 | ||
2740 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
2741 | |||
2359 | return 0; | 2742 | return 0; |
2360 | } | 2743 | } |
2361 | 2744 | ||
@@ -2366,6 +2749,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | |||
2366 | dbgregs->dr6 = vcpu->arch.dr6; | 2749 | dbgregs->dr6 = vcpu->arch.dr6; |
2367 | dbgregs->dr7 = vcpu->arch.dr7; | 2750 | dbgregs->dr7 = vcpu->arch.dr7; |
2368 | dbgregs->flags = 0; | 2751 | dbgregs->flags = 0; |
2752 | memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); | ||
2369 | } | 2753 | } |
2370 | 2754 | ||
2371 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | 2755 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, |
@@ -2715,6 +3099,32 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
2715 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); | 3099 | r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); |
2716 | break; | 3100 | break; |
2717 | } | 3101 | } |
3102 | case KVM_SET_TSC_KHZ: { | ||
3103 | u32 user_tsc_khz; | ||
3104 | |||
3105 | r = -EINVAL; | ||
3106 | if (!kvm_has_tsc_control) | ||
3107 | break; | ||
3108 | |||
3109 | user_tsc_khz = (u32)arg; | ||
3110 | |||
3111 | if (user_tsc_khz >= kvm_max_guest_tsc_khz) | ||
3112 | goto out; | ||
3113 | |||
3114 | kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz); | ||
3115 | |||
3116 | r = 0; | ||
3117 | goto out; | ||
3118 | } | ||
3119 | case KVM_GET_TSC_KHZ: { | ||
3120 | r = -EIO; | ||
3121 | if (check_tsc_unstable()) | ||
3122 | goto out; | ||
3123 | |||
3124 | r = vcpu_tsc_khz(vcpu); | ||
3125 | |||
3126 | goto out; | ||
3127 | } | ||
2718 | default: | 3128 | default: |
2719 | r = -EINVAL; | 3129 | r = -EINVAL; |
2720 | } | 3130 | } |
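The two new vcpu ioctls expose TSC-rate control to userspace when the hardware supports scaling. A minimal userspace sketch of how they would typically be driven (error handling trimmed; KVM_CAP_TSC_CONTROL, KVM_SET_TSC_KHZ and KVM_GET_TSC_KHZ come from linux/kvm.h, and the kvm/vcpu file descriptors are assumed to have been set up earlier):

```c
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* kvm_fd: the /dev/kvm fd; vcpu_fd: result of KVM_CREATE_VCPU. */
static void pin_guest_tsc(int kvm_fd, int vcpu_fd, unsigned int khz)
{
    if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_CONTROL) <= 0) {
        fprintf(stderr, "host cannot scale the guest TSC\n");
        return;
    }

    /* Request a fixed guest TSC rate, e.g. 1000000 kHz for 1 GHz. */
    if (ioctl(vcpu_fd, KVM_SET_TSC_KHZ, khz) < 0)
        perror("KVM_SET_TSC_KHZ");

    /* KVM_GET_TSC_KHZ reports the current rate as its return value. */
    int cur = ioctl(vcpu_fd, KVM_GET_TSC_KHZ, 0);
    if (cur > 0)
        printf("guest TSC frequency: %d kHz\n", cur);
}
```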
@@ -2759,7 +3169,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
2759 | 3169 | ||
2760 | static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | 3170 | static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) |
2761 | { | 3171 | { |
2762 | return kvm->arch.n_alloc_mmu_pages; | 3172 | return kvm->arch.n_max_mmu_pages; |
2763 | } | 3173 | } |
2764 | 3174 | ||
2765 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | 3175 | static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) |
@@ -2795,18 +3205,18 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2795 | r = 0; | 3205 | r = 0; |
2796 | switch (chip->chip_id) { | 3206 | switch (chip->chip_id) { |
2797 | case KVM_IRQCHIP_PIC_MASTER: | 3207 | case KVM_IRQCHIP_PIC_MASTER: |
2798 | raw_spin_lock(&pic_irqchip(kvm)->lock); | 3208 | spin_lock(&pic_irqchip(kvm)->lock); |
2799 | memcpy(&pic_irqchip(kvm)->pics[0], | 3209 | memcpy(&pic_irqchip(kvm)->pics[0], |
2800 | &chip->chip.pic, | 3210 | &chip->chip.pic, |
2801 | sizeof(struct kvm_pic_state)); | 3211 | sizeof(struct kvm_pic_state)); |
2802 | raw_spin_unlock(&pic_irqchip(kvm)->lock); | 3212 | spin_unlock(&pic_irqchip(kvm)->lock); |
2803 | break; | 3213 | break; |
2804 | case KVM_IRQCHIP_PIC_SLAVE: | 3214 | case KVM_IRQCHIP_PIC_SLAVE: |
2805 | raw_spin_lock(&pic_irqchip(kvm)->lock); | 3215 | spin_lock(&pic_irqchip(kvm)->lock); |
2806 | memcpy(&pic_irqchip(kvm)->pics[1], | 3216 | memcpy(&pic_irqchip(kvm)->pics[1], |
2807 | &chip->chip.pic, | 3217 | &chip->chip.pic, |
2808 | sizeof(struct kvm_pic_state)); | 3218 | sizeof(struct kvm_pic_state)); |
2809 | raw_spin_unlock(&pic_irqchip(kvm)->lock); | 3219 | spin_unlock(&pic_irqchip(kvm)->lock); |
2810 | break; | 3220 | break; |
2811 | case KVM_IRQCHIP_IOAPIC: | 3221 | case KVM_IRQCHIP_IOAPIC: |
2812 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); | 3222 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); |
@@ -2849,6 +3259,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) | |||
2849 | sizeof(ps->channels)); | 3259 | sizeof(ps->channels)); |
2850 | ps->flags = kvm->arch.vpit->pit_state.flags; | 3260 | ps->flags = kvm->arch.vpit->pit_state.flags; |
2851 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); | 3261 | mutex_unlock(&kvm->arch.vpit->pit_state.lock); |
3262 | memset(&ps->reserved, 0, sizeof(ps->reserved)); | ||
2852 | return r; | 3263 | return r; |
2853 | } | 3264 | } |
2854 | 3265 | ||
@@ -2912,24 +3323,18 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
2912 | struct kvm_memslots *slots, *old_slots; | 3323 | struct kvm_memslots *slots, *old_slots; |
2913 | unsigned long *dirty_bitmap; | 3324 | unsigned long *dirty_bitmap; |
2914 | 3325 | ||
2915 | spin_lock(&kvm->mmu_lock); | 3326 | dirty_bitmap = memslot->dirty_bitmap_head; |
2916 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 3327 | if (memslot->dirty_bitmap == dirty_bitmap) |
2917 | spin_unlock(&kvm->mmu_lock); | 3328 | dirty_bitmap += n / sizeof(long); |
2918 | |||
2919 | r = -ENOMEM; | ||
2920 | dirty_bitmap = vmalloc(n); | ||
2921 | if (!dirty_bitmap) | ||
2922 | goto out; | ||
2923 | memset(dirty_bitmap, 0, n); | 3329 | memset(dirty_bitmap, 0, n); |
2924 | 3330 | ||
2925 | r = -ENOMEM; | 3331 | r = -ENOMEM; |
2926 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 3332 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
2927 | if (!slots) { | 3333 | if (!slots) |
2928 | vfree(dirty_bitmap); | ||
2929 | goto out; | 3334 | goto out; |
2930 | } | ||
2931 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | 3335 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); |
2932 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | 3336 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; |
3337 | slots->generation++; | ||
2933 | 3338 | ||
2934 | old_slots = kvm->memslots; | 3339 | old_slots = kvm->memslots; |
2935 | rcu_assign_pointer(kvm->memslots, slots); | 3340 | rcu_assign_pointer(kvm->memslots, slots); |
@@ -2937,12 +3342,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
2937 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | 3342 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; |
2938 | kfree(old_slots); | 3343 | kfree(old_slots); |
2939 | 3344 | ||
3345 | spin_lock(&kvm->mmu_lock); | ||
3346 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | ||
3347 | spin_unlock(&kvm->mmu_lock); | ||
3348 | |||
2940 | r = -EFAULT; | 3349 | r = -EFAULT; |
2941 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) { | 3350 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) |
2942 | vfree(dirty_bitmap); | ||
2943 | goto out; | 3351 | goto out; |
2944 | } | ||
2945 | vfree(dirty_bitmap); | ||
2946 | } else { | 3352 | } else { |
2947 | r = -EFAULT; | 3353 | r = -EFAULT; |
2948 | if (clear_user(log->dirty_bitmap, n)) | 3354 | if (clear_user(log->dirty_bitmap, n)) |
@@ -3009,8 +3415,10 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3009 | if (vpic) { | 3415 | if (vpic) { |
3010 | r = kvm_ioapic_init(kvm); | 3416 | r = kvm_ioapic_init(kvm); |
3011 | if (r) { | 3417 | if (r) { |
3418 | mutex_lock(&kvm->slots_lock); | ||
3012 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | 3419 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, |
3013 | &vpic->dev); | 3420 | &vpic->dev); |
3421 | mutex_unlock(&kvm->slots_lock); | ||
3014 | kfree(vpic); | 3422 | kfree(vpic); |
3015 | goto create_irqchip_unlock; | 3423 | goto create_irqchip_unlock; |
3016 | } | 3424 | } |
@@ -3021,10 +3429,12 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3021 | smp_wmb(); | 3429 | smp_wmb(); |
3022 | r = kvm_setup_default_irq_routing(kvm); | 3430 | r = kvm_setup_default_irq_routing(kvm); |
3023 | if (r) { | 3431 | if (r) { |
3432 | mutex_lock(&kvm->slots_lock); | ||
3024 | mutex_lock(&kvm->irq_lock); | 3433 | mutex_lock(&kvm->irq_lock); |
3025 | kvm_ioapic_destroy(kvm); | 3434 | kvm_ioapic_destroy(kvm); |
3026 | kvm_destroy_pic(kvm); | 3435 | kvm_destroy_pic(kvm); |
3027 | mutex_unlock(&kvm->irq_lock); | 3436 | mutex_unlock(&kvm->irq_lock); |
3437 | mutex_unlock(&kvm->slots_lock); | ||
3028 | } | 3438 | } |
3029 | create_irqchip_unlock: | 3439 | create_irqchip_unlock: |
3030 | mutex_unlock(&kvm->lock); | 3440 | mutex_unlock(&kvm->lock); |
@@ -3200,7 +3610,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3200 | break; | 3610 | break; |
3201 | } | 3611 | } |
3202 | case KVM_SET_CLOCK: { | 3612 | case KVM_SET_CLOCK: { |
3203 | struct timespec now; | ||
3204 | struct kvm_clock_data user_ns; | 3613 | struct kvm_clock_data user_ns; |
3205 | u64 now_ns; | 3614 | u64 now_ns; |
3206 | s64 delta; | 3615 | s64 delta; |
@@ -3214,21 +3623,23 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
3214 | goto out; | 3623 | goto out; |
3215 | 3624 | ||
3216 | r = 0; | 3625 | r = 0; |
3217 | ktime_get_ts(&now); | 3626 | local_irq_disable(); |
3218 | now_ns = timespec_to_ns(&now); | 3627 | now_ns = get_kernel_ns(); |
3219 | delta = user_ns.clock - now_ns; | 3628 | delta = user_ns.clock - now_ns; |
3629 | local_irq_enable(); | ||
3220 | kvm->arch.kvmclock_offset = delta; | 3630 | kvm->arch.kvmclock_offset = delta; |
3221 | break; | 3631 | break; |
3222 | } | 3632 | } |
3223 | case KVM_GET_CLOCK: { | 3633 | case KVM_GET_CLOCK: { |
3224 | struct timespec now; | ||
3225 | struct kvm_clock_data user_ns; | 3634 | struct kvm_clock_data user_ns; |
3226 | u64 now_ns; | 3635 | u64 now_ns; |
3227 | 3636 | ||
3228 | ktime_get_ts(&now); | 3637 | local_irq_disable(); |
3229 | now_ns = timespec_to_ns(&now); | 3638 | now_ns = get_kernel_ns(); |
3230 | user_ns.clock = kvm->arch.kvmclock_offset + now_ns; | 3639 | user_ns.clock = kvm->arch.kvmclock_offset + now_ns; |
3640 | local_irq_enable(); | ||
3231 | user_ns.flags = 0; | 3641 | user_ns.flags = 0; |
3642 | memset(&user_ns.pad, 0, sizeof(user_ns.pad)); | ||
3232 | 3643 | ||
3233 | r = -EFAULT; | 3644 | r = -EFAULT; |
3234 | if (copy_to_user(argp, &user_ns, sizeof(user_ns))) | 3645 | if (copy_to_user(argp, &user_ns, sizeof(user_ns))) |
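The KVM_SET_CLOCK/KVM_GET_CLOCK changes above swap ktime_get_ts() for get_kernel_ns() and take the sample with interrupts disabled, so the host timestamp and the stored offset always pair up. The arithmetic itself is just an offset: SET records guest_time minus host_time, GET adds that offset back to the current host time. A standalone sketch of that bookkeeping follows; host_ns, set_clock() and get_clock() are invented stand-ins, not KVM symbols.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the host's boot-based clock (get_kernel_ns() in the hunk). */
static uint64_t host_ns;

static int64_t kvmclock_offset;		/* what KVM_SET_CLOCK stores */

/* KVM_SET_CLOCK: remember how far the guest clock is from the host clock. */
static void set_clock(uint64_t guest_ns)
{
	kvmclock_offset = (int64_t)(guest_ns - host_ns);
}

/* KVM_GET_CLOCK: reapply the offset to the current host time. */
static uint64_t get_clock(void)
{
	return host_ns + kvmclock_offset;
}

int main(void)
{
	host_ns = 1000000000ULL;		/* host has been up for 1 s */
	set_clock(250000000ULL);		/* guest believes it is 0.25 s */

	host_ns += 500000000ULL;		/* half a second passes */
	printf("guest clock: %llu ns\n",	/* 0.25 s + 0.5 s = 0.75 s */
	       (unsigned long long)get_clock());
	return 0;
}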
@@ -3263,20 +3674,43 @@ static void kvm_init_msr_list(void) | |||
3263 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | 3674 | static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, |
3264 | const void *v) | 3675 | const void *v) |
3265 | { | 3676 | { |
3266 | if (vcpu->arch.apic && | 3677 | int handled = 0; |
3267 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) | 3678 | int n; |
3268 | return 0; | 3679 | |
3680 | do { | ||
3681 | n = min(len, 8); | ||
3682 | if (!(vcpu->arch.apic && | ||
3683 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) | ||
3684 | && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) | ||
3685 | break; | ||
3686 | handled += n; | ||
3687 | addr += n; | ||
3688 | len -= n; | ||
3689 | v += n; | ||
3690 | } while (len); | ||
3269 | 3691 | ||
3270 | return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3692 | return handled; |
3271 | } | 3693 | } |
3272 | 3694 | ||
3273 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | 3695 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) |
3274 | { | 3696 | { |
3275 | if (vcpu->arch.apic && | 3697 | int handled = 0; |
3276 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) | 3698 | int n; |
3277 | return 0; | 3699 | |
3700 | do { | ||
3701 | n = min(len, 8); | ||
3702 | if (!(vcpu->arch.apic && | ||
3703 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) | ||
3704 | && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) | ||
3705 | break; | ||
3706 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); | ||
3707 | handled += n; | ||
3708 | addr += n; | ||
3709 | len -= n; | ||
3710 | v += n; | ||
3711 | } while (len); | ||
3278 | 3712 | ||
3279 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3713 | return handled; |
3280 | } | 3714 | } |
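The reworked vcpu_mmio_read()/vcpu_mmio_write() above no longer succeed or fail wholesale: they chop the access into chunks of at most 8 bytes, stop at the first chunk no in-kernel device claims, and return how many bytes were handled so the caller can forward only the remainder to userspace. A userspace analogue of that loop, with an invented dev_write() handler and regs[] window standing in for the I/O bus, is sketched below.

#include <stdio.h>
#include <string.h>

#define CHUNK 8

/* Toy device: accepts writes only to the first 16 bytes of its window. */
static unsigned char regs[16];

static int dev_write(unsigned long addr, int len, const void *val)
{
	if (addr + len > sizeof(regs))
		return -1;			/* not handled here */
	memcpy(&regs[addr], val, len);
	return 0;
}

/* Mirror of the new loop: <=8-byte chunks, stop at the first chunk nobody
 * accepts, report how many bytes were completed. */
static int mmio_write(unsigned long addr, int len, const void *val)
{
	int handled = 0, n;

	do {
		n = len < CHUNK ? len : CHUNK;
		if (dev_write(addr, n, val))
			break;
		handled += n;
		addr += n;
		len -= n;
		val = (const char *)val + n;
	} while (len);

	return handled;
}

int main(void)
{
	char buf[24] = "abcdefghijklmnopqrstuvw";

	/* Only the first 16 bytes land in the device; the caller would then
	 * forward the remaining 8 as an MMIO exit, as the emulator does. */
	printf("handled %d of %zu bytes\n", mmio_write(0, 24, buf), sizeof(buf));
	return 0;
}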
3281 | 3715 | ||
3282 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | 3716 | static void kvm_set_segment(struct kvm_vcpu *vcpu, |
@@ -3291,49 +3725,71 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, | |||
3291 | kvm_x86_ops->get_segment(vcpu, var, seg); | 3725 | kvm_x86_ops->get_segment(vcpu, var, seg); |
3292 | } | 3726 | } |
3293 | 3727 | ||
3294 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3728 | static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) |
3729 | { | ||
3730 | return gpa; | ||
3731 | } | ||
3732 | |||
3733 | static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) | ||
3734 | { | ||
3735 | gpa_t t_gpa; | ||
3736 | struct x86_exception exception; | ||
3737 | |||
3738 | BUG_ON(!mmu_is_nested(vcpu)); | ||
3739 | |||
3740 | /* NPT walks are always user-walks */ | ||
3741 | access |= PFERR_USER_MASK; | ||
3742 | t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception); | ||
3743 | |||
3744 | return t_gpa; | ||
3745 | } | ||
3746 | |||
3747 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, | ||
3748 | struct x86_exception *exception) | ||
3295 | { | 3749 | { |
3296 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3750 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3297 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | 3751 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3298 | } | 3752 | } |
3299 | 3753 | ||
3300 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3754 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, |
3755 | struct x86_exception *exception) | ||
3301 | { | 3756 | { |
3302 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3757 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3303 | access |= PFERR_FETCH_MASK; | 3758 | access |= PFERR_FETCH_MASK; |
3304 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | 3759 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3305 | } | 3760 | } |
3306 | 3761 | ||
3307 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3762 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, |
3763 | struct x86_exception *exception) | ||
3308 | { | 3764 | { |
3309 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3765 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3310 | access |= PFERR_WRITE_MASK; | 3766 | access |= PFERR_WRITE_MASK; |
3311 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | 3767 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); |
3312 | } | 3768 | } |
3313 | 3769 | ||
3314 | /* uses this to access any guest's mapped memory without checking CPL */ | 3770 | /* uses this to access any guest's mapped memory without checking CPL */ |
3315 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3771 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, |
3772 | struct x86_exception *exception) | ||
3316 | { | 3773 | { |
3317 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error); | 3774 | return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception); |
3318 | } | 3775 | } |
3319 | 3776 | ||
3320 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | 3777 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, |
3321 | struct kvm_vcpu *vcpu, u32 access, | 3778 | struct kvm_vcpu *vcpu, u32 access, |
3322 | u32 *error) | 3779 | struct x86_exception *exception) |
3323 | { | 3780 | { |
3324 | void *data = val; | 3781 | void *data = val; |
3325 | int r = X86EMUL_CONTINUE; | 3782 | int r = X86EMUL_CONTINUE; |
3326 | 3783 | ||
3327 | while (bytes) { | 3784 | while (bytes) { |
3328 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error); | 3785 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access, |
3786 | exception); | ||
3329 | unsigned offset = addr & (PAGE_SIZE-1); | 3787 | unsigned offset = addr & (PAGE_SIZE-1); |
3330 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); | 3788 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); |
3331 | int ret; | 3789 | int ret; |
3332 | 3790 | ||
3333 | if (gpa == UNMAPPED_GVA) { | 3791 | if (gpa == UNMAPPED_GVA) |
3334 | r = X86EMUL_PROPAGATE_FAULT; | 3792 | return X86EMUL_PROPAGATE_FAULT; |
3335 | goto out; | ||
3336 | } | ||
3337 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); | 3793 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); |
3338 | if (ret < 0) { | 3794 | if (ret < 0) { |
3339 | r = X86EMUL_IO_NEEDED; | 3795 | r = X86EMUL_IO_NEEDED; |
@@ -3349,47 +3805,56 @@ out: | |||
3349 | } | 3805 | } |
3350 | 3806 | ||
3351 | /* used for instruction fetching */ | 3807 | /* used for instruction fetching */ |
3352 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3808 | static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, |
3353 | struct kvm_vcpu *vcpu, u32 *error) | 3809 | gva_t addr, void *val, unsigned int bytes, |
3810 | struct x86_exception *exception) | ||
3354 | { | 3811 | { |
3812 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3355 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3813 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3814 | |||
3356 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | 3815 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, |
3357 | access | PFERR_FETCH_MASK, error); | 3816 | access | PFERR_FETCH_MASK, |
3817 | exception); | ||
3358 | } | 3818 | } |
3359 | 3819 | ||
3360 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3820 | static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, |
3361 | struct kvm_vcpu *vcpu, u32 *error) | 3821 | gva_t addr, void *val, unsigned int bytes, |
3822 | struct x86_exception *exception) | ||
3362 | { | 3823 | { |
3824 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3363 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3825 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
3826 | |||
3364 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | 3827 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, |
3365 | error); | 3828 | exception); |
3366 | } | 3829 | } |
3367 | 3830 | ||
3368 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | 3831 | static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
3369 | struct kvm_vcpu *vcpu, u32 *error) | 3832 | gva_t addr, void *val, unsigned int bytes, |
3833 | struct x86_exception *exception) | ||
3370 | { | 3834 | { |
3371 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | 3835 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
3836 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); | ||
3372 | } | 3837 | } |
3373 | 3838 | ||
3374 | static int kvm_write_guest_virt_system(gva_t addr, void *val, | 3839 | static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, |
3840 | gva_t addr, void *val, | ||
3375 | unsigned int bytes, | 3841 | unsigned int bytes, |
3376 | struct kvm_vcpu *vcpu, | 3842 | struct x86_exception *exception) |
3377 | u32 *error) | ||
3378 | { | 3843 | { |
3844 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3379 | void *data = val; | 3845 | void *data = val; |
3380 | int r = X86EMUL_CONTINUE; | 3846 | int r = X86EMUL_CONTINUE; |
3381 | 3847 | ||
3382 | while (bytes) { | 3848 | while (bytes) { |
3383 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, | 3849 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, |
3384 | PFERR_WRITE_MASK, error); | 3850 | PFERR_WRITE_MASK, |
3851 | exception); | ||
3385 | unsigned offset = addr & (PAGE_SIZE-1); | 3852 | unsigned offset = addr & (PAGE_SIZE-1); |
3386 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3853 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
3387 | int ret; | 3854 | int ret; |
3388 | 3855 | ||
3389 | if (gpa == UNMAPPED_GVA) { | 3856 | if (gpa == UNMAPPED_GVA) |
3390 | r = X86EMUL_PROPAGATE_FAULT; | 3857 | return X86EMUL_PROPAGATE_FAULT; |
3391 | goto out; | ||
3392 | } | ||
3393 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); | 3858 | ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite); |
3394 | if (ret < 0) { | 3859 | if (ret < 0) { |
3395 | r = X86EMUL_IO_NEEDED; | 3860 | r = X86EMUL_IO_NEEDED; |
@@ -3404,13 +3869,15 @@ out: | |||
3404 | return r; | 3869 | return r; |
3405 | } | 3870 | } |
3406 | 3871 | ||
3407 | static int emulator_read_emulated(unsigned long addr, | 3872 | static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, |
3873 | unsigned long addr, | ||
3408 | void *val, | 3874 | void *val, |
3409 | unsigned int bytes, | 3875 | unsigned int bytes, |
3410 | unsigned int *error_code, | 3876 | struct x86_exception *exception) |
3411 | struct kvm_vcpu *vcpu) | ||
3412 | { | 3877 | { |
3878 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3413 | gpa_t gpa; | 3879 | gpa_t gpa; |
3880 | int handled; | ||
3414 | 3881 | ||
3415 | if (vcpu->mmio_read_completed) { | 3882 | if (vcpu->mmio_read_completed) { |
3416 | memcpy(val, vcpu->mmio_data, bytes); | 3883 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -3420,7 +3887,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
3420 | return X86EMUL_CONTINUE; | 3887 | return X86EMUL_CONTINUE; |
3421 | } | 3888 | } |
3422 | 3889 | ||
3423 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, error_code); | 3890 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception); |
3424 | 3891 | ||
3425 | if (gpa == UNMAPPED_GVA) | 3892 | if (gpa == UNMAPPED_GVA) |
3426 | return X86EMUL_PROPAGATE_FAULT; | 3893 | return X86EMUL_PROPAGATE_FAULT; |
@@ -3429,32 +3896,38 @@ static int emulator_read_emulated(unsigned long addr, | |||
3429 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3896 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
3430 | goto mmio; | 3897 | goto mmio; |
3431 | 3898 | ||
3432 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) | 3899 | if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception) |
3433 | == X86EMUL_CONTINUE) | 3900 | == X86EMUL_CONTINUE) |
3434 | return X86EMUL_CONTINUE; | 3901 | return X86EMUL_CONTINUE; |
3435 | 3902 | ||
3436 | mmio: | 3903 | mmio: |
3437 | /* | 3904 | /* |
3438 | * Is this MMIO handled locally? | 3905 | * Is this MMIO handled locally? |
3439 | */ | 3906 | */ |
3440 | if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) { | 3907 | handled = vcpu_mmio_read(vcpu, gpa, bytes, val); |
3441 | trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val); | 3908 | |
3909 | if (handled == bytes) | ||
3442 | return X86EMUL_CONTINUE; | 3910 | return X86EMUL_CONTINUE; |
3443 | } | 3911 | |
3912 | gpa += handled; | ||
3913 | bytes -= handled; | ||
3914 | val += handled; | ||
3444 | 3915 | ||
3445 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); | 3916 | trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); |
3446 | 3917 | ||
3447 | vcpu->mmio_needed = 1; | 3918 | vcpu->mmio_needed = 1; |
3448 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | 3919 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3449 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | 3920 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3450 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; | 3921 | vcpu->mmio_size = bytes; |
3922 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | ||
3451 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; | 3923 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0; |
3924 | vcpu->mmio_index = 0; | ||
3452 | 3925 | ||
3453 | return X86EMUL_IO_NEEDED; | 3926 | return X86EMUL_IO_NEEDED; |
3454 | } | 3927 | } |
3455 | 3928 | ||
3456 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | 3929 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
3457 | const void *val, int bytes) | 3930 | const void *val, int bytes) |
3458 | { | 3931 | { |
3459 | int ret; | 3932 | int ret; |
3460 | 3933 | ||
@@ -3468,12 +3941,13 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3468 | static int emulator_write_emulated_onepage(unsigned long addr, | 3941 | static int emulator_write_emulated_onepage(unsigned long addr, |
3469 | const void *val, | 3942 | const void *val, |
3470 | unsigned int bytes, | 3943 | unsigned int bytes, |
3471 | unsigned int *error_code, | 3944 | struct x86_exception *exception, |
3472 | struct kvm_vcpu *vcpu) | 3945 | struct kvm_vcpu *vcpu) |
3473 | { | 3946 | { |
3474 | gpa_t gpa; | 3947 | gpa_t gpa; |
3948 | int handled; | ||
3475 | 3949 | ||
3476 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error_code); | 3950 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception); |
3477 | 3951 | ||
3478 | if (gpa == UNMAPPED_GVA) | 3952 | if (gpa == UNMAPPED_GVA) |
3479 | return X86EMUL_PROPAGATE_FAULT; | 3953 | return X86EMUL_PROPAGATE_FAULT; |
@@ -3490,31 +3964,41 @@ mmio: | |||
3490 | /* | 3964 | /* |
3491 | * Is this MMIO handled locally? | 3965 | * Is this MMIO handled locally? |
3492 | */ | 3966 | */ |
3493 | if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) | 3967 | handled = vcpu_mmio_write(vcpu, gpa, bytes, val); |
3968 | if (handled == bytes) | ||
3494 | return X86EMUL_CONTINUE; | 3969 | return X86EMUL_CONTINUE; |
3495 | 3970 | ||
3971 | gpa += handled; | ||
3972 | bytes -= handled; | ||
3973 | val += handled; | ||
3974 | |||
3496 | vcpu->mmio_needed = 1; | 3975 | vcpu->mmio_needed = 1; |
3976 | memcpy(vcpu->mmio_data, val, bytes); | ||
3497 | vcpu->run->exit_reason = KVM_EXIT_MMIO; | 3977 | vcpu->run->exit_reason = KVM_EXIT_MMIO; |
3498 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; | 3978 | vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; |
3499 | vcpu->run->mmio.len = vcpu->mmio_size = bytes; | 3979 | vcpu->mmio_size = bytes; |
3980 | vcpu->run->mmio.len = min(vcpu->mmio_size, 8); | ||
3500 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; | 3981 | vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1; |
3501 | memcpy(vcpu->run->mmio.data, val, bytes); | 3982 | memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); |
3983 | vcpu->mmio_index = 0; | ||
3502 | 3984 | ||
3503 | return X86EMUL_CONTINUE; | 3985 | return X86EMUL_CONTINUE; |
3504 | } | 3986 | } |
3505 | 3987 | ||
3506 | int emulator_write_emulated(unsigned long addr, | 3988 | int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, |
3989 | unsigned long addr, | ||
3507 | const void *val, | 3990 | const void *val, |
3508 | unsigned int bytes, | 3991 | unsigned int bytes, |
3509 | unsigned int *error_code, | 3992 | struct x86_exception *exception) |
3510 | struct kvm_vcpu *vcpu) | ||
3511 | { | 3993 | { |
3994 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3995 | |||
3512 | /* Crossing a page boundary? */ | 3996 | /* Crossing a page boundary? */ |
3513 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3997 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
3514 | int rc, now; | 3998 | int rc, now; |
3515 | 3999 | ||
3516 | now = -addr & ~PAGE_MASK; | 4000 | now = -addr & ~PAGE_MASK; |
3517 | rc = emulator_write_emulated_onepage(addr, val, now, error_code, | 4001 | rc = emulator_write_emulated_onepage(addr, val, now, exception, |
3518 | vcpu); | 4002 | vcpu); |
3519 | if (rc != X86EMUL_CONTINUE) | 4003 | if (rc != X86EMUL_CONTINUE) |
3520 | return rc; | 4004 | return rc; |
@@ -3522,7 +4006,7 @@ int emulator_write_emulated(unsigned long addr, | |||
3522 | val += now; | 4006 | val += now; |
3523 | bytes -= now; | 4007 | bytes -= now; |
3524 | } | 4008 | } |
3525 | return emulator_write_emulated_onepage(addr, val, bytes, error_code, | 4009 | return emulator_write_emulated_onepage(addr, val, bytes, exception, |
3526 | vcpu); | 4010 | vcpu); |
3527 | } | 4011 | } |
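emulator_write_emulated() above keeps its page-splitting logic: when the first and last byte of the access land in different pages, "now = -addr & ~PAGE_MASK" gives the number of bytes left before the next page boundary, and the write is issued as two onepage calls. A small standalone demonstration of that bit trick, using a made-up address, is below.

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long addr = 0x1ffa;		/* 6 bytes before a page end */
	unsigned int bytes = 10;		/* access straddles the boundary */

	/* Same test as the hunk: do the first and last byte live in
	 * different pages? */
	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
		/* -addr & ~PAGE_MASK == bytes remaining in the current page */
		unsigned int now = -addr & ~PAGE_MASK;

		printf("first chunk : %u bytes at %#lx\n", now, addr);
		printf("second chunk: %u bytes at %#lx\n",
		       bytes - now, addr + now);
	}
	return 0;
}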
3528 | 4012 | ||
@@ -3536,13 +4020,14 @@ int emulator_write_emulated(unsigned long addr, | |||
3536 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) | 4020 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) |
3537 | #endif | 4021 | #endif |
3538 | 4022 | ||
3539 | static int emulator_cmpxchg_emulated(unsigned long addr, | 4023 | static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, |
4024 | unsigned long addr, | ||
3540 | const void *old, | 4025 | const void *old, |
3541 | const void *new, | 4026 | const void *new, |
3542 | unsigned int bytes, | 4027 | unsigned int bytes, |
3543 | unsigned int *error_code, | 4028 | struct x86_exception *exception) |
3544 | struct kvm_vcpu *vcpu) | ||
3545 | { | 4029 | { |
4030 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3546 | gpa_t gpa; | 4031 | gpa_t gpa; |
3547 | struct page *page; | 4032 | struct page *page; |
3548 | char *kaddr; | 4033 | char *kaddr; |
@@ -3598,7 +4083,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
3598 | emul_write: | 4083 | emul_write: |
3599 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 4084 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
3600 | 4085 | ||
3601 | return emulator_write_emulated(addr, new, bytes, error_code, vcpu); | 4086 | return emulator_write_emulated(ctxt, addr, new, bytes, exception); |
3602 | } | 4087 | } |
3603 | 4088 | ||
3604 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | 4089 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) |
@@ -3617,13 +4102,16 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
3617 | } | 4102 | } |
3618 | 4103 | ||
3619 | 4104 | ||
3620 | static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | 4105 | static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, |
3621 | unsigned int count, struct kvm_vcpu *vcpu) | 4106 | int size, unsigned short port, void *val, |
4107 | unsigned int count) | ||
3622 | { | 4108 | { |
4109 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4110 | |||
3623 | if (vcpu->arch.pio.count) | 4111 | if (vcpu->arch.pio.count) |
3624 | goto data_avail; | 4112 | goto data_avail; |
3625 | 4113 | ||
3626 | trace_kvm_pio(1, port, size, 1); | 4114 | trace_kvm_pio(0, port, size, count); |
3627 | 4115 | ||
3628 | vcpu->arch.pio.port = port; | 4116 | vcpu->arch.pio.port = port; |
3629 | vcpu->arch.pio.in = 1; | 4117 | vcpu->arch.pio.in = 1; |
@@ -3647,11 +4135,13 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | |||
3647 | return 0; | 4135 | return 0; |
3648 | } | 4136 | } |
3649 | 4137 | ||
3650 | static int emulator_pio_out_emulated(int size, unsigned short port, | 4138 | static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, |
3651 | const void *val, unsigned int count, | 4139 | int size, unsigned short port, |
3652 | struct kvm_vcpu *vcpu) | 4140 | const void *val, unsigned int count) |
3653 | { | 4141 | { |
3654 | trace_kvm_pio(0, port, size, 1); | 4142 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
4143 | |||
4144 | trace_kvm_pio(1, port, size, count); | ||
3655 | 4145 | ||
3656 | vcpu->arch.pio.port = port; | 4146 | vcpu->arch.pio.port = port; |
3657 | vcpu->arch.pio.in = 0; | 4147 | vcpu->arch.pio.in = 0; |
@@ -3680,10 +4170,9 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | |||
3680 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 4170 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
3681 | } | 4171 | } |
3682 | 4172 | ||
3683 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | 4173 | static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) |
3684 | { | 4174 | { |
3685 | kvm_mmu_invlpg(vcpu, address); | 4175 | kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); |
3686 | return X86EMUL_CONTINUE; | ||
3687 | } | 4176 | } |
3688 | 4177 | ||
3689 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | 4178 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) |
@@ -3692,31 +4181,33 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) | |||
3692 | return X86EMUL_CONTINUE; | 4181 | return X86EMUL_CONTINUE; |
3693 | 4182 | ||
3694 | if (kvm_x86_ops->has_wbinvd_exit()) { | 4183 | if (kvm_x86_ops->has_wbinvd_exit()) { |
4184 | int cpu = get_cpu(); | ||
4185 | |||
4186 | cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); | ||
3695 | smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, | 4187 | smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, |
3696 | wbinvd_ipi, NULL, 1); | 4188 | wbinvd_ipi, NULL, 1); |
4189 | put_cpu(); | ||
3697 | cpumask_clear(vcpu->arch.wbinvd_dirty_mask); | 4190 | cpumask_clear(vcpu->arch.wbinvd_dirty_mask); |
3698 | } | 4191 | } else |
3699 | wbinvd(); | 4192 | wbinvd(); |
3700 | return X86EMUL_CONTINUE; | 4193 | return X86EMUL_CONTINUE; |
3701 | } | 4194 | } |
3702 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); | 4195 | EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); |
3703 | 4196 | ||
3704 | int emulate_clts(struct kvm_vcpu *vcpu) | 4197 | static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) |
3705 | { | 4198 | { |
3706 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | 4199 | kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); |
3707 | kvm_x86_ops->fpu_activate(vcpu); | ||
3708 | return X86EMUL_CONTINUE; | ||
3709 | } | 4200 | } |
3710 | 4201 | ||
3711 | int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu) | 4202 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
3712 | { | 4203 | { |
3713 | return _kvm_get_dr(vcpu, dr, dest); | 4204 | return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); |
3714 | } | 4205 | } |
3715 | 4206 | ||
3716 | int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu) | 4207 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
3717 | { | 4208 | { |
3718 | 4209 | ||
3719 | return __kvm_set_dr(vcpu, dr, value); | 4210 | return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); |
3720 | } | 4211 | } |
3721 | 4212 | ||
3722 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 4213 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
@@ -3724,8 +4215,9 @@ static u64 mk_cr_64(u64 curr_cr, u32 new_val) | |||
3724 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | 4215 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; |
3725 | } | 4216 | } |
3726 | 4217 | ||
3727 | static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | 4218 | static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) |
3728 | { | 4219 | { |
4220 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3729 | unsigned long value; | 4221 | unsigned long value; |
3730 | 4222 | ||
3731 | switch (cr) { | 4223 | switch (cr) { |
@@ -3736,7 +4228,7 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | |||
3736 | value = vcpu->arch.cr2; | 4228 | value = vcpu->arch.cr2; |
3737 | break; | 4229 | break; |
3738 | case 3: | 4230 | case 3: |
3739 | value = vcpu->arch.cr3; | 4231 | value = kvm_read_cr3(vcpu); |
3740 | break; | 4232 | break; |
3741 | case 4: | 4233 | case 4: |
3742 | value = kvm_read_cr4(vcpu); | 4234 | value = kvm_read_cr4(vcpu); |
@@ -3752,8 +4244,9 @@ static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | |||
3752 | return value; | 4244 | return value; |
3753 | } | 4245 | } |
3754 | 4246 | ||
3755 | static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | 4247 | static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) |
3756 | { | 4248 | { |
4249 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3757 | int res = 0; | 4250 | int res = 0; |
3758 | 4251 | ||
3759 | switch (cr) { | 4252 | switch (cr) { |
@@ -3770,7 +4263,7 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | |||
3770 | res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | 4263 | res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
3771 | break; | 4264 | break; |
3772 | case 8: | 4265 | case 8: |
3773 | res = __kvm_set_cr8(vcpu, val & 0xfUL); | 4266 | res = kvm_set_cr8(vcpu, val); |
3774 | break; | 4267 | break; |
3775 | default: | 4268 | default: |
3776 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 4269 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); |
@@ -3780,28 +4273,45 @@ static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | |||
3780 | return res; | 4273 | return res; |
3781 | } | 4274 | } |
3782 | 4275 | ||
3783 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | 4276 | static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt) |
3784 | { | 4277 | { |
3785 | return kvm_x86_ops->get_cpl(vcpu); | 4278 | return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt)); |
3786 | } | 4279 | } |
3787 | 4280 | ||
3788 | static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | 4281 | static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) |
3789 | { | 4282 | { |
3790 | kvm_x86_ops->get_gdt(vcpu, dt); | 4283 | kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt); |
3791 | } | 4284 | } |
3792 | 4285 | ||
3793 | static unsigned long emulator_get_cached_segment_base(int seg, | 4286 | static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) |
3794 | struct kvm_vcpu *vcpu) | ||
3795 | { | 4287 | { |
3796 | return get_segment_base(vcpu, seg); | 4288 | kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt); |
3797 | } | 4289 | } |
3798 | 4290 | ||
3799 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | 4291 | static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) |
3800 | struct kvm_vcpu *vcpu) | 4292 | { |
4293 | kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt); | ||
4294 | } | ||
4295 | |||
4296 | static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt) | ||
4297 | { | ||
4298 | kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt); | ||
4299 | } | ||
4300 | |||
4301 | static unsigned long emulator_get_cached_segment_base( | ||
4302 | struct x86_emulate_ctxt *ctxt, int seg) | ||
4303 | { | ||
4304 | return get_segment_base(emul_to_vcpu(ctxt), seg); | ||
4305 | } | ||
4306 | |||
4307 | static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, | ||
4308 | struct desc_struct *desc, u32 *base3, | ||
4309 | int seg) | ||
3801 | { | 4310 | { |
3802 | struct kvm_segment var; | 4311 | struct kvm_segment var; |
3803 | 4312 | ||
3804 | kvm_get_segment(vcpu, &var, seg); | 4313 | kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); |
4314 | *selector = var.selector; | ||
3805 | 4315 | ||
3806 | if (var.unusable) | 4316 | if (var.unusable) |
3807 | return false; | 4317 | return false; |
@@ -3810,6 +4320,10 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | |||
3810 | var.limit >>= 12; | 4320 | var.limit >>= 12; |
3811 | set_desc_limit(desc, var.limit); | 4321 | set_desc_limit(desc, var.limit); |
3812 | set_desc_base(desc, (unsigned long)var.base); | 4322 | set_desc_base(desc, (unsigned long)var.base); |
4323 | #ifdef CONFIG_X86_64 | ||
4324 | if (base3) | ||
4325 | *base3 = var.base >> 32; | ||
4326 | #endif | ||
3813 | desc->type = var.type; | 4327 | desc->type = var.type; |
3814 | desc->s = var.s; | 4328 | desc->s = var.s; |
3815 | desc->dpl = var.dpl; | 4329 | desc->dpl = var.dpl; |
@@ -3822,15 +4336,18 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | |||
3822 | return true; | 4336 | return true; |
3823 | } | 4337 | } |
3824 | 4338 | ||
3825 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | 4339 | static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, |
3826 | struct kvm_vcpu *vcpu) | 4340 | struct desc_struct *desc, u32 base3, |
4341 | int seg) | ||
3827 | { | 4342 | { |
4343 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
3828 | struct kvm_segment var; | 4344 | struct kvm_segment var; |
3829 | 4345 | ||
3830 | /* needed to preserve selector */ | 4346 | var.selector = selector; |
3831 | kvm_get_segment(vcpu, &var, seg); | ||
3832 | |||
3833 | var.base = get_desc_base(desc); | 4347 | var.base = get_desc_base(desc); |
4348 | #ifdef CONFIG_X86_64 | ||
4349 | var.base |= ((u64)base3) << 32; | ||
4350 | #endif | ||
3834 | var.limit = get_desc_limit(desc); | 4351 | var.limit = get_desc_limit(desc); |
3835 | if (desc->g) | 4352 | if (desc->g) |
3836 | var.limit = (var.limit << 12) | 0xfff; | 4353 | var.limit = (var.limit << 12) | 0xfff; |
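The new get/set segment helpers above carry an extra base3 word because, in 64-bit mode, system descriptors such as the TSS have a base wider than the 32 bits a classic desc_struct can hold: base3 is simply bits 63:32, OR-ed back in on the set side, and a page-granular limit is expanded as (limit << 12) | 0xfff. A standalone sketch of that split-and-rebuild arithmetic (the base value is invented) follows.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t base = 0xfffffe0000001000ULL;	/* invented 64-bit TSS base */
	uint32_t desc_base, base3, limit = 0xf;

	/* get side: the legacy descriptor keeps only bits 31:0 of the base,
	 * the base3 out-parameter carries bits 63:32. */
	desc_base = (uint32_t)base;
	base3     = (uint32_t)(base >> 32);

	/* set side: stitch the halves back together, and expand a
	 * page-granular limit exactly as the hunk does. */
	uint64_t rebuilt    = (uint64_t)desc_base | ((uint64_t)base3 << 32);
	uint32_t byte_limit = (limit << 12) | 0xfff;

	printf("base  %#llx -> %#x + %#x -> %#llx\n",
	       (unsigned long long)base, desc_base, base3,
	       (unsigned long long)rebuilt);
	printf("limit %#x pages -> %#x bytes\n", limit, byte_limit);
	return 0;
}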
@@ -3850,22 +4367,44 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | |||
3850 | return; | 4367 | return; |
3851 | } | 4368 | } |
3852 | 4369 | ||
3853 | static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) | 4370 | static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, |
4371 | u32 msr_index, u64 *pdata) | ||
3854 | { | 4372 | { |
3855 | struct kvm_segment kvm_seg; | 4373 | return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); |
4374 | } | ||
3856 | 4375 | ||
3857 | kvm_get_segment(vcpu, &kvm_seg, seg); | 4376 | static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, |
3858 | return kvm_seg.selector; | 4377 | u32 msr_index, u64 data) |
4378 | { | ||
4379 | return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); | ||
4380 | } | ||
4381 | |||
4382 | static void emulator_halt(struct x86_emulate_ctxt *ctxt) | ||
4383 | { | ||
4384 | emul_to_vcpu(ctxt)->arch.halt_request = 1; | ||
4385 | } | ||
4386 | |||
4387 | static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) | ||
4388 | { | ||
4389 | preempt_disable(); | ||
4390 | kvm_load_guest_fpu(emul_to_vcpu(ctxt)); | ||
4391 | /* | ||
4392 | * CR0.TS may reference the host fpu state, not the guest fpu state, | ||
4393 | * so it may be clear at this point. | ||
4394 | */ | ||
4395 | clts(); | ||
3859 | } | 4396 | } |
3860 | 4397 | ||
3861 | static void emulator_set_segment_selector(u16 sel, int seg, | 4398 | static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) |
3862 | struct kvm_vcpu *vcpu) | ||
3863 | { | 4399 | { |
3864 | struct kvm_segment kvm_seg; | 4400 | preempt_enable(); |
4401 | } | ||
3865 | 4402 | ||
3866 | kvm_get_segment(vcpu, &kvm_seg, seg); | 4403 | static int emulator_intercept(struct x86_emulate_ctxt *ctxt, |
3867 | kvm_seg.selector = sel; | 4404 | struct x86_instruction_info *info, |
3868 | kvm_set_segment(vcpu, &kvm_seg, seg); | 4405 | enum x86_intercept_stage stage) |
4406 | { | ||
4407 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); | ||
3869 | } | 4408 | } |
3870 | 4409 | ||
3871 | static struct x86_emulate_ops emulate_ops = { | 4410 | static struct x86_emulate_ops emulate_ops = { |
@@ -3875,21 +4414,29 @@ static struct x86_emulate_ops emulate_ops = { | |||
3875 | .read_emulated = emulator_read_emulated, | 4414 | .read_emulated = emulator_read_emulated, |
3876 | .write_emulated = emulator_write_emulated, | 4415 | .write_emulated = emulator_write_emulated, |
3877 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 4416 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
4417 | .invlpg = emulator_invlpg, | ||
3878 | .pio_in_emulated = emulator_pio_in_emulated, | 4418 | .pio_in_emulated = emulator_pio_in_emulated, |
3879 | .pio_out_emulated = emulator_pio_out_emulated, | 4419 | .pio_out_emulated = emulator_pio_out_emulated, |
3880 | .get_cached_descriptor = emulator_get_cached_descriptor, | 4420 | .get_segment = emulator_get_segment, |
3881 | .set_cached_descriptor = emulator_set_cached_descriptor, | 4421 | .set_segment = emulator_set_segment, |
3882 | .get_segment_selector = emulator_get_segment_selector, | ||
3883 | .set_segment_selector = emulator_set_segment_selector, | ||
3884 | .get_cached_segment_base = emulator_get_cached_segment_base, | 4422 | .get_cached_segment_base = emulator_get_cached_segment_base, |
3885 | .get_gdt = emulator_get_gdt, | 4423 | .get_gdt = emulator_get_gdt, |
4424 | .get_idt = emulator_get_idt, | ||
4425 | .set_gdt = emulator_set_gdt, | ||
4426 | .set_idt = emulator_set_idt, | ||
3886 | .get_cr = emulator_get_cr, | 4427 | .get_cr = emulator_get_cr, |
3887 | .set_cr = emulator_set_cr, | 4428 | .set_cr = emulator_set_cr, |
3888 | .cpl = emulator_get_cpl, | 4429 | .cpl = emulator_get_cpl, |
3889 | .get_dr = emulator_get_dr, | 4430 | .get_dr = emulator_get_dr, |
3890 | .set_dr = emulator_set_dr, | 4431 | .set_dr = emulator_set_dr, |
3891 | .set_msr = kvm_set_msr, | 4432 | .set_msr = emulator_set_msr, |
3892 | .get_msr = kvm_get_msr, | 4433 | .get_msr = emulator_get_msr, |
4434 | .halt = emulator_halt, | ||
4435 | .wbinvd = emulator_wbinvd, | ||
4436 | .fix_hypercall = emulator_fix_hypercall, | ||
4437 | .get_fpu = emulator_get_fpu, | ||
4438 | .put_fpu = emulator_put_fpu, | ||
4439 | .intercept = emulator_intercept, | ||
3893 | }; | 4440 | }; |
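The common thread in the reshuffled emulate_ops table above is that every callback now receives only the x86_emulate_ctxt and recovers its vcpu with emul_to_vcpu(), a container_of() over the embedded context, instead of taking the vcpu as an extra parameter. A minimal standalone sketch of that pattern, with toy emu_ctxt/toy_vcpu types that are not the KVM structures, is below.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Toy stand-ins for x86_emulate_ctxt and kvm_vcpu. */
struct emu_ctxt { unsigned long eip; };
struct toy_vcpu {
	int id;
	struct emu_ctxt ctxt;		/* embedded, like arch.emulate_ctxt */
};

/* Callbacks now take only the context... */
static int get_cpl(struct emu_ctxt *ctxt)
{
	/* ...and climb back to the enclosing vcpu, as emul_to_vcpu() does. */
	struct toy_vcpu *vcpu = container_of(ctxt, struct toy_vcpu, ctxt);

	return vcpu->id & 3;		/* fake CPL, just to return something */
}

static struct emu_ops { int (*cpl)(struct emu_ctxt *); } ops = { .cpl = get_cpl };

int main(void)
{
	struct toy_vcpu vcpu = { .id = 7, .ctxt = { .eip = 0x1000 } };

	printf("cpl = %d\n", ops.cpl(&vcpu.ctxt));
	return 0;
}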
3894 | 4441 | ||
3895 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 4442 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
@@ -3917,23 +4464,89 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | |||
3917 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) | 4464 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) |
3918 | { | 4465 | { |
3919 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 4466 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
3920 | if (ctxt->exception == PF_VECTOR) | 4467 | if (ctxt->exception.vector == PF_VECTOR) |
3921 | kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code); | 4468 | kvm_propagate_fault(vcpu, &ctxt->exception); |
3922 | else if (ctxt->error_code_valid) | 4469 | else if (ctxt->exception.error_code_valid) |
3923 | kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code); | 4470 | kvm_queue_exception_e(vcpu, ctxt->exception.vector, |
4471 | ctxt->exception.error_code); | ||
4472 | else | ||
4473 | kvm_queue_exception(vcpu, ctxt->exception.vector); | ||
4474 | } | ||
4475 | |||
4476 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | ||
4477 | { | ||
4478 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | ||
4479 | int cs_db, cs_l; | ||
4480 | |||
4481 | /* | ||
4482 | * TODO: fix emulate.c to use guest_read/write_register | ||
4483 | * instead of direct ->regs accesses, can save hundred cycles | ||
4484 | * on Intel for instructions that don't read/change RSP, for | ||
4485 | * example. | ||
4486 | */ | ||
4487 | cache_all_regs(vcpu); | ||
4488 | |||
4489 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | ||
4490 | |||
4491 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | ||
4492 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | ||
4493 | vcpu->arch.emulate_ctxt.mode = | ||
4494 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
4495 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | ||
4496 | ? X86EMUL_MODE_VM86 : cs_l | ||
4497 | ? X86EMUL_MODE_PROT64 : cs_db | ||
4498 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | ||
4499 | vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu); | ||
4500 | memset(c, 0, sizeof(struct decode_cache)); | ||
4501 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
4502 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | ||
4503 | } | ||
4504 | |||
4505 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) | ||
4506 | { | ||
4507 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | ||
4508 | int ret; | ||
4509 | |||
4510 | init_emulate_ctxt(vcpu); | ||
4511 | |||
4512 | vcpu->arch.emulate_ctxt.decode.op_bytes = 2; | ||
4513 | vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; | ||
4514 | vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + | ||
4515 | inc_eip; | ||
4516 | ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); | ||
4517 | |||
4518 | if (ret != X86EMUL_CONTINUE) | ||
4519 | return EMULATE_FAIL; | ||
4520 | |||
4521 | vcpu->arch.emulate_ctxt.eip = c->eip; | ||
4522 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
4523 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | ||
4524 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
4525 | |||
4526 | if (irq == NMI_VECTOR) | ||
4527 | vcpu->arch.nmi_pending = false; | ||
3924 | else | 4528 | else |
3925 | kvm_queue_exception(vcpu, ctxt->exception); | 4529 | vcpu->arch.interrupt.pending = false; |
4530 | |||
4531 | return EMULATE_DONE; | ||
3926 | } | 4532 | } |
4533 | EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); | ||
3927 | 4534 | ||
3928 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) | 4535 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) |
3929 | { | 4536 | { |
4537 | int r = EMULATE_DONE; | ||
4538 | |||
3930 | ++vcpu->stat.insn_emulation_fail; | 4539 | ++vcpu->stat.insn_emulation_fail; |
3931 | trace_kvm_emulate_insn_failed(vcpu); | 4540 | trace_kvm_emulate_insn_failed(vcpu); |
3932 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 4541 | if (!is_guest_mode(vcpu)) { |
3933 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 4542 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
3934 | vcpu->run->internal.ndata = 0; | 4543 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
4544 | vcpu->run->internal.ndata = 0; | ||
4545 | r = EMULATE_FAIL; | ||
4546 | } | ||
3935 | kvm_queue_exception(vcpu, UD_VECTOR); | 4547 | kvm_queue_exception(vcpu, UD_VECTOR); |
3936 | return EMULATE_FAIL; | 4548 | |
4549 | return r; | ||
3937 | } | 4550 | } |
3938 | 4551 | ||
3939 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | 4552 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) |
@@ -3962,74 +4575,34 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
3962 | return false; | 4575 | return false; |
3963 | } | 4576 | } |
3964 | 4577 | ||
3965 | int emulate_instruction(struct kvm_vcpu *vcpu, | 4578 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
3966 | unsigned long cr2, | 4579 | unsigned long cr2, |
3967 | u16 error_code, | 4580 | int emulation_type, |
3968 | int emulation_type) | 4581 | void *insn, |
4582 | int insn_len) | ||
3969 | { | 4583 | { |
3970 | int r; | 4584 | int r; |
3971 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 4585 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
4586 | bool writeback = true; | ||
3972 | 4587 | ||
3973 | kvm_clear_exception_queue(vcpu); | 4588 | kvm_clear_exception_queue(vcpu); |
3974 | vcpu->arch.mmio_fault_cr2 = cr2; | ||
3975 | /* | ||
3976 | * TODO: fix emulate.c to use guest_read/write_register | ||
3977 | * instead of direct ->regs accesses, can save hundred cycles | ||
3978 | * on Intel for instructions that don't read/change RSP, for | ||
3979 | * for example. | ||
3980 | */ | ||
3981 | cache_all_regs(vcpu); | ||
3982 | 4589 | ||
3983 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 4590 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
3984 | int cs_db, cs_l; | 4591 | init_emulate_ctxt(vcpu); |
3985 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | ||
3986 | |||
3987 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | ||
3988 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); | ||
3989 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | ||
3990 | vcpu->arch.emulate_ctxt.mode = | ||
3991 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
3992 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | ||
3993 | ? X86EMUL_MODE_VM86 : cs_l | ||
3994 | ? X86EMUL_MODE_PROT64 : cs_db | ||
3995 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | ||
3996 | memset(c, 0, sizeof(struct decode_cache)); | ||
3997 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
3998 | vcpu->arch.emulate_ctxt.interruptibility = 0; | 4592 | vcpu->arch.emulate_ctxt.interruptibility = 0; |
3999 | vcpu->arch.emulate_ctxt.exception = -1; | 4593 | vcpu->arch.emulate_ctxt.have_exception = false; |
4000 | 4594 | vcpu->arch.emulate_ctxt.perm_ok = false; | |
4001 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | ||
4002 | trace_kvm_emulate_insn_start(vcpu); | ||
4003 | 4595 | ||
4004 | /* Only allow emulation of specific instructions on #UD | 4596 | vcpu->arch.emulate_ctxt.only_vendor_specific_insn |
4005 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 4597 | = emulation_type & EMULTYPE_TRAP_UD; |
4006 | if (emulation_type & EMULTYPE_TRAP_UD) { | ||
4007 | if (!c->twobyte) | ||
4008 | return EMULATE_FAIL; | ||
4009 | switch (c->b) { | ||
4010 | case 0x01: /* VMMCALL */ | ||
4011 | if (c->modrm_mod != 3 || c->modrm_rm != 1) | ||
4012 | return EMULATE_FAIL; | ||
4013 | break; | ||
4014 | case 0x34: /* sysenter */ | ||
4015 | case 0x35: /* sysexit */ | ||
4016 | if (c->modrm_mod != 0 || c->modrm_rm != 0) | ||
4017 | return EMULATE_FAIL; | ||
4018 | break; | ||
4019 | case 0x05: /* syscall */ | ||
4020 | if (c->modrm_mod != 0 || c->modrm_rm != 0) | ||
4021 | return EMULATE_FAIL; | ||
4022 | break; | ||
4023 | default: | ||
4024 | return EMULATE_FAIL; | ||
4025 | } | ||
4026 | 4598 | ||
4027 | if (!(c->modrm_reg == 0 || c->modrm_reg == 3)) | 4599 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); |
4028 | return EMULATE_FAIL; | ||
4029 | } | ||
4030 | 4600 | ||
4601 | trace_kvm_emulate_insn_start(vcpu); | ||
4031 | ++vcpu->stat.insn_emulation; | 4602 | ++vcpu->stat.insn_emulation; |
4032 | if (r) { | 4603 | if (r) { |
4604 | if (emulation_type & EMULTYPE_TRAP_UD) | ||
4605 | return EMULATE_FAIL; | ||
4033 | if (reexecute_instruction(vcpu, cr2)) | 4606 | if (reexecute_instruction(vcpu, cr2)) |
4034 | return EMULATE_DONE; | 4607 | return EMULATE_DONE; |
4035 | if (emulation_type & EMULTYPE_SKIP) | 4608 | if (emulation_type & EMULTYPE_SKIP) |
@@ -4043,62 +4616,87 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
4043 | return EMULATE_DONE; | 4616 | return EMULATE_DONE; |
4044 | } | 4617 | } |
4045 | 4618 | ||
4046 | /* this is needed for vmware backdor interface to work since it | 4619 | /* this is needed for vmware backdoor interface to work since it |
4047 | changes registers values during IO operation */ | 4620 | changes registers values during IO operation */ |
4048 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | 4621 | if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { |
4622 | vcpu->arch.emulate_regs_need_sync_from_vcpu = false; | ||
4623 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
4624 | } | ||
4049 | 4625 | ||
4050 | restart: | 4626 | restart: |
4051 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 4627 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt); |
4628 | |||
4629 | if (r == EMULATION_INTERCEPTED) | ||
4630 | return EMULATE_DONE; | ||
4052 | 4631 | ||
4053 | if (r) { /* emulation failed */ | 4632 | if (r == EMULATION_FAILED) { |
4054 | if (reexecute_instruction(vcpu, cr2)) | 4633 | if (reexecute_instruction(vcpu, cr2)) |
4055 | return EMULATE_DONE; | 4634 | return EMULATE_DONE; |
4056 | 4635 | ||
4057 | return handle_emulation_failure(vcpu); | 4636 | return handle_emulation_failure(vcpu); |
4058 | } | 4637 | } |
4059 | 4638 | ||
4060 | toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); | 4639 | if (vcpu->arch.emulate_ctxt.have_exception) { |
4061 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
4062 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
4063 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | ||
4064 | |||
4065 | if (vcpu->arch.emulate_ctxt.exception >= 0) { | ||
4066 | inject_emulated_exception(vcpu); | 4640 | inject_emulated_exception(vcpu); |
4067 | return EMULATE_DONE; | 4641 | r = EMULATE_DONE; |
4068 | } | 4642 | } else if (vcpu->arch.pio.count) { |
4069 | |||
4070 | if (vcpu->arch.pio.count) { | ||
4071 | if (!vcpu->arch.pio.in) | 4643 | if (!vcpu->arch.pio.in) |
4072 | vcpu->arch.pio.count = 0; | 4644 | vcpu->arch.pio.count = 0; |
4073 | return EMULATE_DO_MMIO; | 4645 | else |
4074 | } | 4646 | writeback = false; |
4075 | 4647 | r = EMULATE_DO_MMIO; | |
4076 | if (vcpu->mmio_needed) { | 4648 | } else if (vcpu->mmio_needed) { |
4077 | if (vcpu->mmio_is_write) | 4649 | if (!vcpu->mmio_is_write) |
4078 | vcpu->mmio_needed = 0; | 4650 | writeback = false; |
4079 | return EMULATE_DO_MMIO; | 4651 | r = EMULATE_DO_MMIO; |
4080 | } | 4652 | } else if (r == EMULATION_RESTART) |
4081 | |||
4082 | if (vcpu->arch.emulate_ctxt.restart) | ||
4083 | goto restart; | 4653 | goto restart; |
4654 | else | ||
4655 | r = EMULATE_DONE; | ||
4656 | |||
4657 | if (writeback) { | ||
4658 | toggle_interruptibility(vcpu, | ||
4659 | vcpu->arch.emulate_ctxt.interruptibility); | ||
4660 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
4661 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
4662 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
4663 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | ||
4664 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | ||
4665 | } else | ||
4666 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; | ||
4084 | 4667 | ||
4085 | return EMULATE_DONE; | 4668 | return r; |
4086 | } | 4669 | } |
4087 | EXPORT_SYMBOL_GPL(emulate_instruction); | 4670 | EXPORT_SYMBOL_GPL(x86_emulate_instruction); |
4088 | 4671 | ||
4089 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) | 4672 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
4090 | { | 4673 | { |
4091 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4674 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4092 | int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); | 4675 | int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, |
4676 | size, port, &val, 1); | ||
4093 | /* do not return to emulator after return from userspace */ | 4677 | /* do not return to emulator after return from userspace */ |
4094 | vcpu->arch.pio.count = 0; | 4678 | vcpu->arch.pio.count = 0; |
4095 | return ret; | 4679 | return ret; |
4096 | } | 4680 | } |
4097 | EXPORT_SYMBOL_GPL(kvm_fast_pio_out); | 4681 | EXPORT_SYMBOL_GPL(kvm_fast_pio_out); |
4098 | 4682 | ||
4099 | static void bounce_off(void *info) | 4683 | static void tsc_bad(void *info) |
4100 | { | 4684 | { |
4101 | /* nothing */ | 4685 | __this_cpu_write(cpu_tsc_khz, 0); |
4686 | } | ||
4687 | |||
4688 | static void tsc_khz_changed(void *data) | ||
4689 | { | ||
4690 | struct cpufreq_freqs *freq = data; | ||
4691 | unsigned long khz = 0; | ||
4692 | |||
4693 | if (data) | ||
4694 | khz = freq->new; | ||
4695 | else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
4696 | khz = cpufreq_quick_get(raw_smp_processor_id()); | ||
4697 | if (!khz) | ||
4698 | khz = tsc_khz; | ||
4699 | __this_cpu_write(cpu_tsc_khz, khz); | ||
4102 | } | 4700 | } |
4103 | 4701 | ||
4104 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 4702 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, |
@@ -4109,24 +4707,63 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
4109 | struct kvm_vcpu *vcpu; | 4707 | struct kvm_vcpu *vcpu; |
4110 | int i, send_ipi = 0; | 4708 | int i, send_ipi = 0; |
4111 | 4709 | ||
4710 | /* | ||
4711 | * We allow guests to temporarily run on slowing clocks, | ||
4712 | * provided we notify them after, or to run on accelerating | ||
4713 | * clocks, provided we notify them before. Thus time never | ||
4714 | * goes backwards. | ||
4715 | * | ||
4716 | * However, we have a problem. We can't atomically update | ||
4717 | * the frequency of a given CPU from this function; it is | ||
4718 | * merely a notifier, which can be called from any CPU. | ||
4719 | * Changing the TSC frequency at arbitrary points in time | ||
4720 | * requires a recomputation of local variables related to | ||
4721 | * the TSC for each VCPU. We must flag these local variables | ||
4722 | * to be updated and be sure the update takes place with the | ||
4723 | * new frequency before any guests proceed. | ||
4724 | * | ||
4725 | * Unfortunately, the combination of hotplug CPU and frequency | ||
4726 | * change creates an intractable locking scenario; the order | ||
4727 | * of when these callouts happen is undefined with respect to | ||
4728 | * CPU hotplug, and they can race with each other. As such, | ||
4729 | * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is | ||
4730 | * undefined; you can actually have a CPU frequency change take | ||
4731 | * place in between the computation of X and the setting of the | ||
4732 | * variable. To protect against this problem, all updates of | ||
4733 | * the per_cpu tsc_khz variable are done in an interrupt | ||
4734 | * protected IPI, and all callers wishing to update the value | ||
4735 | * must wait for a synchronous IPI to complete (which is trivial | ||
4736 | * if the caller is on the CPU already). This establishes the | ||
4737 | * necessary total order on variable updates. | ||
4738 | * | ||
4739 | * Note that because a guest time update may take place | ||
4740 | * anytime after the setting of the VCPU's request bit, the | ||
4741 | * correct TSC value must be set before the request. However, | ||
4742 | * to ensure the update actually makes it to any guest which | ||
4743 | * starts running in hardware virtualization between the set | ||
4744 | * and the acquisition of the spinlock, we must also ping the | ||
4745 | * CPU after setting the request bit. | ||
4746 | * | ||
4747 | */ | ||
4748 | |||
4112 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) | 4749 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) |
4113 | return 0; | 4750 | return 0; |
4114 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) | 4751 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) |
4115 | return 0; | 4752 | return 0; |
4116 | per_cpu(cpu_tsc_khz, freq->cpu) = freq->new; | ||
4117 | 4753 | ||
4118 | spin_lock(&kvm_lock); | 4754 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); |
4755 | |||
4756 | raw_spin_lock(&kvm_lock); | ||
4119 | list_for_each_entry(kvm, &vm_list, vm_list) { | 4757 | list_for_each_entry(kvm, &vm_list, vm_list) { |
4120 | kvm_for_each_vcpu(i, vcpu, kvm) { | 4758 | kvm_for_each_vcpu(i, vcpu, kvm) { |
4121 | if (vcpu->cpu != freq->cpu) | 4759 | if (vcpu->cpu != freq->cpu) |
4122 | continue; | 4760 | continue; |
4123 | if (!kvm_request_guest_time_update(vcpu)) | 4761 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
4124 | continue; | ||
4125 | if (vcpu->cpu != smp_processor_id()) | 4762 | if (vcpu->cpu != smp_processor_id()) |
4126 | send_ipi++; | 4763 | send_ipi = 1; |
4127 | } | 4764 | } |
4128 | } | 4765 | } |
4129 | spin_unlock(&kvm_lock); | 4766 | raw_spin_unlock(&kvm_lock); |
4130 | 4767 | ||
4131 | if (freq->old < freq->new && send_ipi) { | 4768 | if (freq->old < freq->new && send_ipi) { |
4132 | /* | 4769 | /* |
@@ -4141,32 +4778,59 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
4141 | * guest context is entered kvmclock will be updated, | 4778 | * guest context is entered kvmclock will be updated, |
4142 | * so the guest will not see stale values. | 4779 | * so the guest will not see stale values. |
4143 | */ | 4780 | */ |
4144 | smp_call_function_single(freq->cpu, bounce_off, NULL, 1); | 4781 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); |
4145 | } | 4782 | } |
4146 | return 0; | 4783 | return 0; |
4147 | } | 4784 | } |
4148 | 4785 | ||
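The comment in kvmclock_cpufreq_notifier() above motivates funnelling every cpu_tsc_khz update through a synchronous IPI so that updates are totally ordered with respect to hotplug and frequency changes. A minimal sketch of that pattern follows, with hypothetical names (demo_khz, demo_set_khz, demo_update_khz) that are not part of this patch:

/* Illustrative only: a hypothetical per-CPU variable updated via IPI. */
#include <linux/smp.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_khz);

static void demo_set_khz(void *data)
{
	/* Runs on the target CPU, in interrupt context. */
	__this_cpu_write(demo_khz, *(unsigned long *)data);
}

static void demo_update_khz(int cpu, unsigned long khz)
{
	/* wait=1: block until the remote CPU has performed the write. */
	smp_call_function_single(cpu, demo_set_khz, &khz, 1);
}

Because the caller passes wait=1, it does not return until the remote write has completed, which is the ordering guarantee the comment above relies on.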
4149 | static struct notifier_block kvmclock_cpufreq_notifier_block = { | 4786 | static struct notifier_block kvmclock_cpufreq_notifier_block = { |
4150 | .notifier_call = kvmclock_cpufreq_notifier | 4787 | .notifier_call = kvmclock_cpufreq_notifier |
4788 | }; | ||
4789 | |||
4790 | static int kvmclock_cpu_notifier(struct notifier_block *nfb, | ||
4791 | unsigned long action, void *hcpu) | ||
4792 | { | ||
4793 | unsigned int cpu = (unsigned long)hcpu; | ||
4794 | |||
4795 | switch (action) { | ||
4796 | case CPU_ONLINE: | ||
4797 | case CPU_DOWN_FAILED: | ||
4798 | smp_call_function_single(cpu, tsc_khz_changed, NULL, 1); | ||
4799 | break; | ||
4800 | case CPU_DOWN_PREPARE: | ||
4801 | smp_call_function_single(cpu, tsc_bad, NULL, 1); | ||
4802 | break; | ||
4803 | } | ||
4804 | return NOTIFY_OK; | ||
4805 | } | ||
4806 | |||
4807 | static struct notifier_block kvmclock_cpu_notifier_block = { | ||
4808 | .notifier_call = kvmclock_cpu_notifier, | ||
4809 | .priority = -INT_MAX | ||
4151 | }; | 4810 | }; |
4152 | 4811 | ||
4153 | static void kvm_timer_init(void) | 4812 | static void kvm_timer_init(void) |
4154 | { | 4813 | { |
4155 | int cpu; | 4814 | int cpu; |
4156 | 4815 | ||
4816 | max_tsc_khz = tsc_khz; | ||
4817 | register_hotcpu_notifier(&kvmclock_cpu_notifier_block); | ||
4157 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | 4818 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { |
4819 | #ifdef CONFIG_CPU_FREQ | ||
4820 | struct cpufreq_policy policy; | ||
4821 | memset(&policy, 0, sizeof(policy)); | ||
4822 | cpu = get_cpu(); | ||
4823 | cpufreq_get_policy(&policy, cpu); | ||
4824 | if (policy.cpuinfo.max_freq) | ||
4825 | max_tsc_khz = policy.cpuinfo.max_freq; | ||
4826 | put_cpu(); | ||
4827 | #endif | ||
4158 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | 4828 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, |
4159 | CPUFREQ_TRANSITION_NOTIFIER); | 4829 | CPUFREQ_TRANSITION_NOTIFIER); |
4160 | for_each_online_cpu(cpu) { | ||
4161 | unsigned long khz = cpufreq_get(cpu); | ||
4162 | if (!khz) | ||
4163 | khz = tsc_khz; | ||
4164 | per_cpu(cpu_tsc_khz, cpu) = khz; | ||
4165 | } | ||
4166 | } else { | ||
4167 | for_each_possible_cpu(cpu) | ||
4168 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | ||
4169 | } | 4830 | } |
4831 | pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); | ||
4832 | for_each_online_cpu(cpu) | ||
4833 | smp_call_function_single(cpu, tsc_khz_changed, NULL, 1); | ||
4170 | } | 4834 | } |
4171 | 4835 | ||
4172 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | 4836 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); |
@@ -4244,7 +4908,6 @@ int kvm_arch_init(void *opaque) | |||
4244 | 4908 | ||
4245 | kvm_x86_ops = ops; | 4909 | kvm_x86_ops = ops; |
4246 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 4910 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
4247 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | ||
4248 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 4911 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
4249 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 4912 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
4250 | 4913 | ||
@@ -4268,6 +4931,7 @@ void kvm_arch_exit(void) | |||
4268 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 4931 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
4269 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, | 4932 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, |
4270 | CPUFREQ_TRANSITION_NOTIFIER); | 4933 | CPUFREQ_TRANSITION_NOTIFIER); |
4934 | unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block); | ||
4271 | kvm_x86_ops = NULL; | 4935 | kvm_x86_ops = NULL; |
4272 | kvm_mmu_module_exit(); | 4936 | kvm_mmu_module_exit(); |
4273 | } | 4937 | } |
@@ -4403,8 +5067,9 @@ out: | |||
4403 | } | 5067 | } |
4404 | EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); | 5068 | EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); |
4405 | 5069 | ||
4406 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | 5070 | int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) |
4407 | { | 5071 | { |
5072 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | ||
4408 | char instruction[3]; | 5073 | char instruction[3]; |
4409 | unsigned long rip = kvm_rip_read(vcpu); | 5074 | unsigned long rip = kvm_rip_read(vcpu); |
4410 | 5075 | ||
@@ -4417,21 +5082,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
4417 | 5082 | ||
4418 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 5083 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
4419 | 5084 | ||
4420 | return emulator_write_emulated(rip, instruction, 3, NULL, vcpu); | 5085 | return emulator_write_emulated(&vcpu->arch.emulate_ctxt, |
4421 | } | 5086 | rip, instruction, 3, NULL); |
4422 | |||
4423 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | ||
4424 | { | ||
4425 | struct desc_ptr dt = { limit, base }; | ||
4426 | |||
4427 | kvm_x86_ops->set_gdt(vcpu, &dt); | ||
4428 | } | ||
4429 | |||
4430 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | ||
4431 | { | ||
4432 | struct desc_ptr dt = { limit, base }; | ||
4433 | |||
4434 | kvm_x86_ops->set_idt(vcpu, &dt); | ||
4435 | } | 5087 | } |
4436 | 5088 | ||
4437 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 5089 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
@@ -4482,12 +5134,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | |||
4482 | best = e; | 5134 | best = e; |
4483 | break; | 5135 | break; |
4484 | } | 5136 | } |
4485 | /* | ||
4486 | * Both basic or both extended? | ||
4487 | */ | ||
4488 | if (((e->function ^ function) & 0x80000000) == 0) | ||
4489 | if (!best || e->function > best->function) | ||
4490 | best = e; | ||
4491 | } | 5137 | } |
4492 | return best; | 5138 | return best; |
4493 | } | 5139 | } |
@@ -4507,6 +5153,27 @@ not_found: | |||
4507 | return 36; | 5153 | return 36; |
4508 | } | 5154 | } |
4509 | 5155 | ||
5156 | /* | ||
5157 | * If no match is found, check whether we exceed the vCPU's limit | ||
5158 | * and return the content of the highest valid _standard_ leaf instead. | ||
5159 | * This is to satisfy the CPUID specification. | ||
5160 | */ | ||
5161 | static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | ||
5162 | u32 function, u32 index) | ||
5163 | { | ||
5164 | struct kvm_cpuid_entry2 *maxlevel; | ||
5165 | |||
5166 | maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0); | ||
5167 | if (!maxlevel || maxlevel->eax >= function) | ||
5168 | return NULL; | ||
5169 | if (function & 0x80000000) { | ||
5170 | maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0); | ||
5171 | if (!maxlevel) | ||
5172 | return NULL; | ||
5173 | } | ||
5174 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | ||
5175 | } | ||
5176 | |||
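check_cpuid_limit() implements the architectural out-of-range rule: a CPUID leaf above the reported maximum returns the contents of the highest valid standard leaf. A small userspace sketch that can observe this from inside a guest is shown below; it assumes a CPU or hypervisor that follows the Intel-documented behaviour and is illustrative only, not part of the patch:

#include <stdio.h>
#include <cpuid.h>	/* GCC helper for the CPUID instruction */

int main(void)
{
	unsigned int a, b, c, d, max_std;

	__cpuid(0, max_std, b, c, d);	/* leaf 0, EAX: highest standard leaf */
	__cpuid(max_std, a, b, c, d);
	printf("leaf 0x%x:         %08x %08x %08x %08x\n", max_std, a, b, c, d);

	/* An out-of-range leaf should echo the highest standard leaf. */
	__cpuid(max_std + 0x100, a, b, c, d);
	printf("out-of-range leaf: %08x %08x %08x %08x\n", a, b, c, d);
	return 0;
}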
4510 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | 5177 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) |
4511 | { | 5178 | { |
4512 | u32 function, index; | 5179 | u32 function, index; |
@@ -4519,6 +5186,10 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
4519 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); | 5186 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); |
4520 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); | 5187 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); |
4521 | best = kvm_find_cpuid_entry(vcpu, function, index); | 5188 | best = kvm_find_cpuid_entry(vcpu, function, index); |
5189 | |||
5190 | if (!best) | ||
5191 | best = check_cpuid_limit(vcpu, function, index); | ||
5192 | |||
4522 | if (best) { | 5193 | if (best) { |
4523 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); | 5194 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); |
4524 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); | 5195 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); |
@@ -4675,6 +5346,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | |||
4675 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5346 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
4676 | { | 5347 | { |
4677 | int r; | 5348 | int r; |
5349 | bool nmi_pending; | ||
4678 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5350 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
4679 | vcpu->run->request_interrupt_window; | 5351 | vcpu->run->request_interrupt_window; |
4680 | 5352 | ||
@@ -4683,8 +5355,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
4683 | kvm_mmu_unload(vcpu); | 5355 | kvm_mmu_unload(vcpu); |
4684 | if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) | 5356 | if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) |
4685 | __kvm_migrate_timers(vcpu); | 5357 | __kvm_migrate_timers(vcpu); |
4686 | if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) | 5358 | if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) { |
4687 | kvm_write_guest_time(vcpu); | 5359 | r = kvm_guest_time_update(vcpu); |
5360 | if (unlikely(r)) | ||
5361 | goto out; | ||
5362 | } | ||
4688 | if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) | 5363 | if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) |
4689 | kvm_mmu_sync_roots(vcpu); | 5364 | kvm_mmu_sync_roots(vcpu); |
4690 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) | 5365 | if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) |
@@ -4703,12 +5378,41 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
4703 | vcpu->fpu_active = 0; | 5378 | vcpu->fpu_active = 0; |
4704 | kvm_x86_ops->fpu_deactivate(vcpu); | 5379 | kvm_x86_ops->fpu_deactivate(vcpu); |
4705 | } | 5380 | } |
5381 | if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { | ||
5382 | /* Page is swapped out. Do synthetic halt */ | ||
5383 | vcpu->arch.apf.halted = true; | ||
5384 | r = 1; | ||
5385 | goto out; | ||
5386 | } | ||
4706 | } | 5387 | } |
4707 | 5388 | ||
4708 | r = kvm_mmu_reload(vcpu); | 5389 | r = kvm_mmu_reload(vcpu); |
4709 | if (unlikely(r)) | 5390 | if (unlikely(r)) |
4710 | goto out; | 5391 | goto out; |
4711 | 5392 | ||
5393 | /* | ||
5394 | * An NMI can be injected between the local nmi_pending read here | ||
5395 | * and the vcpu->arch.nmi_pending read inside inject_pending_event(). | ||
5396 | * But in that case KVM_REQ_EVENT will be set, which makes | ||
5397 | * this race benign. | ||
5398 | */ | ||
5399 | nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending); | ||
5400 | |||
5401 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | ||
5402 | inject_pending_event(vcpu); | ||
5403 | |||
5404 | /* enable NMI/IRQ window open exits if needed */ | ||
5405 | if (nmi_pending) | ||
5406 | kvm_x86_ops->enable_nmi_window(vcpu); | ||
5407 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | ||
5408 | kvm_x86_ops->enable_irq_window(vcpu); | ||
5409 | |||
5410 | if (kvm_lapic_enabled(vcpu)) { | ||
5411 | update_cr8_intercept(vcpu); | ||
5412 | kvm_lapic_sync_to_vapic(vcpu); | ||
5413 | } | ||
5414 | } | ||
5415 | |||
4712 | preempt_disable(); | 5416 | preempt_disable(); |
4713 | 5417 | ||
4714 | kvm_x86_ops->prepare_guest_switch(vcpu); | 5418 | kvm_x86_ops->prepare_guest_switch(vcpu); |
@@ -4716,34 +5420,26 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
4716 | kvm_load_guest_fpu(vcpu); | 5420 | kvm_load_guest_fpu(vcpu); |
4717 | kvm_load_guest_xcr0(vcpu); | 5421 | kvm_load_guest_xcr0(vcpu); |
4718 | 5422 | ||
4719 | atomic_set(&vcpu->guest_mode, 1); | 5423 | vcpu->mode = IN_GUEST_MODE; |
4720 | smp_wmb(); | 5424 | |
5425 | /* We should set ->mode before checking ->requests, | ||
5426 | * see the comment in make_all_cpus_request. | ||
5427 | */ | ||
5428 | smp_mb(); | ||
4721 | 5429 | ||
4722 | local_irq_disable(); | 5430 | local_irq_disable(); |
4723 | 5431 | ||
4724 | if (!atomic_read(&vcpu->guest_mode) || vcpu->requests | 5432 | if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests |
4725 | || need_resched() || signal_pending(current)) { | 5433 | || need_resched() || signal_pending(current)) { |
4726 | atomic_set(&vcpu->guest_mode, 0); | 5434 | vcpu->mode = OUTSIDE_GUEST_MODE; |
4727 | smp_wmb(); | 5435 | smp_wmb(); |
4728 | local_irq_enable(); | 5436 | local_irq_enable(); |
4729 | preempt_enable(); | 5437 | preempt_enable(); |
5438 | kvm_x86_ops->cancel_injection(vcpu); | ||
4730 | r = 1; | 5439 | r = 1; |
4731 | goto out; | 5440 | goto out; |
4732 | } | 5441 | } |
4733 | 5442 | ||
4734 | inject_pending_event(vcpu); | ||
4735 | |||
4736 | /* enable NMI/IRQ window open exits if needed */ | ||
4737 | if (vcpu->arch.nmi_pending) | ||
4738 | kvm_x86_ops->enable_nmi_window(vcpu); | ||
4739 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | ||
4740 | kvm_x86_ops->enable_irq_window(vcpu); | ||
4741 | |||
4742 | if (kvm_lapic_enabled(vcpu)) { | ||
4743 | update_cr8_intercept(vcpu); | ||
4744 | kvm_lapic_sync_to_vapic(vcpu); | ||
4745 | } | ||
4746 | |||
4747 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 5443 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4748 | 5444 | ||
4749 | kvm_guest_enter(); | 5445 | kvm_guest_enter(); |
@@ -4769,7 +5465,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
4769 | if (hw_breakpoint_active()) | 5465 | if (hw_breakpoint_active()) |
4770 | hw_breakpoint_restore(); | 5466 | hw_breakpoint_restore(); |
4771 | 5467 | ||
4772 | atomic_set(&vcpu->guest_mode, 0); | 5468 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); |
5469 | |||
5470 | vcpu->mode = OUTSIDE_GUEST_MODE; | ||
4773 | smp_wmb(); | 5471 | smp_wmb(); |
4774 | local_irq_enable(); | 5472 | local_irq_enable(); |
4775 | 5473 | ||
@@ -4826,7 +5524,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4826 | 5524 | ||
4827 | r = 1; | 5525 | r = 1; |
4828 | while (r > 0) { | 5526 | while (r > 0) { |
4829 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 5527 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
5528 | !vcpu->arch.apf.halted) | ||
4830 | r = vcpu_enter_guest(vcpu); | 5529 | r = vcpu_enter_guest(vcpu); |
4831 | else { | 5530 | else { |
4832 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 5531 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
@@ -4839,6 +5538,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4839 | vcpu->arch.mp_state = | 5538 | vcpu->arch.mp_state = |
4840 | KVM_MP_STATE_RUNNABLE; | 5539 | KVM_MP_STATE_RUNNABLE; |
4841 | case KVM_MP_STATE_RUNNABLE: | 5540 | case KVM_MP_STATE_RUNNABLE: |
5541 | vcpu->arch.apf.halted = false; | ||
4842 | break; | 5542 | break; |
4843 | case KVM_MP_STATE_SIPI_RECEIVED: | 5543 | case KVM_MP_STATE_SIPI_RECEIVED: |
4844 | default: | 5544 | default: |
@@ -4860,6 +5560,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4860 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 5560 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
4861 | ++vcpu->stat.request_irq_exits; | 5561 | ++vcpu->stat.request_irq_exits; |
4862 | } | 5562 | } |
5563 | |||
5564 | kvm_check_async_pf_completion(vcpu); | ||
5565 | |||
4863 | if (signal_pending(current)) { | 5566 | if (signal_pending(current)) { |
4864 | r = -EINTR; | 5567 | r = -EINTR; |
4865 | vcpu->run->exit_reason = KVM_EXIT_INTR; | 5568 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
@@ -4879,11 +5582,49 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4879 | return r; | 5582 | return r; |
4880 | } | 5583 | } |
4881 | 5584 | ||
5585 | static int complete_mmio(struct kvm_vcpu *vcpu) | ||
5586 | { | ||
5587 | struct kvm_run *run = vcpu->run; | ||
5588 | int r; | ||
5589 | |||
5590 | if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) | ||
5591 | return 1; | ||
5592 | |||
5593 | if (vcpu->mmio_needed) { | ||
5594 | vcpu->mmio_needed = 0; | ||
5595 | if (!vcpu->mmio_is_write) | ||
5596 | memcpy(vcpu->mmio_data + vcpu->mmio_index, | ||
5597 | run->mmio.data, 8); | ||
5598 | vcpu->mmio_index += 8; | ||
5599 | if (vcpu->mmio_index < vcpu->mmio_size) { | ||
5600 | run->exit_reason = KVM_EXIT_MMIO; | ||
5601 | run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; | ||
5602 | memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); | ||
5603 | run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); | ||
5604 | run->mmio.is_write = vcpu->mmio_is_write; | ||
5605 | vcpu->mmio_needed = 1; | ||
5606 | return 0; | ||
5607 | } | ||
5608 | if (vcpu->mmio_is_write) | ||
5609 | return 1; | ||
5610 | vcpu->mmio_read_completed = 1; | ||
5611 | } | ||
5612 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5613 | r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); | ||
5614 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
5615 | if (r != EMULATE_DONE) | ||
5616 | return 0; | ||
5617 | return 1; | ||
5618 | } | ||
5619 | |||
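complete_mmio() above bounces a wide MMIO access to userspace at most 8 bytes at a time and resumes the in-kernel emulator on the next KVM_RUN. The matching userspace side of that loop looks roughly like the sketch below; device_regs and run_vcpu_once are hypothetical, and only struct kvm_run's mmio fields and the KVM_RUN ioctl come from the real ABI:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical 4 KiB device window backed by plain memory. */
static uint8_t device_regs[4096];

static void run_vcpu_once(int vcpu_fd, struct kvm_run *run, uint64_t mmio_base)
{
	ioctl(vcpu_fd, KVM_RUN, 0);

	if (run->exit_reason == KVM_EXIT_MMIO) {
		uint64_t off = run->mmio.phys_addr - mmio_base;

		if (off + run->mmio.len > sizeof(device_regs))
			return;		/* outside the emulated window */
		/* A wide guest access arrives as one exit per <= 8-byte chunk. */
		if (run->mmio.is_write)
			memcpy(device_regs + off, run->mmio.data, run->mmio.len);
		else
			memcpy(run->mmio.data, device_regs + off, run->mmio.len);
		/* The next KVM_RUN re-enters complete_mmio() with this chunk. */
	}
}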
4882 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 5620 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
4883 | { | 5621 | { |
4884 | int r; | 5622 | int r; |
4885 | sigset_t sigsaved; | 5623 | sigset_t sigsaved; |
4886 | 5624 | ||
5625 | if (!tsk_used_math(current) && init_fpu(current)) | ||
5626 | return -ENOMEM; | ||
5627 | |||
4887 | if (vcpu->sigset_active) | 5628 | if (vcpu->sigset_active) |
4888 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 5629 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
4889 | 5630 | ||
@@ -4895,24 +5636,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4895 | } | 5636 | } |
4896 | 5637 | ||
4897 | /* re-sync apic's tpr */ | 5638 | /* re-sync apic's tpr */ |
4898 | if (!irqchip_in_kernel(vcpu->kvm)) | 5639 | if (!irqchip_in_kernel(vcpu->kvm)) { |
4899 | kvm_set_cr8(vcpu, kvm_run->cr8); | 5640 | if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { |
4900 | 5641 | r = -EINVAL; | |
4901 | if (vcpu->arch.pio.count || vcpu->mmio_needed || | ||
4902 | vcpu->arch.emulate_ctxt.restart) { | ||
4903 | if (vcpu->mmio_needed) { | ||
4904 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | ||
4905 | vcpu->mmio_read_completed = 1; | ||
4906 | vcpu->mmio_needed = 0; | ||
4907 | } | ||
4908 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
4909 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); | ||
4910 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
4911 | if (r != EMULATE_DONE) { | ||
4912 | r = 0; | ||
4913 | goto out; | 5642 | goto out; |
4914 | } | 5643 | } |
4915 | } | 5644 | } |
5645 | |||
5646 | r = complete_mmio(vcpu); | ||
5647 | if (r <= 0) | ||
5648 | goto out; | ||
5649 | |||
4916 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | 5650 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) |
4917 | kvm_register_write(vcpu, VCPU_REGS_RAX, | 5651 | kvm_register_write(vcpu, VCPU_REGS_RAX, |
4918 | kvm_run->hypercall.ret); | 5652 | kvm_run->hypercall.ret); |
@@ -4929,6 +5663,18 @@ out: | |||
4929 | 5663 | ||
4930 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 5664 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
4931 | { | 5665 | { |
5666 | if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { | ||
5667 | /* | ||
5668 | * We are here if userspace calls get_regs() in the middle of | ||
5669 | * instruction emulation. Register state needs to be copied | ||
5670 | * back from the emulation context to the vcpu. Userspace shouldn't | ||
5671 | * normally do that, but some badly designed PV devices (the vmware | ||
5672 | * backdoor interface) need this to work. | ||
5673 | */ | ||
5674 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | ||
5675 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | ||
5676 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | ||
5677 | } | ||
4932 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 5678 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
4933 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); | 5679 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
4934 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 5680 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
@@ -4956,6 +5702,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4956 | 5702 | ||
4957 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 5703 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
4958 | { | 5704 | { |
5705 | vcpu->arch.emulate_regs_need_sync_from_vcpu = true; | ||
5706 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | ||
5707 | |||
4959 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); | 5708 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); |
4960 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); | 5709 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); |
4961 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); | 5710 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); |
@@ -4980,6 +5729,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
4980 | 5729 | ||
4981 | vcpu->arch.exception.pending = false; | 5730 | vcpu->arch.exception.pending = false; |
4982 | 5731 | ||
5732 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5733 | |||
4983 | return 0; | 5734 | return 0; |
4984 | } | 5735 | } |
4985 | 5736 | ||
@@ -5017,7 +5768,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
5017 | 5768 | ||
5018 | sregs->cr0 = kvm_read_cr0(vcpu); | 5769 | sregs->cr0 = kvm_read_cr0(vcpu); |
5019 | sregs->cr2 = vcpu->arch.cr2; | 5770 | sregs->cr2 = vcpu->arch.cr2; |
5020 | sregs->cr3 = vcpu->arch.cr3; | 5771 | sregs->cr3 = kvm_read_cr3(vcpu); |
5021 | sregs->cr4 = kvm_read_cr4(vcpu); | 5772 | sregs->cr4 = kvm_read_cr4(vcpu); |
5022 | sregs->cr8 = kvm_get_cr8(vcpu); | 5773 | sregs->cr8 = kvm_get_cr8(vcpu); |
5023 | sregs->efer = vcpu->arch.efer; | 5774 | sregs->efer = vcpu->arch.efer; |
@@ -5043,6 +5794,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
5043 | struct kvm_mp_state *mp_state) | 5794 | struct kvm_mp_state *mp_state) |
5044 | { | 5795 | { |
5045 | vcpu->arch.mp_state = mp_state->mp_state; | 5796 | vcpu->arch.mp_state = mp_state->mp_state; |
5797 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5046 | return 0; | 5798 | return 0; |
5047 | } | 5799 | } |
5048 | 5800 | ||
@@ -5050,24 +5802,11 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
5050 | bool has_error_code, u32 error_code) | 5802 | bool has_error_code, u32 error_code) |
5051 | { | 5803 | { |
5052 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; | 5804 | struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; |
5053 | int cs_db, cs_l, ret; | 5805 | int ret; |
5054 | cache_all_regs(vcpu); | ||
5055 | |||
5056 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | ||
5057 | 5806 | ||
5058 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 5807 | init_emulate_ctxt(vcpu); |
5059 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); | ||
5060 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | ||
5061 | vcpu->arch.emulate_ctxt.mode = | ||
5062 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
5063 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | ||
5064 | ? X86EMUL_MODE_VM86 : cs_l | ||
5065 | ? X86EMUL_MODE_PROT64 : cs_db | ||
5066 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | ||
5067 | memset(c, 0, sizeof(struct decode_cache)); | ||
5068 | memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); | ||
5069 | 5808 | ||
5070 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, | 5809 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, |
5071 | tss_selector, reason, has_error_code, | 5810 | tss_selector, reason, has_error_code, |
5072 | error_code); | 5811 | error_code); |
5073 | 5812 | ||
@@ -5076,7 +5815,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, | |||
5076 | 5815 | ||
5077 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); | 5816 | memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); |
5078 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); | 5817 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); |
5079 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 5818 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
5819 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5080 | return EMULATE_DONE; | 5820 | return EMULATE_DONE; |
5081 | } | 5821 | } |
5082 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 5822 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
@@ -5085,7 +5825,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5085 | struct kvm_sregs *sregs) | 5825 | struct kvm_sregs *sregs) |
5086 | { | 5826 | { |
5087 | int mmu_reset_needed = 0; | 5827 | int mmu_reset_needed = 0; |
5088 | int pending_vec, max_bits; | 5828 | int pending_vec, max_bits, idx; |
5089 | struct desc_ptr dt; | 5829 | struct desc_ptr dt; |
5090 | 5830 | ||
5091 | dt.size = sregs->idt.limit; | 5831 | dt.size = sregs->idt.limit; |
@@ -5096,8 +5836,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5096 | kvm_x86_ops->set_gdt(vcpu, &dt); | 5836 | kvm_x86_ops->set_gdt(vcpu, &dt); |
5097 | 5837 | ||
5098 | vcpu->arch.cr2 = sregs->cr2; | 5838 | vcpu->arch.cr2 = sregs->cr2; |
5099 | mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; | 5839 | mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3; |
5100 | vcpu->arch.cr3 = sregs->cr3; | 5840 | vcpu->arch.cr3 = sregs->cr3; |
5841 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | ||
5101 | 5842 | ||
5102 | kvm_set_cr8(vcpu, sregs->cr8); | 5843 | kvm_set_cr8(vcpu, sregs->cr8); |
5103 | 5844 | ||
@@ -5111,10 +5852,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5111 | 5852 | ||
5112 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; | 5853 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
5113 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5854 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
5855 | if (sregs->cr4 & X86_CR4_OSXSAVE) | ||
5856 | update_cpuid(vcpu); | ||
5857 | |||
5858 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5114 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5859 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
5115 | load_pdptrs(vcpu, vcpu->arch.cr3); | 5860 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); |
5116 | mmu_reset_needed = 1; | 5861 | mmu_reset_needed = 1; |
5117 | } | 5862 | } |
5863 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
5118 | 5864 | ||
5119 | if (mmu_reset_needed) | 5865 | if (mmu_reset_needed) |
5120 | kvm_mmu_reset_context(vcpu); | 5866 | kvm_mmu_reset_context(vcpu); |
@@ -5125,8 +5871,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5125 | if (pending_vec < max_bits) { | 5871 | if (pending_vec < max_bits) { |
5126 | kvm_queue_interrupt(vcpu, pending_vec, false); | 5872 | kvm_queue_interrupt(vcpu, pending_vec, false); |
5127 | pr_debug("Set back pending irq %d\n", pending_vec); | 5873 | pr_debug("Set back pending irq %d\n", pending_vec); |
5128 | if (irqchip_in_kernel(vcpu->kvm)) | ||
5129 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
5130 | } | 5874 | } |
5131 | 5875 | ||
5132 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 5876 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
@@ -5147,6 +5891,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5147 | !is_protmode(vcpu)) | 5891 | !is_protmode(vcpu)) |
5148 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5892 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
5149 | 5893 | ||
5894 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5895 | |||
5150 | return 0; | 5896 | return 0; |
5151 | } | 5897 | } |
5152 | 5898 | ||
@@ -5320,10 +6066,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
5320 | 6066 | ||
5321 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 6067 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
5322 | { | 6068 | { |
5323 | if (vcpu->arch.time_page) { | 6069 | kvmclock_reset(vcpu); |
5324 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
5325 | vcpu->arch.time_page = NULL; | ||
5326 | } | ||
5327 | 6070 | ||
5328 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); | 6071 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); |
5329 | fx_free(vcpu); | 6072 | fx_free(vcpu); |
@@ -5333,6 +6076,10 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
5333 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | 6076 | struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, |
5334 | unsigned int id) | 6077 | unsigned int id) |
5335 | { | 6078 | { |
6079 | if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) | ||
6080 | printk_once(KERN_WARNING | ||
6081 | "kvm: SMP vm created on host with unstable TSC; " | ||
6082 | "guest TSC will not be reliable\n"); | ||
5336 | return kvm_x86_ops->vcpu_create(kvm, id); | 6083 | return kvm_x86_ops->vcpu_create(kvm, id); |
5337 | } | 6084 | } |
5338 | 6085 | ||
@@ -5357,6 +6104,8 @@ free_vcpu: | |||
5357 | 6104 | ||
5358 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 6105 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
5359 | { | 6106 | { |
6107 | vcpu->arch.apf.msr_val = 0; | ||
6108 | |||
5360 | vcpu_load(vcpu); | 6109 | vcpu_load(vcpu); |
5361 | kvm_mmu_unload(vcpu); | 6110 | kvm_mmu_unload(vcpu); |
5362 | vcpu_put(vcpu); | 6111 | vcpu_put(vcpu); |
@@ -5375,22 +6124,29 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
5375 | vcpu->arch.dr6 = DR6_FIXED_1; | 6124 | vcpu->arch.dr6 = DR6_FIXED_1; |
5376 | vcpu->arch.dr7 = DR7_FIXED_1; | 6125 | vcpu->arch.dr7 = DR7_FIXED_1; |
5377 | 6126 | ||
6127 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
6128 | vcpu->arch.apf.msr_val = 0; | ||
6129 | |||
6130 | kvmclock_reset(vcpu); | ||
6131 | |||
6132 | kvm_clear_async_pf_completion_queue(vcpu); | ||
6133 | kvm_async_pf_hash_reset(vcpu); | ||
6134 | vcpu->arch.apf.halted = false; | ||
6135 | |||
5378 | return kvm_x86_ops->vcpu_reset(vcpu); | 6136 | return kvm_x86_ops->vcpu_reset(vcpu); |
5379 | } | 6137 | } |
5380 | 6138 | ||
5381 | int kvm_arch_hardware_enable(void *garbage) | 6139 | int kvm_arch_hardware_enable(void *garbage) |
5382 | { | 6140 | { |
5383 | /* | 6141 | struct kvm *kvm; |
5384 | * Since this may be called from a hotplug notifcation, | 6142 | struct kvm_vcpu *vcpu; |
5385 | * we can't get the CPU frequency directly. | 6143 | int i; |
5386 | */ | ||
5387 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
5388 | int cpu = raw_smp_processor_id(); | ||
5389 | per_cpu(cpu_tsc_khz, cpu) = 0; | ||
5390 | } | ||
5391 | 6144 | ||
5392 | kvm_shared_msr_cpu_online(); | 6145 | kvm_shared_msr_cpu_online(); |
5393 | 6146 | list_for_each_entry(kvm, &vm_list, vm_list) | |
6147 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
6148 | if (vcpu->cpu == smp_processor_id()) | ||
6149 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
5394 | return kvm_x86_ops->hardware_enable(garbage); | 6150 | return kvm_x86_ops->hardware_enable(garbage); |
5395 | } | 6151 | } |
5396 | 6152 | ||
@@ -5424,7 +6180,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5424 | BUG_ON(vcpu->kvm == NULL); | 6180 | BUG_ON(vcpu->kvm == NULL); |
5425 | kvm = vcpu->kvm; | 6181 | kvm = vcpu->kvm; |
5426 | 6182 | ||
6183 | vcpu->arch.emulate_ctxt.ops = &emulate_ops; | ||
6184 | vcpu->arch.walk_mmu = &vcpu->arch.mmu; | ||
5427 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 6185 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
6186 | vcpu->arch.mmu.translate_gpa = translate_gpa; | ||
6187 | vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; | ||
5428 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) | 6188 | if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) |
5429 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 6189 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
5430 | else | 6190 | else |
@@ -5437,6 +6197,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5437 | } | 6197 | } |
5438 | vcpu->arch.pio_data = page_address(page); | 6198 | vcpu->arch.pio_data = page_address(page); |
5439 | 6199 | ||
6200 | kvm_init_tsc_catchup(vcpu, max_tsc_khz); | ||
6201 | |||
5440 | r = kvm_mmu_create(vcpu); | 6202 | r = kvm_mmu_create(vcpu); |
5441 | if (r < 0) | 6203 | if (r < 0) |
5442 | goto fail_free_pio_data; | 6204 | goto fail_free_pio_data; |
@@ -5458,6 +6220,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
5458 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | 6220 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) |
5459 | goto fail_free_mce_banks; | 6221 | goto fail_free_mce_banks; |
5460 | 6222 | ||
6223 | kvm_async_pf_hash_reset(vcpu); | ||
6224 | |||
5461 | return 0; | 6225 | return 0; |
5462 | fail_free_mce_banks: | 6226 | fail_free_mce_banks: |
5463 | kfree(vcpu->arch.mce_banks); | 6227 | kfree(vcpu->arch.mce_banks); |
@@ -5483,22 +6247,17 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
5483 | free_page((unsigned long)vcpu->arch.pio_data); | 6247 | free_page((unsigned long)vcpu->arch.pio_data); |
5484 | } | 6248 | } |
5485 | 6249 | ||
5486 | struct kvm *kvm_arch_create_vm(void) | 6250 | int kvm_arch_init_vm(struct kvm *kvm) |
5487 | { | 6251 | { |
5488 | struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); | ||
5489 | |||
5490 | if (!kvm) | ||
5491 | return ERR_PTR(-ENOMEM); | ||
5492 | |||
5493 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 6252 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
5494 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 6253 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
5495 | 6254 | ||
5496 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 6255 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
5497 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); | 6256 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); |
5498 | 6257 | ||
5499 | rdtscll(kvm->arch.vm_init_tsc); | 6258 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); |
5500 | 6259 | ||
5501 | return kvm; | 6260 | return 0; |
5502 | } | 6261 | } |
5503 | 6262 | ||
5504 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) | 6263 | static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) |
@@ -5516,8 +6275,10 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
5516 | /* | 6275 | /* |
5517 | * Unpin any mmu pages first. | 6276 | * Unpin any mmu pages first. |
5518 | */ | 6277 | */ |
5519 | kvm_for_each_vcpu(i, vcpu, kvm) | 6278 | kvm_for_each_vcpu(i, vcpu, kvm) { |
6279 | kvm_clear_async_pf_completion_queue(vcpu); | ||
5520 | kvm_unload_vcpu_mmu(vcpu); | 6280 | kvm_unload_vcpu_mmu(vcpu); |
6281 | } | ||
5521 | kvm_for_each_vcpu(i, vcpu, kvm) | 6282 | kvm_for_each_vcpu(i, vcpu, kvm) |
5522 | kvm_arch_vcpu_free(vcpu); | 6283 | kvm_arch_vcpu_free(vcpu); |
5523 | 6284 | ||
@@ -5541,13 +6302,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
5541 | kfree(kvm->arch.vpic); | 6302 | kfree(kvm->arch.vpic); |
5542 | kfree(kvm->arch.vioapic); | 6303 | kfree(kvm->arch.vioapic); |
5543 | kvm_free_vcpus(kvm); | 6304 | kvm_free_vcpus(kvm); |
5544 | kvm_free_physmem(kvm); | ||
5545 | if (kvm->arch.apic_access_page) | 6305 | if (kvm->arch.apic_access_page) |
5546 | put_page(kvm->arch.apic_access_page); | 6306 | put_page(kvm->arch.apic_access_page); |
5547 | if (kvm->arch.ept_identity_pagetable) | 6307 | if (kvm->arch.ept_identity_pagetable) |
5548 | put_page(kvm->arch.ept_identity_pagetable); | 6308 | put_page(kvm->arch.ept_identity_pagetable); |
5549 | cleanup_srcu_struct(&kvm->srcu); | ||
5550 | kfree(kvm); | ||
5551 | } | 6309 | } |
5552 | 6310 | ||
5553 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 6311 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
@@ -5595,7 +6353,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
5595 | int user_alloc) | 6353 | int user_alloc) |
5596 | { | 6354 | { |
5597 | 6355 | ||
5598 | int npages = mem->memory_size >> PAGE_SHIFT; | 6356 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; |
5599 | 6357 | ||
5600 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | 6358 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { |
5601 | int ret; | 6359 | int ret; |
@@ -5610,12 +6368,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
5610 | "failed to munmap memory\n"); | 6368 | "failed to munmap memory\n"); |
5611 | } | 6369 | } |
5612 | 6370 | ||
6371 | if (!kvm->arch.n_requested_mmu_pages) | ||
6372 | nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | ||
6373 | |||
5613 | spin_lock(&kvm->mmu_lock); | 6374 | spin_lock(&kvm->mmu_lock); |
5614 | if (!kvm->arch.n_requested_mmu_pages) { | 6375 | if (nr_mmu_pages) |
5615 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | ||
5616 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 6376 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
5617 | } | ||
5618 | |||
5619 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 6377 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
5620 | spin_unlock(&kvm->mmu_lock); | 6378 | spin_unlock(&kvm->mmu_lock); |
5621 | } | 6379 | } |
@@ -5628,7 +6386,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm) | |||
5628 | 6386 | ||
5629 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 6387 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
5630 | { | 6388 | { |
5631 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE | 6389 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
6390 | !vcpu->arch.apf.halted) | ||
6391 | || !list_empty_careful(&vcpu->async_pf.done) | ||
5632 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED | 6392 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED |
5633 | || vcpu->arch.nmi_pending || | 6393 | || vcpu->arch.nmi_pending || |
5634 | (kvm_arch_interrupt_allowed(vcpu) && | 6394 | (kvm_arch_interrupt_allowed(vcpu) && |
@@ -5647,7 +6407,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | |||
5647 | 6407 | ||
5648 | me = get_cpu(); | 6408 | me = get_cpu(); |
5649 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) | 6409 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) |
5650 | if (atomic_xchg(&vcpu->guest_mode, 0)) | 6410 | if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) |
5651 | smp_send_reschedule(cpu); | 6411 | smp_send_reschedule(cpu); |
5652 | put_cpu(); | 6412 | put_cpu(); |
5653 | } | 6413 | } |
@@ -5683,9 +6443,151 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | |||
5683 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) | 6443 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) |
5684 | rflags |= X86_EFLAGS_TF; | 6444 | rflags |= X86_EFLAGS_TF; |
5685 | kvm_x86_ops->set_rflags(vcpu, rflags); | 6445 | kvm_x86_ops->set_rflags(vcpu, rflags); |
6446 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5686 | } | 6447 | } |
5687 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 6448 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
5688 | 6449 | ||
6450 | void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) | ||
6451 | { | ||
6452 | int r; | ||
6453 | |||
6454 | if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || | ||
6455 | is_error_page(work->page)) | ||
6456 | return; | ||
6457 | |||
6458 | r = kvm_mmu_reload(vcpu); | ||
6459 | if (unlikely(r)) | ||
6460 | return; | ||
6461 | |||
6462 | if (!vcpu->arch.mmu.direct_map && | ||
6463 | work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu)) | ||
6464 | return; | ||
6465 | |||
6466 | vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true); | ||
6467 | } | ||
6468 | |||
6469 | static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) | ||
6470 | { | ||
6471 | return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU)); | ||
6472 | } | ||
6473 | |||
6474 | static inline u32 kvm_async_pf_next_probe(u32 key) | ||
6475 | { | ||
6476 | return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1); | ||
6477 | } | ||
6478 | |||
6479 | static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6480 | { | ||
6481 | u32 key = kvm_async_pf_hash_fn(gfn); | ||
6482 | |||
6483 | while (vcpu->arch.apf.gfns[key] != ~0) | ||
6484 | key = kvm_async_pf_next_probe(key); | ||
6485 | |||
6486 | vcpu->arch.apf.gfns[key] = gfn; | ||
6487 | } | ||
6488 | |||
6489 | static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6490 | { | ||
6491 | int i; | ||
6492 | u32 key = kvm_async_pf_hash_fn(gfn); | ||
6493 | |||
6494 | for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) && | ||
6495 | (vcpu->arch.apf.gfns[key] != gfn && | ||
6496 | vcpu->arch.apf.gfns[key] != ~0); i++) | ||
6497 | key = kvm_async_pf_next_probe(key); | ||
6498 | |||
6499 | return key; | ||
6500 | } | ||
6501 | |||
6502 | bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6503 | { | ||
6504 | return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn; | ||
6505 | } | ||
6506 | |||
6507 | static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
6508 | { | ||
6509 | u32 i, j, k; | ||
6510 | |||
6511 | i = j = kvm_async_pf_gfn_slot(vcpu, gfn); | ||
6512 | while (true) { | ||
6513 | vcpu->arch.apf.gfns[i] = ~0; | ||
6514 | do { | ||
6515 | j = kvm_async_pf_next_probe(j); | ||
6516 | if (vcpu->arch.apf.gfns[j] == ~0) | ||
6517 | return; | ||
6518 | k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]); | ||
6519 | /* | ||
6520 | * k lies cyclically in ]i,j] | ||
6521 | * | i.k.j | | ||
6522 | * |....j i.k.| or |.k..j i...| | ||
6523 | */ | ||
6524 | } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j)); | ||
6525 | vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j]; | ||
6526 | i = j; | ||
6527 | } | ||
6528 | } | ||
6529 | |||
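The apf.gfns helpers above form a small power-of-two open-addressing hash with linear probing; kvm_del_async_pf_gfn() must re-home any later entry whose probe chain passed through the freed slot, which is what the "k lies cyclically in ]i,j]" test decides. Below is a standalone userspace model of the same algorithm, using hypothetical names (slot, add_gfn, del_gfn) and a deliberately trivial hash so the collisions are easy to follow:

#include <stdio.h>
#include <stdint.h>

#define NSLOTS	8U		/* power of two, like ASYNC_PF_PER_VCPU */
#define EMPTY	(~0ULL)

static uint64_t slot[NSLOTS];

static unsigned hashfn(uint64_t gfn)
{
	return (unsigned)gfn & (NSLOTS - 1);	/* trivial hash, for clarity */
}

static unsigned next_probe(unsigned key)
{
	return (key + 1) & (NSLOTS - 1);
}

static void add_gfn(uint64_t gfn)
{
	unsigned key = hashfn(gfn);

	while (slot[key] != EMPTY)
		key = next_probe(key);
	slot[key] = gfn;
}

static unsigned find_slot(uint64_t gfn)
{
	unsigned key = hashfn(gfn);
	unsigned n;

	for (n = 0; n < NSLOTS && slot[key] != gfn && slot[key] != EMPTY; n++)
		key = next_probe(key);
	return key;
}

static void del_gfn(uint64_t gfn)
{
	unsigned i, j, k;

	i = j = find_slot(gfn);
	for (;;) {
		slot[i] = EMPTY;
		do {
			j = next_probe(j);
			if (slot[j] == EMPTY)
				return;
			k = hashfn(slot[j]);
			/* keep scanning while k lies cyclically in ]i, j] */
		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
		/* slot[j]'s probe chain crossed the hole: move it back to i */
		slot[i] = slot[j];
		i = j;
	}
}

int main(void)
{
	unsigned n;

	for (n = 0; n < NSLOTS; n++)
		slot[n] = EMPTY;
	add_gfn(1); add_gfn(9); add_gfn(17);	/* all hash to slot 1 */
	del_gfn(9);
	printf("17 is still found, in slot %u\n", find_slot(17));
	return 0;
}

Deleting 9 moves 17 one slot back, so a later lookup of 17 still terminates at the right entry instead of stopping early at an empty slot.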
6530 | static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) | ||
6531 | { | ||
6532 | |||
6533 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, | ||
6534 | sizeof(val)); | ||
6535 | } | ||
6536 | |||
6537 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | ||
6538 | struct kvm_async_pf *work) | ||
6539 | { | ||
6540 | struct x86_exception fault; | ||
6541 | |||
6542 | trace_kvm_async_pf_not_present(work->arch.token, work->gva); | ||
6543 | kvm_add_async_pf_gfn(vcpu, work->arch.gfn); | ||
6544 | |||
6545 | if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || | ||
6546 | (vcpu->arch.apf.send_user_only && | ||
6547 | kvm_x86_ops->get_cpl(vcpu) == 0)) | ||
6548 | kvm_make_request(KVM_REQ_APF_HALT, vcpu); | ||
6549 | else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { | ||
6550 | fault.vector = PF_VECTOR; | ||
6551 | fault.error_code_valid = true; | ||
6552 | fault.error_code = 0; | ||
6553 | fault.nested_page_fault = false; | ||
6554 | fault.address = work->arch.token; | ||
6555 | kvm_inject_page_fault(vcpu, &fault); | ||
6556 | } | ||
6557 | } | ||
6558 | |||
6559 | void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | ||
6560 | struct kvm_async_pf *work) | ||
6561 | { | ||
6562 | struct x86_exception fault; | ||
6563 | |||
6564 | trace_kvm_async_pf_ready(work->arch.token, work->gva); | ||
6565 | if (is_error_page(work->page)) | ||
6566 | work->arch.token = ~0; /* broadcast wakeup */ | ||
6567 | else | ||
6568 | kvm_del_async_pf_gfn(vcpu, work->arch.gfn); | ||
6569 | |||
6570 | if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && | ||
6571 | !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { | ||
6572 | fault.vector = PF_VECTOR; | ||
6573 | fault.error_code_valid = true; | ||
6574 | fault.error_code = 0; | ||
6575 | fault.nested_page_fault = false; | ||
6576 | fault.address = work->arch.token; | ||
6577 | kvm_inject_page_fault(vcpu, &fault); | ||
6578 | } | ||
6579 | vcpu->arch.apf.halted = false; | ||
6580 | } | ||
6581 | |||
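For context only (a sketch, not part of this patch): on the guest side, the reason word written by apf_put_user() is consumed by the guest's page-fault handler, and the token shows up as the faulting address. The constant values and the kvm_vcpu_pv_apf_data layout below follow asm/kvm_para.h; handle_async_pf() itself is a simplified, hypothetical stand-in for the real guest handler:

#include <linux/types.h>

#define KVM_PV_REASON_PAGE_NOT_PRESENT	1
#define KVM_PV_REASON_PAGE_READY	2

struct kvm_vcpu_pv_apf_data {
	__u32 reason;
	__u8 pad[60];
	__u32 enabled;
};

static struct kvm_vcpu_pv_apf_data apf_reason;	/* per-vcpu, MSR-registered in real code */

/* Hypothetical guest #PF hook: token is the CR2 value injected by the host. */
static int handle_async_pf(unsigned long token)
{
	__u32 reason = apf_reason.reason;

	apf_reason.reason = 0;
	switch (reason) {
	case KVM_PV_REASON_PAGE_NOT_PRESENT:
		/* Host is faulting the page in: sleep until this token is signalled. */
		return 1;
	case KVM_PV_REASON_PAGE_READY:
		/* Page identified by token is resident again: wake the sleeper. */
		return 1;
	default:
		return 0;	/* not async-pf related; handle as a normal #PF */
	}
}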
6582 | bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) | ||
6583 | { | ||
6584 | if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED)) | ||
6585 | return true; | ||
6586 | else | ||
6587 | return !kvm_event_needs_reinjection(vcpu) && | ||
6588 | kvm_x86_ops->interrupt_allowed(vcpu); | ||
6589 | } | ||
6590 | |||
5689 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 6591 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
5690 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 6592 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
5691 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 6593 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |